Commit ad189eb9 by Tobias Fuchs

performance tests: added base_cpp/perf, added algorithms performance tests

parent 5abeb065
...@@ -6,6 +6,8 @@ ...@@ -6,6 +6,8 @@
build* build*
*~ *~
*# *#
*.swp
*.swo
# Eclipse CDT project files # Eclipse CDT project files
.project .project
......
...@@ -3,14 +3,16 @@ project (project_embb_algorithms) ...@@ -3,14 +3,16 @@ project (project_embb_algorithms)
file(GLOB_RECURSE EMBB_ALGORITHMS_CPP_SOURCES "src/*.cc" "src/*.h") file(GLOB_RECURSE EMBB_ALGORITHMS_CPP_SOURCES "src/*.cc" "src/*.h")
file(GLOB_RECURSE EMBB_ALGORITHMS_CPP_HEADERS "include/*.h") file(GLOB_RECURSE EMBB_ALGORITHMS_CPP_HEADERS "include/*.h")
file(GLOB_RECURSE EMBB_ALGORITHMS_CPP_TEST_SOURCES "test/*.cc" "test/*.h") file(GLOB_RECURSE EMBB_ALGORITHMS_CPP_TEST_SOURCES "test/*.cc" "test/*.h")
file(GLOB_RECURSE EMBB_ALGORITHMS_CPP_PERF_SOURCES "perf/*.cc" "perf/*.h")
# Execute the GroupSources macro # Execute the GroupSources macro
include(${CMAKE_SOURCE_DIR}/CMakeCommon/GroupSourcesMSVC.cmake) include(${CMAKE_SOURCE_DIR}/CMakeCommon/GroupSourcesMSVC.cmake)
GroupSourcesMSVC(include) GroupSourcesMSVC(include)
GroupSourcesMSVC(src) GroupSourcesMSVC(src)
GroupSourcesMSVC(test) GroupSourcesMSVC(test)
GroupSourcesMSVC(perf)
set (EMBB_ALGORITHMS_CPP_INCLUDE_DIRS "include" "src" "test") set (EMBB_ALGORITHMS_CPP_INCLUDE_DIRS "include" "src" "test" "perf")
include_directories(${EMBB_ALGORITHMS_CPP_INCLUDE_DIRS} include_directories(${EMBB_ALGORITHMS_CPP_INCLUDE_DIRS}
${CMAKE_CURRENT_SOURCE_DIR}/../base_c/include ${CMAKE_CURRENT_SOURCE_DIR}/../base_c/include
${CMAKE_CURRENT_BINARY_DIR}/../base_c/include ${CMAKE_CURRENT_BINARY_DIR}/../base_c/include
...@@ -25,12 +27,21 @@ add_library(embb_algorithms_cpp ${EMBB_ALGORITHMS_CPP_SOURCES} ...@@ -25,12 +27,21 @@ add_library(embb_algorithms_cpp ${EMBB_ALGORITHMS_CPP_SOURCES}
target_link_libraries(embb_algorithms_cpp embb_tasks_cpp) target_link_libraries(embb_algorithms_cpp embb_tasks_cpp)
if (BUILD_TESTS STREQUAL ON) if (BUILD_TESTS STREQUAL ON)
# Unit tests
include_directories(${CMAKE_CURRENT_BINARY_DIR}/../partest/include) include_directories(${CMAKE_CURRENT_BINARY_DIR}/../partest/include)
add_executable (embb_algorithms_cpp_test ${EMBB_ALGORITHMS_CPP_TEST_SOURCES}) add_executable (embb_algorithms_cpp_test ${EMBB_ALGORITHMS_CPP_TEST_SOURCES})
target_link_libraries(embb_algorithms_cpp_test embb_algorithms_cpp target_link_libraries(embb_algorithms_cpp_test embb_algorithms_cpp
embb_tasks_cpp embb_mtapi_c partest embb_base_cpp embb_tasks_cpp embb_mtapi_c partest embb_base_cpp
embb_base_c ${compiler_libs}) embb_base_c ${compiler_libs})
CopyBin(BIN embb_algorithms_cpp_test DEST ${local_install_dir}) CopyBin(BIN embb_algorithms_cpp_test DEST ${local_install_dir})
# Performance tests
include_directories(${CMAKE_CURRENT_BINARY_DIR}/../partest/include
${CMAKE_CURRENT_SOURCE_DIR}/../base_cpp/perf/include)
add_executable (embb_algorithms_cpp_perf ${EMBB_ALGORITHMS_CPP_PERF_SOURCES})
target_link_libraries(embb_algorithms_cpp_perf embb_algorithms_cpp
embb_tasks_cpp embb_mtapi_c partest embb_base_cpp
embb_base_c embb_base_cpp_perf ${compiler_libs})
CopyBin(BIN embb_algorithms_cpp_perf DEST ${local_install_dir})
endif() endif()
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/embb install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/embb
......
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EMBB_ALGORITHMS_PERF_COUNT_PERF_INL_H_
#define EMBB_ALGORITHMS_PERF_COUNT_PERF_INL_H_
#include <embb/base/perf/call_args.h>
#include <count_perf.h>
#include <embb/base/memory_allocation.h>
#include <embb/algorithms/count.h>
#include <index_iterator.h>
#include <embb/tasks/tasks.h>
namespace embb {
namespace algorithms {
namespace perf {
using embb::base::perf::CallArgs;
/**
 * Creates the serial Count benchmark.
 *
 * In RAM_STRESS mode a cache-aligned array of args.VectorSize() elements is
 * allocated and every element is set to its own index. In every other mode
 * no array is allocated and \c a is null.
 *
 * \param args Benchmark configuration. It is stored by reference, so the
 *             referenced object has to outlive this instance.
 */
template<typename T>
SerialCount<T>::SerialCount(const embb::base::perf::CallArgs & args)
: cargs(args), vector_size(args.VectorSize()), result(0) {
  if (cargs.StressMode() == CallArgs::RAM_STRESS) {
    // Allocation is declared in embb::base, which is not an enclosing
    // namespace of embb::algorithms::perf, so it is qualified explicitly.
    a = static_cast<T *>(embb::base::Allocation::AllocateCacheAligned(
      vector_size * sizeof(T)));
    for (size_t i = 0; i < vector_size; i++) {
      a[i] = static_cast<T>(i);
    }
  } else {
    a = 0;
  }
}
/**
 * Releases the input array if the constructor allocated one.
 */
template<typename T>
SerialCount<T>::~SerialCount() {
  if (a != 0) {
    // Qualified explicitly: Allocation is declared in embb::base.
    embb::base::Allocation::FreeAligned(a);
  }
}
/**
 * Counts the elements equal to static_cast<T>(vector_size / 2) and adds the
 * number of matches to \c result.
 *
 * When an input array exists (RAM_STRESS) the values are read from memory.
 * Otherwise the index itself is used as the element value, which mirrors
 * the index-iterator input of the parallel CPU_STRESS run and avoids
 * dereferencing the null array pointer.
 */
template<typename T>
void SerialCount<T>::Run() {
  const T element = static_cast<T>(vector_size / 2);
  if (a != 0) {
    for (size_t i = 0; i != vector_size; ++i) {
      if (a[i] == element) {
        ++result;
      }
    }
  } else {
    // No array was allocated: compare against the computed index values.
    for (size_t i = 0; i != vector_size; ++i) {
      if (static_cast<T>(i) == element) {
        ++result;
      }
    }
  }
}
/**
 * Creates the parallel Count benchmark.
 *
 * In RAM_STRESS mode a cache-aligned array of args.VectorSize() elements is
 * allocated and every element is set to its own index. In every other mode
 * no array is allocated and \c a is null.
 *
 * \param args Benchmark configuration. It is stored by reference, so the
 *             referenced object has to outlive this instance.
 */
template<typename T>
ParallelCount<T>::ParallelCount(const embb::base::perf::CallArgs & args)
: cargs(args), vector_size(args.VectorSize()), result(0) {
  if (cargs.StressMode() == CallArgs::RAM_STRESS) {
    // Allocation is declared in embb::base, which is not an enclosing
    // namespace of embb::algorithms::perf, so it is qualified explicitly.
    a = static_cast<T *>(embb::base::Allocation::AllocateCacheAligned(
      vector_size * sizeof(T)));
    for (size_t i = 0; i < vector_size; i++) {
      a[i] = static_cast<T>(i);
    }
  } else {
    a = 0;
  }
}
/**
 * Releases the input array if the constructor allocated one.
 */
template<typename T>
ParallelCount<T>::~ParallelCount() {
  if (a != 0) {
    // Qualified explicitly: Allocation is declared in embb::base.
    embb::base::Allocation::FreeAligned(a);
  }
}
/**
 * Runs embb::algorithms::Count for the value static_cast<T>(vector_size / 2)
 * and stores the returned count in \c result.
 *
 * CPU_STRESS iterates over an IndexIterator range, RAM_STRESS over the
 * allocated array. In any other stress mode nothing is executed.
 *
 * \param numThreads Used only to derive the block size
 *                   (vector_size / numThreads).
 */
template<typename T>
void ParallelCount<T>::Run(unsigned int numThreads) {
  T needle = static_cast<T>(vector_size / 2);

  if (cargs.StressMode() == CallArgs::CPU_STRESS) {
    // The index iterator yields the index as value, so no memory is read.
    result = static_cast<size_t>(
      embb::algorithms::Count(
        IndexIterator<T>(0),
        IndexIterator<T>(static_cast<int>(vector_size)),
        needle,
        embb::tasks::ExecutionPolicy(),
        vector_size / numThreads));
    return;
  }

  if (cargs.StressMode() == CallArgs::RAM_STRESS) {
    // Input values are read from the array filled by the constructor.
    result = static_cast<size_t>(
      embb::algorithms::Count(
        a,
        a + vector_size,
        needle,
        embb::tasks::ExecutionPolicy(),
        vector_size / numThreads));
  }
}
} // namespace perf
} // namespace algorithms
} // namespace embb
#endif /* EMBB_ALGORITHMS_PERF_COUNT_PERF_INL_H_ */
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EMBB_ALGORITHMS_CPP_PERF_COUNT_PERF_H_
#define EMBB_ALGORITHMS_CPP_PERF_COUNT_PERF_H_
#include <embb/base/perf/call_args.h>
#include <iterator>
namespace embb {
namespace algorithms {
namespace perf {
/**
 * Serial variant of the Count performance test.
 *
 * Provides the Pre() / Run() / Post() phases; Run() takes no thread count.
 * Copy construction and assignment are declared private to prohibit copying.
 */
template<typename T>
class SerialCount {
public:
// Stores args by reference; allocates the input array in RAM_STRESS mode.
explicit SerialCount(const embb::base::perf::CallArgs & args);
// Frees the input array if one was allocated.
~SerialCount();
// Setup phase; intentionally empty.
void Pre() { }
// Counts the elements equal to static_cast<T>(vector_size / 2).
void Run();
// Teardown phase; intentionally empty.
void Post() { }
private:
// Benchmark configuration, held by reference (must outlive this object).
const embb::base::perf::CallArgs & cargs;
// Number of input elements, taken from args.VectorSize().
const size_t vector_size;
// Input array; null unless the stress mode is RAM_STRESS.
T * a;
// Number of matches; Run() increments it and never resets it.
size_t result;
/* prohibit copy and assignment */
SerialCount(const SerialCount & other);
SerialCount & operator=(const SerialCount & other);
};
/**
 * Parallel variant of the Count performance test, built on
 * embb::algorithms::Count.
 *
 * Provides the Pre() / Run() / Post() phases; Run() takes the number of
 * threads. Copy construction and assignment are declared private to
 * prohibit copying.
 */
template<typename T>
class ParallelCount {
public:
// Stores args by reference; allocates the input array in RAM_STRESS mode.
explicit ParallelCount(const embb::base::perf::CallArgs & args);
// Frees the input array if one was allocated.
~ParallelCount();
// Setup phase; intentionally empty.
void Pre() { }
// Runs Count with a block size of vector_size / numThreads.
void Run(unsigned int numThreads);
// Teardown phase; intentionally empty.
void Post() { }
private:
// Benchmark configuration, held by reference (must outlive this object).
const embb::base::perf::CallArgs & cargs;
// Number of input elements, taken from args.VectorSize().
const size_t vector_size;
// Input array; null unless the stress mode is RAM_STRESS.
T * a;
// Count returned by the most recent Run().
size_t result;
/* prohibit copy and assignment */
ParallelCount(const ParallelCount & other);
ParallelCount & operator=(const ParallelCount & other);
};
} // namespace perf
} // namespace algorithms
} // namespace embb
#include <count_perf-inl.h>
#endif /* EMBB_ALGORITHMS_CPP_PERF_COUNT_PERF_H_ */
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EMBB_ALGORITHMS_PERF_FOR_EACH_PERF_INL_H_
#define EMBB_ALGORITHMS_PERF_FOR_EACH_PERF_INL_H_
#include <for_each_perf.h>
#include <embb/algorithms/for_each.h>
#include <index_iterator.h>
#include <embb/base/perf/call_args.h>
#include <embb/base/memory_allocation.h>
#include <embb/tasks/tasks.h>
namespace embb {
namespace algorithms {
namespace perf {
/**
 * Creates the serial ForEach benchmark.
 *
 * In RAM_STRESS mode a cache-aligned array of args.VectorSize() elements is
 * allocated and each element is set to its index; otherwise \c v is null.
 *
 * \param args Benchmark configuration, stored by reference.
 */
template<typename T>
SerialForEach<T>::SerialForEach(const embb::base::perf::CallArgs & args)
: cargs(args), op(args), vector_size(args.VectorSize()) {
  const bool from_memory = (cargs.StressMode() == CallArgs::RAM_STRESS);
  v = from_memory
    ? static_cast<T *>(embb::base::Allocation::AllocateCacheAligned(
        vector_size * sizeof(T)))
    : static_cast<T *>(0);
  if (from_memory) {
    size_t index = 0;
    while (index < vector_size) {
      v[index] = static_cast<T>(index);
      ++index;
    }
  }
}
/**
 * Releases the input array if the constructor allocated one.
 *
 * The decision is based on the owned pointer rather than on the stress mode
 * read through \c cargs, so destruction does not depend on the referenced
 * CallArgs object still being alive and matches the other perf classes.
 */
template<typename T>
SerialForEach<T>::~SerialForEach() {
  if (v != 0) {
    embb::base::Allocation::FreeAligned(v);
  }
}
/**
 * Applies the ForEachOp to every element, one after another.
 *
 * CPU_STRESS feeds the operation with the loop index converted to T, so no
 * memory is read; RAM_STRESS applies it in place to the allocated array.
 * In any other stress mode nothing is executed.
 */
template<typename T>
void SerialForEach<T>::Run() {
  if (cargs.StressMode() == CallArgs::CPU_STRESS) {
    for (size_t i = 0; i < vector_size; i++) {
      // Named 'value' so it does not shadow the member array 'v'.
      T value = static_cast<T>(i);
      op(value);
    }
  } else if (cargs.StressMode() == CallArgs::RAM_STRESS) {
    for (size_t i = 0; i < vector_size; i++) {
      op(v[i]);
    }
  }
}
/**
 * Creates the parallel ForEach benchmark.
 *
 * In RAM_STRESS mode a cache-aligned array of args.VectorSize() elements is
 * allocated and each element is set to its index; otherwise \c v is null.
 *
 * \param args Benchmark configuration, stored by reference.
 */
template<typename T>
ParallelForEach<T>::ParallelForEach(const embb::base::perf::CallArgs & args)
: cargs(args), vector_size(args.VectorSize()) {
  const bool from_memory = (cargs.StressMode() == CallArgs::RAM_STRESS);
  if (from_memory) {
    v = static_cast<T *>(
      embb::base::Allocation::AllocateCacheAligned(vector_size * sizeof(T)));
    for (size_t index = 0; index != vector_size; ++index) {
      v[index] = static_cast<T>(index);
    }
  } else {
    v = 0;
  }
}
/**
 * Releases the input array if the constructor allocated one.
 */
template<typename T>
ParallelForEach<T>::~ParallelForEach() {
  if (v == 0) {
    return;
  }
  embb::base::Allocation::FreeAligned(v);
}
/**
 * Runs embb::algorithms::ForEach with a ForEachFunctor built from the
 * stored call arguments.
 *
 * CPU_STRESS iterates over an IndexIterator range, RAM_STRESS over the
 * allocated array. In any other stress mode nothing is executed.
 *
 * \param numThreads Used only to derive the block size
 *                   (vector_size / numThreads).
 */
template<typename T>
void ParallelForEach<T>::Run(unsigned int numThreads) {
  if (cargs.StressMode() == CallArgs::CPU_STRESS) {
    // Input values are computed from the index, no memory is read.
    ForEachFunctor<T> functor(cargs);
    embb::algorithms::ForEach(
      IndexIterator<T>(0),
      IndexIterator<T>(static_cast<int>(vector_size)),
      functor,
      embb::tasks::ExecutionPolicy(),
      vector_size / numThreads);
    return;
  }

  if (cargs.StressMode() == CallArgs::RAM_STRESS) {
    // Input values are read from the array filled by the constructor.
    ForEachFunctor<T> functor(cargs);
    embb::algorithms::ForEach(
      v,
      v + vector_size,
      functor,
      embb::tasks::ExecutionPolicy(),
      vector_size / numThreads);
  }
}
} // namespace perf
} // namespace algorithms
} // namespace embb
#endif /* EMBB_ALGORITHMS_PERF_FOR_EACH_PERF_INL_H_ */
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EMBB_ALGORITHMS_PERF_FOR_EACH_PERF_H_
#define EMBB_ALGORITHMS_PERF_FOR_EACH_PERF_H_
#include <embb/base/perf/call_args.h>
#include <cmath>
namespace embb {
namespace algorithms {
namespace perf {
using embb::base::perf::CallArgs;
/**
 * Operation performed in each loop iteration.
 *
 * Executes a fixed arithmetic sequence (square and double, square root,
 * divide by sqrt(2)) LoadFactor() times on the value and writes the result
 * back. The repeated square roots are deliberately left inside the loop:
 * they are the artificial load of the benchmark.
 *
 * std::sqrt is used so that the overload matching T is selected; the
 * unqualified name is not guaranteed to be declared by <cmath>. T is
 * expected to be a floating-point type.
 */
template<typename T>
class ForEachOp {
 public:
  /** Reads the number of repetitions from args.LoadFactor(). */
  explicit ForEachOp(const CallArgs & args) :
    load_factor(args.LoadFactor()) { }

  /** Applies the load to \c val in place. */
  void operator()(T & val) const {
    T x = val;
    for (unsigned int i = 0; i < load_factor; i++) {
      x = 2 * x * x;
      x = std::sqrt(x);
      x = x / std::sqrt(static_cast<T>(2));
    }
    val = x;
  }

 private:
  /** Number of repetitions of the arithmetic sequence per element. */
  unsigned int load_factor;
};
/**
 * Copyable function object handed to embb::algorithms::ForEach. It forwards
 * every element to a ForEachOp configured from the call arguments.
 */
template<typename T>
class ForEachFunctor {
 public:
  /** Keeps a reference to \c args and builds the operation from it. */
  ForEachFunctor(const CallArgs & args) :
    cargs(args), op(args) { }

  /** Applies the operation to \c value in place. */
  void operator()(T & value) const {
    op(value);
  }

  /** Copies the operation and refers to the same call arguments. */
  ForEachFunctor(const ForEachFunctor & other) :
    cargs(other.cargs), op(other.op) { }

  /**
   * Copies the operation of \c other.
   *
   * \c cargs is a reference member and cannot be re-bound, so it keeps
   * referring to the object this functor was constructed with.
   */
  ForEachFunctor & operator=(const ForEachFunctor & other) {
    // Self-assignment check compares addresses (the original compared a
    // pointer with an object, which does not compile when instantiated).
    if (&other != this) {
      op = other.op;
    }
    return *this;
  }

 private:
  /** Call arguments the functor was created from (not owned). */
  const CallArgs & cargs;
  /** Operation applied to every element. */
  ForEachOp<T> op;
};
/**
 * Serial variant of the ForEach performance test.
 *
 * Provides the Pre() / Run() / Post() phases; Run() takes no thread count.
 * Copy construction and assignment are declared private to prohibit copying.
 */
template<typename T>
class SerialForEach {
public:
// Stores args by reference; allocates the input array in RAM_STRESS mode.
explicit SerialForEach(const embb::base::perf::CallArgs & args);
// Frees the input array allocated for RAM_STRESS mode.
~SerialForEach();
// Setup phase; intentionally empty.
void Pre() { }
// Applies op to every element in a plain loop.
void Run();
// Teardown phase; intentionally empty.
void Post() { }
private:
// Benchmark configuration, held by reference (must outlive this object).
const embb::base::perf::CallArgs & cargs;
// Operation applied to each element, configured from the call arguments.
ForEachOp<T> op;
// Number of input elements, taken from args.VectorSize().
const size_t vector_size;
// Input array; null unless the stress mode is RAM_STRESS.
T * v;
/* prohibit copy and assignment */
SerialForEach(const SerialForEach & other);
SerialForEach & operator=(const SerialForEach & other);
};
/**
 * Parallel variant of the ForEach performance test, built on
 * embb::algorithms::ForEach.
 *
 * Provides the Pre() / Run() / Post() phases; Run() takes the number of
 * threads. Copy construction and assignment are declared private to
 * prohibit copying.
 */
template<typename T>
class ParallelForEach {
public:
// Stores args by reference; allocates the input array in RAM_STRESS mode.
explicit ParallelForEach(const embb::base::perf::CallArgs & args);
// Frees the input array if one was allocated.
~ParallelForEach();
// Setup phase; intentionally empty.
void Pre() { }
// Runs ForEach with a block size of vector_size / numThreads.
void Run(unsigned int numThreads);
// Teardown phase; intentionally empty.
void Post() { }
private:
// Benchmark configuration, held by reference (must outlive this object).
const embb::base::perf::CallArgs & cargs;
// Number of input elements, taken from args.VectorSize().
const size_t vector_size;
// Input array; null unless the stress mode is RAM_STRESS.
T * v;
/* prohibit copy and assignment */
ParallelForEach(const ParallelForEach & other);
ParallelForEach & operator=(const ParallelForEach & other);
};
} // namespace perf
} // namespace algorithms
} // namespace embb
#include <for_each_perf-inl.h>
#endif /* EMBB_ALGORITHMS_PERF_FOR_EACH_PERF_H_ */
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EMBB_ALGORITHMS_PERF_INTERNAL_INDEX_ITERATOR_H_
#define EMBB_ALGORITHMS_PERF_INTERNAL_INDEX_ITERATOR_H_
#include <iterator>
namespace embb {
namespace algorithms {
namespace perf {
/**
 * Auxiliary helper for performance tests. Iterator returns index value
 * on dereferencing instead of resolving an underlying value. Prevents
 * memory access in CPU-only test cases as parallel algorithms in embb
 * currently do not support indices as RAI.
 *
 * The iterator stores an integer position (\c _ptr) and a scratch value
 * (\c _value). Dereferencing writes the position, converted to T, into the
 * scratch value and returns a reference to it, so the returned reference is
 * only valid until the next dereference of the same iterator object.
 *
 * Comparison, difference and sum operators are const-qualified so that they
 * work on const iterators and so that <tt>it + n</tt> / <tt>it - n</tt>
 * resolve unambiguously to the integer overloads.
 */
template<typename T>
class IndexIterator :
  public ::std::iterator< ::std::random_access_iterator_tag, T >
{
 private:
  typedef IndexIterator self_type;

 public:
  typedef ::std::random_access_iterator_tag iterator_category;
  typedef T value_type;
  typedef int difference_type;
  typedef int distance_type;
  typedef T * pointer;
  typedef T & reference;

  /** Iterator at position 0. */
  IndexIterator() : _ptr(0), _value(static_cast<T>(0)) { }
  /** Iterator at the position read from \c *rhs. */
  IndexIterator(int * rhs) : _ptr(*rhs), _value(static_cast<T>(0)) { }
  /**
   * Iterator at position \c rhs. Intentionally not explicit: the mixed
   * integer/iterator operators below rely on this conversion.
   */
  IndexIterator(int rhs) : _ptr(rhs), _value(static_cast<T>(0)) { }
  IndexIterator(const self_type &rhs) : _ptr(rhs._ptr), _value(rhs._value) { }

  /** Advances the position by \c rhs. */
  inline self_type& operator+=(const difference_type& rhs) {
    _ptr += rhs; return *this;
  }
  /** Moves the position back by \c rhs. */
  inline self_type& operator-=(const difference_type& rhs) {
    _ptr -= rhs; return *this;
  }

  /** Returns the current position converted to T. */
  inline T & operator*() {
    _value = static_cast<T>(_ptr);
    return _value;
  }
  /** Returns the address of the scratch value holding the position. */
  inline T * operator->() {
    _value = static_cast<T>(_ptr);
    return &_value;
  }
  /** Returns the value at offset \c rhs, i.e. position + rhs as T. */
  inline T& operator[](const difference_type& rhs) {
    _value = static_cast<T>(_ptr + rhs);
    return _value;
  }

  inline self_type& operator++() {
    ++_ptr; return *this;
  }
  inline self_type& operator--() {
    --_ptr; return *this;
  }

  /** Sum of the two positions. */
  inline difference_type operator+(const self_type& rhs) const {
    return difference_type(_ptr + rhs._ptr);
  }
  inline self_type operator+(const difference_type& rhs) const {
    return self_type(_ptr + rhs);
  }
  inline self_type operator+(difference_type& rhs) const {
    return self_type(_ptr + rhs);
  }
  /**
   * Const overload kept for backward compatibility: it does not modify
   * this iterator but returns a new one advanced by \c rhs.
   */
  inline self_type operator+=(const difference_type& rhs) const {
    return self_type(_ptr + rhs);
  }

  /** Distance between the two positions. */
  inline difference_type operator-(const self_type& rhs) const {
    return difference_type(_ptr - rhs._ptr);
  }
  inline self_type operator-(const difference_type& rhs) const {
    return self_type(_ptr - rhs);
  }
  inline self_type operator-(difference_type& rhs) const {
    return self_type(_ptr - rhs);
  }
  /**
   * Const overload kept for backward compatibility: it does not modify
   * this iterator but returns a new one moved back by \c rhs.
   */
  inline self_type operator-=(const difference_type& rhs) const {
    return self_type(_ptr - rhs);
  }

  // Mixed integer/iterator forms. Each converts lhs to an iterator, combines
  // the two positions and converts the resulting integer back.
  friend inline self_type operator+(const difference_type& lhs, const self_type& rhs) {
    return self_type(lhs) + rhs;
  }
  friend inline self_type operator+=(const difference_type& lhs, const self_type& rhs) {
    return self_type(lhs) + rhs;
  }
  friend inline self_type operator-(const difference_type& lhs, const self_type& rhs) {
    return self_type(lhs) - rhs;
  }
  friend inline self_type operator-=(const difference_type& lhs, const self_type& rhs) {
    return self_type(lhs) - rhs;
  }

  // Comparisons are based on the position only.
  inline bool operator==(const self_type& rhs) const {
    return _ptr == rhs._ptr;
  }
  inline bool operator!=(const self_type& rhs) const {
    return _ptr != rhs._ptr;
  }
  inline bool operator>(const self_type& rhs) const {
    return _ptr > rhs._ptr;
  }
  inline bool operator<(const self_type& rhs) const {
    return _ptr < rhs._ptr;
  }
  inline bool operator>=(const self_type& rhs) const {
    return _ptr >= rhs._ptr;
  }
  inline bool operator<=(const self_type& rhs) const {
    return _ptr <= rhs._ptr;
  }

 protected:
  /** Current position (the index), despite the name not a pointer. */
  int _ptr;
  /** Scratch storage for the value handed out by dereferencing. */
  T _value;
};
} // namespace perf
} // namespace algorithms
} // namespace embb
#endif /* EMBB_ALGORITHMS_PERF_INTERNAL_INDEX_ITERATOR_H_ */
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <string>
#include <fstream>
#include <iostream>
#include <iomanip>
#include <partest/partest.h>
#include <embb/base/core_set.h>
#include <embb/base/thread.h>
#include <embb/base/c/log.h>
#include <embb/base/c/internal/thread_index.h>
#include <embb/tasks/tasks.h>
#include <embb/base/perf/call_args.h>
#include <embb/base/perf/timer.h>
#include <for_each_perf.h>
#include <reduce_perf.h>
#include <scan_perf.h>
#include <count_perf.h>
#include <quick_sort_perf.h>
#include <merge_sort_perf.h>
using namespace embb::algorithms::perf;
using embb::base::perf::Timer;
using embb::base::perf::CallArgs;
/**
 * Reports one measurement as a comma-separated row.
 *
 * The row is printed to standard output (without the test name) and
 * appended, with the test name as first column, to
 * "performance_tests_result.csv" in the current working directory.
 *
 * \param name    Name of the test, first column of the CSV file.
 * \param threads Number of threads of the run (callers pass 0 for the
 *                serial run).
 * \param args    Call arguments providing the remaining columns.
 * \param elapsed Measured duration as returned by the timer.
 * \param speedup Speedup relative to the reference run.
 */
void ReportResult(
  const std::string & name,
  unsigned int threads,
  const CallArgs & args,
  double elapsed,
  double speedup) {
  // std::fixed and std::setprecision are sticky. Restore the state of
  // std::cout afterwards so every console row is formatted like the rows
  // written through the freshly created file stream below.
  const std::ios_base::fmtflags saved_flags = std::cout.flags();
  const std::streamsize saved_precision = std::cout.precision();
  std::cout
    << args.VectorSize() << ","
    << args.ElementTypeName() << ","
    << args.LoadFactor() << ","
    << args.StressModeName() << ","
    << threads << ","
    << std::fixed << elapsed << ","
    << std::setprecision(3) << speedup
    << std::endl;
  std::cout.flags(saved_flags);
  std::cout.precision(saved_precision);

  std::ofstream file;
  std::string filename = "performance_tests_result.csv";
  file.open(filename.c_str(), ::std::ofstream::out | ::std::ofstream::app);
  if (!file.is_open()) {
    // Reporting stays best-effort, but the missing file is made visible.
    std::cerr << "Could not open " << filename << " for appending"
              << std::endl;
    return;
  }
  file
    << name << ","
    << args.VectorSize() << ","
    << args.ElementTypeName() << ","
    << args.LoadFactor() << ","
    << args.StressModeName() << ","
    << threads << ","
    << std::fixed << elapsed << ","
    << std::setprecision(3) << speedup
    << std::endl;
}
/**
 * Runs one benchmark and reports duration and speedup for every tested
 * thread count.
 *
 * The speedup reference is chosen by args.ParallelBaseReference():
 *  - 0: the serial implementation is run once and its duration is the
 *       reference (reported with thread count 0 and speedup 1.0);
 *  - otherwise: the parallel implementation running with one thread is
 *       the reference.
 * In both cases the parallel implementation is then run with 1, 2, 3, 4,
 * 8, 12, ... threads up to args.MaxThreads(). The task node is initialized
 * before and finalized after every parallel run.
 *
 * \tparam TestSerial   Type providing a constructor taking CallArgs and
 *                      Run().
 * \tparam TestParallel Type providing a constructor taking CallArgs,
 *                      Pre(), Run(unsigned int) and Post().
 * \param args Call arguments of the benchmark.
 * \param name Test name used for console output and reporting.
 */
template< typename TestSerial, typename TestParallel >
void RunPerformanceTest(
  const embb::base::perf::CallArgs & args,
  const std::string & name) {
  std::cout << "--- Running " << name << std::endl;
  // Initialize new test instances:
  TestParallel testParallel(args);
  // Whether to use serial execution or parallel execution using 1 thread
  // as speedup reference:
  const bool parallelReference = (args.ParallelBaseReference() != 0);
  // Base value to compute speedup; parallel execution
  // with 1 thread or serial execution.
  double baseDuration = 0;
  if (!parallelReference) {
    TestSerial testSerial(args);
    // Serial run:
    Timer t;
    testSerial.Run();
    double elapsed = t.Elapsed();
    ReportResult(
      name,
      0,
      args,
      elapsed,
      1.0);
    baseDuration = elapsed;
  }
  // Run executions with incrementing number of threads. The run with one
  // thread is always included: in parallel-reference mode it provides the
  // base duration, in serial-reference mode it is an ordinary measurement
  // that must not replace the serial base duration.
  unsigned int threads = 1;
  embb_internal_thread_index_set_max(args.MaxThreads());
  while (threads <= args.MaxThreads()) {
    embb_internal_thread_index_reset();
    // Configure cores to be used by EMBB:
    embb::base::CoreSet cores(false);
    for (unsigned int coreId = 0; coreId < threads; ++coreId) {
      cores.Add(coreId);
    }
    embb::tasks::Node::Initialize(
      1, 1, cores,
      MTAPI_NODE_MAX_TASKS_DEFAULT * 8,
      MTAPI_NODE_MAX_GROUPS_DEFAULT * 8,
      MTAPI_NODE_MAX_QUEUES_DEFAULT * 8,
      MTAPI_NODE_QUEUE_LIMIT_DEFAULT * 8,
      MTAPI_NODE_MAX_PRIORITIES_DEFAULT);
    // Test setup:
    testParallel.Pre();
    // Initialize timer:
    Timer t;
    // Run the test body:
    testParallel.Run(threads);
    // Report duration:
    double elapsed = t.Elapsed();
    if (parallelReference && threads == 1) {
      baseDuration = elapsed;
    }
    // A duration of zero would yield inf/nan; report a speedup of 0 then.
    const double speedup =
      (elapsed > 0) ? (baseDuration / elapsed) : 0.0;
    ReportResult(
      name,
      threads,
      args,
      elapsed,
      speedup);
    // Test teardown:
    testParallel.Post();
    if (threads < 4) {
      ++threads;
    } else {
      threads += 4;
    }
    embb::tasks::Node::Finalize();
  }
}
/**
 * Runs every algorithm benchmark for the element type \c EType, in the
 * order ForEach, Reduce, Scan, Count, Quicksort, Mergesort.
 *
 * \param args Call arguments passed unchanged to every benchmark.
 */
template<typename EType>
void RunPerformanceTests(const CallArgs & args) {
  RunPerformanceTest<
    SerialForEach<EType>, ParallelForEach<EType> >(args, "ForEach");
  RunPerformanceTest<
    SerialReduce<EType>, ParallelReduce<EType> >(args, "Reduce");
  RunPerformanceTest<
    SerialScan<EType>, ParallelScan<EType> >(args, "Scan");
  RunPerformanceTest<
    SerialCount<EType>, ParallelCount<EType> >(args, "Count");
  RunPerformanceTest<
    SerialQuickSort<EType>, ParallelQuickSort<EType> >(args, "Quicksort");
  RunPerformanceTest<
    SerialMergeSort<EType>, ParallelMergeSort<EType> >(args, "Mergesort");
}
/**
 * Entry point of the algorithms performance tests.
 *
 * Parses the command line into CallArgs, prints the settings and runs all
 * benchmarks for the selected element type.
 *
 * \return 0 on success, 1 if the arguments could not be parsed or no
 *         supported element type is selected.
 */
int main(int argc, char * argv[]) {
  // Parse command line arguments:
  embb::base::perf::CallArgs args;
  try {
    args.Parse(argc, argv);
  } catch (const ::std::runtime_error & re) {
    // Do not start a benchmark run with arguments that failed to parse.
    ::std::cerr << re.what() << ::std::endl;
    return 1;
  }
  // NOTE(review): trace logging is coupled to the parallel base reference
  // setting here; this looks like a debugging aid - confirm it is intended.
  if (args.ParallelBaseReference() == 1) {
    embb_log_set_log_level(EMBB_LOG_LEVEL_TRACE);
  }
  // Print test settings:
  args.Print(::std::cout);
  // Run tests:
  switch (args.ElementType()) {
    case CallArgs::FLOAT:
      RunPerformanceTests<float>(args);
      break;
    case CallArgs::DOUBLE:
      RunPerformanceTests<double>(args);
      break;
    case CallArgs::UNDEFINED_SCALAR_TYPE:
    default:
      ::std::cerr << "No supported element type selected, no tests run"
                  << ::std::endl;
      return 1;
  }
  return 0;
}
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EMBB_ALGORITHMS_PERF_MERGE_SORT_PERF_INL_H_
#define EMBB_ALGORITHMS_PERF_MERGE_SORT_PERF_INL_H_
#include <embb/base/perf/call_args.h>
#include <embb/base/memory_allocation.h>
#include <embb/algorithms/merge_sort.h>
#include <embb/tasks/tasks.h>
#include <algorithm>
#include <functional>
namespace embb {
namespace algorithms {
namespace perf {
/**
 * Creates the serial sort benchmark: allocates a cache-aligned array of
 * args.VectorSize() elements and sets each element to its index.
 *
 * \param args Benchmark configuration, stored by reference.
 */
template<typename T>
SerialMergeSort<T>::SerialMergeSort(const embb::base::perf::CallArgs & args)
: cargs(args), vector_size(args.VectorSize()) {
  v = static_cast<T *>(
    embb::base::Allocation::AllocateCacheAligned(vector_size * sizeof(T)));
  size_t index = 0;
  while (index < vector_size) {
    v[index] = static_cast<T>(index);
    ++index;
  }
}
// Releases the input array allocated by the constructor.
template<typename T>
SerialMergeSort<T>::~SerialMergeSort() {
embb::base::Allocation::FreeAligned(v);
}
// Serial reference run: sorts the whole array in place with std::sort,
// using std::greater<T> as comparison (descending order).
template<typename T>
void SerialMergeSort<T>::Run() {
std::sort(v, v + vector_size, std::greater<T>());
}
/**
 * Creates the parallel merge sort benchmark: allocates a cache-aligned
 * array of args.VectorSize() elements and sets each element to its index.
 *
 * \param args Benchmark configuration, stored by reference.
 */
template<typename T>
ParallelMergeSort<T>::ParallelMergeSort(const embb::base::perf::CallArgs & args)
: cargs(args), vector_size(args.VectorSize()) {
  v = static_cast<T *>(
    embb::base::Allocation::AllocateCacheAligned(vector_size * sizeof(T)));
  for (size_t index = 0; index != vector_size; ++index) {
    v[index] = static_cast<T>(index);
  }
}
// Releases the input array allocated by the constructor.
template<typename T>
ParallelMergeSort<T>::~ParallelMergeSort() {
embb::base::Allocation::FreeAligned(v);
}
/**
 * Sorts the array in place with embb::algorithms::MergeSortAllocate, using
 * std::greater<T> as comparison.
 *
 * Input values are always read from memory; a CPU-only variant is not
 * possible because merge sort works in place.
 *
 * \param numThreads Used only to derive the block size
 *                   (vector_size / numThreads).
 */
template<typename T>
void ParallelMergeSort<T>::Run(unsigned int numThreads) {
  const size_t block_size = vector_size / numThreads;
  embb::algorithms::MergeSortAllocate(
    v,
    v + vector_size,
    std::greater<T>(),
    embb::tasks::ExecutionPolicy(),
    block_size);
}
} // namespace perf
} // namespace algorithms
} // namespace embb
#endif // EMBB_ALGORITHMS_PERF_MERGE_SORT_PERF_INL_H_
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EMBB_ALGORITHMS_PERF_MERGE_SORT_PERF_H_
#define EMBB_ALGORITHMS_PERF_MERGE_SORT_PERF_H_
#include <embb/base/perf/call_args.h>
#include <cmath>
namespace embb {
namespace algorithms {
namespace perf {
using embb::base::perf::CallArgs;
/**
 * Serial reference for the merge sort performance test.
 *
 * Provides the Pre() / Run() / Post() phases; Run() takes no thread count.
 * Copy construction and assignment are declared private to prohibit copying.
 */
template<typename T>
class SerialMergeSort {
public:
// Stores args by reference and allocates the index-filled input array.
explicit SerialMergeSort(const embb::base::perf::CallArgs & args);
// Frees the input array.
~SerialMergeSort();
// Setup phase; intentionally empty.
void Pre() { }
// Sorts the array in descending order with std::sort.
void Run();
// Teardown phase; intentionally empty.
void Post() { }
private:
// NOTE(review): declared with a qsort-style signature, but no definition or
// use is visible alongside the other member definitions - confirm it is
// still needed.
static int Greater(const void *a, const void *b);
// Benchmark configuration, held by reference (must outlive this object).
const embb::base::perf::CallArgs & cargs;
// Number of input elements, taken from args.VectorSize().
const size_t vector_size;
// Input array owned by this object.
T * v;
/* prohibit copy and assignment */
SerialMergeSort(const SerialMergeSort & other);
SerialMergeSort & operator=(const SerialMergeSort & other);
};
/**
 * Parallel merge sort performance test case.
 *
 * Only the interface is declared here; the out-of-line members are expected
 * to come from merge_sort_perf-inl.h, which this header includes last.
 *
 * \tparam T Element type of the buffer \c v.
 */
template<typename T>
class ParallelMergeSort {
 public:
  /**
   * Creates the test case.
   * \param args Call arguments; kept by reference in \c cargs, so the object
   *             passed in has to outlive this instance.
   */
  explicit ParallelMergeSort(const embb::base::perf::CallArgs & args);
  ~ParallelMergeSort();

  /** Empty hook (presumably run by the harness before Run() - confirm). */
  void Pre() { }
  /**
   * Runs the operation under test; defined out of line.
   * \param numThreads Thread count the run is configured for.
   */
  void Run(unsigned int numThreads);
  /** Empty hook (presumably run by the harness after Run() - confirm). */
  void Post() { }

 private:
  /* Declared but not defined: copying and assignment are prohibited. */
  ParallelMergeSort(const ParallelMergeSort & other);
  ParallelMergeSort & operator=(const ParallelMergeSort & other);

  const embb::base::perf::CallArgs & cargs;  ///< Call arguments (not owned)
  const size_t vector_size;                  ///< Element count
  T * v;                                     ///< Element buffer
};
} // namespace perf
} // namespace algorithms
} // namespace embb
#include <merge_sort_perf-inl.h>
#endif // EMBB_ALGORITHMS_PERF_MERGE_SORT_PERF_H_
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EMBB_ALGORITHMS_PERF_QUICK_SORT_PERF_INL_H_
#define EMBB_ALGORITHMS_PERF_QUICK_SORT_PERF_INL_H_
#include <embb/base/perf/call_args.h>
#include <embb/base/memory_allocation.h>
#include <embb/algorithms/quick_sort.h>
#include <embb/tasks/tasks.h>
#include <algorithm>
#include <functional>
namespace embb {
namespace algorithms {
namespace perf {
/**
 * Allocates a cache-aligned buffer holding args.VectorSize() elements and
 * fills it with the ascending sequence 0, 1, 2, ...
 */
template<typename T>
SerialQuickSort<T>::SerialQuickSort(const embb::base::perf::CallArgs & args)
  : cargs(args),
    vector_size(args.VectorSize()) {
  const size_t num_bytes = vector_size * sizeof(T);
  v = static_cast<T *>(
    embb::base::Allocation::AllocateCacheAligned(num_bytes));
  size_t index = 0;
  while (index < vector_size) {
    v[index] = static_cast<T>(index);
    ++index;
  }
}
/** Hands the element buffer obtained in the constructor back to EMBB. */
template<typename T>
SerialQuickSort<T>::~SerialQuickSort() {
  using embb::base::Allocation;
  Allocation::FreeAligned(v);
}
/**
 * Three-way comparison yielding descending order: returns 1 when *lhs is
 * smaller than *rhs, -1 when it is larger, and 0 otherwise.
 */
template<typename T>
int SerialQuickSort<T>::Greater(const void *lhs, const void *rhs) {
  const T left = *static_cast<const T *>(lhs);
  const T right = *static_cast<const T *>(rhs);
  return (left < right) ? 1 : ((left > right) ? -1 : 0);
}
/** Sorts the buffer in place with std::qsort, using Greater as comparator. */
template<typename T>
void SerialQuickSort<T>::Run() {
  const size_t element_size = sizeof(T);
  std::qsort(v, vector_size, element_size, &SerialQuickSort<T>::Greater);
}
/**
 * Allocates a cache-aligned buffer holding args.VectorSize() elements and
 * fills it with the ascending sequence 0, 1, 2, ...
 */
template<typename T>
ParallelQuickSort<T>::ParallelQuickSort(const embb::base::perf::CallArgs & args)
  : cargs(args),
    vector_size(args.VectorSize()) {
  const size_t num_bytes = vector_size * sizeof(T);
  v = static_cast<T *>(
    embb::base::Allocation::AllocateCacheAligned(num_bytes));
  size_t index = 0;
  while (index < vector_size) {
    v[index] = static_cast<T>(index);
    ++index;
  }
}
/** Hands the element buffer obtained in the constructor back to EMBB. */
template<typename T>
ParallelQuickSort<T>::~ParallelQuickSort() {
  using embb::base::Allocation;
  Allocation::FreeAligned(v);
}
/**
 * Sorts the buffer in place in descending order with
 * embb::algorithms::QuickSort.
 *
 * \param numThreads Divisor for the block size handed to QuickSort
 *                   (vector_size / numThreads); must not be zero.
 */
template<typename T>
void ParallelQuickSort<T>::Run(unsigned int numThreads) {
  // Quicksort works in place, so the input is always read from memory;
  // a CPU-only variant of this test is not possible.
  T * const first = v;
  T * const last = v + vector_size;
  embb::algorithms::QuickSort(
    first, last,
    std::greater<T>(),
    embb::tasks::ExecutionPolicy(),
    vector_size / numThreads);
}
} // namespace perf
} // namespace algorithms
} // namespace embb
#endif // EMBB_ALGORITHMS_PERF_QUICK_SORT_PERF_INL_H_
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EMBB_ALGORITHMS_PERF_QUICK_SORT_PERF_H_
#define EMBB_ALGORITHMS_PERF_QUICK_SORT_PERF_H_
#include <embb/base/perf/call_args.h>
#include <cmath>
namespace embb {
namespace algorithms {
namespace perf {
using embb::base::perf::CallArgs;
/**
 * Serial quick sort performance test case.
 *
 * Member definitions live in quick_sort_perf-inl.h, which this header
 * includes last.
 *
 * \tparam T Element type of the buffer \c v.
 */
template<typename T>
class SerialQuickSort {
 public:
  /**
   * Creates the test case.
   * \param args Call arguments; kept by reference in \c cargs, so the object
   *             passed in has to outlive this instance.
   */
  explicit SerialQuickSort(const embb::base::perf::CallArgs & args);
  ~SerialQuickSort();

  /** Empty hook (presumably run by the harness before Run() - confirm). */
  void Pre() { }
  /** Runs the operation under test; defined out of line. */
  void Run();
  /** Empty hook (presumably run by the harness after Run() - confirm). */
  void Post() { }

 private:
  /* Declared but not defined: copying and assignment are prohibited. */
  SerialQuickSort(const SerialQuickSort & other);
  SerialQuickSort & operator=(const SerialQuickSort & other);

  /** Three-way comparison callback operating on untyped element pointers. */
  static int Greater(const void *a, const void *b);

  const embb::base::perf::CallArgs & cargs;  ///< Call arguments (not owned)
  const size_t vector_size;                  ///< Element count
  T * v;                                     ///< Element buffer
};
/**
 * Parallel quick sort performance test case.
 *
 * Member definitions live in quick_sort_perf-inl.h, which this header
 * includes last.
 *
 * \tparam T Element type of the buffer \c v.
 */
template<typename T>
class ParallelQuickSort {
 public:
  /**
   * Creates the test case.
   * \param args Call arguments; kept by reference in \c cargs, so the object
   *             passed in has to outlive this instance.
   */
  explicit ParallelQuickSort(const embb::base::perf::CallArgs & args);
  ~ParallelQuickSort();

  /** Empty hook (presumably run by the harness before Run() - confirm). */
  void Pre() { }
  /**
   * Runs the operation under test; defined out of line.
   * \param numThreads Thread count the run is configured for.
   */
  void Run(unsigned int numThreads);
  /** Empty hook (presumably run by the harness after Run() - confirm). */
  void Post() { }

 private:
  /* Declared but not defined: copying and assignment are prohibited. */
  ParallelQuickSort(const ParallelQuickSort & other);
  ParallelQuickSort & operator=(const ParallelQuickSort & other);

  const embb::base::perf::CallArgs & cargs;  ///< Call arguments (not owned)
  const size_t vector_size;                  ///< Element count
  T * v;                                     ///< Element buffer
};
} // namespace perf
} // namespace algorithms
} // namespace embb
#include <quick_sort_perf-inl.h>
#endif // EMBB_ALGORITHMS_PERF_QUICK_SORT_PERF_H_
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EMBB_ALGORITHMS_PERF_REDUCE_PERF_TBB_INL_H_
#define EMBB_ALGORITHMS_PERF_REDUCE_PERF_TBB_INL_H_
#include <embb/base/perf/call_args.h>
#include <reduce_perf.h>
#include <embb/base/memory_allocation.h>
#include <embb/algorithms/reduce.h>
#include <index_iterator.h>
#include <embb/tasks/tasks.h>
namespace embb {
namespace algorithms {
namespace perf {
using embb::base::perf::CallArgs;
/**
 * Stores the call arguments and, in RAM stress mode only, allocates a
 * cache-aligned input buffer filled with 0, 1, 2, ...; in every other mode
 * the buffer pointer stays null.
 */
template<typename T>
SerialReduce<T>::SerialReduce(const embb::base::perf::CallArgs & args)
  : cargs(args),
    vector_size(args.VectorSize()) {
  v = 0;
  if (cargs.StressMode() != CallArgs::RAM_STRESS) {
    return;
  }
  const size_t num_bytes = vector_size * sizeof(T);
  v = static_cast<T*>(
    embb::base::Allocation::AllocateCacheAligned(num_bytes));
  for (size_t index = 0; index < vector_size; ++index) {
    v[index] = static_cast<T>(index);
  }
}
/** Frees the input buffer if one was allocated (RAM stress mode). */
template<typename T>
SerialReduce<T>::~SerialReduce() {
  if (v == 0) {
    return;
  }
  embb::base::Allocation::FreeAligned(v);
}
/**
 * Sequential reference reduction: sums TransformOp applied to every element
 * and divides the sum by the element count.
 *
 * In CPU stress mode the element value is the loop index itself; in RAM
 * stress mode it is read from the buffer \c v. Any other mode leaves
 * \c result at zero.
 */
template<typename T>
void SerialReduce<T>::Run() {
  result = 0;
  const CallArgs::StressType mode = cargs.StressMode();
  if (mode == CallArgs::CPU_STRESS) {
    TransformOp<T> transform(static_cast<T>(1.0) / vector_size, cargs);
    for (size_t index = 0; index < vector_size; ++index) {
      result += transform(static_cast<T>(index));
    }
  } else if (mode == CallArgs::RAM_STRESS) {
    TransformOp<T> transform(static_cast<T>(1.0) / vector_size, cargs);
    for (size_t index = 0; index < vector_size; ++index) {
      result += transform(v[index]);
    }
  } else {
    return;
  }
  result /= static_cast<T>(vector_size);
}
/**
 * Stores the call arguments and, in RAM stress mode only, allocates a
 * cache-aligned input buffer filled with 0, 1, 2, ...; in every other mode
 * the buffer pointer stays null.
 */
template<typename T>
ParallelReduce<T>::ParallelReduce(const embb::base::perf::CallArgs & args)
  : cargs(args),
    vector_size(args.VectorSize()) {
  v = 0;
  if (cargs.StressMode() != CallArgs::RAM_STRESS) {
    return;
  }
  const size_t num_bytes = vector_size * sizeof(T);
  v = static_cast<T*>(
    embb::base::Allocation::AllocateCacheAligned(num_bytes));
  for (size_t index = 0; index < vector_size; ++index) {
    v[index] = static_cast<T>(index);
  }
}
/** Frees the input buffer if one was allocated (RAM stress mode). */
template<typename T>
ParallelReduce<T>::~ParallelReduce() {
  if (v == 0) {
    return;
  }
  embb::base::Allocation::FreeAligned(v);
}
/**
 * Parallel reduction using embb::algorithms::Reduce: sums TransformOp
 * applied to every element and divides the sum by the element count.
 *
 * In CPU stress mode the input is an IndexIterator range (no memory access);
 * in RAM stress mode it is the buffer \c v. For any other stress mode no
 * reduction is run and \c result is left at zero, matching SerialReduce.
 *
 * \param numThreads Divisor for the block size handed to Reduce
 *                   (vector_size / numThreads); must not be zero.
 */
template<typename T>
void ParallelReduce<T>::Run(unsigned int numThreads) {
  // Reset first: previously `result` was divided below without ever having
  // been assigned when the stress mode was neither CPU nor RAM.
  result = 0;
  TransformOp<T> op(static_cast<T>(1) / vector_size, cargs);
  if (cargs.StressMode() == CallArgs::CPU_STRESS) {
    result = embb::algorithms::Reduce(
      // Using iterator returning index value to avoid
      // memory access
      IndexIterator<T>(0),
      IndexIterator<T>(static_cast<int>(vector_size)),
      static_cast<T>(0),   // neutral element
      ::std::plus<T>(),    // reduce op
      op,                  // transform op
      embb::tasks::ExecutionPolicy(),
      vector_size / numThreads);
  } else if (cargs.StressMode() == CallArgs::RAM_STRESS) {
    result = embb::algorithms::Reduce(
      v, v + vector_size,  // input
      static_cast<T>(0),   // neutral element
      ::std::plus<T>(),    // reduce op
      op,                  // transform op
      embb::tasks::ExecutionPolicy(),
      vector_size / numThreads);
  } else {
    // Nothing was reduced; keep the zero result.
    return;
  }
  result /= static_cast<T>(vector_size);
}
} // namespace perf
} // namespace algorithms
} // namespace embb
#endif
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EMBB_ALGORITHMS_PERF_REDUCE_PERF_H_
#define EMBB_ALGORITHMS_PERF_REDUCE_PERF_H_
#include <embb/base/perf/call_args.h>
namespace embb {
namespace algorithms {
namespace perf {
/**
 * Transformation functor applied to each element in the reduce performance
 * tests. It performs an artificial amount of arithmetic per element that
 * grows with the load factor taken from the call arguments.
 *
 * \tparam T Scalar element type.
 */
template<typename T>
class TransformOp {
  T step_size;
  size_t load_factor;

 public:
  /**
   * \param stepSize Factor multiplied into every intermediate value.
   * \param args     Call arguments; only LoadFactor() is read and copied.
   */
  explicit TransformOp(T stepSize, const embb::base::perf::CallArgs & args) :
    step_size(stepSize),
    load_factor(args.LoadFactor()) { }

  TransformOp(const TransformOp & other) :
    step_size(other.step_size),
    load_factor(other.load_factor) { }

  /** Member-wise assignment. */
  TransformOp & operator=(const TransformOp & other) {
    step_size = other.step_size;
    load_factor = other.load_factor;
    // The return statement was missing; flowing off the end of a non-void
    // function is undefined behaviour.
    return *this;
  }

  /**
   * Runs load_factor * 10000 iterations of a fixed arithmetic expression on
   * \c val and returns the value computed in the last iteration (zero when
   * the load factor is zero, as the loop body is then never entered).
   */
  T operator()(T val) const {
    T x = 0;
    // Simulate more complex operation depending on
    // load factor. Default load factor is 1.
    for (size_t i = 0; i < load_factor * 10000; ++i) {
      x = (val + static_cast<T>(0.5)) * step_size * i;
      x = static_cast<T>(4.0 / (1.0 + x * x / load_factor));
    }
    return x;
  }
};
/**
 * Serial reduce performance test case.
 *
 * Member definitions are expected to come from reduce_perf-inl.h, which
 * this header includes last.
 *
 * \tparam T Scalar element type.
 */
template<typename T>
class SerialReduce {
 public:
  /**
   * Creates the test case.
   * \param args Call arguments; kept by reference in \c cargs, so the object
   *             passed in has to outlive this instance.
   */
  explicit SerialReduce(const embb::base::perf::CallArgs & args);
  ~SerialReduce();

  /** Empty hook (presumably run by the harness before Run() - confirm). */
  void Pre() { }
  /** Runs the operation under test; defined out of line. */
  void Run();
  /** Empty hook (presumably run by the harness after Run() - confirm). */
  void Post() { }

 private:
  /* Declared but not defined: copying and assignment are prohibited. */
  SerialReduce(const SerialReduce &other);
  SerialReduce& operator=(const SerialReduce &other);

  const embb::base::perf::CallArgs & cargs;  ///< Call arguments (not owned)
  const size_t vector_size;                  ///< Element count
  T *v;                                      ///< Input buffer
  T result;                                  ///< Value produced by Run()
};
/**
 * Parallel reduce performance test case.
 *
 * Member definitions are expected to come from reduce_perf-inl.h, which
 * this header includes last.
 *
 * \tparam T Scalar element type.
 */
template<typename T>
class ParallelReduce {
 public:
  /**
   * Creates the test case.
   * \param args Call arguments; kept by reference in \c cargs, so the object
   *             passed in has to outlive this instance.
   */
  explicit ParallelReduce(const embb::base::perf::CallArgs & args);
  ~ParallelReduce();

  /** Empty hook (presumably run by the harness before Run() - confirm). */
  void Pre() { }
  /**
   * Runs the operation under test; defined out of line.
   * \param numThreads Thread count the run is configured for.
   */
  void Run(unsigned int numThreads);
  /** Empty hook (presumably run by the harness after Run() - confirm). */
  void Post() { }

 private:
  /* Declared but not defined: copying and assignment are prohibited. */
  ParallelReduce(const ParallelReduce &other);
  ParallelReduce& operator=(const ParallelReduce &other);

  const embb::base::perf::CallArgs & cargs;  ///< Call arguments (not owned)
  const size_t vector_size;                  ///< Element count
  T *v;                                      ///< Input buffer
  T result;                                  ///< Value produced by Run()
};
}
}
}
#include <reduce_perf-inl.h>
#endif /* EMBB_ALGORITHMS_PERF_REDUCE_PERF_H_ */
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EMB_ALGORITHMS_PERF_SCAN_PERF_INL_H_
#define EMB_ALGORITHMS_PERF_SCAN_PERF_INL_H_
#include <scan_perf.h>
#include <embb/algorithms/scan.h>
#include <embb/base/perf/call_args.h>
#include <embb/base/memory_allocation.h>
#include <index_iterator.h>
#include <embb/tasks/tasks.h>
namespace embb {
namespace algorithms {
namespace perf {
using namespace embb::base;
/**
 * Stores the call arguments and allocates the buffers used by Run().
 *
 * The output buffer is always allocated because Run() writes to it in every
 * stress mode (previously it stayed null outside RAM stress mode, which made
 * Run() write through a null pointer). The input buffer is only needed in
 * RAM stress mode, where it is filled with ones; otherwise it stays null.
 */
template<typename T>
SerialScan<T>::SerialScan(const embb::base::perf::CallArgs & args) :
  cargs(args),
  vector_size(args.VectorSize()),
  // LoadFactor() yields size_t; make the narrowing to the member explicit.
  load_factor(static_cast<unsigned int>(args.LoadFactor())),
  in(0),
  out(0) {
  const size_t num_bytes = vector_size * sizeof(T);
  out = static_cast<T *>(
    embb::base::Allocation::AllocateCacheAligned(num_bytes));
  if (cargs.StressMode() == embb::base::perf::CallArgs::RAM_STRESS) {
    in = static_cast<T *>(
      embb::base::Allocation::AllocateCacheAligned(num_bytes));
    for (size_t i = 0; i < vector_size; i++) {
      in[i] = static_cast<T>(1);
    }
  }
}
/** Frees the input buffer first, then the output buffer, skipping nulls. */
template<typename T>
SerialScan<T>::~SerialScan() {
  T * const buffers[2] = { in, out };
  for (int k = 0; k < 2; ++k) {
    if (buffers[k] != 0) {
      Allocation::FreeAligned(buffers[k]);
    }
  }
}
/**
 * Sequential reference implementation of the inclusive scan.
 *
 * For every element the running total is increased load_factor times, by
 * the input value in RAM stress mode or by the constant 1 otherwise, and the
 * total is stored in \c out. \c result receives the final total, which is
 * the last value of the scan (zero for an empty vector).
 */
template<typename T>
void SerialScan<T>::Run() {
  T total = 0;
  // size_t index: vector_size is a size_t, and an unsigned int counter
  // could never reach a bound above UINT_MAX.
  if (cargs.StressMode() == embb::base::perf::CallArgs::RAM_STRESS) {
    for (size_t i = 0; i < vector_size; i++) {
      // artificial complexity
      for (unsigned int k = 0; k < load_factor; k++) {
        total += static_cast<T>(in[i]);
      }
      out[i] = total;
    }
  } else {
    for (size_t i = 0; i < vector_size; i++) {
      // artificial complexity
      for (unsigned int k = 0; k < load_factor; k++) {
        total += 1;
      }
      // The output buffer may be null outside RAM stress mode; storing
      // through it unconditionally dereferenced a null pointer.
      if (out != 0) {
        out[i] = total;
      }
    }
  }
  // Equal to out[vector_size - 1] for a non-empty vector, without indexing
  // at (size_t)-1 when the vector is empty.
  result = total;
}
/**
 * Stores the call arguments and allocates the buffers: the input buffer
 * (filled with ones) only in RAM stress mode, the output buffer always.
 */
template<typename T>
ParallelScan<T>::ParallelScan(const embb::base::perf::CallArgs & args)
  : cargs(args),
    vector_size(args.VectorSize()) {
  const size_t num_bytes = vector_size * sizeof(T);
  in = 0;
  if (cargs.StressMode() == CallArgs::RAM_STRESS) {
    in = static_cast<T *>(Allocation::AllocateCacheAligned(num_bytes));
    size_t index = 0;
    while (index < vector_size) {
      in[index] = static_cast<T>(1);
      ++index;
    }
  }
  out = static_cast<T *>(Allocation::AllocateCacheAligned(num_bytes));
}
/**
 * Releases both buffers. The output buffer is allocated unconditionally by
 * the constructor and was previously never freed.
 */
template<typename T>
ParallelScan<T>::~ParallelScan() {
  if (in != 0) {
    Allocation::FreeAligned(in);
  }
  if (out != 0) {
    Allocation::FreeAligned(out);
  }
}
/**
 * Runs embb::algorithms::Scan into \c out and stores the last output
 * element in \c result.
 *
 * CPU stress mode scans an IndexIterator range with CpuStressScanOp; RAM
 * stress mode scans the input buffer with RamStressScanOp. Any other mode
 * does nothing.
 *
 * \param numThreads Divisor for the block size handed to Scan
 *                   (vector_size / numThreads); must not be zero.
 */
template<typename T>
void ParallelScan<T>::Run(unsigned int numThreads) {
  const CallArgs::StressType mode = cargs.StressMode();
  if (mode == CallArgs::CPU_STRESS) {
    CpuStressScanOp<T> cpu_op(cargs);
    const size_t block_size = vector_size / numThreads;
    embb::algorithms::Scan(
      // Index-valued iterators, so no input memory is touched
      IndexIterator<T>(0),
      IndexIterator<T>(static_cast<int>(vector_size)),
      out,
      static_cast<T>(0),             // neutral element
      cpu_op,                        // scan aggregation functor
      embb::algorithms::Identity(),  // no transformation
      embb::tasks::ExecutionPolicy(),
      block_size);
    result = out[vector_size - 1];
    return;
  }
  if (mode == CallArgs::RAM_STRESS) {
    RamStressScanOp<T> ram_op(cargs, in);
    const size_t block_size = vector_size / numThreads;
    embb::algorithms::Scan(
      in, in + vector_size,
      out,
      static_cast<T>(0),             // neutral element
      ram_op,                        // scan aggregation functor
      embb::algorithms::Identity(),  // no transformation
      embb::tasks::ExecutionPolicy(),
      block_size);
    result = out[vector_size - 1];
  }
}
} // namespace perf
} // namespace algorithms
} // namespace embb
#endif /* EMB_ALGORITHMS_PERF_SCAN_PERF_INL_H_ */
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EMBB_ALGORITHMS_PERF_SCAN_PERF_H_
#define EMBB_ALGORITHMS_PERF_SCAN_PERF_H_
#include <embb/base/perf/call_args.h>
#include <vector>
namespace embb {
namespace algorithms {
namespace perf {
template<typename T>
class RamStressScanOp {
size_t load_factor;
const T * const in;
public:
explicit RamStressScanOp(const CallArgs & args, const T inVector[]) :
load_factor(args.LoadFactor()), in(inVector) { }
public:
T operator()(const T & lhs, const T & rhs) {
T value = lhs;
// rhs is index. First iteration in range has
// lhs = 0 (neutral element).
// artificial complexity
for (unsigned int k = 0; k < load_factor; k++) {
// artificial complexity
value += rhs;
}
return value;
}
RamStressScanOp(const RamStressScanOp & other)
: load_factor(other.load_factor), in(other.in) { }
RamStressScanOp & operator=(const RamStressScanOp & other) {
if (*this != &other) {
load_factor = other.load_factor;
in = other.in;
}
return *this;
}
};
template<typename T>
class CpuStressScanOp {
size_t load_factor;
public:
explicit CpuStressScanOp(const CallArgs & args) :
load_factor(args.LoadFactor()) { }
public:
T operator()(const T & lhs, const T &) {
T value = lhs;
// Ignore rhs value, always add 1, corresponding
// to input value in RAM stress operation
for (unsigned int k = 0; k < load_factor; k++) {
// artificial complexity
value += static_cast<T>(1);
}
return value;
}
};
/**
 * Serial scan performance test case.
 *
 * Member definitions are expected to come from scan_perf-inl.h, which this
 * header includes last.
 *
 * \tparam T Scalar element type.
 */
template<typename T>
class SerialScan {
 public:
  /**
   * Creates the test case.
   * \param args Call arguments; kept by reference in \c cargs, so the object
   *             passed in has to outlive this instance.
   */
  explicit SerialScan(const embb::base::perf::CallArgs & args);
  ~SerialScan();

  /** Empty hook (presumably run by the harness before Run() - confirm). */
  void Pre() { }
  /** Runs the operation under test; defined out of line. */
  void Run();
  /** Empty hook (presumably run by the harness after Run() - confirm). */
  void Post() { }

 private:
  /* Declared but not defined: copying and assignment are prohibited. */
  SerialScan(const SerialScan & other);
  SerialScan & operator=(const SerialScan & other);

  const embb::base::perf::CallArgs & cargs;  ///< Call arguments (not owned)
  const size_t vector_size;                  ///< Element count
  unsigned int load_factor;                  ///< Repetitions per element
  T * in;                                    ///< Input buffer
  T * out;                                   ///< Output buffer
  T result;                                  ///< Value produced by Run()
};
/**
 * Parallel scan performance test case.
 *
 * Member definitions are expected to come from scan_perf-inl.h, which this
 * header includes last.
 *
 * \tparam T Scalar element type.
 */
template<typename T>
class ParallelScan {
 public:
  /**
   * Creates the test case.
   * \param args Call arguments; kept by reference in \c cargs, so the object
   *             passed in has to outlive this instance.
   */
  explicit ParallelScan(const embb::base::perf::CallArgs & args);
  ~ParallelScan();

  /** Empty hook (presumably run by the harness before Run() - confirm). */
  void Pre() { }
  /**
   * Runs the operation under test; defined out of line.
   * \param numThreads Thread count the run is configured for.
   */
  void Run(unsigned int numThreads);
  /** Empty hook (presumably run by the harness after Run() - confirm). */
  void Post() { }

 private:
  /* Declared but not defined: copying and assignment are prohibited. */
  ParallelScan(const ParallelScan & other);
  ParallelScan & operator=(const ParallelScan & other);

  const embb::base::perf::CallArgs & cargs;  ///< Call arguments (not owned)
  const size_t vector_size;                  ///< Element count
  T * in;                                    ///< Input buffer
  T * out;                                   ///< Output buffer
  T result;                                  ///< Value produced by Run()
};
} // namespace perf
} // namespace algorithms
} // namespace embb
#include <scan_perf-inl.h>
#endif /* EMBB_ALGORITHMS_PERF_SCAN_PERF_H_ */
...@@ -7,6 +7,8 @@ file(GLOB_RECURSE EMBB_BASE_CPP_SOURCES "src/*.cc" "src/*.h") ...@@ -7,6 +7,8 @@ file(GLOB_RECURSE EMBB_BASE_CPP_SOURCES "src/*.cc" "src/*.h")
file(GLOB_RECURSE EMBB_BASE_CPP_HEADERS "include/embb/*.h") file(GLOB_RECURSE EMBB_BASE_CPP_HEADERS "include/embb/*.h")
if (BUILD_TESTS STREQUAL ON) if (BUILD_TESTS STREQUAL ON)
file(GLOB_RECURSE EMBB_BASE_TEST_SOURCES "test/*.cc" "test/*.h") file(GLOB_RECURSE EMBB_BASE_TEST_SOURCES "test/*.cc" "test/*.h")
file(GLOB_RECURSE EMBB_BASE_PERF_SOURCES "perf/src/*.cc" "perf/src/*.h")
file(GLOB_RECURSE EMBB_BASE_PERF_HEADERS "perf/include/*.h")
endif() endif()
# Create header file from input file # Create header file from input file
...@@ -32,13 +34,26 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include ...@@ -32,13 +34,26 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include
add_library (embb_base_cpp ${EMBB_BASE_CPP_SOURCES} ${EMBB_BASE_CPP_HEADERS}) add_library (embb_base_cpp ${EMBB_BASE_CPP_SOURCES} ${EMBB_BASE_CPP_HEADERS})
target_link_libraries(embb_base_cpp embb_base_c) target_link_libraries(embb_base_cpp embb_base_c)
if (BUILD_TESTS STREQUAL ON) if (BUILD_TESTS STREQUAL ON)
# Unit tests
include_directories(test/ include_directories(test/
${CMAKE_CURRENT_BINARY_DIR}/../partest/include ${CMAKE_CURRENT_BINARY_DIR}/../partest/include
) )
add_executable (embb_base_cpp_test ${EMBB_BASE_TEST_SOURCES}) add_executable (embb_base_cpp_test
${EMBB_BASE_TEST_SOURCES})
target_link_libraries(embb_base_cpp_test partest embb_base_cpp embb_base_c target_link_libraries(embb_base_cpp_test partest embb_base_cpp embb_base_c
${compiler_libs}) ${compiler_libs})
CopyBin(BIN embb_base_cpp_test DEST ${local_install_dir}) CopyBin(BIN embb_base_cpp_test DEST ${local_install_dir})
# Performance tests
include_directories(perf/include
${CMAKE_CURRENT_BINARY_DIR}/../partest/include
)
add_library (embb_base_cpp_perf ${EMBB_BASE_PERF_SOURCES})
target_link_libraries(embb_base_cpp_perf partest embb_base_cpp embb_base_c
${compiler_libs})
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/perf/include/embb
DESTINATION include FILES_MATCHING PATTERN "*.h")
install(TARGETS embb_base_cpp_perf DESTINATION lib)
endif() endif()
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/embb install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/embb
......
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EMBB_BASE_PERF_CALL_ARGS_H_
#define EMBB_BASE_PERF_CALL_ARGS_H_
#include <embb/base/core_set.h>
#include <string>
#include <ostream>
namespace embb {
namespace base {
namespace perf {
/**
 * Settings of a performance test run: element type, stress mode, thread
 * limit, vector size, load factor and two counter-related values.
 *
 * Values are either the built-in defaults or whatever Parse() derives from
 * the command line.
 */
class CallArgs {
 public:
  /** Scalar type of the elements processed by a test. */
  typedef enum {
    UNDEFINED_SCALAR_TYPE = 0,
    FLOAT,
    DOUBLE
  } ScalarType;

  /** Kind of load a test is meant to generate. */
  typedef enum {
    UNDEFINED_STRESS_TYPE = 0,
    RAM_STRESS,
    CPU_STRESS
  } StressType;

 public:
  /**
   * Creates the default settings: float elements, RAM stress, as many
   * threads as CoreSet::CountAvailable() reports, 10000000 elements and a
   * load factor of 100.
   */
  inline CallArgs() :
    element_type(CallArgs::FLOAT),
    stress_type(CallArgs::RAM_STRESS),
    max_threads(embb::base::CoreSet::CountAvailable()),
    vector_size(10000000),
    load_factor(100),
    parallel_base_ref(0),
    counter_scale(0) {
  }

  /**
   * Creates the settings from command line arguments.
   *
   * All members start out with the same defaults as the default constructor
   * before Parse() runs. Previously they were indeterminate at that point,
   * so any setting Parse() did not assign would have been read
   * uninitialised.
   */
  inline CallArgs(int argc, char * argv[]) :
    element_type(CallArgs::FLOAT),
    stress_type(CallArgs::RAM_STRESS),
    max_threads(embb::base::CoreSet::CountAvailable()),
    vector_size(10000000),
    load_factor(100),
    parallel_base_ref(0),
    counter_scale(0) {
    Parse(argc, argv);
  }

  /** Writes the settings to \c os; defined out of line. */
  void Print(::std::ostream & os);
  /** Reads settings from the command line; defined out of line. */
  void Parse(int argc, char * argv[]);

  inline size_t MaxThreads() const {
    return max_threads;
  }
  inline unsigned int CounterScale() const {
    return counter_scale;
  }
  inline ScalarType ElementType() const {
    return element_type;
  }
  /** Returns "undefined", "float" or "double" for the element type. */
  inline ::std::string ElementTypeName() const {
    return ((ElementType() == UNDEFINED_SCALAR_TYPE)
            ? "undefined"
            : ((ElementType() == FLOAT)
               ? "float"
               : "double"));
  }
  inline StressType StressMode() const {
    return stress_type;
  }
  /** Returns "cpu" for CPU_STRESS and "ram" for every other stress mode. */
  inline ::std::string StressModeName() const {
    return ((StressMode() == CPU_STRESS)
            ? "cpu"
            : "ram");
  }
  inline size_t VectorSize() const {
    return vector_size;
  }
  inline size_t LoadFactor() const {
    return load_factor;
  }
  inline unsigned int ParallelBaseReference() const {
    return parallel_base_ref;
  }

 private:
  ScalarType element_type;
  StressType stress_type;
  size_t max_threads;
  size_t vector_size;
  size_t load_factor;
  unsigned int parallel_base_ref;
  unsigned int counter_scale;
};
} // namespace perf
} // namespace base
} // namespace embb
#endif
#ifndef _EMBB_BASE_CPP_PERF_DURATION_H
#define _EMBB_BASE_CPP_PERF_DURATION_H
#include <embb/base/perf/timer.h>
namespace embb {
namespace base {
namespace perf {
/** Pair of timer timestamps delimiting a measured interval. */
struct Duration {
  /** Timestamp marking the beginning of the interval. */
  Timer::timestamp_t Start;
  /** Timestamp marking the end of the interval. */
  Timer::timestamp_t End;
};
} // namespace perf
} // namespace base
} // namespace embb
#endif // _EMBB_BASE_CPP_PERF_DURATION_H
// Platform and architecture detection for the performance timers.
// Translates EMBB platform macros and compiler-predefined macros into
// EMBB_BASE_CPP_PERF_* feature macros. The tests are independent of each
// other, so several timer macros may end up defined at the same time.
#ifndef EMBB_BASE_CPP_PERF_INTERNAL_ENV_H_
#define EMBB_BASE_CPP_PERF_INTERNAL_ENV_H_
// Presumably the origin of the EMBB_PLATFORM_* macros tested below - confirm.
#include <embb/base/c/internal/config.h>
// Windows
#ifdef EMBB_PLATFORM_THREADING_WINTHREADS
#define EMBB_BASE_CPP_PERF_TIMER_WIN32
#endif
// OS X
#if defined(__MACH__) && defined(__APPLE__)
#define EMBB_BASE_CPP_PERF_TIMER_OSX
#endif
// HPUX / Sun
// (HP-UX, or a Sun compiler macro combined with an SVR4 macro)
#if (defined(__hpux) || defined(hpux)) || \
((defined(__sun__) || defined(__sun) || defined(sun)) && \
(defined(__SVR4) || defined(__svr4__)))
#define EMBB_BASE_CPP_PERF_TIMER_UX
#endif
// POSIX
#if defined(EMBB_PLATFORM_THREADING_POSIXTHREADS)
#define EMBB_BASE_CPP_PERF_TIMER_POSIX
#endif
// Linux
#if defined(__linux__)
#define EMBB_BASE_CPP_PERF_TIMER_LINUX
#endif
// FreeBSD
#if defined(__FreeBSD__)
#define EMBB_BASE_CPP_PERF_TIMER_FREEBSD
#endif
// C++11 (std::chrono)
// NOTE(review): the macro name carries a __DISABLED__ suffix, presumably so
// that checks for the plain ..._TIMER_CXX11 name never match - confirm.
#if (__cplusplus > 199711L)
#define EMBB_BASE_CPP_PERF_TIMER_CXX11__DISABLED__
#endif
// Architecture specific defines
// Intel 386
#if defined(EMBB_PLATFORM_ARCH_X86_32)
#define EMBB_BASE_CPP_PERF__ARCH_I386
#define EMBB_BASE_CPP_PERF__ARCH_X86
// AMD64, Intel x64
#elif defined(EMBB_PLATFORM_ARCH_X86_64)
#define EMBB_BASE_CPP_PERF__ARCH_X64
#define EMBB_BASE_CPP_PERF__ARCH_X86
// ARM
#elif defined(EMBB_PLATFORM_ARCH_ARM)
// ARM versions consolidated to major architecture version.
// See: https://wiki.edubuntu.org/ARM/Thumb2PortingHowto
// The checks below cascade: each block also tests the macro defined by the
// block before it, so a newer architecture version defines the macros of
// all older versions as well.
#if defined(__ARM_ARCH_7__) || \
defined(__ARM_ARCH_7R__) || \
defined(__ARM_ARCH_7A__)
#define EMBB_BASE_CPP_PERF__ARCH_ARMV7 1
#endif
#if defined(EMBB_BASE_CPP_PERF__ARCH_ARMV7) || \
defined(__ARM_ARCH_6__) || \
defined(__ARM_ARCH_6J__) || \
defined(__ARM_ARCH_6K__) || \
defined(__ARM_ARCH_6Z__) || \
defined(__ARM_ARCH_6T2__) || \
defined(__ARM_ARCH_6ZK__)
#define EMBB_BASE_CPP_PERF__ARCH_ARMV6 1
#endif
#if defined(EMBB_BASE_CPP_PERF__ARCH_ARMV6) || \
defined(__ARM_ARCH_5T__) || \
defined(__ARM_ARCH_5E__) || \
defined(__ARM_ARCH_5TE__) || \
defined(__ARM_ARCH_5TEJ__)
#define EMBB_BASE_CPP_PERF__ARCH_ARMV5 1
#endif
#if defined(EMBB_BASE_CPP_PERF__ARCH_ARMV5) || \
defined(__ARM_ARCH_4__) || \
defined(__ARM_ARCH_4T__)
#define EMBB_BASE_CPP_PERF__ARCH_ARMV4 1
#endif
#if defined(EMBB_BASE_CPP_PERF__ARCH_ARMV4) || \
defined(__ARM_ARCH_3__) || \
defined(__ARM_ARCH_3M__)
#define EMBB_BASE_CPP_PERF__ARCH_ARMV3 1
#endif
// Last step of the cascade: the generic ..._ARCH_ARM macro is defined only
// here, i.e. only when one of the version macros above was recognised.
#if defined(EMBB_BASE_CPP_PERF__ARCH_ARMV3) || \
defined(__ARM_ARCH_2__)
#define EMBB_BASE_CPP_PERF__ARCH_ARMV2 1
#define EMBB_BASE_CPP_PERF__ARCH_ARM 1
#endif
#endif // Architecture
// PAPI timer support is opt-in via a __PAPI__ definition.
#if defined(__PAPI__)
#define EMBB_BASE_CPP_PERF_TIMER_PAPI
#endif
#endif // EMBB_BASE_CPP_PERF_INTERNAL_ENV_H_
#ifndef EMBB_BASE_CPP_PERF_INTERNAL_TIMESTAMP_H_
#define EMBB_BASE_CPP_PERF_INTERNAL_TIMESTAMP_H_
#include <embb/base/perf/internal/env.h>
#include <limits.h>
namespace embb {
namespace base {
namespace perf {
/**
 * Abstract base class of all timestamp implementations. A timestamp wraps
 * a single counter value that concrete subclasses expose through Value().
 */
class Timestamp {
 public:
  /**
   * Counter type. The former architecture-dependent #if selected the same
   * type in both branches, so it is declared unconditionally.
   */
  typedef unsigned long long counter_t;

 public:
  /**
   * Virtual destructor: the class is polymorphic, so destroying a derived
   * timestamp through a Timestamp pointer must reach the derived destructor.
   */
  virtual ~Timestamp() { }

  /** Returns the counter value held by the concrete timestamp. */
  virtual const counter_t & Value() const = 0;

  static double FrequencyScaling();
  static double FrequencyPrescale();
  static const char * VariantName();

  /**
   * Largest timestamp value. This is LLONG_MAX, the maximum of the signed
   * type, and not the maximum of the unsigned counter_t.
   */
  inline static counter_t TimestampInfinity() {
    return LLONG_MAX;
  }
  /** Smallest timestamp value (zero). */
  inline static counter_t TimestampNegInfinity() {
    return 0;
  }
};
} // namespace perf
} // namespace base
} // namespace embb
#endif // EMBB_BASE_CPP_PERF_INTERNAL_TIMESTAMP_H_
#ifndef EMBB_BASE_CPP_PERF_INTERNAL_TIMER_TIMESTAMP_CLOCK_POSIX_H_
#define EMBB_BASE_CPP_PERF_INTERNAL_TIMER_TIMESTAMP_CLOCK_POSIX_H_
#include <embb/base/perf/timer.h>
#include <embb/base/perf/internal/timestamp.h>
#include <climits>
#include <unistd.h> /* POSIX flags */
#include <time.h> /* clock_gettime(), time() */
#include <sys/time.h> /* gethrtime(), gettimeofday() */
#include <utility>
#include <iostream>
#include <stdexcept>
namespace embb {
namespace base {
namespace perf {
namespace internal {
namespace timer {
/**
 * Timestamp implementation for POSIX platforms.
 *
 * Wraps a single counter value. The clock source in use is tracked in
 * static members shared by all instances (see ClockMode).
 */
class TimestampClockPosix : public Timestamp {
 public:
  /**
   * Clock modes, ordered by preference.
   */
  typedef enum {
    UNDEFINED_CLOCK = 0,  ///< Undefined (unresolved) mode
    GENERIC_CLOCK,        ///< Unknown (unresolvable) mode
    UX,                   ///< HP/UX mode (gethrtime)
    MACH,                 ///< MACH/OS X mode
    THREAD_CPUTIME,       ///< POSIX clock mode CLOCK_THREAD_CPUTIME_ID
    PROC_CPUTIME,         ///< POSIX clock mode CLOCK_PROCESS_CPUTIME_ID
    HIGHRES,              ///< POSIX clock mode CLOCK_HIGHRES
    MONOTONIC_PRECISE,    ///< POSIX clock mode CLOCK_MONOTONIC_PRECISE
    MONOTONIC_RAW,        ///< POSIX clock mode CLOCK_MONOTONIC_RAW
    MONOTONIC,            ///< POSIX clock mode CLOCK_MONOTONIC
    REALTIME,             ///< POSIX clock mode CLOCK_REALTIME
    NUM_CLOCK_MODES       ///< Number of clock modes in total
  } ClockMode;

  /** Publicly accessible scaling value; defined out of line. */
  static Timestamp::counter_t frequencyScaling;

  /** Calibration entry point; defined out of line. */
  static void Calibrate(unsigned int mode = 0);

  /** Default constructor; defined out of line. */
  TimestampClockPosix();

  /** Wraps an existing counter value (also usable as implicit conversion). */
  TimestampClockPosix(const Timestamp::counter_t & counterValue)
    : value(counterValue) {
  }

  /** Copies the counter value of \c other. */
  TimestampClockPosix(const TimestampClockPosix & other)
    : value(other.value) {
  }

  /** Assigns the counter value of \c rhs; self-assignment is a no-op. */
  TimestampClockPosix & operator=(const TimestampClockPosix & rhs) {
    if (this == &rhs) {
      return *this;
    }
    value = rhs.value;
    return *this;
  }

  /** Returns the wrapped counter value. */
  const Timestamp::counter_t & Value() const {
    return value;
  }

  /** Always 1: this timestamp type applies no frequency scaling. */
  static double FrequencyScaling() {
    return 1.0;
  }

  /** Always 1: this timestamp type applies no frequency prescaling. */
  static double FrequencyPrescale() {
    return 1.0;
  }

  /** Returns the readable name of the currently selected clock mode. */
  static const char * TimerName() {
    return clockModeNames[clockMode];
  }

 private:
  /** Association of a clock mode with its clockid_t. */
  typedef std::pair<ClockMode, clockid_t> ClockDef;

  /**
   * List of clock modes as readable strings.
   */
  static const char * clockModeNames[];

  /**
   * Mapping of clock modes to clockid types.
   * Only contains mappings for clock types available
   * on the platform.
   */
  static TimestampClockPosix::ClockDef availableModes[];

  /**
   * Currently selected clock mode.
   */
  static ClockMode clockMode;

  /**
   * Currently selected clockid_t corresponding to clockMode.
   */
  static clockid_t clockId;

  /** Counter value represented by this timestamp. */
  Timestamp::counter_t value;
};
} // namespace timer
} // namespace internal
} // namespace perf
} // namespace base
} // namespace embb
#endif // EMBB_BASE_CPP_PERF_INTERNAL_TIMER_TIMESTAMP_CLOCK_POSIX_H_
#ifndef EMBB_BASE_CPP_PERF_INTERNAL_TIMER_TIMESTAMP_CLOCK_WIN32_H_
#define EMBB_BASE_CPP_PERF_INTERNAL_TIMER_TIMESTAMP_CLOCK_WIN32_H_
#include <embb/base/perf/timer.h>
#if defined(EMBB_BASE_CPP_PERF_TIMER_WIN32)
#if (defined(NTDDI_WIN8) && NTDDI_VERSION >= NTDDI_WIN8)
#define EMBB_BASE_CPP_PERF_TIMER_WIN32_SYSTEM_TIME_PRECISE
#endif
// Prevent definition of Windows-specific macros 'min' and 'max':
#define NOMINMAX
#include <Windows.h>
namespace embb {
namespace base {
namespace perf {
namespace internal {
namespace timer {
/**
* @brief Timestamp counter (TSC) for Windows platforms.
*/
class TimestampClockWin32 : public Timestamp
{
private:
typedef TimestampClockWin32 self_t;
private:
counter_t value;
public:
inline static void Calibrate(unsigned int = 0) { }
public:
inline TimestampClockWin32() {
FILETIME tm;
#if defined(EMBB_BASE_CPP_PERF_TIMER_WIN32_SYSTEM_TIME_PRECISE)
// Windows 8, Windows Server 2012 and later
GetSystemTimeAsFileTime(&tm);
// Should work according to docs, but doesn't:
// GetSystemTimePreciseAsFileTime(&tm);
#else
// Windows 2000 and later
GetSystemTimeAsFileTime(&tm);
#endif
value = (static_cast<counter_t>(tm.dwHighDateTime) << 32) |
static_cast<counter_t>(tm.dwLowDateTime);
}
inline TimestampClockWin32(const self_t & other)
: value(other.value)
{ }
inline TimestampClockWin32(const counter_t & counterValue)
: value(counterValue)
{ }
inline self_t & operator=(const self_t & rhs) {
if (this != &rhs) {
value = rhs.value;
}
return *this;
}
inline const counter_t & Value() const {
return value;
}
inline static double FrequencyScaling() {
return 1.0f;
}
inline static double FrequencyPrescale() {
// Adjust from millisecond to microsecond scale:
return 0.1;
}
inline static const char * TimerName() {
#if defined(EMBB_BASE_CPP_PERF_TIMER_WIN32_SYSTEM_TIME_PRECISE)
return "Win32:GetSystemTimePrecise";
#else
return "Win32:GetSystemTime";
#endif
}
};
} // namespace timer
} // namespace internal
} // namespace perf
} // namespace base
} // namespace embb
#endif // defined(EMBB_BASE_CPP_PERF_TIMER_WIN32)
#endif // EMBB_BASE_CPP_PERF_INTERNAL_TIMER_TIMESTAMP_CLOCK_WIN32_H_
#ifndef EMBB_BASE_CPP_PERF_INTERNAL_TIMER_TIMESTAMP_COUNTER_POSIX_H_
#define EMBB_BASE_CPP_PERF_INTERNAL_TIMER_TIMESTAMP_COUNTER_POSIX_H_
#include <embb/base/perf/timer.h>
#include <embb/base/perf/internal/timestamp.h>
#include <stdint.h>
#include <iostream>
#include <embb/base/perf/internal/env.h>
#if defined(EMBB_BASE_CPP_PERF__ARCH_MIPS) || \
defined(EMBB_BASE_CPP_PERF__ARCH_ARM)
#include <sys/time.h>
#endif // MIPS or ARM
namespace embb {
namespace base {
namespace perf {
namespace internal {
namespace timer {
/**
 * @brief Timestamp counter (RDTSC or PMC) for POSIX platforms.
 *
 * The counter value is read from an architecture-specific cycle counter
 * where one is available, and from gettimeofday() otherwise.
 */
class TimestampCounterPosix : public Timestamp
{
 private:
  /// Raw counter value held by this timestamp.
  Timestamp::counter_t value;

 public:
  /// Scaling factor reported by FrequencyScaling().
  static Timestamp::counter_t frequencyScaling;

  /**
   * @brief Reads the architecture-specific cycle counter.
   *
   * - x64:   RDTSCP
   * - i386:  RDTSC
   * - ARMv6+: performance monitor cycle counter (PMCCNTR)
   * - other: gettimeofday(), scaled by FrequencyScaling()
   *
   * On ARM the function returns 1 if the cycle counter is not counting and
   * 2 if user mode access to the performance monitor is not enabled.
   */
  static inline uint64_t ArchCycleCount() {
#if defined(EMBB_BASE_CPP_PERF__ARCH_X64)
    uint64_t rax, rdx;
    uint32_t tsc_aux;
    __asm__ volatile ("rdtscp\n" : "=a" (rax), "=d" (rdx), "=c" (tsc_aux) : : );
    return (rdx << 32) + rax;
#elif defined(EMBB_BASE_CPP_PERF__ARCH_I386)
    // "=A" binds the 64-bit result to the edx:eax register pair.
    uint64_t ret;
    __asm__ volatile ("rdtsc" : "=A" (ret) );
    return ret;
#elif defined(EMBB_BASE_CPP_PERF__ARCH_ARMV6)
    uint32_t pmccntr;
    uint32_t pmuseren = 1;
    uint32_t pmcntenset;
    // Read the user mode perf monitor counter access permissions.
    __asm__ volatile ("mrc p15, 0, %0, c9, c14, 0" : "=r" (pmuseren));
    // Set permission flag (E bit) and write it back. MCR transfers an ARM
    // register to the coprocessor, so the value is an input operand.
    // NOTE(review): writing this register presumably requires a privileged
    // mode -- confirm for the targeted platforms.
    pmuseren |= 0x01;
    __asm__ volatile ("mcr p15, 0, %0, c9, c14, 0" : : "r" (pmuseren));
    // Read the register again so the check below reflects its actual state.
    __asm__ volatile ("mrc p15, 0, %0, c9, c14, 0" : "=r" (pmuseren));
    if (pmuseren & 1) {  // Allows reading perfmon counters for user mode code.
      __asm__ volatile ("mrc p15, 0, %0, c9, c12, 1" : "=r" (pmcntenset));
      if (pmcntenset & 0x80000000ul) {  // Is it counting?
        __asm__ volatile ("mrc p15, 0, %0, c9, c13, 0" : "=r" (pmccntr));
        // The counter is set up to count every 64th cycle
        return static_cast<uint64_t>(pmccntr) * 64;  // Should optimize to << 6
      }
      else {
        return 1;
      }
    }
    else {
      return 2;
    }
#else  // Fallback for architectures that do not provide any low-level counter:
#pragma message "Fallback to generic performance counter implementation"
    struct timeval tv;
    gettimeofday(&tv, NULL);
    return static_cast<uint64_t>((tv.tv_sec + tv.tv_usec * 0.000001) *
                                 FrequencyScaling());
#endif
    // Undefined value if perfmon is unavailable:
    return 0;
  }

 public:
  /**
   * @brief Calibrates counts per microsecond.
   *
   * Used for frequency scaling of RDTSC.
   */
  static void Calibrate(unsigned int = 0);

 public:
  /// Captures the current cycle count.
  inline TimestampCounterPosix() {
    value = static_cast<counter_t>(ArchCycleCount());
  }

  /// Copies the counter value of \c other.
  inline TimestampCounterPosix(const TimestampCounterPosix & other)
      : value(other.value)
  { }

  /// Wraps an existing counter value.
  inline TimestampCounterPosix(const counter_t & counterValue)
      : value(counterValue)
  { }

  /// Assigns the counter value of \c rhs. The argument is taken by
  /// reference so that the self-assignment check is meaningful.
  inline TimestampCounterPosix & operator=(const TimestampCounterPosix & rhs) {
    if (this != &rhs) {
      value = rhs.value;
    }
    return *this;
  }

  /// Returns the wrapped counter value.
  inline const counter_t & Value() const {
    return value;
  }

  /// Returns the static frequencyScaling member as a double.
  inline static double FrequencyScaling() {
    return static_cast<double>(TimestampCounterPosix::frequencyScaling);
  }

  /// Always 1: this timer applies no prescale.
  inline static double FrequencyPrescale() {
    return 1.0f;
  }

  /// Readable name of the counter variant selected at compile time.
  inline static const char * TimerName() {
#if defined(EMBB_BASE_CPP_PERF__ARCH_X64)
    return "POSIX:X64:RDTSC";
#elif defined(EMBB_BASE_CPP_PERF__ARCH_I386)
    return "POSIX:X86:RDTSC";
#elif defined(EMBB_BASE_CPP_PERF__ARCH_ARMV6)
    return "POSIX:ARM:PMCNT";
#else
    return "POSIX:GENERIC";
#endif
  }
};
} // namespace timer
} // namespace internal
} // namespace perf
} // namespace base
} // namespace embb
#endif // EMBB_BASE_CPP_PERF_INTERNAL_TIMER_TIMESTAMP_COUNTER_POSIX_H_
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EMBB_BASE_CPP_PERF_INTERNAL_TIMER_TIMESTAMP_COUNTER_WIN32_H_
#define EMBB_BASE_CPP_PERF_INTERNAL_TIMER_TIMESTAMP_COUNTER_WIN32_H_
#if defined(EMBB_BASE_CPP_PERF_TIMER_WIN32)
#include <embb/base/perf/timer.h>
// Prevent definition of Windows-specific macros 'min' and 'max':
#define NOMINMAX
#include <Windows.h>
namespace embb {
namespace base {
namespace perf {
namespace internal {
namespace timer {
/**
* @brief Timestamp counter (TSC) for Windows platforms.
*/
class TimestampCounterWin32 : public Timestamp
{
private:
typedef TimestampCounterWin32 self_t;
static counter_t frequencyScaling;
private:
Timestamp::counter_t value;
public:
inline static void Calibrate(unsigned int = 0) {
// Resolves ticks per second:
LARGE_INTEGER freq_li;
LONGLONG freq_value;
QueryPerformanceFrequency(&freq_li);
freq_value = freq_li.QuadPart;
self_t::frequencyScaling = static_cast<counter_t>(freq_value);
}
public:
inline TimestampCounterWin32() {
LARGE_INTEGER count_now;
LONGLONG count_now_value;
QueryPerformanceCounter(&count_now);
count_now_value = count_now.QuadPart;
value = static_cast<counter_t>(count_now_value);
}
inline TimestampCounterWin32(const self_t & other)
: value(other.value)
{ }
inline TimestampCounterWin32(const Timestamp::counter_t & counterValue)
: value(counterValue)
{ }
inline self_t & operator=(const self_t & rhs) {
if (this != &rhs) {
value = rhs.value;
}
return *this;
}
inline const Timestamp::counter_t & Value() const {
return value;
}
inline static double FrequencyScaling() {
if (self_t::frequencyScaling == 0) {
Calibrate();
}
return static_cast<double>(self_t::frequencyScaling);
}
inline static double FrequencyPrescale() {
// Adjusting frequency from seconds to microseconds scale:
return 1000000.0f;
}
inline static const char * TimerName() {
return "Win32:QueryPerformanceCounter";
}
};
} // namespace timer
} // namespace internal
} // namespace perf
} // namespace base
} // namespace embb
#endif // defined(EMBB_BASE_CPP_PERF_TIMER_WIN32)
#endif // EMBB_BASE_CPP_PERF_INTERNAL_TIMER_TIMESTAMP_COUNTER_WIN32_H_
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EMBB_BASE_CPP_PERF_INTERNAL_TIMER_TIMESTAMP_PAPI_INL_H_
#define EMBB_BASE_CPP_PERF_INTERNAL_TIMER_TIMESTAMP_PAPI_INL_H_
#if defined(EMBB_BASE_CPP_PERF_TIMER_PAPI)
#include <embb/base/perf/internal/timestamp_papi.h>
namespace embb {
namespace base {
namespace perf {
namespace internal {
namespace timer {
/**
 * Out-of-class definition of the static member
 * TimestampPAPI<TTimer>::timer_mode, initialized to 0 for every
 * instantiation of the template.
 */
template<TimeMeasure::MeasureMode TTimer>
int TimestampPAPI<TTimer>::timer_mode = 0;
} // namespace timer
} // namespace internal
} // namespace perf
} // namespace base
} // namespace embb
#endif // EMBB_BASE_CPP_PERF_TIMER_PAPI
#endif // EMBB_BASE_CPP_PERF_INTERNAL_TIMER_TIMESTAMP_PAPI_INL_H_
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EMBB_BASE_CPP_PERF_INTERNAL_TIMER_TIMESTAMP_PAPI_H_
#define EMBB_BASE_CPP_PERF_INTERNAL_TIMER_TIMESTAMP_PAPI_H_
#include <embb/base/perf/timer.h>
#include <embb/base/perf/time_measure.h>
#include <embb/base/perf/internal/env.h>
#include <embb/base/perf/internal/timestamp.h>
#include <papi.h>
#include <stdint.h>
#include <iostream>
#include <stdexcept>
namespace embb {
namespace base {
namespace perf {
namespace internal {
namespace timer {
/**
 * @brief Timestamp based on the timers of the PAPI library.
 *
 * @tparam TTimer  TimeMeasure::Clock reads the PAPI microsecond timers,
 *                 TimeMeasure::Counter reads the PAPI cycle counters.
 */
template<TimeMeasure::MeasureMode TTimer>
class TimestampPAPI : public Timestamp
{
 private:
  /// Raw counter value held by this timestamp.
  Timestamp::counter_t value;

  /// 0 selects the PAPI "real" readers, any other value the "virt" readers.
  static int timer_mode;

 public:
  static Timestamp::counter_t frequencyScaling;

 public:
  /**
   * @brief Initializes the PAPI library.
   *
   * @param arg  Timer mode stored for subsequently created timestamps
   *             (0: real time / cycles, otherwise: virtual time / cycles).
   * @throws std::runtime_error if PAPI reports a version mismatch or fails
   *         to initialize.
   */
  static void Calibrate(unsigned int arg = 0) {
    timer_mode = static_cast<int>(arg);
    int retval = PAPI_library_init(PAPI_VER_CURRENT);
    if (retval != PAPI_VER_CURRENT && retval > 0) {
      throw ::std::runtime_error("PAPI version mismatch");
    }
    else if (retval < 0) {
      throw ::std::runtime_error("PAPI init failed");
    }
  }

 public:
  /// Captures the current value of the timer selected by TTimer and the
  /// timer mode. The value is initialized to 0 so that it is never left
  /// indeterminate.
  TimestampPAPI() : value(0) {
    if (TTimer == TimeMeasure::Clock) {
      if (timer_mode == 0) {
        value = static_cast<counter_t>(PAPI_get_real_usec());
      }
      else {
        value = static_cast<counter_t>(PAPI_get_virt_usec());
      }
    }
    else if (TTimer == TimeMeasure::Counter) {
      if (timer_mode == 0) {
        value = static_cast<counter_t>(PAPI_get_real_cyc());
      }
      else {
        value = static_cast<counter_t>(PAPI_get_virt_cyc());
      }
    }
  }

  /// Copies the counter value of \c other.
  inline TimestampPAPI(const TimestampPAPI & other)
      : value(other.value)
  { }

  /// Wraps an existing counter value.
  inline TimestampPAPI(const counter_t & counterValue)
      : value(counterValue)
  { }

  /// Assigns the counter value of \c rhs. The argument is taken by
  /// reference so that the self-assignment check is meaningful.
  inline TimestampPAPI & operator=(const TimestampPAPI & rhs) {
    if (this != &rhs) {
      value = rhs.value;
    }
    return *this;
  }

  /// Returns the wrapped counter value.
  inline const counter_t & Value() const {
    return value;
  }

  /// Scaling factor: a hard-coded clock speed for the cycle counter,
  /// 1 for the clock timer.
  inline static double FrequencyScaling() {
    if (TTimer == TimeMeasure::Counter) {
      return 996.0f;  // clock speed on Wandboard
    }
    return 1.0f;
  }

  /// Always 1: this timer applies no prescale.
  inline static double FrequencyPrescale() {
    return 1.0f;
  }

  /// Readable name of this timer.
  inline static const char * TimerName() {
    return "PAPI";
  }
};
} // namespace timer
} // namespace internal
} // namespace perf
} // namespace base
} // namespace embb
#include <embb/base/perf/internal/timestamp_papi-inl.h>
#endif // EMBB_BASE_CPP_PERF_INTERNAL_TIMER_TIMESTAMP_PAPI_H_
#ifndef _EMBB_BASE_CPP_PERF_MEASURE_H
#define _EMBB_BASE_CPP_PERF_MEASURE_H
#include <embb/base/perf/duration.h>
#include <embb/base/perf/performance_metrics.h>
namespace embb {
namespace base {
namespace perf {
/**
 * Result of a single measurement: a duration together with a set of
 * performance metrics.
 */
struct Measure {
/// Duration part of the measurement.
Duration duration;
/// Performance metrics part of the measurement.
PerformanceMetrics metrics;
};
} // namespace perf
} // namespace base
} // namespace embb
#endif // _EMBB_BASE_CPP_PERF_MEASURE_H
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EMBB_BASE_PERF_PERF_TEST_UNIT_H_
#define EMBB_BASE_PERF_PERF_TEST_UNIT_H_
#include <cmath>
#include <vector>
#include <partest/partest.h>
#include <partest/test_unit.h>
#include <embb/base/perf/timer.h>
#include <embb/mtapi/mtapi.h>
#include <embb/base/c/thread.h>
#include <embb/base/c/internal/thread_index.h>
#define THIS_DOMAIN_ID 1
#define THIS_NODE_ID 1
namespace embb {
namespace base {
namespace perf {
/**
* \defgroup CPP_BASE_PERF Performance Tests
*
* Performance Test Framework
*
* \ingroup CPP_BASE
*/
/**
 * Performance Test Unit
 *
 * Building block of all tests (Speedup Test, Performance Test, ...). The
 * template argument is a non-copyable Functor whose \c Run method is executed
 * \c iteration_count times, with EMBB set up for \c thread_count worker
 * threads.
 *
 * A \c thread_count of 0 means that EMBB is not initialized and the Functor
 * runs without EMBB support.
 *
 * \notthreadsafe
 * \ingroup CPP_BASE_PERF
 */
template<typename F>
class PerfTestUnit : public partest::TestUnit {
 public:
  /**
   * Creates the Functor \c F and registers the timing hooks and the Functor
   * with partest::TestUnit.
   */
  explicit PerfTestUnit(
      size_t thread_count = partest::TestSuite::GetDefaultNumThreads(),
      size_t iteration_count = partest::TestSuite::GetDefaultNumIterations())
      : partest::TestUnit("PTU"),
        duration_(0),
        thread_count_(thread_count),
        iteration_count_(iteration_count),
        func(new F) {
    /* TODO: move creation and deletion of functor data (e.g. vector of doubles)
     * to functor-specific Pre/Post methods to avoid memory shortage */
    /* TODO: create possibility to initialize memory in these functor-specific
     * Pre/Post methods to avoid first-touch problem. */
    Pre(&PerfTestUnit::Tic, this);
    Add(&F::Run, func, 1, iteration_count_);
    Post(&PerfTestUnit::Toc, this);
  }

  /**
   * Releases the Functor owned by this unit.
   */
  ~PerfTestUnit() {
    delete func;
  }

  /**
   * Duration of this unit in microseconds.
   * \return Duration of this unit in microseconds.
   */
  double GetDuration() const {
    return duration_;
  }

  /**
   * Thread count of this unit.
   * \return Thread count of this unit.
   */
  size_t GetThreadCount() const {
    return thread_count_;
  }

  /**
   * Iteration count of this unit.
   * \return Iteration count of this unit.
   */
  size_t GetIterationCount() const {
    return iteration_count_;
  }

 private:
  /**
   * Initializes EMBB (unless thread_count is 0) and starts the timer.
   */
  void Tic() {
    /* a thread count of 0 requests a run without EMBB */
    if (thread_count_ > 0) {
      /* one core per worker thread, limited by the available cores */
      embb::base::CoreSet cores(false);
      unsigned int core = 0;
      while ((core < embb::base::CoreSet::CountAvailable()) &&
             (core < thread_count_)) {
        cores.Add(core);
        core++;
      }
      embb::mtapi::Node::Initialize(THIS_DOMAIN_ID, THIS_NODE_ID, cores,
                                    MTAPI_NODE_MAX_TASKS_DEFAULT,
                                    MTAPI_NODE_MAX_GROUPS_DEFAULT,
                                    MTAPI_NODE_MAX_QUEUES_DEFAULT,
                                    MTAPI_NODE_QUEUE_LIMIT_DEFAULT,
                                    MTAPI_NODE_MAX_PRIORITIES_DEFAULT);
    }
    /* restart the timer */
    timer_ = Timer();
  }

  /**
   * Stops the timer and shuts EMBB down again.
   */
  void Toc() {
    /* take the elapsed time first so that shutdown is not measured */
    duration_ = timer_.Elapsed();
    /* nothing to finalize if EMBB was never initialized */
    if (thread_count_ == 0) {
      return;
    }
    embb::mtapi::Node::Finalize();
    /* reset internal thread count in EMBB. required in order to avoid
     * lock-ups */
    /* TODO: Talk to TobFuchs about nicer implementation */
    embb_internal_thread_index_reset();
  }

  double duration_;
  size_t thread_count_;
  size_t iteration_count_;
  Timer timer_;
  F *func;

  /* copying and assignment are not supported */
  PerfTestUnit(const PerfTestUnit &other);
  PerfTestUnit& operator=(const PerfTestUnit &other);
};
} /* perf */
} /* base */
} /* embb */
#endif /* EMBB_BASE_PERF_PERF_TEST_UNIT_H_ */
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EMBB_BASE_CPP_PERF_PERFORMANCE_METRICS_H_
#define EMBB_BASE_CPP_PERF_PERFORMANCE_METRICS_H_
namespace embb {
namespace base {
namespace perf {
/**
 * Plain aggregate of performance figures.
 *
 * NOTE(review): the field names resemble the outputs of PAPI's flops
 * measurement (real time, process time, Mflop/s, floating point
 * instructions); units and producer are not visible here -- confirm.
 */
struct PerformanceMetrics {
/// presumably elapsed wall-clock time -- TODO confirm unit
float real_time;
/// presumably consumed process time -- TODO confirm unit
float proc_time;
/// presumably million floating point operations per second -- TODO confirm
float mflops;
/// presumably number of floating point instructions -- TODO confirm
long long flpins;
};
} // namespace perf
} // namespace base
} // namespace embb
#endif // EMBB_BASE_CPP_PERF_PERFORMANCE_METRICS_H_
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EMBB_BASE_PERF_PERFORMANCE_TEST_H_
#define EMBB_BASE_PERF_PERFORMANCE_TEST_H_
#include <partest/partest.h>
#include <embb/base/perf/perf_test_unit.h>
namespace embb {
namespace base {
namespace perf {
/**
 * Performance Test
 *
 * Runs a non-copyable Functor \c iteration_count times with \c thread_count
 * working threads and reports the duration.
 *
 * \notthreadsafe
 * \ingroup CPP_BASE_PERF
 */
template<typename ParallelF>
class PerformanceTest : public partest::TestCase {
 public:
  /**
   * Constructs PerformanceTest and creates its single test unit.
   *
   * \param thread_count     Requested number of worker threads; limited to
   *                         the number of available cores.
   * \param iteration_count  Number of times the Functor is executed.
   */
  explicit PerformanceTest(
      size_t thread_count = partest::TestSuite::GetDefaultNumThreads(),
      size_t iteration_count = partest::TestSuite::GetDefaultNumIterations()) :
        partest::TestCase() {
    /* maximum one thread per available core */
    size_t threads = std::min<size_t>(thread_count,
                                      embb::base::CoreSet::CountAvailable());
    unit = &CreateUnit< PerfTestUnit<ParallelF> >(threads, iteration_count);
  }

  /**
   * Destructs PerformanceTest and prints the report to standard output.
   */
  ~PerformanceTest() {
    PrintReport(std::cout);
  }

  /**
   * Prints two lines to \c ostr: "P" followed by the thread count of the
   * unit, then the duration of the unit.
   */
  void PrintReport(std::ostream &ostr) const {
    /* print execution duration; GetThreadCount must be called, streaming
     * the bare member function name does not compile */
    ostr << "P" << unit->GetThreadCount() << std::endl << unit->GetDuration()
         << std::endl;
  }

 private:
  /* test unit created in the constructor via CreateUnit */
  PerfTestUnit<ParallelF> *unit;

  /* prohibit copy and assignment */
  PerformanceTest(const PerformanceTest &other);
  PerformanceTest& operator=(const PerformanceTest &other);
};
} /* perf */
} /* base */
} /* embb */
#endif /* EMBB_BASE_PERF_PERFORMANCE_TEST_H_ */
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EMBB_BASE_PERF_PERFORMANCE_TEST_RUNNER_H_
#define EMBB_BASE_PERF_PERFORMANCE_TEST_RUNNER_H_
#include <embb/base/perf/call_args.h>
namespace embb {
namespace base {
namespace perf {
/**
 * Abstract base class of performance test runners.
 *
 * A runner is constructed from the call arguments and implements Run().
 * Only a reference to the call arguments is stored, so the CallArgs object
 * has to outlive the runner.
 */
class PerformanceTestRunner {
 public:
  /// Stores a reference to the call arguments.
  PerformanceTestRunner(const embb::base::perf::CallArgs & args)
      : args_(args) { }

  virtual ~PerformanceTestRunner() { }

  /// Executes the test; implemented by concrete runners.
  virtual void Run() = 0;

 protected:
  /// Gives derived classes read access to the call arguments.
  const embb::base::perf::CallArgs & Args() const { return args_; }

 private:
  /// Call arguments passed at construction (not owned).
  const embb::base::perf::CallArgs & args_;
};
} // namespace perf
} // namespace base
} // namespace embb
#endif
\ No newline at end of file
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EMBB_BASE_PERF_SPEEDUP_TEST_H_
#define EMBB_BASE_PERF_SPEEDUP_TEST_H_
#include <vector>
#include <iomanip>
#include <partest/partest.h>
#include <embb/base/perf/timer.h>
#include <embb/base/perf/perf_test_unit.h>
namespace embb {
namespace base {
namespace perf {
/**
 * Speedup Test
 *
 * Runs a non-copyable Functor \c iteration_count times with 1...\c thread_count
 * working threads (log2 steps) and reports the duration.
 *
 * Executes another Functor without EMBB. This can be used to compare a serial
 * version of an algorithm with its parallel counterpart.
 *
 * \notthreadsafe
 * \ingroup CPP_BASE_PERF
 */
template<typename ParallelF, typename SerialF>
class SpeedupTest : public partest::TestCase {
 public:
  /**
   * Constructs SpeedupTest, prints the test configuration to standard output
   * and creates the test units.
   *
   * \param max_thread_count  Upper limit of worker threads; limited to the
   *                          number of available cores.
   * \param iteration_count   Number of times each Functor is executed.
   */
  explicit SpeedupTest(
      size_t max_thread_count = partest::TestSuite::GetDefaultNumThreads(),
      size_t iteration_count = partest::TestSuite::GetDefaultNumIterations()) :
        partest::TestCase() {
    /* maximum one thread per available core */
    size_t threads = std::min<size_t>(
                       max_thread_count,
                       embb::base::CoreSet::CountAvailable());
    std::cout << "Test configuration ------------------------------------" << std::endl;
    std::cout << " Num threads: " << threads << std::endl;
    std::cout << " Iterations: " << iteration_count << std::endl;
    /* create unit for serial version */
    ser_unit_ = &CreateUnit< PerfTestUnit<SerialF> >(0, iteration_count);
    /* create log2(threads)+1 units for parallel version */
    for (size_t i = 1; i <= threads; i = i * 2) {
      par_units_.push_back(
        &CreateUnit< PerfTestUnit<ParallelF> >(i, iteration_count));
    }
  }

  /**
   * Destructs SpeedupTest.
   */
  ~SpeedupTest() {
  }

  /**
   * Prints the durations of all units in comma separated format, one row
   * "<thread count>,<duration>" per unit. The serial unit is reported with
   * thread count 0.
   */
  void PrintReport(std::ostream &ostr) {
    /* print sample row for sequential run (degree 0): */
    ostr << "0,"
         << std::fixed << std::setprecision(2)
         << ser_unit_->GetDuration() << std::endl;
    /* print sample rows for parallel runs (degree > 0); the index is
     * unsigned to match the type of size(): */
    for (size_t i = 0; i < par_units_.size(); ++i) {
      ostr << std::fixed << par_units_[i]->GetThreadCount()
           << ","
           << std::fixed << std::setprecision(2)
           << par_units_[i]->GetDuration()
           << std::endl;
    }
  }

 private:
  /* units of the parallel version, one per thread count step */
  std::vector<PerfTestUnit<ParallelF> *> par_units_;
  /* unit of the serial version */
  PerfTestUnit<SerialF> *ser_unit_;

  /* prohibit copy and assignment */
  SpeedupTest(const SpeedupTest &other);
  SpeedupTest& operator=(const SpeedupTest &other);
};
} /* perf */
} /* base */
} /* embb */
#endif /* EMBB_BASE_PERF_SPEEDUP_TEST_H_ */
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EMBB_BASE_CPP_PERF_TIME_MEASURE_H_
#define EMBB_BASE_CPP_PERF_TIME_MEASURE_H_
namespace embb {
namespace base {
namespace perf {
/**
 * Groups the enumerations used to configure time measurement.
 */
class TimeMeasure {
 public:
  /**
   * Selects the timestamp source.
   */
  enum MeasureMode {
    Counter = 0,
    Clock = 1
  };
  /**
   * Selects the measurement domain.
   * NOTE(review): presumably distinguishes raw cycle counts from time
   * values; confirm against the code that consumes it.
   */
  enum MeasureDomain {
    Cycles = 0,
    Time = 1
  };
};
} // namespace perf
} // namespace base
} // namespace embb
#endif // EMBB_BASE_CPP_PERF_TIME_MEASURE_H_
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EMBB_BASE_CPP_PERF_TIMER_H_
#define EMBB_BASE_CPP_PERF_TIMER_H_
#ifdef EMBB_PLATFORM_THREADING_POSIXTHREADS
// #define EMBB_BASE_CPP_PERF_TIMER_PAPI
#endif
#include <embb/base/perf/time_measure.h>
#include <embb/base/perf/internal/env.h>
#include <embb/base/perf/internal/timestamp.h>
#include <embb/base/c/internal/config.h>
#include <climits>
#if defined(EMBB_BASE_CPP_PERF_TIMER_PAPI)
# include <embb/base/perf/internal/timestamp_papi.h>
#endif
#if defined(EMBB_PLATFORM_THREADING_WINTHREADS)
# include <embb/base/perf/internal/timestamp_counter_win32.h>
# include <embb/base/perf/internal/timestamp_clock_win32.h>
#elif defined(EMBB_PLATFORM_THREADING_POSIXTHREADS)
# include <embb/base/perf/internal/timestamp_counter_posix.h>
# include <embb/base/perf/internal/timestamp_clock_posix.h>
#endif
namespace embb {
namespace base {
namespace perf {
/**
 * Stopwatch-style helper for performance measurements.
 *
 * An instance remembers the timestamp taken at construction. The static
 * member Type selects which timestamp backend (counter based or clock
 * based) every operation of this class uses. The concrete backend types
 * are chosen at compile time by the preprocessor conditions below.
 */
class Timer {
public:
/** Integral timestamp type shared with the timestamp backends. */
typedef Timestamp::counter_t timestamp_t;
private:
typedef Timer self_t;
/** Active measure mode; static, hence shared by all Timer instances. */
static embb::base::perf::TimeMeasure::MeasureMode Type;
/** Timestamp captured when this Timer was constructed (or copied from). */
timestamp_t timestampStart;
private:
#if defined(EMBB_BASE_CPP_PERF_TIMER_PAPI)
// PAPI support, use optimized performance measurements from PAPI
typedef embb::base::perf::internal::timer::TimestampPAPI<TimeMeasure::Clock>
TimestampClockBased;
typedef embb::base::perf::internal::timer::TimestampPAPI<TimeMeasure::Counter>
TimestampCounterBased;
#else // No PAPI
# if defined(EMBB_BASE_CPP_PERF_TIMER_WIN32)
// Windows:
typedef embb::base::perf::internal::timer::TimestampCounterWin32
TimestampCounterBased;
typedef embb::base::perf::internal::timer::TimestampClockWin32
TimestampClockBased;
# else
// POSIX:
typedef embb::base::perf::internal::timer::TimestampCounterPosix
TimestampCounterBased;
typedef embb::base::perf::internal::timer::TimestampClockPosix
TimestampClockBased;
# endif // WIN32 / POSIX
#endif // No PAPI
public:
/**
 * Creates a Timer that starts at the current timestamp.
 */
inline Timer() {
timestampStart = Timer::Now();
}
/**
 * Copies the start timestamp of another Timer.
 */
inline Timer(const self_t & other) : timestampStart(other.timestampStart)
{ }
/**
 * Assigns the start timestamp of another Timer; self-assignment is a no-op.
 */
inline Timer & operator=(const self_t & other) {
if (this != &other) {
timestampStart = other.timestampStart;
}
return *this;
}
/**
 * @brief Microseconds elapsed since instantiation of this Timer object.
 *
 * The timestamp difference is multiplied by the active backend's
 * FrequencyPrescale() and divided by its FrequencyScaling().
 * Returns 0.0 if Type is neither Counter nor Clock.
 */
inline double Elapsed() const {
timestamp_t now;
if (Timer::Type == TimeMeasure::Counter) {
TimestampCounterBased timestamp;
now = timestamp.Value();
return (static_cast<double>(now - timestampStart) *
static_cast<double>(TimestampCounterBased::FrequencyPrescale())) /
static_cast<double>(TimestampCounterBased::FrequencyScaling());
}
if (Timer::Type == TimeMeasure::Clock) {
TimestampClockBased timestamp;
now = timestamp.Value();
return (static_cast<double>(now - timestampStart) *
static_cast<double>(TimestampClockBased::FrequencyPrescale())) /
static_cast<double>(TimestampClockBased::FrequencyScaling());
}
return 0.0f;
}
/**
 * Returns timestamp from instantiation of this Timer.
 */
inline const timestamp_t & Start() const {
return timestampStart;
}
/**
 * Microseconds elapsed since given timestamp.
 *
 * Uses the same conversion as Elapsed(), but relative to the timestamp
 * passed in. Returns 0.0 if Type is neither Counter nor Clock.
 */
inline static double ElapsedSince(timestamp_t timestamp) {
if (Timer::Type == TimeMeasure::Counter) {
TimestampCounterBased now;
return (static_cast<double>(now.Value() - timestamp) *
static_cast<double>(TimestampCounterBased::FrequencyPrescale())) /
static_cast<double>(TimestampCounterBased::FrequencyScaling());
}
if (Timer::Type == TimeMeasure::Clock) {
TimestampClockBased now;
return (static_cast<double>(now.Value() - timestamp) *
static_cast<double>(TimestampClockBased::FrequencyPrescale())) /
static_cast<double>(TimestampClockBased::FrequencyScaling());
}
return 0.0f;
}
/**
 * Produces current timestamp.
 *
 * The value is taken from a freshly constructed timestamp object of the
 * active backend. Returns 0 if Type is neither Counter nor Clock.
 */
inline static timestamp_t Now() {
if (Timer::Type == TimeMeasure::Counter) {
TimestampCounterBased timestamp;
return timestamp.Value();
}
if (Timer::Type == TimeMeasure::Clock) {
TimestampClockBased timestamp;
return timestamp.Value();
}
return 0;
}
/**
 * Convert interval of two timestamp values to microseconds.
 *
 * Returns -1.0 if Type is neither Counter nor Clock.
 */
inline static double FromInterval(
const timestamp_t & start,
const timestamp_t & end)
{
if (Timer::Type == TimeMeasure::Counter) {
return (static_cast<double>(end - start) *
static_cast<double>(TimestampCounterBased::FrequencyPrescale())) /
static_cast<double>(TimestampCounterBased::FrequencyScaling());
}
if (Timer::Type == TimeMeasure::Clock) {
return (static_cast<double>(end - start) *
static_cast<double>(TimestampClockBased::FrequencyPrescale())) /
static_cast<double>(TimestampClockBased::FrequencyScaling());
}
return -1.0f;
}
/**
 * Convert interval of two timestamp values to microseconds.
 *
 * Overload for timestamps that are already held as double. Unlike the
 * timestamp_t overload, prescale and scaling are not cast to double here.
 * Returns -1.0 if Type is neither Counter nor Clock.
 */
inline static double FromInterval(
const double & start,
const double & end)
{
if (Timer::Type == TimeMeasure::Counter) {
return ((end - start) *
TimestampCounterBased::FrequencyPrescale() /
TimestampCounterBased::FrequencyScaling());
}
if (Timer::Type == TimeMeasure::Clock) {
return ((end - start) *
TimestampClockBased::FrequencyPrescale() /
TimestampClockBased::FrequencyScaling());
}
return -1.0f;
}
/**
 * Selects the measure mode for all Timer operations and forwards the
 * given value to the Calibrate function of the selected backend.
 *
 * The meaning of freq is defined by the backend that receives it.
 */
inline static void Calibrate(
TimeMeasure::MeasureMode mode,
unsigned int freq = 0) {
Timer::Type = mode;
if (Timer::Type == TimeMeasure::Counter) {
Timer::TimestampCounterBased::Calibrate(freq);
}
else if (Timer::Type == TimeMeasure::Clock) {
Timer::TimestampClockBased::Calibrate(freq);
}
}
/**
 * Name of the active timestamp backend, or "Undefined" if Type is
 * neither Counter nor Clock.
 */
inline static const char * TimerName() {
if (Timer::Type == TimeMeasure::Counter) {
return TimestampCounterBased::TimerName();
}
if (Timer::Type == TimeMeasure::Clock) {
return TimestampClockBased::TimerName();
}
return "Undefined";
}
/**
 * Value the active backend reports as its infinity timestamp; falls back
 * to LLONG_MAX if Type is neither Counter nor Clock.
 */
inline static Timestamp::counter_t TimestampInfinity() {
if (Timer::Type == TimeMeasure::Counter) {
return TimestampCounterBased::TimestampInfinity();
}
if (Timer::Type == TimeMeasure::Clock) {
return TimestampClockBased::TimestampInfinity();
}
return LLONG_MAX;
}
/**
 * Value the active backend reports as its negative-infinity timestamp;
 * falls back to 0 if Type is neither Counter nor Clock.
 */
inline static Timestamp::counter_t TimestampNegInfinity() {
if (Timer::Type == TimeMeasure::Counter) {
return Timer::TimestampCounterBased::TimestampNegInfinity();
}
if (Timer::Type == TimeMeasure::Clock) {
return Timer::TimestampClockBased::TimestampNegInfinity();
}
return 0;
}
/**
 * Frequency scaling of the active backend, i.e. the divisor used by the
 * conversions above; falls back to 1.0 if Type is neither Counter nor
 * Clock.
 */
inline static double FrequencyScaling() {
if (Timer::Type == TimeMeasure::Counter) {
return Timer::TimestampCounterBased::FrequencyScaling();
}
if (Timer::Type == TimeMeasure::Clock) {
return Timer::TimestampClockBased::FrequencyScaling();
}
return 1.0f;
}
};
} // namespace perf
} // namespace base
} // namespace embb
#endif // EMBB_BASE_CPP_PERF_TIMER_H_
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EMBB_BASE_CPP_PERF_TIMER_H_
#define EMBB_BASE_CPP_PERF_TIMER_H_
namespace embb {
namespace base {
namespace perf {
/**
 * Holds the enumeration that selects the timestamp source.
 *
 * NOTE(review): the header enclosing this class is guarded by
 * EMBB_BASE_CPP_PERF_TIMER_H_, the same macro that guards the Timer
 * header. Whichever of the two is included first presumably hides the
 * other; confirm whether this file is still needed.
 */
class TimeMeasure {
 public:
  enum MeasureMode {
    Counter = 0,
    Clock = 1
  };
};
} // namespace perf
} // namespace base
} // namespace embb
#endif // EMBB_BASE_CPP_PERF_TIMER_H_
#include <embb/base/perf/call_args.h>
#include <embb/base/core_set.h>
#include <embb/base/perf/timer.h>
#include <cstdlib>
#include <ostream>
#include <stdexcept>
#include <string>
namespace embb {
namespace base {
namespace perf {
/**
 * Reads the test configuration from the command line and calibrates the
 * performance timer.
 *
 * Arguments are consumed as "<flag> <value>" pairs starting at argv[1].
 * Recognized flags are -t (max. threads), -n (vector size), -f (counter
 * scale), -e (element type), -s (stress type), -l (load factor) and
 * -p (parallel base reference). Tokens in flag position that are not
 * recognized are ignored.
 *
 * \param argc Number of entries in argv
 * \param argv Command line arguments as passed to main()
 *
 * \throws std::runtime_error if a recognized flag is the last argument and
 *         therefore has no value, or if the element type or stress type is
 *         undefined after an argument pair has been processed
 */
void CallArgs::Parse(int argc, char * argv[]) {
  // Set config from command line arguments:
  for (int paramIndex = 1; paramIndex < argc; paramIndex += 2) {
    const std::string option(argv[paramIndex]);
    const bool is_known_option =
      option == "-t" || option == "-n" || option == "-f" ||
      option == "-e" || option == "-s" || option == "-l" ||
      option == "-p";
    if (is_known_option) {
      // A flag without a value would otherwise read past the end of argv:
      if (paramIndex + 1 >= argc) {
        throw ::std::runtime_error(
          "Missing value for option " + option);
      }
      const char * value = argv[paramIndex + 1];
      if (option == "-t") {
        // Max. number of threads to resolve speedup. The sign is checked
        // before converting to an unsigned type so that negative input
        // cannot wrap around to a huge value:
        const int threads_param = std::atoi(value);
        if (threads_param > 0 &&
            static_cast<size_t>(threads_param) < max_threads) {
          max_threads = static_cast<size_t>(threads_param);
        }
      } else if (option == "-n") {
        // Test vector size:
        const int vsize_param = std::atoi(value);
        if (vsize_param > 0) {
          vector_size = static_cast<size_t>(vsize_param);
        }
      } else if (option == "-f") {
        // Performance counter scaling:
        const int scale_param = std::atoi(value);
        if (scale_param > 0) {
          counter_scale = static_cast<unsigned int>(scale_param);
        }
      } else if (option == "-e") {
        // Element type; only "float" and "double" are recognized here:
        const ::std::string type(value);
        element_type = UNDEFINED_SCALAR_TYPE;
        if (type == "float") {
          element_type = FLOAT;
        } else if (type == "double") {
          element_type = DOUBLE;
        }
      } else if (option == "-s") {
        // Stress type:
        const ::std::string type(value);
        stress_type = UNDEFINED_STRESS_TYPE;
        if (type == "cpu") {
          stress_type = CPU_STRESS;
        } else if (type == "ram") {
          stress_type = RAM_STRESS;
        }
      } else if (option == "-l") {
        // Test load factor:
        load_factor = static_cast<size_t>(std::atoi(value));
      } else {
        // Additional test parameter (-p):
        parallel_base_ref = std::atoi(value);
      }
    }
    // Sanitizing and error handling, evaluated after every argument pair:
    if (element_type == UNDEFINED_SCALAR_TYPE) {
      throw ::std::runtime_error(
        "Invalid setting for element type (-e int|float|double)");
    }
    if (stress_type == UNDEFINED_STRESS_TYPE) {
      throw ::std::runtime_error(
        "Invalid setting for stress test type (-s ram|cpu)");
    }
  }
  // Calibrate performance time sampling:
  embb::base::perf::Timer::Calibrate(
    embb::base::perf::TimeMeasure::Counter,
    CounterScale());
}
/**
 * Writes the current configuration to the given stream, one setting per
 * line, each labelled with the command line flag that controls it.
 *
 * \param os Stream receiving the listing
 */
void CallArgs::Print(std::ostream & os) {
  os << "Max. threads: (-t) " << MaxThreads() << std::endl;
  os << "Vector size: (-n) " << VectorSize() << std::endl;
  os << "Load factor: (-l) " << LoadFactor() << std::endl;
  os << "Element type: (-e) " << ElementTypeName() << std::endl;
  os << "Stress mode: (-s) " << StressModeName() << std::endl;
  os << "Serial base ref: (-p) " << ParallelBaseReference() << std::endl;
  // The last line reports the name of the timer backend in use:
  const char * timer_name = embb::base::perf::Timer::TimerName();
  os << "Time sampling: (-f) " << timer_name << std::endl;
}
} // namespace perf
} // namespace base
} // namespace embb
#include <embb/base/perf/timer.h>
#include <embb/base/perf/time_measure.h>
namespace embb {
namespace base {
namespace perf {
/* Definition of the static measure mode of Timer. It starts out as
   Counter and is overwritten by Timer::Calibrate. */
TimeMeasure::MeasureMode Timer::Type = TimeMeasure::Counter;
} // namespace perf
} // namespace base
} // namespace embb
#include <embb/base/perf/timer.h>
#if defined(EMBB_BASE_CPP_PERF_TIMER_POSIX) || \
defined(EMBB_BASE_CPP_PERF_TIMER_OSX) || \
defined(EMBB_BASE_CPP_PERF_TIMER_UX)
#include <embb/base/perf/internal/timestamp_clock_posix.h>
#include <stdexcept>
// POSIX standard header
#include <unistd.h> /* POSIX flags */
#include <time.h> /* clock_gettime(), time(), needs librt.a */
#include <sys/time.h> /* gethrtime(), gettimeofday() */
#if defined(__MACH__) && defined(__APPLE__)
// OS X / Mach
#include <mach/mach.h>
#include <mach/mach_time.h>
#endif
namespace embb {
namespace base {
namespace perf {
namespace internal {
namespace timer {
/**
 * Captures the current time in the member `value`.
 *
 * The time source is chosen at compile time:
 *  - UX:    gethrtime()
 *  - OSX:   mach_absolute_time() scaled by the Mach timebase
 *  - POSIX: clock_gettime() on the calibrated clock id, in microseconds;
 *           if POSIX timers are unavailable or the call fails,
 *           gettimeofday() is used instead, also in microseconds
 *
 * \throws std::runtime_error if none of the platform macros is defined
 */
TimestampClockPosix::TimestampClockPosix() {
  // {{{
#if defined(EMBB_BASE_CPP_PERF_TIMER_UX)
  // HP-UX, Solaris
  value = static_cast<Timestamp::counter_t>(gethrtime());
#elif defined(EMBB_BASE_CPP_PERF_TIMER_OSX)
  // OS X
  // NOTE(review): numer/denom presumably converts ticks to nanoseconds,
  // whereas the POSIX branch below stores microseconds; confirm that the
  // frequency scaling accounts for this.
  static double timeConvert = 0.0;
  if (timeConvert == 0.0)
  {
    // Query the timebase only once; the ratio is cached in the static.
    mach_timebase_info_data_t timeBase;
    (void)mach_timebase_info(&timeBase);
    timeConvert = (double)timeBase.numer /
                  (double)timeBase.denom;
  }
  value = static_cast<Timestamp::counter_t>(
            static_cast<double>(mach_absolute_time() * timeConvert));
#elif defined(EMBB_BASE_CPP_PERF_TIMER_POSIX)
  // POSIX
#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0)
  // POSIX clock_gettime
  struct timespec ts;
  if (clockId != (clockid_t)-1 && clock_gettime(clockId, &ts) != -1) {
    // Convert to double before multiplying so that the product cannot
    // overflow a narrow integer type:
    value = static_cast<Timestamp::counter_t>(
              static_cast<double>(ts.tv_sec) * 1000000.0 +
              static_cast<double>(ts.tv_nsec / 1000));
    return;
  }
#endif
  // No support for POSIX clock_gettime
  // Imprecise fallback to wall-clock time. The result of gettimeofday()
  // is a timeval (seconds and microseconds), so it is read from `tm`:
  struct timeval tm;
  gettimeofday(&tm, NULL);
  value = static_cast<Timestamp::counter_t>(
            static_cast<double>(tm.tv_sec) * 1000000.0 +
            static_cast<double>(tm.tv_usec));
#else
  // No POSIX-compliant time mechanism found.
  throw std::runtime_error("Could not resolve timer");
#endif
  // }}}
}
/* Human-readable names of the clock modes. Calibrate() indexes this table
   with the numeric value of a ClockMode, so the order here presumably has
   to match the enumerator order of ClockMode (declared elsewhere;
   TODO confirm). */
const char *
TimestampClockPosix::clockModeNames[] = {
// {{{
"Default",
"GENERIC",
"UX",
"MACH",
"THREAD_CPUTIME",
"PROC_CPUTIME",
"HIGHRES",
"MONOTONIC_PRECISE",
"MONOTONIC_RAW",
"MONOTONIC",
"REALTIME"
// }}}
};
/* Table of (clock mode, clock id) pairs that are available on the build
   platform. The first entry is an UNDEFINED_CLOCK placeholder; Calibrate()
   starts scanning at index 1 and stops at the GENERIC_CLOCK entry, which
   therefore acts as the terminator and must stay last. Index 1 is the
   default that Calibrate() selects when the requested mode is not found. */
TimestampClockPosix::ClockDef
TimestampClockPosix::availableModes[] = {
// {{{
TimestampClockPosix::ClockDef(
UNDEFINED_CLOCK, static_cast<clockid_t>(-1)),
#if defined(EMBB_BASE_CPP_PERF_TIMER_UX)
TimestampClockPosix::ClockDef(
UX, static_cast<clockid_t>(-1)),
#endif
#if defined(EMBB_BASE_CPP_PERF_TIMER_OSX)
TimestampClockPosix::ClockDef(
MACH, static_cast<clockid_t>(-1)),
#endif
#if defined(_CLOCK_HIGHRES)
TimestampClockPosix::ClockDef(
HIGHRES, CLOCK_HIGHRES),
#endif
#if defined(_POSIX_MONOTONIC_CLOCK)
# if defined(EMBB_BASE_CPP_PERF_TIMER_FREEBSD)
TimestampClockPosix::ClockDef(
MONOTONIC_PRECISE, CLOCK_MONOTONIC_PRECISE),
///< FreeBSD specific, corresponds to MONOTONIC on Linux
# endif // FreeBSD
# if defined(EMBB_BASE_CPP_PERF_TIMER_LINUX)
TimestampClockPosix::ClockDef(
MONOTONIC_RAW, CLOCK_MONOTONIC_RAW),
///< Linux specific, like MONOTONIC but without NTP adjustment
# endif // Linux
TimestampClockPosix::ClockDef(
MONOTONIC, CLOCK_MONOTONIC),
#endif // _POSIX_MONOTONIC_CLOCK
#if defined(_POSIX_THREAD_CPUTIME)
TimestampClockPosix::ClockDef(
THREAD_CPUTIME, CLOCK_THREAD_CPUTIME_ID),
#endif
#if defined(_POSIX_CPUTIME)
TimestampClockPosix::ClockDef(
PROC_CPUTIME, CLOCK_PROCESS_CPUTIME_ID),
#endif
// REALTIME is listed unconditionally, so the table always holds at least
// one selectable clock between the placeholder and the terminator.
TimestampClockPosix::ClockDef(
REALTIME, CLOCK_REALTIME),
TimestampClockPosix::ClockDef(
GENERIC_CLOCK, static_cast<clockid_t>(-1))
// }}}
};
/**
 * Selects the clock used for subsequent timestamps and reports the choice
 * on standard output.
 *
 * All entries of availableModes are listed. If one of them has the
 * requested mode number it becomes active, otherwise the entry at index 1
 * is used. The resolution of the active clock is printed if
 * clock_getres() succeeds.
 *
 * \param mode Numeric value of the requested clock mode
 */
void TimestampClockPosix::
Calibrate(unsigned int mode) {
  // {{{
  // The GENERIC_CLOCK entry terminates the table of available modes:
  const unsigned int terminator = static_cast<unsigned int>(
    TimestampClockPosix::GENERIC_CLOCK);
  // Index 0 is a placeholder; index 1 is the first and preferred clock
  // type and serves as the default selection:
  unsigned int chosenIndex = 1;
  std::cout << "Available modes: ";
  unsigned int index = 1;
  while (TimestampClockPosix::availableModes[index].first != terminator) {
    unsigned int candidate = TimestampClockPosix::availableModes[index].first;
    std::cout << TimestampClockPosix::clockModeNames[candidate]
              << "(" << candidate << ") ";
    if (candidate == mode) {
      // Requested mode is contained in available modes
      chosenIndex = index;
    }
    ++index;
  }
  std::cout << std::endl;
  // Activate the selected table entry:
  clockMode = TimestampClockPosix::availableModes[chosenIndex].first;
  clockId = TimestampClockPosix::availableModes[chosenIndex].second;
  // Report the mode that finally has been activated:
  unsigned int activeNum = static_cast<unsigned int>(clockMode);
  std::cout << "Active mode: "
            << TimestampClockPosix::clockModeNames[activeNum]
            << "(" << activeNum << ")" << std::endl;
  // Report the resolution of the active clock, if it can be queried:
  struct timespec resolution;
  const int status = clock_getres(clockId, &resolution);
  if (status == 0) {
    std::cout << "Resolution: " << resolution.tv_nsec << "ns" << std::endl;
  }
  // }}}
}
/* Active clock mode; undefined until Calibrate() selects one. */
TimestampClockPosix::ClockMode
TimestampClockPosix::clockMode = TimestampClockPosix::UNDEFINED_CLOCK;
/* Clock id of the active mode; -1 marks "no POSIX clock id", in which
   case the constructor skips clock_gettime(). */
clockid_t
TimestampClockPosix::clockId = static_cast<clockid_t>(-1);
/* Frequency scaling of the clock based timestamp, initialized to 1. */
Timestamp::counter_t
TimestampClockPosix::frequencyScaling = 1;
} // namespace timer
} // namespace internal
} // namespace perf
} // namespace base
} // namespace embb
#endif // POSIX || OSX || UX
#include <embb/base/perf/timer.h>
#if defined(EMBB_BASE_CPP_PERF_TIMER_POSIX) || \
defined(EMBB_BASE_CPP_PERF_TIMER_UX)
#include <embb/base/perf/internal/timestamp_counter_posix.h>
namespace embb {
namespace base {
namespace perf {
namespace internal {
namespace timer {
/**
 * Sets the frequency scaling of the counter based timestamp.
 *
 * \param freq Scaling value to use; 0 selects the built-in default of 1900
 *             (NOTE(review): presumably a CPU frequency in MHz; confirm)
 */
void TimestampCounterPosix::Calibrate(unsigned int freq) {
  double scaling = 1900.0f;
  if (freq != 0) {
    scaling = static_cast<double>(freq);
  }
  frequencyScaling = scaling;
}
/* Frequency scaling of the counter based timestamp; starts at 1 and is
   replaced by Calibrate(). */
Timestamp::counter_t TimestampCounterPosix::frequencyScaling = 1;
} // namespace timer
} // namespace internal
} // namespace perf
} // namespace base
} // namespace embb
#endif // EMBB_BASE_CPP_PERF_TIMER_POSIX || EMBB_BASE_CPP_PERF_TIMER_UX
#include <embb/base/perf/timer.h>
#if defined(EMBB_BASE_CPP_PERF_TIMER_WIN32)
#include <embb/base/perf/internal/timestamp_counter_win32.h>
namespace embb {
namespace base {
namespace perf {
namespace internal {
namespace timer {
/* Frequency scaling of the Win32 counter based timestamp.
   NOTE(review): initialized to 0, unlike the POSIX counterpart which
   starts at 1. Timer divides by FrequencyScaling(), so presumably
   calibration must run before any conversion; confirm. */
TimestampCounterWin32::counter_t TimestampCounterWin32::frequencyScaling = 0;
} // namespace timer
} // namespace internal
} // namespace perf
} // namespace base
} // namespace embb
#endif // EMBB_BASE_CPP_PERF_TIMER_WIN32
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment