// for_each_perf-inl.h
/*
 * Copyright (c) 2014, Siemens AG. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef EMBB_ALGORITHMS_PERF_FOR_EACH_PERF_INL_H_
#define EMBB_ALGORITHMS_PERF_FOR_EACH_PERF_INL_H_

#include <for_each_perf.h>
#include <embb/algorithms/for_each.h>
#include <index_iterator.h>
#include <embb/base/perf/call_args.h>
#include <embb/base/memory_allocation.h>
#include <embb/tasks/tasks.h>

namespace embb {
namespace algorithms {
namespace perf {

/**
 * Sets up the serial for-each benchmark from the given call arguments.
 *
 * In RAM_STRESS mode an input buffer of vector_size elements is obtained
 * from Allocation::AllocateCacheAligned and filled with the values
 * 0, 1, ..., vector_size - 1. In every other stress mode no buffer is
 * allocated and v is set to 0.
 *
 * \param args  Call arguments providing the stress mode and vector size.
 */
template<typename T>
SerialForEach<T>::SerialForEach(const embb::base::perf::CallArgs & args)
: cargs(args), op(args), vector_size(args.VectorSize()) {
  if (cargs.StressMode() != embb::base::perf::CallArgs::RAM_STRESS) {
    // No input buffer is required outside of RAM stress mode.
    v = 0;
    return;
  }
  v = static_cast<T *>(embb::base::Allocation::AllocateCacheAligned(
    sizeof(T) * vector_size));
  // Fill the buffer with incrementing values.
  for (size_t idx = 0; idx < vector_size; ++idx) {
    v[idx] = static_cast<T>(idx);
  }
}

/**
 * Releases the input buffer, if one was allocated.
 *
 * The decision is based on the buffer pointer itself rather than on the
 * stress mode: the constructor sets v to 0 whenever it does not allocate,
 * so a non-zero v is exactly the case in which memory has to be freed.
 * This is the same check ParallelForEach's destructor uses.
 */
template<typename T>
SerialForEach<T>::~SerialForEach() {
  if (v != 0) {
    embb::base::Allocation::FreeAligned(v);
  }
}

/**
 * Applies the functor op sequentially to vector_size input values.
 *
 * - CPU_STRESS: every input value is computed from the loop index, so the
 *   input buffer is not read.
 * - RAM_STRESS: every input value is read from the buffer v.
 *
 * Any other stress mode is a no-op.
 */
template<typename T>
void SerialForEach<T>::Run() {
  if (cargs.StressMode() == embb::base::perf::CallArgs::CPU_STRESS) {
    for (size_t i = 0; i < vector_size; i++) {
      // Deliberately not named 'v': that would shadow the member buffer
      // which the RAM_STRESS branch below reads from.
      T value = static_cast<T>(i);
      op(value);
    }
  } else if (cargs.StressMode() == embb::base::perf::CallArgs::RAM_STRESS) {
    for (size_t i = 0; i < vector_size; i++) {
      op(v[i]);
    }
  }
}

/**
 * Sets up the parallel for-each benchmark from the given call arguments.
 *
 * In RAM_STRESS mode an input buffer of vector_size elements is obtained
 * from Allocation::AllocateCacheAligned; its contents are not initialized
 * here. In every other stress mode v is set to 0.
 *
 * \param args  Call arguments providing the stress mode and vector size.
 */
template<typename T>
ParallelForEach<T>::ParallelForEach(const embb::base::perf::CallArgs & args)
: cargs(args), vector_size(args.VectorSize()) {
  const bool needs_buffer =
    cargs.StressMode() == embb::base::perf::CallArgs::RAM_STRESS;
  v = needs_buffer
    ? static_cast<T *>(embb::base::Allocation::AllocateCacheAligned(
        sizeof(T) * vector_size))
    : static_cast<T *>(0);
}

/**
 * Releases the input buffer; does nothing when v is 0.
 */
template<typename T>
ParallelForEach<T>::~ParallelForEach() {
  if (v == 0) {
    return;
  }
  embb::base::Allocation::FreeAligned(v);
}

template<typename T>
95
void ParallelForEach<T>::Pre() {
96
  if (cargs.StressMode() == embb::base::perf::CallArgs::RAM_STRESS) {
97 98 99 100 101 102 103 104
    // Initialize input vector with incrementing values:
    for (size_t i = 0; i < vector_size; i++) {
      v[i] = static_cast<T>(i);
    }
  }
}

template<typename T>
105
void ParallelForEach<T>::Run(unsigned int numThreads) {
106
  if (cargs.StressMode() == embb::base::perf::CallArgs::CPU_STRESS) {
107 108 109 110 111 112 113 114 115 116
    // Computing input values, no memory access
    ForEachFunctor<T> op(cargs);
    embb::algorithms::ForEach(
      // Using iterator returning index value to avoid
      // memory access
      IndexIterator<T>(0),
      IndexIterator<T>(static_cast<int>(vector_size)),
      op,
      embb::tasks::ExecutionPolicy(),
      vector_size / numThreads);
117
  } else if (cargs.StressMode() == embb::base::perf::CallArgs::RAM_STRESS) {
118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
    // Reading input values from memory
    ForEachFunctor<T> op(cargs);
    embb::algorithms::ForEach(
      v, v + vector_size,
      op,
      embb::tasks::ExecutionPolicy(),
      vector_size / numThreads);
  }
}

} // namespace perf
} // namespace algorithms
} // namespace embb

#endif /* EMBB_ALGORITHMS_PERF_FOR_EACH_PERF_INL_H_ */