profiler.h 4.02 KB
Newer Older
1 2 3 4

#ifndef PLS_INTERNAL_PROFILING_PROFILER_H_
#define PLS_INTERNAL_PROFILING_PROFILER_H_

5
#ifndef PLS_PROFILING_ENABLED
FritzFlorian committed
6
#define PLS_PROFILING_ENABLED true
7
#endif
8 9 10 11 12 13 14 15 16 17 18 19 20

#include <memory>
#include <chrono>
#include <vector>

#include <iostream>

#include "dag_node.h"
#include "thread_stats.h"

namespace pls::internal::profiling {
class profiler {
  // Clock type of the start/end time points stored in each profiler_run.
  using clock = std::chrono::steady_clock;
21 22
  // Duration types used by m_to_d(): raw counts are interpreted in
  // measurement_resolution and converted to display_resolution.
  using measurement_resolution = std::chrono::nanoseconds;
  using display_resolution = std::chrono::microseconds;
FritzFlorian committed
23 24 25 26
  // Converts a raw tick count given in measurement_resolution into the
  // corresponding count in display_resolution (duration_cast truncates).
  static unsigned long m_to_d(unsigned long duration) {
    const auto in_display_units =
        std::chrono::duration_cast<display_resolution>(measurement_resolution{duration});
    return in_display_units.count();
  }
27 28 29 30 31

  // Data recorded for a single profiler run plus the statistics derived from it.
  struct profiler_run {
    // Creates an empty run for `num_threads` threads: default time points, a
    // fresh root dag_node (constructed with 0) and one thread_stats entry per
    // thread.
    profiler_run(unsigned num_threads) : start_time_{},
                                         end_time_{},
                                         root_node_{std::make_unique<dag_node>(0)},
                                         per_thread_stats_(num_threads),
                                         num_threads_{num_threads} {}

    // Runtime stats
    clock::time_point start_time_;
    clock::time_point end_time_;
    std::unique_ptr<dag_node> root_node_;
    std::vector<thread_stats> per_thread_stats_;

    // Collective stats
    unsigned num_threads_;

    // The constructor does not set any of the fields below, so they are
    // zero-initialized here; reading them before they are assigned is then
    // well-defined instead of an indeterminate read.
    // NOTE(review): presumably all of them are filled in by calculate_stats() — confirm.
    unsigned long wall_time_{0};

    // NOTE(review): names suggest total work (T_1) and critical path (T_inf) — confirm.
    unsigned long t_1_{0};
    unsigned long t_inf_{0};

    unsigned long steals_failed_{0};
    unsigned long steals_successful_{0};
    unsigned long steals_cas_ops_{0};
    unsigned long steals_time_{0};

    unsigned long max_memory_per_stack_{0};
    unsigned long spawn_depth_{0};

    // Declared here only; the definitions are not part of this header section.
    void calculate_stats();
    void print_stats() const;
    void print_dag(std::ostream &stream);
  };

 public:
FritzFlorian committed
63 64 65 66 67 68 69 70 71 72 73 74 75 76
  // Creates a profiler for `num_threads` threads with time and memory
  // capturing switched on, and allocates one STACK_SIZE stack per thread.
  explicit profiler(unsigned num_threads) : num_threads_{num_threads},
                                            capture_memory_{true},
                                            capture_time_{true},
                                            profiler_runs_() {
    // One allocation per thread; stacks_ ends up with num_threads_ entries.
    for (unsigned remaining = num_threads_; remaining > 0; --remaining) {
      stacks_.push_back(stack_allocator_.allocate_stack(STACK_SIZE));
    }
  }

  ~profiler() {
    for (unsigned i = 0; i < num_threads_; i++) {
      stack_allocator_.free_stack(STACK_SIZE, stacks_[i]);
    }
  }
77

78 79
  // Run lifecycle. Only declared here; the definitions are not part of this
  // header section.
  // NOTE(review): presumably start_profiler_run() begins a new profiler_run and
  // returns its root node, and stop_profiler_run() finishes the current run —
  // confirm against the implementation.
  dag_node *start_profiler_run();
  void stop_profiler_run();
80

81 82 83
  // Work-stealing hooks. Only declared here; the definitions are not part of
  // this header section.
  // NOTE(review): presumably these bracket one steal attempt of thread
  // `thread_id` (`success` reporting its outcome) and count CAS operations —
  // confirm against the implementation.
  void stealing_start(unsigned thread_id);
  void stealing_end(unsigned thread_id, bool success);
  void stealing_cas_op(unsigned thread_id);
84

85 86 87 88 89 90
  // Task hooks. Only declared here; the definitions are not part of this
  // header section.
  // NOTE(review): the meaning of each hook is inferred from its name and
  // signature only — confirm against the implementation:
  //  - task_spawn_child / task_sync presumably return the dag_node to continue
  //    recording in after a spawn from `parent` / a sync of `synced`.
  //  - task_start_running / task_stop_running presumably bracket the execution
  //    of `in_node` on thread `thread_id`.
  //  - task_prepare_stack_measure / task_finish_stack_measure presumably
  //    bracket a stack usage measurement over `stack_size` bytes starting at
  //    `stack_memory`.
  dag_node *task_spawn_child(unsigned thread_id, dag_node *parent);
  dag_node *task_sync(unsigned thread_id, dag_node *synced);
  void task_start_running(unsigned thread_id, dag_node *in_node);
  void task_stop_running(unsigned thread_id, dag_node *in_node);
  void task_prepare_stack_measure(unsigned thread_id, char *stack_memory, size_t stack_size);
  void task_finish_stack_measure(unsigned thread_id, char *stack_memory, size_t stack_size, dag_node *in_node);
91 92 93 94 95 96 97 98

  // Returns the most recently added profiler run.
  // Precondition: at least one run exists (profiler_runs_ is not empty).
  profiler_run &current_run() {
    return profiler_runs_.back();
  }
  // Returns the stats entry of thread `thread_id` within the current run.
  // The index is not bounds-checked.
  thread_stats &thread_stats_for(unsigned thread_id) {
    profiler_run &active_run = current_run();
    return active_run.per_thread_stats_[thread_id];
  }

FritzFlorian committed
99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
  // Clears the capture_time_ flag.
  void disable_time_measure() { capture_time_ = false; }
  // Sets the capture_time_ flag.
  void enable_time_measure() { capture_time_ = true; }
  // Clears the capture_memory_ flag.
  void disable_memory_measure() { capture_memory_ = false; }
  // Sets the capture_memory_ flag.
  void enable_memory_measure() { capture_memory_ = true; }

 private:
  // Eight-byte pattern "ABABABAB" (no terminating NUL).
  // NOTE(review): presumably the fill pattern used by the stack measure
  // functions to detect untouched stack memory — confirm in the implementation.
  static constexpr char MAGIC_BYTES[] = {'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B'};

115
  // Thread count given to the constructor; also the number of stacks it allocates.
  unsigned num_threads_;
FritzFlorian committed
116 117
  // Measurement switches; both start as true and are toggled through the
  // enable_*/disable_* member functions.
  bool capture_memory_;
  bool capture_time_;
118
  // All profiler runs; current_run() returns the last element.
  std::vector<profiler_run> profiler_runs_;
FritzFlorian committed
119 120 121 122 123 124 125 126 127 128 129 130 131

  // Separate stacks on which the profiler's own code runs, so that it does not
  // influence the profiled stacks. One stack of STACK_SIZE (4096 * 4 = 16384)
  // is allocated per thread in the constructor and freed in the destructor.
  static constexpr size_t STACK_SIZE = 4096 * 4;
  base::mmap_stack_allocator stack_allocator_;
  std::vector<char *> stacks_;

  // Enters a context on the profiler-owned stack of thread `thread_id` and, in
  // that context, calls `function` with the thread's stats of the current run.
  // The continuation handed to the callback is returned unchanged.
  template<typename Function>
  void run_on_stack(unsigned thread_id, const Function function) {
    context_switcher::enter_context(stacks_[thread_id], STACK_SIZE, [this, thread_id, function](auto continuation) {
      auto &stats = thread_stats_for(thread_id);
      function(stats);
      return continuation;
    });
  }
132 133 134 135
};
}

#endif //PLS_INTERNAL_PROFILING_PROFILER_H_