profiler.cpp 6.3 KB
Newer Older
1 2 3
#include "pls/internal/profiling/profiler.h"

namespace pls::internal::profiling {
FritzFlorian committed
4 5

void profiler::profiler_run::calculate_stats() {
6 7
  root_node_->dag_compact();

FritzFlorian committed
8 9 10 11 12 13 14 15 16
  wall_time_ = std::chrono::duration_cast<measurement_resolution>(end_time_ - start_time_).count();

  t_1_ = root_node_->dag_total_user_time();
  t_inf_ = root_node_->dag_critical_path();

  steals_failed_ = 0;
  steals_successful_ = 0;
  steals_cas_ops_ = 0;
  steals_time_ = 0;
17 18 19

  sleeps_ = 0;
  sleeps_time_ = 0;
20
  for (auto &thread_stats : per_thread_stats_) {
FritzFlorian committed
21 22 23 24
    steals_failed_ += thread_stats.failed_steals_;
    steals_successful_ += thread_stats.successful_steals_;
    steals_cas_ops_ += thread_stats.steal_cas_ops_;
    steals_time_ += thread_stats.total_time_stealing_;
25 26 27

    sleeps_ += thread_stats.total_sleeps_;
    sleeps_time_ += thread_stats.total_time_sleeping_;
28 29
  }

FritzFlorian committed
30 31 32
  max_memory_per_stack_ = root_node_->dag_max_memory();
  spawn_depth_ = root_node_->dag_depth();
}
33

FritzFlorian committed
34
void profiler::profiler_run::print_stats() const {
35

FritzFlorian committed
36 37 38 39 40 41 42 43 44 45 46 47
  std::cout << "===========================" << std::endl;
  std::cout << "WALL TIME: " << m_to_d(wall_time_) << std::endl;
  std::cout << "USER TIME: " << m_to_d(t_1_) << std::endl;
  std::cout << "CRITICAL TIME: " << m_to_d(t_inf_) << std::endl;
  std::cout << "MAX SPEEDUP:" << (double) t_1_ / (double) t_inf_ << std::endl;

  std::cout << "STEALS: (Time " << m_to_d(steals_time_)
            << ", Total: " << (steals_failed_ + steals_successful_)
            << ", Success: " << steals_successful_
            << ", Failed: " << steals_failed_
            << ", CAS: " << steals_cas_ops_ << ")" << std::endl;

48 49 50 51
  std::cout << "SLEEPS: (Time " << m_to_d(sleeps_time_)
            << ", Total: " << sleeps_ << ")" << std::endl;

  unsigned long total_measured = steals_time_ + t_1_ + sleeps_time_;
FritzFlorian committed
52 53
  unsigned long total_wall = wall_time_ * num_threads_;
  std::cout << "Wall Time vs. Measured: " << 100.0 * total_measured / total_wall << std::endl;
54

FritzFlorian committed
55 56 57 58 59 60
  std::cout << "MEMORY: " << max_memory_per_stack_ << " bytes per stack" << std::endl;
  std::cout << "DEPTH:" << spawn_depth_ << std::endl;
}

void profiler::profiler_run::print_dag(std::ostream &stream) {
  stream << "digraph {" << std::endl;
61
  root_node_->dag_print(std::cout, 0, profiler_.capture_memory_, profiler_.capture_time_);
FritzFlorian committed
62
  stream << "}" << std::endl;
63 64 65
}

dag_node *profiler::start_profiler_run() {
66
  profiler_run &current_run = profiler_runs_.emplace_back(*this);
67 68 69 70 71 72
  current_run.start_time_ = clock::now();
  return current_run.root_node_.get();
}

void profiler::stop_profiler_run() {
  current_run().end_time_ = clock::now();
FritzFlorian committed
73 74
  current_run().root_node_->dag_compact();
  current_run().calculate_stats();
75 76 77
}

void profiler::stealing_start(unsigned thread_id) {
FritzFlorian committed
78
  run_on_stack(thread_id, [&](thread_stats &thread_stats) {
79
    thread_stats.total_steals_++;
FritzFlorian committed
80 81 82 83

    if (capture_time_) {
      thread_stats.stealing_start_time_ = clock::now();
    }
84 85 86 87
  });
}

void profiler::stealing_end(unsigned thread_id, bool success) {
FritzFlorian committed
88
  run_on_stack(thread_id, [&](thread_stats &thread_stats) {
89 90 91
    thread_stats.failed_steals_ += !success;
    thread_stats.successful_steals_ += success;

FritzFlorian committed
92 93
    if (capture_time_) {
      auto end_time = clock::now();
94
      auto steal_duration =
FritzFlorian committed
95 96 97
          std::chrono::duration_cast<measurement_resolution>(end_time - thread_stats.stealing_start_time_).count();
      thread_stats.total_time_stealing_ += steal_duration;
    }
98 99 100 101
  });
}

void profiler::stealing_cas_op(unsigned thread_id) {
FritzFlorian committed
102
  run_on_stack(thread_id, [&](thread_stats &thread_stats) {
103 104 105 106
    thread_stats.steal_cas_ops_++;
  });
}

107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
void profiler::sleep_start(unsigned thread_id) {
  run_on_stack(thread_id, [&](thread_stats &thread_stats) {
    thread_stats.total_sleeps_++;

    if (capture_time_) {
      thread_stats.sleeping_start_time_ = clock::now();
    }
  });
}

void profiler::sleep_stop(unsigned thread_id) {
  run_on_stack(thread_id, [&](thread_stats &thread_stats) {
    if (capture_time_) {
      auto end_time = clock::now();
      auto sleep_duration =
          std::chrono::duration_cast<measurement_resolution>(end_time - thread_stats.sleeping_start_time_).count();
      thread_stats.total_time_sleeping_ += sleep_duration;
    }
  });
}

128 129
dag_node *profiler::task_spawn_child(unsigned thread_id, dag_node *parent) {
  dag_node *result;
FritzFlorian committed
130
  run_on_stack(thread_id, [&](thread_stats &) {
131 132 133 134 135 136 137
    result = &parent->child_nodes_.emplace_back(thread_id);
  });
  return result;
}

dag_node *profiler::task_sync(unsigned thread_id, dag_node *synced) {
  dag_node *result;
FritzFlorian committed
138
  run_on_stack(thread_id, [&](thread_stats &) {
139 140 141 142 143 144 145
    synced->next_node_ = std::make_unique<dag_node>(thread_id);
    result = synced->next_node_.get();
  });
  return result;
}

void profiler::task_start_running(unsigned thread_id, dag_node *in_node) {
FritzFlorian committed
146 147 148 149 150 151
  if (capture_time_) {
    (void) in_node; // unused, keep for 'symmetric' API
    run_on_stack(thread_id, [&](thread_stats &thread_stats) {
      thread_stats.task_run_start_time_ = clock::now();
    });
  }
152 153 154
}

void profiler::task_stop_running(unsigned thread_id, dag_node *in_node) {
FritzFlorian committed
155 156 157 158 159 160 161 162
  if (capture_time_) {
    run_on_stack(thread_id, [&](thread_stats &thread_stats) {
      auto end_time = clock::now();
      auto user_code_duration =
          std::chrono::duration_cast<measurement_resolution>(end_time - thread_stats.task_run_start_time_).count();
      in_node->total_runtime_ += user_code_duration;
    });
  }
163 164 165
}

void profiler::task_prepare_stack_measure(unsigned thread_id, char *stack_memory, size_t stack_size) {
FritzFlorian committed
166 167 168 169 170 171
  if (capture_memory_) {
    run_on_stack(thread_id, [&](thread_stats &) {
      for (size_t i = 0; i < stack_size - sizeof(MAGIC_BYTES); i += sizeof(MAGIC_BYTES)) {
        for (size_t j = 0; j < sizeof(MAGIC_BYTES); j++) {
          stack_memory[i + j] = MAGIC_BYTES[j];
        }
172
      }
FritzFlorian committed
173 174
    });
  }
175 176 177
}

void profiler::task_finish_stack_measure(unsigned thread_id, char *stack_memory, size_t stack_size, dag_node *in_node) {
FritzFlorian committed
178 179 180 181 182 183 184 185
  if (capture_memory_) {
    run_on_stack(thread_id, [&](thread_stats &) {
      for (size_t i = 0; i < stack_size - sizeof(MAGIC_BYTES); i += sizeof(MAGIC_BYTES)) {
        bool section_clean = true;
        for (size_t j = 0; j < sizeof(MAGIC_BYTES); j++) {
          if (stack_memory[i + j] != MAGIC_BYTES[j]) {
            section_clean = false;
          }
186 187
        }

FritzFlorian committed
188 189 190 191
        in_node->max_memory_ = stack_size - i + sizeof(MAGIC_BYTES);
        if (!section_clean) {
          return;
        }
192
      }
FritzFlorian committed
193 194
    });
  }
195 196 197
}

}