profiler.cpp 5.39 KB
Newer Older
1 2 3
#include "pls/internal/profiling/profiler.h"

namespace pls::internal::profiling {
FritzFlorian committed
4 5

void profiler::profiler_run::calculate_stats() {
6 7
  root_node_->dag_compact();

FritzFlorian committed
8 9 10 11 12 13 14 15 16
  wall_time_ = std::chrono::duration_cast<measurement_resolution>(end_time_ - start_time_).count();

  t_1_ = root_node_->dag_total_user_time();
  t_inf_ = root_node_->dag_critical_path();

  steals_failed_ = 0;
  steals_successful_ = 0;
  steals_cas_ops_ = 0;
  steals_time_ = 0;
17
  for (auto &thread_stats : per_thread_stats_) {
FritzFlorian committed
18 19 20 21
    steals_failed_ += thread_stats.failed_steals_;
    steals_successful_ += thread_stats.successful_steals_;
    steals_cas_ops_ += thread_stats.steal_cas_ops_;
    steals_time_ += thread_stats.total_time_stealing_;
22 23
  }

FritzFlorian committed
24 25 26
  max_memory_per_stack_ = root_node_->dag_max_memory();
  spawn_depth_ = root_node_->dag_depth();
}
27

FritzFlorian committed
28
void profiler::profiler_run::print_stats() const {
29

FritzFlorian committed
30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
  std::cout << "===========================" << std::endl;
  std::cout << "WALL TIME: " << m_to_d(wall_time_) << std::endl;
  std::cout << "USER TIME: " << m_to_d(t_1_) << std::endl;
  std::cout << "CRITICAL TIME: " << m_to_d(t_inf_) << std::endl;
  std::cout << "MAX SPEEDUP:" << (double) t_1_ / (double) t_inf_ << std::endl;

  std::cout << "STEALS: (Time " << m_to_d(steals_time_)
            << ", Total: " << (steals_failed_ + steals_successful_)
            << ", Success: " << steals_successful_
            << ", Failed: " << steals_failed_
            << ", CAS: " << steals_cas_ops_ << ")" << std::endl;

  unsigned long total_measured = steals_time_ + t_1_;
  unsigned long total_wall = wall_time_ * num_threads_;
  std::cout << "Wall Time vs. Measured: " << 100.0 * total_measured / total_wall << std::endl;
45

FritzFlorian committed
46 47 48 49 50 51 52 53
  std::cout << "MEMORY: " << max_memory_per_stack_ << " bytes per stack" << std::endl;
  std::cout << "DEPTH:" << spawn_depth_ << std::endl;
}

void profiler::profiler_run::print_dag(std::ostream &stream) {
  stream << "digraph {" << std::endl;
  root_node_->dag_print(std::cout, 0);
  stream << "}" << std::endl;
54 55 56 57 58 59 60 61 62 63
}

dag_node *profiler::start_profiler_run() {
  profiler_run &current_run = profiler_runs_.emplace_back(num_threads_);
  current_run.start_time_ = clock::now();
  return current_run.root_node_.get();
}

void profiler::stop_profiler_run() {
  current_run().end_time_ = clock::now();
FritzFlorian committed
64 65
  current_run().root_node_->dag_compact();
  current_run().calculate_stats();
66 67 68
}

void profiler::stealing_start(unsigned thread_id) {
FritzFlorian committed
69
  run_on_stack(thread_id, [&](thread_stats &thread_stats) {
70
    thread_stats.total_steals_++;
FritzFlorian committed
71 72 73 74

    if (capture_time_) {
      thread_stats.stealing_start_time_ = clock::now();
    }
75 76 77 78
  });
}

void profiler::stealing_end(unsigned thread_id, bool success) {
FritzFlorian committed
79
  run_on_stack(thread_id, [&](thread_stats &thread_stats) {
80 81 82
    thread_stats.failed_steals_ += !success;
    thread_stats.successful_steals_ += success;

FritzFlorian committed
83 84 85 86 87 88 89
    if (capture_time_) {
      auto end_time = clock::now();
      auto
          steal_duration =
          std::chrono::duration_cast<measurement_resolution>(end_time - thread_stats.stealing_start_time_).count();
      thread_stats.total_time_stealing_ += steal_duration;
    }
90 91 92 93
  });
}

void profiler::stealing_cas_op(unsigned thread_id) {
FritzFlorian committed
94
  run_on_stack(thread_id, [&](thread_stats &thread_stats) {
95 96 97 98 99 100
    thread_stats.steal_cas_ops_++;
  });
}

dag_node *profiler::task_spawn_child(unsigned thread_id, dag_node *parent) {
  dag_node *result;
FritzFlorian committed
101
  run_on_stack(thread_id, [&](thread_stats &) {
102 103 104 105 106 107 108
    result = &parent->child_nodes_.emplace_back(thread_id);
  });
  return result;
}

dag_node *profiler::task_sync(unsigned thread_id, dag_node *synced) {
  dag_node *result;
FritzFlorian committed
109
  run_on_stack(thread_id, [&](thread_stats &) {
110 111 112 113 114 115 116
    synced->next_node_ = std::make_unique<dag_node>(thread_id);
    result = synced->next_node_.get();
  });
  return result;
}

void profiler::task_start_running(unsigned thread_id, dag_node *in_node) {
FritzFlorian committed
117 118 119 120 121 122
  if (capture_time_) {
    (void) in_node; // unused, keep for 'symmetric' API
    run_on_stack(thread_id, [&](thread_stats &thread_stats) {
      thread_stats.task_run_start_time_ = clock::now();
    });
  }
123 124 125
}

void profiler::task_stop_running(unsigned thread_id, dag_node *in_node) {
FritzFlorian committed
126 127 128 129 130 131 132 133
  if (capture_time_) {
    run_on_stack(thread_id, [&](thread_stats &thread_stats) {
      auto end_time = clock::now();
      auto user_code_duration =
          std::chrono::duration_cast<measurement_resolution>(end_time - thread_stats.task_run_start_time_).count();
      in_node->total_runtime_ += user_code_duration;
    });
  }
134 135 136
}

void profiler::task_prepare_stack_measure(unsigned thread_id, char *stack_memory, size_t stack_size) {
FritzFlorian committed
137 138 139 140 141 142
  if (capture_memory_) {
    run_on_stack(thread_id, [&](thread_stats &) {
      for (size_t i = 0; i < stack_size - sizeof(MAGIC_BYTES); i += sizeof(MAGIC_BYTES)) {
        for (size_t j = 0; j < sizeof(MAGIC_BYTES); j++) {
          stack_memory[i + j] = MAGIC_BYTES[j];
        }
143
      }
FritzFlorian committed
144 145
    });
  }
146 147 148
}

void profiler::task_finish_stack_measure(unsigned thread_id, char *stack_memory, size_t stack_size, dag_node *in_node) {
FritzFlorian committed
149 150 151 152 153 154 155 156
  if (capture_memory_) {
    run_on_stack(thread_id, [&](thread_stats &) {
      for (size_t i = 0; i < stack_size - sizeof(MAGIC_BYTES); i += sizeof(MAGIC_BYTES)) {
        bool section_clean = true;
        for (size_t j = 0; j < sizeof(MAGIC_BYTES); j++) {
          if (stack_memory[i + j] != MAGIC_BYTES[j]) {
            section_clean = false;
          }
157 158
        }

FritzFlorian committed
159 160 161 162
        in_node->max_memory_ = stack_size - i + sizeof(MAGIC_BYTES);
        if (!section_clean) {
          return;
        }
163
      }
FritzFlorian committed
164 165
    });
  }
166 167 168
}

}