Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
las3_pub
/
predictable_parallel_patterns
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Members
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
3202323c
authored
May 19, 2020
by
FritzFlorian
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Finish up DAG profiler.
parent
6148bd1e
Pipeline
#1476
passed with stages
in 4 minutes 40 seconds
Changes
10
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
232 additions
and
132 deletions
+232
-132
app/benchmark_fib/main.cpp
+4
-0
app/benchmark_matrix/main.cpp
+5
-0
lib/pls/include/pls/internal/profiling/dag_node.h
+10
-0
lib/pls/include/pls/internal/profiling/profiler.h
+71
-11
lib/pls/include/pls/internal/profiling/thread_stats.h
+3
-21
lib/pls/include/pls/internal/scheduling/scheduler.h
+6
-0
lib/pls/include/pls/internal/scheduling/scheduler_impl.h
+16
-11
lib/pls/src/internal/profiling/dag_node.cpp
+19
-1
lib/pls/src/internal/profiling/profiler.cpp
+96
-86
lib/pls/src/internal/scheduling/scheduler.cpp
+2
-2
No files found.
app/benchmark_fib/main.cpp
View file @
3202323c
...
...
@@ -44,11 +44,15 @@ int main(int argc, char **argv) {
scheduler
scheduler
{(
unsigned
)
num_threads
,
MAX_NUM_TASKS
,
MAX_STACK_SIZE
};
volatile
int
res
;
scheduler
.
get_profiler
().
disable_memory_measure
();
runner
.
run_iterations
(
fib
::
NUM_ITERATIONS
,
[
&
]()
{
scheduler
.
perform_work
([
&
]()
{
res
=
pls_fib
(
fib
::
INPUT_N
);
});
},
fib
::
NUM_WARMUP_ITERATIONS
);
scheduler
.
get_profiler
().
current_run
().
print_dag
(
std
::
cout
);
scheduler
.
get_profiler
().
current_run
().
print_stats
();
runner
.
commit_results
(
true
);
return
0
;
...
...
app/benchmark_matrix/main.cpp
View file @
3202323c
...
...
@@ -4,6 +4,7 @@ using namespace pls;
#include "benchmark_runner.h"
#include "benchmark_base/matrix.h"
#include <iostream>
using
namespace
comparison_benchmarks
::
base
;
...
...
@@ -37,10 +38,14 @@ int main(int argc, char **argv) {
scheduler
scheduler
{(
unsigned
)
num_threads
,
MAX_NUM_TASKS
,
MAX_STACK_SIZE
};
scheduler
.
get_profiler
().
disable_memory_measure
();
runner
.
run_iterations
(
matrix
::
NUM_ITERATIONS
,
[
&
]()
{
scheduler
.
perform_work
([
&
]()
{
result
.
multiply
(
a
,
b
);
});
},
matrix
::
WARMUP_ITERATIONS
);
scheduler
.
get_profiler
().
current_run
().
print_dag
(
std
::
cout
);
scheduler
.
get_profiler
().
current_run
().
print_stats
();
runner
.
commit_results
(
true
);
}
lib/pls/include/pls/internal/profiling/dag_node.h
View file @
3202323c
...
...
@@ -6,10 +6,19 @@
#include <memory>
#include <list>
#include <iostream>
#include <chrono>
namespace
pls
::
internal
::
profiling
{
struct
dag_node
{
using
clock
=
std
::
chrono
::
steady_clock
;
using
measurement_resolution
=
std
::
chrono
::
nanoseconds
;
using
display_resolution
=
std
::
chrono
::
microseconds
;
static
unsigned
long
m_to_d
(
unsigned
long
duration
)
{
measurement_resolution
measurement_duration
{
duration
};
return
std
::
chrono
::
duration_cast
<
display_resolution
>
(
measurement_duration
).
count
();
}
dag_node
(
unsigned
spawning_thread_id
)
:
spawning_thread_id_
{
spawning_thread_id
}
{};
unsigned
spawning_thread_id_
;
...
...
@@ -28,6 +37,7 @@ struct dag_node {
unsigned
dag_max_memory
();
unsigned
long
dag_total_user_time
();
unsigned
long
dag_critical_path
();
unsigned
long
dag_depth
();
};
}
...
...
lib/pls/include/pls/internal/profiling/profiler.h
View file @
3202323c
...
...
@@ -3,7 +3,7 @@
#define PLS_INTERNAL_PROFILING_PROFILER_H_
#ifndef PLS_PROFILING_ENABLED
#define PLS_PROFILING_ENABLED
fals
e
#define PLS_PROFILING_ENABLED
tru
e
#endif
#include <memory>
...
...
@@ -20,29 +20,60 @@ class profiler {
using
clock
=
std
::
chrono
::
steady_clock
;
using
measurement_resolution
=
std
::
chrono
::
nanoseconds
;
using
display_resolution
=
std
::
chrono
::
microseconds
;
static
unsigned
long
m_to_d
(
unsigned
long
duration
)
{
measurement_resolution
measurement_duration
{
duration
};
return
std
::
chrono
::
duration_cast
<
display_resolution
>
(
measurement_duration
).
count
();
}
struct
profiler_run
{
profiler_run
(
unsigned
num_threads
)
:
start_time_
{},
end_time_
{},
root_node_
{
std
::
make_unique
<
dag_node
>
(
0
)},
per_thread_stats_
(
num_threads
)
{}
per_thread_stats_
(
num_threads
),
num_threads_
{
num_threads
}
{}
// Runtime stats
clock
::
time_point
start_time_
;
clock
::
time_point
end_time_
;
std
::
unique_ptr
<
dag_node
>
root_node_
;
std
::
vector
<
thread_stats
>
per_thread_stats_
;
static
unsigned
long
m_to_d
(
unsigned
long
duration
)
{
measurement_resolution
measurement_duration
{
duration
};
return
std
::
chrono
::
duration_cast
<
display_resolution
>
(
measurement_duration
).
count
();
}
// Collective stats
unsigned
num_threads_
;
unsigned
long
wall_time_
;
unsigned
long
t_1_
;
unsigned
long
t_inf_
;
unsigned
long
steals_failed_
;
unsigned
long
steals_successful_
;
unsigned
long
steals_cas_ops_
;
unsigned
long
steals_time_
;
unsigned
long
max_memory_per_stack_
;
unsigned
long
spawn_depth_
;
void
print_stats
(
unsigned
num_threads
)
const
;
void
calculate_stats
();
void
print_stats
()
const
;
void
print_dag
(
std
::
ostream
&
stream
);
};
public
:
profiler
(
unsigned
num_threads
)
:
num_threads_
{
num_threads
},
profiler_runs_
()
{}
explicit
profiler
(
unsigned
num_threads
)
:
num_threads_
{
num_threads
},
capture_memory_
{
true
},
capture_time_
{
true
},
profiler_runs_
()
{
for
(
unsigned
i
=
0
;
i
<
num_threads_
;
i
++
)
{
stacks_
.
push_back
(
stack_allocator_
.
allocate_stack
(
STACK_SIZE
));
}
}
~
profiler
()
{
for
(
unsigned
i
=
0
;
i
<
num_threads_
;
i
++
)
{
stack_allocator_
.
free_stack
(
STACK_SIZE
,
stacks_
[
i
]);
}
}
dag_node
*
start_profiler_run
();
void
stop_profiler_run
();
...
...
@@ -58,8 +89,6 @@ class profiler {
void
task_prepare_stack_measure
(
unsigned
thread_id
,
char
*
stack_memory
,
size_t
stack_size
);
void
task_finish_stack_measure
(
unsigned
thread_id
,
char
*
stack_memory
,
size_t
stack_size
,
dag_node
*
in_node
);
private
:
static
constexpr
char
MAGIC_BYTES
[]
=
{
'A'
,
'B'
,
'A'
,
'B'
,
'A'
,
'B'
,
'A'
,
'B'
};
profiler_run
&
current_run
()
{
return
profiler_runs_
[
profiler_runs_
.
size
()
-
1
];
}
...
...
@@ -67,8 +96,39 @@ class profiler {
return
current_run
().
per_thread_stats_
[
thread_id
];
}
void
disable_time_measure
()
{
capture_time_
=
false
;
}
void
enable_time_measure
()
{
capture_time_
=
true
;
}
void
disable_memory_measure
()
{
capture_memory_
=
false
;
}
void
enable_memory_measure
()
{
capture_memory_
=
true
;
}
private
:
static
constexpr
char
MAGIC_BYTES
[]
=
{
'A'
,
'B'
,
'A'
,
'B'
,
'A'
,
'B'
,
'A'
,
'B'
};
unsigned
num_threads_
;
bool
capture_memory_
;
bool
capture_time_
;
std
::
vector
<
profiler_run
>
profiler_runs_
;
// Stacks to run the profiler code to not influence profiled stacks.
static
constexpr
size_t
STACK_SIZE
=
4096
*
4
;
base
::
mmap_stack_allocator
stack_allocator_
;
std
::
vector
<
char
*>
stacks_
;
template
<
typename
Function
>
void
run_on_stack
(
unsigned
thread_id
,
const
Function
function
)
{
context_switcher
::
enter_context
(
stacks_
[
thread_id
],
STACK_SIZE
,
[
this
,
thread_id
,
function
](
auto
cont
)
{
function
(
thread_stats_for
(
thread_id
));
return
cont
;
});
}
};
}
...
...
lib/pls/include/pls/internal/profiling/thread_stats.h
View file @
3202323c
...
...
@@ -10,26 +10,11 @@
#include "pls/internal/base/system_details.h"
#include "pls/internal/base/stack_allocator.h"
#include "pls/internal/profiling/dag_node.h"
namespace
pls
::
internal
::
profiling
{
struct
PLS_CACHE_ALIGN
thread_stats
{
static
constexpr
size_t
STACK_SIZE
=
4096
*
4
;
thread_stats
()
{
stack_
=
stack_allocator_
.
allocate_stack
(
STACK_SIZE
);
}
~
thread_stats
()
{
stack_allocator_
.
free_stack
(
STACK_SIZE
,
stack_
);
}
template
<
typename
Function
>
void
run_on_stack
(
const
Function
function
)
{
context_switcher
::
enter_context
(
stack_
,
STACK_SIZE
,
[
function
](
auto
cont
)
{
function
();
return
cont
;
});
}
using
clock
=
std
::
chrono
::
steady_clock
;
unsigned
long
total_steals_
{
0
};
...
...
@@ -40,10 +25,7 @@ struct PLS_CACHE_ALIGN thread_stats {
unsigned
long
total_time_stealing_
{
0
};
clock
::
time_point
stealing_start_time_
;
clock
::
time_point
task_run_start_time
;
base
::
mmap_stack_allocator
stack_allocator_
;
char
*
stack_
;
clock
::
time_point
task_run_start_time_
;
};
}
...
...
lib/pls/include/pls/internal/scheduling/scheduler.h
View file @
3202323c
...
...
@@ -99,6 +99,12 @@ class scheduler {
thread_state
&
thread_state_for
(
unsigned
int
thread_id
)
{
return
*
thread_states_
[
thread_id
];
}
task_manager
&
task_manager_for
(
unsigned
int
thread_id
)
{
return
*
task_managers_
[
thread_id
];
}
#if PLS_PROFILING_ENABLED
profiling
::
profiler
&
get_profiler
()
{
return
profiler_
;
}
#endif
private
:
static
context_switcher
::
continuation
slow_return
(
thread_state
&
calling_state
);
...
...
lib/pls/include/pls/internal/scheduling/scheduler_impl.h
View file @
3202323c
...
...
@@ -31,7 +31,7 @@ scheduler::scheduler(unsigned int num_threads,
terminated_
{
false
},
stack_allocator_
{
std
::
make_shared
<
ALLOC
>
(
std
::
forward
<
ALLOC
>
(
stack_allocator
))}
#if PLS_PROFILING_ENABLED
,
profiler_
{
num_threads
}
,
profiler_
{
num_threads
}
#endif
{
...
...
@@ -87,18 +87,16 @@ class scheduler::init_function_impl : public init_function {
PLS_ASSERT
(
thread_state
::
get
().
main_continuation
().
valid
(),
"Must return valid continuation from main task."
);
#if PLS_PROFILING_ENABLED
thread_state
::
get
().
get_scheduler
().
profiler_
.
task_finish_stack_measure
(
thread_state
::
get
().
get_thread_id
(),
root_task
->
stack_memory_
,
root_task
->
stack_size_
,
root_task
->
profiling_node_
);
thread_state
::
get
().
get_scheduler
().
profiler_
.
task_stop_running
(
thread_state
::
get
().
get_thread_id
(),
root_task
->
profiling_node_
);
#endif
return
std
::
move
(
thread_state
::
get
().
main_continuation
());
});
#if PLS_PROFILING_ENABLED
thread_state
::
get
().
get_scheduler
().
profiler_
.
task_finish_stack_measure
(
thread_state
::
get
().
get_thread_id
(),
root_task
->
stack_memory_
,
root_task
->
stack_size_
,
root_task
->
profiling_node_
);
#endif
}
private
:
F
&
function_
;
...
...
@@ -141,7 +139,6 @@ void scheduler::spawn(Function &&lambda) {
base_task
*
spawned_task
=
last_task
->
next_
;
#if PLS_PROFILING_ENABLED
// Memory and DAG nodes
spawning_state
.
get_scheduler
().
profiler_
.
task_prepare_stack_measure
(
spawning_state
.
get_thread_id
(),
spawned_task
->
stack_memory_
,
spawned_task
->
stack_size_
);
...
...
@@ -189,24 +186,32 @@ void scheduler::spawn(Function &&lambda) {
syncing_state
.
set_active_task
(
last_task
);
#if PLS_PROFILING_ENABLED
syncing_state
.
get_scheduler
().
profiler_
.
task_finish_stack_measure
(
syncing_state
.
get_thread_id
(),
spawned_task
->
stack_memory_
,
spawned_task
->
stack_size_
,
spawned_task
->
profiling_node_
);
syncing_state
.
get_scheduler
().
profiler_
.
task_stop_running
(
syncing_state
.
get_thread_id
(),
spawned_task
->
profiling_node_
);
#endif
return
std
::
move
(
last_task
->
continuation_
);
}
else
{
// Slow path, the last task was stolen. This path is common to sync() events.
auto
continuation
=
slow_return
(
syncing_state
);
#if PLS_PROFILING_ENABLED
syncing_state
.
get_scheduler
().
profiler_
.
task_finish_stack_measure
(
syncing_state
.
get_thread_id
(),
spawned_task
->
stack_memory_
,
spawned_task
->
stack_size_
,
spawned_task
->
profiling_node_
);
syncing_state
.
get_scheduler
().
profiler_
.
task_stop_running
(
syncing_state
.
get_thread_id
(),
spawned_task
->
profiling_node_
);
#endif
auto
continuation
=
slow_return
(
syncing_state
);
return
continuation
;
}
});
#if PLS_PROFILING_ENABLED
thread_state
::
get
().
get_scheduler
().
profiler_
.
task_start_running
(
thread_state
::
get
().
get_thread_id
(),
thread_state
::
get
().
get_active_task
()
->
profiling_node_
);
last_task
->
profiling_node_
);
#endif
if
(
continuation
.
valid
())
{
...
...
lib/pls/src/internal/profiling/dag_node.cpp
View file @
3202323c
...
...
@@ -20,7 +20,11 @@ void dag_node::dag_compact() {
}
void
dag_node
::
dag_print
(
std
::
ostream
&
stream
,
unsigned
rank
)
{
stream
<<
node_print_id
()
<<
" [label="
<<
spawning_thread_id_
<<
", rank="
<<
rank
<<
"];"
<<
std
::
endl
;
stream
<<
node_print_id
()
<<
" [label=
\"
"
<<
spawning_thread_id_
<<
"
\n
"
<<
max_memory_
<<
" bytes
\n
"
<<
m_to_d
(
total_runtime_
)
<<
" us
\"
"
<<
" ,rank="
<<
rank
<<
"];"
<<
std
::
endl
;
for
(
auto
&
child
:
child_nodes_
)
{
child
.
dag_print
(
stream
,
rank
+
1
);
stream
<<
node_print_id
()
<<
" -> "
<<
child
.
node_print_id
()
<<
";"
<<
std
::
endl
;
...
...
@@ -68,4 +72,18 @@ unsigned long dag_node::dag_critical_path() {
return
critical_path
;
}
unsigned
long
dag_node
::
dag_depth
()
{
unsigned
long
deepest_at_level
=
1
;
if
(
next_node_
)
{
deepest_at_level
=
std
::
max
(
deepest_at_level
,
next_node_
->
dag_depth
());
}
unsigned
long
deepest_child
=
0
;
for
(
auto
&
child
:
child_nodes_
)
{
deepest_child
=
std
::
max
(
deepest_child
,
child
.
dag_depth
());
}
return
std
::
max
(
deepest_child
+
1
,
deepest_at_level
);
}
}
lib/pls/src/internal/profiling/profiler.cpp
View file @
3202323c
#include "pls/internal/profiling/profiler.h"
namespace
pls
::
internal
::
profiling
{
void
profiler
::
profiler_run
::
print_stats
(
unsigned
num_threads
)
const
{
void
profiler
::
profiler_run
::
calculate_stats
()
{
root_node_
->
dag_compact
();
auto
run_duration
=
std
::
chrono
::
duration_cast
<
measurement_resolution
>
(
end_time_
-
start_time_
).
count
();
std
::
cout
<<
"==========================="
<<
std
::
endl
;
std
::
cout
<<
"WALL TIME: "
<<
m_to_d
(
run_duration
)
<<
std
::
endl
;
unsigned
long
total_user_time
=
root_node_
->
dag_total_user_time
();
std
::
cout
<<
"USER TIME: "
<<
m_to_d
(
total_user_time
)
<<
std
::
endl
;
unsigned
long
critical_path_time
=
root_node_
->
dag_critical_path
();
std
::
cout
<<
"CRITICAL TIME: "
<<
m_to_d
(
critical_path_time
)
<<
std
::
endl
;
std
::
cout
<<
"MAX SPEEDUP:"
<<
(
double
)
total_user_time
/
(
double
)
critical_path_time
<<
std
::
endl
;
unsigned
long
total_failed_steals
=
0
;
unsigned
long
total_successful_steals
=
0
;
unsigned
long
total_steal_time
=
0
;
wall_time_
=
std
::
chrono
::
duration_cast
<
measurement_resolution
>
(
end_time_
-
start_time_
).
count
();
t_1_
=
root_node_
->
dag_total_user_time
();
t_inf_
=
root_node_
->
dag_critical_path
();
steals_failed_
=
0
;
steals_successful_
=
0
;
steals_cas_ops_
=
0
;
steals_time_
=
0
;
for
(
auto
&
thread_stats
:
per_thread_stats_
)
{
total_failed_steals
+=
thread_stats
.
failed_steals_
;
total_successful_steals
+=
thread_stats
.
successful_steals_
;
total_steal_time
+=
thread_stats
.
total_time_stealing_
;
steals_failed_
+=
thread_stats
.
failed_steals_
;
steals_successful_
+=
thread_stats
.
successful_steals_
;
steals_cas_ops_
+=
thread_stats
.
steal_cas_ops_
;
steals_time_
+=
thread_stats
.
total_time_stealing_
;
}
std
::
cout
<<
"STEALS: (Time "
<<
m_to_d
(
total_steal_time
)
<<
", Total: "
<<
(
total_successful_steals
+
total_failed_steals
)
<<
", Success: "
<<
total_successful_steals
<<
", Failed: "
<<
total_failed_steals
<<
")"
<<
std
::
endl
;
unsigned
long
total_measured
=
total_steal_time
+
total_user_time
;
unsigned
long
total_wall
=
run_duration
*
num_threads
;
std
::
cout
<<
"Wall Time vs. Measured: "
<<
100.0
*
total_measured
/
total_wall
<<
std
::
endl
;
max_memory_per_stack_
=
root_node_
->
dag_max_memory
()
;
spawn_depth_
=
root_node_
->
dag_depth
()
;
}
std
::
cout
<<
"MEMORY: "
<<
root_node_
->
dag_max_memory
()
<<
" bytes per stack"
<<
std
::
endl
;
void
profiler
::
profiler_run
::
print_stats
()
const
{
// TODO: re-add graph printing
std
::
cout
<<
"==========================="
<<
std
::
endl
;
std
::
cout
<<
"WALL TIME: "
<<
m_to_d
(
wall_time_
)
<<
std
::
endl
;
std
::
cout
<<
"USER TIME: "
<<
m_to_d
(
t_1_
)
<<
std
::
endl
;
std
::
cout
<<
"CRITICAL TIME: "
<<
m_to_d
(
t_inf_
)
<<
std
::
endl
;
std
::
cout
<<
"MAX SPEEDUP:"
<<
(
double
)
t_1_
/
(
double
)
t_inf_
<<
std
::
endl
;
std
::
cout
<<
"STEALS: (Time "
<<
m_to_d
(
steals_time_
)
<<
", Total: "
<<
(
steals_failed_
+
steals_successful_
)
<<
", Success: "
<<
steals_successful_
<<
", Failed: "
<<
steals_failed_
<<
", CAS: "
<<
steals_cas_ops_
<<
")"
<<
std
::
endl
;
unsigned
long
total_measured
=
steals_time_
+
t_1_
;
unsigned
long
total_wall
=
wall_time_
*
num_threads_
;
std
::
cout
<<
"Wall Time vs. Measured: "
<<
100.0
*
total_measured
/
total_wall
<<
std
::
endl
;
// std::cout << "digraph {" << std::endl;
// root_node_->dag_print(std::cout, 0);
// std::cout << "}" << std::endl;
std
::
cout
<<
"MEMORY: "
<<
max_memory_per_stack_
<<
" bytes per stack"
<<
std
::
endl
;
std
::
cout
<<
"DEPTH:"
<<
spawn_depth_
<<
std
::
endl
;
}
void
profiler
::
profiler_run
::
print_dag
(
std
::
ostream
&
stream
)
{
stream
<<
"digraph {"
<<
std
::
endl
;
root_node_
->
dag_print
(
std
::
cout
,
0
);
stream
<<
"}"
<<
std
::
endl
;
}
dag_node
*
profiler
::
start_profiler_run
()
{
...
...
@@ -47,56 +61,52 @@ dag_node *profiler::start_profiler_run() {
void
profiler
::
stop_profiler_run
()
{
current_run
().
end_time_
=
clock
::
now
();
current_run
().
print_stats
(
num_threads_
);
current_run
().
root_node_
->
dag_compact
();
current_run
().
calculate_stats
();
}
void
profiler
::
stealing_start
(
unsigned
thread_id
)
{
auto
&
thread_stats
=
thread_stats_for
(
thread_id
);
thread_stats
.
run_on_stack
([
&
]
{
thread_stats
.
stealing_start_time_
=
clock
::
now
();
run_on_stack
(
thread_id
,
[
&
](
thread_stats
&
thread_stats
)
{
thread_stats
.
total_steals_
++
;
if
(
capture_time_
)
{
thread_stats
.
stealing_start_time_
=
clock
::
now
();
}
});
}
void
profiler
::
stealing_end
(
unsigned
thread_id
,
bool
success
)
{
auto
&
thread_stats
=
thread_stats_for
(
thread_id
);
thread_stats
.
run_on_stack
([
&
]
{
run_on_stack
(
thread_id
,
[
&
](
thread_stats
&
thread_stats
)
{
thread_stats
.
failed_steals_
+=
!
success
;
thread_stats
.
successful_steals_
+=
success
;
auto
end_time
=
clock
::
now
();
auto
steal_duration
=
std
::
chrono
::
duration_cast
<
measurement_resolution
>
(
end_time
-
thread_stats
.
stealing_start_time_
).
count
();
thread_stats
.
total_time_stealing_
+=
steal_duration
;
if
(
capture_time_
)
{
auto
end_time
=
clock
::
now
();
auto
steal_duration
=
std
::
chrono
::
duration_cast
<
measurement_resolution
>
(
end_time
-
thread_stats
.
stealing_start_time_
).
count
();
thread_stats
.
total_time_stealing_
+=
steal_duration
;
}
});
}
void
profiler
::
stealing_cas_op
(
unsigned
thread_id
)
{
auto
&
thread_stats
=
thread_stats_for
(
thread_id
);
thread_stats
.
run_on_stack
([
&
]
{
run_on_stack
(
thread_id
,
[
&
](
thread_stats
&
thread_stats
)
{
thread_stats
.
steal_cas_ops_
++
;
});
}
dag_node
*
profiler
::
task_spawn_child
(
unsigned
thread_id
,
dag_node
*
parent
)
{
auto
&
thread_stats
=
thread_stats_for
(
thread_id
);
dag_node
*
result
;
thread_stats
.
run_on_stack
([
&
]
{
run_on_stack
(
thread_id
,
[
&
](
thread_stats
&
)
{
result
=
&
parent
->
child_nodes_
.
emplace_back
(
thread_id
);
});
return
result
;
}
dag_node
*
profiler
::
task_sync
(
unsigned
thread_id
,
dag_node
*
synced
)
{
auto
&
thread_stats
=
thread_stats_for
(
thread_id
);
dag_node
*
result
;
thread_stats
.
run_on_stack
([
&
]
{
run_on_stack
(
thread_id
,
[
&
](
thread_stats
&
)
{
synced
->
next_node_
=
std
::
make_unique
<
dag_node
>
(
thread_id
);
result
=
synced
->
next_node_
.
get
();
});
...
...
@@ -104,55 +114,55 @@ dag_node *profiler::task_sync(unsigned thread_id, dag_node *synced) {
}
void
profiler
::
task_start_running
(
unsigned
thread_id
,
dag_node
*
in_node
)
{
(
void
)
in_node
;
// unused, keep for 'symmetric' API
auto
&
thread_stats
=
thread_stats_for
(
thread_id
);
thread_stats
.
run_on_stack
([
&
]
{
thread_stats
.
task_run_start_time
=
clock
::
now
(
);
}
);
if
(
capture_time_
)
{
(
void
)
in_node
;
// unused, keep for 'symmetric' API
run_on_stack
(
thread_id
,
[
&
](
thread_stats
&
thread_stats
)
{
thread_stats
.
task_run_start_time_
=
clock
::
now
();
}
);
}
}
void
profiler
::
task_stop_running
(
unsigned
thread_id
,
dag_node
*
in_node
)
{
auto
&
thread_stats
=
thread_stats_for
(
thread_id
);
thread_stats
.
run_on_stack
([
&
]
{
auto
end_time
=
clock
::
now
();
auto
user_code_duration
=
std
::
chrono
::
duration_cast
<
measurement_resolution
>
(
end_time
-
thread_stats
.
task_run_start_time
).
count
()
;
in_node
->
total_runtime_
+=
user_code_duration
;
}
);
if
(
capture_time_
)
{
run_on_stack
(
thread_id
,
[
&
](
thread_stats
&
thread_stats
)
{
auto
end_time
=
clock
::
now
();
auto
user_code_duration
=
std
::
chrono
::
duration_cast
<
measurement_resolution
>
(
end_time
-
thread_stats
.
task_run_start_time_
).
count
();
in_node
->
total_runtime_
+=
user_code_duration
;
})
;
}
}
void
profiler
::
task_prepare_stack_measure
(
unsigned
thread_id
,
char
*
stack_memory
,
size_t
stack_size
)
{
auto
&
thread_stats
=
thread_stats_for
(
thread_id
);
thread_stats
.
run_on_stack
([
&
]
{
for
(
size_t
i
=
0
;
i
<
stack_size
-
sizeof
(
MAGIC_BYTES
);
i
+=
sizeof
(
MAGIC_BYTES
)
)
{
for
(
size_t
j
=
0
;
j
<
sizeof
(
MAGIC_BYTES
);
j
++
)
{
stack_memory
[
i
+
j
]
=
MAGIC_BYTES
[
j
];
if
(
capture_memory_
)
{
run_on_stack
(
thread_id
,
[
&
](
thread_stats
&
)
{
for
(
size_t
i
=
0
;
i
<
stack_size
-
sizeof
(
MAGIC_BYTES
);
i
+=
sizeof
(
MAGIC_BYTES
))
{
for
(
size_t
j
=
0
;
j
<
sizeof
(
MAGIC_BYTES
);
j
++
)
{
stack_memory
[
i
+
j
]
=
MAGIC_BYTES
[
j
];
}
}
}
}
);
}
);
}
}
void
profiler
::
task_finish_stack_measure
(
unsigned
thread_id
,
char
*
stack_memory
,
size_t
stack_size
,
dag_node
*
in_node
)
{
auto
&
thread_stats
=
thread_stats_for
(
thread_id
);
thread_stats
.
run_on_stack
([
&
]
{
for
(
size_t
i
=
0
;
i
<
stack_size
-
sizeof
(
MAGIC_BYTES
);
i
+=
sizeof
(
MAGIC_BYTES
))
{
bool
section_clean
=
true
;
for
(
size_t
j
=
0
;
j
<
sizeof
(
MAGIC_BYTES
);
j
++
)
{
if
(
stack_memory
[
i
+
j
]
!=
MAGIC_BYTES
[
j
])
{
section_clean
=
false
;
if
(
capture_memory_
)
{
run_on_stack
(
thread_id
,
[
&
](
thread_stats
&
)
{
for
(
size_t
i
=
0
;
i
<
stack_size
-
sizeof
(
MAGIC_BYTES
);
i
+=
sizeof
(
MAGIC_BYTES
))
{
bool
section_clean
=
true
;
for
(
size_t
j
=
0
;
j
<
sizeof
(
MAGIC_BYTES
);
j
++
)
{
if
(
stack_memory
[
i
+
j
]
!=
MAGIC_BYTES
[
j
]
)
{
section_clean
=
false
;
}
}
}
in_node
->
max_memory_
=
stack_size
-
i
+
sizeof
(
MAGIC_BYTES
);
if
(
!
section_clean
)
{
return
;
in_node
->
max_memory_
=
stack_size
-
i
+
sizeof
(
MAGIC_BYTES
);
if
(
!
section_clean
)
{
return
;
}
}
}
}
);
}
);
}
}
}
lib/pls/src/internal/scheduling/scheduler.cpp
View file @
3202323c
...
...
@@ -124,8 +124,8 @@ void scheduler::sync() {
if
(
active_task
->
is_synchronized_
)
{
#if PLS_PROFILING_ENABLED
syncing_state
.
get_scheduler
().
profiler_
.
task_start_running
(
syncing_state
.
get_thread_id
(),
active_task
->
profiling_node_
);
thread_state
::
get
().
get_scheduler
().
profiler_
.
task_start_running
(
thread_state
::
get
()
.
get_thread_id
(),
thread_state
::
get
().
get_active_task
()
->
profiling_node_
);
#endif
return
;
// We are already the sole owner of last_task
}
else
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment