Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
las3_pub
/
predictable_parallel_patterns
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Members
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
f0f3b80e
authored
Apr 05, 2019
by
FritzFlorian
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add basic 'mini_benchmark_runner'.
parent
44ea144a
Pipeline
#1136
passed with stages
in 3 minutes 27 seconds
Changes
5
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
148 additions
and
1 deletions
+148
-1
CMakeLists.txt
+1
-0
app/benchmark_fft/CMakeLists.txt
+5
-0
app/benchmark_fft/main.cpp
+87
-0
lib/pls/CMakeLists.txt
+2
-1
lib/pls/include/pls/internal/helpers/mini_benchmark.h
+53
-0
No files found.
CMakeLists.txt
View file @
f0f3b80e
...
@@ -32,6 +32,7 @@ add_subdirectory(lib/pls)
...
@@ -32,6 +32,7 @@ add_subdirectory(lib/pls)
add_subdirectory
(
app/playground
)
add_subdirectory
(
app/playground
)
add_subdirectory
(
app/test_for_new
)
add_subdirectory
(
app/test_for_new
)
add_subdirectory
(
app/invoke_parallel
)
add_subdirectory
(
app/invoke_parallel
)
add_subdirectory
(
app/benchmark_fft
)
# Add optional tests
# Add optional tests
option
(
PACKAGE_TESTS
"Build the tests"
ON
)
option
(
PACKAGE_TESTS
"Build the tests"
ON
)
...
...
app/benchmark_fft/CMakeLists.txt
0 → 100644
View file @
f0f3b80e
# FFT benchmark application: a single source file linked against the pls library.
add_executable(benchmark_fft main.cpp)
target_link_libraries(benchmark_fft pls)

# Only link the profiler library when EASY_PROFILER is set.
if (EASY_PROFILER)
    target_link_libraries(benchmark_fft easy_profiler)
endif ()
app/benchmark_fft/main.cpp
0 → 100644
View file @
f0f3b80e
#include <pls/pls.h>
#include <pls/internal/helpers/profiler.h>
#include <pls/internal/helpers/mini_benchmark.h>
#include <iostream>
#include <complex>
#include <vector>
// Sub-problems with at most CUTOFF elements are recursed into serially by fft();
// larger ones are split into two parallel sub-tasks.
static constexpr int CUTOFF = 10;

// Not referenced by the benchmark code below.
static constexpr int NUM_ITERATIONS = 1000;

// Number of samples in the benchmark input.
// fft() halves the problem on every recursion level, so the size has to be a
// power of two. With any other size divide()/combine() skip the last element
// of every odd-sized sub-problem and the result is no longer the transform of
// the input (the previous value 2064 hit this at sub-problem size 129).
static constexpr int INPUT_SIZE = 2048;
static_assert(INPUT_SIZE > 0 && (INPUT_SIZE & (INPUT_SIZE - 1)) == 0,
              "INPUT_SIZE must be a power of two for the radix-2 fft");
using complex_vector = std::vector<std::complex<double>>;

// Reorders the first n elements starting at `data` in place: the values from
// even positions are moved to the front half (keeping their order), the values
// from odd positions to the back half. Only 2 * (n / 2) elements are touched,
// so for an odd n the last element stays where it is.
void divide(complex_vector::iterator data, int n) {
  const int half = n / 2;
  complex_vector odd_part(half);

  // Set the odd-indexed values aside while compacting the even-indexed ones.
  // Slot k is written only after slots 2k and 2k+1 have been read, and later
  // iterations read strictly higher slots, so nothing is overwritten too early.
  for (int k = 0; k < half; ++k) {
    odd_part[k] = data[2 * k + 1];
    data[k] = data[2 * k];
  }

  // Place the saved odd-indexed values behind the even-indexed ones.
  for (int k = 0; k < half; ++k) {
    data[half + k] = odd_part[k];
  }
}
// Merges the two halves of the first n elements starting at `data` in place.
// For every k < n / 2 the pair (data[k], data[k + n / 2]) is replaced by
// (a + w * b, a - w * b), where a and b are the old values of the pair and
// w = exp(-2 * pi * i * k / n).
void combine(complex_vector::iterator data, int n) {
  const int half = n / 2;
  for (int k = 0; k < half; ++k) {
    const std::complex<double> lower = data[k];
    const std::complex<double> upper = data[k + half];

    // The "twiddle-factor" is recomputed on every call instead of being cached.
    // The serial and the parallel run share this exact code path, so the extra
    // work does not distort the comparison between the two.
    const double angle = -2. * M_PI * k / n;
    const std::complex<double> twiddle = std::exp(std::complex<double>(0, angle));
    const std::complex<double> rotated = twiddle * upper;

    data[k] = lower + rotated;
    data[k + half] = lower - rotated;
  }
}
// Recursive, in-place FFT over the first n elements starting at `data`.
// Each level reorders the range with divide(), transforms both halves and
// merges them with combine(). Ranges with more than CUTOFF elements hand their
// two halves to pls::invoke_parallel, smaller ones recurse serially.
void fft(complex_vector::iterator data, int n) {
  // Fewer than two elements: nothing to transform.
  if (n < 2) {
    return;
  }

  divide(data, n);

  const int half = n / 2;
  const complex_vector::iterator upper = data + half;
  if (n > CUTOFF) {
    // NOTE(review): combine() below reads both halves, so this relies on
    // invoke_parallel returning only after both lambdas finished - confirm.
    pls::invoke_parallel(
        [&] { fft(data, half); },
        [&] { fft(upper, half); }
    );
  } else {
    fft(data, half);
    fft(upper, half);
  }

  combine(data, n);
}
// Builds the benchmark input: `input_size` samples, each the sum of sine waves
// with a fixed set of known frequencies. The imaginary part of every sample
// is zero.
complex_vector prepare_input(int input_size) {
  const std::vector<double> known_frequencies{2, 11, 52, 88, 256};
  complex_vector data(input_size);

  // The samples form a time series of the known frequencies, so applying the
  // fft to this series should find exactly these frequencies again.
  for (int sample = 0; sample < input_size; ++sample) {
    double amplitude = 0.0;
    for (const double frequency : known_frequencies) {
      amplitude += std::sin(2 * M_PI * frequency * sample / input_size);
    }
    data[sample] = std::complex<double>(amplitude, 0.0);
  }

  return data;
}
int
main
()
{
PROFILE_ENABLE
complex_vector
initial_input
=
prepare_input
(
INPUT_SIZE
);
pls
::
internal
::
helpers
::
run_mini_benchmark
([
&
]
{
complex_vector
input
=
initial_input
;
fft
(
input
.
begin
(),
input
.
size
());
},
8
);
PROFILE_SAVE
(
"test_profile.prof"
)
}
lib/pls/CMakeLists.txt
View file @
f0f3b80e
...
@@ -17,7 +17,8 @@ add_library(pls STATIC
...
@@ -17,7 +17,8 @@ add_library(pls STATIC
src/algorithms/invoke_parallel.cpp include/pls/algorithms/invoke_parallel.h
src/algorithms/invoke_parallel.cpp include/pls/algorithms/invoke_parallel.h
include/pls/internal/base/error_handling.h
include/pls/internal/base/error_handling.h
include/pls/internal/scheduling/scheduler_memory.h src/internal/scheduling/scheduler_memory.cpp
include/pls/internal/scheduling/scheduler_memory.h src/internal/scheduling/scheduler_memory.cpp
include/pls/internal/helpers/profiler.h
)
include/pls/internal/helpers/profiler.h
include/pls/internal/helpers/mini_benchmark.h
)
# Add everything in `./include` to be in the include path of this project
# Add everything in `./include` to be in the include path of this project
target_include_directories
(
pls
target_include_directories
(
pls
...
...
lib/pls/include/pls/internal/helpers/mini_benchmark.h
0 → 100644
View file @
f0f3b80e
#ifndef PLS_MINI_BENCHMARK_H
#define PLS_MINI_BENCHMARK_H
#include "pls/internal/scheduling/scheduler_memory.h"
#include "pls/internal/scheduling/scheduler.h"
#include <chrono>
#include <iostream>
namespace pls {
namespace internal {
namespace helpers {

// TODO: Clean up (separate into small functions and .cpp file)

/**
 * Times `lambda` once for every thread count from 1 up to `max_threads`.
 *
 * For each thread count a scheduler is created and `lambda` is called in a loop
 * inside scheduler.perform_work() until `max_runtime_ms` has elapsed. The
 * average time per call in microseconds is written to std::cout; the values
 * are separated by ", " and the line is terminated after the last one.
 *
 * @param lambda          Workload to measure; called without arguments.
 * @param max_threads     Highest thread count to measure (also passed to the scheduler memory).
 * @param max_runtime_ms  Time budget per thread count in milliseconds. The workload
 *                        runs at least once even if the budget is zero or negative.
 */
template<typename Function>
void run_mini_benchmark(const Function &lambda, size_t max_threads, long max_runtime_ms = 1000) {
  using namespace pls::internal::scheduling;
  // Intervals must be measured with a monotonic clock. high_resolution_clock
  // may be an alias of system_clock, which can jump when the wall time is adjusted.
  using benchmark_clock = std::chrono::steady_clock;

  malloc_scheduler_memory scheduler_memory{max_threads};
  for (unsigned int num_threads = 1; num_threads <= max_threads; num_threads++) {
    scheduler local_scheduler{&scheduler_memory, num_threads};

    benchmark_clock::time_point start_time;
    benchmark_clock::time_point end_time;
    unsigned long iterations = 0;

    // NOTE(review): the results are read right after this call, which assumes
    // perform_work() returns only after the passed lambda finished - confirm.
    local_scheduler.perform_work([&] {
      start_time = benchmark_clock::now();
      const benchmark_clock::time_point planned_end_time =
          start_time + std::chrono::milliseconds(max_runtime_ms);

      // do/while: always take at least one sample, so a non-positive budget
      // cannot lead to a division by zero iterations below.
      do {
        lambda();
        end_time = benchmark_clock::now();
        iterations++;
      } while (end_time < planned_end_time);
    });

    const auto elapsed_us =
        std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count();
    const double time_per_iteration = static_cast<double>(elapsed_us) / iterations;

    std::cout << time_per_iteration;
    if (num_threads < max_threads) {
      std::cout << ", ";
    }
  }
  std::cout << std::endl;
}

}
}
}
#endif //PLS_MINI_BENCHMARK_H
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment