Commit 3ae41f42 by FritzFlorian

Fix legacy components breaking CI.

parent dfe88369
Pipeline #1431 failed in 41 seconds
@@ -37,12 +37,9 @@ add_subdirectory(lib/pls)
# Include examples
add_subdirectory(app/playground)
add_subdirectory(app/test_for_new)
add_subdirectory(app/benchmark_fft)
add_subdirectory(app/benchmark_unbalanced)
add_subdirectory(app/benchmark_matrix)
add_subdirectory(app/benchmark_prefix)
add_subdirectory(app/benchmark_pipeline)
add_subdirectory(app/benchmark_fib)
add_subdirectory(app/context_switch)
......
add_executable(benchmark_pipeline main.cpp)
target_link_libraries(benchmark_pipeline pls)
if (EASY_PROFILER)
target_link_libraries(benchmark_pipeline easy_profiler)
endif ()
#include <pls/pls.h>
#include <pls/dataflow/dataflow.h>
#include <pls/internal/helpers/profiler.h>
#include <pls/internal/helpers/mini_benchmark.h>
#include <iostream>
#include <complex>
#include <vector>
#include <tuple>
#include <atomic>
#include <cmath> // std::sin, M_PI
static constexpr int INPUT_SIZE = 8192;
typedef std::vector<std::complex<double>> complex_vector;
using namespace pls::dataflow;
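// Reorder the input in place: even-indexed elements move to the first half,
// odd-indexed elements to the second half (the radix-2 Cooley-Tukey split).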
void divide(complex_vector::iterator data, int n) {
complex_vector tmp_odd_elements(n / 2);
for (int i = 0; i < n / 2; i++) {
tmp_odd_elements[i] = data[i * 2 + 1];
}
for (int i = 0; i < n / 2; i++) {
data[i] = data[i * 2];
}
for (int i = 0; i < n / 2; i++) {
data[i + n / 2] = tmp_odd_elements[i];
}
}
void combine(complex_vector::iterator data, int n) {
for (int i = 0; i < n / 2; i++) {
std::complex<double> even = data[i];
std::complex<double> odd = data[i + n / 2];
// w is the "twiddle-factor".
// this could be cached, but we run the same algorithm in both the parallel and serial runs,
// so it won't impact the performance comparison.
std::complex<double> w = exp(std::complex<double>(0, -2. * M_PI * i / n));
data[i] = even + w * odd;
data[i + n / 2] = even - w * odd;
}
}
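// Recursive radix-2 Cooley-Tukey FFT; assumes n is a power of two and runs
// in O(n log n).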
void fft(complex_vector::iterator data, int n) {
if (n < 2) {
return;
}
divide(data, n);
fft(data, n / 2);
fft(data + n / 2, n / 2);
combine(data, n);
}
complex_vector prepare_input(int input_size) {
std::vector<double> known_frequencies{2, 11, 52, 88, 256};
complex_vector data(input_size);
// Set our input data to match a time series of the known_frequencies.
// When applying fft to this time-series we should find these frequencies.
for (int i = 0; i < input_size; i++) {
data[i] = std::complex<double>(0.0, 0.0);
for (auto frequency : known_frequencies) {
data[i] += std::sin(2 * M_PI * frequency * i / input_size);
}
}
return data;
}
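// Illustrative self-check (a sketch, not part of the committed benchmark):
// after fft(), the magnitude spectrum of prepare_input()'s time series should
// peak at the known frequencies. The function name below is hypothetical.
void print_fft_peaks() {
complex_vector data = prepare_input(INPUT_SIZE);
fft(data.begin(), data.size());
for (int bin : {2, 11, 52, 88, 256}) {
std::cout << "bin " << bin << ": |X| = " << std::abs(data[bin]) << std::endl;
}
}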
int main() {
PROFILE_ENABLE
pls::malloc_scheduler_memory my_scheduler_memory{8, 2u << 18u};
pls::scheduler scheduler{&my_scheduler_memory, 4};
graph<inputs<int>, outputs<int>> graph;
std::atomic<int> count{0};
auto lambda = [&](const int &in, int &out) {
PROFILE_WORK_BLOCK("Work Lambda")
out = in;
complex_vector input = prepare_input(INPUT_SIZE);
fft(input.begin(), input.size());
count++;
};
function_node<inputs<int>, outputs<int>, decltype(lambda)> step_1{lambda};
function_node<inputs<int>, outputs<int>, decltype(lambda)> step_2{lambda};
function_node<inputs<int>, outputs<int>, decltype(lambda)> step_3{lambda};
function_node<inputs<int>, outputs<int>, decltype(lambda)> step_4{lambda};
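// Wire the four stages into a linear pipeline: the graph's input feeds
// step_1, each stage feeds the next, and step_4 feeds the graph's output.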
graph >> step_1 >> step_2 >> step_3 >> step_4 >> graph;
graph.build();
const int num_elements = 10;
std::vector<std::tuple<int>> results(num_elements);
pls::internal::helpers::run_mini_benchmark([&] {
PROFILE_WORK_BLOCK("Top Level")
for (int j = 0; j < num_elements; j++) {
graph.run(std::tuple<int>{j}, &results[j]);
}
pls::scheduler::wait_for_all();
}, 8, 1000);
PROFILE_SAVE("test_profile.prof")
}
//int main() {
// PROFILE_ENABLE
// pls::malloc_scheduler_memory my_scheduler_memory{8, 2u << 18u};
// pls::scheduler scheduler{&my_scheduler_memory, 4};
//
// graph<inputs<int>, outputs<int>> graph;
// std::atomic<int> count{0};
// auto lambda = [&](const int &in, int &out) {
// PROFILE_WORK_BLOCK("Work Lambda")
// out = in;
// complex_vector input = prepare_input(INPUT_SIZE);
// fft(input.begin(), input.size());
// count++;
// };
// function_node<inputs<int>, outputs<int>, decltype(lambda)> step_1{lambda};
// function_node<inputs<int>, outputs<int>, decltype(lambda)> step_2{lambda};
// function_node<inputs<int>, outputs<int>, decltype(lambda)> step_3{lambda};
// function_node<inputs<int>, outputs<int>, decltype(lambda)> step_4{lambda};
//
// graph >> step_1 >> step_2 >> step_3 >> step_4 >> graph;
// graph.build();
//
// const int num_elements = 10;
// std::vector<std::tuple<int>> results(num_elements);
//
// scheduler.perform_work([&] {
// PROFILE_MAIN_THREAD
// for (int i = 0; i < 10; i++) {
// PROFILE_WORK_BLOCK("Top Level")
// for (int j = 0; j < num_elements; j++) {
// graph.run(std::tuple<int>{j}, &results[j]);
// }
// pls::scheduler::wait_for_all();
// }
// });
//
// std::cout << count << std::endl;
//
// PROFILE_SAVE("test_profile.prof")
//}
add_executable(benchmark_prefix main.cpp)
target_link_libraries(benchmark_prefix pls)
if (EASY_PROFILER)
target_link_libraries(benchmark_prefix easy_profiler)
endif ()
#include <pls/pls.h>
#include <pls/internal/helpers/profiler.h>
#include <pls/internal/helpers/mini_benchmark.h>
#include <iostream>
#include <vector>
#include <functional>
static constexpr int INPUT_SIZE = 100000000; // 10^8 elements
int main() {
PROFILE_ENABLE
std::vector<double> vec(INPUT_SIZE, 1);
std::vector<double> out(INPUT_SIZE);
for (int i = 0; i < INPUT_SIZE; i++) {
vec[i] = i;
}
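// pls::scan computes a prefix sum (scan) of vec into out, combining elements
// with std::plus<double> starting from the initial value 0.0.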
pls::internal::helpers::run_mini_benchmark([&] {
pls::scan(vec.begin(), vec.end(), out.begin(), std::plus<double>(), 0.0);
}, 8, 1000);
PROFILE_SAVE("test_profile.prof")
}
//int main() {
// PROFILE_ENABLE
// pls::malloc_scheduler_memory my_scheduler_memory{8, 2u << 18};
// pls::scheduler scheduler{&my_scheduler_memory, 8};
//
// std::vector<double> vec(INPUT_SIZE, 1);
// std::vector<double> out(INPUT_SIZE);
//
// for (int i = 0; i < INPUT_SIZE; i++) {
// vec[i] = 1;
// }
//
// scheduler.perform_work([&] {
// PROFILE_MAIN_THREAD
// for (int i = 0; i < 100; i++) {
// pls::scan(vec.begin(), vec.end(), out.begin(), std::plus<double>(), 0.0);
// }
// });
//
// PROFILE_SAVE("test_profile.prof")
//}
add_executable(playground
barrier.h barrier.cpp
main.cpp)
# Example for adding the library to your app (as a cmake project dependency)
target_link_libraries(playground pls context_switcher Threads::Threads)
#include "barrier.h"
barrier::barrier(const unsigned int count) : barrier_{} {
pthread_barrier_init(&barrier_, nullptr, count);
}
barrier::~barrier() {
pthread_barrier_destroy(&barrier_);
}
void barrier::wait() {
pthread_barrier_wait(&barrier_);
}
#ifndef PLS_BARRIER_H
#define PLS_BARRIER_H
#include <pthread.h>
/**
* Provides standard barrier behaviour.
* `count` threads have to call `wait()` before any of the `wait()` calls returns,
* thus blocking all threads until everyone reached the barrier.
*
* PORTABILITY:
* Current implementation is based on pthreads.
*/
class barrier {
pthread_barrier_t barrier_;
public:
explicit barrier(unsigned int count);
~barrier();
void wait();
};
#endif //PLS_BARRIER_H
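A minimal usage sketch for this barrier (illustrative only; the worker count and all names below are made up):
#include <cstdio>
#include <thread>
#include <vector>
#include "barrier.h"
int main() {
constexpr unsigned int num_workers = 4;
barrier sync_point{num_workers};
std::vector<std::thread> workers;
for (unsigned int i = 0; i < num_workers; i++) {
workers.emplace_back([&sync_point, i] {
std::printf("worker %u arrived\n", i);
sync_point.wait(); // blocks until all num_workers threads arrive
std::printf("worker %u released\n", i);
});
}
for (auto &worker : workers) {
worker.join();
}
return 0;
}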
#include <sys/types.h>
#include <unistd.h>
#include <fstream>
#include <sstream>
#include <string>
#include <mutex>
#include <cstdio> // printf
#include "tsan_support.h"
using namespace std;
long count_memory_mappings() {
pid_t my_pid = getpid();
ifstream proc_file{"/proc/" + to_string(my_pid) + "/maps"};
string line;
long line_count{0};
while (getline(proc_file, line)) {
line_count++;
}
return line_count;
}
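// Stress test: repeatedly create, switch to, and destroy a TSAN fiber while
// taking a lock, and watch /proc/<pid>/maps to see whether the number of
// memory mappings grows without bound (i.e. whether TSAN leaks mappings).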
int main() {
mutex mut;
int count = 0;
while (true) {
printf("iteration: %d, mappings: %ld\n", count++, count_memory_mappings());
void *main_fiber = __tsan_get_current_fiber();
void *other_fiber = __tsan_create_fiber(0);
__tsan_switch_to_fiber(other_fiber, 0);
mut.lock();
mut.unlock();
__tsan_switch_to_fiber(main_fiber, 0);
__tsan_destroy_fiber(other_fiber);
}
return 0;
}
#ifndef CONTEXT_SWITCHER_TSAN_SUPPORT
#define CONTEXT_SWITCHER_TSAN_SUPPORT
extern "C" {
// Fiber switching API.
// - TSAN context for fiber can be created by __tsan_create_fiber
// and freed by __tsan_destroy_fiber.
// - TSAN context of current fiber or thread can be obtained
// by calling __tsan_get_current_fiber.
// - __tsan_switch_to_fiber should be called immediately before switching
// to a fiber, e.g. before a call to swapcontext.
// - Fiber name can be set by __tsan_set_fiber_name.
void *__tsan_get_current_fiber(void);
void *__tsan_create_fiber(unsigned flags);
void __tsan_destroy_fiber(void *fiber);
void __tsan_switch_to_fiber(void *fiber, unsigned flags);
void __tsan_set_fiber_name(void *fiber, const char *name);
};
#endif //CONTEXT_SWITCHER_TSAN_SUPPORT
add_executable(test_for_new main.cpp)
# Example for adding the library to your app (as a cmake project dependency)
target_link_libraries(test_for_new pls)
#include <pls/internal/base/thread.h>
#include <pls/internal/helpers/prohibit_new.h>
using namespace pls::internal::base;
int global = 0;
int main() {
// Exercise every feature so that any prohibited use of new is triggered, if present.
thread t1{[]() {}};
t1.join();
}
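For context, a minimal sketch of one way prohibit_new.h could work, assuming it replaces the global allocation functions so that any heap allocation aborts the test binary (the library's actual mechanism may differ):
#include <cstdlib>
#include <new>
// Hypothetical mechanism sketch: abort on any global heap allocation so the
// test fails loudly if an exercised code path calls new.
void *operator new(std::size_t) {
std::abort();
}
void operator delete(void *) noexcept {
std::abort();
}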
@@ -25,7 +25,6 @@ add_library(pls STATIC
include/pls/internal/helpers/prohibit_new.h
include/pls/internal/helpers/profiler.h
include/pls/internal/helpers/mini_benchmark.h
include/pls/internal/helpers/unique_id.h
include/pls/internal/helpers/range.h
include/pls/internal/helpers/seqence.h
......
@@ -4,10 +4,11 @@
#include "pls/internal/base/system_details.h"
#include "pls/internal/helpers/profiler.h"
#include "pls/internal/base/thread.h"
#include <random>
#include <thread>
#include <chrono>
#include <cmath>
namespace pls::internal::base {
@@ -34,7 +35,8 @@ class backoff {
if (current_ >= YELD_ITERS) {
PROFILE_LOCK("Yield")
using namespace std::chrono_literals;
std::this_thread::sleep_for(5us);
}
current_ = std::min(current_ * 2, MAX_ITERS);
......
@@ -7,8 +7,6 @@
#include <atomic>
#include <iostream>
#include "pls/internal/base/thread.h"
namespace pls {
namespace internal {
namespace base {
......
@@ -5,7 +5,6 @@
#include <atomic>
#include <iostream>
#include "pls/internal/base/thread.h"
#include "pls/internal/base/backoff.h"
namespace pls {
......
#ifndef PLS_MINI_BENCHMARK_H
#define PLS_MINI_BENCHMARK_H
#include "pls/internal/scheduling/scheduler_memory.h"
#include "pls/internal/scheduling/scheduler.h"
#include <chrono>
#include <iostream>
namespace pls {
namespace internal {
namespace helpers {
// TODO: Clean up (separate into small functions and .cpp file)
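// For each thread count from 1 to max_threads this runs `lambda` in a fresh
// scheduler: first untimed warmup iterations for warmup_time_ms, then timed
// iterations until max_runtime_ms has elapsed. Per thread count it prints
// "<mean microseconds per iteration> (<max microseconds>)", tab-separated.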
template<typename Function>
void run_mini_benchmark(const Function &lambda,
size_t max_threads,
unsigned long max_runtime_ms = 1000,
unsigned long warmup_time_ms = 100) {
using namespace std;
using namespace pls::internal::scheduling;
malloc_scheduler_memory scheduler_memory{max_threads, 2u << 17u};
for (unsigned int num_threads = 1; num_threads <= max_threads; num_threads++) {
scheduler local_scheduler{&scheduler_memory, num_threads};
chrono::high_resolution_clock::time_point start_time;
chrono::high_resolution_clock::time_point end_time;
long max_local_time = 0;
long total_time = 0;
long iterations = 0;
local_scheduler.perform_work([&] {
start_time = chrono::high_resolution_clock::now();
end_time = start_time;
chrono::high_resolution_clock::time_point planned_end_time = start_time + chrono::milliseconds(max_runtime_ms);
chrono::high_resolution_clock::time_point planned_warmup_time = start_time + chrono::milliseconds(warmup_time_ms);
while (end_time < planned_end_time) {
if (end_time < planned_warmup_time) {
lambda();
} else {
auto local_start_time = chrono::high_resolution_clock::now();
lambda();
auto local_end_time = chrono::high_resolution_clock::now();
long local_time = chrono::duration_cast<chrono::microseconds>(local_end_time - local_start_time).count();
total_time += local_time;
max_local_time = std::max(local_time, max_local_time);
iterations++;
}
end_time = chrono::high_resolution_clock::now();
}
});
double time_per_iteration = (double) total_time / iterations;
std::cout << (long) time_per_iteration << " (" << max_local_time << ")";
if (num_threads < max_threads) {
std::cout << "\t\t";
}
}
std::cout << std::endl;
}
}
}
}
#endif //PLS_MINI_BENCHMARK_H