#include #include #include #include const int MATRIX_SIZE = 128; template class matrix { public: T data[SIZE][SIZE]; explicit matrix(T i = 1) { std::fill(&data[0][0], &data[0][0] + SIZE * SIZE, i); } void multiply(const matrix &a, const matrix &b) { pls::for_each_range(0, SIZE, [&](int i) { this->multiply_column(i, a, b); }); } private: void multiply_column(int i, const matrix &a, const matrix &b) { for (int j = 0; j < SIZE; ++j) { data[i][j] = 0; } for (int k = 0; k < SIZE; ++k) { for (int j = 0; j < SIZE; ++j) { data[i][j] += a.data[i][k] * b.data[k][j]; } } } }; void fill_with_data(matrix &a, matrix &b) { // Fill in some data... for (int i = 0; i < MATRIX_SIZE; i++) { for (int j = 0; j < MATRIX_SIZE; j++) { a.data[i][j] = i; b.data[i][j] = j; } } } int main() { PROFILE_ENABLE matrix a; matrix b; matrix result; fill_with_data(a, b); pls::internal::helpers::run_mini_benchmark([&] { result.multiply(a, b); }, 8, 1000); PROFILE_SAVE("test_profile.prof") } //int main() { // PROFILE_ENABLE // pls::malloc_scheduler_memory my_scheduler_memory{8, 2u << 18u}; // pls::scheduler scheduler{&my_scheduler_memory, 4}; // // matrix a; // matrix b; // matrix result; // fill_with_data(a, b); // // scheduler.perform_work([&] { // auto start_time = std::chrono::high_resolution_clock::now(); // PROFILE_MAIN_THREAD // for (int i = 0; i < 10000; i++) { // PROFILE_WORK_BLOCK("Top Level") // result.multiply(a, b); // } // auto end_time = std::chrono::high_resolution_clock::now(); // long time = std::chrono::duration_cast(end_time - start_time).count(); // std::cout << "Runtime: " << time << "us" << std::endl; // }); // // PROFILE_SAVE("test_profile.prof") //}