#include "Matrix.h"
#include <chrono>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <stdexcept>
#include <string>
#include <boost/program_options.hpp>
#include "RegisterBlocking.h"
#include "Boost.h"
#include "Naive.h"
#include "DevideAndConquer.h"
#include "BLASMul.h"

using namespace std::chrono;
namespace po = boost::program_options;

/// Signature shared by every multiplication routine under test:
/// the routine receives the output matrix C and the two operands A and B.
template<typename T>
using BinaryMatrixOp = void (*)(Matrix<T> &C, const Matrix<T> &A, const Matrix<T> &B);

/// Runs `f` on A and B, prints the wall-clock time of the call and returns the result.
///
/// @param f  multiplication routine to time
/// @param A  left operand
/// @param B  right operand
/// @return   the matrix `f` wrote into; it is created as A.size1() x B.size2() filled with 0
template<typename T>
Matrix<T> run_function(BinaryMatrixOp<T> f, const Matrix<T> &A, const Matrix<T> &B) {
    Matrix<T> C(A.size1(), B.size2(), 0);

    const auto start = steady_clock::now();
    f(C, A, B);
    const auto stop = steady_clock::now();

    const auto ms = std::chrono::duration_cast<milliseconds>(stop - start).count();
    // Durations above one second are reported in seconds, everything else in milliseconds.
    if (ms > 1000) {
        std::cout << "multiply: " << ms / 1000. << "s" << std::endl;
    } else {
        std::cout << "multiply: " << ms << "ms" << std::endl;
    }
    return C;
}

// Runs `function` when its spelled-out name equals `test_function_name`.
// Expands inside main_work and relies on its locals `C`, `use_result` and
// `algorithm_found`: the result is stored in C, C(0, 0) is added to use_result,
// and algorithm_found records that a routine was actually executed.
#define TEST_IF(test_function_name, function, A, B) {\
    if(test_function_name == #function) {\
        C = run_function(function, A, B);\
        use_result += C(0, 0);\
        algorithm_found = true;\
    }\
}

/// Loads A and B from `input_folder`, runs the selected algorithm and optionally
/// validates its result against boost_axpy_mul.
///
/// @param test_function_name  name of the multiplication routine to run
/// @param input_folder        folder passed to matrix_io::loadAB
/// @param validate            when true, compare the result element-wise with boost_axpy_mul
/// @return 0 on success; -1 when no routine matched the name, when validation found a
///         mismatch, or when the accumulated C(0, 0) value is exactly 0
/// @throws std::runtime_error when validation finds differing result dimensions
template<typename T>
int main_work(const std::string &test_function_name, const std::string &input_folder, bool validate) {
    std::cout << "Running function '" << test_function_name << "'" << std::endl;
    std::srand(0);

    Matrix<T> A;
    Matrix<T> B;
    Matrix<T> C;

    const auto load_start = steady_clock::now();
    matrix_io::loadAB(A, B, input_folder);
    const auto load_stop = steady_clock::now();
    std::cout << "loading from file: "
              << std::chrono::duration_cast<milliseconds>(load_stop - load_start).count()
              << "ms" << std::endl;

    // use the result to prevent the compiler from optimizing the multiplication away
    double use_result = 0;
    bool algorithm_found = false;

    TEST_IF(test_function_name, naive_reordered, A, B)
    TEST_IF(test_function_name, block_wise_avx2, A, B)
#ifdef WITH_AVX512
    TEST_IF(test_function_name, block_wise_avx512, A, B)
#endif
    TEST_IF(test_function_name, boost_axpy_mul, A, B)
    TEST_IF(test_function_name, divide_and_conquer_block_avx2, A, B)
#ifdef WITH_AVX512
    TEST_IF(test_function_name, divide_and_conquer_block_avx512, A, B)
#endif
    TEST_IF(test_function_name, divide_and_conquer_naive_r1, A, B)
    TEST_IF(test_function_name, divide_and_conquer_naive_r2, A, B)
    TEST_IF(test_function_name, divide_and_conquer_naive_r3, A, B)
    TEST_IF(test_function_name, divide_and_conquer_naive_r4, A, B)
    TEST_IF(test_function_name, divide_and_conquer_naive_r5, A, B)
    TEST_IF(test_function_name, blas, A, B)

    // Without this check an unmatched name would only surface as an unexplained -1,
    // or as a misleading size error during validation.
    if (!algorithm_found) {
        std::cerr << "Unknown or unavailable algorithm '" << test_function_name << "'" << std::endl;
        return -1;
    }

    if (validate) {
        std::cout << "Validating matrix" << std::endl;
        Matrix<T> C2(A.size1(), B.size2(), 0);
        boost_axpy_mul(C2, A, B);

        if (C.size1() != C2.size1() || C.size2() != C2.size2())
            throw std::runtime_error("Result matrix has invalid size.");

        for (std::size_t i = 0; i < C2.size1(); ++i) {
            for (std::size_t j = 0; j < C2.size2(); ++j) {
                // std::abs keeps the floating-point difference; a bare abs may pick the
                // integer overload and truncate the error to 0.
                if (std::abs(C(i, j) - C2(i, j)) > 1e-1) {
                    std::cerr << i << ", " << j << " is wrong with values " << C(i, j) << ", " << C2(i, j)
                              << std::endl;
                    return -1;
                }
            }
        }
        std::cout << "Matrix seems fine" << std::endl;
    }

    // The value is followed by backspace characters, presumably so that it is
    // visually erased again on a terminal.
    std::cout << use_result << "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b" << std::endl;
    return use_result == 0 ? -1 : 0;
}

/// Parses the command line and dispatches to main_work<double> or main_work<float>.
///
/// Options: `input-folder` (also accepted positionally), `validate`, `algorithm`
/// (defaults to DEFAULT_TEST_FUNCTION_NAME) and `double`.
int main(int argc, char* argv[]) {
    po::options_description desc("Multiply two matrices");
    desc.add_options()
            ("input-folder", "folder containing matrices, following naming conventions; folder name:xx; file name: x")
            ("validate", "validate matrix with boost")
            ("algorithm", po::value<std::string>(), "algorithm to execute")
            ("double", "use_double instead of float");

    po::positional_options_description p;
    p.add("input-folder", -1);

    po::variables_map vm;
    try {
        po::store(po::command_line_parser(argc, argv).
                options(desc).positional(p).run(), vm);
        po::notify(vm);
    } catch (const po::error &e) {
        // Malformed command lines are reported with the usage text instead of terminating
        // through an uncaught exception.
        std::cerr << e.what() << std::endl << desc << std::endl;
        return -1;
    }

    // The input folder is read unconditionally below, so it has to be present.
    if (!vm.count("input-folder")) {
        std::cerr << desc << std::endl;
        return -1;
    }

    const std::string test_function_name = vm.count("algorithm") ? vm["algorithm"].as<std::string>()
                                                                 : DEFAULT_TEST_FUNCTION_NAME;
    const std::string input_folder = vm["input-folder"].as<std::string>();
    const bool validate = vm.count("validate") > 0;

    if (vm.count("double")) {
        return main_work<double>(test_function_name, input_folder, validate);
    } else {
        return main_work<float>(test_function_name, input_folder, validate);
    }
}