// // Created by oke on 04.07.20. // #ifndef SMID_MATRIX_BLOCKWISE_H #define SMID_MATRIX_BLOCKWISE_H #include "detail/BlockWiseImpl.h" #include struct __m128_block_wise_config { using FloatType = float; using VectorType = __m128; static constexpr auto LoadVector = _mm_loadu_ps; static constexpr auto StoreVector = _mm_storeu_ps; static constexpr auto BroadcastToVector = _mm_set1_ps; static constexpr auto XOR = _mm_xor_ps; static constexpr unsigned Registers = 16; }; struct __m128d_block_wise_config { using FloatType = double; using VectorType = __m128d; static constexpr auto LoadVector = _mm_loadu_pd; static constexpr auto StoreVector = _mm_storeu_pd; static constexpr auto BroadcastToVector = _mm_set1_pd; static constexpr auto XOR = _mm_xor_pd; static constexpr unsigned Registers = 16; }; struct __m256_block_wise_config { using FloatType = float; using VectorType = __m256; static constexpr auto LoadVector = _mm256_loadu_ps; static constexpr auto StoreVector = _mm256_storeu_ps; static constexpr auto BroadcastToVector = _mm256_set1_ps; static constexpr auto XOR = _mm256_xor_ps; static constexpr unsigned Registers = 16; }; struct __m256d_block_wise_config { using FloatType = double; using VectorType = __m256d; static constexpr auto LoadVector = _mm256_loadu_pd; static constexpr auto StoreVector = _mm256_storeu_pd; static constexpr auto BroadcastToVector = _mm256_set1_pd; static constexpr auto XOR = _mm256_xor_pd; static constexpr unsigned Registers = 16; }; #ifdef WITH_AVX512 struct __m512_block_wise_config { using FloatType = float; using VectorType = __m512; static constexpr auto LoadVector = _mm512_loadu_ps; static constexpr auto StoreVector = _mm512_storeu_ps; static constexpr auto BroadcastToVector = _mm512_set1_ps; static constexpr unsigned Registers = 32; }; struct __m512d_block_wise_config { using FloatType = double; using VectorType = __m512d; static constexpr auto LoadVector = _mm512_loadu_pd; static constexpr auto StoreVector = _mm512_storeu_pd; static constexpr auto BroadcastToVector = _mm512_set1_pd; static constexpr unsigned Registers = 32; }; #endif enum AvxVersion { SSE, AVX2, #ifdef WITH_AVX512 AVX512 #endif }; template struct block_wise; template<> struct block_wise : public detail::block_wise_base<__m128_block_wise_config> {}; template<> struct block_wise : public detail::block_wise_base<__m128d_block_wise_config> {}; template<> struct block_wise : public detail::block_wise_base<__m256_block_wise_config> {}; template<> struct block_wise : public detail::block_wise_base<__m256d_block_wise_config> {}; #ifdef WITH_AVX512 template<> struct block_wise : public detail::block_wise_base<__m512_block_wise_config> {}; template<> struct block_wise : public detail::block_wise_base<__m512d_block_wise_config> {}; #endif template void __attribute__ ((noinline)) block_wise_sse(Matrix &C, const Matrix &A, const Matrix &B) { block_wise::multiply2(C, A, B); } template void __attribute__ ((noinline)) block_wise_avx2(Matrix &C, const Matrix &A, const Matrix &B) { block_wise::multiply2(C, A, B); } #ifdef WITH_AVX512 template void __attribute__ ((noinline)) block_wise_avx512(Matrix &C, const Matrix &A, const Matrix &B) { block_wise::multiply2(C, A, B); } #endif #endif //SMID_MATRIX_BLOCKWISE_H