Line data Source code
1 : /************************************************************************* 2 : * 3 : * Project 4 : * _____ _____ __ __ _____ 5 : * / ____| __ \| \/ | __ \ 6 : * ___ _ __ ___ _ __ | | __| |__) | \ / | |__) | 7 : * / _ \| '_ \ / _ \ '_ \| | |_ | ___/| |\/| | ___/ 8 : *| (_) | |_) | __/ | | | |__| | | | | | | | 9 : * \___/| .__/ \___|_| |_|\_____|_| |_| |_|_| 10 : * | | 11 : * |_| 12 : * 13 : * Copyright (C) Akiel Aries, <akiel@akiel.org>, et al. 14 : * 15 : * This software is licensed as described in the file LICENSE, which 16 : * you should have received as part of this distribution. The terms 17 : * among other details are referenced in the official documentation 18 : * seen here : https://akielaries.github.io/openGPMP/ along with 19 : * important files seen in this project. 20 : * 21 : * You may opt to use, copy, modify, merge, publish, distribute 22 : * and/or sell copies of the Software, and permit persons to whom 23 : * the Software is furnished to do so, under the terms of the 24 : * LICENSE file. As this is an Open Source effort, all implementations 25 : * must be of the same methodology. 26 : * 27 : * 28 : * 29 : * This software is distributed on an AS IS basis, WITHOUT 30 : * WARRANTY OF ANY KIND, either express or implied. 31 : * 32 : ************************************************************************/ 33 : #include <cmath> 34 : #include <cstdint> 35 : #include <iostream> 36 : #include <numeric> 37 : #include <openGPMP/linalg/vector.hpp> 38 : #include <stdexcept> 39 : #include <vector> 40 : 41 : #if defined(__x86_64__) || defined(__amd64__) || defined(__amd64) 42 : 43 : /************************************************************************ 44 : * 45 : * Vector Operations for AVX ISA 46 : * 47 : ************************************************************************/ 48 : #if defined(__AVX2__) 49 : 50 : // AVX family intrinsics 51 : #include <immintrin.h> 52 : 53 : /************************************************************************ 54 : * 55 : * Vector Operations on Vectors 56 : * 57 : ************************************************************************/ 58 : 59 : /*****************************************************************************/ 60 : 61 : template <typename T> 62 0 : void gpmp::linalg::vector_add_i16(const T *data1, 63 : const T *data2, 64 : T *result_data, 65 : size_t size) { 66 0 : size_t i = 0; 67 0 : if (size > 32) { 68 0 : for (; i < size - 15; i += 16) { 69 0 : __m256i a = _mm256_loadu_si256( 70 0 : reinterpret_cast<const __m256i *>(data1 + i)); 71 0 : __m256i b = _mm256_loadu_si256( 72 0 : reinterpret_cast<const __m256i *>(data2 + i)); 73 0 : __m256i c = _mm256_add_epi8(a, b); 74 0 : _mm256_storeu_si256(reinterpret_cast<__m256i *>(result_data + i), 75 : c); 76 : } 77 : } 78 0 : for (; i < size; ++i) { 79 0 : result_data[i] = data1[i] + data2[i]; 80 : } 81 0 : } 82 : 83 0 : void gpmp::linalg::vector_add(const std::vector<int16_t> &vec1, 84 : const std::vector<int16_t> &vec2, 85 : std::vector<int16_t> &result) { 86 : 87 0 : const size_t size = vec1.size(); 88 0 : vector_add_i16(vec1.data(), vec2.data(), result.data(), size); 89 0 : } 90 : 91 0 : void gpmp::linalg::vector_add(const std::vector<uint16_t> &vec1, 92 : const std::vector<uint16_t> &vec2, 93 : std::vector<uint16_t> &result) { 94 0 : const size_t size = vec1.size(); 95 0 : vector_add_i16(vec1.data(), vec2.data(), result.data(), size); 96 0 : } 97 : 98 : #endif // AVX2 99 : 100 : #endif // x86