1 // This file is part of Eigen, a lightweight C++ template library 2 // for linear algebra. 3 // 4 // Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com> 5 // 6 // This Source Code Form is subject to the terms of the Mozilla 7 // Public License v. 2.0. If a copy of the MPL was not distributed 8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 9 10 #ifndef CXX11_SRC_FIXEDPOINT_MATMATPRODUCT_H_ 11 #define CXX11_SRC_FIXEDPOINT_MATMATPRODUCT_H_ 12 13 namespace Eigen { 14 namespace internal { 15 16 // Accumulate the product of 2 QInt8 inputs on 32 bits to prevent 17 // overflows 18 template <> 19 struct scalar_product_traits<QInt8, QInt8> { 20 enum { Defined = 1 }; 21 typedef QInt32 ReturnType; 22 }; 23 24 // Accumulate the product of 2 QInt16 inputs on 32 bits to prevent 25 // overflows 26 template <> 27 struct scalar_product_traits<QInt16, QInt16> { 28 enum { Defined = 1 }; 29 typedef QInt32 ReturnType; 30 }; 31 32 // Accumulate the product of QInt8 inputs with QUint8 inputs on 32 bits 33 // to prevent overflows 34 template <> 35 struct scalar_product_traits<QInt8, QUInt8> { 36 enum { Defined = 1 }; 37 typedef QInt32 ReturnType; 38 }; 39 40 // Accumulate the product of QUInt8 inputs with Qint8 inputs on 32 bits 41 // to prevent overflows 42 template <> 43 struct scalar_product_traits<QUInt8, QInt8> { 44 enum { Defined = 1 }; 45 typedef QInt32 ReturnType; 46 }; 47 48 // Description of the product implementation. It's pretty simple now since 49 // nothing is vectorized yet. 50 // This definition tackle the case where both lhs and rhs are encoded using 51 // signed 8bit integers 52 #ifndef EIGEN_USE_OPTIMIZED_INT8_INT8_MAT_MAT_PRODUCT 53 54 template <bool _ConjLhs, bool _ConjRhs> 55 class gebp_traits<QInt8, QInt8, _ConjLhs, _ConjRhs> { 56 public: 57 typedef QInt8 LhsScalar; 58 typedef QInt8 RhsScalar; 59 typedef QInt32 ResScalar; 60 61 typedef typename packet_traits<LhsScalar>::type LhsPacket; 62 typedef LhsPacket LhsPacket4Packing; 63 64 enum { 65 // register block size along the M and N directions 66 // One for the current implementation 67 nr = 1, 68 mr = 1, 69 // Progress made at each iteration of the product loop 70 // also 1 for the current implementation 71 LhsProgress = 1, 72 RhsProgress = 1 73 }; 74 }; 75 76 // The signed 8bit Mat-Mat product itself. 77 template <typename Index, typename DataMapper, int mr, int nr, 78 bool ConjugateLhs, bool ConjugateRhs> 79 struct gebp_kernel<QInt8, QInt8, Index, DataMapper, mr, nr, ConjugateLhs, 80 ConjugateRhs> { 81 EIGEN_DONT_INLINE 82 void operator()(const DataMapper& res, const QInt8* blockA, 83 const QInt8* blockB, Index rows, Index depth, Index cols, 84 QInt32 alpha, Index strideA = -1, Index strideB = -1, 85 Index offsetA = 0, Index offsetB = 0); 86 }; 87 88 template <typename Index, typename DataMapper, int mr, int nr, 89 bool ConjugateLhs, bool ConjugateRhs> 90 EIGEN_DONT_INLINE void gebp_kernel<QInt8, QInt8, Index, DataMapper, mr, nr, 91 ConjugateLhs, ConjugateRhs>:: 92 operator()(const DataMapper& res, const QInt8* blockA, const QInt8* blockB, 93 Index rows, Index depth, Index cols, QInt32 alpha, Index strideA, 94 Index strideB, Index offsetA, Index offsetB) { 95 EIGEN_STATIC_ASSERT(!ConjugateLhs, YOU_MADE_A_PROGRAMMING_MISTAKE); 96 EIGEN_STATIC_ASSERT(!ConjugateRhs, YOU_MADE_A_PROGRAMMING_MISTAKE); 97 98 eigen_assert(alpha.value == 1); 99 eigen_assert(strideA == -1); 100 eigen_assert(strideB == -1); 101 eigen_assert(offsetA == 0); 102 eigen_assert(offsetB == 0); 103 104 eigen_assert(rows > 0); 105 eigen_assert(cols > 0); 106 eigen_assert(depth > 0); 107 eigen_assert(blockA); 108 eigen_assert(blockB); 109 110 for (Index j = 0; j < cols; ++j) { 111 Index startB = j * depth; 112 113 for (Index i = 0; i < rows; ++i) { 114 Index startA = i * depth; 115 116 for (Index k = 0; k < depth; ++k) { 117 res(i, j) += blockA[startA + k] * blockB[startB + k]; 118 } 119 } 120 } 121 } 122 #endif 123 124 // This definition tackle the case where the lhs is encoded using signed 8bit 125 // integers and the rhs using unsigned 8bit integers. 126 #ifndef EIGEN_USE_OPTIMIZED_INT8_UINT8_MAT_MAT_PRODUCT 127 template <bool _ConjLhs, bool _ConjRhs> 128 class gebp_traits<QInt8, QUInt8, _ConjLhs, _ConjRhs> { 129 public: 130 typedef QInt8 LhsScalar; 131 typedef QUInt8 RhsScalar; 132 typedef QInt32 ResScalar; 133 134 typedef typename packet_traits<LhsScalar>::type LhsPacket; 135 typedef LhsPacket LhsPacket4Packing; 136 137 enum { 138 // register block size along the M and N directions 139 // One for the current implementation 140 nr = 1, 141 mr = 1, 142 // Progress made at each iteration of the product loop 143 // also 1 for the current implementation 144 LhsProgress = 1, 145 RhsProgress = 1 146 }; 147 }; 148 149 // Mat-Mat product of a signed 8bit lhs with an unsigned 8bit rhs 150 template <typename Index, typename DataMapper, int mr, int nr, 151 bool ConjugateLhs, bool ConjugateRhs> 152 struct gebp_kernel<QInt8, QUInt8, Index, DataMapper, mr, nr, ConjugateLhs, 153 ConjugateRhs> { 154 EIGEN_DONT_INLINE 155 void operator()(const DataMapper& res, const QInt8* blockA, 156 const QUInt8* blockB, Index rows, Index depth, Index cols, 157 QInt32 alpha, Index strideA = -1, Index strideB = -1, 158 Index offsetA = 0, Index offsetB = 0); 159 }; 160 161 template <typename Index, typename DataMapper, int mr, int nr, 162 bool ConjugateLhs, bool ConjugateRhs> 163 EIGEN_DONT_INLINE void gebp_kernel<QInt8, QUInt8, Index, DataMapper, mr, nr, 164 ConjugateLhs, ConjugateRhs>:: 165 operator()(const DataMapper& res, const QInt8* blockA, const QUInt8* blockB, 166 Index rows, Index depth, Index cols, QInt32 alpha, Index strideA, 167 Index strideB, Index offsetA, Index offsetB) { 168 EIGEN_STATIC_ASSERT(!ConjugateLhs, YOU_MADE_A_PROGRAMMING_MISTAKE); 169 EIGEN_STATIC_ASSERT(!ConjugateRhs, YOU_MADE_A_PROGRAMMING_MISTAKE); 170 171 eigen_assert(alpha.value == 1); 172 eigen_assert(strideA == -1); 173 eigen_assert(strideB == -1); 174 eigen_assert(offsetA == 0); 175 eigen_assert(offsetB == 0); 176 177 eigen_assert(rows > 0); 178 eigen_assert(cols > 0); 179 eigen_assert(depth > 0); 180 eigen_assert(blockA); 181 eigen_assert(blockB); 182 183 for (Index j = 0; j < cols; ++j) { 184 Index startB = j * depth; 185 186 for (Index i = 0; i < rows; ++i) { 187 Index startA = i * depth; 188 189 for (Index k = 0; k < depth; ++k) { 190 res(i, j) += blockA[startA + k] * blockB[startB + k]; 191 } 192 } 193 } 194 } 195 #endif 196 197 // This definition tackle the case where the khs is encoded using unsigned 8bit 198 // integers and the rhs using signed 8bit integers. 199 #ifndef EIGEN_USE_OPTIMIZED_UINT8_INT8_MAT_MAT_PRODUCT 200 template <bool _ConjLhs, bool _ConjRhs> 201 class gebp_traits<QUInt8, QInt8, _ConjLhs, _ConjRhs> { 202 public: 203 typedef QUInt8 LhsScalar; 204 typedef QInt8 RhsScalar; 205 typedef QInt32 ResScalar; 206 207 typedef typename packet_traits<LhsScalar>::type LhsPacket; 208 typedef LhsPacket LhsPacket4Packing; 209 210 enum { 211 // register block size along the M and N directions 212 // One for the current implementation 213 nr = 1, 214 mr = 1, 215 // Progress made at each iteration of the product loop 216 // also 1 for the current implementation 217 LhsProgress = 1, 218 RhsProgress = 1 219 }; 220 }; 221 222 // Mat-Mat product of an unsigned 8bit lhs with a signed 8bit rhs 223 template <typename Index, typename DataMapper, int mr, int nr, 224 bool ConjugateLhs, bool ConjugateRhs> 225 struct gebp_kernel<QUInt8, QInt8, Index, DataMapper, mr, nr, ConjugateLhs, 226 ConjugateRhs> { 227 EIGEN_DONT_INLINE 228 void operator()(const DataMapper& res, const QUInt8* blockA, 229 const QInt8* blockB, Index rows, Index depth, Index cols, 230 QInt32 alpha, Index strideA = -1, Index strideB = -1, 231 Index offsetA = 0, Index offsetB = 0); 232 }; 233 234 template <typename Index, typename DataMapper, int mr, int nr, 235 bool ConjugateLhs, bool ConjugateRhs> 236 EIGEN_DONT_INLINE void gebp_kernel<QUInt8, QInt8, Index, DataMapper, mr, nr, 237 ConjugateLhs, ConjugateRhs>:: 238 operator()(const DataMapper& res, const QUInt8* blockA, const QInt8* blockB, 239 Index rows, Index depth, Index cols, QInt32 alpha, Index strideA, 240 Index strideB, Index offsetA, Index offsetB) { 241 EIGEN_STATIC_ASSERT(!ConjugateLhs, YOU_MADE_A_PROGRAMMING_MISTAKE); 242 EIGEN_STATIC_ASSERT(!ConjugateRhs, YOU_MADE_A_PROGRAMMING_MISTAKE); 243 244 eigen_assert(alpha.value == 1); 245 eigen_assert(strideA == -1); 246 eigen_assert(strideB == -1); 247 eigen_assert(offsetA == 0); 248 eigen_assert(offsetB == 0); 249 250 eigen_assert(rows > 0); 251 eigen_assert(cols > 0); 252 eigen_assert(depth > 0); 253 eigen_assert(blockA); 254 eigen_assert(blockB); 255 256 for (Index j = 0; j < cols; ++j) { 257 Index startB = j * depth; 258 259 for (Index i = 0; i < rows; ++i) { 260 Index startA = i * depth; 261 262 for (Index k = 0; k < depth; ++k) { 263 res(i, j) += blockA[startA + k] * blockB[startB + k]; 264 } 265 } 266 } 267 } 268 #endif 269 270 #ifndef EIGEN_USE_OPTIMIZED_INT16_INT16_MAT_MAT_PRODUCT 271 272 template <bool _ConjLhs, bool _ConjRhs> 273 class gebp_traits<QInt16, QInt16, _ConjLhs, _ConjRhs> { 274 public: 275 typedef QInt16 LhsScalar; 276 typedef QInt16 RhsScalar; 277 typedef QInt32 ResScalar; 278 279 typedef typename packet_traits<LhsScalar>::type LhsPacket; 280 typedef LhsPacket LhsPacket4Packing; 281 282 enum { 283 // register block size along the M and N directions 284 // One for the current implementation 285 nr = 1, 286 mr = 1, 287 // Progress made at each iteration of the product loop 288 // also 1 for the current implementation 289 LhsProgress = 1, 290 RhsProgress = 1 291 }; 292 }; 293 294 // The signed 16bit Mat-Mat product itself. 295 template <typename Index, typename DataMapper, int mr, int nr, 296 bool ConjugateLhs, bool ConjugateRhs> 297 struct gebp_kernel<QInt16, QInt16, Index, DataMapper, mr, nr, ConjugateLhs, 298 ConjugateRhs> { 299 EIGEN_DONT_INLINE 300 void operator()(const DataMapper& res, const QInt16* blockA, 301 const QInt16* blockB, Index rows, Index depth, Index cols, 302 QInt32 alpha, Index strideA = -1, Index strideB = -1, 303 Index offsetA = 0, Index offsetB = 0); 304 }; 305 306 template <typename Index, typename DataMapper, int mr, int nr, 307 bool ConjugateLhs, bool ConjugateRhs> 308 EIGEN_DONT_INLINE void gebp_kernel<QInt16, QInt16, Index, DataMapper, mr, nr, 309 ConjugateLhs, ConjugateRhs>:: 310 operator()(const DataMapper& res, const QInt16* blockA, const QInt16* blockB, 311 Index rows, Index depth, Index cols, QInt32 alpha, Index strideA, 312 Index strideB, Index offsetA, Index offsetB) { 313 EIGEN_STATIC_ASSERT(!ConjugateLhs, YOU_MADE_A_PROGRAMMING_MISTAKE); 314 EIGEN_STATIC_ASSERT(!ConjugateRhs, YOU_MADE_A_PROGRAMMING_MISTAKE); 315 316 eigen_assert(alpha.value == 1); 317 eigen_assert(strideA == -1); 318 eigen_assert(strideB == -1); 319 eigen_assert(offsetA == 0); 320 eigen_assert(offsetB == 0); 321 322 eigen_assert(rows > 0); 323 eigen_assert(cols > 0); 324 eigen_assert(depth > 0); 325 eigen_assert(blockA); 326 eigen_assert(blockB); 327 328 for (Index j = 0; j < cols; ++j) { 329 Index startB = j * depth; 330 331 for (Index i = 0; i < rows; ++i) { 332 Index startA = i * depth; 333 334 for (Index k = 0; k < depth; ++k) { 335 res(i, j) += blockA[startA + k] * blockB[startB + k]; 336 } 337 } 338 } 339 } 340 #endif 341 342 } // namespace internal 343 } // namespace Eigen 344 345 #endif // CXX11_SRC_FIXEDPOINT_MATMATPRODUCT_H_ 346