// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_UINT128_H
#define EIGEN_CXX11_TENSOR_TENSOR_UINT128_H

namespace Eigen {
namespace internal {


template <uint64_t n>
struct static_val {
  static const uint64_t value = n;
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE operator uint64_t() const { return n; }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static_val() { }

  template <typename T>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static_val(const T& v) {
    eigen_assert(v == n);
  }
};


template <typename HIGH = uint64_t, typename LOW = uint64_t>
struct TensorUInt128
{
  HIGH high;
  LOW low;

  template<typename OTHER_HIGH, typename OTHER_LOW>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
  TensorUInt128(const TensorUInt128<OTHER_HIGH, OTHER_LOW>& other) : high(other.high), low(other.low) {
    EIGEN_STATIC_ASSERT(sizeof(OTHER_HIGH) <= sizeof(HIGH), YOU_MADE_A_PROGRAMMING_MISTAKE);
    EIGEN_STATIC_ASSERT(sizeof(OTHER_LOW) <= sizeof(LOW), YOU_MADE_A_PROGRAMMING_MISTAKE);
  }

  template<typename OTHER_HIGH, typename OTHER_LOW>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
  TensorUInt128& operator = (const TensorUInt128<OTHER_HIGH, OTHER_LOW>& other) {
    EIGEN_STATIC_ASSERT(sizeof(OTHER_HIGH) <= sizeof(HIGH), YOU_MADE_A_PROGRAMMING_MISTAKE);
    EIGEN_STATIC_ASSERT(sizeof(OTHER_LOW) <= sizeof(LOW), YOU_MADE_A_PROGRAMMING_MISTAKE);
    high = other.high;
    low = other.low;
    return *this;
  }

  template<typename T>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
  explicit TensorUInt128(const T& x) : high(0), low(x) {
    eigen_assert((static_cast<typename conditional<sizeof(T) == 8, uint64_t, uint32_t>::type>(x) <= NumTraits<uint64_t>::highest()));
    eigen_assert(x >= 0);
  }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
  TensorUInt128(HIGH y, LOW x) : high(y), low(x) { }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE operator LOW() const {
    return low;
  }
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LOW lower() const {
    return low;
  }
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HIGH upper() const {
    return high;
  }
};


template <typename HL, typename LL, typename HR, typename LR>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
bool operator == (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
{
  return (lhs.high == rhs.high) & (lhs.low == rhs.low);
}

template <typename HL, typename LL, typename HR, typename LR>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
bool operator != (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
{
  return (lhs.high != rhs.high) | (lhs.low != rhs.low);
}

template <typename HL, typename LL, typename HR, typename LR>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
bool operator >= (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
{
  if (lhs.high != rhs.high) {
    return lhs.high > rhs.high;
  }
  return lhs.low >= rhs.low;
}
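
// Usage sketch (illustration only): a TensorUInt128 represents the value
// high * 2^64 + low, and the comparison operators in this file order values
// accordingly. With a shorthand typedef introduced only for this sketch:
//
//   typedef TensorUInt128<uint64_t, uint64_t> uint128;
//   uint128 x(1, 0);                      // 2^64
//   uint128 y(0, 0xFFFFFFFFFFFFFFFFull);  // 2^64 - 1
//   eigen_assert(y < x);
//   eigen_assert(x >= y);
//   eigen_assert(x != y);
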
template <typename HL, typename LL, typename HR, typename LR>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
bool operator < (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
{
  if (lhs.high != rhs.high) {
    return lhs.high < rhs.high;
  }
  return lhs.low < rhs.low;
}

template <typename HL, typename LL, typename HR, typename LR>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
TensorUInt128<uint64_t, uint64_t> operator + (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
{
  TensorUInt128<uint64_t, uint64_t> result(lhs.high + rhs.high, lhs.low + rhs.low);
  // If the low words wrapped around, carry into the high word.
  if (result.low < rhs.low) {
    result.high += 1;
  }
  return result;
}

template <typename HL, typename LL, typename HR, typename LR>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
TensorUInt128<uint64_t, uint64_t> operator - (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
{
  TensorUInt128<uint64_t, uint64_t> result(lhs.high - rhs.high, lhs.low - rhs.low);
  // If the low word wrapped around, propagate the borrow into the high word.
  if (result.low > lhs.low) {
    result.high -= 1;
  }
  return result;
}
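
// A minimal sketch (illustration only) of the identity used by operator* below:
// writing lhs = a*2^96 + b*2^64 + c*2^32 + d and rhs = e*2^96 + f*2^64 + g*2^32 + h
// with 32-bit digits, the product reduced modulo 2^128 only needs the partial
// products whose shifts stay below 2^128:
//
//   lhs * rhs mod 2^128 = d*h
//                       + (c*h + d*g) * 2^32
//                       + (b*h + c*g + d*f) * 2^64
//                       + (a*h + b*g + c*f + d*e) * 2^96
//
// For 64-bit operands (a = b = e = f = 0) this reduces to the schoolbook identity
// (c*2^32 + d) * (g*2^32 + h) = c*g*2^64 + (c*h + d*g)*2^32 + d*h.
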

template <typename HL, typename LL, typename HR, typename LR>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TensorUInt128<uint64_t, uint64_t> operator * (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
{
  // Split each 128-bit integer into 4 32-bit integers, and then do the
  // multiplications by hand as follows:
  //   lhs      a  b  c  d
  //   rhs      e  f  g  h
  //            -----------
  //           ah bh ch dh
  //              bg cg dg
  //                 cf df
  //                    de
  // The result is stored in 2 64-bit integers, high and low.

  const uint64_t LOW = 0x00000000FFFFFFFFLL;
  const uint64_t HIGH = 0xFFFFFFFF00000000LL;

  uint64_t d = lhs.low & LOW;
  uint64_t c = (lhs.low & HIGH) >> 32LL;
  uint64_t b = lhs.high & LOW;
  uint64_t a = (lhs.high & HIGH) >> 32LL;

  uint64_t h = rhs.low & LOW;
  uint64_t g = (rhs.low & HIGH) >> 32LL;
  uint64_t f = rhs.high & LOW;
  uint64_t e = (rhs.high & HIGH) >> 32LL;

  // Compute the low 32 bits of low
  uint64_t acc = d * h;
  uint64_t low = acc & LOW;
  // Compute the high 32 bits of low. Add a carry every time we wrap around
  acc >>= 32LL;
  uint64_t carry = 0;
  uint64_t acc2 = acc + c * h;
  if (acc2 < acc) {
    carry++;
  }
  acc = acc2 + d * g;
  if (acc < acc2) {
    carry++;
  }
  low |= (acc << 32LL);

  // Carry forward the high bits of acc to initiate the computation of the
  // low 32 bits of high
  acc2 = (acc >> 32LL) | (carry << 32LL);
  carry = 0;

  acc = acc2 + b * h;
  if (acc < acc2) {
    carry++;
  }
  acc2 = acc + c * g;
  if (acc2 < acc) {
    carry++;
  }
  acc = acc2 + d * f;
  if (acc < acc2) {
    carry++;
  }
  uint64_t high = acc & LOW;

  // Start to compute the high 32 bits of high.
  acc2 = (acc >> 32LL) | (carry << 32LL);

  acc = acc2 + a * h;
  acc2 = acc + b * g;
  acc = acc2 + c * f;
  acc2 = acc + d * e;
  high |= (acc2 << 32LL);

  return TensorUInt128<uint64_t, uint64_t>(high, low);
}

template <typename HL, typename LL, typename HR, typename LR>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TensorUInt128<uint64_t, uint64_t> operator / (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
{
  if (rhs == TensorUInt128<static_val<0>, static_val<1> >(1)) {
    return TensorUInt128<uint64_t, uint64_t>(lhs.high, lhs.low);
  } else if (lhs < rhs) {
    return TensorUInt128<uint64_t, uint64_t>(0);
  } else {
    // Find the smallest power-of-2 multiple of rhs that exceeds lhs.
    TensorUInt128<uint64_t, uint64_t> power2(1);
    TensorUInt128<uint64_t, uint64_t> d(rhs);
    TensorUInt128<uint64_t, uint64_t> tmp(lhs - d);
    while (lhs >= d) {
      tmp = tmp - d;
      d = d + d;
      power2 = power2 + power2;
    }

    // Binary long division: halve power2 and d back down, subtracting the shifted
    // divisor from the remainder whenever it still fits.
    tmp = TensorUInt128<uint64_t, uint64_t>(lhs.high, lhs.low);
    TensorUInt128<uint64_t, uint64_t> result(0);
    while (power2 != TensorUInt128<static_val<0>, static_val<0> >(0)) {
      if (tmp >= d) {
        tmp = tmp - d;
        result = result + power2;
      }
      // Shift right
      power2 = TensorUInt128<uint64_t, uint64_t>(power2.high >> 1, (power2.low >> 1) | (power2.high << 63));
      d = TensorUInt128<uint64_t, uint64_t>(d.high >> 1, (d.low >> 1) | (d.high << 63));
    }

    return result;
  }
}


}  // namespace internal
}  // namespace Eigen


#endif  // EIGEN_CXX11_TENSOR_TENSOR_UINT128_H