1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include <cstring>
17
18 #include "tensorflow/compiler/xla/service/cpu/runtime_fp16.h"
19 #include "tensorflow/core/platform/macros.h"
20
21 namespace {
22 using tensorflow::uint16;
23 using tensorflow::uint32;
24
25 // Helper class that lets us access the underlying bit representation
26 // of a float without breaking C++ strict aliasing.
27 class AliasedFloatInt {
28 public:
29 static_assert(sizeof(float) == sizeof(uint32), "");
30
FromFloat(float f)31 static AliasedFloatInt FromFloat(float f) {
32 AliasedFloatInt value;
33 value.set_float(f);
34 return value;
35 }
36
FromUInt(uint32 u)37 static AliasedFloatInt FromUInt(uint32 u) {
38 AliasedFloatInt value;
39 value.set_uint(u);
40 return value;
41 }
42
set_float(float f)43 void set_float(float f) { memcpy(&value_, &f, sizeof(f)); }
as_float() const44 float as_float() const {
45 float f;
46 memcpy(&f, &value_, sizeof(f));
47 return f;
48 }
49
set_uint(uint32 u)50 void set_uint(uint32 u) { value_ = u; }
as_uint() const51 uint32 as_uint() const { return value_; }
52
53 private:
54 uint32 value_;
55 };
56 } // namespace
57
58 // __gnu_f2h_ieee and __gnu_h2f_ieee are marked as weak symbols so if XLA is
59 // built with compiler-rt (that also defines these symbols) we don't get a
60 // duplicate definition linker error. Making these symbols weak also ensures
61 // that the compiler-rt definitions "win", but that isn't essential.
62
63 // Algorithm copied from Eigen.
__gnu_f2h_ieee(float float_value)64 uint16 TF_ATTRIBUTE_WEAK __gnu_f2h_ieee(float float_value) {
65 AliasedFloatInt f = AliasedFloatInt::FromFloat(float_value);
66
67 const AliasedFloatInt f32infty = AliasedFloatInt::FromUInt(255 << 23);
68 const AliasedFloatInt f16max = AliasedFloatInt::FromUInt((127 + 16) << 23);
69 const AliasedFloatInt denorm_magic =
70 AliasedFloatInt::FromUInt(((127 - 15) + (23 - 10) + 1) << 23);
71 unsigned int sign_mask = 0x80000000u;
72 uint32 o = static_cast<uint16>(0x0u);
73
74 unsigned int sign = f.as_uint() & sign_mask;
75 f.set_uint(f.as_uint() ^ sign);
76
77 // NOTE all the integer compares in this function can be safely
78 // compiled into signed compares since all operands are below
79 // 0x80000000. Important if you want fast straight SSE2 code
80 // (since there's no unsigned PCMPGTD).
81
82 if (f.as_uint() >=
83 f16max.as_uint()) { // result is Inf or NaN (all exponent bits set)
84 o = (f.as_uint() > f32infty.as_uint()) ? 0x7e00
85 : 0x7c00; // NaN->qNaN and Inf->Inf
86 } else { // (De)normalized number or zero
87 if (f.as_uint() < (113 << 23)) { // resulting FP16 is subnormal or zero
88 // use a magic value to align our 10 mantissa bits at the bottom of
89 // the float. as long as FP addition is round-to-nearest-even this
90 // just works.
91 f.set_float(f.as_float() + denorm_magic.as_float());
92
93 // and one integer subtract of the bias later, we have our final float!
94 o = static_cast<uint16>(f.as_uint() - denorm_magic.as_uint());
95 } else {
96 unsigned int mant_odd =
97 (f.as_uint() >> 13) & 1; // resulting mantissa is odd
98
99 // update exponent, rounding bias part 1
100 f.set_uint(f.as_uint() + (static_cast<unsigned int>(15 - 127) << 23) +
101 0xfff);
102 // rounding bias part 2
103 f.set_uint(f.as_uint() + mant_odd);
104 // take the bits!
105 o = static_cast<uint16>(f.as_uint() >> 13);
106 }
107 }
108
109 o |= static_cast<uint16>(sign >> 16);
110 return o;
111 }
112
113 // Algorithm copied from Eigen.
__gnu_h2f_ieee(uint16 h)114 float TF_ATTRIBUTE_WEAK __gnu_h2f_ieee(uint16 h) {
115 const AliasedFloatInt magic = AliasedFloatInt::FromUInt(113 << 23);
116 const unsigned int shifted_exp = 0x7c00 << 13; // exponent mask after shift
117 AliasedFloatInt o;
118
119 o.set_uint((h & 0x7fff) << 13); // exponent/mantissa bits
120 unsigned int exp = shifted_exp & o.as_uint(); // just the exponent
121 o.set_uint(o.as_uint() + ((127 - 15) << 23)); // exponent adjust
122
123 // handle exponent special cases
124 if (exp == shifted_exp) { // Inf/NaN?
125 o.set_uint(o.as_uint() + ((128 - 16) << 23)); // extra exp adjust
126 } else if (exp == 0) { // Zero/Denormal?
127 o.set_uint(o.as_uint() + (1 << 23)); // extra exp adjust
128 o.set_float(o.as_float() - magic.as_float()); // renormalize
129 }
130
131 o.set_uint(o.as_uint() | (h & 0x8000) << 16); // sign bit
132 return o.as_float();
133 }
134
__truncdfhf2(double d)135 uint16 TF_ATTRIBUTE_WEAK __truncdfhf2(double d) {
136 // This does a double rounding step, but it's precise enough for our use
137 // cases.
138 return __gnu_f2h_ieee(static_cast<float>(d));
139 }
140