1 // SPDX-License-Identifier: Apache-2.0
2 // ----------------------------------------------------------------------------
3 // Copyright 2011-2020 Arm Limited
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
6 // use this file except in compliance with the License. You may obtain a copy
7 // of the License at:
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 // License for the specific language governing permissions and limitations
15 // under the License.
16 // ----------------------------------------------------------------------------
17
18 /**
19 * @brief Soft-float library for IEEE-754.
20 */
21
22 #include "astc_mathlib.h"
23
24 /******************************************
25 helper functions and their lookup tables
26 ******************************************/
27 /* count leading zeros functions. Only used when the input is nonzero. */
28
/*
 * Byte-wide leading-zero counts used by the portable clz32() fallback:
 * entry i holds the number of leading zero bits in the 8-bit value i.
 * The table is only compiled when none of the instruction-based paths in
 * clz32() is selected.
 */
#if !(defined(__GNUC__) && (defined(__i386) || defined(__amd64))) && \
    !(defined(__arm__) && (defined(__ARMCC_VERSION) || defined(__GNUC__)))
static const uint8_t clz_table[256] =
{
    /* 0x00 - 0x0F */
    8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
    /* 0x10 - 0x1F */
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0x20 - 0x3F */
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0x40 - 0x7F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x80 - 0xFF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
#endif

/**
 * @brief Count the leading zero bits of a 32-bit value.
 *
 * A single machine instruction is used on x86 (GNU-style compilers) and on
 * 32-bit Arm (armcc or GNU-style compilers); every other target uses a
 * table-driven fallback.
 *
 * The input is expected to be non-zero. The result for zero is not uniform
 * across the paths: the x86 path ORs in bit 0 before scanning and so yields
 * 31, whereas the table-driven path yields 32.
 *
 * @param inp  The value to inspect.
 *
 * @return The number of zero bits above the most significant set bit.
 */
uint32_t clz32(uint32_t inp)
{
#if defined(__GNUC__) && (defined(__i386) || defined(__amd64))
    /* BSR reports the index of the highest set bit; bit 0 is forced on so
       the source operand is never zero. */
    uint32_t top_bit;
    __asm__("bsrl %1, %0": "=r"(top_bit):"r"(inp | 1));
    return 31 - top_bit;
#elif defined(__arm__) && defined(__ARMCC_VERSION)
    return __clz(inp); /* armcc builtin */
#elif defined(__arm__) && defined(__GNUC__)
    uint32_t zeros;
    __asm__("clz %0, %1": "=r"(zeros):"r"(inp));
    return zeros;
#else
    /* Portable fallback: narrow the value down to its most significant
       non-zero byte, then finish with a table lookup on that byte. */
    uint32_t dropped = 0;

    if ((inp >> 16) != 0)
    {
        inp >>= 16;
        dropped += 16;
    }

    if ((inp >> 8) != 0)
    {
        inp >>= 8;
        dropped += 8;
    }

    return (UINT32_C(24) - dropped) + clz_table[inp];
#endif
}
89
90 /* convert from FP16 to FP32. */
sf16_to_sf32(sf16 inp)91 sf32 sf16_to_sf32(sf16 inp)
92 {
93 uint32_t inpx = inp;
94
95 /*
96 This table contains, for every FP16 sign/exponent value combination,
97 the difference between the input FP16 value and the value obtained
98 by shifting the correct FP32 result right by 13 bits.
99 This table allows us to handle every case except denormals and NaN
100 with just 1 table lookup, 2 shifts and 1 add.
101 */
102
103 #define WITH_MB(a) INT32_C((a) | (1 << 31))
104 static const int32_t tbl[64] =
105 {
106 WITH_MB(0x00000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000),
107 INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000),
108 INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000),
109 INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), WITH_MB(0x38000),
110 WITH_MB(0x38000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000),
111 INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000),
112 INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000),
113 INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), WITH_MB(0x70000)
114 };
115
116 int32_t res = tbl[inpx >> 10];
117 res += inpx;
118
119 /* the normal cases: the MSB of 'res' is not set. */
120 if (res >= 0) /* signed compare */
121 return res << 13;
122
123 /* Infinity and Zero: the bottom 10 bits of 'res' are clear. */
124 if ((res & UINT32_C(0x3FF)) == 0)
125 return res << 13;
126
127 /* NaN: the exponent field of 'inp' is not zero; NaNs must be quietened. */
128 if ((inpx & 0x7C00) != 0)
129 return (res << 13) | UINT32_C(0x400000);
130
131 /* the remaining cases are Denormals. */
132 {
133 uint32_t sign = (inpx & UINT32_C(0x8000)) << 16;
134 uint32_t mskval = inpx & UINT32_C(0x7FFF);
135 uint32_t leadingzeroes = clz32(mskval);
136 mskval <<= leadingzeroes;
137 return (mskval >> 8) + ((0x85 - leadingzeroes) << 23) + sign;
138 }
139 }
140
141 /* convert from soft-float to native-float */
sf16_to_float(sf16 p)142 float sf16_to_float(sf16 p)
143 {
144 if32 i;
145 i.u = sf16_to_sf32(p);
146 return i.f;
147 }
148
149