1 /*-------------------------------------------------------------------------
2 * drawElements Base Portability Library
3 * -------------------------------------
4 *
5 * Copyright 2014 The Android Open Source Project
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief 32-bit integer math.
22 *//*--------------------------------------------------------------------*/
23
24 #include "deInt32.h"
25
26 DE_BEGIN_EXTERN_C
27
28 const deInt8 g_clzLUT[256] =
29 {
30 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
31 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
32 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
33 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
34 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
39 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
40 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
41 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
42 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
43 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
44 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
45 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
46 };
47
48 const deInt8 g_ctzLUT[256] =
49 {
50 8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
51 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
52 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
53 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
54 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
55 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
56 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
57 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
58 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
59 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
60 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
61 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
62 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
63 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
64 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
65 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
66 };
67
68 /*--------------------------------------------------------------------*//*!
69 * \brief Compute the reciprocal of an integer.
70 * \param a Input value.
71 * \param rcp Pointer to resulting reciprocal "mantissa" value.
72 * \param exp Pointer to resulting exponent value.
73 *
74 * The returned value is split into an exponent part and a mantissa part for
75 * practical reasons. The actual reciprocal can be computed with the formula:
76 * result = exp2(exp) * rcp / (1<<DE_RCP_FRAC_BITS).
77 *//*--------------------------------------------------------------------*/
deRcp32(deUint32 a,deUint32 * rcp,int * exp)78 void deRcp32 (deUint32 a, deUint32* rcp, int* exp)
79 {
80 enum { RCP_LUT_BITS = 8 };
81 static const deUint32 s_rcpLUT[1<<RCP_LUT_BITS] =
82 {
83 0x40000000, 0x3fc03fc0, 0x3f80fe03, 0x3f423954,
84 0x3f03f03f, 0x3ec62159, 0x3e88cb3c, 0x3e4bec88,
85 0x3e0f83e0, 0x3dd38ff0, 0x3d980f66, 0x3d5d00f5,
86 0x3d226357, 0x3ce8354b, 0x3cae7592, 0x3c7522f3,
87 0x3c3c3c3c, 0x3c03c03c, 0x3bcbadc7, 0x3b9403b9,
88 0x3b5cc0ed, 0x3b25e446, 0x3aef6ca9, 0x3ab95900,
89 0x3a83a83a, 0x3a4e5947, 0x3a196b1e, 0x39e4dcb8,
90 0x39b0ad12, 0x397cdb2c, 0x3949660a, 0x39164cb5,
91 0x38e38e38, 0x38b129a2, 0x387f1e03, 0x384d6a72,
92 0x381c0e07, 0x37eb07dd, 0x37ba5713, 0x3789facb,
93 0x3759f229, 0x372a3c56, 0x36fad87b, 0x36cbc5c7,
94 0x369d0369, 0x366e9095, 0x36406c80, 0x36129663,
95 0x35e50d79, 0x35b7d0ff, 0x358ae035, 0x355e3a5f,
96 0x3531dec0, 0x3505cca2, 0x34da034d, 0x34ae820e,
97 0x34834834, 0x3458550f, 0x342da7f2, 0x34034034,
98 0x33d91d2a, 0x33af3e2e, 0x3385a29d, 0x335c49d4,
99 0x33333333, 0x330a5e1b, 0x32e1c9f0, 0x32b97617,
100 0x329161f9, 0x32698cff, 0x3241f693, 0x321a9e24,
101 0x31f3831f, 0x31cca4f5, 0x31a6031a, 0x317f9d00,
102 0x3159721e, 0x313381ec, 0x310dcbe1, 0x30e84f79,
103 0x30c30c30, 0x309e0184, 0x30792ef5, 0x30549403,
104 0x30303030, 0x300c0300, 0x2fe80bfa, 0x2fc44aa2,
105 0x2fa0be82, 0x2f7d6724, 0x2f5a4411, 0x2f3754d7,
106 0x2f149902, 0x2ef21023, 0x2ecfb9c8, 0x2ead9584,
107 0x2e8ba2e8, 0x2e69e18a, 0x2e4850fe, 0x2e26f0db,
108 0x2e05c0b8, 0x2de4c02d, 0x2dc3eed6, 0x2da34c4d,
109 0x2d82d82d, 0x2d629215, 0x2d4279a2, 0x2d228e75,
110 0x2d02d02d, 0x2ce33e6c, 0x2cc3d8d4, 0x2ca49f0a,
111 0x2c8590b2, 0x2c66ad71, 0x2c47f4ee, 0x2c2966d0,
112 0x2c0b02c0, 0x2becc868, 0x2bceb771, 0x2bb0cf87,
113 0x2b931057, 0x2b75798c, 0x2b580ad6, 0x2b3ac3e2,
114 0x2b1da461, 0x2b00ac02, 0x2ae3da78, 0x2ac72f74,
115 0x2aaaaaaa, 0x2a8e4bcd, 0x2a721291, 0x2a55fead,
116 0x2a3a0fd5, 0x2a1e45c2, 0x2a02a02a, 0x29e71ec5,
117 0x29cbc14e, 0x29b0877d, 0x2995710e, 0x297a7dbb,
118 0x295fad40, 0x2944ff5a, 0x292a73c7, 0x29100a44,
119 0x28f5c28f, 0x28db9c68, 0x28c1978f, 0x28a7b3c5,
120 0x288df0ca, 0x28744e61, 0x285acc4b, 0x28416a4c,
121 0x28282828, 0x280f05a2, 0x27f6027f, 0x27dd1e85,
122 0x27c45979, 0x27abb323, 0x27932b48, 0x277ac1b2,
123 0x27627627, 0x274a4870, 0x27323858, 0x271a45a6,
124 0x27027027, 0x26eab7a3, 0x26d31be7, 0x26bb9cbf,
125 0x26a439f6, 0x268cf359, 0x2675c8b6, 0x265eb9da,
126 0x2647c694, 0x2630eeb1, 0x261a3202, 0x26039055,
127 0x25ed097b, 0x25d69d43, 0x25c04b80, 0x25aa1402,
128 0x2593f69b, 0x257df31c, 0x2568095a, 0x25523925,
129 0x253c8253, 0x2526e4b7, 0x25116025, 0x24fbf471,
130 0x24e6a171, 0x24d166f9, 0x24bc44e1, 0x24a73afd,
131 0x24924924, 0x247d6f2e, 0x2468acf1, 0x24540245,
132 0x243f6f02, 0x242af300, 0x24168e18, 0x24024024,
133 0x23ee08fb, 0x23d9e878, 0x23c5de76, 0x23b1eace,
134 0x239e0d5b, 0x238a45f8, 0x23769480, 0x2362f8cf,
135 0x234f72c2, 0x233c0233, 0x2328a701, 0x23156107,
136 0x23023023, 0x22ef1432, 0x22dc0d12, 0x22c91aa1,
137 0x22b63cbe, 0x22a37347, 0x2290be1c, 0x227e1d1a,
138 0x226b9022, 0x22591713, 0x2246b1ce, 0x22346033,
139 0x22222222, 0x220ff77c, 0x21fde021, 0x21ebdbf5,
140 0x21d9ead7, 0x21c80cab, 0x21b64151, 0x21a488ac,
141 0x2192e29f, 0x21814f0d, 0x216fcdd8, 0x215e5ee4,
142 0x214d0214, 0x213bb74d, 0x212a7e72, 0x21195766,
143 0x21084210, 0x20f73e53, 0x20e64c14, 0x20d56b38,
144 0x20c49ba5, 0x20b3dd40, 0x20a32fef, 0x20929398,
145 0x20820820, 0x20718d6f, 0x2061236a, 0x2050c9f8,
146 0x20408102, 0x2030486c, 0x20202020, 0x20100804
147 };
148
149 DE_STATIC_ASSERT(DE_LENGTH_OF_ARRAY(s_rcpLUT) == (1<<RCP_LUT_BITS));
150
151 int shift = deClz32(a);
152 deUint32 normalized = (deUint32)a << shift; /* Highest bit is always 1. */
153 int lookupNdx = (normalized >> (31 - RCP_LUT_BITS)) & ((1<<RCP_LUT_BITS)-1); /* Discard high bit, leave 8 next highest bits to lowest bits of normalized. */
154 deUint32 result;
155 deUint32 tmp;
156
157 /* Must be non-negative. */
158 DE_ASSERT(a > 0);
159
160 /* First approximation from lookup table. */
161 DE_ASSERT(lookupNdx >= 0 && lookupNdx < (1<<RCP_LUT_BITS));
162 result = s_rcpLUT[lookupNdx];
163
164 /* Newton-Raphson iteration for improved accuracy (has 16 bits of accuracy afterwards). */
165 tmp = deSafeMuluAsr32(result, normalized, 31);
166 result = deSafeMuluAsr32(result, (2u<<DE_RCP_FRAC_BITS) - tmp, DE_RCP_FRAC_BITS);
167
168 /* Second Newton-Raphson iteration (now has full accuracy). */
169 tmp = deSafeMuluAsr32(result, normalized, 31);
170 result = deSafeMuluAsr32(result, (2u<<DE_RCP_FRAC_BITS) - tmp, DE_RCP_FRAC_BITS);
171
172 /* Return results. */
173 *rcp = result;
174 *exp = 31 - shift;
175 }
176
177 DE_END_EXTERN_C
178