1 /*-------------------------------------------------------------------------
2 * drawElements Base Portability Library
3 * -------------------------------------
4 *
5 * Copyright 2014 The Android Open Source Project
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief 32-bit integer math.
22 *//*--------------------------------------------------------------------*/
23
24 #include "deInt32.h"
25
26 DE_BEGIN_EXTERN_C
27
28 const deInt8 g_clzLUT[256] =
29 {
30 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
31 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
32 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
33 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
34 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
39 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
40 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
41 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
42 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
43 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
44 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
45 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
46 };
47
48 /*--------------------------------------------------------------------*//*!
49 * \brief Compute the reciprocal of an integer.
50 * \param a Input value.
51 * \param rcp Pointer to resulting reciprocal "mantissa" value.
52 * \param exp Pointer to resulting exponent value.
53 *
54 * The returned value is split into an exponent part and a mantissa part for
55 * practical reasons. The actual reciprocal can be computed with the formula:
56 * result = exp2(exp) * rcp / (1<<DE_RCP_FRAC_BITS).
57 *//*--------------------------------------------------------------------*/
deRcp32(deUint32 a,deUint32 * rcp,int * exp)58 void deRcp32 (deUint32 a, deUint32* rcp, int* exp)
59 {
60 enum { RCP_LUT_BITS = 8 };
61 static const deUint32 s_rcpLUT[1<<RCP_LUT_BITS] =
62 {
63 0x40000000, 0x3fc03fc0, 0x3f80fe03, 0x3f423954,
64 0x3f03f03f, 0x3ec62159, 0x3e88cb3c, 0x3e4bec88,
65 0x3e0f83e0, 0x3dd38ff0, 0x3d980f66, 0x3d5d00f5,
66 0x3d226357, 0x3ce8354b, 0x3cae7592, 0x3c7522f3,
67 0x3c3c3c3c, 0x3c03c03c, 0x3bcbadc7, 0x3b9403b9,
68 0x3b5cc0ed, 0x3b25e446, 0x3aef6ca9, 0x3ab95900,
69 0x3a83a83a, 0x3a4e5947, 0x3a196b1e, 0x39e4dcb8,
70 0x39b0ad12, 0x397cdb2c, 0x3949660a, 0x39164cb5,
71 0x38e38e38, 0x38b129a2, 0x387f1e03, 0x384d6a72,
72 0x381c0e07, 0x37eb07dd, 0x37ba5713, 0x3789facb,
73 0x3759f229, 0x372a3c56, 0x36fad87b, 0x36cbc5c7,
74 0x369d0369, 0x366e9095, 0x36406c80, 0x36129663,
75 0x35e50d79, 0x35b7d0ff, 0x358ae035, 0x355e3a5f,
76 0x3531dec0, 0x3505cca2, 0x34da034d, 0x34ae820e,
77 0x34834834, 0x3458550f, 0x342da7f2, 0x34034034,
78 0x33d91d2a, 0x33af3e2e, 0x3385a29d, 0x335c49d4,
79 0x33333333, 0x330a5e1b, 0x32e1c9f0, 0x32b97617,
80 0x329161f9, 0x32698cff, 0x3241f693, 0x321a9e24,
81 0x31f3831f, 0x31cca4f5, 0x31a6031a, 0x317f9d00,
82 0x3159721e, 0x313381ec, 0x310dcbe1, 0x30e84f79,
83 0x30c30c30, 0x309e0184, 0x30792ef5, 0x30549403,
84 0x30303030, 0x300c0300, 0x2fe80bfa, 0x2fc44aa2,
85 0x2fa0be82, 0x2f7d6724, 0x2f5a4411, 0x2f3754d7,
86 0x2f149902, 0x2ef21023, 0x2ecfb9c8, 0x2ead9584,
87 0x2e8ba2e8, 0x2e69e18a, 0x2e4850fe, 0x2e26f0db,
88 0x2e05c0b8, 0x2de4c02d, 0x2dc3eed6, 0x2da34c4d,
89 0x2d82d82d, 0x2d629215, 0x2d4279a2, 0x2d228e75,
90 0x2d02d02d, 0x2ce33e6c, 0x2cc3d8d4, 0x2ca49f0a,
91 0x2c8590b2, 0x2c66ad71, 0x2c47f4ee, 0x2c2966d0,
92 0x2c0b02c0, 0x2becc868, 0x2bceb771, 0x2bb0cf87,
93 0x2b931057, 0x2b75798c, 0x2b580ad6, 0x2b3ac3e2,
94 0x2b1da461, 0x2b00ac02, 0x2ae3da78, 0x2ac72f74,
95 0x2aaaaaaa, 0x2a8e4bcd, 0x2a721291, 0x2a55fead,
96 0x2a3a0fd5, 0x2a1e45c2, 0x2a02a02a, 0x29e71ec5,
97 0x29cbc14e, 0x29b0877d, 0x2995710e, 0x297a7dbb,
98 0x295fad40, 0x2944ff5a, 0x292a73c7, 0x29100a44,
99 0x28f5c28f, 0x28db9c68, 0x28c1978f, 0x28a7b3c5,
100 0x288df0ca, 0x28744e61, 0x285acc4b, 0x28416a4c,
101 0x28282828, 0x280f05a2, 0x27f6027f, 0x27dd1e85,
102 0x27c45979, 0x27abb323, 0x27932b48, 0x277ac1b2,
103 0x27627627, 0x274a4870, 0x27323858, 0x271a45a6,
104 0x27027027, 0x26eab7a3, 0x26d31be7, 0x26bb9cbf,
105 0x26a439f6, 0x268cf359, 0x2675c8b6, 0x265eb9da,
106 0x2647c694, 0x2630eeb1, 0x261a3202, 0x26039055,
107 0x25ed097b, 0x25d69d43, 0x25c04b80, 0x25aa1402,
108 0x2593f69b, 0x257df31c, 0x2568095a, 0x25523925,
109 0x253c8253, 0x2526e4b7, 0x25116025, 0x24fbf471,
110 0x24e6a171, 0x24d166f9, 0x24bc44e1, 0x24a73afd,
111 0x24924924, 0x247d6f2e, 0x2468acf1, 0x24540245,
112 0x243f6f02, 0x242af300, 0x24168e18, 0x24024024,
113 0x23ee08fb, 0x23d9e878, 0x23c5de76, 0x23b1eace,
114 0x239e0d5b, 0x238a45f8, 0x23769480, 0x2362f8cf,
115 0x234f72c2, 0x233c0233, 0x2328a701, 0x23156107,
116 0x23023023, 0x22ef1432, 0x22dc0d12, 0x22c91aa1,
117 0x22b63cbe, 0x22a37347, 0x2290be1c, 0x227e1d1a,
118 0x226b9022, 0x22591713, 0x2246b1ce, 0x22346033,
119 0x22222222, 0x220ff77c, 0x21fde021, 0x21ebdbf5,
120 0x21d9ead7, 0x21c80cab, 0x21b64151, 0x21a488ac,
121 0x2192e29f, 0x21814f0d, 0x216fcdd8, 0x215e5ee4,
122 0x214d0214, 0x213bb74d, 0x212a7e72, 0x21195766,
123 0x21084210, 0x20f73e53, 0x20e64c14, 0x20d56b38,
124 0x20c49ba5, 0x20b3dd40, 0x20a32fef, 0x20929398,
125 0x20820820, 0x20718d6f, 0x2061236a, 0x2050c9f8,
126 0x20408102, 0x2030486c, 0x20202020, 0x20100804
127 };
128
129 DE_STATIC_ASSERT(DE_LENGTH_OF_ARRAY(s_rcpLUT) == (1<<RCP_LUT_BITS));
130
131 int shift = deClz32(a);
132 deUint32 normalized = (deUint32)a << shift; /* Highest bit is always 1. */
133 int lookupNdx = (normalized >> (31 - RCP_LUT_BITS)) & ((1<<RCP_LUT_BITS)-1); /* Discard high bit, leave 8 next highest bits to lowest bits of normalized. */
134 deUint32 result;
135 deUint32 tmp;
136
137 /* Must be non-negative. */
138 DE_ASSERT(a > 0);
139
140 /* First approximation from lookup table. */
141 DE_ASSERT(lookupNdx >= 0 && lookupNdx < (1<<RCP_LUT_BITS));
142 result = s_rcpLUT[lookupNdx];
143
144 /* Newton-Raphson iteration for improved accuracy (has 16 bits of accuracy afterwards). */
145 tmp = deSafeMuluAsr32(result, normalized, 31);
146 result = deSafeMuluAsr32(result, (2u<<DE_RCP_FRAC_BITS) - tmp, DE_RCP_FRAC_BITS);
147
148 /* Second Newton-Raphson iteration (now has full accuracy). */
149 tmp = deSafeMuluAsr32(result, normalized, 31);
150 result = deSafeMuluAsr32(result, (2u<<DE_RCP_FRAC_BITS) - tmp, DE_RCP_FRAC_BITS);
151
152 /* Return results. */
153 *rcp = result;
154 *exp = 31 - shift;
155 }
156
157 DE_END_EXTERN_C
158