• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2023 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "image_compressor.h"
17 
18 #include <unistd.h>
19 #include <fstream>
20 
21 #include "securec.h"
22 #include "media_errors.h"
23 #include "image_log.h"
24 
25 #undef LOG_DOMAIN
26 #define LOG_DOMAIN LOG_TAG_DOMAIN_ID_PLUGIN
27 
28 #undef LOG_TAG
29 #define LOG_TAG "ClAstcEnc"
30 
31 namespace OHOS {
32 namespace ImagePlugin {
33 namespace AstcEncBasedCl {
34 constexpr int MAX_WIDTH = 8192; // presumably the largest accepted image width in pixels — usage is outside this chunk
35 constexpr int MAX_HEIGHT = 8192; // presumably the largest accepted image height in pixels — usage is outside this chunk
36 constexpr int TEXTURE_HEAD_BYTES = 16; // presumably the output file header size (an .astc header is 16 bytes) — TODO confirm
37 constexpr int TEXTURE_BLOCK_BYTES = 16; // presumably bytes per compressed block (an ASTC block is 128 bits) — TODO confirm
38 constexpr int MAGIC_FILE_CONSTANT = 0x5CA1AB13; // same value as the .astc file-format magic number
39 constexpr int DIM = 4; // same value as the DIM macro defined inside g_programSource
40 constexpr uint8_t BIT_SHIFT_8BITS = 8; // shift amount of one byte
41 constexpr uint8_t BIT_SHIFT_16BITS = 16; // shift amount of two bytes
42 constexpr uint8_t BIT_SHIFT_24BITS = 24; // shift amount of three bytes
43 constexpr uint8_t BYTES_MASK = 0xFF; // mask that keeps the low 8 bits
44 constexpr uint8_t GLOBAL_WH_NUM_CL = 2; // presumably the number of global work dimensions (width, height) — TODO confirm
45 constexpr size_t MAX_MALLOC_BYTES = 10000000; // max 10MB (10,000,000 bytes, i.e. decimal megabytes)
46 constexpr size_t WORK_GROUP_SIZE = 8; // presumably the OpenCL work-group edge length — usage is outside this chunk
47 
48 const char *g_programSource = R"(
49 // Notice: the code from line 42 to line 1266 is written in OpenCL C
50 // OpenCL only supports C-style code here; constexpr and static_cast are not supported on some platforms
51 #define DIM (4) // same value as the host-side DIM constant; presumably the block edge length in texels
52 #define BLOCK_SIZE (16) // equals DIM * DIM
53 #define X_GRIDS (4)
54 #define Y_GRIDS (4)
55 #define SMALL_VALUE (0.00001f) // small epsilon used to avoid division by zero
56 #define BLOCK_MAX_WEIGHTS (64)
57 #define BLOCK_MAX_WEIGHTS_SHORT (64) // NOTE(review): same value as BLOCK_MAX_WEIGHTS
58 #define BLOCK_MAX_WEIGHTS_FLOAT (64.0f) // float form of BLOCK_MAX_WEIGHTS
59 #define BLOCK_MAX_WEIGHTS_2PLANE (32)
60 #define WEIGHTS_PLANE2_OFFSET (32)
61 #define CEM_LDR_RGB_DIRECT (8) // the CEM_* values equal the ASTC color endpoint mode numbers 8, 9, 12 and 13
62 #define CEM_LDR_RGB_BASE_OFFSET (9)
63 #define CEM_LDR_RGBA_DIRECT (12)
64 #define CEM_LDR_RGBA_BASE_OFFSET (13)
65 #define PIXEL_MAX_VALUE (255.0f)
66 
67 #define QUANT_2 (0) // QUANT_<n> is the row index of the n-level entry in g_bitsTritsQuintsTable and g_colorQuantTables
68 #define QUANT_3 (1)
69 #define QUANT_4 (2)
70 #define QUANT_5 (3)
71 #define QUANT_6 (4)
72 #define QUANT_8 (5)
73 #define QUANT_10 (6)
74 #define QUANT_12 (7)
75 #define QUANT_16 (8)
76 #define QUANT_20 (9)
77 #define QUANT_24 (10)
78 #define QUANT_32 (11)
79 #define QUANT_40 (12)
80 #define QUANT_48 (13)
81 #define QUANT_64 (14)
82 #define QUANT_80 (15)
83 #define QUANT_96 (16)
84 #define QUANT_128 (17)
85 #define QUANT_160 (18)
86 #define QUANT_192 (19)
87 #define QUANT_256 (20)
88 #define QUANT_MAX (21) // number of QUANT_* levels; used as a table dimension
89 
90 #define WEIGHT_RANGE_6 (6)
91 #define WEIGHT_QUANTIZE_NUM (32) // entries per row of g_scrambleTable and g_weightUnquant
92 #define COLOR_NUM (256) // entries per quant level in g_colorQuantTables
93 #define MAX_PARTITION_COUNT (4)
94 #define PARTITION_COUNT (2) // length of PartInfo.bitmaps
95 #define MAX_BLOCK_SIZE (32) // GetPart() returns 0 for any index at or above this value
96 #define WEIGHT_QUANTIZE_GROUP (12) // number of rows in g_scrambleTable and g_weightUnquant
97 #define SECOND_PARTITION_INDEX (1) // index into PartInfo.bitmaps that GetPart() reads
98 
99 #define START_INDEX (0)
100 #define FLOAT_ZERO (0.0f)
101 #define FLOAT_ONE (1.0f)
102 #define INT_ZERO (0)
103 #define INT_ONE (1)
104 #define SHORT_ZERO (0)
105 #define UINT_ZERO (0)
106 #define UINT_ONE (1u)
107 #define EP0_INDEX (0)
108 #define EP1_INDEX (1)
109 #define END_POINT_NUM (2)
110 #define EP0_R_INDEX (0) // EP0_*/EP1_* interleave the two endpoints per channel: R0, R1, G0, G1, B0, B1, A0, A1
111 #define EP1_R_INDEX (1)
112 #define EP0_G_INDEX (2)
113 #define EP1_G_INDEX (3)
114 #define EP0_B_INDEX (4)
115 #define EP1_B_INDEX (5)
116 #define EP0_A_INDEX (6)
117 #define EP1_A_INDEX (7)
118 #define COLOR_COMPONENT_NUM (8) // count of the EP*_INDEX slots above
119 #define QUANTIZE_WEIGHT_MIN (0)
120 
121 #define TRIT_MSB_SIZE (8) // presumably: TRIT_BLOCK_SIZE trits pack into this many bits — TODO confirm against the encoder code
122 #define TRIT_BLOCK_SIZE (5)
123 #define TRIT_ROUND_NUM (4)
124 #define QUINT_MSB_SIZE (7) // presumably: QUINT_BLOCK_SIZE quints pack into this many bits — TODO confirm against the encoder code
125 #define QUINT_BLOCK_SIZE (3)
126 #define QUINT_ROUND_NUM (2)
127 #define ISE_0 (0)
128 #define ISE_1 (1)
129 #define ISE_2 (2)
130 #define ISE_3 (3)
131 #define ISE_4 (4)
132 
133 #define WEIGHT_0 (0)
134 #define WEIGHT_1 (1)
135 #define WEIGHT_2 (2)
136 #define WEIGHT_3 (3)
137 #define WEIGHT_4 (4)
138 #define WEIGHT_5 (5)
139 #define WEIGHT_6 (6)
140 #define WEIGHT_7 (7)
141 #define WEIGHT_8 (8)
142 #define WEIGHT_9 (9)
143 #define WEIGHT_10 (10)
144 #define WEIGHT_11 (11)
145 #define WEIGHT_12 (12)
146 #define WEIGHT_13 (13)
147 #define WEIGHT_14 (14)
148 #define WEIGHT_15 (15)
149 
150 #define BYTE_1_POS (8) // bit offset of byte 1 within a 32-bit word
151 #define BYTE_2_POS (16) // bit offset of byte 2 within a 32-bit word
152 #define BYTE_3_POS (24) // bit offset of byte 3 within a 32-bit word
153 #define BYTE_MASK (0xFFu) // keeps the low 8 bits
154 #define CEM_POS (13)
155 #define COLOR_EP_POS (17)
156 #define COLOR_EP_HIGH_BIT (15)
157 #define MASK_FOR_4BITS (0xFu) // keeps the low 4 bits
158 #define MASK_FOR_15BITS (0x7FFFu) // keeps the low 15 bits
159 #define MASK_FOR_17BITS (0x1FFFFu) // keeps the low 17 bits
160 
161 #define HEIGHT_BITS_OFFSET (2)
162 #define WIDTH_BITS_OFFSET (4)
163 #define MASK_FOR_2BITS (0x3u) // keeps the low 2 bits
164 #define MASK_FOR_1BITS (0x1u) // keeps the lowest bit; used by GetPart()
165 #define WEIGHT_METHOD_OFFSET (2u)
166 #define WEIGHT_METHOD_RIGHT_BIT (1)
167 #define WEIGHT_METHOD_POS (4u)
168 #define BLOCK_WIDTH_POS (5u)
169 #define BLOCK_HEIGHT_POS (5u) // NOTE(review): same value as BLOCK_WIDTH_POS — confirm this is intended
170 #define WEIGHT_PRECISION_POS (9u)
171 #define IS_DUALPLANE_POS (10u)
172 
173 typedef struct { // partition description consumed by GetPart()
174     int partid; // partition identifier; not read by any code visible in this chunk
175     uint bitmaps[PARTITION_COUNT]; // presumably one bitmask word per partition; GetPart() tests bit i of bitmaps[SECOND_PARTITION_INDEX]
176 } PartInfo;
177 
// Returns bit `i` (0 or 1) of partInfo->bitmaps[SECOND_PARTITION_INDEX].
// Presumably a result of 1 means element `i` belongs to the second partition.
// Any index at or above MAX_BLOCK_SIZE has no bit in the bitmap word and yields 0.
int GetPart(PartInfo* partInfo, int i)
{
    if (i < MAX_BLOCK_SIZE) {
        uint secondBitmap = partInfo->bitmaps[SECOND_PARTITION_INDEX];
        return (int)((secondBitmap >> i) & MASK_FOR_1BITS);
    }
    return 0;
}
185 
186 __constant short g_scrambleTable[WEIGHT_QUANTIZE_GROUP * WEIGHT_QUANTIZE_NUM] = { // flat 12 x 32 table: one zero-padded row per weight quant level; presumably maps a weight index to its stored (scrambled) position
187     0, 1, // row 0: 2 levels
188     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
189     0,
190     0, 1, 2, // row 1: 3 levels
191     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
192     0, 1, 2, 3, // row 2: 4 levels
193     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
194     0, 1, 2, 3, 4, // row 3: 5 levels
195     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
196     0, 2, 4, 5, 3, 1, // row 4: 6 levels
197     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
198     0, 1, 2, 3, 4, 5, 6, 7, // row 5: 8 levels
199     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
200     0, 2, 4, 6, 8, 9, 7, 5, 3, 1, // row 6: 10 levels
201     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
202     0, 4, 8, 2, 6, 10, 11, 7, 3, 9, 5, 1, // row 7: 12 levels
203     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
204     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // row 8: 16 levels
205     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
206     0, 4, 8, 12, 16, 2, 6, 10, 14, 18, 19, 15, 11, 7, 3, 17, 13, 9, 5, 1, // row 9: 20 levels
207     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
208     0, 8, 16, 2, 10, 18, 4, 12, 20, 6, 14, 22, 23, 15, 7, 21, 13, 5, 19, // row 10: 24 levels (continues on the next line)
209     11, 3, 17, 9, 1, 0, 0, 0, 0, 0, 0, 0, 0,
210     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, // row 11: 32 levels, no padding
211     20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
212 };
213 
214 __constant short g_weightUnquant[WEIGHT_QUANTIZE_GROUP * WEIGHT_QUANTIZE_NUM] = { // flat 12 x 32 table, same row layout as g_scrambleTable; values lie in 0..64, presumably the unquantized weight for each stored index
215     0, 64, // row 0: 2 levels
216     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
217     0, 32, 64, // row 1: 3 levels
218     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
219     0, 21, 43, 64, // row 2: 4 levels
220     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
221     0, 16, 32, 48, 64, // row 3: 5 levels
222     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
223     0, 64, 12, 52, 25, 39, // row 4: 6 levels
224     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
225     0, 9, 18, 27, 37, 46, 55, 64, // row 5: 8 levels
226     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
227     0, 64, 7, 57, 14, 50, 21, 43, 28, 36, // row 6: 10 levels
228     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
229     0, 64, 17, 47, 5, 59, 23, 41, 11, 53, 28, 36, // row 7: 12 levels
230     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
231     0, 4, 8, 12, 17, 21, 25, 29, 35, 39, 43, 47, 52, 56, 60, 64, // row 8: 16 levels
232     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
233     0, 64, 16, 48, 3, 61, 19, 45, 6, 58, 23, 41, 9, 55, 26, 38, 13, 51, 29, 35, // row 9: 20 levels
234     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
235     0, 64, 8, 56, 16, 48, 24, 40, 2, 62, 11, 53, 19, 45, 27, 37, 5, 59, 13, 51, 22, 42, 30, 34, // row 10: 24 levels
236     0, 0, 0, 0, 0, 0, 0, 0,
237     0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 34, 36, 38, // row 11: 32 levels, no padding (skips 32)
238     40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64
239 };
240 __constant short g_integerFromTrits[243] = { // 243 = 3^5 entries; presumably the packed integer for each combination of TRIT_BLOCK_SIZE (5) trits — index order to be confirmed against the caller
241     0, 1, 2, 4, 5, 6, 8, 9, 10,
242     16, 17, 18, 20, 21, 22, 24, 25, 26,
243     3, 7, 15, 19, 23, 27, 12, 13, 14,
244     32, 33, 34, 36, 37, 38, 40, 41, 42,
245     48, 49, 50, 52, 53, 54, 56, 57, 58,
246     35, 39, 47, 51, 55, 59, 44, 45, 46,
247     64, 65, 66, 68, 69, 70, 72, 73, 74,
248     80, 81, 82, 84, 85, 86, 88, 89, 90,
249     67, 71, 79, 83, 87, 91, 76, 77, 78,
250 
251     128, 129, 130, 132, 133, 134, 136, 137, 138,
252     144, 145, 146, 148, 149, 150, 152, 153, 154,
253     131, 135, 143, 147, 151, 155, 140, 141, 142,
254     160, 161, 162, 164, 165, 166, 168, 169, 170,
255     176, 177, 178, 180, 181, 182, 184, 185, 186,
256     163, 167, 175, 179, 183, 187, 172, 173, 174,
257     192, 193, 194, 196, 197, 198, 200, 201, 202,
258     208, 209, 210, 212, 213, 214, 216, 217, 218,
259     195, 199, 207, 211, 215, 219, 204, 205, 206,
260 
261     96, 97, 98, 100, 101, 102, 104, 105, 106,
262     112, 113, 114, 116, 117, 118, 120, 121, 122,
263     99, 103, 111, 115, 119, 123, 108, 109, 110,
264     224, 225, 226, 228, 229, 230, 232, 233, 234,
265     240, 241, 242, 244, 245, 246, 248, 249, 250,
266     227, 231, 239, 243, 247, 251, 236, 237, 238,
267     28, 29, 30, 60, 61, 62, 92, 93, 94,
268     156, 157, 158, 188, 189, 190, 220, 221, 222,
269     31, 63, 127, 159, 191, 255, 252, 253, 254
270 };
271 
272 __constant int g_bitsTritsQuintsTable[QUANT_MAX * 3] = { // three entries per quant level, in QUANT_* order: {bits, trits, quints}
273     1, 0, 0, // RANGE_2
274     0, 1, 0, // RANGE_3
275     2, 0, 0, // RANGE_4
276     0, 0, 1, // RANGE_5
277     1, 1, 0, // RANGE_6
278     3, 0, 0, // RANGE_8
279     1, 0, 1, // RANGE_10
280     2, 1, 0, // RANGE_12
281     4, 0, 0, // RANGE_16
282     2, 0, 1, // RANGE_20
283     3, 1, 0, // RANGE_24
284     5, 0, 0, // RANGE_32
285     3, 0, 1, // RANGE_40
286     4, 1, 0, // RANGE_48
287     6, 0, 0, // RANGE_64
288     4, 0, 1, // RANGE_80
289     5, 1, 0, // RANGE_96
290     7, 0, 0, // RANGE_128
291     5, 0, 1, // RANGE_160
292     6, 1, 0, // RANGE_192
293     8, 0, 0 // RANGE_256
294 };
295 
296 __constant short g_integerFromQuints[125] = { // 125 = 5^3 entries; presumably the packed integer for each combination of QUINT_BLOCK_SIZE (3) quints — index order to be confirmed against the caller
297     0, 1, 2, 3, 4, 8, 9, 10, 11, 12, 16, 17, 18, 19, 20, 24, 25, 26, 27, 28, 5, 13, 21, 29, 6,
298     32, 33, 34, 35, 36, 40, 41, 42, 43, 44, 48, 49, 50, 51, 52, 56, 57, 58, 59, 60, 37, 45, 53,
299     61, 14,
300     64, 65, 66, 67, 68, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 88, 89, 90, 91, 92, 69, 77, 85,
301     93, 22,
302     96, 97, 98, 99, 100, 104, 105, 106, 107, 108, 112, 113, 114, 115, 116, 120, 121, 122, 123,
303     124, 101, 109, 117, 125, 30,
304     102, 103, 70, 71, 38, 110, 111, 78, 79, 46, 118, 119, 86, 87, 54, 126, 127, 94, 95, 62, 39,
305     47, 55, 63, 31
306 };
307 
308 __constant short g_colorQuantTables[QUANT_MAX * COLOR_NUM] = {
309     // QUANT_2
310     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
311     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
312     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
313     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
314     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
315     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
316     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
317     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
318     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
319     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
320     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
321     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
322     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
323     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
324     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
325     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
326     // QUANT_3
327     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
328     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
329     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
330     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
331     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
332     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
333     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
334     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
335     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
336     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
337     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
338     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
339     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
340     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
341     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
342     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
343     // QUANT_4
344     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
345     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
346     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
347     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
348     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
349     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
350     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
351     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
352     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
353     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
354     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
355     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
356     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
357     2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
358     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
359     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
360     // QUANT_5
361     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
362     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
363     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
364     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
365     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
366     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
367     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
368     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
369     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
370     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
371     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
372     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
373     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
374     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
375     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
376     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
377     // QUANT_6
378     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
379     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2,
380     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
381     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
382     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4,
383     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
384     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
385     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
386     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
387     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
388     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
389     5, 5, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
390     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
391     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
392     3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
393     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
394     // QUANT_8
395     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
396     0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
397     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
398     1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2,
399     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
400     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3,
401     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
402     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
403     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
404     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
405     4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
406     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
407     5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6,
408     6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
409     6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7,
410     7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
411     // QUANT_10
412     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
413     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
414     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4,
415     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
416     4, 4, 4, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6,
417     6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
418     6, 6, 6, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
419     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
420     9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
421     9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 7, 7,
422     7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
423     7, 7, 7, 7, 7, 7, 7, 7, 7, 5, 5, 5, 5, 5, 5, 5,
424     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
425     5, 5, 5, 5, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
426     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
427     3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
428     // QUANT_12
429     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4,
430     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
431     4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
432     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2, 2, 2, 2, 2, 2,
433     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
434     2, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
435     6, 6, 6, 6, 6, 6, 6, 6, 6, 10, 10, 10, 10, 10, 10, 10,
436     10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
437     11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
438     11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7,
439     7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 3,
440     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
441     3, 3, 3, 3, 3, 3, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
442     9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 5, 5,
443     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
444     5, 5, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
445     // QUANT_16
446     0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
447     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
448     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3,
449     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
450     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5,
451     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6,
452     6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
453     7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
454     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
455     8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
456     9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
457     10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
458     11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
459     12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
460     13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
461     14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15,
462     // QUANT_20
463     0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4,
464     4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
465     8, 8, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
466     16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 2, 2, 2,
467     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6,
468     6, 6, 6, 6, 6, 6, 6, 6, 10, 10, 10, 10, 10, 10, 10, 10,
469     10, 10, 10, 10, 10, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
470     14, 14, 14, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
471     19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 15, 15, 15,
472     15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 11, 11, 11, 11, 11,
473     11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7,
474     7, 7, 7, 7, 7, 7, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
475     3, 3, 3, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
476     13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 9, 9,
477     9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 5, 5, 5, 5,
478     5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 1, 1, 1, 1, 1,
479     // QUANT_24
480     0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
481     8, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 2, 2, 2, 2,
482     2, 2, 2, 2, 2, 2, 2, 10, 10, 10, 10, 10, 10, 10, 10, 10,
483     10, 10, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 4, 4, 4,
484     4, 4, 4, 4, 4, 4, 4, 4, 12, 12, 12, 12, 12, 12, 12, 12,
485     12, 12, 12, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 6, 6,
486     6, 6, 6, 6, 6, 6, 6, 6, 6, 14, 14, 14, 14, 14, 14, 14,
487     14, 14, 14, 14, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
488     23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 15, 15, 15, 15,
489     15, 15, 15, 15, 15, 15, 15, 7, 7, 7, 7, 7, 7, 7, 7, 7,
490     7, 7, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 13, 13, 13,
491     13, 13, 13, 13, 13, 13, 13, 13, 5, 5, 5, 5, 5, 5, 5, 5,
492     5, 5, 5, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 11, 11,
493     11, 11, 11, 11, 11, 11, 11, 11, 11, 3, 3, 3, 3, 3, 3, 3,
494     3, 3, 3, 3, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 9,
495     9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1, 1, 1,
496     // QUANT_32
497     0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2,
498     2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4,
499     4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6,
500     6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8,
501     8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10,
502     10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 12,
503     12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13,
504     14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15,
505     16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17,
506     18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19,
507     19, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21,
508     21, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23,
509     23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25,
510     25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27,
511     27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29,
512     29, 29, 29, 30, 30, 30, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31,
513     // QUANT_40
514     0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16,
515     16, 24, 24, 24, 24, 24, 24, 32, 32, 32, 32, 32, 32, 32, 2, 2,
516     2, 2, 2, 2, 10, 10, 10, 10, 10, 10, 10, 18, 18, 18, 18, 18,
517     18, 26, 26, 26, 26, 26, 26, 26, 34, 34, 34, 34, 34, 34, 4, 4,
518     4, 4, 4, 4, 4, 12, 12, 12, 12, 12, 12, 20, 20, 20, 20, 20,
519     20, 20, 28, 28, 28, 28, 28, 28, 36, 36, 36, 36, 36, 36, 36, 6,
520     6, 6, 6, 6, 6, 14, 14, 14, 14, 14, 14, 14, 22, 22, 22, 22,
521     22, 22, 30, 30, 30, 30, 30, 30, 30, 38, 38, 38, 38, 38, 38, 38,
522     39, 39, 39, 39, 39, 39, 39, 31, 31, 31, 31, 31, 31, 31, 23, 23,
523     23, 23, 23, 23, 15, 15, 15, 15, 15, 15, 15, 7, 7, 7, 7, 7,
524     7, 37, 37, 37, 37, 37, 37, 37, 29, 29, 29, 29, 29, 29, 21, 21,
525     21, 21, 21, 21, 21, 13, 13, 13, 13, 13, 13, 5, 5, 5, 5, 5,
526     5, 5, 35, 35, 35, 35, 35, 35, 27, 27, 27, 27, 27, 27, 27, 19,
527     19, 19, 19, 19, 19, 11, 11, 11, 11, 11, 11, 11, 3, 3, 3, 3,
528     3, 3, 33, 33, 33, 33, 33, 33, 33, 25, 25, 25, 25, 25, 25, 17,
529     17, 17, 17, 17, 17, 17, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1,
530     // QUANT_48
531     0, 0, 0, 16, 16, 16, 16, 16, 16, 32, 32, 32, 32, 32, 2, 2,
532     2, 2, 2, 18, 18, 18, 18, 18, 18, 34, 34, 34, 34, 34, 4, 4,
533     4, 4, 4, 4, 20, 20, 20, 20, 20, 36, 36, 36, 36, 36, 6, 6,
534     6, 6, 6, 6, 22, 22, 22, 22, 22, 38, 38, 38, 38, 38, 38, 8,
535     8, 8, 8, 8, 24, 24, 24, 24, 24, 24, 40, 40, 40, 40, 40, 10,
536     10, 10, 10, 10, 26, 26, 26, 26, 26, 26, 42, 42, 42, 42, 42, 12,
537     12, 12, 12, 12, 12, 28, 28, 28, 28, 28, 44, 44, 44, 44, 44, 14,
538     14, 14, 14, 14, 14, 30, 30, 30, 30, 30, 46, 46, 46, 46, 46, 46,
539     47, 47, 47, 47, 47, 47, 31, 31, 31, 31, 31, 15, 15, 15, 15, 15,
540     15, 45, 45, 45, 45, 45, 29, 29, 29, 29, 29, 13, 13, 13, 13, 13,
541     13, 43, 43, 43, 43, 43, 27, 27, 27, 27, 27, 27, 11, 11, 11, 11,
542     11, 41, 41, 41, 41, 41, 25, 25, 25, 25, 25, 25, 9, 9, 9, 9,
543     9, 39, 39, 39, 39, 39, 39, 23, 23, 23, 23, 23, 7, 7, 7, 7,
544     7, 7, 37, 37, 37, 37, 37, 21, 21, 21, 21, 21, 5, 5, 5, 5,
545     5, 5, 35, 35, 35, 35, 35, 19, 19, 19, 19, 19, 19, 3, 3, 3,
546     3, 3, 33, 33, 33, 33, 33, 17, 17, 17, 17, 17, 17, 1, 1, 1,
547     // QUANT_64
548     0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4,
549     4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8,
550     8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12,
551     12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16,
552     16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19,
553     20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23,
554     24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27,
555     28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31,
556     32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35,
557     36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39,
558     40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43,
559     44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47,
560     47, 48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51,
561     51, 52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55,
562     55, 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59,
563     59, 60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63,
564     // QUANT_80
565     0, 0, 16, 16, 16, 32, 32, 32, 48, 48, 48, 48, 64, 64, 64, 2,
566     2, 2, 18, 18, 18, 34, 34, 34, 50, 50, 50, 50, 66, 66, 66, 4,
567     4, 4, 20, 20, 20, 36, 36, 36, 36, 52, 52, 52, 68, 68, 68, 6,
568     6, 6, 22, 22, 22, 38, 38, 38, 38, 54, 54, 54, 70, 70, 70, 8,
569     8, 8, 24, 24, 24, 24, 40, 40, 40, 56, 56, 56, 72, 72, 72, 10,
570     10, 10, 26, 26, 26, 26, 42, 42, 42, 58, 58, 58, 74, 74, 74, 12,
571     12, 12, 12, 28, 28, 28, 44, 44, 44, 60, 60, 60, 76, 76, 76, 14,
572     14, 14, 14, 30, 30, 30, 46, 46, 46, 62, 62, 62, 78, 78, 78, 78,
573     79, 79, 79, 79, 63, 63, 63, 47, 47, 47, 31, 31, 31, 15, 15, 15,
574     15, 77, 77, 77, 61, 61, 61, 45, 45, 45, 29, 29, 29, 13, 13, 13,
575     13, 75, 75, 75, 59, 59, 59, 43, 43, 43, 27, 27, 27, 27, 11, 11,
576     11, 73, 73, 73, 57, 57, 57, 41, 41, 41, 25, 25, 25, 25, 9, 9,
577     9, 71, 71, 71, 55, 55, 55, 39, 39, 39, 39, 23, 23, 23, 7, 7,
578     7, 69, 69, 69, 53, 53, 53, 37, 37, 37, 37, 21, 21, 21, 5, 5,
579     5, 67, 67, 67, 51, 51, 51, 51, 35, 35, 35, 19, 19, 19, 3, 3,
580     3, 65, 65, 65, 49, 49, 49, 49, 33, 33, 33, 17, 17, 17, 1, 1,
581     // QUANT_96
582     0, 0, 32, 32, 64, 64, 64, 2, 2, 2, 34, 34, 66, 66, 66, 4,
583     4, 4, 36, 36, 68, 68, 68, 6, 6, 6, 38, 38, 70, 70, 70, 8,
584     8, 8, 40, 40, 40, 72, 72, 10, 10, 10, 42, 42, 42, 74, 74, 12,
585     12, 12, 44, 44, 44, 76, 76, 14, 14, 14, 46, 46, 46, 78, 78, 16,
586     16, 16, 48, 48, 48, 80, 80, 80, 18, 18, 50, 50, 50, 82, 82, 82,
587     20, 20, 52, 52, 52, 84, 84, 84, 22, 22, 54, 54, 54, 86, 86, 86,
588     24, 24, 56, 56, 56, 88, 88, 88, 26, 26, 58, 58, 58, 90, 90, 90,
589     28, 28, 60, 60, 60, 92, 92, 92, 30, 30, 62, 62, 62, 94, 94, 94,
590     95, 95, 95, 63, 63, 63, 31, 31, 93, 93, 93, 61, 61, 61, 29, 29,
591     91, 91, 91, 59, 59, 59, 27, 27, 89, 89, 89, 57, 57, 57, 25, 25,
592     87, 87, 87, 55, 55, 55, 23, 23, 85, 85, 85, 53, 53, 53, 21, 21,
593     83, 83, 83, 51, 51, 51, 19, 19, 81, 81, 81, 49, 49, 49, 17, 17,
594     17, 79, 79, 47, 47, 47, 15, 15, 15, 77, 77, 45, 45, 45, 13, 13,
595     13, 75, 75, 43, 43, 43, 11, 11, 11, 73, 73, 41, 41, 41, 9, 9,
596     9, 71, 71, 71, 39, 39, 7, 7, 7, 69, 69, 69, 37, 37, 5, 5,
597     5, 67, 67, 67, 35, 35, 3, 3, 3, 65, 65, 65, 33, 33, 1, 1,
598     // QUANT_128
599     0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7,
600     8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15,
601     16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23,
602     24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30, 30, 31, 31,
603     32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37, 38, 38, 39, 39,
604     40, 40, 41, 41, 42, 42, 43, 43, 44, 44, 45, 45, 46, 46, 47, 47,
605     48, 48, 49, 49, 50, 50, 51, 51, 52, 52, 53, 53, 54, 54, 55, 55,
606     56, 56, 57, 57, 58, 58, 59, 59, 60, 60, 61, 61, 62, 62, 63, 63,
607     64, 64, 65, 65, 66, 66, 67, 67, 68, 68, 69, 69, 70, 70, 71, 71,
608     72, 72, 73, 73, 74, 74, 75, 75, 76, 76, 77, 77, 78, 78, 79, 79,
609     80, 80, 81, 81, 82, 82, 83, 83, 84, 84, 85, 85, 86, 86, 87, 87,
610     88, 88, 89, 89, 90, 90, 91, 91, 92, 92, 93, 93, 94, 94, 95, 95,
611     96, 96, 97, 97, 98, 98, 99, 99, 100, 100, 101, 101, 102, 102, 103, 103,
612     104, 104, 105, 105, 106, 106, 107, 107, 108, 108, 109, 109, 110, 110, 111, 111,
613     112, 112, 113, 113, 114, 114, 115, 115, 116, 116, 117, 117, 118, 118, 119, 119,
614     120, 120, 121, 121, 122, 122, 123, 123, 124, 124, 125, 125, 126, 126, 127, 127,
615     // QUANT_160
616     0, 32, 32, 64, 96, 96, 128, 128, 2, 34, 34, 66, 98, 98, 130, 130,
617     4, 36, 36, 68, 100, 100, 132, 132, 6, 38, 38, 70, 102, 102, 134, 134,
618     8, 40, 40, 72, 104, 104, 136, 136, 10, 42, 42, 74, 106, 106, 138, 138,
619     12, 44, 44, 76, 108, 108, 140, 140, 14, 46, 46, 78, 110, 110, 142, 142,
620     16, 48, 48, 80, 112, 112, 144, 144, 18, 50, 50, 82, 114, 114, 146, 146,
621     20, 52, 52, 84, 116, 116, 148, 148, 22, 54, 54, 86, 118, 118, 150, 150,
622     24, 56, 56, 88, 120, 120, 152, 152, 26, 58, 58, 90, 122, 122, 154, 154,
623     28, 60, 60, 92, 124, 124, 156, 156, 30, 62, 62, 94, 126, 126, 158, 158,
624     159, 159, 127, 127, 95, 63, 63, 31, 157, 157, 125, 125, 93, 61, 61, 29,
625     155, 155, 123, 123, 91, 59, 59, 27, 153, 153, 121, 121, 89, 57, 57, 25,
626     151, 151, 119, 119, 87, 55, 55, 23, 149, 149, 117, 117, 85, 53, 53, 21,
627     147, 147, 115, 115, 83, 51, 51, 19, 145, 145, 113, 113, 81, 49, 49, 17,
628     143, 143, 111, 111, 79, 47, 47, 15, 141, 141, 109, 109, 77, 45, 45, 13,
629     139, 139, 107, 107, 75, 43, 43, 11, 137, 137, 105, 105, 73, 41, 41, 9,
630     135, 135, 103, 103, 71, 39, 39, 7, 133, 133, 101, 101, 69, 37, 37, 5,
631     131, 131, 99, 99, 67, 35, 35, 3, 129, 129, 97, 97, 65, 33, 33, 1,
632     // QUANT_192
633     0, 64, 128, 128, 2, 66, 130, 130, 4, 68, 132, 132, 6, 70, 134, 134,
634     8, 72, 136, 136, 10, 74, 138, 138, 12, 76, 140, 140, 14, 78, 142, 142,
635     16, 80, 144, 144, 18, 82, 146, 146, 20, 84, 148, 148, 22, 86, 150, 150,
636     24, 88, 152, 152, 26, 90, 154, 154, 28, 92, 156, 156, 30, 94, 158, 158,
637     32, 96, 160, 160, 34, 98, 162, 162, 36, 100, 164, 164, 38, 102, 166, 166,
638     40, 104, 168, 168, 42, 106, 170, 170, 44, 108, 172, 172, 46, 110, 174, 174,
639     48, 112, 176, 176, 50, 114, 178, 178, 52, 116, 180, 180, 54, 118, 182, 182,
640     56, 120, 184, 184, 58, 122, 186, 186, 60, 124, 188, 188, 62, 126, 190, 190,
641     191, 191, 127, 63, 189, 189, 125, 61, 187, 187, 123, 59, 185, 185, 121, 57,
642     183, 183, 119, 55, 181, 181, 117, 53, 179, 179, 115, 51, 177, 177, 113, 49,
643     175, 175, 111, 47, 173, 173, 109, 45, 171, 171, 107, 43, 169, 169, 105, 41,
644     167, 167, 103, 39, 165, 165, 101, 37, 163, 163, 99, 35, 161, 161, 97, 33,
645     159, 159, 95, 31, 157, 157, 93, 29, 155, 155, 91, 27, 153, 153, 89, 25,
646     151, 151, 87, 23, 149, 149, 85, 21, 147, 147, 83, 19, 145, 145, 81, 17,
647     143, 143, 79, 15, 141, 141, 77, 13, 139, 139, 75, 11, 137, 137, 73, 9,
648     135, 135, 71, 7, 133, 133, 69, 5, 131, 131, 67, 3, 129, 129, 65, 1,
649     // QUANT_256
650     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
651     16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
652     32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
653     48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
654     64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
655     80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
656     96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
657     112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
658     128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
659     144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
660     160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
661     176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
662     192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
663     208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
664     224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
665     240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
666 };
667 
668 __constant short color_unquant_tables[QUANT_MAX][COLOR_NUM] = {
669     {
670         0, 255
671     },
672     {
673         0, 128, 255
674     },
675     {
676         0, 85, 170, 255
677     },
678     {
679         0, 64, 128, 192, 255
680     },
681     {
682         0, 255, 51, 204, 102, 153
683     },
684     {
685         0, 36, 73, 109, 146, 182, 219, 255
686     },
687     {
688         0, 255, 28, 227, 56, 199, 84, 171, 113, 142
689     },
690     {
691         0, 255, 69, 186, 23, 232, 92, 163, 46, 209, 116, 139
692     },
693     { // 16
694         0, 17, 34, 51, 68, 85, 102, 119, 136, 153, 170, 187, 204, 221, 238, 255
695     },
696     { // 20
697         0, 255, 67, 188, 13, 242, 80, 175, 27, 228, 94, 161, 40, 215, 107, 148,
698         54, 201, 121, 134
699     },
700     { // 24
701         0, 255, 33, 222, 66, 189, 99, 156, 11, 244, 44, 211, 77, 178, 110, 145,
702         22, 233, 55, 200, 88, 167, 121, 134
703     },
704     { // 32
705         0, 8, 16, 24, 33, 41, 49, 57, 66, 74, 82, 90, 99, 107, 115, 123,
706         132, 140, 148, 156, 165, 173, 181, 189, 198, 206, 214, 222, 231, 239, 247, 255
707     },
708     { // 40
709         0, 255, 32, 223, 65, 190, 97, 158, 6, 249, 39, 216, 71, 184, 104, 151,
710         13, 242, 45, 210, 78, 177, 110, 145, 19, 236, 52, 203, 84, 171, 117, 138,
711         26, 229, 58, 197, 91, 164, 123, 132
712     },
713     { // 48
714         0, 255, 16, 239, 32, 223, 48, 207, 65, 190, 81, 174, 97, 158, 113, 142,
715         5, 250, 21, 234, 38, 217, 54, 201, 70, 185, 86, 169, 103, 152, 119, 136,
716         11, 244, 27, 228, 43, 212, 59, 196, 76, 179, 92, 163, 108, 147, 124, 131
717     },
718     { // 64
719         0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60,
720         65, 69, 73, 77, 81, 85, 89, 93, 97, 101, 105, 109, 113, 117, 121, 125,
721         130, 134, 138, 142, 146, 150, 154, 158, 162, 166, 170, 174, 178, 182, 186, 190,
722         195, 199, 203, 207, 211, 215, 219, 223, 227, 231, 235, 239, 243, 247, 251, 255
723     },
724     { // 80
725         0, 255, 16, 239, 32, 223, 48, 207, 64, 191, 80, 175, 96, 159, 112, 143,
726         3, 252, 19, 236, 35, 220, 51, 204, 67, 188, 83, 172, 100, 155, 116, 139,
727         6, 249, 22, 233, 38, 217, 54, 201, 71, 184, 87, 168, 103, 152, 119, 136,
728         9, 246, 25, 230, 42, 213, 58, 197, 74, 181, 90, 165, 106, 149, 122, 133,
729         13, 242, 29, 226, 45, 210, 61, 194, 77, 178, 93, 162, 109, 146, 125, 130
730     },
731     { // 96
732         0, 255, 8, 247, 16, 239, 24, 231, 32, 223, 40, 215, 48, 207, 56, 199,
733         64, 191, 72, 183, 80, 175, 88, 167, 96, 159, 104, 151, 112, 143, 120, 135,
734         2, 253, 10, 245, 18, 237, 26, 229, 35, 220, 43, 212, 51, 204, 59, 196,
735         67, 188, 75, 180, 83, 172, 91, 164, 99, 156, 107, 148, 115, 140, 123, 132,
736         5, 250, 13, 242, 21, 234, 29, 226, 37, 218, 45, 210, 53, 202, 61, 194,
737         70, 185, 78, 177, 86, 169, 94, 161, 102, 153, 110, 145, 118, 137, 126, 129
738     },
739     { // 128
740         0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30,
741         32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62,
742         64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94,
743         96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126,
744         129, 131, 133, 135, 137, 139, 141, 143, 145, 147, 149, 151, 153, 155, 157, 159,
745         161, 163, 165, 167, 169, 171, 173, 175, 177, 179, 181, 183, 185, 187, 189, 191,
746         193, 195, 197, 199, 201, 203, 205, 207, 209, 211, 213, 215, 217, 219, 221, 223,
747         225, 227, 229, 231, 233, 235, 237, 239, 241, 243, 245, 247, 249, 251, 253, 255
748     },
749     { // 160
750         0, 255, 8, 247, 16, 239, 24, 231, 32, 223, 40, 215, 48, 207, 56, 199,
751         64, 191, 72, 183, 80, 175, 88, 167, 96, 159, 104, 151, 112, 143, 120, 135,
752         1, 254, 9, 246, 17, 238, 25, 230, 33, 222, 41, 214, 49, 206, 57, 198,
753         65, 190, 73, 182, 81, 174, 89, 166, 97, 158, 105, 150, 113, 142, 121, 134,
754         3, 252, 11, 244, 19, 236, 27, 228, 35, 220, 43, 212, 51, 204, 59, 196,
755         67, 188, 75, 180, 83, 172, 91, 164, 99, 156, 107, 148, 115, 140, 123, 132,
756         4, 251, 12, 243, 20, 235, 28, 227, 36, 219, 44, 211, 52, 203, 60, 195,
757         68, 187, 76, 179, 84, 171, 92, 163, 100, 155, 108, 147, 116, 139, 124, 131,
758         6, 249, 14, 241, 22, 233, 30, 225, 38, 217, 46, 209, 54, 201, 62, 193,
759         70, 185, 78, 177, 86, 169, 94, 161, 102, 153, 110, 145, 118, 137, 126, 129
760     },
761     {
762         0, 255, 4, 251, 8, 247, 12, 243, 16, 239, 20, 235, 24, 231, 28, 227,
763         32, 223, 36, 219, 40, 215, 44, 211, 48, 207, 52, 203, 56, 199, 60, 195,
764         64, 191, 68, 187, 72, 183, 76, 179, 80, 175, 84, 171, 88, 167, 92, 163,
765         96, 159, 100, 155, 104, 151, 108, 147, 112, 143, 116, 139, 120, 135, 124, 131,
766         1, 254, 5, 250, 9, 246, 13, 242, 17, 238, 21, 234, 25, 230, 29, 226,
767         33, 222, 37, 218, 41, 214, 45, 210, 49, 206, 53, 202, 57, 198, 61, 194,
768         65, 190, 69, 186, 73, 182, 77, 178, 81, 174, 85, 170, 89, 166, 93, 162,
769         97, 158, 101, 154, 105, 150, 109, 146, 113, 142, 117, 138, 121, 134, 125, 130,
770         2, 253, 6, 249, 10, 245, 14, 241, 18, 237, 22, 233, 26, 229, 30, 225,
771         34, 221, 38, 217, 42, 213, 46, 209, 50, 205, 54, 201, 58, 197, 62, 193,
772         66, 189, 70, 185, 74, 181, 78, 177, 82, 173, 86, 169, 90, 165, 94, 161,
773         98, 157, 102, 153, 106, 149, 110, 145, 114, 141, 118, 137, 122, 133, 126, 129
774     },
775     {
776         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
777         16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
778         32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
779         48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
780         64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
781         80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
782         96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
783         112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
784         128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
785         144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
786         160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
787         176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
788         192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
789         208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
790         224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
791         240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
792     }
793 };
794 
795 void Swap(float4* lhs, float4* rhs)
796 {
797     if ((lhs == NULL) || (rhs == NULL)) {
798         return;
799     }
800     float4 tmp = *lhs;
801     *lhs = *rhs;
802     *rhs = tmp;
803 }
804 
805 void FindMinMax(float4* texels, float4 ptMean, float4 vecK, float4* e0, float4* e1)
806 {
807     if ((texels == NULL) || (e0 == NULL) || (e1 == NULL)) {
808         return;
809     }
810     float a = 1e31f; // max float is clipped to 1e31f
811     float b = -1e31f; // min float is clipped to -1e31f
812     for (int i = START_INDEX; i < BLOCK_SIZE; ++i) {
813         float t = dot(texels[i] - ptMean, vecK);
814         a = min(a, t);
815         b = max(b, t);
816     }
817     *e0 = clamp(vecK * a + ptMean, 0.0f, 255.0f); // 8bit max is 255.0f
818     *e1 = clamp(vecK * b + ptMean, 0.0f, 255.0f); // 8bit max is 255.0f
819     // if the direction_vector ends up pointing from light to dark, FLIP IT!
820     // this will make the endpoint the darkest one;
821     float4 e0u = round(*e0);
822     float4 e1u = round(*e1);
823     if (e0u.x + e0u.y + e0u.z > e1u.x + e1u.y + e1u.z) {
824         Swap(e0, e1);
825     }
826 }
827 
828 void MaxAccumulationPixelDirection(float4* texels, float4 ptMean, float4* e0, float4* e1, bool hasAlpha)
829 {
830     if ((texels == NULL) || (e0 == NULL) || (e1 == NULL)) {
831         return;
832     }
833     float4 sumR = (float4)(FLOAT_ZERO);
834     float4 sumG = (float4)(FLOAT_ZERO);
835     float4 sumB = (float4)(FLOAT_ZERO);
836     float4 sumA = (float4)(FLOAT_ZERO);
837     for (int i = START_INDEX; i < BLOCK_SIZE; ++i) {
838         float4 dt = texels[i] - ptMean;
839         sumR += (dt.x > FLOAT_ZERO) ? dt : (float4)(FLOAT_ZERO);
840         sumG += (dt.y > FLOAT_ZERO) ? dt : (float4)(FLOAT_ZERO);
841         sumB += (dt.z > FLOAT_ZERO) ? dt : (float4)(FLOAT_ZERO);
842         sumA += (dt.w > FLOAT_ZERO) ? dt : (float4)(FLOAT_ZERO);
843     }
844     float dotR = dot(sumR, sumR);
845     float dotG = dot(sumG, sumG);
846     float dotB = dot(sumB, sumB);
847     float dotA = dot(sumA, sumA);
848     float maxDot = dotR;
849     float4 vecK = sumR;
850     if (dotG > maxDot) {
851         vecK = sumG;
852         maxDot = dotG;
853     }
854     if (dotB > maxDot) {
855         vecK = sumB;
856         maxDot = dotB;
857     }
858     if (hasAlpha && dotA > maxDot) {
859         vecK = sumA;
860         maxDot = dotA;
861     }
862     // safe normalize
863     float lenk = length(vecK);
864     vecK = (lenk < SMALL_VALUE) ? vecK : normalize(vecK);
865     FindMinMax(texels, ptMean, vecK, e0, e1);
866 }
867 
868 void EncodeColorNormal(short quantLevel, float4 e0, float4 e1, short* endpointQuantized)
869 {
870     if (endpointQuantized == NULL) {
871         return;
872     }
873     int4 e0q = (int4)((int)(round(e0.x)), (int)(round(e0.y)),
874         (int)(round(e0.z)), (int)(round(e0.w)));
875     int4 e1q = (int4)((int)(round(e1.x)), (int)(round(e1.y)),
876         (int)(round(e1.z)), (int)(round(e1.w)));
877     endpointQuantized[EP0_R_INDEX] = g_colorQuantTables[quantLevel * COLOR_NUM + e0q.x];
878     endpointQuantized[EP1_R_INDEX] = g_colorQuantTables[quantLevel * COLOR_NUM + e1q.x];
879     endpointQuantized[EP0_G_INDEX] = g_colorQuantTables[quantLevel * COLOR_NUM + e0q.y];
880     endpointQuantized[EP1_G_INDEX] = g_colorQuantTables[quantLevel * COLOR_NUM + e1q.y];
881     endpointQuantized[EP0_B_INDEX] = g_colorQuantTables[quantLevel * COLOR_NUM + e0q.z];
882     endpointQuantized[EP1_B_INDEX] = g_colorQuantTables[quantLevel * COLOR_NUM + e1q.z];
883     endpointQuantized[EP0_A_INDEX] = g_colorQuantTables[quantLevel * COLOR_NUM + e0q.w];
884     endpointQuantized[EP1_A_INDEX] = g_colorQuantTables[quantLevel * COLOR_NUM + e1q.w];
885 }
886 
887 void DecodeColor(short quantLevel, short endpointQuantized[COLOR_COMPONENT_NUM], float4* e0, float4* e1)
888 {
889     if ((endpointQuantized == NULL) || (e0 == NULL) || (e1 == NULL)) {
890         return;
891     }
892     (*e0).x = (float)(color_unquant_tables[quantLevel][endpointQuantized[EP0_R_INDEX]]);
893     (*e1).x = (float)(color_unquant_tables[quantLevel][endpointQuantized[EP1_R_INDEX]]);
894     (*e0).y = (float)(color_unquant_tables[quantLevel][endpointQuantized[EP0_G_INDEX]]);
895     (*e1).y = (float)(color_unquant_tables[quantLevel][endpointQuantized[EP1_G_INDEX]]);
896     (*e0).z = (float)(color_unquant_tables[quantLevel][endpointQuantized[EP0_B_INDEX]]);
897     (*e1).z = (float)(color_unquant_tables[quantLevel][endpointQuantized[EP1_B_INDEX]]);
898     (*e0).w = (float)(color_unquant_tables[quantLevel][endpointQuantized[EP0_A_INDEX]]);
899     (*e1).w = (float)(color_unquant_tables[quantLevel][endpointQuantized[EP1_A_INDEX]]);
900 }
901 
902 // calculate quantize weights
903 short QuantizeWeight(uint weightRange, float weight)
904 {
905     short q = (short)(round(weight * ((float)(weightRange))));
906     return clamp(q, (short)(QUANTIZE_WEIGHT_MIN), (short)(weightRange));
907 }
908 
909 void CalculateNormalWeights(int part, float4* texels,
910     float4 endPoint[END_POINT_NUM], float* projw)
911 {
912     if ((texels == NULL) || (endPoint == NULL) || (projw == NULL)) {
913         return;
914     }
915     int i = START_INDEX;
916     float4 vecK = endPoint[EP1_INDEX] - endPoint[EP0_INDEX];
917     if (length(vecK) < SMALL_VALUE) {
918         for (i = START_INDEX; i < X_GRIDS * Y_GRIDS; ++i) {
919             projw[i] = FLOAT_ZERO;
920         }
921     } else {
922         vecK = normalize(vecK);
923         float minw = 1e31f; // max float is clipped to 1e31f
924         float maxw = -1e31f; // min float is clipped to -1e31f
925         for (i = START_INDEX; i < BLOCK_SIZE; ++i) {
926             float w = dot(vecK, texels[i] - endPoint[EP0_INDEX]);
927             minw = min(w, minw);
928             maxw = max(w, maxw);
929             projw[i] = w;
930         }
931         float invlen = maxw - minw;
932         invlen = max(SMALL_VALUE, invlen);
933         invlen = FLOAT_ONE / invlen; // invlen min is SMALL_VALUE, not zero
934         for (i = START_INDEX; i < X_GRIDS * Y_GRIDS; ++i) {
935             projw[i] = (projw[i] - minw) * invlen;
936         }
937     }
938 }
939 
940 void QuantizeWeights(float projw[X_GRIDS * Y_GRIDS], uint weightRange, short* weights)
941 {
942     for (int i = START_INDEX; i < X_GRIDS * Y_GRIDS; ++i) {
943         weights[i] = QuantizeWeight(weightRange, projw[i]);
944     }
945 }
946 
947 void CalculateQuantizedWeights(float4* texels, uint weightRange, float4 endPoint[END_POINT_NUM], short* weights)
948 {
949     if ((texels == NULL) || (endPoint == NULL) || (weights == NULL)) {
950         return;
951     }
952     float projw[X_GRIDS * Y_GRIDS];
953     CalculateNormalWeights(INT_ZERO, texels, endPoint, projw);
954     QuantizeWeights(projw, weightRange, weights);
955 }
956 
957 void Orbits8Ptr(uint4* outputs, uint* bitoffset, uint number, uint bitcount)
958 {
959     if ((outputs == NULL) || (bitoffset == NULL)) {
960         return;
961     }
962     uint newpos = *bitoffset + bitcount;
963     uint nidx = newpos >> 5; // split low bits (5 bits) to get high bits
964     uint uidx = *bitoffset >> 5; // split low bits (5 bits) to get high bits
965     uint bitIdx = *bitoffset & 31u; // split low bits to get low bits (31 for mask 5 bits)
966     if (uidx == 0) { // high bits is 0 for x
967         (*outputs).x |= (number << bitIdx);
968         (*outputs).y |= (nidx > uidx) ? (number >> (32u - bitIdx)) : UINT_ZERO; // uint 32 bits
969     } else if (uidx == 1) { // high bits is 1 for y
970         (*outputs).y |= (number << bitIdx);
971         (*outputs).z |= (nidx > uidx) ? (number >> (32u - bitIdx)) : UINT_ZERO; // uint 32 bits
972     } else if (uidx == 2) { // high bits is 2 for z
973         (*outputs).z |= (number << bitIdx);
974         (*outputs).w |= (nidx > uidx) ? (number >> (32u - bitIdx)) : UINT_ZERO; // uint 32 bits
975     }
976     *bitoffset = newpos;
977 }
978 
979 void SplitHighLow(uint n, uint i, int* high, uint* low)
980 {
981     uint low_mask = (UINT_ONE << i) - UINT_ONE;
982     *low = n & low_mask;
983     *high = ((int)(n >> i)) & 0xFF; // mask 0xFF to get low 8 bits
984 }
985 
// Returns the low 8 bits of p in reversed bit order (bit 0 <-> bit 7, bit 1 <-> bit 6, ...).
// Bits of p above the low byte do not influence the result, and the result fits in 8 bits.
uint ReverseByte(uint p)
{
    // The three exchange steps are independent of each other, so any order gives the same result.
    uint swappedBits = ((p & 0x55u) << 1) | ((p >> 1) & 0x55u); // exchange neighbouring bits
    uint swappedPairs = ((swappedBits & 0x33u) << 2) | ((swappedBits >> 2) & 0x33u); // exchange bit pairs
    uint swappedNibbles = ((swappedPairs & 0xFu) << 4) | ((swappedPairs >> 4) & 0xFu); // exchange nibbles
    return swappedNibbles;
}
993 
994 void EncodeTrits(uint bitcount, uint tritInput[TRIT_BLOCK_SIZE], uint4* outputs, uint* outpos)
995 {
996     int t0;
997     int t1;
998     int t2;
999     int t3;
1000     int t4;
1001     uint m0;
1002     uint m1;
1003     uint m2;
1004     uint m3;
1005     uint m4;
1006     SplitHighLow(tritInput[ISE_0], bitcount, &t0, &m0);
1007     SplitHighLow(tritInput[ISE_1], bitcount, &t1, &m1);
1008     SplitHighLow(tritInput[ISE_2], bitcount, &t2, &m2);
1009     SplitHighLow(tritInput[ISE_3], bitcount, &t3, &m3);
1010     SplitHighLow(tritInput[ISE_4], bitcount, &t4, &m4);
1011     ushort packhigh = (ushort)(
1012         g_integerFromTrits[t4 * 81 + t3 * 27 + t2 * 9 + t1 * 3 + t0]); // trits for 3 9 27 81
1013     Orbits8Ptr(outputs, outpos, m0, bitcount);
1014     Orbits8Ptr(outputs, outpos, packhigh & 3u, 2u); // low 2bits (mask 3u) offset 2u
1015 
1016     Orbits8Ptr(outputs, outpos, m1, bitcount);
1017     Orbits8Ptr(outputs, outpos, (packhigh >> 2) & 3u, 2u); // right shift 2 bits for low 2bits (mask 3u) offset 2u
1018 
1019     Orbits8Ptr(outputs, outpos, m2, bitcount);
1020     Orbits8Ptr(outputs, outpos, (packhigh >> 4) & 1u, 1u); // right shift 4 bits for low 1bits (mask 1u) offset 1u
1021 
1022     Orbits8Ptr(outputs, outpos, m3, bitcount);
1023     Orbits8Ptr(outputs, outpos, (packhigh >> 5) & 3u, 2u); // right shift 5 bits for low 2bits (mask 3u) offset 2u
1024 
1025     Orbits8Ptr(outputs, outpos, m4, bitcount);
1026     Orbits8Ptr(outputs, outpos, (packhigh >> 7) & 1u, 1u); // right shift 7 bits for low 1bits (mask 1u) offset 1u
1027 }
1028 
1029 void EncodeQuints(uint bitcount, uint quintInput[QUINT_BLOCK_SIZE], uint4* outputs, uint* outpos)
1030 {
1031     int q0;
1032     int q1;
1033     int q2;
1034     uint m0;
1035     uint m1;
1036     uint m2;
1037     SplitHighLow(quintInput[ISE_0], bitcount, &q0, &m0);
1038     SplitHighLow(quintInput[ISE_1], bitcount, &q1, &m1);
1039     SplitHighLow(quintInput[ISE_2], bitcount, &q2, &m2);
1040     ushort packhigh = (ushort)(g_integerFromQuints[q2 * 25 + q1 * 5 + q0]); // Quints 5 25
1041     Orbits8Ptr(outputs, outpos, m0, bitcount);
1042     Orbits8Ptr(outputs, outpos, packhigh & 7u, 3u); // low 3bits (mask 7u) offset 3u
1043     Orbits8Ptr(outputs, outpos, m1, bitcount);
1044     Orbits8Ptr(outputs, outpos, (packhigh >> 3) & 3u, 2u); // right shift 3 bits for low 2bits (mask 3u) offset 2u
1045     Orbits8Ptr(outputs, outpos, m2, bitcount);
1046     Orbits8Ptr(outputs, outpos, (packhigh >> 5) & 3u, 2u); // right shift 5 bits for low 2bits (mask 3u) offset 2u
1047 }
1048 
1049 void BiseEndpoints(short numbers[COLOR_COMPONENT_NUM], int range, uint4* outputs, bool hasAlpha, uint* bitPos)
1050 {
1051     uint bits = (uint)(g_bitsTritsQuintsTable[range * 3 + 0]); // Quints 3 offset 0
1052     uint trits = (uint)(g_bitsTritsQuintsTable[range * 3 + 1]); // Quints 3 offset 1
1053     uint quints = (uint)(g_bitsTritsQuintsTable[range * 3 + 2]); // Quints 3 offset 2
1054     uint count = hasAlpha ? 8u : 6u; // RGBA 4x2 = 8 or RGB 3x2 = 6
1055     if (trits == UINT_ONE) {
1056         uint tritsInput[TRIT_BLOCK_SIZE];
1057         tritsInput[ISE_0] = numbers[EP0_R_INDEX];
1058         tritsInput[ISE_1] = numbers[EP1_R_INDEX];
1059         tritsInput[ISE_2] = numbers[EP0_G_INDEX];
1060         tritsInput[ISE_3] = numbers[EP1_G_INDEX];
1061         tritsInput[ISE_4] = numbers[EP0_B_INDEX];
1062         EncodeTrits(bits, tritsInput, outputs, bitPos);
1063         tritsInput[ISE_0] = numbers[EP1_B_INDEX];
1064         tritsInput[ISE_1] = numbers[EP0_A_INDEX];
1065         tritsInput[ISE_2] = numbers[EP1_A_INDEX];
1066         tritsInput[ISE_3] = UINT_ZERO;
1067         tritsInput[ISE_4] = UINT_ZERO;
1068         EncodeTrits(bits, tritsInput, outputs, bitPos);
1069         *bitPos = ((TRIT_MSB_SIZE + TRIT_BLOCK_SIZE * bits) * count + TRIT_ROUND_NUM) / TRIT_BLOCK_SIZE;
1070     } else if (quints == UINT_ONE) {
1071         uint quintsInput[QUINT_BLOCK_SIZE];
1072         quintsInput[ISE_0] = numbers[EP0_R_INDEX];
1073         quintsInput[ISE_1] = numbers[EP1_R_INDEX];
1074         quintsInput[ISE_2] = numbers[EP0_G_INDEX];
1075         EncodeQuints(bits, quintsInput, outputs, bitPos);
1076         quintsInput[ISE_0] = numbers[EP1_G_INDEX];
1077         quintsInput[ISE_1] = numbers[EP0_B_INDEX];
1078         quintsInput[ISE_2] = numbers[EP1_B_INDEX];
1079         EncodeQuints(bits, quintsInput, outputs, bitPos);
1080         quintsInput[ISE_0] = numbers[EP0_A_INDEX];
1081         quintsInput[ISE_1] = numbers[EP1_A_INDEX];
1082         quintsInput[ISE_2] = UINT_ZERO;
1083         EncodeQuints(bits, quintsInput, outputs, bitPos);
1084         *bitPos = ((QUINT_MSB_SIZE + QUINT_BLOCK_SIZE * bits) * count + QUINT_ROUND_NUM) / QUINT_BLOCK_SIZE;
1085     } else {
1086         for (uint i = UINT_ZERO; i < count; ++i) {
1087             Orbits8Ptr(outputs, bitPos, numbers[i], bits);
1088         }
1089     }
1090 }
1091 
1092 void BiseWeights(short numbers[BLOCK_SIZE], int range, uint4* outputs)
1093 {
1094     uint bitPos = UINT_ZERO;
1095     uint bits = (uint)(g_bitsTritsQuintsTable[range * 3 + 0]); // Quints 3 offset 0
1096     uint trits = (uint)(g_bitsTritsQuintsTable[range * 3 + 1]); // Quints 3 offset 1
1097     if (trits == UINT_ONE) {
1098         uint tritsInput[TRIT_BLOCK_SIZE];
1099         tritsInput[ISE_0] = numbers[WEIGHT_0];
1100         tritsInput[ISE_1] = numbers[WEIGHT_1];
1101         tritsInput[ISE_2] = numbers[WEIGHT_2];
1102         tritsInput[ISE_3] = numbers[WEIGHT_3];
1103         tritsInput[ISE_4] = numbers[WEIGHT_4];
1104         EncodeTrits(bits, tritsInput, outputs, &bitPos);
1105         tritsInput[ISE_0] = numbers[WEIGHT_5];
1106         tritsInput[ISE_1] = numbers[WEIGHT_6];
1107         tritsInput[ISE_2] = numbers[WEIGHT_7];
1108         tritsInput[ISE_3] = numbers[WEIGHT_8];
1109         tritsInput[ISE_4] = numbers[WEIGHT_9];
1110         EncodeTrits(bits, tritsInput, outputs, &bitPos);
1111         tritsInput[ISE_0] = numbers[WEIGHT_10];
1112         tritsInput[ISE_1] = numbers[WEIGHT_11];
1113         tritsInput[ISE_2] = numbers[WEIGHT_12];
1114         tritsInput[ISE_3] = numbers[WEIGHT_13];
1115         tritsInput[ISE_4] = numbers[WEIGHT_14];
1116         EncodeTrits(bits, tritsInput, outputs, &bitPos);
1117         tritsInput[ISE_0] = numbers[WEIGHT_15];
1118         tritsInput[ISE_1] = UINT_ZERO;
1119         tritsInput[ISE_2] = UINT_ZERO;
1120         tritsInput[ISE_3] = UINT_ZERO;
1121         tritsInput[ISE_4] = UINT_ZERO;
1122         EncodeTrits(bits, tritsInput, outputs, &bitPos);
1123         bitPos = ((TRIT_MSB_SIZE + TRIT_BLOCK_SIZE * bits) * BLOCK_SIZE + TRIT_ROUND_NUM) / TRIT_BLOCK_SIZE;
1124     } else {
1125         for (int i = START_INDEX; i < BLOCK_SIZE; ++i) {
1126             Orbits8Ptr(outputs, &bitPos, numbers[i], bits);
1127         }
1128     }
1129 }
1130 
1131 uint4 AssembleBlock(uint blockMode, uint colorEndpointMode, uint4 epIse, uint4 wtIse)
1132 {
1133     uint4 phyBlk = (uint4)(0, 0, 0, 0); // initialize to (0, 0, 0, 0)
1134     phyBlk.w |= ReverseByte(wtIse.x & BYTE_MASK) << BYTE_3_POS;
1135     phyBlk.w |= ReverseByte((wtIse.x >> BYTE_1_POS) & BYTE_MASK) << BYTE_2_POS;
1136     phyBlk.w |= ReverseByte((wtIse.x >> BYTE_2_POS) & BYTE_MASK) << BYTE_1_POS;
1137     phyBlk.w |= ReverseByte((wtIse.x >> BYTE_3_POS) & BYTE_MASK);
1138     phyBlk.z |= ReverseByte(wtIse.y & BYTE_MASK) << BYTE_3_POS;
1139     phyBlk.z |= ReverseByte((wtIse.y >> BYTE_1_POS) & BYTE_MASK) << BYTE_2_POS;
1140     phyBlk.z |= ReverseByte((wtIse.y >> BYTE_2_POS) & BYTE_MASK) << BYTE_1_POS;
1141     phyBlk.z |= ReverseByte((wtIse.y >> BYTE_3_POS) & BYTE_MASK);
1142     phyBlk.y |= ReverseByte(wtIse.z & BYTE_MASK) << BYTE_3_POS;
1143     phyBlk.y |= ReverseByte((wtIse.z >> BYTE_1_POS) & BYTE_MASK) << BYTE_2_POS;
1144     phyBlk.y |= ReverseByte((wtIse.z >> BYTE_2_POS) & BYTE_MASK) << BYTE_1_POS;
1145     phyBlk.y |= ReverseByte((wtIse.z >> BYTE_3_POS) & BYTE_MASK);
1146     phyBlk.x = blockMode;
1147 
1148     phyBlk.x |= (colorEndpointMode & MASK_FOR_4BITS) << CEM_POS;
1149     phyBlk.x |= (epIse.x & MASK_FOR_15BITS) << COLOR_EP_POS;
1150     phyBlk.y |= ((epIse.x >> COLOR_EP_HIGH_BIT) & MASK_FOR_17BITS);
1151     phyBlk.y |= (epIse.y & MASK_FOR_15BITS) << COLOR_EP_POS;
1152     phyBlk.z |= ((epIse.y >> COLOR_EP_HIGH_BIT) & MASK_FOR_17BITS);
1153 
1154     return phyBlk;
1155 }
1156 
1157 uint AssembleBlockmode(uint weightQuantmethod, bool isDualPlane)
1158 {
1159     uint a = (uint)((Y_GRIDS - HEIGHT_BITS_OFFSET) & MASK_FOR_2BITS);
1160     uint b = (uint)((X_GRIDS - WIDTH_BITS_OFFSET) & MASK_FOR_2BITS);
1161     uint d = isDualPlane ? UINT_ONE : UINT_ZERO;
1162     uint h = (weightQuantmethod < 6u) ? UINT_ZERO : UINT_ONE; // low/high-precision limit is 6u
1163     uint r = (weightQuantmethod % 6u) + WEIGHT_METHOD_OFFSET; // low/high-precision limit is 6u
1164     uint blockMode = (r >> WEIGHT_METHOD_RIGHT_BIT) & MASK_FOR_2BITS;
1165     blockMode |= (r & MASK_FOR_1BITS) << WEIGHT_METHOD_POS;
1166     blockMode |= (a & MASK_FOR_2BITS) << BLOCK_WIDTH_POS;
1167     blockMode |= (b & MASK_FOR_2BITS) << BLOCK_HEIGHT_POS;
1168     blockMode |= h << WEIGHT_PRECISION_POS;
1169     blockMode |= d << IS_DUALPLANE_POS;
1170     return blockMode;
1171 }
1172 
1173 uint4 EndpointIse(float4* ep0, float4* ep1, short endpointQuantmethod, bool hasAlpha)
1174 {
1175     short epQuantized[COLOR_COMPONENT_NUM];
1176     EncodeColorNormal(endpointQuantmethod, *ep0, *ep1, epQuantized);
1177     DecodeColor(endpointQuantmethod, epQuantized, ep0, ep1);
1178     if (!hasAlpha) {
1179         epQuantized[EP0_A_INDEX] = SHORT_ZERO;
1180         epQuantized[EP1_A_INDEX] = SHORT_ZERO;
1181     }
1182     uint4 epIse = (uint4)(UINT_ZERO);
1183     uint bitPos = UINT_ZERO;
1184     BiseEndpoints(epQuantized, endpointQuantmethod, &epIse, hasAlpha, &bitPos);
1185     return epIse;
1186 }
1187 
1188 float4 CalTexel(short weight, float4 ep0, float4 ep1)
1189 {
1190     short weight0 = BLOCK_MAX_WEIGHTS_SHORT - weight;
1191     return (ep0 * weight0 + ep1 * weight) / BLOCK_MAX_WEIGHTS_FLOAT;
1192 }
1193 
1194 uint4 WeightIse(float4* texels, uint weightRange, float4 endPoint[END_POINT_NUM],
1195     short weightQuantmethod, float* errval)
1196 {
1197     int i = START_INDEX;
1198     short wtQuantized[X_GRIDS * Y_GRIDS];
1199     CalculateQuantizedWeights(texels, weightRange, endPoint, wtQuantized);
1200     float sumErr = FLOAT_ZERO;
1201     for (i = START_INDEX; i < X_GRIDS * Y_GRIDS; ++i) {
1202         short w = weightQuantmethod * WEIGHT_QUANTIZE_NUM + wtQuantized[i];
1203         wtQuantized[i] = g_scrambleTable[w];
1204         w = weightQuantmethod * WEIGHT_QUANTIZE_NUM + wtQuantized[i];
1205         short wt = g_weightUnquant[w];
1206         float4 new_texel = CalTexel(wt, endPoint[EP0_INDEX], endPoint[EP1_INDEX]);
1207         float4 diff = new_texel - texels[i];
1208         sumErr += dot(diff, diff);
1209     }
1210     *errval = sumErr;
1211     uint4 wtIse = (uint4)(UINT_ZERO);
1212     BiseWeights(wtQuantized, (int)(weightQuantmethod), &wtIse);
1213     return wtIse;
1214 }
1215 
1216 float TryEncode(float4* texels, float4 texelsMean, uint4* epIse, uint4* wtIse, short3* bestBlockmode)
1217 {
1218     float errval;
1219     bool hasAlpha = true;
1220     *bestBlockmode = (short3)(QUANT_6, QUANT_256, WEIGHT_RANGE_6);
1221     short weightQuantmethod = (*bestBlockmode).x;
1222     short endpointQuantmethod = (*bestBlockmode).y;
1223     short weightRange = (*bestBlockmode).z;
1224     float4 ep0;
1225     float4 ep1;
1226     float4 endPoint[END_POINT_NUM];
1227     MaxAccumulationPixelDirection(texels, texelsMean, &ep0, &ep1, hasAlpha);
1228     *epIse = EndpointIse(&ep0, &ep1, endpointQuantmethod, hasAlpha);
1229     endPoint[EP0_INDEX] = ep0;
1230     endPoint[EP1_INDEX] = ep1;
1231     *wtIse = WeightIse(texels, weightRange - UINT_ONE, endPoint, weightQuantmethod, &errval);
1232     return errval;
1233 }
1234 
1235 uint4 EncodeBlock(float4* texels, float4 texelsMean, int blockID, __global uint* errs)
1236 {
1237     bool hasAlpha = true;
1238     bool isDualPlane = false;
1239     float errval = 10000000.0f; // the errval is initialized to 10000000.0f
1240 
1241     uint4 epIse, wtIse;
1242     short3 bestBlockmode, tmpBestBlockMode;
1243     errval = TryEncode(texels, texelsMean, &epIse, &wtIse, &bestBlockmode);
1244 
1245     uint blockMode = AssembleBlockmode(bestBlockmode.x, isDualPlane);
1246     uint ColorEndpointMode;
1247     if (hasAlpha) {
1248         ColorEndpointMode = CEM_LDR_RGBA_DIRECT;
1249     } else {
1250         ColorEndpointMode = CEM_LDR_RGB_DIRECT;
1251     }
1252     errs[blockID] = (uint)(errval);
1253     return AssembleBlock(blockMode, ColorEndpointMode, epIse, wtIse);
1254 }
1255 
1256 void GotTexelFromImage(read_only image2d_t inputImage, float4 texels[BLOCK_SIZE],
1257     int width, int height, float4 *texelMean)
1258 {
1259     int2 pos = (int2)(get_global_id(0), get_global_id(1));
1260     pos.x *= DIM;
1261     pos.y *= DIM;
1262     for (int i = 0; i < DIM; ++i) {
1263         for (int j = 0; j < DIM; ++j) {
1264             int2 pixelPos = pos + (int2)(j, i);
1265             if (pixelPos.x >= width) {
1266                 pixelPos.x = width - 1;
1267             }
1268             if (pixelPos.y >= height) {
1269                 pixelPos.y = height - 1;
1270             }
1271             float4 texel = read_imagef(inputImage, pixelPos);
1272             texels[i * DIM + j] = texel * PIXEL_MAX_VALUE;
1273             *texelMean += texel * PIXEL_MAX_VALUE;
1274         }
1275     }
1276 }
1277 
1278 kernel void AstcCl(read_only image2d_t inputImage, __global uint4* astcArr, __global uint* errs,
1279     int width, int height)
1280 {
1281     const int2 globalSize = (int2)(get_global_size(0), get_global_size(1));
1282     const int2 globalId = (int2)(get_global_id(0), get_global_id(1));
1283     int blockID = globalId.y * globalSize.x + globalId.x;
1284     float4 texels[BLOCK_SIZE];
1285     float4 texelMean = 0;
1286     GotTexelFromImage(inputImage, texels, width, height, &texelMean);
1287     texelMean = texelMean / ((float)(BLOCK_SIZE));
1288     astcArr[blockID] = EncodeBlock(texels, texelMean, blockID, errs);
1289 }
1290 )";
1291 
// Keeps the handle of the OpenCL library obtained through InitOpenCLExtern and releases it
// through UnLoadCLExtern on destruction.
class OpenCLSoManager {
public:
    OpenCLSoManager();
    ~OpenCLSoManager();
    // Loads the library if it has not been loaded successfully yet; returns true when it is
    // loaded. Calls are serialized by openClSoMutex_.
    bool LoadOpenCLSo();
private:
    void *clSoHandle{nullptr}; // handle filled in by InitOpenCLExtern
    bool loadSuccess{false}; // result of the last load attempt
    std::mutex openClSoMutex_{}; // guards loadSuccess and clSoHandle in LoadOpenCLSo
};
1302 
1303 static OpenCLSoManager g_clSoManager;
1304 std::mutex checkClBinPathMutex = {};
1305 
OpenCLSoManager()1306 OpenCLSoManager::OpenCLSoManager()
1307 {
1308     clSoHandle = nullptr;
1309     loadSuccess = false;
1310 }
1311 
~OpenCLSoManager()1312 OpenCLSoManager::~OpenCLSoManager()
1313 {
1314     if (!UnLoadCLExtern(clSoHandle)) {
1315         IMAGE_LOGE("astcenc OpenCLSoManager UnLoad failed!");
1316     } else {
1317         IMAGE_LOGD("astcenc OpenCLSoManager UnLoad success!");
1318         loadSuccess = false;
1319     }
1320 }
1321 
LoadOpenCLSo()1322 bool OpenCLSoManager::LoadOpenCLSo()
1323 {
1324     std::lock_guard<std::mutex> lock(openClSoMutex_);
1325     if (!loadSuccess) {
1326         loadSuccess = InitOpenCLExtern(&clSoHandle);
1327     }
1328     return loadSuccess;
1329 }
1330 
AstcClClose(ClAstcHandle * clAstcHandle)1331 CL_ASTC_SHARE_LIB_API CL_ASTC_STATUS AstcClClose(ClAstcHandle *clAstcHandle)
1332 {
1333     if (clAstcHandle == nullptr) {
1334         IMAGE_LOGE("astc AstcClClose clAstcHandle is nullptr!");
1335         return CL_ASTC_ENC_FAILED;
1336     }
1337     cl_int clRet;
1338     if (clAstcHandle->kernel != nullptr) {
1339         clRet = clReleaseKernel(clAstcHandle->kernel);
1340         if (clRet != CL_SUCCESS) {
1341             IMAGE_LOGE("astc clReleaseKernel failed ret %{public}d!", clRet);
1342             return CL_ASTC_ENC_FAILED;
1343         }
1344         clAstcHandle->kernel = nullptr;
1345     }
1346     if (clAstcHandle->queue != nullptr) {
1347         clRet = clReleaseCommandQueue(clAstcHandle->queue);
1348         if (clRet != CL_SUCCESS) {
1349             IMAGE_LOGE("astc clReleaseCommandQueue failed ret %{public}d!", clRet);
1350             return CL_ASTC_ENC_FAILED;
1351         }
1352         clAstcHandle->queue = nullptr;
1353     }
1354     if (clAstcHandle->context != nullptr) {
1355         clRet = clReleaseContext(clAstcHandle->context);
1356         if (clRet != CL_SUCCESS) {
1357             IMAGE_LOGE("astc clReleaseContext failed ret %{public}d!", clRet);
1358             return CL_ASTC_ENC_FAILED;
1359         }
1360         clAstcHandle->context = nullptr;
1361     }
1362     if (clAstcHandle->encObj.blockErrs_ != nullptr) {
1363         free(clAstcHandle->encObj.blockErrs_);
1364         clAstcHandle->encObj.blockErrs_ = nullptr;
1365     }
1366     if (clAstcHandle != nullptr) {
1367         free(clAstcHandle);
1368     }
1369     return CL_ASTC_ENC_SUCCESS;
1370 }
1371 
CheckClBinIsExist(const std::string & name)1372 static bool CheckClBinIsExist(const std::string &name)
1373 {
1374     std::lock_guard<std::mutex> lock(checkClBinPathMutex);
1375     return (access(name.c_str(), F_OK) != -1); // -1 means that the file is  not exist
1376 }
1377 
SaveClBin(cl_program program,const std::string & clBinPath)1378 static CL_ASTC_STATUS SaveClBin(cl_program program, const std::string &clBinPath)
1379 {
1380     std::lock_guard<std::mutex> lock(checkClBinPathMutex);
1381     size_t programBinarySizes;
1382     cl_int clRet = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &programBinarySizes, NULL);
1383     if (clRet != CL_SUCCESS) {
1384         IMAGE_LOGE("astc clGetProgramInfo CL_PROGRAM_BINARY_SIZES failed ret %{public}d!", clRet);
1385         return CL_ASTC_ENC_FAILED;
1386     }
1387     bool genBinFail = (programBinarySizes == 0) || (programBinarySizes > MAX_MALLOC_BYTES);
1388     if (genBinFail) {
1389         IMAGE_LOGE("astc clGetProgramInfo programBinarySizes %{public}zu too big!", programBinarySizes);
1390         return CL_ASTC_ENC_FAILED;
1391     }
1392     uint8_t *programBinaries = static_cast<uint8_t *>(malloc(programBinarySizes));
1393     if (programBinaries == nullptr) {
1394         IMAGE_LOGE("astc programBinaries malloc failed!");
1395         return CL_ASTC_ENC_FAILED;
1396     }
1397     clRet = clGetProgramInfo(program, CL_PROGRAM_BINARIES, programBinarySizes, &programBinaries, NULL);
1398     if (clRet != CL_SUCCESS) {
1399         IMAGE_LOGE("astc clGetProgramInfo CL_PROGRAM_BINARIES failed ret %{public}d!", clRet);
1400         free(programBinaries);
1401         return CL_ASTC_ENC_FAILED;
1402     }
1403     FILE *fp = fopen(clBinPath.c_str(), "wb");
1404     if (fp == nullptr) {
1405         IMAGE_LOGE("astc create file: %{public}s failed!", clBinPath.c_str());
1406         free(programBinaries);
1407         return CL_ASTC_ENC_FAILED;
1408     }
1409     CL_ASTC_STATUS ret = CL_ASTC_ENC_SUCCESS;
1410     if (fwrite(programBinaries, 1, programBinarySizes, fp) != programBinarySizes) {
1411         IMAGE_LOGE("astc fwrite programBinaries file failed!");
1412         ret = CL_ASTC_ENC_FAILED;
1413     }
1414     if (fclose(fp) != 0) {
1415         IMAGE_LOGE("astc SaveClBin close file failed!");
1416         ret = CL_ASTC_ENC_FAILED;
1417     }
1418     fp = nullptr;
1419     free(programBinaries);
1420     return ret;
1421 }
1422 
BuildProgramAndCreateKernel(cl_program program,ClAstcHandle * clAstcHandle)1423 static CL_ASTC_STATUS BuildProgramAndCreateKernel(cl_program program, ClAstcHandle *clAstcHandle)
1424 {
1425     cl_int clRet = clBuildProgram(program, 1, &clAstcHandle->deviceID, "-cl-std=CL3.0", nullptr, nullptr);
1426     if (clRet != CL_SUCCESS) {
1427         IMAGE_LOGE("astc clBuildProgram failed ret %{public}d!", clRet);
1428         return CL_ASTC_ENC_FAILED;
1429     }
1430     clAstcHandle->kernel = clCreateKernel(program, "AstcCl", &clRet);
1431     if (clRet != CL_SUCCESS) {
1432         IMAGE_LOGE("astc clCreateKernel failed ret %{public}d!", clRet);
1433         return CL_ASTC_ENC_FAILED;
1434     }
1435     return CL_ASTC_ENC_SUCCESS;
1436 }
1437 
AstcClBuildProgram(ClAstcHandle * clAstcHandle,const std::string & clBinPath)1438 static CL_ASTC_STATUS AstcClBuildProgram(ClAstcHandle *clAstcHandle, const std::string &clBinPath)
1439 {
1440     cl_int clRet;
1441     cl_program program = nullptr;
1442     if (!CheckClBinIsExist(clBinPath)) {
1443         size_t sourceSize = strlen(g_programSource) + 1; // '\0' occupies 1 bytes
1444         program = clCreateProgramWithSource(clAstcHandle->context, 1, &g_programSource, &sourceSize, &clRet);
1445         if (clRet != CL_SUCCESS) {
1446             IMAGE_LOGE("astc clCreateProgramWithSource failed ret %{public}d!", clRet);
1447             return CL_ASTC_ENC_FAILED;
1448         }
1449         if (BuildProgramAndCreateKernel(program, clAstcHandle) != CL_ASTC_ENC_SUCCESS) {
1450             IMAGE_LOGE("astc BuildProgramAndCreateKernel failed ret %{public}d!", clRet);
1451             clReleaseProgram(program);
1452             return CL_ASTC_ENC_FAILED;
1453         }
1454         if (SaveClBin(program, clBinPath) != CL_ASTC_ENC_SUCCESS) {
1455             IMAGE_LOGI("astc SaveClBin failed!");
1456         }
1457     } else {
1458         std::ifstream contents{clBinPath};
1459         std::string binaryContent{std::istreambuf_iterator<char>{contents}, {}};
1460         size_t binSize = binaryContent.length();
1461         bool invaildSize = (binSize == 0) || (binSize > MAX_MALLOC_BYTES);
1462         if (invaildSize) {
1463             IMAGE_LOGE("astc AstcClBuildProgram read CLbin file lenth error %{public}zu!", binSize);
1464             return CL_ASTC_ENC_FAILED;
1465         }
1466         const char *binary = static_cast<const char *>(binaryContent.c_str());
1467         program = clCreateProgramWithBinary(clAstcHandle->context, 1, &clAstcHandle->deviceID, &binSize,
1468             (const unsigned char **)&binary, nullptr, &clRet);
1469         if (clRet != CL_SUCCESS) {
1470             IMAGE_LOGE("astc clCreateProgramWithBinary failed ret %{public}d!", clRet);
1471             return CL_ASTC_ENC_FAILED;
1472         }
1473         if (BuildProgramAndCreateKernel(program, clAstcHandle) != CL_ASTC_ENC_SUCCESS) {
1474             IMAGE_LOGE("astc BuildProgramAndCreateKernel with bin failed!");
1475             clReleaseProgram(program);
1476             return CL_ASTC_ENC_FAILED;
1477         }
1478     }
1479     clRet = clReleaseProgram(program);
1480     if (clRet != CL_SUCCESS) {
1481         IMAGE_LOGE("astc clReleaseProgram failed ret %{public}d!", clRet);
1482         return CL_ASTC_ENC_FAILED;
1483     }
1484     return CL_ASTC_ENC_SUCCESS;
1485 }
1486 
AstcCreateClKernel(ClAstcHandle * clAstcHandle,const std::string & clBinPath)1487 static CL_ASTC_STATUS AstcCreateClKernel(ClAstcHandle *clAstcHandle, const std::string &clBinPath)
1488 {
1489     if (!g_clSoManager.LoadOpenCLSo()) {
1490         IMAGE_LOGE("astc InitOpenCL error!");
1491         return CL_ASTC_ENC_FAILED;
1492     }
1493     cl_int clRet;
1494     cl_platform_id platformID;
1495     clRet = clGetPlatformIDs(1, &platformID, NULL);
1496     if (clRet != CL_SUCCESS) {
1497         IMAGE_LOGE("astc clGetPlatformIDs failed ret %{public}d!", clRet);
1498         return CL_ASTC_ENC_FAILED;
1499     }
1500     clRet = clGetDeviceIDs(platformID, CL_DEVICE_TYPE_GPU, 1, &clAstcHandle->deviceID, NULL);
1501     if (clRet != CL_SUCCESS) {
1502         IMAGE_LOGE("astc clGetDeviceIDs failed ret %{public}d!", clRet);
1503         return CL_ASTC_ENC_FAILED;
1504     }
1505     clAstcHandle->context = clCreateContext(0, 1, &clAstcHandle->deviceID, NULL, NULL, &clRet);
1506     if (clRet != CL_SUCCESS) {
1507         IMAGE_LOGE("astc clCreateContext failed ret %{public}d!", clRet);
1508         return CL_ASTC_ENC_FAILED;
1509     }
1510     cl_queue_properties props[] = {CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_HIGH_KHR, 0};
1511     clAstcHandle->queue = clCreateCommandQueueWithProperties(clAstcHandle->context,
1512         clAstcHandle->deviceID, props, &clRet);
1513     if (clRet != CL_SUCCESS) {
1514         IMAGE_LOGE("astc clCreateCommandQueueWithProperties failed ret %{public}d!", clRet);
1515         return CL_ASTC_ENC_FAILED;
1516     }
1517     if (AstcClBuildProgram(clAstcHandle, clBinPath) != CL_ASTC_ENC_SUCCESS) {
1518         IMAGE_LOGE("astc AstcClBuildProgram failed!");
1519         return CL_ASTC_ENC_FAILED;
1520     }
1521     return CL_ASTC_ENC_SUCCESS;
1522 }
1523 
AstcClCreate(ClAstcHandle ** handle,const std::string & clBinPath)1524 CL_ASTC_SHARE_LIB_API CL_ASTC_STATUS AstcClCreate(ClAstcHandle **handle, const std::string &clBinPath)
1525 {
1526     ClAstcHandle *clAstcHandle = static_cast<ClAstcHandle *>(calloc(1, sizeof(ClAstcHandle)));
1527     if (clAstcHandle == nullptr) {
1528         IMAGE_LOGE("astc AstcClCreate handle calloc failed!");
1529         return CL_ASTC_ENC_FAILED;
1530     }
1531     *handle = clAstcHandle;
1532     size_t numMaxBlocks = static_cast<size_t>(((MAX_WIDTH + DIM - 1) / DIM) * ((MAX_HEIGHT + DIM - 1) / DIM));
1533     clAstcHandle->encObj.blockErrs_ =
1534         static_cast<uint32_t *>(malloc((numMaxBlocks * sizeof(uint32_t)))); // 8MB mem Max
1535     if (clAstcHandle->encObj.blockErrs_ == nullptr) {
1536         IMAGE_LOGE("astc blockErrs_ malloc failed!");
1537         AstcClClose(*handle);
1538         return CL_ASTC_ENC_FAILED;
1539     }
1540     if (AstcCreateClKernel(clAstcHandle, clBinPath) != CL_ASTC_ENC_SUCCESS) {
1541         IMAGE_LOGE("astc AstcCreateClKernel failed!");
1542         AstcClClose(*handle);
1543         return CL_ASTC_ENC_FAILED;
1544     }
1545     return CL_ASTC_ENC_SUCCESS;
1546 }
1547 
AstcClEncImageCheckImageOption(const ClAstcImageOption * imageIn)1548 static CL_ASTC_STATUS AstcClEncImageCheckImageOption(const ClAstcImageOption *imageIn)
1549 {
1550     if ((imageIn->width <= 0) || (imageIn->height <= 0) || (imageIn->stride < imageIn->width)) {
1551         IMAGE_LOGE("astc AstcClEncImage width <= 0 or height <= 0 or stride < width!");
1552         return CL_ASTC_ENC_FAILED;
1553     }
1554     if ((imageIn->width > MAX_WIDTH) || (imageIn->height > MAX_HEIGHT)) {
1555         IMAGE_LOGE("astc AstcClEncImage width[%{public}d] \
1556             need be [1, %{public}d] and height[%{public}d] need be [1, %{public}d]", \
1557             imageIn->width, MAX_WIDTH, imageIn->height, MAX_HEIGHT);
1558         return CL_ASTC_ENC_FAILED;
1559     }
1560     return CL_ASTC_ENC_SUCCESS;
1561 }
1562 
AstcClFillImage(ClAstcImageOption * imageIn,uint8_t * data,int32_t stride,int32_t width,int32_t height)1563 CL_ASTC_SHARE_LIB_API CL_ASTC_STATUS AstcClFillImage(ClAstcImageOption *imageIn, uint8_t *data, int32_t stride,
1564     int32_t width, int32_t height)
1565 {
1566     if (imageIn == nullptr) {
1567         IMAGE_LOGE("astc AstcClFillImage imageIn is  nullptr!");
1568         return CL_ASTC_ENC_FAILED;
1569     }
1570     imageIn->data = data;
1571     imageIn->stride = stride;
1572     imageIn->width = width;
1573     imageIn->height = height;
1574     if (AstcClEncImageCheckImageOption(imageIn) != CL_ASTC_ENC_SUCCESS) {
1575         IMAGE_LOGE("astc AstcClEncImageCheckImageOption failed!");
1576         return CL_ASTC_ENC_FAILED;
1577     }
1578     return CL_ASTC_ENC_SUCCESS;
1579 }
1580 
GenAstcHeader(uint8_t * buffer,uint8_t blockX,uint8_t blockY,uint32_t dimX,uint32_t dimY)1581 static void GenAstcHeader(uint8_t *buffer, uint8_t blockX, uint8_t blockY, uint32_t dimX, uint32_t dimY)
1582 {
1583     uint8_t *headInfo = buffer;
1584     *headInfo++ = MAGIC_FILE_CONSTANT & BYTES_MASK;
1585     *headInfo++ = (MAGIC_FILE_CONSTANT >> BIT_SHIFT_8BITS) & BYTES_MASK;
1586     *headInfo++ = (MAGIC_FILE_CONSTANT >> BIT_SHIFT_16BITS) & BYTES_MASK;
1587     *headInfo++ = (MAGIC_FILE_CONSTANT >> BIT_SHIFT_24BITS) & BYTES_MASK;
1588     *headInfo++ = static_cast<uint8_t>(blockX);
1589     *headInfo++ = static_cast<uint8_t>(blockY);
1590     *headInfo++ = 1;
1591     *headInfo++ = dimX & BYTES_MASK;
1592     *headInfo++ = (dimX >> BIT_SHIFT_8BITS) & BYTES_MASK;
1593     *headInfo++ = (dimX >> BIT_SHIFT_16BITS) & BYTES_MASK;
1594     *headInfo++ = dimY & BYTES_MASK;
1595     *headInfo++ = (dimY >> BIT_SHIFT_8BITS) & BYTES_MASK;
1596     *headInfo++ = (dimY >> BIT_SHIFT_16BITS) & BYTES_MASK;
1597     *headInfo++ = 1;
1598     *headInfo++ = 0;
1599     *headInfo++ = 0;
1600 }
1601 
ReleaseClAstcObj(ClAstcObjEnc * obj)1602 static void ReleaseClAstcObj(ClAstcObjEnc *obj)
1603 {
1604     cl_int clRet;
1605     if (obj != nullptr) {
1606         if (obj->inputImage != nullptr) {
1607             clRet = clReleaseMemObject(obj->inputImage);
1608             if (clRet != CL_SUCCESS) {
1609                 IMAGE_LOGE("astc inputImage release failed ret %{public}d!", clRet);
1610             }
1611             obj->inputImage = nullptr;
1612         }
1613         if (obj->astcResult != nullptr) {
1614             clRet = clReleaseMemObject(obj->astcResult);
1615             if (clRet != CL_SUCCESS) {
1616                 IMAGE_LOGE("astc astcResult release failed ret %{public}d!", clRet);
1617             }
1618             obj->astcResult = nullptr;
1619         }
1620         if (obj->errBuffer != nullptr) {
1621             clRet = clReleaseMemObject(obj->errBuffer);
1622             if (clRet != CL_SUCCESS) {
1623                 IMAGE_LOGE("astc errBuffer release failed ret %{public}d!", clRet);
1624             }
1625             obj->errBuffer = nullptr;
1626         }
1627     }
1628 }
1629 
// Scans the first numBlocks entries of blockErrs.
//
// sumVal: reset to 0, then receives the sum of the entries (32-bit unsigned
//         arithmetic, so it wraps on overflow).
// maxVal: raised to the largest entry seen; it is NOT reset, so the value
//         passed in acts as the starting maximum.
// A nullptr blockErrs leaves maxVal untouched and sumVal at 0.
//
// The comparison is done on the integers directly rather than through the
// floating-point fmax, so no <cmath> dependency or double conversion is needed.
static void GetMaxAndSumVal(size_t numBlocks, uint32_t *blockErrs, uint32_t &maxVal, uint32_t &sumVal)
{
    sumVal = 0;
    if (blockErrs == nullptr) {
        return;
    }
    for (size_t i = 0; i < numBlocks; i++) {
        sumVal += blockErrs[i];
        if (blockErrs[i] > maxVal) {
            maxVal = blockErrs[i];
        }
    }
}
1638 
ClCreateBufferAndImage(const ClAstcImageOption * imageIn,ClAstcHandle * clAstcHandle,ClAstcObjEnc * encObj)1639 static CL_ASTC_STATUS ClCreateBufferAndImage(const ClAstcImageOption *imageIn,
1640     ClAstcHandle *clAstcHandle, ClAstcObjEnc *encObj)
1641 {
1642     uint8_t *data = imageIn->data;
1643     int32_t stride = imageIn->stride;
1644     int32_t width = imageIn->width;
1645     int32_t height = imageIn->height;
1646     size_t numBlocks = static_cast<size_t>(((width + DIM - 1) / DIM) * ((height + DIM - 1) / DIM));
1647     uint32_t *blockErrs = encObj->blockErrs_;
1648     size_t blockErrBytes = sizeof(uint32_t) * numBlocks;
1649     encObj->astcSize = numBlocks * TEXTURE_BLOCK_BYTES;
1650     if ((blockErrs == nullptr) || (memset_s(blockErrs, blockErrBytes, 0, blockErrBytes))) {
1651         IMAGE_LOGE("astc blockErrs is nullptr or memset failed!");
1652         return CL_ASTC_ENC_FAILED;
1653     }
1654     cl_image_format imageFormat = { CL_RGBA, CL_UNORM_INT8 };
1655     cl_image_desc desc = { CL_MEM_OBJECT_IMAGE2D, stride, height };
1656     cl_int clRet;
1657     encObj->inputImage = clCreateImage(clAstcHandle->context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, &imageFormat,
1658         &desc, data, &clRet);
1659     if (clRet != CL_SUCCESS) {
1660         IMAGE_LOGE("astc clCreateImage failed ret %{public}d!", clRet);
1661         return CL_ASTC_ENC_FAILED;
1662     }
1663     encObj->astcResult = clCreateBuffer(clAstcHandle->context,
1664         CL_MEM_ALLOC_HOST_PTR, encObj->astcSize, NULL, &clRet);
1665     if (clRet != CL_SUCCESS) {
1666         IMAGE_LOGE("astc clCreateBuffer astcResult failed ret %{public}d!", clRet);
1667         return CL_ASTC_ENC_FAILED;
1668     }
1669     encObj->errBuffer = clCreateBuffer(clAstcHandle->context, CL_MEM_USE_HOST_PTR, blockErrBytes, blockErrs, &clRet);
1670     if (clRet != CL_SUCCESS) {
1671         IMAGE_LOGE("astc clCreateBuffer errBuffer failed ret %{public}d!", clRet);
1672         return CL_ASTC_ENC_FAILED;
1673     }
1674     return CL_ASTC_ENC_SUCCESS;
1675 }
1676 
ClKernelArgSet(ClAstcHandle * clAstcHandle,ClAstcObjEnc * encObj,int width,int height)1677 static CL_ASTC_STATUS ClKernelArgSet(ClAstcHandle *clAstcHandle, ClAstcObjEnc *encObj, int width, int height)
1678 {
1679     int32_t kernelId = 0;
1680     cl_int clRet = clSetKernelArg(clAstcHandle->kernel, kernelId++, sizeof(cl_mem), &encObj->inputImage);
1681     if (clRet != CL_SUCCESS) {
1682         IMAGE_LOGE("astc clSetKernelArg inputImage failed ret %{public}d!", clRet);
1683         return CL_ASTC_ENC_FAILED;
1684     }
1685     clRet = clSetKernelArg(clAstcHandle->kernel, kernelId++, sizeof(cl_mem), &encObj->astcResult);
1686     if (clRet != CL_SUCCESS) {
1687         IMAGE_LOGE("astc clSetKernelArg astcResult failed ret %{public}d!", clRet);
1688         return CL_ASTC_ENC_FAILED;
1689     }
1690     clRet = clSetKernelArg(clAstcHandle->kernel, kernelId++, sizeof(cl_mem), &encObj->errBuffer);
1691     if (clRet != CL_SUCCESS) {
1692         IMAGE_LOGE("astc clSetKernelArg errBuffer failed ret %{public}d!", clRet);
1693         return CL_ASTC_ENC_FAILED;
1694     }
1695     clRet = clSetKernelArg(clAstcHandle->kernel, kernelId++, sizeof(int), &width);
1696     if (clRet != CL_SUCCESS) {
1697         IMAGE_LOGE("astc clSetKernelArg width failed ret %{public}d!", clRet);
1698         return CL_ASTC_ENC_FAILED;
1699     }
1700     clRet = clSetKernelArg(clAstcHandle->kernel, kernelId++, sizeof(int), &height);
1701     if (clRet != CL_SUCCESS) {
1702         IMAGE_LOGE("astc clSetKernelArg height failed ret %{public}d!", clRet);
1703         return CL_ASTC_ENC_FAILED;
1704     }
1705     return CL_ASTC_ENC_SUCCESS;
1706 }
1707 
ClKernelArgSetAndRun(ClAstcHandle * clAstcHandle,ClAstcObjEnc * encObj,int width,int height)1708 static CL_ASTC_STATUS ClKernelArgSetAndRun(ClAstcHandle *clAstcHandle, ClAstcObjEnc *encObj, int width, int height)
1709 {
1710     if (ClKernelArgSet(clAstcHandle, encObj, width, height) != CL_ASTC_ENC_SUCCESS) {
1711         IMAGE_LOGE("astc ClKernelArgSet failed!");
1712         return CL_ASTC_ENC_FAILED;
1713     }
1714     size_t local[] = {WORK_GROUP_SIZE, WORK_GROUP_SIZE};
1715     size_t global[GLOBAL_WH_NUM_CL];
1716     global[0] = static_cast<size_t>((width + DIM - 1) / DIM);
1717     global[1] = static_cast<size_t>((height + DIM - 1) / DIM);
1718     size_t localMax;
1719     cl_int clRet = clGetKernelWorkGroupInfo(clAstcHandle->kernel, clAstcHandle->deviceID, CL_KERNEL_WORK_GROUP_SIZE,
1720         sizeof(size_t), &localMax, nullptr);
1721     if (clRet != CL_SUCCESS) {
1722         IMAGE_LOGE("astc clGetKernelWorkGroupInfo failed ret %{public}d!", clRet);
1723         return CL_ASTC_ENC_FAILED;
1724     }
1725     while (local[0] * local[1] > localMax) {
1726         local[0]--;
1727         local[1]--;
1728     }
1729     bool invalidLocal = (local[0] < 1) || (local[1] < 1);
1730     if (invalidLocal) {
1731         IMAGE_LOGE("astc ClKernelArgSetAndRun local set failed!");
1732         return CL_ASTC_ENC_FAILED;
1733     }
1734     clRet = clEnqueueNDRangeKernel(clAstcHandle->queue, clAstcHandle->kernel, GLOBAL_WH_NUM_CL, nullptr, global, local,
1735         0, nullptr, nullptr);
1736     if (clRet != CL_SUCCESS) {
1737         IMAGE_LOGE("astc clEnqueueNDRangeKernel failed ret %{public}d!", clRet);
1738         return CL_ASTC_ENC_FAILED;
1739     }
1740     clRet = clFinish(clAstcHandle->queue);
1741     if (clRet != CL_SUCCESS) {
1742         IMAGE_LOGE("astc clFinish failed ret %{public}d!", clRet);
1743         return CL_ASTC_ENC_FAILED;
1744     }
1745     return CL_ASTC_ENC_SUCCESS;
1746 }
1747 
ClReadAstcBufAndBlockError(ClAstcHandle * clAstcHandle,ClAstcObjEnc * encObj,const ClAstcImageOption * imageIn,uint8_t * buffer)1748 static CL_ASTC_STATUS ClReadAstcBufAndBlockError(ClAstcHandle *clAstcHandle, ClAstcObjEnc *encObj,
1749     const ClAstcImageOption *imageIn, uint8_t *buffer)
1750 {
1751     cl_int clRet = clEnqueueReadBuffer(clAstcHandle->queue, encObj->astcResult, CL_TRUE,
1752         0, encObj->astcSize, buffer + TEXTURE_HEAD_BYTES, 0, NULL, NULL);
1753     if (clRet != CL_SUCCESS) {
1754         IMAGE_LOGE("astc clEnqueueReadBuffer astcResult failed ret %{public}d!", clRet);
1755         return CL_ASTC_ENC_FAILED;
1756     }
1757     uint32_t maxVal = 0;
1758     uint32_t sumVal = 0;
1759     size_t numBlocks = ((imageIn->width + DIM - 1) / DIM) * ((imageIn->height + DIM - 1) / DIM);
1760     clRet = clEnqueueReadBuffer(clAstcHandle->queue, encObj->errBuffer, CL_TRUE,
1761         0, sizeof(uint32_t) * numBlocks, encObj->blockErrs_, 0, NULL, NULL);
1762     if (clRet != CL_SUCCESS) {
1763         IMAGE_LOGE("astc clEnqueueReadBuffer blockErrs failed ret %{public}d!", clRet);
1764         return CL_ASTC_ENC_FAILED;
1765     }
1766     GetMaxAndSumVal(numBlocks, encObj->blockErrs_, maxVal, sumVal);
1767     return CL_ASTC_ENC_SUCCESS;
1768 }
1769 
AstcClEncImage(ClAstcHandle * clAstcHandle,const ClAstcImageOption * imageIn,uint8_t * buffer)1770 CL_ASTC_SHARE_LIB_API CL_ASTC_STATUS AstcClEncImage(ClAstcHandle *clAstcHandle,
1771     const ClAstcImageOption *imageIn, uint8_t *buffer)
1772 {
1773     if ((clAstcHandle == nullptr) || (imageIn == nullptr) || (buffer == nullptr)) {
1774         IMAGE_LOGE("astc AstcClEncImage clAstcHandle or imageIn or buffer is nullptr!");
1775         return CL_ASTC_ENC_FAILED;
1776     }
1777     if (AstcClEncImageCheckImageOption(imageIn) != CL_ASTC_ENC_SUCCESS) {
1778         IMAGE_LOGE("astc AstcClEncImageCheckImageOption failed!");
1779         return CL_ASTC_ENC_FAILED;
1780     }
1781     GenAstcHeader(buffer, DIM, DIM, imageIn->width, imageIn->height);
1782     ClAstcObjEnc *encObj = &clAstcHandle->encObj;
1783     if (encObj == nullptr) {
1784         IMAGE_LOGE("astc AstcClEncImage clAstcHandle encObj is nullptr!");
1785         return CL_ASTC_ENC_FAILED;
1786     }
1787     if (ClCreateBufferAndImage(imageIn, clAstcHandle, encObj) != CL_ASTC_ENC_SUCCESS) {
1788         ReleaseClAstcObj(encObj);
1789         IMAGE_LOGE("astc ClCreateBufferAndImage failed!");
1790         return CL_ASTC_ENC_FAILED;
1791     }
1792     if (ClKernelArgSetAndRun(clAstcHandle, encObj, imageIn->width, imageIn->height) != CL_ASTC_ENC_SUCCESS) {
1793         ReleaseClAstcObj(encObj);
1794         IMAGE_LOGE("astc ClKernelArgSetAndRun failed!");
1795         return CL_ASTC_ENC_FAILED;
1796     }
1797     if (ClReadAstcBufAndBlockError(clAstcHandle, encObj, imageIn, buffer) != CL_ASTC_ENC_SUCCESS) {
1798         ReleaseClAstcObj(encObj);
1799         IMAGE_LOGE("astc ClReadAstcBufAndBlockError failed!");
1800         return CL_ASTC_ENC_FAILED;
1801     }
1802     ReleaseClAstcObj(encObj);
1803     return CL_ASTC_ENC_SUCCESS;
1804 }
1805 }
1806 }
1807 }