• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2023 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "image_compressor.h"
17 
18 #include <unistd.h>
19 #include <fstream>
20 
21 #include "securec.h"
22 #include "media_errors.h"
23 #include "image_log.h"
24 
25 #undef LOG_DOMAIN
26 #define LOG_DOMAIN LOG_TAG_DOMAIN_ID_PLUGIN
27 
28 #undef LOG_TAG
29 #define LOG_TAG "ClAstcEnc"
30 
31 namespace OHOS {
32 namespace ImagePlugin {
33 namespace AstcEncBasedCl {
// Host-side limits and byte-layout constants. Nothing in this part of the file uses them,
// so the notes here describe only what the values themselves show.
34 constexpr int MAX_WIDTH = 8192;
35 constexpr int MAX_HEIGHT = 4096;
36 constexpr int TEXTURE_HEAD_BYTES = 16;
37 constexpr int TEXTURE_BLOCK_BYTES = 16;
38 constexpr int MAGIC_FILE_CONSTANT = 0x5CA1AB13;
39 constexpr int DIM = 4; // same value as the DIM macro inside the OpenCL source string
40 constexpr uint8_t BIT_SHIFT_8BITS = 8;
41 constexpr uint8_t BIT_SHIFT_16BITS = 16;
42 constexpr uint8_t BIT_SHIFT_24BITS = 24;
43 constexpr uint8_t BYTES_MASK = 0xFF; // all eight bits of one byte set
44 constexpr uint8_t STRIDE_RGBA_LOG2 = 2; // 1 << 2 == 4; presumably bytes per RGBA pixel - confirm at the use site
45 constexpr uint8_t GLOBAL_WH_NUM_CL = 2;
46 constexpr size_t MAX_MALLOC_BYTES = 10000000; // max 10MB
47 constexpr size_t WORK_GROUP_SIZE = 8;
48 
49 const char *g_programSource = R"(
50 // Notice: the code from line 42 to line 1266 is written in the OpenCL C language.
51 // OpenCL C only supports C-style code; constexpr and static_cast may not be supported on some platforms.
// Block-shape, weight-count and endpoint-mode constants for the kernels.
// Only the values are visible in this part of the file; how each one is applied is defined further down.
52 #define DIM (4)
53 #define BLOCK_SIZE (16) // numerically DIM * DIM
54 #define X_GRIDS (4)
55 #define Y_GRIDS (4)
56 #define SMALL_VALUE (0.00001f) // avoid divide 0
57 #define BLOCK_MAX_WEIGHTS (64)
58 #define BLOCK_MAX_WEIGHTS_SHORT (64)
59 #define BLOCK_MAX_WEIGHTS_FLOAT (64.0f)
60 #define BLOCK_MAX_WEIGHTS_2PLANE (32)
61 #define WEIGHTS_PLANE2_OFFSET (32)
62 #define CEM_LDR_RGB_DIRECT (8) // CEM_* presumably are ASTC color endpoint mode numbers - confirm against the ASTC spec
63 #define CEM_LDR_RGB_BASE_OFFSET (9)
64 #define CEM_LDR_RGBA_DIRECT (12)
65 #define CEM_LDR_RGBA_BASE_OFFSET (13)
66 #define PIXEL_MAX_VALUE (255.0f)
67 
// Quantization-level identifiers. QUANT_n is the zero-based position of level n in the
// ascending list 2, 3, 4, 5, 6, 8, ..., 256, and QUANT_MAX (21) is the number of levels.
// g_bitsTritsQuintsTable and g_colorQuantTables are both sized with QUANT_MAX and store
// one row per identifier in this same order.
68 #define QUANT_2 (0)
69 #define QUANT_3 (1)
70 #define QUANT_4 (2)
71 #define QUANT_5 (3)
72 #define QUANT_6 (4)
73 #define QUANT_8 (5)
74 #define QUANT_10 (6)
75 #define QUANT_12 (7)
76 #define QUANT_16 (8)
77 #define QUANT_20 (9)
78 #define QUANT_24 (10)
79 #define QUANT_32 (11)
80 #define QUANT_40 (12)
81 #define QUANT_48 (13)
82 #define QUANT_64 (14)
83 #define QUANT_80 (15)
84 #define QUANT_96 (16)
85 #define QUANT_128 (17)
86 #define QUANT_160 (18)
87 #define QUANT_192 (19)
88 #define QUANT_256 (20)
89 #define QUANT_MAX (21)
90 
// Table dimensions and partition limits used by the declarations that follow.
91 #define WEIGHT_RANGE_6 (6)
92 #define WEIGHT_QUANTIZE_NUM (32) // entries per group in g_scrambleTable and g_weightUnquant
93 #define COLOR_NUM (256) // entries per quant level in g_colorQuantTables
94 #define MAX_PARTITION_COUNT (4)
95 #define PARTITION_COUNT (2) // length of PartInfo.bitmaps
96 #define MAX_BLOCK_SIZE (32) // GetPart returns 0 for indices at or above this
97 #define WEIGHT_QUANTIZE_GROUP (12) // number of groups in g_scrambleTable and g_weightUnquant
98 #define SECOND_PARTITION_INDEX (1) // the PartInfo.bitmaps slot that GetPart reads
99 
// Named literals (zero/one in several types) and endpoint/component indices.
// The EPx_c_INDEX values interleave the two endpoints per channel in R, G, B, A order
// (0..7), and COLOR_COMPONENT_NUM (8) equals the number of those indices.
100 #define START_INDEX (0)
101 #define FLOAT_ZERO (0.0f)
102 #define FLOAT_ONE (1.0f)
103 #define INT_ZERO (0)
104 #define INT_ONE (1)
105 #define SHORT_ZERO (0)
106 #define UINT_ZERO (0)
107 #define UINT_ONE (1u)
108 #define EP0_INDEX (0)
109 #define EP1_INDEX (1)
110 #define END_POINT_NUM (2)
111 #define EP0_R_INDEX (0)
112 #define EP1_R_INDEX (1)
113 #define EP0_G_INDEX (2)
114 #define EP1_G_INDEX (3)
115 #define EP0_B_INDEX (4)
116 #define EP1_B_INDEX (5)
117 #define EP0_A_INDEX (6)
118 #define EP1_A_INDEX (7)
119 #define COLOR_COMPONENT_NUM (8)
120 #define QUANTIZE_WEIGHT_MIN (0)
121 
// Trit/quint packing sizes and ISE_n named literals.
// 3^TRIT_BLOCK_SIZE = 243 and 5^QUINT_BLOCK_SIZE = 125 match the lengths of
// g_integerFromTrits and g_integerFromQuints. Presumably TRIT_MSB_SIZE and QUINT_MSB_SIZE
// are the packed bit widths (243 <= 2^8, 125 <= 2^7) - confirm in the encoding routines.
122 #define TRIT_MSB_SIZE (8)
123 #define TRIT_BLOCK_SIZE (5)
124 #define TRIT_ROUND_NUM (4)
125 #define QUINT_MSB_SIZE (7)
126 #define QUINT_BLOCK_SIZE (3)
127 #define QUINT_ROUND_NUM (2)
128 #define ISE_0 (0)
129 #define ISE_1 (1)
130 #define ISE_2 (2)
131 #define ISE_3 (3)
132 #define ISE_4 (4)
133 
// Named literals 0..15. Presumably the indices of the 16 weights of one
// X_GRIDS x Y_GRIDS (4 x 4) grid - confirm against the kernels that use them.
134 #define WEIGHT_0 (0)
135 #define WEIGHT_1 (1)
136 #define WEIGHT_2 (2)
137 #define WEIGHT_3 (3)
138 #define WEIGHT_4 (4)
139 #define WEIGHT_5 (5)
140 #define WEIGHT_6 (6)
141 #define WEIGHT_7 (7)
142 #define WEIGHT_8 (8)
143 #define WEIGHT_9 (9)
144 #define WEIGHT_10 (10)
145 #define WEIGHT_11 (11)
146 #define WEIGHT_12 (12)
147 #define WEIGHT_13 (13)
148 #define WEIGHT_14 (14)
149 #define WEIGHT_15 (15)
150 
// Bit positions and low-bit masks. Each MASK_FOR_nBITS value has exactly its n lowest bits set.
151 #define BYTE_1_POS (8) // BYTE_n_POS is 8 * n
152 #define BYTE_2_POS (16)
153 #define BYTE_3_POS (24)
154 #define BYTE_MASK (0xFFu)
155 #define CEM_POS (13)
156 #define COLOR_EP_POS (17)
157 #define COLOR_EP_HIGH_BIT (15)
158 #define MASK_FOR_4BITS (0xFu)
159 #define MASK_FOR_15BITS (0x7FFFu)
160 #define MASK_FOR_17BITS (0x1FFFFu)
161 
// Bit offsets, positions and small masks; by their names these describe block-mode fields,
// but the code that consumes them is outside this part of the file.
// NOTE(review): BLOCK_WIDTH_POS and BLOCK_HEIGHT_POS have the same value (5u) - confirm
// at the use site that this is intentional.
162 #define HEIGHT_BITS_OFFSET (2)
163 #define WIDTH_BITS_OFFSET (4)
164 #define MASK_FOR_2BITS (0x3u)
165 #define MASK_FOR_1BITS (0x1u) // GetPart uses this to isolate a single bitmap bit
166 #define WEIGHT_METHOD_OFFSET (2u)
167 #define WEIGHT_METHOD_RIGHT_BIT (1)
168 #define WEIGHT_METHOD_POS (4u)
169 #define BLOCK_WIDTH_POS (5u)
170 #define BLOCK_HEIGHT_POS (5u)
171 #define WEIGHT_PRECISION_POS (9u)
172 #define IS_DUALPLANE_POS (10u)
173 
// Partition description.
//   partid  - integer identifier; nothing in this part of the file reads it.
//   bitmaps - PARTITION_COUNT (2) unsigned bit masks; GetPart reads bit i of
//             bitmaps[SECOND_PARTITION_INDEX].
174 typedef struct {
175     int partid;
176     uint bitmaps[PARTITION_COUNT];
177 } PartInfo;
178 
// Returns bit i of partInfo->bitmaps[SECOND_PARTITION_INDEX] as an int (0 or 1).
// Any index at or above MAX_BLOCK_SIZE has no bit in the bitmap and yields 0.
int GetPart(PartInfo* partInfo, int i)
{
    if (i < MAX_BLOCK_SIZE) {
        uint secondPartitionBits = partInfo->bitmaps[SECOND_PARTITION_INDEX];
        uint memberBit = (secondPartitionBits >> i) & MASK_FOR_1BITS;
        return (int)memberBit;
    }
    return 0;
}
186 
// WEIGHT_QUANTIZE_GROUP (12) groups of WEIGHT_QUANTIZE_NUM (32) shorts, stored back to back.
// Each group starts with a permutation of 0..n-1, where n is 2, 3, 4, 5, 6, 8, 10, 12, 16,
// 20, 24 and 32 for successive groups, and is zero-padded to 32 entries.
// Presumably group k reorders the weight values of the k-th weight quantization level - confirm in the kernels.
187 __constant short g_scrambleTable[WEIGHT_QUANTIZE_GROUP * WEIGHT_QUANTIZE_NUM] = {
188     0, 1,
189     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
190     0,
191     0, 1, 2,
192     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
193     0, 1, 2, 3,
194     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
195     0, 1, 2, 3, 4,
196     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
197     0, 2, 4, 5, 3, 1,
198     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
199     0, 1, 2, 3, 4, 5, 6, 7,
200     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
201     0, 2, 4, 6, 8, 9, 7, 5, 3, 1,
202     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
203     0, 4, 8, 2, 6, 10, 11, 7, 3, 9, 5, 1,
204     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
205     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
206     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
207     0, 4, 8, 12, 16, 2, 6, 10, 14, 18, 19, 15, 11, 7, 3, 17, 13, 9, 5, 1,
208     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
209     0, 8, 16, 2, 10, 18, 4, 12, 20, 6, 14, 22, 23, 15, 7, 21, 13, 5, 19,
210     11, 3, 17, 9, 1, 0, 0, 0, 0, 0, 0, 0, 0,
211     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
212     20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
213 };
214 
// WEIGHT_QUANTIZE_GROUP (12) groups of WEIGHT_QUANTIZE_NUM (32) shorts, stored back to back,
// with the same group sizes as g_scrambleTable (2, 3, 4, 5, 6, 8, 10, 12, 16, 20, 24, 32 values,
// then zero padding). Every populated value lies in 0..64.
// Presumably these are the unquantized weights for each quantization level - confirm in the kernels.
215 __constant short g_weightUnquant[WEIGHT_QUANTIZE_GROUP * WEIGHT_QUANTIZE_NUM] = {
216     0, 64,
217     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
218     0, 32, 64,
219     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
220     0, 21, 43, 64,
221     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
222     0, 16, 32, 48, 64,
223     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
224     0, 64, 12, 52, 25, 39,
225     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
226     0, 9, 18, 27, 37, 46, 55, 64,
227     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
228     0, 64, 7, 57, 14, 50, 21, 43, 28, 36,
229     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
230     0, 64, 17, 47, 5, 59, 23, 41, 11, 53, 28, 36,
231     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
232     0, 4, 8, 12, 17, 21, 25, 29, 35, 39, 43, 47, 52, 56, 60, 64,
233     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
234     0, 64, 16, 48, 3, 61, 19, 45, 6, 58, 23, 41, 9, 55, 26, 38, 13, 51, 29, 35,
235     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
236     0, 64, 8, 56, 16, 48, 24, 40, 2, 62, 11, 53, 19, 45, 27, 37, 5, 59, 13, 51, 22, 42, 30, 34,
237     0, 0, 0, 0, 0, 0, 0, 0,
238     0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 34, 36, 38,
239     40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64
240 };
// Every entry lies in 0..255. Presumably the index is formed from TRIT_BLOCK_SIZE (5)
// base-3 digits - confirm the digit order where the table is read.
241 __constant short g_integerFromTrits[243] = { // 243 (= 3^5) integers derived from trits
242     0, 1, 2, 4, 5, 6, 8, 9, 10,
243     16, 17, 18, 20, 21, 22, 24, 25, 26,
244     3, 7, 15, 19, 23, 27, 12, 13, 14,
245     32, 33, 34, 36, 37, 38, 40, 41, 42,
246     48, 49, 50, 52, 53, 54, 56, 57, 58,
247     35, 39, 47, 51, 55, 59, 44, 45, 46,
248     64, 65, 66, 68, 69, 70, 72, 73, 74,
249     80, 81, 82, 84, 85, 86, 88, 89, 90,
250     67, 71, 79, 83, 87, 91, 76, 77, 78,
251 
252     128, 129, 130, 132, 133, 134, 136, 137, 138,
253     144, 145, 146, 148, 149, 150, 152, 153, 154,
254     131, 135, 143, 147, 151, 155, 140, 141, 142,
255     160, 161, 162, 164, 165, 166, 168, 169, 170,
256     176, 177, 178, 180, 181, 182, 184, 185, 186,
257     163, 167, 175, 179, 183, 187, 172, 173, 174,
258     192, 193, 194, 196, 197, 198, 200, 201, 202,
259     208, 209, 210, 212, 213, 214, 216, 217, 218,
260     195, 199, 207, 211, 215, 219, 204, 205, 206,
261 
262     96, 97, 98, 100, 101, 102, 104, 105, 106,
263     112, 113, 114, 116, 117, 118, 120, 121, 122,
264     99, 103, 111, 115, 119, 123, 108, 109, 110,
265     224, 225, 226, 228, 229, 230, 232, 233, 234,
266     240, 241, 242, 244, 245, 246, 248, 249, 250,
267     227, 231, 239, 243, 247, 251, 236, 237, 238,
268     28, 29, 30, 60, 61, 62, 92, 93, 94,
269     156, 157, 158, 188, 189, 190, 220, 221, 222,
270     31, 63, 127, 159, 191, 255, 252, 253, 254
271 };
272 
// One row per quant level, in QUANT_2 .. QUANT_256 order. Each row is the triple
// (bits, trits, quints), and 2^bits * 3^trits * 5^quints equals the level count named in
// the row comment (e.g. RANGE_6 = 2 * 3, RANGE_10 = 2 * 5, RANGE_256 = 2^8).
273 __constant int g_bitsTritsQuintsTable[QUANT_MAX * 3] = { // 3 numbers per quant level: bits, trits, quints
274     1, 0, 0, // RANGE_2
275     0, 1, 0, // RANGE_3
276     2, 0, 0, // RANGE_4
277     0, 0, 1, // RANGE_5
278     1, 1, 0, // RANGE_6
279     3, 0, 0, // RANGE_8
280     1, 0, 1, // RANGE_10
281     2, 1, 0, // RANGE_12
282     4, 0, 0, // RANGE_16
283     2, 0, 1, // RANGE_20
284     3, 1, 0, // RANGE_24
285     5, 0, 0, // RANGE_32
286     3, 0, 1, // RANGE_40
287     4, 1, 0, // RANGE_48
288     6, 0, 0, // RANGE_64
289     4, 0, 1, // RANGE_80
290     5, 1, 0, // RANGE_96
291     7, 0, 0, // RANGE_128
292     5, 0, 1, // RANGE_160
293     6, 1, 0, // RANGE_192
294     8, 0, 0 // RANGE_256
295 };
296 
// Every entry lies in 0..127. Presumably the index is formed from QUINT_BLOCK_SIZE (3)
// base-5 digits - confirm the digit order where the table is read.
297 __constant short g_integerFromQuints[125] = { // 125 (= 5^3) integers derived from quints
298     0, 1, 2, 3, 4, 8, 9, 10, 11, 12, 16, 17, 18, 19, 20, 24, 25, 26, 27, 28, 5, 13, 21, 29, 6,
299     32, 33, 34, 35, 36, 40, 41, 42, 43, 44, 48, 49, 50, 51, 52, 56, 57, 58, 59, 60, 37, 45, 53,
300     61, 14,
301     64, 65, 66, 67, 68, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 88, 89, 90, 91, 92, 69, 77, 85,
302     93, 22,
303     96, 97, 98, 99, 100, 104, 105, 106, 107, 108, 112, 113, 114, 115, 116, 120, 121, 122, 123,
304     124, 101, 109, 117, 125, 30,
305     102, 103, 70, 71, 38, 110, 111, 78, 79, 46, 118, 119, 86, 87, 54, 126, 127, 94, 95, 62, 39,
306     47, 55, 63, 31
307 };
308 
// QUANT_MAX (21) rows of COLOR_NUM (256) shorts stored contiguously, one row per quant level
// in QUANT_2 .. QUANT_256 order as labelled below. Within the row for level n every value
// lies in 0..n-1; the QUANT_256 row is the identity 0..255.
// Presumably read as g_colorQuantTables[quant * COLOR_NUM + value] with an 8-bit value - confirm in the kernels.
309 __constant short g_colorQuantTables[QUANT_MAX * COLOR_NUM] = {
310     // QUANT_2
311     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
312     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
313     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
314     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
315     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
316     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
317     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
318     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
319     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
320     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
321     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
322     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
323     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
324     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
325     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
326     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
327     // QUANT_3
328     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
329     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
330     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
331     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
332     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
333     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
334     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
335     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
336     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
337     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
338     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
339     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
340     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
341     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
342     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
343     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
344     // QUANT_4
345     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
346     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
347     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
348     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
349     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
350     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
351     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
352     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
353     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
354     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
355     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
356     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
357     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
358     2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
359     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
360     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
361     // QUANT_5
362     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
363     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
364     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
365     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
366     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
367     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
368     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
369     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
370     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
371     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
372     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
373     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
374     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
375     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
376     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
377     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
378     // QUANT_6
379     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
380     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2,
381     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
382     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
383     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4,
384     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
385     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
386     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
387     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
388     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
389     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
390     5, 5, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
391     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
392     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
393     3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
394     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
395     // QUANT_8
396     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
397     0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
398     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
399     1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2,
400     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
401     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3,
402     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
403     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
404     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
405     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
406     4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
407     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
408     5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6,
409     6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
410     6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7,
411     7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
412     // QUANT_10
413     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
414     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
415     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4,
416     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
417     4, 4, 4, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6,
418     6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
419     6, 6, 6, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
420     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
421     9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
422     9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 7, 7,
423     7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
424     7, 7, 7, 7, 7, 7, 7, 7, 7, 5, 5, 5, 5, 5, 5, 5,
425     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
426     5, 5, 5, 5, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
427     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
428     3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
429     // QUANT_12
430     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4,
431     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
432     4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
433     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2, 2, 2, 2, 2, 2,
434     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
435     2, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
436     6, 6, 6, 6, 6, 6, 6, 6, 6, 10, 10, 10, 10, 10, 10, 10,
437     10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
438     11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
439     11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7,
440     7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 3,
441     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
442     3, 3, 3, 3, 3, 3, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
443     9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 5, 5,
444     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
445     5, 5, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
446     // QUANT_16
447     0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
448     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
449     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3,
450     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
451     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5,
452     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6,
453     6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
454     7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
455     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
456     8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
457     9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
458     10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
459     11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
460     12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
461     13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
462     14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15,
463     // QUANT_20
464     0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4,
465     4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
466     8, 8, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
467     16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 2, 2, 2,
468     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6,
469     6, 6, 6, 6, 6, 6, 6, 6, 10, 10, 10, 10, 10, 10, 10, 10,
470     10, 10, 10, 10, 10, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
471     14, 14, 14, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
472     19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 15, 15, 15,
473     15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 11, 11, 11, 11, 11,
474     11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7,
475     7, 7, 7, 7, 7, 7, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
476     3, 3, 3, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
477     13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 9, 9,
478     9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 5, 5, 5, 5,
479     5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 1, 1, 1, 1, 1,
480     // QUANT_24
481     0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
482     8, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 2, 2, 2, 2,
483     2, 2, 2, 2, 2, 2, 2, 10, 10, 10, 10, 10, 10, 10, 10, 10,
484     10, 10, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 4, 4, 4,
485     4, 4, 4, 4, 4, 4, 4, 4, 12, 12, 12, 12, 12, 12, 12, 12,
486     12, 12, 12, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 6, 6,
487     6, 6, 6, 6, 6, 6, 6, 6, 6, 14, 14, 14, 14, 14, 14, 14,
488     14, 14, 14, 14, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
489     23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 15, 15, 15, 15,
490     15, 15, 15, 15, 15, 15, 15, 7, 7, 7, 7, 7, 7, 7, 7, 7,
491     7, 7, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 13, 13, 13,
492     13, 13, 13, 13, 13, 13, 13, 13, 5, 5, 5, 5, 5, 5, 5, 5,
493     5, 5, 5, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 11, 11,
494     11, 11, 11, 11, 11, 11, 11, 11, 11, 3, 3, 3, 3, 3, 3, 3,
495     3, 3, 3, 3, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 9,
496     9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1, 1, 1,
497     // QUANT_32
498     0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2,
499     2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4,
500     4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6,
501     6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8,
502     8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10,
503     10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 12,
504     12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13,
505     14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15,
506     16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17,
507     18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19,
508     19, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21,
509     21, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23,
510     23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25,
511     25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27,
512     27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29,
513     29, 29, 29, 30, 30, 30, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31,
514     // QUANT_40
515     0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16,
516     16, 24, 24, 24, 24, 24, 24, 32, 32, 32, 32, 32, 32, 32, 2, 2,
517     2, 2, 2, 2, 10, 10, 10, 10, 10, 10, 10, 18, 18, 18, 18, 18,
518     18, 26, 26, 26, 26, 26, 26, 26, 34, 34, 34, 34, 34, 34, 4, 4,
519     4, 4, 4, 4, 4, 12, 12, 12, 12, 12, 12, 20, 20, 20, 20, 20,
520     20, 20, 28, 28, 28, 28, 28, 28, 36, 36, 36, 36, 36, 36, 36, 6,
521     6, 6, 6, 6, 6, 14, 14, 14, 14, 14, 14, 14, 22, 22, 22, 22,
522     22, 22, 30, 30, 30, 30, 30, 30, 30, 38, 38, 38, 38, 38, 38, 38,
523     39, 39, 39, 39, 39, 39, 39, 31, 31, 31, 31, 31, 31, 31, 23, 23,
524     23, 23, 23, 23, 15, 15, 15, 15, 15, 15, 15, 7, 7, 7, 7, 7,
525     7, 37, 37, 37, 37, 37, 37, 37, 29, 29, 29, 29, 29, 29, 21, 21,
526     21, 21, 21, 21, 21, 13, 13, 13, 13, 13, 13, 5, 5, 5, 5, 5,
527     5, 5, 35, 35, 35, 35, 35, 35, 27, 27, 27, 27, 27, 27, 27, 19,
528     19, 19, 19, 19, 19, 11, 11, 11, 11, 11, 11, 11, 3, 3, 3, 3,
529     3, 3, 33, 33, 33, 33, 33, 33, 33, 25, 25, 25, 25, 25, 25, 17,
530     17, 17, 17, 17, 17, 17, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1,
531     // QUANT_48
532     0, 0, 0, 16, 16, 16, 16, 16, 16, 32, 32, 32, 32, 32, 2, 2,
533     2, 2, 2, 18, 18, 18, 18, 18, 18, 34, 34, 34, 34, 34, 4, 4,
534     4, 4, 4, 4, 20, 20, 20, 20, 20, 36, 36, 36, 36, 36, 6, 6,
535     6, 6, 6, 6, 22, 22, 22, 22, 22, 38, 38, 38, 38, 38, 38, 8,
536     8, 8, 8, 8, 24, 24, 24, 24, 24, 24, 40, 40, 40, 40, 40, 10,
537     10, 10, 10, 10, 26, 26, 26, 26, 26, 26, 42, 42, 42, 42, 42, 12,
538     12, 12, 12, 12, 12, 28, 28, 28, 28, 28, 44, 44, 44, 44, 44, 14,
539     14, 14, 14, 14, 14, 30, 30, 30, 30, 30, 46, 46, 46, 46, 46, 46,
540     47, 47, 47, 47, 47, 47, 31, 31, 31, 31, 31, 15, 15, 15, 15, 15,
541     15, 45, 45, 45, 45, 45, 29, 29, 29, 29, 29, 13, 13, 13, 13, 13,
542     13, 43, 43, 43, 43, 43, 27, 27, 27, 27, 27, 27, 11, 11, 11, 11,
543     11, 41, 41, 41, 41, 41, 25, 25, 25, 25, 25, 25, 9, 9, 9, 9,
544     9, 39, 39, 39, 39, 39, 39, 23, 23, 23, 23, 23, 7, 7, 7, 7,
545     7, 7, 37, 37, 37, 37, 37, 21, 21, 21, 21, 21, 5, 5, 5, 5,
546     5, 5, 35, 35, 35, 35, 35, 19, 19, 19, 19, 19, 19, 3, 3, 3,
547     3, 3, 33, 33, 33, 33, 33, 17, 17, 17, 17, 17, 17, 1, 1, 1,
548     // QUANT_64
549     0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4,
550     4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8,
551     8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12,
552     12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16,
553     16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19,
554     20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23,
555     24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27,
556     28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31,
557     32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35,
558     36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39,
559     40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43,
560     44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47,
561     47, 48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51,
562     51, 52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55,
563     55, 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59,
564     59, 60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63,
565     // QUANT_80
566     0, 0, 16, 16, 16, 32, 32, 32, 48, 48, 48, 48, 64, 64, 64, 2,
567     2, 2, 18, 18, 18, 34, 34, 34, 50, 50, 50, 50, 66, 66, 66, 4,
568     4, 4, 20, 20, 20, 36, 36, 36, 36, 52, 52, 52, 68, 68, 68, 6,
569     6, 6, 22, 22, 22, 38, 38, 38, 38, 54, 54, 54, 70, 70, 70, 8,
570     8, 8, 24, 24, 24, 24, 40, 40, 40, 56, 56, 56, 72, 72, 72, 10,
571     10, 10, 26, 26, 26, 26, 42, 42, 42, 58, 58, 58, 74, 74, 74, 12,
572     12, 12, 12, 28, 28, 28, 44, 44, 44, 60, 60, 60, 76, 76, 76, 14,
573     14, 14, 14, 30, 30, 30, 46, 46, 46, 62, 62, 62, 78, 78, 78, 78,
574     79, 79, 79, 79, 63, 63, 63, 47, 47, 47, 31, 31, 31, 15, 15, 15,
575     15, 77, 77, 77, 61, 61, 61, 45, 45, 45, 29, 29, 29, 13, 13, 13,
576     13, 75, 75, 75, 59, 59, 59, 43, 43, 43, 27, 27, 27, 27, 11, 11,
577     11, 73, 73, 73, 57, 57, 57, 41, 41, 41, 25, 25, 25, 25, 9, 9,
578     9, 71, 71, 71, 55, 55, 55, 39, 39, 39, 39, 23, 23, 23, 7, 7,
579     7, 69, 69, 69, 53, 53, 53, 37, 37, 37, 37, 21, 21, 21, 5, 5,
580     5, 67, 67, 67, 51, 51, 51, 51, 35, 35, 35, 19, 19, 19, 3, 3,
581     3, 65, 65, 65, 49, 49, 49, 49, 33, 33, 33, 17, 17, 17, 1, 1,
582     // QUANT_96
583     0, 0, 32, 32, 64, 64, 64, 2, 2, 2, 34, 34, 66, 66, 66, 4,
584     4, 4, 36, 36, 68, 68, 68, 6, 6, 6, 38, 38, 70, 70, 70, 8,
585     8, 8, 40, 40, 40, 72, 72, 10, 10, 10, 42, 42, 42, 74, 74, 12,
586     12, 12, 44, 44, 44, 76, 76, 14, 14, 14, 46, 46, 46, 78, 78, 16,
587     16, 16, 48, 48, 48, 80, 80, 80, 18, 18, 50, 50, 50, 82, 82, 82,
588     20, 20, 52, 52, 52, 84, 84, 84, 22, 22, 54, 54, 54, 86, 86, 86,
589     24, 24, 56, 56, 56, 88, 88, 88, 26, 26, 58, 58, 58, 90, 90, 90,
590     28, 28, 60, 60, 60, 92, 92, 92, 30, 30, 62, 62, 62, 94, 94, 94,
591     95, 95, 95, 63, 63, 63, 31, 31, 93, 93, 93, 61, 61, 61, 29, 29,
592     91, 91, 91, 59, 59, 59, 27, 27, 89, 89, 89, 57, 57, 57, 25, 25,
593     87, 87, 87, 55, 55, 55, 23, 23, 85, 85, 85, 53, 53, 53, 21, 21,
594     83, 83, 83, 51, 51, 51, 19, 19, 81, 81, 81, 49, 49, 49, 17, 17,
595     17, 79, 79, 47, 47, 47, 15, 15, 15, 77, 77, 45, 45, 45, 13, 13,
596     13, 75, 75, 43, 43, 43, 11, 11, 11, 73, 73, 41, 41, 41, 9, 9,
597     9, 71, 71, 71, 39, 39, 7, 7, 7, 69, 69, 69, 37, 37, 5, 5,
598     5, 67, 67, 67, 35, 35, 3, 3, 3, 65, 65, 65, 33, 33, 1, 1,
599     // QUANT_128
600     0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7,
601     8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15,
602     16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23,
603     24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30, 30, 31, 31,
604     32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37, 38, 38, 39, 39,
605     40, 40, 41, 41, 42, 42, 43, 43, 44, 44, 45, 45, 46, 46, 47, 47,
606     48, 48, 49, 49, 50, 50, 51, 51, 52, 52, 53, 53, 54, 54, 55, 55,
607     56, 56, 57, 57, 58, 58, 59, 59, 60, 60, 61, 61, 62, 62, 63, 63,
608     64, 64, 65, 65, 66, 66, 67, 67, 68, 68, 69, 69, 70, 70, 71, 71,
609     72, 72, 73, 73, 74, 74, 75, 75, 76, 76, 77, 77, 78, 78, 79, 79,
610     80, 80, 81, 81, 82, 82, 83, 83, 84, 84, 85, 85, 86, 86, 87, 87,
611     88, 88, 89, 89, 90, 90, 91, 91, 92, 92, 93, 93, 94, 94, 95, 95,
612     96, 96, 97, 97, 98, 98, 99, 99, 100, 100, 101, 101, 102, 102, 103, 103,
613     104, 104, 105, 105, 106, 106, 107, 107, 108, 108, 109, 109, 110, 110, 111, 111,
614     112, 112, 113, 113, 114, 114, 115, 115, 116, 116, 117, 117, 118, 118, 119, 119,
615     120, 120, 121, 121, 122, 122, 123, 123, 124, 124, 125, 125, 126, 126, 127, 127,
616     // QUANT_160
617     0, 32, 32, 64, 96, 96, 128, 128, 2, 34, 34, 66, 98, 98, 130, 130,
618     4, 36, 36, 68, 100, 100, 132, 132, 6, 38, 38, 70, 102, 102, 134, 134,
619     8, 40, 40, 72, 104, 104, 136, 136, 10, 42, 42, 74, 106, 106, 138, 138,
620     12, 44, 44, 76, 108, 108, 140, 140, 14, 46, 46, 78, 110, 110, 142, 142,
621     16, 48, 48, 80, 112, 112, 144, 144, 18, 50, 50, 82, 114, 114, 146, 146,
622     20, 52, 52, 84, 116, 116, 148, 148, 22, 54, 54, 86, 118, 118, 150, 150,
623     24, 56, 56, 88, 120, 120, 152, 152, 26, 58, 58, 90, 122, 122, 154, 154,
624     28, 60, 60, 92, 124, 124, 156, 156, 30, 62, 62, 94, 126, 126, 158, 158,
625     159, 159, 127, 127, 95, 63, 63, 31, 157, 157, 125, 125, 93, 61, 61, 29,
626     155, 155, 123, 123, 91, 59, 59, 27, 153, 153, 121, 121, 89, 57, 57, 25,
627     151, 151, 119, 119, 87, 55, 55, 23, 149, 149, 117, 117, 85, 53, 53, 21,
628     147, 147, 115, 115, 83, 51, 51, 19, 145, 145, 113, 113, 81, 49, 49, 17,
629     143, 143, 111, 111, 79, 47, 47, 15, 141, 141, 109, 109, 77, 45, 45, 13,
630     139, 139, 107, 107, 75, 43, 43, 11, 137, 137, 105, 105, 73, 41, 41, 9,
631     135, 135, 103, 103, 71, 39, 39, 7, 133, 133, 101, 101, 69, 37, 37, 5,
632     131, 131, 99, 99, 67, 35, 35, 3, 129, 129, 97, 97, 65, 33, 33, 1,
633     // QUANT_192
634     0, 64, 128, 128, 2, 66, 130, 130, 4, 68, 132, 132, 6, 70, 134, 134,
635     8, 72, 136, 136, 10, 74, 138, 138, 12, 76, 140, 140, 14, 78, 142, 142,
636     16, 80, 144, 144, 18, 82, 146, 146, 20, 84, 148, 148, 22, 86, 150, 150,
637     24, 88, 152, 152, 26, 90, 154, 154, 28, 92, 156, 156, 30, 94, 158, 158,
638     32, 96, 160, 160, 34, 98, 162, 162, 36, 100, 164, 164, 38, 102, 166, 166,
639     40, 104, 168, 168, 42, 106, 170, 170, 44, 108, 172, 172, 46, 110, 174, 174,
640     48, 112, 176, 176, 50, 114, 178, 178, 52, 116, 180, 180, 54, 118, 182, 182,
641     56, 120, 184, 184, 58, 122, 186, 186, 60, 124, 188, 188, 62, 126, 190, 190,
642     191, 191, 127, 63, 189, 189, 125, 61, 187, 187, 123, 59, 185, 185, 121, 57,
643     183, 183, 119, 55, 181, 181, 117, 53, 179, 179, 115, 51, 177, 177, 113, 49,
644     175, 175, 111, 47, 173, 173, 109, 45, 171, 171, 107, 43, 169, 169, 105, 41,
645     167, 167, 103, 39, 165, 165, 101, 37, 163, 163, 99, 35, 161, 161, 97, 33,
646     159, 159, 95, 31, 157, 157, 93, 29, 155, 155, 91, 27, 153, 153, 89, 25,
647     151, 151, 87, 23, 149, 149, 85, 21, 147, 147, 83, 19, 145, 145, 81, 17,
648     143, 143, 79, 15, 141, 141, 77, 13, 139, 139, 75, 11, 137, 137, 73, 9,
649     135, 135, 71, 7, 133, 133, 69, 5, 131, 131, 67, 3, 129, 129, 65, 1,
650     // QUANT_256
651     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
652     16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
653     32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
654     48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
655     64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
656     80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
657     96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
658     112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
659     128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
660     144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
661     160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
662     176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
663     192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
664     208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
665     224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
666     240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
667 };
668 
// Lookup table with QUANT_MAX rows of COLOR_NUM entries each, one row per colour quantization level.
// DecodeColor reads it as color_unquant_tables[quantLevel][quantizedValue] and converts the entry to float;
// every entry lies in the 8-bit range 0..255.
// The number after each opening brace is the count of initialisers supplied for that row; the remaining
// entries of a row (up to COLOR_NUM) have no initialiser and are therefore zero.
__constant short color_unquant_tables[QUANT_MAX][COLOR_NUM] = {
    { // 2
        0, 255
    },
    { // 3
        0, 128, 255
    },
    { // 4
        0, 85, 170, 255
    },
    { // 5
        0, 64, 128, 192, 255
    },
    { // 6
        0, 255, 51, 204, 102, 153
    },
    { // 8
        0, 36, 73, 109, 146, 182, 219, 255
    },
    { // 10
        0, 255, 28, 227, 56, 199, 84, 171, 113, 142
    },
    { // 12
        0, 255, 69, 186, 23, 232, 92, 163, 46, 209, 116, 139
    },
    { // 16
        0, 17, 34, 51, 68, 85, 102, 119, 136, 153, 170, 187, 204, 221, 238, 255
    },
    { // 20
        0, 255, 67, 188, 13, 242, 80, 175, 27, 228, 94, 161, 40, 215, 107, 148,
        54, 201, 121, 134
    },
    { // 24
        0, 255, 33, 222, 66, 189, 99, 156, 11, 244, 44, 211, 77, 178, 110, 145,
        22, 233, 55, 200, 88, 167, 121, 134
    },
    { // 32
        0, 8, 16, 24, 33, 41, 49, 57, 66, 74, 82, 90, 99, 107, 115, 123,
        132, 140, 148, 156, 165, 173, 181, 189, 198, 206, 214, 222, 231, 239, 247, 255
    },
    { // 40
        0, 255, 32, 223, 65, 190, 97, 158, 6, 249, 39, 216, 71, 184, 104, 151,
        13, 242, 45, 210, 78, 177, 110, 145, 19, 236, 52, 203, 84, 171, 117, 138,
        26, 229, 58, 197, 91, 164, 123, 132
    },
    { // 48
        0, 255, 16, 239, 32, 223, 48, 207, 65, 190, 81, 174, 97, 158, 113, 142,
        5, 250, 21, 234, 38, 217, 54, 201, 70, 185, 86, 169, 103, 152, 119, 136,
        11, 244, 27, 228, 43, 212, 59, 196, 76, 179, 92, 163, 108, 147, 124, 131
    },
    { // 64
        0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60,
        65, 69, 73, 77, 81, 85, 89, 93, 97, 101, 105, 109, 113, 117, 121, 125,
        130, 134, 138, 142, 146, 150, 154, 158, 162, 166, 170, 174, 178, 182, 186, 190,
        195, 199, 203, 207, 211, 215, 219, 223, 227, 231, 235, 239, 243, 247, 251, 255
    },
    { // 80
        0, 255, 16, 239, 32, 223, 48, 207, 64, 191, 80, 175, 96, 159, 112, 143,
        3, 252, 19, 236, 35, 220, 51, 204, 67, 188, 83, 172, 100, 155, 116, 139,
        6, 249, 22, 233, 38, 217, 54, 201, 71, 184, 87, 168, 103, 152, 119, 136,
        9, 246, 25, 230, 42, 213, 58, 197, 74, 181, 90, 165, 106, 149, 122, 133,
        13, 242, 29, 226, 45, 210, 61, 194, 77, 178, 93, 162, 109, 146, 125, 130
    },
    { // 96
        0, 255, 8, 247, 16, 239, 24, 231, 32, 223, 40, 215, 48, 207, 56, 199,
        64, 191, 72, 183, 80, 175, 88, 167, 96, 159, 104, 151, 112, 143, 120, 135,
        2, 253, 10, 245, 18, 237, 26, 229, 35, 220, 43, 212, 51, 204, 59, 196,
        67, 188, 75, 180, 83, 172, 91, 164, 99, 156, 107, 148, 115, 140, 123, 132,
        5, 250, 13, 242, 21, 234, 29, 226, 37, 218, 45, 210, 53, 202, 61, 194,
        70, 185, 78, 177, 86, 169, 94, 161, 102, 153, 110, 145, 118, 137, 126, 129
    },
    { // 128
        0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30,
        32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62,
        64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94,
        96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126,
        129, 131, 133, 135, 137, 139, 141, 143, 145, 147, 149, 151, 153, 155, 157, 159,
        161, 163, 165, 167, 169, 171, 173, 175, 177, 179, 181, 183, 185, 187, 189, 191,
        193, 195, 197, 199, 201, 203, 205, 207, 209, 211, 213, 215, 217, 219, 221, 223,
        225, 227, 229, 231, 233, 235, 237, 239, 241, 243, 245, 247, 249, 251, 253, 255
    },
    { // 160
        0, 255, 8, 247, 16, 239, 24, 231, 32, 223, 40, 215, 48, 207, 56, 199,
        64, 191, 72, 183, 80, 175, 88, 167, 96, 159, 104, 151, 112, 143, 120, 135,
        1, 254, 9, 246, 17, 238, 25, 230, 33, 222, 41, 214, 49, 206, 57, 198,
        65, 190, 73, 182, 81, 174, 89, 166, 97, 158, 105, 150, 113, 142, 121, 134,
        3, 252, 11, 244, 19, 236, 27, 228, 35, 220, 43, 212, 51, 204, 59, 196,
        67, 188, 75, 180, 83, 172, 91, 164, 99, 156, 107, 148, 115, 140, 123, 132,
        4, 251, 12, 243, 20, 235, 28, 227, 36, 219, 44, 211, 52, 203, 60, 195,
        68, 187, 76, 179, 84, 171, 92, 163, 100, 155, 108, 147, 116, 139, 124, 131,
        6, 249, 14, 241, 22, 233, 30, 225, 38, 217, 46, 209, 54, 201, 62, 193,
        70, 185, 78, 177, 86, 169, 94, 161, 102, 153, 110, 145, 118, 137, 126, 129
    },
    { // 192
        0, 255, 4, 251, 8, 247, 12, 243, 16, 239, 20, 235, 24, 231, 28, 227,
        32, 223, 36, 219, 40, 215, 44, 211, 48, 207, 52, 203, 56, 199, 60, 195,
        64, 191, 68, 187, 72, 183, 76, 179, 80, 175, 84, 171, 88, 167, 92, 163,
        96, 159, 100, 155, 104, 151, 108, 147, 112, 143, 116, 139, 120, 135, 124, 131,
        1, 254, 5, 250, 9, 246, 13, 242, 17, 238, 21, 234, 25, 230, 29, 226,
        33, 222, 37, 218, 41, 214, 45, 210, 49, 206, 53, 202, 57, 198, 61, 194,
        65, 190, 69, 186, 73, 182, 77, 178, 81, 174, 85, 170, 89, 166, 93, 162,
        97, 158, 101, 154, 105, 150, 109, 146, 113, 142, 117, 138, 121, 134, 125, 130,
        2, 253, 6, 249, 10, 245, 14, 241, 18, 237, 22, 233, 26, 229, 30, 225,
        34, 221, 38, 217, 42, 213, 46, 209, 50, 205, 54, 201, 58, 197, 62, 193,
        66, 189, 70, 185, 74, 181, 78, 177, 82, 173, 86, 169, 90, 165, 94, 161,
        98, 157, 102, 153, 106, 149, 110, 145, 114, 141, 118, 137, 122, 133, 126, 129
    },
    { // 256
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
        16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
        48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
        64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
        80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
        96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
        112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
        128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
        144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
        160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
        208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
        224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
        240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
    }
};
795 
// Exchanges the two float4 values that lhs and rhs point to.
void Swap(float4* lhs, float4* rhs)
{
    const float4 saved = *rhs;
    *rhs = *lhs;
    *lhs = saved;
}
802 
// Projects every texel of the block onto the direction vecK (relative to ptMean), takes the smallest and
// largest projection, and turns them back into two endpoint colours clamped to 0..255.
//   texels : BLOCK_SIZE texel values
//   ptMean : point the projections are measured from
//   vecK   : projection direction
//   e0, e1 : outputs; e0 receives the endpoint whose rounded x + y + z sum is not larger than that of e1
void FindMinMax(float4* texels, float4 ptMean, float4 vecK, float4* e0, float4* e1)
{
    float projMin = 1e31f; // max float is clipped to 1e31f
    float projMax = -1e31f; // min float is clipped to -1e31f
    for (int idx = START_INDEX; idx < BLOCK_SIZE; ++idx) {
        float proj = dot(texels[idx] - ptMean, vecK);
        projMin = min(projMin, proj);
        projMax = max(projMax, proj);
    }
    float4 nearEnd = clamp(vecK * projMin + ptMean, 0.0f, 255.0f); // 8bit max is 255.0f
    float4 farEnd = clamp(vecK * projMax + ptMean, 0.0f, 255.0f); // 8bit max is 255.0f
    // Compare the rounded x + y + z sums (alpha is ignored): if the direction vector ends up pointing
    // from light to dark, the endpoints are exchanged so that e0 is the darker one.
    float4 nearRounded = round(nearEnd);
    float4 farRounded = round(farEnd);
    float nearSum = nearRounded.x + nearRounded.y + nearRounded.z;
    float farSum = farRounded.x + farRounded.y + farRounded.z;
    if (nearSum > farSum) {
        *e0 = farEnd;
        *e1 = nearEnd;
    } else {
        *e0 = nearEnd;
        *e1 = farEnd;
    }
}
822 
// Chooses a principal direction for the block and derives the two endpoints along it.
// For each channel, the offsets (texel - ptMean) of all texels whose offset in that channel is positive
// are accumulated; the accumulated vector with the largest squared length becomes the direction.
// The alpha accumulator only takes part in the choice when hasAlpha is true.
// The direction is normalised unless its length is below SMALL_VALUE, then FindMinMax fills e0 and e1.
void MaxAccumulationPixelDirection(float4* texels, float4 ptMean, float4* e0, float4* e1, bool hasAlpha)
{
    const float4 zero4 = (float4)(FLOAT_ZERO);
    float4 accR = zero4;
    float4 accG = zero4;
    float4 accB = zero4;
    float4 accA = zero4;
    for (int idx = START_INDEX; idx < BLOCK_SIZE; ++idx) {
        float4 delta = texels[idx] - ptMean;
        accR += (delta.x > FLOAT_ZERO) ? delta : zero4;
        accG += (delta.y > FLOAT_ZERO) ? delta : zero4;
        accB += (delta.z > FLOAT_ZERO) ? delta : zero4;
        accA += (delta.w > FLOAT_ZERO) ? delta : zero4;
    }

    // Start with the red accumulator and replace it whenever a later one is strictly longer.
    float4 axis = accR;
    float bestEnergy = dot(accR, accR);
    float energyG = dot(accG, accG);
    if (energyG > bestEnergy) {
        axis = accG;
        bestEnergy = energyG;
    }
    float energyB = dot(accB, accB);
    if (energyB > bestEnergy) {
        axis = accB;
        bestEnergy = energyB;
    }
    float energyA = dot(accA, accA);
    if (hasAlpha && energyA > bestEnergy) {
        axis = accA;
        bestEnergy = energyA;
    }

    // safe normalize: a (near) zero vector is passed on unchanged
    float axisLen = length(axis);
    if (!(axisLen < SMALL_VALUE)) {
        axis = normalize(axis);
    }
    FindMinMax(texels, ptMean, axis, e0, e1);
}
859 
// Quantizes the eight channel values of the endpoint pair (e0, e1).
// Each channel is rounded to the nearest integer and used as an index into the section of
// g_colorQuantTables that starts at quantLevel * COLOR_NUM; the table entry is stored in
// endpointQuantized at the slot named by the matching EPx_y_INDEX constant.
void EncodeColorNormal(short quantLevel, float4 e0, float4 e1, short* endpointQuantized)
{
    const float4 rounded0 = round(e0);
    const float4 rounded1 = round(e1);
    const int tableBase = quantLevel * COLOR_NUM;
    endpointQuantized[EP0_R_INDEX] = g_colorQuantTables[tableBase + (int)(rounded0.x)];
    endpointQuantized[EP1_R_INDEX] = g_colorQuantTables[tableBase + (int)(rounded1.x)];
    endpointQuantized[EP0_G_INDEX] = g_colorQuantTables[tableBase + (int)(rounded0.y)];
    endpointQuantized[EP1_G_INDEX] = g_colorQuantTables[tableBase + (int)(rounded1.y)];
    endpointQuantized[EP0_B_INDEX] = g_colorQuantTables[tableBase + (int)(rounded0.z)];
    endpointQuantized[EP1_B_INDEX] = g_colorQuantTables[tableBase + (int)(rounded1.z)];
    endpointQuantized[EP0_A_INDEX] = g_colorQuantTables[tableBase + (int)(rounded0.w)];
    endpointQuantized[EP1_A_INDEX] = g_colorQuantTables[tableBase + (int)(rounded1.w)];
}
875 
// Maps the eight quantized endpoint values back to colour values.
// Each value is looked up in row quantLevel of color_unquant_tables and converted to float;
// all four components of *e0 and *e1 are overwritten.
void DecodeColor(short quantLevel, short endpointQuantized[COLOR_COMPONENT_NUM], float4* e0, float4* e1)
{
    const float r0 = (float)(color_unquant_tables[quantLevel][endpointQuantized[EP0_R_INDEX]]);
    const float r1 = (float)(color_unquant_tables[quantLevel][endpointQuantized[EP1_R_INDEX]]);
    const float g0 = (float)(color_unquant_tables[quantLevel][endpointQuantized[EP0_G_INDEX]]);
    const float g1 = (float)(color_unquant_tables[quantLevel][endpointQuantized[EP1_G_INDEX]]);
    const float b0 = (float)(color_unquant_tables[quantLevel][endpointQuantized[EP0_B_INDEX]]);
    const float b1 = (float)(color_unquant_tables[quantLevel][endpointQuantized[EP1_B_INDEX]]);
    const float a0 = (float)(color_unquant_tables[quantLevel][endpointQuantized[EP0_A_INDEX]]);
    const float a1 = (float)(color_unquant_tables[quantLevel][endpointQuantized[EP1_A_INDEX]]);
    *e0 = (float4)(r0, g0, b0, a0);
    *e1 = (float4)(r1, g1, b1, a1);
}
887 
// Quantizes one weight: scales it by weightRange, rounds to the nearest integer and clamps the
// result to the interval [QUANTIZE_WEIGHT_MIN, weightRange].
short QuantizeWeight(uint weightRange, float weight)
{
    const float scaled = weight * ((float)(weightRange));
    const short level = (short)(round(scaled));
    const short lowest = (short)(QUANTIZE_WEIGHT_MIN);
    const short highest = (short)(weightRange);
    return clamp(level, lowest, highest);
}
894 
// Computes a weight for each texel as its projection onto the line from endPoint[EP0_INDEX] to
// endPoint[EP1_INDEX], rescaled by the minimum and span of the projections that were visited.
//   part, partInfo : when partInfo is non-null only texels with GetPart(partInfo, i) == part are
//                    visited; entries of projw for other texels are left untouched
//   projw          : output, one entry per texel
// When the endpoints (nearly) coincide and partInfo is null, every weight is set to FLOAT_ZERO.
void CalculateNormalWeights(int part, PartInfo* partInfo, float4* texels,
    float4 endPoint[END_POINT_NUM], float* projw)
{
    float4 axis = endPoint[EP1_INDEX] - endPoint[EP0_INDEX];
    if (length(axis) < SMALL_VALUE && !partInfo) {
        for (int idx = START_INDEX; idx < X_GRIDS * Y_GRIDS; ++idx) {
            projw[idx] = FLOAT_ZERO;
        }
        return;
    }

    axis = normalize(axis);
    float lowest = 1e31f; // max float is clipped to 1e31f
    float highest = -1e31f; // min float is clipped to -1e31f
    for (int idx = START_INDEX; idx < BLOCK_SIZE; ++idx) {
        if (partInfo && (GetPart(partInfo, idx) != part)) {
            continue;
        }
        float proj = dot(axis, texels[idx] - endPoint[EP0_INDEX]);
        lowest = min(proj, lowest);
        highest = max(proj, highest);
        projw[idx] = proj;
    }

    // The span is kept at SMALL_VALUE or more, so the reciprocal never divides by zero.
    float span = max(SMALL_VALUE, highest - lowest);
    float scale = FLOAT_ONE / span;
    for (int idx = START_INDEX; idx < X_GRIDS * Y_GRIDS; ++idx) {
        if (partInfo && (GetPart(partInfo, idx) != part)) {
            continue;
        }
        projw[idx] = (projw[idx] - lowest) * scale;
    }
}
926 
// Applies QuantizeWeight to each of the X_GRIDS * Y_GRIDS projected weights and stores the results in weights.
void QuantizeWeights(float projw[X_GRIDS * Y_GRIDS], uint weightRange, short* weights)
{
    int idx = START_INDEX;
    while (idx < X_GRIDS * Y_GRIDS) {
        weights[idx] = QuantizeWeight(weightRange, projw[idx]);
        ++idx;
    }
}
933 
// Computes the projected weights of the whole block (no partition information is supplied) and
// quantizes each of them with QuantizeWeight into weights.
void CalculateQuantizedWeights(float4* texels, uint weightRange, float4 endPoint[END_POINT_NUM], short* weights)
{
    float projected[X_GRIDS * Y_GRIDS];
    CalculateNormalWeights(INT_ZERO, NULL, texels, endPoint, projected);
    for (int idx = START_INDEX; idx < X_GRIDS * Y_GRIDS; ++idx) {
        weights[idx] = QuantizeWeight(weightRange, projected[idx]);
    }
}
940 
// ORs number into the bit stream held in *outputs, starting at bit position *bitoffset, and then
// advances *bitoffset by bitcount.
// The stream is addressed as 32-bit words x, y, z, w. The part of number that crosses a word
// boundary is ORed into the following word. Nothing is written when the start position lies in w.
// number is not masked here, so callers are expected to pass a value that fits in bitcount bits.
void Orbits8Ptr(uint4* outputs, uint* bitoffset, uint number, uint bitcount)
{
    const uint startBit = *bitoffset;
    const uint endBit = startBit + bitcount;
    const uint startWord = startBit >> 5; // drop the low 5 bits to get the 32-bit word index
    const uint endWord = endBit >> 5; // drop the low 5 bits to get the 32-bit word index
    const uint shift = startBit & 31u; // low 5 bits (mask 31) give the position inside the word
    const bool spills = endWord > startWord;
    switch (startWord) {
        case 0: // word 0 is x, overflow goes to y
            (*outputs).x |= (number << shift);
            (*outputs).y |= spills ? (number >> (32u - shift)) : UINT_ZERO; // uint 32 bits
            break;
        case 1: // word 1 is y, overflow goes to z
            (*outputs).y |= (number << shift);
            (*outputs).z |= spills ? (number >> (32u - shift)) : UINT_ZERO; // uint 32 bits
            break;
        case 2: // word 2 is z, overflow goes to w
            (*outputs).z |= (number << shift);
            (*outputs).w |= spills ? (number >> (32u - shift)) : UINT_ZERO; // uint 32 bits
            break;
        default:
            break;
    }
    *bitoffset = endBit;
}
959 
// Splits n at bit i: *low receives the lowest i bits of n, *high receives the next 8 bits above them.
void SplitHighLow(uint n, uint i, int* high, uint* low)
{
    const uint lowBits = n & ((UINT_ONE << i) - UINT_ONE);
    const int highBits = ((int)(n >> i)) & 0xFF; // mask 0xFF to get low 8 bits
    *low = lowBits;
    *high = highBits;
}
966 
// Returns the lowest 8 bits of p in reversed bit order (bit 0 becomes bit 7 and so on).
// Bits of p above bit 7 do not contribute to the result.
uint ReverseByte(uint p)
{
    uint swapped = ((p & 0xFu) << 4) | ((p & 0xF0u) >> 4); // exchange the two 4-bit halves
    swapped = ((swapped & 0x33u) << 2) | ((swapped & 0xCCu) >> 2); // exchange neighbouring 2-bit pairs
    swapped = ((swapped & 0x55u) << 1) | ((swapped & 0xAAu) >> 1); // exchange neighbouring single bits
    return swapped;
}
974 
// Appends one group of five values to the bit stream in *outputs at position *outpos.
// Every value is split by SplitHighLow into its lowest bitcount bits and a high part; the five high
// parts select one entry of g_integerFromTrits (index t4 * 81 + t3 * 27 + t2 * 9 + t1 * 3 + t0).
// The output interleaves the low bits of each value with a slice of that table entry:
// slices of 2, 2, 1, 2 and 1 bits taken at bit offsets 0, 2, 4, 5 and 7.
void EncodeTrits(uint bitcount, uint tritInput[TRIT_BLOCK_SIZE], uint4* outputs, uint* outpos)
{
    int highPart[5]; // 5 values per group
    uint lowPart[5]; // 5 values per group
    SplitHighLow(tritInput[ISE_0], bitcount, &highPart[0], &lowPart[0]);
    SplitHighLow(tritInput[ISE_1], bitcount, &highPart[1], &lowPart[1]);
    SplitHighLow(tritInput[ISE_2], bitcount, &highPart[2], &lowPart[2]);
    SplitHighLow(tritInput[ISE_3], bitcount, &highPart[3], &lowPart[3]); // 3 is the fourth value
    SplitHighLow(tritInput[ISE_4], bitcount, &highPart[4], &lowPart[4]); // 4 is the fifth value

    // Horner evaluation in base 3, equal to t4 * 81 + t3 * 27 + t2 * 9 + t1 * 3 + t0
    int packedIndex = highPart[4]; // 4 is the most significant digit
    for (int k = 3; k >= 0; --k) { // remaining digits 3..0
        packedIndex = packedIndex * 3 + highPart[k]; // base 3
    }
    ushort packhigh = (ushort)(g_integerFromTrits[packedIndex]);

    const uint sliceShift[5] = {0u, 2u, 4u, 5u, 7u}; // bit offset of each slice inside packhigh
    const uint sliceWidth[5] = {2u, 2u, 1u, 2u, 1u}; // bit width of each slice
    for (int k = 0; k < 5; ++k) { // 5 values per group
        uint sliceMask = (1u << sliceWidth[k]) - 1u;
        Orbits8Ptr(outputs, outpos, lowPart[k], bitcount);
        Orbits8Ptr(outputs, outpos, (packhigh >> sliceShift[k]) & sliceMask, sliceWidth[k]);
    }
}
1009 
// Appends one group of three values to the bit stream in *outputs at position *outpos.
// Every value is split by SplitHighLow into its lowest bitcount bits and a high part; the three high
// parts select one entry of g_integerFromQuints (index q2 * 25 + q1 * 5 + q0).
// The output interleaves the low bits of each value with a slice of that table entry:
// slices of 3, 2 and 2 bits taken at bit offsets 0, 3 and 5.
void EncodeQuints(uint bitcount, uint quintInput[QUINT_BLOCK_SIZE], uint4* outputs, uint* outpos)
{
    int highPart[3]; // 3 values per group
    uint lowPart[3]; // 3 values per group
    SplitHighLow(quintInput[ISE_0], bitcount, &highPart[0], &lowPart[0]);
    SplitHighLow(quintInput[ISE_1], bitcount, &highPart[1], &lowPart[1]);
    SplitHighLow(quintInput[ISE_2], bitcount, &highPart[2], &lowPart[2]);

    // Horner evaluation in base 5, equal to q2 * 25 + q1 * 5 + q0
    int packedIndex = (highPart[2] * 5 + highPart[1]) * 5 + highPart[0];
    ushort packhigh = (ushort)(g_integerFromQuints[packedIndex]);

    const uint sliceShift[3] = {0u, 3u, 5u}; // bit offset of each slice inside packhigh
    const uint sliceWidth[3] = {3u, 2u, 2u}; // bit width of each slice
    for (int k = 0; k < 3; ++k) { // 3 values per group
        uint sliceMask = (1u << sliceWidth[k]) - 1u;
        Orbits8Ptr(outputs, outpos, lowPart[k], bitcount);
        Orbits8Ptr(outputs, outpos, (packhigh >> sliceShift[k]) & sliceMask, sliceWidth[k]);
    }
}
1029 
// Packs the quantized endpoint values in numbers into the bit stream *outputs.
// The three entries of g_bitsTritsQuintsTable that belong to range decide the packing:
//   - second entry equal to UINT_ONE: values are packed in groups of five with EncodeTrits
//   - third entry equal to UINT_ONE: values are packed in groups of three with EncodeQuints
//   - otherwise: the first count values are written as plain fields of bits bits each
// In the grouped paths all eight values are always fed in (unused group slots are zero) and *bitPos
// is afterwards set to a length computed from count (8 with alpha, 6 without).
void BiseEndpoints(short numbers[COLOR_COMPONENT_NUM], int range, uint4* outputs, bool hasAlpha, uint* bitPos)
{
    const int row = range * 3; // every range owns 3 consecutive table entries
    const uint bits = (uint)(g_bitsTritsQuintsTable[row]); // entry 0
    const uint trits = (uint)(g_bitsTritsQuintsTable[row + 1]); // entry 1
    const uint quints = (uint)(g_bitsTritsQuintsTable[row + 2]); // entry 2
    const uint count = hasAlpha ? 8u : 6u; // RGBA 4x2 = 8 or RGB 3x2 = 6

    if (trits == UINT_ONE) {
        uint group[TRIT_BLOCK_SIZE];
        group[ISE_0] = numbers[EP0_R_INDEX];
        group[ISE_1] = numbers[EP1_R_INDEX];
        group[ISE_2] = numbers[EP0_G_INDEX];
        group[ISE_3] = numbers[EP1_G_INDEX];
        group[ISE_4] = numbers[EP0_B_INDEX];
        EncodeTrits(bits, group, outputs, bitPos);
        // second group: three remaining values padded with two zeros
        group[ISE_0] = numbers[EP1_B_INDEX];
        group[ISE_1] = numbers[EP0_A_INDEX];
        group[ISE_2] = numbers[EP1_A_INDEX];
        group[ISE_3] = UINT_ZERO;
        group[ISE_4] = UINT_ZERO;
        EncodeTrits(bits, group, outputs, bitPos);
        *bitPos = ((TRIT_MSB_SIZE + TRIT_BLOCK_SIZE * bits) * count + TRIT_ROUND_NUM) / TRIT_BLOCK_SIZE;
        return;
    }

    if (quints == UINT_ONE) {
        uint group[QUINT_BLOCK_SIZE];
        group[ISE_0] = numbers[EP0_R_INDEX];
        group[ISE_1] = numbers[EP1_R_INDEX];
        group[ISE_2] = numbers[EP0_G_INDEX];
        EncodeQuints(bits, group, outputs, bitPos);
        group[ISE_0] = numbers[EP1_G_INDEX];
        group[ISE_1] = numbers[EP0_B_INDEX];
        group[ISE_2] = numbers[EP1_B_INDEX];
        EncodeQuints(bits, group, outputs, bitPos);
        // third group: two remaining values padded with one zero
        group[ISE_0] = numbers[EP0_A_INDEX];
        group[ISE_1] = numbers[EP1_A_INDEX];
        group[ISE_2] = UINT_ZERO;
        EncodeQuints(bits, group, outputs, bitPos);
        *bitPos = ((QUINT_MSB_SIZE + QUINT_BLOCK_SIZE * bits) * count + QUINT_ROUND_NUM) / QUINT_BLOCK_SIZE;
        return;
    }

    // plain binary fields, one per value
    uint idx = UINT_ZERO;
    while (idx < count) {
        Orbits8Ptr(outputs, bitPos, numbers[idx], bits);
        ++idx;
    }
}
1072 
// Packs the BLOCK_SIZE quantized weights in numbers into the bit stream *outputs, starting at bit 0.
// The entries of g_bitsTritsQuintsTable that belong to range decide the packing:
//   - second entry equal to UINT_ONE: weights are packed in groups of five with EncodeTrits
//     (three full groups, then the last weight padded with four zeros)
//   - otherwise: every weight is written as a plain field of bits bits
// The write position is local to this function and is not reported to the caller.
// NOTE(review): unlike BiseEndpoints there is no EncodeQuints branch here, so a range whose table
// row selects quints would take the plain-field path - confirm callers never pass such a range.
void BiseWeights(short numbers[BLOCK_SIZE], int range, uint4* outputs)
{
    uint bitPos = UINT_ZERO;
    uint bits = (uint)(g_bitsTritsQuintsTable[range * 3 + 0]); // Quints 3 offset 0
    uint trits = (uint)(g_bitsTritsQuintsTable[range * 3 + 1]); // Quints 3 offset 1
    if (trits == UINT_ONE) {
        uint tritsInput[TRIT_BLOCK_SIZE];
        tritsInput[ISE_0] = numbers[WEIGHT_0];
        tritsInput[ISE_1] = numbers[WEIGHT_1];
        tritsInput[ISE_2] = numbers[WEIGHT_2];
        tritsInput[ISE_3] = numbers[WEIGHT_3];
        tritsInput[ISE_4] = numbers[WEIGHT_4];
        EncodeTrits(bits, tritsInput, outputs, &bitPos);
        tritsInput[ISE_0] = numbers[WEIGHT_5];
        tritsInput[ISE_1] = numbers[WEIGHT_6];
        tritsInput[ISE_2] = numbers[WEIGHT_7];
        tritsInput[ISE_3] = numbers[WEIGHT_8];
        tritsInput[ISE_4] = numbers[WEIGHT_9];
        EncodeTrits(bits, tritsInput, outputs, &bitPos);
        tritsInput[ISE_0] = numbers[WEIGHT_10];
        tritsInput[ISE_1] = numbers[WEIGHT_11];
        tritsInput[ISE_2] = numbers[WEIGHT_12];
        tritsInput[ISE_3] = numbers[WEIGHT_13];
        tritsInput[ISE_4] = numbers[WEIGHT_14];
        EncodeTrits(bits, tritsInput, outputs, &bitPos);
        // last group: the remaining weight padded with zeros
        tritsInput[ISE_0] = numbers[WEIGHT_15];
        tritsInput[ISE_1] = UINT_ZERO;
        tritsInput[ISE_2] = UINT_ZERO;
        tritsInput[ISE_3] = UINT_ZERO;
        tritsInput[ISE_4] = UINT_ZERO;
        EncodeTrits(bits, tritsInput, outputs, &bitPos);
        // bitPos is a local that nobody reads after this point, so no final length is stored in it.
    } else {
        for (int i = START_INDEX; i < BLOCK_SIZE; ++i) {
            Orbits8Ptr(outputs, &bitPos, numbers[i], bits);
        }
    }
}
1111 
// Builds the 128-bit output block from the block mode, the colour endpoint mode and the two packed
// bit streams.
//   - The weight stream words wtIse.x, wtIse.y, wtIse.z are bit-reversed byte by byte with the byte
//     order flipped as well, and land in output words w, z, y respectively.
//   - Output word x starts as blockMode and additionally receives the colour endpoint mode at CEM_POS
//     and the low 15 bits of epIse.x at COLOR_EP_POS.
//   - The remaining endpoint bits are ORed into output words y and z.
uint4 AssembleBlock(uint blockMode, uint colorEndpointMode, uint4 epIse, uint4 wtIse)
{
    const uint mirroredX = (ReverseByte(wtIse.x & BYTE_MASK) << BYTE_3_POS) |
        (ReverseByte((wtIse.x >> BYTE_1_POS) & BYTE_MASK) << BYTE_2_POS) |
        (ReverseByte((wtIse.x >> BYTE_2_POS) & BYTE_MASK) << BYTE_1_POS) |
        ReverseByte((wtIse.x >> BYTE_3_POS) & BYTE_MASK);
    const uint mirroredY = (ReverseByte(wtIse.y & BYTE_MASK) << BYTE_3_POS) |
        (ReverseByte((wtIse.y >> BYTE_1_POS) & BYTE_MASK) << BYTE_2_POS) |
        (ReverseByte((wtIse.y >> BYTE_2_POS) & BYTE_MASK) << BYTE_1_POS) |
        ReverseByte((wtIse.y >> BYTE_3_POS) & BYTE_MASK);
    const uint mirroredZ = (ReverseByte(wtIse.z & BYTE_MASK) << BYTE_3_POS) |
        (ReverseByte((wtIse.z >> BYTE_1_POS) & BYTE_MASK) << BYTE_2_POS) |
        (ReverseByte((wtIse.z >> BYTE_2_POS) & BYTE_MASK) << BYTE_1_POS) |
        ReverseByte((wtIse.z >> BYTE_3_POS) & BYTE_MASK);

    const uint wordX = blockMode |
        ((colorEndpointMode & MASK_FOR_4BITS) << CEM_POS) |
        ((epIse.x & MASK_FOR_15BITS) << COLOR_EP_POS);
    const uint wordY = mirroredZ |
        ((epIse.x >> COLOR_EP_HIGH_BIT) & MASK_FOR_17BITS) |
        ((epIse.y & MASK_FOR_15BITS) << COLOR_EP_POS);
    const uint wordZ = mirroredY |
        ((epIse.y >> COLOR_EP_HIGH_BIT) & MASK_FOR_17BITS);
    const uint wordW = mirroredX;

    return (uint4)(wordX, wordY, wordZ, wordW);
}
1137 
// Composes the block mode word from the weight grid size, the weight quantization method and the
// dual-plane flag.
//   - weightQuantmethod is split into a precision flag (set for methods 6 and above) and a method
//     code (weightQuantmethod % 6 plus WEIGHT_METHOD_OFFSET).
//   - The grid fields are derived from the compile-time constants Y_GRIDS and X_GRIDS.
// Every field is shifted to its position constant and the fields are ORed together.
uint AssembleBlockmode(uint weightQuantmethod, bool isDualPlane)
{
    const uint gridFieldY = (uint)((Y_GRIDS - HEIGHT_BITS_OFFSET) & MASK_FOR_2BITS);
    const uint gridFieldX = (uint)((X_GRIDS - WIDTH_BITS_OFFSET) & MASK_FOR_2BITS);
    const uint dualPlaneFlag = isDualPlane ? UINT_ONE : UINT_ZERO;
    const uint precisionFlag = (weightQuantmethod >= 6u) ? UINT_ONE : UINT_ZERO; // low/high-precision limit is 6u
    const uint methodCode = (weightQuantmethod % 6u) + WEIGHT_METHOD_OFFSET; // low/high-precision limit is 6u

    uint blockMode = (methodCode >> WEIGHT_METHOD_RIGHT_BIT) & MASK_FOR_2BITS;
    blockMode |= (methodCode & MASK_FOR_1BITS) << WEIGHT_METHOD_POS;
    blockMode |= gridFieldY << BLOCK_WIDTH_POS;
    blockMode |= gridFieldX << BLOCK_HEIGHT_POS;
    blockMode |= precisionFlag << WEIGHT_PRECISION_POS;
    blockMode |= dualPlaneFlag << IS_DUALPLANE_POS;
    return blockMode;
}
1153 
// Quantizes the endpoint pair and returns it packed as a bit stream.
// *ep0 and *ep1 are overwritten with the colours obtained by decoding the quantized values again,
// so the caller continues with exactly the colours that were encoded.
// Without alpha, the two quantized alpha values are replaced by SHORT_ZERO before packing.
uint4 EndpointIse(float4* ep0, float4* ep1, short endpointQuantmethod, bool hasAlpha)
{
    short quantized[COLOR_COMPONENT_NUM];
    EncodeColorNormal(endpointQuantmethod, *ep0, *ep1, quantized);
    DecodeColor(endpointQuantmethod, quantized, ep0, ep1);
    if (!hasAlpha) {
        quantized[EP0_A_INDEX] = SHORT_ZERO;
        quantized[EP1_A_INDEX] = SHORT_ZERO;
    }

    uint4 packed = (uint4)(UINT_ZERO);
    uint cursor = UINT_ZERO;
    BiseEndpoints(quantized, endpointQuantmethod, &packed, hasAlpha, &cursor);
    return packed;
}
1168 
// Interpolates between ep0 and ep1: ep1 is weighted by weight, ep0 by BLOCK_MAX_WEIGHTS_SHORT - weight,
// and the sum is divided by BLOCK_MAX_WEIGHTS_FLOAT.
float4 CalTexel(short weight, float4 ep0, float4 ep1)
{
    const short complement = BLOCK_MAX_WEIGHTS_SHORT - weight;
    const float4 blended = ep0 * complement + ep1 * weight;
    return blended / BLOCK_MAX_WEIGHTS_FLOAT;
}
1174 
// Quantizes the per-texel weights of the block, measures the resulting error and returns the weights
// packed as a bit stream.
// For every texel the quantized weight is replaced by its g_scrambleTable entry, the replaced value is
// mapped through g_weightUnquant, and the colour interpolated with that weight is compared with the
// source texel. *errval receives the sum of the squared differences over the block.
uint4 WeightIse(float4* texels, uint weightRange, float4 endPoint[END_POINT_NUM],
    short weightQuantmethod, float* errval)
{
    short packedWeights[X_GRIDS * Y_GRIDS];
    CalculateQuantizedWeights(texels, weightRange, endPoint, packedWeights);

    // Both tables are addressed as method * WEIGHT_QUANTIZE_NUM + value.
    const int tableBase = weightQuantmethod * WEIGHT_QUANTIZE_NUM;
    float squaredError = FLOAT_ZERO;
    for (int idx = START_INDEX; idx < X_GRIDS * Y_GRIDS; ++idx) {
        short slot = tableBase + packedWeights[idx];
        packedWeights[idx] = g_scrambleTable[slot];
        slot = tableBase + packedWeights[idx];
        short unquantized = g_weightUnquant[slot];
        float4 delta = CalTexel(unquantized, endPoint[EP0_INDEX], endPoint[EP1_INDEX]) - texels[idx];
        squaredError += dot(delta, delta);
    }
    *errval = squaredError;

    uint4 packed = (uint4)(UINT_ZERO);
    BiseWeights(packedWeights, (int)(weightQuantmethod), &packed);
    return packed;
}
1196 
// Encodes the block with one fixed configuration and returns the error reported by WeightIse.
// *bestBlockmode is set to (QUANT_6, QUANT_256, WEIGHT_RANGE_6): x is used as the weight quantization
// method, y as the endpoint quantization method and z as the weight range. Alpha is always encoded.
// *epIse and *wtIse receive the packed endpoint and weight bit streams.
float TryEncode(float4* texels, float4 texelsMean, uint4* epIse, uint4* wtIse, short3* bestBlockmode)
{
    const bool hasAlpha = true;
    const short3 mode = (short3)(QUANT_6, QUANT_256, WEIGHT_RANGE_6);
    *bestBlockmode = mode;
    const short weightQuantmethod = mode.x;
    const short endpointQuantmethod = mode.y;
    const short weightRange = mode.z;

    // The endpoints are produced, then replaced in place by their quantized-and-decoded values.
    float4 endPoint[END_POINT_NUM];
    MaxAccumulationPixelDirection(texels, texelsMean, &endPoint[EP0_INDEX], &endPoint[EP1_INDEX], hasAlpha);
    *epIse = EndpointIse(&endPoint[EP0_INDEX], &endPoint[EP1_INDEX], endpointQuantmethod, hasAlpha);

    float blockError;
    *wtIse = WeightIse(texels, weightRange - UINT_ONE, endPoint, weightQuantmethod, &blockError);
    return blockError;
}
1215 
// Encodes one block of texels and returns the assembled 128-bit block.
//   texels     : BLOCK_SIZE texel values of the block
//   texelsMean : mean of those texels
//   blockID    : index of this block; errs[blockID] receives the encoding error converted to uint
// The block is always assembled as single-plane with the RGBA endpoint mode.
// NOTE(review): hasAlpha is fixed here independently of the flag used inside TryEncode - keep the two
// in sync if either is ever made configurable.
uint4 EncodeBlock(float4* texels, float4 texelsMean, int blockID, __global uint* errs)
{
    bool hasAlpha = true;
    bool isDualPlane = false;

    uint4 epIse;
    uint4 wtIse;
    short3 bestBlockmode;
    float errval = TryEncode(texels, texelsMean, &epIse, &wtIse, &bestBlockmode);

    uint blockMode = AssembleBlockmode(bestBlockmode.x, isDualPlane);
    uint colorEndpointMode = hasAlpha ? CEM_LDR_RGBA_DIRECT : CEM_LDR_RGB_DIRECT;
    errs[blockID] = (uint)(errval);
    return AssembleBlock(blockMode, colorEndpointMode, epIse, wtIse);
}
1236 
// Reads the DIM x DIM block of pixels that belongs to the current work item from inputImage.
// The block origin is the work item's global id multiplied by DIM. Coordinates at or beyond width
// or height are replaced by the last valid column or row.
// Each pixel is scaled by PIXEL_MAX_VALUE, stored row by row in texels, and also added to *texelMean,
// so the caller has to initialise *texelMean and divide by the texel count afterwards.
void GotTexelFromImage(read_only image2d_t inputImage, float4 texels[BLOCK_SIZE],
    int width, int height, float4 *texelMean)
{
    const int originX = (int)(get_global_id(0)) * DIM;
    const int originY = (int)(get_global_id(1)) * DIM;
    for (int row = 0; row < DIM; ++row) {
        const int sampleY = min(originY + row, height - 1);
        for (int col = 0; col < DIM; ++col) {
            const int sampleX = min(originX + col, width - 1);
            const float4 scaled = read_imagef(inputImage, (int2)(sampleX, sampleY)) * PIXEL_MAX_VALUE;
            texels[row * DIM + col] = scaled;
            *texelMean += scaled;
        }
    }
}
1258 
// Kernel entry point: every work item encodes one block.
// The block index is global_id(1) * global_size(0) + global_id(0); the encoded block is written to
// astcArr at that index and EncodeBlock stores the block error in errs at the same index.
// width and height are the image dimensions used to clamp pixel reads.
kernel void AstcCl(read_only image2d_t inputImage, __global uint4* astcArr, __global uint* errs,
    int width, int height)
{
    const int blocksPerRow = (int)(get_global_size(0));
    const int blockX = (int)(get_global_id(0));
    const int blockY = (int)(get_global_id(1));
    const int blockID = blockY * blocksPerRow + blockX;

    float4 texels[BLOCK_SIZE];
    float4 texelSum = (float4)(0.0f);
    GotTexelFromImage(inputImage, texels, width, height, &texelSum);
    const float4 texelMean = texelSum / ((float)(BLOCK_SIZE));
    astcArr[blockID] = EncodeBlock(texels, texelMean, blockID, errs);
}
1271 )";
1272 
AstcClClose(ClAstcHandle * clAstcHandle)1273 CL_ASTC_SHARE_LIB_API CL_ASTC_STATUS AstcClClose(ClAstcHandle *clAstcHandle)
1274 {
1275     if (clAstcHandle == nullptr) {
1276         IMAGE_LOGE("astc AstcClClose clAstcHandle is nullptr!");
1277         return CL_ASTC_ENC_FAILED;
1278     }
1279     cl_int clRet;
1280     if (clAstcHandle->kernel != nullptr) {
1281         clRet = clReleaseKernel(clAstcHandle->kernel);
1282         if (clRet != CL_SUCCESS) {
1283             IMAGE_LOGE("astc clReleaseKernel failed ret %{public}d!", clRet);
1284             return CL_ASTC_ENC_FAILED;
1285         }
1286         clAstcHandle->kernel = nullptr;
1287     }
1288     if (clAstcHandle->queue != nullptr) {
1289         clRet = clReleaseCommandQueue(clAstcHandle->queue);
1290         if (clRet != CL_SUCCESS) {
1291             IMAGE_LOGE("astc clReleaseCommandQueue failed ret %{public}d!", clRet);
1292             return CL_ASTC_ENC_FAILED;
1293         }
1294         clAstcHandle->queue = nullptr;
1295     }
1296     if (clAstcHandle->context != nullptr) {
1297         clRet = clReleaseContext(clAstcHandle->context);
1298         if (clRet != CL_SUCCESS) {
1299             IMAGE_LOGE("astc clReleaseContext failed ret %{public}d!", clRet);
1300             return CL_ASTC_ENC_FAILED;
1301         }
1302         clAstcHandle->context = nullptr;
1303     }
1304     if (clAstcHandle->encObj.blockErrs_ != nullptr) {
1305         free(clAstcHandle->encObj.blockErrs_);
1306         clAstcHandle->encObj.blockErrs_ = nullptr;
1307     }
1308     if (clAstcHandle != nullptr) {
1309         free(clAstcHandle);
1310     }
1311     return CL_ASTC_ENC_SUCCESS;
1312 }
1313 
// Reports whether a file system entry exists at `name` (existence only, via access(F_OK)).
static bool CheckClBinIsExist(const std::string &name)
{
    // access() yields 0 when the path exists and -1 otherwise.
    const int accessResult = access(name.c_str(), F_OK);
    return accessResult == 0;
}
1318 
SaveClBin(cl_program program,const std::string & clBinPath)1319 static CL_ASTC_STATUS SaveClBin(cl_program program, const std::string &clBinPath)
1320 {
1321     size_t programBinarySizes;
1322     cl_int clRet = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &programBinarySizes, NULL);
1323     if (clRet != CL_SUCCESS) {
1324         IMAGE_LOGE("astc clGetProgramInfo CL_PROGRAM_BINARY_SIZES failed ret %{public}d!", clRet);
1325         return CL_ASTC_ENC_FAILED;
1326     }
1327     if ((programBinarySizes == 0) || (programBinarySizes > MAX_MALLOC_BYTES)) {
1328         IMAGE_LOGE("astc clGetProgramInfo programBinarySizes %{public}zu too big!", programBinarySizes);
1329         return CL_ASTC_ENC_FAILED;
1330     }
1331     uint8_t *programBinaries = static_cast<uint8_t *>(malloc(programBinarySizes));
1332     if (programBinaries == nullptr) {
1333         IMAGE_LOGE("astc programBinaries malloc failed!");
1334         return CL_ASTC_ENC_FAILED;
1335     }
1336     clRet = clGetProgramInfo(program, CL_PROGRAM_BINARIES, programBinarySizes, &programBinaries, NULL);
1337     if (clRet != CL_SUCCESS) {
1338         IMAGE_LOGE("astc clGetProgramInfo CL_PROGRAM_BINARIES failed ret %{public}d!", clRet);
1339         free(programBinaries);
1340         return CL_ASTC_ENC_FAILED;
1341     }
1342     FILE *fp = fopen(clBinPath.c_str(), "wb");
1343     if (fp == nullptr) {
1344         IMAGE_LOGE("astc create file: %{public}s failed!", clBinPath.c_str());
1345         free(programBinaries);
1346         return CL_ASTC_ENC_FAILED;
1347     }
1348     CL_ASTC_STATUS ret = CL_ASTC_ENC_SUCCESS;
1349     if (fwrite(programBinaries, 1, programBinarySizes, fp) != programBinarySizes) {
1350         IMAGE_LOGE("astc fwrite programBinaries file failed!");
1351         ret = CL_ASTC_ENC_FAILED;
1352     }
1353     if (fclose(fp) != 0) {
1354         IMAGE_LOGE("astc SaveClBin close file failed!");
1355         ret = CL_ASTC_ENC_FAILED;
1356     }
1357     fp = nullptr;
1358     free(programBinaries);
1359     return ret;
1360 }
1361 
BuildProgramAndCreateKernel(cl_program program,ClAstcHandle * clAstcHandle)1362 static CL_ASTC_STATUS BuildProgramAndCreateKernel(cl_program program, ClAstcHandle *clAstcHandle)
1363 {
1364     cl_int clRet = clBuildProgram(program, 1, &clAstcHandle->deviceID, "-cl-std=CL3.0", nullptr, nullptr);
1365     if (clRet != CL_SUCCESS) {
1366         IMAGE_LOGE("astc clBuildProgram failed ret %{public}d!", clRet);
1367         return CL_ASTC_ENC_FAILED;
1368     }
1369     clAstcHandle->kernel = clCreateKernel(program, "AstcCl", &clRet);
1370     if (clRet != CL_SUCCESS) {
1371         IMAGE_LOGE("astc clCreateKernel failed ret %{public}d!", clRet);
1372         return CL_ASTC_ENC_FAILED;
1373     }
1374     return CL_ASTC_ENC_SUCCESS;
1375 }
1376 
AstcClBuildProgram(ClAstcHandle * clAstcHandle,const std::string & clBinPath)1377 static CL_ASTC_STATUS AstcClBuildProgram(ClAstcHandle *clAstcHandle, const std::string &clBinPath)
1378 {
1379     cl_int clRet;
1380     cl_program program = nullptr;
1381     if (!CheckClBinIsExist(clBinPath)) {
1382         size_t sourceSize = strlen(g_programSource) + 1; // '\0' occupies 1 bytes
1383         program = clCreateProgramWithSource(clAstcHandle->context, 1, &g_programSource, &sourceSize, &clRet);
1384         if (clRet != CL_SUCCESS) {
1385             IMAGE_LOGE("astc clCreateProgramWithSource failed ret %{public}d!", clRet);
1386             return CL_ASTC_ENC_FAILED;
1387         }
1388         if (BuildProgramAndCreateKernel(program, clAstcHandle) != CL_ASTC_ENC_SUCCESS) {
1389             IMAGE_LOGE("astc clCreateProgramWithSource failed ret %{public}d!", clRet);
1390             clReleaseProgram(program);
1391             return CL_ASTC_ENC_FAILED;
1392         }
1393         if (SaveClBin(program, clBinPath) != CL_ASTC_ENC_SUCCESS) {
1394             IMAGE_LOGI("astc SaveClBin failed!");
1395         }
1396     } else {
1397         std::ifstream contents{clBinPath};
1398         std::string binaryContent{std::istreambuf_iterator<char>{contents}, {}};
1399         size_t binSize = binaryContent.length();
1400         if ((binSize == 0) || (binSize > MAX_MALLOC_BYTES)) {
1401             IMAGE_LOGE("astc AstcClBuildProgram read CLbin file lenth error %{public}zu!", binSize);
1402             return CL_ASTC_ENC_FAILED;
1403         }
1404         const char *binary = static_cast<const char *>(binaryContent.c_str());
1405         program = clCreateProgramWithBinary(clAstcHandle->context, 1, &clAstcHandle->deviceID, &binSize,
1406             (const unsigned char **)&binary, nullptr, &clRet);
1407         if (clRet != CL_SUCCESS) {
1408             IMAGE_LOGE("astc clCreateProgramWithBinary failed ret %{public}d!", clRet);
1409             return CL_ASTC_ENC_FAILED;
1410         }
1411         if (BuildProgramAndCreateKernel(program, clAstcHandle) != CL_ASTC_ENC_SUCCESS) {
1412             IMAGE_LOGE("astc BuildProgramAndCreateKernel with bin failed!");
1413             clReleaseProgram(program);
1414             return CL_ASTC_ENC_FAILED;
1415         }
1416     }
1417     clRet = clReleaseProgram(program);
1418     if (clRet != CL_SUCCESS) {
1419         IMAGE_LOGE("astc clReleaseProgram failed ret %{public}d!", clRet);
1420         return CL_ASTC_ENC_FAILED;
1421     }
1422     return CL_ASTC_ENC_SUCCESS;
1423 }
1424 
AstcCreateClKernel(ClAstcHandle * clAstcHandle,const std::string & clBinPath)1425 static CL_ASTC_STATUS AstcCreateClKernel(ClAstcHandle *clAstcHandle, const std::string &clBinPath)
1426 {
1427     if (!OHOS::InitOpenCL()) {
1428         IMAGE_LOGE("astc InitOpenCL error!");
1429         return CL_ASTC_ENC_FAILED;
1430     }
1431     cl_int clRet;
1432     cl_platform_id platformID;
1433     clRet = clGetPlatformIDs(1, &platformID, NULL);
1434     if (clRet != CL_SUCCESS) {
1435         IMAGE_LOGE("astc clGetPlatformIDs failed ret %{public}d!", clRet);
1436         return CL_ASTC_ENC_FAILED;
1437     }
1438     clRet = clGetDeviceIDs(platformID, CL_DEVICE_TYPE_GPU, 1, &clAstcHandle->deviceID, NULL);
1439     if (clRet != CL_SUCCESS) {
1440         IMAGE_LOGE("astc clGetDeviceIDs failed ret %{public}d!", clRet);
1441         return CL_ASTC_ENC_FAILED;
1442     }
1443     clAstcHandle->context = clCreateContext(0, 1, &clAstcHandle->deviceID, NULL, NULL, &clRet);
1444     if (clRet != CL_SUCCESS) {
1445         IMAGE_LOGE("astc clCreateContext failed ret %{public}d!", clRet);
1446         return CL_ASTC_ENC_FAILED;
1447     }
1448     cl_queue_properties props[] = {CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_HIGH_KHR, 0};
1449     clAstcHandle->queue = clCreateCommandQueueWithProperties(clAstcHandle->context,
1450         clAstcHandle->deviceID, props, &clRet);
1451     if (clRet != CL_SUCCESS) {
1452         IMAGE_LOGE("astc clCreateCommandQueueWithProperties failed ret %{public}d!", clRet);
1453         return CL_ASTC_ENC_FAILED;
1454     }
1455     if (AstcClBuildProgram(clAstcHandle, clBinPath) != CL_ASTC_ENC_SUCCESS) {
1456         IMAGE_LOGE("astc AstcClBuildProgram failed!");
1457         return CL_ASTC_ENC_FAILED;
1458     }
1459     return CL_ASTC_ENC_SUCCESS;
1460 }
1461 
AstcClCreate(ClAstcHandle ** handle,const std::string & clBinPath)1462 CL_ASTC_SHARE_LIB_API CL_ASTC_STATUS AstcClCreate(ClAstcHandle **handle, const std::string &clBinPath)
1463 {
1464     ClAstcHandle *clAstcHandle = static_cast<ClAstcHandle *>(calloc(1, sizeof(ClAstcHandle)));
1465     if (clAstcHandle == nullptr) {
1466         IMAGE_LOGE("astc AstcClCreate handle calloc failed!");
1467         return CL_ASTC_ENC_FAILED;
1468     }
1469     *handle = clAstcHandle;
1470     size_t numMaxBlocks = static_cast<size_t>(((MAX_WIDTH + DIM - 1) / DIM) * ((MAX_HEIGHT + DIM - 1) / DIM));
1471     clAstcHandle->encObj.blockErrs_ =
1472         static_cast<uint32_t *>(malloc((numMaxBlocks * sizeof(uint32_t)))); // 8MB mem Max
1473     if (clAstcHandle->encObj.blockErrs_ == nullptr) {
1474         IMAGE_LOGE("astc blockErrs_ malloc failed!");
1475         AstcClClose(*handle);
1476         return CL_ASTC_ENC_FAILED;
1477     }
1478     if (AstcCreateClKernel(clAstcHandle, clBinPath) != CL_ASTC_ENC_SUCCESS) {
1479         IMAGE_LOGE("astc AstcCreateClKernel failed!");
1480         AstcClClose(*handle);
1481         return CL_ASTC_ENC_FAILED;
1482     }
1483     return CL_ASTC_ENC_SUCCESS;
1484 }
1485 
AstcClEncImageCheckImageOption(const ClAstcImageOption * imageIn)1486 static CL_ASTC_STATUS AstcClEncImageCheckImageOption(const ClAstcImageOption *imageIn)
1487 {
1488     if ((imageIn->width <= 0) || (imageIn->height <= 0) || (imageIn->stride < imageIn->width)) {
1489         IMAGE_LOGE("astc AstcClEncImage width <= 0 or height <= 0 or stride < width!");
1490         return CL_ASTC_ENC_FAILED;
1491     }
1492     if ((imageIn->width > MAX_WIDTH) || (imageIn->height > MAX_HEIGHT)) {
1493         IMAGE_LOGE("astc AstcClEncImage width[%{public}d] \
1494             need be [1, %{public}d] and height[%{public}d] need be [1, %{public}d]", \
1495             imageIn->width, MAX_WIDTH, imageIn->height, MAX_HEIGHT);
1496         return CL_ASTC_ENC_FAILED;
1497     }
1498     return CL_ASTC_ENC_SUCCESS;
1499 }
1500 
AstcClFillImage(ClAstcImageOption * imageIn,uint8_t * data,int32_t stride,int32_t width,int32_t height)1501 CL_ASTC_SHARE_LIB_API CL_ASTC_STATUS AstcClFillImage(ClAstcImageOption *imageIn, uint8_t *data, int32_t stride,
1502     int32_t width, int32_t height)
1503 {
1504     if (imageIn == nullptr) {
1505         IMAGE_LOGE("astc AstcClFillImage imageIn is  nullptr!");
1506         return CL_ASTC_ENC_FAILED;
1507     }
1508     imageIn->data = data;
1509     imageIn->stride = stride >> STRIDE_RGBA_LOG2;
1510     imageIn->width = width;
1511     imageIn->height = height;
1512     if (AstcClEncImageCheckImageOption(imageIn)) {
1513         IMAGE_LOGE("astc AstcClEncImageCheckImageOption failed!");
1514         return CL_ASTC_ENC_FAILED;
1515     }
1516     return CL_ASTC_ENC_SUCCESS;
1517 }
1518 
GenAstcHeader(uint8_t * buffer,uint8_t blockX,uint8_t blockY,uint32_t dimX,uint32_t dimY)1519 static void GenAstcHeader(uint8_t *buffer, uint8_t blockX, uint8_t blockY, uint32_t dimX, uint32_t dimY)
1520 {
1521     uint8_t *headInfo = buffer;
1522     *headInfo++ = MAGIC_FILE_CONSTANT & BYTES_MASK;
1523     *headInfo++ = (MAGIC_FILE_CONSTANT >> BIT_SHIFT_8BITS) & BYTES_MASK;
1524     *headInfo++ = (MAGIC_FILE_CONSTANT >> BIT_SHIFT_16BITS) & BYTES_MASK;
1525     *headInfo++ = (MAGIC_FILE_CONSTANT >> BIT_SHIFT_24BITS) & BYTES_MASK;
1526     *headInfo++ = static_cast<uint8_t>(blockX);
1527     *headInfo++ = static_cast<uint8_t>(blockY);
1528     *headInfo++ = 1;
1529     *headInfo++ = dimX & BYTES_MASK;
1530     *headInfo++ = (dimX >> BIT_SHIFT_8BITS) & BYTES_MASK;
1531     *headInfo++ = (dimX >> BIT_SHIFT_16BITS) & BYTES_MASK;
1532     *headInfo++ = dimY & BYTES_MASK;
1533     *headInfo++ = (dimY >> BIT_SHIFT_8BITS) & BYTES_MASK;
1534     *headInfo++ = (dimY >> BIT_SHIFT_16BITS) & BYTES_MASK;
1535     *headInfo++ = 1;
1536     *headInfo++ = 0;
1537     *headInfo++ = 0;
1538 }
1539 
ReleaseClAstcObj(ClAstcObjEnc * obj)1540 static void ReleaseClAstcObj(ClAstcObjEnc *obj)
1541 {
1542     cl_int clRet;
1543     if (obj != nullptr) {
1544         if (obj->inputImage != nullptr) {
1545             clRet = clReleaseMemObject(obj->inputImage);
1546             if (clRet != CL_SUCCESS) {
1547                 IMAGE_LOGE("astc inputImage release failed ret %{public}d!", clRet);
1548             }
1549             obj->inputImage = nullptr;
1550         }
1551         if (obj->astcResult != nullptr) {
1552             clRet = clReleaseMemObject(obj->astcResult);
1553             if (clRet != CL_SUCCESS) {
1554                 IMAGE_LOGE("astc astcResult release failed ret %{public}d!", clRet);
1555             }
1556             obj->astcResult = nullptr;
1557         }
1558         if (obj->errBuffer != nullptr) {
1559             clRet = clReleaseMemObject(obj->errBuffer);
1560             if (clRet != CL_SUCCESS) {
1561                 IMAGE_LOGE("astc errBuffer release failed ret %{public}d!", clRet);
1562             }
1563             obj->errBuffer = nullptr;
1564         }
1565     }
1566 }
1567 
// Computes the sum and the maximum of the first numBlocks entries of blockErrs.
//
// sumVal: reset to 0, then receives the sum (unsigned 32-bit arithmetic, so it wraps on overflow).
// maxVal: NOT reset; it is only raised when an entry is larger, so the caller's initial value acts
//         as a lower bound of the result.
// blockErrs is not read when numBlocks is 0.
//
// The maximum is tracked with a plain integer comparison rather than the floating-point fmax, which
// avoids the uint32_t -> double -> uint32_t round trip and the dependency on <cmath>.
static void GetMaxAndSumVal(size_t numBlocks, uint32_t *blockErrs, uint32_t &maxVal, uint32_t &sumVal)
{
    sumVal = 0;
    for (size_t i = 0; i < numBlocks; i++) {
        sumVal += blockErrs[i];
        if (blockErrs[i] > maxVal) {
            maxVal = blockErrs[i];
        }
    }
}
1576 
ClCreateBufferAndImage(const ClAstcImageOption * imageIn,ClAstcHandle * clAstcHandle,ClAstcObjEnc * encObj)1577 static CL_ASTC_STATUS ClCreateBufferAndImage(const ClAstcImageOption *imageIn,
1578     ClAstcHandle *clAstcHandle, ClAstcObjEnc *encObj)
1579 {
1580     uint8_t *data = imageIn->data;
1581     int32_t stride = imageIn->stride;
1582     int32_t width = imageIn->width;
1583     int32_t height = imageIn->height;
1584     size_t numBlocks = static_cast<size_t>(((width + DIM - 1) / DIM) * ((height + DIM - 1) / DIM));
1585     uint32_t *blockErrs = encObj->blockErrs_;
1586     size_t blockErrBytes = sizeof(uint32_t) * numBlocks;
1587     encObj->astcSize = numBlocks * TEXTURE_BLOCK_BYTES;
1588     if ((blockErrs == nullptr) || (memset_s(blockErrs, blockErrBytes, 0, blockErrBytes))) {
1589         IMAGE_LOGE("astc blockErrs is nullptr or memset failed!");
1590         return CL_ASTC_ENC_FAILED;
1591     }
1592     cl_image_format imageFormat = { CL_RGBA, CL_UNORM_INT8 };
1593     cl_image_desc desc = { CL_MEM_OBJECT_IMAGE2D, stride, height };
1594     cl_int clRet;
1595     encObj->inputImage = clCreateImage(clAstcHandle->context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, &imageFormat,
1596         &desc, data, &clRet);
1597     if (clRet != CL_SUCCESS) {
1598         IMAGE_LOGE("astc clCreateImage failed ret %{public}d!", clRet);
1599         return CL_ASTC_ENC_FAILED;
1600     }
1601     encObj->astcResult = clCreateBuffer(clAstcHandle->context,
1602         CL_MEM_ALLOC_HOST_PTR, encObj->astcSize, NULL, &clRet);
1603     if (clRet != CL_SUCCESS) {
1604         IMAGE_LOGE("astc clCreateBuffer astcResult failed ret %{public}d!", clRet);
1605         return CL_ASTC_ENC_FAILED;
1606     }
1607     encObj->errBuffer = clCreateBuffer(clAstcHandle->context, CL_MEM_USE_HOST_PTR, blockErrBytes, blockErrs, &clRet);
1608     if (clRet != CL_SUCCESS) {
1609         IMAGE_LOGE("astc clCreateBuffer errBuffer failed ret %{public}d!", clRet);
1610         return CL_ASTC_ENC_FAILED;
1611     }
1612     return CL_ASTC_ENC_SUCCESS;
1613 }
1614 
ClKernelArgSet(ClAstcHandle * clAstcHandle,ClAstcObjEnc * encObj,int width,int height)1615 static CL_ASTC_STATUS ClKernelArgSet(ClAstcHandle *clAstcHandle, ClAstcObjEnc *encObj, int width, int height)
1616 {
1617     int32_t kernelId = 0;
1618     cl_int clRet = clSetKernelArg(clAstcHandle->kernel, kernelId++, sizeof(cl_mem), &encObj->inputImage);
1619     if (clRet != CL_SUCCESS) {
1620         IMAGE_LOGE("astc clSetKernelArg inputImage failed ret %{public}d!", clRet);
1621         return CL_ASTC_ENC_FAILED;
1622     }
1623     clRet = clSetKernelArg(clAstcHandle->kernel, kernelId++, sizeof(cl_mem), &encObj->astcResult);
1624     if (clRet != CL_SUCCESS) {
1625         IMAGE_LOGE("astc clSetKernelArg astcResult failed ret %{public}d!", clRet);
1626         return CL_ASTC_ENC_FAILED;
1627     }
1628     clRet = clSetKernelArg(clAstcHandle->kernel, kernelId++, sizeof(cl_mem), &encObj->errBuffer);
1629     if (clRet != CL_SUCCESS) {
1630         IMAGE_LOGE("astc clSetKernelArg errBuffer failed ret %{public}d!", clRet);
1631         return CL_ASTC_ENC_FAILED;
1632     }
1633     clRet = clSetKernelArg(clAstcHandle->kernel, kernelId++, sizeof(int), &width);
1634     if (clRet != CL_SUCCESS) {
1635         IMAGE_LOGE("astc clSetKernelArg width failed ret %{public}d!", clRet);
1636         return CL_ASTC_ENC_FAILED;
1637     }
1638     clRet = clSetKernelArg(clAstcHandle->kernel, kernelId++, sizeof(int), &height);
1639     if (clRet != CL_SUCCESS) {
1640         IMAGE_LOGE("astc clSetKernelArg height failed ret %{public}d!", clRet);
1641         return CL_ASTC_ENC_FAILED;
1642     }
1643     return CL_ASTC_ENC_SUCCESS;
1644 }
1645 
ClKernelArgSetAndRun(ClAstcHandle * clAstcHandle,ClAstcObjEnc * encObj,int width,int height)1646 static CL_ASTC_STATUS ClKernelArgSetAndRun(ClAstcHandle *clAstcHandle, ClAstcObjEnc *encObj, int width, int height)
1647 {
1648     if (ClKernelArgSet(clAstcHandle, encObj, width, height) != CL_ASTC_ENC_SUCCESS) {
1649         IMAGE_LOGE("astc ClKernelArgSet failed!");
1650         return CL_ASTC_ENC_FAILED;
1651     }
1652     size_t local[] = {WORK_GROUP_SIZE, WORK_GROUP_SIZE};
1653     size_t global[GLOBAL_WH_NUM_CL];
1654     global[0] = (width + DIM - 1) / DIM;
1655     global[1] = (height + DIM - 1) / DIM;
1656     size_t localMax;
1657     cl_int clRet = clGetKernelWorkGroupInfo(clAstcHandle->kernel, clAstcHandle->deviceID, CL_KERNEL_WORK_GROUP_SIZE,
1658         sizeof(size_t), &localMax, nullptr);
1659     if (clRet != CL_SUCCESS) {
1660         IMAGE_LOGE("astc clGetKernelWorkGroupInfo failed ret %{public}d!", clRet);
1661         return CL_ASTC_ENC_FAILED;
1662     }
1663     while (local[0] * local[1] > localMax) {
1664         local[0]--;
1665         local[1]--;
1666     }
1667     if ((local[0] < 1) || (local[1] < 1)) {
1668         IMAGE_LOGE("astc ClKernelArgSetAndRun local set failed!");
1669         return CL_ASTC_ENC_FAILED;
1670     }
1671     clRet = clEnqueueNDRangeKernel(clAstcHandle->queue, clAstcHandle->kernel, GLOBAL_WH_NUM_CL, nullptr, global, local,
1672         0, nullptr, nullptr);
1673     if (clRet != CL_SUCCESS) {
1674         IMAGE_LOGE("astc clEnqueueNDRangeKernel failed ret %{public}d!", clRet);
1675         return CL_ASTC_ENC_FAILED;
1676     }
1677     clRet = clFinish(clAstcHandle->queue);
1678     if (clRet != CL_SUCCESS) {
1679         IMAGE_LOGE("astc clFinish failed ret %{public}d!", clRet);
1680         return CL_ASTC_ENC_FAILED;
1681     }
1682     return CL_ASTC_ENC_SUCCESS;
1683 }
1684 
ClReadAstcBufAndBlockError(ClAstcHandle * clAstcHandle,ClAstcObjEnc * encObj,const ClAstcImageOption * imageIn,uint8_t * buffer)1685 static CL_ASTC_STATUS ClReadAstcBufAndBlockError(ClAstcHandle *clAstcHandle, ClAstcObjEnc *encObj,
1686     const ClAstcImageOption *imageIn, uint8_t *buffer)
1687 {
1688     cl_int clRet = clEnqueueReadBuffer(clAstcHandle->queue, encObj->astcResult, CL_TRUE,
1689         0, encObj->astcSize, buffer + TEXTURE_HEAD_BYTES, 0, NULL, NULL);
1690     if (clRet != CL_SUCCESS) {
1691         IMAGE_LOGE("astc clEnqueueReadBuffer astcResult failed ret %{public}d!", clRet);
1692         return CL_ASTC_ENC_FAILED;
1693     }
1694     uint32_t maxVal = 0;
1695     uint32_t sumVal = 0;
1696     size_t numBlocks = ((imageIn->width + DIM - 1) / DIM) * ((imageIn->height + DIM - 1) / DIM);
1697     clRet = clEnqueueReadBuffer(clAstcHandle->queue, encObj->errBuffer, CL_TRUE,
1698         0, sizeof(uint32_t) * numBlocks, encObj->blockErrs_, 0, NULL, NULL);
1699     if (clRet != CL_SUCCESS) {
1700         IMAGE_LOGE("astc clEnqueueReadBuffer blockErrs failed ret %{public}d!", clRet);
1701         return CL_ASTC_ENC_FAILED;
1702     }
1703     GetMaxAndSumVal(numBlocks, encObj->blockErrs_, maxVal, sumVal);
1704     return CL_ASTC_ENC_SUCCESS;
1705 }
1706 
AstcClEncImage(ClAstcHandle * clAstcHandle,const ClAstcImageOption * imageIn,uint8_t * buffer)1707 CL_ASTC_SHARE_LIB_API CL_ASTC_STATUS AstcClEncImage(ClAstcHandle *clAstcHandle,
1708     const ClAstcImageOption *imageIn, uint8_t *buffer)
1709 {
1710     if ((clAstcHandle == nullptr) || (imageIn == nullptr) || (buffer == nullptr)) {
1711         IMAGE_LOGE("astc AstcClEncImage clAstcHandle or imageIn or buffer is nullptr!");
1712         return CL_ASTC_ENC_FAILED;
1713     }
1714     if (AstcClEncImageCheckImageOption(imageIn)) {
1715         IMAGE_LOGE("astc AstcClEncImageCheckImageOption failed!");
1716         return CL_ASTC_ENC_FAILED;
1717     }
1718     GenAstcHeader(buffer, DIM, DIM, imageIn->width, imageIn->height);
1719     ClAstcObjEnc *encObj = &clAstcHandle->encObj;
1720     if (ClCreateBufferAndImage(imageIn, clAstcHandle, encObj) != CL_ASTC_ENC_SUCCESS) {
1721         ReleaseClAstcObj(encObj);
1722         IMAGE_LOGE("astc ClCreateBufferAndImage failed!");
1723         return CL_ASTC_ENC_FAILED;
1724     }
1725     if (ClKernelArgSetAndRun(clAstcHandle, encObj, imageIn->width, imageIn->height) != CL_ASTC_ENC_SUCCESS) {
1726         ReleaseClAstcObj(encObj);
1727         IMAGE_LOGE("astc ClKernelArgSetAndRun failed!");
1728         return CL_ASTC_ENC_FAILED;
1729     }
1730     if (ClReadAstcBufAndBlockError(clAstcHandle, encObj, imageIn, buffer) != CL_ASTC_ENC_SUCCESS) {
1731         ReleaseClAstcObj(encObj);
1732         IMAGE_LOGE("astc ClReadAstcBufAndBlockError failed!");
1733         return CL_ASTC_ENC_FAILED;
1734     }
1735     ReleaseClAstcObj(encObj);
1736     return CL_ASTC_ENC_SUCCESS;
1737 }
1738 }
1739 }
1740 }