1 /*
2 * Copyright (C) 2023 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "image_compressor.h"
17
18 #include <unistd.h>
19 #include <fstream>
20
21 #include "securec.h"
22 #include "media_errors.h"
23 #include "image_log.h"
24
25 #undef LOG_DOMAIN
26 #define LOG_DOMAIN LOG_TAG_DOMAIN_ID_PLUGIN
27
28 #undef LOG_TAG
29 #define LOG_TAG "ClAstcEnc"
30
31 namespace OHOS {
32 namespace ImagePlugin {
33 namespace AstcEncBasedCl {
// Host-side constants of the OpenCL-based ASTC encoder.
// NOTE(review): the code that consumes these constants is outside this excerpt; the per-constant notes
// below are read off the names and values only and should be confirmed against the use sites.
constexpr int MAX_WIDTH = 8192; // presumably the largest accepted image width, in pixels
constexpr int MAX_HEIGHT = 4096; // presumably the largest accepted image height, in pixels
constexpr int TEXTURE_HEAD_BYTES = 16; // presumably the size of the ASTC file header, in bytes
constexpr int TEXTURE_BLOCK_BYTES = 16; // presumably the size of one compressed block, in bytes
constexpr int MAGIC_FILE_CONSTANT = 0x5CA1AB13; // same value as the magic number of the .astc file header
constexpr int DIM = 4; // same value as the DIM macro defined in the OpenCL source below
constexpr uint8_t BIT_SHIFT_8BITS = 8;
constexpr uint8_t BIT_SHIFT_16BITS = 16;
constexpr uint8_t BIT_SHIFT_24BITS = 24;
constexpr uint8_t BYTES_MASK = 0xFF; // keeps the low 8 bits of a value
constexpr uint8_t STRIDE_RGBA_LOG2 = 2; // 1 << 2 == 4; presumably the bytes per RGBA pixel
constexpr uint8_t GLOBAL_WH_NUM_CL = 2;
constexpr size_t MAX_MALLOC_BYTES = 10000000; // max 10MB
constexpr size_t WORK_GROUP_SIZE = 8;
48
49 const char *g_programSource = R"(
// Notice: the code from line 42 to line 1266 is OpenCL C language
// OpenCL can only support C language style and cannot support constexpr and static_cast on some platforms
// ---- Block geometry, weight limits and color endpoint modes ----
// NOTE(review): most of these macros are consumed by kernel code outside this excerpt; group titles
// follow the macro names and should be confirmed against the use sites.
#define DIM (4)
#define BLOCK_SIZE (16)
#define X_GRIDS (4)
#define Y_GRIDS (4)
#define SMALL_VALUE (0.00001f) // avoid division by zero
#define BLOCK_MAX_WEIGHTS (64)
#define BLOCK_MAX_WEIGHTS_SHORT (64)
#define BLOCK_MAX_WEIGHTS_FLOAT (64.0f)
#define BLOCK_MAX_WEIGHTS_2PLANE (32)
#define WEIGHTS_PLANE2_OFFSET (32)
#define CEM_LDR_RGB_DIRECT (8)
#define CEM_LDR_RGB_BASE_OFFSET (9)
#define CEM_LDR_RGBA_DIRECT (12)
#define CEM_LDR_RGBA_BASE_OFFSET (13)
#define PIXEL_MAX_VALUE (255.0f)

// ---- Quantization level indices ----
// QUANT_n is the index of the level with n steps; QUANT_MAX is the number of levels and sizes the
// per-level tables (g_bitsTritsQuintsTable, g_colorQuantTables).
#define QUANT_2 (0)
#define QUANT_3 (1)
#define QUANT_4 (2)
#define QUANT_5 (3)
#define QUANT_6 (4)
#define QUANT_8 (5)
#define QUANT_10 (6)
#define QUANT_12 (7)
#define QUANT_16 (8)
#define QUANT_20 (9)
#define QUANT_24 (10)
#define QUANT_32 (11)
#define QUANT_40 (12)
#define QUANT_48 (13)
#define QUANT_64 (14)
#define QUANT_80 (15)
#define QUANT_96 (16)
#define QUANT_128 (17)
#define QUANT_160 (18)
#define QUANT_192 (19)
#define QUANT_256 (20)
#define QUANT_MAX (21)

// ---- Table dimensions and partition limits ----
// WEIGHT_QUANTIZE_GROUP x WEIGHT_QUANTIZE_NUM sizes g_scrambleTable and g_weightUnquant;
// COLOR_NUM is the row length of the color tables; PARTITION_COUNT sizes PartInfo.bitmaps;
// MAX_BLOCK_SIZE and SECOND_PARTITION_INDEX are used by GetPart().
#define WEIGHT_RANGE_6 (6)
#define WEIGHT_QUANTIZE_NUM (32)
#define COLOR_NUM (256)
#define MAX_PARTITION_COUNT (4)
#define PARTITION_COUNT (2)
#define MAX_BLOCK_SIZE (32)
#define WEIGHT_QUANTIZE_GROUP (12)
#define SECOND_PARTITION_INDEX (1)

// ---- Named literals and endpoint/component indices ----
#define START_INDEX (0)
#define FLOAT_ZERO (0.0f)
#define FLOAT_ONE (1.0f)
#define INT_ZERO (0)
#define INT_ONE (1)
#define SHORT_ZERO (0)
#define UINT_ZERO (0)
#define UINT_ONE (1u)
#define EP0_INDEX (0)
#define EP1_INDEX (1)
#define END_POINT_NUM (2)
#define EP0_R_INDEX (0)
#define EP1_R_INDEX (1)
#define EP0_G_INDEX (2)
#define EP1_G_INDEX (3)
#define EP0_B_INDEX (4)
#define EP1_B_INDEX (5)
#define EP0_A_INDEX (6)
#define EP1_A_INDEX (7)
#define COLOR_COMPONENT_NUM (8)
#define QUANTIZE_WEIGHT_MIN (0)

// ---- Trit / quint packing parameters ----
// NOTE(review): presumably used by the integer sequence encoding (ISE) code - confirm at the use sites.
#define TRIT_MSB_SIZE (8)
#define TRIT_BLOCK_SIZE (5)
#define TRIT_ROUND_NUM (4)
#define QUINT_MSB_SIZE (7)
#define QUINT_BLOCK_SIZE (3)
#define QUINT_ROUND_NUM (2)
#define ISE_0 (0)
#define ISE_1 (1)
#define ISE_2 (2)
#define ISE_3 (3)
#define ISE_4 (4)

// ---- Named integers 0..15 ----
#define WEIGHT_0 (0)
#define WEIGHT_1 (1)
#define WEIGHT_2 (2)
#define WEIGHT_3 (3)
#define WEIGHT_4 (4)
#define WEIGHT_5 (5)
#define WEIGHT_6 (6)
#define WEIGHT_7 (7)
#define WEIGHT_8 (8)
#define WEIGHT_9 (9)
#define WEIGHT_10 (10)
#define WEIGHT_11 (11)
#define WEIGHT_12 (12)
#define WEIGHT_13 (13)
#define WEIGHT_14 (14)
#define WEIGHT_15 (15)

// ---- Bit positions and masks ----
#define BYTE_1_POS (8)
#define BYTE_2_POS (16)
#define BYTE_3_POS (24)
#define BYTE_MASK (0xFFu)
#define CEM_POS (13)
#define COLOR_EP_POS (17)
#define COLOR_EP_HIGH_BIT (15)
#define MASK_FOR_4BITS (0xFu)
#define MASK_FOR_15BITS (0x7FFFu)
#define MASK_FOR_17BITS (0x1FFFFu)

// ---- Block-mode field offsets and masks ----
// MASK_FOR_1BITS is also used by GetPart() to isolate a single bitmap bit.
#define HEIGHT_BITS_OFFSET (2)
#define WIDTH_BITS_OFFSET (4)
#define MASK_FOR_2BITS (0x3u)
#define MASK_FOR_1BITS (0x1u)
#define WEIGHT_METHOD_OFFSET (2u)
#define WEIGHT_METHOD_RIGHT_BIT (1)
#define WEIGHT_METHOD_POS (4u)
#define BLOCK_WIDTH_POS (5u)
#define BLOCK_HEIGHT_POS (5u)
#define WEIGHT_PRECISION_POS (9u)
#define IS_DUALPLANE_POS (10u)
173
// Partition description of one block.
typedef struct {
    int partid; // NOTE(review): presumably the partition id/seed - not read anywhere in this excerpt
    uint bitmaps[PARTITION_COUNT]; // bit masks; GetPart() reads bit i of bitmaps[SECOND_PARTITION_INDEX]
} PartInfo;
178
// Returns bit `i` (0 or 1) of the bitmap stored at SECOND_PARTITION_INDEX in *partInfo.
// Any index at or beyond MAX_BLOCK_SIZE yields 0.
int GetPart(PartInfo* partInfo, int i)
{
    int part = 0;
    if (i < MAX_BLOCK_SIZE) {
        uint shifted = partInfo->bitmaps[SECOND_PARTITION_INDEX] >> i;
        part = (int)(shifted & MASK_FOR_1BITS);
    }
    return part;
}
186
// Per-level weight index permutation table.
// Layout: WEIGHT_QUANTIZE_GROUP (12) rows of WEIGHT_QUANTIZE_NUM (32) entries. The rows hold 2, 3, 4, 5, 6, 8,
// 10, 12, 16, 20, 24 and 32 meaningful entries respectively; the remainder of each row is zero padding.
// NOTE(review): presumably indexed as [level * WEIGHT_QUANTIZE_NUM + weightIndex] - confirm at the use site.
__constant short g_scrambleTable[WEIGHT_QUANTIZE_GROUP * WEIGHT_QUANTIZE_NUM] = {
    // 2 entries
    0, 1,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0,
    // 3 entries
    0, 1, 2,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 4 entries
    0, 1, 2, 3,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 5 entries
    0, 1, 2, 3, 4,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 6 entries
    0, 2, 4, 5, 3, 1,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 8 entries
    0, 1, 2, 3, 4, 5, 6, 7,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 10 entries
    0, 2, 4, 6, 8, 9, 7, 5, 3, 1,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 12 entries
    0, 4, 8, 2, 6, 10, 11, 7, 3, 9, 5, 1,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 16 entries
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 20 entries
    0, 4, 8, 12, 16, 2, 6, 10, 14, 18, 19, 15, 11, 7, 3, 17, 13, 9, 5, 1,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 24 entries (followed by 8 padding zeros)
    0, 8, 16, 2, 10, 18, 4, 12, 20, 6, 14, 22, 23, 15, 7, 21, 13, 5, 19,
    11, 3, 17, 9, 1, 0, 0, 0, 0, 0, 0, 0, 0,
    // 32 entries
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
    20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
};
214
// Per-level unquantized weight values, all in the range 0..64.
// Layout: WEIGHT_QUANTIZE_GROUP (12) rows of WEIGHT_QUANTIZE_NUM (32) entries. The rows hold 2, 3, 4, 5, 6, 8,
// 10, 12, 16, 20, 24 and 32 meaningful entries respectively; the remainder of each row is zero padding.
// NOTE(review): presumably indexed as [level * WEIGHT_QUANTIZE_NUM + weightIndex] - confirm at the use site.
__constant short g_weightUnquant[WEIGHT_QUANTIZE_GROUP * WEIGHT_QUANTIZE_NUM] = {
    // 2 entries
    0, 64,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 3 entries
    0, 32, 64,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 4 entries
    0, 21, 43, 64,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 5 entries
    0, 16, 32, 48, 64,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 6 entries
    0, 64, 12, 52, 25, 39,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 8 entries
    0, 9, 18, 27, 37, 46, 55, 64,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 10 entries
    0, 64, 7, 57, 14, 50, 21, 43, 28, 36,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 12 entries
    0, 64, 17, 47, 5, 59, 23, 41, 11, 53, 28, 36,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 16 entries
    0, 4, 8, 12, 17, 21, 25, 29, 35, 39, 43, 47, 52, 56, 60, 64,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 20 entries
    0, 64, 16, 48, 3, 61, 19, 45, 6, 58, 23, 41, 9, 55, 26, 38, 13, 51, 29, 35,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 24 entries
    0, 64, 8, 56, 16, 48, 24, 40, 2, 62, 11, 53, 19, 45, 27, 37, 5, 59, 13, 51, 22, 42, 30, 34,
    0, 0, 0, 0, 0, 0, 0, 0,
    // 32 entries
    0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 34, 36, 38,
    40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64
};
// Packed integer for each combination of trits: 243 = 3^5 entries, every value fits in 8 bits.
// NOTE(review): presumably indexed by the five trits of a group combined in base 3, matching
// TRIT_BLOCK_SIZE (5) and TRIT_MSB_SIZE (8); the digit order is not visible here - confirm at the use site.
__constant short g_integerFromTrits[243] = { // 243 = 3^5 entries, one per combination of five trits
    0, 1, 2, 4, 5, 6, 8, 9, 10,
    16, 17, 18, 20, 21, 22, 24, 25, 26,
    3, 7, 15, 19, 23, 27, 12, 13, 14,
    32, 33, 34, 36, 37, 38, 40, 41, 42,
    48, 49, 50, 52, 53, 54, 56, 57, 58,
    35, 39, 47, 51, 55, 59, 44, 45, 46,
    64, 65, 66, 68, 69, 70, 72, 73, 74,
    80, 81, 82, 84, 85, 86, 88, 89, 90,
    67, 71, 79, 83, 87, 91, 76, 77, 78,

    128, 129, 130, 132, 133, 134, 136, 137, 138,
    144, 145, 146, 148, 149, 150, 152, 153, 154,
    131, 135, 143, 147, 151, 155, 140, 141, 142,
    160, 161, 162, 164, 165, 166, 168, 169, 170,
    176, 177, 178, 180, 181, 182, 184, 185, 186,
    163, 167, 175, 179, 183, 187, 172, 173, 174,
    192, 193, 194, 196, 197, 198, 200, 201, 202,
    208, 209, 210, 212, 213, 214, 216, 217, 218,
    195, 199, 207, 211, 215, 219, 204, 205, 206,

    96, 97, 98, 100, 101, 102, 104, 105, 106,
    112, 113, 114, 116, 117, 118, 120, 121, 122,
    99, 103, 111, 115, 119, 123, 108, 109, 110,
    224, 225, 226, 228, 229, 230, 232, 233, 234,
    240, 241, 242, 244, 245, 246, 248, 249, 250,
    227, 231, 239, 243, 247, 251, 236, 237, 238,
    28, 29, 30, 60, 61, 62, 92, 93, 94,
    156, 157, 158, 188, 189, 190, 220, 221, 222,
    31, 63, 127, 159, 191, 255, 252, 253, 254
};
272
// Composition of every quantization level, three numbers per level in QUANT_2 .. QUANT_256 order:
// { number of plain bits, number of trits (0 or 1), number of quints (0 or 1) }.
// The level's step count equals 2^bits * 3^trits * 5^quints (e.g. RANGE_6 = 2^1 * 3, RANGE_20 = 2^2 * 5).
__constant int g_bitsTritsQuintsTable[QUANT_MAX * 3] = { // 3 numbers (bits, trits, quints) per level
    1, 0, 0, // RANGE_2
    0, 1, 0, // RANGE_3
    2, 0, 0, // RANGE_4
    0, 0, 1, // RANGE_5
    1, 1, 0, // RANGE_6
    3, 0, 0, // RANGE_8
    1, 0, 1, // RANGE_10
    2, 1, 0, // RANGE_12
    4, 0, 0, // RANGE_16
    2, 0, 1, // RANGE_20
    3, 1, 0, // RANGE_24
    5, 0, 0, // RANGE_32
    3, 0, 1, // RANGE_40
    4, 1, 0, // RANGE_48
    6, 0, 0, // RANGE_64
    4, 0, 1, // RANGE_80
    5, 1, 0, // RANGE_96
    7, 0, 0, // RANGE_128
    5, 0, 1, // RANGE_160
    6, 1, 0, // RANGE_192
    8, 0, 0 // RANGE_256
};
296
// Packed integer for each combination of quints: 125 = 5^3 entries, every value fits in 7 bits.
// NOTE(review): presumably indexed by the three quints of a group combined in base 5, matching
// QUINT_BLOCK_SIZE (3) and QUINT_MSB_SIZE (7); the digit order is not visible here - confirm at the use site.
__constant short g_integerFromQuints[125] = { // 125 = 5^3 entries, one per combination of three quints
    0, 1, 2, 3, 4, 8, 9, 10, 11, 12, 16, 17, 18, 19, 20, 24, 25, 26, 27, 28, 5, 13, 21, 29, 6,
    32, 33, 34, 35, 36, 40, 41, 42, 43, 44, 48, 49, 50, 51, 52, 56, 57, 58, 59, 60, 37, 45, 53,
    61, 14,
    64, 65, 66, 67, 68, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 88, 89, 90, 91, 92, 69, 77, 85,
    93, 22,
    96, 97, 98, 99, 100, 104, 105, 106, 107, 108, 112, 113, 114, 115, 116, 120, 121, 122, 123,
    124, 101, 109, 117, 125, 30,
    102, 103, 70, 71, 38, 110, 111, 78, 79, 46, 118, 119, 86, 87, 54, 126, 127, 94, 95, 62, 39,
    47, 55, 63, 31
};
308
// Color quantization lookup tables.
// Layout: QUANT_MAX (21) consecutive rows of COLOR_NUM (256) entries, in QUANT_2 .. QUANT_256 order; each row is
// introduced by a marker comment. Within a row, entry v is the quantized index stored for the 8-bit value v.
// For levels whose step count is not a power of two the indices are not monotonic in v
// (e.g. the QUANT_6 row runs 0, 2, 4, 5, 3, 1 as v grows); the QUANT_256 row is the identity.
// NOTE(review): presumably indexed as [level * COLOR_NUM + value] - confirm at the use site.
__constant short g_colorQuantTables[QUANT_MAX * COLOR_NUM] = {
    // QUANT_2
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // QUANT_3
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    // QUANT_4
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    // QUANT_5
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    // QUANT_6
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // QUANT_8
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    // QUANT_10
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // QUANT_12
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
    11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // QUANT_16
    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
    11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
    13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    // QUANT_20
    0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 8, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 10, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 10, 10, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    14, 14, 14, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
    19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 15, 15, 15,
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 11, 11, 11, 11, 11,
    11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 9, 9,
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 1, 1, 1, 1, 1,
    // QUANT_24
    0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 12, 12, 12, 12, 12, 12, 12, 12,
    12, 12, 12, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 14, 14, 14, 14, 14, 14, 14,
    14, 14, 14, 14, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
    23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 15, 15, 15, 15,
    15, 15, 15, 15, 15, 15, 15, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 13, 13, 13,
    13, 13, 13, 13, 13, 13, 13, 13, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 11, 11,
    11, 11, 11, 11, 11, 11, 11, 11, 11, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 9,
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1, 1, 1,
    // QUANT_32
    0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2,
    2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6,
    6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8,
    8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10,
    10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 12,
    12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13,
    14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15,
    16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17,
    18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19,
    19, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21,
    21, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23,
    23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25,
    25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27,
    27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29,
    29, 29, 29, 30, 30, 30, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31,
    // QUANT_40
    0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16,
    16, 24, 24, 24, 24, 24, 24, 32, 32, 32, 32, 32, 32, 32, 2, 2,
    2, 2, 2, 2, 10, 10, 10, 10, 10, 10, 10, 18, 18, 18, 18, 18,
    18, 26, 26, 26, 26, 26, 26, 26, 34, 34, 34, 34, 34, 34, 4, 4,
    4, 4, 4, 4, 4, 12, 12, 12, 12, 12, 12, 20, 20, 20, 20, 20,
    20, 20, 28, 28, 28, 28, 28, 28, 36, 36, 36, 36, 36, 36, 36, 6,
    6, 6, 6, 6, 6, 14, 14, 14, 14, 14, 14, 14, 22, 22, 22, 22,
    22, 22, 30, 30, 30, 30, 30, 30, 30, 38, 38, 38, 38, 38, 38, 38,
    39, 39, 39, 39, 39, 39, 39, 31, 31, 31, 31, 31, 31, 31, 23, 23,
    23, 23, 23, 23, 15, 15, 15, 15, 15, 15, 15, 7, 7, 7, 7, 7,
    7, 37, 37, 37, 37, 37, 37, 37, 29, 29, 29, 29, 29, 29, 21, 21,
    21, 21, 21, 21, 21, 13, 13, 13, 13, 13, 13, 5, 5, 5, 5, 5,
    5, 5, 35, 35, 35, 35, 35, 35, 27, 27, 27, 27, 27, 27, 27, 19,
    19, 19, 19, 19, 19, 11, 11, 11, 11, 11, 11, 11, 3, 3, 3, 3,
    3, 3, 33, 33, 33, 33, 33, 33, 33, 25, 25, 25, 25, 25, 25, 17,
    17, 17, 17, 17, 17, 17, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1,
    // QUANT_48
    0, 0, 0, 16, 16, 16, 16, 16, 16, 32, 32, 32, 32, 32, 2, 2,
    2, 2, 2, 18, 18, 18, 18, 18, 18, 34, 34, 34, 34, 34, 4, 4,
    4, 4, 4, 4, 20, 20, 20, 20, 20, 36, 36, 36, 36, 36, 6, 6,
    6, 6, 6, 6, 22, 22, 22, 22, 22, 38, 38, 38, 38, 38, 38, 8,
    8, 8, 8, 8, 24, 24, 24, 24, 24, 24, 40, 40, 40, 40, 40, 10,
    10, 10, 10, 10, 26, 26, 26, 26, 26, 26, 42, 42, 42, 42, 42, 12,
    12, 12, 12, 12, 12, 28, 28, 28, 28, 28, 44, 44, 44, 44, 44, 14,
    14, 14, 14, 14, 14, 30, 30, 30, 30, 30, 46, 46, 46, 46, 46, 46,
    47, 47, 47, 47, 47, 47, 31, 31, 31, 31, 31, 15, 15, 15, 15, 15,
    15, 45, 45, 45, 45, 45, 29, 29, 29, 29, 29, 13, 13, 13, 13, 13,
    13, 43, 43, 43, 43, 43, 27, 27, 27, 27, 27, 27, 11, 11, 11, 11,
    11, 41, 41, 41, 41, 41, 25, 25, 25, 25, 25, 25, 9, 9, 9, 9,
    9, 39, 39, 39, 39, 39, 39, 23, 23, 23, 23, 23, 7, 7, 7, 7,
    7, 7, 37, 37, 37, 37, 37, 21, 21, 21, 21, 21, 5, 5, 5, 5,
    5, 5, 35, 35, 35, 35, 35, 19, 19, 19, 19, 19, 19, 3, 3, 3,
    3, 3, 33, 33, 33, 33, 33, 17, 17, 17, 17, 17, 17, 1, 1, 1,
    // QUANT_64
    0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4,
    4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8,
    8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12,
    12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16,
    16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19,
    20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23,
    24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27,
    28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31,
    32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35,
    36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39,
    40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43,
    44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47,
    47, 48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51,
    51, 52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55,
    55, 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59,
    59, 60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63,
    // QUANT_80
    0, 0, 16, 16, 16, 32, 32, 32, 48, 48, 48, 48, 64, 64, 64, 2,
    2, 2, 18, 18, 18, 34, 34, 34, 50, 50, 50, 50, 66, 66, 66, 4,
    4, 4, 20, 20, 20, 36, 36, 36, 36, 52, 52, 52, 68, 68, 68, 6,
    6, 6, 22, 22, 22, 38, 38, 38, 38, 54, 54, 54, 70, 70, 70, 8,
    8, 8, 24, 24, 24, 24, 40, 40, 40, 56, 56, 56, 72, 72, 72, 10,
    10, 10, 26, 26, 26, 26, 42, 42, 42, 58, 58, 58, 74, 74, 74, 12,
    12, 12, 12, 28, 28, 28, 44, 44, 44, 60, 60, 60, 76, 76, 76, 14,
    14, 14, 14, 30, 30, 30, 46, 46, 46, 62, 62, 62, 78, 78, 78, 78,
    79, 79, 79, 79, 63, 63, 63, 47, 47, 47, 31, 31, 31, 15, 15, 15,
    15, 77, 77, 77, 61, 61, 61, 45, 45, 45, 29, 29, 29, 13, 13, 13,
    13, 75, 75, 75, 59, 59, 59, 43, 43, 43, 27, 27, 27, 27, 11, 11,
    11, 73, 73, 73, 57, 57, 57, 41, 41, 41, 25, 25, 25, 25, 9, 9,
    9, 71, 71, 71, 55, 55, 55, 39, 39, 39, 39, 23, 23, 23, 7, 7,
    7, 69, 69, 69, 53, 53, 53, 37, 37, 37, 37, 21, 21, 21, 5, 5,
    5, 67, 67, 67, 51, 51, 51, 51, 35, 35, 35, 19, 19, 19, 3, 3,
    3, 65, 65, 65, 49, 49, 49, 49, 33, 33, 33, 17, 17, 17, 1, 1,
    // QUANT_96
    0, 0, 32, 32, 64, 64, 64, 2, 2, 2, 34, 34, 66, 66, 66, 4,
    4, 4, 36, 36, 68, 68, 68, 6, 6, 6, 38, 38, 70, 70, 70, 8,
    8, 8, 40, 40, 40, 72, 72, 10, 10, 10, 42, 42, 42, 74, 74, 12,
    12, 12, 44, 44, 44, 76, 76, 14, 14, 14, 46, 46, 46, 78, 78, 16,
    16, 16, 48, 48, 48, 80, 80, 80, 18, 18, 50, 50, 50, 82, 82, 82,
    20, 20, 52, 52, 52, 84, 84, 84, 22, 22, 54, 54, 54, 86, 86, 86,
    24, 24, 56, 56, 56, 88, 88, 88, 26, 26, 58, 58, 58, 90, 90, 90,
    28, 28, 60, 60, 60, 92, 92, 92, 30, 30, 62, 62, 62, 94, 94, 94,
    95, 95, 95, 63, 63, 63, 31, 31, 93, 93, 93, 61, 61, 61, 29, 29,
    91, 91, 91, 59, 59, 59, 27, 27, 89, 89, 89, 57, 57, 57, 25, 25,
    87, 87, 87, 55, 55, 55, 23, 23, 85, 85, 85, 53, 53, 53, 21, 21,
    83, 83, 83, 51, 51, 51, 19, 19, 81, 81, 81, 49, 49, 49, 17, 17,
    17, 79, 79, 47, 47, 47, 15, 15, 15, 77, 77, 45, 45, 45, 13, 13,
    13, 75, 75, 43, 43, 43, 11, 11, 11, 73, 73, 41, 41, 41, 9, 9,
    9, 71, 71, 71, 39, 39, 7, 7, 7, 69, 69, 69, 37, 37, 5, 5,
    5, 67, 67, 67, 35, 35, 3, 3, 3, 65, 65, 65, 33, 33, 1, 1,
    // QUANT_128
    0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7,
    8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15,
    16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23,
    24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30, 30, 31, 31,
    32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37, 38, 38, 39, 39,
    40, 40, 41, 41, 42, 42, 43, 43, 44, 44, 45, 45, 46, 46, 47, 47,
    48, 48, 49, 49, 50, 50, 51, 51, 52, 52, 53, 53, 54, 54, 55, 55,
    56, 56, 57, 57, 58, 58, 59, 59, 60, 60, 61, 61, 62, 62, 63, 63,
    64, 64, 65, 65, 66, 66, 67, 67, 68, 68, 69, 69, 70, 70, 71, 71,
    72, 72, 73, 73, 74, 74, 75, 75, 76, 76, 77, 77, 78, 78, 79, 79,
    80, 80, 81, 81, 82, 82, 83, 83, 84, 84, 85, 85, 86, 86, 87, 87,
    88, 88, 89, 89, 90, 90, 91, 91, 92, 92, 93, 93, 94, 94, 95, 95,
    96, 96, 97, 97, 98, 98, 99, 99, 100, 100, 101, 101, 102, 102, 103, 103,
    104, 104, 105, 105, 106, 106, 107, 107, 108, 108, 109, 109, 110, 110, 111, 111,
    112, 112, 113, 113, 114, 114, 115, 115, 116, 116, 117, 117, 118, 118, 119, 119,
    120, 120, 121, 121, 122, 122, 123, 123, 124, 124, 125, 125, 126, 126, 127, 127,
    // QUANT_160
    0, 32, 32, 64, 96, 96, 128, 128, 2, 34, 34, 66, 98, 98, 130, 130,
    4, 36, 36, 68, 100, 100, 132, 132, 6, 38, 38, 70, 102, 102, 134, 134,
    8, 40, 40, 72, 104, 104, 136, 136, 10, 42, 42, 74, 106, 106, 138, 138,
    12, 44, 44, 76, 108, 108, 140, 140, 14, 46, 46, 78, 110, 110, 142, 142,
    16, 48, 48, 80, 112, 112, 144, 144, 18, 50, 50, 82, 114, 114, 146, 146,
    20, 52, 52, 84, 116, 116, 148, 148, 22, 54, 54, 86, 118, 118, 150, 150,
    24, 56, 56, 88, 120, 120, 152, 152, 26, 58, 58, 90, 122, 122, 154, 154,
    28, 60, 60, 92, 124, 124, 156, 156, 30, 62, 62, 94, 126, 126, 158, 158,
    159, 159, 127, 127, 95, 63, 63, 31, 157, 157, 125, 125, 93, 61, 61, 29,
    155, 155, 123, 123, 91, 59, 59, 27, 153, 153, 121, 121, 89, 57, 57, 25,
    151, 151, 119, 119, 87, 55, 55, 23, 149, 149, 117, 117, 85, 53, 53, 21,
    147, 147, 115, 115, 83, 51, 51, 19, 145, 145, 113, 113, 81, 49, 49, 17,
    143, 143, 111, 111, 79, 47, 47, 15, 141, 141, 109, 109, 77, 45, 45, 13,
    139, 139, 107, 107, 75, 43, 43, 11, 137, 137, 105, 105, 73, 41, 41, 9,
    135, 135, 103, 103, 71, 39, 39, 7, 133, 133, 101, 101, 69, 37, 37, 5,
    131, 131, 99, 99, 67, 35, 35, 3, 129, 129, 97, 97, 65, 33, 33, 1,
    // QUANT_192
    0, 64, 128, 128, 2, 66, 130, 130, 4, 68, 132, 132, 6, 70, 134, 134,
    8, 72, 136, 136, 10, 74, 138, 138, 12, 76, 140, 140, 14, 78, 142, 142,
    16, 80, 144, 144, 18, 82, 146, 146, 20, 84, 148, 148, 22, 86, 150, 150,
    24, 88, 152, 152, 26, 90, 154, 154, 28, 92, 156, 156, 30, 94, 158, 158,
    32, 96, 160, 160, 34, 98, 162, 162, 36, 100, 164, 164, 38, 102, 166, 166,
    40, 104, 168, 168, 42, 106, 170, 170, 44, 108, 172, 172, 46, 110, 174, 174,
    48, 112, 176, 176, 50, 114, 178, 178, 52, 116, 180, 180, 54, 118, 182, 182,
    56, 120, 184, 184, 58, 122, 186, 186, 60, 124, 188, 188, 62, 126, 190, 190,
    191, 191, 127, 63, 189, 189, 125, 61, 187, 187, 123, 59, 185, 185, 121, 57,
    183, 183, 119, 55, 181, 181, 117, 53, 179, 179, 115, 51, 177, 177, 113, 49,
    175, 175, 111, 47, 173, 173, 109, 45, 171, 171, 107, 43, 169, 169, 105, 41,
    167, 167, 103, 39, 165, 165, 101, 37, 163, 163, 99, 35, 161, 161, 97, 33,
    159, 159, 95, 31, 157, 157, 93, 29, 155, 155, 91, 27, 153, 153, 89, 25,
    151, 151, 87, 23, 149, 149, 85, 21, 147, 147, 83, 19, 145, 145, 81, 17,
    143, 143, 79, 15, 141, 141, 77, 13, 139, 139, 75, 11, 137, 137, 73, 9,
    135, 135, 71, 7, 133, 133, 69, 5, 131, 131, 67, 3, 129, 129, 65, 1,
    // QUANT_256
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
    80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
    96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
    112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
    128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
    144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
    160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
    176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
    192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
    208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
    224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
    240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
};
668
669 __constant short color_unquant_tables[QUANT_MAX][COLOR_NUM] = {
670 {
671 0, 255
672 },
673 {
674 0, 128, 255
675 },
676 {
677 0, 85, 170, 255
678 },
679 {
680 0, 64, 128, 192, 255
681 },
682 {
683 0, 255, 51, 204, 102, 153
684 },
685 {
686 0, 36, 73, 109, 146, 182, 219, 255
687 },
688 {
689 0, 255, 28, 227, 56, 199, 84, 171, 113, 142
690 },
691 {
692 0, 255, 69, 186, 23, 232, 92, 163, 46, 209, 116, 139
693 },
694 { // 16
695 0, 17, 34, 51, 68, 85, 102, 119, 136, 153, 170, 187, 204, 221, 238, 255
696 },
697 { // 20
698 0, 255, 67, 188, 13, 242, 80, 175, 27, 228, 94, 161, 40, 215, 107, 148,
699 54, 201, 121, 134
700 },
701 { // 24
702 0, 255, 33, 222, 66, 189, 99, 156, 11, 244, 44, 211, 77, 178, 110, 145,
703 22, 233, 55, 200, 88, 167, 121, 134
704 },
705 { // 32
706 0, 8, 16, 24, 33, 41, 49, 57, 66, 74, 82, 90, 99, 107, 115, 123,
707 132, 140, 148, 156, 165, 173, 181, 189, 198, 206, 214, 222, 231, 239, 247, 255
708 },
709 { // 40
710 0, 255, 32, 223, 65, 190, 97, 158, 6, 249, 39, 216, 71, 184, 104, 151,
711 13, 242, 45, 210, 78, 177, 110, 145, 19, 236, 52, 203, 84, 171, 117, 138,
712 26, 229, 58, 197, 91, 164, 123, 132
713 },
714 { // 48
715 0, 255, 16, 239, 32, 223, 48, 207, 65, 190, 81, 174, 97, 158, 113, 142,
716 5, 250, 21, 234, 38, 217, 54, 201, 70, 185, 86, 169, 103, 152, 119, 136,
717 11, 244, 27, 228, 43, 212, 59, 196, 76, 179, 92, 163, 108, 147, 124, 131
718 },
719 { // 64
720 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60,
721 65, 69, 73, 77, 81, 85, 89, 93, 97, 101, 105, 109, 113, 117, 121, 125,
722 130, 134, 138, 142, 146, 150, 154, 158, 162, 166, 170, 174, 178, 182, 186, 190,
723 195, 199, 203, 207, 211, 215, 219, 223, 227, 231, 235, 239, 243, 247, 251, 255
724 },
725 { // 80
726 0, 255, 16, 239, 32, 223, 48, 207, 64, 191, 80, 175, 96, 159, 112, 143,
727 3, 252, 19, 236, 35, 220, 51, 204, 67, 188, 83, 172, 100, 155, 116, 139,
728 6, 249, 22, 233, 38, 217, 54, 201, 71, 184, 87, 168, 103, 152, 119, 136,
729 9, 246, 25, 230, 42, 213, 58, 197, 74, 181, 90, 165, 106, 149, 122, 133,
730 13, 242, 29, 226, 45, 210, 61, 194, 77, 178, 93, 162, 109, 146, 125, 130
731 },
732 { // 96
733 0, 255, 8, 247, 16, 239, 24, 231, 32, 223, 40, 215, 48, 207, 56, 199,
734 64, 191, 72, 183, 80, 175, 88, 167, 96, 159, 104, 151, 112, 143, 120, 135,
735 2, 253, 10, 245, 18, 237, 26, 229, 35, 220, 43, 212, 51, 204, 59, 196,
736 67, 188, 75, 180, 83, 172, 91, 164, 99, 156, 107, 148, 115, 140, 123, 132,
737 5, 250, 13, 242, 21, 234, 29, 226, 37, 218, 45, 210, 53, 202, 61, 194,
738 70, 185, 78, 177, 86, 169, 94, 161, 102, 153, 110, 145, 118, 137, 126, 129
739 },
740 { // 128
741 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30,
742 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62,
743 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94,
744 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126,
745 129, 131, 133, 135, 137, 139, 141, 143, 145, 147, 149, 151, 153, 155, 157, 159,
746 161, 163, 165, 167, 169, 171, 173, 175, 177, 179, 181, 183, 185, 187, 189, 191,
747 193, 195, 197, 199, 201, 203, 205, 207, 209, 211, 213, 215, 217, 219, 221, 223,
748 225, 227, 229, 231, 233, 235, 237, 239, 241, 243, 245, 247, 249, 251, 253, 255
749 },
750 { // 160
751 0, 255, 8, 247, 16, 239, 24, 231, 32, 223, 40, 215, 48, 207, 56, 199,
752 64, 191, 72, 183, 80, 175, 88, 167, 96, 159, 104, 151, 112, 143, 120, 135,
753 1, 254, 9, 246, 17, 238, 25, 230, 33, 222, 41, 214, 49, 206, 57, 198,
754 65, 190, 73, 182, 81, 174, 89, 166, 97, 158, 105, 150, 113, 142, 121, 134,
755 3, 252, 11, 244, 19, 236, 27, 228, 35, 220, 43, 212, 51, 204, 59, 196,
756 67, 188, 75, 180, 83, 172, 91, 164, 99, 156, 107, 148, 115, 140, 123, 132,
757 4, 251, 12, 243, 20, 235, 28, 227, 36, 219, 44, 211, 52, 203, 60, 195,
758 68, 187, 76, 179, 84, 171, 92, 163, 100, 155, 108, 147, 116, 139, 124, 131,
759 6, 249, 14, 241, 22, 233, 30, 225, 38, 217, 46, 209, 54, 201, 62, 193,
760 70, 185, 78, 177, 86, 169, 94, 161, 102, 153, 110, 145, 118, 137, 126, 129
761 },
762 {
763 0, 255, 4, 251, 8, 247, 12, 243, 16, 239, 20, 235, 24, 231, 28, 227,
764 32, 223, 36, 219, 40, 215, 44, 211, 48, 207, 52, 203, 56, 199, 60, 195,
765 64, 191, 68, 187, 72, 183, 76, 179, 80, 175, 84, 171, 88, 167, 92, 163,
766 96, 159, 100, 155, 104, 151, 108, 147, 112, 143, 116, 139, 120, 135, 124, 131,
767 1, 254, 5, 250, 9, 246, 13, 242, 17, 238, 21, 234, 25, 230, 29, 226,
768 33, 222, 37, 218, 41, 214, 45, 210, 49, 206, 53, 202, 57, 198, 61, 194,
769 65, 190, 69, 186, 73, 182, 77, 178, 81, 174, 85, 170, 89, 166, 93, 162,
770 97, 158, 101, 154, 105, 150, 109, 146, 113, 142, 117, 138, 121, 134, 125, 130,
771 2, 253, 6, 249, 10, 245, 14, 241, 18, 237, 22, 233, 26, 229, 30, 225,
772 34, 221, 38, 217, 42, 213, 46, 209, 50, 205, 54, 201, 58, 197, 62, 193,
773 66, 189, 70, 185, 74, 181, 78, 177, 82, 173, 86, 169, 90, 165, 94, 161,
774 98, 157, 102, 153, 106, 149, 110, 145, 114, 141, 118, 137, 122, 133, 126, 129
775 },
776 {
777 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
778 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
779 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
780 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
781 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
782 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
783 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
784 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
785 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
786 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
787 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
788 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
789 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
790 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
791 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
792 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
793 }
794 };
795
796 void Swap(float4* lhs, float4* rhs)
797 {
798 float4 tmp = *lhs;
799 *lhs = *rhs;
800 *rhs = tmp;
801 }
802
803 void FindMinMax(float4* texels, float4 ptMean, float4 vecK, float4* e0, float4* e1)
804 {
805 float a = 1e31f; // max float is clipped to 1e31f
806 float b = -1e31f; // min float is clipped to -1e31f
807 for (int i = START_INDEX; i < BLOCK_SIZE; ++i) {
808 float t = dot(texels[i] - ptMean, vecK);
809 a = min(a, t);
810 b = max(b, t);
811 }
812 *e0 = clamp(vecK * a + ptMean, 0.0f, 255.0f); // 8bit max is 255.0f
813 *e1 = clamp(vecK * b + ptMean, 0.0f, 255.0f); // 8bit max is 255.0f
814 // if the direction_vector ends up pointing from light to dark, FLIP IT!
815 // this will make the endpoint the darkest one;
816 float4 e0u = round(*e0);
817 float4 e1u = round(*e1);
818 if (e0u.x + e0u.y + e0u.z > e1u.x + e1u.y + e1u.z) {
819 Swap(e0, e1);
820 }
821 }
822
823 void MaxAccumulationPixelDirection(float4* texels, float4 ptMean, float4* e0, float4* e1, bool hasAlpha)
824 {
825 float4 sumR = (float4)(FLOAT_ZERO);
826 float4 sumG = (float4)(FLOAT_ZERO);
827 float4 sumB = (float4)(FLOAT_ZERO);
828 float4 sumA = (float4)(FLOAT_ZERO);
829 for (int i = START_INDEX; i < BLOCK_SIZE; ++i) {
830 float4 dt = texels[i] - ptMean;
831 sumR += (dt.x > FLOAT_ZERO) ? dt : (float4)(FLOAT_ZERO);
832 sumG += (dt.y > FLOAT_ZERO) ? dt : (float4)(FLOAT_ZERO);
833 sumB += (dt.z > FLOAT_ZERO) ? dt : (float4)(FLOAT_ZERO);
834 sumA += (dt.w > FLOAT_ZERO) ? dt : (float4)(FLOAT_ZERO);
835 }
836 float dotR = dot(sumR, sumR);
837 float dotG = dot(sumG, sumG);
838 float dotB = dot(sumB, sumB);
839 float dotA = dot(sumA, sumA);
840 float maxDot = dotR;
841 float4 vecK = sumR;
842 if (dotG > maxDot) {
843 vecK = sumG;
844 maxDot = dotG;
845 }
846 if (dotB > maxDot) {
847 vecK = sumB;
848 maxDot = dotB;
849 }
850 if (hasAlpha && dotA > maxDot) {
851 vecK = sumA;
852 maxDot = dotA;
853 }
854 // safe normalize
855 float lenk = length(vecK);
856 vecK = (lenk < SMALL_VALUE) ? vecK : normalize(vecK);
857 FindMinMax(texels, ptMean, vecK, e0, e1);
858 }
859
860 void EncodeColorNormal(short quantLevel, float4 e0, float4 e1, short* endpointQuantized)
861 {
862 int4 e0q = (int4)((int)(round(e0.x)), (int)(round(e0.y)),
863 (int)(round(e0.z)), (int)(round(e0.w)));
864 int4 e1q = (int4)((int)(round(e1.x)), (int)(round(e1.y)),
865 (int)(round(e1.z)), (int)(round(e1.w)));
866 endpointQuantized[EP0_R_INDEX] = g_colorQuantTables[quantLevel * COLOR_NUM + e0q.x];
867 endpointQuantized[EP1_R_INDEX] = g_colorQuantTables[quantLevel * COLOR_NUM + e1q.x];
868 endpointQuantized[EP0_G_INDEX] = g_colorQuantTables[quantLevel * COLOR_NUM + e0q.y];
869 endpointQuantized[EP1_G_INDEX] = g_colorQuantTables[quantLevel * COLOR_NUM + e1q.y];
870 endpointQuantized[EP0_B_INDEX] = g_colorQuantTables[quantLevel * COLOR_NUM + e0q.z];
871 endpointQuantized[EP1_B_INDEX] = g_colorQuantTables[quantLevel * COLOR_NUM + e1q.z];
872 endpointQuantized[EP0_A_INDEX] = g_colorQuantTables[quantLevel * COLOR_NUM + e0q.w];
873 endpointQuantized[EP1_A_INDEX] = g_colorQuantTables[quantLevel * COLOR_NUM + e1q.w];
874 }
875
876 void DecodeColor(short quantLevel, short endpointQuantized[COLOR_COMPONENT_NUM], float4* e0, float4* e1)
877 {
878 (*e0).x = (float)(color_unquant_tables[quantLevel][endpointQuantized[EP0_R_INDEX]]);
879 (*e1).x = (float)(color_unquant_tables[quantLevel][endpointQuantized[EP1_R_INDEX]]);
880 (*e0).y = (float)(color_unquant_tables[quantLevel][endpointQuantized[EP0_G_INDEX]]);
881 (*e1).y = (float)(color_unquant_tables[quantLevel][endpointQuantized[EP1_G_INDEX]]);
882 (*e0).z = (float)(color_unquant_tables[quantLevel][endpointQuantized[EP0_B_INDEX]]);
883 (*e1).z = (float)(color_unquant_tables[quantLevel][endpointQuantized[EP1_B_INDEX]]);
884 (*e0).w = (float)(color_unquant_tables[quantLevel][endpointQuantized[EP0_A_INDEX]]);
885 (*e1).w = (float)(color_unquant_tables[quantLevel][endpointQuantized[EP1_A_INDEX]]);
886 }
887
888 // calculate quantize weights
889 short QuantizeWeight(uint weightRange, float weight)
890 {
891 short q = (short)(round(weight * ((float)(weightRange))));
892 return clamp(q, (short)(QUANTIZE_WEIGHT_MIN), (short)(weightRange));
893 }
894
895 void CalculateNormalWeights(int part, PartInfo* partInfo, float4* texels,
896 float4 endPoint[END_POINT_NUM], float* projw)
897 {
898 int i = START_INDEX;
899 float4 vecK = endPoint[EP1_INDEX] - endPoint[EP0_INDEX];
900 if (length(vecK) < SMALL_VALUE && !partInfo) {
901 for (i = START_INDEX; i < X_GRIDS * Y_GRIDS; ++i) {
902 projw[i] = FLOAT_ZERO;
903 }
904 } else {
905 vecK = normalize(vecK);
906 float minw = 1e31f; // max float is clipped to 1e31f
907 float maxw = -1e31f; // min float is clipped to -1e31f
908 for (i = START_INDEX; i < BLOCK_SIZE; ++i) {
909 if ((!partInfo) || (GetPart(partInfo, i) == part)) {
910 float w = dot(vecK, texels[i] - endPoint[EP0_INDEX]);
911 minw = min(w, minw);
912 maxw = max(w, maxw);
913 projw[i] = w;
914 }
915 }
916 float invlen = maxw - minw;
917 invlen = max(SMALL_VALUE, invlen);
918 invlen = FLOAT_ONE / invlen; // invlen min is SMALL_VALUE, not zero
919 for (i = START_INDEX; i < X_GRIDS * Y_GRIDS; ++i) {
920 if ((!partInfo) || (GetPart(partInfo, i) == part)) {
921 projw[i] = (projw[i] - minw) * invlen;
922 }
923 }
924 }
925 }
926
927 void QuantizeWeights(float projw[X_GRIDS * Y_GRIDS], uint weightRange, short* weights)
928 {
929 for (int i = START_INDEX; i < X_GRIDS * Y_GRIDS; ++i) {
930 weights[i] = QuantizeWeight(weightRange, projw[i]);
931 }
932 }
933
934 void CalculateQuantizedWeights(float4* texels, uint weightRange, float4 endPoint[END_POINT_NUM], short* weights)
935 {
936 float projw[X_GRIDS * Y_GRIDS];
937 CalculateNormalWeights(INT_ZERO, NULL, texels, endPoint, projw);
938 QuantizeWeights(projw, weightRange, weights);
939 }
940
941 void Orbits8Ptr(uint4* outputs, uint* bitoffset, uint number, uint bitcount)
942 {
943 uint newpos = *bitoffset + bitcount;
944 uint nidx = newpos >> 5; // split low bits (5 bits) to get high bits
945 uint uidx = *bitoffset >> 5; // split low bits (5 bits) to get high bits
946 uint bitIdx = *bitoffset & 31u; // split low bits to get low bits (31 for mask 5 bits)
947 if (uidx == 0) { // high bits is 0 for x
948 (*outputs).x |= (number << bitIdx);
949 (*outputs).y |= (nidx > uidx) ? (number >> (32u - bitIdx)) : UINT_ZERO; // uint 32 bits
950 } else if (uidx == 1) { // high bits is 1 for y
951 (*outputs).y |= (number << bitIdx);
952 (*outputs).z |= (nidx > uidx) ? (number >> (32u - bitIdx)) : UINT_ZERO; // uint 32 bits
953 } else if (uidx == 2) { // high bits is 2 for z
954 (*outputs).z |= (number << bitIdx);
955 (*outputs).w |= (nidx > uidx) ? (number >> (32u - bitIdx)) : UINT_ZERO; // uint 32 bits
956 }
957 *bitoffset = newpos;
958 }
959
960 void SplitHighLow(uint n, uint i, int* high, uint* low)
961 {
962 uint low_mask = (UINT_ONE << i) - UINT_ONE;
963 *low = n & low_mask;
964 *high = ((int)(n >> i)) & 0xFF; // mask 0xFF to get low 8 bits
965 }
966
// Reverse the bit order of the low 8 bits of p (bit 0 <-> bit 7, bit 1 <-> bit 6, ...).
// Bits above the low byte are discarded by the first step.
uint ReverseByte(uint p)
{
    // swap the two nibbles
    uint nibblesSwapped = ((p >> 4) & 0xFu) | ((p & 0xFu) << 4);
    // swap the bit pairs inside each nibble
    uint pairsSwapped = ((nibblesSwapped >> 2) & 0x33u) | ((nibblesSwapped & 0x33u) << 2);
    // swap neighbouring bits
    return ((pairsSwapped >> 1) & 0x55u) | ((pairsSwapped & 0x55u) << 1);
}
974
975 void EncodeTrits(uint bitcount, uint tritInput[TRIT_BLOCK_SIZE], uint4* outputs, uint* outpos)
976 {
977 int t0;
978 int t1;
979 int t2;
980 int t3;
981 int t4;
982 uint m0;
983 uint m1;
984 uint m2;
985 uint m3;
986 uint m4;
987 SplitHighLow(tritInput[ISE_0], bitcount, &t0, &m0);
988 SplitHighLow(tritInput[ISE_1], bitcount, &t1, &m1);
989 SplitHighLow(tritInput[ISE_2], bitcount, &t2, &m2);
990 SplitHighLow(tritInput[ISE_3], bitcount, &t3, &m3);
991 SplitHighLow(tritInput[ISE_4], bitcount, &t4, &m4);
992 ushort packhigh = (ushort)(
993 g_integerFromTrits[t4 * 81 + t3 * 27 + t2 * 9 + t1 * 3 + t0]); // trits for 3 9 27 81
994 Orbits8Ptr(outputs, outpos, m0, bitcount);
995 Orbits8Ptr(outputs, outpos, packhigh & 3u, 2u); // low 2bits (mask 3u) offset 2u
996
997 Orbits8Ptr(outputs, outpos, m1, bitcount);
998 Orbits8Ptr(outputs, outpos, (packhigh >> 2) & 3u, 2u); // right shift 2 bits for low 2bits (mask 3u) offset 2u
999
1000 Orbits8Ptr(outputs, outpos, m2, bitcount);
1001 Orbits8Ptr(outputs, outpos, (packhigh >> 4) & 1u, 1u); // right shift 4 bits for low 1bits (mask 1u) offset 1u
1002
1003 Orbits8Ptr(outputs, outpos, m3, bitcount);
1004 Orbits8Ptr(outputs, outpos, (packhigh >> 5) & 3u, 2u); // right shift 5 bits for low 2bits (mask 3u) offset 2u
1005
1006 Orbits8Ptr(outputs, outpos, m4, bitcount);
1007 Orbits8Ptr(outputs, outpos, (packhigh >> 7) & 1u, 1u); // right shift 7 bits for low 1bits (mask 1u) offset 1u
1008 }
1009
1010 void EncodeQuints(uint bitcount, uint quintInput[QUINT_BLOCK_SIZE], uint4* outputs, uint* outpos)
1011 {
1012 int q0;
1013 int q1;
1014 int q2;
1015 uint m0;
1016 uint m1;
1017 uint m2;
1018 SplitHighLow(quintInput[ISE_0], bitcount, &q0, &m0);
1019 SplitHighLow(quintInput[ISE_1], bitcount, &q1, &m1);
1020 SplitHighLow(quintInput[ISE_2], bitcount, &q2, &m2);
1021 ushort packhigh = (ushort)(g_integerFromQuints[q2 * 25 + q1 * 5 + q0]); // Quints 5 25
1022 Orbits8Ptr(outputs, outpos, m0, bitcount);
1023 Orbits8Ptr(outputs, outpos, packhigh & 7u, 3u); // low 3bits (mask 7u) offset 3u
1024 Orbits8Ptr(outputs, outpos, m1, bitcount);
1025 Orbits8Ptr(outputs, outpos, (packhigh >> 3) & 3u, 2u); // right shift 3 bits for low 2bits (mask 3u) offset 2u
1026 Orbits8Ptr(outputs, outpos, m2, bitcount);
1027 Orbits8Ptr(outputs, outpos, (packhigh >> 5) & 3u, 2u); // right shift 5 bits for low 2bits (mask 3u) offset 2u
1028 }
1029
1030 void BiseEndpoints(short numbers[COLOR_COMPONENT_NUM], int range, uint4* outputs, bool hasAlpha, uint* bitPos)
1031 {
1032 uint bits = (uint)(g_bitsTritsQuintsTable[range * 3 + 0]); // Quints 3 offset 0
1033 uint trits = (uint)(g_bitsTritsQuintsTable[range * 3 + 1]); // Quints 3 offset 1
1034 uint quints = (uint)(g_bitsTritsQuintsTable[range * 3 + 2]); // Quints 3 offset 2
1035 uint count = hasAlpha ? 8u : 6u; // RGBA 4x2 = 8 or RGB 3x2 = 6
1036 if (trits == UINT_ONE) {
1037 uint tritsInput[TRIT_BLOCK_SIZE];
1038 tritsInput[ISE_0] = numbers[EP0_R_INDEX];
1039 tritsInput[ISE_1] = numbers[EP1_R_INDEX];
1040 tritsInput[ISE_2] = numbers[EP0_G_INDEX];
1041 tritsInput[ISE_3] = numbers[EP1_G_INDEX];
1042 tritsInput[ISE_4] = numbers[EP0_B_INDEX];
1043 EncodeTrits(bits, tritsInput, outputs, bitPos);
1044 tritsInput[ISE_0] = numbers[EP1_B_INDEX];
1045 tritsInput[ISE_1] = numbers[EP0_A_INDEX];
1046 tritsInput[ISE_2] = numbers[EP1_A_INDEX];
1047 tritsInput[ISE_3] = UINT_ZERO;
1048 tritsInput[ISE_4] = UINT_ZERO;
1049 EncodeTrits(bits, tritsInput, outputs, bitPos);
1050 *bitPos = ((TRIT_MSB_SIZE + TRIT_BLOCK_SIZE * bits) * count + TRIT_ROUND_NUM) / TRIT_BLOCK_SIZE;
1051 } else if (quints == UINT_ONE) {
1052 uint quintsInput[QUINT_BLOCK_SIZE];
1053 quintsInput[ISE_0] = numbers[EP0_R_INDEX];
1054 quintsInput[ISE_1] = numbers[EP1_R_INDEX];
1055 quintsInput[ISE_2] = numbers[EP0_G_INDEX];
1056 EncodeQuints(bits, quintsInput, outputs, bitPos);
1057 quintsInput[ISE_0] = numbers[EP1_G_INDEX];
1058 quintsInput[ISE_1] = numbers[EP0_B_INDEX];
1059 quintsInput[ISE_2] = numbers[EP1_B_INDEX];
1060 EncodeQuints(bits, quintsInput, outputs, bitPos);
1061 quintsInput[ISE_0] = numbers[EP0_A_INDEX];
1062 quintsInput[ISE_1] = numbers[EP1_A_INDEX];
1063 quintsInput[ISE_2] = UINT_ZERO;
1064 EncodeQuints(bits, quintsInput, outputs, bitPos);
1065 *bitPos = ((QUINT_MSB_SIZE + QUINT_BLOCK_SIZE * bits) * count + QUINT_ROUND_NUM) / QUINT_BLOCK_SIZE;
1066 } else {
1067 for (uint i = UINT_ZERO; i < count; ++i) {
1068 Orbits8Ptr(outputs, bitPos, numbers[i], bits);
1069 }
1070 }
1071 }
1072
1073 void BiseWeights(short numbers[BLOCK_SIZE], int range, uint4* outputs)
1074 {
1075 uint bitPos = UINT_ZERO;
1076 uint bits = (uint)(g_bitsTritsQuintsTable[range * 3 + 0]); // Quints 3 offset 0
1077 uint trits = (uint)(g_bitsTritsQuintsTable[range * 3 + 1]); // Quints 3 offset 1
1078 if (trits == UINT_ONE) {
1079 uint tritsInput[TRIT_BLOCK_SIZE];
1080 tritsInput[ISE_0] = numbers[WEIGHT_0];
1081 tritsInput[ISE_1] = numbers[WEIGHT_1];
1082 tritsInput[ISE_2] = numbers[WEIGHT_2];
1083 tritsInput[ISE_3] = numbers[WEIGHT_3];
1084 tritsInput[ISE_4] = numbers[WEIGHT_4];
1085 EncodeTrits(bits, tritsInput, outputs, &bitPos);
1086 tritsInput[ISE_0] = numbers[WEIGHT_5];
1087 tritsInput[ISE_1] = numbers[WEIGHT_6];
1088 tritsInput[ISE_2] = numbers[WEIGHT_7];
1089 tritsInput[ISE_3] = numbers[WEIGHT_8];
1090 tritsInput[ISE_4] = numbers[WEIGHT_9];
1091 EncodeTrits(bits, tritsInput, outputs, &bitPos);
1092 tritsInput[ISE_0] = numbers[WEIGHT_10];
1093 tritsInput[ISE_1] = numbers[WEIGHT_11];
1094 tritsInput[ISE_2] = numbers[WEIGHT_12];
1095 tritsInput[ISE_3] = numbers[WEIGHT_13];
1096 tritsInput[ISE_4] = numbers[WEIGHT_14];
1097 EncodeTrits(bits, tritsInput, outputs, &bitPos);
1098 tritsInput[ISE_0] = numbers[WEIGHT_15];
1099 tritsInput[ISE_1] = UINT_ZERO;
1100 tritsInput[ISE_2] = UINT_ZERO;
1101 tritsInput[ISE_3] = UINT_ZERO;
1102 tritsInput[ISE_4] = UINT_ZERO;
1103 EncodeTrits(bits, tritsInput, outputs, &bitPos);
1104 bitPos = ((TRIT_MSB_SIZE + TRIT_BLOCK_SIZE * bits) * BLOCK_SIZE + TRIT_ROUND_NUM) / TRIT_BLOCK_SIZE;
1105 } else {
1106 for (int i = START_INDEX; i < BLOCK_SIZE; ++i) {
1107 Orbits8Ptr(outputs, &bitPos, numbers[i], bits);
1108 }
1109 }
1110 }
1111
1112 uint4 AssembleBlock(uint blockMode, uint colorEndpointMode, uint4 epIse, uint4 wtIse)
1113 {
1114 uint4 phyBlk = (uint4)(0, 0, 0, 0); // initialize to (0, 0, 0, 0)
1115 phyBlk.w |= ReverseByte(wtIse.x & BYTE_MASK) << BYTE_3_POS;
1116 phyBlk.w |= ReverseByte((wtIse.x >> BYTE_1_POS) & BYTE_MASK) << BYTE_2_POS;
1117 phyBlk.w |= ReverseByte((wtIse.x >> BYTE_2_POS) & BYTE_MASK) << BYTE_1_POS;
1118 phyBlk.w |= ReverseByte((wtIse.x >> BYTE_3_POS) & BYTE_MASK);
1119 phyBlk.z |= ReverseByte(wtIse.y & BYTE_MASK) << BYTE_3_POS;
1120 phyBlk.z |= ReverseByte((wtIse.y >> BYTE_1_POS) & BYTE_MASK) << BYTE_2_POS;
1121 phyBlk.z |= ReverseByte((wtIse.y >> BYTE_2_POS) & BYTE_MASK) << BYTE_1_POS;
1122 phyBlk.z |= ReverseByte((wtIse.y >> BYTE_3_POS) & BYTE_MASK);
1123 phyBlk.y |= ReverseByte(wtIse.z & BYTE_MASK) << BYTE_3_POS;
1124 phyBlk.y |= ReverseByte((wtIse.z >> BYTE_1_POS) & BYTE_MASK) << BYTE_2_POS;
1125 phyBlk.y |= ReverseByte((wtIse.z >> BYTE_2_POS) & BYTE_MASK) << BYTE_1_POS;
1126 phyBlk.y |= ReverseByte((wtIse.z >> BYTE_3_POS) & BYTE_MASK);
1127 phyBlk.x = blockMode;
1128
1129 phyBlk.x |= (colorEndpointMode & MASK_FOR_4BITS) << CEM_POS;
1130 phyBlk.x |= (epIse.x & MASK_FOR_15BITS) << COLOR_EP_POS;
1131 phyBlk.y |= ((epIse.x >> COLOR_EP_HIGH_BIT) & MASK_FOR_17BITS);
1132 phyBlk.y |= (epIse.y & MASK_FOR_15BITS) << COLOR_EP_POS;
1133 phyBlk.z |= ((epIse.y >> COLOR_EP_HIGH_BIT) & MASK_FOR_17BITS);
1134
1135 return phyBlk;
1136 }
1137
1138 uint AssembleBlockmode(uint weightQuantmethod, bool isDualPlane)
1139 {
1140 uint a = (uint)((Y_GRIDS - HEIGHT_BITS_OFFSET) & MASK_FOR_2BITS);
1141 uint b = (uint)((X_GRIDS - WIDTH_BITS_OFFSET) & MASK_FOR_2BITS);
1142 uint d = isDualPlane ? UINT_ONE : UINT_ZERO;
1143 uint h = (weightQuantmethod < 6u) ? UINT_ZERO : UINT_ONE; // low/high-precision limit is 6u
1144 uint r = (weightQuantmethod % 6u) + WEIGHT_METHOD_OFFSET; // low/high-precision limit is 6u
1145 uint blockMode = (r >> WEIGHT_METHOD_RIGHT_BIT) & MASK_FOR_2BITS;
1146 blockMode |= (r & MASK_FOR_1BITS) << WEIGHT_METHOD_POS;
1147 blockMode |= (a & MASK_FOR_2BITS) << BLOCK_WIDTH_POS;
1148 blockMode |= (b & MASK_FOR_2BITS) << BLOCK_HEIGHT_POS;
1149 blockMode |= h << WEIGHT_PRECISION_POS;
1150 blockMode |= d << IS_DUALPLANE_POS;
1151 return blockMode;
1152 }
1153
1154 uint4 EndpointIse(float4* ep0, float4* ep1, short endpointQuantmethod, bool hasAlpha)
1155 {
1156 short epQuantized[COLOR_COMPONENT_NUM];
1157 EncodeColorNormal(endpointQuantmethod, *ep0, *ep1, epQuantized);
1158 DecodeColor(endpointQuantmethod, epQuantized, ep0, ep1);
1159 if (!hasAlpha) {
1160 epQuantized[EP0_A_INDEX] = SHORT_ZERO;
1161 epQuantized[EP1_A_INDEX] = SHORT_ZERO;
1162 }
1163 uint4 epIse = (uint4)(UINT_ZERO);
1164 uint bitPos = UINT_ZERO;
1165 BiseEndpoints(epQuantized, endpointQuantmethod, &epIse, hasAlpha, &bitPos);
1166 return epIse;
1167 }
1168
1169 float4 CalTexel(short weight, float4 ep0, float4 ep1)
1170 {
1171 short weight0 = BLOCK_MAX_WEIGHTS_SHORT - weight;
1172 return (ep0 * weight0 + ep1 * weight) / BLOCK_MAX_WEIGHTS_FLOAT;
1173 }
1174
1175 uint4 WeightIse(float4* texels, uint weightRange, float4 endPoint[END_POINT_NUM],
1176 short weightQuantmethod, float* errval)
1177 {
1178 int i = START_INDEX;
1179 short wtQuantized[X_GRIDS * Y_GRIDS];
1180 CalculateQuantizedWeights(texels, weightRange, endPoint, wtQuantized);
1181 float sumErr = FLOAT_ZERO;
1182 for (i = START_INDEX; i < X_GRIDS * Y_GRIDS; ++i) {
1183 short w = weightQuantmethod * WEIGHT_QUANTIZE_NUM + wtQuantized[i];
1184 wtQuantized[i] = g_scrambleTable[w];
1185 w = weightQuantmethod * WEIGHT_QUANTIZE_NUM + wtQuantized[i];
1186 short wt = g_weightUnquant[w];
1187 float4 new_texel = CalTexel(wt, endPoint[EP0_INDEX], endPoint[EP1_INDEX]);
1188 float4 diff = new_texel - texels[i];
1189 sumErr += dot(diff, diff);
1190 }
1191 *errval = sumErr;
1192 uint4 wtIse = (uint4)(UINT_ZERO);
1193 BiseWeights(wtQuantized, (int)(weightQuantmethod), &wtIse);
1194 return wtIse;
1195 }
1196
1197 float TryEncode(float4* texels, float4 texelsMean, uint4* epIse, uint4* wtIse, short3* bestBlockmode)
1198 {
1199 float errval;
1200 bool hasAlpha = true;
1201 *bestBlockmode = (short3)(QUANT_6, QUANT_256, WEIGHT_RANGE_6);
1202 short weightQuantmethod = (*bestBlockmode).x;
1203 short endpointQuantmethod = (*bestBlockmode).y;
1204 short weightRange = (*bestBlockmode).z;
1205 float4 ep0;
1206 float4 ep1;
1207 float4 endPoint[END_POINT_NUM];
1208 MaxAccumulationPixelDirection(texels, texelsMean, &ep0, &ep1, hasAlpha);
1209 *epIse = EndpointIse(&ep0, &ep1, endpointQuantmethod, hasAlpha);
1210 endPoint[EP0_INDEX] = ep0;
1211 endPoint[EP1_INDEX] = ep1;
1212 *wtIse = WeightIse(texels, weightRange - UINT_ONE, endPoint, weightQuantmethod, &errval);
1213 return errval;
1214 }
1215
1216 uint4 EncodeBlock(float4* texels, float4 texelsMean, int blockID, __global uint* errs)
1217 {
1218 bool hasAlpha = true;
1219 bool isDualPlane = false;
1220 float errval = 10000000.0f; // the errval is initialized to 10000000.0f
1221
1222 uint4 epIse, wtIse;
1223 short3 bestBlockmode, tmpBestBlockMode;
1224 errval = TryEncode(texels, texelsMean, &epIse, &wtIse, &bestBlockmode);
1225
1226 uint blockMode = AssembleBlockmode(bestBlockmode.x, isDualPlane);
1227 uint ColorEndpointMode;
1228 if (hasAlpha) {
1229 ColorEndpointMode = CEM_LDR_RGBA_DIRECT;
1230 } else {
1231 ColorEndpointMode = CEM_LDR_RGB_DIRECT;
1232 }
1233 errs[blockID] = (uint)(errval);
1234 return AssembleBlock(blockMode, ColorEndpointMode, epIse, wtIse);
1235 }
1236
1237 void GotTexelFromImage(read_only image2d_t inputImage, float4 texels[BLOCK_SIZE],
1238 int width, int height, float4 *texelMean)
1239 {
1240 int2 pos = (int2)(get_global_id(0), get_global_id(1));
1241 pos.x *= DIM;
1242 pos.y *= DIM;
1243 for (int i = 0; i < DIM; ++i) {
1244 for (int j = 0; j < DIM; ++j) {
1245 int2 pixelPos = pos + (int2)(j, i);
1246 if (pixelPos.x >= width) {
1247 pixelPos.x = width - 1;
1248 }
1249 if (pixelPos.y >= height) {
1250 pixelPos.y = height - 1;
1251 }
1252 float4 texel = read_imagef(inputImage, pixelPos);
1253 texels[i * DIM + j] = texel * PIXEL_MAX_VALUE;
1254 *texelMean += texel * PIXEL_MAX_VALUE;
1255 }
1256 }
1257 }
1258
1259 kernel void AstcCl(read_only image2d_t inputImage, __global uint4* astcArr, __global uint* errs,
1260 int width, int height)
1261 {
1262 const int2 globalSize = (int2)(get_global_size(0), get_global_size(1));
1263 const int2 globalId = (int2)(get_global_id(0), get_global_id(1));
1264 int blockID = globalId.y * globalSize.x + globalId.x;
1265 float4 texels[BLOCK_SIZE];
1266 float4 texelMean = 0;
1267 GotTexelFromImage(inputImage, texels, width, height, &texelMean);
1268 texelMean = texelMean / ((float)(BLOCK_SIZE));
1269 astcArr[blockID] = EncodeBlock(texels, texelMean, blockID, errs);
1270 }
1271 )";
1272
AstcClClose(ClAstcHandle * clAstcHandle)1273 CL_ASTC_SHARE_LIB_API CL_ASTC_STATUS AstcClClose(ClAstcHandle *clAstcHandle)
1274 {
1275 if (clAstcHandle == nullptr) {
1276 IMAGE_LOGE("astc AstcClClose clAstcHandle is nullptr!");
1277 return CL_ASTC_ENC_FAILED;
1278 }
1279 cl_int clRet;
1280 if (clAstcHandle->kernel != nullptr) {
1281 clRet = clReleaseKernel(clAstcHandle->kernel);
1282 if (clRet != CL_SUCCESS) {
1283 IMAGE_LOGE("astc clReleaseKernel failed ret %{public}d!", clRet);
1284 return CL_ASTC_ENC_FAILED;
1285 }
1286 clAstcHandle->kernel = nullptr;
1287 }
1288 if (clAstcHandle->queue != nullptr) {
1289 clRet = clReleaseCommandQueue(clAstcHandle->queue);
1290 if (clRet != CL_SUCCESS) {
1291 IMAGE_LOGE("astc clReleaseCommandQueue failed ret %{public}d!", clRet);
1292 return CL_ASTC_ENC_FAILED;
1293 }
1294 clAstcHandle->queue = nullptr;
1295 }
1296 if (clAstcHandle->context != nullptr) {
1297 clRet = clReleaseContext(clAstcHandle->context);
1298 if (clRet != CL_SUCCESS) {
1299 IMAGE_LOGE("astc clReleaseContext failed ret %{public}d!", clRet);
1300 return CL_ASTC_ENC_FAILED;
1301 }
1302 clAstcHandle->context = nullptr;
1303 }
1304 if (clAstcHandle->encObj.blockErrs_ != nullptr) {
1305 free(clAstcHandle->encObj.blockErrs_);
1306 clAstcHandle->encObj.blockErrs_ = nullptr;
1307 }
1308 if (clAstcHandle != nullptr) {
1309 free(clAstcHandle);
1310 }
1311 return CL_ASTC_ENC_SUCCESS;
1312 }
1313
/**
 * Tells whether a file system entry exists at the given path.
 *
 * @param name path to probe with access(F_OK).
 * @return true when access() does not report an error, false otherwise.
 */
static bool CheckClBinIsExist(const std::string &name)
{
    const int accessRet = access(name.c_str(), F_OK);
    const bool missing = (accessRet == -1); // -1 means that the file is not exist
    return !missing;
}
1318
SaveClBin(cl_program program,const std::string & clBinPath)1319 static CL_ASTC_STATUS SaveClBin(cl_program program, const std::string &clBinPath)
1320 {
1321 size_t programBinarySizes;
1322 cl_int clRet = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &programBinarySizes, NULL);
1323 if (clRet != CL_SUCCESS) {
1324 IMAGE_LOGE("astc clGetProgramInfo CL_PROGRAM_BINARY_SIZES failed ret %{public}d!", clRet);
1325 return CL_ASTC_ENC_FAILED;
1326 }
1327 if ((programBinarySizes == 0) || (programBinarySizes > MAX_MALLOC_BYTES)) {
1328 IMAGE_LOGE("astc clGetProgramInfo programBinarySizes %{public}zu too big!", programBinarySizes);
1329 return CL_ASTC_ENC_FAILED;
1330 }
1331 uint8_t *programBinaries = static_cast<uint8_t *>(malloc(programBinarySizes));
1332 if (programBinaries == nullptr) {
1333 IMAGE_LOGE("astc programBinaries malloc failed!");
1334 return CL_ASTC_ENC_FAILED;
1335 }
1336 clRet = clGetProgramInfo(program, CL_PROGRAM_BINARIES, programBinarySizes, &programBinaries, NULL);
1337 if (clRet != CL_SUCCESS) {
1338 IMAGE_LOGE("astc clGetProgramInfo CL_PROGRAM_BINARIES failed ret %{public}d!", clRet);
1339 free(programBinaries);
1340 return CL_ASTC_ENC_FAILED;
1341 }
1342 FILE *fp = fopen(clBinPath.c_str(), "wb");
1343 if (fp == nullptr) {
1344 IMAGE_LOGE("astc create file: %{public}s failed!", clBinPath.c_str());
1345 free(programBinaries);
1346 return CL_ASTC_ENC_FAILED;
1347 }
1348 CL_ASTC_STATUS ret = CL_ASTC_ENC_SUCCESS;
1349 if (fwrite(programBinaries, 1, programBinarySizes, fp) != programBinarySizes) {
1350 IMAGE_LOGE("astc fwrite programBinaries file failed!");
1351 ret = CL_ASTC_ENC_FAILED;
1352 }
1353 if (fclose(fp) != 0) {
1354 IMAGE_LOGE("astc SaveClBin close file failed!");
1355 ret = CL_ASTC_ENC_FAILED;
1356 }
1357 fp = nullptr;
1358 free(programBinaries);
1359 return ret;
1360 }
1361
BuildProgramAndCreateKernel(cl_program program,ClAstcHandle * clAstcHandle)1362 static CL_ASTC_STATUS BuildProgramAndCreateKernel(cl_program program, ClAstcHandle *clAstcHandle)
1363 {
1364 cl_int clRet = clBuildProgram(program, 1, &clAstcHandle->deviceID, "-cl-std=CL3.0", nullptr, nullptr);
1365 if (clRet != CL_SUCCESS) {
1366 IMAGE_LOGE("astc clBuildProgram failed ret %{public}d!", clRet);
1367 return CL_ASTC_ENC_FAILED;
1368 }
1369 clAstcHandle->kernel = clCreateKernel(program, "AstcCl", &clRet);
1370 if (clRet != CL_SUCCESS) {
1371 IMAGE_LOGE("astc clCreateKernel failed ret %{public}d!", clRet);
1372 return CL_ASTC_ENC_FAILED;
1373 }
1374 return CL_ASTC_ENC_SUCCESS;
1375 }
1376
AstcClBuildProgram(ClAstcHandle * clAstcHandle,const std::string & clBinPath)1377 static CL_ASTC_STATUS AstcClBuildProgram(ClAstcHandle *clAstcHandle, const std::string &clBinPath)
1378 {
1379 cl_int clRet;
1380 cl_program program = nullptr;
1381 if (!CheckClBinIsExist(clBinPath)) {
1382 size_t sourceSize = strlen(g_programSource) + 1; // '\0' occupies 1 bytes
1383 program = clCreateProgramWithSource(clAstcHandle->context, 1, &g_programSource, &sourceSize, &clRet);
1384 if (clRet != CL_SUCCESS) {
1385 IMAGE_LOGE("astc clCreateProgramWithSource failed ret %{public}d!", clRet);
1386 return CL_ASTC_ENC_FAILED;
1387 }
1388 if (BuildProgramAndCreateKernel(program, clAstcHandle) != CL_ASTC_ENC_SUCCESS) {
1389 IMAGE_LOGE("astc clCreateProgramWithSource failed ret %{public}d!", clRet);
1390 clReleaseProgram(program);
1391 return CL_ASTC_ENC_FAILED;
1392 }
1393 if (SaveClBin(program, clBinPath) != CL_ASTC_ENC_SUCCESS) {
1394 IMAGE_LOGI("astc SaveClBin failed!");
1395 }
1396 } else {
1397 std::ifstream contents{clBinPath};
1398 std::string binaryContent{std::istreambuf_iterator<char>{contents}, {}};
1399 size_t binSize = binaryContent.length();
1400 if ((binSize == 0) || (binSize > MAX_MALLOC_BYTES)) {
1401 IMAGE_LOGE("astc AstcClBuildProgram read CLbin file lenth error %{public}zu!", binSize);
1402 return CL_ASTC_ENC_FAILED;
1403 }
1404 const char *binary = static_cast<const char *>(binaryContent.c_str());
1405 program = clCreateProgramWithBinary(clAstcHandle->context, 1, &clAstcHandle->deviceID, &binSize,
1406 (const unsigned char **)&binary, nullptr, &clRet);
1407 if (clRet != CL_SUCCESS) {
1408 IMAGE_LOGE("astc clCreateProgramWithBinary failed ret %{public}d!", clRet);
1409 return CL_ASTC_ENC_FAILED;
1410 }
1411 if (BuildProgramAndCreateKernel(program, clAstcHandle) != CL_ASTC_ENC_SUCCESS) {
1412 IMAGE_LOGE("astc BuildProgramAndCreateKernel with bin failed!");
1413 clReleaseProgram(program);
1414 return CL_ASTC_ENC_FAILED;
1415 }
1416 }
1417 clRet = clReleaseProgram(program);
1418 if (clRet != CL_SUCCESS) {
1419 IMAGE_LOGE("astc clReleaseProgram failed ret %{public}d!", clRet);
1420 return CL_ASTC_ENC_FAILED;
1421 }
1422 return CL_ASTC_ENC_SUCCESS;
1423 }
1424
AstcCreateClKernel(ClAstcHandle * clAstcHandle,const std::string & clBinPath)1425 static CL_ASTC_STATUS AstcCreateClKernel(ClAstcHandle *clAstcHandle, const std::string &clBinPath)
1426 {
1427 if (!OHOS::InitOpenCL()) {
1428 IMAGE_LOGE("astc InitOpenCL error!");
1429 return CL_ASTC_ENC_FAILED;
1430 }
1431 cl_int clRet;
1432 cl_platform_id platformID;
1433 clRet = clGetPlatformIDs(1, &platformID, NULL);
1434 if (clRet != CL_SUCCESS) {
1435 IMAGE_LOGE("astc clGetPlatformIDs failed ret %{public}d!", clRet);
1436 return CL_ASTC_ENC_FAILED;
1437 }
1438 clRet = clGetDeviceIDs(platformID, CL_DEVICE_TYPE_GPU, 1, &clAstcHandle->deviceID, NULL);
1439 if (clRet != CL_SUCCESS) {
1440 IMAGE_LOGE("astc clGetDeviceIDs failed ret %{public}d!", clRet);
1441 return CL_ASTC_ENC_FAILED;
1442 }
1443 clAstcHandle->context = clCreateContext(0, 1, &clAstcHandle->deviceID, NULL, NULL, &clRet);
1444 if (clRet != CL_SUCCESS) {
1445 IMAGE_LOGE("astc clCreateContext failed ret %{public}d!", clRet);
1446 return CL_ASTC_ENC_FAILED;
1447 }
1448 cl_queue_properties props[] = {CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_HIGH_KHR, 0};
1449 clAstcHandle->queue = clCreateCommandQueueWithProperties(clAstcHandle->context,
1450 clAstcHandle->deviceID, props, &clRet);
1451 if (clRet != CL_SUCCESS) {
1452 IMAGE_LOGE("astc clCreateCommandQueueWithProperties failed ret %{public}d!", clRet);
1453 return CL_ASTC_ENC_FAILED;
1454 }
1455 if (AstcClBuildProgram(clAstcHandle, clBinPath) != CL_ASTC_ENC_SUCCESS) {
1456 IMAGE_LOGE("astc AstcClBuildProgram failed!");
1457 return CL_ASTC_ENC_FAILED;
1458 }
1459 return CL_ASTC_ENC_SUCCESS;
1460 }
1461
AstcClCreate(ClAstcHandle ** handle,const std::string & clBinPath)1462 CL_ASTC_SHARE_LIB_API CL_ASTC_STATUS AstcClCreate(ClAstcHandle **handle, const std::string &clBinPath)
1463 {
1464 ClAstcHandle *clAstcHandle = static_cast<ClAstcHandle *>(calloc(1, sizeof(ClAstcHandle)));
1465 if (clAstcHandle == nullptr) {
1466 IMAGE_LOGE("astc AstcClCreate handle calloc failed!");
1467 return CL_ASTC_ENC_FAILED;
1468 }
1469 *handle = clAstcHandle;
1470 size_t numMaxBlocks = static_cast<size_t>(((MAX_WIDTH + DIM - 1) / DIM) * ((MAX_HEIGHT + DIM - 1) / DIM));
1471 clAstcHandle->encObj.blockErrs_ =
1472 static_cast<uint32_t *>(malloc((numMaxBlocks * sizeof(uint32_t)))); // 8MB mem Max
1473 if (clAstcHandle->encObj.blockErrs_ == nullptr) {
1474 IMAGE_LOGE("astc blockErrs_ malloc failed!");
1475 AstcClClose(*handle);
1476 return CL_ASTC_ENC_FAILED;
1477 }
1478 if (AstcCreateClKernel(clAstcHandle, clBinPath) != CL_ASTC_ENC_SUCCESS) {
1479 IMAGE_LOGE("astc AstcCreateClKernel failed!");
1480 AstcClClose(*handle);
1481 return CL_ASTC_ENC_FAILED;
1482 }
1483 return CL_ASTC_ENC_SUCCESS;
1484 }
1485
AstcClEncImageCheckImageOption(const ClAstcImageOption * imageIn)1486 static CL_ASTC_STATUS AstcClEncImageCheckImageOption(const ClAstcImageOption *imageIn)
1487 {
1488 if ((imageIn->width <= 0) || (imageIn->height <= 0) || (imageIn->stride < imageIn->width)) {
1489 IMAGE_LOGE("astc AstcClEncImage width <= 0 or height <= 0 or stride < width!");
1490 return CL_ASTC_ENC_FAILED;
1491 }
1492 if ((imageIn->width > MAX_WIDTH) || (imageIn->height > MAX_HEIGHT)) {
1493 IMAGE_LOGE("astc AstcClEncImage width[%{public}d] \
1494 need be [1, %{public}d] and height[%{public}d] need be [1, %{public}d]", \
1495 imageIn->width, MAX_WIDTH, imageIn->height, MAX_HEIGHT);
1496 return CL_ASTC_ENC_FAILED;
1497 }
1498 return CL_ASTC_ENC_SUCCESS;
1499 }
1500
AstcClFillImage(ClAstcImageOption * imageIn,uint8_t * data,int32_t stride,int32_t width,int32_t height)1501 CL_ASTC_SHARE_LIB_API CL_ASTC_STATUS AstcClFillImage(ClAstcImageOption *imageIn, uint8_t *data, int32_t stride,
1502 int32_t width, int32_t height)
1503 {
1504 if (imageIn == nullptr) {
1505 IMAGE_LOGE("astc AstcClFillImage imageIn is nullptr!");
1506 return CL_ASTC_ENC_FAILED;
1507 }
1508 imageIn->data = data;
1509 imageIn->stride = stride >> STRIDE_RGBA_LOG2;
1510 imageIn->width = width;
1511 imageIn->height = height;
1512 if (AstcClEncImageCheckImageOption(imageIn)) {
1513 IMAGE_LOGE("astc AstcClEncImageCheckImageOption failed!");
1514 return CL_ASTC_ENC_FAILED;
1515 }
1516 return CL_ASTC_ENC_SUCCESS;
1517 }
1518
GenAstcHeader(uint8_t * buffer,uint8_t blockX,uint8_t blockY,uint32_t dimX,uint32_t dimY)1519 static void GenAstcHeader(uint8_t *buffer, uint8_t blockX, uint8_t blockY, uint32_t dimX, uint32_t dimY)
1520 {
1521 uint8_t *headInfo = buffer;
1522 *headInfo++ = MAGIC_FILE_CONSTANT & BYTES_MASK;
1523 *headInfo++ = (MAGIC_FILE_CONSTANT >> BIT_SHIFT_8BITS) & BYTES_MASK;
1524 *headInfo++ = (MAGIC_FILE_CONSTANT >> BIT_SHIFT_16BITS) & BYTES_MASK;
1525 *headInfo++ = (MAGIC_FILE_CONSTANT >> BIT_SHIFT_24BITS) & BYTES_MASK;
1526 *headInfo++ = static_cast<uint8_t>(blockX);
1527 *headInfo++ = static_cast<uint8_t>(blockY);
1528 *headInfo++ = 1;
1529 *headInfo++ = dimX & BYTES_MASK;
1530 *headInfo++ = (dimX >> BIT_SHIFT_8BITS) & BYTES_MASK;
1531 *headInfo++ = (dimX >> BIT_SHIFT_16BITS) & BYTES_MASK;
1532 *headInfo++ = dimY & BYTES_MASK;
1533 *headInfo++ = (dimY >> BIT_SHIFT_8BITS) & BYTES_MASK;
1534 *headInfo++ = (dimY >> BIT_SHIFT_16BITS) & BYTES_MASK;
1535 *headInfo++ = 1;
1536 *headInfo++ = 0;
1537 *headInfo++ = 0;
1538 }
1539
ReleaseClAstcObj(ClAstcObjEnc * obj)1540 static void ReleaseClAstcObj(ClAstcObjEnc *obj)
1541 {
1542 cl_int clRet;
1543 if (obj != nullptr) {
1544 if (obj->inputImage != nullptr) {
1545 clRet = clReleaseMemObject(obj->inputImage);
1546 if (clRet != CL_SUCCESS) {
1547 IMAGE_LOGE("astc inputImage release failed ret %{public}d!", clRet);
1548 }
1549 obj->inputImage = nullptr;
1550 }
1551 if (obj->astcResult != nullptr) {
1552 clRet = clReleaseMemObject(obj->astcResult);
1553 if (clRet != CL_SUCCESS) {
1554 IMAGE_LOGE("astc astcResult release failed ret %{public}d!", clRet);
1555 }
1556 obj->astcResult = nullptr;
1557 }
1558 if (obj->errBuffer != nullptr) {
1559 clRet = clReleaseMemObject(obj->errBuffer);
1560 if (clRet != CL_SUCCESS) {
1561 IMAGE_LOGE("astc errBuffer release failed ret %{public}d!", clRet);
1562 }
1563 obj->errBuffer = nullptr;
1564 }
1565 }
1566 }
1567
// Scans the first numBlocks entries of blockErrs.
//   sumVal - reset to 0, then receives the sum of all entries (wraps on uint32_t overflow).
//   maxVal - raised to the largest entry seen; it is NOT reset here, so the caller's
//            incoming value acts as the starting maximum.
static void GetMaxAndSumVal(size_t numBlocks, uint32_t *blockErrs, uint32_t &maxVal, uint32_t &sumVal)
{
    sumVal = 0;
    for (size_t i = 0; i < numBlocks; i++) {
        const uint32_t err = blockErrs[i];
        sumVal += err;
        // Plain integer comparison; no need to round-trip through floating-point fmax.
        if (err > maxVal) {
            maxVal = err;
        }
    }
}
1576
ClCreateBufferAndImage(const ClAstcImageOption * imageIn,ClAstcHandle * clAstcHandle,ClAstcObjEnc * encObj)1577 static CL_ASTC_STATUS ClCreateBufferAndImage(const ClAstcImageOption *imageIn,
1578 ClAstcHandle *clAstcHandle, ClAstcObjEnc *encObj)
1579 {
1580 uint8_t *data = imageIn->data;
1581 int32_t stride = imageIn->stride;
1582 int32_t width = imageIn->width;
1583 int32_t height = imageIn->height;
1584 size_t numBlocks = static_cast<size_t>(((width + DIM - 1) / DIM) * ((height + DIM - 1) / DIM));
1585 uint32_t *blockErrs = encObj->blockErrs_;
1586 size_t blockErrBytes = sizeof(uint32_t) * numBlocks;
1587 encObj->astcSize = numBlocks * TEXTURE_BLOCK_BYTES;
1588 if ((blockErrs == nullptr) || (memset_s(blockErrs, blockErrBytes, 0, blockErrBytes))) {
1589 IMAGE_LOGE("astc blockErrs is nullptr or memset failed!");
1590 return CL_ASTC_ENC_FAILED;
1591 }
1592 cl_image_format imageFormat = { CL_RGBA, CL_UNORM_INT8 };
1593 cl_image_desc desc = { CL_MEM_OBJECT_IMAGE2D, stride, height };
1594 cl_int clRet;
1595 encObj->inputImage = clCreateImage(clAstcHandle->context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, &imageFormat,
1596 &desc, data, &clRet);
1597 if (clRet != CL_SUCCESS) {
1598 IMAGE_LOGE("astc clCreateImage failed ret %{public}d!", clRet);
1599 return CL_ASTC_ENC_FAILED;
1600 }
1601 encObj->astcResult = clCreateBuffer(clAstcHandle->context,
1602 CL_MEM_ALLOC_HOST_PTR, encObj->astcSize, NULL, &clRet);
1603 if (clRet != CL_SUCCESS) {
1604 IMAGE_LOGE("astc clCreateBuffer astcResult failed ret %{public}d!", clRet);
1605 return CL_ASTC_ENC_FAILED;
1606 }
1607 encObj->errBuffer = clCreateBuffer(clAstcHandle->context, CL_MEM_USE_HOST_PTR, blockErrBytes, blockErrs, &clRet);
1608 if (clRet != CL_SUCCESS) {
1609 IMAGE_LOGE("astc clCreateBuffer errBuffer failed ret %{public}d!", clRet);
1610 return CL_ASTC_ENC_FAILED;
1611 }
1612 return CL_ASTC_ENC_SUCCESS;
1613 }
1614
ClKernelArgSet(ClAstcHandle * clAstcHandle,ClAstcObjEnc * encObj,int width,int height)1615 static CL_ASTC_STATUS ClKernelArgSet(ClAstcHandle *clAstcHandle, ClAstcObjEnc *encObj, int width, int height)
1616 {
1617 int32_t kernelId = 0;
1618 cl_int clRet = clSetKernelArg(clAstcHandle->kernel, kernelId++, sizeof(cl_mem), &encObj->inputImage);
1619 if (clRet != CL_SUCCESS) {
1620 IMAGE_LOGE("astc clSetKernelArg inputImage failed ret %{public}d!", clRet);
1621 return CL_ASTC_ENC_FAILED;
1622 }
1623 clRet = clSetKernelArg(clAstcHandle->kernel, kernelId++, sizeof(cl_mem), &encObj->astcResult);
1624 if (clRet != CL_SUCCESS) {
1625 IMAGE_LOGE("astc clSetKernelArg astcResult failed ret %{public}d!", clRet);
1626 return CL_ASTC_ENC_FAILED;
1627 }
1628 clRet = clSetKernelArg(clAstcHandle->kernel, kernelId++, sizeof(cl_mem), &encObj->errBuffer);
1629 if (clRet != CL_SUCCESS) {
1630 IMAGE_LOGE("astc clSetKernelArg errBuffer failed ret %{public}d!", clRet);
1631 return CL_ASTC_ENC_FAILED;
1632 }
1633 clRet = clSetKernelArg(clAstcHandle->kernel, kernelId++, sizeof(int), &width);
1634 if (clRet != CL_SUCCESS) {
1635 IMAGE_LOGE("astc clSetKernelArg width failed ret %{public}d!", clRet);
1636 return CL_ASTC_ENC_FAILED;
1637 }
1638 clRet = clSetKernelArg(clAstcHandle->kernel, kernelId++, sizeof(int), &height);
1639 if (clRet != CL_SUCCESS) {
1640 IMAGE_LOGE("astc clSetKernelArg height failed ret %{public}d!", clRet);
1641 return CL_ASTC_ENC_FAILED;
1642 }
1643 return CL_ASTC_ENC_SUCCESS;
1644 }
1645
ClKernelArgSetAndRun(ClAstcHandle * clAstcHandle,ClAstcObjEnc * encObj,int width,int height)1646 static CL_ASTC_STATUS ClKernelArgSetAndRun(ClAstcHandle *clAstcHandle, ClAstcObjEnc *encObj, int width, int height)
1647 {
1648 if (ClKernelArgSet(clAstcHandle, encObj, width, height) != CL_ASTC_ENC_SUCCESS) {
1649 IMAGE_LOGE("astc ClKernelArgSet failed!");
1650 return CL_ASTC_ENC_FAILED;
1651 }
1652 size_t local[] = {WORK_GROUP_SIZE, WORK_GROUP_SIZE};
1653 size_t global[GLOBAL_WH_NUM_CL];
1654 global[0] = (width + DIM - 1) / DIM;
1655 global[1] = (height + DIM - 1) / DIM;
1656 size_t localMax;
1657 cl_int clRet = clGetKernelWorkGroupInfo(clAstcHandle->kernel, clAstcHandle->deviceID, CL_KERNEL_WORK_GROUP_SIZE,
1658 sizeof(size_t), &localMax, nullptr);
1659 if (clRet != CL_SUCCESS) {
1660 IMAGE_LOGE("astc clGetKernelWorkGroupInfo failed ret %{public}d!", clRet);
1661 return CL_ASTC_ENC_FAILED;
1662 }
1663 while (local[0] * local[1] > localMax) {
1664 local[0]--;
1665 local[1]--;
1666 }
1667 if ((local[0] < 1) || (local[1] < 1)) {
1668 IMAGE_LOGE("astc ClKernelArgSetAndRun local set failed!");
1669 return CL_ASTC_ENC_FAILED;
1670 }
1671 clRet = clEnqueueNDRangeKernel(clAstcHandle->queue, clAstcHandle->kernel, GLOBAL_WH_NUM_CL, nullptr, global, local,
1672 0, nullptr, nullptr);
1673 if (clRet != CL_SUCCESS) {
1674 IMAGE_LOGE("astc clEnqueueNDRangeKernel failed ret %{public}d!", clRet);
1675 return CL_ASTC_ENC_FAILED;
1676 }
1677 clRet = clFinish(clAstcHandle->queue);
1678 if (clRet != CL_SUCCESS) {
1679 IMAGE_LOGE("astc clFinish failed ret %{public}d!", clRet);
1680 return CL_ASTC_ENC_FAILED;
1681 }
1682 return CL_ASTC_ENC_SUCCESS;
1683 }
1684
ClReadAstcBufAndBlockError(ClAstcHandle * clAstcHandle,ClAstcObjEnc * encObj,const ClAstcImageOption * imageIn,uint8_t * buffer)1685 static CL_ASTC_STATUS ClReadAstcBufAndBlockError(ClAstcHandle *clAstcHandle, ClAstcObjEnc *encObj,
1686 const ClAstcImageOption *imageIn, uint8_t *buffer)
1687 {
1688 cl_int clRet = clEnqueueReadBuffer(clAstcHandle->queue, encObj->astcResult, CL_TRUE,
1689 0, encObj->astcSize, buffer + TEXTURE_HEAD_BYTES, 0, NULL, NULL);
1690 if (clRet != CL_SUCCESS) {
1691 IMAGE_LOGE("astc clEnqueueReadBuffer astcResult failed ret %{public}d!", clRet);
1692 return CL_ASTC_ENC_FAILED;
1693 }
1694 uint32_t maxVal = 0;
1695 uint32_t sumVal = 0;
1696 size_t numBlocks = ((imageIn->width + DIM - 1) / DIM) * ((imageIn->height + DIM - 1) / DIM);
1697 clRet = clEnqueueReadBuffer(clAstcHandle->queue, encObj->errBuffer, CL_TRUE,
1698 0, sizeof(uint32_t) * numBlocks, encObj->blockErrs_, 0, NULL, NULL);
1699 if (clRet != CL_SUCCESS) {
1700 IMAGE_LOGE("astc clEnqueueReadBuffer blockErrs failed ret %{public}d!", clRet);
1701 return CL_ASTC_ENC_FAILED;
1702 }
1703 GetMaxAndSumVal(numBlocks, encObj->blockErrs_, maxVal, sumVal);
1704 return CL_ASTC_ENC_SUCCESS;
1705 }
1706
AstcClEncImage(ClAstcHandle * clAstcHandle,const ClAstcImageOption * imageIn,uint8_t * buffer)1707 CL_ASTC_SHARE_LIB_API CL_ASTC_STATUS AstcClEncImage(ClAstcHandle *clAstcHandle,
1708 const ClAstcImageOption *imageIn, uint8_t *buffer)
1709 {
1710 if ((clAstcHandle == nullptr) || (imageIn == nullptr) || (buffer == nullptr)) {
1711 IMAGE_LOGE("astc AstcClEncImage clAstcHandle or imageIn or buffer is nullptr!");
1712 return CL_ASTC_ENC_FAILED;
1713 }
1714 if (AstcClEncImageCheckImageOption(imageIn)) {
1715 IMAGE_LOGE("astc AstcClEncImageCheckImageOption failed!");
1716 return CL_ASTC_ENC_FAILED;
1717 }
1718 GenAstcHeader(buffer, DIM, DIM, imageIn->width, imageIn->height);
1719 ClAstcObjEnc *encObj = &clAstcHandle->encObj;
1720 if (ClCreateBufferAndImage(imageIn, clAstcHandle, encObj) != CL_ASTC_ENC_SUCCESS) {
1721 ReleaseClAstcObj(encObj);
1722 IMAGE_LOGE("astc ClCreateBufferAndImage failed!");
1723 return CL_ASTC_ENC_FAILED;
1724 }
1725 if (ClKernelArgSetAndRun(clAstcHandle, encObj, imageIn->width, imageIn->height) != CL_ASTC_ENC_SUCCESS) {
1726 ReleaseClAstcObj(encObj);
1727 IMAGE_LOGE("astc ClKernelArgSetAndRun failed!");
1728 return CL_ASTC_ENC_FAILED;
1729 }
1730 if (ClReadAstcBufAndBlockError(clAstcHandle, encObj, imageIn, buffer) != CL_ASTC_ENC_SUCCESS) {
1731 ReleaseClAstcObj(encObj);
1732 IMAGE_LOGE("astc ClReadAstcBufAndBlockError failed!");
1733 return CL_ASTC_ENC_FAILED;
1734 }
1735 ReleaseClAstcObj(encObj);
1736 return CL_ASTC_ENC_SUCCESS;
1737 }
1738 }
1739 }
1740 }