1 /*
2 * Copyright (C) 2023 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "image_compressor.h"
17
18 #include <unistd.h>
19 #include <fstream>
20
21 #include "securec.h"
22 #include "media_errors.h"
23 #include "image_log.h"
24
25 #undef LOG_DOMAIN
26 #define LOG_DOMAIN LOG_TAG_DOMAIN_ID_PLUGIN
27
28 #undef LOG_TAG
29 #define LOG_TAG "ClAstcEnc"
30
31 namespace OHOS {
32 namespace ImagePlugin {
33 namespace AstcEncBasedCl {
// Host-side limits. Their use sites are outside this excerpt; presumably caps on
// the input image size and on a single allocation - confirm against the callers.
constexpr int MAX_HEIGHT = 8192;
constexpr int MAX_WIDTH = 8192;
constexpr size_t MAX_MALLOC_BYTES = 10000000; // max 10MB

// Output container layout: header size, size of one encoded block, file magic
// and block edge length (NOTE(review): 0x5CA1AB13 looks like the .astc file magic - confirm).
constexpr int MAGIC_FILE_CONSTANT = 0x5CA1AB13;
constexpr int TEXTURE_HEAD_BYTES = 16;
constexpr int TEXTURE_BLOCK_BYTES = 16;
constexpr int DIM = 4;

// Byte mask and shift amounts for splitting a multi-byte value into bytes.
constexpr uint8_t BYTES_MASK = 0xFF;
constexpr uint8_t BIT_SHIFT_8BITS = 8;
constexpr uint8_t BIT_SHIFT_16BITS = 16;
constexpr uint8_t BIT_SHIFT_24BITS = 24;

// OpenCL launch parameters: number of global work dimensions and work-group edge.
constexpr uint8_t GLOBAL_WH_NUM_CL = 2;
constexpr size_t WORK_GROUP_SIZE = 8;
47
48 const char *g_programSource = R"(
// Notice: everything from here to the closing delimiter of this raw string literal is OpenCL C kernel source, not C++.
// OpenCL C only supports C-style code; constexpr and static_cast are not supported on some platforms.
// Block edge length, texels per block and weight-grid dimensions.
#define DIM                      (4)
#define BLOCK_SIZE               (16)
#define X_GRIDS                  (4)
#define Y_GRIDS                  (4)

// Floating-point helpers.
#define SMALL_VALUE              (0.00001f) // avoid divide 0
#define PIXEL_MAX_VALUE          (255.0f)

// Weight-related limits; the _SHORT and _FLOAT spellings carry the same value 64.
#define BLOCK_MAX_WEIGHTS        (64)
#define BLOCK_MAX_WEIGHTS_SHORT  (64)
#define BLOCK_MAX_WEIGHTS_FLOAT  (64.0f)
#define BLOCK_MAX_WEIGHTS_2PLANE (32)
#define WEIGHTS_PLANE2_OFFSET    (32)

// Color endpoint mode ids (numerically the LDR RGB/RGBA modes 8, 9, 12, 13 of the ASTC format).
#define CEM_LDR_RGB_DIRECT       (8)
#define CEM_LDR_RGB_BASE_OFFSET  (9)
#define CEM_LDR_RGBA_DIRECT      (12)
#define CEM_LDR_RGBA_BASE_OFFSET (13)
66
// Quantization level indices. QUANT_n is the level with n distinct values; the
// rows of g_bitsTritsQuintsTable and the 256-entry sub-tables of
// g_colorQuantTables are laid out in this same order.
#define QUANT_2   (0)
#define QUANT_3   (1)
#define QUANT_4   (2)
#define QUANT_5   (3)
#define QUANT_6   (4)
#define QUANT_8   (5)
#define QUANT_10  (6)
#define QUANT_12  (7)
#define QUANT_16  (8)
#define QUANT_20  (9)
#define QUANT_24  (10)
#define QUANT_32  (11)
#define QUANT_40  (12)
#define QUANT_48  (13)
#define QUANT_64  (14)
#define QUANT_80  (15)
#define QUANT_96  (16)
#define QUANT_128 (17)
#define QUANT_160 (18)
#define QUANT_192 (19)
#define QUANT_256 (20)
// Number of quantization levels (one past the last index).
#define QUANT_MAX (21)
89
// g_scrambleTable and g_weightUnquant hold WEIGHT_QUANTIZE_GROUP groups of
// WEIGHT_QUANTIZE_NUM entries each.
#define WEIGHT_QUANTIZE_GROUP  (12)
#define WEIGHT_QUANTIZE_NUM    (32)
#define WEIGHT_RANGE_6         (6)

// Entries per quant level in the color quantization tables.
#define COLOR_NUM              (256)

// Partition bookkeeping: PartInfo carries PARTITION_COUNT bitmaps, and GetPart
// reads bit i (i < MAX_BLOCK_SIZE) of the bitmap at SECOND_PARTITION_INDEX.
#define MAX_PARTITION_COUNT    (4)
#define PARTITION_COUNT        (2)
#define SECOND_PARTITION_INDEX (1)
#define MAX_BLOCK_SIZE         (32)
98
// Named literals, so the kernel code carries no bare 0 / 1 constants.
#define START_INDEX         (0)
#define INT_ZERO            (0)
#define INT_ONE             (1)
#define SHORT_ZERO          (0)
#define UINT_ZERO           (0)
#define UINT_ONE            (1u)
#define FLOAT_ZERO          (0.0f)
#define FLOAT_ONE           (1.0f)
#define QUANTIZE_WEIGHT_MIN (0)

// Endpoint slots: two endpoints per block.
#define EP0_INDEX           (0)
#define EP1_INDEX           (1)
#define END_POINT_NUM       (2)

// Positions of the interleaved endpoint components (R0, R1, G0, G1, B0, B1, A0, A1).
#define EP0_R_INDEX         (0)
#define EP1_R_INDEX         (1)
#define EP0_G_INDEX         (2)
#define EP1_G_INDEX         (3)
#define EP0_B_INDEX         (4)
#define EP1_B_INDEX         (5)
#define EP0_A_INDEX         (6)
#define EP1_A_INDEX         (7)
#define COLOR_COMPONENT_NUM (8)
120
// Trit packing constants (use sites are outside this excerpt; presumably
// 5 trits share one 8-bit packed value - confirm).
#define TRIT_BLOCK_SIZE  (5)
#define TRIT_MSB_SIZE    (8)
#define TRIT_ROUND_NUM   (4)

// Quint packing constants (presumably 3 quints share one 7-bit packed value - confirm).
#define QUINT_BLOCK_SIZE (3)
#define QUINT_MSB_SIZE   (7)
#define QUINT_ROUND_NUM  (2)

// Element positions 0..4 inside one packed group.
#define ISE_0            (0)
#define ISE_1            (1)
#define ISE_2            (2)
#define ISE_3            (3)
#define ISE_4            (4)
132
// Named indices 0..15, one per weight slot of a block (BLOCK_SIZE is 16).
#define WEIGHT_0  (0)
#define WEIGHT_1  (1)
#define WEIGHT_2  (2)
#define WEIGHT_3  (3)
#define WEIGHT_4  (4)
#define WEIGHT_5  (5)
#define WEIGHT_6  (6)
#define WEIGHT_7  (7)
#define WEIGHT_8  (8)
#define WEIGHT_9  (9)
#define WEIGHT_10 (10)
#define WEIGHT_11 (11)
#define WEIGHT_12 (12)
#define WEIGHT_13 (13)
#define WEIGHT_14 (14)
#define WEIGHT_15 (15)
149
// Byte extraction: shift amounts for bytes 1..3 and the single-byte mask.
#define BYTE_MASK         (0xFFu)
#define BYTE_1_POS        (8)
#define BYTE_2_POS        (16)
#define BYTE_3_POS        (24)

// Bit positions used when assembling an encoded block (use sites are outside this excerpt).
#define CEM_POS           (13)
#define COLOR_EP_POS      (17)
#define COLOR_EP_HIGH_BIT (15)

// Low-bit masks of 4, 15 and 17 bits.
#define MASK_FOR_4BITS    (0xFu)
#define MASK_FOR_15BITS   (0x7FFFu)
#define MASK_FOR_17BITS   (0x1FFFFu)
160
// Low-bit masks of 1 and 2 bits.
#define MASK_FOR_1BITS          (0x1u)
#define MASK_FOR_2BITS          (0x3u)

// Offsets subtracted from the grid height / width (use sites are outside this excerpt).
#define HEIGHT_BITS_OFFSET      (2)
#define WIDTH_BITS_OFFSET       (4)

// Weight quant method handling.
#define WEIGHT_METHOD_OFFSET    (2u)
#define WEIGHT_METHOD_RIGHT_BIT (1)
#define WEIGHT_METHOD_POS       (4u)

// Shift amounts of the remaining fields.
// NOTE(review): BLOCK_WIDTH_POS and BLOCK_HEIGHT_POS are both 5. The ASTC block mode
// keeps its two 2-bit grid-size fields at bits 5-6 and 7-8, so one of these may be
// expected to be 7 - confirm against the code that assembles the block mode.
#define BLOCK_WIDTH_POS         (5u)
#define BLOCK_HEIGHT_POS        (5u)
#define WEIGHT_PRECISION_POS    (9u)
#define IS_DUALPLANE_POS        (10u)
172
173 typedef struct {
174 int partid;
175 uint bitmaps[PARTITION_COUNT];
176 } PartInfo;
177
178 int GetPart(PartInfo* partInfo, int i)
179 {
180 if (i >= MAX_BLOCK_SIZE) {
181 return 0;
182 }
183 return (int)(((*partInfo).bitmaps[SECOND_PARTITION_INDEX] >> i) & MASK_FOR_1BITS);
184 }
185
// Weight scramble lookup, stored flat as WEIGHT_QUANTIZE_GROUP (12) groups of
// WEIGHT_QUANTIZE_NUM (32) entries. The groups hold 2, 3, 4, 5, 6, 8, 10, 12, 16,
// 20, 24 and 32 meaningful values respectively (i.e. the levels QUANT_2..QUANT_32)
// and are zero-padded to 32 entries.
// Presumably indexed as level * WEIGHT_QUANTIZE_NUM + weight index - confirm at the use site.
186 __constant short g_scrambleTable[WEIGHT_QUANTIZE_GROUP * WEIGHT_QUANTIZE_NUM] = {
187 0, 1,
188 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
189 0,
190 0, 1, 2,
191 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
192 0, 1, 2, 3,
193 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
194 0, 1, 2, 3, 4,
195 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
196 0, 2, 4, 5, 3, 1,
197 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
198 0, 1, 2, 3, 4, 5, 6, 7,
199 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
200 0, 2, 4, 6, 8, 9, 7, 5, 3, 1,
201 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
202 0, 4, 8, 2, 6, 10, 11, 7, 3, 9, 5, 1,
203 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
204 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
205 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
206 0, 4, 8, 12, 16, 2, 6, 10, 14, 18, 19, 15, 11, 7, 3, 17, 13, 9, 5, 1,
207 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
208 0, 8, 16, 2, 10, 18, 4, 12, 20, 6, 14, 22, 23, 15, 7, 21, 13, 5, 19,
209 11, 3, 17, 9, 1, 0, 0, 0, 0, 0, 0, 0, 0,
210 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
211 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
212 };
213
// Weight unquantization lookup, stored flat with the same layout as
// g_scrambleTable: WEIGHT_QUANTIZE_GROUP (12) groups of WEIGHT_QUANTIZE_NUM (32)
// entries, each group zero-padded after its meaningful values. Every meaningful
// value lies in 0..64.
// Presumably indexed as level * WEIGHT_QUANTIZE_NUM + stored weight code - confirm at the use site.
214 __constant short g_weightUnquant[WEIGHT_QUANTIZE_GROUP * WEIGHT_QUANTIZE_NUM] = {
215 0, 64,
216 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
217 0, 32, 64,
218 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
219 0, 21, 43, 64,
220 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
221 0, 16, 32, 48, 64,
222 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
223 0, 64, 12, 52, 25, 39,
224 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
225 0, 9, 18, 27, 37, 46, 55, 64,
226 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
227 0, 64, 7, 57, 14, 50, 21, 43, 28, 36,
228 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
229 0, 64, 17, 47, 5, 59, 23, 41, 11, 53, 28, 36,
230 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
231 0, 4, 8, 12, 17, 21, 25, 29, 35, 39, 43, 47, 52, 56, 60, 64,
232 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
233 0, 64, 16, 48, 3, 61, 19, 45, 6, 58, 23, 41, 9, 55, 26, 38, 13, 51, 29, 35,
234 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
235 0, 64, 8, 56, 16, 48, 24, 40, 2, 62, 11, 53, 19, 45, 27, 37, 5, 59, 13, 51, 22, 42, 30, 34,
236 0, 0, 0, 0, 0, 0, 0, 0,
237 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 34, 36, 38,
238 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64
239 };
// Packed-integer lookup for trit groups: 243 = 3^5 entries, laid out as 27 rows
// of 9 values; every value fits in 8 bits (maximum 255).
// Presumably one entry per combination of five base-3 digits, with the index
// built from those digits - confirm the digit order at the use site.
240 __constant short g_integerFromTrits[243] = { // 243 = 3^5 entries, one packed integer per entry
241 0, 1, 2, 4, 5, 6, 8, 9, 10,
242 16, 17, 18, 20, 21, 22, 24, 25, 26,
243 3, 7, 15, 19, 23, 27, 12, 13, 14,
244 32, 33, 34, 36, 37, 38, 40, 41, 42,
245 48, 49, 50, 52, 53, 54, 56, 57, 58,
246 35, 39, 47, 51, 55, 59, 44, 45, 46,
247 64, 65, 66, 68, 69, 70, 72, 73, 74,
248 80, 81, 82, 84, 85, 86, 88, 89, 90,
249 67, 71, 79, 83, 87, 91, 76, 77, 78,
250
251 128, 129, 130, 132, 133, 134, 136, 137, 138,
252 144, 145, 146, 148, 149, 150, 152, 153, 154,
253 131, 135, 143, 147, 151, 155, 140, 141, 142,
254 160, 161, 162, 164, 165, 166, 168, 169, 170,
255 176, 177, 178, 180, 181, 182, 184, 185, 186,
256 163, 167, 175, 179, 183, 187, 172, 173, 174,
257 192, 193, 194, 196, 197, 198, 200, 201, 202,
258 208, 209, 210, 212, 213, 214, 216, 217, 218,
259 195, 199, 207, 211, 215, 219, 204, 205, 206,
260
261 96, 97, 98, 100, 101, 102, 104, 105, 106,
262 112, 113, 114, 116, 117, 118, 120, 121, 122,
263 99, 103, 111, 115, 119, 123, 108, 109, 110,
264 224, 225, 226, 228, 229, 230, 232, 233, 234,
265 240, 241, 242, 244, 245, 246, 248, 249, 250,
266 227, 231, 239, 243, 247, 251, 236, 237, 238,
267 28, 29, 30, 60, 61, 62, 92, 93, 94,
268 156, 157, 158, 188, 189, 190, 220, 221, 222,
269 31, 63, 127, 159, 191, 255, 252, 253, 254
270 };
271
// Per-level encoding recipe: one row of three ints for each of the QUANT_MAX
// quant levels, in QUANT_2..QUANT_256 order. Column 0 is the number of plain
// bits, column 1 is 1 when the level uses a trit, column 2 is 1 when it uses a
// quint (e.g. RANGE_6 = 1 bit + trit, RANGE_10 = 1 bit + quint).
// Row r starts at flat index r * 3.
272 __constant int g_bitsTritsQuintsTable[QUANT_MAX * 3] = { // 3 ints per quant level: bits, trits, quints
273 1, 0, 0, // RANGE_2
274 0, 1, 0, // RANGE_3
275 2, 0, 0, // RANGE_4
276 0, 0, 1, // RANGE_5
277 1, 1, 0, // RANGE_6
278 3, 0, 0, // RANGE_8
279 1, 0, 1, // RANGE_10
280 2, 1, 0, // RANGE_12
281 4, 0, 0, // RANGE_16
282 2, 0, 1, // RANGE_20
283 3, 1, 0, // RANGE_24
284 5, 0, 0, // RANGE_32
285 3, 0, 1, // RANGE_40
286 4, 1, 0, // RANGE_48
287 6, 0, 0, // RANGE_64
288 4, 0, 1, // RANGE_80
289 5, 1, 0, // RANGE_96
290 7, 0, 0, // RANGE_128
291 5, 0, 1, // RANGE_160
292 6, 1, 0, // RANGE_192
293 8, 0, 0 // RANGE_256
294 };
295
// Packed-integer lookup for quint groups: 125 = 5^3 entries; every value fits
// in 7 bits (maximum 127).
// Presumably one entry per combination of three base-5 digits, with the index
// built from those digits - confirm the digit order at the use site.
296 __constant short g_integerFromQuints[125] = { // 125 = 5^3 entries, one packed integer per entry
297 0, 1, 2, 3, 4, 8, 9, 10, 11, 12, 16, 17, 18, 19, 20, 24, 25, 26, 27, 28, 5, 13, 21, 29, 6,
298 32, 33, 34, 35, 36, 40, 41, 42, 43, 44, 48, 49, 50, 51, 52, 56, 57, 58, 59, 60, 37, 45, 53,
299 61, 14,
300 64, 65, 66, 67, 68, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 88, 89, 90, 91, 92, 69, 77, 85,
301 93, 22,
302 96, 97, 98, 99, 100, 104, 105, 106, 107, 108, 112, 113, 114, 115, 116, 120, 121, 122, 123,
303 124, 101, 109, 117, 125, 30,
304 102, 103, 70, 71, 38, 110, 111, 78, 79, 46, 118, 119, 86, 87, 54, 126, 127, 94, 95, 62, 39,
305 47, 55, 63, 31
306 };
307
// Color quantization lookup, stored flat as QUANT_MAX sub-tables of COLOR_NUM
// (256) entries each, in QUANT_2..QUANT_256 order (see the per-level comments).
// Each sub-table is written as 16 rows of 16 values; the values of the sub-table
// for a level with n steps lie in 0..n-1. For levels such as QUANT_6 the codes
// are not monotonic in the index (they follow a scrambled order).
// Presumably indexed as level * COLOR_NUM + 8-bit color value - confirm at the use site.
308 __constant short g_colorQuantTables[QUANT_MAX * COLOR_NUM] = {
309 // QUANT_2
310 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
311 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
312 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
313 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
314 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
315 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
316 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
317 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
318 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
319 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
320 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
321 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
322 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
323 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
324 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
325 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
326 // QUANT_3
327 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
328 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
329 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
330 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
331 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
332 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
333 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
334 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
335 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
336 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
337 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
338 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
339 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
340 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
341 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
342 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
343 // QUANT_4
344 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
345 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
346 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
347 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
348 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
349 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
350 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
351 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
352 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
353 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
354 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
355 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
356 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
357 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
358 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
359 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
360 // QUANT_5
361 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
362 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
363 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
364 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
365 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
366 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
367 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
368 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
369 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
370 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
371 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
372 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
373 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
374 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
375 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
376 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
377 // QUANT_6
378 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
379 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2,
380 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
381 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
382 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4,
383 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
384 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
385 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
386 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
387 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
388 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
389 5, 5, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
390 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
391 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
392 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
393 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
394 // QUANT_8
395 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
396 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
397 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
398 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2,
399 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
400 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3,
401 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
402 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
403 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
404 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
405 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
406 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
407 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6,
408 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
409 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7,
410 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
411 // QUANT_10
412 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
413 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
414 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4,
415 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
416 4, 4, 4, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6,
417 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
418 6, 6, 6, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
419 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
420 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
421 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 7, 7,
422 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
423 7, 7, 7, 7, 7, 7, 7, 7, 7, 5, 5, 5, 5, 5, 5, 5,
424 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
425 5, 5, 5, 5, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
426 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
427 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
428 // QUANT_12
429 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4,
430 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
431 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
432 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2, 2, 2, 2, 2, 2,
433 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
434 2, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
435 6, 6, 6, 6, 6, 6, 6, 6, 6, 10, 10, 10, 10, 10, 10, 10,
436 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
437 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
438 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7,
439 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 3,
440 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
441 3, 3, 3, 3, 3, 3, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
442 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 5, 5,
443 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
444 5, 5, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
445 // QUANT_16
446 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
447 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
448 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3,
449 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
450 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5,
451 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6,
452 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
453 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
454 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
455 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
456 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
457 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
458 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
459 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
460 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
461 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15,
462 // QUANT_20
463 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4,
464 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
465 8, 8, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
466 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 2, 2, 2,
467 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6,
468 6, 6, 6, 6, 6, 6, 6, 6, 10, 10, 10, 10, 10, 10, 10, 10,
469 10, 10, 10, 10, 10, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
470 14, 14, 14, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
471 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 15, 15, 15,
472 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 11, 11, 11, 11, 11,
473 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7,
474 7, 7, 7, 7, 7, 7, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
475 3, 3, 3, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
476 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 9, 9,
477 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 5, 5, 5, 5,
478 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 1, 1, 1, 1, 1,
479 // QUANT_24
480 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
481 8, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 2, 2, 2, 2,
482 2, 2, 2, 2, 2, 2, 2, 10, 10, 10, 10, 10, 10, 10, 10, 10,
483 10, 10, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 4, 4, 4,
484 4, 4, 4, 4, 4, 4, 4, 4, 12, 12, 12, 12, 12, 12, 12, 12,
485 12, 12, 12, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 6, 6,
486 6, 6, 6, 6, 6, 6, 6, 6, 6, 14, 14, 14, 14, 14, 14, 14,
487 14, 14, 14, 14, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
488 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 15, 15, 15, 15,
489 15, 15, 15, 15, 15, 15, 15, 7, 7, 7, 7, 7, 7, 7, 7, 7,
490 7, 7, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 13, 13, 13,
491 13, 13, 13, 13, 13, 13, 13, 13, 5, 5, 5, 5, 5, 5, 5, 5,
492 5, 5, 5, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 11, 11,
493 11, 11, 11, 11, 11, 11, 11, 11, 11, 3, 3, 3, 3, 3, 3, 3,
494 3, 3, 3, 3, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 9,
495 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1, 1, 1,
496 // QUANT_32
497 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2,
498 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4,
499 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6,
500 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8,
501 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10,
502 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 12,
503 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13,
504 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15,
505 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17,
506 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19,
507 19, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21,
508 21, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23,
509 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25,
510 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27,
511 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29,
512 29, 29, 29, 30, 30, 30, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31,
513 // QUANT_40
514 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16,
515 16, 24, 24, 24, 24, 24, 24, 32, 32, 32, 32, 32, 32, 32, 2, 2,
516 2, 2, 2, 2, 10, 10, 10, 10, 10, 10, 10, 18, 18, 18, 18, 18,
517 18, 26, 26, 26, 26, 26, 26, 26, 34, 34, 34, 34, 34, 34, 4, 4,
518 4, 4, 4, 4, 4, 12, 12, 12, 12, 12, 12, 20, 20, 20, 20, 20,
519 20, 20, 28, 28, 28, 28, 28, 28, 36, 36, 36, 36, 36, 36, 36, 6,
520 6, 6, 6, 6, 6, 14, 14, 14, 14, 14, 14, 14, 22, 22, 22, 22,
521 22, 22, 30, 30, 30, 30, 30, 30, 30, 38, 38, 38, 38, 38, 38, 38,
522 39, 39, 39, 39, 39, 39, 39, 31, 31, 31, 31, 31, 31, 31, 23, 23,
523 23, 23, 23, 23, 15, 15, 15, 15, 15, 15, 15, 7, 7, 7, 7, 7,
524 7, 37, 37, 37, 37, 37, 37, 37, 29, 29, 29, 29, 29, 29, 21, 21,
525 21, 21, 21, 21, 21, 13, 13, 13, 13, 13, 13, 5, 5, 5, 5, 5,
526 5, 5, 35, 35, 35, 35, 35, 35, 27, 27, 27, 27, 27, 27, 27, 19,
527 19, 19, 19, 19, 19, 11, 11, 11, 11, 11, 11, 11, 3, 3, 3, 3,
528 3, 3, 33, 33, 33, 33, 33, 33, 33, 25, 25, 25, 25, 25, 25, 17,
529 17, 17, 17, 17, 17, 17, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1,
530 // QUANT_48
531 0, 0, 0, 16, 16, 16, 16, 16, 16, 32, 32, 32, 32, 32, 2, 2,
532 2, 2, 2, 18, 18, 18, 18, 18, 18, 34, 34, 34, 34, 34, 4, 4,
533 4, 4, 4, 4, 20, 20, 20, 20, 20, 36, 36, 36, 36, 36, 6, 6,
534 6, 6, 6, 6, 22, 22, 22, 22, 22, 38, 38, 38, 38, 38, 38, 8,
535 8, 8, 8, 8, 24, 24, 24, 24, 24, 24, 40, 40, 40, 40, 40, 10,
536 10, 10, 10, 10, 26, 26, 26, 26, 26, 26, 42, 42, 42, 42, 42, 12,
537 12, 12, 12, 12, 12, 28, 28, 28, 28, 28, 44, 44, 44, 44, 44, 14,
538 14, 14, 14, 14, 14, 30, 30, 30, 30, 30, 46, 46, 46, 46, 46, 46,
539 47, 47, 47, 47, 47, 47, 31, 31, 31, 31, 31, 15, 15, 15, 15, 15,
540 15, 45, 45, 45, 45, 45, 29, 29, 29, 29, 29, 13, 13, 13, 13, 13,
541 13, 43, 43, 43, 43, 43, 27, 27, 27, 27, 27, 27, 11, 11, 11, 11,
542 11, 41, 41, 41, 41, 41, 25, 25, 25, 25, 25, 25, 9, 9, 9, 9,
543 9, 39, 39, 39, 39, 39, 39, 23, 23, 23, 23, 23, 7, 7, 7, 7,
544 7, 7, 37, 37, 37, 37, 37, 21, 21, 21, 21, 21, 5, 5, 5, 5,
545 5, 5, 35, 35, 35, 35, 35, 19, 19, 19, 19, 19, 19, 3, 3, 3,
546 3, 3, 33, 33, 33, 33, 33, 17, 17, 17, 17, 17, 17, 1, 1, 1,
547 // QUANT_64
548 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4,
549 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8,
550 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12,
551 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16,
552 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19,
553 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23,
554 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27,
555 28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31,
556 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35,
557 36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39,
558 40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43,
559 44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47,
560 47, 48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51,
561 51, 52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55,
562 55, 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59,
563 59, 60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63,
564 // QUANT_80
565 0, 0, 16, 16, 16, 32, 32, 32, 48, 48, 48, 48, 64, 64, 64, 2,
566 2, 2, 18, 18, 18, 34, 34, 34, 50, 50, 50, 50, 66, 66, 66, 4,
567 4, 4, 20, 20, 20, 36, 36, 36, 36, 52, 52, 52, 68, 68, 68, 6,
568 6, 6, 22, 22, 22, 38, 38, 38, 38, 54, 54, 54, 70, 70, 70, 8,
569 8, 8, 24, 24, 24, 24, 40, 40, 40, 56, 56, 56, 72, 72, 72, 10,
570 10, 10, 26, 26, 26, 26, 42, 42, 42, 58, 58, 58, 74, 74, 74, 12,
571 12, 12, 12, 28, 28, 28, 44, 44, 44, 60, 60, 60, 76, 76, 76, 14,
572 14, 14, 14, 30, 30, 30, 46, 46, 46, 62, 62, 62, 78, 78, 78, 78,
573 79, 79, 79, 79, 63, 63, 63, 47, 47, 47, 31, 31, 31, 15, 15, 15,
574 15, 77, 77, 77, 61, 61, 61, 45, 45, 45, 29, 29, 29, 13, 13, 13,
575 13, 75, 75, 75, 59, 59, 59, 43, 43, 43, 27, 27, 27, 27, 11, 11,
576 11, 73, 73, 73, 57, 57, 57, 41, 41, 41, 25, 25, 25, 25, 9, 9,
577 9, 71, 71, 71, 55, 55, 55, 39, 39, 39, 39, 23, 23, 23, 7, 7,
578 7, 69, 69, 69, 53, 53, 53, 37, 37, 37, 37, 21, 21, 21, 5, 5,
579 5, 67, 67, 67, 51, 51, 51, 51, 35, 35, 35, 19, 19, 19, 3, 3,
580 3, 65, 65, 65, 49, 49, 49, 49, 33, 33, 33, 17, 17, 17, 1, 1,
581 // QUANT_96
582 0, 0, 32, 32, 64, 64, 64, 2, 2, 2, 34, 34, 66, 66, 66, 4,
583 4, 4, 36, 36, 68, 68, 68, 6, 6, 6, 38, 38, 70, 70, 70, 8,
584 8, 8, 40, 40, 40, 72, 72, 10, 10, 10, 42, 42, 42, 74, 74, 12,
585 12, 12, 44, 44, 44, 76, 76, 14, 14, 14, 46, 46, 46, 78, 78, 16,
586 16, 16, 48, 48, 48, 80, 80, 80, 18, 18, 50, 50, 50, 82, 82, 82,
587 20, 20, 52, 52, 52, 84, 84, 84, 22, 22, 54, 54, 54, 86, 86, 86,
588 24, 24, 56, 56, 56, 88, 88, 88, 26, 26, 58, 58, 58, 90, 90, 90,
589 28, 28, 60, 60, 60, 92, 92, 92, 30, 30, 62, 62, 62, 94, 94, 94,
590 95, 95, 95, 63, 63, 63, 31, 31, 93, 93, 93, 61, 61, 61, 29, 29,
591 91, 91, 91, 59, 59, 59, 27, 27, 89, 89, 89, 57, 57, 57, 25, 25,
592 87, 87, 87, 55, 55, 55, 23, 23, 85, 85, 85, 53, 53, 53, 21, 21,
593 83, 83, 83, 51, 51, 51, 19, 19, 81, 81, 81, 49, 49, 49, 17, 17,
594 17, 79, 79, 47, 47, 47, 15, 15, 15, 77, 77, 45, 45, 45, 13, 13,
595 13, 75, 75, 43, 43, 43, 11, 11, 11, 73, 73, 41, 41, 41, 9, 9,
596 9, 71, 71, 71, 39, 39, 7, 7, 7, 69, 69, 69, 37, 37, 5, 5,
597 5, 67, 67, 67, 35, 35, 3, 3, 3, 65, 65, 65, 33, 33, 1, 1,
598 // QUANT_128
599 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7,
600 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15,
601 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23,
602 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30, 30, 31, 31,
603 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37, 38, 38, 39, 39,
604 40, 40, 41, 41, 42, 42, 43, 43, 44, 44, 45, 45, 46, 46, 47, 47,
605 48, 48, 49, 49, 50, 50, 51, 51, 52, 52, 53, 53, 54, 54, 55, 55,
606 56, 56, 57, 57, 58, 58, 59, 59, 60, 60, 61, 61, 62, 62, 63, 63,
607 64, 64, 65, 65, 66, 66, 67, 67, 68, 68, 69, 69, 70, 70, 71, 71,
608 72, 72, 73, 73, 74, 74, 75, 75, 76, 76, 77, 77, 78, 78, 79, 79,
609 80, 80, 81, 81, 82, 82, 83, 83, 84, 84, 85, 85, 86, 86, 87, 87,
610 88, 88, 89, 89, 90, 90, 91, 91, 92, 92, 93, 93, 94, 94, 95, 95,
611 96, 96, 97, 97, 98, 98, 99, 99, 100, 100, 101, 101, 102, 102, 103, 103,
612 104, 104, 105, 105, 106, 106, 107, 107, 108, 108, 109, 109, 110, 110, 111, 111,
613 112, 112, 113, 113, 114, 114, 115, 115, 116, 116, 117, 117, 118, 118, 119, 119,
614 120, 120, 121, 121, 122, 122, 123, 123, 124, 124, 125, 125, 126, 126, 127, 127,
615 // QUANT_160
616 0, 32, 32, 64, 96, 96, 128, 128, 2, 34, 34, 66, 98, 98, 130, 130,
617 4, 36, 36, 68, 100, 100, 132, 132, 6, 38, 38, 70, 102, 102, 134, 134,
618 8, 40, 40, 72, 104, 104, 136, 136, 10, 42, 42, 74, 106, 106, 138, 138,
619 12, 44, 44, 76, 108, 108, 140, 140, 14, 46, 46, 78, 110, 110, 142, 142,
620 16, 48, 48, 80, 112, 112, 144, 144, 18, 50, 50, 82, 114, 114, 146, 146,
621 20, 52, 52, 84, 116, 116, 148, 148, 22, 54, 54, 86, 118, 118, 150, 150,
622 24, 56, 56, 88, 120, 120, 152, 152, 26, 58, 58, 90, 122, 122, 154, 154,
623 28, 60, 60, 92, 124, 124, 156, 156, 30, 62, 62, 94, 126, 126, 158, 158,
624 159, 159, 127, 127, 95, 63, 63, 31, 157, 157, 125, 125, 93, 61, 61, 29,
625 155, 155, 123, 123, 91, 59, 59, 27, 153, 153, 121, 121, 89, 57, 57, 25,
626 151, 151, 119, 119, 87, 55, 55, 23, 149, 149, 117, 117, 85, 53, 53, 21,
627 147, 147, 115, 115, 83, 51, 51, 19, 145, 145, 113, 113, 81, 49, 49, 17,
628 143, 143, 111, 111, 79, 47, 47, 15, 141, 141, 109, 109, 77, 45, 45, 13,
629 139, 139, 107, 107, 75, 43, 43, 11, 137, 137, 105, 105, 73, 41, 41, 9,
630 135, 135, 103, 103, 71, 39, 39, 7, 133, 133, 101, 101, 69, 37, 37, 5,
631 131, 131, 99, 99, 67, 35, 35, 3, 129, 129, 97, 97, 65, 33, 33, 1,
632 // QUANT_192
633 0, 64, 128, 128, 2, 66, 130, 130, 4, 68, 132, 132, 6, 70, 134, 134,
634 8, 72, 136, 136, 10, 74, 138, 138, 12, 76, 140, 140, 14, 78, 142, 142,
635 16, 80, 144, 144, 18, 82, 146, 146, 20, 84, 148, 148, 22, 86, 150, 150,
636 24, 88, 152, 152, 26, 90, 154, 154, 28, 92, 156, 156, 30, 94, 158, 158,
637 32, 96, 160, 160, 34, 98, 162, 162, 36, 100, 164, 164, 38, 102, 166, 166,
638 40, 104, 168, 168, 42, 106, 170, 170, 44, 108, 172, 172, 46, 110, 174, 174,
639 48, 112, 176, 176, 50, 114, 178, 178, 52, 116, 180, 180, 54, 118, 182, 182,
640 56, 120, 184, 184, 58, 122, 186, 186, 60, 124, 188, 188, 62, 126, 190, 190,
641 191, 191, 127, 63, 189, 189, 125, 61, 187, 187, 123, 59, 185, 185, 121, 57,
642 183, 183, 119, 55, 181, 181, 117, 53, 179, 179, 115, 51, 177, 177, 113, 49,
643 175, 175, 111, 47, 173, 173, 109, 45, 171, 171, 107, 43, 169, 169, 105, 41,
644 167, 167, 103, 39, 165, 165, 101, 37, 163, 163, 99, 35, 161, 161, 97, 33,
645 159, 159, 95, 31, 157, 157, 93, 29, 155, 155, 91, 27, 153, 153, 89, 25,
646 151, 151, 87, 23, 149, 149, 85, 21, 147, 147, 83, 19, 145, 145, 81, 17,
647 143, 143, 79, 15, 141, 141, 77, 13, 139, 139, 75, 11, 137, 137, 73, 9,
648 135, 135, 71, 7, 133, 133, 69, 5, 131, 131, 67, 3, 129, 129, 65, 1,
649 // QUANT_256
650 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
651 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
652 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
653 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
654 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
655 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
656 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
657 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
658 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
659 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
660 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
661 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
662 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
663 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
664 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
665 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
666 };
667
// Unquantization lookup table with one row per color quantization level.
// DecodeColor reads color_unquant_tables[quantLevel][q] to turn a quantized
// endpoint value q back into a color value; every listed value is in 0..255.
// The comment on each row is the number of entries listed for that row.
__constant short color_unquant_tables[QUANT_MAX][COLOR_NUM] = {
    { // 2
        0, 255
    },
    { // 3
        0, 128, 255
    },
    { // 4
        0, 85, 170, 255
    },
    { // 5
        0, 64, 128, 192, 255
    },
    { // 6
        0, 255, 51, 204, 102, 153
    },
    { // 8
        0, 36, 73, 109, 146, 182, 219, 255
    },
    { // 10
        0, 255, 28, 227, 56, 199, 84, 171, 113, 142
    },
    { // 12
        0, 255, 69, 186, 23, 232, 92, 163, 46, 209, 116, 139
    },
    { // 16
        0, 17, 34, 51, 68, 85, 102, 119, 136, 153, 170, 187, 204, 221, 238, 255
    },
    { // 20
        0, 255, 67, 188, 13, 242, 80, 175, 27, 228, 94, 161, 40, 215, 107, 148,
        54, 201, 121, 134
    },
    { // 24
        0, 255, 33, 222, 66, 189, 99, 156, 11, 244, 44, 211, 77, 178, 110, 145,
        22, 233, 55, 200, 88, 167, 121, 134
    },
    { // 32
        0, 8, 16, 24, 33, 41, 49, 57, 66, 74, 82, 90, 99, 107, 115, 123,
        132, 140, 148, 156, 165, 173, 181, 189, 198, 206, 214, 222, 231, 239, 247, 255
    },
    { // 40
        0, 255, 32, 223, 65, 190, 97, 158, 6, 249, 39, 216, 71, 184, 104, 151,
        13, 242, 45, 210, 78, 177, 110, 145, 19, 236, 52, 203, 84, 171, 117, 138,
        26, 229, 58, 197, 91, 164, 123, 132
    },
    { // 48
        0, 255, 16, 239, 32, 223, 48, 207, 65, 190, 81, 174, 97, 158, 113, 142,
        5, 250, 21, 234, 38, 217, 54, 201, 70, 185, 86, 169, 103, 152, 119, 136,
        11, 244, 27, 228, 43, 212, 59, 196, 76, 179, 92, 163, 108, 147, 124, 131
    },
    { // 64
        0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60,
        65, 69, 73, 77, 81, 85, 89, 93, 97, 101, 105, 109, 113, 117, 121, 125,
        130, 134, 138, 142, 146, 150, 154, 158, 162, 166, 170, 174, 178, 182, 186, 190,
        195, 199, 203, 207, 211, 215, 219, 223, 227, 231, 235, 239, 243, 247, 251, 255
    },
    { // 80
        0, 255, 16, 239, 32, 223, 48, 207, 64, 191, 80, 175, 96, 159, 112, 143,
        3, 252, 19, 236, 35, 220, 51, 204, 67, 188, 83, 172, 100, 155, 116, 139,
        6, 249, 22, 233, 38, 217, 54, 201, 71, 184, 87, 168, 103, 152, 119, 136,
        9, 246, 25, 230, 42, 213, 58, 197, 74, 181, 90, 165, 106, 149, 122, 133,
        13, 242, 29, 226, 45, 210, 61, 194, 77, 178, 93, 162, 109, 146, 125, 130
    },
    { // 96
        0, 255, 8, 247, 16, 239, 24, 231, 32, 223, 40, 215, 48, 207, 56, 199,
        64, 191, 72, 183, 80, 175, 88, 167, 96, 159, 104, 151, 112, 143, 120, 135,
        2, 253, 10, 245, 18, 237, 26, 229, 35, 220, 43, 212, 51, 204, 59, 196,
        67, 188, 75, 180, 83, 172, 91, 164, 99, 156, 107, 148, 115, 140, 123, 132,
        5, 250, 13, 242, 21, 234, 29, 226, 37, 218, 45, 210, 53, 202, 61, 194,
        70, 185, 78, 177, 86, 169, 94, 161, 102, 153, 110, 145, 118, 137, 126, 129
    },
    { // 128
        0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30,
        32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62,
        64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94,
        96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126,
        129, 131, 133, 135, 137, 139, 141, 143, 145, 147, 149, 151, 153, 155, 157, 159,
        161, 163, 165, 167, 169, 171, 173, 175, 177, 179, 181, 183, 185, 187, 189, 191,
        193, 195, 197, 199, 201, 203, 205, 207, 209, 211, 213, 215, 217, 219, 221, 223,
        225, 227, 229, 231, 233, 235, 237, 239, 241, 243, 245, 247, 249, 251, 253, 255
    },
    { // 160
        0, 255, 8, 247, 16, 239, 24, 231, 32, 223, 40, 215, 48, 207, 56, 199,
        64, 191, 72, 183, 80, 175, 88, 167, 96, 159, 104, 151, 112, 143, 120, 135,
        1, 254, 9, 246, 17, 238, 25, 230, 33, 222, 41, 214, 49, 206, 57, 198,
        65, 190, 73, 182, 81, 174, 89, 166, 97, 158, 105, 150, 113, 142, 121, 134,
        3, 252, 11, 244, 19, 236, 27, 228, 35, 220, 43, 212, 51, 204, 59, 196,
        67, 188, 75, 180, 83, 172, 91, 164, 99, 156, 107, 148, 115, 140, 123, 132,
        4, 251, 12, 243, 20, 235, 28, 227, 36, 219, 44, 211, 52, 203, 60, 195,
        68, 187, 76, 179, 84, 171, 92, 163, 100, 155, 108, 147, 116, 139, 124, 131,
        6, 249, 14, 241, 22, 233, 30, 225, 38, 217, 46, 209, 54, 201, 62, 193,
        70, 185, 78, 177, 86, 169, 94, 161, 102, 153, 110, 145, 118, 137, 126, 129
    },
    { // 192
        0, 255, 4, 251, 8, 247, 12, 243, 16, 239, 20, 235, 24, 231, 28, 227,
        32, 223, 36, 219, 40, 215, 44, 211, 48, 207, 52, 203, 56, 199, 60, 195,
        64, 191, 68, 187, 72, 183, 76, 179, 80, 175, 84, 171, 88, 167, 92, 163,
        96, 159, 100, 155, 104, 151, 108, 147, 112, 143, 116, 139, 120, 135, 124, 131,
        1, 254, 5, 250, 9, 246, 13, 242, 17, 238, 21, 234, 25, 230, 29, 226,
        33, 222, 37, 218, 41, 214, 45, 210, 49, 206, 53, 202, 57, 198, 61, 194,
        65, 190, 69, 186, 73, 182, 77, 178, 81, 174, 85, 170, 89, 166, 93, 162,
        97, 158, 101, 154, 105, 150, 109, 146, 113, 142, 117, 138, 121, 134, 125, 130,
        2, 253, 6, 249, 10, 245, 14, 241, 18, 237, 22, 233, 26, 229, 30, 225,
        34, 221, 38, 217, 42, 213, 46, 209, 50, 205, 54, 201, 58, 197, 62, 193,
        66, 189, 70, 185, 74, 181, 78, 177, 82, 173, 86, 169, 90, 165, 94, 161,
        98, 157, 102, 153, 106, 149, 110, 145, 114, 141, 118, 137, 122, 133, 126, 129
    },
    { // 256
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
        16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
        48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
        64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
        80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
        96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
        112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
        128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
        144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
        160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
        208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
        224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
        240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
    }
};
794
// Exchanges the two float4 values that lhs and rhs point to.
// Leaves both untouched when either pointer is NULL.
void Swap(float4* lhs, float4* rhs)
{
    if ((lhs != NULL) && (rhs != NULL)) {
        const float4 saved = *rhs;
        *rhs = *lhs;
        *lhs = saved;
    }
}
804
// Projects every texel of the block onto the axis vecK through ptMean and
// derives the two endpoint colors from the smallest and largest projection.
//   texels - the BLOCK_SIZE texels of the block (only read)
//   ptMean - point the axis passes through
//   vecK   - axis direction
//   e0, e1 - outputs: endpoints clamped to [0, 255]; on return the rounded
//            RGB sum of e0 is not greater than that of e1
// Returns without writing anything when texels, e0 or e1 is NULL.
void FindMinMax(float4* texels, float4 ptMean, float4 vecK, float4* e0, float4* e1)
{
    if ((texels == NULL) || (e0 == NULL) || (e1 == NULL)) {
        return;
    }
    float projMin = 1e31f;  // start above any projection that can occur
    float projMax = -1e31f; // start below any projection that can occur
    int idx = START_INDEX;
    while (idx < BLOCK_SIZE) {
        const float proj = dot(texels[idx] - ptMean, vecK);
        projMin = min(projMin, proj);
        projMax = max(projMax, proj);
        ++idx;
    }
    *e0 = clamp(vecK * projMin + ptMean, 0.0f, 255.0f); // 255.0f is the 8-bit maximum
    *e1 = clamp(vecK * projMax + ptMean, 0.0f, 255.0f); // 255.0f is the 8-bit maximum
    // The axis may point from light to dark; in that case exchange the
    // endpoints so that e0 is the darker one (alpha is not part of the sum).
    const float4 rounded0 = round(*e0);
    const float4 rounded1 = round(*e1);
    const float rgbSum0 = rounded0.x + rounded0.y + rounded0.z;
    const float rgbSum1 = rounded1.x + rounded1.y + rounded1.z;
    if (rgbSum0 > rgbSum1) {
        Swap(e0, e1);
    }
}
827
// Chooses an axis for the block and computes the endpoints along it.
// For each channel, the offsets (texel - ptMean) of all texels whose offset is
// positive in that channel are summed. The sum with the largest squared length
// becomes the axis; the alpha sum only takes part when hasAlpha is set. The
// axis is normalized unless its length is below SMALL_VALUE, and FindMinMax
// then fills e0 and e1.
// Returns without writing anything when texels, e0 or e1 is NULL.
void MaxAccumulationPixelDirection(float4* texels, float4 ptMean, float4* e0, float4* e1, bool hasAlpha)
{
    if ((texels == NULL) || (e0 == NULL) || (e1 == NULL)) {
        return;
    }
    const float4 none = (float4)(FLOAT_ZERO);
    float4 accR = none;
    float4 accG = none;
    float4 accB = none;
    float4 accA = none;
    for (int idx = START_INDEX; idx < BLOCK_SIZE; ++idx) {
        const float4 offset = texels[idx] - ptMean;
        accR += (offset.x > FLOAT_ZERO) ? offset : none;
        accG += (offset.y > FLOAT_ZERO) ? offset : none;
        accB += (offset.z > FLOAT_ZERO) ? offset : none;
        accA += (offset.w > FLOAT_ZERO) ? offset : none;
    }
    // Keep the accumulated direction with the largest squared length;
    // earlier channels win ties because the comparisons are strict.
    float4 vecK = accR;
    float bestDot = dot(accR, accR);
    const float dotG = dot(accG, accG);
    if (dotG > bestDot) {
        bestDot = dotG;
        vecK = accG;
    }
    const float dotB = dot(accB, accB);
    if (dotB > bestDot) {
        bestDot = dotB;
        vecK = accB;
    }
    const float dotA = dot(accA, accA);
    if (hasAlpha && dotA > bestDot) {
        bestDot = dotA;
        vecK = accA;
    }
    // Safe normalize: a (near) zero-length axis is passed on unchanged.
    if (!(length(vecK) < SMALL_VALUE)) {
        vecK = normalize(vecK);
    }
    FindMinMax(texels, ptMean, vecK, e0, e1);
}
867
// Quantizes both endpoints: every channel is rounded to an integer and looked
// up in g_colorQuantTables at offset quantLevel * COLOR_NUM. The results are
// stored in endpointQuantized at the EP0_* / EP1_* slots.
// Does nothing when endpointQuantized is NULL.
// NOTE(review): the rounded channel is used as a table index without a range
// check here; callers are presumably expected to pass channels in [0, 255].
void EncodeColorNormal(short quantLevel, float4 e0, float4 e1, short* endpointQuantized)
{
    if (endpointQuantized == NULL) {
        return;
    }
    const int4 idx0 = convert_int4(round(e0));
    const int4 idx1 = convert_int4(round(e1));
    const int levelBase = quantLevel * COLOR_NUM; // first table entry of this level
    endpointQuantized[EP0_R_INDEX] = g_colorQuantTables[levelBase + idx0.x];
    endpointQuantized[EP0_G_INDEX] = g_colorQuantTables[levelBase + idx0.y];
    endpointQuantized[EP0_B_INDEX] = g_colorQuantTables[levelBase + idx0.z];
    endpointQuantized[EP0_A_INDEX] = g_colorQuantTables[levelBase + idx0.w];
    endpointQuantized[EP1_R_INDEX] = g_colorQuantTables[levelBase + idx1.x];
    endpointQuantized[EP1_G_INDEX] = g_colorQuantTables[levelBase + idx1.y];
    endpointQuantized[EP1_B_INDEX] = g_colorQuantTables[levelBase + idx1.z];
    endpointQuantized[EP1_A_INDEX] = g_colorQuantTables[levelBase + idx1.w];
}
886
// Converts quantized endpoint components back to color values using the
// color_unquant_tables row of quantLevel.
//   endpointQuantized - quantized components at the EP0_* / EP1_* slots
//   e0, e1            - outputs: all four channels of each are overwritten
// Does nothing when endpointQuantized, e0 or e1 is NULL.
void DecodeColor(short quantLevel, short endpointQuantized[COLOR_COMPONENT_NUM], float4* e0, float4* e1)
{
    if ((endpointQuantized == NULL) || (e0 == NULL) || (e1 == NULL)) {
        return;
    }
    __constant short* levelRow = color_unquant_tables[quantLevel];
    const float4 decoded0 = (float4)((float)(levelRow[endpointQuantized[EP0_R_INDEX]]),
                                     (float)(levelRow[endpointQuantized[EP0_G_INDEX]]),
                                     (float)(levelRow[endpointQuantized[EP0_B_INDEX]]),
                                     (float)(levelRow[endpointQuantized[EP0_A_INDEX]]));
    const float4 decoded1 = (float4)((float)(levelRow[endpointQuantized[EP1_R_INDEX]]),
                                     (float)(levelRow[endpointQuantized[EP1_G_INDEX]]),
                                     (float)(levelRow[endpointQuantized[EP1_B_INDEX]]),
                                     (float)(levelRow[endpointQuantized[EP1_A_INDEX]]));
    *e0 = decoded0;
    *e1 = decoded1;
}
901
// Quantizes one weight: scales it by weightRange, rounds to the nearest
// integer and clamps the result to [QUANTIZE_WEIGHT_MIN, weightRange].
short QuantizeWeight(uint weightRange, float weight)
{
    const float scaled = weight * ((float)(weightRange));
    const short nearest = (short)(round(scaled));
    const short lowest = (short)(QUANTIZE_WEIGHT_MIN);
    const short highest = (short)(weightRange);
    return clamp(nearest, lowest, highest);
}
908
// Computes one weight per texel by projecting the texel onto the line from
// endPoint[EP0_INDEX] to endPoint[EP1_INDEX].
//   part     - not used by this function
//   texels   - block texels (only read)
//   endPoint - the two endpoint colors
//   projw    - output: the projections rescaled so that the smallest becomes
//              0 and the largest 1; all zero when the endpoints are closer
//              than SMALL_VALUE
// Does nothing when texels, endPoint or projw is NULL.
void CalculateNormalWeights(int part, float4* texels,
    float4 endPoint[END_POINT_NUM], float* projw)
{
    if ((texels == NULL) || (endPoint == NULL) || (projw == NULL)) {
        return;
    }
    const float4 span = endPoint[EP1_INDEX] - endPoint[EP0_INDEX];
    if (length(span) < SMALL_VALUE) {
        // Degenerate line: every texel gets weight zero.
        for (int k = START_INDEX; k < X_GRIDS * Y_GRIDS; ++k) {
            projw[k] = FLOAT_ZERO;
        }
        return;
    }
    const float4 axis = normalize(span);
    float lowest = 1e31f;   // start above any projection that can occur
    float highest = -1e31f; // start below any projection that can occur
    for (int k = START_INDEX; k < BLOCK_SIZE; ++k) {
        const float proj = dot(axis, texels[k] - endPoint[EP0_INDEX]);
        lowest = min(proj, lowest);
        highest = max(proj, highest);
        projw[k] = proj;
    }
    // The divisor is at least SMALL_VALUE, so this never divides by zero.
    const float scale = FLOAT_ONE / max(SMALL_VALUE, highest - lowest);
    for (int k = START_INDEX; k < X_GRIDS * Y_GRIDS; ++k) {
        projw[k] = (projw[k] - lowest) * scale;
    }
}
939
// Quantizes all X_GRIDS * Y_GRIDS weights in projw with QuantizeWeight and
// stores the results in weights. The pointers are not checked here.
void QuantizeWeights(float projw[X_GRIDS * Y_GRIDS], uint weightRange, short* weights)
{
    const int weightCount = X_GRIDS * Y_GRIDS;
    int slot = START_INDEX;
    while (slot < weightCount) {
        weights[slot] = QuantizeWeight(weightRange, projw[slot]);
        ++slot;
    }
}
946
// Computes the per-texel weights for the given endpoints and quantizes them
// to the range [QUANTIZE_WEIGHT_MIN, weightRange].
//   weights - output: one quantized weight per grid position
// Does nothing when texels, endPoint or weights is NULL.
void CalculateQuantizedWeights(float4* texels, uint weightRange, float4 endPoint[END_POINT_NUM], short* weights)
{
    if ((texels != NULL) && (endPoint != NULL) && (weights != NULL)) {
        float projected[X_GRIDS * Y_GRIDS];
        CalculateNormalWeights(INT_ZERO, texels, endPoint, projected);
        QuantizeWeights(projected, weightRange, weights);
    }
}
956
// Appends a value to a bit stream stored in the uint4 *outputs, whose
// components x, y, z, w are used as consecutive 32-bit words.
//   bitoffset - in/out: write position in bits, advanced by bitcount
//   number    - value ORed into the stream at the current position; it is not
//               masked, so it is presumably expected to fit in bitcount bits
//   bitcount  - number of bits the write position advances
// When the new position lies in a later word than the start position, the
// upper part of number is ORed into the following word. A write that starts
// in word w (bit 96 or later) stores nothing, but the position still advances.
// Does nothing when outputs or bitoffset is NULL.
void Orbits8Ptr(uint4* outputs, uint* bitoffset, uint number, uint bitcount)
{
    if ((outputs == NULL) || (bitoffset == NULL)) {
        return;
    }
    const uint startBit = *bitoffset;
    const uint endBit = startBit + bitcount;
    const uint startWord = startBit >> 5; // 32 bits per word: word index is position / 32
    const uint endWord = endBit >> 5;
    const uint shift = startBit & 31u;    // bit position inside the word
    const uint lowPart = number << shift;
    const uint carry = (endWord > startWord) ? (number >> (32u - shift)) : UINT_ZERO;
    switch (startWord) {
        case 0: // starts in x, may spill into y
            (*outputs).x |= lowPart;
            (*outputs).y |= carry;
            break;
        case 1: // starts in y, may spill into z
            (*outputs).y |= lowPart;
            (*outputs).z |= carry;
            break;
        case 2: // starts in z, may spill into w
            (*outputs).z |= lowPart;
            (*outputs).w |= carry;
            break;
        default:
            break;
    }
    *bitoffset = endBit;
}
978
// Splits n at bit position i.
//   low  - output: the i least significant bits of n
//   high - output: the bits above them, limited to 8 bits
void SplitHighLow(uint n, uint i, int* high, uint* low)
{
    const uint upper = n >> i;
    const uint lowMask = (UINT_ONE << i) - UINT_ONE;
    *low = n & lowMask;
    *high = ((int)(upper)) & 0xFF; // keep only the low 8 bits
}
985
// Returns the low byte of p with its bit order reversed (bit 0 becomes bit 7
// and so on). Bits of p above the low byte do not reach the result.
uint ReverseByte(uint p)
{
    // Swap the two nibbles, then the bit pairs inside each nibble, then the
    // two bits inside each pair.
    const uint nibbles = ((p >> 4) & 0xFu) | ((p & 0xFu) << 4);
    const uint pairs = ((nibbles >> 2) & 0x33u) | ((nibbles & 0x33u) << 2);
    return ((pairs >> 1) & 0x55u) | ((pairs & 0x55u) << 1);
}
993
// Encodes one group of five values that each consist of a trit (upper part)
// and bitcount plain bits (lower part), and appends the group to the bit
// stream *outputs at position *outpos.
// The five trits are combined into one index into g_integerFromTrits; the
// 8-bit result is emitted in pieces of 2, 2, 1, 2 and 1 bits, each piece
// following the plain bits of the corresponding value.
void EncodeTrits(uint bitcount, uint tritInput[TRIT_BLOCK_SIZE], uint4* outputs, uint* outpos)
{
    int trit0;
    int trit1;
    int trit2;
    int trit3;
    int trit4;
    uint low0;
    uint low1;
    uint low2;
    uint low3;
    uint low4;
    SplitHighLow(tritInput[ISE_0], bitcount, &trit0, &low0);
    SplitHighLow(tritInput[ISE_1], bitcount, &trit1, &low1);
    SplitHighLow(tritInput[ISE_2], bitcount, &trit2, &low2);
    SplitHighLow(tritInput[ISE_3], bitcount, &trit3, &low3);
    SplitHighLow(tritInput[ISE_4], bitcount, &trit4, &low4);
    // Base-3 number with trit4 as the most significant digit (weights 81, 27, 9, 3, 1).
    const int tableIdx = (((trit4 * 3 + trit3) * 3 + trit2) * 3 + trit1) * 3 + trit0;
    const uint packed = (uint)((ushort)(g_integerFromTrits[tableIdx]));

    Orbits8Ptr(outputs, outpos, low0, bitcount);
    Orbits8Ptr(outputs, outpos, packed & 3u, 2u);        // packed bits 0..1

    Orbits8Ptr(outputs, outpos, low1, bitcount);
    Orbits8Ptr(outputs, outpos, (packed >> 2) & 3u, 2u); // packed bits 2..3

    Orbits8Ptr(outputs, outpos, low2, bitcount);
    Orbits8Ptr(outputs, outpos, (packed >> 4) & 1u, 1u); // packed bit 4

    Orbits8Ptr(outputs, outpos, low3, bitcount);
    Orbits8Ptr(outputs, outpos, (packed >> 5) & 3u, 2u); // packed bits 5..6

    Orbits8Ptr(outputs, outpos, low4, bitcount);
    Orbits8Ptr(outputs, outpos, (packed >> 7) & 1u, 1u); // packed bit 7
}
1028
// Encodes one group of three values that each consist of a quint (upper part)
// and bitcount plain bits (lower part), and appends the group to the bit
// stream *outputs at position *outpos.
// The three quints are combined into one index into g_integerFromQuints; the
// 7-bit result is emitted in pieces of 3, 2 and 2 bits, each piece following
// the plain bits of the corresponding value.
void EncodeQuints(uint bitcount, uint quintInput[QUINT_BLOCK_SIZE], uint4* outputs, uint* outpos)
{
    int quint0;
    int quint1;
    int quint2;
    uint low0;
    uint low1;
    uint low2;
    SplitHighLow(quintInput[ISE_0], bitcount, &quint0, &low0);
    SplitHighLow(quintInput[ISE_1], bitcount, &quint1, &low1);
    SplitHighLow(quintInput[ISE_2], bitcount, &quint2, &low2);
    // Base-5 number with quint2 as the most significant digit (weights 25, 5, 1).
    const int tableIdx = (quint2 * 5 + quint1) * 5 + quint0;
    const uint packed = (uint)((ushort)(g_integerFromQuints[tableIdx]));

    Orbits8Ptr(outputs, outpos, low0, bitcount);
    Orbits8Ptr(outputs, outpos, packed & 7u, 3u);        // packed bits 0..2

    Orbits8Ptr(outputs, outpos, low1, bitcount);
    Orbits8Ptr(outputs, outpos, (packed >> 3) & 3u, 2u); // packed bits 3..4

    Orbits8Ptr(outputs, outpos, low2, bitcount);
    Orbits8Ptr(outputs, outpos, (packed >> 5) & 3u, 2u); // packed bits 5..6
}
1048
// Packs the quantized endpoint components into the bit stream *outputs.
//   numbers  - quantized components at the EP0_* / EP1_* slots
//   range    - row of g_bitsTritsQuintsTable (plain bits, trit flag, quint flag)
//   hasAlpha - selects 8 components (RGBA) or 6 (RGB)
//   bitPos   - in/out: write position in bits
// With the trit or quint flag set, all eight components are encoded in groups
// of five or three (unused group slots are zero) and *bitPos is then set to
// the length computed for the selected component count. Otherwise the first
// 6 or 8 entries of numbers are written as plain fields of `bits` bits.
void BiseEndpoints(short numbers[COLOR_COMPONENT_NUM], int range, uint4* outputs, bool hasAlpha, uint* bitPos)
{
    const int row = range * 3; // three table entries per range
    const uint bits = (uint)(g_bitsTritsQuintsTable[row + 0]);
    const uint trits = (uint)(g_bitsTritsQuintsTable[row + 1]);
    const uint quints = (uint)(g_bitsTritsQuintsTable[row + 2]);
    const uint count = hasAlpha ? 8u : 6u; // RGBA 4x2 = 8 or RGB 3x2 = 6

    if (trits == UINT_ONE) {
        uint group[TRIT_BLOCK_SIZE];
        group[ISE_0] = numbers[EP0_R_INDEX];
        group[ISE_1] = numbers[EP1_R_INDEX];
        group[ISE_2] = numbers[EP0_G_INDEX];
        group[ISE_3] = numbers[EP1_G_INDEX];
        group[ISE_4] = numbers[EP0_B_INDEX];
        EncodeTrits(bits, group, outputs, bitPos);

        group[ISE_0] = numbers[EP1_B_INDEX];
        group[ISE_1] = numbers[EP0_A_INDEX];
        group[ISE_2] = numbers[EP1_A_INDEX];
        group[ISE_3] = UINT_ZERO;
        group[ISE_4] = UINT_ZERO;
        EncodeTrits(bits, group, outputs, bitPos);

        *bitPos = ((TRIT_MSB_SIZE + TRIT_BLOCK_SIZE * bits) * count + TRIT_ROUND_NUM) / TRIT_BLOCK_SIZE;
        return;
    }

    if (quints == UINT_ONE) {
        uint group[QUINT_BLOCK_SIZE];
        group[ISE_0] = numbers[EP0_R_INDEX];
        group[ISE_1] = numbers[EP1_R_INDEX];
        group[ISE_2] = numbers[EP0_G_INDEX];
        EncodeQuints(bits, group, outputs, bitPos);

        group[ISE_0] = numbers[EP1_G_INDEX];
        group[ISE_1] = numbers[EP0_B_INDEX];
        group[ISE_2] = numbers[EP1_B_INDEX];
        EncodeQuints(bits, group, outputs, bitPos);

        group[ISE_0] = numbers[EP0_A_INDEX];
        group[ISE_1] = numbers[EP1_A_INDEX];
        group[ISE_2] = UINT_ZERO;
        EncodeQuints(bits, group, outputs, bitPos);

        *bitPos = ((QUINT_MSB_SIZE + QUINT_BLOCK_SIZE * bits) * count + QUINT_ROUND_NUM) / QUINT_BLOCK_SIZE;
        return;
    }

    // Plain binary encoding: one field of `bits` bits per component.
    for (uint k = UINT_ZERO; k < count; ++k) {
        Orbits8Ptr(outputs, bitPos, numbers[k], bits);
    }
}
1091
// Packs the BLOCK_SIZE quantized weights into the bit stream *outputs,
// starting at bit 0.
//   numbers - quantized weights at the WEIGHT_0 .. WEIGHT_15 slots
//   range   - row of g_bitsTritsQuintsTable (plain bits, trit flag)
// With the trit flag set, the weights are encoded in four groups of five
// (the last group holds one weight and four zeros); otherwise each weight is
// written as a plain field of `bits` bits. The quint flag is not consulted.
void BiseWeights(short numbers[BLOCK_SIZE], int range, uint4* outputs)
{
    const int row = range * 3; // three table entries per range
    const uint bits = (uint)(g_bitsTritsQuintsTable[row + 0]);
    const uint trits = (uint)(g_bitsTritsQuintsTable[row + 1]);
    uint cursor = UINT_ZERO;

    if (trits != UINT_ONE) {
        for (int k = START_INDEX; k < BLOCK_SIZE; ++k) {
            Orbits8Ptr(outputs, &cursor, numbers[k], bits);
        }
        return;
    }

    uint group[TRIT_BLOCK_SIZE];
    group[ISE_0] = numbers[WEIGHT_0];
    group[ISE_1] = numbers[WEIGHT_1];
    group[ISE_2] = numbers[WEIGHT_2];
    group[ISE_3] = numbers[WEIGHT_3];
    group[ISE_4] = numbers[WEIGHT_4];
    EncodeTrits(bits, group, outputs, &cursor);

    group[ISE_0] = numbers[WEIGHT_5];
    group[ISE_1] = numbers[WEIGHT_6];
    group[ISE_2] = numbers[WEIGHT_7];
    group[ISE_3] = numbers[WEIGHT_8];
    group[ISE_4] = numbers[WEIGHT_9];
    EncodeTrits(bits, group, outputs, &cursor);

    group[ISE_0] = numbers[WEIGHT_10];
    group[ISE_1] = numbers[WEIGHT_11];
    group[ISE_2] = numbers[WEIGHT_12];
    group[ISE_3] = numbers[WEIGHT_13];
    group[ISE_4] = numbers[WEIGHT_14];
    EncodeTrits(bits, group, outputs, &cursor);

    group[ISE_0] = numbers[WEIGHT_15];
    group[ISE_1] = UINT_ZERO;
    group[ISE_2] = UINT_ZERO;
    group[ISE_3] = UINT_ZERO;
    group[ISE_4] = UINT_ZERO;
    EncodeTrits(bits, group, outputs, &cursor);

    // Records the computed stream length; the value is local and not read afterwards.
    cursor = ((TRIT_MSB_SIZE + TRIT_BLOCK_SIZE * bits) * BLOCK_SIZE + TRIT_ROUND_NUM) / TRIT_BLOCK_SIZE;
}
1130
// Builds the 128-bit physical block from its parts.
//   blockMode         - placed at the bottom of word x
//   colorEndpointMode - low 4 bits stored at CEM_POS in word x
//   epIse             - endpoint bit stream; words x and y are used and are
//                       stored upwards starting at COLOR_EP_POS in word x
//   wtIse             - weight bit stream; words x, y, z are used and are
//                       stored fully bit-reversed, filling the block from its
//                       top: wtIse.x goes to word w, wtIse.y to z, wtIse.z to y
uint4 AssembleBlock(uint blockMode, uint colorEndpointMode, uint4 epIse, uint4 wtIse)
{
    // Reversing every byte and also reversing the byte order reverses all
    // 32 bits of a weight word.
    const uint mirroredX = (ReverseByte(wtIse.x & BYTE_MASK) << BYTE_3_POS) |
                           (ReverseByte((wtIse.x >> BYTE_1_POS) & BYTE_MASK) << BYTE_2_POS) |
                           (ReverseByte((wtIse.x >> BYTE_2_POS) & BYTE_MASK) << BYTE_1_POS) |
                           ReverseByte((wtIse.x >> BYTE_3_POS) & BYTE_MASK);
    const uint mirroredY = (ReverseByte(wtIse.y & BYTE_MASK) << BYTE_3_POS) |
                           (ReverseByte((wtIse.y >> BYTE_1_POS) & BYTE_MASK) << BYTE_2_POS) |
                           (ReverseByte((wtIse.y >> BYTE_2_POS) & BYTE_MASK) << BYTE_1_POS) |
                           ReverseByte((wtIse.y >> BYTE_3_POS) & BYTE_MASK);
    const uint mirroredZ = (ReverseByte(wtIse.z & BYTE_MASK) << BYTE_3_POS) |
                           (ReverseByte((wtIse.z >> BYTE_1_POS) & BYTE_MASK) << BYTE_2_POS) |
                           (ReverseByte((wtIse.z >> BYTE_2_POS) & BYTE_MASK) << BYTE_1_POS) |
                           ReverseByte((wtIse.z >> BYTE_3_POS) & BYTE_MASK);

    const uint wordX = blockMode |
                       ((colorEndpointMode & MASK_FOR_4BITS) << CEM_POS) |
                       ((epIse.x & MASK_FOR_15BITS) << COLOR_EP_POS);
    const uint wordY = mirroredZ |
                       ((epIse.x >> COLOR_EP_HIGH_BIT) & MASK_FOR_17BITS) |
                       ((epIse.y & MASK_FOR_15BITS) << COLOR_EP_POS);
    const uint wordZ = mirroredY |
                       ((epIse.y >> COLOR_EP_HIGH_BIT) & MASK_FOR_17BITS);
    const uint wordW = mirroredX;

    return (uint4)(wordX, wordY, wordZ, wordW);
}
1156
// Builds the block-mode field from the weight quantization method and the
// dual-plane flag; the weight grid size comes from X_GRIDS and Y_GRIDS.
// Methods below 6 use the low-precision flag value 0, the others 1; the
// method modulo 6 plus WEIGHT_METHOD_OFFSET supplies the range bits.
// NOTE(review): the field derived from Y_GRIDS is stored at BLOCK_WIDTH_POS
// and the one derived from X_GRIDS at BLOCK_HEIGHT_POS; this makes no
// difference while both fields hold the same value - confirm before
// supporting non-square grids.
uint AssembleBlockmode(uint weightQuantmethod, bool isDualPlane)
{
    const uint fromHeight = (uint)((Y_GRIDS - HEIGHT_BITS_OFFSET) & MASK_FOR_2BITS);
    const uint fromWidth = (uint)((X_GRIDS - WIDTH_BITS_OFFSET) & MASK_FOR_2BITS);
    const uint dualPlaneBit = isDualPlane ? UINT_ONE : UINT_ZERO;
    const uint precisionBit = (weightQuantmethod < 6u) ? UINT_ZERO : UINT_ONE; // low/high-precision limit is 6u
    const uint rangeCode = (weightQuantmethod % 6u) + WEIGHT_METHOD_OFFSET;    // low/high-precision limit is 6u

    uint blockMode = (rangeCode >> WEIGHT_METHOD_RIGHT_BIT) & MASK_FOR_2BITS;
    blockMode |= ((rangeCode & MASK_FOR_1BITS) << WEIGHT_METHOD_POS) |
                 ((fromHeight & MASK_FOR_2BITS) << BLOCK_WIDTH_POS) |
                 ((fromWidth & MASK_FOR_2BITS) << BLOCK_HEIGHT_POS) |
                 (precisionBit << WEIGHT_PRECISION_POS) |
                 (dualPlaneBit << IS_DUALPLANE_POS);
    return blockMode;
}
1172
// Quantizes the two endpoints and returns them packed as a bit stream.
//   ep0, ep1 - in/out: replaced by the colors that the quantized values
//              decode to
//   endpointQuantmethod - quantization level used for encoding, decoding and
//              packing
//   hasAlpha - when false, the quantized alpha components are zeroed before
//              packing
uint4 EndpointIse(float4* ep0, float4* ep1, short endpointQuantmethod, bool hasAlpha)
{
    short quantized[COLOR_COMPONENT_NUM];
    EncodeColorNormal(endpointQuantmethod, *ep0, *ep1, quantized);
    // Write the decoded colors back so the caller sees the endpoints exactly
    // as they are stored.
    DecodeColor(endpointQuantmethod, quantized, ep0, ep1);
    if (!hasAlpha) {
        quantized[EP0_A_INDEX] = SHORT_ZERO;
        quantized[EP1_A_INDEX] = SHORT_ZERO;
    }
    uint cursor = UINT_ZERO;
    uint4 packed = (uint4)(UINT_ZERO);
    BiseEndpoints(quantized, endpointQuantmethod, &packed, hasAlpha, &cursor);
    return packed;
}
1187
// Interpolates between ep0 and ep1: ep1 is weighted with `weight`, ep0 with
// BLOCK_MAX_WEIGHTS_SHORT - weight, and the sum is divided by
// BLOCK_MAX_WEIGHTS_FLOAT.
float4 CalTexel(short weight, float4 ep0, float4 ep1)
{
    const short complement = BLOCK_MAX_WEIGHTS_SHORT - weight;
    const float4 weighted = ep0 * complement + ep1 * weight;
    return weighted / BLOCK_MAX_WEIGHTS_FLOAT;
}
1193
// Quantizes the per-texel weights for the given endpoints, measures the
// resulting error and returns the weights packed as a bit stream.
//   weightRange       - largest quantized weight value
//   weightQuantmethod - selects the section of g_scrambleTable and
//                       g_weightUnquant and the packing range
//   errval            - output: sum over all texels of the squared distance
//                       between the texel and its reconstruction
uint4 WeightIse(float4* texels, uint weightRange, float4 endPoint[END_POINT_NUM],
    short weightQuantmethod, float* errval)
{
    short wtQuantized[X_GRIDS * Y_GRIDS];
    CalculateQuantizedWeights(texels, weightRange, endPoint, wtQuantized);

    const int methodBase = weightQuantmethod * WEIGHT_QUANTIZE_NUM; // start of this method's table section
    float sumErr = FLOAT_ZERO;
    for (int k = START_INDEX; k < X_GRIDS * Y_GRIDS; ++k) {
        // Replace the weight by its scrambled value, which is what gets packed.
        const short scrambleSlot = methodBase + wtQuantized[k];
        wtQuantized[k] = g_scrambleTable[scrambleSlot];
        // The unquantized weight is looked up with the scrambled value.
        const short unquantSlot = methodBase + wtQuantized[k];
        const short unquantized = g_weightUnquant[unquantSlot];
        const float4 diff = CalTexel(unquantized, endPoint[EP0_INDEX], endPoint[EP1_INDEX]) - texels[k];
        sumErr += dot(diff, diff);
    }
    *errval = sumErr;

    uint4 packed = (uint4)(UINT_ZERO);
    BiseWeights(wtQuantized, (int)(weightQuantmethod), &packed);
    return packed;
}
1215
// Encodes the block with one fixed mode: weight method QUANT_6, endpoint
// method QUANT_256, weight range WEIGHT_RANGE_6, always with alpha.
//   epIse, wtIse  - outputs: packed endpoint and weight bit streams
//   bestBlockmode - output: (weight method, endpoint method, weight range)
// Returns the squared reconstruction error reported by WeightIse.
float TryEncode(float4* texels, float4 texelsMean, uint4* epIse, uint4* wtIse, short3* bestBlockmode)
{
    const bool hasAlpha = true;
    const short3 mode = (short3)(QUANT_6, QUANT_256, WEIGHT_RANGE_6);
    *bestBlockmode = mode;

    float4 low;
    float4 high;
    MaxAccumulationPixelDirection(texels, texelsMean, &low, &high, hasAlpha);
    // EndpointIse replaces low/high by the colors they decode to after quantization.
    *epIse = EndpointIse(&low, &high, mode.y, hasAlpha);

    float4 endPoint[END_POINT_NUM];
    endPoint[EP0_INDEX] = low;
    endPoint[EP1_INDEX] = high;

    float blockError;
    // The largest weight value is one less than the weight range.
    *wtIse = WeightIse(texels, mode.z - UINT_ONE, endPoint, mode.x, &blockError);
    return blockError;
}
1234
// Encodes one block of texels and returns the assembled 128-bit block.
//   texels, texelsMean - the block texels and their mean
//   blockID            - index at which the block error is stored in errs
//   errs               - output array: errs[blockID] receives the error of
//                        this block converted to uint
// The block is always encoded with alpha (CEM_LDR_RGBA_DIRECT) and without a
// second weight plane.
uint4 EncodeBlock(float4* texels, float4 texelsMean, int blockID, __global uint* errs)
{
    const bool hasAlpha = true;
    const bool isDualPlane = false;

    uint4 epIse;
    uint4 wtIse;
    short3 chosenMode;
    const float blockError = TryEncode(texels, texelsMean, &epIse, &wtIse, &chosenMode);
    errs[blockID] = (uint)(blockError);

    const uint blockMode = AssembleBlockmode(chosenMode.x, isDualPlane);
    const uint colorEndpointMode = hasAlpha ? CEM_LDR_RGBA_DIRECT : CEM_LDR_RGB_DIRECT;
    return AssembleBlock(blockMode, colorEndpointMode, epIse, wtIse);
}
1255
// Reads the DIM x DIM block of this work-item from inputImage.
// The block origin is (get_global_id(0), get_global_id(1)) * DIM. Coordinates
// at or beyond width / height are replaced by the last column / row, so blocks
// on the right and bottom edge repeat the edge pixels.
//   texels    - output: the texels in row-major order, scaled by PIXEL_MAX_VALUE
//   texelMean - in/out: the scaled texels are added to it; the caller has to
//               initialize it and divide by the texel count afterwards
void GotTexelFromImage(read_only image2d_t inputImage, float4 texels[BLOCK_SIZE],
    int width, int height, float4 *texelMean)
{
    const int2 origin = (int2)(get_global_id(0), get_global_id(1)) * DIM;
    const int2 lastPixel = (int2)(width - 1, height - 1);
    for (int t = 0; t < DIM * DIM; ++t) {
        // t walks the block row by row: column t % DIM, row t / DIM.
        const int2 wanted = origin + (int2)(t % DIM, t / DIM);
        const int2 pixelPos = min(wanted, lastPixel);
        const float4 scaled = read_imagef(inputImage, pixelPos) * PIXEL_MAX_VALUE;
        texels[t] = scaled;
        *texelMean += scaled;
    }
}
1277
// Kernel entry point: each work-item encodes one DIM x DIM block.
//   inputImage - source image
//   astcArr    - output: one uint4 per block at index
//                global_id(1) * global_size(0) + global_id(0)
//   errs       - output: per-block error at the same index
//   width, height - image size in pixels, used to clamp reads at the edges
// NOTE(review): the block index is not checked against the size of astcArr
// or errs; the host is presumably expected to size both for the full global
// range - confirm against the launch code.
kernel void AstcCl(read_only image2d_t inputImage, __global uint4* astcArr, __global uint* errs,
    int width, int height)
{
    const int column = get_global_id(0);
    const int row = get_global_id(1);
    const int blocksPerRow = get_global_size(0);
    const int blockID = row * blocksPerRow + column;

    float4 texels[BLOCK_SIZE];
    float4 texelSum = (float4)(0.0f);
    GotTexelFromImage(inputImage, texels, width, height, &texelSum);
    const float4 texelMean = texelSum / ((float)(BLOCK_SIZE));

    astcArr[blockID] = EncodeBlock(texels, texelMean, blockID, errs);
}
1290 )";
1291
// Keeps the handle obtained from InitOpenCLExtern and releases it through
// UnLoadCLExtern in the destructor.
class OpenCLSoManager {
public:
    OpenCLSoManager();
    ~OpenCLSoManager();
    // Initializes OpenCL on first use; returns whether it is available.
    bool LoadOpenCLSo();

private:
    void *clSoHandle{nullptr};    // handle filled in by InitOpenCLExtern
    bool loadSuccess{false};      // true once InitOpenCLExtern has succeeded
    std::mutex openClSoMutex_{};  // guards LoadOpenCLSo
};
1302
// File-local manager object; its destructor calls UnLoadCLExtern.
static OpenCLSoManager g_clSoManager;
// Locked by CheckClBinIsExist and SaveClBin while they access the binary cache file.
std::mutex checkClBinPathMutex = {};
1305
OpenCLSoManager()1306 OpenCLSoManager::OpenCLSoManager()
1307 {
1308 clSoHandle = nullptr;
1309 loadSuccess = false;
1310 }
1311
~OpenCLSoManager()1312 OpenCLSoManager::~OpenCLSoManager()
1313 {
1314 if (!UnLoadCLExtern(clSoHandle)) {
1315 IMAGE_LOGE("astcenc OpenCLSoManager UnLoad failed!");
1316 } else {
1317 IMAGE_LOGD("astcenc OpenCLSoManager UnLoad success!");
1318 loadSuccess = false;
1319 }
1320 }
1321
LoadOpenCLSo()1322 bool OpenCLSoManager::LoadOpenCLSo()
1323 {
1324 std::lock_guard<std::mutex> lock(openClSoMutex_);
1325 if (!loadSuccess) {
1326 loadSuccess = InitOpenCLExtern(&clSoHandle);
1327 }
1328 return loadSuccess;
1329 }
1330
AstcClClose(ClAstcHandle * clAstcHandle)1331 CL_ASTC_SHARE_LIB_API CL_ASTC_STATUS AstcClClose(ClAstcHandle *clAstcHandle)
1332 {
1333 if (clAstcHandle == nullptr) {
1334 IMAGE_LOGE("astc AstcClClose clAstcHandle is nullptr!");
1335 return CL_ASTC_ENC_FAILED;
1336 }
1337 cl_int clRet;
1338 if (clAstcHandle->kernel != nullptr) {
1339 clRet = clReleaseKernel(clAstcHandle->kernel);
1340 if (clRet != CL_SUCCESS) {
1341 IMAGE_LOGE("astc clReleaseKernel failed ret %{public}d!", clRet);
1342 return CL_ASTC_ENC_FAILED;
1343 }
1344 clAstcHandle->kernel = nullptr;
1345 }
1346 if (clAstcHandle->queue != nullptr) {
1347 clRet = clReleaseCommandQueue(clAstcHandle->queue);
1348 if (clRet != CL_SUCCESS) {
1349 IMAGE_LOGE("astc clReleaseCommandQueue failed ret %{public}d!", clRet);
1350 return CL_ASTC_ENC_FAILED;
1351 }
1352 clAstcHandle->queue = nullptr;
1353 }
1354 if (clAstcHandle->context != nullptr) {
1355 clRet = clReleaseContext(clAstcHandle->context);
1356 if (clRet != CL_SUCCESS) {
1357 IMAGE_LOGE("astc clReleaseContext failed ret %{public}d!", clRet);
1358 return CL_ASTC_ENC_FAILED;
1359 }
1360 clAstcHandle->context = nullptr;
1361 }
1362 if (clAstcHandle->encObj.blockErrs_ != nullptr) {
1363 free(clAstcHandle->encObj.blockErrs_);
1364 clAstcHandle->encObj.blockErrs_ = nullptr;
1365 }
1366 if (clAstcHandle != nullptr) {
1367 free(clAstcHandle);
1368 }
1369 return CL_ASTC_ENC_SUCCESS;
1370 }
1371
CheckClBinIsExist(const std::string & name)1372 static bool CheckClBinIsExist(const std::string &name)
1373 {
1374 std::lock_guard<std::mutex> lock(checkClBinPathMutex);
1375 return (access(name.c_str(), F_OK) != -1); // -1 means that the file is not exist
1376 }
1377
SaveClBin(cl_program program,const std::string & clBinPath)1378 static CL_ASTC_STATUS SaveClBin(cl_program program, const std::string &clBinPath)
1379 {
1380 std::lock_guard<std::mutex> lock(checkClBinPathMutex);
1381 size_t programBinarySizes;
1382 cl_int clRet = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &programBinarySizes, NULL);
1383 if (clRet != CL_SUCCESS) {
1384 IMAGE_LOGE("astc clGetProgramInfo CL_PROGRAM_BINARY_SIZES failed ret %{public}d!", clRet);
1385 return CL_ASTC_ENC_FAILED;
1386 }
1387 bool genBinFail = (programBinarySizes == 0) || (programBinarySizes > MAX_MALLOC_BYTES);
1388 if (genBinFail) {
1389 IMAGE_LOGE("astc clGetProgramInfo programBinarySizes %{public}zu too big!", programBinarySizes);
1390 return CL_ASTC_ENC_FAILED;
1391 }
1392 uint8_t *programBinaries = static_cast<uint8_t *>(malloc(programBinarySizes));
1393 if (programBinaries == nullptr) {
1394 IMAGE_LOGE("astc programBinaries malloc failed!");
1395 return CL_ASTC_ENC_FAILED;
1396 }
1397 clRet = clGetProgramInfo(program, CL_PROGRAM_BINARIES, programBinarySizes, &programBinaries, NULL);
1398 if (clRet != CL_SUCCESS) {
1399 IMAGE_LOGE("astc clGetProgramInfo CL_PROGRAM_BINARIES failed ret %{public}d!", clRet);
1400 free(programBinaries);
1401 return CL_ASTC_ENC_FAILED;
1402 }
1403 FILE *fp = fopen(clBinPath.c_str(), "wb");
1404 if (fp == nullptr) {
1405 IMAGE_LOGE("astc create file: %{public}s failed!", clBinPath.c_str());
1406 free(programBinaries);
1407 return CL_ASTC_ENC_FAILED;
1408 }
1409 CL_ASTC_STATUS ret = CL_ASTC_ENC_SUCCESS;
1410 if (fwrite(programBinaries, 1, programBinarySizes, fp) != programBinarySizes) {
1411 IMAGE_LOGE("astc fwrite programBinaries file failed!");
1412 ret = CL_ASTC_ENC_FAILED;
1413 }
1414 if (fclose(fp) != 0) {
1415 IMAGE_LOGE("astc SaveClBin close file failed!");
1416 ret = CL_ASTC_ENC_FAILED;
1417 }
1418 fp = nullptr;
1419 free(programBinaries);
1420 return ret;
1421 }
1422
BuildProgramAndCreateKernel(cl_program program,ClAstcHandle * clAstcHandle)1423 static CL_ASTC_STATUS BuildProgramAndCreateKernel(cl_program program, ClAstcHandle *clAstcHandle)
1424 {
1425 cl_int clRet = clBuildProgram(program, 1, &clAstcHandle->deviceID, "-cl-std=CL3.0", nullptr, nullptr);
1426 if (clRet != CL_SUCCESS) {
1427 IMAGE_LOGE("astc clBuildProgram failed ret %{public}d!", clRet);
1428 return CL_ASTC_ENC_FAILED;
1429 }
1430 clAstcHandle->kernel = clCreateKernel(program, "AstcCl", &clRet);
1431 if (clRet != CL_SUCCESS) {
1432 IMAGE_LOGE("astc clCreateKernel failed ret %{public}d!", clRet);
1433 return CL_ASTC_ENC_FAILED;
1434 }
1435 return CL_ASTC_ENC_SUCCESS;
1436 }
1437
AstcClBuildProgram(ClAstcHandle * clAstcHandle,const std::string & clBinPath)1438 static CL_ASTC_STATUS AstcClBuildProgram(ClAstcHandle *clAstcHandle, const std::string &clBinPath)
1439 {
1440 cl_int clRet;
1441 cl_program program = nullptr;
1442 if (!CheckClBinIsExist(clBinPath)) {
1443 size_t sourceSize = strlen(g_programSource) + 1; // '\0' occupies 1 bytes
1444 program = clCreateProgramWithSource(clAstcHandle->context, 1, &g_programSource, &sourceSize, &clRet);
1445 if (clRet != CL_SUCCESS) {
1446 IMAGE_LOGE("astc clCreateProgramWithSource failed ret %{public}d!", clRet);
1447 return CL_ASTC_ENC_FAILED;
1448 }
1449 if (BuildProgramAndCreateKernel(program, clAstcHandle) != CL_ASTC_ENC_SUCCESS) {
1450 IMAGE_LOGE("astc BuildProgramAndCreateKernel failed ret %{public}d!", clRet);
1451 clReleaseProgram(program);
1452 return CL_ASTC_ENC_FAILED;
1453 }
1454 if (SaveClBin(program, clBinPath) != CL_ASTC_ENC_SUCCESS) {
1455 IMAGE_LOGI("astc SaveClBin failed!");
1456 }
1457 } else {
1458 std::ifstream contents{clBinPath};
1459 std::string binaryContent{std::istreambuf_iterator<char>{contents}, {}};
1460 size_t binSize = binaryContent.length();
1461 bool invaildSize = (binSize == 0) || (binSize > MAX_MALLOC_BYTES);
1462 if (invaildSize) {
1463 IMAGE_LOGE("astc AstcClBuildProgram read CLbin file lenth error %{public}zu!", binSize);
1464 return CL_ASTC_ENC_FAILED;
1465 }
1466 const char *binary = static_cast<const char *>(binaryContent.c_str());
1467 program = clCreateProgramWithBinary(clAstcHandle->context, 1, &clAstcHandle->deviceID, &binSize,
1468 (const unsigned char **)&binary, nullptr, &clRet);
1469 if (clRet != CL_SUCCESS) {
1470 IMAGE_LOGE("astc clCreateProgramWithBinary failed ret %{public}d!", clRet);
1471 return CL_ASTC_ENC_FAILED;
1472 }
1473 if (BuildProgramAndCreateKernel(program, clAstcHandle) != CL_ASTC_ENC_SUCCESS) {
1474 IMAGE_LOGE("astc BuildProgramAndCreateKernel with bin failed!");
1475 clReleaseProgram(program);
1476 return CL_ASTC_ENC_FAILED;
1477 }
1478 }
1479 clRet = clReleaseProgram(program);
1480 if (clRet != CL_SUCCESS) {
1481 IMAGE_LOGE("astc clReleaseProgram failed ret %{public}d!", clRet);
1482 return CL_ASTC_ENC_FAILED;
1483 }
1484 return CL_ASTC_ENC_SUCCESS;
1485 }
1486
AstcCreateClKernel(ClAstcHandle * clAstcHandle,const std::string & clBinPath)1487 static CL_ASTC_STATUS AstcCreateClKernel(ClAstcHandle *clAstcHandle, const std::string &clBinPath)
1488 {
1489 if (!g_clSoManager.LoadOpenCLSo()) {
1490 IMAGE_LOGE("astc InitOpenCL error!");
1491 return CL_ASTC_ENC_FAILED;
1492 }
1493 cl_int clRet;
1494 cl_platform_id platformID;
1495 clRet = clGetPlatformIDs(1, &platformID, NULL);
1496 if (clRet != CL_SUCCESS) {
1497 IMAGE_LOGE("astc clGetPlatformIDs failed ret %{public}d!", clRet);
1498 return CL_ASTC_ENC_FAILED;
1499 }
1500 clRet = clGetDeviceIDs(platformID, CL_DEVICE_TYPE_GPU, 1, &clAstcHandle->deviceID, NULL);
1501 if (clRet != CL_SUCCESS) {
1502 IMAGE_LOGE("astc clGetDeviceIDs failed ret %{public}d!", clRet);
1503 return CL_ASTC_ENC_FAILED;
1504 }
1505 clAstcHandle->context = clCreateContext(0, 1, &clAstcHandle->deviceID, NULL, NULL, &clRet);
1506 if (clRet != CL_SUCCESS) {
1507 IMAGE_LOGE("astc clCreateContext failed ret %{public}d!", clRet);
1508 return CL_ASTC_ENC_FAILED;
1509 }
1510 cl_queue_properties props[] = {CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_HIGH_KHR, 0};
1511 clAstcHandle->queue = clCreateCommandQueueWithProperties(clAstcHandle->context,
1512 clAstcHandle->deviceID, props, &clRet);
1513 if (clRet != CL_SUCCESS) {
1514 IMAGE_LOGE("astc clCreateCommandQueueWithProperties failed ret %{public}d!", clRet);
1515 return CL_ASTC_ENC_FAILED;
1516 }
1517 if (AstcClBuildProgram(clAstcHandle, clBinPath) != CL_ASTC_ENC_SUCCESS) {
1518 IMAGE_LOGE("astc AstcClBuildProgram failed!");
1519 return CL_ASTC_ENC_FAILED;
1520 }
1521 return CL_ASTC_ENC_SUCCESS;
1522 }
1523
AstcClCreate(ClAstcHandle ** handle,const std::string & clBinPath)1524 CL_ASTC_SHARE_LIB_API CL_ASTC_STATUS AstcClCreate(ClAstcHandle **handle, const std::string &clBinPath)
1525 {
1526 ClAstcHandle *clAstcHandle = static_cast<ClAstcHandle *>(calloc(1, sizeof(ClAstcHandle)));
1527 if (clAstcHandle == nullptr) {
1528 IMAGE_LOGE("astc AstcClCreate handle calloc failed!");
1529 return CL_ASTC_ENC_FAILED;
1530 }
1531 *handle = clAstcHandle;
1532 size_t numMaxBlocks = static_cast<size_t>(((MAX_WIDTH + DIM - 1) / DIM) * ((MAX_HEIGHT + DIM - 1) / DIM));
1533 clAstcHandle->encObj.blockErrs_ =
1534 static_cast<uint32_t *>(malloc((numMaxBlocks * sizeof(uint32_t)))); // 8MB mem Max
1535 if (clAstcHandle->encObj.blockErrs_ == nullptr) {
1536 IMAGE_LOGE("astc blockErrs_ malloc failed!");
1537 AstcClClose(*handle);
1538 return CL_ASTC_ENC_FAILED;
1539 }
1540 if (AstcCreateClKernel(clAstcHandle, clBinPath) != CL_ASTC_ENC_SUCCESS) {
1541 IMAGE_LOGE("astc AstcCreateClKernel failed!");
1542 AstcClClose(*handle);
1543 return CL_ASTC_ENC_FAILED;
1544 }
1545 return CL_ASTC_ENC_SUCCESS;
1546 }
1547
AstcClEncImageCheckImageOption(const ClAstcImageOption * imageIn)1548 static CL_ASTC_STATUS AstcClEncImageCheckImageOption(const ClAstcImageOption *imageIn)
1549 {
1550 if ((imageIn->width <= 0) || (imageIn->height <= 0) || (imageIn->stride < imageIn->width)) {
1551 IMAGE_LOGE("astc AstcClEncImage width <= 0 or height <= 0 or stride < width!");
1552 return CL_ASTC_ENC_FAILED;
1553 }
1554 if ((imageIn->width > MAX_WIDTH) || (imageIn->height > MAX_HEIGHT)) {
1555 IMAGE_LOGE("astc AstcClEncImage width[%{public}d] \
1556 need be [1, %{public}d] and height[%{public}d] need be [1, %{public}d]", \
1557 imageIn->width, MAX_WIDTH, imageIn->height, MAX_HEIGHT);
1558 return CL_ASTC_ENC_FAILED;
1559 }
1560 return CL_ASTC_ENC_SUCCESS;
1561 }
1562
AstcClFillImage(ClAstcImageOption * imageIn,uint8_t * data,int32_t stride,int32_t width,int32_t height)1563 CL_ASTC_SHARE_LIB_API CL_ASTC_STATUS AstcClFillImage(ClAstcImageOption *imageIn, uint8_t *data, int32_t stride,
1564 int32_t width, int32_t height)
1565 {
1566 if (imageIn == nullptr) {
1567 IMAGE_LOGE("astc AstcClFillImage imageIn is nullptr!");
1568 return CL_ASTC_ENC_FAILED;
1569 }
1570 imageIn->data = data;
1571 imageIn->stride = stride;
1572 imageIn->width = width;
1573 imageIn->height = height;
1574 if (AstcClEncImageCheckImageOption(imageIn) != CL_ASTC_ENC_SUCCESS) {
1575 IMAGE_LOGE("astc AstcClEncImageCheckImageOption failed!");
1576 return CL_ASTC_ENC_FAILED;
1577 }
1578 return CL_ASTC_ENC_SUCCESS;
1579 }
1580
GenAstcHeader(uint8_t * buffer,uint8_t blockX,uint8_t blockY,uint32_t dimX,uint32_t dimY)1581 static void GenAstcHeader(uint8_t *buffer, uint8_t blockX, uint8_t blockY, uint32_t dimX, uint32_t dimY)
1582 {
1583 uint8_t *headInfo = buffer;
1584 *headInfo++ = MAGIC_FILE_CONSTANT & BYTES_MASK;
1585 *headInfo++ = (MAGIC_FILE_CONSTANT >> BIT_SHIFT_8BITS) & BYTES_MASK;
1586 *headInfo++ = (MAGIC_FILE_CONSTANT >> BIT_SHIFT_16BITS) & BYTES_MASK;
1587 *headInfo++ = (MAGIC_FILE_CONSTANT >> BIT_SHIFT_24BITS) & BYTES_MASK;
1588 *headInfo++ = static_cast<uint8_t>(blockX);
1589 *headInfo++ = static_cast<uint8_t>(blockY);
1590 *headInfo++ = 1;
1591 *headInfo++ = dimX & BYTES_MASK;
1592 *headInfo++ = (dimX >> BIT_SHIFT_8BITS) & BYTES_MASK;
1593 *headInfo++ = (dimX >> BIT_SHIFT_16BITS) & BYTES_MASK;
1594 *headInfo++ = dimY & BYTES_MASK;
1595 *headInfo++ = (dimY >> BIT_SHIFT_8BITS) & BYTES_MASK;
1596 *headInfo++ = (dimY >> BIT_SHIFT_16BITS) & BYTES_MASK;
1597 *headInfo++ = 1;
1598 *headInfo++ = 0;
1599 *headInfo++ = 0;
1600 }
1601
ReleaseClAstcObj(ClAstcObjEnc * obj)1602 static void ReleaseClAstcObj(ClAstcObjEnc *obj)
1603 {
1604 cl_int clRet;
1605 if (obj != nullptr) {
1606 if (obj->inputImage != nullptr) {
1607 clRet = clReleaseMemObject(obj->inputImage);
1608 if (clRet != CL_SUCCESS) {
1609 IMAGE_LOGE("astc inputImage release failed ret %{public}d!", clRet);
1610 }
1611 obj->inputImage = nullptr;
1612 }
1613 if (obj->astcResult != nullptr) {
1614 clRet = clReleaseMemObject(obj->astcResult);
1615 if (clRet != CL_SUCCESS) {
1616 IMAGE_LOGE("astc astcResult release failed ret %{public}d!", clRet);
1617 }
1618 obj->astcResult = nullptr;
1619 }
1620 if (obj->errBuffer != nullptr) {
1621 clRet = clReleaseMemObject(obj->errBuffer);
1622 if (clRet != CL_SUCCESS) {
1623 IMAGE_LOGE("astc errBuffer release failed ret %{public}d!", clRet);
1624 }
1625 obj->errBuffer = nullptr;
1626 }
1627 }
1628 }
1629
// Scans the first numBlocks entries of blockErrs.
//   sumVal: reset to 0, then receives the sum of all entries (wraps modulo 2^32 on overflow).
//   maxVal: NOT reset; raised to the largest entry if that exceeds its incoming value, so the
//           caller chooses the starting value (0 for a plain maximum).
// blockErrs must point to at least numBlocks readable elements.
static void GetMaxAndSumVal(size_t numBlocks, uint32_t *blockErrs, uint32_t &maxVal, uint32_t &sumVal)
{
    sumVal = 0;
    for (size_t i = 0; i < numBlocks; i++) {
        const uint32_t blockErr = blockErrs[i];
        sumVal += blockErr;
        // Plain integer comparison: no round-trip through double as with fmax().
        if (blockErr > maxVal) {
            maxVal = blockErr;
        }
    }
}
1638
ClCreateBufferAndImage(const ClAstcImageOption * imageIn,ClAstcHandle * clAstcHandle,ClAstcObjEnc * encObj)1639 static CL_ASTC_STATUS ClCreateBufferAndImage(const ClAstcImageOption *imageIn,
1640 ClAstcHandle *clAstcHandle, ClAstcObjEnc *encObj)
1641 {
1642 uint8_t *data = imageIn->data;
1643 int32_t stride = imageIn->stride;
1644 int32_t width = imageIn->width;
1645 int32_t height = imageIn->height;
1646 size_t numBlocks = static_cast<size_t>(((width + DIM - 1) / DIM) * ((height + DIM - 1) / DIM));
1647 uint32_t *blockErrs = encObj->blockErrs_;
1648 size_t blockErrBytes = sizeof(uint32_t) * numBlocks;
1649 encObj->astcSize = numBlocks * TEXTURE_BLOCK_BYTES;
1650 if ((blockErrs == nullptr) || (memset_s(blockErrs, blockErrBytes, 0, blockErrBytes))) {
1651 IMAGE_LOGE("astc blockErrs is nullptr or memset failed!");
1652 return CL_ASTC_ENC_FAILED;
1653 }
1654 cl_image_format imageFormat = { CL_RGBA, CL_UNORM_INT8 };
1655 cl_image_desc desc = { CL_MEM_OBJECT_IMAGE2D, stride, height };
1656 cl_int clRet;
1657 encObj->inputImage = clCreateImage(clAstcHandle->context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, &imageFormat,
1658 &desc, data, &clRet);
1659 if (clRet != CL_SUCCESS) {
1660 IMAGE_LOGE("astc clCreateImage failed ret %{public}d!", clRet);
1661 return CL_ASTC_ENC_FAILED;
1662 }
1663 encObj->astcResult = clCreateBuffer(clAstcHandle->context,
1664 CL_MEM_ALLOC_HOST_PTR, encObj->astcSize, NULL, &clRet);
1665 if (clRet != CL_SUCCESS) {
1666 IMAGE_LOGE("astc clCreateBuffer astcResult failed ret %{public}d!", clRet);
1667 return CL_ASTC_ENC_FAILED;
1668 }
1669 encObj->errBuffer = clCreateBuffer(clAstcHandle->context, CL_MEM_USE_HOST_PTR, blockErrBytes, blockErrs, &clRet);
1670 if (clRet != CL_SUCCESS) {
1671 IMAGE_LOGE("astc clCreateBuffer errBuffer failed ret %{public}d!", clRet);
1672 return CL_ASTC_ENC_FAILED;
1673 }
1674 return CL_ASTC_ENC_SUCCESS;
1675 }
1676
ClKernelArgSet(ClAstcHandle * clAstcHandle,ClAstcObjEnc * encObj,int width,int height)1677 static CL_ASTC_STATUS ClKernelArgSet(ClAstcHandle *clAstcHandle, ClAstcObjEnc *encObj, int width, int height)
1678 {
1679 int32_t kernelId = 0;
1680 cl_int clRet = clSetKernelArg(clAstcHandle->kernel, kernelId++, sizeof(cl_mem), &encObj->inputImage);
1681 if (clRet != CL_SUCCESS) {
1682 IMAGE_LOGE("astc clSetKernelArg inputImage failed ret %{public}d!", clRet);
1683 return CL_ASTC_ENC_FAILED;
1684 }
1685 clRet = clSetKernelArg(clAstcHandle->kernel, kernelId++, sizeof(cl_mem), &encObj->astcResult);
1686 if (clRet != CL_SUCCESS) {
1687 IMAGE_LOGE("astc clSetKernelArg astcResult failed ret %{public}d!", clRet);
1688 return CL_ASTC_ENC_FAILED;
1689 }
1690 clRet = clSetKernelArg(clAstcHandle->kernel, kernelId++, sizeof(cl_mem), &encObj->errBuffer);
1691 if (clRet != CL_SUCCESS) {
1692 IMAGE_LOGE("astc clSetKernelArg errBuffer failed ret %{public}d!", clRet);
1693 return CL_ASTC_ENC_FAILED;
1694 }
1695 clRet = clSetKernelArg(clAstcHandle->kernel, kernelId++, sizeof(int), &width);
1696 if (clRet != CL_SUCCESS) {
1697 IMAGE_LOGE("astc clSetKernelArg width failed ret %{public}d!", clRet);
1698 return CL_ASTC_ENC_FAILED;
1699 }
1700 clRet = clSetKernelArg(clAstcHandle->kernel, kernelId++, sizeof(int), &height);
1701 if (clRet != CL_SUCCESS) {
1702 IMAGE_LOGE("astc clSetKernelArg height failed ret %{public}d!", clRet);
1703 return CL_ASTC_ENC_FAILED;
1704 }
1705 return CL_ASTC_ENC_SUCCESS;
1706 }
1707
ClKernelArgSetAndRun(ClAstcHandle * clAstcHandle,ClAstcObjEnc * encObj,int width,int height)1708 static CL_ASTC_STATUS ClKernelArgSetAndRun(ClAstcHandle *clAstcHandle, ClAstcObjEnc *encObj, int width, int height)
1709 {
1710 if (ClKernelArgSet(clAstcHandle, encObj, width, height) != CL_ASTC_ENC_SUCCESS) {
1711 IMAGE_LOGE("astc ClKernelArgSet failed!");
1712 return CL_ASTC_ENC_FAILED;
1713 }
1714 size_t local[] = {WORK_GROUP_SIZE, WORK_GROUP_SIZE};
1715 size_t global[GLOBAL_WH_NUM_CL];
1716 global[0] = static_cast<size_t>((width + DIM - 1) / DIM);
1717 global[1] = static_cast<size_t>((height + DIM - 1) / DIM);
1718 size_t localMax;
1719 cl_int clRet = clGetKernelWorkGroupInfo(clAstcHandle->kernel, clAstcHandle->deviceID, CL_KERNEL_WORK_GROUP_SIZE,
1720 sizeof(size_t), &localMax, nullptr);
1721 if (clRet != CL_SUCCESS) {
1722 IMAGE_LOGE("astc clGetKernelWorkGroupInfo failed ret %{public}d!", clRet);
1723 return CL_ASTC_ENC_FAILED;
1724 }
1725 while (local[0] * local[1] > localMax) {
1726 local[0]--;
1727 local[1]--;
1728 }
1729 bool invalidLocal = (local[0] < 1) || (local[1] < 1);
1730 if (invalidLocal) {
1731 IMAGE_LOGE("astc ClKernelArgSetAndRun local set failed!");
1732 return CL_ASTC_ENC_FAILED;
1733 }
1734 clRet = clEnqueueNDRangeKernel(clAstcHandle->queue, clAstcHandle->kernel, GLOBAL_WH_NUM_CL, nullptr, global, local,
1735 0, nullptr, nullptr);
1736 if (clRet != CL_SUCCESS) {
1737 IMAGE_LOGE("astc clEnqueueNDRangeKernel failed ret %{public}d!", clRet);
1738 return CL_ASTC_ENC_FAILED;
1739 }
1740 clRet = clFinish(clAstcHandle->queue);
1741 if (clRet != CL_SUCCESS) {
1742 IMAGE_LOGE("astc clFinish failed ret %{public}d!", clRet);
1743 return CL_ASTC_ENC_FAILED;
1744 }
1745 return CL_ASTC_ENC_SUCCESS;
1746 }
1747
ClReadAstcBufAndBlockError(ClAstcHandle * clAstcHandle,ClAstcObjEnc * encObj,const ClAstcImageOption * imageIn,uint8_t * buffer)1748 static CL_ASTC_STATUS ClReadAstcBufAndBlockError(ClAstcHandle *clAstcHandle, ClAstcObjEnc *encObj,
1749 const ClAstcImageOption *imageIn, uint8_t *buffer)
1750 {
1751 cl_int clRet = clEnqueueReadBuffer(clAstcHandle->queue, encObj->astcResult, CL_TRUE,
1752 0, encObj->astcSize, buffer + TEXTURE_HEAD_BYTES, 0, NULL, NULL);
1753 if (clRet != CL_SUCCESS) {
1754 IMAGE_LOGE("astc clEnqueueReadBuffer astcResult failed ret %{public}d!", clRet);
1755 return CL_ASTC_ENC_FAILED;
1756 }
1757 uint32_t maxVal = 0;
1758 uint32_t sumVal = 0;
1759 size_t numBlocks = ((imageIn->width + DIM - 1) / DIM) * ((imageIn->height + DIM - 1) / DIM);
1760 clRet = clEnqueueReadBuffer(clAstcHandle->queue, encObj->errBuffer, CL_TRUE,
1761 0, sizeof(uint32_t) * numBlocks, encObj->blockErrs_, 0, NULL, NULL);
1762 if (clRet != CL_SUCCESS) {
1763 IMAGE_LOGE("astc clEnqueueReadBuffer blockErrs failed ret %{public}d!", clRet);
1764 return CL_ASTC_ENC_FAILED;
1765 }
1766 GetMaxAndSumVal(numBlocks, encObj->blockErrs_, maxVal, sumVal);
1767 return CL_ASTC_ENC_SUCCESS;
1768 }
1769
AstcClEncImage(ClAstcHandle * clAstcHandle,const ClAstcImageOption * imageIn,uint8_t * buffer)1770 CL_ASTC_SHARE_LIB_API CL_ASTC_STATUS AstcClEncImage(ClAstcHandle *clAstcHandle,
1771 const ClAstcImageOption *imageIn, uint8_t *buffer)
1772 {
1773 if ((clAstcHandle == nullptr) || (imageIn == nullptr) || (buffer == nullptr)) {
1774 IMAGE_LOGE("astc AstcClEncImage clAstcHandle or imageIn or buffer is nullptr!");
1775 return CL_ASTC_ENC_FAILED;
1776 }
1777 if (AstcClEncImageCheckImageOption(imageIn) != CL_ASTC_ENC_SUCCESS) {
1778 IMAGE_LOGE("astc AstcClEncImageCheckImageOption failed!");
1779 return CL_ASTC_ENC_FAILED;
1780 }
1781 GenAstcHeader(buffer, DIM, DIM, imageIn->width, imageIn->height);
1782 ClAstcObjEnc *encObj = &clAstcHandle->encObj;
1783 if (encObj == nullptr) {
1784 IMAGE_LOGE("astc AstcClEncImage clAstcHandle encObj is nullptr!");
1785 return CL_ASTC_ENC_FAILED;
1786 }
1787 if (ClCreateBufferAndImage(imageIn, clAstcHandle, encObj) != CL_ASTC_ENC_SUCCESS) {
1788 ReleaseClAstcObj(encObj);
1789 IMAGE_LOGE("astc ClCreateBufferAndImage failed!");
1790 return CL_ASTC_ENC_FAILED;
1791 }
1792 if (ClKernelArgSetAndRun(clAstcHandle, encObj, imageIn->width, imageIn->height) != CL_ASTC_ENC_SUCCESS) {
1793 ReleaseClAstcObj(encObj);
1794 IMAGE_LOGE("astc ClKernelArgSetAndRun failed!");
1795 return CL_ASTC_ENC_FAILED;
1796 }
1797 if (ClReadAstcBufAndBlockError(clAstcHandle, encObj, imageIn, buffer) != CL_ASTC_ENC_SUCCESS) {
1798 ReleaseClAstcObj(encObj);
1799 IMAGE_LOGE("astc ClReadAstcBufAndBlockError failed!");
1800 return CL_ASTC_ENC_FAILED;
1801 }
1802 ReleaseClAstcObj(encObj);
1803 return CL_ASTC_ENC_SUCCESS;
1804 }
1805 }
1806 }
1807 }