1 // SPDX-License-Identifier: Apache-2.0
2 // ----------------------------------------------------------------------------
3 // Copyright 2011-2023 Arm Limited
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
6 // use this file except in compliance with the License. You may obtain a copy
7 // of the License at:
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 // License for the specific language governing permissions and limitations
15 // under the License.
16 // ----------------------------------------------------------------------------
17
18 /**
19 * @brief Functions to generate block size descriptor and decimation tables.
20 */
21
22 #include "astcenc_internal.h"
23
24 /**
25 * @brief Decode the properties of an encoded 2D block mode.
26 *
27 * @param block_mode The encoded block mode.
28 * @param[out] x_weights The number of weights in the X dimension.
29 * @param[out] y_weights The number of weights in the Y dimension.
30 * @param[out] is_dual_plane True if this block mode has two weight planes.
31 * @param[out] quant_mode The quantization level for the weights.
32 * @param[out] weight_bits The storage bit count for the weights.
33 *
34 * @return Returns true if a valid mode, false otherwise.
35 */
decode_block_mode_2d(unsigned int block_mode,unsigned int & x_weights,unsigned int & y_weights,bool & is_dual_plane,unsigned int & quant_mode,unsigned int & weight_bits)36 static bool decode_block_mode_2d(
37 unsigned int block_mode,
38 unsigned int& x_weights,
39 unsigned int& y_weights,
40 bool& is_dual_plane,
41 unsigned int& quant_mode,
42 unsigned int& weight_bits
43 ) {
44 unsigned int base_quant_mode = (block_mode >> 4) & 1;
45 unsigned int H = (block_mode >> 9) & 1;
46 unsigned int D = (block_mode >> 10) & 1;
47 unsigned int A = (block_mode >> 5) & 0x3;
48
49 x_weights = 0;
50 y_weights = 0;
51
52 if ((block_mode & 3) != 0)
53 {
54 base_quant_mode |= (block_mode & 3) << 1;
55 unsigned int B = (block_mode >> 7) & 3;
56 switch ((block_mode >> 2) & 3)
57 {
58 case 0:
59 x_weights = B + 4;
60 y_weights = A + 2;
61 break;
62 case 1:
63 x_weights = B + 8;
64 y_weights = A + 2;
65 break;
66 case 2:
67 x_weights = A + 2;
68 y_weights = B + 8;
69 break;
70 case 3:
71 B &= 1;
72 if (block_mode & 0x100)
73 {
74 x_weights = B + 2;
75 y_weights = A + 2;
76 }
77 else
78 {
79 x_weights = A + 2;
80 y_weights = B + 6;
81 }
82 break;
83 }
84 }
85 else
86 {
87 base_quant_mode |= ((block_mode >> 2) & 3) << 1;
88 if (((block_mode >> 2) & 3) == 0)
89 {
90 return false;
91 }
92
93 unsigned int B = (block_mode >> 9) & 3;
94 switch ((block_mode >> 7) & 3)
95 {
96 case 0:
97 x_weights = 12;
98 y_weights = A + 2;
99 break;
100 case 1:
101 x_weights = A + 2;
102 y_weights = 12;
103 break;
104 case 2:
105 x_weights = A + 6;
106 y_weights = B + 6;
107 D = 0;
108 H = 0;
109 break;
110 case 3:
111 switch ((block_mode >> 5) & 3)
112 {
113 case 0:
114 x_weights = 6;
115 y_weights = 10;
116 break;
117 case 1:
118 x_weights = 10;
119 y_weights = 6;
120 break;
121 case 2:
122 case 3:
123 return false;
124 }
125 break;
126 }
127 }
128
129 unsigned int weight_count = x_weights * y_weights * (D + 1);
130 quant_mode = (base_quant_mode - 2) + 6 * H;
131 is_dual_plane = D != 0;
132
133 weight_bits = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(quant_mode));
134 return (weight_count <= BLOCK_MAX_WEIGHTS &&
135 weight_bits >= BLOCK_MIN_WEIGHT_BITS &&
136 weight_bits <= BLOCK_MAX_WEIGHT_BITS);
137 }
138
139 /**
140 * @brief Decode the properties of an encoded 3D block mode.
141 *
142 * @param block_mode The encoded block mode.
143 * @param[out] x_weights The number of weights in the X dimension.
144 * @param[out] y_weights The number of weights in the Y dimension.
145 * @param[out] z_weights The number of weights in the Z dimension.
146 * @param[out] is_dual_plane True if this block mode has two weight planes.
147 * @param[out] quant_mode The quantization level for the weights.
148 * @param[out] weight_bits The storage bit count for the weights.
149 *
150 * @return Returns true if a valid mode, false otherwise.
151 */
decode_block_mode_3d(unsigned int block_mode,unsigned int & x_weights,unsigned int & y_weights,unsigned int & z_weights,bool & is_dual_plane,unsigned int & quant_mode,unsigned int & weight_bits)152 static bool decode_block_mode_3d(
153 unsigned int block_mode,
154 unsigned int& x_weights,
155 unsigned int& y_weights,
156 unsigned int& z_weights,
157 bool& is_dual_plane,
158 unsigned int& quant_mode,
159 unsigned int& weight_bits
160 ) {
161 unsigned int base_quant_mode = (block_mode >> 4) & 1;
162 unsigned int H = (block_mode >> 9) & 1;
163 unsigned int D = (block_mode >> 10) & 1;
164 unsigned int A = (block_mode >> 5) & 0x3;
165
166 x_weights = 0;
167 y_weights = 0;
168 z_weights = 0;
169
170 if ((block_mode & 3) != 0)
171 {
172 base_quant_mode |= (block_mode & 3) << 1;
173 unsigned int B = (block_mode >> 7) & 3;
174 unsigned int C = (block_mode >> 2) & 0x3;
175 x_weights = A + 2;
176 y_weights = B + 2;
177 z_weights = C + 2;
178 }
179 else
180 {
181 base_quant_mode |= ((block_mode >> 2) & 3) << 1;
182 if (((block_mode >> 2) & 3) == 0)
183 {
184 return false;
185 }
186
187 int B = (block_mode >> 9) & 3;
188 if (((block_mode >> 7) & 3) != 3)
189 {
190 D = 0;
191 H = 0;
192 }
193 switch ((block_mode >> 7) & 3)
194 {
195 case 0:
196 x_weights = 6;
197 y_weights = B + 2;
198 z_weights = A + 2;
199 break;
200 case 1:
201 x_weights = A + 2;
202 y_weights = 6;
203 z_weights = B + 2;
204 break;
205 case 2:
206 x_weights = A + 2;
207 y_weights = B + 2;
208 z_weights = 6;
209 break;
210 case 3:
211 x_weights = 2;
212 y_weights = 2;
213 z_weights = 2;
214 switch ((block_mode >> 5) & 3)
215 {
216 case 0:
217 x_weights = 6;
218 break;
219 case 1:
220 y_weights = 6;
221 break;
222 case 2:
223 z_weights = 6;
224 break;
225 case 3:
226 return false;
227 }
228 break;
229 }
230 }
231
232 unsigned int weight_count = x_weights * y_weights * z_weights * (D + 1);
233 quant_mode = (base_quant_mode - 2) + 6 * H;
234 is_dual_plane = D != 0;
235
236 weight_bits = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(quant_mode));
237 return (weight_count <= BLOCK_MAX_WEIGHTS &&
238 weight_bits >= BLOCK_MIN_WEIGHT_BITS &&
239 weight_bits <= BLOCK_MAX_WEIGHT_BITS);
240 }
241
242 /**
243 * @brief Create a 2D decimation entry for a block-size and weight-decimation pair.
244 *
245 * @param x_texels The number of texels in the X dimension.
246 * @param y_texels The number of texels in the Y dimension.
247 * @param x_weights The number of weights in the X dimension.
248 * @param y_weights The number of weights in the Y dimension.
249 * @param[out] di The decimation info structure to populate.
250 * @param[out] wb The decimation table init scratch working buffers.
251 */
init_decimation_info_2d(unsigned int x_texels,unsigned int y_texels,unsigned int x_weights,unsigned int y_weights,decimation_info & di,dt_init_working_buffers & wb)252 static void init_decimation_info_2d(
253 unsigned int x_texels,
254 unsigned int y_texels,
255 unsigned int x_weights,
256 unsigned int y_weights,
257 decimation_info& di,
258 dt_init_working_buffers& wb
259 ) {
260 unsigned int texels_per_block = x_texels * y_texels;
261 unsigned int weights_per_block = x_weights * y_weights;
262
263 uint8_t max_texel_count_of_weight = 0;
264
265 promise(weights_per_block > 0);
266 promise(texels_per_block > 0);
267 promise(x_texels > 0);
268 promise(y_texels > 0);
269
270 for (unsigned int i = 0; i < weights_per_block; i++)
271 {
272 wb.texel_count_of_weight[i] = 0;
273 }
274
275 for (unsigned int i = 0; i < texels_per_block; i++)
276 {
277 wb.weight_count_of_texel[i] = 0;
278 }
279
280 for (unsigned int y = 0; y < y_texels; y++)
281 {
282 for (unsigned int x = 0; x < x_texels; x++)
283 {
284 unsigned int texel = y * x_texels + x;
285
286 unsigned int x_weight = (((1024 + x_texels / 2) / (x_texels - 1)) * x * (x_weights - 1) + 32) >> 6;
287 unsigned int y_weight = (((1024 + y_texels / 2) / (y_texels - 1)) * y * (y_weights - 1) + 32) >> 6;
288
289 unsigned int x_weight_frac = x_weight & 0xF;
290 unsigned int y_weight_frac = y_weight & 0xF;
291 unsigned int x_weight_int = x_weight >> 4;
292 unsigned int y_weight_int = y_weight >> 4;
293
294 unsigned int qweight[4];
295 qweight[0] = x_weight_int + y_weight_int * x_weights;
296 qweight[1] = qweight[0] + 1;
297 qweight[2] = qweight[0] + x_weights;
298 qweight[3] = qweight[2] + 1;
299
300 // Truncated-precision bilinear interpolation
301 unsigned int prod = x_weight_frac * y_weight_frac;
302
303 unsigned int weight[4];
304 weight[3] = (prod + 8) >> 4;
305 weight[1] = x_weight_frac - weight[3];
306 weight[2] = y_weight_frac - weight[3];
307 weight[0] = 16 - x_weight_frac - y_weight_frac + weight[3];
308
309 for (unsigned int i = 0; i < 4; i++)
310 {
311 if (weight[i] != 0)
312 {
313 wb.grid_weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(qweight[i]);
314 wb.weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(weight[i]);
315 wb.weight_count_of_texel[texel]++;
316 wb.texels_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(texel);
317 wb.texel_weights_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(weight[i]);
318 wb.texel_count_of_weight[qweight[i]]++;
319 max_texel_count_of_weight = astc::max(max_texel_count_of_weight, wb.texel_count_of_weight[qweight[i]]);
320 }
321 }
322 }
323 }
324
325 uint8_t max_texel_weight_count = 0;
326 for (unsigned int i = 0; i < texels_per_block; i++)
327 {
328 di.texel_weight_count[i] = wb.weight_count_of_texel[i];
329 max_texel_weight_count = astc::max(max_texel_weight_count, di.texel_weight_count[i]);
330
331 for (unsigned int j = 0; j < wb.weight_count_of_texel[i]; j++)
332 {
333 di.texel_weight_contribs_int_tr[j][i] = wb.weights_of_texel[i][j];
334 di.texel_weight_contribs_float_tr[j][i] = static_cast<float>(wb.weights_of_texel[i][j]) * (1.0f / WEIGHTS_TEXEL_SUM);
335 di.texel_weights_tr[j][i] = wb.grid_weights_of_texel[i][j];
336 }
337
338 // Init all 4 entries so we can rely on zeros for vectorization
339 for (unsigned int j = wb.weight_count_of_texel[i]; j < 4; j++)
340 {
341 di.texel_weight_contribs_int_tr[j][i] = 0;
342 di.texel_weight_contribs_float_tr[j][i] = 0.0f;
343 di.texel_weights_tr[j][i] = 0;
344 }
345 }
346
347 di.max_texel_weight_count = max_texel_weight_count;
348
349 for (unsigned int i = 0; i < weights_per_block; i++)
350 {
351 unsigned int texel_count_wt = wb.texel_count_of_weight[i];
352 di.weight_texel_count[i] = static_cast<uint8_t>(texel_count_wt);
353
354 for (unsigned int j = 0; j < texel_count_wt; j++)
355 {
356 uint8_t texel = wb.texels_of_weight[i][j];
357
358 // Create transposed versions of these for better vectorization
359 di.weight_texels_tr[j][i] = texel;
360 di.weights_texel_contribs_tr[j][i] = static_cast<float>(wb.texel_weights_of_weight[i][j]);
361
362 // Store the per-texel contribution of this weight for each texel it contributes to
363 di.texel_contrib_for_weight[j][i] = 0.0f;
364 for (unsigned int k = 0; k < 4; k++)
365 {
366 uint8_t dttw = di.texel_weights_tr[k][texel];
367 float dttwf = di.texel_weight_contribs_float_tr[k][texel];
368 if (dttw == i && dttwf != 0.0f)
369 {
370 di.texel_contrib_for_weight[j][i] = di.texel_weight_contribs_float_tr[k][texel];
371 break;
372 }
373 }
374 }
375
376 // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
377 // Match last texel in active lane in SIMD group, for better gathers
378 uint8_t last_texel = di.weight_texels_tr[texel_count_wt - 1][i];
379 for (unsigned int j = texel_count_wt; j < max_texel_count_of_weight; j++)
380 {
381 di.weight_texels_tr[j][i] = last_texel;
382 di.weights_texel_contribs_tr[j][i] = 0.0f;
383 }
384 }
385
386 // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
387 unsigned int texels_per_block_simd = round_up_to_simd_multiple_vla(texels_per_block);
388 for (unsigned int i = texels_per_block; i < texels_per_block_simd; i++)
389 {
390 di.texel_weight_count[i] = 0;
391
392 for (unsigned int j = 0; j < 4; j++)
393 {
394 di.texel_weight_contribs_float_tr[j][i] = 0;
395 di.texel_weights_tr[j][i] = 0;
396 di.texel_weight_contribs_int_tr[j][i] = 0;
397 }
398 }
399
400 // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
401 // Match last texel in active lane in SIMD group, for better gathers
402 unsigned int last_texel_count_wt = wb.texel_count_of_weight[weights_per_block - 1];
403 uint8_t last_texel = di.weight_texels_tr[last_texel_count_wt - 1][weights_per_block - 1];
404
405 unsigned int weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block);
406 for (unsigned int i = weights_per_block; i < weights_per_block_simd; i++)
407 {
408 di.weight_texel_count[i] = 0;
409
410 for (unsigned int j = 0; j < max_texel_count_of_weight; j++)
411 {
412 di.weight_texels_tr[j][i] = last_texel;
413 di.weights_texel_contribs_tr[j][i] = 0.0f;
414 }
415 }
416
417 di.texel_count = static_cast<uint8_t>(texels_per_block);
418 di.weight_count = static_cast<uint8_t>(weights_per_block);
419 di.weight_x = static_cast<uint8_t>(x_weights);
420 di.weight_y = static_cast<uint8_t>(y_weights);
421 di.weight_z = 1;
422 }
423
424 /**
425 * @brief Create a 3D decimation entry for a block-size and weight-decimation pair.
426 *
427 * @param x_texels The number of texels in the X dimension.
428 * @param y_texels The number of texels in the Y dimension.
429 * @param z_texels The number of texels in the Z dimension.
430 * @param x_weights The number of weights in the X dimension.
431 * @param y_weights The number of weights in the Y dimension.
432 * @param z_weights The number of weights in the Z dimension.
433 * @param[out] di The decimation info structure to populate.
434 @param[out] wb The decimation table init scratch working buffers.
435 */
init_decimation_info_3d(unsigned int x_texels,unsigned int y_texels,unsigned int z_texels,unsigned int x_weights,unsigned int y_weights,unsigned int z_weights,decimation_info & di,dt_init_working_buffers & wb)436 static void init_decimation_info_3d(
437 unsigned int x_texels,
438 unsigned int y_texels,
439 unsigned int z_texels,
440 unsigned int x_weights,
441 unsigned int y_weights,
442 unsigned int z_weights,
443 decimation_info& di,
444 dt_init_working_buffers& wb
445 ) {
446 unsigned int texels_per_block = x_texels * y_texels * z_texels;
447 unsigned int weights_per_block = x_weights * y_weights * z_weights;
448
449 uint8_t max_texel_count_of_weight = 0;
450
451 promise(weights_per_block > 0);
452 promise(texels_per_block > 0);
453
454 for (unsigned int i = 0; i < weights_per_block; i++)
455 {
456 wb.texel_count_of_weight[i] = 0;
457 }
458
459 for (unsigned int i = 0; i < texels_per_block; i++)
460 {
461 wb.weight_count_of_texel[i] = 0;
462 }
463
464 for (unsigned int z = 0; z < z_texels; z++)
465 {
466 for (unsigned int y = 0; y < y_texels; y++)
467 {
468 for (unsigned int x = 0; x < x_texels; x++)
469 {
470 int texel = (z * y_texels + y) * x_texels + x;
471
472 int x_weight = (((1024 + x_texels / 2) / (x_texels - 1)) * x * (x_weights - 1) + 32) >> 6;
473 int y_weight = (((1024 + y_texels / 2) / (y_texels - 1)) * y * (y_weights - 1) + 32) >> 6;
474 int z_weight = (((1024 + z_texels / 2) / (z_texels - 1)) * z * (z_weights - 1) + 32) >> 6;
475
476 int x_weight_frac = x_weight & 0xF;
477 int y_weight_frac = y_weight & 0xF;
478 int z_weight_frac = z_weight & 0xF;
479 int x_weight_int = x_weight >> 4;
480 int y_weight_int = y_weight >> 4;
481 int z_weight_int = z_weight >> 4;
482 int qweight[4];
483 int weight[4];
484 qweight[0] = (z_weight_int * y_weights + y_weight_int) * x_weights + x_weight_int;
485 qweight[3] = ((z_weight_int + 1) * y_weights + (y_weight_int + 1)) * x_weights + (x_weight_int + 1);
486
487 // simplex interpolation
488 int fs = x_weight_frac;
489 int ft = y_weight_frac;
490 int fp = z_weight_frac;
491
492 int cas = ((fs > ft) << 2) + ((ft > fp) << 1) + ((fs > fp));
493 int N = x_weights;
494 int NM = x_weights * y_weights;
495
496 int s1, s2, w0, w1, w2, w3;
497 switch (cas)
498 {
499 case 7:
500 s1 = 1;
501 s2 = N;
502 w0 = 16 - fs;
503 w1 = fs - ft;
504 w2 = ft - fp;
505 w3 = fp;
506 break;
507 case 3:
508 s1 = N;
509 s2 = 1;
510 w0 = 16 - ft;
511 w1 = ft - fs;
512 w2 = fs - fp;
513 w3 = fp;
514 break;
515 case 5:
516 s1 = 1;
517 s2 = NM;
518 w0 = 16 - fs;
519 w1 = fs - fp;
520 w2 = fp - ft;
521 w3 = ft;
522 break;
523 case 4:
524 s1 = NM;
525 s2 = 1;
526 w0 = 16 - fp;
527 w1 = fp - fs;
528 w2 = fs - ft;
529 w3 = ft;
530 break;
531 case 2:
532 s1 = N;
533 s2 = NM;
534 w0 = 16 - ft;
535 w1 = ft - fp;
536 w2 = fp - fs;
537 w3 = fs;
538 break;
539 case 0:
540 s1 = NM;
541 s2 = N;
542 w0 = 16 - fp;
543 w1 = fp - ft;
544 w2 = ft - fs;
545 w3 = fs;
546 break;
547 default:
548 s1 = NM;
549 s2 = N;
550 w0 = 16 - fp;
551 w1 = fp - ft;
552 w2 = ft - fs;
553 w3 = fs;
554 break;
555 }
556
557 qweight[1] = qweight[0] + s1;
558 qweight[2] = qweight[1] + s2;
559 weight[0] = w0;
560 weight[1] = w1;
561 weight[2] = w2;
562 weight[3] = w3;
563
564 for (unsigned int i = 0; i < 4; i++)
565 {
566 if (weight[i] != 0)
567 {
568 wb.grid_weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(qweight[i]);
569 wb.weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(weight[i]);
570 wb.weight_count_of_texel[texel]++;
571 wb.texels_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(texel);
572 wb.texel_weights_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(weight[i]);
573 wb.texel_count_of_weight[qweight[i]]++;
574 max_texel_count_of_weight = astc::max(max_texel_count_of_weight, wb.texel_count_of_weight[qweight[i]]);
575 }
576 }
577 }
578 }
579 }
580
581 uint8_t max_texel_weight_count = 0;
582 for (unsigned int i = 0; i < texels_per_block; i++)
583 {
584 di.texel_weight_count[i] = wb.weight_count_of_texel[i];
585 max_texel_weight_count = astc::max(max_texel_weight_count, di.texel_weight_count[i]);
586
587 // Init all 4 entries so we can rely on zeros for vectorization
588 for (unsigned int j = 0; j < 4; j++)
589 {
590 di.texel_weight_contribs_int_tr[j][i] = 0;
591 di.texel_weight_contribs_float_tr[j][i] = 0.0f;
592 di.texel_weights_tr[j][i] = 0;
593 }
594
595 for (unsigned int j = 0; j < wb.weight_count_of_texel[i]; j++)
596 {
597 di.texel_weight_contribs_int_tr[j][i] = wb.weights_of_texel[i][j];
598 di.texel_weight_contribs_float_tr[j][i] = static_cast<float>(wb.weights_of_texel[i][j]) * (1.0f / WEIGHTS_TEXEL_SUM);
599 di.texel_weights_tr[j][i] = wb.grid_weights_of_texel[i][j];
600 }
601 }
602
603 di.max_texel_weight_count = max_texel_weight_count;
604
605 for (unsigned int i = 0; i < weights_per_block; i++)
606 {
607 unsigned int texel_count_wt = wb.texel_count_of_weight[i];
608 di.weight_texel_count[i] = static_cast<uint8_t>(texel_count_wt);
609
610 for (unsigned int j = 0; j < texel_count_wt; j++)
611 {
612 unsigned int texel = wb.texels_of_weight[i][j];
613
614 // Create transposed versions of these for better vectorization
615 di.weight_texels_tr[j][i] = static_cast<uint8_t>(texel);
616 di.weights_texel_contribs_tr[j][i] = static_cast<float>(wb.texel_weights_of_weight[i][j]);
617
618 // Store the per-texel contribution of this weight for each texel it contributes to
619 di.texel_contrib_for_weight[j][i] = 0.0f;
620 for (unsigned int k = 0; k < 4; k++)
621 {
622 uint8_t dttw = di.texel_weights_tr[k][texel];
623 float dttwf = di.texel_weight_contribs_float_tr[k][texel];
624 if (dttw == i && dttwf != 0.0f)
625 {
626 di.texel_contrib_for_weight[j][i] = di.texel_weight_contribs_float_tr[k][texel];
627 break;
628 }
629 }
630 }
631
632 // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
633 // Match last texel in active lane in SIMD group, for better gathers
634 uint8_t last_texel = di.weight_texels_tr[texel_count_wt - 1][i];
635 for (unsigned int j = texel_count_wt; j < max_texel_count_of_weight; j++)
636 {
637 di.weight_texels_tr[j][i] = last_texel;
638 di.weights_texel_contribs_tr[j][i] = 0.0f;
639 }
640 }
641
642 // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
643 unsigned int texels_per_block_simd = round_up_to_simd_multiple_vla(texels_per_block);
644 for (unsigned int i = texels_per_block; i < texels_per_block_simd; i++)
645 {
646 di.texel_weight_count[i] = 0;
647
648 for (unsigned int j = 0; j < 4; j++)
649 {
650 di.texel_weight_contribs_float_tr[j][i] = 0;
651 di.texel_weights_tr[j][i] = 0;
652 di.texel_weight_contribs_int_tr[j][i] = 0;
653 }
654 }
655
656 // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
657 // Match last texel in active lane in SIMD group, for better gathers
658 int last_texel_count_wt = wb.texel_count_of_weight[weights_per_block - 1];
659 uint8_t last_texel = di.weight_texels_tr[last_texel_count_wt - 1][weights_per_block - 1];
660
661 unsigned int weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block);
662 for (unsigned int i = weights_per_block; i < weights_per_block_simd; i++)
663 {
664 di.weight_texel_count[i] = 0;
665
666 for (int j = 0; j < max_texel_count_of_weight; j++)
667 {
668 di.weight_texels_tr[j][i] = last_texel;
669 di.weights_texel_contribs_tr[j][i] = 0.0f;
670 }
671 }
672
673 di.texel_count = static_cast<uint8_t>(texels_per_block);
674 di.weight_count = static_cast<uint8_t>(weights_per_block);
675 di.weight_x = static_cast<uint8_t>(x_weights);
676 di.weight_y = static_cast<uint8_t>(y_weights);
677 di.weight_z = static_cast<uint8_t>(z_weights);
678 }
679
680 /**
681 * @brief Assign the texels to use for kmeans clustering.
682 *
683 * The max limit is @c BLOCK_MAX_KMEANS_TEXELS; above this a random selection is used.
684 * The @c bsd.texel_count is an input and must be populated beforehand.
685 *
686 * @param[in,out] bsd The block size descriptor to populate.
687 */
assign_kmeans_texels(block_size_descriptor & bsd)688 static void assign_kmeans_texels(
689 block_size_descriptor& bsd
690 ) {
691 // Use all texels for kmeans on a small block
692 if (bsd.texel_count <= BLOCK_MAX_KMEANS_TEXELS)
693 {
694 for (uint8_t i = 0; i < bsd.texel_count; i++)
695 {
696 bsd.kmeans_texels[i] = i;
697 }
698
699 return;
700 }
701
702 // Select a random subset of BLOCK_MAX_KMEANS_TEXELS for kmeans on a large block
703 uint64_t rng_state[2];
704 astc::rand_init(rng_state);
705
706 // Initialize array used for tracking used indices
707 bool seen[BLOCK_MAX_TEXELS];
708 for (uint8_t i = 0; i < bsd.texel_count; i++)
709 {
710 seen[i] = false;
711 }
712
713 // Assign 64 random indices, retrying if we see repeats
714 unsigned int arr_elements_set = 0;
715 while (arr_elements_set < BLOCK_MAX_KMEANS_TEXELS)
716 {
717 uint8_t texel = static_cast<uint8_t>(astc::rand(rng_state));
718 texel = texel % bsd.texel_count;
719 if (!seen[texel])
720 {
721 bsd.kmeans_texels[arr_elements_set++] = texel;
722 seen[texel] = true;
723 }
724 }
725 }
726
727 /**
728 * @brief Allocate a single 2D decimation table entry.
729 *
730 * @param x_texels The number of texels in the X dimension.
731 * @param y_texels The number of texels in the Y dimension.
732 * @param x_weights The number of weights in the X dimension.
733 * @param y_weights The number of weights in the Y dimension.
734 * @param bsd The block size descriptor we are populating.
735 * @param wb The decimation table init scratch working buffers.
736 * @param index The packed array index to populate.
737 */
construct_dt_entry_2d(unsigned int x_texels,unsigned int y_texels,unsigned int x_weights,unsigned int y_weights,block_size_descriptor & bsd,dt_init_working_buffers & wb,unsigned int index)738 static void construct_dt_entry_2d(
739 unsigned int x_texels,
740 unsigned int y_texels,
741 unsigned int x_weights,
742 unsigned int y_weights,
743 block_size_descriptor& bsd,
744 dt_init_working_buffers& wb,
745 unsigned int index
746 ) {
747 unsigned int weight_count = x_weights * y_weights;
748 assert(weight_count <= BLOCK_MAX_WEIGHTS);
749
750 bool try_2planes = (2 * weight_count) <= BLOCK_MAX_WEIGHTS;
751
752 decimation_info& di = bsd.decimation_tables[index];
753 init_decimation_info_2d(x_texels, y_texels, x_weights, y_weights, di, wb);
754
755 int maxprec_1plane = -1;
756 int maxprec_2planes = -1;
757 for (int i = 0; i < 12; i++)
758 {
759 unsigned int bits_1plane = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(i));
760 if (bits_1plane >= BLOCK_MIN_WEIGHT_BITS && bits_1plane <= BLOCK_MAX_WEIGHT_BITS)
761 {
762 maxprec_1plane = i;
763 }
764
765 if (try_2planes)
766 {
767 unsigned int bits_2planes = get_ise_sequence_bitcount(2 * weight_count, static_cast<quant_method>(i));
768 if (bits_2planes >= BLOCK_MIN_WEIGHT_BITS && bits_2planes <= BLOCK_MAX_WEIGHT_BITS)
769 {
770 maxprec_2planes = i;
771 }
772 }
773 }
774
775 // At least one of the two should be valid ...
776 assert(maxprec_1plane >= 0 || maxprec_2planes >= 0);
777 bsd.decimation_modes[index].maxprec_1plane = static_cast<int8_t>(maxprec_1plane);
778 bsd.decimation_modes[index].maxprec_2planes = static_cast<int8_t>(maxprec_2planes);
779 bsd.decimation_modes[index].refprec_1plane = 0;
780 bsd.decimation_modes[index].refprec_2planes = 0;
781 }
782
783 /**
784 * @brief Allocate block modes and decimation tables for a single 2D block size.
785 *
786 * @param x_texels The number of texels in the X dimension.
787 * @param y_texels The number of texels in the Y dimension.
788 * @param can_omit_modes Can we discard modes that astcenc won't use, even if legal?
789 * @param mode_cutoff Percentile cutoff in range [0,1]. Low values more likely to be used.
790 * @param[out] bsd The block size descriptor to populate.
791 */
792 #ifdef ASTC_CUSTOMIZED_ENABLE
construct_block_size_descriptor_2d(QualityProfile privateProfile,unsigned int x_texels,unsigned int y_texels,bool can_omit_modes,float mode_cutoff,block_size_descriptor & bsd)793 static bool construct_block_size_descriptor_2d(
794 #else
795 static void construct_block_size_descriptor_2d(
796 #endif
797 QualityProfile privateProfile,
798 unsigned int x_texels,
799 unsigned int y_texels,
800 bool can_omit_modes,
801 float mode_cutoff,
802 block_size_descriptor& bsd
803 ) {
804 // Store a remap table for storing packed decimation modes.
805 // Indexing uses [Y * 16 + X] and max size for each axis is 12.
806 static const unsigned int MAX_DMI = 12 * 16 + 12;
807 int decimation_mode_index[MAX_DMI];
808
809 dt_init_working_buffers* wb = new dt_init_working_buffers;
810
811 bsd.xdim = static_cast<uint8_t>(x_texels);
812 bsd.ydim = static_cast<uint8_t>(y_texels);
813 bsd.zdim = 1;
814 bsd.texel_count = static_cast<uint8_t>(x_texels * y_texels);
815
816 for (unsigned int i = 0; i < MAX_DMI; i++)
817 {
818 decimation_mode_index[i] = -1;
819 }
820
821 // Gather all the decimation grids that can be used with the current block
822 #if !defined(ASTCENC_DECOMPRESS_ONLY)
823 const float *percentiles = get_2d_percentile_table(x_texels, y_texels);
824 float always_cutoff = (privateProfile != HIGH_QUALITY_PROFILE) ? 1.0f : 0.0f;
825 #else
826 // Unused in decompress-only builds
827 (void)can_omit_modes;
828 (void)mode_cutoff;
829 #endif
830
831 // Construct the list of block formats referencing the decimation tables
832 unsigned int packed_bm_idx = 0;
833 unsigned int packed_dm_idx = 0;
834
835 // Trackers
836 unsigned int bm_counts[4] { 0 };
837 unsigned int dm_counts[4] { 0 };
838
839 // Clear the list to a known-bad value
840 for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
841 {
842 bsd.block_mode_packed_index[i] = BLOCK_BAD_BLOCK_MODE;
843 }
844
845 // Iterate four times to build a usefully ordered list:
846 // - Pass 0 - keep selected single plane "always" block modes
847 // - Pass 1 - keep selected single plane "non-always" block modes
848 // - Pass 2 - keep select dual plane block modes
849 // - Pass 3 - keep everything else that's legal
850 unsigned int limit = can_omit_modes ? 3 : 4;
851 for (unsigned int j = 0; j < limit; j ++)
852 {
853 for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
854 {
855 // Skip modes we've already included in a previous pass
856 if (bsd.block_mode_packed_index[i] != BLOCK_BAD_BLOCK_MODE)
857 {
858 continue;
859 }
860 if ((privateProfile == HIGH_SPEED_PROFILE) && (i != HIGH_SPEED_PROFILE_BLOCK_MODE))
861 {
862 continue;
863 }
864 #ifdef ASTC_CUSTOMIZED_ENABLE
865 if (privateProfile == CUSTOMIZED_PROFILE)
866 {
867 if (!g_astcCustomizedSoManager.LoadSutCustomizedSo() ||
868 g_astcCustomizedSoManager.isCustomizedBlockModeFunc_ == nullptr)
869 {
870 printf("astcenc customized so dlopen failed or isCustomizedBlockModeFunc_ is nullptr!\n");
871 delete wb;
872 #if !defined(ASTCENC_DECOMPRESS_ONLY)
873 delete[] percentiles;
874 #endif
875 return false;
876 }
877 if (!g_astcCustomizedSoManager.isCustomizedBlockModeFunc_(i))
878 {
879 continue;
880 }
881 }
882 #endif
883 // Decode parameters
884 unsigned int x_weights;
885 unsigned int y_weights;
886 bool is_dual_plane;
887 unsigned int quant_mode;
888 unsigned int weight_bits;
889 bool valid = decode_block_mode_2d(i, x_weights, y_weights, is_dual_plane, quant_mode, weight_bits);
890
891 // Always skip invalid encodings for the current block size
892 if (!valid || (x_weights > x_texels) || (y_weights > y_texels))
893 {
894 continue;
895 }
896
897 // Selectively skip dual plane encodings
898 if (((j <= 1) && is_dual_plane) || (j == 2 && !is_dual_plane))
899 {
900 continue;
901 }
902
903 // Always skip encodings we can't physically encode based on
904 // generic encoding bit availability
905 if (is_dual_plane)
906 {
907 // This is the only check we need as only support 1 partition
908 if ((109 - weight_bits) <= 0)
909 {
910 continue;
911 }
912 }
913 else
914 {
915 // This is conservative - fewer bits may be available for > 1 partition
916 if ((111 - weight_bits) <= 0)
917 {
918 continue;
919 }
920 }
921
922 // Selectively skip encodings based on percentile
923 bool percentile_hit = false;
924 #if !defined(ASTCENC_DECOMPRESS_ONLY)
925 if (j == 0)
926 {
927 percentile_hit = percentiles[i] <= always_cutoff;
928 }
929 else
930 {
931 percentile_hit = percentiles[i] <= mode_cutoff;
932 }
933 #endif
934
935 if (j != 3 && !percentile_hit)
936 {
937 continue;
938 }
939
940 // Allocate and initialize the decimation table entry if we've not used it yet
941 int decimation_mode = decimation_mode_index[y_weights * 16 + x_weights];
942 if (decimation_mode < 0)
943 {
944 construct_dt_entry_2d(x_texels, y_texels, x_weights, y_weights, bsd, *wb, packed_dm_idx);
945 if (privateProfile == HIGH_SPEED_PROFILE)
946 {
947 bsd.decimation_modes[packed_dm_idx].maxprec_1plane = 4; // Speed optimization: max prec num is limited to 4
948 }
949 decimation_mode_index[y_weights * 16 + x_weights] = packed_dm_idx;
950 decimation_mode = packed_dm_idx;
951
952 dm_counts[j]++;
953 packed_dm_idx++;
954 }
955
956 auto& bm = bsd.block_modes[packed_bm_idx];
957
958 bm.decimation_mode = static_cast<uint8_t>(decimation_mode);
959 bm.quant_mode = static_cast<uint8_t>(quant_mode);
960 bm.is_dual_plane = static_cast<uint8_t>(is_dual_plane);
961 bm.weight_bits = static_cast<uint8_t>(weight_bits);
962 bm.mode_index = static_cast<uint16_t>(i);
963
964 auto& dm = bsd.decimation_modes[decimation_mode];
965
966 if (is_dual_plane)
967 {
968 dm.set_ref_2plane(bm.get_weight_quant_mode());
969 }
970 else
971 {
972 dm.set_ref_1plane(bm.get_weight_quant_mode());
973 }
974
975 bsd.block_mode_packed_index[i] = static_cast<uint16_t>(packed_bm_idx);
976
977 packed_bm_idx++;
978 bm_counts[j]++;
979 }
980 }
981
982 bsd.block_mode_count_1plane_always = bm_counts[0];
983 bsd.block_mode_count_1plane_selected = bm_counts[0] + bm_counts[1];
984 bsd.block_mode_count_1plane_2plane_selected = bm_counts[0] + bm_counts[1] + bm_counts[2];
985 bsd.block_mode_count_all = bm_counts[0] + bm_counts[1] + bm_counts[2] + bm_counts[3];
986
987 bsd.decimation_mode_count_always = dm_counts[0];
988 bsd.decimation_mode_count_selected = dm_counts[0] + dm_counts[1] + dm_counts[2];
989 bsd.decimation_mode_count_all = dm_counts[0] + dm_counts[1] + dm_counts[2] + dm_counts[3];
990
991 #if !defined(ASTCENC_DECOMPRESS_ONLY)
992 assert(bsd.block_mode_count_1plane_always > 0);
993 assert(bsd.decimation_mode_count_always > 0);
994
995 delete[] percentiles;
996 #endif
997
998 // Ensure the end of the array contains valid data (should never get read)
999 for (unsigned int i = bsd.decimation_mode_count_all; i < WEIGHTS_MAX_DECIMATION_MODES; i++)
1000 {
1001 bsd.decimation_modes[i].maxprec_1plane = -1;
1002 bsd.decimation_modes[i].maxprec_2planes = -1;
1003 bsd.decimation_modes[i].refprec_1plane = 0;
1004 bsd.decimation_modes[i].refprec_2planes = 0;
1005 }
1006
1007 // Determine the texels to use for kmeans clustering.
1008 assign_kmeans_texels(bsd);
1009
1010 delete wb;
1011 #ifdef ASTC_CUSTOMIZED_ENABLE
1012 return true;
1013 #endif
1014 }
1015
1016 /**
1017 * @brief Allocate block modes and decimation tables for a single 3D block size.
1018 *
1019 * TODO: This function doesn't include all of the heuristics that we use for 2D block sizes such as
1020 * the percentile mode cutoffs. If 3D becomes more widely used we should look at this.
1021 *
1022 * @param x_texels The number of texels in the X dimension.
1023 * @param y_texels The number of texels in the Y dimension.
1024 * @param z_texels The number of texels in the Z dimension.
1025 * @param[out] bsd The block size descriptor to populate.
1026 */
construct_block_size_descriptor_3d(unsigned int x_texels,unsigned int y_texels,unsigned int z_texels,block_size_descriptor & bsd)1027 static void construct_block_size_descriptor_3d(
1028 unsigned int x_texels,
1029 unsigned int y_texels,
1030 unsigned int z_texels,
1031 block_size_descriptor& bsd
1032 ) {
1033 // Store a remap table for storing packed decimation modes.
1034 // Indexing uses [Z * 64 + Y * 8 + X] and max size for each axis is 6.
1035 static constexpr unsigned int MAX_DMI = 6 * 64 + 6 * 8 + 6;
1036 int decimation_mode_index[MAX_DMI];
1037 unsigned int decimation_mode_count = 0;
1038
1039 dt_init_working_buffers* wb = new dt_init_working_buffers;
1040
1041 bsd.xdim = static_cast<uint8_t>(x_texels);
1042 bsd.ydim = static_cast<uint8_t>(y_texels);
1043 bsd.zdim = static_cast<uint8_t>(z_texels);
1044 bsd.texel_count = static_cast<uint8_t>(x_texels * y_texels * z_texels);
1045
1046 for (unsigned int i = 0; i < MAX_DMI; i++)
1047 {
1048 decimation_mode_index[i] = -1;
1049 }
1050
1051 // gather all the infill-modes that can be used with the current block size
1052 for (unsigned int x_weights = 2; x_weights <= x_texels; x_weights++)
1053 {
1054 for (unsigned int y_weights = 2; y_weights <= y_texels; y_weights++)
1055 {
1056 for (unsigned int z_weights = 2; z_weights <= z_texels; z_weights++)
1057 {
1058 unsigned int weight_count = x_weights * y_weights * z_weights;
1059 if (weight_count > BLOCK_MAX_WEIGHTS)
1060 {
1061 continue;
1062 }
1063
1064 decimation_info& di = bsd.decimation_tables[decimation_mode_count];
1065 decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights] = decimation_mode_count;
1066 init_decimation_info_3d(x_texels, y_texels, z_texels, x_weights, y_weights, z_weights, di, *wb);
1067
1068 int maxprec_1plane = -1;
1069 int maxprec_2planes = -1;
1070 for (unsigned int i = 0; i < 12; i++)
1071 {
1072 unsigned int bits_1plane = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(i));
1073 if (bits_1plane >= BLOCK_MIN_WEIGHT_BITS && bits_1plane <= BLOCK_MAX_WEIGHT_BITS)
1074 {
1075 maxprec_1plane = i;
1076 }
1077
1078 unsigned int bits_2planes = get_ise_sequence_bitcount(2 * weight_count, static_cast<quant_method>(i));
1079 if (bits_2planes >= BLOCK_MIN_WEIGHT_BITS && bits_2planes <= BLOCK_MAX_WEIGHT_BITS)
1080 {
1081 maxprec_2planes = i;
1082 }
1083 }
1084
1085 if ((2 * weight_count) > BLOCK_MAX_WEIGHTS)
1086 {
1087 maxprec_2planes = -1;
1088 }
1089
1090 bsd.decimation_modes[decimation_mode_count].maxprec_1plane = static_cast<int8_t>(maxprec_1plane);
1091 bsd.decimation_modes[decimation_mode_count].maxprec_2planes = static_cast<int8_t>(maxprec_2planes);
1092 bsd.decimation_modes[decimation_mode_count].refprec_1plane = maxprec_1plane == -1 ? 0 : 0xFFFF;
1093 bsd.decimation_modes[decimation_mode_count].refprec_2planes = maxprec_2planes == -1 ? 0 : 0xFFFF;
1094 decimation_mode_count++;
1095 }
1096 }
1097 }
1098
1099 // Ensure the end of the array contains valid data (should never get read)
1100 for (unsigned int i = decimation_mode_count; i < WEIGHTS_MAX_DECIMATION_MODES; i++)
1101 {
1102 bsd.decimation_modes[i].maxprec_1plane = -1;
1103 bsd.decimation_modes[i].maxprec_2planes = -1;
1104 bsd.decimation_modes[i].refprec_1plane = 0;
1105 bsd.decimation_modes[i].refprec_2planes = 0;
1106 }
1107
1108 bsd.decimation_mode_count_always = 0; // Skipped for 3D modes
1109 bsd.decimation_mode_count_selected = decimation_mode_count;
1110 bsd.decimation_mode_count_all = decimation_mode_count;
1111
1112 // Construct the list of block formats referencing the decimation tables
1113
1114 // Clear the list to a known-bad value
1115 for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
1116 {
1117 bsd.block_mode_packed_index[i] = BLOCK_BAD_BLOCK_MODE;
1118 }
1119
1120 unsigned int packed_idx = 0;
1121 unsigned int bm_counts[2] { 0 };
1122
1123 // Iterate two times to build a usefully ordered list:
1124 // - Pass 0 - keep valid single plane block modes
1125 // - Pass 1 - keep valid dual plane block modes
1126 for (unsigned int j = 0; j < 2; j++)
1127 {
1128 for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
1129 {
1130 // Skip modes we've already included in a previous pass
1131 if (bsd.block_mode_packed_index[i] != BLOCK_BAD_BLOCK_MODE)
1132 {
1133 continue;
1134 }
1135
1136 unsigned int x_weights;
1137 unsigned int y_weights;
1138 unsigned int z_weights;
1139 bool is_dual_plane;
1140 unsigned int quant_mode;
1141 unsigned int weight_bits;
1142
1143 bool valid = decode_block_mode_3d(i, x_weights, y_weights, z_weights, is_dual_plane, quant_mode, weight_bits);
1144 // Skip invalid encodings
1145 if (!valid || x_weights > x_texels || y_weights > y_texels || z_weights > z_texels)
1146 {
1147 continue;
1148 }
1149
1150 // Skip encodings in the wrong iteration
1151 if ((j == 0 && is_dual_plane) || (j == 1 && !is_dual_plane))
1152 {
1153 continue;
1154 }
1155
1156 // Always skip encodings we can't physically encode based on bit availability
1157 if (is_dual_plane)
1158 {
1159 // This is the only check we need as only support 1 partition
1160 if ((109 - weight_bits) <= 0)
1161 {
1162 continue;
1163 }
1164 }
1165 else
1166 {
1167 // This is conservative - fewer bits may be available for > 1 partition
1168 if ((111 - weight_bits) <= 0)
1169 {
1170 continue;
1171 }
1172 }
1173
1174 int decimation_mode = decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights];
1175 bsd.block_modes[packed_idx].decimation_mode = static_cast<uint8_t>(decimation_mode);
1176 bsd.block_modes[packed_idx].quant_mode = static_cast<uint8_t>(quant_mode);
1177 bsd.block_modes[packed_idx].weight_bits = static_cast<uint8_t>(weight_bits);
1178 bsd.block_modes[packed_idx].is_dual_plane = static_cast<uint8_t>(is_dual_plane);
1179 bsd.block_modes[packed_idx].mode_index = static_cast<uint16_t>(i);
1180
1181 bsd.block_mode_packed_index[i] = static_cast<uint16_t>(packed_idx);
1182 bm_counts[j]++;
1183 packed_idx++;
1184 }
1185 }
1186
1187 bsd.block_mode_count_1plane_always = 0; // Skipped for 3D modes
1188 bsd.block_mode_count_1plane_selected = bm_counts[0];
1189 bsd.block_mode_count_1plane_2plane_selected = bm_counts[0] + bm_counts[1];
1190 bsd.block_mode_count_all = bm_counts[0] + bm_counts[1];
1191
1192 // Determine the texels to use for kmeans clustering.
1193 assign_kmeans_texels(bsd);
1194
1195 delete wb;
1196 }
1197
1198 /* See header for documentation. */
1199 #ifdef ASTC_CUSTOMIZED_ENABLE
init_block_size_descriptor(QualityProfile privateProfile,unsigned int x_texels,unsigned int y_texels,unsigned int z_texels,bool can_omit_modes,unsigned int partition_count_cutoff,float mode_cutoff,block_size_descriptor & bsd)1200 bool init_block_size_descriptor(
1201 #else
1202 void init_block_size_descriptor(
1203 #endif
1204 QualityProfile privateProfile,
1205 unsigned int x_texels,
1206 unsigned int y_texels,
1207 unsigned int z_texels,
1208 bool can_omit_modes,
1209 unsigned int partition_count_cutoff,
1210 float mode_cutoff,
1211 block_size_descriptor& bsd
1212 ) {
1213 if (z_texels > 1)
1214 {
1215 construct_block_size_descriptor_3d(x_texels, y_texels, z_texels, bsd);
1216 }
1217 else
1218 {
1219 #ifdef ASTC_CUSTOMIZED_ENABLE
1220 if (!construct_block_size_descriptor_2d(privateProfile, x_texels, y_texels, can_omit_modes, mode_cutoff, bsd))
1221 {
1222 return false;
1223 }
1224 #else
1225 construct_block_size_descriptor_2d(privateProfile, x_texels, y_texels, can_omit_modes, mode_cutoff, bsd);
1226 #endif
1227 }
1228
1229 init_partition_tables(bsd, can_omit_modes, partition_count_cutoff);
1230 #ifdef ASTC_CUSTOMIZED_ENABLE
1231 return true;
1232 #endif
1233 }
1234