• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: Apache-2.0
2 // ----------------------------------------------------------------------------
3 // Copyright 2011-2023 Arm Limited
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
6 // use this file except in compliance with the License. You may obtain a copy
7 // of the License at:
8 //
9 //     http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 // License for the specific language governing permissions and limitations
15 // under the License.
16 // ----------------------------------------------------------------------------
17 
18 /**
19  * @brief Functions to generate block size descriptor and decimation tables.
20  */
21 
22 #include "astcenc_internal.h"
23 
24 /**
25  * @brief Decode the properties of an encoded 2D block mode.
26  *
27  * @param      block_mode      The encoded block mode.
28  * @param[out] x_weights       The number of weights in the X dimension.
29  * @param[out] y_weights       The number of weights in the Y dimension.
30  * @param[out] is_dual_plane   True if this block mode has two weight planes.
31  * @param[out] quant_mode      The quantization level for the weights.
32  * @param[out] weight_bits     The storage bit count for the weights.
33  *
34  * @return Returns true if a valid mode, false otherwise.
35  */
decode_block_mode_2d(unsigned int block_mode,unsigned int & x_weights,unsigned int & y_weights,bool & is_dual_plane,unsigned int & quant_mode,unsigned int & weight_bits)36 static bool decode_block_mode_2d(
37 	unsigned int block_mode,
38 	unsigned int& x_weights,
39 	unsigned int& y_weights,
40 	bool& is_dual_plane,
41 	unsigned int& quant_mode,
42 	unsigned int& weight_bits
43 ) {
44 	unsigned int base_quant_mode = (block_mode >> 4) & 1;
45 	unsigned int H = (block_mode >> 9) & 1;
46 	unsigned int D = (block_mode >> 10) & 1;
47 	unsigned int A = (block_mode >> 5) & 0x3;
48 
49 	x_weights = 0;
50 	y_weights = 0;
51 
52 	if ((block_mode & 3) != 0)
53 	{
54 		base_quant_mode |= (block_mode & 3) << 1;
55 		unsigned int B = (block_mode >> 7) & 3;
56 		switch ((block_mode >> 2) & 3)
57 		{
58 		case 0:
59 			x_weights = B + 4;
60 			y_weights = A + 2;
61 			break;
62 		case 1:
63 			x_weights = B + 8;
64 			y_weights = A + 2;
65 			break;
66 		case 2:
67 			x_weights = A + 2;
68 			y_weights = B + 8;
69 			break;
70 		case 3:
71 			B &= 1;
72 			if (block_mode & 0x100)
73 			{
74 				x_weights = B + 2;
75 				y_weights = A + 2;
76 			}
77 			else
78 			{
79 				x_weights = A + 2;
80 				y_weights = B + 6;
81 			}
82 			break;
83 		}
84 	}
85 	else
86 	{
87 		base_quant_mode |= ((block_mode >> 2) & 3) << 1;
88 		if (((block_mode >> 2) & 3) == 0)
89 		{
90 			return false;
91 		}
92 
93 		unsigned int B = (block_mode >> 9) & 3;
94 		switch ((block_mode >> 7) & 3)
95 		{
96 		case 0:
97 			x_weights = 12;
98 			y_weights = A + 2;
99 			break;
100 		case 1:
101 			x_weights = A + 2;
102 			y_weights = 12;
103 			break;
104 		case 2:
105 			x_weights = A + 6;
106 			y_weights = B + 6;
107 			D = 0;
108 			H = 0;
109 			break;
110 		case 3:
111 			switch ((block_mode >> 5) & 3)
112 			{
113 			case 0:
114 				x_weights = 6;
115 				y_weights = 10;
116 				break;
117 			case 1:
118 				x_weights = 10;
119 				y_weights = 6;
120 				break;
121 			case 2:
122 			case 3:
123 				return false;
124 			}
125 			break;
126 		}
127 	}
128 
129 	unsigned int weight_count = x_weights * y_weights * (D + 1);
130 	quant_mode = (base_quant_mode - 2) + 6 * H;
131 	is_dual_plane = D != 0;
132 
133 	weight_bits = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(quant_mode));
134 	return (weight_count <= BLOCK_MAX_WEIGHTS &&
135 	        weight_bits >= BLOCK_MIN_WEIGHT_BITS &&
136 	        weight_bits <= BLOCK_MAX_WEIGHT_BITS);
137 }
138 
139 /**
140  * @brief Decode the properties of an encoded 3D block mode.
141  *
142  * @param      block_mode      The encoded block mode.
143  * @param[out] x_weights       The number of weights in the X dimension.
144  * @param[out] y_weights       The number of weights in the Y dimension.
145  * @param[out] z_weights       The number of weights in the Z dimension.
146  * @param[out] is_dual_plane   True if this block mode has two weight planes.
147  * @param[out] quant_mode      The quantization level for the weights.
148  * @param[out] weight_bits     The storage bit count for the weights.
149  *
150  * @return Returns true if a valid mode, false otherwise.
151  */
decode_block_mode_3d(unsigned int block_mode,unsigned int & x_weights,unsigned int & y_weights,unsigned int & z_weights,bool & is_dual_plane,unsigned int & quant_mode,unsigned int & weight_bits)152 static bool decode_block_mode_3d(
153 	unsigned int block_mode,
154 	unsigned int& x_weights,
155 	unsigned int& y_weights,
156 	unsigned int& z_weights,
157 	bool& is_dual_plane,
158 	unsigned int& quant_mode,
159 	unsigned int& weight_bits
160 ) {
161 	unsigned int base_quant_mode = (block_mode >> 4) & 1;
162 	unsigned int H = (block_mode >> 9) & 1;
163 	unsigned int D = (block_mode >> 10) & 1;
164 	unsigned int A = (block_mode >> 5) & 0x3;
165 
166 	x_weights = 0;
167 	y_weights = 0;
168 	z_weights = 0;
169 
170 	if ((block_mode & 3) != 0)
171 	{
172 		base_quant_mode |= (block_mode & 3) << 1;
173 		unsigned int B = (block_mode >> 7) & 3;
174 		unsigned int C = (block_mode >> 2) & 0x3;
175 		x_weights = A + 2;
176 		y_weights = B + 2;
177 		z_weights = C + 2;
178 	}
179 	else
180 	{
181 		base_quant_mode |= ((block_mode >> 2) & 3) << 1;
182 		if (((block_mode >> 2) & 3) == 0)
183 		{
184 			return false;
185 		}
186 
187 		int B = (block_mode >> 9) & 3;
188 		if (((block_mode >> 7) & 3) != 3)
189 		{
190 			D = 0;
191 			H = 0;
192 		}
193 		switch ((block_mode >> 7) & 3)
194 		{
195 		case 0:
196 			x_weights = 6;
197 			y_weights = B + 2;
198 			z_weights = A + 2;
199 			break;
200 		case 1:
201 			x_weights = A + 2;
202 			y_weights = 6;
203 			z_weights = B + 2;
204 			break;
205 		case 2:
206 			x_weights = A + 2;
207 			y_weights = B + 2;
208 			z_weights = 6;
209 			break;
210 		case 3:
211 			x_weights = 2;
212 			y_weights = 2;
213 			z_weights = 2;
214 			switch ((block_mode >> 5) & 3)
215 			{
216 			case 0:
217 				x_weights = 6;
218 				break;
219 			case 1:
220 				y_weights = 6;
221 				break;
222 			case 2:
223 				z_weights = 6;
224 				break;
225 			case 3:
226 				return false;
227 			}
228 			break;
229 		}
230 	}
231 
232 	unsigned int weight_count = x_weights * y_weights * z_weights * (D + 1);
233 	quant_mode = (base_quant_mode - 2) + 6 * H;
234 	is_dual_plane = D != 0;
235 
236 	weight_bits = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(quant_mode));
237 	return (weight_count <= BLOCK_MAX_WEIGHTS &&
238 	        weight_bits >= BLOCK_MIN_WEIGHT_BITS &&
239 	        weight_bits <= BLOCK_MAX_WEIGHT_BITS);
240 }
241 
242 /**
243  * @brief Create a 2D decimation entry for a block-size and weight-decimation pair.
244  *
245  * @param      x_texels    The number of texels in the X dimension.
246  * @param      y_texels    The number of texels in the Y dimension.
247  * @param      x_weights   The number of weights in the X dimension.
248  * @param      y_weights   The number of weights in the Y dimension.
249  * @param[out] di          The decimation info structure to populate.
250  * @param[out] wb          The decimation table init scratch working buffers.
251  */
init_decimation_info_2d(unsigned int x_texels,unsigned int y_texels,unsigned int x_weights,unsigned int y_weights,decimation_info & di,dt_init_working_buffers & wb)252 static void init_decimation_info_2d(
253 	unsigned int x_texels,
254 	unsigned int y_texels,
255 	unsigned int x_weights,
256 	unsigned int y_weights,
257 	decimation_info& di,
258 	dt_init_working_buffers& wb
259 ) {
260 	unsigned int texels_per_block = x_texels * y_texels;
261 	unsigned int weights_per_block = x_weights * y_weights;
262 
263 	uint8_t max_texel_count_of_weight = 0;
264 
265 	promise(weights_per_block > 0);
266 	promise(texels_per_block > 0);
267 	promise(x_texels > 0);
268 	promise(y_texels > 0);
269 
270 	for (unsigned int i = 0; i < weights_per_block; i++)
271 	{
272 		wb.texel_count_of_weight[i] = 0;
273 	}
274 
275 	for (unsigned int i = 0; i < texels_per_block; i++)
276 	{
277 		wb.weight_count_of_texel[i] = 0;
278 	}
279 
280 	for (unsigned int y = 0; y < y_texels; y++)
281 	{
282 		for (unsigned int x = 0; x < x_texels; x++)
283 		{
284 			unsigned int texel = y * x_texels + x;
285 
286 			unsigned int x_weight = (((1024 + x_texels / 2) / (x_texels - 1)) * x * (x_weights - 1) + 32) >> 6;
287 			unsigned int y_weight = (((1024 + y_texels / 2) / (y_texels - 1)) * y * (y_weights - 1) + 32) >> 6;
288 
289 			unsigned int x_weight_frac = x_weight & 0xF;
290 			unsigned int y_weight_frac = y_weight & 0xF;
291 			unsigned int x_weight_int = x_weight >> 4;
292 			unsigned int y_weight_int = y_weight >> 4;
293 
294 			unsigned int qweight[4];
295 			qweight[0] = x_weight_int + y_weight_int * x_weights;
296 			qweight[1] = qweight[0] + 1;
297 			qweight[2] = qweight[0] + x_weights;
298 			qweight[3] = qweight[2] + 1;
299 
300 			// Truncated-precision bilinear interpolation
301 			unsigned int prod = x_weight_frac * y_weight_frac;
302 
303 			unsigned int weight[4];
304 			weight[3] = (prod + 8) >> 4;
305 			weight[1] = x_weight_frac - weight[3];
306 			weight[2] = y_weight_frac - weight[3];
307 			weight[0] = 16 - x_weight_frac - y_weight_frac + weight[3];
308 
309 			for (unsigned int i = 0; i < 4; i++)
310 			{
311 				if (weight[i] != 0)
312 				{
313 					wb.grid_weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(qweight[i]);
314 					wb.weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(weight[i]);
315 					wb.weight_count_of_texel[texel]++;
316 					wb.texels_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(texel);
317 					wb.texel_weights_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(weight[i]);
318 					wb.texel_count_of_weight[qweight[i]]++;
319 					max_texel_count_of_weight = astc::max(max_texel_count_of_weight, wb.texel_count_of_weight[qweight[i]]);
320 				}
321 			}
322 		}
323 	}
324 
325 	uint8_t max_texel_weight_count = 0;
326 	for (unsigned int i = 0; i < texels_per_block; i++)
327 	{
328 		di.texel_weight_count[i] = wb.weight_count_of_texel[i];
329 		max_texel_weight_count = astc::max(max_texel_weight_count, di.texel_weight_count[i]);
330 
331 		for (unsigned int j = 0; j < wb.weight_count_of_texel[i]; j++)
332 		{
333 			di.texel_weight_contribs_int_tr[j][i] = wb.weights_of_texel[i][j];
334 			di.texel_weight_contribs_float_tr[j][i] = static_cast<float>(wb.weights_of_texel[i][j]) * (1.0f / WEIGHTS_TEXEL_SUM);
335 			di.texel_weights_tr[j][i] = wb.grid_weights_of_texel[i][j];
336 		}
337 
338 		// Init all 4 entries so we can rely on zeros for vectorization
339 		for (unsigned int j = wb.weight_count_of_texel[i]; j < 4; j++)
340 		{
341 			di.texel_weight_contribs_int_tr[j][i] = 0;
342 			di.texel_weight_contribs_float_tr[j][i] = 0.0f;
343 			di.texel_weights_tr[j][i] = 0;
344 		}
345 	}
346 
347 	di.max_texel_weight_count = max_texel_weight_count;
348 
349 	for (unsigned int i = 0; i < weights_per_block; i++)
350 	{
351 		unsigned int texel_count_wt = wb.texel_count_of_weight[i];
352 		di.weight_texel_count[i] = static_cast<uint8_t>(texel_count_wt);
353 
354 		for (unsigned int j = 0; j < texel_count_wt; j++)
355 		{
356 			uint8_t texel = wb.texels_of_weight[i][j];
357 
358 			// Create transposed versions of these for better vectorization
359 			di.weight_texels_tr[j][i] = texel;
360 			di.weights_texel_contribs_tr[j][i] = static_cast<float>(wb.texel_weights_of_weight[i][j]);
361 
362 			// Store the per-texel contribution of this weight for each texel it contributes to
363 			di.texel_contrib_for_weight[j][i] = 0.0f;
364 			for (unsigned int k = 0; k < 4; k++)
365 			{
366 				uint8_t dttw = di.texel_weights_tr[k][texel];
367 				float dttwf = di.texel_weight_contribs_float_tr[k][texel];
368 				if (dttw == i && dttwf != 0.0f)
369 				{
370 					di.texel_contrib_for_weight[j][i] = di.texel_weight_contribs_float_tr[k][texel];
371 					break;
372 				}
373 			}
374 		}
375 
376 		// Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
377 		// Match last texel in active lane in SIMD group, for better gathers
378 		uint8_t last_texel = di.weight_texels_tr[texel_count_wt - 1][i];
379 		for (unsigned int j = texel_count_wt; j < max_texel_count_of_weight; j++)
380 		{
381 			di.weight_texels_tr[j][i] = last_texel;
382 			di.weights_texel_contribs_tr[j][i] = 0.0f;
383 		}
384 	}
385 
386 	// Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
387 	unsigned int texels_per_block_simd = round_up_to_simd_multiple_vla(texels_per_block);
388 	for (unsigned int i = texels_per_block; i < texels_per_block_simd; i++)
389 	{
390 		di.texel_weight_count[i] = 0;
391 
392 		for (unsigned int j = 0; j < 4; j++)
393 		{
394 			di.texel_weight_contribs_float_tr[j][i] = 0;
395 			di.texel_weights_tr[j][i] = 0;
396 			di.texel_weight_contribs_int_tr[j][i] = 0;
397 		}
398 	}
399 
400 	// Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
401 	// Match last texel in active lane in SIMD group, for better gathers
402 	unsigned int last_texel_count_wt = wb.texel_count_of_weight[weights_per_block - 1];
403 	uint8_t last_texel = di.weight_texels_tr[last_texel_count_wt - 1][weights_per_block - 1];
404 
405 	unsigned int weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block);
406 	for (unsigned int i = weights_per_block; i < weights_per_block_simd; i++)
407 	{
408 		di.weight_texel_count[i] = 0;
409 
410 		for (unsigned int j = 0; j < max_texel_count_of_weight; j++)
411 		{
412 			di.weight_texels_tr[j][i] = last_texel;
413 			di.weights_texel_contribs_tr[j][i] = 0.0f;
414 		}
415 	}
416 
417 	di.texel_count = static_cast<uint8_t>(texels_per_block);
418 	di.weight_count = static_cast<uint8_t>(weights_per_block);
419 	di.weight_x = static_cast<uint8_t>(x_weights);
420 	di.weight_y = static_cast<uint8_t>(y_weights);
421 	di.weight_z = 1;
422 }
423 
424 /**
425  * @brief Create a 3D decimation entry for a block-size and weight-decimation pair.
426  *
427  * @param      x_texels    The number of texels in the X dimension.
428  * @param      y_texels    The number of texels in the Y dimension.
429  * @param      z_texels    The number of texels in the Z dimension.
430  * @param      x_weights   The number of weights in the X dimension.
431  * @param      y_weights   The number of weights in the Y dimension.
432  * @param      z_weights   The number of weights in the Z dimension.
433  * @param[out] di          The decimation info structure to populate.
434    @param[out] wb          The decimation table init scratch working buffers.
435  */
init_decimation_info_3d(unsigned int x_texels,unsigned int y_texels,unsigned int z_texels,unsigned int x_weights,unsigned int y_weights,unsigned int z_weights,decimation_info & di,dt_init_working_buffers & wb)436 static void init_decimation_info_3d(
437 	unsigned int x_texels,
438 	unsigned int y_texels,
439 	unsigned int z_texels,
440 	unsigned int x_weights,
441 	unsigned int y_weights,
442 	unsigned int z_weights,
443 	decimation_info& di,
444 	dt_init_working_buffers& wb
445 ) {
446 	unsigned int texels_per_block = x_texels * y_texels * z_texels;
447 	unsigned int weights_per_block = x_weights * y_weights * z_weights;
448 
449 	uint8_t max_texel_count_of_weight = 0;
450 
451 	promise(weights_per_block > 0);
452 	promise(texels_per_block > 0);
453 
454 	for (unsigned int i = 0; i < weights_per_block; i++)
455 	{
456 		wb.texel_count_of_weight[i] = 0;
457 	}
458 
459 	for (unsigned int i = 0; i < texels_per_block; i++)
460 	{
461 		wb.weight_count_of_texel[i] = 0;
462 	}
463 
464 	for (unsigned int z = 0; z < z_texels; z++)
465 	{
466 		for (unsigned int y = 0; y < y_texels; y++)
467 		{
468 			for (unsigned int x = 0; x < x_texels; x++)
469 			{
470 				int texel = (z * y_texels + y) * x_texels + x;
471 
472 				int x_weight = (((1024 + x_texels / 2) / (x_texels - 1)) * x * (x_weights - 1) + 32) >> 6;
473 				int y_weight = (((1024 + y_texels / 2) / (y_texels - 1)) * y * (y_weights - 1) + 32) >> 6;
474 				int z_weight = (((1024 + z_texels / 2) / (z_texels - 1)) * z * (z_weights - 1) + 32) >> 6;
475 
476 				int x_weight_frac = x_weight & 0xF;
477 				int y_weight_frac = y_weight & 0xF;
478 				int z_weight_frac = z_weight & 0xF;
479 				int x_weight_int = x_weight >> 4;
480 				int y_weight_int = y_weight >> 4;
481 				int z_weight_int = z_weight >> 4;
482 				int qweight[4];
483 				int weight[4];
484 				qweight[0] = (z_weight_int * y_weights + y_weight_int) * x_weights + x_weight_int;
485 				qweight[3] = ((z_weight_int + 1) * y_weights + (y_weight_int + 1)) * x_weights + (x_weight_int + 1);
486 
487 				// simplex interpolation
488 				int fs = x_weight_frac;
489 				int ft = y_weight_frac;
490 				int fp = z_weight_frac;
491 
492 				int cas = ((fs > ft) << 2) + ((ft > fp) << 1) + ((fs > fp));
493 				int N = x_weights;
494 				int NM = x_weights * y_weights;
495 
496 				int s1, s2, w0, w1, w2, w3;
497 				switch (cas)
498 				{
499 				case 7:
500 					s1 = 1;
501 					s2 = N;
502 					w0 = 16 - fs;
503 					w1 = fs - ft;
504 					w2 = ft - fp;
505 					w3 = fp;
506 					break;
507 				case 3:
508 					s1 = N;
509 					s2 = 1;
510 					w0 = 16 - ft;
511 					w1 = ft - fs;
512 					w2 = fs - fp;
513 					w3 = fp;
514 					break;
515 				case 5:
516 					s1 = 1;
517 					s2 = NM;
518 					w0 = 16 - fs;
519 					w1 = fs - fp;
520 					w2 = fp - ft;
521 					w3 = ft;
522 					break;
523 				case 4:
524 					s1 = NM;
525 					s2 = 1;
526 					w0 = 16 - fp;
527 					w1 = fp - fs;
528 					w2 = fs - ft;
529 					w3 = ft;
530 					break;
531 				case 2:
532 					s1 = N;
533 					s2 = NM;
534 					w0 = 16 - ft;
535 					w1 = ft - fp;
536 					w2 = fp - fs;
537 					w3 = fs;
538 					break;
539 				case 0:
540 					s1 = NM;
541 					s2 = N;
542 					w0 = 16 - fp;
543 					w1 = fp - ft;
544 					w2 = ft - fs;
545 					w3 = fs;
546 					break;
547 				default:
548 					s1 = NM;
549 					s2 = N;
550 					w0 = 16 - fp;
551 					w1 = fp - ft;
552 					w2 = ft - fs;
553 					w3 = fs;
554 					break;
555 				}
556 
557 				qweight[1] = qweight[0] + s1;
558 				qweight[2] = qweight[1] + s2;
559 				weight[0] = w0;
560 				weight[1] = w1;
561 				weight[2] = w2;
562 				weight[3] = w3;
563 
564 				for (unsigned int i = 0; i < 4; i++)
565 				{
566 					if (weight[i] != 0)
567 					{
568 						wb.grid_weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(qweight[i]);
569 						wb.weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(weight[i]);
570 						wb.weight_count_of_texel[texel]++;
571 						wb.texels_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(texel);
572 						wb.texel_weights_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(weight[i]);
573 						wb.texel_count_of_weight[qweight[i]]++;
574 						max_texel_count_of_weight = astc::max(max_texel_count_of_weight, wb.texel_count_of_weight[qweight[i]]);
575 					}
576 				}
577 			}
578 		}
579 	}
580 
581 	uint8_t max_texel_weight_count = 0;
582 	for (unsigned int i = 0; i < texels_per_block; i++)
583 	{
584 		di.texel_weight_count[i] = wb.weight_count_of_texel[i];
585 		max_texel_weight_count = astc::max(max_texel_weight_count, di.texel_weight_count[i]);
586 
587 		// Init all 4 entries so we can rely on zeros for vectorization
588 		for (unsigned int j = 0; j < 4; j++)
589 		{
590 			di.texel_weight_contribs_int_tr[j][i] = 0;
591 			di.texel_weight_contribs_float_tr[j][i] = 0.0f;
592 			di.texel_weights_tr[j][i] = 0;
593 		}
594 
595 		for (unsigned int j = 0; j < wb.weight_count_of_texel[i]; j++)
596 		{
597 			di.texel_weight_contribs_int_tr[j][i] = wb.weights_of_texel[i][j];
598 			di.texel_weight_contribs_float_tr[j][i] = static_cast<float>(wb.weights_of_texel[i][j]) * (1.0f / WEIGHTS_TEXEL_SUM);
599 			di.texel_weights_tr[j][i] = wb.grid_weights_of_texel[i][j];
600 		}
601 	}
602 
603 	di.max_texel_weight_count = max_texel_weight_count;
604 
605 	for (unsigned int i = 0; i < weights_per_block; i++)
606 	{
607 		unsigned int texel_count_wt = wb.texel_count_of_weight[i];
608 		di.weight_texel_count[i] = static_cast<uint8_t>(texel_count_wt);
609 
610 		for (unsigned int j = 0; j < texel_count_wt; j++)
611 		{
612 			unsigned int texel = wb.texels_of_weight[i][j];
613 
614 			// Create transposed versions of these for better vectorization
615 			di.weight_texels_tr[j][i] = static_cast<uint8_t>(texel);
616 			di.weights_texel_contribs_tr[j][i] = static_cast<float>(wb.texel_weights_of_weight[i][j]);
617 
618 			// Store the per-texel contribution of this weight for each texel it contributes to
619 			di.texel_contrib_for_weight[j][i] = 0.0f;
620 			for (unsigned int k = 0; k < 4; k++)
621 			{
622 				uint8_t dttw = di.texel_weights_tr[k][texel];
623 				float dttwf = di.texel_weight_contribs_float_tr[k][texel];
624 				if (dttw == i && dttwf != 0.0f)
625 				{
626 					di.texel_contrib_for_weight[j][i] = di.texel_weight_contribs_float_tr[k][texel];
627 					break;
628 				}
629 			}
630 		}
631 
632 		// Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
633 		// Match last texel in active lane in SIMD group, for better gathers
634 		uint8_t last_texel = di.weight_texels_tr[texel_count_wt - 1][i];
635 		for (unsigned int j = texel_count_wt; j < max_texel_count_of_weight; j++)
636 		{
637 			di.weight_texels_tr[j][i] = last_texel;
638 			di.weights_texel_contribs_tr[j][i] = 0.0f;
639 		}
640 	}
641 
642 	// Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
643 	unsigned int texels_per_block_simd = round_up_to_simd_multiple_vla(texels_per_block);
644 	for (unsigned int i = texels_per_block; i < texels_per_block_simd; i++)
645 	{
646 		di.texel_weight_count[i] = 0;
647 
648 		for (unsigned int j = 0; j < 4; j++)
649 		{
650 			di.texel_weight_contribs_float_tr[j][i] = 0;
651 			di.texel_weights_tr[j][i] = 0;
652 			di.texel_weight_contribs_int_tr[j][i] = 0;
653 		}
654 	}
655 
656 	// Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
657 	// Match last texel in active lane in SIMD group, for better gathers
658 	int last_texel_count_wt = wb.texel_count_of_weight[weights_per_block - 1];
659 	uint8_t last_texel = di.weight_texels_tr[last_texel_count_wt - 1][weights_per_block - 1];
660 
661 	unsigned int weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block);
662 	for (unsigned int i = weights_per_block; i < weights_per_block_simd; i++)
663 	{
664 		di.weight_texel_count[i] = 0;
665 
666 		for (int j = 0; j < max_texel_count_of_weight; j++)
667 		{
668 			di.weight_texels_tr[j][i] = last_texel;
669 			di.weights_texel_contribs_tr[j][i] = 0.0f;
670 		}
671 	}
672 
673 	di.texel_count = static_cast<uint8_t>(texels_per_block);
674 	di.weight_count = static_cast<uint8_t>(weights_per_block);
675 	di.weight_x = static_cast<uint8_t>(x_weights);
676 	di.weight_y = static_cast<uint8_t>(y_weights);
677 	di.weight_z = static_cast<uint8_t>(z_weights);
678 }
679 
680 /**
681  * @brief Assign the texels to use for kmeans clustering.
682  *
683  * The max limit is @c BLOCK_MAX_KMEANS_TEXELS; above this a random selection is used.
684  * The @c bsd.texel_count is an input and must be populated beforehand.
685  *
686  * @param[in,out] bsd   The block size descriptor to populate.
687  */
assign_kmeans_texels(block_size_descriptor & bsd)688 static void assign_kmeans_texels(
689 	block_size_descriptor& bsd
690 ) {
691 	// Use all texels for kmeans on a small block
692 	if (bsd.texel_count <= BLOCK_MAX_KMEANS_TEXELS)
693 	{
694 		for (uint8_t i = 0; i < bsd.texel_count; i++)
695 		{
696 			bsd.kmeans_texels[i] = i;
697 		}
698 
699 		return;
700 	}
701 
702 	// Select a random subset of BLOCK_MAX_KMEANS_TEXELS for kmeans on a large block
703 	uint64_t rng_state[2];
704 	astc::rand_init(rng_state);
705 
706 	// Initialize array used for tracking used indices
707 	bool seen[BLOCK_MAX_TEXELS];
708 	for (uint8_t i = 0; i < bsd.texel_count; i++)
709 	{
710 		seen[i] = false;
711 	}
712 
713 	// Assign 64 random indices, retrying if we see repeats
714 	unsigned int arr_elements_set = 0;
715 	while (arr_elements_set < BLOCK_MAX_KMEANS_TEXELS)
716 	{
717 		uint8_t texel = static_cast<uint8_t>(astc::rand(rng_state));
718 		texel = texel % bsd.texel_count;
719 		if (!seen[texel])
720 		{
721 			bsd.kmeans_texels[arr_elements_set++] = texel;
722 			seen[texel] = true;
723 		}
724 	}
725 }
726 
727 /**
728  * @brief Allocate a single 2D decimation table entry.
729  *
730  * @param x_texels    The number of texels in the X dimension.
731  * @param y_texels    The number of texels in the Y dimension.
732  * @param x_weights   The number of weights in the X dimension.
733  * @param y_weights   The number of weights in the Y dimension.
734  * @param bsd         The block size descriptor we are populating.
735  * @param wb          The decimation table init scratch working buffers.
736  * @param index       The packed array index to populate.
737  */
construct_dt_entry_2d(unsigned int x_texels,unsigned int y_texels,unsigned int x_weights,unsigned int y_weights,block_size_descriptor & bsd,dt_init_working_buffers & wb,unsigned int index)738 static void construct_dt_entry_2d(
739 	unsigned int x_texels,
740 	unsigned int y_texels,
741 	unsigned int x_weights,
742 	unsigned int y_weights,
743 	block_size_descriptor& bsd,
744 	dt_init_working_buffers& wb,
745 	unsigned int index
746 ) {
747 	unsigned int weight_count = x_weights * y_weights;
748 	assert(weight_count <= BLOCK_MAX_WEIGHTS);
749 
750 	bool try_2planes = (2 * weight_count) <= BLOCK_MAX_WEIGHTS;
751 
752 	decimation_info& di = bsd.decimation_tables[index];
753 	init_decimation_info_2d(x_texels, y_texels, x_weights, y_weights, di, wb);
754 
755 	int maxprec_1plane = -1;
756 	int maxprec_2planes = -1;
757 	for (int i = 0; i < 12; i++)
758 	{
759 		unsigned int bits_1plane = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(i));
760 		if (bits_1plane >= BLOCK_MIN_WEIGHT_BITS && bits_1plane <= BLOCK_MAX_WEIGHT_BITS)
761 		{
762 			maxprec_1plane = i;
763 		}
764 
765 		if (try_2planes)
766 		{
767 			unsigned int bits_2planes = get_ise_sequence_bitcount(2 * weight_count, static_cast<quant_method>(i));
768 			if (bits_2planes >= BLOCK_MIN_WEIGHT_BITS && bits_2planes <= BLOCK_MAX_WEIGHT_BITS)
769 			{
770 				maxprec_2planes = i;
771 			}
772 		}
773 	}
774 
775 	// At least one of the two should be valid ...
776 	assert(maxprec_1plane >= 0 || maxprec_2planes >= 0);
777 	bsd.decimation_modes[index].maxprec_1plane = static_cast<int8_t>(maxprec_1plane);
778 	bsd.decimation_modes[index].maxprec_2planes = static_cast<int8_t>(maxprec_2planes);
779 	bsd.decimation_modes[index].refprec_1plane = 0;
780 	bsd.decimation_modes[index].refprec_2planes = 0;
781 }
782 
/**
 * @brief Allocate block modes and decimation tables for a single 2D block size.
 *
 * @param      privateProfile   The quality profile; it restricts which block modes are kept.
 * @param      x_texels         The number of texels in the X dimension.
 * @param      y_texels         The number of texels in the Y dimension.
 * @param      can_omit_modes   Can we discard modes that astcenc won't use, even if legal?
 * @param      mode_cutoff      Percentile cutoff in range [0,1]. Low values more likely to be used.
 * @param[out] bsd              The block size descriptor to populate.
 *
 * @return Only in builds with @c ASTC_CUSTOMIZED_ENABLE: false if the customized library or its
 *         block mode callback is unavailable. Other builds return nothing.
 */
792 #ifdef ASTC_CUSTOMIZED_ENABLE
construct_block_size_descriptor_2d(QualityProfile privateProfile,unsigned int x_texels,unsigned int y_texels,bool can_omit_modes,float mode_cutoff,block_size_descriptor & bsd)793 static bool construct_block_size_descriptor_2d(
794 #else
795 static void construct_block_size_descriptor_2d(
796 #endif
797 	QualityProfile privateProfile,
798 	unsigned int x_texels,
799 	unsigned int y_texels,
800 	bool can_omit_modes,
801 	float mode_cutoff,
802 	block_size_descriptor& bsd
803 ) {
804 	// Store a remap table for storing packed decimation modes.
805 	// Indexing uses [Y * 16 + X] and max size for each axis is 12.
806 	static const unsigned int MAX_DMI = 12 * 16 + 12;
807 	int decimation_mode_index[MAX_DMI];
808 
809 	dt_init_working_buffers* wb = new dt_init_working_buffers;
810 
811 	bsd.xdim = static_cast<uint8_t>(x_texels);
812 	bsd.ydim = static_cast<uint8_t>(y_texels);
813 	bsd.zdim = 1;
814 	bsd.texel_count = static_cast<uint8_t>(x_texels * y_texels);
815 
816 	for (unsigned int i = 0; i < MAX_DMI; i++)
817 	{
818 		decimation_mode_index[i] = -1;
819 	}
820 
821 	// Gather all the decimation grids that can be used with the current block
822 #if !defined(ASTCENC_DECOMPRESS_ONLY)
823 	const float *percentiles = get_2d_percentile_table(x_texels, y_texels);
824 	float always_cutoff = (privateProfile != HIGH_QUALITY_PROFILE) ? 1.0f : 0.0f;
825 #else
826 	// Unused in decompress-only builds
827 	(void)can_omit_modes;
828 	(void)mode_cutoff;
829 #endif
830 
831 	// Construct the list of block formats referencing the decimation tables
832 	unsigned int packed_bm_idx = 0;
833 	unsigned int packed_dm_idx = 0;
834 
835 	// Trackers
836 	unsigned int bm_counts[4] { 0 };
837 	unsigned int dm_counts[4] { 0 };
838 
839 	// Clear the list to a known-bad value
840 	for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
841 	{
842 		bsd.block_mode_packed_index[i] = BLOCK_BAD_BLOCK_MODE;
843 	}
844 
845 	// Iterate four times to build a usefully ordered list:
846 	//   - Pass 0 - keep selected single plane "always" block modes
847 	//   - Pass 1 - keep selected single plane "non-always" block modes
848 	//   - Pass 2 - keep select dual plane block modes
849 	//   - Pass 3 - keep everything else that's legal
850 	unsigned int limit = can_omit_modes ? 3 : 4;
851 	for (unsigned int j = 0; j < limit; j ++)
852 	{
853 		for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
854 		{
855 			// Skip modes we've already included in a previous pass
856 			if (bsd.block_mode_packed_index[i] != BLOCK_BAD_BLOCK_MODE)
857 			{
858 				continue;
859 			}
860 			if ((privateProfile == HIGH_SPEED_PROFILE) && (i != HIGH_SPEED_PROFILE_BLOCK_MODE))
861 			{
862 				continue;
863 			}
864 #ifdef ASTC_CUSTOMIZED_ENABLE
865 			if (privateProfile == CUSTOMIZED_PROFILE)
866 			{
867 				if (!g_astcCustomizedSoManager.LoadSutCustomizedSo() ||
868 					g_astcCustomizedSoManager.isCustomizedBlockModeFunc_ == nullptr)
869 				{
870 					printf("astcenc customized so dlopen failed or isCustomizedBlockModeFunc_ is nullptr!\n");
871 					delete wb;
872 #if !defined(ASTCENC_DECOMPRESS_ONLY)
873 					delete[] percentiles;
874 #endif
875 					return false;
876 				}
877 				if (!g_astcCustomizedSoManager.isCustomizedBlockModeFunc_(i))
878 				{
879 					continue;
880 				}
881 			}
882 #endif
883 			// Decode parameters
884 			unsigned int x_weights;
885 			unsigned int y_weights;
886 			bool is_dual_plane;
887 			unsigned int quant_mode;
888 			unsigned int weight_bits;
889 			bool valid = decode_block_mode_2d(i, x_weights, y_weights, is_dual_plane, quant_mode, weight_bits);
890 
891 			// Always skip invalid encodings for the current block size
892 			if (!valid || (x_weights > x_texels) || (y_weights > y_texels))
893 			{
894 				continue;
895 			}
896 
897 			// Selectively skip dual plane encodings
898 			if (((j <= 1) && is_dual_plane) || (j == 2 && !is_dual_plane))
899 			{
900 				continue;
901 			}
902 
903 			// Always skip encodings we can't physically encode based on
904 			// generic encoding bit availability
905 			if (is_dual_plane)
906 			{
907 				 // This is the only check we need as only support 1 partition
908 				 if ((109 - weight_bits) <= 0)
909 				 {
910 					continue;
911 				 }
912 			}
913 			else
914 			{
915 				// This is conservative - fewer bits may be available for > 1 partition
916 				 if ((111 - weight_bits) <= 0)
917 				 {
918 					continue;
919 				 }
920 			}
921 
922 			// Selectively skip encodings based on percentile
923 			bool percentile_hit = false;
924 	#if !defined(ASTCENC_DECOMPRESS_ONLY)
925 			if (j == 0)
926 			{
927 				percentile_hit = percentiles[i] <= always_cutoff;
928 			}
929 			else
930 			{
931 				percentile_hit = percentiles[i] <= mode_cutoff;
932 			}
933 	#endif
934 
935 			if (j != 3 && !percentile_hit)
936 			{
937 				continue;
938 			}
939 
940 			// Allocate and initialize the decimation table entry if we've not used it yet
941 			int decimation_mode = decimation_mode_index[y_weights * 16 + x_weights];
942 			if (decimation_mode < 0)
943 			{
944 				construct_dt_entry_2d(x_texels, y_texels, x_weights, y_weights, bsd, *wb, packed_dm_idx);
945 				if (privateProfile == HIGH_SPEED_PROFILE)
946 				{
947 					bsd.decimation_modes[packed_dm_idx].maxprec_1plane = 4; // Speed optimization: max prec num is limited to 4
948 				}
949 				decimation_mode_index[y_weights * 16 + x_weights] = packed_dm_idx;
950 				decimation_mode = packed_dm_idx;
951 
952 				dm_counts[j]++;
953 				packed_dm_idx++;
954 			}
955 
956 			auto& bm = bsd.block_modes[packed_bm_idx];
957 
958 			bm.decimation_mode = static_cast<uint8_t>(decimation_mode);
959 			bm.quant_mode = static_cast<uint8_t>(quant_mode);
960 			bm.is_dual_plane = static_cast<uint8_t>(is_dual_plane);
961 			bm.weight_bits = static_cast<uint8_t>(weight_bits);
962 			bm.mode_index = static_cast<uint16_t>(i);
963 
964 			auto& dm = bsd.decimation_modes[decimation_mode];
965 
966 			if (is_dual_plane)
967 			{
968 				dm.set_ref_2plane(bm.get_weight_quant_mode());
969 			}
970 			else
971 			{
972 				dm.set_ref_1plane(bm.get_weight_quant_mode());
973 			}
974 
975 			bsd.block_mode_packed_index[i] = static_cast<uint16_t>(packed_bm_idx);
976 
977 			packed_bm_idx++;
978 			bm_counts[j]++;
979 		}
980 	}
981 
982 	bsd.block_mode_count_1plane_always = bm_counts[0];
983 	bsd.block_mode_count_1plane_selected = bm_counts[0] + bm_counts[1];
984 	bsd.block_mode_count_1plane_2plane_selected = bm_counts[0] + bm_counts[1] + bm_counts[2];
985 	bsd.block_mode_count_all = bm_counts[0] + bm_counts[1] + bm_counts[2] + bm_counts[3];
986 
987 	bsd.decimation_mode_count_always = dm_counts[0];
988 	bsd.decimation_mode_count_selected = dm_counts[0] + dm_counts[1] + dm_counts[2];
989 	bsd.decimation_mode_count_all = dm_counts[0] + dm_counts[1] + dm_counts[2] + dm_counts[3];
990 
991 #if !defined(ASTCENC_DECOMPRESS_ONLY)
992 	assert(bsd.block_mode_count_1plane_always > 0);
993 	assert(bsd.decimation_mode_count_always > 0);
994 
995 	delete[] percentiles;
996 #endif
997 
998 	// Ensure the end of the array contains valid data (should never get read)
999 	for (unsigned int i = bsd.decimation_mode_count_all; i < WEIGHTS_MAX_DECIMATION_MODES; i++)
1000 	{
1001 		bsd.decimation_modes[i].maxprec_1plane = -1;
1002 		bsd.decimation_modes[i].maxprec_2planes = -1;
1003 		bsd.decimation_modes[i].refprec_1plane = 0;
1004 		bsd.decimation_modes[i].refprec_2planes = 0;
1005 	}
1006 
1007 	// Determine the texels to use for kmeans clustering.
1008 	assign_kmeans_texels(bsd);
1009 
1010 	delete wb;
1011 #ifdef ASTC_CUSTOMIZED_ENABLE
1012 	return true;
1013 #endif
1014 }
1015 
1016 /**
1017  * @brief Allocate block modes and decimation tables for a single 3D block size.
1018  *
1019  * TODO: This function doesn't include all of the heuristics that we use for 2D block sizes such as
1020  * the percentile mode cutoffs. If 3D becomes more widely used we should look at this.
1021  *
1022  * @param      x_texels   The number of texels in the X dimension.
1023  * @param      y_texels   The number of texels in the Y dimension.
1024  * @param      z_texels   The number of texels in the Z dimension.
1025  * @param[out] bsd        The block size descriptor to populate.
1026  */
construct_block_size_descriptor_3d(unsigned int x_texels,unsigned int y_texels,unsigned int z_texels,block_size_descriptor & bsd)1027 static void construct_block_size_descriptor_3d(
1028 	unsigned int x_texels,
1029 	unsigned int y_texels,
1030 	unsigned int z_texels,
1031 	block_size_descriptor& bsd
1032 ) {
1033 	// Store a remap table for storing packed decimation modes.
1034 	// Indexing uses [Z * 64 + Y *  8 + X] and max size for each axis is 6.
1035 	static constexpr unsigned int MAX_DMI = 6 * 64 + 6 * 8 + 6;
1036 	int decimation_mode_index[MAX_DMI];
1037 	unsigned int decimation_mode_count = 0;
1038 
1039 	dt_init_working_buffers* wb = new dt_init_working_buffers;
1040 
1041 	bsd.xdim = static_cast<uint8_t>(x_texels);
1042 	bsd.ydim = static_cast<uint8_t>(y_texels);
1043 	bsd.zdim = static_cast<uint8_t>(z_texels);
1044 	bsd.texel_count = static_cast<uint8_t>(x_texels * y_texels * z_texels);
1045 
1046 	for (unsigned int i = 0; i < MAX_DMI; i++)
1047 	{
1048 		decimation_mode_index[i] = -1;
1049 	}
1050 
1051 	// gather all the infill-modes that can be used with the current block size
1052 	for (unsigned int x_weights = 2; x_weights <= x_texels; x_weights++)
1053 	{
1054 		for (unsigned int y_weights = 2; y_weights <= y_texels; y_weights++)
1055 		{
1056 			for (unsigned int z_weights = 2; z_weights <= z_texels; z_weights++)
1057 			{
1058 				unsigned int weight_count = x_weights * y_weights * z_weights;
1059 				if (weight_count > BLOCK_MAX_WEIGHTS)
1060 				{
1061 					continue;
1062 				}
1063 
1064 				decimation_info& di = bsd.decimation_tables[decimation_mode_count];
1065 				decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights] = decimation_mode_count;
1066 				init_decimation_info_3d(x_texels, y_texels, z_texels, x_weights, y_weights, z_weights, di, *wb);
1067 
1068 				int maxprec_1plane = -1;
1069 				int maxprec_2planes = -1;
1070 				for (unsigned int i = 0; i < 12; i++)
1071 				{
1072 					unsigned int bits_1plane = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(i));
1073 					if (bits_1plane >= BLOCK_MIN_WEIGHT_BITS && bits_1plane <= BLOCK_MAX_WEIGHT_BITS)
1074 					{
1075 						maxprec_1plane = i;
1076 					}
1077 
1078 					unsigned int bits_2planes = get_ise_sequence_bitcount(2 * weight_count, static_cast<quant_method>(i));
1079 					if (bits_2planes >= BLOCK_MIN_WEIGHT_BITS && bits_2planes <= BLOCK_MAX_WEIGHT_BITS)
1080 					{
1081 						maxprec_2planes = i;
1082 					}
1083 				}
1084 
1085 				if ((2 * weight_count) > BLOCK_MAX_WEIGHTS)
1086 				{
1087 					maxprec_2planes = -1;
1088 				}
1089 
1090 				bsd.decimation_modes[decimation_mode_count].maxprec_1plane = static_cast<int8_t>(maxprec_1plane);
1091 				bsd.decimation_modes[decimation_mode_count].maxprec_2planes = static_cast<int8_t>(maxprec_2planes);
1092 				bsd.decimation_modes[decimation_mode_count].refprec_1plane = maxprec_1plane == -1 ? 0 : 0xFFFF;
1093 				bsd.decimation_modes[decimation_mode_count].refprec_2planes = maxprec_2planes == -1 ? 0 : 0xFFFF;
1094 				decimation_mode_count++;
1095 			}
1096 		}
1097 	}
1098 
1099 	// Ensure the end of the array contains valid data (should never get read)
1100 	for (unsigned int i = decimation_mode_count; i < WEIGHTS_MAX_DECIMATION_MODES; i++)
1101 	{
1102 		bsd.decimation_modes[i].maxprec_1plane = -1;
1103 		bsd.decimation_modes[i].maxprec_2planes = -1;
1104 		bsd.decimation_modes[i].refprec_1plane = 0;
1105 		bsd.decimation_modes[i].refprec_2planes = 0;
1106 	}
1107 
1108 	bsd.decimation_mode_count_always = 0; // Skipped for 3D modes
1109 	bsd.decimation_mode_count_selected = decimation_mode_count;
1110 	bsd.decimation_mode_count_all = decimation_mode_count;
1111 
1112 	// Construct the list of block formats referencing the decimation tables
1113 
1114 	// Clear the list to a known-bad value
1115 	for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
1116 	{
1117 		bsd.block_mode_packed_index[i] = BLOCK_BAD_BLOCK_MODE;
1118 	}
1119 
1120 	unsigned int packed_idx = 0;
1121 	unsigned int bm_counts[2] { 0 };
1122 
1123 	// Iterate two times to build a usefully ordered list:
1124 	//   - Pass 0 - keep valid single plane block modes
1125 	//   - Pass 1 - keep valid dual plane block modes
1126 	for (unsigned int j = 0; j < 2; j++)
1127 	{
1128 		for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
1129 		{
1130 			// Skip modes we've already included in a previous pass
1131 			if (bsd.block_mode_packed_index[i] != BLOCK_BAD_BLOCK_MODE)
1132 			{
1133 				continue;
1134 			}
1135 
1136 			unsigned int x_weights;
1137 			unsigned int y_weights;
1138 			unsigned int z_weights;
1139 			bool is_dual_plane;
1140 			unsigned int quant_mode;
1141 			unsigned int weight_bits;
1142 
1143 			bool valid = decode_block_mode_3d(i, x_weights, y_weights, z_weights, is_dual_plane, quant_mode, weight_bits);
1144 			// Skip invalid encodings
1145 			if (!valid || x_weights > x_texels || y_weights > y_texels || z_weights > z_texels)
1146 			{
1147 				continue;
1148 			}
1149 
1150 			// Skip encodings in the wrong iteration
1151 			if ((j == 0 && is_dual_plane) || (j == 1 && !is_dual_plane))
1152 			{
1153 				continue;
1154 			}
1155 
1156 			// Always skip encodings we can't physically encode based on bit availability
1157 			if (is_dual_plane)
1158 			{
1159 				 // This is the only check we need as only support 1 partition
1160 				 if ((109 - weight_bits) <= 0)
1161 				 {
1162 					continue;
1163 				 }
1164 			}
1165 			else
1166 			{
1167 				// This is conservative - fewer bits may be available for > 1 partition
1168 				 if ((111 - weight_bits) <= 0)
1169 				 {
1170 					continue;
1171 				 }
1172 			}
1173 
1174 			int decimation_mode = decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights];
1175 			bsd.block_modes[packed_idx].decimation_mode = static_cast<uint8_t>(decimation_mode);
1176 			bsd.block_modes[packed_idx].quant_mode = static_cast<uint8_t>(quant_mode);
1177 			bsd.block_modes[packed_idx].weight_bits = static_cast<uint8_t>(weight_bits);
1178 			bsd.block_modes[packed_idx].is_dual_plane = static_cast<uint8_t>(is_dual_plane);
1179 			bsd.block_modes[packed_idx].mode_index = static_cast<uint16_t>(i);
1180 
1181 			bsd.block_mode_packed_index[i] = static_cast<uint16_t>(packed_idx);
1182 			bm_counts[j]++;
1183 			packed_idx++;
1184 		}
1185 	}
1186 
1187 	bsd.block_mode_count_1plane_always = 0;  // Skipped for 3D modes
1188 	bsd.block_mode_count_1plane_selected = bm_counts[0];
1189 	bsd.block_mode_count_1plane_2plane_selected = bm_counts[0] + bm_counts[1];
1190 	bsd.block_mode_count_all = bm_counts[0] + bm_counts[1];
1191 
1192 	// Determine the texels to use for kmeans clustering.
1193 	assign_kmeans_texels(bsd);
1194 
1195 	delete wb;
1196 }
1197 
1198 /* See header for documentation. */
1199 #ifdef ASTC_CUSTOMIZED_ENABLE
init_block_size_descriptor(QualityProfile privateProfile,unsigned int x_texels,unsigned int y_texels,unsigned int z_texels,bool can_omit_modes,unsigned int partition_count_cutoff,float mode_cutoff,block_size_descriptor & bsd)1200 bool init_block_size_descriptor(
1201 #else
1202 void init_block_size_descriptor(
1203 #endif
1204 	QualityProfile privateProfile,
1205 	unsigned int x_texels,
1206 	unsigned int y_texels,
1207 	unsigned int z_texels,
1208 	bool can_omit_modes,
1209 	unsigned int partition_count_cutoff,
1210 	float mode_cutoff,
1211 	block_size_descriptor& bsd
1212 ) {
1213 	if (z_texels > 1)
1214 	{
1215 		construct_block_size_descriptor_3d(x_texels, y_texels, z_texels, bsd);
1216 	}
1217 	else
1218 	{
1219 #ifdef ASTC_CUSTOMIZED_ENABLE
1220 		if (!construct_block_size_descriptor_2d(privateProfile, x_texels, y_texels, can_omit_modes, mode_cutoff, bsd))
1221 		{
1222 			return false;
1223 		}
1224 #else
1225 		construct_block_size_descriptor_2d(privateProfile, x_texels, y_texels, can_omit_modes, mode_cutoff, bsd);
1226 #endif
1227 	}
1228 
1229 	init_partition_tables(bsd, can_omit_modes, partition_count_cutoff);
1230 #ifdef ASTC_CUSTOMIZED_ENABLE
1231 	return true;
1232 #endif
1233 }
1234