• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: Apache-2.0
2 // ----------------------------------------------------------------------------
3 // Copyright 2011-2022 Arm Limited
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
6 // use this file except in compliance with the License. You may obtain a copy
7 // of the License at:
8 //
9 //     http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 // License for the specific language governing permissions and limitations
15 // under the License.
16 // ----------------------------------------------------------------------------
17 
18 #if !defined(ASTCENC_DECOMPRESS_ONLY)
19 
20 /**
21  * @brief Functions for finding best partition for a block.
22  *
23  * The partition search operates in two stages. The first pass uses kmeans clustering to group
24  * texels into an ideal partitioning for the requested partition count, and then compares that
25  * against the 1024 partitionings generated by the ASTC partition hash function. The generated
26  * partitions are then ranked by the number of texels in the wrong partition, compared to the ideal
27  * clustering. All 1024 partitions are tested for similarity and ranked, apart from duplicates and
28  * partitionings that actually generate fewer than the requested partition count, but only the top
29  * N candidates are actually put through a more detailed search. N is determined by the compressor
30  * quality preset.
31  *
32  * For the detailed search, each candidate is checked against two possible encoding methods:
33  *
34  *   - The best partitioning assuming different chroma colors (RGB + RGB or RGB + delta endpoints).
35  *   - The best partitioning assuming same chroma colors (RGB + scale endpoints).
36  *
37  * This is implemented by computing the compute mean color and dominant direction for each
38  * partition. This defines two lines, both of which go through the mean color value.
39  *
40  * - One line has a direction defined by the dominant direction; this is used to assess the error
41  *   from using an uncorrelated color representation.
42  * - The other line goes through (0,0,0,1) and is used to assess the error from using a same chroma
43  *   (RGB + scale) color representation.
44  *
45  * The best candidate is selected by computing the squared-errors that result from using these
46  * lines for endpoint selection.
47  */
48 
49 #include "astcenc_internal.h"
50 
51 /**
52  * @brief Pick some initital kmeans cluster centers.
53  *
54  * @param      blk               The image block color data to compress.
55  * @param      texel_count       The number of texels in the block.
56  * @param      partition_count   The number of partitions in the block.
57  * @param[out] cluster_centers   The initital partition cluster center colors.
58  */
kmeans_init(const image_block & blk,unsigned int texel_count,unsigned int partition_count,vfloat4 cluster_centers[BLOCK_MAX_PARTITIONS])59 static void kmeans_init(
60 	const image_block& blk,
61 	unsigned int texel_count,
62 	unsigned int partition_count,
63 	vfloat4 cluster_centers[BLOCK_MAX_PARTITIONS]
64 ) {
65 	promise(texel_count > 0);
66 	promise(partition_count > 0);
67 
68 	unsigned int clusters_selected = 0;
69 	float distances[BLOCK_MAX_TEXELS];
70 
71 	// Pick a random sample as first cluster center; 145897 from random.org
72 	unsigned int sample = 145897 % texel_count;
73 	vfloat4 center_color = blk.texel(sample);
74 	cluster_centers[clusters_selected] = center_color;
75 	clusters_selected++;
76 
77 	// Compute the distance to the first cluster center
78 	float distance_sum = 0.0f;
79 	for (unsigned int i = 0; i < texel_count; i++)
80 	{
81 		vfloat4 color = blk.texel(i);
82 		vfloat4 diff = color - center_color;
83 		float distance = dot_s(diff * diff, blk.channel_weight);
84 		distance_sum += distance;
85 		distances[i] = distance;
86 	}
87 
88 	// More numbers from random.org for weighted-random center selection
89 	const float cluster_cutoffs[9] {
90 		0.626220f, 0.932770f, 0.275454f,
91 		0.318558f, 0.240113f, 0.009190f,
92 		0.347661f, 0.731960f, 0.156391f
93 	};
94 
95 	unsigned int cutoff = (clusters_selected - 1) + 3 * (partition_count - 2);
96 
97 	// Pick the remaining samples as needed
98 	while (true)
99 	{
100 		// Pick the next center in a weighted-random fashion.
101 		float summa = 0.0f;
102 		float distance_cutoff = distance_sum * cluster_cutoffs[cutoff++];
103 		for (sample = 0; sample < texel_count; sample++)
104 		{
105 			summa += distances[sample];
106 			if (summa >= distance_cutoff)
107 			{
108 				break;
109 			}
110 		}
111 
112 		// Clamp to a valid range and store the selected cluster center
113 		sample = astc::min(sample, texel_count - 1);
114 
115 		center_color = blk.texel(sample);
116 		cluster_centers[clusters_selected++] = center_color;
117 		if (clusters_selected >= partition_count)
118 		{
119 			break;
120 		}
121 
122 		// Compute the distance to the new cluster center, keep the min dist
123 		distance_sum = 0.0f;
124 		for (unsigned int i = 0; i < texel_count; i++)
125 		{
126 			vfloat4 color = blk.texel(i);
127 			vfloat4 diff = color - center_color;
128 			float distance = dot_s(diff * diff, blk.channel_weight);
129 			distance = astc::min(distance, distances[i]);
130 			distance_sum += distance;
131 			distances[i] = distance;
132 		}
133 	}
134 }
135 
136 /**
137  * @brief Assign texels to clusters, based on a set of chosen center points.
138  *
139  * @param      blk                  The image block color data to compress.
140  * @param      texel_count          The number of texels in the block.
141  * @param      partition_count      The number of partitions in the block.
142  * @param      cluster_centers      The partition cluster center colors.
143  * @param[out] partition_of_texel   The partition assigned for each texel.
144  */
kmeans_assign(const image_block & blk,unsigned int texel_count,unsigned int partition_count,const vfloat4 cluster_centers[BLOCK_MAX_PARTITIONS],uint8_t partition_of_texel[BLOCK_MAX_TEXELS])145 static void kmeans_assign(
146 	const image_block& blk,
147 	unsigned int texel_count,
148 	unsigned int partition_count,
149 	const vfloat4 cluster_centers[BLOCK_MAX_PARTITIONS],
150 	uint8_t partition_of_texel[BLOCK_MAX_TEXELS]
151 ) {
152 	promise(texel_count > 0);
153 	promise(partition_count > 0);
154 
155 	uint8_t partition_texel_count[BLOCK_MAX_PARTITIONS] { 0 };
156 
157 	// Find the best partition for every texel
158 	for (unsigned int i = 0; i < texel_count; i++)
159 	{
160 		float best_distance = std::numeric_limits<float>::max();
161 		unsigned int best_partition = 0;
162 
163 		vfloat4 color = blk.texel(i);
164 		for (unsigned int j = 0; j < partition_count; j++)
165 		{
166 			vfloat4 diff = color - cluster_centers[j];
167 			float distance = dot_s(diff * diff, blk.channel_weight);
168 			if (distance < best_distance)
169 			{
170 				best_distance = distance;
171 				best_partition = j;
172 			}
173 		}
174 
175 		partition_of_texel[i] = static_cast<uint8_t>(best_partition);
176 		partition_texel_count[best_partition]++;
177 	}
178 
179 	// It is possible to get a situation where a partition ends up without any texels. In this case,
180 	// assign texel N to partition N. This is silly, but ensures that every partition retains at
181 	// least one texel. Reassigning a texel in this manner may cause another partition to go empty,
182 	// so if we actually did a reassignment, run the whole loop over again.
183 	bool problem_case;
184 	do
185 	{
186 		problem_case = false;
187 		for (unsigned int i = 0; i < partition_count; i++)
188 		{
189 			if (partition_texel_count[i] == 0)
190 			{
191 				partition_texel_count[partition_of_texel[i]]--;
192 				partition_texel_count[i]++;
193 				partition_of_texel[i] = static_cast<uint8_t>(i);
194 				problem_case = true;
195 			}
196 		}
197 	} while (problem_case);
198 }
199 
200 /**
201  * @brief Compute new cluster centers based on their center of gravity.
202  *
203  * @param       blk                  The image block color data to compress.
204  * @param       texel_count          The number of texels in the block.
205  * @param       partition_count      The number of partitions in the block.
206  * @param[out]  cluster_centers      The new cluster center colors.
207  * @param       partition_of_texel   The partition assigned for each texel.
208  */
kmeans_update(const image_block & blk,unsigned int texel_count,unsigned int partition_count,vfloat4 cluster_centers[BLOCK_MAX_PARTITIONS],const uint8_t partition_of_texel[BLOCK_MAX_TEXELS])209 static void kmeans_update(
210 	const image_block& blk,
211 	unsigned int texel_count,
212 	unsigned int partition_count,
213 	vfloat4 cluster_centers[BLOCK_MAX_PARTITIONS],
214 	const uint8_t partition_of_texel[BLOCK_MAX_TEXELS]
215 ) {
216 	promise(texel_count > 0);
217 	promise(partition_count > 0);
218 
219 	vfloat4 color_sum[BLOCK_MAX_PARTITIONS] {
220 		vfloat4::zero(),
221 		vfloat4::zero(),
222 		vfloat4::zero(),
223 		vfloat4::zero()
224 	};
225 
226 	uint8_t partition_texel_count[BLOCK_MAX_PARTITIONS] { 0 };
227 
228 	// Find the center-of-gravity in each cluster
229 	for (unsigned int i = 0; i < texel_count; i++)
230 	{
231 		uint8_t partition = partition_of_texel[i];
232 		color_sum[partition] += blk.texel(i);
233 		partition_texel_count[partition]++;
234 	}
235 
236 	// Set the center of gravity to be the new cluster center
237 	for (unsigned int i = 0; i < partition_count; i++)
238 	{
239 		float scale = 1.0f / static_cast<float>(partition_texel_count[i]);
240 		cluster_centers[i] = color_sum[i] * scale;
241 	}
242 }
243 
244 /**
245  * @brief Compute bit-mismatch for partitioning in 2-partition mode.
246  *
247  * @param a   The texel assignment bitvector for the block.
248  * @param b   The texel assignment bitvector for the partition table.
249  *
250  * @return    The number of bit mismatches.
251  */
partition_mismatch2(const uint64_t a[2],const uint64_t b[2])252 static inline unsigned int partition_mismatch2(
253 	const uint64_t a[2],
254 	const uint64_t b[2]
255 ) {
256 	int v1 = popcount(a[0] ^ b[0]) + popcount(a[1] ^ b[1]);
257 	int v2 = popcount(a[0] ^ b[1]) + popcount(a[1] ^ b[0]);
258 	return astc::min(v1, v2);
259 }
260 
261 /**
262  * @brief Compute bit-mismatch for partitioning in 3-partition mode.
263  *
264  * @param a   The texel assignment bitvector for the block.
265  * @param b   The texel assignment bitvector for the partition table.
266  *
267  * @return    The number of bit mismatches.
268  */
partition_mismatch3(const uint64_t a[3],const uint64_t b[3])269 static inline unsigned int partition_mismatch3(
270 	const uint64_t a[3],
271 	const uint64_t b[3]
272 ) {
273 	int p00 = popcount(a[0] ^ b[0]);
274 	int p01 = popcount(a[0] ^ b[1]);
275 	int p02 = popcount(a[0] ^ b[2]);
276 
277 	int p10 = popcount(a[1] ^ b[0]);
278 	int p11 = popcount(a[1] ^ b[1]);
279 	int p12 = popcount(a[1] ^ b[2]);
280 
281 	int p20 = popcount(a[2] ^ b[0]);
282 	int p21 = popcount(a[2] ^ b[1]);
283 	int p22 = popcount(a[2] ^ b[2]);
284 
285 	int s0 = p11 + p22;
286 	int s1 = p12 + p21;
287 	int v0 = astc::min(s0, s1) + p00;
288 
289 	int s2 = p10 + p22;
290 	int s3 = p12 + p20;
291 	int v1 = astc::min(s2, s3) + p01;
292 
293 	int s4 = p10 + p21;
294 	int s5 = p11 + p20;
295 	int v2 = astc::min(s4, s5) + p02;
296 
297 	return astc::min(v0, v1, v2);
298 }
299 
300 /**
301  * @brief Compute bit-mismatch for partitioning in 4-partition mode.
302  *
303  * @param a   The texel assignment bitvector for the block.
304  * @param b   The texel assignment bitvector for the partition table.
305  *
306  * @return    The number of bit mismatches.
307  */
partition_mismatch4(const uint64_t a[4],const uint64_t b[4])308 static inline unsigned int partition_mismatch4(
309 	const uint64_t a[4],
310 	const uint64_t b[4]
311 ) {
312 	int p00 = popcount(a[0] ^ b[0]);
313 	int p01 = popcount(a[0] ^ b[1]);
314 	int p02 = popcount(a[0] ^ b[2]);
315 	int p03 = popcount(a[0] ^ b[3]);
316 
317 	int p10 = popcount(a[1] ^ b[0]);
318 	int p11 = popcount(a[1] ^ b[1]);
319 	int p12 = popcount(a[1] ^ b[2]);
320 	int p13 = popcount(a[1] ^ b[3]);
321 
322 	int p20 = popcount(a[2] ^ b[0]);
323 	int p21 = popcount(a[2] ^ b[1]);
324 	int p22 = popcount(a[2] ^ b[2]);
325 	int p23 = popcount(a[2] ^ b[3]);
326 
327 	int p30 = popcount(a[3] ^ b[0]);
328 	int p31 = popcount(a[3] ^ b[1]);
329 	int p32 = popcount(a[3] ^ b[2]);
330 	int p33 = popcount(a[3] ^ b[3]);
331 
332 	int mx23 = astc::min(p22 + p33, p23 + p32);
333 	int mx13 = astc::min(p21 + p33, p23 + p31);
334 	int mx12 = astc::min(p21 + p32, p22 + p31);
335 	int mx03 = astc::min(p20 + p33, p23 + p30);
336 	int mx02 = astc::min(p20 + p32, p22 + p30);
337 	int mx01 = astc::min(p21 + p30, p20 + p31);
338 
339 	int v0 = p00 + astc::min(p11 + mx23, p12 + mx13, p13 + mx12);
340 	int v1 = p01 + astc::min(p10 + mx23, p12 + mx03, p13 + mx02);
341 	int v2 = p02 + astc::min(p11 + mx03, p10 + mx13, p13 + mx01);
342 	int v3 = p03 + astc::min(p11 + mx02, p12 + mx01, p10 + mx12);
343 
344 	return astc::min(v0, v1, v2, v3);
345 }
346 
347 using mismatch_dispatch = unsigned int (*)(const uint64_t*, const uint64_t*);
348 
349 /**
350  * @brief Count the partition table mismatches vs the data clustering.
351  *
352  * @param      bsd               The block size information.
353  * @param      partition_count   The number of partitions in the block.
354  * @param      bitmaps           The block texel partition assignment patterns.
355  * @param[out] mismatch_counts   The array storing per partitioning mismatch counts.
356  */
count_partition_mismatch_bits(const block_size_descriptor & bsd,unsigned int partition_count,const uint64_t bitmaps[BLOCK_MAX_PARTITIONS],unsigned int mismatch_counts[BLOCK_MAX_PARTITIONINGS])357 static void count_partition_mismatch_bits(
358 	const block_size_descriptor& bsd,
359 	unsigned int partition_count,
360 	const uint64_t bitmaps[BLOCK_MAX_PARTITIONS],
361 	unsigned int mismatch_counts[BLOCK_MAX_PARTITIONINGS]
362 ) {
363 	unsigned int active_count = bsd.partitioning_count_selected[partition_count - 1];
364 
365 	if (partition_count == 2)
366 	{
367 		for (unsigned int i = 0; i < active_count; i++)
368 		{
369 			int bitcount = partition_mismatch2(bitmaps, bsd.coverage_bitmaps_2[i]);
370 			mismatch_counts[i] = astc::max(bitcount, static_cast<int>(bsd.partitioning_valid_2[i]));
371 		}
372 	}
373 	else if (partition_count == 3)
374 	{
375 		for (unsigned int i = 0; i < active_count; i++)
376 		{
377 			int bitcount = partition_mismatch3(bitmaps, bsd.coverage_bitmaps_3[i]);
378 			mismatch_counts[i] = astc::max(bitcount, static_cast<int>(bsd.partitioning_valid_3[i]));
379 		}
380 	}
381 	else
382 	{
383 		for (unsigned int i = 0; i < active_count; i++)
384 		{
385 			int bitcount = partition_mismatch4(bitmaps, bsd.coverage_bitmaps_4[i]);
386 			mismatch_counts[i] = astc::max(bitcount, static_cast<int>(bsd.partitioning_valid_4[i]));
387 		}
388 	}
389 }
390 
391 /**
392  * @brief Use counting sort on the mismatch array to sort partition candidates.
393  *
394  * @param      partitioning_count   The number of packed partitionings.
395  * @param      mismatch_count       Partitioning mismatch counts, in index order.
396  * @param[out] partition_ordering   Partition index values, in mismatch order.
397  *
398  * @return The number of active partitions in this selection.
399  */
get_partition_ordering_by_mismatch_bits(unsigned int partitioning_count,const unsigned int mismatch_count[BLOCK_MAX_PARTITIONINGS],unsigned int partition_ordering[BLOCK_MAX_PARTITIONINGS])400 static unsigned int get_partition_ordering_by_mismatch_bits(
401 	unsigned int partitioning_count,
402 	const unsigned int mismatch_count[BLOCK_MAX_PARTITIONINGS],
403 	unsigned int partition_ordering[BLOCK_MAX_PARTITIONINGS]
404 ) {
405 	unsigned int mscount[256] { 0 };
406 
407 	// Create the histogram of mismatch counts
408 	for (unsigned int i = 0; i < partitioning_count; i++)
409 	{
410 		mscount[mismatch_count[i]]++;
411 	}
412 
413 	unsigned int active_count = partitioning_count - mscount[255];
414 
415 	// Create a running sum from the histogram array
416 	// Cells store previous values only; i.e. exclude self after sum
417 	unsigned int summa = 0;
418 	for (unsigned int i = 0; i < 256; i++)
419 	{
420 		unsigned int cnt = mscount[i];
421 		mscount[i] = summa;
422 		summa += cnt;
423 	}
424 
425 	// Use the running sum as the index, incrementing after read to allow
426 	// sequential entries with the same count
427 	for (unsigned int i = 0; i < partitioning_count; i++)
428 	{
429 		unsigned int idx = mscount[mismatch_count[i]]++;
430 		partition_ordering[idx] = i;
431 	}
432 
433 	return active_count;
434 }
435 
436 /**
437  * @brief Use k-means clustering to compute a partition ordering for a block..
438  *
439  * @param      bsd                  The block size information.
440  * @param      blk                  The image block color data to compress.
441  * @param      partition_count      The desired number of partitions in the block.
442  * @param[out] partition_ordering   The list of recommended partition indices, in priority order.
443  *
444  * @return The number of active partitionings in this selection.
445  */
compute_kmeans_partition_ordering(const block_size_descriptor & bsd,const image_block & blk,unsigned int partition_count,unsigned int partition_ordering[BLOCK_MAX_PARTITIONINGS])446 static unsigned int compute_kmeans_partition_ordering(
447 	const block_size_descriptor& bsd,
448 	const image_block& blk,
449 	unsigned int partition_count,
450 	unsigned int partition_ordering[BLOCK_MAX_PARTITIONINGS]
451 ) {
452 	vfloat4 cluster_centers[BLOCK_MAX_PARTITIONS];
453 	uint8_t texel_partitions[BLOCK_MAX_TEXELS];
454 
455 	// Use three passes of k-means clustering to partition the block data
456 	for (unsigned int i = 0; i < 3; i++)
457 	{
458 		if (i == 0)
459 		{
460 			kmeans_init(blk, bsd.texel_count, partition_count, cluster_centers);
461 		}
462 		else
463 		{
464 			kmeans_update(blk, bsd.texel_count, partition_count, cluster_centers, texel_partitions);
465 		}
466 
467 		kmeans_assign(blk, bsd.texel_count, partition_count, cluster_centers, texel_partitions);
468 	}
469 
470 	// Construct the block bitmaps of texel assignments to each partition
471 	uint64_t bitmaps[BLOCK_MAX_PARTITIONS] { 0 };
472 	unsigned int texels_to_process = astc::min(bsd.texel_count, BLOCK_MAX_KMEANS_TEXELS);
473 	promise(texels_to_process > 0);
474 	for (unsigned int i = 0; i < texels_to_process; i++)
475 	{
476 		unsigned int idx = bsd.kmeans_texels[i];
477 		bitmaps[texel_partitions[idx]] |= 1ULL << i;
478 	}
479 
480 	// Count the mismatch between the block and the format's partition tables
481 	unsigned int mismatch_counts[BLOCK_MAX_PARTITIONINGS];
482 	count_partition_mismatch_bits(bsd, partition_count, bitmaps, mismatch_counts);
483 
484 	// Sort the partitions based on the number of mismatched bits
485 	return get_partition_ordering_by_mismatch_bits(
486 	    bsd.partitioning_count_selected[partition_count - 1],
487 	    mismatch_counts, partition_ordering);
488 }
489 
490 /* See header for documentation. */
find_best_partition_candidates(const block_size_descriptor & bsd,const image_block & blk,unsigned int partition_count,unsigned int partition_search_limit,unsigned int best_partitions[2])491 void find_best_partition_candidates(
492 	const block_size_descriptor& bsd,
493 	const image_block& blk,
494 	unsigned int partition_count,
495 	unsigned int partition_search_limit,
496 	unsigned int best_partitions[2]
497 ) {
498 	// Constant used to estimate quantization error for a given partitioning; the optimal value for
499 	// this depends on bitrate. These values have been determined empirically.
500 	unsigned int texels_per_block = bsd.texel_count;
501 	float weight_imprecision_estim = 0.055f;
502 	if (texels_per_block <= 20)
503 	{
504 		weight_imprecision_estim = 0.03f;
505 	}
506 	else if (texels_per_block <= 31)
507 	{
508 		weight_imprecision_estim = 0.04f;
509 	}
510 	else if (texels_per_block <= 41)
511 	{
512 		weight_imprecision_estim = 0.05f;
513 	}
514 
515 	promise(partition_count > 0);
516 	promise(partition_search_limit > 0);
517 
518 	weight_imprecision_estim = weight_imprecision_estim * weight_imprecision_estim;
519 
520 	unsigned int partition_sequence[BLOCK_MAX_PARTITIONINGS];
521 	unsigned int sequence_len = compute_kmeans_partition_ordering(bsd, blk, partition_count, partition_sequence);
522 	partition_search_limit = astc::min(partition_search_limit, sequence_len);
523 
524 	bool uses_alpha = !blk.is_constant_channel(3);
525 
526 	// Partitioning errors assuming uncorrelated-chrominance endpoints
527 	float uncor_best_error { ERROR_CALC_DEFAULT };
528 	unsigned int uncor_best_partition { 0 };
529 
530 	// Partitioning errors assuming same-chrominance endpoints
531 	// Store two so we can always return one different to uncorr
532 	float samec_best_errors[2] { ERROR_CALC_DEFAULT, ERROR_CALC_DEFAULT };
533 	unsigned int samec_best_partitions[2] { 0, 0 };
534 
535 	if (uses_alpha)
536 	{
537 		for (unsigned int i = 0; i < partition_search_limit; i++)
538 		{
539 			unsigned int partition = partition_sequence[i];
540 			const auto& pi = bsd.get_raw_partition_info(partition_count, partition);
541 
542 			// Compute weighting to give to each component in each partition
543 			partition_metrics pms[BLOCK_MAX_PARTITIONS];
544 
545 			compute_avgs_and_dirs_4_comp(pi, blk, pms);
546 
547 			line4 uncor_lines[BLOCK_MAX_PARTITIONS];
548 			line4 samec_lines[BLOCK_MAX_PARTITIONS];
549 
550 			processed_line4 uncor_plines[BLOCK_MAX_PARTITIONS];
551 			processed_line4 samec_plines[BLOCK_MAX_PARTITIONS];
552 
553 			float uncor_line_lens[BLOCK_MAX_PARTITIONS];
554 			float samec_line_lens[BLOCK_MAX_PARTITIONS];
555 
556 			for (unsigned int j = 0; j < partition_count; j++)
557 			{
558 				partition_metrics& pm = pms[j];
559 
560 				uncor_lines[j].a = pm.avg;
561 				uncor_lines[j].b = normalize_safe(pm.dir, unit4());
562 
563 				uncor_plines[j].amod = uncor_lines[j].a - uncor_lines[j].b * dot(uncor_lines[j].a, uncor_lines[j].b);
564 				uncor_plines[j].bs   = uncor_lines[j].b;
565 
566 				samec_lines[j].a = vfloat4::zero();
567 				samec_lines[j].b = normalize_safe(pm.avg, unit4());
568 
569 				samec_plines[j].amod = vfloat4::zero();
570 				samec_plines[j].bs   = samec_lines[j].b;
571 			}
572 
573 			float uncor_error = 0.0f;
574 			float samec_error = 0.0f;
575 
576 			compute_error_squared_rgba(pi,
577 			                           blk,
578 			                           uncor_plines,
579 			                           samec_plines,
580 			                           uncor_line_lens,
581 			                           samec_line_lens,
582 			                           uncor_error,
583 			                           samec_error);
584 
585 			// Compute an estimate of error introduced by weight quantization imprecision.
586 			// This error is computed as follows, for each partition
587 			//     1: compute the principal-axis vector (full length) in error-space
588 			//     2: convert the principal-axis vector to regular RGB-space
589 			//     3: scale the vector by a constant that estimates average quantization error
590 			//     4: for each texel, square the vector, then do a dot-product with the texel's
591 			//        error weight; sum up the results across all texels.
592 			//     4(optimized): square the vector once, then do a dot-product with the average
593 			//        texel error, then multiply by the number of texels.
594 
595 			for (unsigned int j = 0; j < partition_count; j++)
596 			{
597 				float tpp = static_cast<float>(pi.partition_texel_count[j]);
598 				vfloat4 error_weights(tpp * weight_imprecision_estim);
599 
600 				vfloat4 uncor_vector = uncor_lines[j].b * uncor_line_lens[j];
601 				vfloat4 samec_vector = samec_lines[j].b * samec_line_lens[j];
602 
603 				uncor_error += dot_s(uncor_vector * uncor_vector, error_weights);
604 				samec_error += dot_s(samec_vector * samec_vector, error_weights);
605 			}
606 
607 			if (uncor_error < uncor_best_error)
608 			{
609 				uncor_best_error = uncor_error;
610 				uncor_best_partition = partition;
611 			}
612 
613 			if (samec_error < samec_best_errors[0])
614 			{
615 				samec_best_errors[1] = samec_best_errors[0];
616 				samec_best_partitions[1] = samec_best_partitions[0];
617 
618 				samec_best_errors[0] = samec_error;
619 				samec_best_partitions[0] = partition;
620 			}
621 			else if (samec_error < samec_best_errors[1])
622 			{
623 				samec_best_errors[1] = samec_error;
624 				samec_best_partitions[1] = partition;
625 			}
626 		}
627 	}
628 	else
629 	{
630 		for (unsigned int i = 0; i < partition_search_limit; i++)
631 		{
632 			unsigned int partition = partition_sequence[i];
633 			const auto& pi = bsd.get_raw_partition_info(partition_count, partition);
634 
635 			// Compute weighting to give to each component in each partition
636 			partition_metrics pms[BLOCK_MAX_PARTITIONS];
637 			compute_avgs_and_dirs_3_comp_rgb(pi, blk, pms);
638 
639 			partition_lines3 plines[BLOCK_MAX_PARTITIONS];
640 
641 			for (unsigned int j = 0; j < partition_count; j++)
642 			{
643 				partition_metrics& pm = pms[j];
644 				partition_lines3& pl = plines[j];
645 
646 				pl.uncor_line.a = pm.avg;
647 				pl.uncor_line.b = normalize_safe(pm.dir.swz<0, 1, 2>(), unit3());
648 
649 				pl.samec_line.a = vfloat4::zero();
650 				pl.samec_line.b = normalize_safe(pm.avg.swz<0, 1, 2>(), unit3());
651 
652 				pl.uncor_pline.amod = pl.uncor_line.a - pl.uncor_line.b * dot3(pl.uncor_line.a, pl.uncor_line.b);
653 				pl.uncor_pline.bs   = pl.uncor_line.b;
654 
655 				pl.samec_pline.amod = vfloat4::zero();
656 				pl.samec_pline.bs   = pl.samec_line.b;
657 			}
658 
659 			float uncor_error = 0.0f;
660 			float samec_error = 0.0f;
661 
662 			compute_error_squared_rgb(pi,
663 			                          blk,
664 			                          plines,
665 			                          uncor_error,
666 			                          samec_error);
667 
668 			// Compute an estimate of error introduced by weight quantization imprecision.
669 			// This error is computed as follows, for each partition
670 			//     1: compute the principal-axis vector (full length) in error-space
671 			//     2: convert the principal-axis vector to regular RGB-space
672 			//     3: scale the vector by a constant that estimates average quantization error
673 			//     4: for each texel, square the vector, then do a dot-product with the texel's
674 			//        error weight; sum up the results across all texels.
675 			//     4(optimized): square the vector once, then do a dot-product with the average
676 			//        texel error, then multiply by the number of texels.
677 
678 			for (unsigned int j = 0; j < partition_count; j++)
679 			{
680 				partition_lines3& pl = plines[j];
681 
682 				float tpp = static_cast<float>(pi.partition_texel_count[j]);
683 				vfloat4 error_weights(tpp * weight_imprecision_estim);
684 
685 				vfloat4 uncor_vector = pl.uncor_line.b * pl.uncor_line_len;
686 				vfloat4 samec_vector = pl.samec_line.b * pl.samec_line_len;
687 
688 				uncor_error += dot3_s(uncor_vector * uncor_vector, error_weights);
689 				samec_error += dot3_s(samec_vector * samec_vector, error_weights);
690 			}
691 
692 			if (uncor_error < uncor_best_error)
693 			{
694 				uncor_best_error = uncor_error;
695 				uncor_best_partition = partition;
696 			}
697 
698 			if (samec_error < samec_best_errors[0])
699 			{
700 				samec_best_errors[1] = samec_best_errors[0];
701 				samec_best_partitions[1] = samec_best_partitions[0];
702 
703 				samec_best_errors[0] = samec_error;
704 				samec_best_partitions[0] = partition;
705 			}
706 			else if (samec_error < samec_best_errors[1])
707 			{
708 				samec_best_errors[1] = samec_error;
709 				samec_best_partitions[1] = partition;
710 			}
711 		}
712 	}
713 
714 	// Same partition is best for both, so use this first unconditionally
715 	if (uncor_best_partition == samec_best_partitions[0])
716 	{
717 		best_partitions[0] = samec_best_partitions[0];
718 		best_partitions[1] = samec_best_partitions[1];
719 	}
720 	// Uncor is best
721 	else if (uncor_best_error <= samec_best_errors[0])
722 	{
723 		best_partitions[0] = uncor_best_partition;
724 		best_partitions[1] = samec_best_partitions[0];
725 	}
726 	// Samec is best
727 	else
728 	{
729 		best_partitions[0] = samec_best_partitions[0];
730 		best_partitions[1] = uncor_best_partition;
731 	}
732 
733 	// Convert these back into canonical partition IDs for the rest of the codec
734 	best_partitions[0] = bsd.get_raw_partition_info(partition_count, best_partitions[0]).partition_index;
735 	best_partitions[1] = bsd.get_raw_partition_info(partition_count, best_partitions[1]).partition_index;
736 }
737 
738 #endif
739