1 // SPDX-License-Identifier: Apache-2.0
2 // ----------------------------------------------------------------------------
3 // Copyright 2011-2022 Arm Limited
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
6 // use this file except in compliance with the License. You may obtain a copy
7 // of the License at:
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 // License for the specific language governing permissions and limitations
15 // under the License.
16 // ----------------------------------------------------------------------------
17
18 #if !defined(ASTCENC_DECOMPRESS_ONLY)
19
20 /**
21 * @brief Functions for finding best partition for a block.
22 *
23 * The partition search operates in two stages. The first pass uses kmeans clustering to group
24 * texels into an ideal partitioning for the requested partition count, and then compares that
25 * against the 1024 partitionings generated by the ASTC partition hash function. The generated
26 * partitions are then ranked by the number of texels in the wrong partition, compared to the ideal
27 * clustering. All 1024 partitions are tested for similarity and ranked, apart from duplicates and
28 * partitionings that actually generate fewer than the requested partition count, but only the top
29 * N candidates are actually put through a more detailed search. N is determined by the compressor
30 * quality preset.
31 *
32 * For the detailed search, each candidate is checked against two possible encoding methods:
33 *
34 * - The best partitioning assuming different chroma colors (RGB + RGB or RGB + delta endpoints).
35 * - The best partitioning assuming same chroma colors (RGB + scale endpoints).
36 *
37 * This is implemented by computing the compute mean color and dominant direction for each
38 * partition. This defines two lines, both of which go through the mean color value.
39 *
40 * - One line has a direction defined by the dominant direction; this is used to assess the error
41 * from using an uncorrelated color representation.
42 * - The other line goes through (0,0,0,1) and is used to assess the error from using a same chroma
43 * (RGB + scale) color representation.
44 *
45 * The best candidate is selected by computing the squared-errors that result from using these
46 * lines for endpoint selection.
47 */
48
49 #include "astcenc_internal.h"
50
51 /**
52 * @brief Pick some initital kmeans cluster centers.
53 *
54 * @param blk The image block color data to compress.
55 * @param texel_count The number of texels in the block.
56 * @param partition_count The number of partitions in the block.
57 * @param[out] cluster_centers The initital partition cluster center colors.
58 */
kmeans_init(const image_block & blk,unsigned int texel_count,unsigned int partition_count,vfloat4 cluster_centers[BLOCK_MAX_PARTITIONS])59 static void kmeans_init(
60 const image_block& blk,
61 unsigned int texel_count,
62 unsigned int partition_count,
63 vfloat4 cluster_centers[BLOCK_MAX_PARTITIONS]
64 ) {
65 promise(texel_count > 0);
66 promise(partition_count > 0);
67
68 unsigned int clusters_selected = 0;
69 float distances[BLOCK_MAX_TEXELS];
70
71 // Pick a random sample as first cluster center; 145897 from random.org
72 unsigned int sample = 145897 % texel_count;
73 vfloat4 center_color = blk.texel(sample);
74 cluster_centers[clusters_selected] = center_color;
75 clusters_selected++;
76
77 // Compute the distance to the first cluster center
78 float distance_sum = 0.0f;
79 for (unsigned int i = 0; i < texel_count; i++)
80 {
81 vfloat4 color = blk.texel(i);
82 vfloat4 diff = color - center_color;
83 float distance = dot_s(diff * diff, blk.channel_weight);
84 distance_sum += distance;
85 distances[i] = distance;
86 }
87
88 // More numbers from random.org for weighted-random center selection
89 const float cluster_cutoffs[9] {
90 0.626220f, 0.932770f, 0.275454f,
91 0.318558f, 0.240113f, 0.009190f,
92 0.347661f, 0.731960f, 0.156391f
93 };
94
95 unsigned int cutoff = (clusters_selected - 1) + 3 * (partition_count - 2);
96
97 // Pick the remaining samples as needed
98 while (true)
99 {
100 // Pick the next center in a weighted-random fashion.
101 float summa = 0.0f;
102 float distance_cutoff = distance_sum * cluster_cutoffs[cutoff++];
103 for (sample = 0; sample < texel_count; sample++)
104 {
105 summa += distances[sample];
106 if (summa >= distance_cutoff)
107 {
108 break;
109 }
110 }
111
112 // Clamp to a valid range and store the selected cluster center
113 sample = astc::min(sample, texel_count - 1);
114
115 center_color = blk.texel(sample);
116 cluster_centers[clusters_selected++] = center_color;
117 if (clusters_selected >= partition_count)
118 {
119 break;
120 }
121
122 // Compute the distance to the new cluster center, keep the min dist
123 distance_sum = 0.0f;
124 for (unsigned int i = 0; i < texel_count; i++)
125 {
126 vfloat4 color = blk.texel(i);
127 vfloat4 diff = color - center_color;
128 float distance = dot_s(diff * diff, blk.channel_weight);
129 distance = astc::min(distance, distances[i]);
130 distance_sum += distance;
131 distances[i] = distance;
132 }
133 }
134 }
135
136 /**
137 * @brief Assign texels to clusters, based on a set of chosen center points.
138 *
139 * @param blk The image block color data to compress.
140 * @param texel_count The number of texels in the block.
141 * @param partition_count The number of partitions in the block.
142 * @param cluster_centers The partition cluster center colors.
143 * @param[out] partition_of_texel The partition assigned for each texel.
144 */
kmeans_assign(const image_block & blk,unsigned int texel_count,unsigned int partition_count,const vfloat4 cluster_centers[BLOCK_MAX_PARTITIONS],uint8_t partition_of_texel[BLOCK_MAX_TEXELS])145 static void kmeans_assign(
146 const image_block& blk,
147 unsigned int texel_count,
148 unsigned int partition_count,
149 const vfloat4 cluster_centers[BLOCK_MAX_PARTITIONS],
150 uint8_t partition_of_texel[BLOCK_MAX_TEXELS]
151 ) {
152 promise(texel_count > 0);
153 promise(partition_count > 0);
154
155 uint8_t partition_texel_count[BLOCK_MAX_PARTITIONS] { 0 };
156
157 // Find the best partition for every texel
158 for (unsigned int i = 0; i < texel_count; i++)
159 {
160 float best_distance = std::numeric_limits<float>::max();
161 unsigned int best_partition = 0;
162
163 vfloat4 color = blk.texel(i);
164 for (unsigned int j = 0; j < partition_count; j++)
165 {
166 vfloat4 diff = color - cluster_centers[j];
167 float distance = dot_s(diff * diff, blk.channel_weight);
168 if (distance < best_distance)
169 {
170 best_distance = distance;
171 best_partition = j;
172 }
173 }
174
175 partition_of_texel[i] = static_cast<uint8_t>(best_partition);
176 partition_texel_count[best_partition]++;
177 }
178
179 // It is possible to get a situation where a partition ends up without any texels. In this case,
180 // assign texel N to partition N. This is silly, but ensures that every partition retains at
181 // least one texel. Reassigning a texel in this manner may cause another partition to go empty,
182 // so if we actually did a reassignment, run the whole loop over again.
183 bool problem_case;
184 do
185 {
186 problem_case = false;
187 for (unsigned int i = 0; i < partition_count; i++)
188 {
189 if (partition_texel_count[i] == 0)
190 {
191 partition_texel_count[partition_of_texel[i]]--;
192 partition_texel_count[i]++;
193 partition_of_texel[i] = static_cast<uint8_t>(i);
194 problem_case = true;
195 }
196 }
197 } while (problem_case);
198 }
199
200 /**
201 * @brief Compute new cluster centers based on their center of gravity.
202 *
203 * @param blk The image block color data to compress.
204 * @param texel_count The number of texels in the block.
205 * @param partition_count The number of partitions in the block.
206 * @param[out] cluster_centers The new cluster center colors.
207 * @param partition_of_texel The partition assigned for each texel.
208 */
kmeans_update(const image_block & blk,unsigned int texel_count,unsigned int partition_count,vfloat4 cluster_centers[BLOCK_MAX_PARTITIONS],const uint8_t partition_of_texel[BLOCK_MAX_TEXELS])209 static void kmeans_update(
210 const image_block& blk,
211 unsigned int texel_count,
212 unsigned int partition_count,
213 vfloat4 cluster_centers[BLOCK_MAX_PARTITIONS],
214 const uint8_t partition_of_texel[BLOCK_MAX_TEXELS]
215 ) {
216 promise(texel_count > 0);
217 promise(partition_count > 0);
218
219 vfloat4 color_sum[BLOCK_MAX_PARTITIONS] {
220 vfloat4::zero(),
221 vfloat4::zero(),
222 vfloat4::zero(),
223 vfloat4::zero()
224 };
225
226 uint8_t partition_texel_count[BLOCK_MAX_PARTITIONS] { 0 };
227
228 // Find the center-of-gravity in each cluster
229 for (unsigned int i = 0; i < texel_count; i++)
230 {
231 uint8_t partition = partition_of_texel[i];
232 color_sum[partition] += blk.texel(i);
233 partition_texel_count[partition]++;
234 }
235
236 // Set the center of gravity to be the new cluster center
237 for (unsigned int i = 0; i < partition_count; i++)
238 {
239 float scale = 1.0f / static_cast<float>(partition_texel_count[i]);
240 cluster_centers[i] = color_sum[i] * scale;
241 }
242 }
243
244 /**
245 * @brief Compute bit-mismatch for partitioning in 2-partition mode.
246 *
247 * @param a The texel assignment bitvector for the block.
248 * @param b The texel assignment bitvector for the partition table.
249 *
250 * @return The number of bit mismatches.
251 */
partition_mismatch2(const uint64_t a[2],const uint64_t b[2])252 static inline unsigned int partition_mismatch2(
253 const uint64_t a[2],
254 const uint64_t b[2]
255 ) {
256 int v1 = popcount(a[0] ^ b[0]) + popcount(a[1] ^ b[1]);
257 int v2 = popcount(a[0] ^ b[1]) + popcount(a[1] ^ b[0]);
258 return astc::min(v1, v2);
259 }
260
261 /**
262 * @brief Compute bit-mismatch for partitioning in 3-partition mode.
263 *
264 * @param a The texel assignment bitvector for the block.
265 * @param b The texel assignment bitvector for the partition table.
266 *
267 * @return The number of bit mismatches.
268 */
partition_mismatch3(const uint64_t a[3],const uint64_t b[3])269 static inline unsigned int partition_mismatch3(
270 const uint64_t a[3],
271 const uint64_t b[3]
272 ) {
273 int p00 = popcount(a[0] ^ b[0]);
274 int p01 = popcount(a[0] ^ b[1]);
275 int p02 = popcount(a[0] ^ b[2]);
276
277 int p10 = popcount(a[1] ^ b[0]);
278 int p11 = popcount(a[1] ^ b[1]);
279 int p12 = popcount(a[1] ^ b[2]);
280
281 int p20 = popcount(a[2] ^ b[0]);
282 int p21 = popcount(a[2] ^ b[1]);
283 int p22 = popcount(a[2] ^ b[2]);
284
285 int s0 = p11 + p22;
286 int s1 = p12 + p21;
287 int v0 = astc::min(s0, s1) + p00;
288
289 int s2 = p10 + p22;
290 int s3 = p12 + p20;
291 int v1 = astc::min(s2, s3) + p01;
292
293 int s4 = p10 + p21;
294 int s5 = p11 + p20;
295 int v2 = astc::min(s4, s5) + p02;
296
297 return astc::min(v0, v1, v2);
298 }
299
300 /**
301 * @brief Compute bit-mismatch for partitioning in 4-partition mode.
302 *
303 * @param a The texel assignment bitvector for the block.
304 * @param b The texel assignment bitvector for the partition table.
305 *
306 * @return The number of bit mismatches.
307 */
partition_mismatch4(const uint64_t a[4],const uint64_t b[4])308 static inline unsigned int partition_mismatch4(
309 const uint64_t a[4],
310 const uint64_t b[4]
311 ) {
312 int p00 = popcount(a[0] ^ b[0]);
313 int p01 = popcount(a[0] ^ b[1]);
314 int p02 = popcount(a[0] ^ b[2]);
315 int p03 = popcount(a[0] ^ b[3]);
316
317 int p10 = popcount(a[1] ^ b[0]);
318 int p11 = popcount(a[1] ^ b[1]);
319 int p12 = popcount(a[1] ^ b[2]);
320 int p13 = popcount(a[1] ^ b[3]);
321
322 int p20 = popcount(a[2] ^ b[0]);
323 int p21 = popcount(a[2] ^ b[1]);
324 int p22 = popcount(a[2] ^ b[2]);
325 int p23 = popcount(a[2] ^ b[3]);
326
327 int p30 = popcount(a[3] ^ b[0]);
328 int p31 = popcount(a[3] ^ b[1]);
329 int p32 = popcount(a[3] ^ b[2]);
330 int p33 = popcount(a[3] ^ b[3]);
331
332 int mx23 = astc::min(p22 + p33, p23 + p32);
333 int mx13 = astc::min(p21 + p33, p23 + p31);
334 int mx12 = astc::min(p21 + p32, p22 + p31);
335 int mx03 = astc::min(p20 + p33, p23 + p30);
336 int mx02 = astc::min(p20 + p32, p22 + p30);
337 int mx01 = astc::min(p21 + p30, p20 + p31);
338
339 int v0 = p00 + astc::min(p11 + mx23, p12 + mx13, p13 + mx12);
340 int v1 = p01 + astc::min(p10 + mx23, p12 + mx03, p13 + mx02);
341 int v2 = p02 + astc::min(p11 + mx03, p10 + mx13, p13 + mx01);
342 int v3 = p03 + astc::min(p11 + mx02, p12 + mx01, p10 + mx12);
343
344 return astc::min(v0, v1, v2, v3);
345 }
346
347 using mismatch_dispatch = unsigned int (*)(const uint64_t*, const uint64_t*);
348
349 /**
350 * @brief Count the partition table mismatches vs the data clustering.
351 *
352 * @param bsd The block size information.
353 * @param partition_count The number of partitions in the block.
354 * @param bitmaps The block texel partition assignment patterns.
355 * @param[out] mismatch_counts The array storing per partitioning mismatch counts.
356 */
count_partition_mismatch_bits(const block_size_descriptor & bsd,unsigned int partition_count,const uint64_t bitmaps[BLOCK_MAX_PARTITIONS],unsigned int mismatch_counts[BLOCK_MAX_PARTITIONINGS])357 static void count_partition_mismatch_bits(
358 const block_size_descriptor& bsd,
359 unsigned int partition_count,
360 const uint64_t bitmaps[BLOCK_MAX_PARTITIONS],
361 unsigned int mismatch_counts[BLOCK_MAX_PARTITIONINGS]
362 ) {
363 unsigned int active_count = bsd.partitioning_count_selected[partition_count - 1];
364
365 if (partition_count == 2)
366 {
367 for (unsigned int i = 0; i < active_count; i++)
368 {
369 int bitcount = partition_mismatch2(bitmaps, bsd.coverage_bitmaps_2[i]);
370 mismatch_counts[i] = astc::max(bitcount, static_cast<int>(bsd.partitioning_valid_2[i]));
371 }
372 }
373 else if (partition_count == 3)
374 {
375 for (unsigned int i = 0; i < active_count; i++)
376 {
377 int bitcount = partition_mismatch3(bitmaps, bsd.coverage_bitmaps_3[i]);
378 mismatch_counts[i] = astc::max(bitcount, static_cast<int>(bsd.partitioning_valid_3[i]));
379 }
380 }
381 else
382 {
383 for (unsigned int i = 0; i < active_count; i++)
384 {
385 int bitcount = partition_mismatch4(bitmaps, bsd.coverage_bitmaps_4[i]);
386 mismatch_counts[i] = astc::max(bitcount, static_cast<int>(bsd.partitioning_valid_4[i]));
387 }
388 }
389 }
390
391 /**
392 * @brief Use counting sort on the mismatch array to sort partition candidates.
393 *
394 * @param partitioning_count The number of packed partitionings.
395 * @param mismatch_count Partitioning mismatch counts, in index order.
396 * @param[out] partition_ordering Partition index values, in mismatch order.
397 *
398 * @return The number of active partitions in this selection.
399 */
get_partition_ordering_by_mismatch_bits(unsigned int partitioning_count,const unsigned int mismatch_count[BLOCK_MAX_PARTITIONINGS],unsigned int partition_ordering[BLOCK_MAX_PARTITIONINGS])400 static unsigned int get_partition_ordering_by_mismatch_bits(
401 unsigned int partitioning_count,
402 const unsigned int mismatch_count[BLOCK_MAX_PARTITIONINGS],
403 unsigned int partition_ordering[BLOCK_MAX_PARTITIONINGS]
404 ) {
405 unsigned int mscount[256] { 0 };
406
407 // Create the histogram of mismatch counts
408 for (unsigned int i = 0; i < partitioning_count; i++)
409 {
410 mscount[mismatch_count[i]]++;
411 }
412
413 unsigned int active_count = partitioning_count - mscount[255];
414
415 // Create a running sum from the histogram array
416 // Cells store previous values only; i.e. exclude self after sum
417 unsigned int summa = 0;
418 for (unsigned int i = 0; i < 256; i++)
419 {
420 unsigned int cnt = mscount[i];
421 mscount[i] = summa;
422 summa += cnt;
423 }
424
425 // Use the running sum as the index, incrementing after read to allow
426 // sequential entries with the same count
427 for (unsigned int i = 0; i < partitioning_count; i++)
428 {
429 unsigned int idx = mscount[mismatch_count[i]]++;
430 partition_ordering[idx] = i;
431 }
432
433 return active_count;
434 }
435
436 /**
437 * @brief Use k-means clustering to compute a partition ordering for a block..
438 *
439 * @param bsd The block size information.
440 * @param blk The image block color data to compress.
441 * @param partition_count The desired number of partitions in the block.
442 * @param[out] partition_ordering The list of recommended partition indices, in priority order.
443 *
444 * @return The number of active partitionings in this selection.
445 */
compute_kmeans_partition_ordering(const block_size_descriptor & bsd,const image_block & blk,unsigned int partition_count,unsigned int partition_ordering[BLOCK_MAX_PARTITIONINGS])446 static unsigned int compute_kmeans_partition_ordering(
447 const block_size_descriptor& bsd,
448 const image_block& blk,
449 unsigned int partition_count,
450 unsigned int partition_ordering[BLOCK_MAX_PARTITIONINGS]
451 ) {
452 vfloat4 cluster_centers[BLOCK_MAX_PARTITIONS];
453 uint8_t texel_partitions[BLOCK_MAX_TEXELS];
454
455 // Use three passes of k-means clustering to partition the block data
456 for (unsigned int i = 0; i < 3; i++)
457 {
458 if (i == 0)
459 {
460 kmeans_init(blk, bsd.texel_count, partition_count, cluster_centers);
461 }
462 else
463 {
464 kmeans_update(blk, bsd.texel_count, partition_count, cluster_centers, texel_partitions);
465 }
466
467 kmeans_assign(blk, bsd.texel_count, partition_count, cluster_centers, texel_partitions);
468 }
469
470 // Construct the block bitmaps of texel assignments to each partition
471 uint64_t bitmaps[BLOCK_MAX_PARTITIONS] { 0 };
472 unsigned int texels_to_process = astc::min(bsd.texel_count, BLOCK_MAX_KMEANS_TEXELS);
473 promise(texels_to_process > 0);
474 for (unsigned int i = 0; i < texels_to_process; i++)
475 {
476 unsigned int idx = bsd.kmeans_texels[i];
477 bitmaps[texel_partitions[idx]] |= 1ULL << i;
478 }
479
480 // Count the mismatch between the block and the format's partition tables
481 unsigned int mismatch_counts[BLOCK_MAX_PARTITIONINGS];
482 count_partition_mismatch_bits(bsd, partition_count, bitmaps, mismatch_counts);
483
484 // Sort the partitions based on the number of mismatched bits
485 return get_partition_ordering_by_mismatch_bits(
486 bsd.partitioning_count_selected[partition_count - 1],
487 mismatch_counts, partition_ordering);
488 }
489
490 /* See header for documentation. */
find_best_partition_candidates(const block_size_descriptor & bsd,const image_block & blk,unsigned int partition_count,unsigned int partition_search_limit,unsigned int best_partitions[2])491 void find_best_partition_candidates(
492 const block_size_descriptor& bsd,
493 const image_block& blk,
494 unsigned int partition_count,
495 unsigned int partition_search_limit,
496 unsigned int best_partitions[2]
497 ) {
498 // Constant used to estimate quantization error for a given partitioning; the optimal value for
499 // this depends on bitrate. These values have been determined empirically.
500 unsigned int texels_per_block = bsd.texel_count;
501 float weight_imprecision_estim = 0.055f;
502 if (texels_per_block <= 20)
503 {
504 weight_imprecision_estim = 0.03f;
505 }
506 else if (texels_per_block <= 31)
507 {
508 weight_imprecision_estim = 0.04f;
509 }
510 else if (texels_per_block <= 41)
511 {
512 weight_imprecision_estim = 0.05f;
513 }
514
515 promise(partition_count > 0);
516 promise(partition_search_limit > 0);
517
518 weight_imprecision_estim = weight_imprecision_estim * weight_imprecision_estim;
519
520 unsigned int partition_sequence[BLOCK_MAX_PARTITIONINGS];
521 unsigned int sequence_len = compute_kmeans_partition_ordering(bsd, blk, partition_count, partition_sequence);
522 partition_search_limit = astc::min(partition_search_limit, sequence_len);
523
524 bool uses_alpha = !blk.is_constant_channel(3);
525
526 // Partitioning errors assuming uncorrelated-chrominance endpoints
527 float uncor_best_error { ERROR_CALC_DEFAULT };
528 unsigned int uncor_best_partition { 0 };
529
530 // Partitioning errors assuming same-chrominance endpoints
531 // Store two so we can always return one different to uncorr
532 float samec_best_errors[2] { ERROR_CALC_DEFAULT, ERROR_CALC_DEFAULT };
533 unsigned int samec_best_partitions[2] { 0, 0 };
534
535 if (uses_alpha)
536 {
537 for (unsigned int i = 0; i < partition_search_limit; i++)
538 {
539 unsigned int partition = partition_sequence[i];
540 const auto& pi = bsd.get_raw_partition_info(partition_count, partition);
541
542 // Compute weighting to give to each component in each partition
543 partition_metrics pms[BLOCK_MAX_PARTITIONS];
544
545 compute_avgs_and_dirs_4_comp(pi, blk, pms);
546
547 line4 uncor_lines[BLOCK_MAX_PARTITIONS];
548 line4 samec_lines[BLOCK_MAX_PARTITIONS];
549
550 processed_line4 uncor_plines[BLOCK_MAX_PARTITIONS];
551 processed_line4 samec_plines[BLOCK_MAX_PARTITIONS];
552
553 float uncor_line_lens[BLOCK_MAX_PARTITIONS];
554 float samec_line_lens[BLOCK_MAX_PARTITIONS];
555
556 for (unsigned int j = 0; j < partition_count; j++)
557 {
558 partition_metrics& pm = pms[j];
559
560 uncor_lines[j].a = pm.avg;
561 uncor_lines[j].b = normalize_safe(pm.dir, unit4());
562
563 uncor_plines[j].amod = uncor_lines[j].a - uncor_lines[j].b * dot(uncor_lines[j].a, uncor_lines[j].b);
564 uncor_plines[j].bs = uncor_lines[j].b;
565
566 samec_lines[j].a = vfloat4::zero();
567 samec_lines[j].b = normalize_safe(pm.avg, unit4());
568
569 samec_plines[j].amod = vfloat4::zero();
570 samec_plines[j].bs = samec_lines[j].b;
571 }
572
573 float uncor_error = 0.0f;
574 float samec_error = 0.0f;
575
576 compute_error_squared_rgba(pi,
577 blk,
578 uncor_plines,
579 samec_plines,
580 uncor_line_lens,
581 samec_line_lens,
582 uncor_error,
583 samec_error);
584
585 // Compute an estimate of error introduced by weight quantization imprecision.
586 // This error is computed as follows, for each partition
587 // 1: compute the principal-axis vector (full length) in error-space
588 // 2: convert the principal-axis vector to regular RGB-space
589 // 3: scale the vector by a constant that estimates average quantization error
590 // 4: for each texel, square the vector, then do a dot-product with the texel's
591 // error weight; sum up the results across all texels.
592 // 4(optimized): square the vector once, then do a dot-product with the average
593 // texel error, then multiply by the number of texels.
594
595 for (unsigned int j = 0; j < partition_count; j++)
596 {
597 float tpp = static_cast<float>(pi.partition_texel_count[j]);
598 vfloat4 error_weights(tpp * weight_imprecision_estim);
599
600 vfloat4 uncor_vector = uncor_lines[j].b * uncor_line_lens[j];
601 vfloat4 samec_vector = samec_lines[j].b * samec_line_lens[j];
602
603 uncor_error += dot_s(uncor_vector * uncor_vector, error_weights);
604 samec_error += dot_s(samec_vector * samec_vector, error_weights);
605 }
606
607 if (uncor_error < uncor_best_error)
608 {
609 uncor_best_error = uncor_error;
610 uncor_best_partition = partition;
611 }
612
613 if (samec_error < samec_best_errors[0])
614 {
615 samec_best_errors[1] = samec_best_errors[0];
616 samec_best_partitions[1] = samec_best_partitions[0];
617
618 samec_best_errors[0] = samec_error;
619 samec_best_partitions[0] = partition;
620 }
621 else if (samec_error < samec_best_errors[1])
622 {
623 samec_best_errors[1] = samec_error;
624 samec_best_partitions[1] = partition;
625 }
626 }
627 }
628 else
629 {
630 for (unsigned int i = 0; i < partition_search_limit; i++)
631 {
632 unsigned int partition = partition_sequence[i];
633 const auto& pi = bsd.get_raw_partition_info(partition_count, partition);
634
635 // Compute weighting to give to each component in each partition
636 partition_metrics pms[BLOCK_MAX_PARTITIONS];
637 compute_avgs_and_dirs_3_comp_rgb(pi, blk, pms);
638
639 partition_lines3 plines[BLOCK_MAX_PARTITIONS];
640
641 for (unsigned int j = 0; j < partition_count; j++)
642 {
643 partition_metrics& pm = pms[j];
644 partition_lines3& pl = plines[j];
645
646 pl.uncor_line.a = pm.avg;
647 pl.uncor_line.b = normalize_safe(pm.dir.swz<0, 1, 2>(), unit3());
648
649 pl.samec_line.a = vfloat4::zero();
650 pl.samec_line.b = normalize_safe(pm.avg.swz<0, 1, 2>(), unit3());
651
652 pl.uncor_pline.amod = pl.uncor_line.a - pl.uncor_line.b * dot3(pl.uncor_line.a, pl.uncor_line.b);
653 pl.uncor_pline.bs = pl.uncor_line.b;
654
655 pl.samec_pline.amod = vfloat4::zero();
656 pl.samec_pline.bs = pl.samec_line.b;
657 }
658
659 float uncor_error = 0.0f;
660 float samec_error = 0.0f;
661
662 compute_error_squared_rgb(pi,
663 blk,
664 plines,
665 uncor_error,
666 samec_error);
667
668 // Compute an estimate of error introduced by weight quantization imprecision.
669 // This error is computed as follows, for each partition
670 // 1: compute the principal-axis vector (full length) in error-space
671 // 2: convert the principal-axis vector to regular RGB-space
672 // 3: scale the vector by a constant that estimates average quantization error
673 // 4: for each texel, square the vector, then do a dot-product with the texel's
674 // error weight; sum up the results across all texels.
675 // 4(optimized): square the vector once, then do a dot-product with the average
676 // texel error, then multiply by the number of texels.
677
678 for (unsigned int j = 0; j < partition_count; j++)
679 {
680 partition_lines3& pl = plines[j];
681
682 float tpp = static_cast<float>(pi.partition_texel_count[j]);
683 vfloat4 error_weights(tpp * weight_imprecision_estim);
684
685 vfloat4 uncor_vector = pl.uncor_line.b * pl.uncor_line_len;
686 vfloat4 samec_vector = pl.samec_line.b * pl.samec_line_len;
687
688 uncor_error += dot3_s(uncor_vector * uncor_vector, error_weights);
689 samec_error += dot3_s(samec_vector * samec_vector, error_weights);
690 }
691
692 if (uncor_error < uncor_best_error)
693 {
694 uncor_best_error = uncor_error;
695 uncor_best_partition = partition;
696 }
697
698 if (samec_error < samec_best_errors[0])
699 {
700 samec_best_errors[1] = samec_best_errors[0];
701 samec_best_partitions[1] = samec_best_partitions[0];
702
703 samec_best_errors[0] = samec_error;
704 samec_best_partitions[0] = partition;
705 }
706 else if (samec_error < samec_best_errors[1])
707 {
708 samec_best_errors[1] = samec_error;
709 samec_best_partitions[1] = partition;
710 }
711 }
712 }
713
714 // Same partition is best for both, so use this first unconditionally
715 if (uncor_best_partition == samec_best_partitions[0])
716 {
717 best_partitions[0] = samec_best_partitions[0];
718 best_partitions[1] = samec_best_partitions[1];
719 }
720 // Uncor is best
721 else if (uncor_best_error <= samec_best_errors[0])
722 {
723 best_partitions[0] = uncor_best_partition;
724 best_partitions[1] = samec_best_partitions[0];
725 }
726 // Samec is best
727 else
728 {
729 best_partitions[0] = samec_best_partitions[0];
730 best_partitions[1] = uncor_best_partition;
731 }
732
733 // Convert these back into canonical partition IDs for the rest of the codec
734 best_partitions[0] = bsd.get_raw_partition_info(partition_count, best_partitions[0]).partition_index;
735 best_partitions[1] = bsd.get_raw_partition_info(partition_count, best_partitions[1]).partition_index;
736 }
737
738 #endif
739