• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: Apache-2.0
2 // ----------------------------------------------------------------------------
3 // Copyright 2011-2023 Arm Limited
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
6 // use this file except in compliance with the License. You may obtain a copy
7 // of the License at:
8 //
9 //     http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 // License for the specific language governing permissions and limitations
15 // under the License.
16 // ----------------------------------------------------------------------------
17 
18 /**
19  * @brief Functions for generating partition tables on demand.
20  */
21 
22 #include "astcenc_internal.h"
23 
24 /** @brief The number of 64-bit words needed to represent a canonical partition bit pattern. */
25 #define BIT_PATTERN_WORDS (((ASTCENC_BLOCK_MAX_TEXELS * 2) + 63) / 64)
26 
27 /**
28  * @brief Generate a canonical representation of a partition pattern.
29  *
30  * The returned value stores two bits per texel, for up to 6x6x6 texels, where the two bits store
31  * the remapped texel index. Remapping ensures that we only match on the partition pattern,
32  * independent of the partition order generated by the hash.
33  *
34  * @param      texel_count          The number of texels in the block.
35  * @param      partition_of_texel   The partition assignments, in hash order.
36  * @param[out] bit_pattern          The output bit pattern representation.
37  */
generate_canonical_partitioning(unsigned int texel_count,const uint8_t * partition_of_texel,uint64_t bit_pattern[BIT_PATTERN_WORDS])38 static void generate_canonical_partitioning(
39 	unsigned int texel_count,
40 	const uint8_t* partition_of_texel,
41 	uint64_t bit_pattern[BIT_PATTERN_WORDS]
42 ) {
43 	// Clear the pattern
44 	for (unsigned int i = 0; i < BIT_PATTERN_WORDS; i++)
45 	{
46 		bit_pattern[i] = 0;
47 	}
48 
49 	// Store a mapping to reorder the raw partitions so that the partitions are ordered such
50 	// that the lowest texel index in partition N is smaller than the lowest texel index in
51 	// partition N + 1.
52 	int mapped_index[BLOCK_MAX_PARTITIONS];
53 	int map_weight_count = 0;
54 
55 	for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONS; i++)
56 	{
57 		mapped_index[i] = -1;
58 	}
59 
60 	for (unsigned int i = 0; i < texel_count; i++)
61 	{
62 		int index = partition_of_texel[i];
63 		if (mapped_index[index] < 0)
64 		{
65 			mapped_index[index] = map_weight_count++;
66 		}
67 
68 		uint64_t xlat_index = mapped_index[index];
69 		bit_pattern[i >> 5] |= xlat_index << (2 * (i & 0x1F));
70 	}
71 }
72 
73 /**
74  * @brief Compare two canonical patterns to see if they are the same.
75  *
76  * @param part1   The first canonical bit pattern to check.
77  * @param part2   The second canonical bit pattern to check.
78  *
79  * @return @c true if the patterns are the same, @c false otherwise.
80  */
compare_canonical_partitionings(const uint64_t part1[BIT_PATTERN_WORDS],const uint64_t part2[BIT_PATTERN_WORDS])81 static bool compare_canonical_partitionings(
82 	const uint64_t part1[BIT_PATTERN_WORDS],
83 	const uint64_t part2[BIT_PATTERN_WORDS]
84 ) {
85 	return (part1[0] == part2[0])
86 #if BIT_PATTERN_WORDS > 1
87 	    && (part1[1] == part2[1])
88 #endif
89 #if BIT_PATTERN_WORDS > 2
90 	    && (part1[2] == part2[2])
91 #endif
92 #if BIT_PATTERN_WORDS > 3
93 	    && (part1[3] == part2[3])
94 #endif
95 #if BIT_PATTERN_WORDS > 4
96 	    && (part1[4] == part2[4])
97 #endif
98 #if BIT_PATTERN_WORDS > 5
99 	    && (part1[5] == part2[5])
100 #endif
101 #if BIT_PATTERN_WORDS > 6
102 	    && (part1[6] == part2[6])
103 #endif
104 	    ;
105 }
106 
107 /**
108  * @brief Hash function used for procedural partition assignment.
109  *
110  * @param inp   The hash seed.
111  *
112  * @return The hashed value.
113  */
hash52(uint32_t inp)114 static uint32_t hash52(
115 	uint32_t inp
116 ) {
117 	inp ^= inp >> 15;
118 
119 	// (2^4 + 1) * (2^7 + 1) * (2^17 - 1)
120 	inp *= 0xEEDE0891;
121 	inp ^= inp >> 5;
122 	inp += inp << 16;
123 	inp ^= inp >> 7;
124 	inp ^= inp >> 3;
125 	inp ^= inp << 6;
126 	inp ^= inp >> 17;
127 	return inp;
128 }
129 
130 /**
131  * @brief Select texel assignment for a single coordinate.
132  *
133  * @param seed              The seed - the partition index from the block.
134  * @param x                 The texel X coordinate in the block.
135  * @param y                 The texel Y coordinate in the block.
136  * @param z                 The texel Z coordinate in the block.
137  * @param partition_count   The total partition count of this encoding.
138  * @param small_block       @c true if the block has fewer than 32 texels.
139  *
140  * @return The assigned partition index for this texel.
141  */
select_partition(int seed,int x,int y,int z,int partition_count,bool small_block)142 static uint8_t select_partition(
143 	int seed,
144 	int x,
145 	int y,
146 	int z,
147 	int partition_count,
148 	bool small_block
149 ) {
150 	// For small blocks bias the coordinates to get better distribution
151 	if (small_block)
152 	{
153 		x <<= 1;
154 		y <<= 1;
155 		z <<= 1;
156 	}
157 
158 	seed += (partition_count - 1) * 1024;
159 
160 	uint32_t rnum = hash52(seed);
161 
162 	uint8_t seed1 = rnum & 0xF;
163 	uint8_t seed2 = (rnum >> 4) & 0xF;
164 	uint8_t seed3 = (rnum >> 8) & 0xF;
165 	uint8_t seed4 = (rnum >> 12) & 0xF;
166 	uint8_t seed5 = (rnum >> 16) & 0xF;
167 	uint8_t seed6 = (rnum >> 20) & 0xF;
168 	uint8_t seed7 = (rnum >> 24) & 0xF;
169 	uint8_t seed8 = (rnum >> 28) & 0xF;
170 	uint8_t seed9 = (rnum >> 18) & 0xF;
171 	uint8_t seed10 = (rnum >> 22) & 0xF;
172 	uint8_t seed11 = (rnum >> 26) & 0xF;
173 	uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;
174 
175 	// Squaring all the seeds in order to bias their distribution towards lower values.
176 	seed1 *= seed1;
177 	seed2 *= seed2;
178 	seed3 *= seed3;
179 	seed4 *= seed4;
180 	seed5 *= seed5;
181 	seed6 *= seed6;
182 	seed7 *= seed7;
183 	seed8 *= seed8;
184 	seed9 *= seed9;
185 	seed10 *= seed10;
186 	seed11 *= seed11;
187 	seed12 *= seed12;
188 
189 	int sh1, sh2;
190 	if (seed & 1)
191 	{
192 		sh1 = (seed & 2 ? 4 : 5);
193 		sh2 = (partition_count == 3 ? 6 : 5);
194 	}
195 	else
196 	{
197 		sh1 = (partition_count == 3 ? 6 : 5);
198 		sh2 = (seed & 2 ? 4 : 5);
199 	}
200 
201 	int sh3 = (seed & 0x10) ? sh1 : sh2;
202 
203 	seed1 >>= sh1;
204 	seed2 >>= sh2;
205 	seed3 >>= sh1;
206 	seed4 >>= sh2;
207 	seed5 >>= sh1;
208 	seed6 >>= sh2;
209 	seed7 >>= sh1;
210 	seed8 >>= sh2;
211 
212 	seed9 >>= sh3;
213 	seed10 >>= sh3;
214 	seed11 >>= sh3;
215 	seed12 >>= sh3;
216 
217 	int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
218 	int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
219 	int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
220 	int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);
221 
222 	// Apply the saw
223 	a &= 0x3F;
224 	b &= 0x3F;
225 	c &= 0x3F;
226 	d &= 0x3F;
227 
228 	// Remove some of the components if we are to output < 4 partitions.
229 	if (partition_count <= 3)
230 	{
231 		d = 0;
232 	}
233 
234 	if (partition_count <= 2)
235 	{
236 		c = 0;
237 	}
238 
239 	if (partition_count <= 1)
240 	{
241 		b = 0;
242 	}
243 
244 	uint8_t partition;
245 	if (a >= b && a >= c && a >= d)
246 	{
247 		partition = 0;
248 	}
249 	else if (b >= c && b >= d)
250 	{
251 		partition = 1;
252 	}
253 	else if (c >= d)
254 	{
255 		partition = 2;
256 	}
257 	else
258 	{
259 		partition = 3;
260 	}
261 
262 	return partition;
263 }
264 
265 /**
266  * @brief Generate a single partition info structure.
267  *
268  * @param[out] bsd                     The block size information.
269  * @param      partition_count         The partition count of this partitioning.
270  * @param      partition_index         The partition index / seed of this partitioning.
271  * @param      partition_remap_index   The remapped partition index of this partitioning.
272  * @param[out] pi                      The partition info structure to populate.
273  *
274  * @return True if this is a useful partition index, False if we can skip it.
275  */
generate_one_partition_info_entry(block_size_descriptor & bsd,unsigned int partition_count,unsigned int partition_index,unsigned int partition_remap_index,partition_info & pi)276 static bool generate_one_partition_info_entry(
277 	block_size_descriptor& bsd,
278 	unsigned int partition_count,
279 	unsigned int partition_index,
280 	unsigned int partition_remap_index,
281 	partition_info& pi
282 ) {
283 	int texels_per_block = bsd.texel_count;
284 	bool small_block = texels_per_block < 32;
285 
286 	uint8_t *partition_of_texel = pi.partition_of_texel;
287 
288 	// Assign texels to partitions
289 	int texel_idx = 0;
290 	int counts[BLOCK_MAX_PARTITIONS] { 0 };
291 	for (unsigned int z = 0; z < bsd.zdim; z++)
292 	{
293 		for (unsigned int y = 0; y <  bsd.ydim; y++)
294 		{
295 			for (unsigned int x = 0; x <  bsd.xdim; x++)
296 			{
297 				uint8_t part = select_partition(partition_index, x, y, z, partition_count, small_block);
298 				pi.texels_of_partition[part][counts[part]++] = static_cast<uint8_t>(texel_idx++);
299 				*partition_of_texel++ = part;
300 			}
301 		}
302 	}
303 
304 	// Fill loop tail so we can overfetch later
305 	for (unsigned int i = 0; i < partition_count; i++)
306 	{
307 		int ptex_count = counts[i];
308 		int ptex_count_simd = round_up_to_simd_multiple_vla(ptex_count);
309 		for (int j = ptex_count; j < ptex_count_simd; j++)
310 		{
311 			pi.texels_of_partition[i][j] = pi.texels_of_partition[i][ptex_count - 1];
312 		}
313 	}
314 
315 	// Populate the actual procedural partition count
316 	if (counts[0] == 0)
317 	{
318 		pi.partition_count = 0;
319 	}
320 	else if (counts[1] == 0)
321 	{
322 		pi.partition_count = 1;
323 	}
324 	else if (counts[2] == 0)
325 	{
326 		pi.partition_count = 2;
327 	}
328 	else if (counts[3] == 0)
329 	{
330 		pi.partition_count = 3;
331 	}
332 	else
333 	{
334 		pi.partition_count = 4;
335 	}
336 
337 	// Populate the partition index
338 	pi.partition_index = static_cast<uint16_t>(partition_index);
339 
340 	// Populate the coverage bitmaps for 2/3/4 partitions
341 	uint64_t* bitmaps { nullptr };
342 	if (partition_count == 2)
343 	{
344 		bitmaps = bsd.coverage_bitmaps_2[partition_remap_index];
345 	}
346 	else if (partition_count == 3)
347 	{
348 		bitmaps = bsd.coverage_bitmaps_3[partition_remap_index];
349 	}
350 	else if (partition_count == 4)
351 	{
352 		bitmaps = bsd.coverage_bitmaps_4[partition_remap_index];
353 	}
354 
355 	for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONS; i++)
356 	{
357 		pi.partition_texel_count[i] = static_cast<uint8_t>(counts[i]);
358 	}
359 
360 	// Valid partitionings have texels in all of the requested partitions
361 	bool valid = pi.partition_count == partition_count;
362 
363 	if (bitmaps)
364 	{
365 		// Populate the partition coverage bitmap
366 		for (unsigned int i = 0; i < partition_count; i++)
367 		{
368 			bitmaps[i] = 0ULL;
369 		}
370 
371 		unsigned int texels_to_process = astc::min(bsd.texel_count, BLOCK_MAX_KMEANS_TEXELS);
372 		for (unsigned int i = 0; i < texels_to_process; i++)
373 		{
374 			unsigned int idx = bsd.kmeans_texels[i];
375 			bitmaps[pi.partition_of_texel[idx]] |= 1ULL << i;
376 		}
377 	}
378 
379 	return valid;
380 }
381 
build_partition_table_for_one_partition_count(block_size_descriptor & bsd,bool can_omit_partitionings,unsigned int partition_count_cutoff,unsigned int partition_count,partition_info * ptab,uint64_t * canonical_patterns)382 static void build_partition_table_for_one_partition_count(
383 	block_size_descriptor& bsd,
384 	bool can_omit_partitionings,
385 	unsigned int partition_count_cutoff,
386 	unsigned int partition_count,
387 	partition_info* ptab,
388 	uint64_t* canonical_patterns
389 ) {
390 	unsigned int next_index = 0;
391 	bsd.partitioning_count_selected[partition_count - 1] = 0;
392 	bsd.partitioning_count_all[partition_count - 1] = 0;
393 
394 	// Skip tables larger than config max partition count if we can omit modes
395 	if (can_omit_partitionings && (partition_count > partition_count_cutoff))
396 	{
397 		return;
398 	}
399 
400 	// Iterate through twice
401 	//   - Pass 0: Keep selected partitionings
402 	//   - Pass 1: Keep non-selected partitionings (skip if in omit mode)
403 	unsigned int max_iter = can_omit_partitionings ? 1 : 2;
404 
405 	// Tracker for things we built in the first iteration
406 	uint8_t build[BLOCK_MAX_PARTITIONINGS] { 0 };
407 	for (unsigned int x = 0; x < max_iter; x++)
408 	{
409 		for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONINGS; i++)
410 		{
411 			// Don't include things we built in the first pass
412 			if ((x == 1) && build[i])
413 			{
414 				continue;
415 			}
416 
417 			bool keep_useful = generate_one_partition_info_entry(bsd, partition_count, i, next_index, ptab[next_index]);
418 			if ((x == 0) && !keep_useful)
419 			{
420 				continue;
421 			}
422 
423 			generate_canonical_partitioning(bsd.texel_count, ptab[next_index].partition_of_texel, canonical_patterns + next_index * BIT_PATTERN_WORDS);
424 			bool keep_canonical = true;
425 			for (unsigned int j = 0; j < next_index; j++)
426 			{
427 				bool match = compare_canonical_partitionings(canonical_patterns + next_index * BIT_PATTERN_WORDS, canonical_patterns +  j * BIT_PATTERN_WORDS);
428 				if (match)
429 				{
430 					keep_canonical = false;
431 					break;
432 				}
433 			}
434 
435 			if (keep_useful && keep_canonical)
436 			{
437 				if (x == 0)
438 				{
439 					bsd.partitioning_packed_index[partition_count - 2][i] = static_cast<uint16_t>(next_index);
440 					bsd.partitioning_count_selected[partition_count - 1]++;
441 					bsd.partitioning_count_all[partition_count - 1]++;
442 					build[i] = 1;
443 					next_index++;
444 				}
445 			}
446 			else
447 			{
448 				if (x == 1)
449 				{
450 					bsd.partitioning_packed_index[partition_count - 2][i] = static_cast<uint16_t>(next_index);
451 					bsd.partitioning_count_all[partition_count - 1]++;
452 					next_index++;
453 				}
454 			}
455 		}
456 	}
457 }
458 
459 /* See header for documentation. */
init_partition_tables(block_size_descriptor & bsd,bool can_omit_partitionings,unsigned int partition_count_cutoff)460 void init_partition_tables(
461 	block_size_descriptor& bsd,
462 	bool can_omit_partitionings,
463 	unsigned int partition_count_cutoff
464 ) {
465 	partition_info* par_tab2 = bsd.partitionings;
466 	partition_info* par_tab3 = par_tab2 + BLOCK_MAX_PARTITIONINGS;
467 	partition_info* par_tab4 = par_tab3 + BLOCK_MAX_PARTITIONINGS;
468 	partition_info* par_tab1 = par_tab4 + BLOCK_MAX_PARTITIONINGS;
469 
470 	generate_one_partition_info_entry(bsd, 1, 0, 0, *par_tab1);
471 	bsd.partitioning_count_selected[0] = 1;
472 	bsd.partitioning_count_all[0] = 1;
473 
474 	uint64_t* canonical_patterns = new uint64_t[BLOCK_MAX_PARTITIONINGS * BIT_PATTERN_WORDS];
475 
476 	build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 2, par_tab2, canonical_patterns);
477 	build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 3, par_tab3, canonical_patterns);
478 	build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 4, par_tab4, canonical_patterns);
479 
480 	delete[] canonical_patterns;
481 }
482