1 // SPDX-License-Identifier: Apache-2.0
2 // ----------------------------------------------------------------------------
3 // Copyright 2011-2020 Arm Limited
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
6 // use this file except in compliance with the License. You may obtain a copy
7 // of the License at:
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 // License for the specific language governing permissions and limitations
15 // under the License.
16 // ----------------------------------------------------------------------------
17
18 /**
19 * @brief Functions for generating partition tables on demand.
20 */
21
22 #include "astc_codec_internals.h"
23
24 /*
25 Produce a canonicalized representation of a partition pattern
26
27 The largest possible such representation is 432 bits, equal to 7 uint64_t values.
28 */
gen_canonicalized_partition_table(int texel_count,const uint8_t * partition_table,uint64_t canonicalized[7])29 static void gen_canonicalized_partition_table(
30 int texel_count,
31 const uint8_t* partition_table,
32 uint64_t canonicalized[7]
33 ) {
34 int i;
35 for (i = 0; i < 7; i++)
36 canonicalized[i] = 0;
37
38 int mapped_index[4];
39 int map_weight_count = 0;
40 for (i = 0; i < 4; i++)
41 mapped_index[i] = -1;
42
43 for (i = 0; i < texel_count; i++)
44 {
45 int index = partition_table[i];
46 if (mapped_index[index] == -1)
47 mapped_index[index] = map_weight_count++;
48 uint64_t xlat_index = mapped_index[index];
49 canonicalized[i >> 5] |= xlat_index << (2 * (i & 0x1F));
50 }
51 }
52
compare_canonicalized_partition_tables(const uint64_t part1[7],const uint64_t part2[7])53 static int compare_canonicalized_partition_tables(
54 const uint64_t part1[7],
55 const uint64_t part2[7]
56 ) {
57 if (part1[0] != part2[0])
58 return 0;
59 if (part1[1] != part2[1])
60 return 0;
61 if (part1[2] != part2[2])
62 return 0;
63 if (part1[3] != part2[3])
64 return 0;
65 if (part1[4] != part2[4])
66 return 0;
67 if (part1[5] != part2[5])
68 return 0;
69 if (part1[6] != part2[6])
70 return 0;
71 return 1;
72 }
73
74 /*
75 For a partition table, detect partitionss that are equivalent, then mark them as invalid. This reduces the number of partitions that the codec has to consider and thus improves encode
76 performance. */
partition_table_zap_equal_elements(int texel_count,partition_info * pi)77 static void partition_table_zap_equal_elements(
78 int texel_count,
79 partition_info* pi
80 ) {
81 int partition_tables_zapped = 0;
82 int i, j;
83 uint64_t *canonicalizeds = new uint64_t[PARTITION_COUNT * 7];
84
85
86 for (i = 0; i < PARTITION_COUNT; i++)
87 {
88 gen_canonicalized_partition_table(texel_count, pi[i].partition_of_texel, canonicalizeds + i * 7);
89 }
90
91 for (i = 0; i < PARTITION_COUNT; i++)
92 {
93 for (j = 0; j < i; j++)
94 {
95 if (compare_canonicalized_partition_tables(canonicalizeds + 7 * i, canonicalizeds + 7 * j))
96 {
97 pi[i].partition_count = 0;
98 partition_tables_zapped++;
99 break;
100 }
101 }
102 }
103 delete[]canonicalizeds;
104 }
105
hash52(uint32_t inp)106 static uint32_t hash52(uint32_t inp)
107 {
108 inp ^= inp >> 15;
109
110 inp *= 0xEEDE0891; // (2^4+1)*(2^7+1)*(2^17-1)
111 inp ^= inp >> 5;
112 inp += inp << 16;
113 inp ^= inp >> 7;
114 inp ^= inp >> 3;
115 inp ^= inp << 6;
116 inp ^= inp >> 17;
117 return inp;
118 }
119
select_partition(int seed,int x,int y,int z,int partitioncount,int small_block)120 static int select_partition(
121 int seed,
122 int x,
123 int y,
124 int z,
125 int partitioncount,
126 int small_block
127 ) {
128 if (small_block)
129 {
130 x <<= 1;
131 y <<= 1;
132 z <<= 1;
133 }
134
135 seed += (partitioncount - 1) * 1024;
136
137 uint32_t rnum = hash52(seed);
138
139 uint8_t seed1 = rnum & 0xF;
140 uint8_t seed2 = (rnum >> 4) & 0xF;
141 uint8_t seed3 = (rnum >> 8) & 0xF;
142 uint8_t seed4 = (rnum >> 12) & 0xF;
143 uint8_t seed5 = (rnum >> 16) & 0xF;
144 uint8_t seed6 = (rnum >> 20) & 0xF;
145 uint8_t seed7 = (rnum >> 24) & 0xF;
146 uint8_t seed8 = (rnum >> 28) & 0xF;
147 uint8_t seed9 = (rnum >> 18) & 0xF;
148 uint8_t seed10 = (rnum >> 22) & 0xF;
149 uint8_t seed11 = (rnum >> 26) & 0xF;
150 uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;
151
152 // squaring all the seeds in order to bias their distribution
153 // towards lower values.
154 seed1 *= seed1;
155 seed2 *= seed2;
156 seed3 *= seed3;
157 seed4 *= seed4;
158 seed5 *= seed5;
159 seed6 *= seed6;
160 seed7 *= seed7;
161 seed8 *= seed8;
162 seed9 *= seed9;
163 seed10 *= seed10;
164 seed11 *= seed11;
165 seed12 *= seed12;
166
167 int sh1, sh2, sh3;
168 if (seed & 1)
169 {
170 sh1 = (seed & 2 ? 4 : 5);
171 sh2 = (partitioncount == 3 ? 6 : 5);
172 }
173 else
174 {
175 sh1 = (partitioncount == 3 ? 6 : 5);
176 sh2 = (seed & 2 ? 4 : 5);
177 }
178 sh3 = (seed & 0x10) ? sh1 : sh2;
179
180 seed1 >>= sh1;
181 seed2 >>= sh2;
182 seed3 >>= sh1;
183 seed4 >>= sh2;
184 seed5 >>= sh1;
185 seed6 >>= sh2;
186 seed7 >>= sh1;
187 seed8 >>= sh2;
188
189 seed9 >>= sh3;
190 seed10 >>= sh3;
191 seed11 >>= sh3;
192 seed12 >>= sh3;
193
194 int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
195 int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
196 int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
197 int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);
198
199 // apply the saw
200 a &= 0x3F;
201 b &= 0x3F;
202 c &= 0x3F;
203 d &= 0x3F;
204
205 // remove some of the components if we are to output < 4 partitions.
206 if (partitioncount <= 3)
207 d = 0;
208 if (partitioncount <= 2)
209 c = 0;
210 if (partitioncount <= 1)
211 b = 0;
212
213 int partition;
214 if (a >= b && a >= c && a >= d)
215 partition = 0;
216 else if (b >= c && b >= d)
217 partition = 1;
218 else if (c >= d)
219 partition = 2;
220 else
221 partition = 3;
222 return partition;
223 }
224
generate_one_partition_table(const block_size_descriptor * bsd,int partition_count,int partition_index,partition_info * pt)225 static void generate_one_partition_table(
226 const block_size_descriptor* bsd,
227 int partition_count,
228 int partition_index,
229 partition_info* pt
230 ) {
231 int texels_per_block = bsd->texel_count;
232 int small_block = texels_per_block < 32;
233
234 uint8_t *partition_of_texel = pt->partition_of_texel;
235 int x, y, z, i;
236
237 for (z = 0; z < bsd->zdim; z++)
238 for (y = 0; y < bsd->ydim; y++)
239 for (x = 0; x < bsd->xdim; x++)
240 {
241 uint8_t part = select_partition(partition_index, x, y, z, partition_count, small_block);
242 *partition_of_texel++ = part;
243 }
244
245 int counts[4];
246 for (i = 0; i < 4; i++)
247 counts[i] = 0;
248
249 for (i = 0; i < texels_per_block; i++)
250 {
251 int partition = pt->partition_of_texel[i];
252 counts[partition]++;
253 }
254
255 if (counts[0] == 0)
256 pt->partition_count = 0;
257 else if (counts[1] == 0)
258 pt->partition_count = 1;
259 else if (counts[2] == 0)
260 pt->partition_count = 2;
261 else if (counts[3] == 0)
262 pt->partition_count = 3;
263 else
264 pt->partition_count = 4;
265 }
266
267 /* Public function, see header file for detailed documentation */
init_partition_tables(block_size_descriptor * bsd)268 void init_partition_tables(
269 block_size_descriptor* bsd
270 ) {
271 partition_info *par_tab2 = bsd->partitions;
272 partition_info *par_tab3 = par_tab2 + PARTITION_COUNT;
273 partition_info *par_tab4 = par_tab3 + PARTITION_COUNT;
274 partition_info *par_tab1 = par_tab4 + PARTITION_COUNT;
275
276 generate_one_partition_table(bsd, 1, 0, par_tab1);
277 for (int i = 0; i < 1024; i++)
278 {
279 generate_one_partition_table(bsd, 2, i, par_tab2 + i);
280 generate_one_partition_table(bsd, 3, i, par_tab3 + i);
281 generate_one_partition_table(bsd, 4, i, par_tab4 + i);
282 }
283
284 partition_table_zap_equal_elements(bsd->texel_count, par_tab2);
285 partition_table_zap_equal_elements(bsd->texel_count, par_tab3);
286 partition_table_zap_equal_elements(bsd->texel_count, par_tab4);
287 }
288