1 // SPDX-License-Identifier: Apache-2.0
2 // ----------------------------------------------------------------------------
3 // Copyright 2011-2020 Arm Limited
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
6 // use this file except in compliance with the License. You may obtain a copy
7 // of the License at:
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 // License for the specific language governing permissions and limitations
15 // under the License.
16 // ----------------------------------------------------------------------------
17
18 /**
19 * @brief Functions for generating partition tables on demand.
20 */
21
22 #include "astc_codec_internals.h"
23
24 /*
25 Produce a canonicalized representation of a partition pattern
26
27 The largest possible such representation is 432 bits, equal to 7 uint64_t values.
28 */
gen_canonicalized_partition_table(int texel_count,const uint8_t * partition_table,uint64_t canonicalized[7])29 static void gen_canonicalized_partition_table(
30 int texel_count,
31 const uint8_t* partition_table,
32 uint64_t canonicalized[7]
33 ) {
34 int i;
35 for (i = 0; i < 7; i++)
36 canonicalized[i] = 0;
37
38 int mapped_index[4];
39 int map_weight_count = 0;
40 for (i = 0; i < 4; i++)
41 mapped_index[i] = -1;
42
43 for (i = 0; i < texel_count; i++)
44 {
45 int index = partition_table[i];
46 if (mapped_index[index] == -1)
47 mapped_index[index] = map_weight_count++;
48 uint64_t xlat_index = mapped_index[index];
49 canonicalized[i >> 5] |= xlat_index << (2 * (i & 0x1F));
50 }
51 }
52
compare_canonicalized_partition_tables(const uint64_t part1[7],const uint64_t part2[7])53 static int compare_canonicalized_partition_tables(
54 const uint64_t part1[7],
55 const uint64_t part2[7]
56 ) {
57 if (part1[0] != part2[0])
58 return 0;
59 if (part1[1] != part2[1])
60 return 0;
61 if (part1[2] != part2[2])
62 return 0;
63 if (part1[3] != part2[3])
64 return 0;
65 if (part1[4] != part2[4])
66 return 0;
67 if (part1[5] != part2[5])
68 return 0;
69 if (part1[6] != part2[6])
70 return 0;
71 return 1;
72 }
73
74 /*
75 For a partition table, detect partitionss that are equivalent, then mark them as invalid. This reduces the number of partitions that the codec has to consider and thus improves encode
76 performance. */
partition_table_zap_equal_elements(int texel_count,partition_info * pi)77 static void partition_table_zap_equal_elements(
78 int texel_count,
79 partition_info* pi
80 ) {
81 int i, j;
82 uint64_t *canonicalizeds = new uint64_t[PARTITION_COUNT * 7];
83
84
85 for (i = 0; i < PARTITION_COUNT; i++)
86 {
87 gen_canonicalized_partition_table(texel_count, pi[i].partition_of_texel, canonicalizeds + i * 7);
88 }
89
90 for (i = 0; i < PARTITION_COUNT; i++)
91 {
92 for (j = 0; j < i; j++)
93 {
94 if (compare_canonicalized_partition_tables(canonicalizeds + 7 * i, canonicalizeds + 7 * j))
95 {
96 pi[i].partition_count = 0;
97 break;
98 }
99 }
100 }
101 delete[]canonicalizeds;
102 }
103
hash52(uint32_t inp)104 static uint32_t hash52(uint32_t inp)
105 {
106 inp ^= inp >> 15;
107
108 inp *= 0xEEDE0891; // (2^4+1)*(2^7+1)*(2^17-1)
109 inp ^= inp >> 5;
110 inp += inp << 16;
111 inp ^= inp >> 7;
112 inp ^= inp >> 3;
113 inp ^= inp << 6;
114 inp ^= inp >> 17;
115 return inp;
116 }
117
select_partition(int seed,int x,int y,int z,int partitioncount,int small_block)118 static int select_partition(
119 int seed,
120 int x,
121 int y,
122 int z,
123 int partitioncount,
124 int small_block
125 ) {
126 if (small_block)
127 {
128 x <<= 1;
129 y <<= 1;
130 z <<= 1;
131 }
132
133 seed += (partitioncount - 1) * 1024;
134
135 uint32_t rnum = hash52(seed);
136
137 uint8_t seed1 = rnum & 0xF;
138 uint8_t seed2 = (rnum >> 4) & 0xF;
139 uint8_t seed3 = (rnum >> 8) & 0xF;
140 uint8_t seed4 = (rnum >> 12) & 0xF;
141 uint8_t seed5 = (rnum >> 16) & 0xF;
142 uint8_t seed6 = (rnum >> 20) & 0xF;
143 uint8_t seed7 = (rnum >> 24) & 0xF;
144 uint8_t seed8 = (rnum >> 28) & 0xF;
145 uint8_t seed9 = (rnum >> 18) & 0xF;
146 uint8_t seed10 = (rnum >> 22) & 0xF;
147 uint8_t seed11 = (rnum >> 26) & 0xF;
148 uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;
149
150 // squaring all the seeds in order to bias their distribution
151 // towards lower values.
152 seed1 *= seed1;
153 seed2 *= seed2;
154 seed3 *= seed3;
155 seed4 *= seed4;
156 seed5 *= seed5;
157 seed6 *= seed6;
158 seed7 *= seed7;
159 seed8 *= seed8;
160 seed9 *= seed9;
161 seed10 *= seed10;
162 seed11 *= seed11;
163 seed12 *= seed12;
164
165 int sh1, sh2, sh3;
166 if (seed & 1)
167 {
168 sh1 = (seed & 2 ? 4 : 5);
169 sh2 = (partitioncount == 3 ? 6 : 5);
170 }
171 else
172 {
173 sh1 = (partitioncount == 3 ? 6 : 5);
174 sh2 = (seed & 2 ? 4 : 5);
175 }
176 sh3 = (seed & 0x10) ? sh1 : sh2;
177
178 seed1 >>= sh1;
179 seed2 >>= sh2;
180 seed3 >>= sh1;
181 seed4 >>= sh2;
182 seed5 >>= sh1;
183 seed6 >>= sh2;
184 seed7 >>= sh1;
185 seed8 >>= sh2;
186
187 seed9 >>= sh3;
188 seed10 >>= sh3;
189 seed11 >>= sh3;
190 seed12 >>= sh3;
191
192 int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
193 int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
194 int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
195 int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);
196
197 // apply the saw
198 a &= 0x3F;
199 b &= 0x3F;
200 c &= 0x3F;
201 d &= 0x3F;
202
203 // remove some of the components if we are to output < 4 partitions.
204 if (partitioncount <= 3)
205 d = 0;
206 if (partitioncount <= 2)
207 c = 0;
208 if (partitioncount <= 1)
209 b = 0;
210
211 int partition;
212 if (a >= b && a >= c && a >= d)
213 partition = 0;
214 else if (b >= c && b >= d)
215 partition = 1;
216 else if (c >= d)
217 partition = 2;
218 else
219 partition = 3;
220 return partition;
221 }
222
generate_one_partition_table(const block_size_descriptor * bsd,int partition_count,int partition_index,partition_info * pt)223 static void generate_one_partition_table(
224 const block_size_descriptor* bsd,
225 int partition_count,
226 int partition_index,
227 partition_info* pt
228 ) {
229 int texels_per_block = bsd->texel_count;
230 int small_block = texels_per_block < 32;
231
232 uint8_t *partition_of_texel = pt->partition_of_texel;
233 int x, y, z, i;
234
235 for (z = 0; z < bsd->zdim; z++)
236 for (y = 0; y < bsd->ydim; y++)
237 for (x = 0; x < bsd->xdim; x++)
238 {
239 uint8_t part = select_partition(partition_index, x, y, z, partition_count, small_block);
240 *partition_of_texel++ = part;
241 }
242
243 int counts[4];
244 for (i = 0; i < 4; i++)
245 counts[i] = 0;
246
247 for (i = 0; i < texels_per_block; i++)
248 {
249 int partition = pt->partition_of_texel[i];
250 counts[partition]++;
251 }
252
253 if (counts[0] == 0)
254 pt->partition_count = 0;
255 else if (counts[1] == 0)
256 pt->partition_count = 1;
257 else if (counts[2] == 0)
258 pt->partition_count = 2;
259 else if (counts[3] == 0)
260 pt->partition_count = 3;
261 else
262 pt->partition_count = 4;
263 }
264
265 /* Public function, see header file for detailed documentation */
init_partition_tables(block_size_descriptor * bsd)266 void init_partition_tables(
267 block_size_descriptor* bsd
268 ) {
269 partition_info *par_tab2 = bsd->partitions;
270 partition_info *par_tab3 = par_tab2 + PARTITION_COUNT;
271 partition_info *par_tab4 = par_tab3 + PARTITION_COUNT;
272 partition_info *par_tab1 = par_tab4 + PARTITION_COUNT;
273
274 generate_one_partition_table(bsd, 1, 0, par_tab1);
275 for (int i = 0; i < 1024; i++)
276 {
277 generate_one_partition_table(bsd, 2, i, par_tab2 + i);
278 generate_one_partition_table(bsd, 3, i, par_tab3 + i);
279 generate_one_partition_table(bsd, 4, i, par_tab4 + i);
280 }
281
282 partition_table_zap_equal_elements(bsd->texel_count, par_tab2);
283 partition_table_zap_equal_elements(bsd->texel_count, par_tab3);
284 partition_table_zap_equal_elements(bsd->texel_count, par_tab4);
285 }
286