1 /*
2 * Copyright (C) 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /*
25 * Included by texcompress_bptc and gallium to define BPTC decoding routines.
26 */
27
28 #ifndef TEXCOMPRESS_BPTC_TMP_H
29 #define TEXCOMPRESS_BPTC_TMP_H
30
31 #include "util/format_srgb.h"
32 #include "util/half_float.h"
33 #include "macros.h"
34
35 #define BLOCK_SIZE 4
36 #define N_PARTITIONS 64
37 #define BLOCK_BYTES 16
38
/* Bit layout of one BPTC UNORM block mode.  One entry per mode lives in
 * bptc_unorm_modes, which initialises these fields positionally, so the
 * field order here must not change.
 */
struct bptc_unorm_mode {
   /* Number of endpoint pairs stored in the block (1, 2 or 3). */
   int n_subsets;
   /* Width of the partition-number field; 0 when there is none. */
   int n_partition_bits;
   /* True when a 2-bit rotation field follows the partition number. */
   bool has_rotation_bits;
   /* True when a 1-bit index selector follows the rotation field. */
   bool has_index_selection_bit;
   /* Stored bits per R/G/B endpoint component (before any p-bit). */
   int n_color_bits;
   /* Stored bits per alpha endpoint; 0 means alpha is forced to 255. */
   int n_alpha_bits;
   /* True when every endpoint carries its own extra low bit. */
   bool has_endpoint_pbits;
   /* True when both endpoints of a subset share one extra low bit. */
   bool has_shared_pbits;
   /* Bits per texel in the primary index (anchor texels use one fewer). */
   int n_index_bits;
   /* Bits per texel in the secondary index; 0 when there is none. */
   int n_secondary_index_bits;
};
51
/* One run of consecutive block bits and the place it lands in the endpoint
 * array.  A list of these is terminated by an entry whose endpoint is -1.
 */
struct bptc_float_bitfield {
   /* Destination endpoint index, or -1 to mark the end of the list. */
   int8_t endpoint;
   /* Destination colour component index within that endpoint. */
   uint8_t component;
   /* Bit position inside the component where the run is placed. */
   uint8_t offset;
   /* Number of bits read from the block for this run. */
   uint8_t n_bits;
   /* True when the bits are stored in reversed order in the block. */
   bool reverse;
};
59
/* Bit layout of one BPTC float block mode.  Entries of bptc_float_modes
 * initialise these fields positionally, so the order must not change.
 */
struct bptc_float_mode {
   /* True for mode numbers that have no defined layout; such blocks decode
    * to black with alpha 1.0 and the remaining fields are unused.
    */
   bool reserved;
   /* True when endpoints other than the first are stored as signed offsets
    * from the first endpoint.
    */
   bool transformed_endpoints;
   /* Width of the partition-number field; 0 selects a single subset. */
   int n_partition_bits;
   /* Precision of a fully assembled endpoint component. */
   int n_endpoint_bits;
   /* Bits per texel in the index (anchor texels use one fewer). */
   int n_index_bits;
   /* Per-component width used to sign-extend the stored offsets. */
   int n_delta_bits[3];
   /* Bit runs in the order they appear in the block, ending with an entry
    * whose endpoint is -1.
    */
   struct bptc_float_bitfield bitfields[24];
};
69
/* State for writing a bit stream one byte at a time.
 * NOTE(review): no user of this struct is visible in this part of the file;
 * the field descriptions below are inferred from the names only and should
 * be confirmed against the code that uses it.
 */
struct bit_writer {
   uint8_t buf;   /* presumably the partially filled output byte */
   int pos;       /* presumably the number of bits already held in buf */
   uint8_t *dst;  /* presumably where the next completed byte is stored */
};
75
/* Layout of the eight UNORM block modes, indexed by mode number (the
 * position of the lowest set bit in the first block byte).  Columns follow
 * the field order of struct bptc_unorm_mode:
 *   n_subsets, n_partition_bits, has_rotation_bits,
 *   has_index_selection_bit, n_color_bits, n_alpha_bits,
 *   has_endpoint_pbits, has_shared_pbits, n_index_bits,
 *   n_secondary_index_bits
 */
static const struct bptc_unorm_mode
bptc_unorm_modes[] = {
   /* 0 */ { 3, 4, false, false, 4, 0, true, false, 3, 0 },
   /* 1 */ { 2, 6, false, false, 6, 0, false, true, 3, 0 },
   /* 2 */ { 3, 6, false, false, 5, 0, false, false, 2, 0 },
   /* 3 */ { 2, 6, false, false, 7, 0, true, false, 2, 0 },
   /* 4 */ { 1, 0, true, true, 5, 6, false, false, 2, 3 },
   /* 5 */ { 1, 0, true, false, 7, 8, false, false, 2, 2 },
   /* 6 */ { 1, 0, false, false, 7, 7, true, false, 4, 0 },
   /* 7 */ { 2, 6, false, false, 5, 5, true, false, 2, 0 }
};
87
/* Layout of the float block modes.
 *
 * The table is indexed by the mode number the float decoders derive from
 * the first block byte: the 2-bit modes 00 and 01 are entries 0 and 1, and
 * the 5-bit modes follow from entry 2 onwards.  The comment above each
 * entry gives the mode's bit pattern.
 *
 * Each non-reserved entry lists, in struct bptc_float_mode field order:
 *   reserved, transformed_endpoints, n_partition_bits, n_endpoint_bits,
 *   n_index_bits, { n_delta_bits for each component },
 *   { bit runs in block order, terminated by { -1 } }
 *
 * Each bit run is { endpoint, component, offset, n_bits, reverse } as
 * described by struct bptc_float_bitfield.
 */
static const struct bptc_float_mode
bptc_float_modes[] = {
   /* 00 */
   { false, true, 5, 10, 3, { 5, 5, 5 },
     { { 2, 1, 4, 1, false }, { 2, 2, 4, 1, false }, { 3, 2, 4, 1, false },
       { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
       { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
       { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 01 */
   { false, true, 5, 7, 3, { 6, 6, 6 },
     { { 2, 1, 5, 1, false }, { 3, 1, 4, 1, false }, { 3, 1, 5, 1, false },
       { 0, 0, 0, 7, false }, { 3, 2, 0, 1, false }, { 3, 2, 1, 1, false },
       { 2, 2, 4, 1, false }, { 0, 1, 0, 7, false }, { 2, 2, 5, 1, false },
       { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false }, { 0, 2, 0, 7, false },
       { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 6, false },
       { 3, 0, 0, 6, false },
       { -1 } }
   },
   /* 00010 */
   { false, true, 5, 11, 3, { 5, 4, 4 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 5, false }, { 0, 0, 10, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false }, { 3, 2, 0, 1, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 00011 */
   { false, false, 0, 10, 4, { 10, 10, 10 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 10, false }, { 1, 1, 0, 10, false }, { 1, 2, 0, 10, false },
       { -1 } }
   },
   /* 00110 */
   { false, true, 5, 11, 3, { 4, 5, 4 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 3, 1, 4, 1, false },
       { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false }, { 0, 1, 10, 1, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
       { 3, 2, 0, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
       { 2, 1, 4, 1, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 00111 */
   { false, true, 0, 11, 4, { 9, 9, 9 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 9, false }, { 0, 0, 10, 1, false }, { 1, 1, 0, 9, false },
       { 0, 1, 10, 1, false }, { 1, 2, 0, 9, false }, { 0, 2, 10, 1, false },
       { -1 } }
   },
   /* 01010 */
   { false, true, 5, 11, 3, { 4, 4, 5 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 2, 2, 4, 1, false },
       { 2, 1, 0, 4, false }, { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false },
       { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
       { 0, 2, 10, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
       { 3, 2, 1, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
       { 3, 2, 4, 1, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 01011 */
   { false, true, 0, 12, 4, { 8, 8, 8 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 8, false }, { 0, 0, 10, 2, true }, { 1, 1, 0, 8, false },
       { 0, 1, 10, 2, true }, { 1, 2, 0, 8, false }, { 0, 2, 10, 2, true },
       { -1 } }
   },
   /* 01110 */
   { false, true, 5, 9, 3, { 5, 5, 5 },
     { { 0, 0, 0, 9, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 9, false },
       { 2, 1, 4, 1, false }, { 0, 2, 0, 9, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
       { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
       { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 01111 */
   { false, true, 0, 16, 4, { 4, 4, 4 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 4, false }, { 0, 0, 10, 6, true }, { 1, 1, 0, 4, false },
       { 0, 1, 10, 6, true }, { 1, 2, 0, 4, false }, { 0, 2, 10, 6, true },
       { -1 } }
   },
   /* 10010 */
   { false, true, 5, 8, 3, { 6, 5, 5 },
     { { 0, 0, 0, 8, false }, { 3, 1, 4, 1, false }, { 2, 2, 4, 1, false },
       { 0, 1, 0, 8, false }, { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false },
       { 0, 2, 0, 8, false }, { 3, 2, 3, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false },
       { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 6, false },
       { 3, 0, 0, 6, false },
       { -1 } }
   },
   /* 10011 */
   { true /* reserved */ },
   /* 10110 */
   { false, true, 5, 8, 3, { 5, 6, 5 },
     { { 0, 0, 0, 8, false }, { 3, 2, 0, 1, false }, { 2, 2, 4, 1, false },
       { 0, 1, 0, 8, false }, { 2, 1, 5, 1, false }, { 2, 1, 4, 1, false },
       { 0, 2, 0, 8, false }, { 3, 1, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 6, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 10111 */
   { true /* reserved */ },
   /* 11010 */
   { false, true, 5, 8, 3, { 5, 5, 6 },
     { { 0, 0, 0, 8, false }, { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false },
       { 0, 1, 0, 8, false }, { 2, 2, 5, 1, false }, { 2, 1, 4, 1, false },
       { 0, 2, 0, 8, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
       { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 11011 */
   { true /* reserved */ },
   /* 11110 */
   { false, false, 5, 6, 3, { 6, 6, 6 },
     { { 0, 0, 0, 6, false }, { 3, 1, 4, 1, false }, { 3, 2, 0, 1, false },
       { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 6, false },
       { 2, 1, 5, 1, false }, { 2, 2, 5, 1, false }, { 3, 2, 2, 1, false },
       { 2, 1, 4, 1, false }, { 0, 2, 0, 6, false }, { 3, 1, 5, 1, false },
       { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 6, false }, { 3, 0, 0, 6, false },
       { -1 } }
   },
   /* 11111 */
   { true /* reserved */ },
};
238
/* This partition table is used when the mode has two subsets. Each
 * partition is represented by a 32-bit value which gives 2 bits per texel
 * within the block. The value of the two bits represents which subset to use
 * (0 or 1).
 *
 * The table is indexed by the partition number read from the block; the
 * subset of texel t is (value >> (2 * t)) & 3.  The float decoder uses the
 * same table for its two-subset modes.
 */
static const uint32_t
partition_table1[N_PARTITIONS] = {
   0x50505050U, 0x40404040U, 0x54545454U, 0x54505040U,
   0x50404000U, 0x55545450U, 0x55545040U, 0x54504000U,
   0x50400000U, 0x55555450U, 0x55544000U, 0x54400000U,
   0x55555440U, 0x55550000U, 0x55555500U, 0x55000000U,
   0x55150100U, 0x00004054U, 0x15010000U, 0x00405054U,
   0x00004050U, 0x15050100U, 0x05010000U, 0x40505054U,
   0x00404050U, 0x05010100U, 0x14141414U, 0x05141450U,
   0x01155440U, 0x00555500U, 0x15014054U, 0x05414150U,
   0x44444444U, 0x55005500U, 0x11441144U, 0x05055050U,
   0x05500550U, 0x11114444U, 0x41144114U, 0x44111144U,
   0x15055054U, 0x01055040U, 0x05041050U, 0x05455150U,
   0x14414114U, 0x50050550U, 0x41411414U, 0x00141400U,
   0x00041504U, 0x00105410U, 0x10541000U, 0x04150400U,
   0x50410514U, 0x41051450U, 0x05415014U, 0x14054150U,
   0x41050514U, 0x41505014U, 0x40011554U, 0x54150140U,
   0x50505500U, 0x00555050U, 0x15151010U, 0x54540404U,
};
263
/* This partition table is used when the mode has three subsets. In this case
 * the values can be 0, 1 or 2.
 *
 * As with the two-subset table, each entry packs 2 bits per texel and the
 * subset of texel t is (value >> (2 * t)) & 3.
 */
static const uint32_t
partition_table2[N_PARTITIONS] = {
   0xaa685050U, 0x6a5a5040U, 0x5a5a4200U, 0x5450a0a8U,
   0xa5a50000U, 0xa0a05050U, 0x5555a0a0U, 0x5a5a5050U,
   0xaa550000U, 0xaa555500U, 0xaaaa5500U, 0x90909090U,
   0x94949494U, 0xa4a4a4a4U, 0xa9a59450U, 0x2a0a4250U,
   0xa5945040U, 0x0a425054U, 0xa5a5a500U, 0x55a0a0a0U,
   0xa8a85454U, 0x6a6a4040U, 0xa4a45000U, 0x1a1a0500U,
   0x0050a4a4U, 0xaaa59090U, 0x14696914U, 0x69691400U,
   0xa08585a0U, 0xaa821414U, 0x50a4a450U, 0x6a5a0200U,
   0xa9a58000U, 0x5090a0a8U, 0xa8a09050U, 0x24242424U,
   0x00aa5500U, 0x24924924U, 0x24499224U, 0x50a50a50U,
   0x500aa550U, 0xaaaa4444U, 0x66660000U, 0xa5a0a5a0U,
   0x50a050a0U, 0x69286928U, 0x44aaaa44U, 0x66666600U,
   0xaa444444U, 0x54a854a8U, 0x95809580U, 0x96969600U,
   0xa85454a8U, 0x80959580U, 0xaa141414U, 0x96960000U,
   0xaaaa1414U, 0xa05050a0U, 0xa0a5a5a0U, 0x96000000U,
   0x40804080U, 0xa9a8a9a8U, 0xaaaaaa44U, 0x2a4a5254U
};
286
/* Texel numbers of the anchor texels other than texel 0, indexed first by
 * row and then by partition number.  Row 0 serves two-subset modes; rows 1
 * and 2 together serve three-subset modes.  Texel 0 is always an anchor and
 * is therefore not stored here.  An anchor texel's index is read with one
 * bit fewer than the other texels.
 */
static const uint8_t
anchor_indices[][N_PARTITIONS] = {
   /* Anchor index values for the second subset of two-subset partitioning */
   {
      0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
      0xf,0x2,0x8,0x2,0x2,0x8,0x8,0xf,0x2,0x8,0x2,0x2,0x8,0x8,0x2,0x2,
      0xf,0xf,0x6,0x8,0x2,0x8,0xf,0xf,0x2,0x8,0x2,0x2,0x2,0xf,0xf,0x6,
      0x6,0x2,0x6,0x8,0xf,0xf,0x2,0x2,0xf,0xf,0xf,0xf,0xf,0x2,0x2,0xf
   },

   /* Anchor index values for the second subset of three-subset partitioning */
   {
      0x3,0x3,0xf,0xf,0x8,0x3,0xf,0xf,0x8,0x8,0x6,0x6,0x6,0x5,0x3,0x3,
      0x3,0x3,0x8,0xf,0x3,0x3,0x6,0xa,0x5,0x8,0x8,0x6,0x8,0x5,0xf,0xf,
      0x8,0xf,0x3,0x5,0x6,0xa,0x8,0xf,0xf,0x3,0xf,0x5,0xf,0xf,0xf,0xf,
      0x3,0xf,0x5,0x5,0x5,0x8,0x5,0xa,0x5,0xa,0x8,0xd,0xf,0xc,0x3,0x3
   },

   /* Anchor index values for the third subset of three-subset
    * partitioning
    */
   {
      0xf,0x8,0x8,0x3,0xf,0xf,0x3,0x8,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x8,
      0xf,0x8,0xf,0x3,0xf,0x8,0xf,0x8,0x3,0xf,0x6,0xa,0xf,0xf,0xa,0x8,
      0xf,0x3,0xf,0xa,0xa,0x8,0x9,0xa,0x6,0xf,0x8,0xf,0x3,0x6,0x6,0x8,
      0xf,0x3,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x3,0xf,0xf,0x8
   }
};
315
/* Reads an n_bits wide field that starts offset bits into block.
 *
 * Bits are numbered from the least-significant bit of block[0] upwards, and
 * the first bit read becomes bit 0 of the result.  A field may straddle any
 * number of byte boundaries.
 *
 * A request for zero (or fewer) bits returns 0 without touching block.  This
 * matters when offset already points at the end of the data: reading a byte
 * there just to mask it away would be an out-of-bounds access.
 */
static int
extract_bits(const uint8_t *block,
             int offset,
             int n_bits)
{
   int byte_index = offset / 8;
   int bit_index = offset % 8;
   int result = 0;
   int bit = 0;

   while (n_bits > 0) {
      /* Take whatever is left in the current byte, capped at the request. */
      const int available = 8 - bit_index;
      const int take = n_bits < available ? n_bits : available;

      result |= ((block[byte_index] >> bit_index) & ((1 << take) - 1)) << bit;

      n_bits -= take;
      bit += take;

      /* Every byte after the first is consumed from its lowest bit. */
      byte_index++;
      bit_index = 0;
   }

   return result;
}
342
/* Widens an n_bits wide value to a full byte.
 *
 * The value is moved to the top of the byte and its own most-significant
 * bits are repeated in the vacated low bits, so that the largest n-bit
 * value maps to 255.  Both shift counts are non-negative only for n_bits
 * between 4 and 8.
 */
static uint8_t
expand_component(uint8_t byte,
                 int n_bits)
{
   const int n_missing = 8 - n_bits;
   const int n_dropped = n_bits - n_missing;

   return (byte << n_missing) | (byte >> n_dropped);
}
352
353 static int
extract_unorm_endpoints(const struct bptc_unorm_mode * mode,const uint8_t * block,int bit_offset,uint8_t endpoints[][4])354 extract_unorm_endpoints(const struct bptc_unorm_mode *mode,
355 const uint8_t *block,
356 int bit_offset,
357 uint8_t endpoints[][4])
358 {
359 int component;
360 int subset;
361 int endpoint;
362 int pbit;
363 int n_components;
364
365 /* Extract each color component */
366 for (component = 0; component < 3; component++) {
367 for (subset = 0; subset < mode->n_subsets; subset++) {
368 for (endpoint = 0; endpoint < 2; endpoint++) {
369 endpoints[subset * 2 + endpoint][component] =
370 extract_bits(block, bit_offset, mode->n_color_bits);
371 bit_offset += mode->n_color_bits;
372 }
373 }
374 }
375
376 /* Extract the alpha values */
377 if (mode->n_alpha_bits > 0) {
378 for (subset = 0; subset < mode->n_subsets; subset++) {
379 for (endpoint = 0; endpoint < 2; endpoint++) {
380 endpoints[subset * 2 + endpoint][3] =
381 extract_bits(block, bit_offset, mode->n_alpha_bits);
382 bit_offset += mode->n_alpha_bits;
383 }
384 }
385
386 n_components = 4;
387 } else {
388 for (subset = 0; subset < mode->n_subsets; subset++)
389 for (endpoint = 0; endpoint < 2; endpoint++)
390 endpoints[subset * 2 + endpoint][3] = 255;
391
392 n_components = 3;
393 }
394
395 /* Add in the p-bits */
396 if (mode->has_endpoint_pbits) {
397 for (subset = 0; subset < mode->n_subsets; subset++) {
398 for (endpoint = 0; endpoint < 2; endpoint++) {
399 pbit = extract_bits(block, bit_offset, 1);
400 bit_offset += 1;
401
402 for (component = 0; component < n_components; component++) {
403 endpoints[subset * 2 + endpoint][component] <<= 1;
404 endpoints[subset * 2 + endpoint][component] |= pbit;
405 }
406 }
407 }
408 } else if (mode->has_shared_pbits) {
409 for (subset = 0; subset < mode->n_subsets; subset++) {
410 pbit = extract_bits(block, bit_offset, 1);
411 bit_offset += 1;
412
413 for (endpoint = 0; endpoint < 2; endpoint++) {
414 for (component = 0; component < n_components; component++) {
415 endpoints[subset * 2 + endpoint][component] <<= 1;
416 endpoints[subset * 2 + endpoint][component] |= pbit;
417 }
418 }
419 }
420 }
421
422 /* Expand the n-bit values to a byte */
423 for (subset = 0; subset < mode->n_subsets; subset++) {
424 for (endpoint = 0; endpoint < 2; endpoint++) {
425 for (component = 0; component < 3; component++) {
426 endpoints[subset * 2 + endpoint][component] =
427 expand_component(endpoints[subset * 2 + endpoint][component],
428 mode->n_color_bits +
429 mode->has_endpoint_pbits +
430 mode->has_shared_pbits);
431 }
432
433 if (mode->n_alpha_bits > 0) {
434 endpoints[subset * 2 + endpoint][3] =
435 expand_component(endpoints[subset * 2 + endpoint][3],
436 mode->n_alpha_bits +
437 mode->has_endpoint_pbits +
438 mode->has_shared_pbits);
439 }
440 }
441 }
442
443 return bit_offset;
444 }
445
446 static bool
is_anchor(int n_subsets,int partition_num,int texel)447 is_anchor(int n_subsets,
448 int partition_num,
449 int texel)
450 {
451 if (texel == 0)
452 return true;
453
454 switch (n_subsets) {
455 case 1:
456 return false;
457 case 2:
458 return anchor_indices[0][partition_num] == texel;
459 case 3:
460 return (anchor_indices[1][partition_num] == texel ||
461 anchor_indices[2][partition_num] == texel);
462 default:
463 assert(false);
464 return false;
465 }
466 }
467
468 static int
count_anchors_before_texel(int n_subsets,int partition_num,int texel)469 count_anchors_before_texel(int n_subsets,
470 int partition_num,
471 int texel)
472 {
473 int count = 1;
474
475 if (texel == 0)
476 return 0;
477
478 switch (n_subsets) {
479 case 1:
480 break;
481 case 2:
482 if (texel > anchor_indices[0][partition_num])
483 count++;
484 break;
485 case 3:
486 if (texel > anchor_indices[1][partition_num])
487 count++;
488 if (texel > anchor_indices[2][partition_num])
489 count++;
490 break;
491 default:
492 assert(false);
493 return 0;
494 }
495
496 return count;
497 }
498
/* Blends two endpoint values using a texel's index.
 *
 * a, b        the two endpoint values
 * index       the texel's index, in the range 0 .. (1 << index_bits) - 1
 * index_bits  width of the index; must be 2, 3 or 4
 *
 * The index selects a weight between 0 and 64 from a fixed table; the
 * result is the rounded weighted mean of a and b in 1/64 steps.
 */
static int32_t
interpolate(int32_t a, int32_t b,
            int index,
            int index_bits)
{
   static const uint8_t two_bit[] = { 0, 21, 43, 64 };
   static const uint8_t three_bit[] = { 0, 9, 18, 27, 37, 46, 55, 64 };
   static const uint8_t four_bit[] =
      { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };
   /* Indexed directly by index_bits; widths 0 and 1 have no table. */
   static const uint8_t *const by_width[] = {
      NULL, NULL, two_bit, three_bit, four_bit
   };
   const int weight_b = by_width[index_bits][index];
   const int weight_a = 64 - weight_b;

   return (weight_a * a + weight_b * b + 32) >> 6;
}
517
/* Undoes the channel rotation of a decoded RGBA texel in place.
 *
 * A rotation of 0 leaves the texel alone; 1, 2 and 3 exchange the fourth
 * component with the first, second and third component respectively.
 */
static void
apply_rotation(int rotation,
               uint8_t *result)
{
   if (rotation != 0) {
      uint8_t *swapped = &result[rotation - 1];
      const uint8_t fourth = result[3];

      result[3] = *swapped;
      *swapped = fourth;
   }
}
533
534 static void
fetch_rgba_unorm_from_block(const uint8_t * block,uint8_t * result,int texel)535 fetch_rgba_unorm_from_block(const uint8_t *block,
536 uint8_t *result,
537 int texel)
538 {
539 int mode_num = ffs(block[0]);
540 const struct bptc_unorm_mode *mode;
541 int bit_offset, secondary_bit_offset;
542 int partition_num;
543 int subset_num;
544 int rotation;
545 int index_selection;
546 int index_bits;
547 int indices[2];
548 int index;
549 int anchors_before_texel;
550 bool anchor;
551 uint8_t endpoints[3 * 2][4];
552 uint32_t subsets;
553 int component;
554
555 if (mode_num == 0) {
556 /* According to the spec this mode is reserved and shouldn't be used. */
557 memset(result, 0, 4);
558 return;
559 }
560
561 mode = bptc_unorm_modes + mode_num - 1;
562 bit_offset = mode_num;
563
564 partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
565 bit_offset += mode->n_partition_bits;
566
567 switch (mode->n_subsets) {
568 case 1:
569 subsets = 0;
570 break;
571 case 2:
572 subsets = partition_table1[partition_num];
573 break;
574 case 3:
575 subsets = partition_table2[partition_num];
576 break;
577 default:
578 assert(false);
579 return;
580 }
581
582 if (mode->has_rotation_bits) {
583 rotation = extract_bits(block, bit_offset, 2);
584 bit_offset += 2;
585 } else {
586 rotation = 0;
587 }
588
589 if (mode->has_index_selection_bit) {
590 index_selection = extract_bits(block, bit_offset, 1);
591 bit_offset++;
592 } else {
593 index_selection = 0;
594 }
595
596 bit_offset = extract_unorm_endpoints(mode, block, bit_offset, endpoints);
597
598 anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
599 partition_num, texel);
600
601 /* Calculate the offset to the secondary index */
602 secondary_bit_offset = (bit_offset +
603 BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
604 mode->n_subsets +
605 mode->n_secondary_index_bits * texel -
606 anchors_before_texel);
607
608 /* Calculate the offset to the primary index for this texel */
609 bit_offset += mode->n_index_bits * texel - anchors_before_texel;
610
611 subset_num = (subsets >> (texel * 2)) & 3;
612
613 anchor = is_anchor(mode->n_subsets, partition_num, texel);
614
615 index_bits = mode->n_index_bits;
616 if (anchor)
617 index_bits--;
618 indices[0] = extract_bits(block, bit_offset, index_bits);
619
620 if (mode->n_secondary_index_bits) {
621 index_bits = mode->n_secondary_index_bits;
622 if (anchor)
623 index_bits--;
624 indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
625 }
626
627 index = indices[index_selection];
628 index_bits = (index_selection ?
629 mode->n_secondary_index_bits :
630 mode->n_index_bits);
631
632 for (component = 0; component < 3; component++)
633 result[component] = interpolate(endpoints[subset_num * 2][component],
634 endpoints[subset_num * 2 + 1][component],
635 index,
636 index_bits);
637
638 /* Alpha uses the opposite index from the color components */
639 if (mode->n_secondary_index_bits && !index_selection) {
640 index = indices[1];
641 index_bits = mode->n_secondary_index_bits;
642 } else {
643 index = indices[0];
644 index_bits = mode->n_index_bits;
645 }
646
647 result[3] = interpolate(endpoints[subset_num * 2][3],
648 endpoints[subset_num * 2 + 1][3],
649 index,
650 index_bits);
651
652 apply_rotation(rotation, result);
653 }
654
655 #ifdef BPTC_BLOCK_DECODE
656 static void
decompress_rgba_unorm_block(int src_width,int src_height,const uint8_t * block,uint8_t * dst_row,int dst_rowstride)657 decompress_rgba_unorm_block(int src_width, int src_height,
658 const uint8_t *block,
659 uint8_t *dst_row, int dst_rowstride)
660 {
661 int mode_num = ffs(block[0]);
662 const struct bptc_unorm_mode *mode;
663 int bit_offset_head, bit_offset, secondary_bit_offset;
664 int partition_num;
665 int subset_num;
666 int rotation;
667 int index_selection;
668 int index_bits;
669 int indices[2];
670 int index;
671 int anchors_before_texel;
672 bool anchor;
673 uint8_t endpoints[3 * 2][4];
674 uint32_t subsets;
675 int component;
676 unsigned x, y;
677
678 if (mode_num == 0) {
679 /* According to the spec this mode is reserved and shouldn't be used. */
680 for(y = 0; y < src_height; y += 1) {
681 uint8_t *result = dst_row;
682 memset(result, 0, 4 * src_width);
683 dst_row += dst_rowstride;
684 }
685 return;
686 }
687
688 mode = bptc_unorm_modes + mode_num - 1;
689 bit_offset_head = mode_num;
690
691 partition_num = extract_bits(block, bit_offset_head, mode->n_partition_bits);
692 bit_offset_head += mode->n_partition_bits;
693
694 switch (mode->n_subsets) {
695 case 1:
696 subsets = 0;
697 break;
698 case 2:
699 subsets = partition_table1[partition_num];
700 break;
701 case 3:
702 subsets = partition_table2[partition_num];
703 break;
704 default:
705 assert(false);
706 return;
707 }
708
709 if (mode->has_rotation_bits) {
710 rotation = extract_bits(block, bit_offset_head, 2);
711 bit_offset_head += 2;
712 } else {
713 rotation = 0;
714 }
715
716 if (mode->has_index_selection_bit) {
717 index_selection = extract_bits(block, bit_offset_head, 1);
718 bit_offset_head++;
719 } else {
720 index_selection = 0;
721 }
722
723 bit_offset_head = extract_unorm_endpoints(mode, block, bit_offset_head, endpoints);
724
725 for(y = 0; y < src_height; y += 1) {
726 uint8_t *result = dst_row;
727 for(x = 0; x < src_width; x += 1) {
728 int texel;
729 texel = x + y * 4;
730 bit_offset = bit_offset_head;
731
732 anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
733 partition_num,
734 texel);
735
736 /* Calculate the offset to the secondary index */
737 secondary_bit_offset = (bit_offset +
738 BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
739 mode->n_subsets +
740 mode->n_secondary_index_bits * texel -
741 anchors_before_texel);
742
743 /* Calculate the offset to the primary index for this texel */
744 bit_offset += mode->n_index_bits * texel - anchors_before_texel;
745
746 subset_num = (subsets >> (texel * 2)) & 3;
747
748 anchor = is_anchor(mode->n_subsets, partition_num, texel);
749
750 index_bits = mode->n_index_bits;
751 if (anchor)
752 index_bits--;
753 indices[0] = extract_bits(block, bit_offset, index_bits);
754
755 if (mode->n_secondary_index_bits) {
756 index_bits = mode->n_secondary_index_bits;
757 if (anchor)
758 index_bits--;
759 indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
760 }
761
762 index = indices[index_selection];
763 index_bits = (index_selection ?
764 mode->n_secondary_index_bits :
765 mode->n_index_bits);
766
767 for (component = 0; component < 3; component++)
768 result[component] = interpolate(endpoints[subset_num * 2][component],
769 endpoints[subset_num * 2 + 1][component],
770 index,
771 index_bits);
772
773 /* Alpha uses the opposite index from the color components */
774 if (mode->n_secondary_index_bits && !index_selection) {
775 index = indices[1];
776 index_bits = mode->n_secondary_index_bits;
777 } else {
778 index = indices[0];
779 index_bits = mode->n_index_bits;
780 }
781
782 result[3] = interpolate(endpoints[subset_num * 2][3],
783 endpoints[subset_num * 2 + 1][3],
784 index,
785 index_bits);
786
787 apply_rotation(rotation, result);
788 result += 4;
789 }
790 dst_row += dst_rowstride;
791 }
792 }
793
794 static void
decompress_rgba_unorm(int width,int height,const uint8_t * src,int src_rowstride,uint8_t * dst,int dst_rowstride)795 decompress_rgba_unorm(int width, int height,
796 const uint8_t *src, int src_rowstride,
797 uint8_t *dst, int dst_rowstride)
798 {
799 int src_row_diff;
800 int y, x;
801
802 if (src_rowstride >= width * 4)
803 src_row_diff = src_rowstride - ((width + 3) & ~3) * 4;
804 else
805 src_row_diff = 0;
806
807 for (y = 0; y < height; y += BLOCK_SIZE) {
808 for (x = 0; x < width; x += BLOCK_SIZE) {
809 decompress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE),
810 MIN2(height - y, BLOCK_SIZE),
811 src,
812 dst + x * 4 + y * dst_rowstride,
813 dst_rowstride);
814 src += BLOCK_BYTES;
815 }
816 src += src_row_diff;
817 }
818 }
819 #endif // BPTC_BLOCK_DECODE
820
/* Expands a signed endpoint value of n_endpoint_bits precision to the
 * signed 16-bit working range.
 *
 * Values that already have 16 or more bits, and zero, pass through
 * unchanged.  Otherwise the magnitude is scaled up; magnitudes at or above
 * the largest positive n-bit value saturate to 0x7fff.  The sign of the
 * input is kept.
 */
static int
signed_unquantize(int value, int n_endpoint_bits)
{
   int magnitude;
   int scaled;

   if (n_endpoint_bits >= 16 || value == 0)
      return value;

   magnitude = value < 0 ? -value : value;

   if (magnitude >= (1 << (n_endpoint_bits - 1)) - 1)
      scaled = 0x7fff;
   else
      scaled = ((magnitude << 15) + 0x4000) >> (n_endpoint_bits - 1);

   return value < 0 ? -scaled : scaled;
}
849
/* Expands an unsigned endpoint value of n_endpoint_bits precision to the
 * unsigned 16-bit working range.
 *
 * Values that already have 15 or more bits pass through unchanged.  Zero
 * stays zero, the largest n-bit value maps to 0xffff, and everything in
 * between is scaled up.
 */
static int
unsigned_unquantize(int value, int n_endpoint_bits)
{
   int result;

   if (n_endpoint_bits >= 15) {
      result = value;
   } else if (value == 0) {
      result = 0;
   } else if (value == (1 << n_endpoint_bits) - 1) {
      result = 0xffff;
   } else {
      result = ((value << 15) + 0x4000) >> (n_endpoint_bits - 1);
   }

   return result;
}
864
865 static int
extract_float_endpoints(const struct bptc_float_mode * mode,const uint8_t * block,int bit_offset,int32_t endpoints[][3],bool is_signed)866 extract_float_endpoints(const struct bptc_float_mode *mode,
867 const uint8_t *block,
868 int bit_offset,
869 int32_t endpoints[][3],
870 bool is_signed)
871 {
872 const struct bptc_float_bitfield *bitfield;
873 int endpoint, component;
874 int n_endpoints;
875 int value;
876 int i;
877
878 if (mode->n_partition_bits)
879 n_endpoints = 4;
880 else
881 n_endpoints = 2;
882
883 memset(endpoints, 0, sizeof endpoints[0][0] * n_endpoints * 3);
884
885 for (bitfield = mode->bitfields; bitfield->endpoint != -1; bitfield++) {
886 value = extract_bits(block, bit_offset, bitfield->n_bits);
887 bit_offset += bitfield->n_bits;
888
889 if (bitfield->reverse) {
890 for (i = 0; i < bitfield->n_bits; i++) {
891 if (value & (1 << i))
892 endpoints[bitfield->endpoint][bitfield->component] |=
893 1 << ((bitfield->n_bits - 1 - i) + bitfield->offset);
894 }
895 } else {
896 endpoints[bitfield->endpoint][bitfield->component] |=
897 value << bitfield->offset;
898 }
899 }
900
901 if (mode->transformed_endpoints) {
902 /* The endpoints are specified as signed offsets from e0 */
903 for (endpoint = 1; endpoint < n_endpoints; endpoint++) {
904 for (component = 0; component < 3; component++) {
905 value = util_sign_extend(endpoints[endpoint][component],
906 mode->n_delta_bits[component]);
907 endpoints[endpoint][component] =
908 ((endpoints[0][component] + value) &
909 ((1 << mode->n_endpoint_bits) - 1));
910 }
911 }
912 }
913
914 if (is_signed) {
915 for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
916 for (component = 0; component < 3; component++) {
917 value = util_sign_extend(endpoints[endpoint][component],
918 mode->n_endpoint_bits);
919 endpoints[endpoint][component] =
920 signed_unquantize(value, mode->n_endpoint_bits);
921 }
922 }
923 } else {
924 for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
925 for (component = 0; component < 3; component++) {
926 endpoints[endpoint][component] =
927 unsigned_unquantize(endpoints[endpoint][component],
928 mode->n_endpoint_bits);
929 }
930 }
931 }
932
933 return bit_offset;
934 }
935
/* Final scaling step for unsigned float blocks: multiplies the interpolated
 * value by 31/64, truncating, to give the bit pattern that is then
 * converted from half float.
 */
static int32_t
finish_unsigned_unquantize(int32_t value)
{
   const int32_t scaled = value * 31;

   return scaled / 64;
}
941
/* Final scaling step for signed float blocks: multiplies the magnitude of
 * the interpolated value by 31/32, truncating, and sets bit 15 when the
 * input was negative, giving a sign-and-magnitude bit pattern that is then
 * converted from half float.
 */
static int32_t
finish_signed_unquantize(int32_t value)
{
   const int32_t sign_bit = value < 0 ? 0x8000 : 0;
   const int32_t magnitude = value < 0 ? -value : value;

   return (magnitude * 31 / 32) | sign_bit;
}
950
951 static void
fetch_rgb_float_from_block(const uint8_t * block,float * result,int texel,bool is_signed)952 fetch_rgb_float_from_block(const uint8_t *block,
953 float *result,
954 int texel,
955 bool is_signed)
956 {
957 int mode_num;
958 const struct bptc_float_mode *mode;
959 int bit_offset;
960 int partition_num;
961 int subset_num;
962 int index_bits;
963 int index;
964 int anchors_before_texel;
965 int32_t endpoints[2 * 2][3];
966 uint32_t subsets;
967 int n_subsets;
968 int component;
969 int32_t value;
970
971 if (block[0] & 0x2) {
972 mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
973 bit_offset = 5;
974 } else {
975 mode_num = block[0] & 3;
976 bit_offset = 2;
977 }
978
979 mode = bptc_float_modes + mode_num;
980
981 if (mode->reserved) {
982 memset(result, 0, sizeof result[0] * 3);
983 result[3] = 1.0f;
984 return;
985 }
986
987 bit_offset = extract_float_endpoints(mode, block, bit_offset,
988 endpoints, is_signed);
989
990 if (mode->n_partition_bits) {
991 partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
992 bit_offset += mode->n_partition_bits;
993
994 subsets = partition_table1[partition_num];
995 n_subsets = 2;
996 } else {
997 partition_num = 0;
998 subsets = 0;
999 n_subsets = 1;
1000 }
1001
1002 anchors_before_texel =
1003 count_anchors_before_texel(n_subsets, partition_num, texel);
1004
1005 /* Calculate the offset to the primary index for this texel */
1006 bit_offset += mode->n_index_bits * texel - anchors_before_texel;
1007
1008 subset_num = (subsets >> (texel * 2)) & 3;
1009
1010 index_bits = mode->n_index_bits;
1011 if (is_anchor(n_subsets, partition_num, texel))
1012 index_bits--;
1013 index = extract_bits(block, bit_offset, index_bits);
1014
1015 for (component = 0; component < 3; component++) {
1016 value = interpolate(endpoints[subset_num * 2][component],
1017 endpoints[subset_num * 2 + 1][component],
1018 index,
1019 mode->n_index_bits);
1020
1021 if (is_signed)
1022 value = finish_signed_unquantize(value);
1023 else
1024 value = finish_unsigned_unquantize(value);
1025
1026 result[component] = _mesa_half_to_float(value);
1027 }
1028
1029 result[3] = 1.0f;
1030 }
1031
1032 #ifdef BPTC_BLOCK_DECODE
1033 static void
decompress_rgb_float_block(unsigned src_width,unsigned src_height,const uint8_t * block,float * dst_row,unsigned dst_rowstride,bool is_signed)1034 decompress_rgb_float_block(unsigned src_width, unsigned src_height,
1035 const uint8_t *block,
1036 float *dst_row, unsigned dst_rowstride,
1037 bool is_signed)
1038 {
1039 int mode_num;
1040 const struct bptc_float_mode *mode;
1041 int bit_offset_head, bit_offset;
1042 int partition_num;
1043 int subset_num;
1044 int index_bits;
1045 int index;
1046 int anchors_before_texel;
1047 int32_t endpoints[2 * 2][3];
1048 uint32_t subsets;
1049 int n_subsets;
1050 int component;
1051 int32_t value;
1052 unsigned x, y;
1053
1054 if (block[0] & 0x2) {
1055 mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
1056 bit_offset_head = 5;
1057 } else {
1058 mode_num = block[0] & 3;
1059 bit_offset_head = 2;
1060 }
1061
1062 mode = bptc_float_modes + mode_num;
1063
1064 if (mode->reserved) {
1065 for(y = 0; y < src_height; y += 1) {
1066 float *result = dst_row;
1067 memset(result, 0, sizeof result[0] * 4 * src_width);
1068 for(x = 0; x < src_width; x += 1) {
1069 result[3] = 1.0f;
1070 result += 4;
1071 }
1072 dst_row += dst_rowstride / sizeof dst_row[0];
1073 }
1074 return;
1075 }
1076
1077 bit_offset_head = extract_float_endpoints(mode, block, bit_offset_head,
1078 endpoints, is_signed);
1079
1080 if (mode->n_partition_bits) {
1081 partition_num = extract_bits(block, bit_offset_head, mode->n_partition_bits);
1082 bit_offset_head += mode->n_partition_bits;
1083
1084 subsets = partition_table1[partition_num];
1085 n_subsets = 2;
1086 } else {
1087 partition_num = 0;
1088 subsets = 0;
1089 n_subsets = 1;
1090 }
1091
1092 for(y = 0; y < src_height; y += 1) {
1093 float *result = dst_row;
1094 for(x = 0; x < src_width; x += 1) {
1095 int texel;
1096
1097 bit_offset = bit_offset_head;
1098
1099 texel = x + y * 4;
1100
1101 anchors_before_texel =
1102 count_anchors_before_texel(n_subsets, partition_num, texel);
1103
1104 /* Calculate the offset to the primary index for this texel */
1105 bit_offset += mode->n_index_bits * texel - anchors_before_texel;
1106
1107 subset_num = (subsets >> (texel * 2)) & 3;
1108
1109 index_bits = mode->n_index_bits;
1110 if (is_anchor(n_subsets, partition_num, texel))
1111 index_bits--;
1112 index = extract_bits(block, bit_offset, index_bits);
1113
1114 for (component = 0; component < 3; component++) {
1115 value = interpolate(endpoints[subset_num * 2][component],
1116 endpoints[subset_num * 2 + 1][component],
1117 index,
1118 mode->n_index_bits);
1119
1120 if (is_signed)
1121 value = finish_signed_unquantize(value);
1122 else
1123 value = finish_unsigned_unquantize(value);
1124
1125 result[component] = _mesa_half_to_float(value);
1126 }
1127
1128 result[3] = 1.0f;
1129 result += 4;
1130 }
1131 dst_row += dst_rowstride / sizeof dst_row[0];
1132 }
1133 }
1134
1135 static void
decompress_rgb_float(int width,int height,const uint8_t * src,int src_rowstride,float * dst,int dst_rowstride,bool is_signed)1136 decompress_rgb_float(int width, int height,
1137 const uint8_t *src, int src_rowstride,
1138 float *dst, int dst_rowstride, bool is_signed)
1139 {
1140 int src_row_diff;
1141 int y, x;
1142
1143 if (src_rowstride >= width * 4)
1144 src_row_diff = src_rowstride - ((width + 3) & ~3) * 4;
1145 else
1146 src_row_diff = 0;
1147
1148 for (y = 0; y < height; y += BLOCK_SIZE) {
1149 for (x = 0; x < width; x += BLOCK_SIZE) {
1150 decompress_rgb_float_block(MIN2(width - x, BLOCK_SIZE),
1151 MIN2(height - y, BLOCK_SIZE),
1152 src,
1153 (dst + x * 4 +
1154 (y * dst_rowstride / sizeof dst[0])),
1155 dst_rowstride, is_signed);
1156 src += BLOCK_BYTES;
1157 }
1158 src += src_row_diff;
1159 }
1160 }
1161 #endif // BPTC_BLOCK_DECODE
1162
1163 static void
write_bits(struct bit_writer * writer,int n_bits,int value)1164 write_bits(struct bit_writer *writer, int n_bits, int value)
1165 {
1166 do {
1167 if (n_bits + writer->pos >= 8) {
1168 *(writer->dst++) = writer->buf | (value << writer->pos);
1169 writer->buf = 0;
1170 value >>= (8 - writer->pos);
1171 n_bits -= (8 - writer->pos);
1172 writer->pos = 0;
1173 } else {
1174 writer->buf |= value << writer->pos;
1175 writer->pos += n_bits;
1176 break;
1177 }
1178 } while (n_bits > 0);
1179 }
1180
/**
 * Compute the mean luminance and mean alpha of a rectangle of RGBA8 texels.
 *
 * Luminance here is simply the sum R + G + B of a texel.
 *
 * \param width              rectangle width in texels
 * \param height             rectangle height in texels
 * \param src                first texel, four bytes per texel
 * \param src_rowstride      distance in bytes between rows of \p src
 * \param average_luminance  receives the (truncated) mean of R + G + B
 * \param average_alpha      receives the (truncated) mean alpha
 */
static void
get_average_luminance_alpha_unorm(int width, int height,
                                  const uint8_t *src, int src_rowstride,
                                  int *average_luminance, int *average_alpha)
{
   const int n_texels = width * height;
   int luminance_total = 0;
   int alpha_total = 0;
   int row, col;

   for (row = 0; row < height; row++) {
      const uint8_t *texel = src + row * src_rowstride;

      for (col = 0; col < width; col++, texel += 4) {
         luminance_total += texel[0] + texel[1] + texel[2];
         alpha_total += texel[3];
      }
   }

   *average_luminance = luminance_total / n_texels;
   *average_alpha = alpha_total / n_texels;
}
1201
/**
 * Pick two RGBA endpoints for a rectangle of RGBA8 texels.
 *
 * The texels are split into two groups by comparing their luminance
 * (R + G + B) with \p average_luminance; the per-channel mean of each group
 * becomes the RGB part of an endpoint.  Alpha is handled the same way but
 * independently, by comparing each texel's alpha with \p average_alpha.  If
 * all texels land in one group both endpoints get the overall mean.
 *
 * Finally the endpoints are swapped where necessary so that the first texel
 * lies on endpoint 0's side of the midpoint, which keeps the
 * most-significant bit of the first index zero.
 *
 * \param width              rectangle width in texels
 * \param height             rectangle height in texels
 * \param src                first texel, four bytes per texel
 * \param src_rowstride      distance in bytes between rows of \p src
 * \param average_luminance  threshold for the RGB split
 * \param average_alpha      threshold for the alpha split
 * \param endpoints          receives endpoints[0] and endpoints[1] (RGBA)
 */
static void
get_rgba_endpoints_unorm(int width, int height,
                         const uint8_t *src, int src_rowstride,
                         int average_luminance, int average_alpha,
                         uint8_t endpoints[][4])
{
   const int n_texels = width * height;
   int endpoint_luminances[2];
   int midpoint;
   int sums[2][4];
   int endpoint;
   int luminance;
   uint8_t temp[3];
   const uint8_t *p = src;
   int rgb_left_endpoint_count = 0;
   int alpha_left_endpoint_count = 0;
   int y, x, i;

   memset(sums, 0, sizeof sums);

   for (y = 0; y < height; y++) {
      for (x = 0; x < width; x++) {
         luminance = p[0] + p[1] + p[2];
         if (luminance < average_luminance) {
            endpoint = 0;
            rgb_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         for (i = 0; i < 3; i++)
            sums[endpoint][i] += p[i];

         /* Partition on the alpha channel (p[3]).  Comparing the blue
          * channel here would group alpha values by an unrelated quantity. */
         if (p[3] < average_alpha) {
            endpoint = 0;
            alpha_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         sums[endpoint][3] += p[3];

         p += 4;
      }

      p += src_rowstride - width * 4;
   }

   if (rgb_left_endpoint_count == 0 ||
       rgb_left_endpoint_count == n_texels) {
      for (i = 0; i < 3; i++)
         endpoints[0][i] = endpoints[1][i] =
            (sums[0][i] + sums[1][i]) / n_texels;
   } else {
      for (i = 0; i < 3; i++) {
         endpoints[0][i] = sums[0][i] / rgb_left_endpoint_count;
         endpoints[1][i] = (sums[1][i] /
                            (n_texels - rgb_left_endpoint_count));
      }
   }

   if (alpha_left_endpoint_count == 0 ||
       alpha_left_endpoint_count == n_texels) {
      endpoints[0][3] = endpoints[1][3] =
         (sums[0][3] + sums[1][3]) / n_texels;
   } else {
      endpoints[0][3] = sums[0][3] / alpha_left_endpoint_count;
      endpoints[1][3] = (sums[1][3] /
                         (n_texels - alpha_left_endpoint_count));
   }

   /* We may need to swap the endpoints to ensure the most-significant bit of
    * the first index is zero */

   for (endpoint = 0; endpoint < 2; endpoint++) {
      endpoint_luminances[endpoint] =
         endpoints[endpoint][0] +
         endpoints[endpoint][1] +
         endpoints[endpoint][2];
   }
   midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2;

   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (endpoint_luminances[0] <= midpoint)) {
      memcpy(temp, endpoints[0], 3);
      memcpy(endpoints[0], endpoints[1], 3);
      memcpy(endpoints[1], temp, 3);
   }

   /* Same for the alpha endpoints */

   midpoint = (endpoints[0][3] + endpoints[1][3]) / 2;

   if ((src[3] <= midpoint) != (endpoints[0][3] <= midpoint)) {
      temp[0] = endpoints[0][3];
      endpoints[0][3] = endpoints[1][3];
      endpoints[1][3] = temp[0];
   }
}
1298
1299 static void
write_rgb_indices_unorm(struct bit_writer * writer,int src_width,int src_height,const uint8_t * src,int src_rowstride,uint8_t endpoints[][4])1300 write_rgb_indices_unorm(struct bit_writer *writer,
1301 int src_width, int src_height,
1302 const uint8_t *src, int src_rowstride,
1303 uint8_t endpoints[][4])
1304 {
1305 int luminance;
1306 int endpoint_luminances[2];
1307 int endpoint;
1308 int index;
1309 int y, x;
1310
1311 for (endpoint = 0; endpoint < 2; endpoint++) {
1312 endpoint_luminances[endpoint] =
1313 endpoints[endpoint][0] +
1314 endpoints[endpoint][1] +
1315 endpoints[endpoint][2];
1316 }
1317
1318 /* If the endpoints have the same luminance then we'll just use index 0 for
1319 * all of the texels */
1320 if (endpoint_luminances[0] == endpoint_luminances[1]) {
1321 write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 2 - 1, 0);
1322 return;
1323 }
1324
1325 for (y = 0; y < src_height; y++) {
1326 for (x = 0; x < src_width; x++) {
1327 luminance = src[0] + src[1] + src[2];
1328
1329 index = ((luminance - endpoint_luminances[0]) * 3 /
1330 (endpoint_luminances[1] - endpoint_luminances[0]));
1331 if (index < 0)
1332 index = 0;
1333 else if (index > 3)
1334 index = 3;
1335
1336 assert(x != 0 || y != 0 || index < 2);
1337
1338 write_bits(writer, (x == 0 && y == 0) ? 1 : 2, index);
1339
1340 src += 4;
1341 }
1342
1343 /* Pad the indices out to the block size */
1344 if (src_width < BLOCK_SIZE)
1345 write_bits(writer, 2 * (BLOCK_SIZE - src_width), 0);
1346
1347 src += src_rowstride - src_width * 4;
1348 }
1349
1350 /* Pad the indices out to the block size */
1351 if (src_height < BLOCK_SIZE)
1352 write_bits(writer, 2 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1353 }
1354
1355 static void
write_alpha_indices_unorm(struct bit_writer * writer,int src_width,int src_height,const uint8_t * src,int src_rowstride,uint8_t endpoints[][4])1356 write_alpha_indices_unorm(struct bit_writer *writer,
1357 int src_width, int src_height,
1358 const uint8_t *src, int src_rowstride,
1359 uint8_t endpoints[][4])
1360 {
1361 int index;
1362 int y, x;
1363
1364 /* If the endpoints have the same alpha then we'll just use index 0 for
1365 * all of the texels */
1366 if (endpoints[0][3] == endpoints[1][3]) {
1367 write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 3 - 1, 0);
1368 return;
1369 }
1370
1371 for (y = 0; y < src_height; y++) {
1372 for (x = 0; x < src_width; x++) {
1373 index = (((int) src[3] - (int) endpoints[0][3]) * 7 /
1374 ((int) endpoints[1][3] - endpoints[0][3]));
1375 if (index < 0)
1376 index = 0;
1377 else if (index > 7)
1378 index = 7;
1379
1380 assert(x != 0 || y != 0 || index < 4);
1381
1382 /* The first index has one less bit */
1383 write_bits(writer, (x == 0 && y == 0) ? 2 : 3, index);
1384
1385 src += 4;
1386 }
1387
1388 /* Pad the indices out to the block size */
1389 if (src_width < BLOCK_SIZE)
1390 write_bits(writer, 3 * (BLOCK_SIZE - src_width), 0);
1391
1392 src += src_rowstride - src_width * 4;
1393 }
1394
1395 /* Pad the indices out to the block size */
1396 if (src_height < BLOCK_SIZE)
1397 write_bits(writer, 3 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1398 }
1399
1400 static void
compress_rgba_unorm_block(int src_width,int src_height,const uint8_t * src,int src_rowstride,uint8_t * dst)1401 compress_rgba_unorm_block(int src_width, int src_height,
1402 const uint8_t *src, int src_rowstride,
1403 uint8_t *dst)
1404 {
1405 int average_luminance, average_alpha;
1406 uint8_t endpoints[2][4];
1407 struct bit_writer writer;
1408 int component, endpoint;
1409
1410 get_average_luminance_alpha_unorm(src_width, src_height, src, src_rowstride,
1411 &average_luminance, &average_alpha);
1412 get_rgba_endpoints_unorm(src_width, src_height, src, src_rowstride,
1413 average_luminance, average_alpha,
1414 endpoints);
1415
1416 writer.dst = dst;
1417 writer.pos = 0;
1418 writer.buf = 0;
1419
1420 write_bits(&writer, 5, 0x10); /* mode 4 */
1421 write_bits(&writer, 2, 0); /* rotation 0 */
1422 write_bits(&writer, 1, 0); /* index selection bit */
1423
1424 /* Write the color endpoints */
1425 for (component = 0; component < 3; component++)
1426 for (endpoint = 0; endpoint < 2; endpoint++)
1427 write_bits(&writer, 5, endpoints[endpoint][component] >> 3);
1428
1429 /* Write the alpha endpoints */
1430 for (endpoint = 0; endpoint < 2; endpoint++)
1431 write_bits(&writer, 6, endpoints[endpoint][3] >> 2);
1432
1433 write_rgb_indices_unorm(&writer,
1434 src_width, src_height,
1435 src, src_rowstride,
1436 endpoints);
1437 write_alpha_indices_unorm(&writer,
1438 src_width, src_height,
1439 src, src_rowstride,
1440 endpoints);
1441 }
1442
1443 static void
compress_rgba_unorm(int width,int height,const uint8_t * src,int src_rowstride,uint8_t * dst,int dst_rowstride)1444 compress_rgba_unorm(int width, int height,
1445 const uint8_t *src, int src_rowstride,
1446 uint8_t *dst, int dst_rowstride)
1447 {
1448 int dst_row_diff;
1449 int y, x;
1450
1451 if (dst_rowstride >= width * 4)
1452 dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
1453 else
1454 dst_row_diff = 0;
1455
1456 for (y = 0; y < height; y += BLOCK_SIZE) {
1457 for (x = 0; x < width; x += BLOCK_SIZE) {
1458 compress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE),
1459 MIN2(height - y, BLOCK_SIZE),
1460 src + x * 4 + y * src_rowstride,
1461 src_rowstride,
1462 dst);
1463 dst += BLOCK_BYTES;
1464 }
1465 dst += dst_row_diff;
1466 }
1467 }
1468
/**
 * Compute the mean luminance of a rectangle of RGB float texels.
 *
 * Luminance here is simply the sum R + G + B of a texel.
 *
 * \param width          rectangle width in texels
 * \param height         rectangle height in texels
 * \param src            first texel, three floats per texel
 * \param src_rowstride  distance in bytes between rows of \p src
 * \return the sum of all luminances divided by the number of texels
 */
static float
get_average_luminance_float(int width, int height,
                            const float *src, int src_rowstride)
{
   const float *texel = src;
   float total = 0;
   int row, col;

   for (row = 0; row < height; row++) {
      for (col = 0; col < width; col++, texel += 3)
         total += texel[0] + texel[1] + texel[2];

      /* Step over whatever lies between the end of this row of texels and
       * the start of the next one. */
      texel += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
   }

   return total / (width * height);
}
1486
/**
 * Clamp a float to the range accepted for an endpoint.
 *
 * The upper limit is always 65504.0.  The lower limit is -65504.0 when
 * \p is_signed is set and 0.0 otherwise.  Values already inside the range
 * are returned unchanged.
 *
 * \param value      value to clamp
 * \param is_signed  whether negative values are allowed
 * \return the clamped value
 */
static float
clamp_value(float value, bool is_signed)
{
   const float upper = 65504.0f;
   const float lower = is_signed ? -65504.0f : 0.0f;

   if (value > upper)
      return upper;

   if (value < lower)
      return lower;

   return value;
}
1505
/**
 * Pick two RGB endpoints for a rectangle of RGB float texels.
 *
 * The texels are split into two groups by comparing their luminance
 * (R + G + B) with \p average_luminance, and the per-channel mean of each
 * group becomes an endpoint.  If all texels land in one group both endpoints
 * get the overall mean.  The endpoints are then passed through clamp_value()
 * and, where necessary, swapped so that the first texel lies on endpoint 0's
 * side of the midpoint, which keeps the most-significant bit of the first
 * index zero.
 *
 * \param width              rectangle width in texels
 * \param height             rectangle height in texels
 * \param src                first texel, three floats per texel
 * \param src_rowstride      distance in bytes between rows of \p src
 * \param average_luminance  threshold for the split
 * \param endpoints          receives endpoints[0] and endpoints[1] (RGB)
 * \param is_signed          forwarded to clamp_value()
 */
static void
get_endpoints_float(int width, int height,
                    const float *src, int src_rowstride,
                    float average_luminance, float endpoints[][3],
                    bool is_signed)
{
   const int n_texels = width * height;
   const float *texel = src;
   float sums[2][3];
   float luminances[2];
   float saved[3];
   float midpoint;
   int n_below = 0;
   int first_is_low, endpoint0_is_low;
   int row, col, ep, c;

   memset(sums, 0, sizeof sums);

   /* Accumulate each texel into the group selected by its luminance */
   for (row = 0; row < height; row++) {
      for (col = 0; col < width; col++, texel += 3) {
         const float luminance = texel[0] + texel[1] + texel[2];
         float *group;

         if (luminance < average_luminance) {
            group = sums[0];
            n_below++;
         } else {
            group = sums[1];
         }

         for (c = 0; c < 3; c++)
            group[c] += texel[c];
      }

      texel += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
   }

   if (n_below == 0 || n_below == n_texels) {
      /* Only one group is populated: use the overall mean for both */
      for (c = 0; c < 3; c++)
         endpoints[0][c] = endpoints[1][c] =
            (sums[0][c] + sums[1][c]) / n_texels;
   } else {
      for (c = 0; c < 3; c++) {
         endpoints[0][c] = sums[0][c] / n_below;
         endpoints[1][c] = sums[1][c] / (n_texels - n_below);
      }
   }

   /* Bring every endpoint channel into the range of a half float, which
    * also gets rid of infinities */
   for (ep = 0; ep < 2; ep++) {
      for (c = 0; c < 3; c++)
         endpoints[ep][c] = clamp_value(endpoints[ep][c], is_signed);
   }

   /* The first index is stored without its most-significant bit, so the
    * first texel has to lie on endpoint 0's side of the midpoint; swap the
    * endpoints if it does not. */
   for (ep = 0; ep < 2; ep++) {
      luminances[ep] =
         endpoints[ep][0] +
         endpoints[ep][1] +
         endpoints[ep][2];
   }
   midpoint = (luminances[0] + luminances[1]) / 2.0f;

   first_is_low = (src[0] + src[1] + src[2] <= midpoint);
   endpoint0_is_low = (luminances[0] <= midpoint);

   if (first_is_low != endpoint0_is_low) {
      memcpy(saved, endpoints[0], sizeof saved);
      memcpy(endpoints[0], endpoints[1], sizeof saved);
      memcpy(endpoints[1], saved, sizeof saved);
   }
}
1581
1582 static void
write_rgb_indices_float(struct bit_writer * writer,int src_width,int src_height,const float * src,int src_rowstride,float endpoints[][3])1583 write_rgb_indices_float(struct bit_writer *writer,
1584 int src_width, int src_height,
1585 const float *src, int src_rowstride,
1586 float endpoints[][3])
1587 {
1588 float luminance;
1589 float endpoint_luminances[2];
1590 int endpoint;
1591 int index;
1592 int y, x;
1593
1594 for (endpoint = 0; endpoint < 2; endpoint++) {
1595 endpoint_luminances[endpoint] =
1596 endpoints[endpoint][0] +
1597 endpoints[endpoint][1] +
1598 endpoints[endpoint][2];
1599 }
1600
1601 /* If the endpoints have the same luminance then we'll just use index 0 for
1602 * all of the texels */
1603 if (endpoint_luminances[0] == endpoint_luminances[1]) {
1604 write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 4 - 1, 0);
1605 return;
1606 }
1607
1608 for (y = 0; y < src_height; y++) {
1609 for (x = 0; x < src_width; x++) {
1610 luminance = src[0] + src[1] + src[2];
1611
1612 index = ((luminance - endpoint_luminances[0]) * 15 /
1613 (endpoint_luminances[1] - endpoint_luminances[0]));
1614 if (index < 0)
1615 index = 0;
1616 else if (index > 15)
1617 index = 15;
1618
1619 assert(x != 0 || y != 0 || index < 8);
1620
1621 write_bits(writer, (x == 0 && y == 0) ? 3 : 4, index);
1622
1623 src += 3;
1624 }
1625
1626 /* Pad the indices out to the block size */
1627 if (src_width < BLOCK_SIZE)
1628 write_bits(writer, 4 * (BLOCK_SIZE - src_width), 0);
1629
1630 src += (src_rowstride - src_width * 3 * sizeof (float)) / sizeof (float);
1631 }
1632
1633 /* Pad the indices out to the block size */
1634 if (src_height < BLOCK_SIZE)
1635 write_bits(writer, 4 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1636 }
1637
/**
 * Convert a float endpoint channel into the integer written to the block.
 *
 * The float is first converted to a half float with _mesa_float_to_half().
 *
 * - Unsigned: values at or below zero give 0; otherwise the half-float bits
 *   are scaled by 64/31 and shifted right by six.
 * - Signed: the sign bit (0x8000) is stripped, the remaining bits are scaled
 *   by 32/31 and shifted right by six, and for negative inputs the result is
 *   negated and masked to 10 bits.
 *
 * \param value      endpoint channel value
 * \param is_signed  selects the signed or the unsigned conversion
 * \return the quantized endpoint value
 */
static int
get_endpoint_value(float value, bool is_signed)
{
   int half;

   if (!is_signed) {
      if (value <= 0.0f)
         return 0;

      half = _mesa_float_to_half(value);

      return (64 * half / 31) >> 6;
   }

   half = _mesa_float_to_half(value);

   if (half & 0x8000) {
      /* Quantize the magnitude, then store its negation in 10 bits */
      half = (32 * (half & 0x7fff) / 31) >> 6;

      return -half & ((1 << 10) - 1);
   }

   return (32 * half / 31) >> 6;
}
1667
1668 static void
compress_rgb_float_block(int src_width,int src_height,const float * src,int src_rowstride,uint8_t * dst,bool is_signed)1669 compress_rgb_float_block(int src_width, int src_height,
1670 const float *src, int src_rowstride,
1671 uint8_t *dst,
1672 bool is_signed)
1673 {
1674 float average_luminance;
1675 float endpoints[2][3];
1676 struct bit_writer writer;
1677 int component, endpoint;
1678 int endpoint_value;
1679
1680 average_luminance =
1681 get_average_luminance_float(src_width, src_height, src, src_rowstride);
1682 get_endpoints_float(src_width, src_height, src, src_rowstride,
1683 average_luminance, endpoints, is_signed);
1684
1685 writer.dst = dst;
1686 writer.pos = 0;
1687 writer.buf = 0;
1688
1689 write_bits(&writer, 5, 3); /* mode 3 */
1690
1691 /* Write the endpoints */
1692 for (endpoint = 0; endpoint < 2; endpoint++) {
1693 for (component = 0; component < 3; component++) {
1694 endpoint_value =
1695 get_endpoint_value(endpoints[endpoint][component], is_signed);
1696 write_bits(&writer, 10, endpoint_value);
1697 }
1698 }
1699
1700 write_rgb_indices_float(&writer,
1701 src_width, src_height,
1702 src, src_rowstride,
1703 endpoints);
1704 }
1705
1706 static void
compress_rgb_float(int width,int height,const float * src,int src_rowstride,uint8_t * dst,int dst_rowstride,bool is_signed)1707 compress_rgb_float(int width, int height,
1708 const float *src, int src_rowstride,
1709 uint8_t *dst, int dst_rowstride,
1710 bool is_signed)
1711 {
1712 int dst_row_diff;
1713 int y, x;
1714
1715 if (dst_rowstride >= width * 4)
1716 dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
1717 else
1718 dst_row_diff = 0;
1719
1720 for (y = 0; y < height; y += BLOCK_SIZE) {
1721 for (x = 0; x < width; x += BLOCK_SIZE) {
1722 compress_rgb_float_block(MIN2(width - x, BLOCK_SIZE),
1723 MIN2(height - y, BLOCK_SIZE),
1724 src + x * 3 +
1725 y * src_rowstride / sizeof (float),
1726 src_rowstride,
1727 dst,
1728 is_signed);
1729 dst += BLOCK_BYTES;
1730 }
1731 dst += dst_row_diff;
1732 }
1733 }
1734
1735 #endif
1736