1 /*
2 * Copyright (C) 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /*
25 * Included by texcompress_bptc and gallium to define BPTC decoding routines.
26 */
27
28 #ifndef TEXCOMPRESS_BPTC_TMP_H
29 #define TEXCOMPRESS_BPTC_TMP_H
30
31 #include "util/format_srgb.h"
32 #include "util/half_float.h"
33 #include "macros.h"
34
35 #define BLOCK_SIZE 4
36 #define N_PARTITIONS 64
37 #define BLOCK_BYTES 16
38
/* Describes the bit layout of one BPTC UNORM block mode. */
struct bptc_unorm_mode {
   /* Number of subsets; each subset has its own pair of endpoints */
   int n_subsets;
   /* Width of the partition number field; 0 when the field is absent */
   int n_partition_bits;
   /* Whether a 2-bit rotation field follows the partition number */
   bool has_rotation_bits;
   /* Whether a 1-bit index selection field is present */
   bool has_index_selection_bit;
   /* Bits stored per color component of each endpoint, before any p-bit */
   int n_color_bits;
   /* Bits stored per endpoint alpha value; 0 means alpha is fixed at 255 */
   int n_alpha_bits;
   /* Whether every endpoint carries its own p-bit */
   bool has_endpoint_pbits;
   /* Whether the two endpoints of a subset share a single p-bit */
   bool has_shared_pbits;
   /* Bits per texel in the primary index table */
   int n_index_bits;
   /* Bits per texel in the secondary index table; 0 when there is none */
   int n_secondary_index_bits;
};
51
/* One contiguous run of bits in a float block, together with the place in
 * the decoded endpoints where those bits are merged in.
 */
struct bptc_float_bitfield {
   /* Destination endpoint index; -1 marks the end of a bitfield list */
   int8_t endpoint;
   /* Destination component index within that endpoint */
   uint8_t component;
   /* Bit position within the destination value where the field is placed */
   uint8_t offset;
   /* Number of bits to read from the block for this field */
   uint8_t n_bits;
   /* Whether the field's bits are stored in reversed order */
   bool reverse;
};
59
/* Describes the bit layout of one BPTC float block mode. */
struct bptc_float_mode {
   /* Reserved mode: the decoder writes zero color and alpha 1.0 */
   bool reserved;
   /* Whether endpoints other than the first are signed deltas from it */
   bool transformed_endpoints;
   /* Width of the partition number field; non-zero selects two subsets
    * (four endpoints), zero selects a single subset (two endpoints)
    */
   int n_partition_bits;
   /* Precision, in bits, of each endpoint component */
   int n_endpoint_bits;
   /* Bits per texel in the index table */
   int n_index_bits;
   /* Width of the delta of each component, used to sign extend it when
    * transformed_endpoints is set
    */
   int n_delta_bits[3];
   /* Fields in the order they are read from the block, terminated by an
    * entry whose endpoint is -1
    */
   struct bptc_float_bitfield bitfields[24];
};
69
/* Holds a one-byte buffer, a position and a destination pointer. None of
 * the decoding routines in the portion of the file reviewed here use it.
 * NOTE(review): presumably the state of a bit-level output stream used by
 * the BPTC encoder that includes this header -- confirm against the caller.
 */
struct bit_writer {
   uint8_t buf;
   int pos;
   uint8_t *dst;
};
75
/* Layout of the eight UNORM block modes, indexed by mode number. The
 * initializers follow the field order of struct bptc_unorm_mode:
 * n_subsets, n_partition_bits, has_rotation_bits, has_index_selection_bit,
 * n_color_bits, n_alpha_bits, has_endpoint_pbits, has_shared_pbits,
 * n_index_bits, n_secondary_index_bits.
 */
static const struct bptc_unorm_mode
bptc_unorm_modes[] = {
   /* 0 */ { 3, 4, false, false, 4, 0, true,  false, 3, 0 },
   /* 1 */ { 2, 6, false, false, 6, 0, false, true,  3, 0 },
   /* 2 */ { 3, 6, false, false, 5, 0, false, false, 2, 0 },
   /* 3 */ { 2, 6, false, false, 7, 0, true,  false, 2, 0 },
   /* 4 */ { 1, 0, true,  true,  5, 6, false, false, 2, 3 },
   /* 5 */ { 1, 0, true,  false, 7, 8, false, false, 2, 2 },
   /* 6 */ { 1, 0, false, false, 7, 7, true,  false, 4, 0 },
   /* 7 */ { 2, 6, false, false, 5, 5, true,  false, 2, 0 }
};
87
/* Layout of the float block modes, indexed by the mode number that the
 * float decoders derive from the first byte of the block. The comment above
 * each entry gives the mode bits as they appear in the block.
 *
 * Each entry lists, in struct bptc_float_mode field order: reserved,
 * transformed_endpoints, n_partition_bits, n_endpoint_bits, n_index_bits,
 * n_delta_bits[3], and then the bitfields in the order they are stored in
 * the block. Every bitfield is { endpoint, component, offset, n_bits,
 * reverse } and the list ends with an entry whose endpoint is -1.
 */
static const struct bptc_float_mode
bptc_float_modes[] = {
   /* 00 */
   { false, true, 5, 10, 3, { 5, 5, 5 },
     { { 2, 1, 4, 1, false }, { 2, 2, 4, 1, false }, { 3, 2, 4, 1, false },
       { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
       { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
       { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 01 */
   { false, true, 5, 7, 3, { 6, 6, 6 },
     { { 2, 1, 5, 1, false }, { 3, 1, 4, 1, false }, { 3, 1, 5, 1, false },
       { 0, 0, 0, 7, false }, { 3, 2, 0, 1, false }, { 3, 2, 1, 1, false },
       { 2, 2, 4, 1, false }, { 0, 1, 0, 7, false }, { 2, 2, 5, 1, false },
       { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false }, { 0, 2, 0, 7, false },
       { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 6, false },
       { 3, 0, 0, 6, false },
       { -1 } }
   },
   /* 00010 */
   { false, true, 5, 11, 3, { 5, 4, 4 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 5, false }, { 0, 0, 10, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false }, { 3, 2, 0, 1, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 00011 */
   { false, false, 0, 10, 4, { 10, 10, 10 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 10, false }, { 1, 1, 0, 10, false }, { 1, 2, 0, 10, false },
       { -1 } }
   },
   /* 00110 */
   { false, true, 5, 11, 3, { 4, 5, 4 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 3, 1, 4, 1, false },
       { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false }, { 0, 1, 10, 1, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
       { 3, 2, 0, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
       { 2, 1, 4, 1, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 00111 */
   { false, true, 0, 11, 4, { 9, 9, 9 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 9, false }, { 0, 0, 10, 1, false }, { 1, 1, 0, 9, false },
       { 0, 1, 10, 1, false }, { 1, 2, 0, 9, false }, { 0, 2, 10, 1, false },
       { -1 } }
   },
   /* 01010 */
   { false, true, 5, 11, 3, { 4, 4, 5 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 2, 2, 4, 1, false },
       { 2, 1, 0, 4, false }, { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false },
       { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
       { 0, 2, 10, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
       { 3, 2, 1, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
       { 3, 2, 4, 1, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 01011 */
   { false, true, 0, 12, 4, { 8, 8, 8 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 8, false }, { 0, 0, 10, 2, true }, { 1, 1, 0, 8, false },
       { 0, 1, 10, 2, true }, { 1, 2, 0, 8, false }, { 0, 2, 10, 2, true },
       { -1 } }
   },
   /* 01110 */
   { false, true, 5, 9, 3, { 5, 5, 5 },
     { { 0, 0, 0, 9, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 9, false },
       { 2, 1, 4, 1, false }, { 0, 2, 0, 9, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
       { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
       { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 01111 */
   { false, true, 0, 16, 4, { 4, 4, 4 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 4, false }, { 0, 0, 10, 6, true }, { 1, 1, 0, 4, false },
       { 0, 1, 10, 6, true }, { 1, 2, 0, 4, false }, { 0, 2, 10, 6, true },
       { -1 } }
   },
   /* 10010 */
   { false, true, 5, 8, 3, { 6, 5, 5 },
     { { 0, 0, 0, 8, false }, { 3, 1, 4, 1, false }, { 2, 2, 4, 1, false },
       { 0, 1, 0, 8, false }, { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false },
       { 0, 2, 0, 8, false }, { 3, 2, 3, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false },
       { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 6, false },
       { 3, 0, 0, 6, false },
       { -1 } }
   },
   /* 10011 */
   { true /* reserved */ },
   /* 10110 */
   { false, true, 5, 8, 3, { 5, 6, 5 },
     { { 0, 0, 0, 8, false }, { 3, 2, 0, 1, false }, { 2, 2, 4, 1, false },
       { 0, 1, 0, 8, false }, { 2, 1, 5, 1, false }, { 2, 1, 4, 1, false },
       { 0, 2, 0, 8, false }, { 3, 1, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 6, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 10111 */
   { true /* reserved */ },
   /* 11010 */
   { false, true, 5, 8, 3, { 5, 5, 6 },
     { { 0, 0, 0, 8, false }, { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false },
       { 0, 1, 0, 8, false }, { 2, 2, 5, 1, false }, { 2, 1, 4, 1, false },
       { 0, 2, 0, 8, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
       { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 11011 */
   { true /* reserved */ },
   /* 11110 */
   { false, false, 5, 6, 3, { 6, 6, 6 },
     { { 0, 0, 0, 6, false }, { 3, 1, 4, 1, false }, { 3, 2, 0, 1, false },
       { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 6, false },
       { 2, 1, 5, 1, false }, { 2, 2, 5, 1, false }, { 3, 2, 2, 1, false },
       { 2, 1, 4, 1, false }, { 0, 2, 0, 6, false }, { 3, 1, 5, 1, false },
       { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 6, false }, { 3, 0, 0, 6, false },
       { -1 } }
   },
   /* 11111 */
   { true /* reserved */ },
};
238
/* This partition table is used when the mode has two subsets. It is indexed
 * by the partition number read from the block. Each partition is represented
 * by a 32-bit value which gives 2 bits per texel within the block, with
 * texel t occupying bits 2t and 2t+1. The value of the two bits is the
 * subset that the texel belongs to (0 or 1).
 */
static const uint32_t
partition_table1[N_PARTITIONS] = {
   0x50505050U, 0x40404040U, 0x54545454U, 0x54505040U,
   0x50404000U, 0x55545450U, 0x55545040U, 0x54504000U,
   0x50400000U, 0x55555450U, 0x55544000U, 0x54400000U,
   0x55555440U, 0x55550000U, 0x55555500U, 0x55000000U,
   0x55150100U, 0x00004054U, 0x15010000U, 0x00405054U,
   0x00004050U, 0x15050100U, 0x05010000U, 0x40505054U,
   0x00404050U, 0x05010100U, 0x14141414U, 0x05141450U,
   0x01155440U, 0x00555500U, 0x15014054U, 0x05414150U,
   0x44444444U, 0x55005500U, 0x11441144U, 0x05055050U,
   0x05500550U, 0x11114444U, 0x41144114U, 0x44111144U,
   0x15055054U, 0x01055040U, 0x05041050U, 0x05455150U,
   0x14414114U, 0x50050550U, 0x41411414U, 0x00141400U,
   0x00041504U, 0x00105410U, 0x10541000U, 0x04150400U,
   0x50410514U, 0x41051450U, 0x05415014U, 0x14054150U,
   0x41050514U, 0x41505014U, 0x40011554U, 0x54150140U,
   0x50505500U, 0x00555050U, 0x15151010U, 0x54540404U,
};
263
/* This partition table is used when the mode has three subsets. It is
 * indexed by the partition number and packs 2 bits per texel, with texel t
 * occupying bits 2t and 2t+1. In this case the values can be 0, 1 or 2.
 */
static const uint32_t
partition_table2[N_PARTITIONS] = {
   0xaa685050U, 0x6a5a5040U, 0x5a5a4200U, 0x5450a0a8U,
   0xa5a50000U, 0xa0a05050U, 0x5555a0a0U, 0x5a5a5050U,
   0xaa550000U, 0xaa555500U, 0xaaaa5500U, 0x90909090U,
   0x94949494U, 0xa4a4a4a4U, 0xa9a59450U, 0x2a0a4250U,
   0xa5945040U, 0x0a425054U, 0xa5a5a500U, 0x55a0a0a0U,
   0xa8a85454U, 0x6a6a4040U, 0xa4a45000U, 0x1a1a0500U,
   0x0050a4a4U, 0xaaa59090U, 0x14696914U, 0x69691400U,
   0xa08585a0U, 0xaa821414U, 0x50a4a450U, 0x6a5a0200U,
   0xa9a58000U, 0x5090a0a8U, 0xa8a09050U, 0x24242424U,
   0x00aa5500U, 0x24924924U, 0x24499224U, 0x50a50a50U,
   0x500aa550U, 0xaaaa4444U, 0x66660000U, 0xa5a0a5a0U,
   0x50a050a0U, 0x69286928U, 0x44aaaa44U, 0x66666600U,
   0xaa444444U, 0x54a854a8U, 0x95809580U, 0x96969600U,
   0xa85454a8U, 0x80959580U, 0xaa141414U, 0x96960000U,
   0xaaaa1414U, 0xa05050a0U, 0xa0a5a5a0U, 0x96000000U,
   0x40804080U, 0xa9a8a9a8U, 0xaaaaaa44U, 0x2a4a5254U
};
286
/* Texel numbers of the anchor texels that depend on the partition. Each row
 * is indexed by partition number. Texel 0 is always treated as an anchor by
 * the decoder and is therefore not listed here. The index of an anchor texel
 * is stored with one bit fewer than the indices of the other texels.
 */
static const uint8_t
anchor_indices[][N_PARTITIONS] = {
   /* Anchor index values for the second subset of two-subset partitioning */
   {
      0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
      0xf,0x2,0x8,0x2,0x2,0x8,0x8,0xf,0x2,0x8,0x2,0x2,0x8,0x8,0x2,0x2,
      0xf,0xf,0x6,0x8,0x2,0x8,0xf,0xf,0x2,0x8,0x2,0x2,0x2,0xf,0xf,0x6,
      0x6,0x2,0x6,0x8,0xf,0xf,0x2,0x2,0xf,0xf,0xf,0xf,0xf,0x2,0x2,0xf
   },

   /* Anchor index values for the second subset of three-subset partitioning */
   {
      0x3,0x3,0xf,0xf,0x8,0x3,0xf,0xf,0x8,0x8,0x6,0x6,0x6,0x5,0x3,0x3,
      0x3,0x3,0x8,0xf,0x3,0x3,0x6,0xa,0x5,0x8,0x8,0x6,0x8,0x5,0xf,0xf,
      0x8,0xf,0x3,0x5,0x6,0xa,0x8,0xf,0xf,0x3,0xf,0x5,0xf,0xf,0xf,0xf,
      0x3,0xf,0x5,0x5,0x5,0x8,0x5,0xa,0x5,0xa,0x8,0xd,0xf,0xc,0x3,0x3
   },

   /* Anchor index values for the third subset of three-subset
    * partitioning
    */
   {
      0xf,0x8,0x8,0x3,0xf,0xf,0x3,0x8,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x8,
      0xf,0x8,0xf,0x3,0xf,0x8,0xf,0x8,0x3,0xf,0x6,0xa,0xf,0xf,0xa,0x8,
      0xf,0x3,0xf,0xa,0xa,0x8,0x9,0xa,0x6,0xf,0x8,0xf,0x3,0x6,0x6,0x8,
      0xf,0x3,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x3,0xf,0xf,0x8
   }
};
315
/* Reads an n_bits-wide bitfield from block, starting offset bits from the
 * beginning of the data.
 *
 * Bit 0 of the stream is the least-significant bit of block[0]. The field
 * may straddle byte boundaries; bits taken from later bytes become the
 * more-significant bits of the result.
 *
 * Returns 0 without touching block when n_bits is not positive, so that a
 * zero-width field positioned at the very end of the data does not read
 * past it and a negative width can never be used as a shift count.
 */
static int
extract_bits(const uint8_t *block,
             int offset,
             int n_bits)
{
   int byte_index = offset / 8;
   int bit_index = offset % 8;
   int result = 0;
   int bit = 0;

   while (n_bits > 0) {
      /* Only the first byte can start part-way through */
      const int bits_left_in_byte = 8 - bit_index;
      const int n_bits_in_byte =
         n_bits < bits_left_in_byte ? n_bits : bits_left_in_byte;
      const int mask = (1 << n_bits_in_byte) - 1;

      result |= ((block[byte_index] >> bit_index) & mask) << bit;

      n_bits -= n_bits_in_byte;
      bit += n_bits_in_byte;
      byte_index++;
      bit_index = 0;
   }

   return result;
}
342
/* Widens an n_bits-wide value to a full byte. The value is moved to the top
 * of the byte and its own most-significant bits are replicated into the
 * low bits that the move leaves empty.
 */
static uint8_t
expand_component(uint8_t byte,
                 int n_bits)
{
   const int n_pad_bits = 8 - n_bits;
   const int high_part = byte << n_pad_bits;
   const int low_part = byte >> (n_bits - n_pad_bits);

   return high_part | low_part;
}
352
353 static int
extract_unorm_endpoints(const struct bptc_unorm_mode * mode,const uint8_t * block,int bit_offset,uint8_t endpoints[][4])354 extract_unorm_endpoints(const struct bptc_unorm_mode *mode,
355 const uint8_t *block,
356 int bit_offset,
357 uint8_t endpoints[][4])
358 {
359 int component;
360 int subset;
361 int endpoint;
362 int pbit;
363 int n_components;
364
365 /* Extract each color component */
366 for (component = 0; component < 3; component++) {
367 for (subset = 0; subset < mode->n_subsets; subset++) {
368 for (endpoint = 0; endpoint < 2; endpoint++) {
369 endpoints[subset * 2 + endpoint][component] =
370 extract_bits(block, bit_offset, mode->n_color_bits);
371 bit_offset += mode->n_color_bits;
372 }
373 }
374 }
375
376 /* Extract the alpha values */
377 if (mode->n_alpha_bits > 0) {
378 for (subset = 0; subset < mode->n_subsets; subset++) {
379 for (endpoint = 0; endpoint < 2; endpoint++) {
380 endpoints[subset * 2 + endpoint][3] =
381 extract_bits(block, bit_offset, mode->n_alpha_bits);
382 bit_offset += mode->n_alpha_bits;
383 }
384 }
385
386 n_components = 4;
387 } else {
388 for (subset = 0; subset < mode->n_subsets; subset++)
389 for (endpoint = 0; endpoint < 2; endpoint++)
390 endpoints[subset * 2 + endpoint][3] = 255;
391
392 n_components = 3;
393 }
394
395 /* Add in the p-bits */
396 if (mode->has_endpoint_pbits) {
397 for (subset = 0; subset < mode->n_subsets; subset++) {
398 for (endpoint = 0; endpoint < 2; endpoint++) {
399 pbit = extract_bits(block, bit_offset, 1);
400 bit_offset += 1;
401
402 for (component = 0; component < n_components; component++) {
403 endpoints[subset * 2 + endpoint][component] <<= 1;
404 endpoints[subset * 2 + endpoint][component] |= pbit;
405 }
406 }
407 }
408 } else if (mode->has_shared_pbits) {
409 for (subset = 0; subset < mode->n_subsets; subset++) {
410 pbit = extract_bits(block, bit_offset, 1);
411 bit_offset += 1;
412
413 for (endpoint = 0; endpoint < 2; endpoint++) {
414 for (component = 0; component < n_components; component++) {
415 endpoints[subset * 2 + endpoint][component] <<= 1;
416 endpoints[subset * 2 + endpoint][component] |= pbit;
417 }
418 }
419 }
420 }
421
422 /* Expand the n-bit values to a byte */
423 for (subset = 0; subset < mode->n_subsets; subset++) {
424 for (endpoint = 0; endpoint < 2; endpoint++) {
425 for (component = 0; component < 3; component++) {
426 endpoints[subset * 2 + endpoint][component] =
427 expand_component(endpoints[subset * 2 + endpoint][component],
428 mode->n_color_bits +
429 mode->has_endpoint_pbits +
430 mode->has_shared_pbits);
431 }
432
433 if (mode->n_alpha_bits > 0) {
434 endpoints[subset * 2 + endpoint][3] =
435 expand_component(endpoints[subset * 2 + endpoint][3],
436 mode->n_alpha_bits +
437 mode->has_endpoint_pbits +
438 mode->has_shared_pbits);
439 }
440 }
441 }
442
443 return bit_offset;
444 }
445
446 static bool
is_anchor(int n_subsets,int partition_num,int texel)447 is_anchor(int n_subsets,
448 int partition_num,
449 int texel)
450 {
451 if (texel == 0)
452 return true;
453
454 switch (n_subsets) {
455 case 1:
456 return false;
457 case 2:
458 return anchor_indices[0][partition_num] == texel;
459 case 3:
460 return (anchor_indices[1][partition_num] == texel ||
461 anchor_indices[2][partition_num] == texel);
462 default:
463 assert(false);
464 return false;
465 }
466 }
467
468 static int
count_anchors_before_texel(int n_subsets,int partition_num,int texel)469 count_anchors_before_texel(int n_subsets,
470 int partition_num,
471 int texel)
472 {
473 int count = 1;
474
475 if (texel == 0)
476 return 0;
477
478 switch (n_subsets) {
479 case 1:
480 break;
481 case 2:
482 if (texel > anchor_indices[0][partition_num])
483 count++;
484 break;
485 case 3:
486 if (texel > anchor_indices[1][partition_num])
487 count++;
488 if (texel > anchor_indices[2][partition_num])
489 count++;
490 break;
491 default:
492 assert(false);
493 return 0;
494 }
495
496 return count;
497 }
498
/* Blends the endpoint values a and b with the weight selected by index.
 *
 * index_bits is the width of the index (2, 3 or 4) and picks the weight
 * table. Weights are expressed in 64ths: index 0 yields a and the highest
 * index yields b. The result is rounded by adding half before the final
 * shift.
 */
static int32_t
interpolate(int32_t a, int32_t b,
            int index,
            int index_bits)
{
   static const uint8_t two_bit_weights[] = { 0, 21, 43, 64 };
   static const uint8_t three_bit_weights[] =
      { 0, 9, 18, 27, 37, 46, 55, 64 };
   static const uint8_t four_bit_weights[] =
      { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };
   /* Indexed by index_bits; there are no tables for widths 0 and 1 */
   static const uint8_t *tables_by_width[] = {
      NULL, NULL, two_bit_weights, three_bit_weights, four_bit_weights
   };
   const int weight_b = tables_by_width[index_bits][index];
   const int weight_a = 64 - weight_b;

   return (weight_a * a + weight_b * b + 32) >> 6;
}
517
/* Undoes the channel rotation of a decoded RGBA texel in place.
 *
 * A rotation of 0 leaves the texel untouched. A rotation of 1, 2 or 3
 * exchanges result[3] with result[0], result[1] or result[2] respectively.
 */
static void
apply_rotation(int rotation,
               uint8_t *result)
{
   if (rotation != 0) {
      uint8_t *swapped = &result[rotation - 1];
      const uint8_t last = result[3];

      result[3] = *swapped;
      *swapped = last;
   }
}
533
534 static void
fetch_rgba_unorm_from_block(const uint8_t * block,uint8_t * result,int texel)535 fetch_rgba_unorm_from_block(const uint8_t *block,
536 uint8_t *result,
537 int texel)
538 {
539 int mode_num = ffs(block[0]);
540 const struct bptc_unorm_mode *mode;
541 int bit_offset, secondary_bit_offset;
542 int partition_num;
543 int subset_num;
544 int rotation;
545 int index_selection;
546 int index_bits;
547 int indices[2];
548 int index;
549 int anchors_before_texel;
550 bool anchor;
551 uint8_t endpoints[3 * 2][4];
552 uint32_t subsets;
553 int component;
554
555 if (mode_num == 0) {
556 /* According to the spec this mode is reserved and shouldn't be used. */
557 memset(result, 0, 4);
558 return;
559 }
560
561 mode = bptc_unorm_modes + mode_num - 1;
562 bit_offset = mode_num;
563
564 partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
565 bit_offset += mode->n_partition_bits;
566
567 switch (mode->n_subsets) {
568 case 1:
569 subsets = 0;
570 break;
571 case 2:
572 subsets = partition_table1[partition_num];
573 break;
574 case 3:
575 subsets = partition_table2[partition_num];
576 break;
577 default:
578 assert(false);
579 return;
580 }
581
582 if (mode->has_rotation_bits) {
583 rotation = extract_bits(block, bit_offset, 2);
584 bit_offset += 2;
585 } else {
586 rotation = 0;
587 }
588
589 if (mode->has_index_selection_bit) {
590 index_selection = extract_bits(block, bit_offset, 1);
591 bit_offset++;
592 } else {
593 index_selection = 0;
594 }
595
596 bit_offset = extract_unorm_endpoints(mode, block, bit_offset, endpoints);
597
598 anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
599 partition_num, texel);
600
601 /* Calculate the offset to the secondary index */
602 secondary_bit_offset = (bit_offset +
603 BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
604 mode->n_subsets +
605 mode->n_secondary_index_bits * texel -
606 anchors_before_texel);
607
608 /* Calculate the offset to the primary index for this texel */
609 bit_offset += mode->n_index_bits * texel - anchors_before_texel;
610
611 subset_num = (subsets >> (texel * 2)) & 3;
612
613 anchor = is_anchor(mode->n_subsets, partition_num, texel);
614
615 index_bits = mode->n_index_bits;
616 if (anchor)
617 index_bits--;
618 indices[0] = extract_bits(block, bit_offset, index_bits);
619
620 if (mode->n_secondary_index_bits) {
621 index_bits = mode->n_secondary_index_bits;
622 if (anchor)
623 index_bits--;
624 indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
625 }
626
627 index = indices[index_selection];
628 index_bits = (index_selection ?
629 mode->n_secondary_index_bits :
630 mode->n_index_bits);
631
632 for (component = 0; component < 3; component++)
633 result[component] = interpolate(endpoints[subset_num * 2][component],
634 endpoints[subset_num * 2 + 1][component],
635 index,
636 index_bits);
637
638 /* Alpha uses the opposite index from the color components */
639 if (mode->n_secondary_index_bits && !index_selection) {
640 index = indices[1];
641 index_bits = mode->n_secondary_index_bits;
642 } else {
643 index = indices[0];
644 index_bits = mode->n_index_bits;
645 }
646
647 result[3] = interpolate(endpoints[subset_num * 2][3],
648 endpoints[subset_num * 2 + 1][3],
649 index,
650 index_bits);
651
652 apply_rotation(rotation, result);
653 }
654
655 #ifdef BPTC_BLOCK_DECODE
656 static void
decompress_rgba_unorm_block(int src_width,int src_height,const uint8_t * block,uint8_t * dst_row,int dst_rowstride)657 decompress_rgba_unorm_block(int src_width, int src_height,
658 const uint8_t *block,
659 uint8_t *dst_row, int dst_rowstride)
660 {
661 int mode_num = ffs(block[0]);
662 const struct bptc_unorm_mode *mode;
663 int bit_offset_head, bit_offset, secondary_bit_offset;
664 int partition_num;
665 int subset_num;
666 int rotation;
667 int index_selection;
668 int index_bits;
669 int indices[2];
670 int index;
671 int anchors_before_texel;
672 bool anchor;
673 uint8_t endpoints[3 * 2][4];
674 uint32_t subsets;
675 int component;
676 unsigned x, y;
677
678 if (mode_num == 0) {
679 /* According to the spec this mode is reserved and shouldn't be used. */
680 for(y = 0; y < src_height; y += 1) {
681 uint8_t *result = dst_row;
682 memset(result, 0, 4 * src_width);
683 dst_row += dst_rowstride;
684 }
685 return;
686 }
687
688 mode = bptc_unorm_modes + mode_num - 1;
689 bit_offset_head = mode_num;
690
691 partition_num = extract_bits(block, bit_offset_head, mode->n_partition_bits);
692 bit_offset_head += mode->n_partition_bits;
693
694 switch (mode->n_subsets) {
695 case 1:
696 subsets = 0;
697 break;
698 case 2:
699 subsets = partition_table1[partition_num];
700 break;
701 case 3:
702 subsets = partition_table2[partition_num];
703 break;
704 default:
705 assert(false);
706 return;
707 }
708
709 if (mode->has_rotation_bits) {
710 rotation = extract_bits(block, bit_offset_head, 2);
711 bit_offset_head += 2;
712 } else {
713 rotation = 0;
714 }
715
716 if (mode->has_index_selection_bit) {
717 index_selection = extract_bits(block, bit_offset_head, 1);
718 bit_offset_head++;
719 } else {
720 index_selection = 0;
721 }
722
723 bit_offset_head = extract_unorm_endpoints(mode, block, bit_offset_head, endpoints);
724
725 for(y = 0; y < src_height; y += 1) {
726 uint8_t *result = dst_row;
727 for(x = 0; x < src_width; x += 1) {
728 int texel;
729 texel = x + y * 4;
730 bit_offset = bit_offset_head;
731
732 anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
733 partition_num,
734 texel);
735
736 /* Calculate the offset to the secondary index */
737 secondary_bit_offset = (bit_offset +
738 BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
739 mode->n_subsets +
740 mode->n_secondary_index_bits * texel -
741 anchors_before_texel);
742
743 /* Calculate the offset to the primary index for this texel */
744 bit_offset += mode->n_index_bits * texel - anchors_before_texel;
745
746 subset_num = (subsets >> (texel * 2)) & 3;
747
748 anchor = is_anchor(mode->n_subsets, partition_num, texel);
749
750 index_bits = mode->n_index_bits;
751 if (anchor)
752 index_bits--;
753 indices[0] = extract_bits(block, bit_offset, index_bits);
754
755 if (mode->n_secondary_index_bits) {
756 index_bits = mode->n_secondary_index_bits;
757 if (anchor)
758 index_bits--;
759 indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
760 }
761
762 index = indices[index_selection];
763 index_bits = (index_selection ?
764 mode->n_secondary_index_bits :
765 mode->n_index_bits);
766
767 for (component = 0; component < 3; component++)
768 result[component] = interpolate(endpoints[subset_num * 2][component],
769 endpoints[subset_num * 2 + 1][component],
770 index,
771 index_bits);
772
773 /* Alpha uses the opposite index from the color components */
774 if (mode->n_secondary_index_bits && !index_selection) {
775 index = indices[1];
776 index_bits = mode->n_secondary_index_bits;
777 } else {
778 index = indices[0];
779 index_bits = mode->n_index_bits;
780 }
781
782 result[3] = interpolate(endpoints[subset_num * 2][3],
783 endpoints[subset_num * 2 + 1][3],
784 index,
785 index_bits);
786
787 apply_rotation(rotation, result);
788 result += 4;
789 }
790 dst_row += dst_rowstride;
791 }
792 }
793
794 static void
decompress_rgba_unorm(int width,int height,const uint8_t * src,int src_rowstride,uint8_t * dst,int dst_rowstride)795 decompress_rgba_unorm(int width, int height,
796 const uint8_t *src, int src_rowstride,
797 uint8_t *dst, int dst_rowstride)
798 {
799 int src_row_diff;
800 int y, x;
801
802 if (src_rowstride >= width * 4)
803 src_row_diff = src_rowstride - ((width + 3) & ~3) * 4;
804 else
805 src_row_diff = 0;
806
807 for (y = 0; y < height; y += BLOCK_SIZE) {
808 for (x = 0; x < width; x += BLOCK_SIZE) {
809 decompress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE),
810 MIN2(height - y, BLOCK_SIZE),
811 src,
812 dst + x * 4 + y * dst_rowstride,
813 dst_rowstride);
814 src += BLOCK_BYTES;
815 }
816 src += src_row_diff;
817 }
818 }
819 #endif // BPTC_BLOCK_DECODE
820
/* Interprets the low n_bits bits of value as a two's complement number and
 * returns it widened to 32 bits. n_bits must be between 1 and 31.
 */
static int32_t
sign_extend(int32_t value,
            int n_bits)
{
   assert(n_bits > 0 && n_bits < 32);

   /* Move the field's sign bit to bit 31 as an unsigned value so the left
    * shift is well defined, then shift back down as a signed value to
    * replicate it.
    */
   const unsigned n_unused_bits = 32 - n_bits;
   const uint32_t top_aligned = (uint32_t)value << n_unused_bits;

   return (int32_t)top_aligned >> n_unused_bits;
}
830
/* Expands a signed endpoint value of n_endpoint_bits precision to the
 * signed 16-bit range used for interpolation.
 *
 * Values with 16 or more bits of precision and the value 0 are returned
 * unchanged. A magnitude at or above the largest representable one maps to
 * 0x7fff; anything else is scaled up with rounding. The sign of the input
 * is preserved.
 */
static int
signed_unquantize(int value, int n_endpoint_bits)
{
   const bool negative = value < 0;
   int magnitude;

   if (n_endpoint_bits >= 16 || value == 0)
      return value;

   magnitude = negative ? -value : value;

   if (magnitude >= (1 << (n_endpoint_bits - 1)) - 1)
      magnitude = 0x7fff;
   else
      magnitude = ((magnitude << 15) + 0x4000) >> (n_endpoint_bits - 1);

   return negative ? -magnitude : magnitude;
}
859
/* Expands an unsigned endpoint value of n_endpoint_bits precision to the
 * unsigned 16-bit range used for interpolation.
 *
 * Values with 15 or more bits of precision and the value 0 are returned
 * unchanged. The largest representable value maps to 0xffff; anything else
 * is scaled up with rounding.
 */
static int
unsigned_unquantize(int value, int n_endpoint_bits)
{
   int scaled;

   if (n_endpoint_bits >= 15 || value == 0)
      return value;

   if (value == (1 << n_endpoint_bits) - 1)
      return 0xffff;

   scaled = (value << 15) + 0x4000;

   return scaled >> (n_endpoint_bits - 1);
}
874
875 static int
extract_float_endpoints(const struct bptc_float_mode * mode,const uint8_t * block,int bit_offset,int32_t endpoints[][3],bool is_signed)876 extract_float_endpoints(const struct bptc_float_mode *mode,
877 const uint8_t *block,
878 int bit_offset,
879 int32_t endpoints[][3],
880 bool is_signed)
881 {
882 const struct bptc_float_bitfield *bitfield;
883 int endpoint, component;
884 int n_endpoints;
885 int value;
886 int i;
887
888 if (mode->n_partition_bits)
889 n_endpoints = 4;
890 else
891 n_endpoints = 2;
892
893 memset(endpoints, 0, sizeof endpoints[0][0] * n_endpoints * 3);
894
895 for (bitfield = mode->bitfields; bitfield->endpoint != -1; bitfield++) {
896 value = extract_bits(block, bit_offset, bitfield->n_bits);
897 bit_offset += bitfield->n_bits;
898
899 if (bitfield->reverse) {
900 for (i = 0; i < bitfield->n_bits; i++) {
901 if (value & (1 << i))
902 endpoints[bitfield->endpoint][bitfield->component] |=
903 1 << ((bitfield->n_bits - 1 - i) + bitfield->offset);
904 }
905 } else {
906 endpoints[bitfield->endpoint][bitfield->component] |=
907 value << bitfield->offset;
908 }
909 }
910
911 if (mode->transformed_endpoints) {
912 /* The endpoints are specified as signed offsets from e0 */
913 for (endpoint = 1; endpoint < n_endpoints; endpoint++) {
914 for (component = 0; component < 3; component++) {
915 value = sign_extend(endpoints[endpoint][component],
916 mode->n_delta_bits[component]);
917 endpoints[endpoint][component] =
918 ((endpoints[0][component] + value) &
919 ((1 << mode->n_endpoint_bits) - 1));
920 }
921 }
922 }
923
924 if (is_signed) {
925 for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
926 for (component = 0; component < 3; component++) {
927 value = sign_extend(endpoints[endpoint][component],
928 mode->n_endpoint_bits);
929 endpoints[endpoint][component] =
930 signed_unquantize(value, mode->n_endpoint_bits);
931 }
932 }
933 } else {
934 for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
935 for (component = 0; component < 3; component++) {
936 endpoints[endpoint][component] =
937 unsigned_unquantize(endpoints[endpoint][component],
938 mode->n_endpoint_bits);
939 }
940 }
941 }
942
943 return bit_offset;
944 }
945
/* Scales an interpolated unsigned value by 31/64. The float decoder passes
 * the result to the half-float conversion as a bit pattern.
 */
static int32_t
finish_unsigned_unquantize(int32_t value)
{
   const int32_t scaled = value * 31;

   return scaled / 64;
}
951
/* Scales the magnitude of an interpolated signed value by 31/32 and, for a
 * negative input, sets bit 15 of the result as a sign flag. The float
 * decoder passes the result to the half-float conversion as a bit pattern.
 */
static int32_t
finish_signed_unquantize(int32_t value)
{
   const int32_t magnitude = value < 0 ? -value : value;
   const int32_t scaled = magnitude * 31 / 32;

   return value < 0 ? (scaled | 0x8000) : scaled;
}
960
961 static void
fetch_rgb_float_from_block(const uint8_t * block,float * result,int texel,bool is_signed)962 fetch_rgb_float_from_block(const uint8_t *block,
963 float *result,
964 int texel,
965 bool is_signed)
966 {
967 int mode_num;
968 const struct bptc_float_mode *mode;
969 int bit_offset;
970 int partition_num;
971 int subset_num;
972 int index_bits;
973 int index;
974 int anchors_before_texel;
975 int32_t endpoints[2 * 2][3];
976 uint32_t subsets;
977 int n_subsets;
978 int component;
979 int32_t value;
980
981 if (block[0] & 0x2) {
982 mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
983 bit_offset = 5;
984 } else {
985 mode_num = block[0] & 3;
986 bit_offset = 2;
987 }
988
989 mode = bptc_float_modes + mode_num;
990
991 if (mode->reserved) {
992 memset(result, 0, sizeof result[0] * 3);
993 result[3] = 1.0f;
994 return;
995 }
996
997 bit_offset = extract_float_endpoints(mode, block, bit_offset,
998 endpoints, is_signed);
999
1000 if (mode->n_partition_bits) {
1001 partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
1002 bit_offset += mode->n_partition_bits;
1003
1004 subsets = partition_table1[partition_num];
1005 n_subsets = 2;
1006 } else {
1007 partition_num = 0;
1008 subsets = 0;
1009 n_subsets = 1;
1010 }
1011
1012 anchors_before_texel =
1013 count_anchors_before_texel(n_subsets, partition_num, texel);
1014
1015 /* Calculate the offset to the primary index for this texel */
1016 bit_offset += mode->n_index_bits * texel - anchors_before_texel;
1017
1018 subset_num = (subsets >> (texel * 2)) & 3;
1019
1020 index_bits = mode->n_index_bits;
1021 if (is_anchor(n_subsets, partition_num, texel))
1022 index_bits--;
1023 index = extract_bits(block, bit_offset, index_bits);
1024
1025 for (component = 0; component < 3; component++) {
1026 value = interpolate(endpoints[subset_num * 2][component],
1027 endpoints[subset_num * 2 + 1][component],
1028 index,
1029 mode->n_index_bits);
1030
1031 if (is_signed)
1032 value = finish_signed_unquantize(value);
1033 else
1034 value = finish_unsigned_unquantize(value);
1035
1036 result[component] = _mesa_half_to_float(value);
1037 }
1038
1039 result[3] = 1.0f;
1040 }
1041
1042 #ifdef BPTC_BLOCK_DECODE
1043 static void
decompress_rgb_float_block(unsigned src_width,unsigned src_height,const uint8_t * block,float * dst_row,unsigned dst_rowstride,bool is_signed)1044 decompress_rgb_float_block(unsigned src_width, unsigned src_height,
1045 const uint8_t *block,
1046 float *dst_row, unsigned dst_rowstride,
1047 bool is_signed)
1048 {
1049 int mode_num;
1050 const struct bptc_float_mode *mode;
1051 int bit_offset_head, bit_offset;
1052 int partition_num;
1053 int subset_num;
1054 int index_bits;
1055 int index;
1056 int anchors_before_texel;
1057 int32_t endpoints[2 * 2][3];
1058 uint32_t subsets;
1059 int n_subsets;
1060 int component;
1061 int32_t value;
1062 unsigned x, y;
1063
1064 if (block[0] & 0x2) {
1065 mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
1066 bit_offset_head = 5;
1067 } else {
1068 mode_num = block[0] & 3;
1069 bit_offset_head = 2;
1070 }
1071
1072 mode = bptc_float_modes + mode_num;
1073
1074 if (mode->reserved) {
1075 for(y = 0; y < src_height; y += 1) {
1076 float *result = dst_row;
1077 memset(result, 0, sizeof result[0] * 4 * src_width);
1078 for(x = 0; x < src_width; x += 1) {
1079 result[3] = 1.0f;
1080 result += 4;
1081 }
1082 dst_row += dst_rowstride / sizeof dst_row[0];
1083 }
1084 return;
1085 }
1086
1087 bit_offset_head = extract_float_endpoints(mode, block, bit_offset_head,
1088 endpoints, is_signed);
1089
1090 if (mode->n_partition_bits) {
1091 partition_num = extract_bits(block, bit_offset_head, mode->n_partition_bits);
1092 bit_offset_head += mode->n_partition_bits;
1093
1094 subsets = partition_table1[partition_num];
1095 n_subsets = 2;
1096 } else {
1097 partition_num = 0;
1098 subsets = 0;
1099 n_subsets = 1;
1100 }
1101
1102 for(y = 0; y < src_height; y += 1) {
1103 float *result = dst_row;
1104 for(x = 0; x < src_width; x += 1) {
1105 int texel;
1106
1107 bit_offset = bit_offset_head;
1108
1109 texel = x + y * 4;
1110
1111 anchors_before_texel =
1112 count_anchors_before_texel(n_subsets, partition_num, texel);
1113
1114 /* Calculate the offset to the primary index for this texel */
1115 bit_offset += mode->n_index_bits * texel - anchors_before_texel;
1116
1117 subset_num = (subsets >> (texel * 2)) & 3;
1118
1119 index_bits = mode->n_index_bits;
1120 if (is_anchor(n_subsets, partition_num, texel))
1121 index_bits--;
1122 index = extract_bits(block, bit_offset, index_bits);
1123
1124 for (component = 0; component < 3; component++) {
1125 value = interpolate(endpoints[subset_num * 2][component],
1126 endpoints[subset_num * 2 + 1][component],
1127 index,
1128 mode->n_index_bits);
1129
1130 if (is_signed)
1131 value = finish_signed_unquantize(value);
1132 else
1133 value = finish_unsigned_unquantize(value);
1134
1135 result[component] = _mesa_half_to_float(value);
1136 }
1137
1138 result[3] = 1.0f;
1139 result += 4;
1140 }
1141 dst_row += dst_rowstride / sizeof dst_row[0];
1142 }
1143 }
1144
1145 static void
decompress_rgb_float(int width,int height,const uint8_t * src,int src_rowstride,float * dst,int dst_rowstride,bool is_signed)1146 decompress_rgb_float(int width, int height,
1147 const uint8_t *src, int src_rowstride,
1148 float *dst, int dst_rowstride, bool is_signed)
1149 {
1150 int src_row_diff;
1151 int y, x;
1152
1153 if (src_rowstride >= width * 4)
1154 src_row_diff = src_rowstride - ((width + 3) & ~3) * 4;
1155 else
1156 src_row_diff = 0;
1157
1158 for (y = 0; y < height; y += BLOCK_SIZE) {
1159 for (x = 0; x < width; x += BLOCK_SIZE) {
1160 decompress_rgb_float_block(MIN2(width - x, BLOCK_SIZE),
1161 MIN2(height - y, BLOCK_SIZE),
1162 src,
1163 (dst + x * 4 +
1164 (y * dst_rowstride / sizeof dst[0])),
1165 dst_rowstride, is_signed);
1166 src += BLOCK_BYTES;
1167 }
1168 src += src_row_diff;
1169 }
1170 }
1171 #endif // BPTC_BLOCK_DECODE
1172
1173 static void
write_bits(struct bit_writer * writer,int n_bits,int value)1174 write_bits(struct bit_writer *writer, int n_bits, int value)
1175 {
1176 do {
1177 if (n_bits + writer->pos >= 8) {
1178 *(writer->dst++) = writer->buf | (value << writer->pos);
1179 writer->buf = 0;
1180 value >>= (8 - writer->pos);
1181 n_bits -= (8 - writer->pos);
1182 writer->pos = 0;
1183 } else {
1184 writer->buf |= value << writer->pos;
1185 writer->pos += n_bits;
1186 break;
1187 }
1188 } while (n_bits > 0);
1189 }
1190
/*
 * Computes the mean "luminance" and mean alpha over a width x height region
 * of 4-byte texels.  Luminance here is simply the sum of the first three
 * bytes of a texel; alpha is the fourth byte.
 *
 * src_rowstride is the distance between rows in bytes.  Both averages use
 * truncating integer division by the texel count.
 */
static void
get_average_luminance_alpha_unorm(int width, int height,
                                  const uint8_t *src, int src_rowstride,
                                  int *average_luminance, int *average_alpha)
{
   const int n_texels = width * height;
   int total_luminance = 0;
   int total_alpha = 0;
   int row, col;

   for (row = 0; row < height; row++) {
      const uint8_t *texel = src + row * src_rowstride;

      for (col = 0; col < width; col++, texel += 4) {
         total_luminance += texel[0] + texel[1] + texel[2];
         total_alpha += texel[3];
      }
   }

   *average_luminance = total_luminance / n_texels;
   *average_alpha = total_alpha / n_texels;
}
1211
/*
 * Chooses two RGBA endpoints for a width x height region of 8-bit RGBA
 * texels.
 *
 * The texels are split into two groups twice: once for colour, comparing
 * each texel's luminance (R + G + B) with average_luminance, and once for
 * alpha, comparing each texel's alpha with average_alpha.  Endpoint 0 is
 * the per-channel mean of the group below the average and endpoint 1 the
 * mean of the rest.  If all texels land in the same group, both endpoints
 * are set to the mean of the whole region.
 *
 * src_rowstride is the distance between rows in bytes.  The colour
 * components of the result are stored in endpoints[e][0..2] and the alpha
 * in endpoints[e][3].
 */
static void
get_rgba_endpoints_unorm(int width, int height,
                         const uint8_t *src, int src_rowstride,
                         int average_luminance, int average_alpha,
                         uint8_t endpoints[][4])
{
   int endpoint_luminances[2];
   int midpoint;
   int sums[2][4];
   int endpoint;
   int luminance;
   uint8_t temp[3];
   const uint8_t *p = src;
   int rgb_left_endpoint_count = 0;
   int alpha_left_endpoint_count = 0;
   int y, x, i;

   memset(sums, 0, sizeof sums);

   for (y = 0; y < height; y++) {
      for (x = 0; x < width; x++) {
         /* Colour grouping by luminance */
         luminance = p[0] + p[1] + p[2];
         if (luminance < average_luminance) {
            endpoint = 0;
            rgb_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         for (i = 0; i < 3; i++)
            sums[endpoint][i] += p[i];

         /* Alpha grouping: this must test the alpha channel (p[3]).
          * Comparing the blue channel against the alpha average would
          * group the alpha values by an unrelated quantity. */
         if (p[3] < average_alpha) {
            endpoint = 0;
            alpha_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         sums[endpoint][3] += p[3];

         p += 4;
      }

      p += src_rowstride - width * 4;
   }

   if (rgb_left_endpoint_count == 0 ||
       rgb_left_endpoint_count == width * height) {
      /* Only one colour group is populated; use the overall mean */
      for (i = 0; i < 3; i++)
         endpoints[0][i] = endpoints[1][i] =
            (sums[0][i] + sums[1][i]) / (width * height);
   } else {
      for (i = 0; i < 3; i++) {
         endpoints[0][i] = sums[0][i] / rgb_left_endpoint_count;
         endpoints[1][i] = (sums[1][i] /
                            (width * height - rgb_left_endpoint_count));
      }
   }

   if (alpha_left_endpoint_count == 0 ||
       alpha_left_endpoint_count == width * height) {
      /* Only one alpha group is populated; use the overall mean */
      endpoints[0][3] = endpoints[1][3] =
         (sums[0][3] + sums[1][3]) / (width * height);
   } else {
      endpoints[0][3] = sums[0][3] / alpha_left_endpoint_count;
      endpoints[1][3] = (sums[1][3] /
                         (width * height - alpha_left_endpoint_count));
   }

   /* We may need to swap the endpoints to ensure the most-significant bit of
    * the first index is zero */

   for (endpoint = 0; endpoint < 2; endpoint++) {
      endpoint_luminances[endpoint] =
         endpoints[endpoint][0] +
         endpoints[endpoint][1] +
         endpoints[endpoint][2];
   }
   midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2;

   /* Swap if the first texel and endpoint 0 lie on opposite sides of the
    * midpoint */
   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (endpoint_luminances[0] <= midpoint)) {
      memcpy(temp, endpoints[0], 3);
      memcpy(endpoints[0], endpoints[1], 3);
      memcpy(endpoints[1], temp, 3);
   }

   /* Same for the alpha endpoints */

   midpoint = (endpoints[0][3] + endpoints[1][3]) / 2;

   if ((src[3] <= midpoint) != (endpoints[0][3] <= midpoint)) {
      temp[0] = endpoints[0][3];
      endpoints[0][3] = endpoints[1][3];
      endpoints[1][3] = temp[0];
   }
}
1308
1309 static void
write_rgb_indices_unorm(struct bit_writer * writer,int src_width,int src_height,const uint8_t * src,int src_rowstride,uint8_t endpoints[][4])1310 write_rgb_indices_unorm(struct bit_writer *writer,
1311 int src_width, int src_height,
1312 const uint8_t *src, int src_rowstride,
1313 uint8_t endpoints[][4])
1314 {
1315 int luminance;
1316 int endpoint_luminances[2];
1317 int endpoint;
1318 int index;
1319 int y, x;
1320
1321 for (endpoint = 0; endpoint < 2; endpoint++) {
1322 endpoint_luminances[endpoint] =
1323 endpoints[endpoint][0] +
1324 endpoints[endpoint][1] +
1325 endpoints[endpoint][2];
1326 }
1327
1328 /* If the endpoints have the same luminance then we'll just use index 0 for
1329 * all of the texels */
1330 if (endpoint_luminances[0] == endpoint_luminances[1]) {
1331 write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 2 - 1, 0);
1332 return;
1333 }
1334
1335 for (y = 0; y < src_height; y++) {
1336 for (x = 0; x < src_width; x++) {
1337 luminance = src[0] + src[1] + src[2];
1338
1339 index = ((luminance - endpoint_luminances[0]) * 3 /
1340 (endpoint_luminances[1] - endpoint_luminances[0]));
1341 if (index < 0)
1342 index = 0;
1343 else if (index > 3)
1344 index = 3;
1345
1346 assert(x != 0 || y != 0 || index < 2);
1347
1348 write_bits(writer, (x == 0 && y == 0) ? 1 : 2, index);
1349
1350 src += 4;
1351 }
1352
1353 /* Pad the indices out to the block size */
1354 if (src_width < BLOCK_SIZE)
1355 write_bits(writer, 2 * (BLOCK_SIZE - src_width), 0);
1356
1357 src += src_rowstride - src_width * 4;
1358 }
1359
1360 /* Pad the indices out to the block size */
1361 if (src_height < BLOCK_SIZE)
1362 write_bits(writer, 2 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1363 }
1364
1365 static void
write_alpha_indices_unorm(struct bit_writer * writer,int src_width,int src_height,const uint8_t * src,int src_rowstride,uint8_t endpoints[][4])1366 write_alpha_indices_unorm(struct bit_writer *writer,
1367 int src_width, int src_height,
1368 const uint8_t *src, int src_rowstride,
1369 uint8_t endpoints[][4])
1370 {
1371 int index;
1372 int y, x;
1373
1374 /* If the endpoints have the same alpha then we'll just use index 0 for
1375 * all of the texels */
1376 if (endpoints[0][3] == endpoints[1][3]) {
1377 write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 3 - 1, 0);
1378 return;
1379 }
1380
1381 for (y = 0; y < src_height; y++) {
1382 for (x = 0; x < src_width; x++) {
1383 index = (((int) src[3] - (int) endpoints[0][3]) * 7 /
1384 ((int) endpoints[1][3] - endpoints[0][3]));
1385 if (index < 0)
1386 index = 0;
1387 else if (index > 7)
1388 index = 7;
1389
1390 assert(x != 0 || y != 0 || index < 4);
1391
1392 /* The first index has one less bit */
1393 write_bits(writer, (x == 0 && y == 0) ? 2 : 3, index);
1394
1395 src += 4;
1396 }
1397
1398 /* Pad the indices out to the block size */
1399 if (src_width < BLOCK_SIZE)
1400 write_bits(writer, 3 * (BLOCK_SIZE - src_width), 0);
1401
1402 src += src_rowstride - src_width * 4;
1403 }
1404
1405 /* Pad the indices out to the block size */
1406 if (src_height < BLOCK_SIZE)
1407 write_bits(writer, 3 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1408 }
1409
1410 static void
compress_rgba_unorm_block(int src_width,int src_height,const uint8_t * src,int src_rowstride,uint8_t * dst)1411 compress_rgba_unorm_block(int src_width, int src_height,
1412 const uint8_t *src, int src_rowstride,
1413 uint8_t *dst)
1414 {
1415 int average_luminance, average_alpha;
1416 uint8_t endpoints[2][4];
1417 struct bit_writer writer;
1418 int component, endpoint;
1419
1420 get_average_luminance_alpha_unorm(src_width, src_height, src, src_rowstride,
1421 &average_luminance, &average_alpha);
1422 get_rgba_endpoints_unorm(src_width, src_height, src, src_rowstride,
1423 average_luminance, average_alpha,
1424 endpoints);
1425
1426 writer.dst = dst;
1427 writer.pos = 0;
1428 writer.buf = 0;
1429
1430 write_bits(&writer, 5, 0x10); /* mode 4 */
1431 write_bits(&writer, 2, 0); /* rotation 0 */
1432 write_bits(&writer, 1, 0); /* index selection bit */
1433
1434 /* Write the color endpoints */
1435 for (component = 0; component < 3; component++)
1436 for (endpoint = 0; endpoint < 2; endpoint++)
1437 write_bits(&writer, 5, endpoints[endpoint][component] >> 3);
1438
1439 /* Write the alpha endpoints */
1440 for (endpoint = 0; endpoint < 2; endpoint++)
1441 write_bits(&writer, 6, endpoints[endpoint][3] >> 2);
1442
1443 write_rgb_indices_unorm(&writer,
1444 src_width, src_height,
1445 src, src_rowstride,
1446 endpoints);
1447 write_alpha_indices_unorm(&writer,
1448 src_width, src_height,
1449 src, src_rowstride,
1450 endpoints);
1451 }
1452
1453 static void
compress_rgba_unorm(int width,int height,const uint8_t * src,int src_rowstride,uint8_t * dst,int dst_rowstride)1454 compress_rgba_unorm(int width, int height,
1455 const uint8_t *src, int src_rowstride,
1456 uint8_t *dst, int dst_rowstride)
1457 {
1458 int dst_row_diff;
1459 int y, x;
1460
1461 if (dst_rowstride >= width * 4)
1462 dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
1463 else
1464 dst_row_diff = 0;
1465
1466 for (y = 0; y < height; y += BLOCK_SIZE) {
1467 for (x = 0; x < width; x += BLOCK_SIZE) {
1468 compress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE),
1469 MIN2(height - y, BLOCK_SIZE),
1470 src + x * 4 + y * src_rowstride,
1471 src_rowstride,
1472 dst);
1473 dst += BLOCK_BYTES;
1474 }
1475 dst += dst_row_diff;
1476 }
1477 }
1478
/*
 * Returns the mean "luminance" over a width x height region of texels made
 * of three floats each.  Luminance here is simply the sum of a texel's
 * three components.
 *
 * src_rowstride is the distance between rows in bytes.
 */
static float
get_average_luminance_float(int width, int height,
                            const float *src, int src_rowstride)
{
   const float *texel = src;
   float total = 0;
   int row, col;

   for (row = 0; row < height; row++) {
      for (col = 0; col < width; col++, texel += 3)
         total += texel[0] + texel[1] + texel[2];

      /* Skip whatever lies between the end of this row and the next */
      texel += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
   }

   return total / (width * height);
}
1496
/*
 * Clamps value to at most 65504.  The lower bound is -65504 when is_signed
 * is set and 0 otherwise.
 */
static float
clamp_value(float value, bool is_signed)
{
   const float upper = 65504.0f;
   const float lower = is_signed ? -65504.0f : 0.0f;

   if (value > upper)
      return upper;

   if (value < lower)
      return lower;

   return value;
}
1515
/*
 * Chooses two RGB endpoints for a width x height region of texels made of
 * three floats each.
 *
 * Texels whose luminance (sum of the three components) is below
 * average_luminance contribute to endpoint 0 and the rest to endpoint 1;
 * each endpoint is the per-component mean of its group.  If all texels land
 * in the same group, both endpoints are set to the mean of the whole
 * region.  The endpoints are then passed through clamp_value().
 *
 * src_rowstride is the distance between rows in bytes.
 */
static void
get_endpoints_float(int width, int height,
                    const float *src, int src_rowstride,
                    float average_luminance, float endpoints[][3],
                    bool is_signed)
{
   const int n_texels = width * height;
   const float *p = src;
   float sums[2][3];
   float endpoint_lum[2];
   float midpoint;
   int n_left = 0;
   int row, col, e, c;

   for (e = 0; e < 2; e++)
      for (c = 0; c < 3; c++)
         sums[e][c] = 0.0f;

   for (row = 0; row < height; row++) {
      for (col = 0; col < width; col++) {
         const float luminance = p[0] + p[1] + p[2];
         int group = 1;

         if (luminance < average_luminance) {
            group = 0;
            n_left++;
         }

         for (c = 0; c < 3; c++)
            sums[group][c] += p[c];

         p += 3;
      }

      p += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
   }

   if (n_left == 0 || n_left == n_texels) {
      /* Only one group is populated; use the overall mean */
      for (c = 0; c < 3; c++)
         endpoints[0][c] = endpoints[1][c] =
            (sums[0][c] + sums[1][c]) / n_texels;
   } else {
      for (c = 0; c < 3; c++) {
         endpoints[0][c] = sums[0][c] / n_left;
         endpoints[1][c] = sums[1][c] / (n_texels - n_left);
      }
   }

   /* Clamp the endpoints to the range of a half float and strip out
    * infinities */
   for (e = 0; e < 2; e++)
      for (c = 0; c < 3; c++)
         endpoints[e][c] = clamp_value(endpoints[e][c], is_signed);

   /* We may need to swap the endpoints to ensure the most-significant bit of
    * the first index is zero */
   for (e = 0; e < 2; e++)
      endpoint_lum[e] = endpoints[e][0] + endpoints[e][1] + endpoints[e][2];

   midpoint = (endpoint_lum[0] + endpoint_lum[1]) / 2.0f;

   /* Swap if the first texel and endpoint 0 lie on opposite sides of the
    * midpoint */
   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (endpoint_lum[0] <= midpoint)) {
      for (c = 0; c < 3; c++) {
         const float held = endpoints[0][c];

         endpoints[0][c] = endpoints[1][c];
         endpoints[1][c] = held;
      }
   }
}
1591
1592 static void
write_rgb_indices_float(struct bit_writer * writer,int src_width,int src_height,const float * src,int src_rowstride,float endpoints[][3])1593 write_rgb_indices_float(struct bit_writer *writer,
1594 int src_width, int src_height,
1595 const float *src, int src_rowstride,
1596 float endpoints[][3])
1597 {
1598 float luminance;
1599 float endpoint_luminances[2];
1600 int endpoint;
1601 int index;
1602 int y, x;
1603
1604 for (endpoint = 0; endpoint < 2; endpoint++) {
1605 endpoint_luminances[endpoint] =
1606 endpoints[endpoint][0] +
1607 endpoints[endpoint][1] +
1608 endpoints[endpoint][2];
1609 }
1610
1611 /* If the endpoints have the same luminance then we'll just use index 0 for
1612 * all of the texels */
1613 if (endpoint_luminances[0] == endpoint_luminances[1]) {
1614 write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 4 - 1, 0);
1615 return;
1616 }
1617
1618 for (y = 0; y < src_height; y++) {
1619 for (x = 0; x < src_width; x++) {
1620 luminance = src[0] + src[1] + src[2];
1621
1622 index = ((luminance - endpoint_luminances[0]) * 15 /
1623 (endpoint_luminances[1] - endpoint_luminances[0]));
1624 if (index < 0)
1625 index = 0;
1626 else if (index > 15)
1627 index = 15;
1628
1629 assert(x != 0 || y != 0 || index < 8);
1630
1631 write_bits(writer, (x == 0 && y == 0) ? 3 : 4, index);
1632
1633 src += 3;
1634 }
1635
1636 /* Pad the indices out to the block size */
1637 if (src_width < BLOCK_SIZE)
1638 write_bits(writer, 4 * (BLOCK_SIZE - src_width), 0);
1639
1640 src += (src_rowstride - src_width * 3 * sizeof (float)) / sizeof (float);
1641 }
1642
1643 /* Pad the indices out to the block size */
1644 if (src_height < BLOCK_SIZE)
1645 write_bits(writer, 4 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1646 }
1647
/*
 * Converts a float endpoint component into the integer that is stored in
 * the block.
 *
 * The value is first converted with _mesa_float_to_half().  In the unsigned
 * case non-positive inputs give 0 and everything else is scaled by 64/31
 * and shifted right by 6.  In the signed case the magnitude (sign bit
 * removed) is scaled by 32/31 and shifted right by 6; negative values are
 * then returned as the low 10 bits of the negated magnitude.
 */
static int
get_endpoint_value(float value, bool is_signed)
{
   int half;
   int magnitude;

   if (!is_signed) {
      if (value <= 0.0f)
         return 0;

      half = _mesa_float_to_half(value);

      return (64 * half / 31) >> 6;
   }

   half = _mesa_float_to_half(value);

   if ((half & 0x8000) == 0)
      return (32 * half / 31) >> 6;

   /* Negative: work on the magnitude, then negate into 10 bits */
   magnitude = (32 * (half & 0x7fff) / 31) >> 6;

   return -magnitude & ((1 << 10) - 1);
}
1677
1678 static void
compress_rgb_float_block(int src_width,int src_height,const float * src,int src_rowstride,uint8_t * dst,bool is_signed)1679 compress_rgb_float_block(int src_width, int src_height,
1680 const float *src, int src_rowstride,
1681 uint8_t *dst,
1682 bool is_signed)
1683 {
1684 float average_luminance;
1685 float endpoints[2][3];
1686 struct bit_writer writer;
1687 int component, endpoint;
1688 int endpoint_value;
1689
1690 average_luminance =
1691 get_average_luminance_float(src_width, src_height, src, src_rowstride);
1692 get_endpoints_float(src_width, src_height, src, src_rowstride,
1693 average_luminance, endpoints, is_signed);
1694
1695 writer.dst = dst;
1696 writer.pos = 0;
1697 writer.buf = 0;
1698
1699 write_bits(&writer, 5, 3); /* mode 3 */
1700
1701 /* Write the endpoints */
1702 for (endpoint = 0; endpoint < 2; endpoint++) {
1703 for (component = 0; component < 3; component++) {
1704 endpoint_value =
1705 get_endpoint_value(endpoints[endpoint][component], is_signed);
1706 write_bits(&writer, 10, endpoint_value);
1707 }
1708 }
1709
1710 write_rgb_indices_float(&writer,
1711 src_width, src_height,
1712 src, src_rowstride,
1713 endpoints);
1714 }
1715
1716 static void
compress_rgb_float(int width,int height,const float * src,int src_rowstride,uint8_t * dst,int dst_rowstride,bool is_signed)1717 compress_rgb_float(int width, int height,
1718 const float *src, int src_rowstride,
1719 uint8_t *dst, int dst_rowstride,
1720 bool is_signed)
1721 {
1722 int dst_row_diff;
1723 int y, x;
1724
1725 if (dst_rowstride >= width * 4)
1726 dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
1727 else
1728 dst_row_diff = 0;
1729
1730 for (y = 0; y < height; y += BLOCK_SIZE) {
1731 for (x = 0; x < width; x += BLOCK_SIZE) {
1732 compress_rgb_float_block(MIN2(width - x, BLOCK_SIZE),
1733 MIN2(height - y, BLOCK_SIZE),
1734 src + x * 3 +
1735 y * src_rowstride / sizeof (float),
1736 src_rowstride,
1737 dst,
1738 is_signed);
1739 dst += BLOCK_BYTES;
1740 }
1741 dst += dst_row_diff;
1742 }
1743 }
1744
1745 #endif
1746