• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 /*
25  * Included by texcompress_bptc and gallium to define BPTC decoding routines.
26  */
27 
28 #ifndef TEXCOMPRESS_BPTC_TMP_H
29 #define TEXCOMPRESS_BPTC_TMP_H
30 
31 #include "util/format_srgb.h"
32 #include "util/half_float.h"
33 #include "macros.h"
34 
/* Edge length, in texels, of one square compressed block; a block therefore
 * holds BLOCK_SIZE * BLOCK_SIZE texels.
 */
35 #define BLOCK_SIZE 4
/* Number of entries in each partition table and each anchor-index row. */
36 #define N_PARTITIONS 64
/* Number of bytes the decoder advances through the source per block. */
37 #define BLOCK_BYTES 16
38 
/* Bit-layout description of one UNORM block mode.
 *
 * The field order matters: bptc_unorm_modes initialises these structs
 * positionally.
 */
39 struct bptc_unorm_mode {
   /* Number of subsets (1, 2 or 3); each subset has two endpoints. */
40    int n_subsets;
   /* Width in bits of the partition number field (0 if absent). */
41    int n_partition_bits;
   /* True if a 2-bit rotation field follows the partition number. */
42    bool has_rotation_bits;
   /* True if a 1-bit index selection field is present. */
43    bool has_index_selection_bit;
   /* Stored bits per colour component of an endpoint, before any p-bit. */
44    int n_color_bits;
   /* Stored bits per alpha endpoint; 0 means alpha is forced to 255. */
45    int n_alpha_bits;
   /* True if every endpoint carries its own extra low-order bit. */
46    bool has_endpoint_pbits;
   /* True if both endpoints of a subset share one extra low-order bit. */
47    bool has_shared_pbits;
   /* Width in bits of a primary index (anchor texels store one bit less). */
48    int n_index_bits;
   /* Width in bits of a secondary index, or 0 if the mode has none. */
49    int n_secondary_index_bits;
50 };
51 
/* One contiguous run of bits within a float-format block.
 *
 * NOTE(review): only `endpoint` and `n_bits` are consumed in the part of the
 * file visible here; the meaning given for the other fields is inferred from
 * their names and should be confirmed against extract_float_endpoints.
 */
52 struct bptc_float_bitfield {
   /* Endpoint the bits belong to; -1 terminates a bitfield list. */
53    int8_t endpoint;
   /* Presumably the colour component (0..2) of that endpoint - TODO confirm. */
54    uint8_t component;
   /* Presumably the bit position within the component value - TODO confirm. */
55    uint8_t offset;
   /* Number of bits to read from the block for this field. */
56    uint8_t n_bits;
   /* Presumably set when the bits are stored in reversed order - TODO confirm. */
57    bool reverse;
58 };
59 
/* Description of one float-format block mode.
 *
 * The field order matters: bptc_float_modes initialises these structs
 * positionally.
 */
60 struct bptc_float_mode {
   /* True for table slots that do not describe a usable mode. */
61    bool reserved;
   /* NOTE(review): by name, whether endpoints are stored as deltas that must
    * be transformed; the code using it is outside this view.
    */
62    bool transformed_endpoints;
   /* Width of the partition field; non-zero selects four endpoints
    * instead of two.
    */
63    int n_partition_bits;
   /* Bits per endpoint component - presumably of the base endpoint. */
64    int n_endpoint_bits;
   /* Bits per texel index. */
65    int n_index_bits;
   /* One bit count per colour component - presumably for delta endpoints. */
66    int n_delta_bits[3];
   /* Bitfields in the order they appear in the block; the list ends at the
    * first entry whose endpoint is -1.
    */
67    struct bptc_float_bitfield bitfields[24];
68 };
69 
/* State for emitting a bit stream.
 *
 * NOTE(review): nothing in the visible part of this file uses this struct;
 * the field descriptions below are inferred from the names only.
 */
70 struct bit_writer {
   /* Presumably the partially filled byte being assembled. */
71    uint8_t buf;
   /* Presumably the number of bits already placed in buf. */
72    int pos;
   /* Presumably the next output byte to write. */
73    uint8_t *dst;
74 };
75 
/* Layout of the eight UNORM block modes, indexed by mode number (the callers
 * use ffs(block[0]) - 1).
 *
 * Columns, in struct bptc_unorm_mode field order:
 *   n_subsets, n_partition_bits, has_rotation_bits, has_index_selection_bit,
 *   n_color_bits, n_alpha_bits, has_endpoint_pbits, has_shared_pbits,
 *   n_index_bits, n_secondary_index_bits
 */
76 static const struct bptc_unorm_mode
77 bptc_unorm_modes[] = {
78    /* 0 */ { 3, 4, false, false, 4, 0, true,  false, 3, 0 },
79    /* 1 */ { 2, 6, false, false, 6, 0, false, true,  3, 0 },
80    /* 2 */ { 3, 6, false, false, 5, 0, false, false, 2, 0 },
81    /* 3 */ { 2, 6, false, false, 7, 0, true,  false, 2, 0 },
82    /* 4 */ { 1, 0, true,  true,  5, 6, false, false, 2, 3 },
83    /* 5 */ { 1, 0, true,  false, 7, 8, false, false, 2, 2 },
84    /* 6 */ { 1, 0, false, false, 7, 7, true,  false, 4, 0 },
85    /* 7 */ { 2, 6, false, false, 5, 5, true,  false, 2, 0 }
86 };
87 
/* Layout of the float-format block modes.
 *
 * Each entry is initialised positionally as
 *   { reserved, transformed_endpoints, n_partition_bits, n_endpoint_bits,
 *     n_index_bits, { n_delta_bits[0..2] }, { bitfields... } }
 * and each bitfield as
 *   { endpoint, component, offset, n_bits, reverse }.
 * A bitfield list ends with { -1 }. Entries written as { true } are reserved
 * slots; all their other fields are zero.
 *
 * NOTE(review): the binary label above each entry looks like the mode's
 * selector bit pattern; the code that maps a block to a table index is not
 * visible here, so confirm before relying on it.
 */
88 static const struct bptc_float_mode
89 bptc_float_modes[] = {
90    /* 00 */
91    { false, true, 5, 10, 3, { 5, 5, 5 },
92      { { 2, 1, 4, 1, false }, { 2, 2, 4, 1, false }, { 3, 2, 4, 1, false },
93        { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
94        { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
95        { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
96        { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
97        { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
98        { 3, 2, 3, 1, false },
99        { -1 } }
100    },
101    /* 01 */
102    { false, true, 5, 7, 3, { 6, 6, 6 },
103      { { 2, 1, 5, 1, false }, { 3, 1, 4, 1, false }, { 3, 1, 5, 1, false },
104        { 0, 0, 0, 7, false }, { 3, 2, 0, 1, false }, { 3, 2, 1, 1, false },
105        { 2, 2, 4, 1, false }, { 0, 1, 0, 7, false }, { 2, 2, 5, 1, false },
106        { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false }, { 0, 2, 0, 7, false },
107        { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
108        { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
109        { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
110        { 2, 0, 0, 6, false },
111        { 3, 0, 0, 6, false },
112        { -1 } }
113    },
114    /* 00010 */
115    { false, true, 5, 11, 3, { 5, 4, 4 },
116      { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
117        { 1, 0, 0, 5, false }, { 0, 0, 10, 1, false }, { 2, 1, 0, 4, false },
118        { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false }, { 3, 2, 0, 1, false },
119        { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
120        { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
121        { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
122        { -1 } }
123    },
124    /* 00011 */
125    { false, false, 0, 10, 4, { 10, 10, 10 },
126      { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
127        { 1, 0, 0, 10, false }, { 1, 1, 0, 10, false }, { 1, 2, 0, 10, false },
128        { -1 } }
129    },
130    /* 00110 */
131    { false, true, 5, 11, 3, { 4, 5, 4 },
132      { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
133        { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 3, 1, 4, 1, false },
134        { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false }, { 0, 1, 10, 1, false },
135        { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
136        { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
137        { 3, 2, 0, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
138        { 2, 1, 4, 1, false }, { 3, 2, 3, 1, false },
139        { -1 } }
140    },
141    /* 00111 */
142    { false, true, 0, 11, 4, { 9, 9, 9 },
143      { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
144        { 1, 0, 0, 9, false }, { 0, 0, 10, 1, false }, { 1, 1, 0, 9, false },
145        { 0, 1, 10, 1, false }, { 1, 2, 0, 9, false }, { 0, 2, 10, 1, false },
146        { -1 } }
147    },
148    /* 01010 */
149    { false, true, 5, 11, 3, { 4, 4, 5 },
150      { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
151        { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 2, 2, 4, 1, false },
152        { 2, 1, 0, 4, false }, { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false },
153        { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
154        { 0, 2, 10, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
155        { 3, 2, 1, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
156        { 3, 2, 4, 1, false }, { 3, 2, 3, 1, false },
157        { -1 } }
158    },
159    /* 01011 */
160    { false, true, 0, 12, 4, { 8, 8, 8 },
161      { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
162        { 1, 0, 0, 8, false }, { 0, 0, 10, 2, true }, { 1, 1, 0, 8, false },
163        { 0, 1, 10, 2, true }, { 1, 2, 0, 8, false }, { 0, 2, 10, 2, true },
164        { -1 } }
165    },
166    /* 01110 */
167    { false, true, 5, 9, 3, { 5, 5, 5 },
168      { { 0, 0, 0, 9, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 9, false },
169        { 2, 1, 4, 1, false }, { 0, 2, 0, 9, false }, { 3, 2, 4, 1, false },
170        { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
171        { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
172        { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
173        { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
174        { 3, 2, 3, 1, false },
175        { -1 } }
176    },
177    /* 01111 */
178    { false, true, 0, 16, 4, { 4, 4, 4 },
179      { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
180        { 1, 0, 0, 4, false }, { 0, 0, 10, 6, true }, { 1, 1, 0, 4, false },
181        { 0, 1, 10, 6, true }, { 1, 2, 0, 4, false }, { 0, 2, 10, 6, true },
182        { -1 } }
183    },
184    /* 10010 */
185    { false, true, 5, 8, 3, { 6, 5, 5 },
186      { { 0, 0, 0, 8, false }, { 3, 1, 4, 1, false }, { 2, 2, 4, 1, false },
187        { 0, 1, 0, 8, false }, { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false },
188        { 0, 2, 0, 8, false }, { 3, 2, 3, 1, false }, { 3, 2, 4, 1, false },
189        { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false },
190        { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
191        { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 6, false },
192        { 3, 0, 0, 6, false },
193        { -1 } }
194    },
195    /* 10011 */
196    { true /* reserved */ },
197    /* 10110 */
198    { false, true, 5, 8, 3, { 5, 6, 5 },
199      { { 0, 0, 0, 8, false }, { 3, 2, 0, 1, false }, { 2, 2, 4, 1, false },
200        { 0, 1, 0, 8, false }, { 2, 1, 5, 1, false }, { 2, 1, 4, 1, false },
201        { 0, 2, 0, 8, false }, { 3, 1, 5, 1, false }, { 3, 2, 4, 1, false },
202        { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
203        { 1, 1, 0, 6, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
204        { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
205        { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
206        { -1 } }
207    },
208    /* 10111 */
209    { true /* reserved */ },
210    /* 11010 */
211    { false, true, 5, 8, 3, { 5, 5, 6 },
212      { { 0, 0, 0, 8, false }, { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false },
213        { 0, 1, 0, 8, false }, { 2, 2, 5, 1, false }, { 2, 1, 4, 1, false },
214        { 0, 2, 0, 8, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
215        { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
216        { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
217        { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
218        { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
219        { -1 } }
220    },
221    /* 11011 */
222    { true /* reserved */ },
223    /* 11110 */
224    { false, false, 5, 6, 3, { 6, 6, 6 },
225      { { 0, 0, 0, 6, false }, { 3, 1, 4, 1, false }, { 3, 2, 0, 1, false },
226        { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 6, false },
227        { 2, 1, 5, 1, false }, { 2, 2, 5, 1, false }, { 3, 2, 2, 1, false },
228        { 2, 1, 4, 1, false }, { 0, 2, 0, 6, false }, { 3, 1, 5, 1, false },
229        { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
230        { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
231        { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
232        { 2, 0, 0, 6, false }, { 3, 0, 0, 6, false },
233        { -1 } }
234    },
235    /* 11111 */
236    { true /* reserved */ },
237 };
238 
239 /* This partition table is used when the mode has two subsets. Each
240  * partition is represented by a 32-bit value which gives 2 bits per texel
241  * within the block. The value of the two bits represents which subset to use
242  * (0 or 1).
243  */
/* The table is indexed by partition number; the decoder reads the subset of
 * a texel as (value >> (texel * 2)) & 3, so texel 0 occupies the two least
 * significant bits.
 */
244 static const uint32_t
245 partition_table1[N_PARTITIONS] = {
246    0x50505050U, 0x40404040U, 0x54545454U, 0x54505040U,
247    0x50404000U, 0x55545450U, 0x55545040U, 0x54504000U,
248    0x50400000U, 0x55555450U, 0x55544000U, 0x54400000U,
249    0x55555440U, 0x55550000U, 0x55555500U, 0x55000000U,
250    0x55150100U, 0x00004054U, 0x15010000U, 0x00405054U,
251    0x00004050U, 0x15050100U, 0x05010000U, 0x40505054U,
252    0x00404050U, 0x05010100U, 0x14141414U, 0x05141450U,
253    0x01155440U, 0x00555500U, 0x15014054U, 0x05414150U,
254    0x44444444U, 0x55005500U, 0x11441144U, 0x05055050U,
255    0x05500550U, 0x11114444U, 0x41144114U, 0x44111144U,
256    0x15055054U, 0x01055040U, 0x05041050U, 0x05455150U,
257    0x14414114U, 0x50050550U, 0x41411414U, 0x00141400U,
258    0x00041504U, 0x00105410U, 0x10541000U, 0x04150400U,
259    0x50410514U, 0x41051450U, 0x05415014U, 0x14054150U,
260    0x41050514U, 0x41505014U, 0x40011554U, 0x54150140U,
261    0x50505500U, 0x00555050U, 0x15151010U, 0x54540404U,
262 };
263 
264 /* This partition table is used when the mode has three subsets. In this case
265  * the values can be 0, 1 or 2.
266  */
/* Indexed by partition number and decoded two bits per texel, starting from
 * the least significant bits: (value >> (texel * 2)) & 3.
 */
267 static const uint32_t
268 partition_table2[N_PARTITIONS] = {
269    0xaa685050U, 0x6a5a5040U, 0x5a5a4200U, 0x5450a0a8U,
270    0xa5a50000U, 0xa0a05050U, 0x5555a0a0U, 0x5a5a5050U,
271    0xaa550000U, 0xaa555500U, 0xaaaa5500U, 0x90909090U,
272    0x94949494U, 0xa4a4a4a4U, 0xa9a59450U, 0x2a0a4250U,
273    0xa5945040U, 0x0a425054U, 0xa5a5a500U, 0x55a0a0a0U,
274    0xa8a85454U, 0x6a6a4040U, 0xa4a45000U, 0x1a1a0500U,
275    0x0050a4a4U, 0xaaa59090U, 0x14696914U, 0x69691400U,
276    0xa08585a0U, 0xaa821414U, 0x50a4a450U, 0x6a5a0200U,
277    0xa9a58000U, 0x5090a0a8U, 0xa8a09050U, 0x24242424U,
278    0x00aa5500U, 0x24924924U, 0x24499224U, 0x50a50a50U,
279    0x500aa550U, 0xaaaa4444U, 0x66660000U, 0xa5a0a5a0U,
280    0x50a050a0U, 0x69286928U, 0x44aaaa44U, 0x66666600U,
281    0xaa444444U, 0x54a854a8U, 0x95809580U, 0x96969600U,
282    0xa85454a8U, 0x80959580U, 0xaa141414U, 0x96960000U,
283    0xaaaa1414U, 0xa05050a0U, 0xa0a5a5a0U, 0x96000000U,
284    0x40804080U, 0xa9a8a9a8U, 0xaaaaaa44U, 0x2a4a5254U
285 };
286 
/* Texel numbers (0..15) of the anchor texels, indexed as
 * anchor_indices[row][partition_num].
 *
 * Row 0 is consulted for two-subset modes; rows 1 and 2 are both consulted
 * for three-subset modes. Texel 0 is always treated as an anchor by the
 * lookup functions and so is not stored here.
 */
287 static const uint8_t
288 anchor_indices[][N_PARTITIONS] = {
289    /* Anchor index values for the second subset of two-subset partitioning */
290    {
291       0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
292       0xf,0x2,0x8,0x2,0x2,0x8,0x8,0xf,0x2,0x8,0x2,0x2,0x8,0x8,0x2,0x2,
293       0xf,0xf,0x6,0x8,0x2,0x8,0xf,0xf,0x2,0x8,0x2,0x2,0x2,0xf,0xf,0x6,
294       0x6,0x2,0x6,0x8,0xf,0xf,0x2,0x2,0xf,0xf,0xf,0xf,0xf,0x2,0x2,0xf
295    },
296 
297    /* Anchor index values for the second subset of three-subset partitioning */
298    {
299       0x3,0x3,0xf,0xf,0x8,0x3,0xf,0xf,0x8,0x8,0x6,0x6,0x6,0x5,0x3,0x3,
300       0x3,0x3,0x8,0xf,0x3,0x3,0x6,0xa,0x5,0x8,0x8,0x6,0x8,0x5,0xf,0xf,
301       0x8,0xf,0x3,0x5,0x6,0xa,0x8,0xf,0xf,0x3,0xf,0x5,0xf,0xf,0xf,0xf,
302       0x3,0xf,0x5,0x5,0x5,0x8,0x5,0xa,0x5,0xa,0x8,0xd,0xf,0xc,0x3,0x3
303    },
304 
305    /* Anchor index values for the third subset of three-subset
306     * partitioning
307     */
308    {
309       0xf,0x8,0x8,0x3,0xf,0xf,0x3,0x8,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x8,
310       0xf,0x8,0xf,0x3,0xf,0x8,0xf,0x8,0x3,0xf,0x6,0xa,0xf,0xf,0xa,0x8,
311       0xf,0x3,0xf,0xa,0xa,0x8,0x9,0xa,0x6,0xf,0x8,0xf,0x3,0x6,0x6,0x8,
312       0xf,0x3,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x3,0xf,0xf,0x8
313    }
314 };
315 
/*
 * Reads an n_bits wide field that starts `offset` bits into `block`.
 *
 * Bits are numbered from the least significant bit of block[0] upwards, and
 * the first bit read becomes bit 0 of the result, so a field may straddle
 * byte boundaries.
 *
 * A request for zero (or fewer) bits returns 0 without touching `block`.
 * This keeps a zero-width field harmless even when `offset` points just past
 * the end of the data, and avoids shifting by a negative count.
 *
 * The caller must ensure that bits [offset, offset + n_bits) lie inside
 * `block` and that n_bits is small enough for the result to fit in an int.
 */
static int
extract_bits(const uint8_t *block,
             int offset,
             int n_bits)
{
   int byte_index = offset / 8;
   int bit_index = offset % 8;
   int result = 0;
   int bit = 0;

   if (n_bits <= 0)
      return 0;

   while (n_bits > 0) {
      /* Only the first byte can start at a non-zero bit position. */
      const int available = 8 - bit_index;
      const int take = n_bits < available ? n_bits : available;

      result |= ((block[byte_index] >> bit_index) & ((1 << take) - 1)) << bit;

      n_bits -= take;
      bit += take;
      byte_index++;
      bit_index = 0;
   }

   return result;
}
342 
/*
 * Widens an n_bits wide value to a full byte.
 *
 * The value is moved to the top of the byte and its own most significant
 * bits are repeated in the low bits that the shift leaves empty, so that the
 * largest n-bit value maps to 255. The shift amounts are only valid for
 * n_bits between 4 and 8.
 */
static uint8_t
expand_component(uint8_t byte,
                 int n_bits)
{
   const int pad_bits = 8 - n_bits;
   const int high_part = byte << pad_bits;
   /* Keep the top pad_bits bits of the value: drop the other
    * n_bits - pad_bits bits.
    */
   const int low_part = byte >> (n_bits - pad_bits);

   return high_part | low_part;
}
352 
353 static int
extract_unorm_endpoints(const struct bptc_unorm_mode * mode,const uint8_t * block,int bit_offset,uint8_t endpoints[][4])354 extract_unorm_endpoints(const struct bptc_unorm_mode *mode,
355                         const uint8_t *block,
356                         int bit_offset,
357                         uint8_t endpoints[][4])
358 {
359    int component;
360    int subset;
361    int endpoint;
362    int pbit;
363    int n_components;
364 
365    /* Extract each color component */
366    for (component = 0; component < 3; component++) {
367       for (subset = 0; subset < mode->n_subsets; subset++) {
368          for (endpoint = 0; endpoint < 2; endpoint++) {
369             endpoints[subset * 2 + endpoint][component] =
370                extract_bits(block, bit_offset, mode->n_color_bits);
371             bit_offset += mode->n_color_bits;
372          }
373       }
374    }
375 
376    /* Extract the alpha values */
377    if (mode->n_alpha_bits > 0) {
378       for (subset = 0; subset < mode->n_subsets; subset++) {
379          for (endpoint = 0; endpoint < 2; endpoint++) {
380             endpoints[subset * 2 + endpoint][3] =
381                extract_bits(block, bit_offset, mode->n_alpha_bits);
382             bit_offset += mode->n_alpha_bits;
383          }
384       }
385 
386       n_components = 4;
387    } else {
388       for (subset = 0; subset < mode->n_subsets; subset++)
389          for (endpoint = 0; endpoint < 2; endpoint++)
390             endpoints[subset * 2 + endpoint][3] = 255;
391 
392       n_components = 3;
393    }
394 
395    /* Add in the p-bits */
396    if (mode->has_endpoint_pbits) {
397       for (subset = 0; subset < mode->n_subsets; subset++) {
398          for (endpoint = 0; endpoint < 2; endpoint++) {
399             pbit = extract_bits(block, bit_offset, 1);
400             bit_offset += 1;
401 
402             for (component = 0; component < n_components; component++) {
403                endpoints[subset * 2 + endpoint][component] <<= 1;
404                endpoints[subset * 2 + endpoint][component] |= pbit;
405             }
406          }
407       }
408    } else if (mode->has_shared_pbits) {
409       for (subset = 0; subset < mode->n_subsets; subset++) {
410          pbit = extract_bits(block, bit_offset, 1);
411          bit_offset += 1;
412 
413          for (endpoint = 0; endpoint < 2; endpoint++) {
414             for (component = 0; component < n_components; component++) {
415                endpoints[subset * 2 + endpoint][component] <<= 1;
416                endpoints[subset * 2 + endpoint][component] |= pbit;
417             }
418          }
419       }
420    }
421 
422    /* Expand the n-bit values to a byte */
423    for (subset = 0; subset < mode->n_subsets; subset++) {
424       for (endpoint = 0; endpoint < 2; endpoint++) {
425          for (component = 0; component < 3; component++) {
426             endpoints[subset * 2 + endpoint][component] =
427                expand_component(endpoints[subset * 2 + endpoint][component],
428                                 mode->n_color_bits +
429                                 mode->has_endpoint_pbits +
430                                 mode->has_shared_pbits);
431          }
432 
433          if (mode->n_alpha_bits > 0) {
434             endpoints[subset * 2 + endpoint][3] =
435                expand_component(endpoints[subset * 2 + endpoint][3],
436                                 mode->n_alpha_bits +
437                                 mode->has_endpoint_pbits +
438                                 mode->has_shared_pbits);
439          }
440       }
441    }
442 
443    return bit_offset;
444 }
445 
446 static bool
is_anchor(int n_subsets,int partition_num,int texel)447 is_anchor(int n_subsets,
448           int partition_num,
449           int texel)
450 {
451    if (texel == 0)
452       return true;
453 
454    switch (n_subsets) {
455    case 1:
456       return false;
457    case 2:
458       return anchor_indices[0][partition_num] == texel;
459    case 3:
460       return (anchor_indices[1][partition_num] == texel ||
461               anchor_indices[2][partition_num] == texel);
462    default:
463       assert(false);
464       return false;
465    }
466 }
467 
468 static int
count_anchors_before_texel(int n_subsets,int partition_num,int texel)469 count_anchors_before_texel(int n_subsets,
470                            int partition_num,
471                            int texel)
472 {
473    int count = 1;
474 
475    if (texel == 0)
476       return 0;
477 
478    switch (n_subsets) {
479    case 1:
480       break;
481    case 2:
482       if (texel > anchor_indices[0][partition_num])
483          count++;
484       break;
485    case 3:
486       if (texel > anchor_indices[1][partition_num])
487          count++;
488       if (texel > anchor_indices[2][partition_num])
489          count++;
490       break;
491    default:
492       assert(false);
493       return 0;
494    }
495 
496    return count;
497 }
498 
/*
 * Blends endpoint values `a` and `b` according to a texel index.
 *
 * The index selects a weight between 0 and 64 from a table chosen by
 * `index_bits` (2, 3 or 4; other widths have no table). The result is the
 * weighted sum in 1/64 units, with 32 added before the shift to round.
 */
static int32_t
interpolate(int32_t a, int32_t b,
            int index,
            int index_bits)
{
   static const uint8_t two_bit_weights[] = { 0, 21, 43, 64 };
   static const uint8_t three_bit_weights[] = { 0, 9, 18, 27, 37, 46, 55, 64 };
   static const uint8_t four_bit_weights[] = {
      0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64
   };
   /* Slot i holds the table for i-bit indices. */
   static const uint8_t *const tables[] = {
      NULL, NULL, two_bit_weights, three_bit_weights, four_bit_weights
   };
   const uint8_t *table = tables[index_bits];
   const int w_b = table[index];
   const int w_a = 64 - w_b;

   return (w_a * a + w_b * b + 32) >> 6;
}
517 
/*
 * Undoes the channel rotation of a decoded RGBA texel in place.
 *
 * Rotation 0 leaves the texel untouched; rotation 1, 2 or 3 exchanges
 * result[3] with result[0], result[1] or result[2] respectively.
 */
static void
apply_rotation(int rotation,
               uint8_t *result)
{
   if (rotation != 0) {
      uint8_t *swapped = &result[rotation - 1];
      const uint8_t last = result[3];

      result[3] = *swapped;
      *swapped = last;
   }
}
533 
534 static void
fetch_rgba_unorm_from_block(const uint8_t * block,uint8_t * result,int texel)535 fetch_rgba_unorm_from_block(const uint8_t *block,
536                             uint8_t *result,
537                             int texel)
538 {
539    int mode_num = ffs(block[0]);
540    const struct bptc_unorm_mode *mode;
541    int bit_offset, secondary_bit_offset;
542    int partition_num;
543    int subset_num;
544    int rotation;
545    int index_selection;
546    int index_bits;
547    int indices[2];
548    int index;
549    int anchors_before_texel;
550    bool anchor;
551    uint8_t endpoints[3 * 2][4];
552    uint32_t subsets;
553    int component;
554 
555    if (mode_num == 0) {
556       /* According to the spec this mode is reserved and shouldn't be used. */
557       memset(result, 0, 4);
558       return;
559    }
560 
561    mode = bptc_unorm_modes + mode_num - 1;
562    bit_offset = mode_num;
563 
564    partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
565    bit_offset += mode->n_partition_bits;
566 
567    switch (mode->n_subsets) {
568    case 1:
569       subsets = 0;
570       break;
571    case 2:
572       subsets = partition_table1[partition_num];
573       break;
574    case 3:
575       subsets = partition_table2[partition_num];
576       break;
577    default:
578       assert(false);
579       return;
580    }
581 
582    if (mode->has_rotation_bits) {
583       rotation = extract_bits(block, bit_offset, 2);
584       bit_offset += 2;
585    } else {
586       rotation = 0;
587    }
588 
589    if (mode->has_index_selection_bit) {
590       index_selection = extract_bits(block, bit_offset, 1);
591       bit_offset++;
592    } else {
593       index_selection = 0;
594    }
595 
596    bit_offset = extract_unorm_endpoints(mode, block, bit_offset, endpoints);
597 
598    anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
599                                                      partition_num, texel);
600 
601    /* Calculate the offset to the secondary index */
602    secondary_bit_offset = (bit_offset +
603                            BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
604                            mode->n_subsets +
605                            mode->n_secondary_index_bits * texel -
606                            anchors_before_texel);
607 
608    /* Calculate the offset to the primary index for this texel */
609    bit_offset += mode->n_index_bits * texel - anchors_before_texel;
610 
611    subset_num = (subsets >> (texel * 2)) & 3;
612 
613    anchor = is_anchor(mode->n_subsets, partition_num, texel);
614 
615    index_bits = mode->n_index_bits;
616    if (anchor)
617       index_bits--;
618    indices[0] = extract_bits(block, bit_offset, index_bits);
619 
620    if (mode->n_secondary_index_bits) {
621       index_bits = mode->n_secondary_index_bits;
622       if (anchor)
623          index_bits--;
624       indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
625    }
626 
627    index = indices[index_selection];
628    index_bits = (index_selection ?
629                  mode->n_secondary_index_bits :
630                  mode->n_index_bits);
631 
632    for (component = 0; component < 3; component++)
633       result[component] = interpolate(endpoints[subset_num * 2][component],
634                                       endpoints[subset_num * 2 + 1][component],
635                                       index,
636                                       index_bits);
637 
638    /* Alpha uses the opposite index from the color components */
639    if (mode->n_secondary_index_bits && !index_selection) {
640       index = indices[1];
641       index_bits = mode->n_secondary_index_bits;
642    } else {
643       index = indices[0];
644       index_bits = mode->n_index_bits;
645    }
646 
647    result[3] = interpolate(endpoints[subset_num * 2][3],
648                            endpoints[subset_num * 2 + 1][3],
649                            index,
650                            index_bits);
651 
652    apply_rotation(rotation, result);
653 }
654 
655 #ifdef BPTC_BLOCK_DECODE
656 static void
decompress_rgba_unorm_block(int src_width,int src_height,const uint8_t * block,uint8_t * dst_row,int dst_rowstride)657 decompress_rgba_unorm_block(int src_width, int src_height,
658                             const uint8_t *block,
659                             uint8_t *dst_row, int dst_rowstride)
660 {
661    int mode_num = ffs(block[0]);
662    const struct bptc_unorm_mode *mode;
663    int bit_offset_head, bit_offset, secondary_bit_offset;
664    int partition_num;
665    int subset_num;
666    int rotation;
667    int index_selection;
668    int index_bits;
669    int indices[2];
670    int index;
671    int anchors_before_texel;
672    bool anchor;
673    uint8_t endpoints[3 * 2][4];
674    uint32_t subsets;
675    int component;
676    unsigned x, y;
677 
678    if (mode_num == 0) {
679       /* According to the spec this mode is reserved and shouldn't be used. */
680       for(y = 0; y < src_height; y += 1) {
681          uint8_t *result = dst_row;
682          memset(result, 0, 4 * src_width);
683          dst_row += dst_rowstride;
684       }
685       return;
686    }
687 
688    mode = bptc_unorm_modes + mode_num - 1;
689    bit_offset_head = mode_num;
690 
691    partition_num = extract_bits(block, bit_offset_head, mode->n_partition_bits);
692    bit_offset_head += mode->n_partition_bits;
693 
694    switch (mode->n_subsets) {
695    case 1:
696       subsets = 0;
697       break;
698    case 2:
699       subsets = partition_table1[partition_num];
700       break;
701    case 3:
702       subsets = partition_table2[partition_num];
703       break;
704    default:
705       assert(false);
706       return;
707    }
708 
709    if (mode->has_rotation_bits) {
710       rotation = extract_bits(block, bit_offset_head, 2);
711       bit_offset_head += 2;
712    } else {
713       rotation = 0;
714    }
715 
716    if (mode->has_index_selection_bit) {
717       index_selection = extract_bits(block, bit_offset_head, 1);
718       bit_offset_head++;
719    } else {
720       index_selection = 0;
721    }
722 
723    bit_offset_head = extract_unorm_endpoints(mode, block, bit_offset_head, endpoints);
724 
725    for(y = 0; y < src_height; y += 1) {
726       uint8_t *result = dst_row;
727       for(x = 0; x < src_width; x += 1) {
728          int texel;
729          texel = x + y * 4;
730          bit_offset = bit_offset_head;
731 
732          anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
733                                                            partition_num,
734                                                            texel);
735 
736          /* Calculate the offset to the secondary index */
737          secondary_bit_offset = (bit_offset +
738                                  BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
739                                  mode->n_subsets +
740                                  mode->n_secondary_index_bits * texel -
741                                  anchors_before_texel);
742 
743          /* Calculate the offset to the primary index for this texel */
744          bit_offset += mode->n_index_bits * texel - anchors_before_texel;
745 
746          subset_num = (subsets >> (texel * 2)) & 3;
747 
748          anchor = is_anchor(mode->n_subsets, partition_num, texel);
749 
750          index_bits = mode->n_index_bits;
751          if (anchor)
752             index_bits--;
753          indices[0] = extract_bits(block, bit_offset, index_bits);
754 
755          if (mode->n_secondary_index_bits) {
756             index_bits = mode->n_secondary_index_bits;
757             if (anchor)
758                index_bits--;
759             indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
760          }
761 
762          index = indices[index_selection];
763          index_bits = (index_selection ?
764                        mode->n_secondary_index_bits :
765                        mode->n_index_bits);
766 
767          for (component = 0; component < 3; component++)
768             result[component] = interpolate(endpoints[subset_num * 2][component],
769                                             endpoints[subset_num * 2 + 1][component],
770                                             index,
771                                             index_bits);
772 
773          /* Alpha uses the opposite index from the color components */
774          if (mode->n_secondary_index_bits && !index_selection) {
775             index = indices[1];
776             index_bits = mode->n_secondary_index_bits;
777          } else {
778             index = indices[0];
779             index_bits = mode->n_index_bits;
780          }
781 
782          result[3] = interpolate(endpoints[subset_num * 2][3],
783                                  endpoints[subset_num * 2 + 1][3],
784                                  index,
785                                  index_bits);
786 
787          apply_rotation(rotation, result);
788          result += 4;
789       }
790       dst_row += dst_rowstride;
791    }
792 }
793 
794 static void
decompress_rgba_unorm(int width,int height,const uint8_t * src,int src_rowstride,uint8_t * dst,int dst_rowstride)795 decompress_rgba_unorm(int width, int height,
796                       const uint8_t *src, int src_rowstride,
797                       uint8_t *dst, int dst_rowstride)
798 {
799    int src_row_diff;
800    int y, x;
801 
802    if (src_rowstride >= width * 4)
803       src_row_diff = src_rowstride - ((width + 3) & ~3) * 4;
804    else
805       src_row_diff = 0;
806 
807    for (y = 0; y < height; y += BLOCK_SIZE) {
808       for (x = 0; x < width; x += BLOCK_SIZE) {
809          decompress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE),
810                                      MIN2(height - y, BLOCK_SIZE),
811                                      src,
812                                      dst + x * 4 + y * dst_rowstride,
813                                      dst_rowstride);
814          src += BLOCK_BYTES;
815       }
816       src += src_row_diff;
817    }
818 }
819 #endif // BPTC_BLOCK_DECODE
820 
/*
 * Expand a signed quantized endpoint of n_endpoint_bits bits to the range
 * [-0x7fff, 0x7fff].
 *
 * Values that already use 16 or more bits, and zero, are returned unchanged.
 * The magnitude is scaled on its own and the sign is reapplied afterwards.
 */
static int
signed_unquantize(int value, int n_endpoint_bits)
{
   int magnitude;
   int unquantized;

   if (n_endpoint_bits >= 16 || value == 0)
      return value;

   magnitude = value < 0 ? -value : value;

   /* The largest representable magnitude saturates to the maximum */
   if (magnitude >= (1 << (n_endpoint_bits - 1)) - 1)
      unquantized = 0x7fff;
   else
      unquantized = ((magnitude << 15) + 0x4000) >> (n_endpoint_bits - 1);

   return value < 0 ? -unquantized : unquantized;
}
849 
/*
 * Expand an unsigned quantized endpoint of n_endpoint_bits bits to the range
 * [0, 0xffff].
 *
 * Values that already use 15 or more bits, and zero, are returned unchanged;
 * the all-ones value maps to 0xffff.
 */
static int
unsigned_unquantize(int value, int n_endpoint_bits)
{
   if (n_endpoint_bits >= 15 || value == 0)
      return value;

   if (value == (1 << n_endpoint_bits) - 1)
      return 0xffff;

   return ((value << 15) + 0x4000) >> (n_endpoint_bits - 1);
}
864 
865 static int
extract_float_endpoints(const struct bptc_float_mode * mode,const uint8_t * block,int bit_offset,int32_t endpoints[][3],bool is_signed)866 extract_float_endpoints(const struct bptc_float_mode *mode,
867                         const uint8_t *block,
868                         int bit_offset,
869                         int32_t endpoints[][3],
870                         bool is_signed)
871 {
872    const struct bptc_float_bitfield *bitfield;
873    int endpoint, component;
874    int n_endpoints;
875    int value;
876    int i;
877 
878    if (mode->n_partition_bits)
879       n_endpoints = 4;
880    else
881       n_endpoints = 2;
882 
883    memset(endpoints, 0, sizeof endpoints[0][0] * n_endpoints * 3);
884 
885    for (bitfield = mode->bitfields; bitfield->endpoint != -1; bitfield++) {
886       value = extract_bits(block, bit_offset, bitfield->n_bits);
887       bit_offset += bitfield->n_bits;
888 
889       if (bitfield->reverse) {
890          for (i = 0; i < bitfield->n_bits; i++) {
891             if (value & (1 << i))
892                endpoints[bitfield->endpoint][bitfield->component] |=
893                   1 << ((bitfield->n_bits - 1 - i) + bitfield->offset);
894          }
895       } else {
896          endpoints[bitfield->endpoint][bitfield->component] |=
897             value << bitfield->offset;
898       }
899    }
900 
901    if (mode->transformed_endpoints) {
902       /* The endpoints are specified as signed offsets from e0 */
903       for (endpoint = 1; endpoint < n_endpoints; endpoint++) {
904          for (component = 0; component < 3; component++) {
905             value = util_sign_extend(endpoints[endpoint][component],
906                                      mode->n_delta_bits[component]);
907             endpoints[endpoint][component] =
908                ((endpoints[0][component] + value) &
909                 ((1 << mode->n_endpoint_bits) - 1));
910          }
911       }
912    }
913 
914    if (is_signed) {
915       for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
916          for (component = 0; component < 3; component++) {
917             value = util_sign_extend(endpoints[endpoint][component],
918                                      mode->n_endpoint_bits);
919             endpoints[endpoint][component] =
920                signed_unquantize(value, mode->n_endpoint_bits);
921          }
922       }
923    } else {
924       for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
925          for (component = 0; component < 3; component++) {
926             endpoints[endpoint][component] =
927                unsigned_unquantize(endpoints[endpoint][component],
928                                    mode->n_endpoint_bits);
929          }
930       }
931    }
932 
933    return bit_offset;
934 }
935 
/*
 * Final scaling step for an interpolated unsigned value: multiply by 31/64
 * using integer arithmetic. Callers in this file pass the result to
 * _mesa_half_to_float().
 */
static int32_t
finish_unsigned_unquantize(int32_t value)
{
   const int32_t scaled = value * 31;

   return scaled / 64;
}
941 
/*
 * Final scaling step for an interpolated signed value: scale the magnitude
 * by 31/32 and, for negative inputs, set bit 15 of the result. Callers in
 * this file pass the result to _mesa_half_to_float().
 */
static int32_t
finish_signed_unquantize(int32_t value)
{
   const int32_t magnitude = value < 0 ? -value : value;
   int32_t half_bits = magnitude * 31 / 32;

   if (value < 0)
      half_bits |= 0x8000;

   return half_bits;
}
950 
951 static void
fetch_rgb_float_from_block(const uint8_t * block,float * result,int texel,bool is_signed)952 fetch_rgb_float_from_block(const uint8_t *block,
953                            float *result,
954                            int texel,
955                            bool is_signed)
956 {
957    int mode_num;
958    const struct bptc_float_mode *mode;
959    int bit_offset;
960    int partition_num;
961    int subset_num;
962    int index_bits;
963    int index;
964    int anchors_before_texel;
965    int32_t endpoints[2 * 2][3];
966    uint32_t subsets;
967    int n_subsets;
968    int component;
969    int32_t value;
970 
971    if (block[0] & 0x2) {
972       mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
973       bit_offset = 5;
974    } else {
975       mode_num = block[0] & 3;
976       bit_offset = 2;
977    }
978 
979    mode = bptc_float_modes + mode_num;
980 
981    if (mode->reserved) {
982       memset(result, 0, sizeof result[0] * 3);
983       result[3] = 1.0f;
984       return;
985    }
986 
987    bit_offset = extract_float_endpoints(mode, block, bit_offset,
988                                         endpoints, is_signed);
989 
990    if (mode->n_partition_bits) {
991       partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
992       bit_offset += mode->n_partition_bits;
993 
994       subsets = partition_table1[partition_num];
995       n_subsets = 2;
996    } else {
997       partition_num = 0;
998       subsets = 0;
999       n_subsets = 1;
1000    }
1001 
1002    anchors_before_texel =
1003       count_anchors_before_texel(n_subsets, partition_num, texel);
1004 
1005    /* Calculate the offset to the primary index for this texel */
1006    bit_offset += mode->n_index_bits * texel - anchors_before_texel;
1007 
1008    subset_num = (subsets >> (texel * 2)) & 3;
1009 
1010    index_bits = mode->n_index_bits;
1011    if (is_anchor(n_subsets, partition_num, texel))
1012       index_bits--;
1013    index = extract_bits(block, bit_offset, index_bits);
1014 
1015    for (component = 0; component < 3; component++) {
1016       value = interpolate(endpoints[subset_num * 2][component],
1017                           endpoints[subset_num * 2 + 1][component],
1018                           index,
1019                           mode->n_index_bits);
1020 
1021       if (is_signed)
1022          value = finish_signed_unquantize(value);
1023       else
1024          value = finish_unsigned_unquantize(value);
1025 
1026       result[component] = _mesa_half_to_float(value);
1027    }
1028 
1029    result[3] = 1.0f;
1030 }
1031 
1032 #ifdef BPTC_BLOCK_DECODE
1033 static void
decompress_rgb_float_block(unsigned src_width,unsigned src_height,const uint8_t * block,float * dst_row,unsigned dst_rowstride,bool is_signed)1034 decompress_rgb_float_block(unsigned src_width, unsigned src_height,
1035                            const uint8_t *block,
1036                            float *dst_row, unsigned dst_rowstride,
1037                            bool is_signed)
1038 {
1039    int mode_num;
1040    const struct bptc_float_mode *mode;
1041    int bit_offset_head, bit_offset;
1042    int partition_num;
1043    int subset_num;
1044    int index_bits;
1045    int index;
1046    int anchors_before_texel;
1047    int32_t endpoints[2 * 2][3];
1048    uint32_t subsets;
1049    int n_subsets;
1050    int component;
1051    int32_t value;
1052    unsigned x, y;
1053 
1054    if (block[0] & 0x2) {
1055       mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
1056       bit_offset_head = 5;
1057    } else {
1058       mode_num = block[0] & 3;
1059       bit_offset_head = 2;
1060    }
1061 
1062    mode = bptc_float_modes + mode_num;
1063 
1064    if (mode->reserved) {
1065       for(y = 0; y < src_height; y += 1) {
1066          float *result = dst_row;
1067          memset(result, 0, sizeof result[0] * 4 * src_width);
1068          for(x = 0; x < src_width; x += 1) {
1069             result[3] = 1.0f;
1070             result += 4;
1071          }
1072          dst_row += dst_rowstride / sizeof dst_row[0];
1073       }
1074       return;
1075    }
1076 
1077    bit_offset_head = extract_float_endpoints(mode, block, bit_offset_head,
1078                                         endpoints, is_signed);
1079 
1080    if (mode->n_partition_bits) {
1081       partition_num = extract_bits(block, bit_offset_head, mode->n_partition_bits);
1082       bit_offset_head += mode->n_partition_bits;
1083 
1084       subsets = partition_table1[partition_num];
1085       n_subsets = 2;
1086    } else {
1087       partition_num = 0;
1088       subsets = 0;
1089       n_subsets = 1;
1090    }
1091 
1092    for(y = 0; y < src_height; y += 1) {
1093       float *result = dst_row;
1094       for(x = 0; x < src_width; x += 1) {
1095          int texel;
1096 
1097          bit_offset = bit_offset_head;
1098 
1099          texel = x + y * 4;
1100 
1101          anchors_before_texel =
1102             count_anchors_before_texel(n_subsets, partition_num, texel);
1103 
1104          /* Calculate the offset to the primary index for this texel */
1105          bit_offset += mode->n_index_bits * texel - anchors_before_texel;
1106 
1107          subset_num = (subsets >> (texel * 2)) & 3;
1108 
1109          index_bits = mode->n_index_bits;
1110          if (is_anchor(n_subsets, partition_num, texel))
1111             index_bits--;
1112          index = extract_bits(block, bit_offset, index_bits);
1113 
1114          for (component = 0; component < 3; component++) {
1115             value = interpolate(endpoints[subset_num * 2][component],
1116                                 endpoints[subset_num * 2 + 1][component],
1117                                 index,
1118                                 mode->n_index_bits);
1119 
1120             if (is_signed)
1121                value = finish_signed_unquantize(value);
1122             else
1123                value = finish_unsigned_unquantize(value);
1124 
1125             result[component] = _mesa_half_to_float(value);
1126          }
1127 
1128          result[3] = 1.0f;
1129          result += 4;
1130       }
1131       dst_row += dst_rowstride / sizeof dst_row[0];
1132    }
1133 }
1134 
1135 static void
decompress_rgb_float(int width,int height,const uint8_t * src,int src_rowstride,float * dst,int dst_rowstride,bool is_signed)1136 decompress_rgb_float(int width, int height,
1137                       const uint8_t *src, int src_rowstride,
1138                       float *dst, int dst_rowstride, bool is_signed)
1139 {
1140    int src_row_diff;
1141    int y, x;
1142 
1143    if (src_rowstride >= width * 4)
1144       src_row_diff = src_rowstride - ((width + 3) & ~3) * 4;
1145    else
1146       src_row_diff = 0;
1147 
1148    for (y = 0; y < height; y += BLOCK_SIZE) {
1149       for (x = 0; x < width; x += BLOCK_SIZE) {
1150          decompress_rgb_float_block(MIN2(width - x, BLOCK_SIZE),
1151                                     MIN2(height - y, BLOCK_SIZE),
1152                                     src,
1153                                     (dst + x * 4 +
1154                                      (y * dst_rowstride / sizeof dst[0])),
1155                                     dst_rowstride, is_signed);
1156          src += BLOCK_BYTES;
1157       }
1158       src += src_row_diff;
1159    }
1160 }
1161 #endif // BPTC_BLOCK_DECODE
1162 
1163 static void
write_bits(struct bit_writer * writer,int n_bits,int value)1164 write_bits(struct bit_writer *writer, int n_bits, int value)
1165 {
1166    do {
1167       if (n_bits + writer->pos >= 8) {
1168          *(writer->dst++) = writer->buf | (value << writer->pos);
1169          writer->buf = 0;
1170          value >>= (8 - writer->pos);
1171          n_bits -= (8 - writer->pos);
1172          writer->pos = 0;
1173       } else {
1174          writer->buf |= value << writer->pos;
1175          writer->pos += n_bits;
1176          break;
1177       }
1178    } while (n_bits > 0);
1179 }
1180 
/*
 * Compute the per-texel averages used to partition a block of 4-byte RGBA
 * texels.
 *
 * "Luminance" here is simply r + g + b. src_rowstride is in bytes. The
 * averages are integer divisions of the sums by width * height.
 */
static void
get_average_luminance_alpha_unorm(int width, int height,
                                  const uint8_t *src, int src_rowstride,
                                  int *average_luminance, int *average_alpha)
{
   const int n_texels = width * height;
   int total_luminance = 0;
   int total_alpha = 0;
   int row, col;

   for (row = 0; row < height; row++) {
      const uint8_t *texel = src + row * src_rowstride;

      for (col = 0; col < width; col++, texel += 4) {
         total_luminance += texel[0] + texel[1] + texel[2];
         total_alpha += texel[3];
      }
   }

   *average_luminance = total_luminance / n_texels;
   *average_alpha = total_alpha / n_texels;
}
1201 
/*
 * Pick two RGBA endpoints for a block of 4-byte RGBA texels.
 *
 * The texels are split into two groups around average_luminance (where
 * luminance is r + g + b) and each color endpoint is the average of one
 * group. Alpha is split independently around average_alpha. If one group is
 * empty, both endpoints are set to the average of all texels.
 *
 * Finally the endpoints are ordered so that the first texel of the block is
 * on the same side of the midpoint as endpoint 0, which keeps the
 * most-significant bit of the first index zero.
 *
 * src_rowstride is in bytes. endpoints must have room for two entries.
 */
static void
get_rgba_endpoints_unorm(int width, int height,
                         const uint8_t *src, int src_rowstride,
                         int average_luminance, int average_alpha,
                         uint8_t endpoints[][4])
{
   const int n_texels = width * height;
   int endpoint_luminances[2];
   int midpoint;
   int sums[2][4];
   int endpoint;
   int luminance;
   uint8_t temp[3];
   const uint8_t *p = src;
   int rgb_left_endpoint_count = 0;
   int alpha_left_endpoint_count = 0;
   int y, x, i;

   memset(sums, 0, sizeof sums);

   for (y = 0; y < height; y++) {
      for (x = 0; x < width; x++) {
         luminance = p[0] + p[1] + p[2];
         if (luminance < average_luminance) {
            endpoint = 0;
            rgb_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         for (i = 0; i < 3; i++)
            sums[endpoint][i] += p[i];

         /* Alpha is partitioned on the alpha channel itself (p[3]), not on
          * the blue channel */
         if (p[3] < average_alpha) {
            endpoint = 0;
            alpha_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         sums[endpoint][3] += p[3];

         p += 4;
      }

      p += src_rowstride - width * 4;
   }

   if (rgb_left_endpoint_count == 0 ||
       rgb_left_endpoint_count == n_texels) {
      /* One group is empty: use the overall average for both endpoints */
      for (i = 0; i < 3; i++)
         endpoints[0][i] = endpoints[1][i] =
            (sums[0][i] + sums[1][i]) / n_texels;
   } else {
      for (i = 0; i < 3; i++) {
         endpoints[0][i] = sums[0][i] / rgb_left_endpoint_count;
         endpoints[1][i] = (sums[1][i] /
                            (n_texels - rgb_left_endpoint_count));
      }
   }

   if (alpha_left_endpoint_count == 0 ||
       alpha_left_endpoint_count == n_texels) {
      endpoints[0][3] = endpoints[1][3] =
         (sums[0][3] + sums[1][3]) / n_texels;
   } else {
      endpoints[0][3] = sums[0][3] / alpha_left_endpoint_count;
      endpoints[1][3] = (sums[1][3] /
                         (n_texels - alpha_left_endpoint_count));
   }

   /* We may need to swap the endpoints to ensure the most-significant bit of
    * the first index is zero */

   for (endpoint = 0; endpoint < 2; endpoint++) {
      endpoint_luminances[endpoint] =
         endpoints[endpoint][0] +
         endpoints[endpoint][1] +
         endpoints[endpoint][2];
   }
   midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2;

   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (endpoint_luminances[0] <= midpoint)) {
      memcpy(temp, endpoints[0], 3);
      memcpy(endpoints[0], endpoints[1], 3);
      memcpy(endpoints[1], temp, 3);
   }

   /* Same for the alpha endpoints */

   midpoint = (endpoints[0][3] + endpoints[1][3]) / 2;

   if ((src[3] <= midpoint) != (endpoints[0][3] <= midpoint)) {
      temp[0] = endpoints[0][3];
      endpoints[0][3] = endpoints[1][3];
      endpoints[1][3] = temp[0];
   }
}
1298 
1299 static void
write_rgb_indices_unorm(struct bit_writer * writer,int src_width,int src_height,const uint8_t * src,int src_rowstride,uint8_t endpoints[][4])1300 write_rgb_indices_unorm(struct bit_writer *writer,
1301                         int src_width, int src_height,
1302                         const uint8_t *src, int src_rowstride,
1303                         uint8_t endpoints[][4])
1304 {
1305    int luminance;
1306    int endpoint_luminances[2];
1307    int endpoint;
1308    int index;
1309    int y, x;
1310 
1311    for (endpoint = 0; endpoint < 2; endpoint++) {
1312       endpoint_luminances[endpoint] =
1313          endpoints[endpoint][0] +
1314          endpoints[endpoint][1] +
1315          endpoints[endpoint][2];
1316    }
1317 
1318    /* If the endpoints have the same luminance then we'll just use index 0 for
1319     * all of the texels */
1320    if (endpoint_luminances[0] == endpoint_luminances[1]) {
1321       write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 2 - 1, 0);
1322       return;
1323    }
1324 
1325    for (y = 0; y < src_height; y++) {
1326       for (x = 0; x < src_width; x++) {
1327          luminance = src[0] + src[1] + src[2];
1328 
1329          index = ((luminance - endpoint_luminances[0]) * 3 /
1330                   (endpoint_luminances[1] - endpoint_luminances[0]));
1331          if (index < 0)
1332             index = 0;
1333          else if (index > 3)
1334             index = 3;
1335 
1336          assert(x != 0 || y != 0 || index < 2);
1337 
1338          write_bits(writer, (x == 0 && y == 0) ? 1 : 2, index);
1339 
1340          src += 4;
1341       }
1342 
1343       /* Pad the indices out to the block size */
1344       if (src_width < BLOCK_SIZE)
1345          write_bits(writer, 2 * (BLOCK_SIZE - src_width), 0);
1346 
1347       src += src_rowstride - src_width * 4;
1348    }
1349 
1350    /* Pad the indices out to the block size */
1351    if (src_height < BLOCK_SIZE)
1352       write_bits(writer, 2 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1353 }
1354 
1355 static void
write_alpha_indices_unorm(struct bit_writer * writer,int src_width,int src_height,const uint8_t * src,int src_rowstride,uint8_t endpoints[][4])1356 write_alpha_indices_unorm(struct bit_writer *writer,
1357                           int src_width, int src_height,
1358                           const uint8_t *src, int src_rowstride,
1359                           uint8_t endpoints[][4])
1360 {
1361    int index;
1362    int y, x;
1363 
1364    /* If the endpoints have the same alpha then we'll just use index 0 for
1365     * all of the texels */
1366    if (endpoints[0][3] == endpoints[1][3]) {
1367       write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 3 - 1, 0);
1368       return;
1369    }
1370 
1371    for (y = 0; y < src_height; y++) {
1372       for (x = 0; x < src_width; x++) {
1373          index = (((int) src[3] - (int) endpoints[0][3]) * 7 /
1374                   ((int) endpoints[1][3] - endpoints[0][3]));
1375          if (index < 0)
1376             index = 0;
1377          else if (index > 7)
1378             index = 7;
1379 
1380          assert(x != 0 || y != 0 || index < 4);
1381 
1382          /* The first index has one less bit */
1383          write_bits(writer, (x == 0 && y == 0) ? 2 : 3, index);
1384 
1385          src += 4;
1386       }
1387 
1388       /* Pad the indices out to the block size */
1389       if (src_width < BLOCK_SIZE)
1390          write_bits(writer, 3 * (BLOCK_SIZE - src_width), 0);
1391 
1392       src += src_rowstride - src_width * 4;
1393    }
1394 
1395    /* Pad the indices out to the block size */
1396    if (src_height < BLOCK_SIZE)
1397       write_bits(writer, 3 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1398 }
1399 
1400 static void
compress_rgba_unorm_block(int src_width,int src_height,const uint8_t * src,int src_rowstride,uint8_t * dst)1401 compress_rgba_unorm_block(int src_width, int src_height,
1402                           const uint8_t *src, int src_rowstride,
1403                           uint8_t *dst)
1404 {
1405    int average_luminance, average_alpha;
1406    uint8_t endpoints[2][4];
1407    struct bit_writer writer;
1408    int component, endpoint;
1409 
1410    get_average_luminance_alpha_unorm(src_width, src_height, src, src_rowstride,
1411                                      &average_luminance, &average_alpha);
1412    get_rgba_endpoints_unorm(src_width, src_height, src, src_rowstride,
1413                             average_luminance, average_alpha,
1414                             endpoints);
1415 
1416    writer.dst = dst;
1417    writer.pos = 0;
1418    writer.buf = 0;
1419 
1420    write_bits(&writer, 5, 0x10); /* mode 4 */
1421    write_bits(&writer, 2, 0); /* rotation 0 */
1422    write_bits(&writer, 1, 0); /* index selection bit */
1423 
1424    /* Write the color endpoints */
1425    for (component = 0; component < 3; component++)
1426       for (endpoint = 0; endpoint < 2; endpoint++)
1427          write_bits(&writer, 5, endpoints[endpoint][component] >> 3);
1428 
1429    /* Write the alpha endpoints */
1430    for (endpoint = 0; endpoint < 2; endpoint++)
1431       write_bits(&writer, 6, endpoints[endpoint][3] >> 2);
1432 
1433    write_rgb_indices_unorm(&writer,
1434                            src_width, src_height,
1435                            src, src_rowstride,
1436                            endpoints);
1437    write_alpha_indices_unorm(&writer,
1438                              src_width, src_height,
1439                              src, src_rowstride,
1440                              endpoints);
1441 }
1442 
1443 static void
compress_rgba_unorm(int width,int height,const uint8_t * src,int src_rowstride,uint8_t * dst,int dst_rowstride)1444 compress_rgba_unorm(int width, int height,
1445                     const uint8_t *src, int src_rowstride,
1446                     uint8_t *dst, int dst_rowstride)
1447 {
1448    int dst_row_diff;
1449    int y, x;
1450 
1451    if (dst_rowstride >= width * 4)
1452       dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
1453    else
1454       dst_row_diff = 0;
1455 
1456    for (y = 0; y < height; y += BLOCK_SIZE) {
1457       for (x = 0; x < width; x += BLOCK_SIZE) {
1458          compress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE),
1459                                    MIN2(height - y, BLOCK_SIZE),
1460                                    src + x * 4 + y * src_rowstride,
1461                                    src_rowstride,
1462                                    dst);
1463          dst += BLOCK_BYTES;
1464       }
1465       dst += dst_row_diff;
1466    }
1467 }
1468 
/*
 * Return the average of r + g + b over a block of 3-float RGB texels.
 *
 * src_rowstride is in bytes.
 */
static float
get_average_luminance_float(int width, int height,
                            const float *src, int src_rowstride)
{
   const float *texel = src;
   float total = 0;
   int row, col;

   for (row = 0; row < height; row++) {
      for (col = 0; col < width; col++, texel += 3)
         total += texel[0] + texel[1] + texel[2];

      /* Skip whatever lies between the end of this row and the next one */
      texel += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
   }

   return total / (width * height);
}
1486 
/*
 * Clamp value to [-65504, 65504] when is_signed is set, or to [0, 65504]
 * otherwise. This also replaces infinities with the nearest bound.
 */
static float
clamp_value(float value, bool is_signed)
{
   const float lower_bound = is_signed ? -65504.0f : 0.0f;

   if (value > 65504.0f)
      return 65504.0f;

   return value < lower_bound ? lower_bound : value;
}
1505 
/*
 * Pick two RGB endpoints for a block of 3-float RGB texels.
 *
 * The texels are split into two groups around average_luminance (where
 * luminance is r + g + b) and each endpoint is the average of one group,
 * clamped with clamp_value(). If one group is empty, both endpoints are the
 * average of all texels. The endpoints are then ordered so that the first
 * texel of the block is on the same side of the midpoint as endpoint 0.
 *
 * src_rowstride is in bytes. endpoints must have room for two entries.
 */
static void
get_endpoints_float(int width, int height,
                    const float *src, int src_rowstride,
                    float average_luminance, float endpoints[][3],
                    bool is_signed)
{
   const int n_texels = width * height;
   const float *texel = src;
   float group_sums[2][3];
   float endpoint_luminances[2];
   float midpoint;
   float luminance;
   float swap[3];
   int n_dark = 0;
   int row, col, e, c;

   memset(group_sums, 0, sizeof group_sums);

   for (row = 0; row < height; row++) {
      for (col = 0; col < width; col++, texel += 3) {
         int group;

         luminance = texel[0] + texel[1] + texel[2];
         group = (luminance < average_luminance) ? 0 : 1;
         if (group == 0)
            n_dark++;

         for (c = 0; c < 3; c++)
            group_sums[group][c] += texel[c];
      }

      texel += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
   }

   for (c = 0; c < 3; c++) {
      if (n_dark == 0 || n_dark == n_texels) {
         /* One group is empty: use the overall average for both */
         endpoints[0][c] = endpoints[1][c] =
            (group_sums[0][c] + group_sums[1][c]) / n_texels;
      } else {
         endpoints[0][c] = group_sums[0][c] / n_dark;
         endpoints[1][c] = group_sums[1][c] / (n_texels - n_dark);
      }
   }

   /* Keep the endpoints inside the range clamp_value() allows, which also
    * removes infinities */
   for (e = 0; e < 2; e++) {
      for (c = 0; c < 3; c++)
         endpoints[e][c] = clamp_value(endpoints[e][c], is_signed);
   }

   /* The first texel must land on endpoint 0's side of the midpoint so that
    * the most-significant bit of the first index is zero */
   for (e = 0; e < 2; e++) {
      endpoint_luminances[e] =
         endpoints[e][0] +
         endpoints[e][1] +
         endpoints[e][2];
   }
   midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2.0f;

   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (endpoint_luminances[0] <= midpoint)) {
      memcpy(swap, endpoints[0], sizeof swap);
      memcpy(endpoints[0], endpoints[1], sizeof swap);
      memcpy(endpoints[1], swap, sizeof swap);
   }
}
1581 
1582 static void
write_rgb_indices_float(struct bit_writer * writer,int src_width,int src_height,const float * src,int src_rowstride,float endpoints[][3])1583 write_rgb_indices_float(struct bit_writer *writer,
1584                         int src_width, int src_height,
1585                         const float *src, int src_rowstride,
1586                         float endpoints[][3])
1587 {
1588    float luminance;
1589    float endpoint_luminances[2];
1590    int endpoint;
1591    int index;
1592    int y, x;
1593 
1594    for (endpoint = 0; endpoint < 2; endpoint++) {
1595       endpoint_luminances[endpoint] =
1596          endpoints[endpoint][0] +
1597          endpoints[endpoint][1] +
1598          endpoints[endpoint][2];
1599    }
1600 
1601    /* If the endpoints have the same luminance then we'll just use index 0 for
1602     * all of the texels */
1603    if (endpoint_luminances[0] == endpoint_luminances[1]) {
1604       write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 4 - 1, 0);
1605       return;
1606    }
1607 
1608    for (y = 0; y < src_height; y++) {
1609       for (x = 0; x < src_width; x++) {
1610          luminance = src[0] + src[1] + src[2];
1611 
1612          index = ((luminance - endpoint_luminances[0]) * 15 /
1613                   (endpoint_luminances[1] - endpoint_luminances[0]));
1614          if (index < 0)
1615             index = 0;
1616          else if (index > 15)
1617             index = 15;
1618 
1619          assert(x != 0 || y != 0 || index < 8);
1620 
1621          write_bits(writer, (x == 0 && y == 0) ? 3 : 4, index);
1622 
1623          src += 3;
1624       }
1625 
1626       /* Pad the indices out to the block size */
1627       if (src_width < BLOCK_SIZE)
1628          write_bits(writer, 4 * (BLOCK_SIZE - src_width), 0);
1629 
1630       src += (src_rowstride - src_width * 3 * sizeof (float)) / sizeof (float);
1631    }
1632 
1633    /* Pad the indices out to the block size */
1634    if (src_height < BLOCK_SIZE)
1635       write_bits(writer, 4 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1636 }
1637 
/*
 * Convert a float endpoint to the 10-bit value stored in the block.
 *
 * The value is converted with _mesa_float_to_half() and rescaled. For
 * signed formats the sign bit is stripped before rescaling and the result is
 * negated and masked to 10 bits afterwards. For unsigned formats, inputs
 * that are zero or negative produce 0.
 */
static int
get_endpoint_value(float value, bool is_signed)
{
   int half;
   bool negative;

   if (!is_signed) {
      if (value <= 0.0f)
         return 0;

      half = _mesa_float_to_half(value);

      return (64 * half / 31) >> 6;
   }

   half = _mesa_float_to_half(value);

   /* Work on the magnitude and reapply the sign at the end */
   negative = (half & 0x8000) != 0;
   if (negative)
      half &= 0x7fff;

   half = (32 * half / 31) >> 6;

   if (negative)
      half = -half & ((1 << 10) - 1);

   return half;
}
1667 
1668 static void
compress_rgb_float_block(int src_width,int src_height,const float * src,int src_rowstride,uint8_t * dst,bool is_signed)1669 compress_rgb_float_block(int src_width, int src_height,
1670                          const float *src, int src_rowstride,
1671                          uint8_t *dst,
1672                          bool is_signed)
1673 {
1674    float average_luminance;
1675    float endpoints[2][3];
1676    struct bit_writer writer;
1677    int component, endpoint;
1678    int endpoint_value;
1679 
1680    average_luminance =
1681       get_average_luminance_float(src_width, src_height, src, src_rowstride);
1682    get_endpoints_float(src_width, src_height, src, src_rowstride,
1683                        average_luminance, endpoints, is_signed);
1684 
1685    writer.dst = dst;
1686    writer.pos = 0;
1687    writer.buf = 0;
1688 
1689    write_bits(&writer, 5, 3); /* mode 3 */
1690 
1691    /* Write the endpoints */
1692    for (endpoint = 0; endpoint < 2; endpoint++) {
1693       for (component = 0; component < 3; component++) {
1694          endpoint_value =
1695             get_endpoint_value(endpoints[endpoint][component], is_signed);
1696          write_bits(&writer, 10, endpoint_value);
1697       }
1698    }
1699 
1700    write_rgb_indices_float(&writer,
1701                            src_width, src_height,
1702                            src, src_rowstride,
1703                            endpoints);
1704 }
1705 
1706 static void
compress_rgb_float(int width,int height,const float * src,int src_rowstride,uint8_t * dst,int dst_rowstride,bool is_signed)1707 compress_rgb_float(int width, int height,
1708                    const float *src, int src_rowstride,
1709                    uint8_t *dst, int dst_rowstride,
1710                    bool is_signed)
1711 {
1712    int dst_row_diff;
1713    int y, x;
1714 
1715    if (dst_rowstride >= width * 4)
1716       dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
1717    else
1718       dst_row_diff = 0;
1719 
1720    for (y = 0; y < height; y += BLOCK_SIZE) {
1721       for (x = 0; x < width; x += BLOCK_SIZE) {
1722          compress_rgb_float_block(MIN2(width - x, BLOCK_SIZE),
1723                                   MIN2(height - y, BLOCK_SIZE),
1724                                   src + x * 3 +
1725                                   y * src_rowstride / sizeof (float),
1726                                   src_rowstride,
1727                                   dst,
1728                                   is_signed);
1729          dst += BLOCK_BYTES;
1730       }
1731       dst += dst_row_diff;
1732    }
1733 }
1734 
1735 #endif
1736