/*
 * Copyright © 2021 Google
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>

#include "nir/nir_builder.h"
#include "radv_meta.h"
#include "radv_private.h"
#include "sid.h"
#include "vk_format.h"

/* Based on
 * https://github.com/Themaister/Granite/blob/master/assets/shaders/decode/etc2.comp
 * https://github.com/Themaister/Granite/blob/master/assets/shaders/decode/eac.comp
 *
 * With some differences:
 *  - Use the VkFormat to derive all format-specific settings.
 *  - Combine the ETC2 and EAC shaders into one.
 *  - Since they are combined, reuse the alpha decoding for the ETC2 A8 component.
 *  - The upstream EAC shader does not handle SNORM correctly, so that is fixed here.
 */

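/* Each texel fetched from the compressed data is a pair of little-endian
 * 32-bit words, while the ETC2/EAC block layout is defined as a big-endian
 * bit stream. Byte-swap each 32-bit channel so the bitfield extractions
 * below line up with the offsets in the specification.
 */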
static nir_ssa_def *
flip_endian(nir_builder *b, nir_ssa_def *src, unsigned cnt)
{
   nir_ssa_def *v[2];
   for (unsigned i = 0; i < cnt; ++i) {
      nir_ssa_def *intermediate[4];
      nir_ssa_def *chan = cnt == 1 ? src : nir_channel(b, src, i);
      for (unsigned j = 0; j < 4; ++j)
         intermediate[j] = nir_ubfe_imm(b, chan, 8 * j, 8);
      v[i] = nir_ior(
         b, nir_ior(b, nir_ishl_imm(b, intermediate[0], 24), nir_ishl_imm(b, intermediate[1], 16)),
         nir_ior(b, nir_ishl_imm(b, intermediate[2], 8), nir_ishl_imm(b, intermediate[3], 0)));
   }
   return cnt == 1 ? v[0] : nir_vec(b, v, cnt);
}

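/* ETC1/ETC2 intensity modifier table: each of the 8 rows provides a small
 * and a large magnitude; the sign is applied separately from the per-pixel
 * index bits. Implemented as a bcsel chain since the table index is not a
 * compile-time constant.
 */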
static nir_ssa_def *
etc1_color_modifier_lookup(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
{
   const unsigned table[8][2] = {{2, 8},   {5, 17},  {9, 29},   {13, 42},
                                 {18, 60}, {24, 80}, {33, 106}, {47, 183}};
   nir_ssa_def *upper = nir_ieq_imm(b, y, 1);
   nir_ssa_def *result = NULL;
   for (unsigned i = 0; i < 8; ++i) {
      nir_ssa_def *tmp =
         nir_bcsel(b, upper, nir_imm_int(b, table[i][1]), nir_imm_int(b, table[i][0]));
      if (result)
         result = nir_bcsel(b, nir_ieq_imm(b, x, i), tmp, result);
      else
         result = tmp;
   }
   return result;
}

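/* Distance table used by the ETC2 T and H modes to offset the two paint
 * colors from their base colors.
 */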
static nir_ssa_def *
etc2_distance_lookup(nir_builder *b, nir_ssa_def *x)
{
   const unsigned table[8] = {3, 6, 11, 16, 23, 32, 41, 64};
   nir_ssa_def *result = NULL;
   for (unsigned i = 0; i < 8; ++i) {
      if (result)
         result = nir_bcsel(b, nir_ieq_imm(b, x, i), nir_imm_int(b, table[i]), result);
      else
         result = nir_imm_int(b, table[i]);
   }
   return result;
}

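/* EAC/ETC2-alpha modifier table. Each 16-bit entry packs the four positive
 * modifiers of one table row as nibbles; x selects the row and y selects
 * the nibble. The matching negative modifiers are derived by the caller as
 * the bitwise complement (-(v + 1)) when the pixel index MSB is clear.
 */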
static nir_ssa_def *
etc1_alpha_modifier_lookup(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
{
   const unsigned table[16] = {0xe852, 0xc962, 0xc741, 0xc531, 0xb752, 0xa862, 0xa763, 0xa742,
                               0x9751, 0x9741, 0x9731, 0x9641, 0x9632, 0x9210, 0x8753, 0x8642};
   nir_ssa_def *result = NULL;
   for (unsigned i = 0; i < 16; ++i) {
      nir_ssa_def *tmp = nir_imm_int(b, table[i]);
      if (result)
         result = nir_bcsel(b, nir_ieq_imm(b, x, i), tmp, result);
      else
         result = tmp;
   }
   return nir_ubfe(b, result, nir_imul_imm(b, y, 4), nir_imm_int(b, 4));
}

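/* Extend an n-bit color component to 8 bits by replicating its high bits
 * into the low bits (for 4-bit values this is a multiply by 0x11).
 */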
static nir_ssa_def *
etc_extend(nir_builder *b, nir_ssa_def *v, int bits)
{
   if (bits == 4)
      return nir_imul_imm(b, v, 0x11);
   return nir_ior(b, nir_ishl_imm(b, v, 8 - bits), nir_ushr_imm(b, v, bits - (8 - bits)));
}

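/* Decode one texel of an EAC channel (or the A8 part of ETC2 RGBA): the
 * 64-bit payload holds a base value, a multiplier, a table selector and
 * sixteen 3-bit pixel indices. The result is base + modifier * multiplier,
 * clamped and normalized. With eac=true the 11-bit UNORM/SNORM rules apply,
 * selected at runtime by is_signed.
 */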
static nir_ssa_def *
decode_etc2_alpha(struct nir_builder *b, nir_ssa_def *alpha_payload, nir_ssa_def *linear_pixel,
                  bool eac, nir_ssa_def *is_signed)
{
   alpha_payload = flip_endian(b, alpha_payload, 2);
   nir_ssa_def *alpha_x = nir_channel(b, alpha_payload, 1);
   nir_ssa_def *alpha_y = nir_channel(b, alpha_payload, 0);
   nir_ssa_def *bit_offset = nir_isub_imm(b, 45, nir_imul_imm(b, linear_pixel, 3));
   nir_ssa_def *base = nir_ubfe_imm(b, alpha_y, 24, 8);
   nir_ssa_def *multiplier = nir_ubfe_imm(b, alpha_y, 20, 4);
   nir_ssa_def *table = nir_ubfe_imm(b, alpha_y, 16, 4);

   if (eac) {
      nir_ssa_def *signed_base = nir_ibfe_imm(b, alpha_y, 24, 8);
      signed_base = nir_imul_imm(b, signed_base, 8);
      base = nir_iadd_imm(b, nir_imul_imm(b, base, 8), 4);
      base = nir_bcsel(b, is_signed, signed_base, base);
      multiplier = nir_imax(b, nir_imul_imm(b, multiplier, 8), nir_imm_int(b, 1));
   }

   nir_ssa_def *lsb_index =
      nir_ubfe(b, nir_bcsel(b, nir_uge(b, bit_offset, nir_imm_int(b, 32)), alpha_y, alpha_x),
               nir_iand_imm(b, bit_offset, 31), nir_imm_int(b, 2));
   bit_offset = nir_iadd_imm(b, bit_offset, 2);
   nir_ssa_def *msb =
      nir_ubfe(b, nir_bcsel(b, nir_uge(b, bit_offset, nir_imm_int(b, 32)), alpha_y, alpha_x),
               nir_iand_imm(b, bit_offset, 31), nir_imm_int(b, 1));
   nir_ssa_def *mod =
      nir_ixor(b, etc1_alpha_modifier_lookup(b, table, lsb_index), nir_iadd_imm(b, msb, -1));
   nir_ssa_def *a = nir_iadd(b, base, nir_imul(b, mod, multiplier));

   nir_ssa_def *low_bound = nir_imm_int(b, 0);
   nir_ssa_def *high_bound = nir_imm_int(b, 255);
   nir_ssa_def *final_mult = nir_imm_float(b, 1 / 255.0);
   if (eac) {
      low_bound = nir_bcsel(b, is_signed, nir_imm_int(b, -1023), low_bound);
      high_bound = nir_bcsel(b, is_signed, nir_imm_int(b, 1023), nir_imm_int(b, 2047));
      final_mult =
         nir_bcsel(b, is_signed, nir_imm_float(b, 1 / 1023.0), nir_imm_float(b, 1 / 2047.0));
   }

   return nir_fmul(b, nir_i2f32(b, nir_iclamp(b, a, low_bound, high_bound)), final_mult);
}

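/* Build the decode compute shader. Each invocation decodes one texel: it
 * fetches the block containing that texel from a uint view of the
 * compressed data, decodes the ETC2/EAC payload selected by the
 * push-constant format, and stores the result through a storage image view
 * of the destination.
 */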
static nir_shader *
build_shader(struct radv_device *dev)
{
   const struct glsl_type *sampler_type_2d =
      glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, true, GLSL_TYPE_FLOAT);
   const struct glsl_type *sampler_type_3d =
      glsl_sampler_type(GLSL_SAMPLER_DIM_3D, false, false, GLSL_TYPE_FLOAT);
   const struct glsl_type *img_type_2d =
      glsl_image_type(GLSL_SAMPLER_DIM_2D, true, GLSL_TYPE_FLOAT);
   const struct glsl_type *img_type_3d =
      glsl_image_type(GLSL_SAMPLER_DIM_3D, false, GLSL_TYPE_FLOAT);
   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_decode_etc");
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;

   nir_variable *input_img_2d =
      nir_variable_create(b.shader, nir_var_uniform, sampler_type_2d, "s_tex_2d");
   input_img_2d->data.descriptor_set = 0;
   input_img_2d->data.binding = 0;

   nir_variable *input_img_3d =
      nir_variable_create(b.shader, nir_var_uniform, sampler_type_3d, "s_tex_3d");
   input_img_3d->data.descriptor_set = 0;
   input_img_3d->data.binding = 0;

   nir_variable *output_img_2d =
      nir_variable_create(b.shader, nir_var_image, img_type_2d, "out_img_2d");
   output_img_2d->data.descriptor_set = 0;
   output_img_2d->data.binding = 1;

   nir_variable *output_img_3d =
      nir_variable_create(b.shader, nir_var_image, img_type_3d, "out_img_3d");
   output_img_3d->data.descriptor_set = 0;
   output_img_3d->data.binding = 1;

   nir_ssa_def *global_id = get_global_ids(&b, 3);

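   /* Push constants (20 bytes): uvec3 destination offset, VkFormat of the
    * compressed image, and VkImageType; the last two select the decode path
    * at runtime so a single pipeline handles every emulated format.
    */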
   nir_ssa_def *consts = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
   nir_ssa_def *consts2 =
      nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 16, .range = 4);
   nir_ssa_def *offset = nir_channels(&b, consts, 7);
   nir_ssa_def *format = nir_channel(&b, consts, 3);
   nir_ssa_def *image_type = nir_channel(&b, consts2, 0);
   nir_ssa_def *is_3d = nir_ieq_imm(&b, image_type, VK_IMAGE_TYPE_3D);
   nir_ssa_def *coord = nir_iadd(&b, global_id, offset);
   nir_ssa_def *src_coord =
      nir_vec3(&b, nir_ushr_imm(&b, nir_channel(&b, coord, 0), 2),
               nir_ushr_imm(&b, nir_channel(&b, coord, 1), 2), nir_channel(&b, coord, 2));

   nir_variable *payload_var =
      nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "payload");
   nir_push_if(&b, is_3d);
   {
      nir_ssa_def *tex_deref = &nir_build_deref_var(&b, input_img_3d)->dest.ssa;

      nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
      tex->sampler_dim = GLSL_SAMPLER_DIM_3D;
      tex->op = nir_texop_txf;
      tex->src[0].src_type = nir_tex_src_coord;
      tex->src[0].src = nir_src_for_ssa(src_coord);
      tex->src[1].src_type = nir_tex_src_lod;
      tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
      tex->src[2].src_type = nir_tex_src_texture_deref;
      tex->src[2].src = nir_src_for_ssa(tex_deref);
      tex->dest_type = nir_type_uint32;
      tex->is_array = false;
      tex->coord_components = 3;

      nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
      nir_builder_instr_insert(&b, &tex->instr);
      nir_store_var(&b, payload_var, &tex->dest.ssa, 0xf);
   }
   nir_push_else(&b, NULL);
   {
      nir_ssa_def *tex_deref = &nir_build_deref_var(&b, input_img_2d)->dest.ssa;

      nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
      tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
      tex->op = nir_texop_txf;
      tex->src[0].src_type = nir_tex_src_coord;
      tex->src[0].src = nir_src_for_ssa(src_coord);
      tex->src[1].src_type = nir_tex_src_lod;
      tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
      tex->src[2].src_type = nir_tex_src_texture_deref;
      tex->src[2].src = nir_src_for_ssa(tex_deref);
      tex->dest_type = nir_type_uint32;
      tex->is_array = true;
      tex->coord_components = 3;

      nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
      nir_builder_instr_insert(&b, &tex->instr);
      nir_store_var(&b, payload_var, &tex->dest.ssa, 0xf);
   }
   nir_pop_if(&b, NULL);

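   /* Position of the texel inside its 4x4 block, and the linear pixel index
    * in column-major order as used by the ETC2 bit layout.
    */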
   nir_ssa_def *pixel_coord = nir_iand_imm(&b, nir_channels(&b, coord, 3), 3);
   nir_ssa_def *linear_pixel = nir_iadd(&b, nir_imul_imm(&b, nir_channel(&b, pixel_coord, 0), 4),
                                        nir_channel(&b, pixel_coord, 1));

   nir_ssa_def *payload = nir_load_var(&b, payload_var);
   nir_variable *color =
      nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "color");
   nir_store_var(&b, color, nir_imm_vec4(&b, 1.0, 0.0, 0.0, 1.0), 0xf);
   nir_push_if(&b, nir_ilt(&b, format, nir_imm_int(&b, VK_FORMAT_EAC_R11_UNORM_BLOCK)));
   {
      nir_ssa_def *alpha_bits_8 =
         nir_ige(&b, format, nir_imm_int(&b, VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK));
      nir_ssa_def *alpha_bits_1 =
         nir_iand(&b, nir_ige(&b, format, nir_imm_int(&b, VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK)),
                  nir_ilt(&b, format, nir_imm_int(&b, VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK)));

      nir_ssa_def *color_payload =
         nir_bcsel(&b, alpha_bits_8, nir_channels(&b, payload, 0xC), nir_channels(&b, payload, 3));
      color_payload = flip_endian(&b, color_payload, 2);
      nir_ssa_def *color_y = nir_channel(&b, color_payload, 0);
      nir_ssa_def *color_x = nir_channel(&b, color_payload, 1);
      nir_ssa_def *flip = nir_test_mask(&b, color_y, 1);
      nir_ssa_def *subblock = nir_ushr_imm(
         &b, nir_bcsel(&b, flip, nir_channel(&b, pixel_coord, 1), nir_channel(&b, pixel_coord, 0)),
         1);

      nir_variable *punchthrough =
         nir_variable_create(b.shader, nir_var_shader_temp, glsl_bool_type(), "punchthrough");
      nir_ssa_def *punchthrough_init =
         nir_iand(&b, alpha_bits_1, nir_inot(&b, nir_test_mask(&b, color_y, 2)));
      nir_store_var(&b, punchthrough, punchthrough_init, 0x1);

      nir_variable *etc1_compat =
         nir_variable_create(b.shader, nir_var_shader_temp, glsl_bool_type(), "etc1_compat");
      nir_store_var(&b, etc1_compat, nir_imm_bool(&b, false), 0x1);

      nir_variable *alpha_result =
         nir_variable_create(b.shader, nir_var_shader_temp, glsl_float_type(), "alpha_result");
      nir_push_if(&b, alpha_bits_8);
      {
         nir_store_var(
            &b, alpha_result,
            decode_etc2_alpha(&b, nir_channels(&b, payload, 3), linear_pixel, false, NULL), 1);
      }
      nir_push_else(&b, NULL);
      {
         nir_store_var(&b, alpha_result, nir_imm_float(&b, 1.0), 1);
      }
      nir_pop_if(&b, NULL);

      const struct glsl_type *uvec3_type = glsl_vector_type(GLSL_TYPE_UINT, 3);
      nir_variable *rgb_result =
         nir_variable_create(b.shader, nir_var_shader_temp, uvec3_type, "rgb_result");
      nir_variable *base_rgb =
         nir_variable_create(b.shader, nir_var_shader_temp, uvec3_type, "base_rgb");
      nir_store_var(&b, rgb_result, nir_imm_ivec3(&b, 255, 0, 0), 0x7);

      nir_ssa_def *msb =
         nir_iand_imm(&b, nir_ushr(&b, color_x, nir_iadd_imm(&b, linear_pixel, 15)), 2);
      nir_ssa_def *lsb = nir_iand_imm(&b, nir_ushr(&b, color_x, linear_pixel), 1);

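      /* ETC2 color block. For formats without punch-through alpha, a cleared
       * "diff" bit means a plain ETC1 individual-mode block with two 4-bit
       * base colors per sub-block.
       */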
      nir_push_if(
         &b, nir_iand(&b, nir_inot(&b, alpha_bits_1), nir_inot(&b, nir_test_mask(&b, color_y, 2))));
      {
         nir_store_var(&b, etc1_compat, nir_imm_bool(&b, true), 1);
         nir_ssa_def *tmp[3];
         for (unsigned i = 0; i < 3; ++i)
            tmp[i] = etc_extend(
               &b,
               nir_iand_imm(&b,
                            nir_ushr(&b, color_y,
                                     nir_isub_imm(&b, 28 - 8 * i, nir_imul_imm(&b, subblock, 4))),
                            0xf),
               4);
         nir_store_var(&b, base_rgb, nir_vec(&b, tmp, 3), 0x7);
      }
      nir_push_else(&b, NULL);
      {
         nir_ssa_def *rb = nir_ubfe_imm(&b, color_y, 27, 5);
         nir_ssa_def *rd = nir_ibfe_imm(&b, color_y, 24, 3);
         nir_ssa_def *gb = nir_ubfe_imm(&b, color_y, 19, 5);
         nir_ssa_def *gd = nir_ibfe_imm(&b, color_y, 16, 3);
         nir_ssa_def *bb = nir_ubfe_imm(&b, color_y, 11, 5);
         nir_ssa_def *bd = nir_ibfe_imm(&b, color_y, 8, 3);
         nir_ssa_def *r1 = nir_iadd(&b, rb, rd);
         nir_ssa_def *g1 = nir_iadd(&b, gb, gd);
         nir_ssa_def *b1 = nir_iadd(&b, bb, bd);

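         /* A differential base + delta outside the 5-bit range is an invalid
          * ETC1 encoding that ETC2 reuses to signal its extra modes. Red
          * overflow selects T mode: two paint colors plus a table distance.
          */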
         nir_push_if(&b, nir_ult(&b, nir_imm_int(&b, 31), r1));
         {
            nir_ssa_def *r0 = nir_ior(&b, nir_ubfe_imm(&b, color_y, 24, 2),
                                      nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 27, 2), 2));
            nir_ssa_def *g0 = nir_ubfe_imm(&b, color_y, 20, 4);
            nir_ssa_def *b0 = nir_ubfe_imm(&b, color_y, 16, 4);
            nir_ssa_def *r2 = nir_ubfe_imm(&b, color_y, 12, 4);
            nir_ssa_def *g2 = nir_ubfe_imm(&b, color_y, 8, 4);
            nir_ssa_def *b2 = nir_ubfe_imm(&b, color_y, 4, 4);
            nir_ssa_def *da = nir_ior(&b, nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 2, 2), 1),
                                      nir_iand_imm(&b, color_y, 1));
            nir_ssa_def *dist = etc2_distance_lookup(&b, da);
            nir_ssa_def *index = nir_ior(&b, lsb, msb);

            nir_store_var(&b, punchthrough,
                          nir_iand(&b, nir_load_var(&b, punchthrough),
                                   nir_ieq_imm(&b, nir_iadd(&b, lsb, msb), 2)),
                          0x1);
            nir_push_if(&b, nir_ieq_imm(&b, index, 0));
            {
               nir_store_var(&b, rgb_result, etc_extend(&b, nir_vec3(&b, r0, g0, b0), 4), 0x7);
            }
            nir_push_else(&b, NULL);
            {

               nir_ssa_def *tmp = nir_iadd(&b, etc_extend(&b, nir_vec3(&b, r2, g2, b2), 4),
                                           nir_imul(&b, dist, nir_isub_imm(&b, 2, index)));
               nir_store_var(&b, rgb_result, tmp, 0x7);
            }
            nir_pop_if(&b, NULL);
         }
         nir_push_else(&b, NULL);
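         /* Green overflow selects H mode: two paint colors, both offset by
          * the table distance, with the distance LSB derived from comparing
          * the two base colors.
          */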
         nir_push_if(&b, nir_ult(&b, nir_imm_int(&b, 31), g1));
         {
            nir_ssa_def *r0 = nir_ubfe_imm(&b, color_y, 27, 4);
            nir_ssa_def *g0 = nir_ior(&b, nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 24, 3), 1),
                                      nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 20), 1));
            nir_ssa_def *b0 = nir_ior(&b, nir_ubfe_imm(&b, color_y, 15, 3),
                                      nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 16), 8));
            nir_ssa_def *r2 = nir_ubfe_imm(&b, color_y, 11, 4);
            nir_ssa_def *g2 = nir_ubfe_imm(&b, color_y, 7, 4);
            nir_ssa_def *b2 = nir_ubfe_imm(&b, color_y, 3, 4);
            nir_ssa_def *da = nir_iand_imm(&b, color_y, 4);
            nir_ssa_def *db = nir_iand_imm(&b, color_y, 1);
            nir_ssa_def *d = nir_iadd(&b, da, nir_imul_imm(&b, db, 2));
            nir_ssa_def *d0 =
               nir_iadd(&b, nir_ishl_imm(&b, r0, 16), nir_iadd(&b, nir_ishl_imm(&b, g0, 8), b0));
            nir_ssa_def *d2 =
               nir_iadd(&b, nir_ishl_imm(&b, r2, 16), nir_iadd(&b, nir_ishl_imm(&b, g2, 8), b2));
            d = nir_bcsel(&b, nir_uge(&b, d0, d2), nir_iadd_imm(&b, d, 1), d);
            nir_ssa_def *dist = etc2_distance_lookup(&b, d);
            nir_ssa_def *base = nir_bcsel(&b, nir_ine_imm(&b, msb, 0), nir_vec3(&b, r2, g2, b2),
                                          nir_vec3(&b, r0, g0, b0));
            base = etc_extend(&b, base, 4);
            base = nir_iadd(&b, base,
                            nir_imul(&b, dist, nir_isub_imm(&b, 1, nir_imul_imm(&b, lsb, 2))));
            nir_store_var(&b, rgb_result, base, 0x7);
            nir_store_var(&b, punchthrough,
                          nir_iand(&b, nir_load_var(&b, punchthrough),
                                   nir_ieq_imm(&b, nir_iadd(&b, lsb, msb), 2)),
                          0x1);
         }
         nir_push_else(&b, NULL);
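         /* Blue overflow selects planar mode: one base color plus horizontal
          * and vertical gradients interpolated across the 4x4 block.
          */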
         nir_push_if(&b, nir_ult(&b, nir_imm_int(&b, 31), b1));
         {
            nir_ssa_def *r0 = nir_ubfe_imm(&b, color_y, 25, 6);
            nir_ssa_def *g0 = nir_ior(&b, nir_ubfe_imm(&b, color_y, 17, 6),
                                      nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 18), 0x40));
            nir_ssa_def *b0 =
               nir_ior(&b, nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 11, 2), 3),
                       nir_ior(&b, nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 11), 0x20),
                               nir_ubfe_imm(&b, color_y, 7, 3)));
            nir_ssa_def *rh = nir_ior(&b, nir_iand_imm(&b, color_y, 1),
                                      nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 2, 5), 1));
            nir_ssa_def *rv = nir_ubfe_imm(&b, color_x, 13, 6);
            nir_ssa_def *gh = nir_ubfe_imm(&b, color_x, 25, 7);
            nir_ssa_def *gv = nir_ubfe_imm(&b, color_x, 6, 7);
            nir_ssa_def *bh = nir_ubfe_imm(&b, color_x, 19, 6);
            nir_ssa_def *bv = nir_ubfe_imm(&b, color_x, 0, 6);

            r0 = etc_extend(&b, r0, 6);
            g0 = etc_extend(&b, g0, 7);
            b0 = etc_extend(&b, b0, 6);
            rh = etc_extend(&b, rh, 6);
            rv = etc_extend(&b, rv, 6);
            gh = etc_extend(&b, gh, 7);
            gv = etc_extend(&b, gv, 7);
            bh = etc_extend(&b, bh, 6);
            bv = etc_extend(&b, bv, 6);

            nir_ssa_def *rgb = nir_vec3(&b, r0, g0, b0);
            nir_ssa_def *dx = nir_imul(&b, nir_isub(&b, nir_vec3(&b, rh, gh, bh), rgb),
                                       nir_channel(&b, pixel_coord, 0));
            nir_ssa_def *dy = nir_imul(&b, nir_isub(&b, nir_vec3(&b, rv, gv, bv), rgb),
                                       nir_channel(&b, pixel_coord, 1));
            rgb = nir_iadd(&b, rgb, nir_ishr_imm(&b, nir_iadd_imm(&b, nir_iadd(&b, dx, dy), 2), 2));
            nir_store_var(&b, rgb_result, rgb, 0x7);
            nir_store_var(&b, punchthrough, nir_imm_bool(&b, false), 0x1);
         }
         nir_push_else(&b, NULL);
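         /* No overflow: regular differential mode. Pick the 5-bit base color
          * or base + delta for this sub-block; the per-pixel modifier is
          * applied in the shared ETC1 path below.
          */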
         {
            nir_store_var(&b, etc1_compat, nir_imm_bool(&b, true), 1);
            nir_ssa_def *subblock_b = nir_ine_imm(&b, subblock, 0);
            nir_ssa_def *tmp[] = {
               nir_bcsel(&b, subblock_b, r1, rb),
               nir_bcsel(&b, subblock_b, g1, gb),
               nir_bcsel(&b, subblock_b, b1, bb),
            };
            nir_store_var(&b, base_rgb, etc_extend(&b, nir_vec(&b, tmp, 3), 5), 0x7);
         }
         nir_pop_if(&b, NULL);
         nir_pop_if(&b, NULL);
         nir_pop_if(&b, NULL);
      }
      nir_pop_if(&b, NULL);
      nir_push_if(&b, nir_load_var(&b, etc1_compat));
      {
         nir_ssa_def *etc1_table_index = nir_ubfe(
            &b, color_y, nir_isub_imm(&b, 5, nir_imul_imm(&b, subblock, 3)), nir_imm_int(&b, 3));
         nir_ssa_def *sgn = nir_isub_imm(&b, 1, msb);
         sgn = nir_bcsel(&b, nir_load_var(&b, punchthrough), nir_imul(&b, sgn, lsb), sgn);
         nir_store_var(&b, punchthrough,
                       nir_iand(&b, nir_load_var(&b, punchthrough),
                                nir_ieq_imm(&b, nir_iadd(&b, lsb, msb), 2)),
                       0x1);
         nir_ssa_def *off =
            nir_imul(&b, etc1_color_modifier_lookup(&b, etc1_table_index, lsb), sgn);
         nir_ssa_def *result = nir_iadd(&b, nir_load_var(&b, base_rgb), off);
         nir_store_var(&b, rgb_result, result, 0x7);
      }
      nir_pop_if(&b, NULL);
      nir_push_if(&b, nir_load_var(&b, punchthrough));
      {
         nir_store_var(&b, alpha_result, nir_imm_float(&b, 0), 0x1);
         nir_store_var(&b, rgb_result, nir_imm_ivec3(&b, 0, 0, 0), 0x7);
      }
      nir_pop_if(&b, NULL);
      nir_ssa_def *col[4];
      for (unsigned i = 0; i < 3; ++i)
         col[i] = nir_fdiv(&b, nir_i2f32(&b, nir_channel(&b, nir_load_var(&b, rgb_result), i)),
                           nir_imm_float(&b, 255.0));
      col[3] = nir_load_var(&b, alpha_result);
      nir_store_var(&b, color, nir_vec(&b, col, 4), 0xf);
   }
   nir_push_else(&b, NULL);
   { /* EAC */
      nir_ssa_def *is_signed = nir_ior(&b, nir_ieq_imm(&b, format, VK_FORMAT_EAC_R11_SNORM_BLOCK),
                                       nir_ieq_imm(&b, format, VK_FORMAT_EAC_R11G11_SNORM_BLOCK));
      nir_ssa_def *val[4];
      for (int i = 0; i < 2; ++i) {
         val[i] = decode_etc2_alpha(&b, nir_channels(&b, payload, 3 << (2 * i)), linear_pixel, true,
                                    is_signed);
      }
      val[2] = nir_imm_float(&b, 0.0);
      val[3] = nir_imm_float(&b, 1.0);
      nir_store_var(&b, color, nir_vec(&b, val, 4), 0xf);
   }
   nir_pop_if(&b, NULL);

   nir_ssa_def *outval = nir_load_var(&b, color);
   nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1),
                                     nir_channel(&b, coord, 2), nir_ssa_undef(&b, 1, 32));

   nir_push_if(&b, is_3d);
   {
      nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img_3d)->dest.ssa, img_coord,
                            nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0),
                            .image_dim = GLSL_SAMPLER_DIM_3D);
   }
   nir_push_else(&b, NULL);
   {
      nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img_2d)->dest.ssa, img_coord,
                            nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0),
                            .image_dim = GLSL_SAMPLER_DIM_2D, .image_array = true);
   }
   nir_pop_if(&b, NULL);
   return b.shader;
}

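/* Descriptor set layout for the decode pass: binding 0 is the sampled uint
 * view of the compressed data, binding 1 the storage image it decodes into,
 * plus 20 bytes of push constants (offset, format, image type).
 */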
static VkResult
create_layout(struct radv_device *device)
{
   VkResult result;
   VkDescriptorSetLayoutCreateInfo ds_create_info = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
      .bindingCount = 2,
      .pBindings = (VkDescriptorSetLayoutBinding[]){
         {.binding = 0,
          .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
         {.binding = 1,
          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
      }};

   result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
                                           &device->meta_state.alloc,
                                           &device->meta_state.etc_decode.ds_layout);
   if (result != VK_SUCCESS)
      goto fail;

   VkPipelineLayoutCreateInfo pl_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 1,
      .pSetLayouts = &device->meta_state.etc_decode.ds_layout,
      .pushConstantRangeCount = 1,
      .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 20},
   };

   result =
      radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
                                &device->meta_state.alloc, &device->meta_state.etc_decode.p_layout);
   if (result != VK_SUCCESS)
      goto fail;
   return VK_SUCCESS;
fail:
   return result;
}

static VkResult
create_decode_pipeline(struct radv_device *device, VkPipeline *pipeline)
{
   VkResult result;

   mtx_lock(&device->meta_state.mtx);
   if (*pipeline) {
      mtx_unlock(&device->meta_state.mtx);
      return VK_SUCCESS;
   }

   nir_shader *cs = build_shader(device);

   /* compute shader */

   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(cs),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo vk_pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = pipeline_shader_stage,
      .flags = 0,
      .layout = device->meta_state.etc_decode.p_layout,
   };

   result = radv_CreateComputePipelines(radv_device_to_handle(device),
                                        radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
                                        &vk_pipeline_info, NULL, pipeline);
   if (result != VK_SUCCESS)
      goto fail;

   ralloc_free(cs);
   mtx_unlock(&device->meta_state.mtx);
   return VK_SUCCESS;
fail:
   ralloc_free(cs);
   mtx_unlock(&device->meta_state.mtx);
   return result;
}

VkResult
radv_device_init_meta_etc_decode_state(struct radv_device *device, bool on_demand)
{
   struct radv_meta_state *state = &device->meta_state;
   VkResult res;

   if (!device->physical_device->emulate_etc2)
      return VK_SUCCESS;

   res = create_layout(device);
   if (res != VK_SUCCESS)
      return res;

   if (on_demand)
      return VK_SUCCESS;

   return create_decode_pipeline(device, &state->etc_decode.pipeline);
}

void
radv_device_finish_meta_etc_decode_state(struct radv_device *device)
{
   struct radv_meta_state *state = &device->meta_state;
   radv_DestroyPipeline(radv_device_to_handle(device), state->etc_decode.pipeline, &state->alloc);
   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->etc_decode.p_layout,
                              &state->alloc);
   device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
                                                        state->etc_decode.ds_layout, &state->alloc);
}

static VkPipeline
radv_get_etc_decode_pipeline(struct radv_cmd_buffer *cmd_buffer)
{
   struct radv_device *device = cmd_buffer->device;
   VkPipeline *pipeline = &device->meta_state.etc_decode.pipeline;

   if (!*pipeline) {
      VkResult ret;

      ret = create_decode_pipeline(device, pipeline);
      if (ret != VK_SUCCESS) {
         cmd_buffer->record_result = ret;
         return VK_NULL_HANDLE;
      }
   }

   return *pipeline;
}

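/* Record the decode dispatch: bind the source/destination views through a
 * push descriptor set, push the offset/format/type constants and launch one
 * invocation per texel of the requested region.
 */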
static void
decode_etc(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
           struct radv_image_view *dest_iview, const VkOffset3D *offset, const VkExtent3D *extent)
{
   struct radv_device *device = cmd_buffer->device;

   radv_meta_push_descriptor_set(
      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.etc_decode.p_layout,
      0, /* set */
      2, /* descriptorWriteCount */
      (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                .dstBinding = 0,
                                .dstArrayElement = 0,
                                .descriptorCount = 1,
                                .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
                                .pImageInfo =
                                   (VkDescriptorImageInfo[]){
                                      {.sampler = VK_NULL_HANDLE,
                                       .imageView = radv_image_view_to_handle(src_iview),
                                       .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
                                   }},
                               {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                .dstBinding = 1,
                                .dstArrayElement = 0,
                                .descriptorCount = 1,
                                .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                .pImageInfo = (VkDescriptorImageInfo[]){
                                   {
                                      .sampler = VK_NULL_HANDLE,
                                      .imageView = radv_image_view_to_handle(dest_iview),
                                      .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                   },
                                }}});

   VkPipeline pipeline = radv_get_etc_decode_pipeline(cmd_buffer);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        pipeline);

   unsigned push_constants[5] = {
      offset->x, offset->y, offset->z, src_iview->image->vk.format, src_iview->image->vk.image_type,
   };

   radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
                         device->meta_state.etc_decode.p_layout, VK_SHADER_STAGE_COMPUTE_BIT,
                         0, 20, push_constants);
   radv_unaligned_dispatch(cmd_buffer, extent->width, extent->height, extent->depth);
}

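/* Decode the given subresource region of an emulated ETC2/EAC image: read
 * the compressed bits through a uint view of the image and write the
 * decoded texels to the image's second plane.
 */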
void
radv_meta_decode_etc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                     VkImageLayout layout, const VkImageSubresourceLayers *subresource,
                     VkOffset3D offset, VkExtent3D extent)
{
   struct radv_meta_saved_state saved_state;
   radv_meta_save(&saved_state, cmd_buffer,
                  RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS |
                     RADV_META_SAVE_DESCRIPTORS | RADV_META_SUSPEND_PREDICATING);

   uint32_t base_slice = radv_meta_get_iview_layer(image, subresource, &offset);
   uint32_t slice_count =
      image->vk.image_type == VK_IMAGE_TYPE_3D ? extent.depth : subresource->layerCount;

   extent = vk_image_sanitize_extent(&image->vk, extent);
   offset = vk_image_sanitize_offset(&image->vk, offset);

   VkFormat load_format = vk_format_get_blocksize(image->vk.format) == 16
                             ? VK_FORMAT_R32G32B32A32_UINT
                             : VK_FORMAT_R32G32_UINT;
   struct radv_image_view src_iview;
   radv_image_view_init(
      &src_iview, cmd_buffer->device,
      &(VkImageViewCreateInfo){
         .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
         .image = radv_image_to_handle(image),
         .viewType = radv_meta_get_view_type(image),
         .format = load_format,
         .subresourceRange =
            {
               .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
               .baseMipLevel = subresource->mipLevel,
               .levelCount = 1,
               .baseArrayLayer = 0,
               .layerCount = subresource->baseArrayLayer + subresource->layerCount,
            },
      },
      0, NULL);

   VkFormat store_format;
   switch (image->vk.format) {
   case VK_FORMAT_EAC_R11_UNORM_BLOCK:
      store_format = VK_FORMAT_R16_UNORM;
      break;
   case VK_FORMAT_EAC_R11_SNORM_BLOCK:
      store_format = VK_FORMAT_R16_SNORM;
      break;
   case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
      store_format = VK_FORMAT_R16G16_UNORM;
      break;
   case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
      store_format = VK_FORMAT_R16G16_SNORM;
      break;
   default:
      store_format = VK_FORMAT_R8G8B8A8_UNORM;
   }
   struct radv_image_view dest_iview;
   radv_image_view_init(
      &dest_iview, cmd_buffer->device,
      &(VkImageViewCreateInfo){
         .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
         .image = radv_image_to_handle(image),
         .viewType = radv_meta_get_view_type(image),
         .format = store_format,
         .subresourceRange =
            {
               .aspectMask = VK_IMAGE_ASPECT_PLANE_1_BIT,
               .baseMipLevel = subresource->mipLevel,
               .levelCount = 1,
               .baseArrayLayer = 0,
               .layerCount = subresource->baseArrayLayer + subresource->layerCount,
            },
      },
      0, NULL);

   decode_etc(cmd_buffer, &src_iview, &dest_iview, &(VkOffset3D){offset.x, offset.y, base_slice},
              &(VkExtent3D){extent.width, extent.height, slice_count});

   radv_image_view_finish(&src_iview);
   radv_image_view_finish(&dest_iview);

   radv_meta_restore(&saved_state, cmd_buffer);
}