1 /*
2 * Copyright © 2021 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
25 */
26
27 #include <stdbool.h>
28 #include "main/image.h"
29 #include "main/pbo.h"
30
31 #include "nir/pipe_nir.h"
32 #include "state_tracker/st_nir.h"
33 #include "state_tracker/st_format.h"
34 #include "state_tracker/st_pbo.h"
35 #include "state_tracker/st_program.h"
36 #include "state_tracker/st_texture.h"
37 #include "compiler/nir/nir_builder.h"
38 #include "compiler/nir/nir_format_convert.h"
39 #include "compiler/glsl/gl_nir.h"
40 #include "compiler/glsl/gl_nir_linker.h"
41 #include "util/u_sampler.h"
42 #include "util/streaming-load-memcpy.h"
43
44 #define SPEC_USES_THRESHOLD 5
45
46 struct pbo_spec_async_data {
47 uint32_t data[4]; //must be first
48 bool created;
49 unsigned uses;
50 struct util_queue_fence fence;
51 nir_shader *nir;
52 struct pipe_shader_state *cs;
53 };
54
55 struct pbo_async_data {
56 struct st_context *st;
57 enum pipe_texture_target target;
58 unsigned num_components;
59 struct util_queue_fence fence;
60 nir_shader *nir;
61 nir_shader *copy; //immutable
62 struct pipe_shader_state *cs;
63 struct set specialized;
64 };
65
66 #define BGR_FORMAT(NAME) \
67 {{ \
68 [0] = PIPE_FORMAT_##NAME##_SNORM, \
69 [1] = PIPE_FORMAT_##NAME##_SINT, \
70 }, \
71 { \
72 [0] = PIPE_FORMAT_##NAME##_UNORM, \
73 [1] = PIPE_FORMAT_##NAME##_UINT, \
74 }}
75
76 #define FORMAT(NAME, NAME16, NAME32) \
77 {{ \
78 [1] = PIPE_FORMAT_##NAME##_SNORM, \
79 [2] = PIPE_FORMAT_##NAME16##_SNORM, \
80 [4] = PIPE_FORMAT_##NAME32##_SNORM, \
81 }, \
82 { \
83 [1] = PIPE_FORMAT_##NAME##_UNORM, \
84 [2] = PIPE_FORMAT_##NAME16##_UNORM, \
85 [4] = PIPE_FORMAT_##NAME32##_UNORM, \
86 }}
87
88 /* don't try these at home */
89 static enum pipe_format
get_convert_format(struct gl_context * ctx,enum pipe_format src_format,GLenum format,GLenum type,bool * need_bgra_swizzle)90 get_convert_format(struct gl_context *ctx,
91 enum pipe_format src_format,
92 GLenum format, GLenum type,
93 bool *need_bgra_swizzle)
94 {
95 struct st_context *st = st_context(ctx);
96 GLint bpp = _mesa_bytes_per_pixel(format, type);
97 if (_mesa_is_depth_format(format) ||
98 format == GL_GREEN_INTEGER ||
99 format == GL_BLUE_INTEGER) {
100 switch (bpp) {
101 case 1:
102 return _mesa_is_type_unsigned(type) ? PIPE_FORMAT_R8_UINT : PIPE_FORMAT_R8_SINT;
103 case 2:
104 return _mesa_is_type_unsigned(type) ? PIPE_FORMAT_R16_UINT : PIPE_FORMAT_R16_SINT;
105 case 4:
106 return _mesa_is_type_unsigned(type) ? PIPE_FORMAT_R32_UINT : PIPE_FORMAT_R32_SINT;
107 }
108 }
109 mesa_format mformat = _mesa_tex_format_from_format_and_type(ctx, format, type);
110 enum pipe_format pformat = st_mesa_format_to_pipe_format(st, mformat);
111 if (!pformat) {
112 GLint dst_components = _mesa_components_in_format(format);
113 bpp /= dst_components;
114 if (format == GL_BGR || format == GL_BGRA) {
115 pformat = st_pbo_get_dst_format(ctx, PIPE_TEXTURE_2D, src_format, false, format == GL_BGR ? GL_RGB : GL_RGBA, type, 0);
116 if (!pformat)
117 pformat = get_convert_format(ctx, src_format, format == GL_BGR ? GL_RGB : GL_RGBA, type, need_bgra_swizzle);
118 assert(pformat);
119 *need_bgra_swizzle = true;
120 } else if (format == GL_BGR_INTEGER || format == GL_BGRA_INTEGER) {
121 pformat = st_pbo_get_dst_format(ctx, PIPE_TEXTURE_2D, src_format, false, format == GL_BGR_INTEGER ? GL_RGB_INTEGER : GL_RGBA_INTEGER, type, 0);
122 if (!pformat)
123 pformat = get_convert_format(ctx, src_format, format == GL_BGR_INTEGER ? GL_RGB_INTEGER : GL_RGBA_INTEGER, type, need_bgra_swizzle);
124 assert(pformat);
125 *need_bgra_swizzle = true;
126 } else {
127 /* [signed,unsigned][bpp] */
128 enum pipe_format rgb[5][2][5] = {
129 [1] = FORMAT(R8, R16, R32),
130 [2] = FORMAT(R8G8, R16G16, R32G32),
131 [3] = FORMAT(R8G8B8, R16G16B16, R32G32B32),
132 [4] = FORMAT(R8G8B8A8, R16G16B16A16, R32G32B32A32),
133 };
134 pformat = rgb[dst_components][_mesa_is_type_unsigned(type)][bpp];
135 }
136 assert(util_format_get_nr_components(pformat) == dst_components);
137 }
138 assert(pformat);
139 return pformat;
140 }
141 #undef BGR_FORMAT
142 #undef FORMAT
143
144
145 struct pbo_shader_data {
146 nir_def *offset;
147 nir_def *range;
148 nir_def *invert;
149 nir_def *blocksize;
150 nir_def *alignment;
151 nir_def *dst_bit_size;
152 nir_def *channels;
153 nir_def *normalized;
154 nir_def *integer;
155 nir_def *clamp_uint;
156 nir_def *r11g11b10_or_sint;
157 nir_def *r9g9b9e5;
158 nir_def *bits1;
159 nir_def *bits2;
160 nir_def *bits3;
161 nir_def *bits4;
162 nir_def *swap;
163 nir_def *bits; //vec4
164 };
165
166
167 /* must be under 16bytes / sizeof(vec4) / 128 bits) */
168 struct pbo_data {
169 union {
170 struct {
171 struct {
172 uint16_t x, y;
173 };
174 struct {
175 uint16_t width, height;
176 };
177 struct {
178 uint16_t depth;
179 uint8_t invert : 1;
180 uint8_t blocksize : 7;
181
182 uint8_t clamp_uint : 1;
183 uint8_t r11g11b10_or_sint : 1;
184 uint8_t r9g9b9e5 : 1;
185 uint8_t swap : 1;
186 uint16_t alignment : 2;
187 uint8_t dst_bit_size : 2; //8, 16, 32, 64
188 };
189
190 struct {
191 uint8_t channels : 2;
192 uint8_t bits1 : 6;
193 uint8_t normalized : 1;
194 uint8_t integer : 1;
195 uint8_t bits2 : 6;
196 uint8_t bits3 : 6;
197 uint8_t pad1 : 2;
198 uint8_t bits4 : 6;
199 uint8_t pad2 : 2;
200 };
201 };
202 float vec[4];
203 };
204 };
205
206
207 #define STRUCT_OFFSET(name) (offsetof(struct pbo_data, name) * 8)
208
209 #define STRUCT_BLOCK(offset, ...) \
210 do { \
211 assert(offset % 8 == 0); \
212 nir_def *block##offset = nir_u2u32(b, nir_extract_bits(b, &ubo_load, 1, (offset), 1, 8)); \
213 __VA_ARGS__ \
214 } while (0)
215 #define STRUCT_MEMBER(blockoffset, name, offset, size, op, clamp) \
216 do { \
217 assert(offset + size <= 8); \
218 nir_def *val = nir_iand_imm(b, block##blockoffset, u_bit_consecutive(offset, size)); \
219 if (offset) \
220 val = nir_ushr_imm(b, val, offset); \
221 sd->name = op; \
222 if (clamp) \
223 sd->name = nir_umin(b, sd->name, nir_imm_int(b, clamp)); \
224 } while (0)
225 #define STRUCT_MEMBER_SHIFTED_2BIT(blockoffset, name, offset, shift, clamp) \
226 STRUCT_MEMBER(blockoffset, name, offset, 2, nir_ishl(b, nir_imm_int(b, shift), val), clamp)
227
228 #define STRUCT_MEMBER_BOOL(blockoffset, name, offset) \
229 STRUCT_MEMBER(blockoffset, name, offset, 1, nir_ieq_imm(b, val, 1), 0)
230
231 /* this function extracts the conversion data from pbo_data using the
232 * size annotations for each grouping. data is compacted into bitfields,
233 * so bitwise operations must be used to "unpact" everything
234 */
235 static void
init_pbo_shader_data(nir_builder * b,struct pbo_shader_data * sd,unsigned coord_components)236 init_pbo_shader_data(nir_builder *b, struct pbo_shader_data *sd, unsigned coord_components)
237 {
238 nir_variable *ubo = nir_variable_create(b->shader, nir_var_uniform, glsl_uvec4_type(), "offset");
239 nir_def *ubo_load = nir_load_var(b, ubo);
240
241 sd->offset = nir_u2u32(b, nir_extract_bits(b, &ubo_load, 1, STRUCT_OFFSET(x), 2, 16));
242 if (coord_components == 1)
243 sd->offset = nir_vector_insert_imm(b, sd->offset, nir_imm_int(b, 0), 1);
244 sd->range = nir_u2u32(b, nir_extract_bits(b, &ubo_load, 1, STRUCT_OFFSET(width), 3, 16));
245 if (coord_components < 3) {
246 sd->range = nir_vector_insert_imm(b, sd->range, nir_imm_int(b, 1), 2);
247 if (coord_components == 1)
248 sd->range = nir_vector_insert_imm(b, sd->range, nir_imm_int(b, 1), 1);
249 }
250
251 STRUCT_BLOCK(80,
252 STRUCT_MEMBER_BOOL(80, invert, 0);
253 STRUCT_MEMBER(80, blocksize, 1, 7, nir_iadd_imm(b, val, 1), 128);
254 );
255
256 STRUCT_BLOCK(88,
257 STRUCT_MEMBER_BOOL(88, clamp_uint, 0);
258 STRUCT_MEMBER_BOOL(88, r11g11b10_or_sint, 1);
259 STRUCT_MEMBER_BOOL(88, r9g9b9e5, 2);
260 STRUCT_MEMBER_BOOL(88, swap, 3);
261 STRUCT_MEMBER_SHIFTED_2BIT(88, alignment, 4, 1, 8);
262 STRUCT_MEMBER_SHIFTED_2BIT(88, dst_bit_size, 6, 8, 64);
263 );
264
265 STRUCT_BLOCK(96,
266 STRUCT_MEMBER(96, channels, 0, 2, nir_iadd_imm(b, val, 1), 4);
267 STRUCT_MEMBER(96, bits1, 2, 6, val, 32);
268 );
269
270 STRUCT_BLOCK(104,
271 STRUCT_MEMBER_BOOL(104, normalized, 0);
272 STRUCT_MEMBER_BOOL(104, integer, 1);
273 STRUCT_MEMBER(104, bits2, 2, 6, val, 32);
274 );
275
276
277 STRUCT_BLOCK(112,
278 STRUCT_MEMBER(112, bits3, 0, 6, val, 32);
279 );
280
281 STRUCT_BLOCK(120,
282 STRUCT_MEMBER(120, bits4, 0, 6, val, 32);
283 );
284 sd->bits = nir_vec4(b, sd->bits1, sd->bits2, sd->bits3, sd->bits4);
285
286 /* clamp swap in the shader to enable better optimizing */
287 /* TODO?
288 sd->swap = nir_bcsel(b, nir_ior(b,
289 nir_ieq_imm(b, sd->blocksize, 8),
290 nir_bcsel(b,
291 nir_ieq_imm(b, sd->bits1, 8),
292 nir_bcsel(b,
293 nir_uge_imm(b, sd->channels, 2),
294 nir_bcsel(b,
295 nir_uge_imm(b, sd->channels, 3),
296 nir_bcsel(b,
297 nir_ieq_imm(b, sd->channels, 4),
298 nir_ball(b, nir_ieq_imm(b, sd->bits, 8)),
299 nir_ball(b, nir_ieq_imm(b, nir_channels(b, sd->bits, 7), 8))),
300 nir_ball(b, nir_ieq_imm(b, nir_channels(b, sd->bits, 3), 8))),
301 nir_imm_false(b)),
302 nir_imm_false(b))),
303 nir_imm_false(b),
304 sd->swap);
305 */
306 }
307
308 static unsigned
fill_pbo_data(struct pbo_data * pd,enum pipe_format src_format,enum pipe_format dst_format,bool swap)309 fill_pbo_data(struct pbo_data *pd, enum pipe_format src_format, enum pipe_format dst_format, bool swap)
310 {
311 unsigned bits[4] = {0};
312 bool weird_packed = false;
313 const struct util_format_description *dst_desc = util_format_description(dst_format);
314 bool is_8bit = true;
315
316 for (unsigned c = 0; c < 4; c++) {
317 bits[c] = dst_desc->channel[c].size;
318 if (c < dst_desc->nr_channels) {
319 weird_packed |= bits[c] != bits[0] || bits[c] % 8 != 0;
320 if (bits[c] != 8)
321 is_8bit = false;
322 }
323 }
324
325 if (is_8bit || dst_desc->block.bits == 8)
326 swap = false;
327
328 unsigned dst_bit_size = 0;
329 if (weird_packed) {
330 dst_bit_size = dst_desc->block.bits;
331 } else {
332 dst_bit_size = dst_desc->block.bits / dst_desc->nr_channels;
333 }
334 assert(dst_bit_size);
335 assert(dst_bit_size <= 64);
336
337 pd->dst_bit_size = dst_bit_size >> 4;
338 pd->channels = dst_desc->nr_channels - 1;
339 pd->normalized = dst_desc->is_unorm || dst_desc->is_snorm;
340 pd->clamp_uint = dst_desc->is_unorm ||
341 (util_format_is_pure_sint(dst_format) &&
342 !util_format_is_pure_sint(src_format) &&
343 !util_format_is_snorm(src_format)) ||
344 util_format_is_pure_uint(dst_format);
345 pd->integer = util_format_is_pure_uint(dst_format) || util_format_is_pure_sint(dst_format);
346 pd->r11g11b10_or_sint = dst_format == PIPE_FORMAT_R11G11B10_FLOAT || util_format_is_pure_sint(dst_format);
347 pd->r9g9b9e5 = dst_format == PIPE_FORMAT_R9G9B9E5_FLOAT;
348 pd->bits1 = bits[0];
349 pd->bits2 = bits[1];
350 pd->bits3 = bits[2];
351 pd->bits4 = bits[3];
352 pd->swap = swap;
353
354 return weird_packed ? 1 : dst_desc->nr_channels;
355 }
356
357 static nir_def *
get_buffer_offset(nir_builder * b,nir_def * coord,struct pbo_shader_data * sd)358 get_buffer_offset(nir_builder *b, nir_def *coord, struct pbo_shader_data *sd)
359 {
360 /* from _mesa_image_offset():
361 offset = topOfImage
362 + (skippixels + column) * bytes_per_pixel
363 + (skiprows + row) * bytes_per_row
364 + (skipimages + img) * bytes_per_image;
365 */
366 nir_def *bytes_per_row = nir_imul(b, nir_channel(b, sd->range, 0), sd->blocksize);
367 bytes_per_row = nir_bcsel(b, nir_ult_imm(b, sd->alignment, 2),
368 bytes_per_row,
369 nir_iand(b,
370 nir_iadd_imm(b, nir_iadd(b, bytes_per_row, sd->alignment), -1),
371 nir_inot(b, nir_iadd_imm(b, sd->alignment, -1))));
372 nir_def *bytes_per_image = nir_imul(b, bytes_per_row, nir_channel(b, sd->range, 1));
373 bytes_per_row = nir_bcsel(b, sd->invert,
374 nir_ineg(b, bytes_per_row),
375 bytes_per_row);
376 return nir_iadd(b,
377 nir_imul(b, nir_channel(b, coord, 0), sd->blocksize),
378 nir_iadd(b,
379 nir_imul(b, nir_channel(b, coord, 1), bytes_per_row),
380 nir_imul(b, nir_channel(b, coord, 2), bytes_per_image)));
381 }
382
383 static inline void
write_ssbo(nir_builder * b,nir_def * pixel,nir_def * buffer_offset)384 write_ssbo(nir_builder *b, nir_def *pixel, nir_def *buffer_offset)
385 {
386 nir_store_ssbo(b, pixel, nir_imm_zero(b, 1, 32), buffer_offset,
387 .align_mul = pixel->bit_size / 8,
388 .write_mask = (1 << pixel->num_components) - 1);
389 }
390
391 static void
write_conversion(nir_builder * b,nir_def * pixel,nir_def * buffer_offset,struct pbo_shader_data * sd)392 write_conversion(nir_builder *b, nir_def *pixel, nir_def *buffer_offset, struct pbo_shader_data *sd)
393 {
394 nir_push_if(b, nir_ilt_imm(b, sd->dst_bit_size, 32));
395 nir_push_if(b, nir_ieq_imm(b, sd->dst_bit_size, 16));
396 write_ssbo(b, nir_u2u16(b, pixel), buffer_offset);
397 nir_push_else(b, NULL);
398 write_ssbo(b, nir_u2u8(b, pixel), buffer_offset);
399 nir_pop_if(b, NULL);
400 nir_push_else(b, NULL);
401 write_ssbo(b, pixel, buffer_offset);
402 nir_pop_if(b, NULL);
403 }
404
405 static nir_def *
swap2(nir_builder * b,nir_def * src)406 swap2(nir_builder *b, nir_def *src)
407 {
408 /* dst[i] = (src[i] >> 8) | ((src[i] << 8) & 0xff00); */
409 return nir_ior(b,
410 nir_ushr_imm(b, src, 8),
411 nir_iand_imm(b, nir_ishl_imm(b, src, 8), 0xff00));
412 }
413
414 static nir_def *
swap4(nir_builder * b,nir_def * src)415 swap4(nir_builder *b, nir_def *src)
416 {
417 /* a = (b >> 24) | ((b >> 8) & 0xff00) | ((b << 8) & 0xff0000) | ((b << 24) & 0xff000000); */
418 return nir_ior(b,
419 /* (b >> 24) */
420 nir_ushr_imm(b, src, 24),
421 nir_ior(b,
422 /* ((b >> 8) & 0xff00) */
423 nir_iand_imm(b, nir_ushr_imm(b, src, 8), 0xff00),
424 nir_ior(b,
425 /* ((b << 8) & 0xff0000) */
426 nir_iand_imm(b, nir_ishl_imm(b, src, 8), 0xff0000),
427 /* ((b << 24) & 0xff000000) */
428 nir_iand_imm(b, nir_ishl_imm(b, src, 24), 0xff000000))));
429 }
430
431 /* explode the cf to handle channel counts in the shader */
432 static void
grab_components(nir_builder * b,nir_def * pixel,nir_def * buffer_offset,struct pbo_shader_data * sd,bool weird_packed)433 grab_components(nir_builder *b, nir_def *pixel, nir_def *buffer_offset, struct pbo_shader_data *sd, bool weird_packed)
434 {
435 if (weird_packed) {
436 nir_push_if(b, nir_ieq_imm(b, sd->bits1, 32));
437 write_conversion(b, nir_trim_vector(b, pixel, 2), buffer_offset, sd);
438 nir_push_else(b, NULL);
439 write_conversion(b, nir_channel(b, pixel, 0), buffer_offset, sd);
440 nir_pop_if(b, NULL);
441 } else {
442 nir_push_if(b, nir_ieq_imm(b, sd->channels, 1));
443 write_conversion(b, nir_channel(b, pixel, 0), buffer_offset, sd);
444 nir_push_else(b, NULL);
445 nir_push_if(b, nir_ieq_imm(b, sd->channels, 2));
446 write_conversion(b, nir_trim_vector(b, pixel, 2), buffer_offset,
447 sd);
448 nir_push_else(b, NULL);
449 nir_push_if(b, nir_ieq_imm(b, sd->channels, 3));
450 write_conversion(b, nir_trim_vector(b, pixel, 3),
451 buffer_offset, sd);
452 nir_push_else(b, NULL);
453 write_conversion(b, nir_trim_vector(b, pixel, 4),
454 buffer_offset, sd);
455 nir_pop_if(b, NULL);
456 nir_pop_if(b, NULL);
457 nir_pop_if(b, NULL);
458 }
459 }
460
461 /* if byteswap is enabled, handle that and then write the components */
462 static void
handle_swap(nir_builder * b,nir_def * pixel,nir_def * buffer_offset,struct pbo_shader_data * sd,unsigned num_components,bool weird_packed)463 handle_swap(nir_builder *b, nir_def *pixel, nir_def *buffer_offset,
464 struct pbo_shader_data *sd, unsigned num_components, bool weird_packed)
465 {
466 nir_push_if(b, sd->swap); {
467 nir_push_if(b, nir_ieq_imm(b, nir_udiv_imm(b, sd->blocksize, num_components), 2)); {
468 /* this is a single high/low swap per component */
469 nir_def *components[4];
470 for (unsigned i = 0; i < 4; i++)
471 components[i] = swap2(b, nir_channel(b, pixel, i));
472 nir_def *v = nir_vec(b, components, 4);
473 grab_components(b, v, buffer_offset, sd, weird_packed);
474 } nir_push_else(b, NULL); {
475 /* this is a pair of high/low swaps for each half of the component */
476 nir_def *components[4];
477 for (unsigned i = 0; i < 4; i++)
478 components[i] = swap4(b, nir_channel(b, pixel, i));
479 nir_def *v = nir_vec(b, components, 4);
480 grab_components(b, v, buffer_offset, sd, weird_packed);
481 } nir_pop_if(b, NULL);
482 } nir_push_else(b, NULL); {
483 /* swap disabled */
484 grab_components(b, pixel, buffer_offset, sd, weird_packed);
485 } nir_pop_if(b, NULL);
486 }
487
488 static nir_def *
check_for_weird_packing(nir_builder * b,struct pbo_shader_data * sd,unsigned component)489 check_for_weird_packing(nir_builder *b, struct pbo_shader_data *sd, unsigned component)
490 {
491 nir_def *c = nir_channel(b, sd->bits, component - 1);
492
493 return nir_bcsel(b,
494 nir_ige_imm(b, sd->channels, component),
495 nir_ior(b,
496 nir_ine(b, c, sd->bits1),
497 nir_ine_imm(b, nir_imod_imm(b, c, 8), 0)),
498 nir_imm_false(b));
499 }
500
501 /* convenience function for clamping signed integers */
502 static inline nir_def *
nir_imin_imax(nir_builder * build,nir_def * src,nir_def * clamp_to_min,nir_def * clamp_to_max)503 nir_imin_imax(nir_builder *build, nir_def *src, nir_def *clamp_to_min, nir_def *clamp_to_max)
504 {
505 return nir_imax(build, nir_imin(build, src, clamp_to_min), clamp_to_max);
506 }
507
508 static inline nir_def *
nir_format_float_to_unorm_with_factor(nir_builder * b,nir_def * f,nir_def * factor)509 nir_format_float_to_unorm_with_factor(nir_builder *b, nir_def *f, nir_def *factor)
510 {
511 /* Clamp to the range [0, 1] */
512 f = nir_fsat(b, f);
513
514 return nir_f2u32(b, nir_fround_even(b, nir_fmul(b, f, factor)));
515 }
516
517 static inline nir_def *
nir_format_float_to_snorm_with_factor(nir_builder * b,nir_def * f,nir_def * factor)518 nir_format_float_to_snorm_with_factor(nir_builder *b, nir_def *f, nir_def *factor)
519 {
520 /* Clamp to the range [-1, 1] */
521 f = nir_fmin(b, nir_fmax(b, f, nir_imm_float(b, -1)), nir_imm_float(b, 1));
522
523 return nir_f2i32(b, nir_fround_even(b, nir_fmul(b, f, factor)));
524 }
525
526 static nir_def *
clamp_and_mask(nir_builder * b,nir_def * src,nir_def * channels)527 clamp_and_mask(nir_builder *b, nir_def *src, nir_def *channels)
528 {
529 nir_def *one = nir_imm_ivec4(b, 1, 0, 0, 0);
530 nir_def *two = nir_imm_ivec4(b, 1, 1, 0, 0);
531 nir_def *three = nir_imm_ivec4(b, 1, 1, 1, 0);
532 nir_def *four = nir_imm_ivec4(b, 1, 1, 1, 1);
533 /* avoid underflow by clamping to channel count */
534 src = nir_bcsel(b,
535 nir_ieq(b, channels, one),
536 nir_isub(b, src, one),
537 nir_bcsel(b,
538 nir_ieq_imm(b, channels, 2),
539 nir_isub(b, src, two),
540 nir_bcsel(b,
541 nir_ieq_imm(b, channels, 3),
542 nir_isub(b, src, three),
543 nir_isub(b, src, four))));
544
545 return nir_mask(b, src, 32);
546 }
547
548 static void
convert_swap_write(nir_builder * b,nir_def * pixel,nir_def * buffer_offset,unsigned num_components,struct pbo_shader_data * sd)549 convert_swap_write(nir_builder *b, nir_def *pixel, nir_def *buffer_offset,
550 unsigned num_components,
551 struct pbo_shader_data *sd)
552 {
553
554 nir_def *weird_packed = nir_ior(b,
555 nir_ior(b,
556 check_for_weird_packing(b, sd, 4),
557 check_for_weird_packing(b, sd, 3)),
558 check_for_weird_packing(b, sd, 2));
559 if (num_components == 1) {
560 nir_push_if(b, weird_packed);
561 nir_push_if(b, sd->r11g11b10_or_sint);
562 handle_swap(b, nir_pad_vec4(b, nir_format_pack_11f11f10f(b, pixel)), buffer_offset, sd, 1, true);
563 nir_push_else(b, NULL);
564 nir_push_if(b, sd->r9g9b9e5);
565 handle_swap(b, nir_pad_vec4(b, nir_format_pack_r9g9b9e5(b, pixel)), buffer_offset, sd, 1, true);
566 nir_push_else(b, NULL);
567 nir_push_if(b, nir_ieq_imm(b, sd->bits1, 32)); { //PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
568 nir_def *pack[2];
569 pack[0] = nir_format_pack_uint_unmasked_ssa(b, nir_channel(b, pixel, 0), nir_channel(b, sd->bits, 0));
570 pack[1] = nir_format_pack_uint_unmasked_ssa(b, nir_channels(b, pixel, 6), nir_channels(b, sd->bits, 6));
571 handle_swap(b, nir_pad_vec4(b, nir_vec2(b, pack[0], pack[1])), buffer_offset, sd, 2, true);
572 } nir_push_else(b, NULL);
573 handle_swap(b, nir_pad_vec4(b, nir_format_pack_uint_unmasked_ssa(b, pixel, sd->bits)), buffer_offset, sd, 1, true);
574 nir_pop_if(b, NULL);
575 nir_pop_if(b, NULL);
576 nir_pop_if(b, NULL);
577 nir_push_else(b, NULL);
578 handle_swap(b, pixel, buffer_offset, sd, num_components, false);
579 nir_pop_if(b, NULL);
580 } else {
581 nir_push_if(b, weird_packed);
582 handle_swap(b, pixel, buffer_offset, sd, num_components, true);
583 nir_push_else(b, NULL);
584 handle_swap(b, pixel, buffer_offset, sd, num_components, false);
585 nir_pop_if(b, NULL);
586 }
587 }
588
589 static void
do_shader_conversion(nir_builder * b,nir_def * pixel,unsigned num_components,nir_def * coord,struct pbo_shader_data * sd)590 do_shader_conversion(nir_builder *b, nir_def *pixel,
591 unsigned num_components,
592 nir_def *coord, struct pbo_shader_data *sd)
593 {
594 nir_def *buffer_offset = get_buffer_offset(b, coord, sd);
595
596 nir_def *signed_bit_mask = clamp_and_mask(b, sd->bits, sd->channels);
597
598 #define CONVERT_SWAP_WRITE(PIXEL) \
599 convert_swap_write(b, PIXEL, buffer_offset, num_components, sd);
600 nir_push_if(b, sd->normalized);
601 nir_push_if(b, sd->clamp_uint); //unorm
602 CONVERT_SWAP_WRITE(nir_format_float_to_unorm_with_factor(b, pixel, nir_u2f32(b, nir_mask(b, sd->bits, 32))));
603 nir_push_else(b, NULL);
604 CONVERT_SWAP_WRITE(nir_format_float_to_snorm_with_factor(b, pixel, nir_u2f32(b, signed_bit_mask)));
605 nir_pop_if(b, NULL);
606 nir_push_else(b, NULL);
607 nir_push_if(b, sd->integer);
608 nir_push_if(b, sd->r11g11b10_or_sint); //sint
609 nir_push_if(b, sd->clamp_uint); //uint -> sint
610 CONVERT_SWAP_WRITE(nir_umin(b, pixel, signed_bit_mask));
611 nir_push_else(b, NULL);
612 CONVERT_SWAP_WRITE(nir_imin_imax(b, pixel, signed_bit_mask, nir_iadd_imm(b, nir_ineg(b, signed_bit_mask), -1)));
613 nir_pop_if(b, NULL);
614 nir_push_else(b, NULL);
615 nir_push_if(b, sd->clamp_uint); //uint
616 /* nir_format_clamp_uint */
617 CONVERT_SWAP_WRITE(nir_umin(b, pixel, nir_mask(b, sd->bits, 32)));
618 nir_pop_if(b, NULL);
619 nir_pop_if(b, NULL);
620 nir_push_else(b, NULL);
621 nir_push_if(b, nir_ieq_imm(b, sd->bits1, 16)); //half
622 CONVERT_SWAP_WRITE(nir_format_float_to_half(b, pixel));
623 nir_push_else(b, NULL);
624 CONVERT_SWAP_WRITE(pixel);
625 nir_pop_if(b, NULL);
626 nir_pop_if(b, NULL);
627 }
628
629 static nir_shader *
create_conversion_shader(struct st_context * st,enum pipe_texture_target target,unsigned num_components)630 create_conversion_shader(struct st_context *st, enum pipe_texture_target target, unsigned num_components)
631 {
632 const nir_shader_compiler_options *options = st_get_nir_compiler_options(st, MESA_SHADER_COMPUTE);
633 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options, "%s", "convert");
634 b.shader->info.workgroup_size[0] = target != PIPE_TEXTURE_1D ? 8 : 64;
635 b.shader->info.workgroup_size[1] = target != PIPE_TEXTURE_1D ? 8 : 1;
636
637 b.shader->info.workgroup_size[2] = 1;
638 b.shader->info.textures_used[0] = 1;
639 b.shader->info.num_ssbos = 1;
640 b.shader->num_uniforms = 2;
641 nir_variable_create(b.shader, nir_var_mem_ssbo, glsl_array_type(glsl_float_type(), 0, 4), "ssbo");
642 nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, st_pbo_sampler_type_for_target(target, ST_PBO_CONVERT_FLOAT), "sampler");
643 unsigned coord_components = glsl_get_sampler_coordinate_components(sampler->type);
644 sampler->data.explicit_binding = 1;
645
646 struct pbo_shader_data sd;
647 init_pbo_shader_data(&b, &sd, coord_components);
648
649 nir_def *bsize = nir_imm_ivec4(&b,
650 b.shader->info.workgroup_size[0],
651 b.shader->info.workgroup_size[1],
652 b.shader->info.workgroup_size[2],
653 0);
654 nir_def *wid = nir_load_workgroup_id(&b);
655 nir_def *iid = nir_load_local_invocation_id(&b);
656 nir_def *tile = nir_imul(&b, wid, bsize);
657 nir_def *global_id = nir_iadd(&b, tile, iid);
658 nir_def *start = nir_iadd(&b, nir_trim_vector(&b, global_id, 2), sd.offset);
659
660 nir_def *coord;
661 if (coord_components < 3)
662 coord = start;
663 else {
664 /* pad offset vec with global_id to get correct z offset */
665 assert(coord_components == 3);
666 coord = nir_vec3(&b, nir_channel(&b, start, 0),
667 nir_channel(&b, start, 1),
668 nir_channel(&b, global_id, 2));
669 }
670 coord = nir_trim_vector(&b, coord, coord_components);
671 nir_def *offset = coord_components > 2 ?
672 nir_pad_vector_imm_int(&b, sd.offset, 0, 3) :
673 nir_trim_vector(&b, sd.offset, coord_components);
674 nir_def *range = nir_trim_vector(&b, sd.range, coord_components);
675 nir_def *max = nir_iadd(&b, offset, range);
676 nir_push_if(&b, nir_ball(&b, nir_ilt(&b, coord, max)));
677 nir_tex_instr *txf = nir_tex_instr_create(b.shader, 3);
678 txf->is_array = glsl_sampler_type_is_array(sampler->type);
679 txf->op = nir_texop_txf;
680 txf->sampler_dim = glsl_get_sampler_dim(sampler->type);
681 txf->dest_type = nir_type_float32;
682 txf->coord_components = coord_components;
683 txf->texture_index = 0;
684 txf->sampler_index = 0;
685 txf->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, coord);
686 txf->src[1] = nir_tex_src_for_ssa(nir_tex_src_lod, nir_imm_int(&b, 0));
687 txf->src[2].src_type = nir_tex_src_texture_deref;
688 nir_deref_instr *sampler_deref = nir_build_deref_var(&b, sampler);
689 txf->src[2].src = nir_src_for_ssa(&sampler_deref->def);
690
691 nir_def_init(&txf->instr, &txf->def, 4, 32);
692 nir_builder_instr_insert(&b, &txf->instr);
693
694 /* pass the grid offset as the coord to get the zero-indexed buffer offset */
695 do_shader_conversion(&b, &txf->def, num_components, global_id, &sd);
696
697 nir_pop_if(&b, NULL);
698
699 nir_validate_shader(b.shader, NULL);
700 gl_nir_opts(b.shader);
701 st_nir_finish_builtin_nir(st, b.shader);
702 return b.shader;
703 }
704
705 static void
invert_swizzle(uint8_t * out,const uint8_t * in)706 invert_swizzle(uint8_t *out, const uint8_t *in)
707 {
708 /* First, default to all zeroes to prevent uninitialized junk */
709 for (unsigned c = 0; c < 4; ++c)
710 out[c] = PIPE_SWIZZLE_0;
711
712 /* Now "do" what the swizzle says */
713 for (unsigned c = 0; c < 4; ++c) {
714 unsigned char i = in[c];
715
716 /* Who cares? */
717 assert(PIPE_SWIZZLE_X == 0);
718 if (i > PIPE_SWIZZLE_W)
719 continue;
720 /* Invert */
721 unsigned idx = i - PIPE_SWIZZLE_X;
722 out[idx] = PIPE_SWIZZLE_X + c;
723 }
724 }
725
726 static uint32_t
compute_shader_key(enum pipe_texture_target target,unsigned num_components)727 compute_shader_key(enum pipe_texture_target target, unsigned num_components)
728 {
729 uint8_t key_target[] = {
730 [PIPE_BUFFER] = UINT8_MAX,
731 [PIPE_TEXTURE_1D] = 1,
732 [PIPE_TEXTURE_2D] = 2,
733 [PIPE_TEXTURE_3D] = 3,
734 [PIPE_TEXTURE_CUBE] = 4,
735 [PIPE_TEXTURE_RECT] = UINT8_MAX,
736 [PIPE_TEXTURE_1D_ARRAY] = 5,
737 [PIPE_TEXTURE_2D_ARRAY] = 6,
738 [PIPE_TEXTURE_CUBE_ARRAY] = UINT8_MAX,
739 };
740 assert(target < ARRAY_SIZE(key_target));
741 assert(key_target[target] != UINT8_MAX);
742 return key_target[target] | (num_components << 3);
743 }
744
745 static unsigned
get_dim_from_target(enum pipe_texture_target target)746 get_dim_from_target(enum pipe_texture_target target)
747 {
748 switch (target) {
749 case PIPE_TEXTURE_1D:
750 return 1;
751 case PIPE_TEXTURE_2D_ARRAY:
752 case PIPE_TEXTURE_3D:
753 return 3;
754 default:
755 return 2;
756 }
757 }
758
759 static enum pipe_texture_target
get_target_from_texture(struct pipe_resource * src)760 get_target_from_texture(struct pipe_resource *src)
761 {
762 enum pipe_texture_target view_target;
763 switch (src->target) {
764 case PIPE_TEXTURE_RECT:
765 view_target = PIPE_TEXTURE_2D;
766 break;
767 case PIPE_TEXTURE_CUBE:
768 case PIPE_TEXTURE_CUBE_ARRAY:
769 view_target = PIPE_TEXTURE_2D_ARRAY;
770 break;
771 default:
772 view_target = src->target;
773 break;
774 }
775 return view_target;
776 }
777
778 /* force swizzling behavior for sampling */
779 enum swizzle_clamp {
780 /* force component selection for named format */
781 SWIZZLE_CLAMP_LUMINANCE = 1,
782 SWIZZLE_CLAMP_ALPHA = 2,
783 SWIZZLE_CLAMP_LUMINANCE_ALPHA = 3,
784 SWIZZLE_CLAMP_INTENSITY = 4,
785 SWIZZLE_CLAMP_RGBX = 5,
786
787 /* select only 1 component */
788 SWIZZLE_CLAMP_GREEN = 8,
789 SWIZZLE_CLAMP_BLUE = 16,
790
791 /* reverse ordering for format emulation */
792 SWIZZLE_CLAMP_BGRA = 32,
793 };
794
795 static bool
can_copy_direct(const struct gl_pixelstore_attrib * pack)796 can_copy_direct(const struct gl_pixelstore_attrib *pack)
797 {
798 return !(pack->RowLength ||
799 pack->SkipPixels ||
800 pack->SkipRows ||
801 pack->ImageHeight ||
802 pack->SkipImages);
803 }
804
805 static void
create_conversion_shader_async(void * data,void * gdata,int thread_index)806 create_conversion_shader_async(void *data, void *gdata, int thread_index)
807 {
808 struct pbo_async_data *async = data;
809 async->nir = create_conversion_shader(async->st, async->target, async->num_components);
810 /* this is hefty, but specialized shaders need a base to work from */
811 async->copy = nir_shader_clone(NULL, async->nir);
812 }
813
814 static void
create_spec_shader_async(void * data,void * gdata,int thread_index)815 create_spec_shader_async(void *data, void *gdata, int thread_index)
816 {
817 struct pbo_spec_async_data *spec = data;
818 /* this is still the immutable clone: create our own copy */
819 spec->nir = nir_shader_clone(NULL, spec->nir);
820 /* do not inline geometry */
821 uint16_t offsets[2] = {2, 3};
822 nir_inline_uniforms(spec->nir, ARRAY_SIZE(offsets), &spec->data[2], offsets);
823 spec->created = true;
824 }
825
826 static uint32_t
hash_pbo_data(const void * data)827 hash_pbo_data(const void *data)
828 {
829 const struct pbo_data *p = data;
830 return _mesa_hash_data(&p->vec[2], sizeof(uint32_t) * 2);
831 }
832
833 static bool
equals_pbo_data(const void * a,const void * b)834 equals_pbo_data(const void *a, const void *b)
835 {
836 const struct pbo_data *pa = a, *pb = b;
837 return !memcmp(&pa->vec[2], &pb->vec[2], sizeof(uint32_t) * 2);
838 }
839
840 static struct pbo_spec_async_data *
add_spec_data(struct pbo_async_data * async,struct pbo_data * pd)841 add_spec_data(struct pbo_async_data *async, struct pbo_data *pd)
842 {
843 bool found = false;
844 struct pbo_spec_async_data *spec;
845 struct set_entry *entry = _mesa_set_search_or_add(&async->specialized, pd, &found);
846 if (!found) {
847 spec = calloc(1, sizeof(struct pbo_async_data));
848 util_queue_fence_init(&spec->fence);
849 memcpy(spec->data, pd, sizeof(struct pbo_data));
850 entry->key = spec;
851 }
852 spec = (void*)entry->key;
853 if (!spec->nir && !spec->created)
854 spec->nir = async->copy;
855 spec->uses++;
856 return spec;
857 }
858
859 static struct pbo_async_data *
add_async_data(struct st_context * st,enum pipe_texture_target view_target,unsigned num_components,uint32_t hash_key)860 add_async_data(struct st_context *st, enum pipe_texture_target view_target, unsigned num_components, uint32_t hash_key)
861 {
862 struct pbo_async_data *async = calloc(1, sizeof(struct pbo_async_data));
863 async->st = st;
864 async->target = view_target;
865 async->num_components = num_components;
866 util_queue_fence_init(&async->fence);
867 _mesa_hash_table_insert(st->pbo.shaders, (void*)(uintptr_t)hash_key, async);
868 _mesa_set_init(&async->specialized, NULL, hash_pbo_data, equals_pbo_data);
869 return async;
870 }
871
872 static struct pipe_resource *
download_texture_compute(struct st_context * st,const struct gl_pixelstore_attrib * pack,GLint xoffset,GLint yoffset,GLint zoffset,GLsizei width,GLsizei height,GLint depth,unsigned level,unsigned layer,GLenum format,GLenum type,enum pipe_format src_format,enum pipe_texture_target view_target,struct pipe_resource * src,enum pipe_format dst_format,enum swizzle_clamp swizzle_clamp)873 download_texture_compute(struct st_context *st,
874 const struct gl_pixelstore_attrib *pack,
875 GLint xoffset, GLint yoffset, GLint zoffset,
876 GLsizei width, GLsizei height, GLint depth,
877 unsigned level, unsigned layer,
878 GLenum format, GLenum type,
879 enum pipe_format src_format,
880 enum pipe_texture_target view_target,
881 struct pipe_resource *src,
882 enum pipe_format dst_format,
883 enum swizzle_clamp swizzle_clamp)
884 {
885 struct pipe_context *pipe = st->pipe;
886 struct pipe_screen *screen = st->screen;
887 struct pipe_resource *dst = NULL;
888 unsigned dim = get_dim_from_target(view_target);
889
890 /* clamp 3d offsets based on slice */
891 if (view_target == PIPE_TEXTURE_3D)
892 zoffset += layer;
893
894 unsigned num_components = 0;
895 /* Upload constants */
896 struct pipe_constant_buffer cb;
897 assert(view_target != PIPE_TEXTURE_1D_ARRAY || !yoffset);
898 struct pbo_data pd = {
899 .x = MIN2(xoffset, 65535),
900 .y = view_target == PIPE_TEXTURE_1D_ARRAY ? 0 : MIN2(yoffset, 65535),
901 .width = MIN2(width, 65535),
902 .height = MIN2(height, 65535),
903 .depth = MIN2(depth, 65535),
904 .invert = pack->Invert,
905 .blocksize = util_format_get_blocksize(dst_format) - 1,
906 .alignment = ffs(MAX2(pack->Alignment, 1)) - 1,
907 };
908 num_components = fill_pbo_data(&pd, src_format, dst_format, pack->SwapBytes == 1);
909
910 cb.buffer = NULL;
911 cb.user_buffer = &pd;
912 cb.buffer_offset = 0;
913 cb.buffer_size = sizeof(pd);
914
915 uint32_t hash_key = compute_shader_key(view_target, num_components);
916 assert(hash_key != 0);
917
918 struct hash_entry *he = _mesa_hash_table_search(st->pbo.shaders, (void*)(uintptr_t)hash_key);
919 void *cs = NULL;
920 if (he) {
921 /* disable async if MESA_COMPUTE_PBO is set */
922 if (st->force_specialized_compute_transfer) {
923 struct pbo_async_data *async = he->data;
924 struct pbo_spec_async_data *spec = add_spec_data(async, &pd);
925 if (spec->cs) {
926 cs = spec->cs;
927 } else {
928 create_spec_shader_async(spec, NULL, 0);
929 struct pipe_shader_state state = {
930 .type = PIPE_SHADER_IR_NIR,
931 .ir.nir = spec->nir,
932 };
933 cs = spec->cs = st_create_nir_shader(st, &state);
934 }
935 cb.buffer_size = 2 * sizeof(uint32_t);
936 } else if (!st->force_compute_based_texture_transfer && screen->driver_thread_add_job) {
937 struct pbo_async_data *async = he->data;
938 struct pbo_spec_async_data *spec = add_spec_data(async, &pd);
939 if (!util_queue_fence_is_signalled(&async->fence))
940 return NULL;
941 /* nir is definitely done */
942 if (!async->cs) {
943 /* cs job not yet started */
944 assert(async->nir && !async->cs);
945 async->cs = pipe_shader_from_nir(pipe, async->nir);
946 async->nir = NULL;
947 }
948 /* cs *may* be done */
949 if (screen->is_parallel_shader_compilation_finished &&
950 !screen->is_parallel_shader_compilation_finished(screen, async->cs, MESA_SHADER_COMPUTE))
951 return NULL;
952 cs = async->cs;
953 if (spec->uses > SPEC_USES_THRESHOLD && util_queue_fence_is_signalled(&spec->fence)) {
954 if (spec->created) {
955 if (!spec->cs) {
956 spec->cs = pipe_shader_from_nir(pipe, spec->nir);
957 spec->nir = NULL;
958 }
959 if (screen->is_parallel_shader_compilation_finished &&
960 screen->is_parallel_shader_compilation_finished(screen, spec->cs, MESA_SHADER_COMPUTE)) {
961 cs = spec->cs;
962 cb.buffer_size = 2 * sizeof(uint32_t);
963 }
964 } else {
965 screen->driver_thread_add_job(screen, spec, &spec->fence, create_spec_shader_async, NULL, 0);
966 }
967 }
968 } else {
969 cs = he->data;
970 }
971 } else {
972 if (!st->force_compute_based_texture_transfer && screen->driver_thread_add_job) {
973 struct pbo_async_data *async = add_async_data(st, view_target, num_components, hash_key);
974 screen->driver_thread_add_job(screen, async, &async->fence, create_conversion_shader_async, NULL, 0);
975 add_spec_data(async, &pd);
976 return NULL;
977 }
978
979 if (st->force_specialized_compute_transfer) {
980 struct pbo_async_data *async = add_async_data(st, view_target, num_components, hash_key);
981 create_conversion_shader_async(async, NULL, 0);
982 struct pbo_spec_async_data *spec = add_spec_data(async, &pd);
983 create_spec_shader_async(spec, NULL, 0);
984 struct pipe_shader_state state = {
985 .type = PIPE_SHADER_IR_NIR,
986 .ir.nir = spec->nir,
987 };
988 cs = spec->cs = st_create_nir_shader(st, &state);
989 cb.buffer_size = 2 * sizeof(uint32_t);
990 } else {
991 nir_shader *nir = create_conversion_shader(st, view_target, num_components);
992 struct pipe_shader_state state = {
993 .type = PIPE_SHADER_IR_NIR,
994 .ir.nir = nir,
995 };
996 cs = st_create_nir_shader(st, &state);
997 he = _mesa_hash_table_insert(st->pbo.shaders, (void*)(uintptr_t)hash_key, cs);
998 }
999 }
1000 assert(cs);
1001 struct cso_context *cso = st->cso_context;
1002
1003 pipe->set_constant_buffer(pipe, PIPE_SHADER_COMPUTE, 0, false, &cb);
1004
1005 cso_save_compute_state(cso, CSO_BIT_COMPUTE_SHADER | CSO_BIT_COMPUTE_SAMPLERS);
1006 cso_set_compute_shader_handle(cso, cs);
1007
1008 /* Set up the sampler_view */
1009 {
1010 struct pipe_sampler_view templ;
1011 struct pipe_sampler_view *sampler_view;
1012 struct pipe_sampler_state sampler = {0};
1013 const struct pipe_sampler_state *samplers[1] = {&sampler};
1014 const struct util_format_description *desc = util_format_description(dst_format);
1015
1016 u_sampler_view_default_template(&templ, src, src_format);
1017 if (util_format_is_depth_or_stencil(dst_format)) {
1018 templ.swizzle_r = PIPE_SWIZZLE_X;
1019 templ.swizzle_g = PIPE_SWIZZLE_X;
1020 templ.swizzle_b = PIPE_SWIZZLE_X;
1021 templ.swizzle_a = PIPE_SWIZZLE_X;
1022 } else {
1023 uint8_t invswizzle[4];
1024 const uint8_t *swizzle;
1025
1026 /* these swizzle output bits require explicit component selection/ordering */
1027 if (swizzle_clamp & SWIZZLE_CLAMP_GREEN) {
1028 for (unsigned i = 0; i < 4; i++)
1029 invswizzle[i] = PIPE_SWIZZLE_Y;
1030 } else if (swizzle_clamp & SWIZZLE_CLAMP_BLUE) {
1031 for (unsigned i = 0; i < 4; i++)
1032 invswizzle[i] = PIPE_SWIZZLE_Z;
1033 } else {
1034 if (swizzle_clamp & SWIZZLE_CLAMP_BGRA) {
1035 if (util_format_get_nr_components(dst_format) == 3)
1036 swizzle = util_format_description(PIPE_FORMAT_B8G8R8_UNORM)->swizzle;
1037 else
1038 swizzle = util_format_description(PIPE_FORMAT_B8G8R8A8_UNORM)->swizzle;
1039 } else {
1040 swizzle = desc->swizzle;
1041 }
1042 invert_swizzle(invswizzle, swizzle);
1043 }
1044 swizzle_clamp &= ~(SWIZZLE_CLAMP_BGRA | SWIZZLE_CLAMP_GREEN | SWIZZLE_CLAMP_BLUE);
1045
1046 /* these swizzle input modes clamp unused components to 0 and (sometimes) alpha to 1 */
1047 switch (swizzle_clamp) {
1048 case SWIZZLE_CLAMP_LUMINANCE:
1049 if (util_format_is_luminance(dst_format))
1050 break;
1051 for (unsigned i = 0; i < 4; i++) {
1052 if (invswizzle[i] != PIPE_SWIZZLE_X)
1053 invswizzle[i] = invswizzle[i] == PIPE_SWIZZLE_W ? PIPE_SWIZZLE_1 : PIPE_SWIZZLE_0;
1054 }
1055 break;
1056 case SWIZZLE_CLAMP_ALPHA:
1057 for (unsigned i = 0; i < 4; i++) {
1058 if (invswizzle[i] != PIPE_SWIZZLE_W)
1059 invswizzle[i] = PIPE_SWIZZLE_0;
1060 }
1061 break;
1062 case SWIZZLE_CLAMP_LUMINANCE_ALPHA:
1063 if (util_format_is_luminance_alpha(dst_format))
1064 break;
1065 for (unsigned i = 0; i < 4; i++) {
1066 if (invswizzle[i] != PIPE_SWIZZLE_X && invswizzle[i] != PIPE_SWIZZLE_W)
1067 invswizzle[i] = PIPE_SWIZZLE_0;
1068 }
1069 break;
1070 case SWIZZLE_CLAMP_INTENSITY:
1071 for (unsigned i = 0; i < 4; i++) {
1072 if (invswizzle[i] == PIPE_SWIZZLE_W)
1073 invswizzle[i] = PIPE_SWIZZLE_1;
1074 else if (invswizzle[i] != PIPE_SWIZZLE_X)
1075 invswizzle[i] = PIPE_SWIZZLE_0;
1076 }
1077 break;
1078 case SWIZZLE_CLAMP_RGBX:
1079 for (unsigned i = 0; i < 4; i++) {
1080 if (invswizzle[i] == PIPE_SWIZZLE_W)
1081 invswizzle[i] = PIPE_SWIZZLE_1;
1082 }
1083 break;
1084 default: break;
1085 }
1086 templ.swizzle_r = invswizzle[0];
1087 templ.swizzle_g = invswizzle[1];
1088 templ.swizzle_b = invswizzle[2];
1089 templ.swizzle_a = invswizzle[3];
1090 }
1091 templ.target = view_target;
1092 templ.u.tex.first_level = level;
1093 templ.u.tex.last_level = level;
1094
1095 /* array textures expect to have array index provided */
1096 if (view_target != PIPE_TEXTURE_3D && src->array_size) {
1097 templ.u.tex.first_layer = layer;
1098 if (view_target == PIPE_TEXTURE_1D_ARRAY) {
1099 templ.u.tex.first_layer += yoffset;
1100 templ.u.tex.last_layer = templ.u.tex.first_layer + height - 1;
1101 } else {
1102 templ.u.tex.first_layer += zoffset;
1103 templ.u.tex.last_layer = templ.u.tex.first_layer + depth - 1;
1104 }
1105 }
1106
1107 sampler_view = pipe->create_sampler_view(pipe, src, &templ);
1108 if (sampler_view == NULL)
1109 goto fail;
1110
1111 pipe->set_sampler_views(pipe, PIPE_SHADER_COMPUTE, 0, 1, 0, false,
1112 &sampler_view);
1113 st->state.num_sampler_views[PIPE_SHADER_COMPUTE] =
1114 MAX2(st->state.num_sampler_views[PIPE_SHADER_COMPUTE], 1);
1115
1116 pipe_sampler_view_reference(&sampler_view, NULL);
1117
1118 cso_set_samplers(cso, PIPE_SHADER_COMPUTE, 1, samplers);
1119 }
1120
1121 /* Set up destination buffer */
1122 intptr_t img_stride = src->target == PIPE_TEXTURE_3D ||
1123 src->target == PIPE_TEXTURE_2D_ARRAY ||
1124 src->target == PIPE_TEXTURE_CUBE_ARRAY ?
1125 /* only use image stride for 3d images to avoid pulling in IMAGE_HEIGHT pixelstore */
1126 _mesa_image_image_stride(pack, width, height, format, type) :
1127 _mesa_image_row_stride(pack, width, format, type) * height;
1128 intptr_t buffer_size = (depth + (dim == 3 ? pack->SkipImages : 0)) * img_stride;
1129 assert(buffer_size <= UINT32_MAX);
1130 {
1131 struct pipe_shader_buffer buffer;
1132 memset(&buffer, 0, sizeof(buffer));
1133 if (can_copy_direct(pack) && pack->BufferObj) {
1134 dst = pack->BufferObj->buffer;
1135 assert(pack->BufferObj->Size >= buffer_size);
1136 } else {
1137 dst = pipe_buffer_create(screen, PIPE_BIND_SHADER_BUFFER, PIPE_USAGE_STAGING, buffer_size);
1138 if (!dst)
1139 goto fail;
1140 }
1141 buffer.buffer = dst;
1142 buffer.buffer_size = buffer_size;
1143
1144 pipe->set_shader_buffers(pipe, PIPE_SHADER_COMPUTE, 0, 1, &buffer, 0x1);
1145 }
1146
1147 struct pipe_grid_info info = { 0 };
1148 info.block[0] = src->target != PIPE_TEXTURE_1D ? 8 : 64;
1149 info.block[1] = src->target != PIPE_TEXTURE_1D ? 8 : 1;
1150 info.last_block[0] = width % info.block[0];
1151 info.last_block[1] = height % info.block[1];
1152 info.block[2] = 1;
1153 info.grid[0] = DIV_ROUND_UP(width, info.block[0]);
1154 info.grid[1] = DIV_ROUND_UP(height, info.block[1]);
1155 info.grid[2] = depth;
1156
1157 pipe->launch_grid(pipe, &info);
1158
1159 fail:
1160 cso_restore_compute_state(cso);
1161
1162 /* Unbind all because st/mesa won't do it if the current shader doesn't
1163 * use them.
1164 */
1165 pipe->set_sampler_views(pipe, PIPE_SHADER_COMPUTE, 0, 0,
1166 st->state.num_sampler_views[PIPE_SHADER_COMPUTE],
1167 false, NULL);
1168 st->state.num_sampler_views[PIPE_SHADER_COMPUTE] = 0;
1169 pipe->set_shader_buffers(pipe, PIPE_SHADER_COMPUTE, 0, 1, NULL, 0);
1170
1171 st->ctx->NewDriverState |= ST_NEW_CS_CONSTANTS |
1172 ST_NEW_CS_SSBOS |
1173 ST_NEW_CS_SAMPLER_VIEWS;
1174
1175 return dst;
1176 }
1177
1178 static void
copy_converted_buffer(struct gl_context * ctx,struct gl_pixelstore_attrib * pack,enum pipe_texture_target view_target,struct pipe_resource * dst,enum pipe_format dst_format,GLint xoffset,GLint yoffset,GLint zoffset,GLsizei width,GLsizei height,GLint depth,GLenum format,GLenum type,void * pixels)1179 copy_converted_buffer(struct gl_context * ctx,
1180 struct gl_pixelstore_attrib *pack,
1181 enum pipe_texture_target view_target,
1182 struct pipe_resource *dst, enum pipe_format dst_format,
1183 GLint xoffset, GLint yoffset, GLint zoffset,
1184 GLsizei width, GLsizei height, GLint depth,
1185 GLenum format, GLenum type, void *pixels)
1186 {
1187 struct pipe_transfer *xfer;
1188 struct st_context *st = st_context(ctx);
1189 unsigned dim = get_dim_from_target(view_target);
1190 uint8_t *map = pipe_buffer_map(st->pipe, dst, PIPE_MAP_READ | PIPE_MAP_ONCE, &xfer);
1191 if (!map)
1192 return;
1193
1194 pixels = _mesa_map_pbo_dest(ctx, pack, pixels);
1195 /* compute shader doesn't handle these to cut down on uniform size */
1196 if (!can_copy_direct(pack)) {
1197 if (view_target == PIPE_TEXTURE_1D_ARRAY) {
1198 depth = height;
1199 height = 1;
1200 zoffset = yoffset;
1201 yoffset = 0;
1202 }
1203
1204 struct gl_pixelstore_attrib packing = *pack;
1205
1206 /* source image is tightly packed */
1207 packing.RowLength = 0;
1208 packing.SkipPixels = 0;
1209 packing.SkipRows = 0;
1210 packing.ImageHeight = 0;
1211 packing.SkipImages = 0;
1212
1213 for (unsigned z = 0; z < depth; z++) {
1214 for (unsigned y = 0; y < height; y++) {
1215 GLubyte *dst = _mesa_image_address(dim, pack, pixels,
1216 width, height, format, type,
1217 z, y, 0);
1218 GLubyte *srcpx = _mesa_image_address(dim, &packing, map,
1219 width, height, format, type,
1220 z, y, 0);
1221 util_streaming_load_memcpy(dst, srcpx, util_format_get_stride(dst_format, width));
1222 }
1223 }
1224 } else {
1225 /* direct copy for all other cases */
1226 util_streaming_load_memcpy(pixels, map, dst->width0);
1227 }
1228
1229 _mesa_unmap_pbo_dest(ctx, pack);
1230 pipe_buffer_unmap(st->pipe, xfer);
1231 }
1232
1233 bool
st_GetTexSubImage_shader(struct gl_context * ctx,GLint xoffset,GLint yoffset,GLint zoffset,GLsizei width,GLsizei height,GLint depth,GLenum format,GLenum type,void * pixels,struct gl_texture_image * texImage)1234 st_GetTexSubImage_shader(struct gl_context * ctx,
1235 GLint xoffset, GLint yoffset, GLint zoffset,
1236 GLsizei width, GLsizei height, GLint depth,
1237 GLenum format, GLenum type, void * pixels,
1238 struct gl_texture_image *texImage)
1239 {
1240 struct st_context *st = st_context(ctx);
1241 struct pipe_screen *screen = st->screen;
1242 struct gl_texture_object *stObj = texImage->TexObject;
1243 struct pipe_resource *src = texImage->pt;
1244 struct pipe_resource *dst = NULL;
1245 enum pipe_format dst_format, src_format;
1246 unsigned level = (texImage->pt != stObj->pt ? 0 : texImage->Level) + texImage->TexObject->Attrib.MinLevel;
1247 unsigned layer = texImage->Face + texImage->TexObject->Attrib.MinLayer;
1248 enum pipe_texture_target view_target;
1249
1250 assert(!_mesa_is_format_etc2(texImage->TexFormat) &&
1251 !_mesa_is_format_astc_2d(texImage->TexFormat) &&
1252 texImage->TexFormat != MESA_FORMAT_ETC1_RGB8);
1253
1254 /* See if the texture format already matches the format and type,
1255 * in which case the memcpy-based fast path will be used. */
1256 if (_mesa_format_matches_format_and_type(texImage->TexFormat, format,
1257 type, ctx->Pack.SwapBytes, NULL)) {
1258 return false;
1259 }
1260 enum swizzle_clamp swizzle_clamp = 0;
1261 src_format = st_pbo_get_src_format(screen, stObj->surface_based ? stObj->surface_format : src->format, src);
1262 if (src_format == PIPE_FORMAT_NONE)
1263 return false;
1264
1265 if (texImage->_BaseFormat != _mesa_get_format_base_format(texImage->TexFormat)) {
1266 /* special handling for drivers that don't support these formats natively */
1267 if (texImage->_BaseFormat == GL_LUMINANCE)
1268 swizzle_clamp = SWIZZLE_CLAMP_LUMINANCE;
1269 else if (texImage->_BaseFormat == GL_LUMINANCE_ALPHA)
1270 swizzle_clamp = SWIZZLE_CLAMP_LUMINANCE_ALPHA;
1271 else if (texImage->_BaseFormat == GL_ALPHA)
1272 swizzle_clamp = SWIZZLE_CLAMP_ALPHA;
1273 else if (texImage->_BaseFormat == GL_INTENSITY)
1274 swizzle_clamp = SWIZZLE_CLAMP_INTENSITY;
1275 else if (texImage->_BaseFormat == GL_RGB)
1276 swizzle_clamp = SWIZZLE_CLAMP_RGBX;
1277 }
1278
1279 dst_format = st_pbo_get_dst_format(ctx, PIPE_BUFFER, src_format, false, format, type, 0);
1280
1281 if (dst_format == PIPE_FORMAT_NONE) {
1282 bool need_bgra_swizzle = false;
1283 dst_format = get_convert_format(ctx, src_format, format, type, &need_bgra_swizzle);
1284 if (dst_format == PIPE_FORMAT_NONE)
1285 return false;
1286 /* special swizzling for component selection */
1287 if (need_bgra_swizzle)
1288 swizzle_clamp |= SWIZZLE_CLAMP_BGRA;
1289 else if (format == GL_GREEN_INTEGER)
1290 swizzle_clamp |= SWIZZLE_CLAMP_GREEN;
1291 else if (format == GL_BLUE_INTEGER)
1292 swizzle_clamp |= SWIZZLE_CLAMP_BLUE;
1293 }
1294
1295 /* check with the driver to see if memcpy is likely to be faster */
1296 if (!st->force_compute_based_texture_transfer &&
1297 !screen->is_compute_copy_faster(screen, src_format, dst_format, width, height, depth, true))
1298 return false;
1299
1300 view_target = get_target_from_texture(src);
1301 /* I don't know why this works
1302 * only for the texture rects
1303 * but that's how it is
1304 */
1305 if ((src->target != PIPE_TEXTURE_RECT &&
1306 /* this would need multiple samplerviews */
1307 ((util_format_is_depth_and_stencil(src_format) && util_format_is_depth_and_stencil(dst_format)) ||
1308 /* these format just doesn't work and science can't explain why */
1309 dst_format == PIPE_FORMAT_Z32_FLOAT)) ||
1310 /* L8 -> L32_FLOAT is another thinker */
1311 (!util_format_is_float(src_format) && dst_format == PIPE_FORMAT_L32_FLOAT))
1312 return false;
1313
1314 dst = download_texture_compute(st, &ctx->Pack, xoffset, yoffset, zoffset, width, height, depth,
1315 level, layer, format, type, src_format, view_target, src, dst_format,
1316 swizzle_clamp);
1317 if (!dst)
1318 return false;
1319
1320 if (!can_copy_direct(&ctx->Pack) || !ctx->Pack.BufferObj) {
1321 copy_converted_buffer(ctx, &ctx->Pack, view_target, dst, dst_format, xoffset, yoffset, zoffset,
1322 width, height, depth, format, type, pixels);
1323
1324 pipe_resource_reference(&dst, NULL);
1325 }
1326
1327 return true;
1328 }
1329
1330 void
st_pbo_compute_deinit(struct st_context * st)1331 st_pbo_compute_deinit(struct st_context *st)
1332 {
1333 struct pipe_screen *screen = st->screen;
1334 if (!st->pbo.shaders)
1335 return;
1336 hash_table_foreach(st->pbo.shaders, entry) {
1337 if (st->force_specialized_compute_transfer ||
1338 (!st->force_compute_based_texture_transfer && screen->driver_thread_add_job)) {
1339 struct pbo_async_data *async = entry->data;
1340 util_queue_fence_wait(&async->fence);
1341 if (async->cs)
1342 st->pipe->delete_compute_state(st->pipe, async->cs);
1343 util_queue_fence_destroy(&async->fence);
1344 ralloc_free(async->copy);
1345 set_foreach_remove(&async->specialized, se) {
1346 struct pbo_spec_async_data *spec = (void*)se->key;
1347 util_queue_fence_wait(&spec->fence);
1348 util_queue_fence_destroy(&spec->fence);
1349 if (spec->created) {
1350 ralloc_free(spec->nir);
1351 st->pipe->delete_compute_state(st->pipe, spec->cs);
1352 }
1353 free(spec);
1354 }
1355 ralloc_free(async->specialized.table);
1356 free(async);
1357 } else {
1358 st->pipe->delete_compute_state(st->pipe, entry->data);
1359 }
1360 }
1361 _mesa_hash_table_destroy(st->pbo.shaders, NULL);
1362 }
1363