/*
 * Copyright 2007 VMware, Inc.
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

/**
 * \file
 *
 * Common helper functions for PBO up- and downloads.
 */

#include "state_tracker/st_context.h"
#include "state_tracker/st_nir.h"
#include "state_tracker/st_pbo.h"

#include "main/context.h"
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
#include "cso_cache/cso_context.h"
#include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/u_upload_mgr.h"

#include "compiler/nir/nir_builder.h"
/* Final setup of buffer addressing information.
 *
 * buf_offset is in pixels.
 *
 * Returns false if something (e.g. alignment) prevents PBO upload/download.
 */
bool
st_pbo_addresses_setup(struct st_context *st,
                       struct pipe_resource *buf, intptr_t buf_offset,
                       struct st_pbo_addresses *addr)
{
   unsigned skip_pixels;

   /* Check alignment against texture buffer requirements. */
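   /* Illustrative example (hypothetical numbers, not from the original code):
    * with bytes_per_pixel = 4 and TextureBufferOffsetAlignment = 16, a
    * buf_offset of 5 pixels is 20 bytes, so ofs = 20 % 16 = 4 bytes,
    * skip_pixels = 1 and buf_offset drops to 4 pixels (16 bytes, aligned).
    * The skipped leading pixel is compensated for in the shader through
    * constants.xoffset below.
    */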
   {
      unsigned ofs = (buf_offset * addr->bytes_per_pixel) % st->ctx->Const.TextureBufferOffsetAlignment;
      if (ofs != 0) {
         if (ofs % addr->bytes_per_pixel != 0)
            return false;

         skip_pixels = ofs / addr->bytes_per_pixel;
         buf_offset -= skip_pixels;
      } else {
         skip_pixels = 0;
      }
   }

   assert(buf_offset >= 0);

   addr->buffer = buf;
   addr->first_element = buf_offset;
   addr->last_element = buf_offset + skip_pixels + addr->width - 1
         + (addr->height - 1 + (addr->depth - 1) * addr->image_height) * addr->pixels_per_row;

   if (addr->last_element - addr->first_element > st->ctx->Const.MaxTextureBufferSize - 1)
      return false;

   /* This should be ensured by Mesa before calling our callbacks */
   assert((addr->last_element + 1) * addr->bytes_per_pixel <= buf->width0);

   addr->constants.xoffset = -addr->xoffset + skip_pixels;
   addr->constants.yoffset = -addr->yoffset;
   addr->constants.stride = addr->pixels_per_row;
   addr->constants.image_size = addr->pixels_per_row * addr->image_height;
   addr->constants.layer_offset = 0;

   return true;
}

/* Validate and fill buffer addressing information based on GL pixelstore
 * attributes.
 *
 * Returns false if some aspect of the addressing (e.g. alignment) prevents
 * PBO upload/download.
 */
bool
st_pbo_addresses_pixelstore(struct st_context *st,
                            GLenum gl_target, bool skip_images,
                            const struct gl_pixelstore_attrib *store,
                            const void *pixels,
                            struct st_pbo_addresses *addr)
{
   struct pipe_resource *buf = store->BufferObj->buffer;
   intptr_t buf_offset = (intptr_t) pixels;

   if (buf_offset % addr->bytes_per_pixel)
      return false;

   /* Convert to texels */
   buf_offset = buf_offset / addr->bytes_per_pixel;

   /* Determine image height */
   if (gl_target == GL_TEXTURE_1D_ARRAY) {
      addr->image_height = 1;
   } else {
      addr->image_height = store->ImageHeight > 0 ? store->ImageHeight : addr->height;
   }

   /* Compute the stride, taking store->Alignment into account */
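   /* Illustrative example (hypothetical numbers): with width = 10,
    * bytes_per_pixel = 1 and store->Alignment = 4, bytes_per_row starts at
    * 10, remainder = 2, and the row is padded to 12 bytes, i.e.
    * pixels_per_row = 12. If the padded row size were not a multiple of
    * bytes_per_pixel, the buffer could not be addressed in whole texels and
    * we bail out.
    */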
   {
      unsigned pixels_per_row = store->RowLength > 0 ?
                                store->RowLength : addr->width;
      unsigned bytes_per_row = pixels_per_row * addr->bytes_per_pixel;
      unsigned remainder = bytes_per_row % store->Alignment;
      unsigned offset_rows;

      if (remainder > 0)
         bytes_per_row += store->Alignment - remainder;

      if (bytes_per_row % addr->bytes_per_pixel)
         return false;

      addr->pixels_per_row = bytes_per_row / addr->bytes_per_pixel;

      offset_rows = store->SkipRows;
      if (skip_images)
         offset_rows += addr->image_height * store->SkipImages;

      buf_offset += store->SkipPixels + addr->pixels_per_row * offset_rows;
   }

   if (!st_pbo_addresses_setup(st, buf, buf_offset, addr))
      return false;

   /* Support GL_PACK_INVERT_MESA */
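   /* Flipping is done by negating the stride and starting at the last row:
    * adding (height - 1) * stride to xoffset moves the base address to the
    * final row, and the negative stride then walks the rows backwards.
    */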
   if (store->Invert) {
      addr->constants.xoffset += (addr->height - 1) * addr->constants.stride;
      addr->constants.stride = -addr->constants.stride;
   }

   return true;
}

/* For downloads from a framebuffer we may have to invert the Y axis. The
 * setup is as follows:
 * - the viewport is set up inverted, so that the position sysval is correct
 *   for texel fetches
 * - this function adjusts the fragment shader's constant buffer so that the
 *   correct destination addresses are computed
 */
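/* Substituting the adjusted constants into the fragment shader's address
 * computation, addr = (x + xoffset) + (y + yoffset) * stride, shows that the
 * new address equals the original address evaluated at
 * y' = viewport_height - 1 - y, i.e. rows end up written in flipped order.
 */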
void
st_pbo_addresses_invert_y(struct st_pbo_addresses *addr,
                          unsigned viewport_height)
{
   addr->constants.xoffset +=
      (viewport_height - 1 + 2 * addr->constants.yoffset) * addr->constants.stride;
   addr->constants.stride = -addr->constants.stride;
}

/* Setup all vertex pipeline state, rasterizer state, and fragment shader
 * constants, and issue the draw call for PBO upload/download.
 *
 * The caller is responsible for saving and restoring state, as well as for
 * setting other fragment shader state (fragment shader, samplers), and
 * framebuffer/viewport/DSA/blend state.
 */
bool
st_pbo_draw(struct st_context *st, const struct st_pbo_addresses *addr,
            unsigned surface_width, unsigned surface_height)
{
   struct cso_context *cso = st->cso_context;
   struct pipe_context *pipe = st->pipe;

   /* Setup vertex and geometry shaders */
   if (!st->pbo.vs) {
      st->pbo.vs = st_pbo_create_vs(st);
      if (!st->pbo.vs)
         return false;
   }

   if (addr->depth != 1 && st->pbo.use_gs && !st->pbo.gs) {
      st->pbo.gs = st_pbo_create_gs(st);
      if (!st->pbo.gs)
         return false;
   }

   cso_set_vertex_shader_handle(cso, st->pbo.vs);

   cso_set_geometry_shader_handle(cso, addr->depth != 1 ? st->pbo.gs : NULL);

   cso_set_tessctrl_shader_handle(cso, NULL);

   cso_set_tesseval_shader_handle(cso, NULL);

   /* Upload vertices */
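   /* The quad covers the destination rectangle: pixel coordinates
    * [xoffset, xoffset + width] x [yoffset, yoffset + height] are mapped to
    * clip space via x_ndc = 2 * x / surface_width - 1 (and likewise for y).
    */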
   {
      struct pipe_vertex_buffer vbo = {0};
      struct cso_velems_state velem;

      float x0 = (float) addr->xoffset / surface_width * 2.0f - 1.0f;
      float y0 = (float) addr->yoffset / surface_height * 2.0f - 1.0f;
      float x1 = (float) (addr->xoffset + addr->width) / surface_width * 2.0f - 1.0f;
      float y1 = (float) (addr->yoffset + addr->height) / surface_height * 2.0f - 1.0f;

      float *verts = NULL;

      u_upload_alloc(st->pipe->stream_uploader, 0, 8 * sizeof(float), 4,
                     &vbo.buffer_offset, &vbo.buffer.resource, (void **) &verts);
      if (!verts)
         return false;

      verts[0] = x0;
      verts[1] = y0;
      verts[2] = x0;
      verts[3] = y1;
      verts[4] = x1;
      verts[5] = y0;
      verts[6] = x1;
      verts[7] = y1;

      u_upload_unmap(st->pipe->stream_uploader);

      velem.count = 1;
      velem.velems[0].src_offset = 0;
      velem.velems[0].src_stride = 2 * sizeof(float);
      velem.velems[0].instance_divisor = 0;
      velem.velems[0].vertex_buffer_index = 0;
      velem.velems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
      velem.velems[0].dual_slot = false;

      cso_set_vertex_elements(cso, &velem);
      cso_set_vertex_buffers(cso, 1, true, &vbo);
   }

   /* Upload constants */
   {
      struct pipe_constant_buffer cb;

      cb.buffer = NULL;
      cb.user_buffer = &addr->constants;
      cb.buffer_offset = 0;
      cb.buffer_size = sizeof(addr->constants);

      pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, false, &cb);

      pipe_resource_reference(&cb.buffer, NULL);
   }

   /* Rasterizer state */
   cso_set_rasterizer(cso, &st->pbo.raster);

   /* Disable stream output */
   cso_set_stream_outputs(cso, 0, NULL, 0);

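   /* For 3D/array transfers each instance renders one layer; the instance ID
    * is turned into the output layer by the VS (or by the GS on drivers that
    * cannot write gl_Layer from the VS).
    */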
   if (addr->depth == 1) {
      cso_draw_arrays(cso, MESA_PRIM_TRIANGLE_STRIP, 0, 4);
   } else {
      cso_draw_arrays_instanced(cso, MESA_PRIM_TRIANGLE_STRIP,
                                0, 4, 0, addr->depth);
   }

   return true;
}

void *
st_pbo_create_vs(struct st_context *st)
{
   const struct glsl_type *vec4 = glsl_vec4_type();
   const nir_shader_compiler_options *options =
      st_get_nir_compiler_options(st, MESA_SHADER_VERTEX);

   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
                                                  "st/pbo VS");

   nir_variable *in_pos = nir_create_variable_with_location(b.shader, nir_var_shader_in,
                                                            VERT_ATTRIB_POS, vec4);

   nir_variable *out_pos = nir_create_variable_with_location(b.shader, nir_var_shader_out,
                                                             VARYING_SLOT_POS, vec4);

   if (!st->pbo.use_gs)
      nir_copy_var(&b, out_pos, in_pos);

   if (st->pbo.layers) {
      nir_variable *instance_id = nir_create_variable_with_location(b.shader, nir_var_system_value,
                                                                    SYSTEM_VALUE_INSTANCE_ID, glsl_int_type());

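      /* When a GS is required to export the layer, the VS smuggles the
       * instance ID through the position's Z component; st_pbo_create_gs()
       * later converts it back to gl_Layer. Otherwise the VS writes gl_Layer
       * directly.
       */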
      if (st->pbo.use_gs) {
         nir_store_var(&b, out_pos,
                       nir_vector_insert_imm(&b, nir_load_var(&b, in_pos),
                                             nir_i2f32(&b, nir_load_var(&b, instance_id)), 2),
                       0xf);
      } else {
         nir_variable *out_layer = nir_create_variable_with_location(b.shader, nir_var_shader_out,
                                                                     VARYING_SLOT_LAYER, glsl_int_type());
         out_layer->data.interpolation = INTERP_MODE_NONE;
         nir_copy_var(&b, out_layer, instance_id);
      }
   }

   return st_nir_finish_builtin_shader(st, b.shader);
}

void *
st_pbo_create_gs(struct st_context *st)
{
   const nir_shader_compiler_options *options =
      st_get_nir_compiler_options(st, MESA_SHADER_GEOMETRY);

   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
                                                  "st/pbo GS");

   b.shader->info.gs.input_primitive = MESA_PRIM_TRIANGLES;
   b.shader->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
   b.shader->info.gs.vertices_in = 3;
   b.shader->info.gs.vertices_out = 3;
   b.shader->info.gs.invocations = 1;
   b.shader->info.gs.active_stream_mask = 1;

   const struct glsl_type *in_type = glsl_array_type(glsl_vec4_type(), 3, 0);
   nir_variable *in_pos = nir_variable_create(b.shader, nir_var_shader_in, in_type, "in_pos");
   in_pos->data.location = VARYING_SLOT_POS;
   b.shader->info.inputs_read |= VARYING_BIT_POS;

   nir_variable *out_pos = nir_create_variable_with_location(b.shader, nir_var_shader_out,
                                                             VARYING_SLOT_POS, glsl_vec4_type());

   b.shader->info.outputs_written |= VARYING_BIT_POS;

   nir_variable *out_layer = nir_create_variable_with_location(b.shader, nir_var_shader_out,
                                                               VARYING_SLOT_LAYER, glsl_int_type());
   out_layer->data.interpolation = INTERP_MODE_NONE;
   b.shader->info.outputs_written |= VARYING_BIT_LAYER;

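   /* Pass the triangle through: each vertex's position is forwarded with Z
    * reset to zero, and the layer index that the VS stored in Z is converted
    * back to an integer and written to gl_Layer.
    */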
   for (int i = 0; i < 3; ++i) {
      nir_def *pos = nir_load_array_var_imm(&b, in_pos, i);

      nir_store_var(&b, out_pos, nir_vector_insert_imm(&b, pos, nir_imm_float(&b, 0.0), 2), 0xf);
      /* out_layer.x = f2i(in_pos[i].z) */
      nir_store_var(&b, out_layer, nir_f2i32(&b, nir_channel(&b, pos, 2)), 0x1);

      nir_emit_vertex(&b);
   }

   return st_nir_finish_builtin_shader(st, b.shader);
}

const struct glsl_type *
st_pbo_sampler_type_for_target(enum pipe_texture_target target,
                               enum st_pbo_conversion conv)
{
   bool is_array = target >= PIPE_TEXTURE_1D_ARRAY;
   static const enum glsl_sampler_dim dim[] = {
      [PIPE_BUFFER]             = GLSL_SAMPLER_DIM_BUF,
      [PIPE_TEXTURE_1D]         = GLSL_SAMPLER_DIM_1D,
      [PIPE_TEXTURE_2D]         = GLSL_SAMPLER_DIM_2D,
      [PIPE_TEXTURE_3D]         = GLSL_SAMPLER_DIM_3D,
      [PIPE_TEXTURE_CUBE]       = GLSL_SAMPLER_DIM_CUBE,
      [PIPE_TEXTURE_RECT]       = GLSL_SAMPLER_DIM_RECT,
      [PIPE_TEXTURE_1D_ARRAY]   = GLSL_SAMPLER_DIM_1D,
      [PIPE_TEXTURE_2D_ARRAY]   = GLSL_SAMPLER_DIM_2D,
      [PIPE_TEXTURE_CUBE_ARRAY] = GLSL_SAMPLER_DIM_CUBE,
   };

   static const enum glsl_base_type type[] = {
      [ST_PBO_CONVERT_FLOAT]        = GLSL_TYPE_FLOAT,
      [ST_PBO_CONVERT_UINT]         = GLSL_TYPE_UINT,
      [ST_PBO_CONVERT_UINT_TO_SINT] = GLSL_TYPE_UINT,
      [ST_PBO_CONVERT_SINT]         = GLSL_TYPE_INT,
      [ST_PBO_CONVERT_SINT_TO_UINT] = GLSL_TYPE_INT,
   };

   return glsl_sampler_type(dim[target], false, is_array, type[conv]);
}


static void *
create_fs(struct st_context *st, bool download,
          enum pipe_texture_target target,
          enum st_pbo_conversion conversion,
          enum pipe_format format,
          bool need_layer)
{
   struct pipe_screen *screen = st->screen;
   const nir_shader_compiler_options *options =
      st_get_nir_compiler_options(st, MESA_SHADER_FRAGMENT);
   bool pos_is_sysval =
      screen->get_param(screen, PIPE_CAP_FS_POSITION_IS_SYSVAL);

   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
                                                  download ?
                                                  "st/pbo download FS" :
                                                  "st/pbo upload FS");

   nir_def *zero = nir_imm_int(&b, 0);

   /* param = [ -xoffset + skip_pixels, -yoffset, stride, image_size ] */
   nir_variable *param_var =
      nir_variable_create(b.shader, nir_var_uniform, glsl_vec4_type(), "param");
   b.shader->num_uniforms += 4;
   nir_def *param = nir_load_var(&b, param_var);

   nir_variable *fragcoord;
   if (pos_is_sysval)
      fragcoord = nir_create_variable_with_location(b.shader, nir_var_system_value,
                                                    SYSTEM_VALUE_FRAG_COORD, glsl_vec4_type());
   else
      fragcoord = nir_create_variable_with_location(b.shader, nir_var_shader_in,
                                                    VARYING_SLOT_POS, glsl_vec4_type());
   nir_def *coord = nir_load_var(&b, fragcoord);

   /* When st->pbo.layers == false, we are guaranteed to only have a single
    * layer. However, the "layer" variable is still needed to supply the
    * "array" coordinate for array textures, so set it to zero whenever an
    * array texture is used but only a single layer is required.
    */
   nir_def *layer = NULL;
   if (!download || target == PIPE_TEXTURE_1D_ARRAY ||
                    target == PIPE_TEXTURE_2D_ARRAY ||
                    target == PIPE_TEXTURE_3D ||
                    target == PIPE_TEXTURE_CUBE ||
                    target == PIPE_TEXTURE_CUBE_ARRAY) {
      if (need_layer) {
         assert(st->pbo.layers);
         nir_variable *var = nir_create_variable_with_location(b.shader, nir_var_shader_in,
                                                               VARYING_SLOT_LAYER, glsl_int_type());
         var->data.interpolation = INTERP_MODE_FLAT;
         layer = nir_load_var(&b, var);
      }
      else {
         layer = zero;
      }
   }

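   /* The buffer element index computed below mirrors st_pbo_addresses_setup():
    * index = (x + constants.xoffset) + (y + constants.yoffset) * stride
    *         + layer * image_size
    */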
   /* offset_pos = param.xy + f2i(coord.xy) */
   nir_def *offset_pos =
      nir_iadd(&b, nir_channels(&b, param, TGSI_WRITEMASK_XY),
               nir_f2i32(&b, nir_channels(&b, coord, TGSI_WRITEMASK_XY)));

   /* addr = offset_pos.x + offset_pos.y * stride */
   nir_def *pbo_addr =
      nir_iadd(&b, nir_channel(&b, offset_pos, 0),
               nir_imul(&b, nir_channel(&b, offset_pos, 1),
                        nir_channel(&b, param, 2)));
   if (layer && layer != zero) {
      /* pbo_addr += image_size * layer */
      pbo_addr = nir_iadd(&b, pbo_addr,
                          nir_imul(&b, layer, nir_channel(&b, param, 3)));
   }

   nir_def *texcoord;
   if (download) {
      texcoord = nir_f2i32(&b, nir_channels(&b, coord, TGSI_WRITEMASK_XY));

      if (target == PIPE_TEXTURE_1D) {
         unsigned sw = 0;
         texcoord = nir_swizzle(&b, texcoord, &sw, 1);
      }

      if (layer) {
         nir_def *src_layer = layer;

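         /* For 3D textures the source slice is biased by the layer_offset
          * uniform (st_pbo_addresses::constants.layer_offset, uploaded right
          * after the param vec4, hence driver_location 4).
          */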
         if (target == PIPE_TEXTURE_3D) {
            nir_variable *layer_offset_var =
               nir_variable_create(b.shader, nir_var_uniform,
                                   glsl_int_type(), "layer_offset");
            b.shader->num_uniforms += 1;
            layer_offset_var->data.driver_location = 4;
            nir_def *layer_offset = nir_load_var(&b, layer_offset_var);

            src_layer = nir_iadd(&b, layer, layer_offset);
         }

         if (target == PIPE_TEXTURE_1D_ARRAY) {
            texcoord = nir_vec2(&b, nir_channel(&b, texcoord, 0),
                                src_layer);
         } else {
            texcoord = nir_vec3(&b, nir_channel(&b, texcoord, 0),
                                nir_channel(&b, texcoord, 1),
                                src_layer);
         }
      }
   } else {
      texcoord = pbo_addr;
   }

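   /* For downloads, texcoord addresses the source texture and pbo_addr the
    * destination buffer image; for uploads, the source is the buffer texture
    * itself, so it is fetched directly at pbo_addr.
    */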
   nir_variable *tex_var =
      nir_variable_create(b.shader, nir_var_uniform,
                          st_pbo_sampler_type_for_target(target, conversion),
                          "tex");
   tex_var->data.explicit_binding = true;
   tex_var->data.binding = 0;

   nir_deref_instr *tex_deref = nir_build_deref_var(&b, tex_var);

   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
   tex->op = nir_texop_txf;
   tex->sampler_dim = glsl_get_sampler_dim(tex_var->type);
   tex->coord_components =
      glsl_get_sampler_coordinate_components(tex_var->type);
   tex->is_array = target >= PIPE_TEXTURE_1D_ARRAY;

   tex->dest_type = nir_get_nir_type_for_glsl_base_type(glsl_get_sampler_result_type(tex_var->type));
   tex->src[0].src_type = nir_tex_src_texture_deref;
   tex->src[0].src = nir_src_for_ssa(&tex_deref->def);
   tex->src[1].src_type = nir_tex_src_sampler_deref;
   tex->src[1].src = nir_src_for_ssa(&tex_deref->def);
   tex->src[2].src_type = nir_tex_src_coord;
   tex->src[2].src = nir_src_for_ssa(texcoord);
   nir_def_init(&tex->instr, &tex->def, 4, 32);
   nir_builder_instr_insert(&b, &tex->instr);
   nir_def *result = &tex->def;

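   /* Cross-signedness conversions clamp to the destination range: signed ->
    * unsigned clamps negative values to 0, unsigned -> signed clamps values
    * above INT32_MAX.
    */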
   if (conversion == ST_PBO_CONVERT_SINT_TO_UINT)
      result = nir_imax(&b, result, zero);
   else if (conversion == ST_PBO_CONVERT_UINT_TO_SINT)
      result = nir_umin(&b, result, nir_imm_int(&b, (1u << 31) - 1));

   if (download) {
      static const enum glsl_base_type type[] = {
         [ST_PBO_CONVERT_FLOAT]        = GLSL_TYPE_FLOAT,
         [ST_PBO_CONVERT_UINT]         = GLSL_TYPE_UINT,
         [ST_PBO_CONVERT_UINT_TO_SINT] = GLSL_TYPE_INT,
         [ST_PBO_CONVERT_SINT]         = GLSL_TYPE_INT,
         [ST_PBO_CONVERT_SINT_TO_UINT] = GLSL_TYPE_UINT,
      };
      nir_variable *img_var =
         nir_variable_create(b.shader, nir_var_image,
                             glsl_image_type(GLSL_SAMPLER_DIM_BUF, false,
                                             type[conversion]), "img");
      img_var->data.access = ACCESS_NON_READABLE;
      img_var->data.explicit_binding = true;
      img_var->data.binding = 0;
      img_var->data.image.format = format;
      nir_deref_instr *img_deref = nir_build_deref_var(&b, img_var);

      nir_image_deref_store(&b, &img_deref->def,
                            nir_vec4(&b, pbo_addr, zero, zero, zero),
                            zero,
                            result,
                            nir_imm_int(&b, 0),
                            .image_dim = GLSL_SAMPLER_DIM_BUF);
   } else {
      nir_variable *color =
         nir_create_variable_with_location(b.shader, nir_var_shader_out,
                                           FRAG_RESULT_COLOR, glsl_vec4_type());

      nir_store_var(&b, color, result, TGSI_WRITEMASK_XYZW);
   }

   return st_nir_finish_builtin_shader(st, b.shader);
}

static enum st_pbo_conversion
get_pbo_conversion(enum pipe_format src_format, enum pipe_format dst_format)
{
   if (util_format_is_pure_uint(src_format)) {
      if (util_format_is_pure_uint(dst_format))
         return ST_PBO_CONVERT_UINT;
      if (util_format_is_pure_sint(dst_format))
         return ST_PBO_CONVERT_UINT_TO_SINT;
   } else if (util_format_is_pure_sint(src_format)) {
      if (util_format_is_pure_sint(dst_format))
         return ST_PBO_CONVERT_SINT;
      if (util_format_is_pure_uint(dst_format))
         return ST_PBO_CONVERT_SINT_TO_UINT;
   }

   return ST_PBO_CONVERT_FLOAT;
}

void *
st_pbo_get_upload_fs(struct st_context *st,
                     enum pipe_format src_format,
                     enum pipe_format dst_format,
                     bool need_layer)
{
   STATIC_ASSERT(ARRAY_SIZE(st->pbo.upload_fs) == ST_NUM_PBO_CONVERSIONS);

   enum st_pbo_conversion conversion = get_pbo_conversion(src_format, dst_format);

   if (!st->pbo.upload_fs[conversion][need_layer])
      st->pbo.upload_fs[conversion][need_layer] = create_fs(st, false, 0, conversion, PIPE_FORMAT_NONE, need_layer);

   return st->pbo.upload_fs[conversion][need_layer];
}

void *
st_pbo_get_download_fs(struct st_context *st, enum pipe_texture_target target,
                       enum pipe_format src_format,
                       enum pipe_format dst_format,
                       bool need_layer)
{
   STATIC_ASSERT(ARRAY_SIZE(st->pbo.download_fs) == ST_NUM_PBO_CONVERSIONS);
   assert(target < PIPE_MAX_TEXTURE_TYPES);

   struct pipe_screen *screen = st->screen;
   enum st_pbo_conversion conversion = get_pbo_conversion(src_format, dst_format);
   bool formatless_store = screen->get_param(screen, PIPE_CAP_IMAGE_STORE_FORMATTED);

   /* For drivers that don't support formatless image stores, the download FS
    * is kept in a dynamically allocated array indexed by the destination
    * format.
    */
   if (!formatless_store && !st->pbo.download_fs[conversion][target][need_layer])
      st->pbo.download_fs[conversion][target][need_layer] = calloc(sizeof(void *), PIPE_FORMAT_COUNT);

   if (formatless_store) {
      if (!st->pbo.download_fs[conversion][target][need_layer])
         st->pbo.download_fs[conversion][target][need_layer] = create_fs(st, true, target, conversion, PIPE_FORMAT_NONE, need_layer);
      return st->pbo.download_fs[conversion][target][need_layer];
   } else {
      void **fs_array = (void **)st->pbo.download_fs[conversion][target][need_layer];
      if (!fs_array[dst_format])
         fs_array[dst_format] = create_fs(st, true, target, conversion, dst_format, need_layer);
      return fs_array[dst_format];
   }
}

void
st_init_pbo_helpers(struct st_context *st)
{
   struct pipe_screen *screen = st->screen;

   st->pbo.upload_enabled =
      screen->get_param(screen, PIPE_CAP_TEXTURE_BUFFER_OBJECTS) &&
      screen->get_param(screen, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT) >= 1 &&
      screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_INTEGERS);
   if (!st->pbo.upload_enabled)
      return;

   st->pbo.download_enabled =
      st->pbo.upload_enabled &&
      screen->get_param(screen, PIPE_CAP_SAMPLER_VIEW_TARGET) &&
      screen->get_param(screen, PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT) &&
      screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
                               PIPE_SHADER_CAP_MAX_SHADER_IMAGES) >= 1;

   st->pbo.rgba_only =
      screen->get_param(screen, PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY);

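   /* Layered transfers need gl_InstanceID to select the destination layer.
    * If the VS can also export gl_Layer directly, use that path; otherwise
    * route the layer through a geometry shader.
    */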
   if (screen->get_param(screen, PIPE_CAP_VS_INSTANCEID)) {
      if (screen->get_param(screen, PIPE_CAP_VS_LAYER_VIEWPORT)) {
         st->pbo.layers = true;
      } else if (screen->get_param(screen, PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES) >= 3) {
         st->pbo.layers = true;
         st->pbo.use_gs = true;
      }
   }

   /* Blend state */
   memset(&st->pbo.upload_blend, 0, sizeof(struct pipe_blend_state));
   st->pbo.upload_blend.rt[0].colormask = PIPE_MASK_RGBA;

   /* Rasterizer state */
   memset(&st->pbo.raster, 0, sizeof(struct pipe_rasterizer_state));
   st->pbo.raster.half_pixel_center = 1;

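   /* MESA_COMPUTE_PBO forces the compute-based texture transfer path; a
    * value starting with "spec" additionally forces the specialized compute
    * transfer.
    */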
   const char *pbo = debug_get_option("MESA_COMPUTE_PBO", NULL);
   if (pbo) {
      st->force_compute_based_texture_transfer = true;
      st->force_specialized_compute_transfer = !strncmp(pbo, "spec", 4);
   }

   if (st->allow_compute_based_texture_transfer || st->force_compute_based_texture_transfer)
      st->pbo.shaders = _mesa_hash_table_create_u32_keys(NULL);
}

void
st_destroy_pbo_helpers(struct st_context *st)
{
   struct pipe_screen *screen = st->screen;
   bool formatless_store = screen->get_param(screen, PIPE_CAP_IMAGE_STORE_FORMATTED);
   unsigned i;

   for (i = 0; i < ARRAY_SIZE(st->pbo.upload_fs); ++i) {
      for (unsigned j = 0; j < ARRAY_SIZE(st->pbo.upload_fs[0]); j++) {
         if (st->pbo.upload_fs[i][j]) {
            st->pipe->delete_fs_state(st->pipe, st->pbo.upload_fs[i][j]);
            st->pbo.upload_fs[i][j] = NULL;
         }
      }
   }

   for (i = 0; i < ARRAY_SIZE(st->pbo.download_fs); ++i) {
      for (unsigned j = 0; j < ARRAY_SIZE(st->pbo.download_fs[0]); ++j) {
         for (unsigned k = 0; k < ARRAY_SIZE(st->pbo.download_fs[0][0]); k++) {
            if (st->pbo.download_fs[i][j][k]) {
               if (formatless_store) {
                  st->pipe->delete_fs_state(st->pipe, st->pbo.download_fs[i][j][k]);
               } else {
                  void **fs_array = (void **)st->pbo.download_fs[i][j][k];
                  for (unsigned l = 0; l < PIPE_FORMAT_COUNT; l++)
                     if (fs_array[l])
                        st->pipe->delete_fs_state(st->pipe, fs_array[l]);
                  free(st->pbo.download_fs[i][j][k]);
               }
               st->pbo.download_fs[i][j][k] = NULL;
            }
         }
      }
   }

   if (st->pbo.gs) {
      st->pipe->delete_gs_state(st->pipe, st->pbo.gs);
      st->pbo.gs = NULL;
   }

   if (st->pbo.vs) {
      st->pipe->delete_vs_state(st->pipe, st->pbo.vs);
      st->pbo.vs = NULL;
   }

   st_pbo_compute_deinit(st);
}