/**********************************************************
 * Copyright 2008-2023 VMware, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 **********************************************************/

#include "util/u_bitmask.h"
#include "util/u_memory.h"
#include "util/format/u_format.h"
#include "svga_context.h"
#include "svga_cmd.h"
#include "svga_format.h"
#include "svga_shader.h"
#include "svga_tgsi.h"
#include "svga_resource_texture.h"
#include "VGPU10ShaderTokens.h"

#include "compiler/nir/nir.h"
#include "compiler/glsl/gl_nir.h"
#include "nir/nir_to_tgsi.h"


/**
 * This bit isn't really used anywhere.  It only serves to help
 * generate a unique "signature" for the vertex shader output bitmask.
 * Shader input/output signatures are used to resolve shader linking
 * issues.
 */
#define FOG_GENERIC_BIT (((uint64_t) 1) << 63)


/**
 * Use the shader info to generate a bitmask indicating which generic
 * inputs are used by the shader.  A set bit indicates that GENERIC[i]
 * is used.
 */
uint64_t
svga_get_generic_inputs_mask(const struct tgsi_shader_info *info)
{
   unsigned i;
   uint64_t mask = 0x0;

   for (i = 0; i < info->num_inputs; i++) {
      if (info->input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
         unsigned j = info->input_semantic_index[i];
         assert(j < sizeof(mask) * 8);
         mask |= ((uint64_t) 1) << j;
      }
   }

   return mask;
}
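
/* Worked example (illustrative): a shader whose inputs include
 * GENERIC[0] and GENERIC[2] yields mask (1 << 0) | (1 << 2) == 0x5;
 * non-GENERIC inputs (POSITION, COLOR, etc.) set no bits.
 */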


/**
 * Scan the shader info to generate a bitmask of written generic outputs.
 * A set bit indicates that GENERIC[i] is written; a write to FOG is
 * recorded with FOG_GENERIC_BIT.
 */
uint64_t
svga_get_generic_outputs_mask(const struct tgsi_shader_info *info)
{
   unsigned i;
   uint64_t mask = 0x0;

   for (i = 0; i < info->num_outputs; i++) {
      switch (info->output_semantic_name[i]) {
      case TGSI_SEMANTIC_GENERIC:
         {
            unsigned j = info->output_semantic_index[i];
            assert(j < sizeof(mask) * 8);
            mask |= ((uint64_t) 1) << j;
         }
         break;
      case TGSI_SEMANTIC_FOG:
         mask |= FOG_GENERIC_BIT;
         break;
      }
   }

   return mask;
}
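
/* Worked example (illustrative): a vertex shader writing GENERIC[0],
 * GENERIC[1] and FOG yields mask 0x3 | FOG_GENERIC_BIT, i.e.
 * 0x8000000000000003.
 */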



/**
 * Given a mask of used generic variables (as returned by the above
 * functions), fill in a table which maps those indices to small integers.
 * This table is used by the remap_generic_index() function in
 * svga_tgsi_decl_sm30.c
 * Example: if generics_mask = binary(1010) it means that GENERIC[1] and
 * GENERIC[3] are used.  Since index 0 is reserved for texcoord[0], the
 * remap_table will contain:
 *   table[1] = 1;
 *   table[3] = 2;
 * The remaining table entries are set to -1; if one of them is needed
 * later, svga_remap_generic_index() assigns it the next unused index.
 */
void
svga_remap_generics(uint64_t generics_mask,
                    int8_t remap_table[MAX_GENERIC_VARYING])
{
   /* Note texcoord[0] is reserved so start at 1 */
   unsigned count = 1, i;

   for (i = 0; i < MAX_GENERIC_VARYING; i++) {
      remap_table[i] = -1;
   }

   /* for each bit set in generics_mask */
   while (generics_mask) {
      unsigned index = ffsll(generics_mask) - 1;
      remap_table[index] = count++;
      generics_mask &= ~((uint64_t) 1 << index);
   }
}
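
/* Worked example (illustrative): generics_mask = binary(1010) produces
 *   remap_table = { -1, 1, -1, 2, -1, ... }
 * i.e. GENERIC[1] -> 1 and GENERIC[3] -> 2, with index 0 left for the
 * reserved texcoord[0].
 */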


/**
 * Use the generic remap table to map a TGSI generic varying variable
 * index to a small integer.  If the remapping table doesn't have a
 * valid value for the given index (the table entry is -1) it means
 * the fragment shader doesn't use that VS output.  Just allocate
 * the next free value in that case.  Alternatively, we could cull
 * VS instructions that write to that register, or replace the register
 * with a dummy temp register.
 * XXX TODO: we should do one of the latter as it would save precious
 * texcoord registers.
 */
int
svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
                         int generic_index)
{
   assert(generic_index < MAX_GENERIC_VARYING);

   if (generic_index >= MAX_GENERIC_VARYING) {
      /* just don't return a random/garbage value */
      generic_index = MAX_GENERIC_VARYING - 1;
   }

   if (remap_table[generic_index] == -1) {
      /* This is a VS output that has no matching PS input.  Find a
       * free index.
       */
      int i, max = 0;
      for (i = 0; i < MAX_GENERIC_VARYING; i++) {
         max = MAX2(max, remap_table[i]);
      }
      remap_table[generic_index] = max + 1;
   }

   return remap_table[generic_index];
}
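
/* Continuing the example above (illustrative): with
 * remap_table = { -1, 1, -1, 2, -1, ... }, looking up the unmapped
 * GENERIC[2] finds max == 2 and lazily assigns remap_table[2] = 3.
 */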

static const enum pipe_swizzle copy_alpha[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_Z,
   PIPE_SWIZZLE_W,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};

static const enum pipe_swizzle set_alpha[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_Z,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};

static const enum pipe_swizzle set_000X[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};

static const enum pipe_swizzle set_XXXX[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};

static const enum pipe_swizzle set_XXX1[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};

static const enum pipe_swizzle set_XXXY[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};

static const enum pipe_swizzle set_YYYY[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};
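
/* The tables above are indexed by the view's per-channel swizzle and
 * composed with it during shader-key setup below, e.g.
 *    key->tex[i].swizzle_r = swizzle_tab[view->swizzle_r];
 * Illustrative example: a PIPE_FORMAT_X24S8_UINT view selects set_YYYY,
 * so an identity view swizzle (X,Y,Z,W) becomes (Y,Y,Y,Y), broadcasting
 * the stencil channel to all components.
 */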


static VGPU10_RESOURCE_RETURN_TYPE
vgpu10_return_type(enum pipe_format format)
{
   if (util_format_is_unorm(format))
      return VGPU10_RETURN_TYPE_UNORM;
   else if (util_format_is_snorm(format))
      return VGPU10_RETURN_TYPE_SNORM;
   else if (util_format_is_pure_uint(format))
      return VGPU10_RETURN_TYPE_UINT;
   else if (util_format_is_pure_sint(format))
      return VGPU10_RETURN_TYPE_SINT;
   else if (util_format_is_float(format))
      return VGPU10_RETURN_TYPE_FLOAT;
   else
      return VGPU10_RETURN_TYPE_MAX;
}
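
/* Illustrative mapping: PIPE_FORMAT_R8G8B8A8_UNORM -> UNORM,
 * PIPE_FORMAT_R32G32B32A32_FLOAT -> FLOAT, PIPE_FORMAT_R32_UINT -> UINT;
 * formats that fit none of the classes fall through to
 * VGPU10_RETURN_TYPE_MAX.
 */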


/**
 * A helper function to return true if the specified format
 * is a supported format for the sample_c instruction.
 */
static bool
isValidSampleCFormat(enum pipe_format format)
{
   return util_format_is_depth_or_stencil(format);
}
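
/* For example (illustrative): PIPE_FORMAT_Z24_UNORM_S8_UINT qualifies for
 * sample_c, while a color format such as PIPE_FORMAT_R32_FLOAT does not
 * and instead takes the compare_in_shader path set up below.
 */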


/**
 * Initialize the shader-neutral fields of svga_compile_key from context
 * state.  This is basically the texture-related state.
 */
void
svga_init_shader_key_common(const struct svga_context *svga,
                            enum pipe_shader_type shader_type,
                            const struct svga_shader *shader,
                            struct svga_compile_key *key)
{
   unsigned i, idx = 0;
   unsigned sampler_slots = 0;

   assert(shader_type < ARRAY_SIZE(svga->curr.num_sampler_views));

   /* In case the number of samplers and sampler_views doesn't match,
    * loop over the upper of the two counts.
    */
   key->num_textures = MAX2(svga->curr.num_sampler_views[shader_type],
                            svga->curr.num_samplers[shader_type]);

   if (!shader->info.uses_samplers)
      key->num_textures = 0;

   key->num_samplers = 0;

   /* Enable sampler state mapping only if GL4.3 is supported and either
    * the number of samplers exceeds the SVGA limit or the sampler-state
    * mapping environment variable is set.
    */
   bool sampler_state_mapping =
      svga_use_sampler_state_mapping(svga, svga->curr.num_samplers[shader_type]);

   key->sampler_state_mapping =
      key->num_textures && sampler_state_mapping ? 1 : 0;

   for (i = 0; i < key->num_textures; i++) {
      struct pipe_sampler_view *view = svga->curr.sampler_views[shader_type][i];
      const struct svga_sampler_state
         *sampler = svga->curr.sampler[shader_type][i];

      if (view) {
         assert(view->texture);

         enum pipe_texture_target target = view->target;
         assert(target < (1 << 4)); /* texture_target:4 */

         key->tex[i].target = target;
         key->tex[i].sampler_return_type = vgpu10_return_type(view->format);
         key->tex[i].sampler_view = 1;

         /* 1D/2D array textures with one slice and cube map array textures
          * with one cube are treated as non-arrays by the SVGA3D device.
          * Set the is_array flag only if we know that we have more than 1
          * element.  This will be used to select shader instruction/resource
          * types during shader translation.
          */
         switch (target) {
         case PIPE_TEXTURE_1D_ARRAY:
         case PIPE_TEXTURE_2D_ARRAY:
            key->tex[i].is_array = view->texture->array_size > 1;
            break;
         case PIPE_TEXTURE_CUBE_ARRAY:
            key->tex[i].is_array = view->texture->array_size > 6;
            break;
         default:
            ; /* nothing / silence compiler warning */
         }

         assert(view->texture->nr_samples < (1 << 5)); /* 5-bit field */
         key->tex[i].num_samples = view->texture->nr_samples;

         const enum pipe_swizzle *swizzle_tab;
         if (target == PIPE_BUFFER) {
            SVGA3dSurfaceFormat svga_format;
            unsigned tf_flags;

            assert(view->texture->target == PIPE_BUFFER);

            /* Apply any special swizzle mask for the view format if needed */

            svga_translate_texture_buffer_view_format(view->format,
                                                      &svga_format, &tf_flags);
            if (tf_flags & TF_000X)
               swizzle_tab = set_000X;
            else if (tf_flags & TF_XXXX)
               swizzle_tab = set_XXXX;
            else if (tf_flags & TF_XXX1)
               swizzle_tab = set_XXX1;
            else if (tf_flags & TF_XXXY)
               swizzle_tab = set_XXXY;
            else
               swizzle_tab = copy_alpha;
         }
         else {
            /* If we have a non-alpha view into an svga3d surface with an
             * alpha channel, then explicitly set the alpha channel to 1
             * when sampling.  Note that we need to check the
             * actual device format to cover also imported surface cases.
             */
            swizzle_tab =
               (!util_format_has_alpha(view->format) &&
                svga_texture_device_format_has_alpha(view->texture)) ?
               set_alpha : copy_alpha;

            if (view->texture->format == PIPE_FORMAT_DXT1_RGB ||
                view->texture->format == PIPE_FORMAT_DXT1_SRGB)
               swizzle_tab = set_alpha;

            if (view->format == PIPE_FORMAT_X24S8_UINT ||
                view->format == PIPE_FORMAT_X32_S8X24_UINT)
               swizzle_tab = set_YYYY;

            /* Save the compare function as we need to handle
             * depth compare in the shader.
             */
            key->tex[i].compare_mode = sampler->compare_mode;
            key->tex[i].compare_func = sampler->compare_func;

            /* Set the compare_in_shader bit if the view format
             * is not a supported format for shadow compare.
             * In this case, we'll do the comparison in the shader.
             */
            if ((sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) &&
                !isValidSampleCFormat(view->format)) {
               key->tex[i].compare_in_shader = true;
            }
         }

         key->tex[i].swizzle_r = swizzle_tab[view->swizzle_r];
         key->tex[i].swizzle_g = swizzle_tab[view->swizzle_g];
         key->tex[i].swizzle_b = swizzle_tab[view->swizzle_b];
         key->tex[i].swizzle_a = swizzle_tab[view->swizzle_a];
      }
      else {
         key->tex[i].sampler_view = 0;
      }

      if (sampler) {
         if (!sampler->normalized_coords) {
            if (view) {
               assert(idx < (1 << 5)); /* width_height_idx:5 bitfield */
               key->tex[i].width_height_idx = idx++;
            }
            key->tex[i].unnormalized = true;
            ++key->num_unnormalized_coords;

            if (sampler->magfilter == SVGA3D_TEX_FILTER_NEAREST ||
                sampler->minfilter == SVGA3D_TEX_FILTER_NEAREST) {
               key->tex[i].texel_bias = true;
            }
         }

         if (!sampler_state_mapping) {
            /* Use the same index if sampler state mapping is not supported */
            key->tex[i].sampler_index = i;
            key->num_samplers = i + 1;
         }
         else {

            /* The current samplers list can have redundant entries.
             * In order to keep the number of bound samplers within the
             * max limit supported by SVGA, we'll recreate the list with
             * unique sampler state objects only.
             */
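            /* Worked example (illustrative): if the bound samplers are
             * [A, A, B], the dedup below yields sampler_index values
             * 0, 0, 1 and key->num_samplers == 2; a shadow sampler
             * (compare_mode set) would reserve one extra slot.
             */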

            /* Check to see if this sampler is already on the list.
             * If so, set the sampler index of this sampler to the
             * same sampler index.
             */
            for (unsigned j = 0; j <= i; j++) {
               if (svga->curr.sampler[shader_type][j] == sampler) {

                  if (!(sampler_slots & (1 << j))) {

                     /* if this sampler is not added to the new list yet,
                      * set its sampler index to the next sampler index,
                      * increment the sampler count, and mark this
                      * sampler as added to the list.
                      */

                     unsigned next_index =
                        MIN2(key->num_samplers, SVGA3D_DX_MAX_SAMPLERS - 1);

                     key->tex[i].sampler_index = next_index;
                     key->num_samplers = next_index + 1;

                     if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
                        /* reserve one slot for the alternate sampler */
                        key->num_samplers++;
                     }

                     sampler_slots |= (1 << j);
                  }
                  else {
                     key->tex[i].sampler_index = key->tex[j].sampler_index;
                  }
                  break;
               }
            }
         }
      }
   }

   if (svga_have_gl43(svga)) {

      /* Save info about which constant buffers are to be viewed
       * as srv raw buffers in the shader key.
       */
      if (shader->info.const_buffers_declared &
          svga->state.raw_constbufs[shader_type]) {
         key->raw_constbufs = svga->state.raw_constbufs[shader_type] &
                              shader->info.const_buffers_declared;
      }

      /* beginning index for srv for raw constant buffers */
      key->srv_raw_constbuf_index = PIPE_MAX_SAMPLERS;

      if (shader->info.uses_images || shader->info.uses_hw_atomic ||
          shader->info.uses_shader_buffers) {

         /* Save the uavSpliceIndex which is the index used for the first uav
          * in the draw pipeline.  For compute, uavSpliceIndex is always 0.
          */
         if (shader_type != PIPE_SHADER_COMPUTE)
            key->uav_splice_index = svga->state.hw_draw.uavSpliceIndex;

         unsigned uav_splice_index = key->uav_splice_index;

         /* Also get the texture data type to be used in the uav declaration */
         const struct svga_image_view *cur_image_view =
            &svga->curr.image_views[shader_type][0];

         for (unsigned i = 0; i < ARRAY_SIZE(svga->curr.image_views[shader_type]);
              i++, cur_image_view++) {

            struct pipe_resource *resource = cur_image_view->desc.resource;

            if (resource) {
               key->images[i].return_type =
                  svga_get_texture_datatype(cur_image_view->desc.format);

               key->images[i].is_array = resource->array_size > 1;

               /* Save the image resource target in the shader key because
                * for a single-layer image view, the resource target in the
                * TGSI shader is changed to a different texture target.
                */
               key->images[i].resource_target = resource->target;
               if (resource->target == PIPE_TEXTURE_3D ||
                   resource->target == PIPE_TEXTURE_1D_ARRAY ||
                   resource->target == PIPE_TEXTURE_2D_ARRAY ||
                   resource->target == PIPE_TEXTURE_CUBE ||
                   resource->target == PIPE_TEXTURE_CUBE_ARRAY) {
                  key->images[i].is_single_layer =
                     cur_image_view->desc.u.tex.first_layer ==
                     cur_image_view->desc.u.tex.last_layer;
               }

               key->images[i].uav_index = cur_image_view->uav_index + uav_splice_index;
            }
            else
               key->images[i].uav_index = SVGA3D_INVALID_ID;
         }

         const struct svga_shader_buffer *cur_sbuf =
            &svga->curr.shader_buffers[shader_type][0];

         /* Save info about which shader buffers are to be viewed
          * as srv raw buffers in the shader key.
          */
         if (shader->info.shader_buffers_declared &
             svga->state.raw_shaderbufs[shader_type]) {
            key->raw_shaderbufs = svga->state.raw_shaderbufs[shader_type] &
                                  shader->info.shader_buffers_declared;
            key->srv_raw_shaderbuf_index = key->srv_raw_constbuf_index +
                                           SVGA_MAX_CONST_BUFS;
         }

         for (unsigned i = 0; i < ARRAY_SIZE(svga->curr.shader_buffers[shader_type]);
              i++, cur_sbuf++) {

            if (cur_sbuf->resource && (!(key->raw_shaderbufs & (1 << i))))
               key->shader_buf_uav_index[i] = cur_sbuf->uav_index + uav_splice_index;
            else
               key->shader_buf_uav_index[i] = SVGA3D_INVALID_ID;
         }

         const struct svga_shader_buffer *cur_buf = &svga->curr.atomic_buffers[0];

         for (unsigned i = 0; i < ARRAY_SIZE(svga->curr.atomic_buffers);
              i++, cur_buf++) {

            if (cur_buf->resource)
               key->atomic_buf_uav_index[i] = cur_buf->uav_index + uav_splice_index;
            else
               key->atomic_buf_uav_index[i] = SVGA3D_INVALID_ID;
         }

         key->image_size_used = shader->info.uses_image_size;
      }

   }

   key->clamp_vertex_color = svga->curr.rast ?
      svga->curr.rast->templ.clamp_vertex_color : 0;
}


/** Search for a compiled shader variant with the same compile key */
struct svga_shader_variant *
svga_search_shader_key(const struct svga_shader *shader,
                       const struct svga_compile_key *key)
{
   struct svga_shader_variant *variant = shader->variants;

   assert(key);

   for ( ; variant; variant = variant->next) {
      if (svga_compile_keys_equal(key, &variant->key))
         return variant;
   }
   return NULL;
}
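
/* Typical lookup-or-compile flow (illustrative sketch; the state-emit
 * code elsewhere in the driver is authoritative):
 *
 *    struct svga_shader_variant *variant =
 *       svga_search_shader_key(shader, &key);
 *    if (!variant) {
 *       ret = svga_compile_shader(svga, shader, &key, &variant);
 *       if (ret != PIPE_OK)
 *          return ret;
 *    }
 */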

/** Search for a shader with the same token key */
struct svga_shader *
svga_search_shader_token_key(struct svga_shader *pshader,
                             const struct svga_token_key *key)
{
   struct svga_shader *shader = pshader;

   assert(key);

   for ( ; shader; shader = shader->next) {
      if (memcmp(key, &shader->token_key, sizeof(struct svga_token_key)) == 0)
         return shader;
   }
   return NULL;
}

/**
 * Helper function to define a gb shader for a non-VGPU10 device.
 */
static enum pipe_error
define_gb_shader_vgpu9(struct svga_context *svga,
                       struct svga_shader_variant *variant,
                       unsigned codeLen)
{
   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
   enum pipe_error ret;

   /**
    * Create gb memory for the shader and upload the shader code.
    * The kernel module will allocate an id for the shader and issue
    * the DefineGBShader command.
    */
   variant->gb_shader = sws->shader_create(sws, variant->type,
                                           variant->tokens, codeLen);

   svga->hud.shader_mem_used += codeLen;

   if (!variant->gb_shader)
      return PIPE_ERROR_OUT_OF_MEMORY;

   ret = SVGA3D_BindGBShader(svga->swc, variant->gb_shader);

   return ret;
}

/**
 * Helper function to define a gb shader for a VGPU10 device.
 */
static enum pipe_error
define_gb_shader_vgpu10(struct svga_context *svga,
                        struct svga_shader_variant *variant,
                        unsigned codeLen)
{
   struct svga_winsys_context *swc = svga->swc;
   enum pipe_error ret;
   unsigned len = codeLen + variant->signatureLen;

   /**
    * Shaders in a VGPU10-enabled device reside in the device COTable.
    * The SVGA driver will allocate an integer ID for the shader and
    * issue DXDefineShader and DXBindShader commands.
    */
   variant->id = util_bitmask_add(svga->shader_id_bm);
   if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
      return PIPE_ERROR_OUT_OF_MEMORY;
   }

   /* Create gb memory for the shader and upload the shader code */
   variant->gb_shader = swc->shader_create(swc,
                                           variant->id, variant->type,
                                           variant->tokens, codeLen,
                                           variant->signature,
                                           variant->signatureLen);

   svga->hud.shader_mem_used += len;

   if (!variant->gb_shader) {
      /* Free the shader ID */
      assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
      goto fail_no_allocation;
   }

   /**
    * Since we don't want to do any flush within state emission to avoid
    * partial state in a command buffer, it's important to make sure that
    * there is enough room to send both the DXDefineShader & DXBindShader
    * commands in the same command buffer.  So let's send both
    * commands in one command reservation.  If it fails, we'll undo
    * the shader creation and return an error.
    */
   ret = SVGA3D_vgpu10_DefineAndBindShader(swc, variant->gb_shader,
                                           variant->id, variant->type,
                                           len);

   if (ret != PIPE_OK)
      goto fail;

   return PIPE_OK;

fail:
   swc->shader_destroy(swc, variant->gb_shader);
   variant->gb_shader = NULL;

fail_no_allocation:
   util_bitmask_clear(svga->shader_id_bm, variant->id);
   variant->id = UTIL_BITMASK_INVALID_INDEX;

   return PIPE_ERROR_OUT_OF_MEMORY;
}

/**
 * Issue the SVGA3D commands to define a new shader.
 * \param variant  contains the shader tokens, etc.  The variant->id field
 *                 will be set here.
 */
enum pipe_error
svga_define_shader(struct svga_context *svga,
                   struct svga_shader_variant *variant)
{
   unsigned codeLen = variant->nr_tokens * sizeof(variant->tokens[0]);
   enum pipe_error ret;

   SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_DEFINESHADER);

   variant->id = UTIL_BITMASK_INVALID_INDEX;

   if (svga_have_gb_objects(svga)) {
      if (svga_have_vgpu10(svga))
         ret = define_gb_shader_vgpu10(svga, variant, codeLen);
      else
         ret = define_gb_shader_vgpu9(svga, variant, codeLen);
   }
   else {
      /* Allocate an integer ID for the shader */
      variant->id = util_bitmask_add(svga->shader_id_bm);
      if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
         ret = PIPE_ERROR_OUT_OF_MEMORY;
         goto done;
      }

      /* Issue SVGA3D device command to define the shader */
      ret = SVGA3D_DefineShader(svga->swc,
                                variant->id,
                                variant->type,
                                variant->tokens,
                                codeLen);
      if (ret != PIPE_OK) {
         /* free the ID */
         assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
         util_bitmask_clear(svga->shader_id_bm, variant->id);
         variant->id = UTIL_BITMASK_INVALID_INDEX;
      }
   }

done:
   SVGA_STATS_TIME_POP(svga_sws(svga));
   return ret;
}


/**
 * Issue the SVGA3D commands to set/bind a shader.
 * \param variant  the shader variant to bind, or NULL to unbind.
 */
enum pipe_error
svga_set_shader(struct svga_context *svga,
                SVGA3dShaderType type,
                struct svga_shader_variant *variant)
{
   enum pipe_error ret;
   unsigned id = variant ? variant->id : SVGA3D_INVALID_ID;

   assert(type == SVGA3D_SHADERTYPE_VS ||
          type == SVGA3D_SHADERTYPE_GS ||
          type == SVGA3D_SHADERTYPE_PS ||
          type == SVGA3D_SHADERTYPE_HS ||
          type == SVGA3D_SHADERTYPE_DS ||
          type == SVGA3D_SHADERTYPE_CS);

   if (svga_have_gb_objects(svga)) {
      struct svga_winsys_gb_shader *gbshader =
         variant ? variant->gb_shader : NULL;

      if (svga_have_vgpu10(svga))
         ret = SVGA3D_vgpu10_SetShader(svga->swc, type, gbshader, id);
      else
         ret = SVGA3D_SetGBShader(svga->swc, type, gbshader);
   }
   else {
      ret = SVGA3D_SetShader(svga->swc, type, id);
   }

   return ret;
}


struct svga_shader_variant *
svga_new_shader_variant(struct svga_context *svga, enum pipe_shader_type type)
{
   struct svga_shader_variant *variant;

   switch (type) {
   case PIPE_SHADER_FRAGMENT:
      variant = CALLOC(1, sizeof(struct svga_fs_variant));
      break;
   case PIPE_SHADER_GEOMETRY:
      variant = CALLOC(1, sizeof(struct svga_gs_variant));
      break;
   case PIPE_SHADER_VERTEX:
      variant = CALLOC(1, sizeof(struct svga_vs_variant));
      break;
   case PIPE_SHADER_TESS_EVAL:
      variant = CALLOC(1, sizeof(struct svga_tes_variant));
      break;
   case PIPE_SHADER_TESS_CTRL:
      variant = CALLOC(1, sizeof(struct svga_tcs_variant));
      break;
   case PIPE_SHADER_COMPUTE:
      variant = CALLOC(1, sizeof(struct svga_cs_variant));
      break;
   default:
      return NULL;
   }

   if (variant) {
      variant->type = svga_shader_type(type);
      svga->hud.num_shaders++;
   }
   return variant;
}


void
svga_destroy_shader_variant(struct svga_context *svga,
                            struct svga_shader_variant *variant)
{
   if (svga_have_gb_objects(svga) && variant->gb_shader) {
      if (svga_have_vgpu10(svga)) {
         struct svga_winsys_context *swc = svga->swc;
         swc->shader_destroy(swc, variant->gb_shader);
         SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id));
         util_bitmask_clear(svga->shader_id_bm, variant->id);
      }
      else {
         struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
         sws->shader_destroy(sws, variant->gb_shader);
      }
      variant->gb_shader = NULL;
   }
   else {
      if (variant->id != UTIL_BITMASK_INVALID_INDEX) {
         SVGA_RETRY(svga, SVGA3D_DestroyShader(svga->swc, variant->id,
                                               variant->type));
         util_bitmask_clear(svga->shader_id_bm, variant->id);
      }
   }

   FREE(variant->signature);
   FREE((unsigned *)variant->tokens);
   FREE(variant);

   svga->hud.num_shaders--;
}

/*
 * Rebind shaders.
 * Called at the beginning of every new command buffer to ensure that
 * shaders are properly paged-in.  Instead of sending the SetShader
 * command, this function sends a private allocation command to
 * page in a shader.  This avoids emitting redundant state to the device
 * just to page in a resource.
 */
enum pipe_error
svga_rebind_shaders(struct svga_context *svga)
{
   struct svga_winsys_context *swc = svga->swc;
   struct svga_hw_draw_state *hw = &svga->state.hw_draw;
   enum pipe_error ret;

   assert(svga_have_vgpu10(svga));

   /**
    * If the underlying winsys layer does not need resource rebinding,
    * just clear the rebind flags and return.
    */
   if (swc->resource_rebind == NULL) {
      svga->rebind.flags.vs = 0;
      svga->rebind.flags.gs = 0;
      svga->rebind.flags.fs = 0;
      svga->rebind.flags.tcs = 0;
      svga->rebind.flags.tes = 0;

      return PIPE_OK;
   }

   if (svga->rebind.flags.vs && hw->vs && hw->vs->gb_shader) {
      ret = swc->resource_rebind(swc, NULL, hw->vs->gb_shader, SVGA_RELOC_READ);
      if (ret != PIPE_OK)
         return ret;
   }
   svga->rebind.flags.vs = 0;

   if (svga->rebind.flags.gs && hw->gs && hw->gs->gb_shader) {
      ret = swc->resource_rebind(swc, NULL, hw->gs->gb_shader, SVGA_RELOC_READ);
      if (ret != PIPE_OK)
         return ret;
   }
   svga->rebind.flags.gs = 0;

   if (svga->rebind.flags.fs && hw->fs && hw->fs->gb_shader) {
      ret = swc->resource_rebind(swc, NULL, hw->fs->gb_shader, SVGA_RELOC_READ);
      if (ret != PIPE_OK)
         return ret;
   }
   svga->rebind.flags.fs = 0;

   if (svga->rebind.flags.tcs && hw->tcs && hw->tcs->gb_shader) {
      ret = swc->resource_rebind(swc, NULL, hw->tcs->gb_shader, SVGA_RELOC_READ);
      if (ret != PIPE_OK)
         return ret;
   }
   svga->rebind.flags.tcs = 0;

   if (svga->rebind.flags.tes && hw->tes && hw->tes->gb_shader) {
      ret = swc->resource_rebind(swc, NULL, hw->tes->gb_shader, SVGA_RELOC_READ);
      if (ret != PIPE_OK)
         return ret;
   }
   svga->rebind.flags.tes = 0;

   return PIPE_OK;
}


/**
 * Helper function to create a shader object.
 */
struct svga_shader *
svga_create_shader(struct pipe_context *pipe,
                   const struct pipe_shader_state *templ,
                   enum pipe_shader_type stage,
                   unsigned shader_structlen)
{
   struct svga_context *svga = svga_context(pipe);
   struct svga_shader *shader = CALLOC(1, shader_structlen);
   nir_shader *nir = (nir_shader *)templ->ir.nir;

   if (shader == NULL)
      return NULL;

   shader->id = svga->debug.shader_id++;
   shader->stage = stage;

   if (templ->type == PIPE_SHADER_IR_NIR) {
      /* nir_to_tgsi requires lowered images */
      NIR_PASS_V(nir, gl_nir_lower_images, false);
   }
   shader->tokens = pipe_shader_state_to_tgsi_tokens(pipe->screen, templ);
   shader->type = PIPE_SHADER_IR_TGSI;

   /* Collect basic info of the shader */
   svga_tgsi_scan_shader(shader);

   /* check for any stream output declarations */
   if (templ->stream_output.num_outputs) {
      shader->stream_output = svga_create_stream_output(svga, shader,
                                                        &templ->stream_output);
   }

   return shader;
}


/**
 * Helper function to compile a shader.
 * Depending on the shader IR type, it calls the corresponding
 * compile shader function.
 */
enum pipe_error
svga_compile_shader(struct svga_context *svga,
                    struct svga_shader *shader,
                    const struct svga_compile_key *key,
                    struct svga_shader_variant **out_variant)
{
   struct svga_shader_variant *variant = NULL;
   enum pipe_error ret = PIPE_ERROR;

   if (shader->type == PIPE_SHADER_IR_TGSI) {
      variant = svga_tgsi_compile_shader(svga, shader, key);
   } else {
      debug_printf("Unexpected nir shader\n");
      assert(0);
   }

   if (variant == NULL) {
      if (shader->get_dummy_shader != NULL) {
         debug_printf("Failed to compile shader, using dummy shader.\n");
         variant = shader->get_dummy_shader(svga, shader, key);
      }
   }
   else if (svga_shader_too_large(svga, variant)) {
      /* too big, use dummy shader */
      if (shader->get_dummy_shader != NULL) {
         debug_printf("Shader too large (%u bytes), using dummy shader.\n",
                      (unsigned)(variant->nr_tokens
                                 * sizeof(variant->tokens[0])));

         /* Free the too-large variant */
         svga_destroy_shader_variant(svga, variant);

         /* Use simple pass-through shader instead */
         variant = shader->get_dummy_shader(svga, shader, key);
      }
   }

   if (variant == NULL)
      return PIPE_ERROR;

   ret = svga_define_shader(svga, variant);
   if (ret != PIPE_OK) {
      svga_destroy_shader_variant(svga, variant);
      return ret;
   }

   *out_variant = variant;

   /* insert variant at head of linked list */
   variant->next = shader->variants;
   shader->variants = variant;

   return PIPE_OK;
}