/**********************************************************
 * Copyright 2008-2022 VMware, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 **********************************************************/

#include "util/u_bitmask.h"
#include "util/u_memory.h"
#include "util/format/u_format.h"
#include "svga_context.h"
#include "svga_cmd.h"
#include "svga_format.h"
#include "svga_shader.h"
#include "svga_tgsi.h"
#include "svga_resource_texture.h"
#include "VGPU10ShaderTokens.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_text.h"
#include "nir/nir_to_tgsi.h"


/**
 * This bit isn't really used anywhere. It only serves to help
 * generate a unique "signature" for the vertex shader output bitmask.
 * Shader input/output signatures are used to resolve shader linking
 * issues.
 */
#define FOG_GENERIC_BIT (((uint64_t) 1) << 63)


/**
 * Use the shader info to generate a bitmask indicating which generic
 * inputs are used by the shader. A set bit indicates that GENERIC[i]
 * is used.
 */
uint64_t
svga_get_generic_inputs_mask(const struct tgsi_shader_info *info)
{
   unsigned i;
   uint64_t mask = 0x0;

   for (i = 0; i < info->num_inputs; i++) {
      if (info->input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
         unsigned j = info->input_semantic_index[i];
         assert(j < sizeof(mask) * 8);
         mask |= ((uint64_t) 1) << j;
      }
   }

   return mask;
}


/**
 * Scan the shader info to return a bitmask of the generic outputs
 * written by the shader, with TGSI_SEMANTIC_FOG folded in via
 * FOG_GENERIC_BIT.
 */
uint64_t
svga_get_generic_outputs_mask(const struct tgsi_shader_info *info)
{
   unsigned i;
   uint64_t mask = 0x0;

   for (i = 0; i < info->num_outputs; i++) {
      switch (info->output_semantic_name[i]) {
      case TGSI_SEMANTIC_GENERIC:
         {
            unsigned j = info->output_semantic_index[i];
            assert(j < sizeof(mask) * 8);
            mask |= ((uint64_t) 1) << j;
         }
         break;
      case TGSI_SEMANTIC_FOG:
         mask |= FOG_GENERIC_BIT;
         break;
      }
   }

   return mask;
}


/**
 * Given a mask of used generic variables (as returned by the above
 * functions), fill in a table which maps those indices to small integers.
 * This table is used by the remap_generic_index() function in
 * svga_tgsi_decl_sm30.c
 * Note that small index 0 is reserved for texcoord[0], so remapping
 * starts at 1.
 * Example: if generics_mask = binary(1010) it means that GENERIC[1] and
 * GENERIC[3] are used. The remap_table will contain:
 *   table[1] = 1;
 *   table[3] = 2;
 * The remaining table entries are set to -1; svga_remap_generic_index()
 * will assign them the next free index on demand.
 */
void
svga_remap_generics(uint64_t generics_mask,
                    int8_t remap_table[MAX_GENERIC_VARYING])
{
   /* Note texcoord[0] is reserved so start at 1 */
   unsigned count = 1, i;

   for (i = 0; i < MAX_GENERIC_VARYING; i++) {
      remap_table[i] = -1;
   }

   /* for each bit set in generics_mask */
   while (generics_mask) {
      unsigned index = ffsll(generics_mask) - 1;
      remap_table[index] = count++;
      generics_mask &= ~((uint64_t) 1 << index);
   }
}
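
/* Typical pairing (a sketch of the intended flow, per the comments
 * above): the VS generic output mask feeds svga_remap_generics(), and
 * shader translation then calls svga_remap_generic_index() on each
 * GENERIC index it encounters to obtain the compacted register index.
 */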


/**
 * Use the generic remap table to map a TGSI generic varying variable
 * index to a small integer. If the remapping table doesn't have a
 * valid value for the given index (the table entry is -1) it means
 * the fragment shader doesn't use that VS output. Just allocate
 * the next free value in that case. Alternatively, we could cull
 * VS instructions that write to the register, or replace the register
 * with a dummy temp register.
 * XXX TODO: we should do one of the latter as it would save precious
 * texcoord registers.
 */
int
svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
                         int generic_index)
{
   assert(generic_index < MAX_GENERIC_VARYING);

   if (generic_index >= MAX_GENERIC_VARYING) {
      /* just don't return a random/garbage value */
      generic_index = MAX_GENERIC_VARYING - 1;
   }

   if (remap_table[generic_index] == -1) {
      /* This is a VS output that has no matching PS input. Find a
       * free index.
       */
      int i, max = 0;
      for (i = 0; i < MAX_GENERIC_VARYING; i++) {
         max = MAX2(max, remap_table[i]);
      }
      remap_table[generic_index] = max + 1;
   }

   return remap_table[generic_index];
}
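
/* Swizzle-combining tables, indexed by a sampler view's per-channel
 * pipe_swizzle value. Shader translation looks up the view's
 * swizzle_r/g/b/a in one of these tables to get the swizzle actually
 * applied when sampling; e.g. set_alpha forces the alpha channel to 1
 * while set_YYYY replicates the Y (stencil) channel into all components.
 */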

static const enum pipe_swizzle copy_alpha[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_Z,
   PIPE_SWIZZLE_W,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};

static const enum pipe_swizzle set_alpha[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_Z,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};

static const enum pipe_swizzle set_000X[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};

static const enum pipe_swizzle set_XXXX[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};

static const enum pipe_swizzle set_XXX1[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};

static const enum pipe_swizzle set_XXXY[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};

static const enum pipe_swizzle set_YYYY[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};
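

/**
 * Translate a pipe_format to the corresponding VGPU10 resource return
 * type, or VGPU10_RETURN_TYPE_MAX if the format has no match.
 */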
static VGPU10_RESOURCE_RETURN_TYPE
vgpu10_return_type(enum pipe_format format)
{
   if (util_format_is_unorm(format))
      return VGPU10_RETURN_TYPE_UNORM;
   else if (util_format_is_snorm(format))
      return VGPU10_RETURN_TYPE_SNORM;
   else if (util_format_is_pure_uint(format))
      return VGPU10_RETURN_TYPE_UINT;
   else if (util_format_is_pure_sint(format))
      return VGPU10_RETURN_TYPE_SINT;
   else if (util_format_is_float(format))
      return VGPU10_RETURN_TYPE_FLOAT;
   else
      return VGPU10_RETURN_TYPE_MAX;
}


/**
 * A helper function to return TRUE if the specified format
 * is a supported format for the sample_c instruction.
 */
static bool
isValidSampleCFormat(enum pipe_format format)
{
   return util_format_is_depth_or_stencil(format);
}


/**
 * Initialize the shader-neutral fields of svga_compile_key from context
 * state. This is basically the texture-related state.
 */
void
svga_init_shader_key_common(const struct svga_context *svga,
                            enum pipe_shader_type shader_type,
                            const struct svga_shader *shader,
                            struct svga_compile_key *key)
{
   unsigned i, idx = 0;
   unsigned sampler_slots = 0;

   assert(shader_type < ARRAY_SIZE(svga->curr.num_sampler_views));

   /* In case the number of samplers and the number of sampler views
    * don't match, loop over the larger of the two counts.
    */
   key->num_textures = MAX2(svga->curr.num_sampler_views[shader_type],
                            svga->curr.num_samplers[shader_type]);

   key->num_samplers = 0;

   /* Enable sampler state mapping only if GL43 is supported and
    * the number of samplers exceeds the SVGA limit or the sampler
    * state mapping environment variable is set.
    */
   boolean sampler_state_mapping =
      svga_use_sampler_state_mapping(svga, svga->curr.num_samplers[shader_type]);

   key->sampler_state_mapping =
      key->num_textures && sampler_state_mapping ? 1 : 0;

   for (i = 0; i < key->num_textures; i++) {
      struct pipe_sampler_view *view = svga->curr.sampler_views[shader_type][i];
      const struct svga_sampler_state
         *sampler = svga->curr.sampler[shader_type][i];

      if (view) {
         assert(view->texture);

         enum pipe_texture_target target = view->target;
         assert(target < (1 << 4)); /* texture_target:4 */

         key->tex[i].target = target;
         key->tex[i].sampler_return_type = vgpu10_return_type(view->format);
         key->tex[i].sampler_view = 1;

         /* 1D/2D array textures with one slice and cube map array textures
          * with one cube are treated as non-arrays by the SVGA3D device.
          * Set the is_array flag only if we know that we have more than 1
          * element. This will be used to select shader instruction/resource
          * types during shader translation.
          */
         switch (target) {
         case PIPE_TEXTURE_1D_ARRAY:
         case PIPE_TEXTURE_2D_ARRAY:
            key->tex[i].is_array = view->texture->array_size > 1;
            break;
         case PIPE_TEXTURE_CUBE_ARRAY:
            key->tex[i].is_array = view->texture->array_size > 6;
            break;
         default:
            ; /* nothing / silence compiler warning */
         }

         assert(view->texture->nr_samples < (1 << 5)); /* 5-bit field */
         key->tex[i].num_samples = view->texture->nr_samples;

         const enum pipe_swizzle *swizzle_tab;
         if (target == PIPE_BUFFER) {
            SVGA3dSurfaceFormat svga_format;
            unsigned tf_flags;

            assert(view->texture->target == PIPE_BUFFER);

            /* Apply any special swizzle mask for the view format if needed */

            svga_translate_texture_buffer_view_format(view->format,
                                                      &svga_format, &tf_flags);
            if (tf_flags & TF_000X)
               swizzle_tab = set_000X;
            else if (tf_flags & TF_XXXX)
               swizzle_tab = set_XXXX;
            else if (tf_flags & TF_XXX1)
               swizzle_tab = set_XXX1;
            else if (tf_flags & TF_XXXY)
               swizzle_tab = set_XXXY;
            else
               swizzle_tab = copy_alpha;
         }
         else {
            /* If we have a non-alpha view into an svga3d surface with an
             * alpha channel, then explicitly set the alpha channel to 1
             * when sampling. Note that we need to check the
             * actual device format to cover also imported surface cases.
             */
            swizzle_tab =
               (!util_format_has_alpha(view->format) &&
                svga_texture_device_format_has_alpha(view->texture)) ?
               set_alpha : copy_alpha;

            if (view->texture->format == PIPE_FORMAT_DXT1_RGB ||
                view->texture->format == PIPE_FORMAT_DXT1_SRGB)
               swizzle_tab = set_alpha;

            if (view->format == PIPE_FORMAT_X24S8_UINT ||
                view->format == PIPE_FORMAT_X32_S8X24_UINT)
               swizzle_tab = set_YYYY;

            /* Save the compare function as we need to handle
             * depth compare in the shader.
             */
            key->tex[i].compare_mode = sampler->compare_mode;
            key->tex[i].compare_func = sampler->compare_func;

            /* Set the compare_in_shader bit if the view format
             * is not a supported format for shadow compare.
             * In this case, we'll do the comparison in the shader.
             */
            if ((sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) &&
                !isValidSampleCFormat(view->format)) {
               key->tex[i].compare_in_shader = TRUE;
            }
         }

         key->tex[i].swizzle_r = swizzle_tab[view->swizzle_r];
         key->tex[i].swizzle_g = swizzle_tab[view->swizzle_g];
         key->tex[i].swizzle_b = swizzle_tab[view->swizzle_b];
         key->tex[i].swizzle_a = swizzle_tab[view->swizzle_a];
      }
      else {
         key->tex[i].sampler_view = 0;
      }

      if (sampler) {
         if (!sampler->normalized_coords) {
            if (view) {
               assert(idx < (1 << 5)); /* width_height_idx:5 bitfield */
               key->tex[i].width_height_idx = idx++;
            }
            key->tex[i].unnormalized = TRUE;
            ++key->num_unnormalized_coords;

            if (sampler->magfilter == SVGA3D_TEX_FILTER_NEAREST ||
                sampler->minfilter == SVGA3D_TEX_FILTER_NEAREST) {
               key->tex[i].texel_bias = TRUE;
            }
         }

         if (!sampler_state_mapping) {
            /* Use the same index if sampler state mapping is not supported */
            key->tex[i].sampler_index = i;
            key->num_samplers = i + 1;
         }
         else {
            /* The current samplers list can have redundant entries.
             * To keep the number of bound samplers within the max limit
             * supported by SVGA, recreate the list with unique sampler
             * state objects only.
             */

            /* Check to see if this sampler is already on the list.
             * If so, reuse that sampler's index.
             */
            for (unsigned j = 0; j <= i; j++) {
               if (svga->curr.sampler[shader_type][j] == sampler) {

                  if (!(sampler_slots & (1 << j))) {

                     /* If this sampler is not on the new list yet, give
                      * it the next sampler index, increment the sampler
                      * count, and mark it as added to the list.
                      */

                     unsigned next_index =
                        MIN2(key->num_samplers, SVGA3D_DX_MAX_SAMPLERS - 1);

                     key->tex[i].sampler_index = next_index;
                     key->num_samplers = next_index + 1;

                     if (sampler->compare_mode ==
                         PIPE_TEX_COMPARE_R_TO_TEXTURE) {
                        /* reserve one slot for the alternate sampler */
                        key->num_samplers++;
                     }

                     sampler_slots |= (1 << j);
                  }
                  else {
                     key->tex[i].sampler_index = key->tex[j].sampler_index;
                  }
                  break;
               }
            }
         }
      }
   }

   if (svga_have_gl43(svga)) {
      if (shader->info.uses_images || shader->info.uses_hw_atomic ||
          shader->info.uses_shader_buffers) {

         /* Save the uavSpliceIndex which is the index used for the first uav
          * in the draw pipeline. For compute, uavSpliceIndex is always 0.
          */
         if (shader_type != PIPE_SHADER_COMPUTE)
            key->uav_splice_index = svga->state.hw_draw.uavSpliceIndex;

         unsigned uav_splice_index = key->uav_splice_index;

         /* Also get the texture data type to be used in the uav declaration */
         const struct svga_image_view *cur_image_view =
            &svga->curr.image_views[shader_type][0];

         for (unsigned i = 0;
              i < ARRAY_SIZE(svga->curr.image_views[shader_type]);
              i++, cur_image_view++) {

            struct pipe_resource *resource = cur_image_view->desc.resource;

            if (resource) {
               key->images[i].return_type =
                  svga_get_texture_datatype(cur_image_view->desc.format);

               key->images[i].is_array = resource->array_size > 1;

               /* Save the image resource target in the shader key because
                * for single layer image view, the resource target in the
                * tgsi shader is changed to a different texture target.
                */
               key->images[i].resource_target = resource->target;
               if (resource->target == PIPE_TEXTURE_3D ||
                   resource->target == PIPE_TEXTURE_1D_ARRAY ||
                   resource->target == PIPE_TEXTURE_2D_ARRAY ||
                   resource->target == PIPE_TEXTURE_CUBE ||
                   resource->target == PIPE_TEXTURE_CUBE_ARRAY) {
                  key->images[i].is_single_layer =
                     cur_image_view->desc.u.tex.first_layer ==
                     cur_image_view->desc.u.tex.last_layer;
               }

               key->images[i].uav_index =
                  cur_image_view->uav_index + uav_splice_index;
            }
            else
               key->images[i].uav_index = SVGA3D_INVALID_ID;
         }

         const struct svga_shader_buffer *cur_sbuf =
            &svga->curr.shader_buffers[shader_type][0];

         for (unsigned i = 0;
              i < ARRAY_SIZE(svga->curr.shader_buffers[shader_type]);
              i++, cur_sbuf++) {

            if (cur_sbuf->resource)
               key->shader_buf_uav_index[i] =
                  cur_sbuf->uav_index + uav_splice_index;
            else
               key->shader_buf_uav_index[i] = SVGA3D_INVALID_ID;
         }

         const struct svga_shader_buffer *cur_buf =
            &svga->curr.atomic_buffers[0];

         for (unsigned i = 0; i < ARRAY_SIZE(svga->curr.atomic_buffers);
              i++, cur_buf++) {

            if (cur_buf->resource)
               key->atomic_buf_uav_index[i] =
                  cur_buf->uav_index + uav_splice_index;
            else
               key->atomic_buf_uav_index[i] = SVGA3D_INVALID_ID;
         }

         key->image_size_used = shader->info.uses_image_size;
      }

      /* Save info about which constant buffers are to be viewed
       * as raw buffers in the shader key.
       */
      if (shader->info.const_buffers_declared &
          svga->state.raw_constbufs[shader_type]) {
         key->raw_buffers = svga->state.raw_constbufs[shader_type];

         /* beginning index for srv for raw buffers */
         key->srv_raw_buf_index = PIPE_MAX_SAMPLERS;
      }
   }

   key->clamp_vertex_color = svga->curr.rast ?
      svga->curr.rast->templ.clamp_vertex_color : 0;
}


/** Search for a compiled shader variant with the same compile key */
struct svga_shader_variant *
svga_search_shader_key(const struct svga_shader *shader,
                       const struct svga_compile_key *key)
{
   struct svga_shader_variant *variant = shader->variants;

   assert(key);

   for ( ; variant; variant = variant->next) {
      if (svga_compile_keys_equal(key, &variant->key))
         return variant;
   }
   return NULL;
}

/** Search for a shader with the same token key */
struct svga_shader *
svga_search_shader_token_key(struct svga_shader *pshader,
                             const struct svga_token_key *key)
{
   struct svga_shader *shader = pshader;

   assert(key);

   for ( ; shader; shader = shader->next) {
      if (memcmp(key, &shader->token_key, sizeof(struct svga_token_key)) == 0)
         return shader;
   }
   return NULL;
}

/**
 * Helper function to define a gb shader for a non-vgpu10 device.
 */
static enum pipe_error
define_gb_shader_vgpu9(struct svga_context *svga,
                       struct svga_shader_variant *variant,
                       unsigned codeLen)
{
   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
   enum pipe_error ret;

   /**
    * Create gb memory for the shader and upload the shader code.
    * The kernel module will allocate an id for the shader and issue
    * the DefineGBShader command.
    */
   variant->gb_shader = sws->shader_create(sws, variant->type,
                                           variant->tokens, codeLen);

   svga->hud.shader_mem_used += codeLen;

   if (!variant->gb_shader)
      return PIPE_ERROR_OUT_OF_MEMORY;

   ret = SVGA3D_BindGBShader(svga->swc, variant->gb_shader);

   return ret;
}

/**
 * Helper function to define a gb shader for a vgpu10 device.
 */
static enum pipe_error
define_gb_shader_vgpu10(struct svga_context *svga,
                        struct svga_shader_variant *variant,
                        unsigned codeLen)
{
   struct svga_winsys_context *swc = svga->swc;
   enum pipe_error ret;
   unsigned len = codeLen + variant->signatureLen;

   /**
    * Shaders in a VGPU10-enabled device reside in the device COTable.
    * The SVGA driver will allocate an integer ID for the shader and
    * issue DXDefineShader and DXBindShader commands.
    */
   variant->id = util_bitmask_add(svga->shader_id_bm);
   if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
      return PIPE_ERROR_OUT_OF_MEMORY;
   }

   /* Create gb memory for the shader and upload the shader code */
   variant->gb_shader = swc->shader_create(swc,
                                           variant->id, variant->type,
                                           variant->tokens, codeLen,
                                           variant->signature,
                                           variant->signatureLen);

   svga->hud.shader_mem_used += len;

   if (!variant->gb_shader) {
      /* Free the shader ID */
      assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
      goto fail_no_allocation;
   }

   /**
    * Since we don't want to do any flush within state emission to avoid
    * partial state in a command buffer, it's important to make sure that
    * there is enough room to send both the DXDefineShader & DXBindShader
    * commands in the same command buffer. So let's send both
    * commands in one command reservation. If it fails, we'll undo
    * the shader creation and return an error.
    */
   ret = SVGA3D_vgpu10_DefineAndBindShader(swc, variant->gb_shader,
                                           variant->id, variant->type,
                                           len);

   if (ret != PIPE_OK)
      goto fail;

   return PIPE_OK;

fail:
   swc->shader_destroy(swc, variant->gb_shader);
   variant->gb_shader = NULL;

fail_no_allocation:
   util_bitmask_clear(svga->shader_id_bm, variant->id);
   variant->id = UTIL_BITMASK_INVALID_INDEX;

   return PIPE_ERROR_OUT_OF_MEMORY;
}

/**
 * Issue the SVGA3D commands to define a new shader.
 * \param variant  contains the shader tokens, etc. The variant->id field
 *                 will be set here.
 */
enum pipe_error
svga_define_shader(struct svga_context *svga,
                   struct svga_shader_variant *variant)
{
   unsigned codeLen = variant->nr_tokens * sizeof(variant->tokens[0]);
   enum pipe_error ret;

   SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_DEFINESHADER);

   variant->id = UTIL_BITMASK_INVALID_INDEX;

   if (svga_have_gb_objects(svga)) {
      if (svga_have_vgpu10(svga))
         ret = define_gb_shader_vgpu10(svga, variant, codeLen);
      else
         ret = define_gb_shader_vgpu9(svga, variant, codeLen);
   }
   else {
      /* Allocate an integer ID for the shader */
      variant->id = util_bitmask_add(svga->shader_id_bm);
      if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
         ret = PIPE_ERROR_OUT_OF_MEMORY;
         goto done;
      }

      /* Issue SVGA3D device command to define the shader */
      ret = SVGA3D_DefineShader(svga->swc,
                                variant->id,
                                variant->type,
                                variant->tokens,
                                codeLen);
      if (ret != PIPE_OK) {
         /* free the ID */
         assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
         util_bitmask_clear(svga->shader_id_bm, variant->id);
         variant->id = UTIL_BITMASK_INVALID_INDEX;
      }
   }

done:
   SVGA_STATS_TIME_POP(svga_sws(svga));
   return ret;
}


/**
 * Issue the SVGA3D commands to set/bind a shader.
 * \param variant  the shader variant to bind, or NULL to unbind.
 */
enum pipe_error
svga_set_shader(struct svga_context *svga,
                SVGA3dShaderType type,
                struct svga_shader_variant *variant)
{
   enum pipe_error ret;
   unsigned id = variant ? variant->id : SVGA3D_INVALID_ID;

   assert(type == SVGA3D_SHADERTYPE_VS ||
          type == SVGA3D_SHADERTYPE_GS ||
          type == SVGA3D_SHADERTYPE_PS ||
          type == SVGA3D_SHADERTYPE_HS ||
          type == SVGA3D_SHADERTYPE_DS ||
          type == SVGA3D_SHADERTYPE_CS);

   if (svga_have_gb_objects(svga)) {
      struct svga_winsys_gb_shader *gbshader =
         variant ? variant->gb_shader : NULL;

      if (svga_have_vgpu10(svga))
         ret = SVGA3D_vgpu10_SetShader(svga->swc, type, gbshader, id);
      else
         ret = SVGA3D_SetGBShader(svga->swc, type, gbshader);
   }
   else {
      ret = SVGA3D_SetShader(svga->swc, type, id);
   }

   return ret;
}
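

/**
 * Allocate a new shader variant of the appropriate stage-specific
 * subclass size, zero-initialized. Returns NULL for an unknown shader
 * type or on allocation failure.
 */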
struct svga_shader_variant *
svga_new_shader_variant(struct svga_context *svga, enum pipe_shader_type type)
{
   struct svga_shader_variant *variant;

   switch (type) {
   case PIPE_SHADER_FRAGMENT:
      variant = CALLOC(1, sizeof(struct svga_fs_variant));
      break;
   case PIPE_SHADER_GEOMETRY:
      variant = CALLOC(1, sizeof(struct svga_gs_variant));
      break;
   case PIPE_SHADER_VERTEX:
      variant = CALLOC(1, sizeof(struct svga_vs_variant));
      break;
   case PIPE_SHADER_TESS_EVAL:
      variant = CALLOC(1, sizeof(struct svga_tes_variant));
      break;
   case PIPE_SHADER_TESS_CTRL:
      variant = CALLOC(1, sizeof(struct svga_tcs_variant));
      break;
   case PIPE_SHADER_COMPUTE:
      variant = CALLOC(1, sizeof(struct svga_cs_variant));
      break;
   default:
      return NULL;
   }

   if (variant) {
      variant->type = svga_shader_type(type);
      svga->hud.num_shaders++;
   }
   return variant;
}
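

/**
 * Free a shader variant and release the device resources it holds
 * (the gb shader object and/or the shader id).
 */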
void
svga_destroy_shader_variant(struct svga_context *svga,
                            struct svga_shader_variant *variant)
{
   if (svga_have_gb_objects(svga) && variant->gb_shader) {
      if (svga_have_vgpu10(svga)) {
         struct svga_winsys_context *swc = svga->swc;
         swc->shader_destroy(swc, variant->gb_shader);
         SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id));
         util_bitmask_clear(svga->shader_id_bm, variant->id);
      }
      else {
         struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
         sws->shader_destroy(sws, variant->gb_shader);
      }
      variant->gb_shader = NULL;
   }
   else {
      if (variant->id != UTIL_BITMASK_INVALID_INDEX) {
         SVGA_RETRY(svga, SVGA3D_DestroyShader(svga->swc, variant->id,
                                               variant->type));
         util_bitmask_clear(svga->shader_id_bm, variant->id);
      }
   }

   FREE(variant->signature);
   FREE((unsigned *)variant->tokens);
   FREE(variant);

   svga->hud.num_shaders--;
}

/*
 * Rebind shaders.
 * Called at the beginning of every new command buffer to ensure that
 * shaders are properly paged in. Instead of sending the SetShader
 * command, this function sends a private allocation command to
 * page in a shader. This avoids emitting redundant state to the device
 * just to page in a resource.
 */
enum pipe_error
svga_rebind_shaders(struct svga_context *svga)
{
   struct svga_winsys_context *swc = svga->swc;
   struct svga_hw_draw_state *hw = &svga->state.hw_draw;
   enum pipe_error ret;

   assert(svga_have_vgpu10(svga));

   /**
    * If the underlying winsys layer does not need resource rebinding,
    * just clear the rebind flags and return.
    */
   if (swc->resource_rebind == NULL) {
      svga->rebind.flags.vs = 0;
      svga->rebind.flags.gs = 0;
      svga->rebind.flags.fs = 0;
      svga->rebind.flags.tcs = 0;
      svga->rebind.flags.tes = 0;

      return PIPE_OK;
   }

   if (svga->rebind.flags.vs && hw->vs && hw->vs->gb_shader) {
      ret = swc->resource_rebind(swc, NULL, hw->vs->gb_shader, SVGA_RELOC_READ);
      if (ret != PIPE_OK)
         return ret;
   }
   svga->rebind.flags.vs = 0;

   if (svga->rebind.flags.gs && hw->gs && hw->gs->gb_shader) {
      ret = swc->resource_rebind(swc, NULL, hw->gs->gb_shader, SVGA_RELOC_READ);
      if (ret != PIPE_OK)
         return ret;
   }
   svga->rebind.flags.gs = 0;

   if (svga->rebind.flags.fs && hw->fs && hw->fs->gb_shader) {
      ret = swc->resource_rebind(swc, NULL, hw->fs->gb_shader, SVGA_RELOC_READ);
      if (ret != PIPE_OK)
         return ret;
   }
   svga->rebind.flags.fs = 0;

   if (svga->rebind.flags.tcs && hw->tcs && hw->tcs->gb_shader) {
      ret = swc->resource_rebind(swc, NULL, hw->tcs->gb_shader,
                                 SVGA_RELOC_READ);
      if (ret != PIPE_OK)
         return ret;
   }
   svga->rebind.flags.tcs = 0;

   if (svga->rebind.flags.tes && hw->tes && hw->tes->gb_shader) {
      ret = swc->resource_rebind(swc, NULL, hw->tes->gb_shader,
                                 SVGA_RELOC_READ);
      if (ret != PIPE_OK)
         return ret;
   }
   svga->rebind.flags.tes = 0;

   return PIPE_OK;
}


/**
 * Helper function to create a shader object.
 */
struct svga_shader *
svga_create_shader(struct pipe_context *pipe,
                   const struct pipe_shader_state *templ,
                   enum pipe_shader_type stage,
                   unsigned shader_structlen)
{
   struct svga_context *svga = svga_context(pipe);
   struct svga_shader *shader = CALLOC(1, shader_structlen);

   if (shader == NULL)
      return NULL;

   shader->id = svga->debug.shader_id++;
   shader->type = templ->type;
   shader->stage = stage;

   shader->tokens = pipe_shader_state_to_tgsi_tokens(pipe->screen, templ);

   if (shader->type == PIPE_SHADER_IR_TGSI) {
      /* Collect basic info of the shader */
      svga_tgsi_scan_shader(shader);
   }
   else {
      debug_printf("Unexpected nir shader\n");
      assert(0);
   }

   /* check for any stream output declarations */
   if (templ->stream_output.num_outputs) {
      shader->stream_output = svga_create_stream_output(svga, shader,
                                                        &templ->stream_output);
   }

   return shader;
}


/**
 * Helper function to compile a shader.
 * Depending on the shader IR type, it calls the corresponding
 * compile shader function.
 */
enum pipe_error
svga_compile_shader(struct svga_context *svga,
                    struct svga_shader *shader,
                    const struct svga_compile_key *key,
                    struct svga_shader_variant **out_variant)
{
   struct svga_shader_variant *variant = NULL;
   enum pipe_error ret = PIPE_ERROR;

   if (shader->type == PIPE_SHADER_IR_TGSI) {
      variant = svga_tgsi_compile_shader(svga, shader, key);
   } else {
      debug_printf("Unexpected nir shader\n");
      assert(0);
   }

   if (variant == NULL) {
      if (shader->get_dummy_shader != NULL) {
         debug_printf("Failed to compile shader, using dummy shader.\n");
         variant = shader->get_dummy_shader(svga, shader, key);
      }
   }
   else if (svga_shader_too_large(svga, variant)) {
      /* too large, use a dummy shader instead */
      if (shader->get_dummy_shader != NULL) {
         debug_printf("Shader too large (%u bytes), using dummy shader.\n",
                      (unsigned)(variant->nr_tokens
                                 * sizeof(variant->tokens[0])));

         /* Free the too-large variant */
         svga_destroy_shader_variant(svga, variant);

         /* Use simple pass-through shader instead */
         variant = shader->get_dummy_shader(svga, shader, key);
      }
   }

   if (variant == NULL)
      return PIPE_ERROR;

   ret = svga_define_shader(svga, variant);
   if (ret != PIPE_OK) {
      svga_destroy_shader_variant(svga, variant);
      return ret;
   }

   *out_variant = variant;

   /* insert variant at head of linked list */
   variant->next = shader->variants;
   shader->variants = variant;

   return PIPE_OK;
}