/**********************************************************
 * Copyright 2008-2023 VMware, Inc.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 **********************************************************/

#include "util/u_bitmask.h"
#include "util/u_memory.h"
#include "util/format/u_format.h"
#include "svga_context.h"
#include "svga_cmd.h"
#include "svga_format.h"
#include "svga_shader.h"
#include "svga_tgsi.h"
#include "svga_resource_texture.h"
#include "VGPU10ShaderTokens.h"

#include "compiler/nir/nir.h"
#include "compiler/glsl/gl_nir.h"
#include "nir/nir_to_tgsi.h"


/**
 * This bit isn't really used anywhere.  It only serves to help
 * generate a unique "signature" for the vertex shader output bitmask.
 * Shader input/output signatures are used to resolve shader linking
 * issues.
 */
#define FOG_GENERIC_BIT (((uint64_t) 1) << 63)


/**
 * Use the shader info to generate a bitmask indicating which generic
 * inputs are used by the shader.  A set bit indicates that GENERIC[i]
 * is used.
 */
uint64_t
svga_get_generic_inputs_mask(const struct tgsi_shader_info *info)
{
   unsigned i;
   uint64_t mask = 0x0;

   for (i = 0; i < info->num_inputs; i++) {
      if (info->input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
         unsigned j = info->input_semantic_index[i];
         assert(j < sizeof(mask) * 8);
         mask |= ((uint64_t) 1) << j;
      }
   }

   return mask;
}


/**
 * Scan the shader info to return a bitmask of written generic outputs;
 * a written fog output is folded in as FOG_GENERIC_BIT.
 */
uint64_t
svga_get_generic_outputs_mask(const struct tgsi_shader_info *info)
{
   unsigned i;
   uint64_t mask = 0x0;

   for (i = 0; i < info->num_outputs; i++) {
      switch (info->output_semantic_name[i]) {
      case TGSI_SEMANTIC_GENERIC:
         {
            unsigned j = info->output_semantic_index[i];
            assert(j < sizeof(mask) * 8);
            mask |= ((uint64_t) 1) << j;
         }
         break;
      case TGSI_SEMANTIC_FOG:
         mask |= FOG_GENERIC_BIT;
         break;
      }
   }

   return mask;
}


/**
 * Given a mask of used generic variables (as returned by the above
 * functions), fill in a table which maps those indexes to small integers.
 * This table is used by the remap_generic_index() function in
 * svga_tgsi_decl_sm30.c.
 * Since texcoord[0] is reserved, the remapped indexes start at 1.
 * Example: if generics_mask = binary(1010), GENERIC[1] and GENERIC[3]
 * are used.  The remap_table will contain:
 *   table[1] = 1;
 *   table[3] = 2;
 * The remaining entries are set to -1 and are assigned the next unused
 * index on demand by svga_remap_generic_index() below.
 */
void
svga_remap_generics(uint64_t generics_mask,
                    int8_t remap_table[MAX_GENERIC_VARYING])
{
   /* Note: texcoord[0] is reserved, so start at 1 */
   unsigned count = 1, i;

   for (i = 0; i < MAX_GENERIC_VARYING; i++) {
      remap_table[i] = -1;
   }

   /* for each bit set in generics_mask */
   while (generics_mask) {
      unsigned index = ffsll(generics_mask) - 1;
      remap_table[index] = count++;
      generics_mask &= ~((uint64_t) 1 << index);
   }
}


/**
 * Use the generic remap table to map a TGSI generic varying variable
 * index to a small integer.  If the remapping table doesn't have a
 * valid value for the given index (the table entry is -1), it means
 * the fragment shader doesn't use that VS output.  Just allocate
 * the next free value in that case.  Alternately, we could cull
 * VS instructions that write to the register, or replace the register
 * with a dummy temp register.
 * XXX TODO: we should do one of the latter, as it would save precious
 * texcoord registers.
 */
int
svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
                         int generic_index)
{
   assert(generic_index < MAX_GENERIC_VARYING);

   if (generic_index >= MAX_GENERIC_VARYING) {
      /* just don't return a random/garbage value */
      generic_index = MAX_GENERIC_VARYING - 1;
   }

   if (remap_table[generic_index] == -1) {
      /* This is a VS output that has no matching PS input.  Find a
       * free index.
       */
      int i, max = 0;
      for (i = 0; i < MAX_GENERIC_VARYING; i++) {
         max = MAX2(max, remap_table[i]);
      }
      remap_table[generic_index] = max + 1;
   }

   return remap_table[generic_index];
}
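
/*
 * Usage sketch (illustrative only, not part of the driver): how the two
 * remap helpers above combine for generics_mask = binary(1010), i.e.
 * GENERIC[1] and GENERIC[3] used:
 *
 *    int8_t table[MAX_GENERIC_VARYING];
 *    svga_remap_generics(UINT64_C(0x0a), table);
 *    // table[1] == 1, table[3] == 2 (index 0 is reserved for texcoord[0])
 *    svga_remap_generic_index(table, 3);  // returns 2
 *    svga_remap_generic_index(table, 0);  // unused VS output: allocates 3
 */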

static const enum pipe_swizzle copy_alpha[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_Z,
   PIPE_SWIZZLE_W,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};

static const enum pipe_swizzle set_alpha[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_Z,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};

static const enum pipe_swizzle set_000X[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};

static const enum pipe_swizzle set_XXXX[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};

static const enum pipe_swizzle set_XXX1[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};

static const enum pipe_swizzle set_XXXY[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};

static const enum pipe_swizzle set_YYYY[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};
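
/*
 * Note (explanatory comment, added for clarity): each table above is
 * indexed by a sampler view's per-channel swizzle and yields the swizzle
 * recorded in the shader key, as done in svga_init_shader_key_common()
 * below.  For example, with swizzle_tab = set_alpha:
 *
 *    key->tex[i].swizzle_a = set_alpha[view->swizzle_a];
 *    // view->swizzle_a == PIPE_SWIZZLE_W  ->  PIPE_SWIZZLE_1,
 *    // forcing the sampled alpha channel to 1.
 */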


static VGPU10_RESOURCE_RETURN_TYPE
vgpu10_return_type(enum pipe_format format)
{
   if (util_format_is_unorm(format))
      return VGPU10_RETURN_TYPE_UNORM;
   else if (util_format_is_snorm(format))
      return VGPU10_RETURN_TYPE_SNORM;
   else if (util_format_is_pure_uint(format))
      return VGPU10_RETURN_TYPE_UINT;
   else if (util_format_is_pure_sint(format))
      return VGPU10_RETURN_TYPE_SINT;
   else if (util_format_is_float(format))
      return VGPU10_RETURN_TYPE_FLOAT;
   else
      return VGPU10_RETURN_TYPE_MAX;
}
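
/*
 * Illustrative mapping (not exhaustive): e.g. PIPE_FORMAT_R8G8B8A8_UNORM
 * yields VGPU10_RETURN_TYPE_UNORM and PIPE_FORMAT_R32G32B32A32_FLOAT
 * yields VGPU10_RETURN_TYPE_FLOAT, while any unclassified format falls
 * through to VGPU10_RETURN_TYPE_MAX as a "no valid return type" marker.
 */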


/**
 * A helper function that returns TRUE if the specified format
 * is a supported format for the sample_c instruction.
 */
static bool
isValidSampleCFormat(enum pipe_format format)
{
   return util_format_is_depth_or_stencil(format);
}


/**
 * Initialize the shader-neutral fields of svga_compile_key from context
 * state.  This is basically the texture-related state.
 */
void
svga_init_shader_key_common(const struct svga_context *svga,
                            enum pipe_shader_type shader_type,
                            const struct svga_shader *shader,
                            struct svga_compile_key *key)
{
   unsigned i, idx = 0;
   unsigned sampler_slots = 0;

   assert(shader_type < ARRAY_SIZE(svga->curr.num_sampler_views));

   /* In case the number of samplers and sampler_views doesn't match,
    * loop over the upper of the two counts.
    */
   key->num_textures = MAX2(svga->curr.num_sampler_views[shader_type],
                            svga->curr.num_samplers[shader_type]);

   if (!shader->info.uses_samplers)
      key->num_textures = 0;

   key->num_samplers = 0;

   /* Enable sampler state mapping only if GL4.3 is supported and either
    * the number of samplers exceeds the SVGA limit or the sampler state
    * mapping environment variable is set.
    */
   bool sampler_state_mapping =
      svga_use_sampler_state_mapping(svga, svga->curr.num_samplers[shader_type]);

   key->sampler_state_mapping =
      key->num_textures && sampler_state_mapping ? 1 : 0;

   for (i = 0; i < key->num_textures; i++) {
      struct pipe_sampler_view *view = svga->curr.sampler_views[shader_type][i];
      const struct svga_sampler_state
         *sampler = svga->curr.sampler[shader_type][i];

      if (view) {
         assert(view->texture);

         enum pipe_texture_target target = view->target;
         assert(target < (1 << 4)); /* texture_target:4 */

         key->tex[i].target = target;
         key->tex[i].sampler_return_type = vgpu10_return_type(view->format);
         key->tex[i].sampler_view = 1;

         /* 1D/2D array textures with one slice and cube map array textures
          * with one cube are treated as non-arrays by the SVGA3D device.
          * Set the is_array flag only if we know that we have more than 1
          * element.  This will be used to select shader instruction/resource
          * types during shader translation.
          */
         switch (target) {
         case PIPE_TEXTURE_1D_ARRAY:
         case PIPE_TEXTURE_2D_ARRAY:
            key->tex[i].is_array = view->texture->array_size > 1;
            break;
         case PIPE_TEXTURE_CUBE_ARRAY:
            key->tex[i].is_array = view->texture->array_size > 6;
            break;
         default:
            ; /* nothing / silence compiler warning */
         }

         assert(view->texture->nr_samples < (1 << 5)); /* 5-bit field */
         key->tex[i].num_samples = view->texture->nr_samples;

         const enum pipe_swizzle *swizzle_tab;
         if (target == PIPE_BUFFER) {
            SVGA3dSurfaceFormat svga_format;
            unsigned tf_flags;

            assert(view->texture->target == PIPE_BUFFER);

            /* Apply any special swizzle mask for the view format if needed */

            svga_translate_texture_buffer_view_format(view->format,
                                                      &svga_format, &tf_flags);
            if (tf_flags & TF_000X)
               swizzle_tab = set_000X;
            else if (tf_flags & TF_XXXX)
               swizzle_tab = set_XXXX;
            else if (tf_flags & TF_XXX1)
               swizzle_tab = set_XXX1;
            else if (tf_flags & TF_XXXY)
               swizzle_tab = set_XXXY;
            else
               swizzle_tab = copy_alpha;
         }
         else {
            /* If we have a non-alpha view into an svga3d surface with an
             * alpha channel, then explicitly set the alpha channel to 1
             * when sampling.  Note that we need to check the actual device
             * format to cover also imported surface cases.
             */
            swizzle_tab =
               (!util_format_has_alpha(view->format) &&
                svga_texture_device_format_has_alpha(view->texture)) ?
                set_alpha : copy_alpha;

            if (view->texture->format == PIPE_FORMAT_DXT1_RGB ||
                view->texture->format == PIPE_FORMAT_DXT1_SRGB)
               swizzle_tab = set_alpha;

            if (view->format == PIPE_FORMAT_X24S8_UINT ||
                view->format == PIPE_FORMAT_X32_S8X24_UINT)
               swizzle_tab = set_YYYY;

            /* Save the compare function as we need to handle
             * depth compare in the shader.
             */
            key->tex[i].compare_mode = sampler->compare_mode;
            key->tex[i].compare_func = sampler->compare_func;

            /* Set the compare_in_shader bit if the view format
             * is not a supported format for shadow compare.
             * In this case, we'll do the comparison in the shader.
             */
            if ((sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) &&
                !isValidSampleCFormat(view->format)) {
               key->tex[i].compare_in_shader = true;
            }
         }

         key->tex[i].swizzle_r = swizzle_tab[view->swizzle_r];
         key->tex[i].swizzle_g = swizzle_tab[view->swizzle_g];
         key->tex[i].swizzle_b = swizzle_tab[view->swizzle_b];
         key->tex[i].swizzle_a = swizzle_tab[view->swizzle_a];
      }
      else {
         key->tex[i].sampler_view = 0;
      }

      if (sampler) {
         if (!sampler->normalized_coords) {
            if (view) {
               assert(idx < (1 << 5));  /* width_height_idx:5 bitfield */
               key->tex[i].width_height_idx = idx++;
            }
            key->tex[i].unnormalized = true;
            ++key->num_unnormalized_coords;

            if (sampler->magfilter == SVGA3D_TEX_FILTER_NEAREST ||
                sampler->minfilter == SVGA3D_TEX_FILTER_NEAREST) {
               key->tex[i].texel_bias = true;
            }
         }

         if (!sampler_state_mapping) {
            /* Use the same index if sampler state mapping is not supported */
            key->tex[i].sampler_index = i;
            key->num_samplers = i + 1;
         }
         else {
            /* The current samplers list can have redundant entries.
             * To keep the number of bound samplers within the max limit
             * supported by SVGA, we'll recreate the list with unique
             * sampler state objects only.
             */

            /* Check to see if this sampler is already on the list.
             * If so, reuse that entry's sampler index.
             */
            for (unsigned j = 0; j <= i; j++) {
               if (svga->curr.sampler[shader_type][j] == sampler) {
                  if (!(sampler_slots & (1 << j))) {
                     /* If this sampler hasn't been added to the new list
                      * yet, assign it the next free sampler index,
                      * increment the sampler count, and mark this sampler
                      * as added to the list.
                      */
                     unsigned next_index =
                        MIN2(key->num_samplers, SVGA3D_DX_MAX_SAMPLERS-1);

                     key->tex[i].sampler_index = next_index;
                     key->num_samplers = next_index + 1;

                     if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
                        /* reserve one slot for the alternate sampler */
                        key->num_samplers++;
                     }

                     sampler_slots |= (1 << j);
                  }
                  else {
                     key->tex[i].sampler_index = key->tex[j].sampler_index;
                  }
                  break;
               }
            }
         }
      }
   }

   if (svga_have_gl43(svga)) {

      /* Save info about which constant buffers are to be viewed
       * as srv raw buffers in the shader key.
       */
      if (shader->info.const_buffers_declared &
          svga->state.raw_constbufs[shader_type]) {
         key->raw_constbufs = svga->state.raw_constbufs[shader_type] &
                              shader->info.const_buffers_declared;
      }

      /* starting srv index for raw constant buffers */
      key->srv_raw_constbuf_index = PIPE_MAX_SAMPLERS;

      if (shader->info.uses_images || shader->info.uses_hw_atomic ||
          shader->info.uses_shader_buffers) {

         /* Save the uavSpliceIndex, which is the index used for the first
          * uav in the draw pipeline.  For compute, uavSpliceIndex is
          * always 0.
          */
         if (shader_type != PIPE_SHADER_COMPUTE)
            key->uav_splice_index = svga->state.hw_draw.uavSpliceIndex;

         unsigned uav_splice_index = key->uav_splice_index;

         /* Also get the texture data type to be used in the uav declaration */
         const struct svga_image_view *cur_image_view =
            &svga->curr.image_views[shader_type][0];

         for (unsigned i = 0; i < ARRAY_SIZE(svga->curr.image_views[shader_type]);
              i++, cur_image_view++) {

            struct pipe_resource *resource = cur_image_view->desc.resource;

            if (resource) {
               key->images[i].return_type =
                  svga_get_texture_datatype(cur_image_view->desc.format);

               key->images[i].is_array = resource->array_size > 1;

               /* Save the image resource target in the shader key because
                * for a single-layer image view, the resource target in the
                * tgsi shader is changed to a different texture target.
                */
               key->images[i].resource_target = resource->target;
               if (resource->target == PIPE_TEXTURE_3D ||
                   resource->target == PIPE_TEXTURE_1D_ARRAY ||
                   resource->target == PIPE_TEXTURE_2D_ARRAY ||
                   resource->target == PIPE_TEXTURE_CUBE ||
                   resource->target == PIPE_TEXTURE_CUBE_ARRAY) {
                  key->images[i].is_single_layer =
                     cur_image_view->desc.u.tex.first_layer ==
                     cur_image_view->desc.u.tex.last_layer;
               }

               key->images[i].uav_index = cur_image_view->uav_index + uav_splice_index;
            }
            else
               key->images[i].uav_index = SVGA3D_INVALID_ID;
         }

         const struct svga_shader_buffer *cur_sbuf =
            &svga->curr.shader_buffers[shader_type][0];

         /* Save info about which shader buffers are to be viewed
          * as srv raw buffers in the shader key.
          */
         if (shader->info.shader_buffers_declared &
             svga->state.raw_shaderbufs[shader_type]) {
            key->raw_shaderbufs = svga->state.raw_shaderbufs[shader_type] &
                                  shader->info.shader_buffers_declared;
            key->srv_raw_shaderbuf_index = key->srv_raw_constbuf_index +
                                           SVGA_MAX_CONST_BUFS;
         }

         for (unsigned i = 0; i < ARRAY_SIZE(svga->curr.shader_buffers[shader_type]);
              i++, cur_sbuf++) {

            if (cur_sbuf->resource && (!(key->raw_shaderbufs & (1 << i))))
               key->shader_buf_uav_index[i] = cur_sbuf->uav_index + uav_splice_index;
            else
               key->shader_buf_uav_index[i] = SVGA3D_INVALID_ID;
         }

         const struct svga_shader_buffer *cur_buf = &svga->curr.atomic_buffers[0];

         for (unsigned i = 0; i < ARRAY_SIZE(svga->curr.atomic_buffers);
              i++, cur_buf++) {

            if (cur_buf->resource)
               key->atomic_buf_uav_index[i] = cur_buf->uav_index + uav_splice_index;
            else
               key->atomic_buf_uav_index[i] = SVGA3D_INVALID_ID;
         }

         key->image_size_used = shader->info.uses_image_size;
      }
   }

   key->clamp_vertex_color = svga->curr.rast ?
                             svga->curr.rast->templ.clamp_vertex_color : 0;
}


/** Search for a compiled shader variant with the same compile key */
struct svga_shader_variant *
svga_search_shader_key(const struct svga_shader *shader,
                       const struct svga_compile_key *key)
{
   struct svga_shader_variant *variant = shader->variants;

   assert(key);

   for ( ; variant; variant = variant->next) {
      if (svga_compile_keys_equal(key, &variant->key))
         return variant;
   }
   return NULL;
}
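
/*
 * Typical lookup-or-compile pattern (sketch only; the real call sites live
 * elsewhere in the driver and may differ in details):
 *
 *    struct svga_shader_variant *v = svga_search_shader_key(shader, &key);
 *    if (v == NULL) {
 *       if (svga_compile_shader(svga, shader, &key, &v) != PIPE_OK)
 *          return PIPE_ERROR;   // no usable variant
 *    }
 */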

/** Search for a shader with the same token key */
struct svga_shader *
svga_search_shader_token_key(struct svga_shader *pshader,
                             const struct svga_token_key *key)
{
   struct svga_shader *shader = pshader;

   assert(key);

   for ( ; shader; shader = shader->next) {
      if (memcmp(key, &shader->token_key, sizeof(struct svga_token_key)) == 0)
         return shader;
   }
   return NULL;
}

/**
 * Helper function to define a gb shader for a non-vgpu10 device.
 */
static enum pipe_error
define_gb_shader_vgpu9(struct svga_context *svga,
                       struct svga_shader_variant *variant,
                       unsigned codeLen)
{
   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
   enum pipe_error ret;

   /**
    * Create gb memory for the shader and upload the shader code.
    * The kernel module will allocate an id for the shader and issue
    * the DefineGBShader command.
    */
   variant->gb_shader = sws->shader_create(sws, variant->type,
                                           variant->tokens, codeLen);

   svga->hud.shader_mem_used += codeLen;

   if (!variant->gb_shader)
      return PIPE_ERROR_OUT_OF_MEMORY;

   ret = SVGA3D_BindGBShader(svga->swc, variant->gb_shader);

   return ret;
}

/**
 * Helper function to define a gb shader for a vgpu10 device.
 */
static enum pipe_error
define_gb_shader_vgpu10(struct svga_context *svga,
                        struct svga_shader_variant *variant,
                        unsigned codeLen)
{
   struct svga_winsys_context *swc = svga->swc;
   enum pipe_error ret;
   unsigned len = codeLen + variant->signatureLen;

   /**
    * Shaders in a VGPU10-enabled device reside in the device COTable.
    * The SVGA driver will allocate an integer ID for the shader and
    * issue the DXDefineShader and DXBindShader commands.
    */
   variant->id = util_bitmask_add(svga->shader_id_bm);
   if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
      return PIPE_ERROR_OUT_OF_MEMORY;
   }

   /* Create gb memory for the shader and upload the shader code */
   variant->gb_shader = swc->shader_create(swc,
                                           variant->id, variant->type,
                                           variant->tokens, codeLen,
                                           variant->signature,
                                           variant->signatureLen);

   svga->hud.shader_mem_used += len;

   if (!variant->gb_shader) {
      /* Free the shader ID */
      assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
      goto fail_no_allocation;
   }

   /**
    * Since we don't want to do any flush within state emission to avoid
    * partial state in a command buffer, it's important to make sure that
    * there is enough room to send both the DXDefineShader & DXBindShader
    * commands in the same command buffer.  So let's send both
    * commands in one command reservation.  If it fails, we'll undo
    * the shader creation and return an error.
    */
   ret = SVGA3D_vgpu10_DefineAndBindShader(swc, variant->gb_shader,
                                           variant->id, variant->type,
                                           len);

   if (ret != PIPE_OK)
      goto fail;

   return PIPE_OK;

fail:
   swc->shader_destroy(swc, variant->gb_shader);
   variant->gb_shader = NULL;

fail_no_allocation:
   util_bitmask_clear(svga->shader_id_bm, variant->id);
   variant->id = UTIL_BITMASK_INVALID_INDEX;

   return PIPE_ERROR_OUT_OF_MEMORY;
}

/**
 * Issue the SVGA3D commands to define a new shader.
 * \param variant  contains the shader tokens, etc.  The variant->id field
 *                 will be set here.
 */
enum pipe_error
svga_define_shader(struct svga_context *svga,
                   struct svga_shader_variant *variant)
{
   unsigned codeLen = variant->nr_tokens * sizeof(variant->tokens[0]);
   enum pipe_error ret;

   SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_DEFINESHADER);

   variant->id = UTIL_BITMASK_INVALID_INDEX;

   if (svga_have_gb_objects(svga)) {
      if (svga_have_vgpu10(svga))
         ret = define_gb_shader_vgpu10(svga, variant, codeLen);
      else
         ret = define_gb_shader_vgpu9(svga, variant, codeLen);
   }
   else {
      /* Allocate an integer ID for the shader */
      variant->id = util_bitmask_add(svga->shader_id_bm);
      if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
         ret = PIPE_ERROR_OUT_OF_MEMORY;
         goto done;
      }

      /* Issue SVGA3D device command to define the shader */
      ret = SVGA3D_DefineShader(svga->swc,
                                variant->id,
                                variant->type,
                                variant->tokens,
                                codeLen);
      if (ret != PIPE_OK) {
         /* free the ID */
         assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
         util_bitmask_clear(svga->shader_id_bm, variant->id);
         variant->id = UTIL_BITMASK_INVALID_INDEX;
      }
   }

done:
   SVGA_STATS_TIME_POP(svga_sws(svga));
   return ret;
}


/**
 * Issue the SVGA3D commands to set/bind a shader.
 * \param variant  the shader variant to bind (NULL unbinds the stage).
 */
enum pipe_error
svga_set_shader(struct svga_context *svga,
                SVGA3dShaderType type,
                struct svga_shader_variant *variant)
{
   enum pipe_error ret;
   unsigned id = variant ? variant->id : SVGA3D_INVALID_ID;

   assert(type == SVGA3D_SHADERTYPE_VS ||
          type == SVGA3D_SHADERTYPE_GS ||
          type == SVGA3D_SHADERTYPE_PS ||
          type == SVGA3D_SHADERTYPE_HS ||
          type == SVGA3D_SHADERTYPE_DS ||
          type == SVGA3D_SHADERTYPE_CS);

   if (svga_have_gb_objects(svga)) {
      struct svga_winsys_gb_shader *gbshader =
         variant ? variant->gb_shader : NULL;

      if (svga_have_vgpu10(svga))
         ret = SVGA3D_vgpu10_SetShader(svga->swc, type, gbshader, id);
      else
         ret = SVGA3D_SetGBShader(svga->swc, type, gbshader);
   }
   else {
      ret = SVGA3D_SetShader(svga->swc, type, id);
   }

   return ret;
}


struct svga_shader_variant *
svga_new_shader_variant(struct svga_context *svga, enum pipe_shader_type type)
{
   struct svga_shader_variant *variant;

   switch (type) {
   case PIPE_SHADER_FRAGMENT:
      variant = CALLOC(1, sizeof(struct svga_fs_variant));
      break;
   case PIPE_SHADER_GEOMETRY:
      variant = CALLOC(1, sizeof(struct svga_gs_variant));
      break;
   case PIPE_SHADER_VERTEX:
      variant = CALLOC(1, sizeof(struct svga_vs_variant));
      break;
   case PIPE_SHADER_TESS_EVAL:
      variant = CALLOC(1, sizeof(struct svga_tes_variant));
      break;
   case PIPE_SHADER_TESS_CTRL:
      variant = CALLOC(1, sizeof(struct svga_tcs_variant));
      break;
   case PIPE_SHADER_COMPUTE:
      variant = CALLOC(1, sizeof(struct svga_cs_variant));
      break;
   default:
      return NULL;
   }

   if (variant) {
      variant->type = svga_shader_type(type);
      svga->hud.num_shaders++;
   }
   return variant;
}


void
svga_destroy_shader_variant(struct svga_context *svga,
                            struct svga_shader_variant *variant)
{
   if (svga_have_gb_objects(svga) && variant->gb_shader) {
      if (svga_have_vgpu10(svga)) {
         struct svga_winsys_context *swc = svga->swc;
         swc->shader_destroy(swc, variant->gb_shader);
         SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id));
         util_bitmask_clear(svga->shader_id_bm, variant->id);
      }
      else {
         struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
         sws->shader_destroy(sws, variant->gb_shader);
      }
      variant->gb_shader = NULL;
   }
   else {
      if (variant->id != UTIL_BITMASK_INVALID_INDEX) {
         SVGA_RETRY(svga, SVGA3D_DestroyShader(svga->swc, variant->id,
                                               variant->type));
         util_bitmask_clear(svga->shader_id_bm, variant->id);
      }
   }

   FREE(variant->signature);
   FREE((unsigned *)variant->tokens);
   FREE(variant);

   svga->hud.num_shaders--;
}

/*
 * Rebind shaders.
 * Called at the beginning of every new command buffer to ensure that
 * shaders are properly paged-in.  Instead of sending the SetShader
 * command, this function sends a private allocation command to
 * page in a shader.  This avoids emitting redundant state to the device
 * just to page in a resource.
 */
enum pipe_error
svga_rebind_shaders(struct svga_context *svga)
{
   struct svga_winsys_context *swc = svga->swc;
   struct svga_hw_draw_state *hw = &svga->state.hw_draw;
   enum pipe_error ret;

   assert(svga_have_vgpu10(svga));

   /**
    * If the underlying winsys layer does not need resource rebinding,
    * just clear the rebind flags and return.
    */
   if (swc->resource_rebind == NULL) {
      svga->rebind.flags.vs = 0;
      svga->rebind.flags.gs = 0;
      svga->rebind.flags.fs = 0;
      svga->rebind.flags.tcs = 0;
      svga->rebind.flags.tes = 0;

      return PIPE_OK;
   }

   if (svga->rebind.flags.vs && hw->vs && hw->vs->gb_shader) {
      ret = swc->resource_rebind(swc, NULL, hw->vs->gb_shader, SVGA_RELOC_READ);
      if (ret != PIPE_OK)
         return ret;
   }
   svga->rebind.flags.vs = 0;

   if (svga->rebind.flags.gs && hw->gs && hw->gs->gb_shader) {
      ret = swc->resource_rebind(swc, NULL, hw->gs->gb_shader, SVGA_RELOC_READ);
      if (ret != PIPE_OK)
         return ret;
   }
   svga->rebind.flags.gs = 0;

   if (svga->rebind.flags.fs && hw->fs && hw->fs->gb_shader) {
      ret = swc->resource_rebind(swc, NULL, hw->fs->gb_shader, SVGA_RELOC_READ);
      if (ret != PIPE_OK)
         return ret;
   }
   svga->rebind.flags.fs = 0;

   if (svga->rebind.flags.tcs && hw->tcs && hw->tcs->gb_shader) {
      ret = swc->resource_rebind(swc, NULL, hw->tcs->gb_shader, SVGA_RELOC_READ);
      if (ret != PIPE_OK)
         return ret;
   }
   svga->rebind.flags.tcs = 0;

   if (svga->rebind.flags.tes && hw->tes && hw->tes->gb_shader) {
      ret = swc->resource_rebind(swc, NULL, hw->tes->gb_shader, SVGA_RELOC_READ);
      if (ret != PIPE_OK)
         return ret;
   }
   svga->rebind.flags.tes = 0;

   return PIPE_OK;
}


/**
 * Helper function to create a shader object.
 */
struct svga_shader *
svga_create_shader(struct pipe_context *pipe,
                   const struct pipe_shader_state *templ,
                   enum pipe_shader_type stage,
                   unsigned shader_structlen)
{
   struct svga_context *svga = svga_context(pipe);
   struct svga_shader *shader = CALLOC(1, shader_structlen);
   nir_shader *nir = (nir_shader *)templ->ir.nir;

   if (shader == NULL)
      return NULL;

   shader->id = svga->debug.shader_id++;
   shader->stage = stage;

   if (templ->type == PIPE_SHADER_IR_NIR) {
      /* nir_to_tgsi requires lowered images */
      NIR_PASS_V(nir, gl_nir_lower_images, false);
   }
   shader->tokens = pipe_shader_state_to_tgsi_tokens(pipe->screen, templ);
   shader->type = PIPE_SHADER_IR_TGSI;

   /* Collect basic info of the shader */
   svga_tgsi_scan_shader(shader);

   /* check for any stream output declarations */
   if (templ->stream_output.num_outputs) {
      shader->stream_output = svga_create_stream_output(svga, shader,
                                                        &templ->stream_output);
   }

   return shader;
}


/**
 * Helper function to compile a shader.
 * Depending on the shader IR type, it calls the corresponding
 * compile shader function.
 */
enum pipe_error
svga_compile_shader(struct svga_context *svga,
                    struct svga_shader *shader,
                    const struct svga_compile_key *key,
                    struct svga_shader_variant **out_variant)
{
   struct svga_shader_variant *variant = NULL;
   enum pipe_error ret = PIPE_ERROR;

   if (shader->type == PIPE_SHADER_IR_TGSI) {
      variant = svga_tgsi_compile_shader(svga, shader, key);
   } else {
      debug_printf("Unexpected nir shader\n");
      assert(0);
   }

   if (variant == NULL) {
      if (shader->get_dummy_shader != NULL) {
         debug_printf("Failed to compile shader, using dummy shader.\n");
         variant = shader->get_dummy_shader(svga, shader, key);
      }
   }
   else if (svga_shader_too_large(svga, variant)) {
      /* too big, use dummy shader */
      if (shader->get_dummy_shader != NULL) {
         debug_printf("Shader too large (%u bytes), using dummy shader.\n",
                      (unsigned)(variant->nr_tokens
                                 * sizeof(variant->tokens[0])));

         /* Free the too-large variant */
         svga_destroy_shader_variant(svga, variant);

         /* Use simple pass-through shader instead */
         variant = shader->get_dummy_shader(svga, shader, key);
      }
   }

   if (variant == NULL)
      return PIPE_ERROR;

   ret = svga_define_shader(svga, variant);
   if (ret != PIPE_OK) {
      svga_destroy_shader_variant(svga, variant);
      return ret;
   }

   *out_variant = variant;

   /* insert variant at head of linked list */
   variant->next = shader->variants;
   shader->variants = variant;

   return PIPE_OK;
}