• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3  Intel funded Tungsten Graphics to
4  develop this 3D driver.
5 
6  Permission is hereby granted, free of charge, to any person obtaining
7  a copy of this software and associated documentation files (the
8  "Software"), to deal in the Software without restriction, including
9  without limitation the rights to use, copy, modify, merge, publish,
10  distribute, sublicense, and/or sell copies of the Software, and to
11  permit persons to whom the Software is furnished to do so, subject to
12  the following conditions:
13 
14  The above copyright notice and this permission notice (including the
15  next paragraph) shall be included in all copies or substantial
16  portions of the Software.
17 
18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 
26  **********************************************************************/
27  /*
28   * Authors:
29   *   Keith Whitwell <keithw@vmware.com>
30   */
31 
32 
33 #include "main/compiler.h"
34 #include "main/context.h"
35 #include "brw_context.h"
36 #include "brw_vs.h"
37 #include "brw_util.h"
38 #include "brw_state.h"
39 #include "program/prog_print.h"
40 #include "program/prog_parameter.h"
41 #include "brw_nir.h"
42 #include "brw_program.h"
43 
44 #include "util/ralloc.h"
45 
46 GLbitfield64
brw_vs_outputs_written(struct brw_context * brw,struct brw_vs_prog_key * key,GLbitfield64 user_varyings)47 brw_vs_outputs_written(struct brw_context *brw, struct brw_vs_prog_key *key,
48                        GLbitfield64 user_varyings)
49 {
50    GLbitfield64 outputs_written = user_varyings;
51 
52    if (key->copy_edgeflag) {
53       outputs_written |= BITFIELD64_BIT(VARYING_SLOT_EDGE);
54    }
55 
56    if (brw->gen < 6) {
57       /* Put dummy slots into the VUE for the SF to put the replaced
58        * point sprite coords in.  We shouldn't need these dummy slots,
59        * which take up precious URB space, but it would mean that the SF
60        * doesn't get nice aligned pairs of input coords into output
61        * coords, which would be a pain to handle.
62        */
63       for (unsigned i = 0; i < 8; i++) {
64          if (key->point_coord_replace & (1 << i))
65             outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i);
66       }
67 
68       /* if back colors are written, allocate slots for front colors too */
69       if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0))
70          outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0);
71       if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1))
72          outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1);
73    }
74 
75    /* In order for legacy clipping to work, we need to populate the clip
76     * distance varying slots whenever clipping is enabled, even if the vertex
77     * shader doesn't write to gl_ClipDistance.
78     */
79    if (key->nr_userclip_plane_consts > 0) {
80       outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
81       outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
82    }
83 
84    return outputs_written;
85 }
86 
87 static void
brw_vs_debug_recompile(struct brw_context * brw,struct gl_program * prog,const struct brw_vs_prog_key * key)88 brw_vs_debug_recompile(struct brw_context *brw, struct gl_program *prog,
89                        const struct brw_vs_prog_key *key)
90 {
91    perf_debug("Recompiling vertex shader for program %d\n", prog->Id);
92 
93    bool found = false;
94    const struct brw_vs_prog_key *old_key =
95       brw_find_previous_compile(&brw->cache, BRW_CACHE_VS_PROG,
96                                 key->program_string_id);
97 
98    if (!old_key) {
99       perf_debug("  Didn't find previous compile in the shader cache for "
100                  "debug\n");
101       return;
102    }
103 
104    for (unsigned int i = 0; i < VERT_ATTRIB_MAX; i++) {
105       found |= key_debug(brw, "Vertex attrib w/a flags",
106                          old_key->gl_attrib_wa_flags[i],
107                          key->gl_attrib_wa_flags[i]);
108    }
109 
110    found |= key_debug(brw, "legacy user clipping",
111                       old_key->nr_userclip_plane_consts,
112                       key->nr_userclip_plane_consts);
113 
114    found |= key_debug(brw, "copy edgeflag",
115                       old_key->copy_edgeflag, key->copy_edgeflag);
116    found |= key_debug(brw, "PointCoord replace",
117                       old_key->point_coord_replace, key->point_coord_replace);
118    found |= key_debug(brw, "vertex color clamping",
119                       old_key->clamp_vertex_color, key->clamp_vertex_color);
120 
121    found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex);
122 
123    if (!found) {
124       perf_debug("  Something else\n");
125    }
126 }
127 
128 static bool
brw_codegen_vs_prog(struct brw_context * brw,struct brw_program * vp,struct brw_vs_prog_key * key)129 brw_codegen_vs_prog(struct brw_context *brw,
130                     struct brw_program *vp,
131                     struct brw_vs_prog_key *key)
132 {
133    const struct brw_compiler *compiler = brw->screen->compiler;
134    const struct gen_device_info *devinfo = &brw->screen->devinfo;
135    GLuint program_size;
136    const GLuint *program;
137    struct brw_vs_prog_data prog_data;
138    struct brw_stage_prog_data *stage_prog_data = &prog_data.base.base;
139    void *mem_ctx;
140    bool start_busy = false;
141    double start_time = 0;
142 
143    memset(&prog_data, 0, sizeof(prog_data));
144 
145    /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */
146    if (vp->program.is_arb_asm)
147       stage_prog_data->use_alt_mode = true;
148 
149    mem_ctx = ralloc_context(NULL);
150 
151    brw_assign_common_binding_table_offsets(devinfo, &vp->program,
152                                            &prog_data.base.base, 0);
153 
154    /* Allocate the references to the uniforms that will end up in the
155     * prog_data associated with the compiled program, and which will be freed
156     * by the state cache.
157     */
158    int param_count = vp->program.nir->num_uniforms / 4;
159 
160    prog_data.base.base.nr_image_params = vp->program.info.num_images;
161 
162    /* vec4_visitor::setup_uniform_clipplane_values() also uploads user clip
163     * planes as uniforms.
164     */
165    param_count += key->nr_userclip_plane_consts * 4;
166 
167    stage_prog_data->param =
168       rzalloc_array(NULL, const gl_constant_value *, param_count);
169    stage_prog_data->pull_param =
170       rzalloc_array(NULL, const gl_constant_value *, param_count);
171    stage_prog_data->image_param =
172       rzalloc_array(NULL, struct brw_image_param,
173                     stage_prog_data->nr_image_params);
174    stage_prog_data->nr_params = param_count;
175 
176    if (!vp->program.is_arb_asm) {
177       brw_nir_setup_glsl_uniforms(vp->program.nir, &vp->program,
178                                   &prog_data.base.base,
179                                   compiler->scalar_stage[MESA_SHADER_VERTEX]);
180    } else {
181       brw_nir_setup_arb_uniforms(vp->program.nir, &vp->program,
182                                  &prog_data.base.base);
183    }
184 
185    uint64_t outputs_written =
186       brw_vs_outputs_written(brw, key, vp->program.info.outputs_written);
187    prog_data.inputs_read = vp->program.info.inputs_read;
188    prog_data.double_inputs_read = vp->program.info.double_inputs_read;
189 
190    if (key->copy_edgeflag) {
191       prog_data.inputs_read |= VERT_BIT_EDGEFLAG;
192    }
193 
194    brw_compute_vue_map(devinfo,
195                        &prog_data.base.vue_map, outputs_written,
196                        vp->program.nir->info->separate_shader);
197 
198    if (0) {
199       _mesa_fprint_program_opt(stderr, &vp->program, PROG_PRINT_DEBUG, true);
200    }
201 
202    if (unlikely(brw->perf_debug)) {
203       start_busy = (brw->batch.last_bo &&
204                     drm_intel_bo_busy(brw->batch.last_bo));
205       start_time = get_time();
206    }
207 
208    if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
209       if (vp->program.is_arb_asm)
210          brw_dump_arb_asm("vertex", &vp->program);
211    }
212 
213    int st_index = -1;
214    if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
215       st_index = brw_get_shader_time_index(brw, &vp->program, ST_VS,
216                                            !vp->program.is_arb_asm);
217    }
218 
219    /* Emit GEN4 code.
220     */
221    char *error_str;
222    program = brw_compile_vs(compiler, brw, mem_ctx, key, &prog_data,
223                             vp->program.nir,
224                             brw_select_clip_planes(&brw->ctx),
225                             !_mesa_is_gles3(&brw->ctx),
226                             st_index, &program_size, &error_str);
227    if (program == NULL) {
228       if (!vp->program.is_arb_asm) {
229          vp->program.sh.data->LinkStatus = false;
230          ralloc_strcat(&vp->program.sh.data->InfoLog, error_str);
231       }
232 
233       _mesa_problem(NULL, "Failed to compile vertex shader: %s\n", error_str);
234 
235       ralloc_free(mem_ctx);
236       return false;
237    }
238 
239    if (unlikely(brw->perf_debug)) {
240       if (vp->compiled_once) {
241          brw_vs_debug_recompile(brw, &vp->program, key);
242       }
243       if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
244          perf_debug("VS compile took %.03f ms and stalled the GPU\n",
245                     (get_time() - start_time) * 1000);
246       }
247       vp->compiled_once = true;
248    }
249 
250    /* Scratch space is used for register spilling */
251    brw_alloc_stage_scratch(brw, &brw->vs.base,
252                            prog_data.base.base.total_scratch,
253                            devinfo->max_vs_threads);
254 
255    brw_upload_cache(&brw->cache, BRW_CACHE_VS_PROG,
256 		    key, sizeof(struct brw_vs_prog_key),
257 		    program, program_size,
258 		    &prog_data, sizeof(prog_data),
259 		    &brw->vs.base.prog_offset, &brw->vs.base.prog_data);
260    ralloc_free(mem_ctx);
261 
262    return true;
263 }
264 
265 static bool
brw_vs_state_dirty(const struct brw_context * brw)266 brw_vs_state_dirty(const struct brw_context *brw)
267 {
268    return brw_state_dirty(brw,
269                           _NEW_BUFFERS |
270                           _NEW_LIGHT |
271                           _NEW_POINT |
272                           _NEW_POLYGON |
273                           _NEW_TEXTURE |
274                           _NEW_TRANSFORM,
275                           BRW_NEW_VERTEX_PROGRAM |
276                           BRW_NEW_VS_ATTRIB_WORKAROUNDS);
277 }
278 
279 void
brw_vs_populate_key(struct brw_context * brw,struct brw_vs_prog_key * key)280 brw_vs_populate_key(struct brw_context *brw,
281                     struct brw_vs_prog_key *key)
282 {
283    struct gl_context *ctx = &brw->ctx;
284    /* BRW_NEW_VERTEX_PROGRAM */
285    struct brw_program *vp = (struct brw_program *)brw->vertex_program;
286    struct gl_program *prog = (struct gl_program *) brw->vertex_program;
287 
288    memset(key, 0, sizeof(*key));
289 
290    /* Just upload the program verbatim for now.  Always send it all
291     * the inputs it asks for, whether they are varying or not.
292     */
293    key->program_string_id = vp->id;
294 
295    if (ctx->Transform.ClipPlanesEnabled != 0 &&
296        (ctx->API == API_OPENGL_COMPAT || ctx->API == API_OPENGLES) &&
297        vp->program.ClipDistanceArraySize == 0) {
298       key->nr_userclip_plane_consts =
299          _mesa_logbase2(ctx->Transform.ClipPlanesEnabled) + 1;
300    }
301 
302    if (brw->gen < 6) {
303       /* _NEW_POLYGON */
304       key->copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL ||
305                             ctx->Polygon.BackMode != GL_FILL);
306 
307       /* _NEW_POINT */
308       if (ctx->Point.PointSprite) {
309          key->point_coord_replace = ctx->Point.CoordReplace & 0xff;
310       }
311    }
312 
313    if (prog->nir->info->outputs_written &
314        (VARYING_BIT_COL0 | VARYING_BIT_COL1 | VARYING_BIT_BFC0 |
315         VARYING_BIT_BFC1)) {
316       /* _NEW_LIGHT | _NEW_BUFFERS */
317       key->clamp_vertex_color = ctx->Light._ClampVertexColor;
318    }
319 
320    /* _NEW_TEXTURE */
321    brw_populate_sampler_prog_key_data(ctx, prog, &key->tex);
322 
323    /* BRW_NEW_VS_ATTRIB_WORKAROUNDS */
324    if (brw->gen < 8 && !brw->is_haswell) {
325       memcpy(key->gl_attrib_wa_flags, brw->vb.attrib_wa_flags,
326              sizeof(brw->vb.attrib_wa_flags));
327    }
328 }
329 
330 void
brw_upload_vs_prog(struct brw_context * brw)331 brw_upload_vs_prog(struct brw_context *brw)
332 {
333    struct brw_vs_prog_key key;
334    /* BRW_NEW_VERTEX_PROGRAM */
335    struct brw_program *vp = (struct brw_program *)brw->vertex_program;
336 
337    if (!brw_vs_state_dirty(brw))
338       return;
339 
340    brw_vs_populate_key(brw, &key);
341 
342    if (!brw_search_cache(&brw->cache, BRW_CACHE_VS_PROG,
343 			 &key, sizeof(key),
344 			 &brw->vs.base.prog_offset, &brw->vs.base.prog_data)) {
345       bool success = brw_codegen_vs_prog(brw, vp, &key);
346       (void) success;
347       assert(success);
348    }
349 }
350 
351 bool
brw_vs_precompile(struct gl_context * ctx,struct gl_program * prog)352 brw_vs_precompile(struct gl_context *ctx, struct gl_program *prog)
353 {
354    struct brw_context *brw = brw_context(ctx);
355    struct brw_vs_prog_key key;
356    uint32_t old_prog_offset = brw->vs.base.prog_offset;
357    struct brw_stage_prog_data *old_prog_data = brw->vs.base.prog_data;
358    bool success;
359 
360    struct brw_program *bvp = brw_program(prog);
361 
362    memset(&key, 0, sizeof(key));
363 
364    brw_setup_tex_for_precompile(brw, &key.tex, prog);
365    key.program_string_id = bvp->id;
366    key.clamp_vertex_color =
367       (prog->nir->info->outputs_written &
368        (VARYING_BIT_COL0 | VARYING_BIT_COL1 | VARYING_BIT_BFC0 |
369         VARYING_BIT_BFC1));
370 
371    success = brw_codegen_vs_prog(brw, bvp, &key);
372 
373    brw->vs.base.prog_offset = old_prog_offset;
374    brw->vs.base.prog_data = old_prog_data;
375 
376    return success;
377 }
378