/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */

#include <pthread.h>
#include "main/glspirv.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/prog_to_nir.h"
#include "program/program.h"
#include "program/programopt.h"
#include "tnl/tnl.h"
#include "util/ralloc.h"
#include "compiler/glsl/ir.h"
#include "compiler/glsl/program.h"
#include "compiler/glsl/gl_nir.h"
#include "compiler/glsl/glsl_to_nir.h"

#include "brw_program.h"
#include "brw_context.h"
#include "compiler/brw_nir.h"
#include "brw_defines.h"
#include "brw_batch.h"

#include "brw_cs.h"
#include "brw_gs.h"
#include "brw_vs.h"
#include "brw_wm.h"
#include "brw_state.h"

#include "main/shaderapi.h"
#include "main/shaderobj.h"

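/**
 * Assign uniform variable locations and lower uniform access to explicit
 * I/O: tightly packed scalars for scalar back-end stages, vec4 slots
 * otherwise.
 */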
static bool
brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar)
{
   if (is_scalar) {
      nir_assign_var_locations(nir, nir_var_uniform, &nir->num_uniforms,
                               type_size_scalar_bytes);
      return nir_lower_io(nir, nir_var_uniform, type_size_scalar_bytes, 0);
   } else {
      nir_assign_var_locations(nir, nir_var_uniform, &nir->num_uniforms,
                               type_size_vec4_bytes);
      return nir_lower_io(nir, nir_var_uniform, type_size_vec4_bytes, 0);
   }
}

static struct gl_program *brw_new_program(struct gl_context *ctx,
                                          gl_shader_stage stage,
                                          GLuint id, bool is_arb_asm);

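/**
 * Create NIR for a shader: translated from GLSL IR or SPIR-V when a linked
 * gl_shader_program is given, or from Mesa IR (ARB programs and fixed
 * function) otherwise, then run through the common Intel NIR passes.
 */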
nir_shader *
brw_create_nir(struct brw_context *brw,
               const struct gl_shader_program *shader_prog,
               struct gl_program *prog,
               gl_shader_stage stage,
               bool is_scalar)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;
   const nir_shader_compiler_options *options =
      ctx->Const.ShaderCompilerOptions[stage].NirOptions;
   nir_shader *nir;

   /* First, lower the GLSL/Mesa IR or SPIR-V to NIR */
   if (shader_prog) {
      if (shader_prog->data->spirv) {
         nir = _mesa_spirv_to_nir(ctx, shader_prog, stage, options);
      } else {
         nir = glsl_to_nir(ctx, shader_prog, stage, options);

         /* Remap locations to slots so that attributes requiring two slots
          * occupy two locations.  For instance, if the IR has a dvec3 attr0
          * in location 0 and a vec4 attr1 in location 1, then in NIR attr0
          * will use locations/slots 0 and 1, and attr1 will use
          * location/slot 2.
          */
         if (nir->info.stage == MESA_SHADER_VERTEX)
            nir_remap_dual_slot_attributes(nir, &prog->DualSlotInputs);
      }
      assert(nir);

      nir_remove_dead_variables(nir, nir_var_shader_in | nir_var_shader_out,
                                NULL);
      nir_validate_shader(nir, "after glsl_to_nir or spirv_to_nir");
      NIR_PASS_V(nir, nir_lower_io_to_temporaries,
                 nir_shader_get_entrypoint(nir), true, false);
   } else {
      nir = prog_to_nir(prog, options);
      NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
   }
   nir_validate_shader(nir, "before brw_preprocess_nir");

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   if (!ctx->SoftFP64 &&
       ((nir->info.bit_sizes_int | nir->info.bit_sizes_float) & 64) &&
       (options->lower_doubles_options & nir_lower_fp64_full_software)) {
      ctx->SoftFP64 = glsl_float64_funcs_to_nir(ctx, options);
   }

   brw_preprocess_nir(brw->screen->compiler, nir, ctx->SoftFP64);

   if (stage == MESA_SHADER_TESS_CTRL) {
      /* Lower gl_PatchVerticesIn from a sys. value to a uniform on Gfx8+. */
      static const gl_state_index16 tokens[STATE_LENGTH] =
         { STATE_TCS_PATCH_VERTICES_IN };
      nir_lower_patch_vertices(nir, 0, devinfo->ver >= 8 ? tokens : NULL);
   }

   if (stage == MESA_SHADER_TESS_EVAL) {
      /* Lower gl_PatchVerticesIn to a constant if we have a TCS, or to a
       * uniform if we don't.
       */
      struct gl_linked_shader *tcs =
         shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL];
      uint32_t static_patch_vertices =
         tcs ? tcs->Program->nir->info.tess.tcs_vertices_out : 0;
      static const gl_state_index16 tokens[STATE_LENGTH] =
         { STATE_TES_PATCH_VERTICES_IN };
      nir_lower_patch_vertices(nir, static_patch_vertices, tokens);
   }

   if (stage == MESA_SHADER_FRAGMENT) {
      static const struct nir_lower_wpos_ytransform_options wpos_options = {
         .state_tokens = {STATE_FB_WPOS_Y_TRANSFORM, 0, 0},
         .fs_coord_pixel_center_integer = 1,
         .fs_coord_origin_upper_left = 1,
      };

      bool progress = false;
      NIR_PASS(progress, nir, nir_lower_wpos_ytransform, &wpos_options);
      if (progress) {
         _mesa_add_state_reference(prog->Parameters,
                                   wpos_options.state_tokens);
      }
   }

   return nir;
}

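/**
 * Compute the size and alignment of a compute shader shared variable,
 * treating booleans as 32-bit and padding vec3s to vec4 alignment.
 */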
static void
shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   uint32_t comp_size = glsl_type_is_boolean(type)
      ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);
   *size = comp_size * length;
   *align = comp_size * (length == 3 ? 4 : length);
}

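/**
 * Lower uniform, sampler, image, shared-memory, and buffer access in the
 * shader to the forms the Intel back-end compiler expects.
 */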
void
brw_nir_lower_resources(nir_shader *nir, struct gl_shader_program *shader_prog,
                        struct gl_program *prog,
                        const struct intel_device_info *devinfo)
{
   NIR_PASS_V(nir, brw_nir_lower_uniforms, nir->options->lower_to_scalar);
   NIR_PASS_V(prog->nir, gl_nir_lower_samplers, shader_prog);
   BITSET_COPY(prog->info.textures_used, prog->nir->info.textures_used);
   BITSET_COPY(prog->info.textures_used_by_txf,
               prog->nir->info.textures_used_by_txf);

   NIR_PASS_V(prog->nir, brw_nir_lower_storage_image, devinfo);

   if (prog->nir->info.stage == MESA_SHADER_COMPUTE &&
       shader_prog->data->spirv) {
      NIR_PASS_V(prog->nir, nir_lower_vars_to_explicit_types,
                 nir_var_mem_shared, shared_type_info);
      NIR_PASS_V(prog->nir, nir_lower_explicit_io,
                 nir_var_mem_shared, nir_address_format_32bit_offset);
   }

   NIR_PASS_V(prog->nir, gl_nir_lower_buffers, shader_prog);
   /* Do a round of constant folding to clean up address calculations */
   NIR_PASS_V(prog->nir, nir_opt_constant_folding);
}

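/**
 * Re-gather shader info from NIR and mirror it into the gl_program,
 * preserving the program's own name and label.
 */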
void
brw_shader_gather_info(nir_shader *nir, struct gl_program *prog)
{
   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   /* Copy the info we just generated back into the gl_program */
   const char *prog_name = prog->info.name;
   const char *prog_label = prog->info.label;
   prog->info = nir->info;
   prog->info.name = prog_name;
   prog->info.label = prog_label;
}

static unsigned
get_new_program_id(struct brw_screen *screen)
{
   return p_atomic_inc_return(&screen->program_id);
}

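/**
 * Allocate a brw_program wrapper and give it a fresh, screen-unique ID.
 */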
static struct gl_program *
brw_new_program(struct gl_context *ctx,
                gl_shader_stage stage,
                GLuint id, bool is_arb_asm)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_program *prog = rzalloc(NULL, struct brw_program);

   if (prog) {
      prog->id = get_new_program_id(brw->screen);

      return _mesa_init_gl_program(&prog->program, stage, id, is_arb_asm);
   }

   return NULL;
}

static void
brw_delete_program(struct gl_context *ctx, struct gl_program *prog)
{
   struct brw_context *brw = brw_context(ctx);

   /* Beware!  prog's refcount has reached zero, and it's about to be freed.
    *
    * In brw_upload_pipeline_state(), we compare brw->programs[i] to
    * ctx->FooProgram._Current, and flag BRW_NEW_FOO_PROGRAM if the
    * pointer has changed.
    *
    * We cannot leave brw->programs[i] as a dangling pointer to the dead
    * program.  malloc() may allocate the same memory for a new gl_program,
    * causing us to see matching pointers...but totally different programs.
    *
    * We cannot set brw->programs[i] to NULL, either.  If we've deleted the
    * active program, Mesa may set ctx->FooProgram._Current to NULL.  That
    * would cause us to see matching pointers (NULL == NULL), and fail to
    * detect that a program has changed since our last draw.
    *
    * So, set it to a bogus gl_program pointer that will never match,
    * causing us to properly reevaluate the state on our next draw.
    *
    * Getting this wrong causes heisenbugs which are very hard to catch,
    * as you need a very specific allocation pattern to hit the problem.
    */
   static const struct gl_program deleted_program;

   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      if (brw->programs[i] == prog)
         brw->programs[i] = (struct gl_program *) &deleted_program;
   }

   _mesa_delete_program(ctx, prog);
}


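/**
 * Called when the source for an ARB vertex/fragment program (or a fixed
 * function program) changes: regenerate NIR, precompile, and flag the
 * relevant BRW_NEW_*_PROGRAM state if the currently bound program changed.
 */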
static GLboolean
brw_program_string_notify(struct gl_context *ctx,
                          GLenum target,
                          struct gl_program *prog)
{
   assert(target == GL_VERTEX_PROGRAM_ARB || !prog->arb.IsPositionInvariant);

   struct brw_context *brw = brw_context(ctx);
   const struct brw_compiler *compiler = brw->screen->compiler;

   switch (target) {
   case GL_FRAGMENT_PROGRAM_ARB: {
      struct brw_program *newFP = brw_program(prog);
      const struct brw_program *curFP =
         brw_program_const(brw->programs[MESA_SHADER_FRAGMENT]);

      if (newFP == curFP)
         brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
      _mesa_program_fragment_position_to_sysval(&newFP->program);
      newFP->id = get_new_program_id(brw->screen);

      prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true);

      brw_nir_lower_resources(prog->nir, NULL, prog, &brw->screen->devinfo);

      brw_shader_gather_info(prog->nir, prog);

      brw_fs_precompile(ctx, prog);
      break;
   }
   case GL_VERTEX_PROGRAM_ARB: {
      struct brw_program *newVP = brw_program(prog);
      const struct brw_program *curVP =
         brw_program_const(brw->programs[MESA_SHADER_VERTEX]);

      if (newVP == curVP)
         brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
      if (newVP->program.arb.IsPositionInvariant) {
         _mesa_insert_mvp_code(ctx, &newVP->program);
      }
      newVP->id = get_new_program_id(brw->screen);

      /* Also tell tnl about it: */
      _tnl_program_string(ctx, target, prog);

      prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX,
                                 compiler->scalar_stage[MESA_SHADER_VERTEX]);

      brw_nir_lower_resources(prog->nir, NULL, prog, &brw->screen->devinfo);

      brw_shader_gather_info(prog->nir, prog);

      brw_vs_precompile(ctx, prog);
      break;
   }
   default:
      /* driver->ProgramStringNotify is only called for ARB programs, fixed
       * function vertex programs, and ir_to_mesa (which isn't used by the
       * i965 back-end).  Therefore, even after geometry shaders are added,
       * this function should only ever be called with a target of
       * GL_VERTEX_PROGRAM_ARB or GL_FRAGMENT_PROGRAM_ARB.
       */
      unreachable("Unexpected target in brwProgramStringNotify");
   }

   return true;
}

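/**
 * Implement glMemoryBarrier() by translating the GL barrier bits into the
 * corresponding PIPE_CONTROL cache flushes and invalidations.
 */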
static void
brw_memory_barrier(struct gl_context *ctx, GLbitfield barriers)
{
   struct brw_context *brw = brw_context(ctx);
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   unsigned bits = PIPE_CONTROL_DATA_CACHE_FLUSH | PIPE_CONTROL_CS_STALL;
   assert(devinfo->ver >= 7 && devinfo->ver <= 11);

   if (barriers & (GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT |
                   GL_ELEMENT_ARRAY_BARRIER_BIT |
                   GL_COMMAND_BARRIER_BIT))
      bits |= PIPE_CONTROL_VF_CACHE_INVALIDATE;

   if (barriers & GL_UNIFORM_BARRIER_BIT)
      bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
               PIPE_CONTROL_CONST_CACHE_INVALIDATE);

   if (barriers & GL_TEXTURE_FETCH_BARRIER_BIT)
      bits |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;

   if (barriers & (GL_TEXTURE_UPDATE_BARRIER_BIT |
                   GL_PIXEL_BUFFER_BARRIER_BIT))
      bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
               PIPE_CONTROL_RENDER_TARGET_FLUSH);

   if (barriers & GL_FRAMEBUFFER_BARRIER_BIT)
      bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
               PIPE_CONTROL_RENDER_TARGET_FLUSH);

   /* Typed surface messages are handled by the render cache on IVB, so we
    * need to flush it too.
    */
   if (devinfo->verx10 == 70)
      bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH;

   brw_emit_pipe_control_flush(brw, bits);
}

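/**
 * Make prior rendering visible to non-coherent framebuffer fetch.  This is
 * a no-op when coherent EXT_shader_framebuffer_fetch is exposed.
 */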
static void
brw_framebuffer_fetch_barrier(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   const struct intel_device_info *devinfo = &brw->screen->devinfo;

   if (!ctx->Extensions.EXT_shader_framebuffer_fetch) {
      if (devinfo->ver >= 6) {
         brw_emit_pipe_control_flush(brw,
                                     PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                     PIPE_CONTROL_CS_STALL);
         brw_emit_pipe_control_flush(brw,
                                     PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
      } else {
         brw_emit_pipe_control_flush(brw,
                                     PIPE_CONTROL_RENDER_TARGET_FLUSH);
      }
   }
}

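/**
 * Return a scratch BO of at least \p size bytes in *scratch_bo, replacing
 * any existing BO that is too small.
 */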
void
brw_get_scratch_bo(struct brw_context *brw,
                   struct brw_bo **scratch_bo, int size)
{
   struct brw_bo *old_bo = *scratch_bo;

   if (old_bo && old_bo->size < size) {
      brw_bo_unreference(old_bo);
      old_bo = NULL;
   }

   if (!old_bo) {
      *scratch_bo =
         brw_bo_alloc(brw->bufmgr, "scratch bo", size, BRW_MEMZONE_SCRATCH);
   }
}

/**
 * Reserve enough scratch space for the given stage to hold \p per_thread_size
 * bytes times the maximum number of scratch IDs for that stage.
 */
void
brw_alloc_stage_scratch(struct brw_context *brw,
                        struct brw_stage_state *stage_state,
                        unsigned per_thread_size)
{
   if (stage_state->per_thread_scratch >= per_thread_size)
      return;

   stage_state->per_thread_scratch = per_thread_size;

   if (stage_state->scratch_bo)
      brw_bo_unreference(stage_state->scratch_bo);

   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   assert(stage_state->stage < ARRAY_SIZE(devinfo->max_scratch_ids));
   unsigned max_ids = devinfo->max_scratch_ids[stage_state->stage];
   stage_state->scratch_bo =
      brw_bo_alloc(brw->bufmgr, "shader scratch space",
                   per_thread_size * max_ids, BRW_MEMZONE_SCRATCH);
}

void
brw_init_frag_prog_functions(struct dd_function_table *functions)
{
   assert(functions->ProgramStringNotify == _tnl_program_string);

   functions->NewProgram = brw_new_program;
   functions->DeleteProgram = brw_delete_program;
   functions->ProgramStringNotify = brw_program_string_notify;

   functions->LinkShader = brw_link_shader;

   functions->MemoryBarrier = brw_memory_barrier;
   functions->FramebufferFetchBarrier = brw_framebuffer_fetch_barrier;
}

struct shader_times {
   uint64_t time;
   uint64_t written;
   uint64_t reset;
};

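/**
 * Allocate the shader time buffer and its bookkeeping arrays.  Each entry
 * gets three BRW_SHADER_TIME_STRIDE slots in the BO: cycles accumulated,
 * number of times written, and number of resets.
 */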
void
brw_init_shader_time(struct brw_context *brw)
{
   const int max_entries = 2048;
   brw->shader_time.bo =
      brw_bo_alloc(brw->bufmgr, "shader time",
                   max_entries * BRW_SHADER_TIME_STRIDE * 3,
                   BRW_MEMZONE_OTHER);
   brw->shader_time.names = rzalloc_array(brw, const char *, max_entries);
   brw->shader_time.ids = rzalloc_array(brw, int, max_entries);
   brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type,
                                          max_entries);
   brw->shader_time.cumulative = rzalloc_array(brw, struct shader_times,
                                               max_entries);
   brw->shader_time.max_entries = max_entries;
}

static int
compare_time(const void *a, const void *b)
{
   uint64_t * const *a_val = a;
   uint64_t * const *b_val = b;

   /* We don't just subtract because we're turning the value to an int. */
   if (**a_val < **b_val)
      return -1;
   else if (**a_val == **b_val)
      return 0;
   else
      return 1;
}

static void
print_shader_time_line(const char *stage, const char *name,
                       int shader_num, uint64_t time, uint64_t total)
{
   fprintf(stderr, "%-6s%-18s", stage, name);

   if (shader_num != 0)
      fprintf(stderr, "%4d: ", shader_num);
   else
      fprintf(stderr, "    : ");

   fprintf(stderr, "%16lld (%7.2f Gcycles)      %4.1f%%\n",
           (long long)time,
           (double)time / 1000000000.0,
           (double)time / total * 100.0);
}

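/**
 * Print a sorted breakdown of accumulated shader times.  Entries whose
 * counter was reset mid-run are scaled by (written + reset) / written to
 * estimate the full cost.
 */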
static void
brw_report_shader_time(struct brw_context *brw)
{
   if (!brw->shader_time.bo || !brw->shader_time.num_entries)
      return;

   uint64_t scaled[brw->shader_time.num_entries];
   uint64_t *sorted[brw->shader_time.num_entries];
   uint64_t total_by_type[ST_CS + 1];
   memset(total_by_type, 0, sizeof(total_by_type));
   double total = 0;
   for (int i = 0; i < brw->shader_time.num_entries; i++) {
      uint64_t written = 0, reset = 0;
      enum shader_time_shader_type type = brw->shader_time.types[i];

      sorted[i] = &scaled[i];

      switch (type) {
      case ST_VS:
      case ST_TCS:
      case ST_TES:
      case ST_GS:
      case ST_FS8:
      case ST_FS16:
      case ST_FS32:
      case ST_CS:
         written = brw->shader_time.cumulative[i].written;
         reset = brw->shader_time.cumulative[i].reset;
         break;

      default:
         /* I sometimes want to print things that aren't the 3 shader times.
          * Just print the sum in that case.
          */
         written = 1;
         reset = 0;
         break;
      }

      uint64_t time = brw->shader_time.cumulative[i].time;
      if (written) {
         scaled[i] = time / written * (written + reset);
      } else {
         scaled[i] = time;
      }

      switch (type) {
      case ST_VS:
      case ST_TCS:
      case ST_TES:
      case ST_GS:
      case ST_FS8:
      case ST_FS16:
      case ST_FS32:
      case ST_CS:
         total_by_type[type] += scaled[i];
         break;
      default:
         break;
      }

      total += scaled[i];
   }

   if (total == 0) {
      fprintf(stderr, "No shader time collected yet\n");
      return;
   }

   qsort(sorted, brw->shader_time.num_entries, sizeof(sorted[0]),
         compare_time);

   fprintf(stderr, "\n");
   fprintf(stderr,
           "type   ID                  cycles spent                   %% of total\n");
   for (int s = 0; s < brw->shader_time.num_entries; s++) {
      const char *stage;
      /* Work back from the sorted pointer to the index of the time to
       * print.
       */
      int i = sorted[s] - scaled;

      if (scaled[i] == 0)
         continue;

      int shader_num = brw->shader_time.ids[i];
      const char *shader_name = brw->shader_time.names[i];

      switch (brw->shader_time.types[i]) {
      case ST_VS:
         stage = "vs";
         break;
      case ST_TCS:
         stage = "tcs";
         break;
      case ST_TES:
         stage = "tes";
         break;
      case ST_GS:
         stage = "gs";
         break;
      case ST_FS8:
         stage = "fs8";
         break;
      case ST_FS16:
         stage = "fs16";
         break;
      case ST_FS32:
         stage = "fs32";
         break;
      case ST_CS:
         stage = "cs";
         break;
      default:
         stage = "other";
         break;
      }

      print_shader_time_line(stage, shader_name, shader_num,
                             scaled[i], total);
   }

   fprintf(stderr, "\n");
   print_shader_time_line("total", "vs", 0, total_by_type[ST_VS], total);
   print_shader_time_line("total", "tcs", 0, total_by_type[ST_TCS], total);
   print_shader_time_line("total", "tes", 0, total_by_type[ST_TES], total);
   print_shader_time_line("total", "gs", 0, total_by_type[ST_GS], total);
   print_shader_time_line("total", "fs8", 0, total_by_type[ST_FS8], total);
   print_shader_time_line("total", "fs16", 0, total_by_type[ST_FS16], total);
   print_shader_time_line("total", "fs32", 0, total_by_type[ST_FS32], total);
   print_shader_time_line("total", "cs", 0, total_by_type[ST_CS], total);
}

static void
brw_collect_shader_time(struct brw_context *brw)
{
   if (!brw->shader_time.bo)
      return;

   /* This probably stalls on the last rendering.  We could fix that by
    * delaying reading the reports, but it doesn't look like it's a big
    * overhead compared to the cost of tracking the time in the first place.
    */
   void *bo_map = brw_bo_map(brw, brw->shader_time.bo, MAP_READ | MAP_WRITE);

   for (int i = 0; i < brw->shader_time.num_entries; i++) {
      uint32_t *times = bo_map + i * 3 * BRW_SHADER_TIME_STRIDE;

      brw->shader_time.cumulative[i].time +=
         times[BRW_SHADER_TIME_STRIDE * 0 / 4];
      brw->shader_time.cumulative[i].written +=
         times[BRW_SHADER_TIME_STRIDE * 1 / 4];
      brw->shader_time.cumulative[i].reset +=
         times[BRW_SHADER_TIME_STRIDE * 2 / 4];
   }

   /* Zero the BO to clear it out for our next collection. */
   memset(bo_map, 0, brw->shader_time.bo->size);
   brw_bo_unmap(brw->shader_time.bo);
}

void
brw_collect_and_report_shader_time(struct brw_context *brw)
{
   brw_collect_shader_time(brw);

   if (brw->shader_time.report_time == 0 ||
       get_time() - brw->shader_time.report_time >= 1.0) {
      brw_report_shader_time(brw);
      brw->shader_time.report_time = get_time();
   }
}

/**
 * Chooses an index in the shader_time buffer and sets up tracking information
 * for our printouts.
 *
 * Note that this holds on to references to the underlying programs, which may
 * change their lifetimes compared to normal operation.
 */
int
brw_get_shader_time_index(struct brw_context *brw, struct gl_program *prog,
                          enum shader_time_shader_type type, bool is_glsl_sh)
{
   int shader_time_index = brw->shader_time.num_entries++;
   assert(shader_time_index < brw->shader_time.max_entries);
   brw->shader_time.types[shader_time_index] = type;

   const char *name;
   if (prog->Id == 0) {
      name = "ff";
   } else if (is_glsl_sh) {
      name = prog->info.label ?
         ralloc_strdup(brw->shader_time.names, prog->info.label) : "glsl";
   } else {
      name = "prog";
   }

   brw->shader_time.names[shader_time_index] = name;
   brw->shader_time.ids[shader_time_index] = prog->Id;

   return shader_time_index;
}

void
brw_destroy_shader_time(struct brw_context *brw)
{
   brw_bo_unreference(brw->shader_time.bo);
   brw->shader_time.bo = NULL;
}

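/**
 * Free the parameter arrays hanging off a brw_stage_prog_data.
 */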
void
brw_stage_prog_data_free(const void *p)
{
   struct brw_stage_prog_data *prog_data = (struct brw_stage_prog_data *)p;

   ralloc_free(prog_data->param);
   ralloc_free(prog_data->pull_param);
}

void
brw_dump_arb_asm(const char *stage, struct gl_program *prog)
{
   fprintf(stderr, "ARB_%s_program %d ir for native %s shader\n",
           stage, prog->Id, stage);
   _mesa_print_program(prog);
}

void
brw_setup_tex_for_precompile(const struct intel_device_info *devinfo,
                             struct brw_sampler_prog_key_data *tex,
                             const struct gl_program *prog)
{
   const bool has_shader_channel_select = devinfo->verx10 >= 75;
   unsigned sampler_count = util_last_bit(prog->SamplersUsed);
   for (unsigned i = 0; i < sampler_count; i++) {
      if (!has_shader_channel_select && (prog->ShadowSamplers & (1 << i))) {
         /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
         tex->swizzles[i] =
            MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
      } else {
         /* Color sampler: assume no swizzling. */
         tex->swizzles[i] = SWIZZLE_XYZW;
      }
   }
}

/**
 * Sets up the starting offsets for the groups of binding table entries
 * common to all pipeline stages.
 *
 * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're
 * unused, and also to ensure that adding small offsets to them will trigger
 * the asserts that surface indices are < BRW_MAX_SURFACES.
 */
uint32_t
brw_assign_common_binding_table_offsets(const struct intel_device_info *devinfo,
                                        const struct gl_program *prog,
                                        struct brw_stage_prog_data *stage_prog_data,
                                        uint32_t next_binding_table_offset)
{
   int num_textures = util_last_bit(prog->SamplersUsed);

   stage_prog_data->binding_table.texture_start = next_binding_table_offset;
   next_binding_table_offset += num_textures;

   if (prog->info.num_ubos) {
      assert(prog->info.num_ubos <= BRW_MAX_UBO);
      stage_prog_data->binding_table.ubo_start = next_binding_table_offset;
      next_binding_table_offset += prog->info.num_ubos;
   } else {
      stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0;
   }

   if (prog->info.num_ssbos || prog->info.num_abos) {
      assert(prog->info.num_abos <= BRW_MAX_ABO);
      assert(prog->info.num_ssbos <= BRW_MAX_SSBO);
      stage_prog_data->binding_table.ssbo_start = next_binding_table_offset;
      next_binding_table_offset += prog->info.num_abos + prog->info.num_ssbos;
   } else {
      stage_prog_data->binding_table.ssbo_start = 0xd0d0d0d0;
   }

   if (INTEL_DEBUG(DEBUG_SHADER_TIME)) {
      stage_prog_data->binding_table.shader_time_start =
         next_binding_table_offset;
      next_binding_table_offset++;
   } else {
      stage_prog_data->binding_table.shader_time_start = 0xd0d0d0d0;
   }

   if (prog->info.uses_texture_gather) {
      if (devinfo->ver >= 8) {
         stage_prog_data->binding_table.gather_texture_start =
            stage_prog_data->binding_table.texture_start;
      } else {
         stage_prog_data->binding_table.gather_texture_start =
            next_binding_table_offset;
         next_binding_table_offset += num_textures;
      }
   } else {
      stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
   }

   if (prog->info.num_images) {
      stage_prog_data->binding_table.image_start = next_binding_table_offset;
      next_binding_table_offset += prog->info.num_images;
   } else {
      stage_prog_data->binding_table.image_start = 0xd0d0d0d0;
   }

   /* This may or may not be used depending on how the compile goes. */
   stage_prog_data->binding_table.pull_constants_start =
      next_binding_table_offset;
   next_binding_table_offset++;

   /* Plane 0 is just the regular texture section */
   stage_prog_data->binding_table.plane_start[0] =
      stage_prog_data->binding_table.texture_start;

   stage_prog_data->binding_table.plane_start[1] = next_binding_table_offset;
   next_binding_table_offset += num_textures;

   stage_prog_data->binding_table.plane_start[2] = next_binding_table_offset;
   next_binding_table_offset += num_textures;

   /* Set the binding table size.  Some callers may append new entries
    * and increase this accordingly.
    */
   stage_prog_data->binding_table.size_bytes = next_binding_table_offset * 4;

   assert(next_binding_table_offset <= BRW_MAX_SURFACES);
   return next_binding_table_offset;
}

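/**
 * Fill in the default (most likely) program key for the given stage,
 * used when precompiling before any draw-time state is known.
 */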
void
brw_populate_default_key(const struct brw_compiler *compiler,
                         union brw_any_prog_key *prog_key,
                         struct gl_shader_program *sh_prog,
                         struct gl_program *prog)
{
   switch (prog->info.stage) {
   case MESA_SHADER_VERTEX:
      brw_vs_populate_default_key(compiler, &prog_key->vs, prog);
      break;
   case MESA_SHADER_TESS_CTRL:
      brw_tcs_populate_default_key(compiler, &prog_key->tcs, sh_prog, prog);
      break;
   case MESA_SHADER_TESS_EVAL:
      brw_tes_populate_default_key(compiler, &prog_key->tes, sh_prog, prog);
      break;
   case MESA_SHADER_GEOMETRY:
      brw_gs_populate_default_key(compiler, &prog_key->gs, prog);
      break;
   case MESA_SHADER_FRAGMENT:
      brw_wm_populate_default_key(compiler, &prog_key->wm, prog);
      break;
   case MESA_SHADER_COMPUTE:
      brw_cs_populate_default_key(compiler, &prog_key->cs, prog);
      break;
   default:
      unreachable("Unsupported stage!");
   }
}

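/**
 * Log that a shader is being recompiled and report which key fields
 * differ from the key used for the previous compile.
 */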
void
brw_debug_recompile(struct brw_context *brw,
                    gl_shader_stage stage,
                    unsigned api_id,
                    struct brw_base_prog_key *key)
{
   const struct brw_compiler *compiler = brw->screen->compiler;
   enum brw_cache_id cache_id = brw_stage_cache_id(stage);

   brw_shader_perf_log(compiler, brw, "Recompiling %s shader for program %d\n",
                       _mesa_shader_stage_to_string(stage), api_id);

   const void *old_key =
      brw_find_previous_compile(&brw->cache, cache_id, key->program_string_id);

   brw_debug_key_recompile(compiler, brw, stage, old_key, key);
}
889