1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric@anholt.net>
25 *
26 */
27
28 #ifndef BRW_FS_H
29 #define BRW_FS_H
30
31 #include "brw_shader.h"
32 #include "brw_ir_fs.h"
33 #include "brw_fs_live_variables.h"
34 #include "brw_ir_performance.h"
35 #include "compiler/nir/nir.h"
36
37 struct bblock_t;
38 namespace {
39 struct acp_entry;
40 }
41
42 class fs_visitor;
43
44 namespace brw {
45 /**
46 * Register pressure analysis of a shader. Estimates how many registers
47 * are live at any point of the program in GRF units.
48 */
49 struct register_pressure {
50 register_pressure(const fs_visitor *v);
51 ~register_pressure();
52
53 analysis_dependency_class
dependency_classregister_pressure54 dependency_class() const
55 {
56 return (DEPENDENCY_INSTRUCTION_IDENTITY |
57 DEPENDENCY_INSTRUCTION_DATA_FLOW |
58 DEPENDENCY_VARIABLES);
59 }
60
61 bool
validateregister_pressure62 validate(const fs_visitor *) const
63 {
64 /* FINISHME */
65 return true;
66 }
67
68 unsigned *regs_live_at_ip;
69 };
70 }
71
72 struct brw_gs_compile;
73
74 namespace brw {
75 class fs_builder;
76 }
77
/**
 * Compilation statistics accumulated while compiling a single shader
 * variant; consumed by fs_generator::generate_code().
 */
struct shader_stats {
   /** Name of the instruction-scheduler heuristic that was used. */
   const char *scheduler_mode;
   /* Count of constants promoted to push constants — TODO confirm against
    * the pass that sets this. */
   unsigned promoted_constants;
   /* Register-allocation spill/fill instruction counts. */
   unsigned spill_count;
   unsigned fill_count;
   /** Peak register pressure observed, in GRF units. */
   unsigned max_register_pressure;
};
85
/** Register numbers for thread payload fields. */
struct thread_payload {
   /** The number of thread payload registers the hardware will supply. */
   uint8_t num_regs;

   virtual ~thread_payload() = default;

protected:
   /* Protected: only the per-stage payload subclasses below may be
    * instantiated.  num_regs is value-initialized to 0.
    */
   thread_payload() : num_regs() {}
};
96
/** Vertex shader thread payload layout. */
struct vs_thread_payload : public thread_payload {
   vs_thread_payload(const fs_visitor &v);

   /* URB handles provided in the thread payload. */
   fs_reg urb_handles;
};
102
/** Tessellation control shader thread payload layout. */
struct tcs_thread_payload : public thread_payload {
   tcs_thread_payload(const fs_visitor &v);

   fs_reg patch_urb_output;
   fs_reg primitive_id;
   /* Handle of the first input control point — TODO confirm. */
   fs_reg icp_handle_start;
};
110
/** Tessellation evaluation shader thread payload layout. */
struct tes_thread_payload : public thread_payload {
   tes_thread_payload(const fs_visitor &v);

   fs_reg patch_urb_input;
   fs_reg primitive_id;
   /* Tessellation coordinates (u, v, w). */
   fs_reg coords[3];
   fs_reg urb_output;
};
119
/** Geometry shader thread payload layout. */
struct gs_thread_payload : public thread_payload {
   gs_thread_payload(fs_visitor &v);

   fs_reg urb_handles;
   fs_reg primitive_id;
   fs_reg instance_id;
   /* Handle of the first input control point — TODO confirm. */
   fs_reg icp_handle_start;
};
128
/**
 * Fragment shader thread payload layout.
 *
 * Fields are GRF register numbers rather than fs_regs.  The two-element
 * arrays appear to hold one entry per SIMD16 half of the dispatch —
 * TODO confirm against the constructor.
 */
struct fs_thread_payload : public thread_payload {
   fs_thread_payload(const fs_visitor &v,
                     bool &source_depth_to_render_target);

   uint8_t subspan_coord_reg[2];
   uint8_t source_depth_reg[2];
   uint8_t source_w_reg[2];
   uint8_t aa_dest_stencil_reg[2];
   uint8_t dest_depth_reg[2];
   uint8_t sample_pos_reg[2];
   uint8_t sample_mask_in_reg[2];
   uint8_t depth_w_coef_reg;
   /* One pair of registers for each barycentric interpolation mode. */
   uint8_t barycentric_coord_reg[BRW_BARYCENTRIC_MODE_COUNT][2];
};
143
/** Compute shader thread payload layout. */
struct cs_thread_payload : public thread_payload {
   cs_thread_payload(const fs_visitor &v);

   /* Loads the subgroup ID into \p dest using builder \p bld. */
   void load_subgroup_id(const brw::fs_builder &bld, fs_reg &dest) const;

   fs_reg local_invocation_id[3];

protected:
   /* Backing storage for load_subgroup_id(); subclasses may also use it. */
   fs_reg subgroup_id_;
};
154
/** Task/mesh shader thread payload layout (shares the CS base layout). */
struct task_mesh_thread_payload : public cs_thread_payload {
   task_mesh_thread_payload(fs_visitor &v);

   fs_reg extended_parameter_0;
   fs_reg local_index;
   fs_reg inline_parameter;

   fs_reg urb_output;

   /* URB to read Task memory inputs. Only valid for MESH stage. */
   fs_reg task_urb_input;
};
167
/** Ray-tracing (bindless) shader thread payload layout. */
struct bs_thread_payload : public thread_payload {
   bs_thread_payload(const fs_visitor &v);

   fs_reg global_arg_ptr;
   fs_reg local_arg_ptr;

   /* Loads the shader type into \p dest using builder \p bld. */
   void load_shader_type(const brw::fs_builder &bld, fs_reg &dest) const;
};
176
177 class fs_instruction_scheduler;
178
179 /**
180 * The fragment shader front-end.
181 *
182 * Translates either GLSL IR or Mesa IR (for ARB_fragment_program) into FS IR.
183 */
184 class fs_visitor : public backend_shader
185 {
186 public:
187 fs_visitor(const struct brw_compiler *compiler,
188 const struct brw_compile_params *params,
189 const brw_base_prog_key *key,
190 struct brw_stage_prog_data *prog_data,
191 const nir_shader *shader,
192 unsigned dispatch_width,
193 bool needs_register_pressure,
194 bool debug_enabled);
195 fs_visitor(const struct brw_compiler *compiler,
196 const struct brw_compile_params *params,
197 const brw_wm_prog_key *key,
198 struct brw_wm_prog_data *prog_data,
199 const nir_shader *shader,
200 unsigned dispatch_width,
201 unsigned num_polygons,
202 bool needs_register_pressure,
203 bool debug_enabled);
204 fs_visitor(const struct brw_compiler *compiler,
205 const struct brw_compile_params *params,
206 struct brw_gs_compile *gs_compile,
207 struct brw_gs_prog_data *prog_data,
208 const nir_shader *shader,
209 bool needs_register_pressure,
210 bool debug_enabled);
211 void init();
212 ~fs_visitor();
213
214 fs_reg vgrf(const glsl_type *const type);
215 void import_uniforms(fs_visitor *v);
216
217 void VARYING_PULL_CONSTANT_LOAD(const brw::fs_builder &bld,
218 const fs_reg &dst,
219 const fs_reg &surface,
220 const fs_reg &surface_handle,
221 const fs_reg &varying_offset,
222 uint32_t const_offset,
223 uint8_t alignment,
224 unsigned components);
225
226 bool run_fs(bool allow_spilling, bool do_rep_send);
227 bool run_vs();
228 bool run_tcs();
229 bool run_tes();
230 bool run_gs();
231 bool run_cs(bool allow_spilling);
232 bool run_bs(bool allow_spilling);
233 bool run_task(bool allow_spilling);
234 bool run_mesh(bool allow_spilling);
235 void allocate_registers(bool allow_spilling);
236 uint32_t compute_max_register_pressure();
237 void assign_curb_setup();
238 void assign_urb_setup();
239 void convert_attr_sources_to_hw_regs(fs_inst *inst);
240 void assign_vs_urb_setup();
241 void assign_tcs_urb_setup();
242 void assign_tes_urb_setup();
243 void assign_gs_urb_setup();
244 bool assign_regs(bool allow_spilling, bool spill_all);
245 void assign_regs_trivial();
246 void calculate_payload_ranges(unsigned payload_node_count,
247 int *payload_last_use_ip) const;
248 void assign_constant_locations();
249 bool get_pull_locs(const fs_reg &src, unsigned *out_surf_index,
250 unsigned *out_pull_index);
251 virtual void invalidate_analysis(brw::analysis_dependency_class c);
252
253 #ifndef NDEBUG
254 void validate();
255 #else
validate()256 void validate() {}
257 #endif
258
259 fs_instruction_scheduler *prepare_scheduler(void *mem_ctx);
260 void schedule_instructions_pre_ra(fs_instruction_scheduler *sched,
261 instruction_scheduler_mode mode);
262 void schedule_instructions_post_ra();
263
264 void vfail(const char *msg, va_list args);
265 void fail(const char *msg, ...);
266 void limit_dispatch_width(unsigned n, const char *msg);
267
268 void emit_repclear_shader();
269 void emit_interpolation_setup();
270
271 void set_tcs_invocation_id();
272
273 fs_inst *emit_single_fb_write(const brw::fs_builder &bld,
274 fs_reg color1, fs_reg color2,
275 fs_reg src0_alpha, unsigned components);
276 void do_emit_fb_writes(int nr_color_regions, bool replicate_alpha);
277 void emit_fb_writes();
278 void emit_urb_writes(const fs_reg &gs_vertex_count = fs_reg());
279 void emit_gs_control_data_bits(const fs_reg &vertex_count);
280 void emit_gs_thread_end();
281 bool mark_last_urb_write_with_eot();
282 void emit_tcs_thread_end();
283 void emit_urb_fence();
284 void emit_cs_terminate();
285
286 fs_reg interp_reg(const brw::fs_builder &bld, unsigned location,
287 unsigned channel, unsigned comp);
288 fs_reg per_primitive_reg(const brw::fs_builder &bld,
289 int location, unsigned comp);
290
291 virtual void dump_instruction_to_file(const backend_instruction *inst, FILE *file) const;
292 virtual void dump_instructions_to_file(FILE *file) const;
293
294 const brw_base_prog_key *const key;
295
296 struct brw_gs_compile *gs_compile;
297
298 struct brw_stage_prog_data *prog_data;
299
300 brw_analysis<brw::fs_live_variables, backend_shader> live_analysis;
301 brw_analysis<brw::register_pressure, fs_visitor> regpressure_analysis;
302 brw_analysis<brw::performance, fs_visitor> performance_analysis;
303
304 /** Number of uniform variable components visited. */
305 unsigned uniforms;
306
307 /** Byte-offset for the next available spot in the scratch space buffer. */
308 unsigned last_scratch;
309
310 /**
311 * Array mapping UNIFORM register numbers to the push parameter index,
312 * or -1 if this uniform register isn't being uploaded as a push constant.
313 */
314 int *push_constant_loc;
315
316 fs_reg frag_depth;
317 fs_reg frag_stencil;
318 fs_reg sample_mask;
319 fs_reg outputs[VARYING_SLOT_MAX];
320 fs_reg dual_src_output;
321 int first_non_payload_grf;
322
323 bool failed;
324 char *fail_msg;
325
326 thread_payload *payload_;
327
payload()328 thread_payload &payload() {
329 return *this->payload_;
330 }
331
vs_payload()332 vs_thread_payload &vs_payload() {
333 assert(stage == MESA_SHADER_VERTEX);
334 return *static_cast<vs_thread_payload *>(this->payload_);
335 }
336
tcs_payload()337 tcs_thread_payload &tcs_payload() {
338 assert(stage == MESA_SHADER_TESS_CTRL);
339 return *static_cast<tcs_thread_payload *>(this->payload_);
340 }
341
tes_payload()342 tes_thread_payload &tes_payload() {
343 assert(stage == MESA_SHADER_TESS_EVAL);
344 return *static_cast<tes_thread_payload *>(this->payload_);
345 }
346
gs_payload()347 gs_thread_payload &gs_payload() {
348 assert(stage == MESA_SHADER_GEOMETRY);
349 return *static_cast<gs_thread_payload *>(this->payload_);
350 }
351
fs_payload()352 fs_thread_payload &fs_payload() {
353 assert(stage == MESA_SHADER_FRAGMENT);
354 return *static_cast<fs_thread_payload *>(this->payload_);
355 };
356
cs_payload()357 cs_thread_payload &cs_payload() {
358 assert(gl_shader_stage_uses_workgroup(stage));
359 return *static_cast<cs_thread_payload *>(this->payload_);
360 }
361
task_mesh_payload()362 task_mesh_thread_payload &task_mesh_payload() {
363 assert(stage == MESA_SHADER_TASK || stage == MESA_SHADER_MESH);
364 return *static_cast<task_mesh_thread_payload *>(this->payload_);
365 }
366
bs_payload()367 bs_thread_payload &bs_payload() {
368 assert(stage >= MESA_SHADER_RAYGEN && stage <= MESA_SHADER_CALLABLE);
369 return *static_cast<bs_thread_payload *>(this->payload_);
370 }
371
372 bool source_depth_to_render_target;
373
374 fs_reg pixel_x;
375 fs_reg pixel_y;
376 fs_reg pixel_z;
377 fs_reg wpos_w;
378 fs_reg pixel_w;
379 fs_reg delta_xy[BRW_BARYCENTRIC_MODE_COUNT];
380 fs_reg final_gs_vertex_count;
381 fs_reg control_data_bits;
382 fs_reg invocation_id;
383
384 unsigned grf_used;
385 bool spilled_any_registers;
386 bool needs_register_pressure;
387
388 const unsigned dispatch_width; /**< 8, 16 or 32 */
389 const unsigned max_polygons;
390 unsigned max_dispatch_width;
391
392 /* The API selected subgroup size */
393 unsigned api_subgroup_size; /**< 0, 8, 16, 32 */
394
395 struct shader_stats shader_stats;
396
397 unsigned workgroup_size() const;
398
399 void debug_optimizer(const nir_shader *nir,
400 const char *pass_name,
401 int iteration, int pass_num) const;
402 };
403
404 /**
405 * Return the flag register used in fragment shaders to keep track of live
406 * samples. On Gfx7+ we use f1.0-f1.1 to allow discard jumps in SIMD32
407 * dispatch mode.
408 */
409 static inline unsigned
sample_mask_flag_subreg(const fs_visitor & s)410 sample_mask_flag_subreg(const fs_visitor &s)
411 {
412 assert(s.stage == MESA_SHADER_FRAGMENT);
413 return 2;
414 }
415
/**
 * The fragment shader code generator.
 *
 * Translates FS IR to actual i965 assembly code.
 */
class fs_generator
{
public:
   fs_generator(const struct brw_compiler *compiler,
                const struct brw_compile_params *params,
                struct brw_stage_prog_data *prog_data,
                gl_shader_stage stage);
   ~fs_generator();

   /* Enables assembly dumps, labelled with \p shader_name. */
   void enable_debug(const char *shader_name);
   /* Generates assembly for \p cfg; returns the start offset of the
    * generated code — TODO confirm the meaning of the return value.
    */
   int generate_code(const cfg_t *cfg, int dispatch_width,
                     struct shader_stats shader_stats,
                     const brw::performance &perf,
                     struct brw_compile_stats *stats,
                     unsigned max_polygons = 0);
   void add_const_data(void *data, unsigned size);
   void add_resume_sbt(unsigned num_resume_shaders, uint64_t *sbt);
   const unsigned *get_assembly();

private:
   /* Per-opcode emission helpers used by generate_code(). */
   void generate_send(fs_inst *inst,
                      struct brw_reg dst,
                      struct brw_reg desc,
                      struct brw_reg ex_desc,
                      struct brw_reg payload,
                      struct brw_reg payload2);
   void generate_fb_read(fs_inst *inst, struct brw_reg dst,
                         struct brw_reg payload);
   void generate_cs_terminate(fs_inst *inst, struct brw_reg payload);
   void generate_barrier(fs_inst *inst, struct brw_reg src);
   bool generate_linterp(fs_inst *inst, struct brw_reg dst,
                         struct brw_reg *src);
   void generate_ddx(const fs_inst *inst,
                     struct brw_reg dst, struct brw_reg src);
   void generate_ddy(const fs_inst *inst,
                     struct brw_reg dst, struct brw_reg src);
   void generate_scratch_header(fs_inst *inst, struct brw_reg dst);

   void generate_halt(fs_inst *inst);

   void generate_mov_indirect(fs_inst *inst,
                              struct brw_reg dst,
                              struct brw_reg reg,
                              struct brw_reg indirect_byte_offset);

   void generate_shuffle(fs_inst *inst,
                         struct brw_reg dst,
                         struct brw_reg src,
                         struct brw_reg idx);

   void generate_quad_swizzle(const fs_inst *inst,
                              struct brw_reg dst, struct brw_reg src,
                              unsigned swiz);

   /* Fixes up recorded HALT jumps; see discard_halt_patches below. */
   bool patch_halt_jumps();

   const struct brw_compiler *compiler;
   const struct brw_compile_params *params;

   const struct intel_device_info *devinfo;

   /* Low-level assembler state. */
   struct brw_codegen *p;
   struct brw_stage_prog_data * const prog_data;

   unsigned dispatch_width; /**< 8, 16 or 32 */

   /* HALT instructions awaiting patch_halt_jumps(). */
   exec_list discard_halt_patches;
   bool debug_flag;
   const char *shader_name;
   gl_shader_stage stage;
   void *mem_ctx;
};
493
494 namespace brw {
495 fs_reg
496 fetch_payload_reg(const brw::fs_builder &bld, uint8_t regs[2],
497 brw_reg_type type = BRW_REGISTER_TYPE_F,
498 unsigned n = 1);
499
500 fs_reg
501 fetch_barycentric_reg(const brw::fs_builder &bld, uint8_t regs[2]);
502
503 inline fs_reg
dynamic_msaa_flags(const struct brw_wm_prog_data * wm_prog_data)504 dynamic_msaa_flags(const struct brw_wm_prog_data *wm_prog_data)
505 {
506 return fs_reg(UNIFORM, wm_prog_data->msaa_flags_param,
507 BRW_REGISTER_TYPE_UD);
508 }
509
510 void
511 check_dynamic_msaa_flag(const fs_builder &bld,
512 const struct brw_wm_prog_data *wm_prog_data,
513 enum intel_msaa_flags flag);
514
515 bool
516 lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i);
517 }
518
void shuffle_from_32bit_read(const brw::fs_builder &bld,
                             const fs_reg &dst,
                             const fs_reg &src,
                             uint32_t first_component,
                             uint32_t components);

enum brw_barycentric_mode brw_barycentric_mode(nir_intrinsic_instr *intr);

uint32_t brw_fb_write_msg_control(const fs_inst *inst,
                                  const struct brw_wm_prog_data *prog_data);

void brw_compute_urb_setup_index(struct brw_wm_prog_data *wm_prog_data);

bool brw_nir_lower_simd(nir_shader *nir, unsigned dispatch_width);

fs_reg brw_sample_mask_reg(const brw::fs_builder &bld);
void brw_emit_predicate_on_sample_mask(const brw::fs_builder &bld, fs_inst *inst);

int brw_get_subgroup_id_param_index(const intel_device_info *devinfo,
                                    const brw_stage_prog_data *prog_data);

bool brw_lower_dpas(fs_visitor &v);

/* Translates the visitor's NIR shader into FS IR. */
void nir_to_brw(fs_visitor *s);

/* Runs the FS IR optimization loop over \p s. */
void brw_fs_optimize(fs_visitor &s);

/* Lowering passes over the FS IR.  The bool return presumably indicates
 * whether the pass made progress — TODO confirm against definitions.
 */
bool brw_fs_lower_3src_null_dest(fs_visitor &s);
bool brw_fs_lower_barycentrics(fs_visitor &s);
bool brw_fs_lower_constant_loads(fs_visitor &s);
bool brw_fs_lower_derivatives(fs_visitor &s);
bool brw_fs_lower_find_live_channel(fs_visitor &s);
bool brw_fs_lower_integer_multiplication(fs_visitor &s);
bool brw_fs_lower_logical_sends(fs_visitor &s);
bool brw_fs_lower_pack(fs_visitor &s);
bool brw_fs_lower_load_payload(fs_visitor &s);
bool brw_fs_lower_regioning(fs_visitor &s);
bool brw_fs_lower_scoreboard(fs_visitor &s);
bool brw_fs_lower_sends_overlapping_payload(fs_visitor &s);
bool brw_fs_lower_simd_width(fs_visitor &s);
bool brw_fs_lower_sub_sat(fs_visitor &s);
bool brw_fs_lower_uniform_pull_constant_loads(fs_visitor &s);

/* Optimization passes over the FS IR. */
bool brw_fs_opt_algebraic(fs_visitor &s);
bool brw_fs_opt_bank_conflicts(fs_visitor &s);
bool brw_fs_opt_cmod_propagation(fs_visitor &s);
bool brw_fs_opt_combine_constants(fs_visitor &s);
bool brw_fs_opt_compact_virtual_grfs(fs_visitor &s);
bool brw_fs_opt_copy_propagation(fs_visitor &s);
bool brw_fs_opt_cse(fs_visitor &s);
bool brw_fs_opt_dead_code_eliminate(fs_visitor &s);
bool brw_fs_opt_eliminate_find_live_channel(fs_visitor &s);
bool brw_fs_opt_peephole_sel(fs_visitor &s);
bool brw_fs_opt_register_coalesce(fs_visitor &s);
bool brw_fs_opt_remove_extra_rounding_modes(fs_visitor &s);
bool brw_fs_opt_remove_redundant_halts(fs_visitor &s);
bool brw_fs_opt_saturate_propagation(fs_visitor &s);
bool brw_fs_opt_split_sends(fs_visitor &s);
bool brw_fs_opt_split_virtual_grfs(fs_visitor &s);
bool brw_fs_opt_zero_samples(fs_visitor &s);

/* Hardware workaround passes. */
bool brw_fs_workaround_emit_dummy_mov_instruction(fs_visitor &s);
bool brw_fs_workaround_memory_fence_before_eot(fs_visitor &s);
bool brw_fs_workaround_nomask_control_flow(fs_visitor &s);

/* Helpers. */
unsigned brw_fs_get_lowered_simd_width(const fs_visitor *shader,
                                       const fs_inst *inst);
587
588 #endif /* BRW_FS_H */
589