1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric@anholt.net>
25 *
26 */
27
28 #pragma once
29
30 #include "elk_shader.h"
31 #include "elk_ir_fs.h"
32 #include "elk_fs_live_variables.h"
33 #include "elk_ir_performance.h"
34 #include "compiler/nir/nir.h"
35
struct elk_bblock_t;
/* NOTE(review): an unnamed namespace in a header gives acp_entry internal
 * linkage per including TU — confirm it is only odr-used from one .cpp. */
namespace {
   struct acp_entry;
}

class elk_fs_visitor;
42
43 namespace elk {
44 /**
45 * Register pressure analysis of a shader. Estimates how many registers
46 * are live at any point of the program in GRF units.
47 */
48 struct register_pressure {
49 register_pressure(const elk_fs_visitor *v);
50 register_pressure(const register_pressure &) = delete;
51 ~register_pressure();
52 register_pressure & operator=(const register_pressure &) = delete;
53
54 analysis_dependency_class
dependency_classregister_pressure55 dependency_class() const
56 {
57 return (DEPENDENCY_INSTRUCTION_IDENTITY |
58 DEPENDENCY_INSTRUCTION_DATA_FLOW |
59 DEPENDENCY_VARIABLES);
60 }
61
62 bool
validateregister_pressure63 validate(const elk_fs_visitor *) const
64 {
65 /* FINISHME */
66 return true;
67 }
68
69 unsigned *regs_live_at_ip;
70 };
71 }
72
struct elk_gs_compile;

namespace elk {
   class fs_builder;
}
78
/** Summary statistics gathered while compiling a shader. */
struct elk_shader_stats {
   /** Name of the instruction-scheduler heuristic that was used. */
   const char *scheduler_mode;
   /** Number of uniform components promoted to inline constants. */
   unsigned promoted_constants;
   /** Number of register spill (write-to-scratch) instructions emitted. */
   unsigned spill_count;
   /** Number of register fill (read-from-scratch) instructions emitted. */
   unsigned fill_count;
   /** Peak register pressure observed, in GRF units. */
   unsigned max_register_pressure;
};
86
/** Register numbers for thread payload fields. */
struct elk_elk_thread_payload {
   /** The number of thread payload registers the hardware will supply. */
   uint8_t num_regs;

   virtual ~elk_elk_thread_payload() = default;

protected:
   /* Only constructible through the per-stage subclasses below;
    * num_regs is value-initialized to 0. */
   elk_elk_thread_payload() : num_regs() {}
};
97
98 struct elk_vs_thread_payload : public elk_elk_thread_payload {
99 elk_vs_thread_payload(const elk_fs_visitor &v);
100
101 elk_fs_reg urb_handles;
102 };
103
104 struct elk_tcs_thread_payload : public elk_elk_thread_payload {
105 elk_tcs_thread_payload(const elk_fs_visitor &v);
106
107 elk_fs_reg patch_urb_output;
108 elk_fs_reg primitive_id;
109 elk_fs_reg icp_handle_start;
110 };
111
112 struct elk_tes_thread_payload : public elk_elk_thread_payload {
113 elk_tes_thread_payload(const elk_fs_visitor &v);
114
115 elk_fs_reg patch_urb_input;
116 elk_fs_reg primitive_id;
117 elk_fs_reg coords[3];
118 elk_fs_reg urb_output;
119 };
120
121 struct elk_gs_thread_payload : public elk_elk_thread_payload {
122 elk_gs_thread_payload(elk_fs_visitor &v);
123
124 elk_fs_reg urb_handles;
125 elk_fs_reg primitive_id;
126 elk_fs_reg instance_id;
127 elk_fs_reg icp_handle_start;
128 };
129
130 struct elk_fs_thread_payload : public elk_elk_thread_payload {
131 elk_fs_thread_payload(const elk_fs_visitor &v,
132 bool &source_depth_to_render_target,
133 bool &runtime_check_aads_emit);
134
135 uint8_t subspan_coord_reg[2];
136 uint8_t source_depth_reg[2];
137 uint8_t source_w_reg[2];
138 uint8_t aa_dest_stencil_reg[2];
139 uint8_t dest_depth_reg[2];
140 uint8_t sample_pos_reg[2];
141 uint8_t sample_mask_in_reg[2];
142 uint8_t depth_w_coef_reg;
143 uint8_t barycentric_coord_reg[ELK_BARYCENTRIC_MODE_COUNT][2];
144 };
145
146 struct elk_cs_thread_payload : public elk_elk_thread_payload {
147 elk_cs_thread_payload(const elk_fs_visitor &v);
148
149 void load_subgroup_id(const elk::fs_builder &bld, elk_fs_reg &dest) const;
150
151 elk_fs_reg local_invocation_id[3];
152
153 protected:
154 elk_fs_reg subgroup_id_;
155 };
156
class elk_fs_instruction_scheduler;
158
159 /**
160 * The fragment shader front-end.
161 *
162 * Translates either GLSL IR or Mesa IR (for ARB_fragment_program) into FS IR.
163 */
164 class elk_fs_visitor : public elk_backend_shader
165 {
166 public:
167 elk_fs_visitor(const struct elk_compiler *compiler,
168 const struct elk_compile_params *params,
169 const elk_base_prog_key *key,
170 struct elk_stage_prog_data *prog_data,
171 const nir_shader *shader,
172 unsigned dispatch_width,
173 bool needs_register_pressure,
174 bool debug_enabled);
175 elk_fs_visitor(const struct elk_compiler *compiler,
176 const struct elk_compile_params *params,
177 const elk_wm_prog_key *key,
178 struct elk_wm_prog_data *prog_data,
179 const nir_shader *shader,
180 unsigned dispatch_width,
181 bool needs_register_pressure,
182 bool debug_enabled);
183 elk_fs_visitor(const struct elk_compiler *compiler,
184 const struct elk_compile_params *params,
185 struct elk_gs_compile *gs_compile,
186 struct elk_gs_prog_data *prog_data,
187 const nir_shader *shader,
188 bool needs_register_pressure,
189 bool debug_enabled);
190 elk_fs_visitor(const elk_fs_visitor &) = delete;
191 void init();
192 ~elk_fs_visitor();
193
194 elk_fs_visitor & operator=(const elk_fs_visitor &) = delete;
195
196 elk_fs_reg vgrf(const glsl_type *const type);
197 void import_uniforms(elk_fs_visitor *v);
198
199 void VARYING_PULL_CONSTANT_LOAD(const elk::fs_builder &bld,
200 const elk_fs_reg &dst,
201 const elk_fs_reg &surface,
202 const elk_fs_reg &surface_handle,
203 const elk_fs_reg &varying_offset,
204 uint32_t const_offset,
205 uint8_t alignment,
206 unsigned components);
207 void DEP_RESOLVE_MOV(const elk::fs_builder &bld, int grf);
208
209 bool run_fs(bool allow_spilling, bool do_rep_send);
210 bool run_vs();
211 bool run_tcs();
212 bool run_tes();
213 bool run_gs();
214 bool run_cs(bool allow_spilling);
215 void optimize();
216 void allocate_registers(bool allow_spilling);
217 uint32_t compute_max_register_pressure();
218 void fixup_3src_null_dest();
219 void assign_curb_setup();
220 void assign_urb_setup();
221 void convert_attr_sources_to_hw_regs(elk_fs_inst *inst);
222 void assign_vs_urb_setup();
223 void assign_tcs_urb_setup();
224 void assign_tes_urb_setup();
225 void assign_gs_urb_setup();
226 bool assign_regs(bool allow_spilling, bool spill_all);
227 void assign_regs_trivial();
228 void calculate_payload_ranges(unsigned payload_node_count,
229 int *payload_last_use_ip) const;
230 bool split_virtual_grfs();
231 bool compact_virtual_grfs();
232 void assign_constant_locations();
233 bool get_pull_locs(const elk_fs_reg &src, unsigned *out_surf_index,
234 unsigned *out_pull_index);
235 bool lower_constant_loads();
236 virtual void invalidate_analysis(elk::analysis_dependency_class c);
237
238 #ifndef NDEBUG
239 void validate();
240 #else
validate()241 void validate() {}
242 #endif
243
244 bool opt_algebraic();
245 bool opt_redundant_halt();
246 bool opt_cse();
247 bool opt_cse_local(const elk::fs_live_variables &live, elk_bblock_t *block, int &ip);
248
249 bool opt_copy_propagation();
250 bool opt_bank_conflicts();
251 bool register_coalesce();
252 bool compute_to_mrf();
253 bool eliminate_find_live_channel();
254 bool dead_code_eliminate();
255 bool remove_duplicate_mrf_writes();
256 bool remove_extra_rounding_modes();
257
258 elk_fs_instruction_scheduler *prepare_scheduler(void *mem_ctx);
259 void schedule_instructions_pre_ra(elk_fs_instruction_scheduler *sched,
260 instruction_scheduler_mode mode);
261 void schedule_instructions_post_ra();
262
263 void insert_gfx4_send_dependency_workarounds();
264 void insert_gfx4_pre_send_dependency_workarounds(elk_bblock_t *block,
265 elk_fs_inst *inst);
266 void insert_gfx4_post_send_dependency_workarounds(elk_bblock_t *block,
267 elk_fs_inst *inst);
268 bool workaround_source_arf_before_eot();
269 void vfail(const char *msg, va_list args);
270 void fail(const char *msg, ...);
271 void limit_dispatch_width(unsigned n, const char *msg);
272 bool lower_uniform_pull_constant_loads();
273 bool lower_load_payload();
274 bool lower_pack();
275 bool lower_regioning();
276 bool lower_logical_sends();
277 bool lower_integer_multiplication();
278 bool lower_minmax();
279 bool lower_simd_width();
280 bool lower_barycentrics();
281 bool lower_find_live_channel();
282 bool lower_scoreboard();
283 bool lower_sub_sat();
284 bool opt_combine_constants();
285
286 void emit_repclear_shader();
287 void emit_interpolation_setup_gfx4();
288 void emit_interpolation_setup_gfx6();
289 bool opt_peephole_sel();
290 bool opt_saturate_propagation();
291 bool opt_cmod_propagation();
292 bool opt_zero_samples();
293
294 void set_tcs_invocation_id();
295
296 void emit_alpha_test();
297 elk_fs_inst *emit_single_fb_write(const elk::fs_builder &bld,
298 elk_fs_reg color1, elk_fs_reg color2,
299 elk_fs_reg src0_alpha, unsigned components);
300 void do_emit_fb_writes(int nr_color_regions, bool replicate_alpha);
301 void emit_fb_writes();
302 void emit_urb_writes(const elk_fs_reg &gs_vertex_count = elk_fs_reg());
303 void emit_gs_control_data_bits(const elk_fs_reg &vertex_count);
304 void emit_gs_thread_end();
305 bool mark_last_urb_write_with_eot();
306 void emit_tcs_thread_end();
307 void emit_urb_fence();
308 void emit_cs_terminate();
309
310 elk_fs_reg interp_reg(const elk::fs_builder &bld, unsigned location,
311 unsigned channel, unsigned comp);
312 elk_fs_reg per_primitive_reg(const elk::fs_builder &bld,
313 int location, unsigned comp);
314
315 virtual void dump_instruction_to_file(const elk_backend_instruction *inst, FILE *file) const;
316 virtual void dump_instructions_to_file(FILE *file) const;
317
318 const elk_base_prog_key *const key;
319 const struct elk_sampler_prog_key_data *key_tex;
320
321 struct elk_gs_compile *gs_compile;
322
323 struct elk_stage_prog_data *prog_data;
324
325 elk_analysis<elk::fs_live_variables, elk_backend_shader> live_analysis;
326 elk_analysis<elk::register_pressure, elk_fs_visitor> regpressure_analysis;
327 elk_analysis<elk::performance, elk_fs_visitor> performance_analysis;
328
329 /** Number of uniform variable components visited. */
330 unsigned uniforms;
331
332 /** Byte-offset for the next available spot in the scratch space buffer. */
333 unsigned last_scratch;
334
335 /**
336 * Array mapping UNIFORM register numbers to the push parameter index,
337 * or -1 if this uniform register isn't being uploaded as a push constant.
338 */
339 int *push_constant_loc;
340
341 elk_fs_reg frag_depth;
342 elk_fs_reg frag_stencil;
343 elk_fs_reg sample_mask;
344 elk_fs_reg outputs[VARYING_SLOT_MAX];
345 elk_fs_reg dual_src_output;
346 int first_non_payload_grf;
347 /** Either ELK_MAX_GRF or GFX7_MRF_HACK_START */
348 unsigned max_grf;
349
350 bool failed;
351 char *fail_msg;
352
353 elk_elk_thread_payload *payload_;
354
payload()355 elk_elk_thread_payload &payload() {
356 return *this->payload_;
357 }
358
vs_payload()359 elk_vs_thread_payload &vs_payload() {
360 assert(stage == MESA_SHADER_VERTEX);
361 return *static_cast<elk_vs_thread_payload *>(this->payload_);
362 }
363
tcs_payload()364 elk_tcs_thread_payload &tcs_payload() {
365 assert(stage == MESA_SHADER_TESS_CTRL);
366 return *static_cast<elk_tcs_thread_payload *>(this->payload_);
367 }
368
tes_payload()369 elk_tes_thread_payload &tes_payload() {
370 assert(stage == MESA_SHADER_TESS_EVAL);
371 return *static_cast<elk_tes_thread_payload *>(this->payload_);
372 }
373
gs_payload()374 elk_gs_thread_payload &gs_payload() {
375 assert(stage == MESA_SHADER_GEOMETRY);
376 return *static_cast<elk_gs_thread_payload *>(this->payload_);
377 }
378
fs_payload()379 elk_fs_thread_payload &fs_payload() {
380 assert(stage == MESA_SHADER_FRAGMENT);
381 return *static_cast<elk_fs_thread_payload *>(this->payload_);
382 };
383
cs_payload()384 elk_cs_thread_payload &cs_payload() {
385 assert(gl_shader_stage_uses_workgroup(stage));
386 return *static_cast<elk_cs_thread_payload *>(this->payload_);
387 }
388
389 bool source_depth_to_render_target;
390 bool runtime_check_aads_emit;
391
392 elk_fs_reg pixel_x;
393 elk_fs_reg pixel_y;
394 elk_fs_reg pixel_z;
395 elk_fs_reg wpos_w;
396 elk_fs_reg pixel_w;
397 elk_fs_reg delta_xy[ELK_BARYCENTRIC_MODE_COUNT];
398 elk_fs_reg final_gs_vertex_count;
399 elk_fs_reg control_data_bits;
400 elk_fs_reg invocation_id;
401
402 unsigned grf_used;
403 bool spilled_any_registers;
404 bool needs_register_pressure;
405
406 const unsigned dispatch_width; /**< 8, 16 or 32 */
407 unsigned max_dispatch_width;
408
409 /* The API selected subgroup size */
410 unsigned api_subgroup_size; /**< 0, 8, 16, 32 */
411
412 struct elk_shader_stats shader_stats;
413
414 void lower_mul_dword_inst(elk_fs_inst *inst, elk_bblock_t *block);
415 void lower_mul_qword_inst(elk_fs_inst *inst, elk_bblock_t *block);
416 void lower_mulh_inst(elk_fs_inst *inst, elk_bblock_t *block);
417
418 unsigned workgroup_size() const;
419
420 void debug_optimizer(const nir_shader *nir,
421 const char *pass_name,
422 int iteration, int pass_num) const;
423 };
424
425 /**
426 * Return the flag register used in fragment shaders to keep track of live
427 * samples. On Gfx7+ we use f1.0-f1.1 to allow discard jumps in SIMD32
428 * dispatch mode, while earlier generations are constrained to f0.1, which
429 * limits the dispatch width to SIMD16 for fragment shaders that use discard.
430 */
431 static inline unsigned
sample_mask_flag_subreg(const elk_fs_visitor & s)432 sample_mask_flag_subreg(const elk_fs_visitor &s)
433 {
434 assert(s.stage == MESA_SHADER_FRAGMENT);
435 return s.devinfo->ver >= 7 ? 2 : 1;
436 }
437
438 /**
439 * The fragment shader code generator.
440 *
441 * Translates FS IR to actual i965 assembly code.
442 */
443 class elk_fs_generator
444 {
445 public:
446 elk_fs_generator(const struct elk_compiler *compiler,
447 const struct elk_compile_params *params,
448 struct elk_stage_prog_data *prog_data,
449 bool runtime_check_aads_emit,
450 gl_shader_stage stage);
451 ~elk_fs_generator();
452
453 void enable_debug(const char *shader_name);
454 int generate_code(const elk_cfg_t *cfg, int dispatch_width,
455 struct elk_shader_stats shader_stats,
456 const elk::performance &perf,
457 struct elk_compile_stats *stats);
458 void add_const_data(void *data, unsigned size);
459 const unsigned *get_assembly();
460
461 private:
462 void fire_fb_write(elk_fs_inst *inst,
463 struct elk_reg payload,
464 struct elk_reg implied_header,
465 GLuint nr);
466 void generate_send(elk_fs_inst *inst,
467 struct elk_reg dst,
468 struct elk_reg desc,
469 struct elk_reg payload);
470 void generate_fb_write(elk_fs_inst *inst, struct elk_reg payload);
471 void generate_cs_terminate(elk_fs_inst *inst, struct elk_reg payload);
472 void generate_barrier(elk_fs_inst *inst, struct elk_reg src);
473 bool generate_linterp(elk_fs_inst *inst, struct elk_reg dst,
474 struct elk_reg *src);
475 void generate_tex(elk_fs_inst *inst, struct elk_reg dst,
476 struct elk_reg surface_index,
477 struct elk_reg sampler_index);
478 void generate_ddx(const elk_fs_inst *inst,
479 struct elk_reg dst, struct elk_reg src);
480 void generate_ddy(const elk_fs_inst *inst,
481 struct elk_reg dst, struct elk_reg src);
482 void generate_scratch_write(elk_fs_inst *inst, struct elk_reg src);
483 void generate_scratch_read(elk_fs_inst *inst, struct elk_reg dst);
484 void generate_scratch_read_gfx7(elk_fs_inst *inst, struct elk_reg dst);
485 void generate_scratch_header(elk_fs_inst *inst, struct elk_reg dst);
486 void generate_uniform_pull_constant_load(elk_fs_inst *inst, struct elk_reg dst,
487 struct elk_reg index,
488 struct elk_reg offset);
489 void generate_varying_pull_constant_load_gfx4(elk_fs_inst *inst,
490 struct elk_reg dst,
491 struct elk_reg index);
492
493 void generate_set_sample_id(elk_fs_inst *inst,
494 struct elk_reg dst,
495 struct elk_reg src0,
496 struct elk_reg src1);
497
498 void generate_halt(elk_fs_inst *inst);
499
500 void generate_mov_indirect(elk_fs_inst *inst,
501 struct elk_reg dst,
502 struct elk_reg reg,
503 struct elk_reg indirect_byte_offset);
504
505 void generate_shuffle(elk_fs_inst *inst,
506 struct elk_reg dst,
507 struct elk_reg src,
508 struct elk_reg idx);
509
510 void generate_quad_swizzle(const elk_fs_inst *inst,
511 struct elk_reg dst, struct elk_reg src,
512 unsigned swiz);
513
514 bool patch_halt_jumps();
515
516 const struct elk_compiler *compiler;
517 const struct elk_compile_params *params;
518
519 const struct intel_device_info *devinfo;
520
521 struct elk_codegen *p;
522 struct elk_stage_prog_data * const prog_data;
523
524 unsigned dispatch_width; /**< 8, 16 or 32 */
525
526 exec_list discard_halt_patches;
527 bool runtime_check_aads_emit;
528 bool debug_flag;
529 const char *shader_name;
530 gl_shader_stage stage;
531 void *mem_ctx;
532 };
533
534 namespace elk {
535 elk_fs_reg
536 fetch_payload_reg(const elk::fs_builder &bld, uint8_t regs[2],
537 elk_reg_type type = ELK_REGISTER_TYPE_F,
538 unsigned n = 1);
539
540 elk_fs_reg
541 fetch_barycentric_reg(const elk::fs_builder &bld, uint8_t regs[2]);
542
543 inline elk_fs_reg
dynamic_msaa_flags(const struct elk_wm_prog_data * wm_prog_data)544 dynamic_msaa_flags(const struct elk_wm_prog_data *wm_prog_data)
545 {
546 return elk_fs_reg(UNIFORM, wm_prog_data->msaa_flags_param,
547 ELK_REGISTER_TYPE_UD);
548 }
549
550 void
551 check_dynamic_msaa_flag(const fs_builder &bld,
552 const struct elk_wm_prog_data *wm_prog_data,
553 enum intel_msaa_flags flag);
554
555 bool
556 lower_src_modifiers(elk_fs_visitor *v, elk_bblock_t *block, elk_fs_inst *inst, unsigned i);
557 }
558
559 void elk_shuffle_from_32bit_read(const elk::fs_builder &bld,
560 const elk_fs_reg &dst,
561 const elk_fs_reg &src,
562 uint32_t first_component,
563 uint32_t components);
564
565 elk_fs_reg elk_setup_imm_df(const elk::fs_builder &bld,
566 double v);
567
568 elk_fs_reg elk_setup_imm_b(const elk::fs_builder &bld,
569 int8_t v);
570
571 elk_fs_reg elk_setup_imm_ub(const elk::fs_builder &bld,
572 uint8_t v);
573
574 enum elk_barycentric_mode elk_barycentric_mode(nir_intrinsic_instr *intr);
575
576 uint32_t elk_fb_write_msg_control(const elk_fs_inst *inst,
577 const struct elk_wm_prog_data *prog_data);
578
579 void elk_compute_urb_setup_index(struct elk_wm_prog_data *wm_prog_data);
580
581 bool elk_nir_lower_simd(nir_shader *nir, unsigned dispatch_width);
582
583 elk_fs_reg elk_sample_mask_reg(const elk::fs_builder &bld);
584 void elk_emit_predicate_on_sample_mask(const elk::fs_builder &bld, elk_fs_inst *inst);
585
586 int elk_get_subgroup_id_param_index(const intel_device_info *devinfo,
587 const elk_stage_prog_data *prog_data);
588
589 void nir_to_elk(elk_fs_visitor *s);
590