1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_cfg.h"
25 #include "brw_eu.h"
26 #include "brw_fs.h"
27 #include "brw_nir.h"
28 #include "brw_vec4_tes.h"
29 #include "dev/intel_debug.h"
30 #include "main/uniforms.h"
31 #include "util/macros.h"
32
33 enum brw_reg_type
brw_type_for_base_type(const struct glsl_type * type)34 brw_type_for_base_type(const struct glsl_type *type)
35 {
36 switch (type->base_type) {
37 case GLSL_TYPE_FLOAT16:
38 return BRW_REGISTER_TYPE_HF;
39 case GLSL_TYPE_FLOAT:
40 return BRW_REGISTER_TYPE_F;
41 case GLSL_TYPE_INT:
42 case GLSL_TYPE_BOOL:
43 case GLSL_TYPE_SUBROUTINE:
44 return BRW_REGISTER_TYPE_D;
45 case GLSL_TYPE_INT16:
46 return BRW_REGISTER_TYPE_W;
47 case GLSL_TYPE_INT8:
48 return BRW_REGISTER_TYPE_B;
49 case GLSL_TYPE_UINT:
50 return BRW_REGISTER_TYPE_UD;
51 case GLSL_TYPE_UINT16:
52 return BRW_REGISTER_TYPE_UW;
53 case GLSL_TYPE_UINT8:
54 return BRW_REGISTER_TYPE_UB;
55 case GLSL_TYPE_ARRAY:
56 return brw_type_for_base_type(type->fields.array);
57 case GLSL_TYPE_STRUCT:
58 case GLSL_TYPE_INTERFACE:
59 case GLSL_TYPE_SAMPLER:
60 case GLSL_TYPE_TEXTURE:
61 case GLSL_TYPE_ATOMIC_UINT:
62 /* These should be overridden with the type of the member when
63 * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely
64 * way to trip up if we don't.
65 */
66 return BRW_REGISTER_TYPE_UD;
67 case GLSL_TYPE_IMAGE:
68 return BRW_REGISTER_TYPE_UD;
69 case GLSL_TYPE_DOUBLE:
70 return BRW_REGISTER_TYPE_DF;
71 case GLSL_TYPE_UINT64:
72 return BRW_REGISTER_TYPE_UQ;
73 case GLSL_TYPE_INT64:
74 return BRW_REGISTER_TYPE_Q;
75 case GLSL_TYPE_VOID:
76 case GLSL_TYPE_ERROR:
77 case GLSL_TYPE_FUNCTION:
78 unreachable("not reached");
79 }
80
81 return BRW_REGISTER_TYPE_F;
82 }
83
84 enum brw_conditional_mod
brw_conditional_for_comparison(unsigned int op)85 brw_conditional_for_comparison(unsigned int op)
86 {
87 switch (op) {
88 case ir_binop_less:
89 return BRW_CONDITIONAL_L;
90 case ir_binop_gequal:
91 return BRW_CONDITIONAL_GE;
92 case ir_binop_equal:
93 case ir_binop_all_equal: /* same as equal for scalars */
94 return BRW_CONDITIONAL_Z;
95 case ir_binop_nequal:
96 case ir_binop_any_nequal: /* same as nequal for scalars */
97 return BRW_CONDITIONAL_NZ;
98 default:
99 unreachable("not reached: bad operation for comparison");
100 }
101 }
102
103 uint32_t
brw_math_function(enum opcode op)104 brw_math_function(enum opcode op)
105 {
106 switch (op) {
107 case SHADER_OPCODE_RCP:
108 return BRW_MATH_FUNCTION_INV;
109 case SHADER_OPCODE_RSQ:
110 return BRW_MATH_FUNCTION_RSQ;
111 case SHADER_OPCODE_SQRT:
112 return BRW_MATH_FUNCTION_SQRT;
113 case SHADER_OPCODE_EXP2:
114 return BRW_MATH_FUNCTION_EXP;
115 case SHADER_OPCODE_LOG2:
116 return BRW_MATH_FUNCTION_LOG;
117 case SHADER_OPCODE_POW:
118 return BRW_MATH_FUNCTION_POW;
119 case SHADER_OPCODE_SIN:
120 return BRW_MATH_FUNCTION_SIN;
121 case SHADER_OPCODE_COS:
122 return BRW_MATH_FUNCTION_COS;
123 case SHADER_OPCODE_INT_QUOTIENT:
124 return BRW_MATH_FUNCTION_INT_DIV_QUOTIENT;
125 case SHADER_OPCODE_INT_REMAINDER:
126 return BRW_MATH_FUNCTION_INT_DIV_REMAINDER;
127 default:
128 unreachable("not reached: unknown math function");
129 }
130 }
131
132 bool
brw_texture_offset(const nir_tex_instr * tex,unsigned src,uint32_t * offset_bits_out)133 brw_texture_offset(const nir_tex_instr *tex, unsigned src,
134 uint32_t *offset_bits_out)
135 {
136 if (!nir_src_is_const(tex->src[src].src))
137 return false;
138
139 const unsigned num_components = nir_tex_instr_src_size(tex, src);
140
141 /* Combine all three offsets into a single unsigned dword:
142 *
143 * bits 11:8 - U Offset (X component)
144 * bits 7:4 - V Offset (Y component)
145 * bits 3:0 - R Offset (Z component)
146 */
147 uint32_t offset_bits = 0;
148 for (unsigned i = 0; i < num_components; i++) {
149 int offset = nir_src_comp_as_int(tex->src[src].src, i);
150
151 /* offset out of bounds; caller will handle it. */
152 if (offset > 7 || offset < -8)
153 return false;
154
155 const unsigned shift = 4 * (2 - i);
156 offset_bits |= (offset << shift) & (0xF << shift);
157 }
158
159 *offset_bits_out = offset_bits;
160
161 return true;
162 }
163
/**
 * Return a printable name for an opcode.
 *
 * Opcodes in the range 0 .. NUM_BRW_OPCODES - 1 are looked up through
 * brw_opcode_desc(), except for a few that are named directly depending on
 * devinfo->ver (see below).  Every other opcode handled here maps to a fixed
 * string literal.  An opcode that matches no case is a programming error.
 *
 * \param isa  ISA description; supplies the device info and is passed to
 *             brw_opcode_desc()
 * \param op   opcode to name
 */
const char *
brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   switch (op) {
   case 0 ... NUM_BRW_OPCODES - 1:
      /* The DO instruction doesn't exist on Gfx6+, but we use it to mark the
       * start of a loop in the IR.
       */
      if (devinfo->ver >= 6 && op == BRW_OPCODE_DO)
         return "do";

      /* The following conversion opcodes don't exist on Gfx8+, but we use
       * them to mark that we want to do the conversion.
       */
      if (devinfo->ver > 7 && op == BRW_OPCODE_F32TO16)
         return "f32to16";

      if (devinfo->ver > 7 && op == BRW_OPCODE_F16TO32)
         return "f16to32";

      assert(brw_opcode_desc(isa, op)->name);
      return brw_opcode_desc(isa, op)->name;
   case FS_OPCODE_FB_WRITE:
      return "fb_write";
   case FS_OPCODE_FB_WRITE_LOGICAL:
      return "fb_write_logical";
   case FS_OPCODE_REP_FB_WRITE:
      return "rep_fb_write";
   case FS_OPCODE_FB_READ:
      return "fb_read";
   case FS_OPCODE_FB_READ_LOGICAL:
      return "fb_read_logical";

   case SHADER_OPCODE_RCP:
      return "rcp";
   case SHADER_OPCODE_RSQ:
      return "rsq";
   case SHADER_OPCODE_SQRT:
      return "sqrt";
   case SHADER_OPCODE_EXP2:
      return "exp2";
   case SHADER_OPCODE_LOG2:
      return "log2";
   case SHADER_OPCODE_POW:
      return "pow";
   case SHADER_OPCODE_INT_QUOTIENT:
      return "int_quot";
   case SHADER_OPCODE_INT_REMAINDER:
      return "int_rem";
   case SHADER_OPCODE_SIN:
      return "sin";
   case SHADER_OPCODE_COS:
      return "cos";

   case SHADER_OPCODE_SEND:
      return "send";

   case SHADER_OPCODE_UNDEF:
      return "undef";

   case SHADER_OPCODE_TEX:
      return "tex";
   case SHADER_OPCODE_TEX_LOGICAL:
      return "tex_logical";
   case SHADER_OPCODE_TXD:
      return "txd";
   case SHADER_OPCODE_TXD_LOGICAL:
      return "txd_logical";
   case SHADER_OPCODE_TXF:
      return "txf";
   case SHADER_OPCODE_TXF_LOGICAL:
      return "txf_logical";
   case SHADER_OPCODE_TXF_LZ:
      return "txf_lz";
   case SHADER_OPCODE_TXL:
      return "txl";
   case SHADER_OPCODE_TXL_LOGICAL:
      return "txl_logical";
   case SHADER_OPCODE_TXL_LZ:
      return "txl_lz";
   case SHADER_OPCODE_TXS:
      return "txs";
   case SHADER_OPCODE_TXS_LOGICAL:
      return "txs_logical";
   case FS_OPCODE_TXB:
      return "txb";
   case FS_OPCODE_TXB_LOGICAL:
      return "txb_logical";
   case SHADER_OPCODE_TXF_CMS:
      return "txf_cms";
   case SHADER_OPCODE_TXF_CMS_LOGICAL:
      return "txf_cms_logical";
   case SHADER_OPCODE_TXF_CMS_W:
      return "txf_cms_w";
   case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
      return "txf_cms_w_logical";
   case SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL:
      return "txf_cms_w_gfx12_logical";
   case SHADER_OPCODE_TXF_UMS:
      return "txf_ums";
   case SHADER_OPCODE_TXF_UMS_LOGICAL:
      return "txf_ums_logical";
   case SHADER_OPCODE_TXF_MCS:
      return "txf_mcs";
   case SHADER_OPCODE_TXF_MCS_LOGICAL:
      return "txf_mcs_logical";
   case SHADER_OPCODE_LOD:
      return "lod";
   case SHADER_OPCODE_LOD_LOGICAL:
      return "lod_logical";
   case SHADER_OPCODE_TG4:
      return "tg4";
   case SHADER_OPCODE_TG4_LOGICAL:
      return "tg4_logical";
   case SHADER_OPCODE_TG4_OFFSET:
      return "tg4_offset";
   case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
      return "tg4_offset_logical";
   case SHADER_OPCODE_SAMPLEINFO:
      return "sampleinfo";
   case SHADER_OPCODE_SAMPLEINFO_LOGICAL:
      return "sampleinfo_logical";

   case SHADER_OPCODE_IMAGE_SIZE_LOGICAL:
      return "image_size_logical";

   case VEC4_OPCODE_UNTYPED_ATOMIC:
      return "untyped_atomic";
   case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
      return "untyped_atomic_logical";
   case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
      return "untyped_atomic_float_logical";
   case VEC4_OPCODE_UNTYPED_SURFACE_READ:
      return "untyped_surface_read";
   case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
      return "untyped_surface_read_logical";
   case VEC4_OPCODE_UNTYPED_SURFACE_WRITE:
      return "untyped_surface_write";
   case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
      return "untyped_surface_write_logical";
   case SHADER_OPCODE_OWORD_BLOCK_READ_LOGICAL:
      return "oword_block_read_logical";
   case SHADER_OPCODE_UNALIGNED_OWORD_BLOCK_READ_LOGICAL:
      return "unaligned_oword_block_read_logical";
   case SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL:
      return "oword_block_write_logical";
   case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL:
      return "a64_untyped_read_logical";
   case SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL:
      return "a64_oword_block_read_logical";
   case SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL:
      return "a64_unaligned_oword_block_read_logical";
   case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL:
      return "a64_oword_block_write_logical";
   case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
      return "a64_untyped_write_logical";
   case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL:
      return "a64_byte_scattered_read_logical";
   case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
      return "a64_byte_scattered_write_logical";
   case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
      return "a64_untyped_atomic_logical";
   case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL:
      return "a64_untyped_atomic_int16_logical";
   case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
      return "a64_untyped_atomic_int64_logical";
   case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL:
      return "a64_untyped_atomic_float16_logical";
   case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL:
      return "a64_untyped_atomic_float32_logical";
   case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT64_LOGICAL:
      return "a64_untyped_atomic_float64_logical";
   case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
      return "typed_atomic_logical";
   case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
      return "typed_surface_read_logical";
   case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
      return "typed_surface_write_logical";
   case SHADER_OPCODE_MEMORY_FENCE:
      return "memory_fence";
   case FS_OPCODE_SCHEDULING_FENCE:
      return "scheduling_fence";
   case SHADER_OPCODE_INTERLOCK:
      /* For an interlock we actually issue a memory fence via sendc. */
      return "interlock";

   case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
      return "byte_scattered_read_logical";
   case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
      return "byte_scattered_write_logical";
   case SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL:
      return "dword_scattered_read_logical";
   case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL:
      return "dword_scattered_write_logical";

   case SHADER_OPCODE_LOAD_PAYLOAD:
      return "load_payload";
   case FS_OPCODE_PACK:
      return "pack";

   case SHADER_OPCODE_GFX4_SCRATCH_READ:
      return "gfx4_scratch_read";
   case SHADER_OPCODE_GFX4_SCRATCH_WRITE:
      return "gfx4_scratch_write";
   case SHADER_OPCODE_GFX7_SCRATCH_READ:
      return "gfx7_scratch_read";
   case SHADER_OPCODE_SCRATCH_HEADER:
      return "scratch_header";

   case SHADER_OPCODE_URB_WRITE_LOGICAL:
      return "urb_write_logical";
   case SHADER_OPCODE_URB_READ_LOGICAL:
      return "urb_read_logical";

   case SHADER_OPCODE_FIND_LIVE_CHANNEL:
      return "find_live_channel";
   case SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL:
      return "find_last_live_channel";
   case FS_OPCODE_LOAD_LIVE_CHANNELS:
      return "load_live_channels";

   case SHADER_OPCODE_BROADCAST:
      return "broadcast";
   case SHADER_OPCODE_SHUFFLE:
      return "shuffle";
   case SHADER_OPCODE_SEL_EXEC:
      return "sel_exec";
   case SHADER_OPCODE_QUAD_SWIZZLE:
      return "quad_swizzle";
   case SHADER_OPCODE_CLUSTER_BROADCAST:
      return "cluster_broadcast";

   case SHADER_OPCODE_GET_BUFFER_SIZE:
      return "get_buffer_size";

   case VEC4_OPCODE_MOV_BYTES:
      return "mov_bytes";
   case VEC4_OPCODE_PACK_BYTES:
      return "pack_bytes";
   case VEC4_OPCODE_UNPACK_UNIFORM:
      return "unpack_uniform";
   case VEC4_OPCODE_DOUBLE_TO_F32:
      return "double_to_f32";
   case VEC4_OPCODE_DOUBLE_TO_D32:
      return "double_to_d32";
   case VEC4_OPCODE_DOUBLE_TO_U32:
      return "double_to_u32";
   case VEC4_OPCODE_TO_DOUBLE:
      return "single_to_double";
   case VEC4_OPCODE_PICK_LOW_32BIT:
      return "pick_low_32bit";
   case VEC4_OPCODE_PICK_HIGH_32BIT:
      return "pick_high_32bit";
   case VEC4_OPCODE_SET_LOW_32BIT:
      return "set_low_32bit";
   case VEC4_OPCODE_SET_HIGH_32BIT:
      return "set_high_32bit";
   case VEC4_OPCODE_MOV_FOR_SCRATCH:
      return "mov_for_scratch";
   case VEC4_OPCODE_ZERO_OOB_PUSH_REGS:
      return "zero_oob_push_regs";

   case FS_OPCODE_DDX_COARSE:
      return "ddx_coarse";
   case FS_OPCODE_DDX_FINE:
      return "ddx_fine";
   case FS_OPCODE_DDY_COARSE:
      return "ddy_coarse";
   case FS_OPCODE_DDY_FINE:
      return "ddy_fine";

   case FS_OPCODE_LINTERP:
      return "linterp";

   case FS_OPCODE_PIXEL_X:
      return "pixel_x";
   case FS_OPCODE_PIXEL_Y:
      return "pixel_y";

   case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
      return "uniform_pull_const";
   case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GFX7:
      return "uniform_pull_const_gfx7";
   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GFX4:
      return "varying_pull_const_gfx4";
   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
      return "varying_pull_const_logical";

   case FS_OPCODE_SET_SAMPLE_ID:
      return "set_sample_id";

   case FS_OPCODE_PACK_HALF_2x16_SPLIT:
      return "pack_half_2x16_split";

   case SHADER_OPCODE_HALT_TARGET:
      return "halt_target";

   case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
      return "interp_sample";
   case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
      return "interp_shared_offset";
   case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
      return "interp_per_slot_offset";

   case VEC4_VS_OPCODE_URB_WRITE:
      return "vs_urb_write";
   case VS_OPCODE_PULL_CONSTANT_LOAD:
      return "pull_constant_load";
   case VS_OPCODE_PULL_CONSTANT_LOAD_GFX7:
      return "pull_constant_load_gfx7";

   case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
      return "unpack_flags_simd4x2";

   case VEC4_GS_OPCODE_URB_WRITE:
      return "gs_urb_write";
   case VEC4_GS_OPCODE_URB_WRITE_ALLOCATE:
      return "gs_urb_write_allocate";
   case GS_OPCODE_THREAD_END:
      return "gs_thread_end";
   case GS_OPCODE_SET_WRITE_OFFSET:
      return "set_write_offset";
   case GS_OPCODE_SET_VERTEX_COUNT:
      return "set_vertex_count";
   case GS_OPCODE_SET_DWORD_2:
      return "set_dword_2";
   case GS_OPCODE_PREPARE_CHANNEL_MASKS:
      return "prepare_channel_masks";
   case GS_OPCODE_SET_CHANNEL_MASKS:
      return "set_channel_masks";
   case GS_OPCODE_GET_INSTANCE_ID:
      return "get_instance_id";
   case GS_OPCODE_FF_SYNC:
      return "ff_sync";
   case GS_OPCODE_SET_PRIMITIVE_ID:
      return "set_primitive_id";
   case GS_OPCODE_SVB_WRITE:
      return "gs_svb_write";
   case GS_OPCODE_SVB_SET_DST_INDEX:
      return "gs_svb_set_dst_index";
   case GS_OPCODE_FF_SYNC_SET_PRIMITIVES:
      return "gs_ff_sync_set_primitives";
   case CS_OPCODE_CS_TERMINATE:
      return "cs_terminate";
   case SHADER_OPCODE_BARRIER:
      return "barrier";
   case SHADER_OPCODE_MULH:
      return "mulh";
   case SHADER_OPCODE_ISUB_SAT:
      return "isub_sat";
   case SHADER_OPCODE_USUB_SAT:
      return "usub_sat";
   case SHADER_OPCODE_MOV_INDIRECT:
      return "mov_indirect";
   case SHADER_OPCODE_MOV_RELOC_IMM:
      return "mov_reloc_imm";

   case VEC4_OPCODE_URB_READ:
      return "urb_read";
   case TCS_OPCODE_GET_INSTANCE_ID:
      return "tcs_get_instance_id";
   case VEC4_TCS_OPCODE_URB_WRITE:
      return "tcs_urb_write";
   case VEC4_TCS_OPCODE_SET_INPUT_URB_OFFSETS:
      return "tcs_set_input_urb_offsets";
   case VEC4_TCS_OPCODE_SET_OUTPUT_URB_OFFSETS:
      return "tcs_set_output_urb_offsets";
   case TCS_OPCODE_GET_PRIMITIVE_ID:
      return "tcs_get_primitive_id";
   case TCS_OPCODE_CREATE_BARRIER_HEADER:
      return "tcs_create_barrier_header";
   case TCS_OPCODE_SRC0_010_IS_ZERO:
      return "tcs_src0<0,1,0>_is_zero";
   case TCS_OPCODE_RELEASE_INPUT:
      return "tcs_release_input";
   case TCS_OPCODE_THREAD_END:
      return "tcs_thread_end";
   case TES_OPCODE_CREATE_INPUT_READ_HEADER:
      return "tes_create_input_read_header";
   case TES_OPCODE_ADD_INDIRECT_URB_OFFSET:
      return "tes_add_indirect_urb_offset";
   case TES_OPCODE_GET_PRIMITIVE_ID:
      return "tes_get_primitive_id";

   case RT_OPCODE_TRACE_RAY_LOGICAL:
      return "rt_trace_ray_logical";

   case SHADER_OPCODE_RND_MODE:
      return "rnd_mode";
   case SHADER_OPCODE_FLOAT_CONTROL_MODE:
      return "float_control_mode";
   case SHADER_OPCODE_BTD_SPAWN_LOGICAL:
      return "btd_spawn_logical";
   case SHADER_OPCODE_BTD_RETIRE_LOGICAL:
      return "btd_retire_logical";
   case SHADER_OPCODE_READ_SR_REG:
      return "read_sr_reg";
   }

   /* Every valid opcode returns from the switch above. */
   unreachable("not reached");
}
567
568 bool
brw_saturate_immediate(enum brw_reg_type type,struct brw_reg * reg)569 brw_saturate_immediate(enum brw_reg_type type, struct brw_reg *reg)
570 {
571 union {
572 unsigned ud;
573 int d;
574 float f;
575 double df;
576 } imm, sat_imm = { 0 };
577
578 const unsigned size = type_sz(type);
579
580 /* We want to either do a 32-bit or 64-bit data copy, the type is otherwise
581 * irrelevant, so just check the size of the type and copy from/to an
582 * appropriately sized field.
583 */
584 if (size < 8)
585 imm.ud = reg->ud;
586 else
587 imm.df = reg->df;
588
589 switch (type) {
590 case BRW_REGISTER_TYPE_UD:
591 case BRW_REGISTER_TYPE_D:
592 case BRW_REGISTER_TYPE_UW:
593 case BRW_REGISTER_TYPE_W:
594 case BRW_REGISTER_TYPE_UQ:
595 case BRW_REGISTER_TYPE_Q:
596 /* Nothing to do. */
597 return false;
598 case BRW_REGISTER_TYPE_F:
599 sat_imm.f = SATURATE(imm.f);
600 break;
601 case BRW_REGISTER_TYPE_DF:
602 sat_imm.df = SATURATE(imm.df);
603 break;
604 case BRW_REGISTER_TYPE_UB:
605 case BRW_REGISTER_TYPE_B:
606 unreachable("no UB/B immediates");
607 case BRW_REGISTER_TYPE_V:
608 case BRW_REGISTER_TYPE_UV:
609 case BRW_REGISTER_TYPE_VF:
610 unreachable("unimplemented: saturate vector immediate");
611 case BRW_REGISTER_TYPE_HF:
612 unreachable("unimplemented: saturate HF immediate");
613 case BRW_REGISTER_TYPE_NF:
614 unreachable("no NF immediates");
615 }
616
617 if (size < 8) {
618 if (imm.ud != sat_imm.ud) {
619 reg->ud = sat_imm.ud;
620 return true;
621 }
622 } else {
623 if (imm.df != sat_imm.df) {
624 reg->df = sat_imm.df;
625 return true;
626 }
627 }
628 return false;
629 }
630
631 bool
brw_negate_immediate(enum brw_reg_type type,struct brw_reg * reg)632 brw_negate_immediate(enum brw_reg_type type, struct brw_reg *reg)
633 {
634 switch (type) {
635 case BRW_REGISTER_TYPE_D:
636 case BRW_REGISTER_TYPE_UD:
637 reg->d = -reg->d;
638 return true;
639 case BRW_REGISTER_TYPE_W:
640 case BRW_REGISTER_TYPE_UW: {
641 uint16_t value = -(int16_t)reg->ud;
642 reg->ud = value | (uint32_t)value << 16;
643 return true;
644 }
645 case BRW_REGISTER_TYPE_F:
646 reg->f = -reg->f;
647 return true;
648 case BRW_REGISTER_TYPE_VF:
649 reg->ud ^= 0x80808080;
650 return true;
651 case BRW_REGISTER_TYPE_DF:
652 reg->df = -reg->df;
653 return true;
654 case BRW_REGISTER_TYPE_UQ:
655 case BRW_REGISTER_TYPE_Q:
656 reg->d64 = -reg->d64;
657 return true;
658 case BRW_REGISTER_TYPE_UB:
659 case BRW_REGISTER_TYPE_B:
660 unreachable("no UB/B immediates");
661 case BRW_REGISTER_TYPE_UV:
662 case BRW_REGISTER_TYPE_V:
663 assert(!"unimplemented: negate UV/V immediate");
664 case BRW_REGISTER_TYPE_HF:
665 reg->ud ^= 0x80008000;
666 return true;
667 case BRW_REGISTER_TYPE_NF:
668 unreachable("no NF immediates");
669 }
670
671 return false;
672 }
673
674 bool
brw_abs_immediate(enum brw_reg_type type,struct brw_reg * reg)675 brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg)
676 {
677 switch (type) {
678 case BRW_REGISTER_TYPE_D:
679 reg->d = abs(reg->d);
680 return true;
681 case BRW_REGISTER_TYPE_W: {
682 uint16_t value = abs((int16_t)reg->ud);
683 reg->ud = value | (uint32_t)value << 16;
684 return true;
685 }
686 case BRW_REGISTER_TYPE_F:
687 reg->f = fabsf(reg->f);
688 return true;
689 case BRW_REGISTER_TYPE_DF:
690 reg->df = fabs(reg->df);
691 return true;
692 case BRW_REGISTER_TYPE_VF:
693 reg->ud &= ~0x80808080;
694 return true;
695 case BRW_REGISTER_TYPE_Q:
696 reg->d64 = imaxabs(reg->d64);
697 return true;
698 case BRW_REGISTER_TYPE_UB:
699 case BRW_REGISTER_TYPE_B:
700 unreachable("no UB/B immediates");
701 case BRW_REGISTER_TYPE_UQ:
702 case BRW_REGISTER_TYPE_UD:
703 case BRW_REGISTER_TYPE_UW:
704 case BRW_REGISTER_TYPE_UV:
705 /* Presumably the absolute value modifier on an unsigned source is a
706 * nop, but it would be nice to confirm.
707 */
708 assert(!"unimplemented: abs unsigned immediate");
709 case BRW_REGISTER_TYPE_V:
710 assert(!"unimplemented: abs V immediate");
711 case BRW_REGISTER_TYPE_HF:
712 reg->ud &= ~0x80008000;
713 return true;
714 case BRW_REGISTER_TYPE_NF:
715 unreachable("no NF immediates");
716 }
717
718 return false;
719 }
720
/**
 * Set up the state shared by the shader backends for one shader.
 *
 * The arguments are stored as given; devinfo is taken from
 * compiler->devinfo and the stage from shader->info.stage.  No CFG exists
 * yet (cfg starts out NULL).
 *
 * \param compiler         compiler instance; also the source of devinfo
 * \param log_data         stored in the log_data member
 * \param mem_ctx          stored in the mem_ctx member
 * \param shader           NIR shader being compiled
 * \param stage_prog_data  stored in the stage_prog_data member
 * \param debug_enabled    stored in the debug_enabled member
 */
backend_shader::backend_shader(const struct brw_compiler *compiler,
                               void *log_data,
                               void *mem_ctx,
                               const nir_shader *shader,
                               struct brw_stage_prog_data *stage_prog_data,
                               bool debug_enabled)
   : compiler(compiler),
     log_data(log_data),
     devinfo(compiler->devinfo),
     nir(shader),
     stage_prog_data(stage_prog_data),
     mem_ctx(mem_ctx),
     cfg(NULL), idom_analysis(this),
     stage(shader->info.stage),
     debug_enabled(debug_enabled)
{
   /* Look up the full and abbreviated names of the stage once, up front. */
   stage_name = _mesa_shader_stage_to_string(stage);
   stage_abbrev = _mesa_shader_stage_to_abbrev(stage);
}
740
~backend_shader()741 backend_shader::~backend_shader()
742 {
743 }
744
745 bool
equals(const backend_reg & r) const746 backend_reg::equals(const backend_reg &r) const
747 {
748 return brw_regs_equal(this, &r) && offset == r.offset;
749 }
750
751 bool
negative_equals(const backend_reg & r) const752 backend_reg::negative_equals(const backend_reg &r) const
753 {
754 return brw_regs_negative_equal(this, &r) && offset == r.offset;
755 }
756
757 bool
is_zero() const758 backend_reg::is_zero() const
759 {
760 if (file != IMM)
761 return false;
762
763 assert(type_sz(type) > 1);
764
765 switch (type) {
766 case BRW_REGISTER_TYPE_HF:
767 assert((d & 0xffff) == ((d >> 16) & 0xffff));
768 return (d & 0xffff) == 0 || (d & 0xffff) == 0x8000;
769 case BRW_REGISTER_TYPE_F:
770 return f == 0;
771 case BRW_REGISTER_TYPE_DF:
772 return df == 0;
773 case BRW_REGISTER_TYPE_W:
774 case BRW_REGISTER_TYPE_UW:
775 assert((d & 0xffff) == ((d >> 16) & 0xffff));
776 return (d & 0xffff) == 0;
777 case BRW_REGISTER_TYPE_D:
778 case BRW_REGISTER_TYPE_UD:
779 return d == 0;
780 case BRW_REGISTER_TYPE_UQ:
781 case BRW_REGISTER_TYPE_Q:
782 return u64 == 0;
783 default:
784 return false;
785 }
786 }
787
788 bool
is_one() const789 backend_reg::is_one() const
790 {
791 if (file != IMM)
792 return false;
793
794 assert(type_sz(type) > 1);
795
796 switch (type) {
797 case BRW_REGISTER_TYPE_HF:
798 assert((d & 0xffff) == ((d >> 16) & 0xffff));
799 return (d & 0xffff) == 0x3c00;
800 case BRW_REGISTER_TYPE_F:
801 return f == 1.0f;
802 case BRW_REGISTER_TYPE_DF:
803 return df == 1.0;
804 case BRW_REGISTER_TYPE_W:
805 case BRW_REGISTER_TYPE_UW:
806 assert((d & 0xffff) == ((d >> 16) & 0xffff));
807 return (d & 0xffff) == 1;
808 case BRW_REGISTER_TYPE_D:
809 case BRW_REGISTER_TYPE_UD:
810 return d == 1;
811 case BRW_REGISTER_TYPE_UQ:
812 case BRW_REGISTER_TYPE_Q:
813 return u64 == 1;
814 default:
815 return false;
816 }
817 }
818
819 bool
is_negative_one() const820 backend_reg::is_negative_one() const
821 {
822 if (file != IMM)
823 return false;
824
825 assert(type_sz(type) > 1);
826
827 switch (type) {
828 case BRW_REGISTER_TYPE_HF:
829 assert((d & 0xffff) == ((d >> 16) & 0xffff));
830 return (d & 0xffff) == 0xbc00;
831 case BRW_REGISTER_TYPE_F:
832 return f == -1.0;
833 case BRW_REGISTER_TYPE_DF:
834 return df == -1.0;
835 case BRW_REGISTER_TYPE_W:
836 assert((d & 0xffff) == ((d >> 16) & 0xffff));
837 return (d & 0xffff) == 0xffff;
838 case BRW_REGISTER_TYPE_D:
839 return d == -1;
840 case BRW_REGISTER_TYPE_Q:
841 return d64 == -1;
842 default:
843 return false;
844 }
845 }
846
847 bool
is_null() const848 backend_reg::is_null() const
849 {
850 return file == ARF && nr == BRW_ARF_NULL;
851 }
852
853
854 bool
is_accumulator() const855 backend_reg::is_accumulator() const
856 {
857 return file == ARF && nr == BRW_ARF_ACCUMULATOR;
858 }
859
860 bool
is_commutative() const861 backend_instruction::is_commutative() const
862 {
863 switch (opcode) {
864 case BRW_OPCODE_AND:
865 case BRW_OPCODE_OR:
866 case BRW_OPCODE_XOR:
867 case BRW_OPCODE_ADD:
868 case BRW_OPCODE_ADD3:
869 case BRW_OPCODE_MUL:
870 case SHADER_OPCODE_MULH:
871 return true;
872 case BRW_OPCODE_SEL:
873 /* MIN and MAX are commutative. */
874 if (conditional_mod == BRW_CONDITIONAL_GE ||
875 conditional_mod == BRW_CONDITIONAL_L) {
876 return true;
877 }
878 FALLTHROUGH;
879 default:
880 return false;
881 }
882 }
883
884 bool
is_3src(const struct brw_compiler * compiler) const885 backend_instruction::is_3src(const struct brw_compiler *compiler) const
886 {
887 return ::is_3src(&compiler->isa, opcode);
888 }
889
890 bool
is_tex() const891 backend_instruction::is_tex() const
892 {
893 return (opcode == SHADER_OPCODE_TEX ||
894 opcode == FS_OPCODE_TXB ||
895 opcode == SHADER_OPCODE_TXD ||
896 opcode == SHADER_OPCODE_TXF ||
897 opcode == SHADER_OPCODE_TXF_LZ ||
898 opcode == SHADER_OPCODE_TXF_CMS ||
899 opcode == SHADER_OPCODE_TXF_CMS_W ||
900 opcode == SHADER_OPCODE_TXF_UMS ||
901 opcode == SHADER_OPCODE_TXF_MCS ||
902 opcode == SHADER_OPCODE_TXL ||
903 opcode == SHADER_OPCODE_TXL_LZ ||
904 opcode == SHADER_OPCODE_TXS ||
905 opcode == SHADER_OPCODE_LOD ||
906 opcode == SHADER_OPCODE_TG4 ||
907 opcode == SHADER_OPCODE_TG4_OFFSET ||
908 opcode == SHADER_OPCODE_SAMPLEINFO);
909 }
910
911 bool
is_math() const912 backend_instruction::is_math() const
913 {
914 return (opcode == SHADER_OPCODE_RCP ||
915 opcode == SHADER_OPCODE_RSQ ||
916 opcode == SHADER_OPCODE_SQRT ||
917 opcode == SHADER_OPCODE_EXP2 ||
918 opcode == SHADER_OPCODE_LOG2 ||
919 opcode == SHADER_OPCODE_SIN ||
920 opcode == SHADER_OPCODE_COS ||
921 opcode == SHADER_OPCODE_INT_QUOTIENT ||
922 opcode == SHADER_OPCODE_INT_REMAINDER ||
923 opcode == SHADER_OPCODE_POW);
924 }
925
926 bool
is_control_flow() const927 backend_instruction::is_control_flow() const
928 {
929 switch (opcode) {
930 case BRW_OPCODE_DO:
931 case BRW_OPCODE_WHILE:
932 case BRW_OPCODE_IF:
933 case BRW_OPCODE_ELSE:
934 case BRW_OPCODE_ENDIF:
935 case BRW_OPCODE_BREAK:
936 case BRW_OPCODE_CONTINUE:
937 return true;
938 default:
939 return false;
940 }
941 }
942
943 bool
uses_indirect_addressing() const944 backend_instruction::uses_indirect_addressing() const
945 {
946 switch (opcode) {
947 case SHADER_OPCODE_BROADCAST:
948 case SHADER_OPCODE_CLUSTER_BROADCAST:
949 case SHADER_OPCODE_MOV_INDIRECT:
950 return true;
951 default:
952 return false;
953 }
954 }
955
956 bool
can_do_source_mods() const957 backend_instruction::can_do_source_mods() const
958 {
959 switch (opcode) {
960 case BRW_OPCODE_ADDC:
961 case BRW_OPCODE_BFE:
962 case BRW_OPCODE_BFI1:
963 case BRW_OPCODE_BFI2:
964 case BRW_OPCODE_BFREV:
965 case BRW_OPCODE_CBIT:
966 case BRW_OPCODE_FBH:
967 case BRW_OPCODE_FBL:
968 case BRW_OPCODE_ROL:
969 case BRW_OPCODE_ROR:
970 case BRW_OPCODE_SUBB:
971 case BRW_OPCODE_DP4A:
972 case SHADER_OPCODE_BROADCAST:
973 case SHADER_OPCODE_CLUSTER_BROADCAST:
974 case SHADER_OPCODE_MOV_INDIRECT:
975 case SHADER_OPCODE_SHUFFLE:
976 case SHADER_OPCODE_INT_QUOTIENT:
977 case SHADER_OPCODE_INT_REMAINDER:
978 return false;
979 default:
980 return true;
981 }
982 }
983
984 bool
can_do_saturate() const985 backend_instruction::can_do_saturate() const
986 {
987 switch (opcode) {
988 case BRW_OPCODE_ADD:
989 case BRW_OPCODE_ADD3:
990 case BRW_OPCODE_ASR:
991 case BRW_OPCODE_AVG:
992 case BRW_OPCODE_CSEL:
993 case BRW_OPCODE_DP2:
994 case BRW_OPCODE_DP3:
995 case BRW_OPCODE_DP4:
996 case BRW_OPCODE_DPH:
997 case BRW_OPCODE_DP4A:
998 case BRW_OPCODE_F16TO32:
999 case BRW_OPCODE_F32TO16:
1000 case BRW_OPCODE_LINE:
1001 case BRW_OPCODE_LRP:
1002 case BRW_OPCODE_MAC:
1003 case BRW_OPCODE_MAD:
1004 case BRW_OPCODE_MATH:
1005 case BRW_OPCODE_MOV:
1006 case BRW_OPCODE_MUL:
1007 case SHADER_OPCODE_MULH:
1008 case BRW_OPCODE_PLN:
1009 case BRW_OPCODE_RNDD:
1010 case BRW_OPCODE_RNDE:
1011 case BRW_OPCODE_RNDU:
1012 case BRW_OPCODE_RNDZ:
1013 case BRW_OPCODE_SEL:
1014 case BRW_OPCODE_SHL:
1015 case BRW_OPCODE_SHR:
1016 case FS_OPCODE_LINTERP:
1017 case SHADER_OPCODE_COS:
1018 case SHADER_OPCODE_EXP2:
1019 case SHADER_OPCODE_LOG2:
1020 case SHADER_OPCODE_POW:
1021 case SHADER_OPCODE_RCP:
1022 case SHADER_OPCODE_RSQ:
1023 case SHADER_OPCODE_SIN:
1024 case SHADER_OPCODE_SQRT:
1025 return true;
1026 default:
1027 return false;
1028 }
1029 }
1030
1031 bool
can_do_cmod() const1032 backend_instruction::can_do_cmod() const
1033 {
1034 switch (opcode) {
1035 case BRW_OPCODE_ADD:
1036 case BRW_OPCODE_ADD3:
1037 case BRW_OPCODE_ADDC:
1038 case BRW_OPCODE_AND:
1039 case BRW_OPCODE_ASR:
1040 case BRW_OPCODE_AVG:
1041 case BRW_OPCODE_CMP:
1042 case BRW_OPCODE_CMPN:
1043 case BRW_OPCODE_DP2:
1044 case BRW_OPCODE_DP3:
1045 case BRW_OPCODE_DP4:
1046 case BRW_OPCODE_DPH:
1047 case BRW_OPCODE_F16TO32:
1048 case BRW_OPCODE_F32TO16:
1049 case BRW_OPCODE_FRC:
1050 case BRW_OPCODE_LINE:
1051 case BRW_OPCODE_LRP:
1052 case BRW_OPCODE_LZD:
1053 case BRW_OPCODE_MAC:
1054 case BRW_OPCODE_MACH:
1055 case BRW_OPCODE_MAD:
1056 case BRW_OPCODE_MOV:
1057 case BRW_OPCODE_MUL:
1058 case BRW_OPCODE_NOT:
1059 case BRW_OPCODE_OR:
1060 case BRW_OPCODE_PLN:
1061 case BRW_OPCODE_RNDD:
1062 case BRW_OPCODE_RNDE:
1063 case BRW_OPCODE_RNDU:
1064 case BRW_OPCODE_RNDZ:
1065 case BRW_OPCODE_SAD2:
1066 case BRW_OPCODE_SADA2:
1067 case BRW_OPCODE_SHL:
1068 case BRW_OPCODE_SHR:
1069 case BRW_OPCODE_SUBB:
1070 case BRW_OPCODE_XOR:
1071 case FS_OPCODE_LINTERP:
1072 return true;
1073 default:
1074 return false;
1075 }
1076 }
1077
1078 bool
reads_accumulator_implicitly() const1079 backend_instruction::reads_accumulator_implicitly() const
1080 {
1081 switch (opcode) {
1082 case BRW_OPCODE_MAC:
1083 case BRW_OPCODE_MACH:
1084 case BRW_OPCODE_SADA2:
1085 return true;
1086 default:
1087 return false;
1088 }
1089 }
1090
1091 bool
writes_accumulator_implicitly(const struct intel_device_info * devinfo) const1092 backend_instruction::writes_accumulator_implicitly(const struct intel_device_info *devinfo) const
1093 {
1094 return writes_accumulator ||
1095 (devinfo->ver < 6 &&
1096 ((opcode >= BRW_OPCODE_ADD && opcode < BRW_OPCODE_NOP) ||
1097 (opcode >= FS_OPCODE_DDX_COARSE && opcode <= FS_OPCODE_LINTERP))) ||
1098 (opcode == FS_OPCODE_LINTERP &&
1099 (!devinfo->has_pln || devinfo->ver <= 6)) ||
1100 (eot && devinfo->ver >= 12); /* See Wa_14010017096. */
1101 }
1102
1103 bool
has_side_effects() const1104 backend_instruction::has_side_effects() const
1105 {
1106 switch (opcode) {
1107 case SHADER_OPCODE_SEND:
1108 return send_has_side_effects;
1109
1110 case BRW_OPCODE_SYNC:
1111 case VEC4_OPCODE_UNTYPED_ATOMIC:
1112 case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
1113 case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
1114 case SHADER_OPCODE_GFX4_SCRATCH_WRITE:
1115 case VEC4_OPCODE_UNTYPED_SURFACE_WRITE:
1116 case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
1117 case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
1118 case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
1119 case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
1120 case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL:
1121 case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
1122 case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL:
1123 case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL:
1124 case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT64_LOGICAL:
1125 case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
1126 case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL:
1127 case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
1128 case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
1129 case SHADER_OPCODE_MEMORY_FENCE:
1130 case SHADER_OPCODE_INTERLOCK:
1131 case SHADER_OPCODE_URB_WRITE_LOGICAL:
1132 case FS_OPCODE_FB_WRITE:
1133 case FS_OPCODE_FB_WRITE_LOGICAL:
1134 case FS_OPCODE_REP_FB_WRITE:
1135 case SHADER_OPCODE_BARRIER:
1136 case VEC4_TCS_OPCODE_URB_WRITE:
1137 case TCS_OPCODE_RELEASE_INPUT:
1138 case SHADER_OPCODE_RND_MODE:
1139 case SHADER_OPCODE_FLOAT_CONTROL_MODE:
1140 case FS_OPCODE_SCHEDULING_FENCE:
1141 case SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL:
1142 case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL:
1143 case SHADER_OPCODE_BTD_SPAWN_LOGICAL:
1144 case SHADER_OPCODE_BTD_RETIRE_LOGICAL:
1145 case RT_OPCODE_TRACE_RAY_LOGICAL:
1146 case VEC4_OPCODE_ZERO_OOB_PUSH_REGS:
1147 return true;
1148 default:
1149 return eot;
1150 }
1151 }
1152
1153 bool
is_volatile() const1154 backend_instruction::is_volatile() const
1155 {
1156 switch (opcode) {
1157 case SHADER_OPCODE_SEND:
1158 return send_is_volatile;
1159
1160 case VEC4_OPCODE_UNTYPED_SURFACE_READ:
1161 case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
1162 case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
1163 case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
1164 case SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL:
1165 case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL:
1166 case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL:
1167 case VEC4_OPCODE_URB_READ:
1168 return true;
1169 default:
1170 return false;
1171 }
1172 }
1173
1174 #ifndef NDEBUG
1175 static bool
inst_is_in_block(const bblock_t * block,const backend_instruction * inst)1176 inst_is_in_block(const bblock_t *block, const backend_instruction *inst)
1177 {
1178 foreach_inst_in_block (backend_instruction, i, block) {
1179 if (inst == i)
1180 return true;
1181 }
1182 return false;
1183 }
1184 #endif
1185
1186 static void
adjust_later_block_ips(bblock_t * start_block,int ip_adjustment)1187 adjust_later_block_ips(bblock_t *start_block, int ip_adjustment)
1188 {
1189 for (bblock_t *block_iter = start_block->next();
1190 block_iter;
1191 block_iter = block_iter->next()) {
1192 block_iter->start_ip += ip_adjustment;
1193 block_iter->end_ip += ip_adjustment;
1194 }
1195 }
1196
1197 void
insert_after(bblock_t * block,backend_instruction * inst)1198 backend_instruction::insert_after(bblock_t *block, backend_instruction *inst)
1199 {
1200 assert(this != inst);
1201 assert(block->end_ip_delta == 0);
1202
1203 if (!this->is_head_sentinel())
1204 assert(inst_is_in_block(block, this) || !"Instruction not in block");
1205
1206 block->end_ip++;
1207
1208 adjust_later_block_ips(block, 1);
1209
1210 exec_node::insert_after(inst);
1211 }
1212
1213 void
insert_before(bblock_t * block,backend_instruction * inst)1214 backend_instruction::insert_before(bblock_t *block, backend_instruction *inst)
1215 {
1216 assert(this != inst);
1217 assert(block->end_ip_delta == 0);
1218
1219 if (!this->is_tail_sentinel())
1220 assert(inst_is_in_block(block, this) || !"Instruction not in block");
1221
1222 block->end_ip++;
1223
1224 adjust_later_block_ips(block, 1);
1225
1226 exec_node::insert_before(inst);
1227 }
1228
1229 void
insert_before(bblock_t * block,exec_list * list)1230 backend_instruction::insert_before(bblock_t *block, exec_list *list)
1231 {
1232 assert(inst_is_in_block(block, this) || !"Instruction not in block");
1233 assert(block->end_ip_delta == 0);
1234
1235 unsigned num_inst = list->length();
1236
1237 block->end_ip += num_inst;
1238
1239 adjust_later_block_ips(block, num_inst);
1240
1241 exec_node::insert_before(list);
1242 }
1243
1244 void
remove(bblock_t * block,bool defer_later_block_ip_updates)1245 backend_instruction::remove(bblock_t *block, bool defer_later_block_ip_updates)
1246 {
1247 assert(inst_is_in_block(block, this) || !"Instruction not in block");
1248
1249 if (defer_later_block_ip_updates) {
1250 block->end_ip_delta--;
1251 } else {
1252 assert(block->end_ip_delta == 0);
1253 adjust_later_block_ips(block, -1);
1254 }
1255
1256 if (block->start_ip == block->end_ip) {
1257 if (block->end_ip_delta != 0) {
1258 adjust_later_block_ips(block, block->end_ip_delta);
1259 block->end_ip_delta = 0;
1260 }
1261
1262 block->cfg->remove_block(block);
1263 } else {
1264 block->end_ip--;
1265 }
1266
1267 exec_node::remove();
1268 }
1269
1270 void
dump_instructions() const1271 backend_shader::dump_instructions() const
1272 {
1273 dump_instructions(NULL);
1274 }
1275
1276 void
dump_instructions(const char * name) const1277 backend_shader::dump_instructions(const char *name) const
1278 {
1279 FILE *file = stderr;
1280 if (name && geteuid() != 0) {
1281 file = fopen(name, "w");
1282 if (!file)
1283 file = stderr;
1284 }
1285
1286 if (cfg) {
1287 int ip = 0;
1288 foreach_block_and_inst(block, backend_instruction, inst, cfg) {
1289 if (!INTEL_DEBUG(DEBUG_OPTIMIZER))
1290 fprintf(file, "%4d: ", ip++);
1291 dump_instruction(inst, file);
1292 }
1293 } else {
1294 int ip = 0;
1295 foreach_in_list(backend_instruction, inst, &instructions) {
1296 if (!INTEL_DEBUG(DEBUG_OPTIMIZER))
1297 fprintf(file, "%4d: ", ip++);
1298 dump_instruction(inst, file);
1299 }
1300 }
1301
1302 if (file != stderr) {
1303 fclose(file);
1304 }
1305 }
1306
1307 void
calculate_cfg()1308 backend_shader::calculate_cfg()
1309 {
1310 if (this->cfg)
1311 return;
1312 cfg = new(mem_ctx) cfg_t(this, &this->instructions);
1313 }
1314
1315 void
invalidate_analysis(brw::analysis_dependency_class c)1316 backend_shader::invalidate_analysis(brw::analysis_dependency_class c)
1317 {
1318 idom_analysis.invalidate(c);
1319 }
1320
1321 extern "C" const unsigned *
brw_compile_tes(const struct brw_compiler * compiler,void * mem_ctx,brw_compile_tes_params * params)1322 brw_compile_tes(const struct brw_compiler *compiler,
1323 void *mem_ctx,
1324 brw_compile_tes_params *params)
1325 {
1326 const struct intel_device_info *devinfo = compiler->devinfo;
1327 nir_shader *nir = params->nir;
1328 const struct brw_tes_prog_key *key = params->key;
1329 const struct brw_vue_map *input_vue_map = params->input_vue_map;
1330 struct brw_tes_prog_data *prog_data = params->prog_data;
1331
1332 const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_EVAL];
1333 const bool debug_enabled = INTEL_DEBUG(DEBUG_TES);
1334 const unsigned *assembly;
1335
1336 prog_data->base.base.stage = MESA_SHADER_TESS_EVAL;
1337 prog_data->base.base.ray_queries = nir->info.ray_queries;
1338
1339 nir->info.inputs_read = key->inputs_read;
1340 nir->info.patch_inputs_read = key->patch_inputs_read;
1341
1342 brw_nir_apply_key(nir, compiler, &key->base, 8, is_scalar);
1343 brw_nir_lower_tes_inputs(nir, input_vue_map);
1344 brw_nir_lower_vue_outputs(nir);
1345 brw_postprocess_nir(nir, compiler, is_scalar, debug_enabled,
1346 key->base.robust_buffer_access);
1347
1348 brw_compute_vue_map(devinfo, &prog_data->base.vue_map,
1349 nir->info.outputs_written,
1350 nir->info.separate_shader, 1);
1351
1352 unsigned output_size_bytes = prog_data->base.vue_map.num_slots * 4 * 4;
1353
1354 assert(output_size_bytes >= 1);
1355 if (output_size_bytes > GFX7_MAX_DS_URB_ENTRY_SIZE_BYTES) {
1356 params->error_str = ralloc_strdup(mem_ctx, "DS outputs exceed maximum size");
1357 return NULL;
1358 }
1359
1360 prog_data->base.clip_distance_mask =
1361 ((1 << nir->info.clip_distance_array_size) - 1);
1362 prog_data->base.cull_distance_mask =
1363 ((1 << nir->info.cull_distance_array_size) - 1) <<
1364 nir->info.clip_distance_array_size;
1365
1366 prog_data->include_primitive_id =
1367 BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);
1368
1369 /* URB entry sizes are stored as a multiple of 64 bytes. */
1370 prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
1371
1372 prog_data->base.urb_read_length = 0;
1373
1374 STATIC_ASSERT(BRW_TESS_PARTITIONING_INTEGER == TESS_SPACING_EQUAL - 1);
1375 STATIC_ASSERT(BRW_TESS_PARTITIONING_ODD_FRACTIONAL ==
1376 TESS_SPACING_FRACTIONAL_ODD - 1);
1377 STATIC_ASSERT(BRW_TESS_PARTITIONING_EVEN_FRACTIONAL ==
1378 TESS_SPACING_FRACTIONAL_EVEN - 1);
1379
1380 prog_data->partitioning =
1381 (enum brw_tess_partitioning) (nir->info.tess.spacing - 1);
1382
1383 switch (nir->info.tess._primitive_mode) {
1384 case TESS_PRIMITIVE_QUADS:
1385 prog_data->domain = BRW_TESS_DOMAIN_QUAD;
1386 break;
1387 case TESS_PRIMITIVE_TRIANGLES:
1388 prog_data->domain = BRW_TESS_DOMAIN_TRI;
1389 break;
1390 case TESS_PRIMITIVE_ISOLINES:
1391 prog_data->domain = BRW_TESS_DOMAIN_ISOLINE;
1392 break;
1393 default:
1394 unreachable("invalid domain shader primitive mode");
1395 }
1396
1397 if (nir->info.tess.point_mode) {
1398 prog_data->output_topology = BRW_TESS_OUTPUT_TOPOLOGY_POINT;
1399 } else if (nir->info.tess._primitive_mode == TESS_PRIMITIVE_ISOLINES) {
1400 prog_data->output_topology = BRW_TESS_OUTPUT_TOPOLOGY_LINE;
1401 } else {
1402 /* Hardware winding order is backwards from OpenGL */
1403 prog_data->output_topology =
1404 nir->info.tess.ccw ? BRW_TESS_OUTPUT_TOPOLOGY_TRI_CW
1405 : BRW_TESS_OUTPUT_TOPOLOGY_TRI_CCW;
1406 }
1407
1408 if (unlikely(debug_enabled)) {
1409 fprintf(stderr, "TES Input ");
1410 brw_print_vue_map(stderr, input_vue_map, MESA_SHADER_TESS_EVAL);
1411 fprintf(stderr, "TES Output ");
1412 brw_print_vue_map(stderr, &prog_data->base.vue_map,
1413 MESA_SHADER_TESS_EVAL);
1414 }
1415
1416 if (is_scalar) {
1417 fs_visitor v(compiler, params->log_data, mem_ctx, &key->base,
1418 &prog_data->base.base, nir, 8,
1419 debug_enabled);
1420 if (!v.run_tes()) {
1421 params->error_str = ralloc_strdup(mem_ctx, v.fail_msg);
1422 return NULL;
1423 }
1424
1425 prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs;
1426 prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
1427
1428 fs_generator g(compiler, params->log_data, mem_ctx,
1429 &prog_data->base.base, false, MESA_SHADER_TESS_EVAL);
1430 if (unlikely(debug_enabled)) {
1431 g.enable_debug(ralloc_asprintf(mem_ctx,
1432 "%s tessellation evaluation shader %s",
1433 nir->info.label ? nir->info.label
1434 : "unnamed",
1435 nir->info.name));
1436 }
1437
1438 g.generate_code(v.cfg, 8, v.shader_stats,
1439 v.performance_analysis.require(), params->stats);
1440
1441 g.add_const_data(nir->constant_data, nir->constant_data_size);
1442
1443 assembly = g.get_assembly();
1444 } else {
1445 brw::vec4_tes_visitor v(compiler, params->log_data, key, prog_data,
1446 nir, mem_ctx, debug_enabled);
1447 if (!v.run()) {
1448 params->error_str = ralloc_strdup(mem_ctx, v.fail_msg);
1449 return NULL;
1450 }
1451
1452 if (unlikely(debug_enabled))
1453 v.dump_instructions();
1454
1455 assembly = brw_vec4_generate_assembly(compiler, params->log_data, mem_ctx, nir,
1456 &prog_data->base, v.cfg,
1457 v.performance_analysis.require(),
1458 params->stats, debug_enabled);
1459 }
1460
1461 return assembly;
1462 }
1463