1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_cfg.h"
25 #include "brw_eu.h"
26 #include "brw_fs.h"
27 #include "brw_nir.h"
28 #include "brw_private.h"
29 #include "dev/intel_debug.h"
30 #include "util/macros.h"
31 #include "util/u_debug.h"
32
33 enum brw_reg_type
brw_type_for_base_type(const struct glsl_type * type)34 brw_type_for_base_type(const struct glsl_type *type)
35 {
36 switch (type->base_type) {
37 case GLSL_TYPE_FLOAT16:
38 return BRW_REGISTER_TYPE_HF;
39 case GLSL_TYPE_FLOAT:
40 return BRW_REGISTER_TYPE_F;
41 case GLSL_TYPE_INT:
42 case GLSL_TYPE_BOOL:
43 case GLSL_TYPE_SUBROUTINE:
44 return BRW_REGISTER_TYPE_D;
45 case GLSL_TYPE_INT16:
46 return BRW_REGISTER_TYPE_W;
47 case GLSL_TYPE_INT8:
48 return BRW_REGISTER_TYPE_B;
49 case GLSL_TYPE_UINT:
50 return BRW_REGISTER_TYPE_UD;
51 case GLSL_TYPE_UINT16:
52 return BRW_REGISTER_TYPE_UW;
53 case GLSL_TYPE_UINT8:
54 return BRW_REGISTER_TYPE_UB;
55 case GLSL_TYPE_ARRAY:
56 return brw_type_for_base_type(type->fields.array);
57 case GLSL_TYPE_STRUCT:
58 case GLSL_TYPE_INTERFACE:
59 case GLSL_TYPE_SAMPLER:
60 case GLSL_TYPE_TEXTURE:
61 case GLSL_TYPE_ATOMIC_UINT:
62 /* These should be overridden with the type of the member when
63 * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely
64 * way to trip up if we don't.
65 */
66 return BRW_REGISTER_TYPE_UD;
67 case GLSL_TYPE_IMAGE:
68 return BRW_REGISTER_TYPE_UD;
69 case GLSL_TYPE_DOUBLE:
70 return BRW_REGISTER_TYPE_DF;
71 case GLSL_TYPE_UINT64:
72 return BRW_REGISTER_TYPE_UQ;
73 case GLSL_TYPE_INT64:
74 return BRW_REGISTER_TYPE_Q;
75 case GLSL_TYPE_VOID:
76 case GLSL_TYPE_ERROR:
77 case GLSL_TYPE_COOPERATIVE_MATRIX:
78 unreachable("not reached");
79 }
80
81 return BRW_REGISTER_TYPE_F;
82 }
83
84 uint32_t
brw_math_function(enum opcode op)85 brw_math_function(enum opcode op)
86 {
87 switch (op) {
88 case SHADER_OPCODE_RCP:
89 return BRW_MATH_FUNCTION_INV;
90 case SHADER_OPCODE_RSQ:
91 return BRW_MATH_FUNCTION_RSQ;
92 case SHADER_OPCODE_SQRT:
93 return BRW_MATH_FUNCTION_SQRT;
94 case SHADER_OPCODE_EXP2:
95 return BRW_MATH_FUNCTION_EXP;
96 case SHADER_OPCODE_LOG2:
97 return BRW_MATH_FUNCTION_LOG;
98 case SHADER_OPCODE_POW:
99 return BRW_MATH_FUNCTION_POW;
100 case SHADER_OPCODE_SIN:
101 return BRW_MATH_FUNCTION_SIN;
102 case SHADER_OPCODE_COS:
103 return BRW_MATH_FUNCTION_COS;
104 case SHADER_OPCODE_INT_QUOTIENT:
105 return BRW_MATH_FUNCTION_INT_DIV_QUOTIENT;
106 case SHADER_OPCODE_INT_REMAINDER:
107 return BRW_MATH_FUNCTION_INT_DIV_REMAINDER;
108 default:
109 unreachable("not reached: unknown math function");
110 }
111 }
112
113 bool
brw_texture_offset(const nir_tex_instr * tex,unsigned src,uint32_t * offset_bits_out)114 brw_texture_offset(const nir_tex_instr *tex, unsigned src,
115 uint32_t *offset_bits_out)
116 {
117 if (!nir_src_is_const(tex->src[src].src))
118 return false;
119
120 const unsigned num_components = nir_tex_instr_src_size(tex, src);
121
122 /* Combine all three offsets into a single unsigned dword:
123 *
124 * bits 11:8 - U Offset (X component)
125 * bits 7:4 - V Offset (Y component)
126 * bits 3:0 - R Offset (Z component)
127 */
128 uint32_t offset_bits = 0;
129 for (unsigned i = 0; i < num_components; i++) {
130 int offset = nir_src_comp_as_int(tex->src[src].src, i);
131
132 /* offset out of bounds; caller will handle it. */
133 if (offset > 7 || offset < -8)
134 return false;
135
136 const unsigned shift = 4 * (2 - i);
137 offset_bits |= (offset << shift) & (0xF << shift);
138 }
139
140 *offset_bits_out = offset_bits;
141
142 return true;
143 }
144
/**
 * Return a printable name for an opcode.
 *
 * Opcodes in the range [0, NUM_BRW_OPCODES) are looked up through
 * brw_opcode_desc(), with two exceptions that are answered directly: DO, and
 * DPAS when devinfo->verx10 < 125.  Every other opcode handled here maps to
 * a fixed string literal.  An opcode that matches no case is treated as
 * unreachable.
 *
 * \param isa  ISA description; supplies the device info and the opcode
 *             descriptor table
 * \param op   opcode to name
 */
const char *
brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   switch (op) {
   case 0 ... NUM_BRW_OPCODES - 1:
      /* The DO instruction doesn't exist on Gfx9+, but we use it to mark the
       * start of a loop in the IR.
       */
      if (op == BRW_OPCODE_DO)
         return "do";

      /* DPAS instructions may transiently exist on platforms that do not
       * support DPAS. They will eventually be lowered, but in the meantime it
       * must be possible to query the instruction name.
       */
      if (devinfo->verx10 < 125 && op == BRW_OPCODE_DPAS)
         return "dpas";

      assert(brw_opcode_desc(isa, op)->name);
      return brw_opcode_desc(isa, op)->name;
   case FS_OPCODE_FB_WRITE_LOGICAL:
      return "fb_write_logical";
   case FS_OPCODE_FB_READ:
      return "fb_read";
   case FS_OPCODE_FB_READ_LOGICAL:
      return "fb_read_logical";

   case SHADER_OPCODE_RCP:
      return "rcp";
   case SHADER_OPCODE_RSQ:
      return "rsq";
   case SHADER_OPCODE_SQRT:
      return "sqrt";
   case SHADER_OPCODE_EXP2:
      return "exp2";
   case SHADER_OPCODE_LOG2:
      return "log2";
   case SHADER_OPCODE_POW:
      return "pow";
   case SHADER_OPCODE_INT_QUOTIENT:
      return "int_quot";
   case SHADER_OPCODE_INT_REMAINDER:
      return "int_rem";
   case SHADER_OPCODE_SIN:
      return "sin";
   case SHADER_OPCODE_COS:
      return "cos";

   case SHADER_OPCODE_SEND:
      return "send";

   case SHADER_OPCODE_UNDEF:
      return "undef";

   case SHADER_OPCODE_TEX:
      return "tex";
   case SHADER_OPCODE_TEX_LOGICAL:
      return "tex_logical";
   case SHADER_OPCODE_TXD:
      return "txd";
   case SHADER_OPCODE_TXD_LOGICAL:
      return "txd_logical";
   case SHADER_OPCODE_TXF:
      return "txf";
   case SHADER_OPCODE_TXF_LOGICAL:
      return "txf_logical";
   case SHADER_OPCODE_TXF_LZ:
      return "txf_lz";
   case SHADER_OPCODE_TXL:
      return "txl";
   case SHADER_OPCODE_TXL_LOGICAL:
      return "txl_logical";
   case SHADER_OPCODE_TXL_LZ:
      return "txl_lz";
   case SHADER_OPCODE_TXS:
      return "txs";
   case SHADER_OPCODE_TXS_LOGICAL:
      return "txs_logical";
   case FS_OPCODE_TXB:
      return "txb";
   case FS_OPCODE_TXB_LOGICAL:
      return "txb_logical";
   case SHADER_OPCODE_TXF_CMS:
      return "txf_cms";
   case SHADER_OPCODE_TXF_CMS_LOGICAL:
      return "txf_cms_logical";
   case SHADER_OPCODE_TXF_CMS_W:
      return "txf_cms_w";
   case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
      return "txf_cms_w_logical";
   case SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL:
      return "txf_cms_w_gfx12_logical";
   case SHADER_OPCODE_TXF_UMS:
      return "txf_ums";
   case SHADER_OPCODE_TXF_UMS_LOGICAL:
      return "txf_ums_logical";
   case SHADER_OPCODE_TXF_MCS:
      return "txf_mcs";
   case SHADER_OPCODE_TXF_MCS_LOGICAL:
      return "txf_mcs_logical";
   case SHADER_OPCODE_LOD:
      return "lod";
   case SHADER_OPCODE_LOD_LOGICAL:
      return "lod_logical";
   case SHADER_OPCODE_TG4:
      return "tg4";
   case SHADER_OPCODE_TG4_LOGICAL:
      return "tg4_logical";
   case SHADER_OPCODE_TG4_OFFSET:
      return "tg4_offset";
   case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
      return "tg4_offset_logical";
   case SHADER_OPCODE_TG4_OFFSET_LOD:
      return "tg4_offset_lod";
   case SHADER_OPCODE_TG4_OFFSET_LOD_LOGICAL:
      return "tg4_offset_lod_logical";
   case SHADER_OPCODE_TG4_OFFSET_BIAS:
      return "tg4_offset_bias";
   case SHADER_OPCODE_TG4_OFFSET_BIAS_LOGICAL:
      return "tg4_offset_bias_logical";
   case SHADER_OPCODE_TG4_BIAS:
      return "tg4_b";
   case SHADER_OPCODE_TG4_BIAS_LOGICAL:
      return "tg4_b_logical";
   case SHADER_OPCODE_TG4_EXPLICIT_LOD:
      return "tg4_l";
   case SHADER_OPCODE_TG4_EXPLICIT_LOD_LOGICAL:
      return "tg4_l_logical";
   case SHADER_OPCODE_TG4_IMPLICIT_LOD:
      return "tg4_i";
   case SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL:
      return "tg4_i_logical";
   case SHADER_OPCODE_SAMPLEINFO:
      return "sampleinfo";
   case SHADER_OPCODE_SAMPLEINFO_LOGICAL:
      return "sampleinfo_logical";

   case SHADER_OPCODE_IMAGE_SIZE_LOGICAL:
      return "image_size_logical";

   case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
      return "untyped_atomic_logical";
   case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
      return "untyped_surface_read_logical";
   case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
      return "untyped_surface_write_logical";
   case SHADER_OPCODE_UNALIGNED_OWORD_BLOCK_READ_LOGICAL:
      return "unaligned_oword_block_read_logical";
   case SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL:
      return "oword_block_write_logical";
   case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL:
      return "a64_untyped_read_logical";
   case SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL:
      return "a64_oword_block_read_logical";
   case SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL:
      return "a64_unaligned_oword_block_read_logical";
   case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL:
      return "a64_oword_block_write_logical";
   case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
      return "a64_untyped_write_logical";
   case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL:
      return "a64_byte_scattered_read_logical";
   case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
      return "a64_byte_scattered_write_logical";
   case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
      return "a64_untyped_atomic_logical";
   case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
      return "typed_atomic_logical";
   case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
      return "typed_surface_read_logical";
   case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
      return "typed_surface_write_logical";
   case SHADER_OPCODE_MEMORY_FENCE:
      return "memory_fence";
   case FS_OPCODE_SCHEDULING_FENCE:
      return "scheduling_fence";
   case SHADER_OPCODE_INTERLOCK:
      /* For an interlock we actually issue a memory fence via sendc. */
      return "interlock";

   case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
      return "byte_scattered_read_logical";
   case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
      return "byte_scattered_write_logical";
   case SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL:
      return "dword_scattered_read_logical";
   case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL:
      return "dword_scattered_write_logical";

   case SHADER_OPCODE_LOAD_PAYLOAD:
      return "load_payload";
   case FS_OPCODE_PACK:
      return "pack";

   case SHADER_OPCODE_SCRATCH_HEADER:
      return "scratch_header";

   case SHADER_OPCODE_URB_WRITE_LOGICAL:
      return "urb_write_logical";
   case SHADER_OPCODE_URB_READ_LOGICAL:
      return "urb_read_logical";

   case SHADER_OPCODE_FIND_LIVE_CHANNEL:
      return "find_live_channel";
   case SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL:
      return "find_last_live_channel";
   case SHADER_OPCODE_LOAD_LIVE_CHANNELS:
      return "load_live_channels";
   case FS_OPCODE_LOAD_LIVE_CHANNELS:
      return "fs_load_live_channels";

   case SHADER_OPCODE_BROADCAST:
      return "broadcast";
   case SHADER_OPCODE_SHUFFLE:
      return "shuffle";
   case SHADER_OPCODE_SEL_EXEC:
      return "sel_exec";
   case SHADER_OPCODE_QUAD_SWIZZLE:
      return "quad_swizzle";
   case SHADER_OPCODE_CLUSTER_BROADCAST:
      return "cluster_broadcast";

   case SHADER_OPCODE_GET_BUFFER_SIZE:
      return "get_buffer_size";

   case FS_OPCODE_DDX_COARSE:
      return "ddx_coarse";
   case FS_OPCODE_DDX_FINE:
      return "ddx_fine";
   case FS_OPCODE_DDY_COARSE:
      return "ddy_coarse";
   case FS_OPCODE_DDY_FINE:
      return "ddy_fine";

   case FS_OPCODE_LINTERP:
      return "linterp";

   case FS_OPCODE_PIXEL_X:
      return "pixel_x";
   case FS_OPCODE_PIXEL_Y:
      return "pixel_y";

   case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
      return "uniform_pull_const";
   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
      return "varying_pull_const_logical";

   case FS_OPCODE_PACK_HALF_2x16_SPLIT:
      return "pack_half_2x16_split";

   case SHADER_OPCODE_HALT_TARGET:
      return "halt_target";

   case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
      return "interp_sample";
   case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
      return "interp_shared_offset";
   case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
      return "interp_per_slot_offset";

   case CS_OPCODE_CS_TERMINATE:
      return "cs_terminate";
   case SHADER_OPCODE_BARRIER:
      return "barrier";
   case SHADER_OPCODE_MULH:
      return "mulh";
   case SHADER_OPCODE_ISUB_SAT:
      return "isub_sat";
   case SHADER_OPCODE_USUB_SAT:
      return "usub_sat";
   case SHADER_OPCODE_MOV_INDIRECT:
      return "mov_indirect";
   case SHADER_OPCODE_MOV_RELOC_IMM:
      return "mov_reloc_imm";

   case RT_OPCODE_TRACE_RAY_LOGICAL:
      return "rt_trace_ray_logical";

   case SHADER_OPCODE_RND_MODE:
      return "rnd_mode";
   case SHADER_OPCODE_FLOAT_CONTROL_MODE:
      return "float_control_mode";
   case SHADER_OPCODE_BTD_SPAWN_LOGICAL:
      return "btd_spawn_logical";
   case SHADER_OPCODE_BTD_RETIRE_LOGICAL:
      return "btd_retire_logical";
   case SHADER_OPCODE_READ_SR_REG:
      return "read_sr_reg";
   }

   unreachable("not reached");
}
439
440 bool
brw_saturate_immediate(enum brw_reg_type type,struct brw_reg * reg)441 brw_saturate_immediate(enum brw_reg_type type, struct brw_reg *reg)
442 {
443 union {
444 unsigned ud;
445 int d;
446 float f;
447 double df;
448 } imm, sat_imm = { 0 };
449
450 const unsigned size = type_sz(type);
451
452 /* We want to either do a 32-bit or 64-bit data copy, the type is otherwise
453 * irrelevant, so just check the size of the type and copy from/to an
454 * appropriately sized field.
455 */
456 if (size < 8)
457 imm.ud = reg->ud;
458 else
459 imm.df = reg->df;
460
461 switch (type) {
462 case BRW_REGISTER_TYPE_UD:
463 case BRW_REGISTER_TYPE_D:
464 case BRW_REGISTER_TYPE_UW:
465 case BRW_REGISTER_TYPE_W:
466 case BRW_REGISTER_TYPE_UQ:
467 case BRW_REGISTER_TYPE_Q:
468 /* Nothing to do. */
469 return false;
470 case BRW_REGISTER_TYPE_F:
471 sat_imm.f = SATURATE(imm.f);
472 break;
473 case BRW_REGISTER_TYPE_DF:
474 sat_imm.df = SATURATE(imm.df);
475 break;
476 case BRW_REGISTER_TYPE_UB:
477 case BRW_REGISTER_TYPE_B:
478 unreachable("no UB/B immediates");
479 case BRW_REGISTER_TYPE_V:
480 case BRW_REGISTER_TYPE_UV:
481 case BRW_REGISTER_TYPE_VF:
482 unreachable("unimplemented: saturate vector immediate");
483 case BRW_REGISTER_TYPE_HF:
484 unreachable("unimplemented: saturate HF immediate");
485 case BRW_REGISTER_TYPE_NF:
486 unreachable("no NF immediates");
487 }
488
489 if (size < 8) {
490 if (imm.ud != sat_imm.ud) {
491 reg->ud = sat_imm.ud;
492 return true;
493 }
494 } else {
495 if (imm.df != sat_imm.df) {
496 reg->df = sat_imm.df;
497 return true;
498 }
499 }
500 return false;
501 }
502
503 bool
brw_negate_immediate(enum brw_reg_type type,struct brw_reg * reg)504 brw_negate_immediate(enum brw_reg_type type, struct brw_reg *reg)
505 {
506 switch (type) {
507 case BRW_REGISTER_TYPE_D:
508 case BRW_REGISTER_TYPE_UD:
509 reg->d = -reg->d;
510 return true;
511 case BRW_REGISTER_TYPE_W:
512 case BRW_REGISTER_TYPE_UW: {
513 uint16_t value = -(int16_t)reg->ud;
514 reg->ud = value | (uint32_t)value << 16;
515 return true;
516 }
517 case BRW_REGISTER_TYPE_F:
518 reg->f = -reg->f;
519 return true;
520 case BRW_REGISTER_TYPE_VF:
521 reg->ud ^= 0x80808080;
522 return true;
523 case BRW_REGISTER_TYPE_DF:
524 reg->df = -reg->df;
525 return true;
526 case BRW_REGISTER_TYPE_UQ:
527 case BRW_REGISTER_TYPE_Q:
528 reg->d64 = -reg->d64;
529 return true;
530 case BRW_REGISTER_TYPE_UB:
531 case BRW_REGISTER_TYPE_B:
532 unreachable("no UB/B immediates");
533 case BRW_REGISTER_TYPE_UV:
534 case BRW_REGISTER_TYPE_V:
535 assert(!"unimplemented: negate UV/V immediate");
536 case BRW_REGISTER_TYPE_HF:
537 reg->ud ^= 0x80008000;
538 return true;
539 case BRW_REGISTER_TYPE_NF:
540 unreachable("no NF immediates");
541 }
542
543 return false;
544 }
545
546 bool
brw_abs_immediate(enum brw_reg_type type,struct brw_reg * reg)547 brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg)
548 {
549 switch (type) {
550 case BRW_REGISTER_TYPE_D:
551 reg->d = abs(reg->d);
552 return true;
553 case BRW_REGISTER_TYPE_W: {
554 uint16_t value = abs((int16_t)reg->ud);
555 reg->ud = value | (uint32_t)value << 16;
556 return true;
557 }
558 case BRW_REGISTER_TYPE_F:
559 reg->f = fabsf(reg->f);
560 return true;
561 case BRW_REGISTER_TYPE_DF:
562 reg->df = fabs(reg->df);
563 return true;
564 case BRW_REGISTER_TYPE_VF:
565 reg->ud &= ~0x80808080;
566 return true;
567 case BRW_REGISTER_TYPE_Q:
568 reg->d64 = imaxabs(reg->d64);
569 return true;
570 case BRW_REGISTER_TYPE_UB:
571 case BRW_REGISTER_TYPE_B:
572 unreachable("no UB/B immediates");
573 case BRW_REGISTER_TYPE_UQ:
574 case BRW_REGISTER_TYPE_UD:
575 case BRW_REGISTER_TYPE_UW:
576 case BRW_REGISTER_TYPE_UV:
577 /* Presumably the absolute value modifier on an unsigned source is a
578 * nop, but it would be nice to confirm.
579 */
580 assert(!"unimplemented: abs unsigned immediate");
581 case BRW_REGISTER_TYPE_V:
582 assert(!"unimplemented: abs V immediate");
583 case BRW_REGISTER_TYPE_HF:
584 reg->ud &= ~0x80008000;
585 return true;
586 case BRW_REGISTER_TYPE_NF:
587 unreachable("no NF immediates");
588 }
589
590 return false;
591 }
592
/**
 * Set up the state shared by backend shader compilations.
 *
 * \param compiler         compiler instance; also supplies devinfo
 * \param params           compile parameters; supplies log_data and mem_ctx
 * \param shader           NIR shader being compiled; supplies the stage
 * \param stage_prog_data  program data stored for later use
 * \param debug_enabled    stored as-is in the debug_enabled member
 */
backend_shader::backend_shader(const struct brw_compiler *compiler,
                               const struct brw_compile_params *params,
                               const nir_shader *shader,
                               struct brw_stage_prog_data *stage_prog_data,
                               bool debug_enabled)
   : compiler(compiler),
     log_data(params->log_data),
     devinfo(compiler->devinfo),
     nir(shader),
     stage_prog_data(stage_prog_data),
     mem_ctx(params->mem_ctx),
     /* No CFG exists yet; calculate_cfg() creates it on demand. */
     cfg(NULL), idom_analysis(this),
     stage(shader->info.stage),
     debug_enabled(debug_enabled)
{
}
609
/** Destructor; performs no explicit cleanup of its own. */
backend_shader::~backend_shader()
{
}
613
614 bool
equals(const backend_reg & r) const615 backend_reg::equals(const backend_reg &r) const
616 {
617 return brw_regs_equal(this, &r) && offset == r.offset;
618 }
619
620 bool
negative_equals(const backend_reg & r) const621 backend_reg::negative_equals(const backend_reg &r) const
622 {
623 return brw_regs_negative_equal(this, &r) && offset == r.offset;
624 }
625
626 bool
is_zero() const627 backend_reg::is_zero() const
628 {
629 if (file != IMM)
630 return false;
631
632 assert(type_sz(type) > 1);
633
634 switch (type) {
635 case BRW_REGISTER_TYPE_HF:
636 assert((d & 0xffff) == ((d >> 16) & 0xffff));
637 return (d & 0xffff) == 0 || (d & 0xffff) == 0x8000;
638 case BRW_REGISTER_TYPE_F:
639 return f == 0;
640 case BRW_REGISTER_TYPE_DF:
641 return df == 0;
642 case BRW_REGISTER_TYPE_W:
643 case BRW_REGISTER_TYPE_UW:
644 assert((d & 0xffff) == ((d >> 16) & 0xffff));
645 return (d & 0xffff) == 0;
646 case BRW_REGISTER_TYPE_D:
647 case BRW_REGISTER_TYPE_UD:
648 return d == 0;
649 case BRW_REGISTER_TYPE_UQ:
650 case BRW_REGISTER_TYPE_Q:
651 return u64 == 0;
652 default:
653 return false;
654 }
655 }
656
657 bool
is_one() const658 backend_reg::is_one() const
659 {
660 if (file != IMM)
661 return false;
662
663 assert(type_sz(type) > 1);
664
665 switch (type) {
666 case BRW_REGISTER_TYPE_HF:
667 assert((d & 0xffff) == ((d >> 16) & 0xffff));
668 return (d & 0xffff) == 0x3c00;
669 case BRW_REGISTER_TYPE_F:
670 return f == 1.0f;
671 case BRW_REGISTER_TYPE_DF:
672 return df == 1.0;
673 case BRW_REGISTER_TYPE_W:
674 case BRW_REGISTER_TYPE_UW:
675 assert((d & 0xffff) == ((d >> 16) & 0xffff));
676 return (d & 0xffff) == 1;
677 case BRW_REGISTER_TYPE_D:
678 case BRW_REGISTER_TYPE_UD:
679 return d == 1;
680 case BRW_REGISTER_TYPE_UQ:
681 case BRW_REGISTER_TYPE_Q:
682 return u64 == 1;
683 default:
684 return false;
685 }
686 }
687
688 bool
is_negative_one() const689 backend_reg::is_negative_one() const
690 {
691 if (file != IMM)
692 return false;
693
694 assert(type_sz(type) > 1);
695
696 switch (type) {
697 case BRW_REGISTER_TYPE_HF:
698 assert((d & 0xffff) == ((d >> 16) & 0xffff));
699 return (d & 0xffff) == 0xbc00;
700 case BRW_REGISTER_TYPE_F:
701 return f == -1.0;
702 case BRW_REGISTER_TYPE_DF:
703 return df == -1.0;
704 case BRW_REGISTER_TYPE_W:
705 assert((d & 0xffff) == ((d >> 16) & 0xffff));
706 return (d & 0xffff) == 0xffff;
707 case BRW_REGISTER_TYPE_D:
708 return d == -1;
709 case BRW_REGISTER_TYPE_Q:
710 return d64 == -1;
711 default:
712 return false;
713 }
714 }
715
716 bool
is_null() const717 backend_reg::is_null() const
718 {
719 return file == ARF && nr == BRW_ARF_NULL;
720 }
721
722
723 bool
is_accumulator() const724 backend_reg::is_accumulator() const
725 {
726 return file == ARF && nr == BRW_ARF_ACCUMULATOR;
727 }
728
729 bool
is_commutative() const730 backend_instruction::is_commutative() const
731 {
732 switch (opcode) {
733 case BRW_OPCODE_AND:
734 case BRW_OPCODE_OR:
735 case BRW_OPCODE_XOR:
736 case BRW_OPCODE_ADD:
737 case BRW_OPCODE_ADD3:
738 case BRW_OPCODE_MUL:
739 case SHADER_OPCODE_MULH:
740 return true;
741 case BRW_OPCODE_SEL:
742 /* MIN and MAX are commutative. */
743 if (conditional_mod == BRW_CONDITIONAL_GE ||
744 conditional_mod == BRW_CONDITIONAL_L) {
745 return true;
746 }
747 FALLTHROUGH;
748 default:
749 return false;
750 }
751 }
752
753 bool
is_3src(const struct brw_compiler * compiler) const754 backend_instruction::is_3src(const struct brw_compiler *compiler) const
755 {
756 return ::is_3src(&compiler->isa, opcode);
757 }
758
759 bool
is_math() const760 backend_instruction::is_math() const
761 {
762 return (opcode == SHADER_OPCODE_RCP ||
763 opcode == SHADER_OPCODE_RSQ ||
764 opcode == SHADER_OPCODE_SQRT ||
765 opcode == SHADER_OPCODE_EXP2 ||
766 opcode == SHADER_OPCODE_LOG2 ||
767 opcode == SHADER_OPCODE_SIN ||
768 opcode == SHADER_OPCODE_COS ||
769 opcode == SHADER_OPCODE_INT_QUOTIENT ||
770 opcode == SHADER_OPCODE_INT_REMAINDER ||
771 opcode == SHADER_OPCODE_POW);
772 }
773
774 bool
is_control_flow_begin() const775 backend_instruction::is_control_flow_begin() const
776 {
777 switch (opcode) {
778 case BRW_OPCODE_DO:
779 case BRW_OPCODE_IF:
780 case BRW_OPCODE_ELSE:
781 return true;
782 default:
783 return false;
784 }
785 }
786
787 bool
is_control_flow_end() const788 backend_instruction::is_control_flow_end() const
789 {
790 switch (opcode) {
791 case BRW_OPCODE_ELSE:
792 case BRW_OPCODE_WHILE:
793 case BRW_OPCODE_ENDIF:
794 return true;
795 default:
796 return false;
797 }
798 }
799
800 bool
is_control_flow() const801 backend_instruction::is_control_flow() const
802 {
803 switch (opcode) {
804 case BRW_OPCODE_DO:
805 case BRW_OPCODE_WHILE:
806 case BRW_OPCODE_IF:
807 case BRW_OPCODE_ELSE:
808 case BRW_OPCODE_ENDIF:
809 case BRW_OPCODE_BREAK:
810 case BRW_OPCODE_CONTINUE:
811 return true;
812 default:
813 return false;
814 }
815 }
816
817 bool
uses_indirect_addressing() const818 backend_instruction::uses_indirect_addressing() const
819 {
820 switch (opcode) {
821 case SHADER_OPCODE_BROADCAST:
822 case SHADER_OPCODE_CLUSTER_BROADCAST:
823 case SHADER_OPCODE_MOV_INDIRECT:
824 return true;
825 default:
826 return false;
827 }
828 }
829
830 bool
can_do_source_mods() const831 backend_instruction::can_do_source_mods() const
832 {
833 switch (opcode) {
834 case BRW_OPCODE_ADDC:
835 case BRW_OPCODE_BFE:
836 case BRW_OPCODE_BFI1:
837 case BRW_OPCODE_BFI2:
838 case BRW_OPCODE_BFREV:
839 case BRW_OPCODE_CBIT:
840 case BRW_OPCODE_FBH:
841 case BRW_OPCODE_FBL:
842 case BRW_OPCODE_ROL:
843 case BRW_OPCODE_ROR:
844 case BRW_OPCODE_SUBB:
845 case BRW_OPCODE_DP4A:
846 case BRW_OPCODE_DPAS:
847 case SHADER_OPCODE_BROADCAST:
848 case SHADER_OPCODE_CLUSTER_BROADCAST:
849 case SHADER_OPCODE_MOV_INDIRECT:
850 case SHADER_OPCODE_SHUFFLE:
851 case SHADER_OPCODE_INT_QUOTIENT:
852 case SHADER_OPCODE_INT_REMAINDER:
853 return false;
854 default:
855 return true;
856 }
857 }
858
859 bool
can_do_saturate() const860 backend_instruction::can_do_saturate() const
861 {
862 switch (opcode) {
863 case BRW_OPCODE_ADD:
864 case BRW_OPCODE_ADD3:
865 case BRW_OPCODE_ASR:
866 case BRW_OPCODE_AVG:
867 case BRW_OPCODE_CSEL:
868 case BRW_OPCODE_DP2:
869 case BRW_OPCODE_DP3:
870 case BRW_OPCODE_DP4:
871 case BRW_OPCODE_DPH:
872 case BRW_OPCODE_DP4A:
873 case BRW_OPCODE_LINE:
874 case BRW_OPCODE_LRP:
875 case BRW_OPCODE_MAC:
876 case BRW_OPCODE_MAD:
877 case BRW_OPCODE_MATH:
878 case BRW_OPCODE_MOV:
879 case BRW_OPCODE_MUL:
880 case SHADER_OPCODE_MULH:
881 case BRW_OPCODE_PLN:
882 case BRW_OPCODE_RNDD:
883 case BRW_OPCODE_RNDE:
884 case BRW_OPCODE_RNDU:
885 case BRW_OPCODE_RNDZ:
886 case BRW_OPCODE_SEL:
887 case BRW_OPCODE_SHL:
888 case BRW_OPCODE_SHR:
889 case FS_OPCODE_LINTERP:
890 case SHADER_OPCODE_COS:
891 case SHADER_OPCODE_EXP2:
892 case SHADER_OPCODE_LOG2:
893 case SHADER_OPCODE_POW:
894 case SHADER_OPCODE_RCP:
895 case SHADER_OPCODE_RSQ:
896 case SHADER_OPCODE_SIN:
897 case SHADER_OPCODE_SQRT:
898 return true;
899 default:
900 return false;
901 }
902 }
903
904 bool
can_do_cmod() const905 backend_instruction::can_do_cmod() const
906 {
907 switch (opcode) {
908 case BRW_OPCODE_ADD:
909 case BRW_OPCODE_ADD3:
910 case BRW_OPCODE_ADDC:
911 case BRW_OPCODE_AND:
912 case BRW_OPCODE_ASR:
913 case BRW_OPCODE_AVG:
914 case BRW_OPCODE_CMP:
915 case BRW_OPCODE_CMPN:
916 case BRW_OPCODE_DP2:
917 case BRW_OPCODE_DP3:
918 case BRW_OPCODE_DP4:
919 case BRW_OPCODE_DPH:
920 case BRW_OPCODE_FRC:
921 case BRW_OPCODE_LINE:
922 case BRW_OPCODE_LRP:
923 case BRW_OPCODE_LZD:
924 case BRW_OPCODE_MAC:
925 case BRW_OPCODE_MACH:
926 case BRW_OPCODE_MAD:
927 case BRW_OPCODE_MOV:
928 case BRW_OPCODE_MUL:
929 case BRW_OPCODE_NOT:
930 case BRW_OPCODE_OR:
931 case BRW_OPCODE_PLN:
932 case BRW_OPCODE_RNDD:
933 case BRW_OPCODE_RNDE:
934 case BRW_OPCODE_RNDU:
935 case BRW_OPCODE_RNDZ:
936 case BRW_OPCODE_SAD2:
937 case BRW_OPCODE_SADA2:
938 case BRW_OPCODE_SHL:
939 case BRW_OPCODE_SHR:
940 case BRW_OPCODE_SUBB:
941 case BRW_OPCODE_XOR:
942 case FS_OPCODE_LINTERP:
943 return true;
944 default:
945 return false;
946 }
947 }
948
949 bool
reads_accumulator_implicitly() const950 backend_instruction::reads_accumulator_implicitly() const
951 {
952 switch (opcode) {
953 case BRW_OPCODE_MAC:
954 case BRW_OPCODE_MACH:
955 case BRW_OPCODE_SADA2:
956 return true;
957 default:
958 return false;
959 }
960 }
961
962 bool
writes_accumulator_implicitly(const struct intel_device_info * devinfo) const963 backend_instruction::writes_accumulator_implicitly(const struct intel_device_info *devinfo) const
964 {
965 return writes_accumulator ||
966 (opcode == FS_OPCODE_LINTERP && !devinfo->has_pln) ||
967 (eot && intel_needs_workaround(devinfo, 14010017096));
968 }
969
970 bool
has_side_effects() const971 backend_instruction::has_side_effects() const
972 {
973 switch (opcode) {
974 case SHADER_OPCODE_SEND:
975 return send_has_side_effects;
976
977 case BRW_OPCODE_SYNC:
978 case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
979 case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
980 case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
981 case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
982 case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
983 case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
984 case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL:
985 case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
986 case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
987 case SHADER_OPCODE_MEMORY_FENCE:
988 case SHADER_OPCODE_INTERLOCK:
989 case SHADER_OPCODE_URB_WRITE_LOGICAL:
990 case FS_OPCODE_FB_WRITE_LOGICAL:
991 case SHADER_OPCODE_BARRIER:
992 case SHADER_OPCODE_RND_MODE:
993 case SHADER_OPCODE_FLOAT_CONTROL_MODE:
994 case FS_OPCODE_SCHEDULING_FENCE:
995 case SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL:
996 case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL:
997 case SHADER_OPCODE_BTD_SPAWN_LOGICAL:
998 case SHADER_OPCODE_BTD_RETIRE_LOGICAL:
999 case RT_OPCODE_TRACE_RAY_LOGICAL:
1000 return true;
1001 default:
1002 return eot;
1003 }
1004 }
1005
1006 bool
is_volatile() const1007 backend_instruction::is_volatile() const
1008 {
1009 switch (opcode) {
1010 case SHADER_OPCODE_SEND:
1011 return send_is_volatile;
1012
1013 case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
1014 case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
1015 case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
1016 case SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL:
1017 case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL:
1018 case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL:
1019 return true;
1020 default:
1021 return false;
1022 }
1023 }
1024
1025 #ifndef NDEBUG
1026 static bool
inst_is_in_block(const bblock_t * block,const backend_instruction * inst)1027 inst_is_in_block(const bblock_t *block, const backend_instruction *inst)
1028 {
1029 const exec_node *n = inst;
1030
1031 /* Find the tail sentinel. If the tail sentinel is the sentinel from the
1032 * list header in the bblock_t, then this instruction is in that basic
1033 * block.
1034 */
1035 while (!n->is_tail_sentinel())
1036 n = n->get_next();
1037
1038 return n == &block->instructions.tail_sentinel;
1039 }
1040 #endif
1041
1042 static void
adjust_later_block_ips(bblock_t * start_block,int ip_adjustment)1043 adjust_later_block_ips(bblock_t *start_block, int ip_adjustment)
1044 {
1045 for (bblock_t *block_iter = start_block->next();
1046 block_iter;
1047 block_iter = block_iter->next()) {
1048 block_iter->start_ip += ip_adjustment;
1049 block_iter->end_ip += ip_adjustment;
1050 }
1051 }
1052
1053 void
insert_after(bblock_t * block,backend_instruction * inst)1054 backend_instruction::insert_after(bblock_t *block, backend_instruction *inst)
1055 {
1056 assert(this != inst);
1057 assert(block->end_ip_delta == 0);
1058
1059 if (!this->is_head_sentinel())
1060 assert(inst_is_in_block(block, this) || !"Instruction not in block");
1061
1062 block->end_ip++;
1063
1064 adjust_later_block_ips(block, 1);
1065
1066 exec_node::insert_after(inst);
1067 }
1068
1069 void
insert_before(bblock_t * block,backend_instruction * inst)1070 backend_instruction::insert_before(bblock_t *block, backend_instruction *inst)
1071 {
1072 assert(this != inst);
1073 assert(block->end_ip_delta == 0);
1074
1075 if (!this->is_tail_sentinel())
1076 assert(inst_is_in_block(block, this) || !"Instruction not in block");
1077
1078 block->end_ip++;
1079
1080 adjust_later_block_ips(block, 1);
1081
1082 exec_node::insert_before(inst);
1083 }
1084
/**
 * Unlink this instruction from \p block and update IP bookkeeping.
 *
 * \param block                         block that contains this instruction
 * \param defer_later_block_ip_updates  when true, later blocks are not
 *                                      touched now; the pending adjustment
 *                                      is accumulated in block->end_ip_delta
 *
 * If this was the only instruction in the block (start_ip == end_ip), any
 * pending delta is applied to later blocks and the block itself is removed
 * from its CFG.
 */
void
backend_instruction::remove(bblock_t *block, bool defer_later_block_ip_updates)
{
   assert(inst_is_in_block(block, this) || !"Instruction not in block");

   if (defer_later_block_ip_updates) {
      /* Record the shrink; later blocks are adjusted at some later point. */
      block->end_ip_delta--;
   } else {
      /* Immediate updates are only valid with no deferred delta pending. */
      assert(block->end_ip_delta == 0);
      adjust_later_block_ips(block, -1);
   }

   if (block->start_ip == block->end_ip) {
      /* Flush any deferred delta before the block goes away. */
      if (block->end_ip_delta != 0) {
         adjust_later_block_ips(block, block->end_ip_delta);
         block->end_ip_delta = 0;
      }

      block->cfg->remove_block(block);
   } else {
      block->end_ip--;
   }

   exec_node::remove();
}
1110
1111 void
dump_instructions(const char * name) const1112 backend_shader::dump_instructions(const char *name) const
1113 {
1114 FILE *file = stderr;
1115 if (name && __normal_user()) {
1116 file = fopen(name, "w");
1117 if (!file)
1118 file = stderr;
1119 }
1120
1121 dump_instructions_to_file(file);
1122
1123 if (file != stderr) {
1124 fclose(file);
1125 }
1126 }
1127
1128 void
dump_instructions_to_file(FILE * file) const1129 backend_shader::dump_instructions_to_file(FILE *file) const
1130 {
1131 if (cfg) {
1132 int ip = 0;
1133 foreach_block_and_inst(block, backend_instruction, inst, cfg) {
1134 if (!INTEL_DEBUG(DEBUG_OPTIMIZER))
1135 fprintf(file, "%4d: ", ip++);
1136 dump_instruction(inst, file);
1137 }
1138 } else {
1139 int ip = 0;
1140 foreach_in_list(backend_instruction, inst, &instructions) {
1141 if (!INTEL_DEBUG(DEBUG_OPTIMIZER))
1142 fprintf(file, "%4d: ", ip++);
1143 dump_instruction(inst, file);
1144 }
1145 }
1146 }
1147
1148 void
calculate_cfg()1149 backend_shader::calculate_cfg()
1150 {
1151 if (this->cfg)
1152 return;
1153 cfg = new(mem_ctx) cfg_t(this, &this->instructions);
1154 }
1155
1156 void
invalidate_analysis(brw::analysis_dependency_class c)1157 backend_shader::invalidate_analysis(brw::analysis_dependency_class c)
1158 {
1159 idom_analysis.invalidate(c);
1160 }
1161
1162 extern "C" const unsigned *
brw_compile_tes(const struct brw_compiler * compiler,brw_compile_tes_params * params)1163 brw_compile_tes(const struct brw_compiler *compiler,
1164 brw_compile_tes_params *params)
1165 {
1166 const struct intel_device_info *devinfo = compiler->devinfo;
1167 nir_shader *nir = params->base.nir;
1168 const struct brw_tes_prog_key *key = params->key;
1169 const struct intel_vue_map *input_vue_map = params->input_vue_map;
1170 struct brw_tes_prog_data *prog_data = params->prog_data;
1171
1172 const bool debug_enabled = brw_should_print_shader(nir, DEBUG_TES);
1173
1174 prog_data->base.base.stage = MESA_SHADER_TESS_EVAL;
1175 prog_data->base.base.ray_queries = nir->info.ray_queries;
1176
1177 nir->info.inputs_read = key->inputs_read;
1178 nir->info.patch_inputs_read = key->patch_inputs_read;
1179
1180 brw_nir_apply_key(nir, compiler, &key->base, 8);
1181 brw_nir_lower_tes_inputs(nir, input_vue_map);
1182 brw_nir_lower_vue_outputs(nir);
1183 brw_postprocess_nir(nir, compiler, debug_enabled,
1184 key->base.robust_flags);
1185
1186 brw_compute_vue_map(devinfo, &prog_data->base.vue_map,
1187 nir->info.outputs_written,
1188 nir->info.separate_shader, 1);
1189
1190 unsigned output_size_bytes = prog_data->base.vue_map.num_slots * 4 * 4;
1191
1192 assert(output_size_bytes >= 1);
1193 if (output_size_bytes > GFX7_MAX_DS_URB_ENTRY_SIZE_BYTES) {
1194 params->base.error_str = ralloc_strdup(params->base.mem_ctx,
1195 "DS outputs exceed maximum size");
1196 return NULL;
1197 }
1198
1199 prog_data->base.clip_distance_mask =
1200 ((1 << nir->info.clip_distance_array_size) - 1);
1201 prog_data->base.cull_distance_mask =
1202 ((1 << nir->info.cull_distance_array_size) - 1) <<
1203 nir->info.clip_distance_array_size;
1204
1205 prog_data->include_primitive_id =
1206 BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);
1207
1208 /* URB entry sizes are stored as a multiple of 64 bytes. */
1209 prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
1210
1211 prog_data->base.urb_read_length = 0;
1212
1213 STATIC_ASSERT(INTEL_TESS_PARTITIONING_INTEGER == TESS_SPACING_EQUAL - 1);
1214 STATIC_ASSERT(INTEL_TESS_PARTITIONING_ODD_FRACTIONAL ==
1215 TESS_SPACING_FRACTIONAL_ODD - 1);
1216 STATIC_ASSERT(INTEL_TESS_PARTITIONING_EVEN_FRACTIONAL ==
1217 TESS_SPACING_FRACTIONAL_EVEN - 1);
1218
1219 prog_data->partitioning =
1220 (enum intel_tess_partitioning) (nir->info.tess.spacing - 1);
1221
1222 switch (nir->info.tess._primitive_mode) {
1223 case TESS_PRIMITIVE_QUADS:
1224 prog_data->domain = INTEL_TESS_DOMAIN_QUAD;
1225 break;
1226 case TESS_PRIMITIVE_TRIANGLES:
1227 prog_data->domain = INTEL_TESS_DOMAIN_TRI;
1228 break;
1229 case TESS_PRIMITIVE_ISOLINES:
1230 prog_data->domain = INTEL_TESS_DOMAIN_ISOLINE;
1231 break;
1232 default:
1233 unreachable("invalid domain shader primitive mode");
1234 }
1235
1236 if (nir->info.tess.point_mode) {
1237 prog_data->output_topology = INTEL_TESS_OUTPUT_TOPOLOGY_POINT;
1238 } else if (nir->info.tess._primitive_mode == TESS_PRIMITIVE_ISOLINES) {
1239 prog_data->output_topology = INTEL_TESS_OUTPUT_TOPOLOGY_LINE;
1240 } else {
1241 /* Hardware winding order is backwards from OpenGL */
1242 prog_data->output_topology =
1243 nir->info.tess.ccw ? INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CW
1244 : INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CCW;
1245 }
1246
1247 if (unlikely(debug_enabled)) {
1248 fprintf(stderr, "TES Input ");
1249 brw_print_vue_map(stderr, input_vue_map, MESA_SHADER_TESS_EVAL);
1250 fprintf(stderr, "TES Output ");
1251 brw_print_vue_map(stderr, &prog_data->base.vue_map,
1252 MESA_SHADER_TESS_EVAL);
1253 }
1254
1255 const unsigned dispatch_width = devinfo->ver >= 20 ? 16 : 8;
1256 fs_visitor v(compiler, ¶ms->base, &key->base,
1257 &prog_data->base.base, nir, dispatch_width,
1258 params->base.stats != NULL, debug_enabled);
1259 if (!v.run_tes()) {
1260 params->base.error_str =
1261 ralloc_strdup(params->base.mem_ctx, v.fail_msg);
1262 return NULL;
1263 }
1264
1265 assert(v.payload().num_regs % reg_unit(devinfo) == 0);
1266 prog_data->base.base.dispatch_grf_start_reg = v.payload().num_regs / reg_unit(devinfo);
1267
1268 prog_data->base.dispatch_mode = INTEL_DISPATCH_MODE_SIMD8;
1269
1270 fs_generator g(compiler, ¶ms->base,
1271 &prog_data->base.base, MESA_SHADER_TESS_EVAL);
1272 if (unlikely(debug_enabled)) {
1273 g.enable_debug(ralloc_asprintf(params->base.mem_ctx,
1274 "%s tessellation evaluation shader %s",
1275 nir->info.label ? nir->info.label
1276 : "unnamed",
1277 nir->info.name));
1278 }
1279
1280 g.generate_code(v.cfg, dispatch_width, v.shader_stats,
1281 v.performance_analysis.require(), params->base.stats);
1282
1283 g.add_const_data(nir->constant_data, nir->constant_data_size);
1284
1285 return g.get_assembly();
1286 }
1287