• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "brw_cfg.h"
25 #include "brw_eu.h"
26 #include "brw_fs.h"
27 #include "brw_nir.h"
28 #include "brw_private.h"
29 #include "dev/intel_debug.h"
30 #include "util/macros.h"
31 #include "util/u_debug.h"
32 
33 enum brw_reg_type
brw_type_for_base_type(const struct glsl_type * type)34 brw_type_for_base_type(const struct glsl_type *type)
35 {
36    switch (type->base_type) {
37    case GLSL_TYPE_FLOAT16:
38       return BRW_REGISTER_TYPE_HF;
39    case GLSL_TYPE_FLOAT:
40       return BRW_REGISTER_TYPE_F;
41    case GLSL_TYPE_INT:
42    case GLSL_TYPE_BOOL:
43    case GLSL_TYPE_SUBROUTINE:
44       return BRW_REGISTER_TYPE_D;
45    case GLSL_TYPE_INT16:
46       return BRW_REGISTER_TYPE_W;
47    case GLSL_TYPE_INT8:
48       return BRW_REGISTER_TYPE_B;
49    case GLSL_TYPE_UINT:
50       return BRW_REGISTER_TYPE_UD;
51    case GLSL_TYPE_UINT16:
52       return BRW_REGISTER_TYPE_UW;
53    case GLSL_TYPE_UINT8:
54       return BRW_REGISTER_TYPE_UB;
55    case GLSL_TYPE_ARRAY:
56       return brw_type_for_base_type(type->fields.array);
57    case GLSL_TYPE_STRUCT:
58    case GLSL_TYPE_INTERFACE:
59    case GLSL_TYPE_SAMPLER:
60    case GLSL_TYPE_TEXTURE:
61    case GLSL_TYPE_ATOMIC_UINT:
62       /* These should be overridden with the type of the member when
63        * dereferenced into.  BRW_REGISTER_TYPE_UD seems like a likely
64        * way to trip up if we don't.
65        */
66       return BRW_REGISTER_TYPE_UD;
67    case GLSL_TYPE_IMAGE:
68       return BRW_REGISTER_TYPE_UD;
69    case GLSL_TYPE_DOUBLE:
70       return BRW_REGISTER_TYPE_DF;
71    case GLSL_TYPE_UINT64:
72       return BRW_REGISTER_TYPE_UQ;
73    case GLSL_TYPE_INT64:
74       return BRW_REGISTER_TYPE_Q;
75    case GLSL_TYPE_VOID:
76    case GLSL_TYPE_ERROR:
77    case GLSL_TYPE_COOPERATIVE_MATRIX:
78       unreachable("not reached");
79    }
80 
81    return BRW_REGISTER_TYPE_F;
82 }
83 
84 uint32_t
brw_math_function(enum opcode op)85 brw_math_function(enum opcode op)
86 {
87    switch (op) {
88    case SHADER_OPCODE_RCP:
89       return BRW_MATH_FUNCTION_INV;
90    case SHADER_OPCODE_RSQ:
91       return BRW_MATH_FUNCTION_RSQ;
92    case SHADER_OPCODE_SQRT:
93       return BRW_MATH_FUNCTION_SQRT;
94    case SHADER_OPCODE_EXP2:
95       return BRW_MATH_FUNCTION_EXP;
96    case SHADER_OPCODE_LOG2:
97       return BRW_MATH_FUNCTION_LOG;
98    case SHADER_OPCODE_POW:
99       return BRW_MATH_FUNCTION_POW;
100    case SHADER_OPCODE_SIN:
101       return BRW_MATH_FUNCTION_SIN;
102    case SHADER_OPCODE_COS:
103       return BRW_MATH_FUNCTION_COS;
104    case SHADER_OPCODE_INT_QUOTIENT:
105       return BRW_MATH_FUNCTION_INT_DIV_QUOTIENT;
106    case SHADER_OPCODE_INT_REMAINDER:
107       return BRW_MATH_FUNCTION_INT_DIV_REMAINDER;
108    default:
109       unreachable("not reached: unknown math function");
110    }
111 }
112 
113 bool
brw_texture_offset(const nir_tex_instr * tex,unsigned src,uint32_t * offset_bits_out)114 brw_texture_offset(const nir_tex_instr *tex, unsigned src,
115                    uint32_t *offset_bits_out)
116 {
117    if (!nir_src_is_const(tex->src[src].src))
118       return false;
119 
120    const unsigned num_components = nir_tex_instr_src_size(tex, src);
121 
122    /* Combine all three offsets into a single unsigned dword:
123     *
124     *    bits 11:8 - U Offset (X component)
125     *    bits  7:4 - V Offset (Y component)
126     *    bits  3:0 - R Offset (Z component)
127     */
128    uint32_t offset_bits = 0;
129    for (unsigned i = 0; i < num_components; i++) {
130       int offset = nir_src_comp_as_int(tex->src[src].src, i);
131 
132       /* offset out of bounds; caller will handle it. */
133       if (offset > 7 || offset < -8)
134          return false;
135 
136       const unsigned shift = 4 * (2 - i);
137       offset_bits |= (offset << shift) & (0xF << shift);
138    }
139 
140    *offset_bits_out = offset_bits;
141 
142    return true;
143 }
144 
145 const char *
brw_instruction_name(const struct brw_isa_info * isa,enum opcode op)146 brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
147 {
148    const struct intel_device_info *devinfo = isa->devinfo;
149 
150    switch (op) {
151    case 0 ... NUM_BRW_OPCODES - 1:
152       /* The DO instruction doesn't exist on Gfx9+, but we use it to mark the
153        * start of a loop in the IR.
154        */
155       if (op == BRW_OPCODE_DO)
156          return "do";
157 
158       /* DPAS instructions may transiently exist on platforms that do not
159        * support DPAS. They will eventually be lowered, but in the meantime it
160        * must be possible to query the instruction name.
161        */
162       if (devinfo->verx10 < 125 && op == BRW_OPCODE_DPAS)
163          return "dpas";
164 
165       assert(brw_opcode_desc(isa, op)->name);
166       return brw_opcode_desc(isa, op)->name;
167    case FS_OPCODE_FB_WRITE_LOGICAL:
168       return "fb_write_logical";
169    case FS_OPCODE_FB_READ:
170       return "fb_read";
171    case FS_OPCODE_FB_READ_LOGICAL:
172       return "fb_read_logical";
173 
174    case SHADER_OPCODE_RCP:
175       return "rcp";
176    case SHADER_OPCODE_RSQ:
177       return "rsq";
178    case SHADER_OPCODE_SQRT:
179       return "sqrt";
180    case SHADER_OPCODE_EXP2:
181       return "exp2";
182    case SHADER_OPCODE_LOG2:
183       return "log2";
184    case SHADER_OPCODE_POW:
185       return "pow";
186    case SHADER_OPCODE_INT_QUOTIENT:
187       return "int_quot";
188    case SHADER_OPCODE_INT_REMAINDER:
189       return "int_rem";
190    case SHADER_OPCODE_SIN:
191       return "sin";
192    case SHADER_OPCODE_COS:
193       return "cos";
194 
195    case SHADER_OPCODE_SEND:
196       return "send";
197 
198    case SHADER_OPCODE_UNDEF:
199       return "undef";
200 
201    case SHADER_OPCODE_TEX:
202       return "tex";
203    case SHADER_OPCODE_TEX_LOGICAL:
204       return "tex_logical";
205    case SHADER_OPCODE_TXD:
206       return "txd";
207    case SHADER_OPCODE_TXD_LOGICAL:
208       return "txd_logical";
209    case SHADER_OPCODE_TXF:
210       return "txf";
211    case SHADER_OPCODE_TXF_LOGICAL:
212       return "txf_logical";
213    case SHADER_OPCODE_TXF_LZ:
214       return "txf_lz";
215    case SHADER_OPCODE_TXL:
216       return "txl";
217    case SHADER_OPCODE_TXL_LOGICAL:
218       return "txl_logical";
219    case SHADER_OPCODE_TXL_LZ:
220       return "txl_lz";
221    case SHADER_OPCODE_TXS:
222       return "txs";
223    case SHADER_OPCODE_TXS_LOGICAL:
224       return "txs_logical";
225    case FS_OPCODE_TXB:
226       return "txb";
227    case FS_OPCODE_TXB_LOGICAL:
228       return "txb_logical";
229    case SHADER_OPCODE_TXF_CMS:
230       return "txf_cms";
231    case SHADER_OPCODE_TXF_CMS_LOGICAL:
232       return "txf_cms_logical";
233    case SHADER_OPCODE_TXF_CMS_W:
234       return "txf_cms_w";
235    case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
236       return "txf_cms_w_logical";
237    case SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL:
238       return "txf_cms_w_gfx12_logical";
239    case SHADER_OPCODE_TXF_UMS:
240       return "txf_ums";
241    case SHADER_OPCODE_TXF_UMS_LOGICAL:
242       return "txf_ums_logical";
243    case SHADER_OPCODE_TXF_MCS:
244       return "txf_mcs";
245    case SHADER_OPCODE_TXF_MCS_LOGICAL:
246       return "txf_mcs_logical";
247    case SHADER_OPCODE_LOD:
248       return "lod";
249    case SHADER_OPCODE_LOD_LOGICAL:
250       return "lod_logical";
251    case SHADER_OPCODE_TG4:
252       return "tg4";
253    case SHADER_OPCODE_TG4_LOGICAL:
254       return "tg4_logical";
255    case SHADER_OPCODE_TG4_OFFSET:
256       return "tg4_offset";
257    case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
258       return "tg4_offset_logical";
259    case SHADER_OPCODE_TG4_OFFSET_LOD:
260       return "tg4_offset_lod";
261    case SHADER_OPCODE_TG4_OFFSET_LOD_LOGICAL:
262       return "tg4_offset_lod_logical";
263    case SHADER_OPCODE_TG4_OFFSET_BIAS:
264       return "tg4_offset_bias";
265    case SHADER_OPCODE_TG4_OFFSET_BIAS_LOGICAL:
266       return "tg4_offset_bias_logical";
267    case SHADER_OPCODE_TG4_BIAS:
268       return "tg4_b";
269    case SHADER_OPCODE_TG4_BIAS_LOGICAL:
270       return "tg4_b_logical";
271    case SHADER_OPCODE_TG4_EXPLICIT_LOD:
272       return "tg4_l";
273    case SHADER_OPCODE_TG4_EXPLICIT_LOD_LOGICAL:
274       return "tg4_l_logical";
275    case SHADER_OPCODE_TG4_IMPLICIT_LOD:
276       return "tg4_i";
277    case SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL:
278       return "tg4_i_logical";
279    case SHADER_OPCODE_SAMPLEINFO:
280       return "sampleinfo";
281    case SHADER_OPCODE_SAMPLEINFO_LOGICAL:
282       return "sampleinfo_logical";
283 
284    case SHADER_OPCODE_IMAGE_SIZE_LOGICAL:
285       return "image_size_logical";
286 
287    case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
288       return "untyped_atomic_logical";
289    case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
290       return "untyped_surface_read_logical";
291    case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
292       return "untyped_surface_write_logical";
293    case SHADER_OPCODE_UNALIGNED_OWORD_BLOCK_READ_LOGICAL:
294       return "unaligned_oword_block_read_logical";
295    case SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL:
296       return "oword_block_write_logical";
297    case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL:
298       return "a64_untyped_read_logical";
299    case SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL:
300       return "a64_oword_block_read_logical";
301    case SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL:
302       return "a64_unaligned_oword_block_read_logical";
303    case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL:
304       return "a64_oword_block_write_logical";
305    case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
306       return "a64_untyped_write_logical";
307    case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL:
308       return "a64_byte_scattered_read_logical";
309    case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
310       return "a64_byte_scattered_write_logical";
311    case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
312       return "a64_untyped_atomic_logical";
313    case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
314       return "typed_atomic_logical";
315    case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
316       return "typed_surface_read_logical";
317    case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
318       return "typed_surface_write_logical";
319    case SHADER_OPCODE_MEMORY_FENCE:
320       return "memory_fence";
321    case FS_OPCODE_SCHEDULING_FENCE:
322       return "scheduling_fence";
323    case SHADER_OPCODE_INTERLOCK:
324       /* For an interlock we actually issue a memory fence via sendc. */
325       return "interlock";
326 
327    case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
328       return "byte_scattered_read_logical";
329    case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
330       return "byte_scattered_write_logical";
331    case SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL:
332       return "dword_scattered_read_logical";
333    case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL:
334       return "dword_scattered_write_logical";
335 
336    case SHADER_OPCODE_LOAD_PAYLOAD:
337       return "load_payload";
338    case FS_OPCODE_PACK:
339       return "pack";
340 
341    case SHADER_OPCODE_SCRATCH_HEADER:
342       return "scratch_header";
343 
344    case SHADER_OPCODE_URB_WRITE_LOGICAL:
345       return "urb_write_logical";
346    case SHADER_OPCODE_URB_READ_LOGICAL:
347       return "urb_read_logical";
348 
349    case SHADER_OPCODE_FIND_LIVE_CHANNEL:
350       return "find_live_channel";
351    case SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL:
352       return "find_last_live_channel";
353    case SHADER_OPCODE_LOAD_LIVE_CHANNELS:
354       return "load_live_channels";
355    case FS_OPCODE_LOAD_LIVE_CHANNELS:
356       return "fs_load_live_channels";
357 
358    case SHADER_OPCODE_BROADCAST:
359       return "broadcast";
360    case SHADER_OPCODE_SHUFFLE:
361       return "shuffle";
362    case SHADER_OPCODE_SEL_EXEC:
363       return "sel_exec";
364    case SHADER_OPCODE_QUAD_SWIZZLE:
365       return "quad_swizzle";
366    case SHADER_OPCODE_CLUSTER_BROADCAST:
367       return "cluster_broadcast";
368 
369    case SHADER_OPCODE_GET_BUFFER_SIZE:
370       return "get_buffer_size";
371 
372    case FS_OPCODE_DDX_COARSE:
373       return "ddx_coarse";
374    case FS_OPCODE_DDX_FINE:
375       return "ddx_fine";
376    case FS_OPCODE_DDY_COARSE:
377       return "ddy_coarse";
378    case FS_OPCODE_DDY_FINE:
379       return "ddy_fine";
380 
381    case FS_OPCODE_LINTERP:
382       return "linterp";
383 
384    case FS_OPCODE_PIXEL_X:
385       return "pixel_x";
386    case FS_OPCODE_PIXEL_Y:
387       return "pixel_y";
388 
389    case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
390       return "uniform_pull_const";
391    case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
392       return "varying_pull_const_logical";
393 
394    case FS_OPCODE_PACK_HALF_2x16_SPLIT:
395       return "pack_half_2x16_split";
396 
397    case SHADER_OPCODE_HALT_TARGET:
398       return "halt_target";
399 
400    case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
401       return "interp_sample";
402    case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
403       return "interp_shared_offset";
404    case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
405       return "interp_per_slot_offset";
406 
407    case CS_OPCODE_CS_TERMINATE:
408       return "cs_terminate";
409    case SHADER_OPCODE_BARRIER:
410       return "barrier";
411    case SHADER_OPCODE_MULH:
412       return "mulh";
413    case SHADER_OPCODE_ISUB_SAT:
414       return "isub_sat";
415    case SHADER_OPCODE_USUB_SAT:
416       return "usub_sat";
417    case SHADER_OPCODE_MOV_INDIRECT:
418       return "mov_indirect";
419    case SHADER_OPCODE_MOV_RELOC_IMM:
420       return "mov_reloc_imm";
421 
422    case RT_OPCODE_TRACE_RAY_LOGICAL:
423       return "rt_trace_ray_logical";
424 
425    case SHADER_OPCODE_RND_MODE:
426       return "rnd_mode";
427    case SHADER_OPCODE_FLOAT_CONTROL_MODE:
428       return "float_control_mode";
429    case SHADER_OPCODE_BTD_SPAWN_LOGICAL:
430       return "btd_spawn_logical";
431    case SHADER_OPCODE_BTD_RETIRE_LOGICAL:
432       return "btd_retire_logical";
433    case SHADER_OPCODE_READ_SR_REG:
434       return "read_sr_reg";
435    }
436 
437    unreachable("not reached");
438 }
439 
440 bool
brw_saturate_immediate(enum brw_reg_type type,struct brw_reg * reg)441 brw_saturate_immediate(enum brw_reg_type type, struct brw_reg *reg)
442 {
443    union {
444       unsigned ud;
445       int d;
446       float f;
447       double df;
448    } imm, sat_imm = { 0 };
449 
450    const unsigned size = type_sz(type);
451 
452    /* We want to either do a 32-bit or 64-bit data copy, the type is otherwise
453     * irrelevant, so just check the size of the type and copy from/to an
454     * appropriately sized field.
455     */
456    if (size < 8)
457       imm.ud = reg->ud;
458    else
459       imm.df = reg->df;
460 
461    switch (type) {
462    case BRW_REGISTER_TYPE_UD:
463    case BRW_REGISTER_TYPE_D:
464    case BRW_REGISTER_TYPE_UW:
465    case BRW_REGISTER_TYPE_W:
466    case BRW_REGISTER_TYPE_UQ:
467    case BRW_REGISTER_TYPE_Q:
468       /* Nothing to do. */
469       return false;
470    case BRW_REGISTER_TYPE_F:
471       sat_imm.f = SATURATE(imm.f);
472       break;
473    case BRW_REGISTER_TYPE_DF:
474       sat_imm.df = SATURATE(imm.df);
475       break;
476    case BRW_REGISTER_TYPE_UB:
477    case BRW_REGISTER_TYPE_B:
478       unreachable("no UB/B immediates");
479    case BRW_REGISTER_TYPE_V:
480    case BRW_REGISTER_TYPE_UV:
481    case BRW_REGISTER_TYPE_VF:
482       unreachable("unimplemented: saturate vector immediate");
483    case BRW_REGISTER_TYPE_HF:
484       unreachable("unimplemented: saturate HF immediate");
485    case BRW_REGISTER_TYPE_NF:
486       unreachable("no NF immediates");
487    }
488 
489    if (size < 8) {
490       if (imm.ud != sat_imm.ud) {
491          reg->ud = sat_imm.ud;
492          return true;
493       }
494    } else {
495       if (imm.df != sat_imm.df) {
496          reg->df = sat_imm.df;
497          return true;
498       }
499    }
500    return false;
501 }
502 
503 bool
brw_negate_immediate(enum brw_reg_type type,struct brw_reg * reg)504 brw_negate_immediate(enum brw_reg_type type, struct brw_reg *reg)
505 {
506    switch (type) {
507    case BRW_REGISTER_TYPE_D:
508    case BRW_REGISTER_TYPE_UD:
509       reg->d = -reg->d;
510       return true;
511    case BRW_REGISTER_TYPE_W:
512    case BRW_REGISTER_TYPE_UW: {
513       uint16_t value = -(int16_t)reg->ud;
514       reg->ud = value | (uint32_t)value << 16;
515       return true;
516    }
517    case BRW_REGISTER_TYPE_F:
518       reg->f = -reg->f;
519       return true;
520    case BRW_REGISTER_TYPE_VF:
521       reg->ud ^= 0x80808080;
522       return true;
523    case BRW_REGISTER_TYPE_DF:
524       reg->df = -reg->df;
525       return true;
526    case BRW_REGISTER_TYPE_UQ:
527    case BRW_REGISTER_TYPE_Q:
528       reg->d64 = -reg->d64;
529       return true;
530    case BRW_REGISTER_TYPE_UB:
531    case BRW_REGISTER_TYPE_B:
532       unreachable("no UB/B immediates");
533    case BRW_REGISTER_TYPE_UV:
534    case BRW_REGISTER_TYPE_V:
535       assert(!"unimplemented: negate UV/V immediate");
536    case BRW_REGISTER_TYPE_HF:
537       reg->ud ^= 0x80008000;
538       return true;
539    case BRW_REGISTER_TYPE_NF:
540       unreachable("no NF immediates");
541    }
542 
543    return false;
544 }
545 
546 bool
brw_abs_immediate(enum brw_reg_type type,struct brw_reg * reg)547 brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg)
548 {
549    switch (type) {
550    case BRW_REGISTER_TYPE_D:
551       reg->d = abs(reg->d);
552       return true;
553    case BRW_REGISTER_TYPE_W: {
554       uint16_t value = abs((int16_t)reg->ud);
555       reg->ud = value | (uint32_t)value << 16;
556       return true;
557    }
558    case BRW_REGISTER_TYPE_F:
559       reg->f = fabsf(reg->f);
560       return true;
561    case BRW_REGISTER_TYPE_DF:
562       reg->df = fabs(reg->df);
563       return true;
564    case BRW_REGISTER_TYPE_VF:
565       reg->ud &= ~0x80808080;
566       return true;
567    case BRW_REGISTER_TYPE_Q:
568       reg->d64 = imaxabs(reg->d64);
569       return true;
570    case BRW_REGISTER_TYPE_UB:
571    case BRW_REGISTER_TYPE_B:
572       unreachable("no UB/B immediates");
573    case BRW_REGISTER_TYPE_UQ:
574    case BRW_REGISTER_TYPE_UD:
575    case BRW_REGISTER_TYPE_UW:
576    case BRW_REGISTER_TYPE_UV:
577       /* Presumably the absolute value modifier on an unsigned source is a
578        * nop, but it would be nice to confirm.
579        */
580       assert(!"unimplemented: abs unsigned immediate");
581    case BRW_REGISTER_TYPE_V:
582       assert(!"unimplemented: abs V immediate");
583    case BRW_REGISTER_TYPE_HF:
584       reg->ud &= ~0x80008000;
585       return true;
586    case BRW_REGISTER_TYPE_NF:
587       unreachable("no NF immediates");
588    }
589 
590    return false;
591 }
592 
backend_shader(const struct brw_compiler * compiler,const struct brw_compile_params * params,const nir_shader * shader,struct brw_stage_prog_data * stage_prog_data,bool debug_enabled)593 backend_shader::backend_shader(const struct brw_compiler *compiler,
594                                const struct brw_compile_params *params,
595                                const nir_shader *shader,
596                                struct brw_stage_prog_data *stage_prog_data,
597                                bool debug_enabled)
598    : compiler(compiler),
599      log_data(params->log_data),
600      devinfo(compiler->devinfo),
601      nir(shader),
602      stage_prog_data(stage_prog_data),
603      mem_ctx(params->mem_ctx),
604      cfg(NULL), idom_analysis(this),
605      stage(shader->info.stage),
606      debug_enabled(debug_enabled)
607 {
608 }
609 
~backend_shader()610 backend_shader::~backend_shader()
611 {
612 }
613 
614 bool
equals(const backend_reg & r) const615 backend_reg::equals(const backend_reg &r) const
616 {
617    return brw_regs_equal(this, &r) && offset == r.offset;
618 }
619 
620 bool
negative_equals(const backend_reg & r) const621 backend_reg::negative_equals(const backend_reg &r) const
622 {
623    return brw_regs_negative_equal(this, &r) && offset == r.offset;
624 }
625 
626 bool
is_zero() const627 backend_reg::is_zero() const
628 {
629    if (file != IMM)
630       return false;
631 
632    assert(type_sz(type) > 1);
633 
634    switch (type) {
635    case BRW_REGISTER_TYPE_HF:
636       assert((d & 0xffff) == ((d >> 16) & 0xffff));
637       return (d & 0xffff) == 0 || (d & 0xffff) == 0x8000;
638    case BRW_REGISTER_TYPE_F:
639       return f == 0;
640    case BRW_REGISTER_TYPE_DF:
641       return df == 0;
642    case BRW_REGISTER_TYPE_W:
643    case BRW_REGISTER_TYPE_UW:
644       assert((d & 0xffff) == ((d >> 16) & 0xffff));
645       return (d & 0xffff) == 0;
646    case BRW_REGISTER_TYPE_D:
647    case BRW_REGISTER_TYPE_UD:
648       return d == 0;
649    case BRW_REGISTER_TYPE_UQ:
650    case BRW_REGISTER_TYPE_Q:
651       return u64 == 0;
652    default:
653       return false;
654    }
655 }
656 
657 bool
is_one() const658 backend_reg::is_one() const
659 {
660    if (file != IMM)
661       return false;
662 
663    assert(type_sz(type) > 1);
664 
665    switch (type) {
666    case BRW_REGISTER_TYPE_HF:
667       assert((d & 0xffff) == ((d >> 16) & 0xffff));
668       return (d & 0xffff) == 0x3c00;
669    case BRW_REGISTER_TYPE_F:
670       return f == 1.0f;
671    case BRW_REGISTER_TYPE_DF:
672       return df == 1.0;
673    case BRW_REGISTER_TYPE_W:
674    case BRW_REGISTER_TYPE_UW:
675       assert((d & 0xffff) == ((d >> 16) & 0xffff));
676       return (d & 0xffff) == 1;
677    case BRW_REGISTER_TYPE_D:
678    case BRW_REGISTER_TYPE_UD:
679       return d == 1;
680    case BRW_REGISTER_TYPE_UQ:
681    case BRW_REGISTER_TYPE_Q:
682       return u64 == 1;
683    default:
684       return false;
685    }
686 }
687 
688 bool
is_negative_one() const689 backend_reg::is_negative_one() const
690 {
691    if (file != IMM)
692       return false;
693 
694    assert(type_sz(type) > 1);
695 
696    switch (type) {
697    case BRW_REGISTER_TYPE_HF:
698       assert((d & 0xffff) == ((d >> 16) & 0xffff));
699       return (d & 0xffff) == 0xbc00;
700    case BRW_REGISTER_TYPE_F:
701       return f == -1.0;
702    case BRW_REGISTER_TYPE_DF:
703       return df == -1.0;
704    case BRW_REGISTER_TYPE_W:
705       assert((d & 0xffff) == ((d >> 16) & 0xffff));
706       return (d & 0xffff) == 0xffff;
707    case BRW_REGISTER_TYPE_D:
708       return d == -1;
709    case BRW_REGISTER_TYPE_Q:
710       return d64 == -1;
711    default:
712       return false;
713    }
714 }
715 
716 bool
is_null() const717 backend_reg::is_null() const
718 {
719    return file == ARF && nr == BRW_ARF_NULL;
720 }
721 
722 
723 bool
is_accumulator() const724 backend_reg::is_accumulator() const
725 {
726    return file == ARF && nr == BRW_ARF_ACCUMULATOR;
727 }
728 
729 bool
is_commutative() const730 backend_instruction::is_commutative() const
731 {
732    switch (opcode) {
733    case BRW_OPCODE_AND:
734    case BRW_OPCODE_OR:
735    case BRW_OPCODE_XOR:
736    case BRW_OPCODE_ADD:
737    case BRW_OPCODE_ADD3:
738    case BRW_OPCODE_MUL:
739    case SHADER_OPCODE_MULH:
740       return true;
741    case BRW_OPCODE_SEL:
742       /* MIN and MAX are commutative. */
743       if (conditional_mod == BRW_CONDITIONAL_GE ||
744           conditional_mod == BRW_CONDITIONAL_L) {
745          return true;
746       }
747       FALLTHROUGH;
748    default:
749       return false;
750    }
751 }
752 
753 bool
is_3src(const struct brw_compiler * compiler) const754 backend_instruction::is_3src(const struct brw_compiler *compiler) const
755 {
756    return ::is_3src(&compiler->isa, opcode);
757 }
758 
759 bool
is_math() const760 backend_instruction::is_math() const
761 {
762    return (opcode == SHADER_OPCODE_RCP ||
763            opcode == SHADER_OPCODE_RSQ ||
764            opcode == SHADER_OPCODE_SQRT ||
765            opcode == SHADER_OPCODE_EXP2 ||
766            opcode == SHADER_OPCODE_LOG2 ||
767            opcode == SHADER_OPCODE_SIN ||
768            opcode == SHADER_OPCODE_COS ||
769            opcode == SHADER_OPCODE_INT_QUOTIENT ||
770            opcode == SHADER_OPCODE_INT_REMAINDER ||
771            opcode == SHADER_OPCODE_POW);
772 }
773 
774 bool
is_control_flow_begin() const775 backend_instruction::is_control_flow_begin() const
776 {
777    switch (opcode) {
778    case BRW_OPCODE_DO:
779    case BRW_OPCODE_IF:
780    case BRW_OPCODE_ELSE:
781       return true;
782    default:
783       return false;
784    }
785 }
786 
787 bool
is_control_flow_end() const788 backend_instruction::is_control_flow_end() const
789 {
790    switch (opcode) {
791    case BRW_OPCODE_ELSE:
792    case BRW_OPCODE_WHILE:
793    case BRW_OPCODE_ENDIF:
794       return true;
795    default:
796       return false;
797    }
798 }
799 
800 bool
is_control_flow() const801 backend_instruction::is_control_flow() const
802 {
803    switch (opcode) {
804    case BRW_OPCODE_DO:
805    case BRW_OPCODE_WHILE:
806    case BRW_OPCODE_IF:
807    case BRW_OPCODE_ELSE:
808    case BRW_OPCODE_ENDIF:
809    case BRW_OPCODE_BREAK:
810    case BRW_OPCODE_CONTINUE:
811       return true;
812    default:
813       return false;
814    }
815 }
816 
817 bool
uses_indirect_addressing() const818 backend_instruction::uses_indirect_addressing() const
819 {
820    switch (opcode) {
821    case SHADER_OPCODE_BROADCAST:
822    case SHADER_OPCODE_CLUSTER_BROADCAST:
823    case SHADER_OPCODE_MOV_INDIRECT:
824       return true;
825    default:
826       return false;
827    }
828 }
829 
830 bool
can_do_source_mods() const831 backend_instruction::can_do_source_mods() const
832 {
833    switch (opcode) {
834    case BRW_OPCODE_ADDC:
835    case BRW_OPCODE_BFE:
836    case BRW_OPCODE_BFI1:
837    case BRW_OPCODE_BFI2:
838    case BRW_OPCODE_BFREV:
839    case BRW_OPCODE_CBIT:
840    case BRW_OPCODE_FBH:
841    case BRW_OPCODE_FBL:
842    case BRW_OPCODE_ROL:
843    case BRW_OPCODE_ROR:
844    case BRW_OPCODE_SUBB:
845    case BRW_OPCODE_DP4A:
846    case BRW_OPCODE_DPAS:
847    case SHADER_OPCODE_BROADCAST:
848    case SHADER_OPCODE_CLUSTER_BROADCAST:
849    case SHADER_OPCODE_MOV_INDIRECT:
850    case SHADER_OPCODE_SHUFFLE:
851    case SHADER_OPCODE_INT_QUOTIENT:
852    case SHADER_OPCODE_INT_REMAINDER:
853       return false;
854    default:
855       return true;
856    }
857 }
858 
859 bool
can_do_saturate() const860 backend_instruction::can_do_saturate() const
861 {
862    switch (opcode) {
863    case BRW_OPCODE_ADD:
864    case BRW_OPCODE_ADD3:
865    case BRW_OPCODE_ASR:
866    case BRW_OPCODE_AVG:
867    case BRW_OPCODE_CSEL:
868    case BRW_OPCODE_DP2:
869    case BRW_OPCODE_DP3:
870    case BRW_OPCODE_DP4:
871    case BRW_OPCODE_DPH:
872    case BRW_OPCODE_DP4A:
873    case BRW_OPCODE_LINE:
874    case BRW_OPCODE_LRP:
875    case BRW_OPCODE_MAC:
876    case BRW_OPCODE_MAD:
877    case BRW_OPCODE_MATH:
878    case BRW_OPCODE_MOV:
879    case BRW_OPCODE_MUL:
880    case SHADER_OPCODE_MULH:
881    case BRW_OPCODE_PLN:
882    case BRW_OPCODE_RNDD:
883    case BRW_OPCODE_RNDE:
884    case BRW_OPCODE_RNDU:
885    case BRW_OPCODE_RNDZ:
886    case BRW_OPCODE_SEL:
887    case BRW_OPCODE_SHL:
888    case BRW_OPCODE_SHR:
889    case FS_OPCODE_LINTERP:
890    case SHADER_OPCODE_COS:
891    case SHADER_OPCODE_EXP2:
892    case SHADER_OPCODE_LOG2:
893    case SHADER_OPCODE_POW:
894    case SHADER_OPCODE_RCP:
895    case SHADER_OPCODE_RSQ:
896    case SHADER_OPCODE_SIN:
897    case SHADER_OPCODE_SQRT:
898       return true;
899    default:
900       return false;
901    }
902 }
903 
904 bool
can_do_cmod() const905 backend_instruction::can_do_cmod() const
906 {
907    switch (opcode) {
908    case BRW_OPCODE_ADD:
909    case BRW_OPCODE_ADD3:
910    case BRW_OPCODE_ADDC:
911    case BRW_OPCODE_AND:
912    case BRW_OPCODE_ASR:
913    case BRW_OPCODE_AVG:
914    case BRW_OPCODE_CMP:
915    case BRW_OPCODE_CMPN:
916    case BRW_OPCODE_DP2:
917    case BRW_OPCODE_DP3:
918    case BRW_OPCODE_DP4:
919    case BRW_OPCODE_DPH:
920    case BRW_OPCODE_FRC:
921    case BRW_OPCODE_LINE:
922    case BRW_OPCODE_LRP:
923    case BRW_OPCODE_LZD:
924    case BRW_OPCODE_MAC:
925    case BRW_OPCODE_MACH:
926    case BRW_OPCODE_MAD:
927    case BRW_OPCODE_MOV:
928    case BRW_OPCODE_MUL:
929    case BRW_OPCODE_NOT:
930    case BRW_OPCODE_OR:
931    case BRW_OPCODE_PLN:
932    case BRW_OPCODE_RNDD:
933    case BRW_OPCODE_RNDE:
934    case BRW_OPCODE_RNDU:
935    case BRW_OPCODE_RNDZ:
936    case BRW_OPCODE_SAD2:
937    case BRW_OPCODE_SADA2:
938    case BRW_OPCODE_SHL:
939    case BRW_OPCODE_SHR:
940    case BRW_OPCODE_SUBB:
941    case BRW_OPCODE_XOR:
942    case FS_OPCODE_LINTERP:
943       return true;
944    default:
945       return false;
946    }
947 }
948 
949 bool
reads_accumulator_implicitly() const950 backend_instruction::reads_accumulator_implicitly() const
951 {
952    switch (opcode) {
953    case BRW_OPCODE_MAC:
954    case BRW_OPCODE_MACH:
955    case BRW_OPCODE_SADA2:
956       return true;
957    default:
958       return false;
959    }
960 }
961 
962 bool
writes_accumulator_implicitly(const struct intel_device_info * devinfo) const963 backend_instruction::writes_accumulator_implicitly(const struct intel_device_info *devinfo) const
964 {
965    return writes_accumulator ||
966           (opcode == FS_OPCODE_LINTERP && !devinfo->has_pln) ||
967           (eot && intel_needs_workaround(devinfo, 14010017096));
968 }
969 
970 bool
has_side_effects() const971 backend_instruction::has_side_effects() const
972 {
973    switch (opcode) {
974    case SHADER_OPCODE_SEND:
975       return send_has_side_effects;
976 
977    case BRW_OPCODE_SYNC:
978    case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
979    case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
980    case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
981    case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
982    case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
983    case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
984    case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL:
985    case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
986    case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
987    case SHADER_OPCODE_MEMORY_FENCE:
988    case SHADER_OPCODE_INTERLOCK:
989    case SHADER_OPCODE_URB_WRITE_LOGICAL:
990    case FS_OPCODE_FB_WRITE_LOGICAL:
991    case SHADER_OPCODE_BARRIER:
992    case SHADER_OPCODE_RND_MODE:
993    case SHADER_OPCODE_FLOAT_CONTROL_MODE:
994    case FS_OPCODE_SCHEDULING_FENCE:
995    case SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL:
996    case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL:
997    case SHADER_OPCODE_BTD_SPAWN_LOGICAL:
998    case SHADER_OPCODE_BTD_RETIRE_LOGICAL:
999    case RT_OPCODE_TRACE_RAY_LOGICAL:
1000       return true;
1001    default:
1002       return eot;
1003    }
1004 }
1005 
1006 bool
is_volatile() const1007 backend_instruction::is_volatile() const
1008 {
1009    switch (opcode) {
1010    case SHADER_OPCODE_SEND:
1011       return send_is_volatile;
1012 
1013    case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
1014    case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
1015    case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
1016    case SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL:
1017    case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL:
1018    case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL:
1019       return true;
1020    default:
1021       return false;
1022    }
1023 }
1024 
1025 #ifndef NDEBUG
1026 static bool
inst_is_in_block(const bblock_t * block,const backend_instruction * inst)1027 inst_is_in_block(const bblock_t *block, const backend_instruction *inst)
1028 {
1029    const exec_node *n = inst;
1030 
1031    /* Find the tail sentinel. If the tail sentinel is the sentinel from the
1032     * list header in the bblock_t, then this instruction is in that basic
1033     * block.
1034     */
1035    while (!n->is_tail_sentinel())
1036       n = n->get_next();
1037 
1038    return n == &block->instructions.tail_sentinel;
1039 }
1040 #endif
1041 
1042 static void
adjust_later_block_ips(bblock_t * start_block,int ip_adjustment)1043 adjust_later_block_ips(bblock_t *start_block, int ip_adjustment)
1044 {
1045    for (bblock_t *block_iter = start_block->next();
1046         block_iter;
1047         block_iter = block_iter->next()) {
1048       block_iter->start_ip += ip_adjustment;
1049       block_iter->end_ip += ip_adjustment;
1050    }
1051 }
1052 
1053 void
insert_after(bblock_t * block,backend_instruction * inst)1054 backend_instruction::insert_after(bblock_t *block, backend_instruction *inst)
1055 {
1056    assert(this != inst);
1057    assert(block->end_ip_delta == 0);
1058 
1059    if (!this->is_head_sentinel())
1060       assert(inst_is_in_block(block, this) || !"Instruction not in block");
1061 
1062    block->end_ip++;
1063 
1064    adjust_later_block_ips(block, 1);
1065 
1066    exec_node::insert_after(inst);
1067 }
1068 
1069 void
insert_before(bblock_t * block,backend_instruction * inst)1070 backend_instruction::insert_before(bblock_t *block, backend_instruction *inst)
1071 {
1072    assert(this != inst);
1073    assert(block->end_ip_delta == 0);
1074 
1075    if (!this->is_tail_sentinel())
1076       assert(inst_is_in_block(block, this) || !"Instruction not in block");
1077 
1078    block->end_ip++;
1079 
1080    adjust_later_block_ips(block, 1);
1081 
1082    exec_node::insert_before(inst);
1083 }
1084 
1085 void
remove(bblock_t * block,bool defer_later_block_ip_updates)1086 backend_instruction::remove(bblock_t *block, bool defer_later_block_ip_updates)
1087 {
1088    assert(inst_is_in_block(block, this) || !"Instruction not in block");
1089 
1090    if (defer_later_block_ip_updates) {
1091       block->end_ip_delta--;
1092    } else {
1093       assert(block->end_ip_delta == 0);
1094       adjust_later_block_ips(block, -1);
1095    }
1096 
1097    if (block->start_ip == block->end_ip) {
1098       if (block->end_ip_delta != 0) {
1099          adjust_later_block_ips(block, block->end_ip_delta);
1100          block->end_ip_delta = 0;
1101       }
1102 
1103       block->cfg->remove_block(block);
1104    } else {
1105       block->end_ip--;
1106    }
1107 
1108    exec_node::remove();
1109 }
1110 
1111 void
dump_instructions(const char * name) const1112 backend_shader::dump_instructions(const char *name) const
1113 {
1114    FILE *file = stderr;
1115    if (name && __normal_user()) {
1116       file = fopen(name, "w");
1117       if (!file)
1118          file = stderr;
1119    }
1120 
1121    dump_instructions_to_file(file);
1122 
1123    if (file != stderr) {
1124       fclose(file);
1125    }
1126 }
1127 
1128 void
dump_instructions_to_file(FILE * file) const1129 backend_shader::dump_instructions_to_file(FILE *file) const
1130 {
1131    if (cfg) {
1132       int ip = 0;
1133       foreach_block_and_inst(block, backend_instruction, inst, cfg) {
1134          if (!INTEL_DEBUG(DEBUG_OPTIMIZER))
1135             fprintf(file, "%4d: ", ip++);
1136          dump_instruction(inst, file);
1137       }
1138    } else {
1139       int ip = 0;
1140       foreach_in_list(backend_instruction, inst, &instructions) {
1141          if (!INTEL_DEBUG(DEBUG_OPTIMIZER))
1142             fprintf(file, "%4d: ", ip++);
1143          dump_instruction(inst, file);
1144       }
1145    }
1146 }
1147 
1148 void
calculate_cfg()1149 backend_shader::calculate_cfg()
1150 {
1151    if (this->cfg)
1152       return;
1153    cfg = new(mem_ctx) cfg_t(this, &this->instructions);
1154 }
1155 
1156 void
invalidate_analysis(brw::analysis_dependency_class c)1157 backend_shader::invalidate_analysis(brw::analysis_dependency_class c)
1158 {
1159    idom_analysis.invalidate(c);
1160 }
1161 
1162 extern "C" const unsigned *
brw_compile_tes(const struct brw_compiler * compiler,brw_compile_tes_params * params)1163 brw_compile_tes(const struct brw_compiler *compiler,
1164                 brw_compile_tes_params *params)
1165 {
1166    const struct intel_device_info *devinfo = compiler->devinfo;
1167    nir_shader *nir = params->base.nir;
1168    const struct brw_tes_prog_key *key = params->key;
1169    const struct intel_vue_map *input_vue_map = params->input_vue_map;
1170    struct brw_tes_prog_data *prog_data = params->prog_data;
1171 
1172    const bool debug_enabled = brw_should_print_shader(nir, DEBUG_TES);
1173 
1174    prog_data->base.base.stage = MESA_SHADER_TESS_EVAL;
1175    prog_data->base.base.ray_queries = nir->info.ray_queries;
1176 
1177    nir->info.inputs_read = key->inputs_read;
1178    nir->info.patch_inputs_read = key->patch_inputs_read;
1179 
1180    brw_nir_apply_key(nir, compiler, &key->base, 8);
1181    brw_nir_lower_tes_inputs(nir, input_vue_map);
1182    brw_nir_lower_vue_outputs(nir);
1183    brw_postprocess_nir(nir, compiler, debug_enabled,
1184                        key->base.robust_flags);
1185 
1186    brw_compute_vue_map(devinfo, &prog_data->base.vue_map,
1187                        nir->info.outputs_written,
1188                        nir->info.separate_shader, 1);
1189 
1190    unsigned output_size_bytes = prog_data->base.vue_map.num_slots * 4 * 4;
1191 
1192    assert(output_size_bytes >= 1);
1193    if (output_size_bytes > GFX7_MAX_DS_URB_ENTRY_SIZE_BYTES) {
1194       params->base.error_str = ralloc_strdup(params->base.mem_ctx,
1195                                              "DS outputs exceed maximum size");
1196       return NULL;
1197    }
1198 
1199    prog_data->base.clip_distance_mask =
1200       ((1 << nir->info.clip_distance_array_size) - 1);
1201    prog_data->base.cull_distance_mask =
1202       ((1 << nir->info.cull_distance_array_size) - 1) <<
1203       nir->info.clip_distance_array_size;
1204 
1205    prog_data->include_primitive_id =
1206       BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);
1207 
1208    /* URB entry sizes are stored as a multiple of 64 bytes. */
1209    prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
1210 
1211    prog_data->base.urb_read_length = 0;
1212 
1213    STATIC_ASSERT(INTEL_TESS_PARTITIONING_INTEGER == TESS_SPACING_EQUAL - 1);
1214    STATIC_ASSERT(INTEL_TESS_PARTITIONING_ODD_FRACTIONAL ==
1215                  TESS_SPACING_FRACTIONAL_ODD - 1);
1216    STATIC_ASSERT(INTEL_TESS_PARTITIONING_EVEN_FRACTIONAL ==
1217                  TESS_SPACING_FRACTIONAL_EVEN - 1);
1218 
1219    prog_data->partitioning =
1220       (enum intel_tess_partitioning) (nir->info.tess.spacing - 1);
1221 
1222    switch (nir->info.tess._primitive_mode) {
1223    case TESS_PRIMITIVE_QUADS:
1224       prog_data->domain = INTEL_TESS_DOMAIN_QUAD;
1225       break;
1226    case TESS_PRIMITIVE_TRIANGLES:
1227       prog_data->domain = INTEL_TESS_DOMAIN_TRI;
1228       break;
1229    case TESS_PRIMITIVE_ISOLINES:
1230       prog_data->domain = INTEL_TESS_DOMAIN_ISOLINE;
1231       break;
1232    default:
1233       unreachable("invalid domain shader primitive mode");
1234    }
1235 
1236    if (nir->info.tess.point_mode) {
1237       prog_data->output_topology = INTEL_TESS_OUTPUT_TOPOLOGY_POINT;
1238    } else if (nir->info.tess._primitive_mode == TESS_PRIMITIVE_ISOLINES) {
1239       prog_data->output_topology = INTEL_TESS_OUTPUT_TOPOLOGY_LINE;
1240    } else {
1241       /* Hardware winding order is backwards from OpenGL */
1242       prog_data->output_topology =
1243          nir->info.tess.ccw ? INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CW
1244                              : INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CCW;
1245    }
1246 
1247    if (unlikely(debug_enabled)) {
1248       fprintf(stderr, "TES Input ");
1249       brw_print_vue_map(stderr, input_vue_map, MESA_SHADER_TESS_EVAL);
1250       fprintf(stderr, "TES Output ");
1251       brw_print_vue_map(stderr, &prog_data->base.vue_map,
1252                         MESA_SHADER_TESS_EVAL);
1253    }
1254 
1255    const unsigned dispatch_width = devinfo->ver >= 20 ? 16 : 8;
1256    fs_visitor v(compiler, &params->base, &key->base,
1257                 &prog_data->base.base, nir, dispatch_width,
1258                 params->base.stats != NULL, debug_enabled);
1259    if (!v.run_tes()) {
1260       params->base.error_str =
1261          ralloc_strdup(params->base.mem_ctx, v.fail_msg);
1262       return NULL;
1263    }
1264 
1265    assert(v.payload().num_regs % reg_unit(devinfo) == 0);
1266    prog_data->base.base.dispatch_grf_start_reg = v.payload().num_regs / reg_unit(devinfo);
1267 
1268    prog_data->base.dispatch_mode = INTEL_DISPATCH_MODE_SIMD8;
1269 
1270    fs_generator g(compiler, &params->base,
1271                   &prog_data->base.base, MESA_SHADER_TESS_EVAL);
1272    if (unlikely(debug_enabled)) {
1273       g.enable_debug(ralloc_asprintf(params->base.mem_ctx,
1274                                      "%s tessellation evaluation shader %s",
1275                                      nir->info.label ? nir->info.label
1276                                                      : "unnamed",
1277                                      nir->info.name));
1278    }
1279 
1280    g.generate_code(v.cfg, dispatch_width, v.shader_stats,
1281                    v.performance_analysis.require(), params->base.stats);
1282 
1283    g.add_const_data(nir->constant_data, nir->constant_data_size);
1284 
1285    return g.get_assembly();
1286 }
1287