1 /*
2 * Copyright © 2013 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25 * \file brw_vec4_tes.cpp
26 *
 * Tessellation evaluation shader specific code derived from the vec4_visitor class.
28 */
29
30 #include "brw_vec4_tes.h"
31 #include "brw_cfg.h"
32 #include "dev/intel_debug.h"
33
34 namespace brw {
35
vec4_tes_visitor(const struct brw_compiler * compiler,void * log_data,const struct brw_tes_prog_key * key,struct brw_tes_prog_data * prog_data,const nir_shader * shader,void * mem_ctx,bool debug_enabled)36 vec4_tes_visitor::vec4_tes_visitor(const struct brw_compiler *compiler,
37 void *log_data,
38 const struct brw_tes_prog_key *key,
39 struct brw_tes_prog_data *prog_data,
40 const nir_shader *shader,
41 void *mem_ctx,
42 bool debug_enabled)
43 : vec4_visitor(compiler, log_data, &key->base.tex, &prog_data->base,
44 shader, mem_ctx, false, debug_enabled)
45 {
46 }
47
48 void
setup_payload()49 vec4_tes_visitor::setup_payload()
50 {
51 int reg = 0;
52
53 /* The payload always contains important data in r0 and r1, which contains
54 * the URB handles that are passed on to the URB write at the end
55 * of the thread.
56 */
57 reg += 2;
58
59 reg = setup_uniforms(reg);
60
61 foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
62 for (int i = 0; i < 3; i++) {
63 if (inst->src[i].file != ATTR)
64 continue;
65
66 unsigned slot = inst->src[i].nr + inst->src[i].offset / 16;
67 struct brw_reg grf = brw_vec4_grf(reg + slot / 2, 4 * (slot % 2));
68 grf = stride(grf, 0, 4, 1);
69 grf.swizzle = inst->src[i].swizzle;
70 grf.type = inst->src[i].type;
71 grf.abs = inst->src[i].abs;
72 grf.negate = inst->src[i].negate;
73 inst->src[i] = grf;
74 }
75 }
76
77 reg += 8 * prog_data->urb_read_length;
78
79 this->first_non_payload_grf = reg;
80 }
81
82
83 void
emit_prolog()84 vec4_tes_visitor::emit_prolog()
85 {
86 input_read_header = src_reg(this, glsl_type::uvec4_type);
87 emit(TES_OPCODE_CREATE_INPUT_READ_HEADER, dst_reg(input_read_header));
88
89 this->current_annotation = NULL;
90 }
91
92
93 void
emit_urb_write_header(int mrf)94 vec4_tes_visitor::emit_urb_write_header(int mrf)
95 {
96 /* No need to do anything for DS; an implied write to this MRF will be
97 * performed by VEC4_VS_OPCODE_URB_WRITE.
98 */
99 (void) mrf;
100 }
101
102
103 vec4_instruction *
emit_urb_write_opcode(bool complete)104 vec4_tes_visitor::emit_urb_write_opcode(bool complete)
105 {
106 vec4_instruction *inst = emit(VEC4_VS_OPCODE_URB_WRITE);
107 inst->urb_write_flags = complete ?
108 BRW_URB_WRITE_EOT_COMPLETE : BRW_URB_WRITE_NO_FLAGS;
109
110 return inst;
111 }
112
113 void
nir_emit_intrinsic(nir_intrinsic_instr * instr)114 vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
115 {
116 const struct brw_tes_prog_data *tes_prog_data =
117 (const struct brw_tes_prog_data *) prog_data;
118
119 switch (instr->intrinsic) {
120 case nir_intrinsic_load_tess_coord:
121 /* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. */
122 emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
123 src_reg(brw_vec8_grf(1, 0))));
124 break;
125 case nir_intrinsic_load_tess_level_outer:
126 if (tes_prog_data->domain == BRW_TESS_DOMAIN_ISOLINE) {
127 emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
128 swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
129 BRW_SWIZZLE_ZWZW)));
130 } else {
131 emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
132 swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
133 BRW_SWIZZLE_WZYX)));
134 }
135 break;
136 case nir_intrinsic_load_tess_level_inner:
137 if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) {
138 emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
139 swizzle(src_reg(ATTR, 0, glsl_type::vec4_type),
140 BRW_SWIZZLE_WZYX)));
141 } else {
142 emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
143 src_reg(ATTR, 1, glsl_type::float_type)));
144 }
145 break;
146 case nir_intrinsic_load_primitive_id:
147 emit(TES_OPCODE_GET_PRIMITIVE_ID,
148 get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD));
149 break;
150
151 case nir_intrinsic_load_input:
152 case nir_intrinsic_load_per_vertex_input: {
153 assert(nir_dest_bit_size(instr->dest) == 32);
154 src_reg indirect_offset = get_indirect_offset(instr);
155 unsigned imm_offset = instr->const_index[0];
156 src_reg header = input_read_header;
157 unsigned first_component = nir_intrinsic_component(instr);
158
159 if (indirect_offset.file != BAD_FILE) {
160 src_reg clamped_indirect_offset = src_reg(this, glsl_type::uvec4_type);
161
162 /* Page 190 of "Volume 7: 3D Media GPGPU Engine (Haswell)" says the
163 * valid range of the offset is [0, 0FFFFFFFh].
164 */
165 emit_minmax(BRW_CONDITIONAL_L,
166 dst_reg(clamped_indirect_offset),
167 retype(indirect_offset, BRW_REGISTER_TYPE_UD),
168 brw_imm_ud(0x0fffffffu));
169
170 header = src_reg(this, glsl_type::uvec4_type);
171 emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header),
172 input_read_header, clamped_indirect_offset);
173 } else {
174 /* Arbitrarily only push up to 24 vec4 slots worth of data,
175 * which is 12 registers (since each holds 2 vec4 slots).
176 */
177 const unsigned max_push_slots = 24;
178 if (imm_offset < max_push_slots) {
179 src_reg src = src_reg(ATTR, imm_offset, glsl_type::ivec4_type);
180 src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
181
182 emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D), src));
183
184 prog_data->urb_read_length =
185 MAX2(prog_data->urb_read_length,
186 DIV_ROUND_UP(imm_offset + 1, 2));
187 break;
188 }
189 }
190
191 dst_reg temp(this, glsl_type::ivec4_type);
192 vec4_instruction *read =
193 emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
194 read->offset = imm_offset;
195 read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
196
197 src_reg src = src_reg(temp);
198 src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
199
200 /* Copy to target. We might end up with some funky writemasks landing
201 * in here, but we really don't want them in the above pseudo-ops.
202 */
203 dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
204 dst.writemask = brw_writemask_for_size(instr->num_components);
205 emit(MOV(dst, src));
206 break;
207 }
208 default:
209 vec4_visitor::nir_emit_intrinsic(instr);
210 }
211 }
212
213
214 void
emit_thread_end()215 vec4_tes_visitor::emit_thread_end()
216 {
217 /* For DS, we always end the thread by emitting a single vertex.
218 * emit_urb_write_opcode() will take care of setting the eot flag on the
219 * SEND instruction.
220 */
221 emit_vertex();
222 }
223
224 } /* namespace brw */
225