1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32 #include "brw_compiler.h"
33 #include "brw_eu.h"
34 #include "brw_prim.h"
35
36 #include "dev/intel_debug.h"
37
38 #define MAX_GS_VERTS (4)
39
40 struct brw_ff_gs_compile {
41 struct brw_codegen func;
42 struct brw_ff_gs_prog_key key;
43 struct brw_ff_gs_prog_data *prog_data;
44
45 struct {
46 struct brw_reg R0;
47
48 /**
49 * Register holding streamed vertex buffer pointers -- see the Sandy
50 * Bridge PRM, volume 2 part 1, section 4.4.2 (GS Thread Payload
51 * [DevSNB]). These pointers are delivered in GRF 1.
52 */
53 struct brw_reg SVBI;
54
55 struct brw_reg vertex[MAX_GS_VERTS];
56 struct brw_reg header;
57 struct brw_reg temp;
58
59 /**
60 * Register holding destination indices for streamed buffer writes.
61 * Only used for SOL programs.
62 */
63 struct brw_reg destination_indices;
64 } reg;
65
66 /* Number of registers used to store vertex data */
67 GLuint nr_regs;
68
69 struct brw_vue_map vue_map;
70 };
71
72 /**
73 * Allocate registers for GS.
74 *
75 * If sol_program is true, then:
76 *
77 * - The thread will be spawned with the "SVBI Payload Enable" bit set, so GRF
78 * 1 needs to be set aside to hold the streamed vertex buffer indices.
79 *
80 * - The thread will need to use the destination_indices register.
81 */
brw_ff_gs_alloc_regs(struct brw_ff_gs_compile * c,GLuint nr_verts,bool sol_program)82 static void brw_ff_gs_alloc_regs(struct brw_ff_gs_compile *c,
83 GLuint nr_verts,
84 bool sol_program)
85 {
86 GLuint i = 0,j;
87
88 /* Register usage is static, precompute here:
89 */
90 c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
91
92 /* Streamed vertex buffer indices */
93 if (sol_program)
94 c->reg.SVBI = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD);
95
96 /* Payload vertices plus space for more generated vertices:
97 */
98 for (j = 0; j < nr_verts; j++) {
99 c->reg.vertex[j] = brw_vec4_grf(i, 0);
100 i += c->nr_regs;
101 }
102
103 c->reg.header = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD);
104 c->reg.temp = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD);
105
106 if (sol_program) {
107 c->reg.destination_indices =
108 retype(brw_vec4_grf(i++, 0), BRW_REGISTER_TYPE_UD);
109 }
110
111 c->prog_data->urb_read_length = c->nr_regs;
112 c->prog_data->total_grf = i;
113 }
114
115
116 /**
117 * Set up the initial value of c->reg.header register based on c->reg.R0.
118 *
119 * The following information is passed to the GS thread in R0, and needs to be
120 * included in the first URB_WRITE or FF_SYNC message sent by the GS:
121 *
122 * - DWORD 0 [31:0] handle info (Gen4 only)
123 * - DWORD 5 [7:0] FFTID
124 * - DWORD 6 [31:0] Debug info
125 * - DWORD 7 [31:0] Debug info
126 *
127 * This function sets up the above data by copying by copying the contents of
128 * R0 to the header register.
129 */
brw_ff_gs_initialize_header(struct brw_ff_gs_compile * c)130 static void brw_ff_gs_initialize_header(struct brw_ff_gs_compile *c)
131 {
132 struct brw_codegen *p = &c->func;
133 brw_MOV(p, c->reg.header, c->reg.R0);
134 }
135
136 /**
137 * Overwrite DWORD 2 of c->reg.header with the given immediate unsigned value.
138 *
139 * In URB_WRITE messages, DWORD 2 contains the fields PrimType, PrimStart,
140 * PrimEnd, Increment CL_INVOCATIONS, and SONumPrimsWritten, many of which we
141 * need to be able to update on a per-vertex basis.
142 */
brw_ff_gs_overwrite_header_dw2(struct brw_ff_gs_compile * c,unsigned dw2)143 static void brw_ff_gs_overwrite_header_dw2(struct brw_ff_gs_compile *c,
144 unsigned dw2)
145 {
146 struct brw_codegen *p = &c->func;
147 brw_MOV(p, get_element_ud(c->reg.header, 2), brw_imm_ud(dw2));
148 }
149
150 /**
151 * Overwrite DWORD 2 of c->reg.header with the primitive type from c->reg.R0.
152 *
153 * When the thread is spawned, GRF 0 contains the primitive type in bits 4:0
154 * of DWORD 2. URB_WRITE messages need the primitive type in bits 6:2 of
155 * DWORD 2. So this function extracts the primitive type field, bitshifts it
156 * appropriately, and stores it in c->reg.header.
157 */
brw_ff_gs_overwrite_header_dw2_from_r0(struct brw_ff_gs_compile * c)158 static void brw_ff_gs_overwrite_header_dw2_from_r0(struct brw_ff_gs_compile *c)
159 {
160 struct brw_codegen *p = &c->func;
161 brw_AND(p, get_element_ud(c->reg.header, 2), get_element_ud(c->reg.R0, 2),
162 brw_imm_ud(0x1f));
163 brw_SHL(p, get_element_ud(c->reg.header, 2),
164 get_element_ud(c->reg.header, 2), brw_imm_ud(2));
165 }
166
167 /**
168 * Apply an additive offset to DWORD 2 of c->reg.header.
169 *
170 * This is used to set/unset the "PrimStart" and "PrimEnd" flags appropriately
171 * for each vertex.
172 */
brw_ff_gs_offset_header_dw2(struct brw_ff_gs_compile * c,int offset)173 static void brw_ff_gs_offset_header_dw2(struct brw_ff_gs_compile *c,
174 int offset)
175 {
176 struct brw_codegen *p = &c->func;
177 brw_ADD(p, get_element_d(c->reg.header, 2), get_element_d(c->reg.header, 2),
178 brw_imm_d(offset));
179 }
180
181
182 /**
183 * Emit a vertex using the URB_WRITE message. Use the contents of
184 * c->reg.header for the message header, and the registers starting at \c vert
185 * for the vertex data.
186 *
187 * If \c last is true, then this is the last vertex, so no further URB space
188 * should be allocated, and this message should end the thread.
189 *
190 * If \c last is false, then a new URB entry will be allocated, and its handle
191 * will be stored in DWORD 0 of c->reg.header for use in the next URB_WRITE
192 * message.
193 */
brw_ff_gs_emit_vue(struct brw_ff_gs_compile * c,struct brw_reg vert,bool last)194 static void brw_ff_gs_emit_vue(struct brw_ff_gs_compile *c,
195 struct brw_reg vert,
196 bool last)
197 {
198 struct brw_codegen *p = &c->func;
199 int write_offset = 0;
200 bool complete = false;
201
202 do {
203 /* We can't write more than 14 registers at a time to the URB */
204 int write_len = MIN2(c->nr_regs - write_offset, 14);
205 if (write_len == c->nr_regs - write_offset)
206 complete = true;
207
208 /* Copy the vertex from vertn into m1..mN+1:
209 */
210 brw_copy8(p, brw_message_reg(1), offset(vert, write_offset), write_len);
211
212 /* Send the vertex data to the URB. If this is the last write for this
213 * vertex, then we mark it as complete, and either end the thread or
214 * allocate another vertex URB entry (depending whether this is the last
215 * vertex).
216 */
217 enum brw_urb_write_flags flags;
218 if (!complete)
219 flags = BRW_URB_WRITE_NO_FLAGS;
220 else if (last)
221 flags = BRW_URB_WRITE_EOT_COMPLETE;
222 else
223 flags = BRW_URB_WRITE_ALLOCATE_COMPLETE;
224 brw_urb_WRITE(p,
225 (flags & BRW_URB_WRITE_ALLOCATE) ? c->reg.temp
226 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
227 0,
228 c->reg.header,
229 flags,
230 write_len + 1, /* msg length */
231 (flags & BRW_URB_WRITE_ALLOCATE) ? 1
232 : 0, /* response length */
233 write_offset, /* urb offset */
234 BRW_URB_SWIZZLE_NONE);
235 write_offset += write_len;
236 } while (!complete);
237
238 if (!last) {
239 brw_MOV(p, get_element_ud(c->reg.header, 0),
240 get_element_ud(c->reg.temp, 0));
241 }
242 }
243
244 /**
245 * Send an FF_SYNC message to ensure that all previously spawned GS threads
246 * have finished sending primitives down the pipeline, and to allocate a URB
247 * entry for the first output vertex. Only needed on Ironlake+.
248 *
249 * This function modifies c->reg.header: in DWORD 1, it stores num_prim (which
250 * is needed by the FF_SYNC message), and in DWORD 0, it stores the handle to
251 * the allocated URB entry (which will be needed by the URB_WRITE meesage that
252 * follows).
253 */
brw_ff_gs_ff_sync(struct brw_ff_gs_compile * c,int num_prim)254 static void brw_ff_gs_ff_sync(struct brw_ff_gs_compile *c, int num_prim)
255 {
256 struct brw_codegen *p = &c->func;
257
258 brw_MOV(p, get_element_ud(c->reg.header, 1), brw_imm_ud(num_prim));
259 brw_ff_sync(p,
260 c->reg.temp,
261 0,
262 c->reg.header,
263 1, /* allocate */
264 1, /* response length */
265 0 /* eot */);
266 brw_MOV(p, get_element_ud(c->reg.header, 0),
267 get_element_ud(c->reg.temp, 0));
268 }
269
270
271 static void
brw_ff_gs_quads(struct brw_ff_gs_compile * c,const struct brw_ff_gs_prog_key * key)272 brw_ff_gs_quads(struct brw_ff_gs_compile *c,
273 const struct brw_ff_gs_prog_key *key)
274 {
275 brw_ff_gs_alloc_regs(c, 4, false);
276 brw_ff_gs_initialize_header(c);
277 /* Use polygons for correct edgeflag behaviour. Note that vertex 3
278 * is the PV for quads, but vertex 0 for polygons:
279 */
280 if (c->func.devinfo->ver == 5)
281 brw_ff_gs_ff_sync(c, 1);
282 brw_ff_gs_overwrite_header_dw2(
283 c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT)
284 | URB_WRITE_PRIM_START));
285 if (key->pv_first) {
286 brw_ff_gs_emit_vue(c, c->reg.vertex[0], 0);
287 brw_ff_gs_overwrite_header_dw2(
288 c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT);
289 brw_ff_gs_emit_vue(c, c->reg.vertex[1], 0);
290 brw_ff_gs_emit_vue(c, c->reg.vertex[2], 0);
291 brw_ff_gs_overwrite_header_dw2(
292 c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT)
293 | URB_WRITE_PRIM_END));
294 brw_ff_gs_emit_vue(c, c->reg.vertex[3], 1);
295 }
296 else {
297 brw_ff_gs_emit_vue(c, c->reg.vertex[3], 0);
298 brw_ff_gs_overwrite_header_dw2(
299 c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT);
300 brw_ff_gs_emit_vue(c, c->reg.vertex[0], 0);
301 brw_ff_gs_emit_vue(c, c->reg.vertex[1], 0);
302 brw_ff_gs_overwrite_header_dw2(
303 c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT)
304 | URB_WRITE_PRIM_END));
305 brw_ff_gs_emit_vue(c, c->reg.vertex[2], 1);
306 }
307 }
308
309 static void
brw_ff_gs_quad_strip(struct brw_ff_gs_compile * c,const struct brw_ff_gs_prog_key * key)310 brw_ff_gs_quad_strip(struct brw_ff_gs_compile *c,
311 const struct brw_ff_gs_prog_key *key)
312 {
313 brw_ff_gs_alloc_regs(c, 4, false);
314 brw_ff_gs_initialize_header(c);
315
316 if (c->func.devinfo->ver == 5)
317 brw_ff_gs_ff_sync(c, 1);
318 brw_ff_gs_overwrite_header_dw2(
319 c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT)
320 | URB_WRITE_PRIM_START));
321 if (key->pv_first) {
322 brw_ff_gs_emit_vue(c, c->reg.vertex[0], 0);
323 brw_ff_gs_overwrite_header_dw2(
324 c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT);
325 brw_ff_gs_emit_vue(c, c->reg.vertex[1], 0);
326 brw_ff_gs_emit_vue(c, c->reg.vertex[2], 0);
327 brw_ff_gs_overwrite_header_dw2(
328 c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT)
329 | URB_WRITE_PRIM_END));
330 brw_ff_gs_emit_vue(c, c->reg.vertex[3], 1);
331 }
332 else {
333 brw_ff_gs_emit_vue(c, c->reg.vertex[2], 0);
334 brw_ff_gs_overwrite_header_dw2(
335 c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT);
336 brw_ff_gs_emit_vue(c, c->reg.vertex[3], 0);
337 brw_ff_gs_emit_vue(c, c->reg.vertex[0], 0);
338 brw_ff_gs_overwrite_header_dw2(
339 c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT)
340 | URB_WRITE_PRIM_END));
341 brw_ff_gs_emit_vue(c, c->reg.vertex[1], 1);
342 }
343 }
344
brw_ff_gs_lines(struct brw_ff_gs_compile * c)345 static void brw_ff_gs_lines(struct brw_ff_gs_compile *c)
346 {
347 brw_ff_gs_alloc_regs(c, 2, false);
348 brw_ff_gs_initialize_header(c);
349
350 if (c->func.devinfo->ver == 5)
351 brw_ff_gs_ff_sync(c, 1);
352 brw_ff_gs_overwrite_header_dw2(
353 c, ((_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT)
354 | URB_WRITE_PRIM_START));
355 brw_ff_gs_emit_vue(c, c->reg.vertex[0], 0);
356 brw_ff_gs_overwrite_header_dw2(
357 c, ((_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT)
358 | URB_WRITE_PRIM_END));
359 brw_ff_gs_emit_vue(c, c->reg.vertex[1], 1);
360 }
361
362 /**
363 * Generate the geometry shader program used on Gen6 to perform stream output
364 * (transform feedback).
365 */
366 static void
gfx6_sol_program(struct brw_ff_gs_compile * c,const struct brw_ff_gs_prog_key * key,unsigned num_verts,bool check_edge_flags)367 gfx6_sol_program(struct brw_ff_gs_compile *c, const struct brw_ff_gs_prog_key *key,
368 unsigned num_verts, bool check_edge_flags)
369 {
370 struct brw_codegen *p = &c->func;
371 brw_inst *inst;
372 c->prog_data->svbi_postincrement_value = num_verts;
373
374 brw_ff_gs_alloc_regs(c, num_verts, true);
375 brw_ff_gs_initialize_header(c);
376
377 if (key->num_transform_feedback_bindings > 0) {
378 unsigned vertex, binding;
379 struct brw_reg destination_indices_uw =
380 vec8(retype(c->reg.destination_indices, BRW_REGISTER_TYPE_UW));
381
382 /* Note: since we use the binding table to keep track of buffer offsets
383 * and stride, the GS doesn't need to keep track of a separate pointer
384 * into each buffer; it uses a single pointer which increments by 1 for
385 * each vertex. So we use SVBI0 for this pointer, regardless of whether
386 * transform feedback is in interleaved or separate attribs mode.
387 *
388 * Make sure that the buffers have enough room for all the vertices.
389 */
390 brw_ADD(p, get_element_ud(c->reg.temp, 0),
391 get_element_ud(c->reg.SVBI, 0), brw_imm_ud(num_verts));
392 brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE,
393 get_element_ud(c->reg.temp, 0),
394 get_element_ud(c->reg.SVBI, 4));
395 brw_IF(p, BRW_EXECUTE_1);
396
397 /* Compute the destination indices to write to. Usually we use SVBI[0]
398 * + (0, 1, 2). However, for odd-numbered triangles in tristrips, the
399 * vertices come down the pipeline in reversed winding order, so we need
400 * to flip the order when writing to the transform feedback buffer. To
401 * ensure that flatshading accuracy is preserved, we need to write them
402 * in order SVBI[0] + (0, 2, 1) if we're using the first provoking
403 * vertex convention, and in order SVBI[0] + (1, 0, 2) if we're using
404 * the last provoking vertex convention.
405 *
406 * Note: since brw_imm_v can only be used in instructions in
407 * packed-word execution mode, and SVBI is a double-word, we need to
408 * first move the appropriate immediate constant ((0, 1, 2), (0, 2, 1),
409 * or (1, 0, 2)) to the destination_indices register, and then add SVBI
410 * using a separate instruction. Also, since the immediate constant is
411 * expressed as packed words, and we need to load double-words into
412 * destination_indices, we need to intersperse zeros to fill the upper
413 * halves of each double-word.
414 */
415 brw_MOV(p, destination_indices_uw,
416 brw_imm_v(0x00020100)); /* (0, 1, 2) */
417 if (num_verts == 3) {
418 /* Get primitive type into temp register. */
419 brw_AND(p, get_element_ud(c->reg.temp, 0),
420 get_element_ud(c->reg.R0, 2), brw_imm_ud(0x1f));
421
422 /* Test if primitive type is TRISTRIP_REVERSE. We need to do this as
423 * an 8-wide comparison so that the conditional MOV that follows
424 * moves all 8 words correctly.
425 */
426 brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_EQ,
427 get_element_ud(c->reg.temp, 0),
428 brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE));
429
430 /* If so, then overwrite destination_indices_uw with the appropriate
431 * reordering.
432 */
433 inst = brw_MOV(p, destination_indices_uw,
434 brw_imm_v(key->pv_first ? 0x00010200 /* (0, 2, 1) */
435 : 0x00020001)); /* (1, 0, 2) */
436 brw_inst_set_pred_control(p->devinfo, inst, BRW_PREDICATE_NORMAL);
437 }
438
439 assert(c->reg.destination_indices.width == BRW_EXECUTE_4);
440 brw_push_insn_state(p);
441 brw_set_default_exec_size(p, BRW_EXECUTE_4);
442 brw_ADD(p, c->reg.destination_indices,
443 c->reg.destination_indices, get_element_ud(c->reg.SVBI, 0));
444 brw_pop_insn_state(p);
445 /* For each vertex, generate code to output each varying using the
446 * appropriate binding table entry.
447 */
448 for (vertex = 0; vertex < num_verts; ++vertex) {
449 /* Set up the correct destination index for this vertex */
450 brw_MOV(p, get_element_ud(c->reg.header, 5),
451 get_element_ud(c->reg.destination_indices, vertex));
452
453 for (binding = 0; binding < key->num_transform_feedback_bindings;
454 ++binding) {
455 unsigned char varying =
456 key->transform_feedback_bindings[binding];
457 unsigned char slot = c->vue_map.varying_to_slot[varying];
458 /* From the Sandybridge PRM, Volume 2, Part 1, Section 4.5.1:
459 *
460 * "Prior to End of Thread with a URB_WRITE, the kernel must
461 * ensure that all writes are complete by sending the final
462 * write as a committed write."
463 */
464 bool final_write =
465 binding == key->num_transform_feedback_bindings - 1 &&
466 vertex == num_verts - 1;
467 struct brw_reg vertex_slot = c->reg.vertex[vertex];
468 vertex_slot.nr += slot / 2;
469 vertex_slot.subnr = (slot % 2) * 16;
470 /* gl_PointSize is stored in VARYING_SLOT_PSIZ.w. */
471 vertex_slot.swizzle = varying == VARYING_SLOT_PSIZ
472 ? BRW_SWIZZLE_WWWW : key->transform_feedback_swizzles[binding];
473 brw_set_default_access_mode(p, BRW_ALIGN_16);
474 brw_push_insn_state(p);
475 brw_set_default_exec_size(p, BRW_EXECUTE_4);
476
477 brw_MOV(p, stride(c->reg.header, 4, 4, 1),
478 retype(vertex_slot, BRW_REGISTER_TYPE_UD));
479 brw_pop_insn_state(p);
480
481 brw_set_default_access_mode(p, BRW_ALIGN_1);
482 brw_svb_write(p,
483 final_write ? c->reg.temp : brw_null_reg(), /* dest */
484 1, /* msg_reg_nr */
485 c->reg.header, /* src0 */
486 BRW_GFX6_SOL_BINDING_START + binding, /* binding_table_index */
487 final_write); /* send_commit_msg */
488 }
489 }
490 brw_ENDIF(p);
491
492 /* Now, reinitialize the header register from R0 to restore the parts of
493 * the register that we overwrote while streaming out transform feedback
494 * data.
495 */
496 brw_ff_gs_initialize_header(c);
497
498 /* Finally, wait for the write commit to occur so that we can proceed to
499 * other things safely.
500 *
501 * From the Sandybridge PRM, Volume 4, Part 1, Section 3.3:
502 *
503 * The write commit does not modify the destination register, but
504 * merely clears the dependency associated with the destination
505 * register. Thus, a simple “mov” instruction using the register as a
506 * source is sufficient to wait for the write commit to occur.
507 */
508 brw_MOV(p, c->reg.temp, c->reg.temp);
509 }
510
511 brw_ff_gs_ff_sync(c, 1);
512
513 brw_ff_gs_overwrite_header_dw2_from_r0(c);
514 switch (num_verts) {
515 case 1:
516 brw_ff_gs_offset_header_dw2(c,
517 URB_WRITE_PRIM_START | URB_WRITE_PRIM_END);
518 brw_ff_gs_emit_vue(c, c->reg.vertex[0], true);
519 break;
520 case 2:
521 brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_START);
522 brw_ff_gs_emit_vue(c, c->reg.vertex[0], false);
523 brw_ff_gs_offset_header_dw2(c,
524 URB_WRITE_PRIM_END - URB_WRITE_PRIM_START);
525 brw_ff_gs_emit_vue(c, c->reg.vertex[1], true);
526 break;
527 case 3:
528 if (check_edge_flags) {
529 /* Only emit vertices 0 and 1 if this is the first triangle of the
530 * polygon. Otherwise they are redundant.
531 */
532 brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
533 get_element_ud(c->reg.R0, 2),
534 brw_imm_ud(BRW_GS_EDGE_INDICATOR_0));
535 brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);
536 brw_IF(p, BRW_EXECUTE_1);
537 }
538 brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_START);
539 brw_ff_gs_emit_vue(c, c->reg.vertex[0], false);
540 brw_ff_gs_offset_header_dw2(c, -URB_WRITE_PRIM_START);
541 brw_ff_gs_emit_vue(c, c->reg.vertex[1], false);
542 if (check_edge_flags) {
543 brw_ENDIF(p);
544 /* Only emit vertex 2 in PRIM_END mode if this is the last triangle
545 * of the polygon. Otherwise leave the primitive incomplete because
546 * there are more polygon vertices coming.
547 */
548 brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
549 get_element_ud(c->reg.R0, 2),
550 brw_imm_ud(BRW_GS_EDGE_INDICATOR_1));
551 brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);
552 brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL);
553 }
554 brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_END);
555 brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
556 brw_ff_gs_emit_vue(c, c->reg.vertex[2], true);
557 break;
558 }
559 }
560
561 const unsigned *
brw_compile_ff_gs_prog(struct brw_compiler * compiler,void * mem_ctx,const struct brw_ff_gs_prog_key * key,struct brw_ff_gs_prog_data * prog_data,struct brw_vue_map * vue_map,unsigned * final_assembly_size)562 brw_compile_ff_gs_prog(struct brw_compiler *compiler,
563 void *mem_ctx,
564 const struct brw_ff_gs_prog_key *key,
565 struct brw_ff_gs_prog_data *prog_data,
566 struct brw_vue_map *vue_map,
567 unsigned *final_assembly_size)
568 {
569 struct brw_ff_gs_compile c;
570 const GLuint *program;
571
572 memset(&c, 0, sizeof(c));
573
574 c.key = *key;
575 c.vue_map = *vue_map;
576 c.nr_regs = (c.vue_map.num_slots + 1)/2;
577 c.prog_data = prog_data;
578
579 mem_ctx = ralloc_context(NULL);
580
581 /* Begin the compilation:
582 */
583 brw_init_codegen(&compiler->isa, &c.func, mem_ctx);
584
585 c.func.single_program_flow = 1;
586
587 /* For some reason the thread is spawned with only 4 channels
588 * unmasked.
589 */
590 brw_set_default_mask_control(&c.func, BRW_MASK_DISABLE);
591
592 if (compiler->devinfo->ver >= 6) {
593 unsigned num_verts;
594 bool check_edge_flag;
595 /* On Sandybridge, we use the GS for implementing transform feedback
596 * (called "Stream Out" in the PRM).
597 */
598 switch (key->primitive) {
599 case _3DPRIM_POINTLIST:
600 num_verts = 1;
601 check_edge_flag = false;
602 break;
603 case _3DPRIM_LINELIST:
604 case _3DPRIM_LINESTRIP:
605 case _3DPRIM_LINELOOP:
606 num_verts = 2;
607 check_edge_flag = false;
608 break;
609 case _3DPRIM_TRILIST:
610 case _3DPRIM_TRIFAN:
611 case _3DPRIM_TRISTRIP:
612 case _3DPRIM_RECTLIST:
613 num_verts = 3;
614 check_edge_flag = false;
615 break;
616 case _3DPRIM_QUADLIST:
617 case _3DPRIM_QUADSTRIP:
618 case _3DPRIM_POLYGON:
619 num_verts = 3;
620 check_edge_flag = true;
621 break;
622 default:
623 unreachable("Unexpected primitive type in Gen6 SOL program.");
624 }
625 gfx6_sol_program(&c, key, num_verts, check_edge_flag);
626 } else {
627 /* On Gen4-5, we use the GS to decompose certain types of primitives.
628 * Note that primitives which don't require a GS program have already
629 * been weeded out by now.
630 */
631 switch (key->primitive) {
632 case _3DPRIM_QUADLIST:
633 brw_ff_gs_quads( &c, key );
634 break;
635 case _3DPRIM_QUADSTRIP:
636 brw_ff_gs_quad_strip( &c, key );
637 break;
638 case _3DPRIM_LINELOOP:
639 brw_ff_gs_lines( &c );
640 break;
641 default:
642 return NULL;
643 }
644 }
645
646 brw_compact_instructions(&c.func, 0, NULL);
647
648 /* get the program
649 */
650 program = brw_get_program(&c.func, final_assembly_size);
651
652 if (INTEL_DEBUG(DEBUG_GS)) {
653 fprintf(stderr, "gs:\n");
654 brw_disassemble_with_labels(&compiler->isa, c.func.store,
655 0, *final_assembly_size, stderr);
656 fprintf(stderr, "\n");
657 }
658
659 return program;
660 }
661
662