• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
/**
 * An implementation of the transform feedback driver hooks for Haswell
 * and later hardware.  This uses MI_MATH to compute the number of vertices
 * written (for use by DrawTransformFeedback()) without any CPU<->GPU
 * synchronization which could stall.
 */
30 
31 #include "brw_context.h"
32 #include "brw_state.h"
33 #include "brw_defines.h"
34 #include "intel_batchbuffer.h"
35 #include "intel_buffer_objects.h"
36 #include "main/transformfeedback.h"
37 
/**
 * Layout of obj->prim_count_bo.  We store several values in it:
 *
 * [4x 32-bit values]: Final Number of Vertices Written
 *                     (starts at byte 0)
 * [4x 32-bit values]: Tally of Primitives Written So Far
 *                     (starts at TALLY_OFFSET)
 * [4x 64-bit values]: Starting SO_NUM_PRIMS_WRITTEN Counter Snapshots
 *                     (starts at START_OFFSET)
 *
 * There is one value per transform feedback stream in each group.
 *
 * The first set of values is used by DrawTransformFeedback(), which
 * copies one of them into the 3DPRIM_VERTEX_COUNT register and performs
 * an indirect draw.  The other values are just temporary storage.
 */

/* Byte offset of the per-stream 32-bit tallies (just past the finals). */
#define TALLY_OFFSET (BRW_MAX_XFB_STREAMS * sizeof(uint32_t))
/* Byte offset of the per-stream 64-bit start snapshots (past the tallies). */
#define START_OFFSET (TALLY_OFFSET * 2)
52 
53 /**
54  * Store the SO_NUM_PRIMS_WRITTEN counters for each stream (4 uint64_t values)
55  * to prim_count_bo.
56  */
57 static void
save_prim_start_values(struct brw_context * brw,struct brw_transform_feedback_object * obj)58 save_prim_start_values(struct brw_context *brw,
59                        struct brw_transform_feedback_object *obj)
60 {
61    /* Flush any drawing so that the counters have the right values. */
62    brw_emit_mi_flush(brw);
63 
64    /* Emit MI_STORE_REGISTER_MEM commands to write the values. */
65    for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
66       brw_store_register_mem64(brw, obj->prim_count_bo,
67                                GEN7_SO_NUM_PRIMS_WRITTEN(i),
68                                START_OFFSET + i * sizeof(uint64_t));
69    }
70 }
71 
72 /**
73  * Compute the number of primitives written during our most recent
74  * transform feedback activity (the current SO_NUM_PRIMS_WRITTEN value
75  * minus the stashed "start" value), and add it to our running tally.
76  *
77  * If \p finalize is true, also compute the number of vertices written
78  * (by multiplying by the number of vertices per primitive), and store
79  * that to the "final" location.
80  *
81  * Otherwise, just overwrite the old tally with the new one.
82  */
83 static void
tally_prims_written(struct brw_context * brw,struct brw_transform_feedback_object * obj,bool finalize)84 tally_prims_written(struct brw_context *brw,
85                     struct brw_transform_feedback_object *obj,
86                     bool finalize)
87 {
88    /* Flush any drawing so that the counters have the right values. */
89    brw_emit_mi_flush(brw);
90 
91    for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
92       /* GPR0 = Tally */
93       brw_load_register_imm32(brw, HSW_CS_GPR(0) + 4, 0);
94       brw_load_register_mem(brw, HSW_CS_GPR(0), obj->prim_count_bo,
95                             I915_GEM_DOMAIN_INSTRUCTION,
96                             I915_GEM_DOMAIN_INSTRUCTION,
97                             TALLY_OFFSET + i * sizeof(uint32_t));
98       if (!obj->base.Paused) {
99          /* GPR1 = Start Snapshot */
100          brw_load_register_mem64(brw, HSW_CS_GPR(1), obj->prim_count_bo,
101                                  I915_GEM_DOMAIN_INSTRUCTION,
102                                  I915_GEM_DOMAIN_INSTRUCTION,
103                                  START_OFFSET + i * sizeof(uint64_t));
104          /* GPR2 = Ending Snapshot */
105          brw_load_register_reg64(brw, GEN7_SO_NUM_PRIMS_WRITTEN(i), HSW_CS_GPR(2));
106 
107          BEGIN_BATCH(9);
108          OUT_BATCH(HSW_MI_MATH | (9 - 2));
109          /* GPR1 = GPR2 (End) - GPR1 (Start) */
110          OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R2));
111          OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1));
112          OUT_BATCH(MI_MATH_ALU0(SUB));
113          OUT_BATCH(MI_MATH_ALU2(STORE, R1, ACCU));
114          /* GPR0 = GPR0 (Tally) + GPR1 (Diff) */
115          OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0));
116          OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1));
117             OUT_BATCH(MI_MATH_ALU0(ADD));
118          OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU));
119          ADVANCE_BATCH();
120       }
121 
122       if (!finalize) {
123          /* Write back the new tally */
124          brw_store_register_mem32(brw, obj->prim_count_bo, HSW_CS_GPR(0),
125                                   TALLY_OFFSET + i * sizeof(uint32_t));
126       } else {
127          /* Convert the number of primitives to the number of vertices. */
128          if (obj->primitive_mode == GL_LINES) {
129             /* Double R0 (R0 = R0 + R0) */
130             BEGIN_BATCH(5);
131             OUT_BATCH(HSW_MI_MATH | (5 - 2));
132             OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0));
133             OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R0));
134             OUT_BATCH(MI_MATH_ALU0(ADD));
135             OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU));
136             ADVANCE_BATCH();
137          } else if (obj->primitive_mode == GL_TRIANGLES) {
138             /* Triple R0 (R1 = R0 + R0, R0 = R0 + R1) */
139             BEGIN_BATCH(9);
140             OUT_BATCH(HSW_MI_MATH | (9 - 2));
141             OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0));
142             OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R0));
143             OUT_BATCH(MI_MATH_ALU0(ADD));
144             OUT_BATCH(MI_MATH_ALU2(STORE, R1, ACCU));
145             OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0));
146             OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1));
147             OUT_BATCH(MI_MATH_ALU0(ADD));
148             OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU));
149             ADVANCE_BATCH();
150          }
151          /* Store it to the final result */
152          brw_store_register_mem32(brw, obj->prim_count_bo, HSW_CS_GPR(0),
153                                   i * sizeof(uint32_t));
154       }
155    }
156 }
157 
158 /**
159  * BeginTransformFeedback() driver hook.
160  */
161 void
hsw_begin_transform_feedback(struct gl_context * ctx,GLenum mode,struct gl_transform_feedback_object * obj)162 hsw_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
163                               struct gl_transform_feedback_object *obj)
164 {
165    struct brw_context *brw = brw_context(ctx);
166    struct brw_transform_feedback_object *brw_obj =
167       (struct brw_transform_feedback_object *) obj;
168 
169    brw_obj->primitive_mode = mode;
170 
171    /* Reset the SO buffer offsets to 0. */
172    if (brw->gen >= 8) {
173       brw_obj->zero_offsets = true;
174    } else {
175       BEGIN_BATCH(1 + 2 * BRW_MAX_XFB_STREAMS);
176       OUT_BATCH(MI_LOAD_REGISTER_IMM | (1 + 2 * BRW_MAX_XFB_STREAMS - 2));
177       for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
178          OUT_BATCH(GEN7_SO_WRITE_OFFSET(i));
179          OUT_BATCH(0);
180       }
181       ADVANCE_BATCH();
182    }
183 
184    /* Zero out the initial tallies */
185    brw_store_data_imm64(brw, brw_obj->prim_count_bo, TALLY_OFFSET,     0ull);
186    brw_store_data_imm64(brw, brw_obj->prim_count_bo, TALLY_OFFSET + 8, 0ull);
187 
188    /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */
189    save_prim_start_values(brw, brw_obj);
190 }
191 
192 /**
193  * PauseTransformFeedback() driver hook.
194  */
195 void
hsw_pause_transform_feedback(struct gl_context * ctx,struct gl_transform_feedback_object * obj)196 hsw_pause_transform_feedback(struct gl_context *ctx,
197                               struct gl_transform_feedback_object *obj)
198 {
199    struct brw_context *brw = brw_context(ctx);
200    struct brw_transform_feedback_object *brw_obj =
201       (struct brw_transform_feedback_object *) obj;
202 
203    if (brw->is_haswell) {
204       /* Flush any drawing so that the counters have the right values. */
205       brw_emit_mi_flush(brw);
206 
207       /* Save the SOL buffer offset register values. */
208       for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
209          BEGIN_BATCH(3);
210          OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
211          OUT_BATCH(GEN7_SO_WRITE_OFFSET(i));
212          OUT_RELOC(brw_obj->offset_bo,
213                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
214                    i * sizeof(uint32_t));
215          ADVANCE_BATCH();
216       }
217    }
218 
219    /* Add any primitives written to our tally */
220    tally_prims_written(brw, brw_obj, false);
221 }
222 
223 /**
224  * ResumeTransformFeedback() driver hook.
225  */
226 void
hsw_resume_transform_feedback(struct gl_context * ctx,struct gl_transform_feedback_object * obj)227 hsw_resume_transform_feedback(struct gl_context *ctx,
228                                struct gl_transform_feedback_object *obj)
229 {
230    struct brw_context *brw = brw_context(ctx);
231    struct brw_transform_feedback_object *brw_obj =
232       (struct brw_transform_feedback_object *) obj;
233 
234    if (brw->is_haswell) {
235       /* Reload the SOL buffer offset registers. */
236       for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
237          BEGIN_BATCH(3);
238          OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2));
239          OUT_BATCH(GEN7_SO_WRITE_OFFSET(i));
240          OUT_RELOC(brw_obj->offset_bo,
241                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
242                    i * sizeof(uint32_t));
243          ADVANCE_BATCH();
244       }
245    }
246 
247    /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */
248    save_prim_start_values(brw, brw_obj);
249 }
250 
/**
 * EndTransformFeedback() driver hook.
 *
 * \param ctx  GL context
 * \param obj  transform feedback object being ended
 */
void
hsw_end_transform_feedback(struct gl_context *ctx,
                           struct gl_transform_feedback_object *obj)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_transform_feedback_object *brw_obj =
      (struct brw_transform_feedback_object *) obj;

   /* Add any primitives written to our tally, convert it from the number
    * of primitives written to the number of vertices written, and store
    * it in the "final" location in the buffer which DrawTransformFeedback()
    * will use as the vertex count.
    */
   tally_prims_written(brw, brw_obj, true);
}
269