1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
/**
 * An implementation of the transform feedback driver hooks for Haswell
 * and later hardware.  This uses MI_MATH to compute the number of vertices
 * written (for use by DrawTransformFeedback()) without any CPU<->GPU
 * synchronization which could stall.
 */
30
31 #include "brw_context.h"
32 #include "brw_state.h"
33 #include "brw_defines.h"
34 #include "intel_batchbuffer.h"
35 #include "intel_buffer_objects.h"
36 #include "main/transformfeedback.h"
37
/**
 * Layout of obj->prim_count_bo, in order of increasing offset:
 *
 *    [4x 32-bit values]: Final Number of Vertices Written  (offset 0)
 *    [4x 32-bit values]: Tally of Primitives Written So Far (TALLY_OFFSET)
 *    [4x 64-bit values]: Starting SO_NUM_PRIMS_WRITTEN Counter Snapshots
 *                        (START_OFFSET)
 *
 * Only the first group is consumed outside this file: DrawTransformFeedback()
 * copies one of those values into the 3DPRIM_VERTEX_COUNT register and
 * performs an indirect draw.  The other two groups are scratch storage for
 * the bookkeeping done here.
 */

/* Byte offset of the per-stream running tallies (just past the finals). */
#define TALLY_OFFSET (BRW_MAX_XFB_STREAMS * sizeof(uint32_t))
/* Byte offset of the per-stream 64-bit start snapshots (past the tallies). */
#define START_OFFSET (2 * TALLY_OFFSET)
52
53 /**
54 * Store the SO_NUM_PRIMS_WRITTEN counters for each stream (4 uint64_t values)
55 * to prim_count_bo.
56 */
57 static void
save_prim_start_values(struct brw_context * brw,struct brw_transform_feedback_object * obj)58 save_prim_start_values(struct brw_context *brw,
59 struct brw_transform_feedback_object *obj)
60 {
61 /* Flush any drawing so that the counters have the right values. */
62 brw_emit_mi_flush(brw);
63
64 /* Emit MI_STORE_REGISTER_MEM commands to write the values. */
65 for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
66 brw_store_register_mem64(brw, obj->prim_count_bo,
67 GEN7_SO_NUM_PRIMS_WRITTEN(i),
68 START_OFFSET + i * sizeof(uint64_t));
69 }
70 }
71
72 /**
73 * Compute the number of primitives written during our most recent
74 * transform feedback activity (the current SO_NUM_PRIMS_WRITTEN value
75 * minus the stashed "start" value), and add it to our running tally.
76 *
77 * If \p finalize is true, also compute the number of vertices written
78 * (by multiplying by the number of vertices per primitive), and store
79 * that to the "final" location.
80 *
81 * Otherwise, just overwrite the old tally with the new one.
82 */
83 static void
tally_prims_written(struct brw_context * brw,struct brw_transform_feedback_object * obj,bool finalize)84 tally_prims_written(struct brw_context *brw,
85 struct brw_transform_feedback_object *obj,
86 bool finalize)
87 {
88 /* Flush any drawing so that the counters have the right values. */
89 brw_emit_mi_flush(brw);
90
91 for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
92 /* GPR0 = Tally */
93 brw_load_register_imm32(brw, HSW_CS_GPR(0) + 4, 0);
94 brw_load_register_mem(brw, HSW_CS_GPR(0), obj->prim_count_bo,
95 TALLY_OFFSET + i * sizeof(uint32_t));
96 if (!obj->base.Paused) {
97 /* GPR1 = Start Snapshot */
98 brw_load_register_mem64(brw, HSW_CS_GPR(1), obj->prim_count_bo,
99 START_OFFSET + i * sizeof(uint64_t));
100 /* GPR2 = Ending Snapshot */
101 brw_load_register_reg64(brw, HSW_CS_GPR(2),
102 GEN7_SO_NUM_PRIMS_WRITTEN(i));
103
104 BEGIN_BATCH(9);
105 OUT_BATCH(HSW_MI_MATH | (9 - 2));
106 /* GPR1 = GPR2 (End) - GPR1 (Start) */
107 OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R2));
108 OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1));
109 OUT_BATCH(MI_MATH_ALU0(SUB));
110 OUT_BATCH(MI_MATH_ALU2(STORE, R1, ACCU));
111 /* GPR0 = GPR0 (Tally) + GPR1 (Diff) */
112 OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0));
113 OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1));
114 OUT_BATCH(MI_MATH_ALU0(ADD));
115 OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU));
116 ADVANCE_BATCH();
117 }
118
119 if (!finalize) {
120 /* Write back the new tally */
121 brw_store_register_mem32(brw, obj->prim_count_bo, HSW_CS_GPR(0),
122 TALLY_OFFSET + i * sizeof(uint32_t));
123 } else {
124 /* Convert the number of primitives to the number of vertices. */
125 if (obj->primitive_mode == GL_LINES) {
126 /* Double R0 (R0 = R0 + R0) */
127 BEGIN_BATCH(5);
128 OUT_BATCH(HSW_MI_MATH | (5 - 2));
129 OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0));
130 OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R0));
131 OUT_BATCH(MI_MATH_ALU0(ADD));
132 OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU));
133 ADVANCE_BATCH();
134 } else if (obj->primitive_mode == GL_TRIANGLES) {
135 /* Triple R0 (R1 = R0 + R0, R0 = R0 + R1) */
136 BEGIN_BATCH(9);
137 OUT_BATCH(HSW_MI_MATH | (9 - 2));
138 OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0));
139 OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R0));
140 OUT_BATCH(MI_MATH_ALU0(ADD));
141 OUT_BATCH(MI_MATH_ALU2(STORE, R1, ACCU));
142 OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0));
143 OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1));
144 OUT_BATCH(MI_MATH_ALU0(ADD));
145 OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU));
146 ADVANCE_BATCH();
147 }
148 /* Store it to the final result */
149 brw_store_register_mem32(brw, obj->prim_count_bo, HSW_CS_GPR(0),
150 i * sizeof(uint32_t));
151 }
152 }
153 }
154
155 /**
156 * BeginTransformFeedback() driver hook.
157 */
158 void
hsw_begin_transform_feedback(struct gl_context * ctx,GLenum mode,struct gl_transform_feedback_object * obj)159 hsw_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
160 struct gl_transform_feedback_object *obj)
161 {
162 struct brw_context *brw = brw_context(ctx);
163 struct brw_transform_feedback_object *brw_obj =
164 (struct brw_transform_feedback_object *) obj;
165 const struct gen_device_info *devinfo = &brw->screen->devinfo;
166
167 brw_obj->primitive_mode = mode;
168
169 /* Reset the SO buffer offsets to 0. */
170 if (devinfo->gen >= 8) {
171 brw_obj->zero_offsets = true;
172 } else {
173 BEGIN_BATCH(1 + 2 * BRW_MAX_XFB_STREAMS);
174 OUT_BATCH(MI_LOAD_REGISTER_IMM | (1 + 2 * BRW_MAX_XFB_STREAMS - 2));
175 for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
176 OUT_BATCH(GEN7_SO_WRITE_OFFSET(i));
177 OUT_BATCH(0);
178 }
179 ADVANCE_BATCH();
180 }
181
182 /* Zero out the initial tallies */
183 brw_store_data_imm64(brw, brw_obj->prim_count_bo, TALLY_OFFSET, 0ull);
184 brw_store_data_imm64(brw, brw_obj->prim_count_bo, TALLY_OFFSET + 8, 0ull);
185
186 /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */
187 save_prim_start_values(brw, brw_obj);
188 }
189
190 /**
191 * PauseTransformFeedback() driver hook.
192 */
193 void
hsw_pause_transform_feedback(struct gl_context * ctx,struct gl_transform_feedback_object * obj)194 hsw_pause_transform_feedback(struct gl_context *ctx,
195 struct gl_transform_feedback_object *obj)
196 {
197 struct brw_context *brw = brw_context(ctx);
198 struct brw_transform_feedback_object *brw_obj =
199 (struct brw_transform_feedback_object *) obj;
200 const struct gen_device_info *devinfo = &brw->screen->devinfo;
201
202 if (devinfo->is_haswell) {
203 /* Flush any drawing so that the counters have the right values. */
204 brw_emit_mi_flush(brw);
205
206 /* Save the SOL buffer offset register values. */
207 for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
208 BEGIN_BATCH(3);
209 OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
210 OUT_BATCH(GEN7_SO_WRITE_OFFSET(i));
211 OUT_RELOC(brw_obj->offset_bo, RELOC_WRITE, i * sizeof(uint32_t));
212 ADVANCE_BATCH();
213 }
214 }
215
216 /* Add any primitives written to our tally */
217 tally_prims_written(brw, brw_obj, false);
218 }
219
220 /**
221 * ResumeTransformFeedback() driver hook.
222 */
223 void
hsw_resume_transform_feedback(struct gl_context * ctx,struct gl_transform_feedback_object * obj)224 hsw_resume_transform_feedback(struct gl_context *ctx,
225 struct gl_transform_feedback_object *obj)
226 {
227 struct brw_context *brw = brw_context(ctx);
228 struct brw_transform_feedback_object *brw_obj =
229 (struct brw_transform_feedback_object *) obj;
230 const struct gen_device_info *devinfo = &brw->screen->devinfo;
231
232 if (devinfo->is_haswell) {
233 /* Reload the SOL buffer offset registers. */
234 for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
235 BEGIN_BATCH(3);
236 OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2));
237 OUT_BATCH(GEN7_SO_WRITE_OFFSET(i));
238 OUT_RELOC(brw_obj->offset_bo, RELOC_WRITE, i * sizeof(uint32_t));
239 ADVANCE_BATCH();
240 }
241 }
242
243 /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */
244 save_prim_start_values(brw, brw_obj);
245 }
246
/**
 * EndTransformFeedback() driver hook.
 *
 * \param ctx  GL context
 * \param obj  transform feedback object (a brw_transform_feedback_object)
 */
void
hsw_end_transform_feedback(struct gl_context *ctx,
                           struct gl_transform_feedback_object *obj)
{
   /* Finalizing tally: add any primitives written to the tally, convert it
    * from a primitive count to a vertex count, and store the result in the
    * "final" location of the buffer, which DrawTransformFeedback() will use
    * as the vertex count.
    */
   tally_prims_written(brw_context(ctx),
                       (struct brw_transform_feedback_object *) obj,
                       true);
}
265