1 /*
2 * Copyright 2010 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "pipe/p_context.h"
24 #include "pipe/p_defines.h"
25 #include "pipe/p_state.h"
26 #include "util/u_inlines.h"
27
28 #include "nvc0/nvc0_context.h"
29 #include "nvc0/nvc0_query_hw.h"
30
31 #include "nvc0/nvc0_compute.xml.h"
32
33 static inline void
nvc0_program_update_context_state(struct nvc0_context * nvc0,struct nvc0_program * prog,int stage)34 nvc0_program_update_context_state(struct nvc0_context *nvc0,
35 struct nvc0_program *prog, int stage)
36 {
37 if (prog && prog->need_tls) {
38 const uint32_t flags = NV_VRAM_DOMAIN(&nvc0->screen->base) | NOUVEAU_BO_RDWR;
39 if (!nvc0->state.tls_required)
40 BCTX_REFN_bo(nvc0->bufctx_3d, 3D_TLS, flags, nvc0->screen->tls);
41 nvc0->state.tls_required |= 1 << stage;
42 } else {
43 if (nvc0->state.tls_required == (1 << stage))
44 nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TLS);
45 nvc0->state.tls_required &= ~(1 << stage);
46 }
47 }
48
49 static inline bool
nvc0_program_validate(struct nvc0_context * nvc0,struct nvc0_program * prog)50 nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)
51 {
52 if (prog->mem)
53 return true;
54
55 if (!prog->translated) {
56 prog->translated = nvc0_program_translate(
57 prog, nvc0->screen->base.device->chipset,
58 nvc0->screen->base.disk_shader_cache, &nvc0->base.debug);
59 if (!prog->translated)
60 return false;
61 }
62
63 if (likely(prog->code_size))
64 return nvc0_program_upload(nvc0, prog);
65 return true; /* stream output info only */
66 }
67
68 void
nvc0_program_sp_start_id(struct nvc0_context * nvc0,int stage,struct nvc0_program * prog)69 nvc0_program_sp_start_id(struct nvc0_context *nvc0, int stage,
70 struct nvc0_program *prog)
71 {
72 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
73
74 if (nvc0->screen->eng3d->oclass < GV100_3D_CLASS) {
75 BEGIN_NVC0(push, NVC0_3D(SP_START_ID(stage)), 1);
76 PUSH_DATA (push, prog->code_base);
77 } else {
78 BEGIN_NVC0(push, SUBC_3D(GV100_3D_SP_ADDRESS_HIGH(stage)), 2);
79 PUSH_DATAh(push, nvc0->screen->text->offset + prog->code_base);
80 PUSH_DATA (push, nvc0->screen->text->offset + prog->code_base);
81 }
82 }
83
84 void
nvc0_vertprog_validate(struct nvc0_context * nvc0)85 nvc0_vertprog_validate(struct nvc0_context *nvc0)
86 {
87 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
88 struct nvc0_program *vp = nvc0->vertprog;
89
90 if (!nvc0_program_validate(nvc0, vp))
91 return;
92 nvc0_program_update_context_state(nvc0, vp, 0);
93
94 BEGIN_NVC0(push, NVC0_3D(SP_SELECT(1)), 1);
95 PUSH_DATA (push, 0x11);
96 nvc0_program_sp_start_id(nvc0, 1, vp);
97 BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(1)), 1);
98 PUSH_DATA (push, vp->num_gprs);
99
100 // BEGIN_NVC0(push, NVC0_3D_(0x163c), 1);
101 // PUSH_DATA (push, 0);
102 }
103
104 void
nvc0_fragprog_validate(struct nvc0_context * nvc0)105 nvc0_fragprog_validate(struct nvc0_context *nvc0)
106 {
107 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
108 struct nvc0_program *fp = nvc0->fragprog;
109 struct pipe_rasterizer_state *rast = &nvc0->rast->pipe;
110
111 if (fp->fp.force_persample_interp != rast->force_persample_interp) {
112 /* Force the program to be reuploaded, which will trigger interp fixups
113 * to get applied
114 */
115 if (fp->mem)
116 nouveau_heap_free(&fp->mem);
117
118 fp->fp.force_persample_interp = rast->force_persample_interp;
119 }
120
121 /* Shade model works well enough when both colors follow it. However if one
122 * (or both) is explicitly set, then we have to go the patching route.
123 */
124 bool has_explicit_color = fp->fp.colors &&
125 (((fp->fp.colors & 1) && !fp->fp.color_interp[0]) ||
126 ((fp->fp.colors & 2) && !fp->fp.color_interp[1]));
127 bool hwflatshade = false;
128 if (has_explicit_color && fp->fp.flatshade != rast->flatshade) {
129 /* Force re-upload */
130 if (fp->mem)
131 nouveau_heap_free(&fp->mem);
132
133 fp->fp.flatshade = rast->flatshade;
134
135 /* Always smooth-shade in this mode, the shader will decide on its own
136 * when to flat-shade.
137 */
138 } else if (!has_explicit_color) {
139 hwflatshade = rast->flatshade;
140
141 /* No need to binary-patch the shader each time, make sure that it's set
142 * up for the default behaviour.
143 */
144 fp->fp.flatshade = 0;
145 }
146
147 if (hwflatshade != nvc0->state.flatshade) {
148 nvc0->state.flatshade = hwflatshade;
149 BEGIN_NVC0(push, NVC0_3D(SHADE_MODEL), 1);
150 PUSH_DATA (push, hwflatshade ? NVC0_3D_SHADE_MODEL_FLAT :
151 NVC0_3D_SHADE_MODEL_SMOOTH);
152 }
153
154 if (fp->mem && !(nvc0->dirty_3d & NVC0_NEW_3D_FRAGPROG)) {
155 return;
156 }
157
158 if (!nvc0_program_validate(nvc0, fp))
159 return;
160 nvc0_program_update_context_state(nvc0, fp, 4);
161
162 if (fp->fp.early_z != nvc0->state.early_z_forced) {
163 nvc0->state.early_z_forced = fp->fp.early_z;
164 IMMED_NVC0(push, NVC0_3D(FORCE_EARLY_FRAGMENT_TESTS), fp->fp.early_z);
165 }
166 if (fp->fp.post_depth_coverage != nvc0->state.post_depth_coverage) {
167 nvc0->state.post_depth_coverage = fp->fp.post_depth_coverage;
168 IMMED_NVC0(push, NVC0_3D(POST_DEPTH_COVERAGE),
169 fp->fp.post_depth_coverage);
170 }
171
172 BEGIN_NVC0(push, NVC0_3D(SP_SELECT(5)), 1);
173 PUSH_DATA (push, 0x51);
174 nvc0_program_sp_start_id(nvc0, 5, fp);
175 BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(5)), 1);
176 PUSH_DATA (push, fp->num_gprs);
177
178 BEGIN_NVC0(push, SUBC_3D(0x0360), 2);
179 PUSH_DATA (push, 0x20164010);
180 PUSH_DATA (push, 0x20);
181 BEGIN_NVC0(push, NVC0_3D(ZCULL_TEST_MASK), 1);
182 PUSH_DATA (push, fp->flags[0]);
183 }
184
185 void
nvc0_tctlprog_validate(struct nvc0_context * nvc0)186 nvc0_tctlprog_validate(struct nvc0_context *nvc0)
187 {
188 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
189 struct nvc0_program *tp = nvc0->tctlprog;
190
191 if (tp && nvc0_program_validate(nvc0, tp)) {
192 if (tp->tp.tess_mode != ~0) {
193 BEGIN_NVC0(push, NVC0_3D(TESS_MODE), 1);
194 PUSH_DATA (push, tp->tp.tess_mode);
195 }
196 BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1);
197 PUSH_DATA (push, 0x21);
198 nvc0_program_sp_start_id(nvc0, 2, tp);
199 BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(2)), 1);
200 PUSH_DATA (push, tp->num_gprs);
201 } else {
202 tp = nvc0->tcp_empty;
203 /* not a whole lot we can do to handle this failure */
204 if (!nvc0_program_validate(nvc0, tp))
205 assert(!"unable to validate empty tcp");
206 BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1);
207 PUSH_DATA (push, 0x20);
208 nvc0_program_sp_start_id(nvc0, 2, tp);
209 }
210 nvc0_program_update_context_state(nvc0, tp, 1);
211 }
212
213 void
nvc0_tevlprog_validate(struct nvc0_context * nvc0)214 nvc0_tevlprog_validate(struct nvc0_context *nvc0)
215 {
216 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
217 struct nvc0_program *tp = nvc0->tevlprog;
218
219 if (tp && nvc0_program_validate(nvc0, tp)) {
220 if (tp->tp.tess_mode != ~0) {
221 BEGIN_NVC0(push, NVC0_3D(TESS_MODE), 1);
222 PUSH_DATA (push, tp->tp.tess_mode);
223 }
224 BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1);
225 PUSH_DATA (push, 0x31);
226 nvc0_program_sp_start_id(nvc0, 3, tp);
227 BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(3)), 1);
228 PUSH_DATA (push, tp->num_gprs);
229 } else {
230 BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1);
231 PUSH_DATA (push, 0x30);
232 }
233 nvc0_program_update_context_state(nvc0, tp, 2);
234 }
235
236 void
nvc0_gmtyprog_validate(struct nvc0_context * nvc0)237 nvc0_gmtyprog_validate(struct nvc0_context *nvc0)
238 {
239 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
240 struct nvc0_program *gp = nvc0->gmtyprog;
241
242 /* we allow GPs with no code for specifying stream output state only */
243 if (gp && nvc0_program_validate(nvc0, gp) && gp->code_size) {
244 BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1);
245 PUSH_DATA (push, 0x41);
246 nvc0_program_sp_start_id(nvc0, 4, gp);
247 BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(4)), 1);
248 PUSH_DATA (push, gp->num_gprs);
249 } else {
250 BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1);
251 PUSH_DATA (push, 0x40);
252 }
253 nvc0_program_update_context_state(nvc0, gp, 3);
254 }
255
256 void
nvc0_compprog_validate(struct nvc0_context * nvc0)257 nvc0_compprog_validate(struct nvc0_context *nvc0)
258 {
259 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
260 struct nvc0_program *cp = nvc0->compprog;
261
262 if (cp && !nvc0_program_validate(nvc0, cp))
263 return;
264
265 BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
266 PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CODE);
267 }
268
269 void
nvc0_layer_validate(struct nvc0_context * nvc0)270 nvc0_layer_validate(struct nvc0_context *nvc0)
271 {
272 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
273 struct nvc0_program *last;
274 bool prog_selects_layer = false;
275 bool layer_viewport_relative = false;
276
277 if (nvc0->gmtyprog)
278 last = nvc0->gmtyprog;
279 else if (nvc0->tevlprog)
280 last = nvc0->tevlprog;
281 else
282 last = nvc0->vertprog;
283
284 if (last) {
285 prog_selects_layer = !!(last->hdr[13] & (1 << 9));
286 layer_viewport_relative = last->vp.layer_viewport_relative;
287 }
288
289 BEGIN_NVC0(push, NVC0_3D(LAYER), 1);
290 PUSH_DATA (push, prog_selects_layer ? NVC0_3D_LAYER_USE_GP : 0);
291 if (nvc0->screen->eng3d->oclass >= GM200_3D_CLASS) {
292 IMMED_NVC0(push, NVC0_3D(LAYER_VIEWPORT_RELATIVE),
293 layer_viewport_relative);
294 }
295 }
296
297 void
nvc0_tfb_validate(struct nvc0_context * nvc0)298 nvc0_tfb_validate(struct nvc0_context *nvc0)
299 {
300 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
301 struct nvc0_transform_feedback_state *tfb;
302 unsigned b;
303
304 if (nvc0->gmtyprog) tfb = nvc0->gmtyprog->tfb;
305 else
306 if (nvc0->tevlprog) tfb = nvc0->tevlprog->tfb;
307 else
308 tfb = nvc0->vertprog->tfb;
309
310 IMMED_NVC0(push, NVC0_3D(TFB_ENABLE), (tfb && nvc0->num_tfbbufs) ? 1 : 0);
311
312 if (tfb && tfb != nvc0->state.tfb) {
313 for (b = 0; b < 4; ++b) {
314 if (tfb->varying_count[b]) {
315 unsigned n = (tfb->varying_count[b] + 3) / 4;
316
317 BEGIN_NVC0(push, NVC0_3D(TFB_STREAM(b)), 3);
318 PUSH_DATA (push, tfb->stream[b]);
319 PUSH_DATA (push, tfb->varying_count[b]);
320 PUSH_DATA (push, tfb->stride[b]);
321 BEGIN_NVC0(push, NVC0_3D(TFB_VARYING_LOCS(b, 0)), n);
322 PUSH_DATAp(push, tfb->varying_index[b], n);
323
324 if (nvc0->tfbbuf[b])
325 nvc0_so_target(nvc0->tfbbuf[b])->stride = tfb->stride[b];
326 } else {
327 IMMED_NVC0(push, NVC0_3D(TFB_VARYING_COUNT(b)), 0);
328 }
329 }
330 }
331 nvc0->state.tfb = tfb;
332
333 if (!(nvc0->dirty_3d & NVC0_NEW_3D_TFB_TARGETS))
334 return;
335
336 for (b = 0; b < nvc0->num_tfbbufs; ++b) {
337 struct nvc0_so_target *targ = nvc0_so_target(nvc0->tfbbuf[b]);
338 struct nv04_resource *buf;
339
340 if (targ && tfb)
341 targ->stride = tfb->stride[b];
342
343 if (!targ || !targ->stride) {
344 IMMED_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 0);
345 continue;
346 }
347
348 buf = nv04_resource(targ->pipe.buffer);
349
350 BCTX_REFN(nvc0->bufctx_3d, 3D_TFB, buf, WR);
351
352 if (!(nvc0->tfbbuf_dirty & (1 << b)))
353 continue;
354
355 if (!targ->clean)
356 nvc0_hw_query_fifo_wait(nvc0, nvc0_query(targ->pq));
357 nouveau_pushbuf_space(push, 0, 0, 1);
358 BEGIN_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 5);
359 PUSH_DATA (push, 1);
360 PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset);
361 PUSH_DATA (push, buf->address + targ->pipe.buffer_offset);
362 PUSH_DATA (push, targ->pipe.buffer_size);
363 if (!targ->clean) {
364 nvc0_hw_query_pushbuf_submit(push, nvc0_query(targ->pq), 0x4);
365 } else {
366 PUSH_DATA(push, 0); /* TFB_BUFFER_OFFSET */
367 targ->clean = false;
368 }
369 }
370 for (; b < 4; ++b)
371 IMMED_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 0);
372 }
373