1 /*
2 * Copyright 2010 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "pipe/p_defines.h"
24 #include "util/u_framebuffer.h"
25 #include "util/u_upload_mgr.h"
26
27 #include "nv50/nv50_context.h"
28 #include "nv50/nv50_screen.h"
29 #include "nv50/nv50_resource.h"
30
31 static void
nv50_flush(struct pipe_context * pipe,struct pipe_fence_handle ** fence,unsigned flags)32 nv50_flush(struct pipe_context *pipe,
33 struct pipe_fence_handle **fence,
34 unsigned flags)
35 {
36 struct nouveau_context *context = nouveau_context(pipe);
37 struct nouveau_screen *screen = context->screen;
38
39 if (fence)
40 nouveau_fence_ref(screen->fence.current, (struct nouveau_fence **)fence);
41
42 PUSH_KICK(context->pushbuf);
43
44 nouveau_context_update_frame_stats(nouveau_context(pipe));
45 }
46
/* pipe_context::texture_barrier - make previous framebuffer writes visible
 * to subsequent texture fetches.
 */
static void
nv50_texture_barrier(struct pipe_context *pipe, unsigned flags)
{
   struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf;

   /* Serialize the 3D engine so all prior rendering has completed. */
   BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
   PUSH_DATA (push, 0);
   /* Then flush the texture cache (0x20 — same value used by
    * nv50_memory_barrier for PIPE_BARRIER_TEXTURE). */
   BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1);
   PUSH_DATA (push, 0x20);
}
57
58 static void
nv50_memory_barrier(struct pipe_context * pipe,unsigned flags)59 nv50_memory_barrier(struct pipe_context *pipe, unsigned flags)
60 {
61 struct nv50_context *nv50 = nv50_context(pipe);
62 struct nouveau_pushbuf *push = nv50->base.pushbuf;
63 int i, s;
64
65 if (flags & PIPE_BARRIER_MAPPED_BUFFER) {
66 for (i = 0; i < nv50->num_vtxbufs; ++i) {
67 if (!nv50->vtxbuf[i].buffer.resource && !nv50->vtxbuf[i].is_user_buffer)
68 continue;
69 if (nv50->vtxbuf[i].buffer.resource->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT)
70 nv50->base.vbo_dirty = true;
71 }
72
73 for (s = 0; s < NV50_MAX_3D_SHADER_STAGES && !nv50->cb_dirty; ++s) {
74 uint32_t valid = nv50->constbuf_valid[s];
75
76 while (valid && !nv50->cb_dirty) {
77 const unsigned i = ffs(valid) - 1;
78 struct pipe_resource *res;
79
80 valid &= ~(1 << i);
81 if (nv50->constbuf[s][i].user)
82 continue;
83
84 res = nv50->constbuf[s][i].u.buf;
85 if (!res)
86 continue;
87
88 if (res->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT)
89 nv50->cb_dirty = true;
90 }
91 }
92 } else {
93 BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
94 PUSH_DATA (push, 0);
95 }
96
97 /* If we're going to texture from a buffer/image written by a shader, we
98 * must flush the texture cache.
99 */
100 if (flags & PIPE_BARRIER_TEXTURE) {
101 BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1);
102 PUSH_DATA (push, 0x20);
103 }
104
105 if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
106 nv50->cb_dirty = true;
107 if (flags & (PIPE_BARRIER_VERTEX_BUFFER | PIPE_BARRIER_INDEX_BUFFER))
108 nv50->base.vbo_dirty = true;
109 }
110
111 static void
nv50_emit_string_marker(struct pipe_context * pipe,const char * str,int len)112 nv50_emit_string_marker(struct pipe_context *pipe, const char *str, int len)
113 {
114 struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf;
115 int string_words = len / 4;
116 int data_words;
117
118 if (len <= 0)
119 return;
120 string_words = MIN2(string_words, NV04_PFIFO_MAX_PACKET_LEN);
121 if (string_words == NV04_PFIFO_MAX_PACKET_LEN)
122 data_words = string_words;
123 else
124 data_words = string_words + !!(len & 3);
125 BEGIN_NI04(push, SUBC_3D(NV04_GRAPH_NOP), data_words);
126 if (string_words)
127 PUSH_DATAp(push, str, string_words);
128 if (string_words != data_words) {
129 int data = 0;
130 memcpy(&data, &str[string_words * 4], len & 3);
131 PUSH_DATA (push, data);
132 }
133 }
134
135 void
nv50_default_kick_notify(struct nouveau_pushbuf * push)136 nv50_default_kick_notify(struct nouveau_pushbuf *push)
137 {
138 struct nv50_screen *screen = push->user_priv;
139
140 if (screen) {
141 nouveau_fence_next(&screen->base);
142 nouveau_fence_update(&screen->base, true);
143 if (screen->cur_ctx)
144 screen->cur_ctx->state.flushed = true;
145 }
146 }
147
148 static void
nv50_context_unreference_resources(struct nv50_context * nv50)149 nv50_context_unreference_resources(struct nv50_context *nv50)
150 {
151 unsigned s, i;
152
153 nouveau_bufctx_del(&nv50->bufctx_3d);
154 nouveau_bufctx_del(&nv50->bufctx);
155 nouveau_bufctx_del(&nv50->bufctx_cp);
156
157 util_unreference_framebuffer_state(&nv50->framebuffer);
158
159 assert(nv50->num_vtxbufs <= PIPE_MAX_ATTRIBS);
160 for (i = 0; i < nv50->num_vtxbufs; ++i)
161 pipe_vertex_buffer_unreference(&nv50->vtxbuf[i]);
162
163 for (s = 0; s < NV50_MAX_SHADER_STAGES; ++s) {
164 assert(nv50->num_textures[s] <= PIPE_MAX_SAMPLERS);
165 for (i = 0; i < nv50->num_textures[s]; ++i)
166 pipe_sampler_view_reference(&nv50->textures[s][i], NULL);
167
168 for (i = 0; i < NV50_MAX_PIPE_CONSTBUFS; ++i)
169 if (!nv50->constbuf[s][i].user)
170 pipe_resource_reference(&nv50->constbuf[s][i].u.buf, NULL);
171 }
172
173 for (i = 0; i < nv50->global_residents.size / sizeof(struct pipe_resource *);
174 ++i) {
175 struct pipe_resource **res = util_dynarray_element(
176 &nv50->global_residents, struct pipe_resource *, i);
177 pipe_resource_reference(res, NULL);
178 }
179 util_dynarray_fini(&nv50->global_residents);
180 }
181
/* pipe_context::destroy - tear down an nv50 context.
 *
 * Ordering matters: the pushbuf is detached and kicked while the buffer
 * contexts still hold their references, and only afterwards are the
 * context's resource references released.
 */
static void
nv50_destroy(struct pipe_context *pipe)
{
   struct nv50_context *nv50 = nv50_context(pipe);

   if (nv50->screen->cur_ctx == nv50) {
      nv50->screen->cur_ctx = NULL;
      /* Save off the state in case another context gets created */
      nv50->screen->save_state = nv50->state;
   }

   /* const_uploader aliases stream_uploader (see nv50_create), so only the
    * stream uploader is destroyed. */
   if (nv50->base.pipe.stream_uploader)
      u_upload_destroy(nv50->base.pipe.stream_uploader);

   /* Detach our bufctx from the shared pushbuf and submit any pending
    * commands before the resources they reference are released below. */
   nouveau_pushbuf_bufctx(nv50->base.pushbuf, NULL);
   nouveau_pushbuf_kick(nv50->base.pushbuf, nv50->base.pushbuf->channel);

   nv50_context_unreference_resources(nv50);

   FREE(nv50->blit);

   nouveau_context_destroy(&nv50->base);
}
205
/* Callback invoked when a resource's backing storage is replaced: flag
 * every binding point that references @res for re-validation.
 *
 * @ref is the number of outstanding references still to be accounted for;
 * it is decremented for each binding found, and the walk stops early once
 * it reaches zero (no further bindings can exist). Returns the remaining
 * reference count.
 */
static int
nv50_invalidate_resource_storage(struct nouveau_context *ctx,
                                 struct pipe_resource *res,
                                 int ref)
{
   struct nv50_context *nv50 = nv50_context(&ctx->pipe);
   /* Buffers created with no bind flags are treated as vertex buffers. */
   unsigned bind = res->bind ? res->bind : PIPE_BIND_VERTEX_BUFFER;
   unsigned s, i;

   /* Color render targets. */
   if (bind & PIPE_BIND_RENDER_TARGET) {
      assert(nv50->framebuffer.nr_cbufs <= PIPE_MAX_COLOR_BUFS);
      for (i = 0; i < nv50->framebuffer.nr_cbufs; ++i) {
         if (nv50->framebuffer.cbufs[i] &&
             nv50->framebuffer.cbufs[i]->texture == res) {
            nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER;
            nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_FB);
            if (!--ref)
               return ref;
         }
      }
   }
   /* Depth/stencil buffer. */
   if (bind & PIPE_BIND_DEPTH_STENCIL) {
      if (nv50->framebuffer.zsbuf &&
          nv50->framebuffer.zsbuf->texture == res) {
         nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER;
         nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_FB);
         if (!--ref)
            return ref;
      }
   }

   if (bind & (PIPE_BIND_VERTEX_BUFFER |
               PIPE_BIND_INDEX_BUFFER |
               PIPE_BIND_CONSTANT_BUFFER |
               PIPE_BIND_STREAM_OUTPUT |
               PIPE_BIND_SAMPLER_VIEW)) {

      /* Vertex buffers. */
      assert(nv50->num_vtxbufs <= PIPE_MAX_ATTRIBS);
      for (i = 0; i < nv50->num_vtxbufs; ++i) {
         if (nv50->vtxbuf[i].buffer.resource == res) {
            nv50->dirty_3d |= NV50_NEW_3D_ARRAYS;
            nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_VERTEX);
            if (!--ref)
               return ref;
         }
      }

      /* Sampler views, per shader stage; compute uses its own dirty
       * bits and bufctx. */
      for (s = 0; s < NV50_MAX_SHADER_STAGES; ++s) {
      assert(nv50->num_textures[s] <= PIPE_MAX_SAMPLERS);
      for (i = 0; i < nv50->num_textures[s]; ++i) {
         if (nv50->textures[s][i] &&
             nv50->textures[s][i]->texture == res) {
            if (unlikely(s == NV50_SHADER_STAGE_COMPUTE)) {
               nv50->dirty_cp |= NV50_NEW_CP_TEXTURES;
               nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_TEXTURES);
            } else {
               nv50->dirty_3d |= NV50_NEW_3D_TEXTURES;
               nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TEXTURES);
            }
            if (!--ref)
               return ref;
         }
      }
      }

      /* Constant buffers, per stage and slot (user constbufs hold no
       * pipe_resource and are skipped via the valid mask / user flag). */
      for (s = 0; s < NV50_MAX_SHADER_STAGES; ++s) {
      for (i = 0; i < NV50_MAX_PIPE_CONSTBUFS; ++i) {
         if (!(nv50->constbuf_valid[s] & (1 << i)))
            continue;
         if (!nv50->constbuf[s][i].user &&
             nv50->constbuf[s][i].u.buf == res) {
            nv50->constbuf_dirty[s] |= 1 << i;
            if (unlikely(s == NV50_SHADER_STAGE_COMPUTE)) {
               nv50->dirty_cp |= NV50_NEW_CP_CONSTBUF;
               nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_CB(i));
            } else {
               nv50->dirty_3d |= NV50_NEW_3D_CONSTBUF;
               nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_CB(s, i));
            }
            if (!--ref)
               return ref;
         }
      }
      }
   }

   return ref;
}
294
295 static void
296 nv50_context_get_sample_position(struct pipe_context *, unsigned, unsigned,
297 float *);
298
/* Create an nv50 pipe_context on @pscreen.
 *
 * Allocates the context, sets up buffer contexts and uploaders, installs
 * the pipe_context vtable entries, selects video decoding hooks by chipset,
 * and pre-references the screen's permanent buffers in the bufctxs.
 * Returns NULL on failure (all partially acquired resources are released
 * on the out_err path; nv50 is zero-initialized so the guards are safe).
 */
struct pipe_context *
nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
{
   struct nv50_screen *screen = nv50_screen(pscreen);
   struct nv50_context *nv50;
   struct pipe_context *pipe;
   int ret;
   uint32_t flags;

   nv50 = CALLOC_STRUCT(nv50_context);
   if (!nv50)
      return NULL;
   pipe = &nv50->base.pipe;

   if (!nv50_blitctx_create(nv50))
      goto out_err;

   /* The pushbuf and client are shared with the screen. */
   nv50->base.pushbuf = screen->base.pushbuf;
   nv50->base.client = screen->base.client;

   ret = nouveau_bufctx_new(nv50->base.client, 2, &nv50->bufctx);
   if (!ret)
      ret = nouveau_bufctx_new(nv50->base.client, NV50_BIND_3D_COUNT,
                               &nv50->bufctx_3d);
   if (!ret)
      ret = nouveau_bufctx_new(nv50->base.client, NV50_BIND_CP_COUNT,
                               &nv50->bufctx_cp);
   if (ret)
      goto out_err;

   nv50->base.screen    = &screen->base;
   nv50->base.copy_data = nv50_m2mf_copy_linear;
   nv50->base.push_data = nv50_sifc_linear_u8;
   nv50->base.push_cb   = nv50_cb_push;

   nv50->screen = screen;
   pipe->screen = pscreen;
   pipe->priv = priv;
   pipe->stream_uploader = u_upload_create_default(pipe);
   if (!pipe->stream_uploader)
      goto out_err;
   /* Constants share the stream uploader; nv50_destroy relies on this. */
   pipe->const_uploader = pipe->stream_uploader;

   pipe->destroy = nv50_destroy;

   pipe->draw_vbo = nv50_draw_vbo;
   pipe->clear = nv50_clear;
   pipe->launch_grid = nv50_launch_grid;

   pipe->flush = nv50_flush;
   pipe->texture_barrier = nv50_texture_barrier;
   pipe->memory_barrier = nv50_memory_barrier;
   pipe->get_sample_position = nv50_context_get_sample_position;
   pipe->emit_string_marker = nv50_emit_string_marker;

   if (!screen->cur_ctx) {
      /* Restore the last context's state here, normally handled during
       * context switch
       */
      nv50->state = screen->save_state;
      screen->cur_ctx = nv50;
      nouveau_pushbuf_bufctx(screen->base.pushbuf, nv50->bufctx);
   }
   nv50->base.pushbuf->kick_notify = nv50_default_kick_notify;

   nouveau_context_init(&nv50->base);
   nv50_init_query_functions(nv50);
   nv50_init_surface_functions(nv50);
   nv50_init_state_functions(nv50);
   nv50_init_resource_functions(pipe);

   nv50->base.invalidate_resource_storage = nv50_invalidate_resource_storage;

   /* Pick the video decoding path by chipset generation (or forced PMPEG). */
   if (screen->base.device->chipset < 0x84 ||
       debug_get_bool_option("NOUVEAU_PMPEG", false)) {
      /* PMPEG */
      nouveau_context_init_vdec(&nv50->base);
   } else if (screen->base.device->chipset < 0x98 ||
              screen->base.device->chipset == 0xa0) {
      /* VP2 */
      pipe->create_video_codec = nv84_create_decoder;
      pipe->create_video_buffer = nv84_video_buffer_create;
   } else {
      /* VP3/4 */
      pipe->create_video_codec = nv98_create_decoder;
      pipe->create_video_buffer = nv98_video_buffer_create;
   }

   /* Permanently reference the screen's VRAM buffers (code, uniforms,
    * texture control, stack) in both the 3D and compute bufctxs. */
   flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;

   BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->code);
   BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->uniforms);
   BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->txc);
   BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->stack_bo);
   if (screen->compute) {
      BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->code);
      BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->uniforms);
      BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->txc);
      BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->stack_bo);
   }

   /* The fence BO lives in GART and is written by the GPU. */
   flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR;

   BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->fence.bo);
   BCTX_REFN_bo(nv50->bufctx, FENCE, flags, screen->fence.bo);
   if (screen->compute)
      BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->fence.bo);

   nv50->base.scratch.bo_size = 2 << 20;

   util_dynarray_init(&nv50->global_residents, NULL);

   // Make sure that the first TSC entry has SRGB conversion bit set, since we
   // use it as a fallback.
   if (!screen->tsc.entries[0])
      nv50_upload_tsc0(nv50);

   // And mark samplers as dirty so that the first slot would get bound to the
   // zero entry if it's not otherwise set.
   nv50->dirty_3d |= NV50_NEW_3D_SAMPLERS;

   return pipe;

out_err:
   if (pipe->stream_uploader)
      u_upload_destroy(pipe->stream_uploader);
   if (nv50->bufctx_3d)
      nouveau_bufctx_del(&nv50->bufctx_3d);
   if (nv50->bufctx_cp)
      nouveau_bufctx_del(&nv50->bufctx_cp);
   if (nv50->bufctx)
      nouveau_bufctx_del(&nv50->bufctx);
   FREE(nv50->blit);
   FREE(nv50);
   return NULL;
}
435
436 void
nv50_bufctx_fence(struct nouveau_bufctx * bufctx,bool on_flush)437 nv50_bufctx_fence(struct nouveau_bufctx *bufctx, bool on_flush)
438 {
439 struct nouveau_list *list = on_flush ? &bufctx->current : &bufctx->pending;
440 struct nouveau_list *it;
441
442 for (it = list->next; it != list; it = it->next) {
443 struct nouveau_bufref *ref = (struct nouveau_bufref *)it;
444 struct nv04_resource *res = ref->priv;
445 if (res)
446 nv50_resource_validate(res, (unsigned)ref->priv_data);
447 }
448 }
449
/* pipe_context::get_sample_position - report the (x, y) location of a
 * multisample sample within a pixel, in [0, 1) units (positions are
 * stored in 1/16-pixel fixed point, hence the 0.0625 scale).
 */
static void
nv50_context_get_sample_position(struct pipe_context *pipe,
                                 unsigned sample_count, unsigned sample_index,
                                 float *xy)
{
   static const uint8_t ms1[1][2] = { { 0x8, 0x8 } };
   static const uint8_t ms2[2][2] = {
      { 0x4, 0x4 }, { 0xc, 0xc } }; /* surface coords (0,0), (1,0) */
   static const uint8_t ms4[4][2] = {
      { 0x6, 0x2 }, { 0xe, 0x6 }, /* (0,0), (1,0) */
      { 0x2, 0xa }, { 0xa, 0xe } }; /* (0,1), (1,1) */
   static const uint8_t ms8[8][2] = {
      { 0x1, 0x7 }, { 0x5, 0x3 }, /* (0,0), (1,0) */
      { 0x3, 0xd }, { 0x7, 0xb }, /* (0,1), (1,1) */
      { 0x9, 0x5 }, { 0xf, 0x1 }, /* (2,0), (3,0) */
      { 0xb, 0xf }, { 0xd, 0x9 } }; /* (2,1), (3,1) */
#if 0
   /* NOTE: there are alternative modes for MS2 and MS8, currently not used */
   static const uint8_t ms8_alt[8][2] = {
      { 0x9, 0x5 }, { 0x7, 0xb }, /* (2,0), (1,1) */
      { 0xd, 0x9 }, { 0x5, 0x3 }, /* (3,1), (1,0) */
      { 0x3, 0xd }, { 0x1, 0x7 }, /* (0,1), (0,0) */
      { 0xb, 0xf }, { 0xf, 0x1 } }; /* (2,1), (3,0) */
#endif

   const uint8_t (*table)[2];

   /* Select the sample-position table for the surface's sample count;
    * 0 (non-MS) behaves like a single centered sample. */
   switch (sample_count) {
   case 0:
   case 1: table = ms1; break;
   case 2: table = ms2; break;
   case 4: table = ms4; break;
   case 8: table = ms8; break;
   default:
      assert(0);
      return; /* bad sample count -> undefined locations */
   }

   xy[0] = table[sample_index][0] * 0.0625f;
   xy[1] = table[sample_index][1] * 0.0625f;
}
490