1 /*
2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3 * Copyright 2015 Patrick Rudolph <siro@das-labor.org>
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23
24 #include "buffer9.h"
25 #include "device9.h"
26 #include "indexbuffer9.h"
27 #include "nine_buffer_upload.h"
28 #include "nine_helpers.h"
29 #include "nine_pipe.h"
30
31 #include "pipe/p_screen.h"
32 #include "pipe/p_context.h"
33 #include "pipe/p_state.h"
34 #include "pipe/p_defines.h"
35 #include "pipe/p_format.h"
36 #include "util/u_box.h"
37 #include "util/u_inlines.h"
38
39 #define DBG_CHANNEL (DBG_INDEXBUFFER|DBG_VERTEXBUFFER)
40
/* Construct a d3d9 vertex or index buffer.
 *
 * Picks the gallium resource placement (info->usage) from the d3d9
 * Pool/Usage combination, allocates the system-memory shadow copy used
 * by the non-DEFAULT pools, and registers MANAGED/SYSTEMMEM buffers on
 * the device's managed-buffer list.
 *
 * Returns D3DERR_INVALIDCALL for D3DPOOL_SCRATCH, E_OUTOFMEMORY on
 * allocation failure, any failure code from NineResource9_ctor, or
 * D3D_OK on success. */
HRESULT
NineBuffer9_ctor( struct NineBuffer9 *This,
                  struct NineUnknownParams *pParams,
                  D3DRESOURCETYPE Type,
                  DWORD Usage,
                  UINT Size,
                  D3DPOOL Pool )
{
    struct pipe_resource *info = &This->base.info;
    HRESULT hr;

    DBG("This=%p Size=0x%x Usage=%x Pool=%u\n", This, Size, Usage, Pool);

    user_assert(Pool != D3DPOOL_SCRATCH, D3DERR_INVALIDCALL);

    /* Per-lock transfer bookkeeping; the array is grown on demand in
     * NineBuffer9_Lock when nmaps reaches maxmaps. */
    This->maps = MALLOC(sizeof(struct NineTransfer));
    if (!This->maps)
        return E_OUTOFMEMORY;
    This->nlocks = 0;
    This->nmaps = 0;
    This->maxmaps = 1;
    This->size = Size;

    /* d3d9 buffers are exposed as a 1D R8 gallium buffer of width Size. */
    info->screen = pParams->device->screen;
    info->target = PIPE_BUFFER;
    info->format = PIPE_FORMAT_R8_UNORM;
    info->width0 = Size;
    info->flags = 0;

    /* Note: WRITEONLY is just a tip for resource placement, the resource
     * can still be read (but slower). */
    info->bind = (Type == D3DRTYPE_INDEXBUFFER) ? PIPE_BIND_INDEX_BUFFER : PIPE_BIND_VERTEX_BUFFER;

    /* Software vertex processing:
     * If the device is full software vertex processing,
     * then the buffer is supposed to be used only for sw processing.
     * For mixed vertex processing, buffers with D3DUSAGE_SOFTWAREPROCESSING
     * can be used for both sw and hw processing.
     * These buffers are expected to be stored in RAM.
     * Apps expect locking the full buffer with no flags, then
     * rendering a few primitives, then locking again, etc
     * to be a fast pattern. Only the SYSTEMMEM DYNAMIC path
     * will give that pattern ok performance in our case.
     * An alternative would be when sw processing is detected to
     * convert Draw* calls to Draw*Up calls. */
    if (Usage & D3DUSAGE_SOFTWAREPROCESSING ||
        pParams->device->params.BehaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING) {
        Pool = D3DPOOL_SYSTEMMEM;
        Usage |= D3DUSAGE_DYNAMIC;
        /* Note: the application cannot retrieve Pool and Usage,
         * so overriding them here is not observable. */
    }

    /* Always use the DYNAMIC path for SYSTEMMEM.
     * If the app uses the vertex buffer in a dynamic fashion,
     * this is going to be very significantly faster that way.
     * If the app uses the vertex buffer in a static fashion,
     * instead of being filled all at once, the buffer will be filled
     * little by little, until it is fully filled, thus the perf hit
     * will be very small. */
    if (Pool == D3DPOOL_SYSTEMMEM)
        Usage |= D3DUSAGE_DYNAMIC;

    /* It is hard to find clear information on where to place the buffer in
     * memory depending on the flag.
     * MSDN: resources are static, except for those with DYNAMIC, thus why you
     * can only use DISCARD on them.
     * ATI doc: The driver has the liberty it wants for having things static
     * or not.
     * MANAGED: Ram + uploads to Vram copy at unlock (msdn and nvidia doc say
     * at first draw call using the buffer)
     * DEFAULT + Usage = 0 => System memory backing for easy read access
     * (That doc is very unclear on the details, like whether some copies to
     * vram copy are involved or not).
     * DEFAULT + WRITEONLY => Vram
     * DEFAULT + WRITEONLY + DYNAMIC => Either Vram buffer or GTT_WC, depending on what the driver wants.
     * SYSTEMMEM: Same as MANAGED, but handled by the driver instead of the runtime (which means
     * some small behavior differences between vendors). Implementing exactly as MANAGED should
     * be fine.
     */
    if (Pool == D3DPOOL_SYSTEMMEM && Usage & D3DUSAGE_DYNAMIC)
        info->usage = PIPE_USAGE_STREAM;
    else if (Pool != D3DPOOL_DEFAULT)
        info->usage = PIPE_USAGE_DEFAULT;
    else if (Usage & D3DUSAGE_DYNAMIC && Usage & D3DUSAGE_WRITEONLY)
        info->usage = PIPE_USAGE_STREAM;
    else if (Usage & D3DUSAGE_WRITEONLY)
        info->usage = PIPE_USAGE_DEFAULT;
    /* For the remaining two, PIPE_USAGE_STAGING would probably be
     * a good fit according to the doc. However it seems rather a mistake
     * from apps to use these (mistakes that do really happen). Try
     * to put the flags that are the best compromise between the real
     * behaviour and what buggy apps should get for better performance. */
    else if (Usage & D3DUSAGE_DYNAMIC)
        info->usage = PIPE_USAGE_STREAM;
    else
        info->usage = PIPE_USAGE_DYNAMIC;

    /* When WRITEONLY is not set, we don't want to enable the
     * DISCARD/NOOVERWRITE suballocation optimizations. */
    This->discard_nooverwrite_only = !!(Usage & D3DUSAGE_WRITEONLY) &&
                                     pParams->device->buffer_upload;
    /* Remaining d3d9 usage flags have no gallium equivalent / no effect here: */
    /* if (pDesc->Usage & D3DUSAGE_DONOTCLIP) { } */
    /* if (pDesc->Usage & D3DUSAGE_NONSECURE) { } */
    /* if (pDesc->Usage & D3DUSAGE_NPATCHES) { } */
    /* if (pDesc->Usage & D3DUSAGE_POINTS) { } */
    /* if (pDesc->Usage & D3DUSAGE_RTPATCHES) { } */
    /* if (pDesc->Usage & D3DUSAGE_TEXTAPI) { } */

    /* Remaining dimensions of the 1D buffer resource. */
    info->height0 = 1;
    info->depth0 = 1;
    info->array_size = 1;
    info->last_level = 0;
    info->nr_samples = 0;
    info->nr_storage_samples = 0;

    hr = NineResource9_ctor(&This->base, pParams, NULL, TRUE,
                            Type, Pool, Usage);

    if (FAILED(hr))
        return hr;

    if (Pool != D3DPOOL_DEFAULT) {
        /* RAM shadow copy: Lock returns pointers into this buffer and the
         * dirty regions are uploaded to the gallium resource later
         * (NineBuffer9_Upload). 32-byte aligned for fast memcpy paths. */
        This->managed.data = align_calloc(
            nine_format_get_level_alloc_size(This->base.info.format,
                                             Size, 1, 0), 32);
        if (!This->managed.data)
            return E_OUTOFMEMORY;
        /* Start fully dirty so the initial contents get uploaded. */
        This->managed.dirty = TRUE;
        u_box_1d(0, Size, &This->managed.dirty_box);
        u_box_1d(0, 0, &This->managed.valid_region);
        u_box_1d(0, 0, &This->managed.required_valid_region);
        u_box_1d(0, 0, &This->managed.filled_region);
        This->managed.can_unsynchronized = true;
        This->managed.num_worker_thread_syncs = 0;
        list_inithead(&This->managed.list);
        list_inithead(&This->managed.list2);
        list_add(&This->managed.list2, &pParams->device->managed_buffers);
    }

    return D3D_OK;
}
182
183 void
NineBuffer9_dtor(struct NineBuffer9 * This)184 NineBuffer9_dtor( struct NineBuffer9 *This )
185 {
186 DBG("This=%p\n", This);
187
188 if (This->maps) {
189 while (This->nlocks) {
190 NineBuffer9_Unlock(This);
191 }
192 assert(!This->nmaps);
193 FREE(This->maps);
194 }
195
196 if (This->base.pool != D3DPOOL_DEFAULT) {
197 if (This->managed.data)
198 align_free(This->managed.data);
199 if (list_is_linked(&This->managed.list))
200 list_del(&This->managed.list);
201 if (list_is_linked(&This->managed.list2))
202 list_del(&This->managed.list2);
203 }
204
205 if (This->buf)
206 nine_upload_release_buffer(This->base.base.device->buffer_upload, This->buf);
207
208 NineResource9_dtor(&This->base);
209 }
210
211 struct pipe_resource *
NineBuffer9_GetResource(struct NineBuffer9 * This,unsigned * offset)212 NineBuffer9_GetResource( struct NineBuffer9 *This, unsigned *offset )
213 {
214 if (This->buf)
215 return nine_upload_buffer_resource_and_offset(This->buf, offset);
216 *offset = 0;
217 return NineResource9_GetResource(&This->base);
218 }
219
220 static void
NineBuffer9_RebindIfRequired(struct NineBuffer9 * This,struct NineDevice9 * device,struct pipe_resource * resource,unsigned offset)221 NineBuffer9_RebindIfRequired( struct NineBuffer9 *This,
222 struct NineDevice9 *device,
223 struct pipe_resource *resource,
224 unsigned offset )
225 {
226 int i;
227
228 if (!This->bind_count)
229 return;
230 for (i = 0; i < device->caps.MaxStreams; i++) {
231 if (device->state.stream[i] == (struct NineVertexBuffer9 *)This)
232 nine_context_set_stream_source_apply(device, i,
233 resource,
234 device->state.vtxbuf[i].buffer_offset + offset,
235 device->state.vtxbuf[i].stride);
236 }
237 if (device->state.idxbuf == (struct NineIndexBuffer9 *)This)
238 nine_context_set_indices_apply(device, resource,
239 ((struct NineIndexBuffer9 *)This)->index_size,
240 offset);
241 }
242
/* Lock a byte range of the buffer and return a CPU pointer to it in
 * *ppbData. SizeToLock == 0 means "to the end of the buffer".
 *
 * Non-DEFAULT pools (MANAGED/SYSTEMMEM) return a pointer into the RAM
 * shadow copy and only update the dirty-region bookkeeping; the GPU
 * upload is deferred to NineBuffer9_Upload. DEFAULT-pool buffers map
 * the gallium resource directly, with several stall-avoidance paths:
 * nine_subbuffer suballocation for DISCARD/NOOVERWRITE-only usage, and
 * secondary-pipe mapping when csmt is active.
 *
 * Returns E_POINTER if ppbData is NULL, E_OUTOFMEMORY if the transfer
 * array cannot grow, D3DERR_WASSTILLDRAWING for a failed DONOTWAIT map,
 * D3DERR_INVALIDCALL for other map failures, else D3D_OK. */
HRESULT NINE_WINAPI
NineBuffer9_Lock( struct NineBuffer9 *This,
                  UINT OffsetToLock,
                  UINT SizeToLock,
                  void **ppbData,
                  DWORD Flags )
{
    struct NineDevice9 *device = This->base.base.device;
    struct pipe_box box;
    struct pipe_context *pipe;
    void *data;
    unsigned usage;

    DBG("This=%p(pipe=%p) OffsetToLock=0x%x, SizeToLock=0x%x, Flags=0x%x\n",
        This, This->base.resource,
        OffsetToLock, SizeToLock, Flags);

    user_assert(ppbData, E_POINTER);

    /* Zero size means lock everything from the offset to the end. */
    if (SizeToLock == 0) {
        SizeToLock = This->size - OffsetToLock;
        user_warn(OffsetToLock != 0);
    }

    /* Writes out of bounds seem to have to be taken into account for these.
     * TODO: Do more tests (is it only at buffer first lock ? etc).
     * Since these buffers are supposed to be locked once and never
     * written again (MANAGED or DYNAMIC is used for the other use cases),
     * performance should be unaffected. */
    if (!(This->base.usage & D3DUSAGE_DYNAMIC) && This->base.pool == D3DPOOL_DEFAULT)
        SizeToLock = This->size - OffsetToLock;

    u_box_1d(OffsetToLock, SizeToLock, &box);

    /* Shadow-copy path for MANAGED and SYSTEMMEM pools. */
    if (This->base.pool != D3DPOOL_DEFAULT) {
        /* MANAGED: READONLY doesn't dirty the buffer, nor
         * wait the upload in the worker thread
         * SYSTEMMEM: AMD/NVidia: All locks dirty the full buffer. Not on Intel
         * For Nvidia, SYSTEMMEM behaves as if there is no worker thread.
         * On AMD, READONLY and NOOVERWRITE do dirty the buffer, but do not sync the previous uploads
         * in the worker thread. On Intel only NOOVERWRITE has that effect.
         * We implement the AMD behaviour. */
        if (This->base.pool == D3DPOOL_MANAGED) {
            if (!(Flags & D3DLOCK_READONLY)) {
                if (!This->managed.dirty) {
                    assert(list_is_empty(&This->managed.list));
                    This->managed.dirty = TRUE;
                    This->managed.dirty_box = box;
                    /* Flush if regions pending to be uploaded would be dirtied */
                    if (p_atomic_read(&This->managed.pending_upload)) {
                        u_box_intersect_1d(&box, &box, &This->managed.upload_pending_regions);
                        if (box.width != 0)
                            nine_csmt_process(This->base.base.device);
                    }
                } else
                    u_box_union_1d(&This->managed.dirty_box, &This->managed.dirty_box, &box);
                /* Tests trying to draw while the buffer is locked show that
                 * SYSTEMMEM/MANAGED buffers are made dirty at Lock time */
                BASEBUF_REGISTER_UPDATE(This);
            }
        } else {
            /* SYSTEMMEM: sync with the csmt worker unless the app promised
             * (READONLY/NOOVERWRITE) not to touch in-flight data. */
            if (!(Flags & (D3DLOCK_READONLY|D3DLOCK_NOOVERWRITE)) &&
                p_atomic_read(&This->managed.pending_upload)) {
                This->managed.num_worker_thread_syncs++;
                /* If we sync too often, pick the vertex_uploader path */
                if (This->managed.num_worker_thread_syncs >= 3)
                    This->managed.can_unsynchronized = false;
                nine_csmt_process(This->base.base.device);
                /* Note: As DISCARD is not relevant for SYSTEMMEM,
                 * NOOVERWRITE might have a similar meaning as what is
                 * in D3D7 doc. Basically that data from previous draws
                 * OF THIS FRAME are unaffected. As we flush csmt in Present(),
                 * we should be correct. In some parts of the doc, the notion
                 * of frame is implied to be related to Begin/EndScene(),
                 * but tests show NOOVERWRITE after EndScene() doesn't flush
                 * the csmt thread. */
            }
            This->managed.dirty = true;
            u_box_1d(0, This->size, &This->managed.dirty_box); /* systemmem non-dynamic */
            u_box_1d(0, 0, &This->managed.valid_region); /* systemmem dynamic */
            BASEBUF_REGISTER_UPDATE(This);
        }

        *ppbData = (char *)This->managed.data + OffsetToLock;
        DBG("returning pointer %p\n", *ppbData);
        This->nlocks++;
        return D3D_OK;
    }

    /* DEFAULT pool below: map the gallium resource. */

    /* Driver ddi doc: READONLY is never passed to the device. So it can only
     * have effect on things handled by the driver (MANAGED pool for example).
     * Msdn doc: DISCARD and NOOVERWRITE are only for DYNAMIC.
     * ATI doc: You can use DISCARD and NOOVERWRITE without DYNAMIC.
     * Msdn doc: D3DLOCK_DONOTWAIT is not among the valid flags for buffers.
     * Our tests: On win 7 nvidia, D3DLOCK_DONOTWAIT does return
     * D3DERR_WASSTILLDRAWING if the resource is in use, except for DYNAMIC.
     * Our tests: some apps do use both DISCARD and NOOVERWRITE at the same
     * time. On windows it seems to return different pointer in some conditions,
     * creation flags and drivers. However these tests indicate having
     * NOOVERWRITE win is a valid behaviour (NVidia).
     */

    /* Have NOOVERWRITE win over DISCARD. This is allowed (see above) and
     * it prevents overconsuming buffers if apps do use both at the same time. */
    if ((Flags & (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE)) == (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE))
        Flags &= ~D3DLOCK_DISCARD;

    if (Flags & D3DLOCK_DISCARD)
        usage = PIPE_MAP_WRITE | PIPE_MAP_DISCARD_WHOLE_RESOURCE;
    else if (Flags & D3DLOCK_NOOVERWRITE)
        usage = PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED;
    else
        /* Do not ask for READ if writeonly and default pool (should be safe enough,
         * as the doc says app shouldn't expect reading to work with writeonly). */
        usage = (This->base.usage & D3DUSAGE_WRITEONLY) ?
            PIPE_MAP_WRITE :
            PIPE_MAP_READ_WRITE;
    if (Flags & D3DLOCK_DONOTWAIT && !(This->base.usage & D3DUSAGE_DYNAMIC))
        usage |= PIPE_MAP_DONTBLOCK;

    /* A lock without DISCARD/NOOVERWRITE disables the suballocation
     * optimization for the rest of this buffer's life. */
    This->discard_nooverwrite_only &= !!(Flags & (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE));

    /* Grow the transfer array when full (doubling). */
    if (This->nmaps == This->maxmaps) {
        struct NineTransfer *newmaps =
            REALLOC(This->maps, sizeof(struct NineTransfer)*This->maxmaps,
                    sizeof(struct NineTransfer)*(This->maxmaps << 1));
        if (newmaps == NULL)
            return E_OUTOFMEMORY;

        This->maxmaps <<= 1;
        This->maps = newmaps;
    }

    if (This->buf && !This->discard_nooverwrite_only) {
        struct pipe_box src_box;
        unsigned offset;
        struct pipe_resource *src_res;
        DBG("Disabling nine_subbuffer for a buffer having"
            "used a nine_subbuffer buffer\n");
        /* Copy buffer content to the buffer resource, which
         * we will now use.
         * Note: The behaviour may be different from what is expected
         * with double lock. However applications can't really make expectations
         * about double locks, and don't really use them, so that's ok. */
        src_res = nine_upload_buffer_resource_and_offset(This->buf, &offset);
        u_box_1d(offset, This->size, &src_box);

        pipe = NineDevice9_GetPipe(device);
        pipe->resource_copy_region(pipe, This->base.resource, 0, 0, 0, 0,
                                   src_res, 0, &src_box);
        /* Release previous resource (deferred to Unlock if still mapped) */
        if (This->nmaps >= 1)
            This->maps[This->nmaps-1].should_destroy_buf = true;
        else
            nine_upload_release_buffer(device->buffer_upload, This->buf);
        This->buf = NULL;
        /* Rebind buffer */
        NineBuffer9_RebindIfRequired(This, device, This->base.resource, 0);
    }

    This->maps[This->nmaps].transfer = NULL;
    This->maps[This->nmaps].is_pipe_secondary = false;
    This->maps[This->nmaps].buf = NULL;
    This->maps[This->nmaps].should_destroy_buf = false;

    /* Fast path: hand out a pointer into a nine_subbuffer suballocation,
     * creating a fresh one on DISCARD, with no gallium map at all. */
    if (This->discard_nooverwrite_only) {
        if (This->buf && (Flags & D3DLOCK_DISCARD)) {
            /* Release previous buffer */
            if (This->nmaps >= 1)
                This->maps[This->nmaps-1].should_destroy_buf = true;
            else
                nine_upload_release_buffer(device->buffer_upload, This->buf);
            This->buf = NULL;
        }

        if (!This->buf) {
            unsigned offset;
            struct pipe_resource *res;
            This->buf = nine_upload_create_buffer(device->buffer_upload, This->base.info.width0);
            res = nine_upload_buffer_resource_and_offset(This->buf, &offset);
            NineBuffer9_RebindIfRequired(This, device, res, offset);
        }

        if (This->buf) {
            This->maps[This->nmaps].buf = This->buf;
            This->nmaps++;
            This->nlocks++;
            DBG("Returning %p\n", nine_upload_buffer_get_map(This->buf) + OffsetToLock);
            *ppbData = nine_upload_buffer_get_map(This->buf) + OffsetToLock;
            return D3D_OK;
        } else {
            /* Fallback to normal path, and don't try again */
            This->discard_nooverwrite_only = false;
        }
    }

    /* Previous mappings may need pending commands to write to the
     * buffer (staging buffer for example). Before a NOOVERWRITE,
     * we thus need a finish, to guarantee any upload is finished.
     * Note for discard_nooverwrite_only we don't need to do this
     * check as neither discard nor nooverwrite have issues there */
    if (This->need_sync_if_nooverwrite && !(Flags & D3DLOCK_DISCARD) &&
        (Flags & D3DLOCK_NOOVERWRITE)) {
        struct pipe_screen *screen = NineDevice9_GetScreen(device);
        struct pipe_fence_handle *fence = NULL;

        pipe = NineDevice9_GetPipe(device);
        pipe->flush(pipe, &fence, 0);
        (void) screen->fence_finish(screen, NULL, fence, PIPE_TIMEOUT_INFINITE);
        screen->fence_reference(screen, &fence, NULL);
    }
    This->need_sync_if_nooverwrite = !(Flags & (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE));

    /* When csmt is active, we want to avoid stalls as much as possible,
     * and thus we want to create a new resource on discard and map it
     * with the secondary pipe, instead of waiting on the main pipe. */
    if (Flags & D3DLOCK_DISCARD && device->csmt_active) {
        struct pipe_screen *screen = NineDevice9_GetScreen(device);
        struct pipe_resource *new_res = nine_resource_create_with_retry(device, screen, &This->base.info);
        if (new_res) {
            /* Use the new resource; nothing waits on it yet, so the map
             * below can be unsynchronized. */
            pipe_resource_reference(&This->base.resource, new_res);
            pipe_resource_reference(&new_res, NULL);
            usage = PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED;
            NineBuffer9_RebindIfRequired(This, device, This->base.resource, 0);
            This->maps[This->nmaps].is_pipe_secondary = TRUE;
        }
    } else if (Flags & D3DLOCK_NOOVERWRITE && device->csmt_active)
        This->maps[This->nmaps].is_pipe_secondary = TRUE;

    if (This->maps[This->nmaps].is_pipe_secondary)
        pipe = device->pipe_secondary;
    else
        pipe = NineDevice9_GetPipe(device);

    data = pipe->buffer_map(pipe, This->base.resource, 0,
                            usage, &box, &This->maps[This->nmaps].transfer);

    if (!data) {
        DBG("pipe::buffer_map failed\n"
            " usage = %x\n"
            " box.x = %u\n"
            " box.width = %u\n",
            usage, box.x, box.width);

        if (Flags & D3DLOCK_DONOTWAIT)
            return D3DERR_WASSTILLDRAWING;
        return D3DERR_INVALIDCALL;
    }

    DBG("returning pointer %p\n", data);
    This->nmaps++;
    This->nlocks++;
    *ppbData = data;

    return D3D_OK;
}
500
501 HRESULT NINE_WINAPI
NineBuffer9_Unlock(struct NineBuffer9 * This)502 NineBuffer9_Unlock( struct NineBuffer9 *This )
503 {
504 struct NineDevice9 *device = This->base.base.device;
505 struct pipe_context *pipe;
506 int i;
507 DBG("This=%p\n", This);
508
509 user_assert(This->nlocks > 0, D3DERR_INVALIDCALL);
510 This->nlocks--;
511 if (This->nlocks > 0)
512 return D3D_OK; /* Pending unlocks. Wait all unlocks before unmapping */
513
514 if (This->base.pool == D3DPOOL_DEFAULT) {
515 for (i = 0; i < This->nmaps; i++) {
516 if (!This->maps[i].buf) {
517 pipe = This->maps[i].is_pipe_secondary ?
518 device->pipe_secondary :
519 nine_context_get_pipe_acquire(device);
520 pipe->buffer_unmap(pipe, This->maps[i].transfer);
521 /* We need to flush in case the driver does implicit copies */
522 if (This->maps[i].is_pipe_secondary)
523 pipe->flush(pipe, NULL, 0);
524 else
525 nine_context_get_pipe_release(device);
526 } else if (This->maps[i].should_destroy_buf)
527 nine_upload_release_buffer(device->buffer_upload, This->maps[i].buf);
528 }
529 This->nmaps = 0;
530 }
531 return D3D_OK;
532 }
533
/* Mark the whole buffer dirty so the full shadow copy is uploaded on next
 * use. Only valid for non-DEFAULT pools (which own a shadow copy). */
void
NineBuffer9_SetDirty( struct NineBuffer9 *This )
{
    assert(This->base.pool != D3DPOOL_DEFAULT);

    This->managed.dirty = TRUE;
    u_box_1d(0, This->size, &This->managed.dirty_box);
    BASEBUF_REGISTER_UPDATE(This);
}
543
544 /* Try to remove b from a, supposed to include b */
u_box_try_remove_region_1d(struct pipe_box * dst,const struct pipe_box * a,const struct pipe_box * b)545 static void u_box_try_remove_region_1d(struct pipe_box *dst,
546 const struct pipe_box *a,
547 const struct pipe_box *b)
548 {
549 int x, width;
550 if (a->x == b->x) {
551 x = a->x + b->width;
552 width = a->width - b->width;
553 } else if ((a->x + a->width) == (b->x + b->width)) {
554 x = a->x;
555 width = a->width - b->width;
556 } else {
557 x = a->x;
558 width = a->width;
559 }
560 dst->x = x;
561 dst->width = width;
562 }
563
/* Upload the dirty part of the shadow copy to the gallium resource.
 *
 * For MANAGED and non-dynamic SYSTEMMEM, the whole dirty_box is queued.
 * For SYSTEMMEM DYNAMIC, only the region required by the upcoming draw
 * (managed.required_valid_region, filled by draw-call preparation) is
 * considered, and region algebra on valid/filled regions decides between
 * an UNSYNCHRONIZED upload, a DISCARD of the whole resource, or a bounce
 * through the device's vertex_uploader.
 *
 * Must only be called on a dirty non-DEFAULT buffer; called (for the
 * SYSTEMMEM DYNAMIC path) once per buffer before each draw using it. */
void
NineBuffer9_Upload( struct NineBuffer9 *This )
{
    struct NineDevice9 *device = This->base.base.device;
    unsigned upload_flags = 0;
    struct pipe_box box_upload;

    assert(This->base.pool != D3DPOOL_DEFAULT && This->managed.dirty);

    if (This->base.pool == D3DPOOL_SYSTEMMEM && This->base.usage & D3DUSAGE_DYNAMIC) {
        struct pipe_box region_already_valid;
        struct pipe_box conflicting_region;
        struct pipe_box *valid_region = &This->managed.valid_region;
        struct pipe_box *required_valid_region = &This->managed.required_valid_region;
        struct pipe_box *filled_region = &This->managed.filled_region;
        /* Try to upload SYSTEMMEM DYNAMIC in an efficient fashion.
         * Unlike non-dynamic for which we upload the whole dirty region, try to
         * only upload the data needed for the draw. The draw call preparation
         * fills This->managed.required_valid_region for that */
        u_box_intersect_1d(&region_already_valid,
                           valid_region,
                           required_valid_region);
        /* If the required valid region is already valid, nothing to do */
        if (region_already_valid.x == required_valid_region->x &&
            region_already_valid.width == required_valid_region->width) {
            /* Rebind if the region happens to be valid in the original buffer
             * but we have since used vertex_uploader */
            if (!This->managed.can_unsynchronized)
                NineBuffer9_RebindIfRequired(This, device, This->base.resource, 0);
            u_box_1d(0, 0, required_valid_region);
            return;
        }
        /* (Try to) Remove valid areas from the region to upload */
        u_box_try_remove_region_1d(&box_upload,
                                   required_valid_region,
                                   &region_already_valid);
        assert(box_upload.width > 0);
        /* To maintain correctly the valid region, as we will do union later with
         * box_upload, we must ensure box_upload is consecutive with valid_region */
        if (box_upload.x > valid_region->x + valid_region->width && valid_region->width > 0) {
            /* Gap after valid_region: extend box_upload backwards to close it. */
            box_upload.width = box_upload.x + box_upload.width - (valid_region->x + valid_region->width);
            box_upload.x = valid_region->x + valid_region->width;
        } else if (box_upload.x + box_upload.width < valid_region->x && valid_region->width > 0) {
            /* Gap before valid_region: extend box_upload forwards to close it. */
            box_upload.width = valid_region->x - box_upload.x;
        }
        /* There is conflict if some areas, that are not valid but are filled for previous draw calls,
         * intersect with the region we plan to upload. Note by construction valid_region IS
         * included in filled_region, thus so is region_already_valid. */
        u_box_intersect_1d(&conflicting_region, &box_upload, filled_region);
        /* As box_upload could still contain region_already_valid, check the intersection
         * doesn't happen to be exactly region_already_valid (it cannot be smaller, see above) */
        if (This->managed.can_unsynchronized && (conflicting_region.width == 0 ||
            (conflicting_region.x == region_already_valid.x &&
             conflicting_region.width == region_already_valid.width))) {
            /* No conflicts. */
            upload_flags |= PIPE_MAP_UNSYNCHRONIZED;
        } else {
            /* We cannot use PIPE_MAP_UNSYNCHRONIZED. We must choose between no flag and DISCARD.
             * Criteria to discard:
             * . Most of the resource was filled (but some apps do allocate a big buffer
             * to only use a small part in a round fashion)
             * . The region to upload is very small compared to the filled region and
             * at the start of the buffer (hints at round usage starting again)
             * . The region to upload is very big compared to the required region
             * . We have not discarded yet this frame
             * If the buffer use pattern seems to sync the worker thread too often,
             * revert to the vertex_uploader */
            if (This->managed.num_worker_thread_syncs < 3 &&
                (filled_region->width > (This->size / 2) ||
                 (10 * box_upload.width < filled_region->width &&
                  box_upload.x < (filled_region->x + filled_region->width)/2) ||
                 box_upload.width > 2 * required_valid_region->width ||
                 This->managed.frame_count_last_discard != device->frame_count)) {
                /* Avoid DISCARDING too much by discarding only if most of the buffer
                 * has been used */
                DBG_FLAG(DBG_INDEXBUFFER|DBG_VERTEXBUFFER,
                         "Uploading %p DISCARD: valid %d %d, filled %d %d, required %d %d, box_upload %d %d, required already_valid %d %d, conficting %d %d\n",
                         This, valid_region->x, valid_region->width, filled_region->x, filled_region->width,
                         required_valid_region->x, required_valid_region->width, box_upload.x, box_upload.width,
                         region_already_valid.x, region_already_valid.width, conflicting_region.x, conflicting_region.width
                );
                upload_flags |= PIPE_MAP_DISCARD_WHOLE_RESOURCE;
                u_box_1d(0, 0, filled_region);
                u_box_1d(0, 0, valid_region);
                box_upload = This->managed.required_valid_region;
                /* Rebind the buffer if we used intermediate alternative buffer */
                if (!This->managed.can_unsynchronized)
                    NineBuffer9_RebindIfRequired(This, device, This->base.resource, 0);
                This->managed.can_unsynchronized = true;
                This->managed.frame_count_last_discard = device->frame_count;
            } else {
                /* Once we use without UNSYNCHRONIZED, we cannot use it anymore.
                 * Use a different buffer. */
                unsigned buffer_offset = 0;
                struct pipe_resource *resource = NULL;
                This->managed.can_unsynchronized = false;
                u_upload_data(device->vertex_uploader,
                              required_valid_region->x,
                              required_valid_region->width,
                              64,
                              This->managed.data + required_valid_region->x,
                              &buffer_offset,
                              &resource);
                buffer_offset -= required_valid_region->x;
                u_upload_unmap(device->vertex_uploader);
                if (resource) {
                    NineBuffer9_RebindIfRequired(This, device, resource, buffer_offset);
                    /* Note: This only works because for these types of buffers this function
                     * is called before every draw call. Else it wouldn't work when the app
                     * rebinds buffers. In addition it needs this function to be called only
                     * once per buffers even if bound several times, which we do. */
                    u_box_1d(0, 0, required_valid_region);
                    pipe_resource_reference(&resource, NULL);
                    return;
                }
            }
        }

        /* Account the region we are about to upload as filled and valid. */
        u_box_union_1d(filled_region,
                       filled_region,
                       &box_upload);
        u_box_union_1d(valid_region,
                       valid_region,
                       &box_upload);
        u_box_1d(0, 0, required_valid_region);
    } else
        box_upload = This->managed.dirty_box;

    /* Whole-buffer uploads may safely discard the previous contents. */
    if (box_upload.x == 0 && box_upload.width == This->size) {
        upload_flags |= PIPE_MAP_DISCARD_WHOLE_RESOURCE;
    }

    /* Track the union of regions queued on the csmt worker, so Lock can
     * detect when a new write would race with a pending upload. */
    if (This->managed.pending_upload) {
        u_box_union_1d(&This->managed.upload_pending_regions,
                       &This->managed.upload_pending_regions,
                       &box_upload);
    } else {
        This->managed.upload_pending_regions = box_upload;
    }

    DBG_FLAG(DBG_INDEXBUFFER|DBG_VERTEXBUFFER,
             "Uploading %p, offset=%d, size=%d, Flags=0x%x\n",
             This, box_upload.x, box_upload.width, upload_flags);
    nine_context_range_upload(device, &This->managed.pending_upload,
                              (struct NineUnknown *)This,
                              This->base.resource,
                              box_upload.x,
                              box_upload.width,
                              upload_flags,
                              (char *)This->managed.data + box_upload.x);
    This->managed.dirty = FALSE;
}
716