• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3  * Copyright 2015 Patrick Rudolph <siro@das-labor.org>
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * on the rights to use, copy, modify, merge, publish, distribute, sub
9  * license, and/or sell copies of the Software, and to permit persons to whom
10  * the Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23 
24 #include "buffer9.h"
25 #include "device9.h"
26 #include "indexbuffer9.h"
27 #include "nine_buffer_upload.h"
28 #include "nine_helpers.h"
29 #include "nine_pipe.h"
30 
31 #include "pipe/p_screen.h"
32 #include "pipe/p_context.h"
33 #include "pipe/p_state.h"
34 #include "pipe/p_defines.h"
35 #include "pipe/p_format.h"
36 #include "util/u_box.h"
37 #include "util/u_inlines.h"
38 
39 #define DBG_CHANNEL (DBG_INDEXBUFFER|DBG_VERTEXBUFFER)
40 
/* Construct a d3d9 vertex or index buffer.
 * Translates the d3d9 Pool/Usage combination into a gallium pipe_resource
 * template, and for non-DEFAULT pools allocates the system-memory shadow
 * copy that backs MANAGED/SYSTEMMEM semantics (uploaded lazily at draw
 * time via NineBuffer9_Upload). */
HRESULT
NineBuffer9_ctor( struct NineBuffer9 *This,
                        struct NineUnknownParams *pParams,
                        D3DRESOURCETYPE Type,
                        DWORD Usage,
                        UINT Size,
                        D3DPOOL Pool )
{
    struct pipe_resource *info = &This->base.info;
    HRESULT hr;

    DBG("This=%p Size=0x%x Usage=%x Pool=%u\n", This, Size, Usage, Pool);

    /* SCRATCH is not a valid pool for buffers. */
    user_assert(Pool != D3DPOOL_SCRATCH, D3DERR_INVALIDCALL);

    /* Transfer bookkeeping: room for one map initially, grown on demand
     * in NineBuffer9_Lock. */
    This->maps = MALLOC(sizeof(struct NineTransfer));
    if (!This->maps)
        return E_OUTOFMEMORY;
    This->nlocks = 0;
    This->nmaps = 0;
    This->maxmaps = 1;
    This->size = Size;

    /* Buffers are represented as 1D R8 resources of width Size. */
    info->screen = pParams->device->screen;
    info->target = PIPE_BUFFER;
    info->format = PIPE_FORMAT_R8_UNORM;
    info->width0 = Size;
    info->flags = 0;

    /* Note: WRITEONLY is just tip for resource placement, the resource
     * can still be read (but slower). */
    info->bind = (Type == D3DRTYPE_INDEXBUFFER) ? PIPE_BIND_INDEX_BUFFER : PIPE_BIND_VERTEX_BUFFER;

    /* Software vertex processing:
     * If the device is full software vertex processing,
     * then the buffer is supposed to be used only for sw processing.
     * For mixed vertex processing, buffers with D3DUSAGE_SOFTWAREPROCESSING
     * can be used for both sw and hw processing.
     * These buffers are expected to be stored in RAM.
     * Apps expect locking the full buffer with no flags, then
     * render a a few primitive, then locking again, etc
     * to be a fast pattern. Only the SYSTEMMEM DYNAMIC path
     * will give that pattern ok performance in our case.
     * An alternative would be when sw processing is detected to
     * convert Draw* calls to Draw*Up calls. */
    if (Usage & D3DUSAGE_SOFTWAREPROCESSING ||
        pParams->device->params.BehaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING) {
        Pool = D3DPOOL_SYSTEMMEM;
        Usage |= D3DUSAGE_DYNAMIC;
        /* Note: the application cannot retrieve Pool and Usage */
    }

    /* Always use the DYNAMIC path for SYSTEMMEM.
     * If the app uses the vertex buffer is a dynamic fashion,
     * this is going to be very significantly faster that way.
     * If the app uses the vertex buffer in a static fashion,
     * instead of being filled all at once, the buffer will be filled
     * little per little, until it is fully filled, thus the perf hit
     * will be very small. */
    if (Pool == D3DPOOL_SYSTEMMEM)
        Usage |= D3DUSAGE_DYNAMIC;

    /* It is hard to find clear information on where to place the buffer in
     * memory depending on the flag.
     * MSDN: resources are static, except for those with DYNAMIC, thus why you
     *   can only use DISCARD on them.
     * ATI doc: The driver has the liberty it wants for having things static
     *   or not.
     *   MANAGED: Ram + uploads to Vram copy at unlock (msdn and nvidia doc say
     *   at first draw call using the buffer)
     *   DEFAULT + Usage = 0 => System memory backing for easy read access
     *   (That doc is very unclear on the details, like whether some copies to
     *   vram copy are involved or not).
     *   DEFAULT + WRITEONLY => Vram
     *   DEFAULT + WRITEONLY + DYNAMIC => Either Vram buffer or GTT_WC, depending on what the driver wants.
     *   SYSTEMMEM: Same as MANAGED, but handled by the driver instead of the runtime (which means
     *   some small behavior differences between vendors). Implementing exactly as MANAGED should
     *   be fine.
     */
    if (Pool == D3DPOOL_SYSTEMMEM && Usage & D3DUSAGE_DYNAMIC)
        info->usage = PIPE_USAGE_STREAM;
    else if (Pool != D3DPOOL_DEFAULT)
        info->usage = PIPE_USAGE_DEFAULT;
    else if (Usage & D3DUSAGE_DYNAMIC && Usage & D3DUSAGE_WRITEONLY)
        info->usage = PIPE_USAGE_STREAM;
    else if (Usage & D3DUSAGE_WRITEONLY)
        info->usage = PIPE_USAGE_DEFAULT;
    /* For the remaining two, PIPE_USAGE_STAGING would probably be
     * a good fit according to the doc. However it seems rather a mistake
     * from apps to use these (mistakes that do really happen). Try
     * to put the flags that are the best compromise between the real
     * behaviour and what buggy apps should get for better performance. */
    else if (Usage & D3DUSAGE_DYNAMIC)
        info->usage = PIPE_USAGE_STREAM;
    else
        info->usage = PIPE_USAGE_DYNAMIC;

    /* When Writeonly is not set, we don't want to enable the
     * optimizations */
    This->discard_nooverwrite_only = !!(Usage & D3DUSAGE_WRITEONLY) &&
                                     pParams->device->buffer_upload;
    /* if (pDesc->Usage & D3DUSAGE_DONOTCLIP) { } */
    /* if (pDesc->Usage & D3DUSAGE_NONSECURE) { } */
    /* if (pDesc->Usage & D3DUSAGE_NPATCHES) { } */
    /* if (pDesc->Usage & D3DUSAGE_POINTS) { } */
    /* if (pDesc->Usage & D3DUSAGE_RTPATCHES) { } */
    /* if (pDesc->Usage & D3DUSAGE_TEXTAPI) { } */

    info->height0 = 1;
    info->depth0 = 1;
    info->array_size = 1;
    info->last_level = 0;
    info->nr_samples = 0;
    info->nr_storage_samples = 0;

    hr = NineResource9_ctor(&This->base, pParams, NULL, TRUE,
                            Type, Pool, Usage);

    if (FAILED(hr))
        return hr;

    /* Non-DEFAULT pools keep a CPU copy; Lock returns pointers into it
     * and NineBuffer9_Upload pushes dirty regions to the GPU resource. */
    if (Pool != D3DPOOL_DEFAULT) {
        This->managed.data = align_calloc(
            nine_format_get_level_alloc_size(This->base.info.format,
                                             Size, 1, 0), 32);
        if (!This->managed.data)
            return E_OUTOFMEMORY;
        This->managed.dirty = TRUE;
        u_box_1d(0, Size, &This->managed.dirty_box);
        u_box_1d(0, 0, &This->managed.valid_region);
        u_box_1d(0, 0, &This->managed.required_valid_region);
        u_box_1d(0, 0, &This->managed.filled_region);
        This->managed.can_unsynchronized = true;
        This->managed.num_worker_thread_syncs = 0;
        list_inithead(&This->managed.list);
        list_inithead(&This->managed.list2);
        list_add(&This->managed.list2, &pParams->device->managed_buffers);
    }

    return D3D_OK;
}
182 
/* Destructor: drains outstanding locks, frees the managed shadow copy,
 * unlinks the buffer from the device lists, releases any upload
 * sub-buffer, then tears down the base resource. */
void
NineBuffer9_dtor( struct NineBuffer9 *This )
{
    DBG("This=%p\n", This);

    if (This->maps) {
        /* Drain user locks; the final Unlock unmaps all pending maps. */
        while (This->nlocks) {
            NineBuffer9_Unlock(This);
        }
        assert(!This->nmaps);
        FREE(This->maps);
    }

    if (This->base.pool != D3DPOOL_DEFAULT) {
        if (This->managed.data)
            align_free(This->managed.data);
        /* Unlink from the device's dirty list and managed-buffers list. */
        if (list_is_linked(&This->managed.list))
            list_del(&This->managed.list);
        if (list_is_linked(&This->managed.list2))
            list_del(&This->managed.list2);
    }

    if (This->buf)
        nine_upload_release_buffer(This->base.base.device->buffer_upload, This->buf);

    NineResource9_dtor(&This->base);
}
210 
211 struct pipe_resource *
NineBuffer9_GetResource(struct NineBuffer9 * This,unsigned * offset)212 NineBuffer9_GetResource( struct NineBuffer9 *This, unsigned *offset )
213 {
214     if (This->buf)
215         return nine_upload_buffer_resource_and_offset(This->buf, offset);
216     *offset = 0;
217     return NineResource9_GetResource(&This->base);
218 }
219 
/* If this buffer is currently bound to the device (as one of the vertex
 * streams or as the index buffer), re-emit those bindings so they point
 * at the given resource/offset. Needed whenever the backing
 * pipe_resource is swapped out (DISCARD re-allocation, upload
 * sub-buffer promotion or release). No-op when the buffer is unbound. */
static void
NineBuffer9_RebindIfRequired( struct NineBuffer9 *This,
                              struct NineDevice9 *device,
                              struct pipe_resource *resource,
                              unsigned offset )
{
    int i;

    if (!This->bind_count)
        return;
    /* The new offset is added to the existing per-stream offset. */
    for (i = 0; i < device->caps.MaxStreams; i++) {
        if (device->state.stream[i] == (struct NineVertexBuffer9 *)This)
            nine_context_set_stream_source_apply(device, i,
                                                 resource,
                                                 device->state.vtxbuf[i].buffer_offset + offset,
                                                 device->state.vtxbuf[i].stride);
    }
    if (device->state.idxbuf == (struct NineIndexBuffer9 *)This)
        nine_context_set_indices_apply(device, resource,
                                       ((struct NineIndexBuffer9 *)This)->index_size,
                                       offset);
}
242 
243 HRESULT NINE_WINAPI
NineBuffer9_Lock(struct NineBuffer9 * This,UINT OffsetToLock,UINT SizeToLock,void ** ppbData,DWORD Flags)244 NineBuffer9_Lock( struct NineBuffer9 *This,
245                         UINT OffsetToLock,
246                         UINT SizeToLock,
247                         void **ppbData,
248                         DWORD Flags )
249 {
250     struct NineDevice9 *device = This->base.base.device;
251     struct pipe_box box;
252     struct pipe_context *pipe;
253     void *data;
254     unsigned usage;
255 
256     DBG("This=%p(pipe=%p) OffsetToLock=0x%x, SizeToLock=0x%x, Flags=0x%x\n",
257         This, This->base.resource,
258         OffsetToLock, SizeToLock, Flags);
259 
260     user_assert(ppbData, E_POINTER);
261 
262     if (SizeToLock == 0) {
263         SizeToLock = This->size - OffsetToLock;
264         user_warn(OffsetToLock != 0);
265     }
266 
267     /* Write out of bound seems to have to be taken into account for these.
268      * TODO: Do more tests (is it only at buffer first lock ? etc).
269      * Since these buffers are supposed to be locked once and never
270      * writen again (MANAGED or DYNAMIC is used for the other uses cases),
271      * performance should be unaffected. */
272     if (!(This->base.usage & D3DUSAGE_DYNAMIC) && This->base.pool == D3DPOOL_DEFAULT)
273         SizeToLock = This->size - OffsetToLock;
274 
275     SizeToLock = MIN2(SizeToLock, This->size - OffsetToLock); /* Do not read or track out of the buffer */
276     u_box_1d(OffsetToLock, SizeToLock, &box);
277 
278     if (This->base.pool != D3DPOOL_DEFAULT) {
279         /* MANAGED: READONLY doesn't dirty the buffer, nor
280          * wait the upload in the worker thread
281          * SYSTEMMEM: AMD/NVidia: All locks dirty the full buffer. Not on Intel
282          * For Nvidia, SYSTEMMEM behaves are if there is no worker thread.
283          * On AMD, READONLY and NOOVERWRITE do dirty the buffer, but do not sync the previous uploads
284          * in the worker thread. On Intel only NOOVERWRITE has that effect.
285          * We implement the AMD behaviour. */
286         if (This->base.pool == D3DPOOL_MANAGED) {
287             if (!(Flags & D3DLOCK_READONLY)) {
288                 if (!This->managed.dirty) {
289                     assert(list_is_empty(&This->managed.list));
290                     This->managed.dirty = TRUE;
291                     This->managed.dirty_box = box;
292                     /* Flush if regions pending to be uploaded would be dirtied */
293                     if (p_atomic_read(&This->managed.pending_upload)) {
294                         u_box_intersect_1d(&box, &box, &This->managed.upload_pending_regions);
295                         if (box.width != 0)
296                             nine_csmt_process(This->base.base.device);
297                     }
298                 } else
299                     u_box_union_1d(&This->managed.dirty_box, &This->managed.dirty_box, &box);
300                 /* Tests trying to draw while the buffer is locked show that
301                  * SYSTEMMEM/MANAGED buffers are made dirty at Lock time */
302                 BASEBUF_REGISTER_UPDATE(This);
303             }
304         } else {
305             if (!(Flags & (D3DLOCK_READONLY|D3DLOCK_NOOVERWRITE)) &&
306                 p_atomic_read(&This->managed.pending_upload)) {
307                 This->managed.num_worker_thread_syncs++;
308                 /* If we sync too often, pick the vertex_uploader path */
309                 if (This->managed.num_worker_thread_syncs >= 3)
310                     This->managed.can_unsynchronized = false;
311                 nine_csmt_process(This->base.base.device);
312                 /* Note: AS DISCARD is not relevant for SYSTEMMEM,
313                  * NOOVERWRITE might have a similar meaning as what is
314                  * in D3D7 doc. Basically that data from previous draws
315                  * OF THIS FRAME are unaffected. As we flush csmt in Present(),
316                  * we should be correct. In some parts of the doc, the notion
317                  * of frame is implied to be related to Begin/EndScene(),
318                  * but tests show NOOVERWRITE after EndScene() doesn't flush
319                  * the csmt thread. */
320             }
321             This->managed.dirty = true;
322             u_box_1d(0, This->size, &This->managed.dirty_box); /* systemmem non-dynamic */
323             u_box_1d(0, 0, &This->managed.valid_region); /* systemmem dynamic */
324             BASEBUF_REGISTER_UPDATE(This);
325         }
326 
327         *ppbData = (int8_t *)This->managed.data + OffsetToLock;
328         DBG("returning pointer %p\n", *ppbData);
329         This->nlocks++;
330         return D3D_OK;
331     }
332 
333     /* Driver ddi doc: READONLY is never passed to the device. So it can only
334      * have effect on things handled by the driver (MANAGED pool for example).
335      * Msdn doc: DISCARD and NOOVERWRITE are only for DYNAMIC.
336      * ATI doc: You can use DISCARD and NOOVERWRITE without DYNAMIC.
337      * Msdn doc: D3DLOCK_DONOTWAIT is not among the valid flags for buffers.
338      * Our tests: On win 7 nvidia, D3DLOCK_DONOTWAIT does return
339      * D3DERR_WASSTILLDRAWING if the resource is in use, except for DYNAMIC.
340      * Our tests: some apps do use both DISCARD and NOOVERWRITE at the same
341      * time. On windows it seems to return different pointer in some conditions,
342      * creation flags and drivers. However these tests indicate having
343      * NOOVERWRITE win is a valid behaviour (NVidia).
344      */
345 
346     /* Have NOOVERWRITE win over DISCARD. This is allowed (see above) and
347      * it prevents overconsuming buffers if apps do use both at the same time. */
348     if ((Flags & (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE)) == (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE))
349         Flags &= ~D3DLOCK_DISCARD;
350 
351     if (Flags & D3DLOCK_DISCARD)
352         usage = PIPE_MAP_WRITE | PIPE_MAP_DISCARD_WHOLE_RESOURCE;
353     else if (Flags & D3DLOCK_NOOVERWRITE)
354         usage = PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED;
355     else
356         /* Do not ask for READ if writeonly and default pool (should be safe enough,
357          * as the doc says app shouldn't expect reading to work with writeonly). */
358         usage = (This->base.usage & D3DUSAGE_WRITEONLY) ?
359             PIPE_MAP_WRITE :
360             PIPE_MAP_READ_WRITE;
361     if (Flags & D3DLOCK_DONOTWAIT && !(This->base.usage & D3DUSAGE_DYNAMIC))
362         usage |= PIPE_MAP_DONTBLOCK;
363 
364     This->discard_nooverwrite_only &= !!(Flags & (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE));
365 
366     if (This->nmaps == This->maxmaps) {
367         struct NineTransfer *newmaps =
368             REALLOC(This->maps, sizeof(struct NineTransfer)*This->maxmaps,
369                     sizeof(struct NineTransfer)*(This->maxmaps << 1));
370         if (newmaps == NULL)
371             return E_OUTOFMEMORY;
372 
373         This->maxmaps <<= 1;
374         This->maps = newmaps;
375     }
376 
377     if (This->buf && !This->discard_nooverwrite_only) {
378         struct pipe_box src_box;
379         unsigned offset;
380         struct pipe_resource *src_res;
381         DBG("Disabling nine_subbuffer for a buffer having"
382             "used a nine_subbuffer buffer\n");
383         /* Copy buffer content to the buffer resource, which
384          * we will now use.
385          * Note: The behaviour may be different from what is expected
386          * with double lock. However applications can't really make expectations
387          * about double locks, and don't really use them, so that's ok. */
388         src_res = nine_upload_buffer_resource_and_offset(This->buf, &offset);
389         u_box_1d(offset, This->size, &src_box);
390 
391         pipe = NineDevice9_GetPipe(device);
392         pipe->resource_copy_region(pipe, This->base.resource, 0, 0, 0, 0,
393                                    src_res, 0, &src_box);
394         /* Release previous resource */
395         if (This->nmaps >= 1)
396             This->maps[This->nmaps-1].should_destroy_buf = true;
397         else
398             nine_upload_release_buffer(device->buffer_upload, This->buf);
399         This->buf = NULL;
400         /* Rebind buffer */
401         NineBuffer9_RebindIfRequired(This, device, This->base.resource, 0);
402     }
403 
404     This->maps[This->nmaps].transfer = NULL;
405     This->maps[This->nmaps].is_pipe_secondary = false;
406     This->maps[This->nmaps].buf = NULL;
407     This->maps[This->nmaps].should_destroy_buf = false;
408 
409     if (This->discard_nooverwrite_only) {
410         if (This->buf && (Flags & D3DLOCK_DISCARD)) {
411             /* Release previous buffer */
412             if (This->nmaps >= 1)
413                 This->maps[This->nmaps-1].should_destroy_buf = true;
414             else
415                 nine_upload_release_buffer(device->buffer_upload, This->buf);
416             This->buf = NULL;
417         }
418 
419         if (!This->buf) {
420             unsigned offset;
421             struct pipe_resource *res;
422             This->buf = nine_upload_create_buffer(device->buffer_upload, This->base.info.width0);
423             res = nine_upload_buffer_resource_and_offset(This->buf, &offset);
424             NineBuffer9_RebindIfRequired(This, device, res, offset);
425         }
426 
427         if (This->buf) {
428             This->maps[This->nmaps].buf = This->buf;
429             This->nmaps++;
430             This->nlocks++;
431             DBG("Returning %p\n", nine_upload_buffer_get_map(This->buf) + OffsetToLock);
432             *ppbData = nine_upload_buffer_get_map(This->buf) + OffsetToLock;
433             return D3D_OK;
434         } else {
435             /* Fallback to normal path, and don't try again */
436             This->discard_nooverwrite_only = false;
437         }
438     }
439 
440     /* Previous mappings may need pending commands to write to the
441      * buffer (staging buffer for example). Before a NOOVERWRITE,
442      * we thus need a finish, to guarantee any upload is finished.
443      * Note for discard_nooverwrite_only we don't need to do this
444      * check as neither discard nor nooverwrite have issues there */
445     if (This->need_sync_if_nooverwrite && !(Flags & D3DLOCK_DISCARD) &&
446         (Flags & D3DLOCK_NOOVERWRITE)) {
447         struct pipe_screen *screen = NineDevice9_GetScreen(device);
448         struct pipe_fence_handle *fence = NULL;
449 
450         pipe = NineDevice9_GetPipe(device);
451         pipe->flush(pipe, &fence, 0);
452         (void) screen->fence_finish(screen, NULL, fence, PIPE_TIMEOUT_INFINITE);
453         screen->fence_reference(screen, &fence, NULL);
454     }
455     This->need_sync_if_nooverwrite = !(Flags & (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE));
456 
457     /* When csmt is active, we want to avoid stalls as much as possible,
458      * and thus we want to create a new resource on discard and map it
459      * with the secondary pipe, instead of waiting on the main pipe. */
460     if (Flags & D3DLOCK_DISCARD && device->csmt_active) {
461         struct pipe_screen *screen = NineDevice9_GetScreen(device);
462         struct pipe_resource *new_res = nine_resource_create_with_retry(device, screen, &This->base.info);
463         if (new_res) {
464             /* Use the new resource */
465             pipe_resource_reference(&This->base.resource, new_res);
466             pipe_resource_reference(&new_res, NULL);
467             usage = PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED;
468             NineBuffer9_RebindIfRequired(This, device, This->base.resource, 0);
469             This->maps[This->nmaps].is_pipe_secondary = TRUE;
470         }
471     } else if (Flags & D3DLOCK_NOOVERWRITE && device->csmt_active)
472         This->maps[This->nmaps].is_pipe_secondary = TRUE;
473 
474     if (This->maps[This->nmaps].is_pipe_secondary)
475         pipe = device->pipe_secondary;
476     else
477         pipe = NineDevice9_GetPipe(device);
478 
479     data = pipe->buffer_map(pipe, This->base.resource, 0,
480                               usage, &box, &This->maps[This->nmaps].transfer);
481 
482     if (!data) {
483         DBG("pipe::buffer_map failed\n"
484             " usage = %x\n"
485             " box.x = %u\n"
486             " box.width = %u\n",
487             usage, box.x, box.width);
488 
489         if (Flags & D3DLOCK_DONOTWAIT)
490             return D3DERR_WASSTILLDRAWING;
491         return D3DERR_INVALIDCALL;
492     }
493 
494     DBG("returning pointer %p\n", data);
495     This->nmaps++;
496     This->nlocks++;
497     *ppbData = data;
498 
499     return D3D_OK;
500 }
501 
/* Release one user lock. d3d9 allows nested Lock/Unlock; the real unmap
 * of every accumulated mapping is deferred until the last outstanding
 * lock is released. Non-DEFAULT pools have nothing to unmap (Lock
 * returned a pointer into the system-memory copy). */
HRESULT NINE_WINAPI
NineBuffer9_Unlock( struct NineBuffer9 *This )
{
    struct NineDevice9 *device = This->base.base.device;
    struct pipe_context *pipe;
    int i;
    DBG("This=%p\n", This);

    user_assert(This->nlocks > 0, D3DERR_INVALIDCALL);
    This->nlocks--;
    if (This->nlocks > 0)
        return D3D_OK; /* Pending unlocks. Wait all unlocks before unmapping */

    if (This->base.pool == D3DPOOL_DEFAULT) {
        for (i = 0; i < This->nmaps; i++) {
            if (!This->maps[i].buf) {
                /* Regular transfer: unmap on whichever context mapped it. */
                pipe = This->maps[i].is_pipe_secondary ?
                    device->pipe_secondary :
                    nine_context_get_pipe_acquire(device);
                pipe->buffer_unmap(pipe, This->maps[i].transfer);
                /* We need to flush in case the driver does implicit copies */
                if (This->maps[i].is_pipe_secondary)
                    pipe->flush(pipe, NULL, 0);
                else
                    nine_context_get_pipe_release(device);
            } else if (This->maps[i].should_destroy_buf)
                nine_upload_release_buffer(device->buffer_upload, This->maps[i].buf);
        }
        This->nmaps = 0;
    }
    return D3D_OK;
}
534 
/* Mark the whole buffer dirty so the next draw re-uploads all of it.
 * Only valid for non-DEFAULT (MANAGED/SYSTEMMEM) pools, which have a
 * system-memory copy to upload from. */
void
NineBuffer9_SetDirty( struct NineBuffer9 *This )
{
    assert(This->base.pool != D3DPOOL_DEFAULT);

    This->managed.dirty = TRUE;
    u_box_1d(0, This->size, &This->managed.dirty_box);
    BASEBUF_REGISTER_UPDATE(This);
}
544 
545 /* Try to remove b from a, supposed to include b */
u_box_try_remove_region_1d(struct pipe_box * dst,const struct pipe_box * a,const struct pipe_box * b)546 static void u_box_try_remove_region_1d(struct pipe_box *dst,
547                                        const struct pipe_box *a,
548                                        const struct pipe_box *b)
549 {
550     int x, width;
551     if (a->x == b->x) {
552         x = a->x + b->width;
553         width = a->width - b->width;
554     } else if ((a->x + a->width) == (b->x + b->width)) {
555         x = a->x;
556         width = a->width - b->width;
557     } else {
558         x = a->x;
559         width = a->width;
560     }
561     dst->x = x;
562     dst->width = width;
563 }
564 
/* Upload the dirty region of a MANAGED/SYSTEMMEM buffer from its
 * system-memory copy to the GPU resource. Called at draw preparation.
 * For SYSTEMMEM+DYNAMIC, tracks valid/filled regions to upload only
 * the range the draw needs (This->managed.required_valid_region),
 * preferring PIPE_MAP_UNSYNCHRONIZED, then DISCARD, then falling back
 * to the vertex_uploader when conflicts would force syncs. Other pools
 * simply upload the accumulated dirty_box. */
void
NineBuffer9_Upload( struct NineBuffer9 *This )
{
    struct NineDevice9 *device = This->base.base.device;
    unsigned upload_flags = 0;
    struct pipe_box box_upload;

    assert(This->base.pool != D3DPOOL_DEFAULT && This->managed.dirty);

    if (This->base.pool == D3DPOOL_SYSTEMMEM && This->base.usage & D3DUSAGE_DYNAMIC) {
        struct pipe_box region_already_valid;
        struct pipe_box conflicting_region;
        struct pipe_box *valid_region = &This->managed.valid_region;
        struct pipe_box *required_valid_region = &This->managed.required_valid_region;
        struct pipe_box *filled_region = &This->managed.filled_region;
        /* Try to upload SYSTEMMEM DYNAMIC in an efficient fashion.
         * Unlike non-dynamic for which we upload the whole dirty region, try to
         * only upload the data needed for the draw. The draw call preparation
         * fills This->managed.required_valid_region for that */
        u_box_intersect_1d(&region_already_valid,
                           valid_region,
                           required_valid_region);
        /* If the required valid region is already valid, nothing to do */
        if (region_already_valid.x == required_valid_region->x &&
            region_already_valid.width == required_valid_region->width) {
            /* Rebind if the region happens to be valid in the original buffer
             * but we have since used vertex_uploader */
            if (!This->managed.can_unsynchronized)
                NineBuffer9_RebindIfRequired(This, device, This->base.resource, 0);
            u_box_1d(0, 0, required_valid_region);
            return;
        }
        /* (Try to) Remove valid areas from the region to upload */
        u_box_try_remove_region_1d(&box_upload,
                                   required_valid_region,
                                   &region_already_valid);
        assert(box_upload.width > 0);
        /* To maintain correctly the valid region, as we will do union later with
         * box_upload, we must ensure box_upload is consecutive with valid_region */
        if (box_upload.x > valid_region->x + valid_region->width && valid_region->width > 0) {
            /* Gap after valid_region: extend box_upload backwards to close it. */
            box_upload.width = box_upload.x + box_upload.width - (valid_region->x + valid_region->width);
            box_upload.x = valid_region->x + valid_region->width;
        } else if (box_upload.x + box_upload.width < valid_region->x && valid_region->width > 0) {
            /* Gap before valid_region: extend box_upload forwards to close it. */
            box_upload.width = valid_region->x - box_upload.x;
        }
        /* There is conflict if some areas, that are not valid but are filled for previous draw calls,
         * intersect with the region we plan to upload. Note by construction valid_region IS
         * included in filled_region, thus so is region_already_valid. */
        u_box_intersect_1d(&conflicting_region, &box_upload, filled_region);
        /* As box_upload could still contain region_already_valid, check the intersection
         * doesn't happen to be exactly region_already_valid (it cannot be smaller, see above) */
        if (This->managed.can_unsynchronized && (conflicting_region.width == 0 ||
            (conflicting_region.x == region_already_valid.x &&
             conflicting_region.width == region_already_valid.width))) {
            /* No conflicts. */
            upload_flags |= PIPE_MAP_UNSYNCHRONIZED;
        } else {
            /* We cannot use PIPE_MAP_UNSYNCHRONIZED. We must choose between no flag and DISCARD.
             * Criterias to discard:
             * . Most of the resource was filled (but some apps do allocate a big buffer
             * to only use a small part in a round fashion)
             * . The region to upload is very small compared to the filled region and
             * at the start of the buffer (hints at round usage starting again)
             * . The region to upload is very big compared to the required region
             * . We have not discarded yet this frame
             * If the buffer use pattern seems to sync the worker thread too often,
             * revert to the vertex_uploader */
            if (This->managed.num_worker_thread_syncs < 3 &&
                (filled_region->width > (This->size / 2) ||
                 (10 * box_upload.width < filled_region->width &&
                  box_upload.x < (filled_region->x + filled_region->width)/2) ||
                 box_upload.width > 2 * required_valid_region->width ||
                 This->managed.frame_count_last_discard != device->frame_count)) {
                /* Avoid DISCARDING too much by discarding only if most of the buffer
                 * has been used */
                DBG_FLAG(DBG_INDEXBUFFER|DBG_VERTEXBUFFER,
             "Uploading %p DISCARD: valid %d %d, filled %d %d, required %d %d, box_upload %d %d, required already_valid %d %d, conficting %d %d\n",
             This, valid_region->x, valid_region->width, filled_region->x, filled_region->width,
             required_valid_region->x, required_valid_region->width, box_upload.x, box_upload.width,
             region_already_valid.x, region_already_valid.width, conflicting_region.x, conflicting_region.width
                );
                upload_flags |= PIPE_MAP_DISCARD_WHOLE_RESOURCE;
                /* DISCARD invalidates the whole resource: reset tracking and
                 * upload the full required region. */
                u_box_1d(0, 0, filled_region);
                u_box_1d(0, 0, valid_region);
                box_upload = This->managed.required_valid_region;
                /* Rebind the buffer if we used intermediate alternative buffer */
                if (!This->managed.can_unsynchronized)
                    NineBuffer9_RebindIfRequired(This, device, This->base.resource, 0);
                This->managed.can_unsynchronized = true;
                This->managed.frame_count_last_discard = device->frame_count;
            } else {
                /* Once we use without UNSYNCHRONIZED, we cannot use it anymore.
                 * Use a different buffer. */
                unsigned buffer_offset = 0;
                struct pipe_resource *resource = NULL;
                This->managed.can_unsynchronized = false;
                u_upload_data(device->vertex_uploader,
                    required_valid_region->x,
                    required_valid_region->width,
                    64,
                    This->managed.data + required_valid_region->x,
                    &buffer_offset,
                    &resource);
                buffer_offset -= required_valid_region->x;
                u_upload_unmap(device->vertex_uploader);
                if (resource) {
                    NineBuffer9_RebindIfRequired(This, device, resource, buffer_offset);
                    /* Note: This only works because for these types of buffers this function
                     * is called before every draw call. Else it wouldn't work when the app
                     * rebinds buffers. In addition it needs this function to be called only
                     * once per buffers even if bound several times, which we do. */
                    u_box_1d(0, 0, required_valid_region);
                    pipe_resource_reference(&resource, NULL);
                    return;
                }
            }
        }

        u_box_union_1d(filled_region,
                       filled_region,
                       &box_upload);
        u_box_union_1d(valid_region,
                       valid_region,
                       &box_upload);
        u_box_1d(0, 0, required_valid_region);
    } else
        box_upload = This->managed.dirty_box;

    /* Full-buffer uploads can safely discard the old contents. */
    if (box_upload.x == 0 && box_upload.width == This->size) {
        upload_flags |= PIPE_MAP_DISCARD_WHOLE_RESOURCE;
    }

    /* Track regions queued in the csmt worker so Lock can detect
     * conflicts with not-yet-executed uploads. */
    if (This->managed.pending_upload) {
        u_box_union_1d(&This->managed.upload_pending_regions,
                       &This->managed.upload_pending_regions,
                       &box_upload);
    } else {
        This->managed.upload_pending_regions = box_upload;
    }

    DBG_FLAG(DBG_INDEXBUFFER|DBG_VERTEXBUFFER,
             "Uploading %p, offset=%d, size=%d, Flags=0x%x\n",
             This, box_upload.x, box_upload.width, upload_flags);
    nine_context_range_upload(device, &This->managed.pending_upload,
                              (struct NineUnknown *)This,
                              This->base.resource,
                              box_upload.x,
                              box_upload.width,
                              upload_flags,
                              (int8_t *)This->managed.data + box_upload.x);
    This->managed.dirty = FALSE;
}
717