/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "main/glthread_marshal.h"
#include "main/dispatch.h"
#include "main/bufferobj.h"

/**
 * Create an upload buffer. This is called from the app thread, so everything
 * has to be thread-safe in the driver.
 */
static struct gl_buffer_object *
new_upload_buffer(struct gl_context *ctx, GLsizeiptr size, uint8_t **ptr)
{
   assert(ctx->GLThread.SupportsBufferUploads);

   struct gl_buffer_object *obj = ctx->Driver.NewBufferObject(ctx, -1);
   if (!obj)
      return NULL;

   obj->Immutable = true;

   if (!ctx->Driver.BufferData(ctx, GL_ARRAY_BUFFER, size, NULL,
                               GL_WRITE_ONLY,
                               GL_CLIENT_STORAGE_BIT | GL_MAP_WRITE_BIT,
                               obj)) {
      ctx->Driver.DeleteBuffer(ctx, obj);
      return NULL;
   }

   *ptr = ctx->Driver.MapBufferRange(ctx, 0, size,
                                     GL_MAP_WRITE_BIT |
                                     GL_MAP_UNSYNCHRONIZED_BIT |
                                     MESA_MAP_THREAD_SAFE_BIT,
                                     obj, MAP_GLTHREAD);
   if (!*ptr) {
      ctx->Driver.DeleteBuffer(ctx, obj);
      return NULL;
   }

   return obj;
}

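/**
 * Copy "size" bytes from "data" into the current upload buffer, allocating
 * a new one if needed. On success, *out_buffer and *out_offset identify
 * where the data landed, and the caller receives one buffer reference.
 * If "data" is NULL, *out_ptr instead returns a CPU pointer into the
 * mapping for the caller to fill. On failure, *out_buffer stays NULL.
 *
 * A caller typically does something like this (hypothetical sketch):
 *
 *    struct gl_buffer_object *buf = NULL;
 *    unsigned offset = 0;
 *    _mesa_glthread_upload(ctx, data, size, &offset, &buf, NULL);
 *    if (!buf)
 *       ... fall back to a synchronous path ...
 */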
void
_mesa_glthread_upload(struct gl_context *ctx, const void *data,
                      GLsizeiptr size, unsigned *out_offset,
                      struct gl_buffer_object **out_buffer,
                      uint8_t **out_ptr)
{
   struct glthread_state *glthread = &ctx->GLThread;
   const unsigned default_size = 1024 * 1024;

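   /* Sizes that are too large are not uploaded; *out_buffer stays NULL and
    * callers fall back to a synchronous path.
    */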
   if (unlikely(size > INT_MAX))
      return;

   /* The alignment was chosen arbitrarily. */
   unsigned offset = align(glthread->upload_offset, 8);

   /* Allocate a new buffer if needed. */
   if (unlikely(!glthread->upload_buffer || offset + size > default_size)) {
      /* If the size is greater than the buffer size, allocate a separate
       * buffer just for this upload.
       */
      if (unlikely(size > default_size)) {
         uint8_t *ptr;

         assert(*out_buffer == NULL);
         *out_buffer = new_upload_buffer(ctx, size, &ptr);
         if (!*out_buffer)
            return;

         *out_offset = 0;
         if (data)
            memcpy(ptr, data, size);
         else
            *out_ptr = ptr;
         return;
      }

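      /* Release the references we didn't hand out for the previous buffer;
       * see the comment below on upload_buffer_private_refcount.
       */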
      if (glthread->upload_buffer_private_refcount > 0) {
         p_atomic_add(&glthread->upload_buffer->RefCount,
                      -glthread->upload_buffer_private_refcount);
         glthread->upload_buffer_private_refcount = 0;
      }
      _mesa_reference_buffer_object(ctx, &glthread->upload_buffer, NULL);
      glthread->upload_buffer =
         new_upload_buffer(ctx, default_size, &glthread->upload_ptr);
      glthread->upload_offset = 0;
      offset = 0;

      /* If allocation failed, bail out before dereferencing the buffer;
       * the caller will see *out_buffer == NULL.
       */
      if (!glthread->upload_buffer)
         return;

      /* Atomic operations are very slow when two threads don't share one
       * L3 cache (which can happen on AMD Zen), so avoid them as follows:
       *
       * This function has to return a buffer reference to the caller.
       * Instead of doing an atomic increment for every call, it does all
       * possible future increments in advance when the upload buffer is
       * allocated. The maximum number of times the function can be called
       * per upload buffer is default_size, because the minimum allocation
       * size is 1. Therefore the function can return at most default_size
       * references, so we will never need more. This is the number that is
       * added to RefCount at allocation.
       *
       * upload_buffer_private_refcount tracks how many buffer references
       * are left to return to callers. If the buffer is full and there are
       * still references left, they are atomically subtracted from RefCount
       * before the buffer is unreferenced.
       *
       * This can increase performance by 20%.
       */
      glthread->upload_buffer->RefCount += default_size;
      glthread->upload_buffer_private_refcount = default_size;
   }

   /* Upload data. */
   if (data)
      memcpy(glthread->upload_ptr + offset, data, size);
   else
      *out_ptr = glthread->upload_ptr + offset;

   glthread->upload_offset = offset + size;
   *out_offset = offset;

   assert(*out_buffer == NULL);
   assert(glthread->upload_buffer_private_refcount > 0);
   *out_buffer = glthread->upload_buffer;
   glthread->upload_buffer_private_refcount--;
}

/** Tracks the current bindings for the vertex array and index array buffers.
 *
 * This is part of what we need to enable glthread on compat-GL contexts that
 * happen to use VBOs, without also supporting the full tracking of VBO vs
 * user vertex array bindings per attribute on each vertex array for
 * determining what to upload at draw call time.
 *
 * Note that in core GL, binding a buffer with an invalid handle in the
 * "buffer" parameter throws an error, so a glVertexAttribPointer() that
 * follows might not end up pointing at a VBO. However, in core GL the draw
 * call would throw an error as well, so we don't really care if our tracking
 * is wrong for this case -- we never need to marshal user data for draw
 * calls, and the unmarshal will just generate an error or not as appropriate.
 *
 * For compatibility GL, we do need to accurately know whether the draw call
 * on the unmarshal side will dereference a user pointer or load data from a
 * VBO per vertex. That would make it seem like we need to track whether a
 * "buffer" is valid, so that we can know when an error will be generated
 * instead of updating the binding. However, compat GL has the ridiculous
 * feature that passing a bad name just gens a buffer object for you, so we
 * escape without having to know whether names are valid.
 */
void
_mesa_glthread_BindBuffer(struct gl_context *ctx, GLenum target, GLuint buffer)
{
   struct glthread_state *glthread = &ctx->GLThread;

   switch (target) {
   case GL_ARRAY_BUFFER:
      glthread->CurrentArrayBufferName = buffer;
      break;
   case GL_ELEMENT_ARRAY_BUFFER:
      /* The current element array buffer binding is actually tracked in the
       * vertex array object instead of the context, so this would need to
       * change on vertex array object updates.
       */
      glthread->CurrentVAO->CurrentElementBufferName = buffer;
      break;
   case GL_DRAW_INDIRECT_BUFFER:
      glthread->CurrentDrawIndirectBufferName = buffer;
      break;
   }
}

void
_mesa_glthread_DeleteBuffers(struct gl_context *ctx, GLsizei n,
                             const GLuint *buffers)
{
   struct glthread_state *glthread = &ctx->GLThread;

   /* Reject a negative count here: "i < n" below compares against an
    * unsigned index, so a negative n would loop over garbage.
    */
   if (!buffers || n < 0)
      return;

   for (unsigned i = 0; i < n; i++) {
      GLuint id = buffers[i];

      if (id == glthread->CurrentArrayBufferName)
         _mesa_glthread_BindBuffer(ctx, GL_ARRAY_BUFFER, 0);
      if (id == glthread->CurrentVAO->CurrentElementBufferName)
         _mesa_glthread_BindBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, 0);
      if (id == glthread->CurrentDrawIndirectBufferName)
         _mesa_glthread_BindBuffer(ctx, GL_DRAW_INDIRECT_BUFFER, 0);
   }
}

/* BufferData: marshalled asynchronously */
struct marshal_cmd_BufferData
{
   struct marshal_cmd_base cmd_base;
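   /* GLenum target for BufferData; buffer name for the Named* variants */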
   GLuint target_or_name;
   GLsizeiptr size;
   GLenum usage;
   const GLvoid *data_external_mem;
   bool data_null; /* If set, no data follows for "data" */
   bool named;
   bool ext_dsa;
   /* Next size bytes are GLubyte data[size] */
};

void
_mesa_unmarshal_BufferData(struct gl_context *ctx,
                           const struct marshal_cmd_BufferData *cmd)
{
   const GLuint target_or_name = cmd->target_or_name;
   const GLsizei size = cmd->size;
   const GLenum usage = cmd->usage;
   const void *data;

   if (cmd->data_null)
      data = NULL;
   else if (!cmd->named && target_or_name == GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD)
      data = cmd->data_external_mem;
   else
      data = (const void *) (cmd + 1);

   if (cmd->ext_dsa) {
      CALL_NamedBufferDataEXT(ctx->CurrentServerDispatch,
                              (target_or_name, size, data, usage));
   } else if (cmd->named) {
      CALL_NamedBufferData(ctx->CurrentServerDispatch,
                           (target_or_name, size, data, usage));
   } else {
      CALL_BufferData(ctx->CurrentServerDispatch,
                      (target_or_name, size, data, usage));
   }
}

void
_mesa_unmarshal_NamedBufferData(struct gl_context *ctx,
                                const struct marshal_cmd_NamedBufferData *cmd)
{
   unreachable("never used - all BufferData variants use DISPATCH_CMD_BufferData");
}

void
_mesa_unmarshal_NamedBufferDataEXT(struct gl_context *ctx,
                                   const struct marshal_cmd_NamedBufferDataEXT *cmd)
{
   unreachable("never used - all BufferData variants use DISPATCH_CMD_BufferData");
}

static void
_mesa_marshal_BufferData_merged(GLuint target_or_name, GLsizeiptr size,
                                const GLvoid *data, GLenum usage, bool named,
                                bool ext_dsa, const char *func)
{
   GET_CURRENT_CONTEXT(ctx);
   bool external_mem = !named &&
                       target_or_name == GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD;
   bool copy_data = data && !external_mem;
   int cmd_size = sizeof(struct marshal_cmd_BufferData) + (copy_data ? size : 0);

   if (unlikely(size < 0 || size > INT_MAX || cmd_size < 0 ||
                cmd_size > MARSHAL_MAX_CMD_SIZE ||
                (named && target_or_name == 0))) {
      _mesa_glthread_finish_before(ctx, func);
      if (named) {
         CALL_NamedBufferData(ctx->CurrentServerDispatch,
                              (target_or_name, size, data, usage));
      } else {
         CALL_BufferData(ctx->CurrentServerDispatch,
                         (target_or_name, size, data, usage));
      }
      return;
   }

   struct marshal_cmd_BufferData *cmd =
      _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_BufferData,
                                      cmd_size);

   cmd->target_or_name = target_or_name;
   cmd->size = size;
   cmd->usage = usage;
   cmd->data_null = !data;
   cmd->named = named;
   cmd->ext_dsa = ext_dsa;
   cmd->data_external_mem = data;

   if (copy_data) {
      char *variable_data = (char *) (cmd + 1);
      memcpy(variable_data, data, size);
   }
}

void GLAPIENTRY
_mesa_marshal_BufferData(GLenum target, GLsizeiptr size, const GLvoid *data,
                         GLenum usage)
{
   _mesa_marshal_BufferData_merged(target, size, data, usage, false, false,
                                   "BufferData");
}

void GLAPIENTRY
_mesa_marshal_NamedBufferData(GLuint buffer, GLsizeiptr size,
                              const GLvoid *data, GLenum usage)
{
   _mesa_marshal_BufferData_merged(buffer, size, data, usage, true, false,
                                   "NamedBufferData");
}

void GLAPIENTRY
_mesa_marshal_NamedBufferDataEXT(GLuint buffer, GLsizeiptr size,
                                 const GLvoid *data, GLenum usage)
{
   _mesa_marshal_BufferData_merged(buffer, size, data, usage, true, true,
                                   "NamedBufferDataEXT");
}


/* BufferSubData: marshalled asynchronously */
struct marshal_cmd_BufferSubData
{
   struct marshal_cmd_base cmd_base;
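   /* GLenum target for BufferSubData; buffer name for the Named* variants */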
   GLenum target_or_name;
   GLintptr offset;
   GLsizeiptr size;
   bool named;
   bool ext_dsa;
   /* Next size bytes are GLubyte data[size] */
};

void
_mesa_unmarshal_BufferSubData(struct gl_context *ctx,
                              const struct marshal_cmd_BufferSubData *cmd)
{
   const GLenum target_or_name = cmd->target_or_name;
   const GLintptr offset = cmd->offset;
   const GLsizeiptr size = cmd->size;
   const void *data = (const void *) (cmd + 1);

   if (cmd->ext_dsa) {
      CALL_NamedBufferSubDataEXT(ctx->CurrentServerDispatch,
                                 (target_or_name, offset, size, data));
   } else if (cmd->named) {
      CALL_NamedBufferSubData(ctx->CurrentServerDispatch,
                              (target_or_name, offset, size, data));
   } else {
      CALL_BufferSubData(ctx->CurrentServerDispatch,
                         (target_or_name, offset, size, data));
   }
}

void
_mesa_unmarshal_NamedBufferSubData(struct gl_context *ctx,
                                   const struct marshal_cmd_NamedBufferSubData *cmd)
{
   unreachable("never used - all BufferSubData variants use DISPATCH_CMD_BufferSubData");
}

void
_mesa_unmarshal_NamedBufferSubDataEXT(struct gl_context *ctx,
                                      const struct marshal_cmd_NamedBufferSubDataEXT *cmd)
{
   unreachable("never used - all BufferSubData variants use DISPATCH_CMD_BufferSubData");
}

static void
_mesa_marshal_BufferSubData_merged(GLuint target_or_name, GLintptr offset,
                                   GLsizeiptr size, const GLvoid *data,
                                   bool named, bool ext_dsa, const char *func)
{
   GET_CURRENT_CONTEXT(ctx);
   size_t cmd_size = sizeof(struct marshal_cmd_BufferSubData) + size;

   /* Fast path: Copy the data to an upload buffer, and use the GPU
    * to copy the uploaded data to the destination buffer.
    */
   /* TODO: Handle offset == 0 && size < buffer_size.
    * If offset == 0 and size == buffer_size, it's better to discard
    * the buffer storage, but we don't know the buffer size in glthread.
    */
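   /* "offset > 0" below deliberately skips this path for offset == 0; see
    * the TODO above -- that case may want storage invalidation instead.
    */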
   if (ctx->GLThread.SupportsBufferUploads &&
       data && offset > 0 && size > 0) {
      struct gl_buffer_object *upload_buffer = NULL;
      unsigned upload_offset = 0;

      _mesa_glthread_upload(ctx, data, size, &upload_offset, &upload_buffer,
                            NULL);

      if (upload_buffer) {
         _mesa_marshal_InternalBufferSubDataCopyMESA((GLintptr)upload_buffer,
                                                     upload_offset,
                                                     target_or_name,
                                                     offset, size, named,
                                                     ext_dsa);
         return;
      }
   }

   /* cmd_size is size_t, so no "cmd_size < 0" check is needed here. */
   if (unlikely(size < 0 || size > INT_MAX ||
                cmd_size > MARSHAL_MAX_CMD_SIZE || !data ||
                (named && target_or_name == 0))) {
      _mesa_glthread_finish_before(ctx, func);
      if (named) {
         CALL_NamedBufferSubData(ctx->CurrentServerDispatch,
                                 (target_or_name, offset, size, data));
      } else {
         CALL_BufferSubData(ctx->CurrentServerDispatch,
                            (target_or_name, offset, size, data));
      }
      return;
   }

   struct marshal_cmd_BufferSubData *cmd =
      _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_BufferSubData,
                                      cmd_size);
   cmd->target_or_name = target_or_name;
   cmd->offset = offset;
   cmd->size = size;
   cmd->named = named;
   cmd->ext_dsa = ext_dsa;

   char *variable_data = (char *) (cmd + 1);
   memcpy(variable_data, data, size);
}

void GLAPIENTRY
_mesa_marshal_BufferSubData(GLenum target, GLintptr offset, GLsizeiptr size,
                            const GLvoid *data)
{
   _mesa_marshal_BufferSubData_merged(target, offset, size, data, false,
                                      false, "BufferSubData");
}

void GLAPIENTRY
_mesa_marshal_NamedBufferSubData(GLuint buffer, GLintptr offset,
                                 GLsizeiptr size, const GLvoid *data)
{
   _mesa_marshal_BufferSubData_merged(buffer, offset, size, data, true,
                                      false, "NamedBufferSubData");
}

void GLAPIENTRY
_mesa_marshal_NamedBufferSubDataEXT(GLuint buffer, GLintptr offset,
                                    GLsizeiptr size, const GLvoid *data)
{
   _mesa_marshal_BufferSubData_merged(buffer, offset, size, data, true,
                                      true, "NamedBufferSubDataEXT");
}