/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/** @file glthread.c
 *
 * Support functions for the glthread feature of Mesa.
 *
 * In multicore systems, many applications end up CPU-bound with about half
 * their time spent inside their rendering thread and half inside Mesa.  To
 * alleviate this, we put a shim layer in Mesa at the GL dispatch level that
 * quickly logs the GL commands to a buffer to be processed by a worker
 * thread.
 */

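/* Rough call flow (a sketch of the mechanism described above, not an exact
 * trace): the application calls into the marshal dispatch table
 * (ctx->MarshalExec), which encodes each call into the current batch buffer;
 * _mesa_glthread_flush_batch() hands the batch to the worker queue, where
 * glthread_unmarshal_batch() replays it through _mesa_unmarshal_dispatch
 * into the server dispatch (ctx->CurrentServerDispatch).
 */
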
#include "main/mtypes.h"
#include "main/glthread.h"
#include "main/glthread_marshal.h"
#include "main/hash.h"
#include "util/u_atomic.h"
#include "util/u_thread.h"
#include "util/u_cpu_detect.h"


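/* Worker-thread job: replay one batch of marshaled commands. It installs the
 * server dispatch, holds the shared buffer-object and texture mutexes for the
 * whole batch, and decodes each command via _mesa_unmarshal_dispatch.
 */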
static void
glthread_unmarshal_batch(void *job, void *gdata, int thread_index)
{
   struct glthread_batch *batch = (struct glthread_batch*)job;
   struct gl_context *ctx = batch->ctx;
   unsigned pos = 0;
   unsigned used = batch->used;
   uint64_t *buffer = batch->buffer;
   const uint64_t *last = &buffer[used];

   _glapi_set_dispatch(ctx->CurrentServerDispatch);

   _mesa_HashLockMutex(ctx->Shared->BufferObjects);
   ctx->BufferObjectsLocked = true;
   mtx_lock(&ctx->Shared->TexMutex);
   ctx->TexturesLocked = true;

   while (pos < used) {
      const struct marshal_cmd_base *cmd =
         (const struct marshal_cmd_base *)&buffer[pos];

      pos += _mesa_unmarshal_dispatch[cmd->cmd_id](ctx, cmd, last);
   }

   ctx->TexturesLocked = false;
   mtx_unlock(&ctx->Shared->TexMutex);
   ctx->BufferObjectsLocked = false;
   _mesa_HashUnlockMutex(ctx->Shared->BufferObjects);

   assert(pos == used);
   batch->used = 0;

   unsigned batch_index = batch - ctx->GLThread.batches;
   /* Atomically set this to -1 if it's equal to batch_index. */
   p_atomic_cmpxchg(&ctx->GLThread.LastProgramChangeBatch, batch_index, -1);
   p_atomic_cmpxchg(&ctx->GLThread.LastDListChangeBatchIndex, batch_index, -1);
}

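/* One-time worker-thread setup: make the context current as a background
 * context and point glapi at it before any batches are executed.
 */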
static void
glthread_thread_initialization(void *job, void *gdata, int thread_index)
{
   struct gl_context *ctx = (struct gl_context*)job;

   ctx->Driver.SetBackgroundContext(ctx, &ctx->GLThread.stats);
   _glapi_set_context(ctx);
}

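/* Enable glthread for a context: create the worker queue, the marshal
 * dispatch table, and the VAO tracking state, then run the thread
 * initialization job and wait for it to finish.
 */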
void
_mesa_glthread_init(struct gl_context *ctx)
{
   struct glthread_state *glthread = &ctx->GLThread;

   assert(!glthread->enabled);

   if (!util_queue_init(&glthread->queue, "gl", MARSHAL_MAX_BATCHES - 2,
                        1, 0, NULL)) {
      return;
   }

   glthread->VAOs = _mesa_NewHashTable();
   if (!glthread->VAOs) {
      util_queue_destroy(&glthread->queue);
      return;
   }

   _mesa_glthread_reset_vao(&glthread->DefaultVAO);
   glthread->CurrentVAO = &glthread->DefaultVAO;

   ctx->MarshalExec = _mesa_create_marshal_table(ctx);
   if (!ctx->MarshalExec) {
      _mesa_DeleteHashTable(glthread->VAOs);
      util_queue_destroy(&glthread->queue);
      return;
   }

   for (unsigned i = 0; i < MARSHAL_MAX_BATCHES; i++) {
      glthread->batches[i].ctx = ctx;
      util_queue_fence_init(&glthread->batches[i].fence);
   }
   glthread->next_batch = &glthread->batches[glthread->next];
   glthread->used = 0;

   glthread->enabled = true;
   glthread->stats.queue = &glthread->queue;

   glthread->SupportsBufferUploads =
      ctx->Const.BufferCreateMapUnsynchronizedThreadSafe &&
      ctx->Const.AllowMappedBuffersDuringExecution;

   /* If the draw start index is non-zero, glthread can upload to offset 0,
    * which means the attrib offset has to be -(first * stride).
    * So require signed vertex buffer offsets.
    */
   glthread->SupportsNonVBOUploads = glthread->SupportsBufferUploads &&
                                     ctx->Const.VertexBufferOffsetIsInt32;

   ctx->CurrentClientDispatch = ctx->MarshalExec;

   glthread->LastDListChangeBatchIndex = -1;

   /* Execute the thread initialization function in the thread. */
   struct util_queue_fence fence;
   util_queue_fence_init(&fence);
   util_queue_add_job(&glthread->queue, ctx, &fence,
                      glthread_thread_initialization, NULL, 0);
   util_queue_fence_wait(&fence);
   util_queue_fence_destroy(&fence);
}

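/* Hash-table callback used on destroy: the glthread VAO tracking entries are
 * plain heap allocations, so freeing them is enough.
 */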
static void
free_vao(void *data, UNUSED void *userData)
{
   free(data);
}

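/* Tear down glthread for a context: drain the queue, destroy it together
 * with the per-batch fences and the VAO table, then restore the direct
 * (non-marshalling) dispatch.
 */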
void
_mesa_glthread_destroy(struct gl_context *ctx)
{
   struct glthread_state *glthread = &ctx->GLThread;

   if (!glthread->enabled)
      return;

   _mesa_glthread_finish(ctx);
   util_queue_destroy(&glthread->queue);

   for (unsigned i = 0; i < MARSHAL_MAX_BATCHES; i++)
      util_queue_fence_destroy(&glthread->batches[i].fence);

   _mesa_HashDeleteAll(glthread->VAOs, free_vao, NULL);
   _mesa_DeleteHashTable(glthread->VAOs);

   ctx->GLThread.enabled = false;

   _mesa_glthread_restore_dispatch(ctx, "destroy");
}

void
_mesa_glthread_restore_dispatch(struct gl_context *ctx, const char *func)
{
   /* Remove ourselves from the dispatch table, unless another ctx/thread has
    * already installed a new dispatch table.
    *
    * Typically glxMakeCurrent will bind a new context (installing its table)
    * and the old context might then be deleted.
    */
   if (_glapi_get_dispatch() == ctx->MarshalExec) {
       ctx->CurrentClientDispatch = ctx->CurrentServerDispatch;
       _glapi_set_dispatch(ctx->CurrentClientDispatch);
#if 0
       printf("glthread disabled: %s\n", func);
#endif
   }
}

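/* Synchronize with the worker thread and switch this thread back to the
 * direct dispatch table.
 */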
void
_mesa_glthread_disable(struct gl_context *ctx, const char *func)
{
   _mesa_glthread_finish_before(ctx, func);
   _mesa_glthread_restore_dispatch(ctx, func);
}

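/* Submit the batch that is currently being filled to the worker queue and
 * move on to the next one. No-op if glthread is disabled or the batch is
 * empty.
 */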
void
_mesa_glthread_flush_batch(struct gl_context *ctx)
{
   struct glthread_state *glthread = &ctx->GLThread;
   if (!glthread->enabled)
      return;

   if (!glthread->used)
      return;

   /* Pin threads regularly to the same Zen CCX that the main thread is
    * running on. The main thread can move between CCXs.
    */
   if (util_get_cpu_caps()->num_L3_caches > 1 &&
       /* driver support */
       ctx->Driver.PinDriverToL3Cache &&
       ++glthread->pin_thread_counter % 128 == 0) {
      int cpu = util_get_current_cpu();

      if (cpu >= 0) {
         uint16_t L3_cache = util_get_cpu_caps()->cpu_to_L3[cpu];
         if (L3_cache != U_CPU_INVALID_L3) {
            util_set_thread_affinity(glthread->queue.threads[0],
                                     util_get_cpu_caps()->L3_affinity_mask[L3_cache],
                                     NULL, util_get_cpu_caps()->num_cpu_mask_bits);
            ctx->Driver.PinDriverToL3Cache(ctx, L3_cache);
         }
      }
   }

   struct glthread_batch *next = glthread->next_batch;

   /* Debug: execute the batch immediately from this thread.
    *
    * Note that glthread_unmarshal_batch() changes the dispatch table so we'll
    * need to restore it when it returns.
    */
   if (false) {
      glthread_unmarshal_batch(next, NULL, 0);
      _glapi_set_dispatch(ctx->CurrentClientDispatch);
      return;
   }

   p_atomic_add(&glthread->stats.num_offloaded_items, glthread->used);
   next->used = glthread->used;

   util_queue_add_job(&glthread->queue, next, &next->fence,
                      glthread_unmarshal_batch, NULL, 0);
   glthread->last = glthread->next;
   glthread->next = (glthread->next + 1) % MARSHAL_MAX_BATCHES;
   glthread->next_batch = &glthread->batches[glthread->next];
   glthread->used = 0;
}

/**
 * Waits until all pending batches have been unmarshaled.
 *
 * This can be used by the main thread to synchronize access to the context,
 * since the worker thread will be idle after this.
 */
void
_mesa_glthread_finish(struct gl_context *ctx)
{
   struct glthread_state *glthread = &ctx->GLThread;
   if (!glthread->enabled)
      return;

   /* If this is called from the worker thread, then we've hit a path that
    * might be called from either the main thread or the worker (such as some
    * dri interface entrypoints), in which case we don't need to actually
    * synchronize against ourself.
    */
   if (u_thread_is_self(glthread->queue.threads[0]))
      return;

   struct glthread_batch *last = &glthread->batches[glthread->last];
   struct glthread_batch *next = glthread->next_batch;
   bool synced = false;

   if (!util_queue_fence_is_signalled(&last->fence)) {
      util_queue_fence_wait(&last->fence);
      synced = true;
   }

   if (glthread->used) {
      p_atomic_add(&glthread->stats.num_direct_items, glthread->used);
      next->used = glthread->used;
      glthread->used = 0;

      /* Since glthread_unmarshal_batch changes the dispatch to direct,
       * restore it after it's done.
       */
      struct _glapi_table *dispatch = _glapi_get_dispatch();
      glthread_unmarshal_batch(next, NULL, 0);
      _glapi_set_dispatch(dispatch);

      /* It's not a sync because we don't enqueue partial batches, but
       * it would be a sync if we did. So count it anyway.
       */
      synced = true;
   }

   if (synced)
      p_atomic_inc(&glthread->stats.num_syncs);
}

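/* Wait for the worker before a call that must execute synchronously
 * (the "fallback to sync" case noted below).
 */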
void
_mesa_glthread_finish_before(struct gl_context *ctx, const char *func)
{
   _mesa_glthread_finish(ctx);

   /* Uncomment this if you want to know where glthread syncs. */
   /*printf("fallback to sync: %s\n", func);*/
}

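/* Record a GL error in a way that is safe while glthread is enabled: marshal
 * InternalSetError when called on the glthread path, otherwise format the
 * message and call _mesa_error() directly.
 */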
void
_mesa_error_glthread_safe(struct gl_context *ctx, GLenum error, bool glthread,
                          const char *format, ...)
{
   if (glthread) {
      _mesa_marshal_InternalSetError(error);
   } else {
      char s[MAX_DEBUG_MESSAGE_LENGTH];
      va_list args;

      va_start(args, format);
      ASSERTED size_t len = vsnprintf(s, MAX_DEBUG_MESSAGE_LENGTH, format, args);
      va_end(args);

      /* Whoever calls _mesa_error should use shorter strings. */
      assert(len < MAX_DEBUG_MESSAGE_LENGTH);

      _mesa_error(ctx, error, "%s", s);
   }
}