/*
 * Copyright (C) 2012 Rob Clark
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark
 */

#include "freedreno_context.h"
#include "ir3/ir3_cache.h"
#include "util/u_upload_mgr.h"
#include "freedreno_blitter.h"
#include "freedreno_draw.h"
#include "freedreno_fence.h"
#include "freedreno_gmem.h"
#include "freedreno_program.h"
#include "freedreno_query.h"
#include "freedreno_query_hw.h"
#include "freedreno_resource.h"
#include "freedreno_state.h"
#include "freedreno_texture.h"
#include "freedreno_util.h"
#include "freedreno_tracepoints.h"
#include "util/u_trace_gallium.h"

/**
 * pipe_context::flush hook (installed by fd_context_init()).
 *
 * Flushes the current batch (or, with reordering enabled, the batch cache)
 * and, if @fencep is non-NULL, hands back a reference to a fence for the
 * flushed work.  If there is no current batch and no fence was requested,
 * only the batch cache is flushed (when reordering is enabled) and the
 * function returns early.
 *
 * @pctx:   the context being flushed
 * @fencep: optional in/out fence pointer; with TC_FLUSH_ASYNC it carries
 *          the fence pre-created by the front-end thread
 * @flags:  PIPE_FLUSH_x / TC_FLUSH_ASYNC flags
 */
static void
fd_context_flush(struct pipe_context *pctx, struct pipe_fence_handle **fencep,
                 unsigned flags) in_dt
{
   struct fd_context *ctx = fd_context(pctx);
   struct pipe_fence_handle *fence = NULL;
   struct fd_batch *batch = NULL;

   /* We want to lookup current batch if it exists, but not create a new
    * one if not (unless we need a fence)
    */
   fd_batch_reference(&batch, ctx->batch);

   DBG("%p: flush: flags=%x, fencep=%p", batch, flags, fencep);

   if (fencep && !batch) {
      batch = fd_context_batch(ctx);
   } else if (!batch) {
      if (ctx->screen->reorder)
         fd_bc_flush(ctx, flags & PIPE_FLUSH_DEFERRED);
      fd_bc_dump(ctx, "%p: NULL batch, remaining:\n", ctx);
      return;
   }

   /* With TC_FLUSH_ASYNC, the fence will have been pre-created from
    * the front-end thread.  But not yet associated with a batch,
    * because we cannot safely access ctx->batch outside of the driver
    * thread.  So instead, replace the existing batch->fence with the
    * one created earlier
    */
   if ((flags & TC_FLUSH_ASYNC) && fencep) {
      /* We don't currently expect async+flush in the fence-fd
       * case.. for that to work properly we'd need TC to tell
       * us in the create_fence callback that it needs an fd.
       */
      assert(!(flags & PIPE_FLUSH_FENCE_FD));

      fd_fence_set_batch(*fencep, batch);
      fd_fence_ref(&batch->fence, *fencep);

      /* If we have nothing to flush, update the pre-created unflushed
       * fence with the current state of the last-fence:
       */
      if (ctx->last_fence) {
         fd_fence_repopulate(*fencep, ctx->last_fence);
         fd_fence_ref(&fence, *fencep);
         fd_bc_dump(ctx, "%p: (deferred) reuse last_fence, remaining:\n", ctx);
         goto out;
      }

      /* async flush is not compatible with deferred flush, since
       * nothing triggers the batch flush which fence_flush() would
       * be waiting for
       */
      flags &= ~PIPE_FLUSH_DEFERRED;
   } else if (!batch->fence) {
      batch->fence = fd_fence_create(batch);
   }

   /* In some sequence of events, we can end up with a last_fence that is
    * not an "fd" fence, which results in eglDupNativeFenceFDANDROID()
    * errors.
    */
   if ((flags & PIPE_FLUSH_FENCE_FD) && ctx->last_fence &&
       !fd_fence_is_fd(ctx->last_fence))
      fd_fence_ref(&ctx->last_fence, NULL);

   /* if no rendering since last flush, ie. app just decided it needed
    * a fence, re-use the last one:
    */
   if (ctx->last_fence) {
      fd_fence_ref(&fence, ctx->last_fence);
      fd_bc_dump(ctx, "%p: reuse last_fence, remaining:\n", ctx);
      goto out;
   }

   /* Take a ref to the batch's fence (batch can be unref'd when flushed): */
   fd_fence_ref(&fence, batch->fence);

   if (flags & PIPE_FLUSH_FENCE_FD)
      fence->submit_fence.use_fence_fd = true;

   fd_bc_dump(ctx, "%p: flushing %p<%u>, flags=0x%x, pending:\n", ctx,
              batch, batch->seqno, flags);

   /* If we get here, we need to flush for a fence, even if there is
    * no rendering yet:
    */
   batch->needs_flush = true;

   if (!ctx->screen->reorder) {
      fd_batch_flush(batch);
   } else {
      fd_bc_flush(ctx, flags & PIPE_FLUSH_DEFERRED);
   }

   fd_bc_dump(ctx, "%p: remaining:\n", ctx);

out:
   /* Hand the fence to the caller (if requested) and remember it as the
    * context's last fence, then drop our local references:
    */
   if (fencep)
      fd_fence_ref(fencep, fence);

   fd_fence_ref(&ctx->last_fence, fence);

   fd_fence_ref(&fence, NULL);

   fd_batch_reference(&batch, NULL);

   u_trace_context_process(&ctx->trace_context,
                           !!(flags & PIPE_FLUSH_END_OF_FRAME));
}

/**
 * pipe_context::texture_barrier hook.
 *
 * When @flags is exactly PIPE_TEXTURE_BARRIER_FRAMEBUFFER and the context
 * provides a framebuffer_barrier callback, that callback is used.  In all
 * other cases this falls back to a full context flush.
 */
static void
fd_texture_barrier(struct pipe_context *pctx, unsigned flags) in_dt
{
   if (flags == PIPE_TEXTURE_BARRIER_FRAMEBUFFER) {
      struct fd_context *ctx = fd_context(pctx);

      if (ctx->framebuffer_barrier) {
         ctx->framebuffer_barrier(ctx);
         return;
      }
   }

   /* On devices that could sample from GMEM we could possibly do better.
    * Or if we knew that we were doing GMEM bypass we could just emit a
    * cache flush, perhaps?  But we don't know if future draws would cause
    * us to use GMEM, and a flush in bypass isn't the end of the world.
    */
   fd_context_flush(pctx, NULL, 0);
}

/**
 * pipe_context::memory_barrier hook.
 *
 * Does nothing when no bit other than PIPE_BARRIER_UPDATE is set in
 * @flags; otherwise performs a full context flush.
 */
static void
fd_memory_barrier(struct pipe_context *pctx, unsigned flags)
{
   if (!(flags & ~PIPE_BARRIER_UPDATE))
      return;

   fd_context_flush(pctx, NULL, 0);

   /* TODO do we need to check for persistently mapped buffers and
    * fd_bo_cpu_prep()??
    */
}

/**
 * Emit @len bytes of @string into @ring as a sequence of dwords.  A final
 * partial dword (len not a multiple of 4) is zero-padded.
 */
static void
emit_string_tail(struct fd_ringbuffer *ring, const char *string, int len)
{
   /* Assemble each dword with memcpy() rather than dereferencing the
    * string through a uint32_t pointer: the string is not guaranteed to
    * be 4-byte aligned, and reading char storage through a uint32_t
    * lvalue violates C's effective-type (strict aliasing) rules.
    */
   while (len >= 4) {
      uint32_t w;

      memcpy(&w, string, sizeof(w));
      OUT_RING(ring, w);
      string += 4;
      len -= 4;
   }

   /* copy remainder bytes without reading past end of input string: */
   if (len > 0) {
      uint32_t w = 0;

      memcpy(&w, string, len);
      OUT_RING(ring, w);
   }
}

/**
 * Emit @string as the payload of a CP_NOP type-3 packet.
 *
 * For prior to a5xx.  @len is clamped to the maximum packet payload.
 */
void
fd_emit_string(struct fd_ringbuffer *ring, const char *string, int len)
{
   /* max packet size is 0x3fff+1 dwords: */
   len = MIN2(len, 0x4000 * 4);

   OUT_PKT3(ring, CP_NOP, align(len, 4) / 4);
   emit_string_tail(ring, string, len);
}

/**
 * Emit @string as the payload of a CP_NOP type-7 packet.
 *
 * For a5xx+.  @len is clamped to the maximum packet payload.
 */
void
fd_emit_string5(struct fd_ringbuffer *ring, const char *string, int len)
{
   /* max packet size is 0x3fff dwords: */
   len = MIN2(len, 0x3fff * 4);

   OUT_PKT7(ring, CP_NOP, align(len, 4) / 4);
   emit_string_tail(ring, string, len);
}

/**
 * emit marker string as payload of a no-op packet, which can be
 * decoded by cffdump.
 *
 * pipe_context::emit_string_marker hook.  Does nothing (beyond the debug
 * print) when the context has no current batch.
 */
static void
fd_emit_string_marker(struct pipe_context *pctx, const char *string,
                      int len) in_dt
{
   struct fd_context *ctx = fd_context(pctx);

   DBG("%.*s", len, string);

   if (!ctx->batch)
      return;

   struct fd_batch *batch = fd_context_batch_locked(ctx);

   fd_batch_needs_flush(batch);

   /* Packet format depends on the GPU generation: */
   if (ctx->screen->gen >= 5) {
      fd_emit_string5(batch->draw, string, len);
   } else {
      fd_emit_string(batch->draw, string, len);
   }

   fd_batch_unlock_submit(batch);
   fd_batch_reference(&batch, NULL);
}

/**
 * If we have a pending fence_server_sync() (GPU side sync), flush now.
 * The alternative to try to track this with batch dependencies gets
 * hairy quickly.
 *
 * Call this before switching to a different batch, to handle this case.
 */
void
fd_context_switch_from(struct fd_context *ctx)
{
   if (ctx->batch && (ctx->batch->in_fence_fd != -1))
      fd_batch_flush(ctx->batch);
}

/**
 * If there is a pending fence-fd that we need to sync on, this will
 * transfer the reference to the next batch we are going to render
 * to.
 */
void
fd_context_switch_to(struct fd_context *ctx, struct fd_batch *batch)
{
   if (ctx->in_fence_fd != -1) {
      sync_accumulate("freedreno", &batch->in_fence_fd, ctx->in_fence_fd);
      close(ctx->in_fence_fd);
      ctx->in_fence_fd = -1;
   }
}

/**
 * Return a reference to the current batch, caller must unref.
 *
 * If the context has no current batch, one is created from the current
 * framebuffer state, installed as ctx->batch, and all context state is
 * marked dirty.
 */
struct fd_batch *
fd_context_batch(struct fd_context *ctx)
{
   struct fd_batch *batch = NULL;

   tc_assert_driver_thread(ctx->tc);

   fd_batch_reference(&batch, ctx->batch);

   if (unlikely(!batch)) {
      batch = fd_batch_from_fb(ctx, &ctx->framebuffer);
      util_copy_framebuffer_state(&batch->framebuffer, &ctx->framebuffer);
      fd_batch_reference(&ctx->batch, batch);
      fd_context_all_dirty(ctx);
   }
   fd_context_switch_to(ctx, batch);

   return batch;
}

/**
 * Return a locked reference to the current batch.  A batch with emit
 * lock held is protected against flushing while the lock is held.
 * The emit-lock should be acquired before screen-lock.  The emit-lock
 * should be held while emitting cmdstream.
 */
struct fd_batch *
fd_context_batch_locked(struct fd_context *ctx)
{
   struct fd_batch *batch = NULL;

   /* Retry with a fresh batch reference whenever taking the submit lock
    * fails:
    */
   while (!batch) {
      batch = fd_context_batch(ctx);
      if (!fd_batch_lock_submit(batch)) {
         fd_batch_reference(&batch, NULL);
      }
   }

   return batch;
}

/**
 * Common context teardown: unlinks the context from the screen's context
 * list, drops its fences, batches and buffer objects, destroys the helper
 * objects created at init time, and (with the BSTAT or MSGS debug flags)
 * logs the collected batch statistics.
 */
void
fd_context_destroy(struct pipe_context *pctx)
{
   struct fd_context *ctx = fd_context(pctx);
   unsigned i;

   DBG("");

   fd_screen_lock(ctx->screen);
   list_del(&ctx->node);
   fd_screen_unlock(ctx->screen);

   fd_fence_ref(&ctx->last_fence, NULL);

   if (ctx->in_fence_fd != -1)
      close(ctx->in_fence_fd);

   for (i = 0; i < ARRAY_SIZE(ctx->pvtmem); i++) {
      if (ctx->pvtmem[i].bo)
         fd_bo_del(ctx->pvtmem[i].bo);
   }

   util_copy_framebuffer_state(&ctx->framebuffer, NULL);
   fd_batch_reference(&ctx->batch, NULL); /* unref current batch */

   /* Make sure nothing in the batch cache references our context any more. */
   fd_bc_flush(ctx, false);

   fd_prog_fini(pctx);

   if (ctx->blitter)
      util_blitter_destroy(ctx->blitter);

   if (pctx->stream_uploader)
      u_upload_destroy(pctx->stream_uploader);

   for (i = 0; i < ARRAY_SIZE(ctx->clear_rs_state); i++)
      if (ctx->clear_rs_state[i])
         pctx->delete_rasterizer_state(pctx, ctx->clear_rs_state[i]);

   slab_destroy_child(&ctx->transfer_pool);
   slab_destroy_child(&ctx->transfer_pool_unsync);

   /* The vsc_pipe_bo array is treated as densely packed; stop at the
    * first unused slot:
    */
   for (i = 0; i < ARRAY_SIZE(ctx->vsc_pipe_bo); i++) {
      if (!ctx->vsc_pipe_bo[i])
         break;
      fd_bo_del(ctx->vsc_pipe_bo[i]);
   }

   fd_device_del(ctx->dev);
   fd_pipe_purge(ctx->pipe);
   fd_pipe_del(ctx->pipe);

   simple_mtx_destroy(&ctx->gmem_lock);

   u_trace_context_fini(&ctx->trace_context);

   fd_autotune_fini(&ctx->autotune);

   ir3_cache_destroy(ctx->shader_cache);

   if (FD_DBG(BSTAT) || FD_DBG(MSGS)) {
      mesa_logi(
         "batch_total=%u, batch_sysmem=%u, batch_gmem=%u, batch_nondraw=%u, "
         "batch_restore=%u\n",
         (uint32_t)ctx->stats.batch_total, (uint32_t)ctx->stats.batch_sysmem,
         (uint32_t)ctx->stats.batch_gmem, (uint32_t)ctx->stats.batch_nondraw,
         (uint32_t)ctx->stats.batch_restore);
   }
}

/**
 * pipe_context::set_debug_callback hook.
 *
 * Waits for the screen's compile queue to drain before touching
 * ctx->debug, then either copies *@cb or, when @cb is NULL, clears the
 * stored callback.
 */
static void
fd_set_debug_callback(struct pipe_context *pctx,
                      const struct util_debug_callback *cb)
{
   struct fd_context *ctx = fd_context(pctx);
   struct fd_screen *screen = ctx->screen;

   /* NOTE(review): presumably queued compile jobs may use ctx->debug, so
    * it must not change underneath them -- confirm.
    */
   util_queue_finish(&screen->compile_queue);

   if (cb)
      ctx->debug = *cb;
   else
      memset(&ctx->debug, 0, sizeof(ctx->debug));
}

/**
 * Query the fault counter for this context's pipe.
 *
 * @per_context: true to read FD_CTX_FAULTS, false for FD_GLOBAL_FAULTS
 *
 * Returns the counter truncated to 32 bits.  A failing query trips an
 * assert in debug builds and yields 0 when asserts are compiled out.
 */
static uint32_t
fd_get_reset_count(struct fd_context *ctx, bool per_context)
{
   /* Zero-initialized so that a failed query in a build with asserts
    * compiled out (NDEBUG) returns a defined value rather than reading
    * an indeterminate one:
    */
   uint64_t val = 0;
   enum fd_param_id param = per_context ? FD_CTX_FAULTS : FD_GLOBAL_FAULTS;
   int ret = fd_pipe_get_param(ctx->pipe, param, &val);

   assert(!ret);
   (void)ret; /* otherwise unused when assert() expands to nothing */

   return (uint32_t)val;
}

/**
 * pipe_context::get_device_reset_status hook.
 *
 * Compares the current per-context and global fault counters against the
 * values cached in the context, reports a guilty reset if the per-context
 * counter changed, an innocent reset if only the global counter changed,
 * and no reset otherwise.  The cached counters are updated on every call.
 */
static enum pipe_reset_status
fd_get_device_reset_status(struct pipe_context *pctx)
{
   struct fd_context *ctx = fd_context(pctx);
   /* Keep the counters in the unsigned type fd_get_reset_count() returns: */
   uint32_t context_faults = fd_get_reset_count(ctx, true);
   uint32_t global_faults = fd_get_reset_count(ctx, false);
   enum pipe_reset_status status;

   if (context_faults != ctx->context_reset_count) {
      status = PIPE_GUILTY_CONTEXT_RESET;
   } else if (global_faults != ctx->global_reset_count) {
      status = PIPE_INNOCENT_CONTEXT_RESET;
   } else {
      status = PIPE_NO_RESET;
   }

   ctx->context_reset_count = context_faults;
   ctx->global_reset_count = global_faults;

   return status;
}

/**
 * u_trace callback: record a timestamp into slot @idx of @timestamps.
 *
 * @ut:         the trace, embedded in a struct fd_batch
 * @cs:         the ringbuffer to emit the timestamp command into
 * @timestamps: pipe_resource backing the timestamp buffer
 * @idx:        slot index (in units of uint64_t)
 */
static void
fd_trace_record_ts(struct u_trace *ut, void *cs, void *timestamps,
                   unsigned idx, bool end_of_pipe)
{
   struct fd_batch *batch = container_of(ut, struct fd_batch, trace);
   struct fd_ringbuffer *ring = cs;
   struct pipe_resource *buffer = timestamps;

   /* If the ring has not advanced since the previous timestamp command,
    * skip emitting another one and mark this slot as having no timestamp:
    */
   if (ring->cur == batch->last_timestamp_cmd) {
      uint64_t *ts = fd_bo_map(fd_resource(buffer)->bo);
      ts[idx] = U_TRACE_NO_TIMESTAMP;
      return;
   }

   unsigned ts_offset = idx * sizeof(uint64_t);
   batch->ctx->record_timestamp(ring, fd_resource(buffer)->bo, ts_offset);
   batch->last_timestamp_cmd = ring->cur;
}

/**
 * u_trace callback: read back slot @idx of @timestamps, converted to
 * nanoseconds via ctx->ts_to_ns().
 *
 * Returns U_TRACE_NO_TIMESTAMP if the slot holds the no-timestamp marker
 * or if preparing the buffer for CPU reads fails.
 */
static uint64_t
fd_trace_read_ts(struct u_trace_context *utctx, void *timestamps,
                 unsigned idx, void *flush_data)
{
   struct fd_context *ctx =
      container_of(utctx, struct fd_context, trace_context);
   struct pipe_resource *buffer = timestamps;
   struct fd_bo *ts_bo = fd_resource(buffer)->bo;

   /* Only need to stall on results for the first entry: */
   if (idx == 0) {
      /* Avoid triggering deferred submits from flushing, since that
       * changes the behavior of what we are trying to measure:
       */
      while (fd_bo_cpu_prep(ts_bo, ctx->pipe, FD_BO_PREP_NOSYNC))
         usleep(10000);
      int ret = fd_bo_cpu_prep(ts_bo, ctx->pipe, FD_BO_PREP_READ);
      if (ret)
         return U_TRACE_NO_TIMESTAMP;
   }

   uint64_t *ts = fd_bo_map(ts_bo);

   /* Don't translate the no-timestamp marker: */
   if (ts[idx] == U_TRACE_NO_TIMESTAMP)
      return U_TRACE_NO_TIMESTAMP;

   return ctx->ts_to_ns(ts[idx]);
}

/**
 * u_trace callback: release per-flush data.  Intentionally empty.
 */
static void
fd_trace_delete_flush_data(struct u_trace_context *utctx, void *flush_data)
{
   /* We don't use flush_data at the moment. */
}

/* TODO we could combine a few of these small buffers (solid_vbuf,
 * blit_texcoord_vbuf, and vsc_size_mem, into a single buffer and
 * save a tiny bit of memory
 */

/**
 * Create an immutable buffer pre-filled with six constant floats, used as
 * the "solid" vertex buffer.
 */
static struct pipe_resource *
create_solid_vertexbuf(struct pipe_context *pctx)
{
   static const float init_shader_const[] = {
      -1.000000f, +1.000000f, +1.000000f,
      +1.000000f, -1.000000f, +1.000000f,
   };
   struct pipe_resource *prsc =
      pipe_buffer_create(pctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE,
                         sizeof(init_shader_const));
   pipe_buffer_write(pctx, prsc, 0, sizeof(init_shader_const),
                     init_shader_const);
   return prsc;
}

/**
 * Create the 16-byte dynamic buffer used for blit texture coordinates.
 * Its contents are not initialized here.
 */
static struct pipe_resource *
create_blit_texcoord_vertexbuf(struct pipe_context *pctx)
{
   struct pipe_resource *prsc = pipe_buffer_create(
      pctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_DYNAMIC, 16);
   return prsc;
}

/**
 * Create the vertex buffers and vertex-element state objects shared by
 * solid fills and blits (ctx->solid_vbuf_state / ctx->blit_vbuf_state).
 * Undone by fd_context_cleanup_common_vbos().
 */
void
fd_context_setup_common_vbos(struct fd_context *ctx)
{
   struct pipe_context *pctx = &ctx->base;

   ctx->solid_vbuf = create_solid_vertexbuf(pctx);
   ctx->blit_texcoord_vbuf = create_blit_texcoord_vertexbuf(pctx);

   /* setup solid_vbuf_state: */
   ctx->solid_vbuf_state.vtx = pctx->create_vertex_elements_state(
      pctx, 1,
      (struct pipe_vertex_element[]){{
         .vertex_buffer_index = 0,
         .src_offset = 0,
         .src_format = PIPE_FORMAT_R32G32B32_FLOAT,
      }});
   ctx->solid_vbuf_state.vertexbuf.count = 1;
   ctx->solid_vbuf_state.vertexbuf.vb[0].stride = 12;
   ctx->solid_vbuf_state.vertexbuf.vb[0].buffer.resource = ctx->solid_vbuf;

   /* setup blit_vbuf_state: */
   ctx->blit_vbuf_state.vtx = pctx->create_vertex_elements_state(
      pctx, 2,
      (struct pipe_vertex_element[]){
         {
            .vertex_buffer_index = 0,
            .src_offset = 0,
            .src_format = PIPE_FORMAT_R32G32_FLOAT,
         },
         {
            .vertex_buffer_index = 1,
            .src_offset = 0,
            .src_format = PIPE_FORMAT_R32G32B32_FLOAT,
         }});
   ctx->blit_vbuf_state.vertexbuf.count = 2;
   ctx->blit_vbuf_state.vertexbuf.vb[0].stride = 8;
   ctx->blit_vbuf_state.vertexbuf.vb[0].buffer.resource =
      ctx->blit_texcoord_vbuf;
   ctx->blit_vbuf_state.vertexbuf.vb[1].stride = 12;
   ctx->blit_vbuf_state.vertexbuf.vb[1].buffer.resource = ctx->solid_vbuf;
}

/**
 * Release everything created by fd_context_setup_common_vbos().
 */
void
fd_context_cleanup_common_vbos(struct fd_context *ctx)
{
   struct pipe_context *pctx = &ctx->base;

   pctx->delete_vertex_elements_state(pctx, ctx->solid_vbuf_state.vtx);
   pctx->delete_vertex_elements_state(pctx, ctx->blit_vbuf_state.vtx);

   pipe_resource_reference(&ctx->solid_vbuf, NULL);
   pipe_resource_reference(&ctx->blit_texcoord_vbuf, NULL);
}

/**
 * Common context initialization.
 *
 * @ctx:     context to initialize
 * @pscreen: the screen the context belongs to
 * @priv:    stored in pctx->priv
 * @flags:   PIPE_CONTEXT_x creation flags (used here to pick the pipe
 *           priority)
 *
 * Returns the embedded pipe_context on success.  On failure the context
 * is destroyed through pctx->destroy() and NULL is returned.
 */
struct pipe_context *
fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen,
                void *priv, unsigned flags) disable_thread_safety_analysis
{
   struct fd_screen *screen = fd_screen(pscreen);
   struct pipe_context *pctx;
   unsigned prio = 1;

   /* lower numerical value == higher priority: */
   if (FD_DBG(HIPRIO))
      prio = 0;
   else if (flags & PIPE_CONTEXT_HIGH_PRIORITY)
      prio = 0;
   else if (flags & PIPE_CONTEXT_LOW_PRIORITY)
      prio = 2;

   /* Some of the stats will get printed out at context destroy, so
    * make sure they are collected:
    */
   if (FD_DBG(BSTAT) || FD_DBG(MSGS))
      ctx->stats_users++;

   ctx->flags = flags;
   ctx->screen = screen;
   ctx->pipe = fd_pipe_new2(screen->dev, FD_PIPE_3D, prio);

   ctx->in_fence_fd = -1;

   /* Snapshot the fault counters so later reset-status queries can detect
    * changes relative to context creation:
    */
   if (fd_device_version(screen->dev) >= FD_VERSION_ROBUSTNESS) {
      ctx->context_reset_count = fd_get_reset_count(ctx, true);
      ctx->global_reset_count = fd_get_reset_count(ctx, false);
   }

   simple_mtx_init(&ctx->gmem_lock, mtx_plain);

   /* need some sane default in case gallium frontends don't
    * set some state:
    */
   ctx->sample_mask = 0xffff;
   ctx->active_queries = true;

   pctx = &ctx->base;
   pctx->screen = pscreen;
   pctx->priv = priv;
   pctx->flush = fd_context_flush;
   pctx->emit_string_marker = fd_emit_string_marker;
   pctx->set_debug_callback = fd_set_debug_callback;
   pctx->get_device_reset_status = fd_get_device_reset_status;
   pctx->create_fence_fd = fd_create_fence_fd;
   pctx->fence_server_sync = fd_fence_server_sync;
   pctx->fence_server_signal = fd_fence_server_signal;
   pctx->texture_barrier = fd_texture_barrier;
   pctx->memory_barrier = fd_memory_barrier;

   pctx->stream_uploader = u_upload_create_default(pctx);
   if (!pctx->stream_uploader)
      goto fail;
   pctx->const_uploader = pctx->stream_uploader;

   slab_create_child(&ctx->transfer_pool, &screen->transfer_pool);
   slab_create_child(&ctx->transfer_pool_unsync, &screen->transfer_pool);

   fd_draw_init(pctx);
   fd_resource_context_init(pctx);
   fd_query_context_init(pctx);
   fd_texture_init(pctx);
   fd_state_init(pctx);

   ctx->blitter = util_blitter_create(pctx);
   if (!ctx->blitter)
      goto fail;

   list_inithead(&ctx->hw_active_queries);
   list_inithead(&ctx->acc_active_queries);

   fd_screen_lock(ctx->screen);
   ctx->seqno = ++screen->ctx_seqno;
   list_add(&ctx->node, &ctx->screen->context_list);
   fd_screen_unlock(ctx->screen);

   ctx->current_scissor = &ctx->disabled_scissor;

   fd_gpu_tracepoint_config_variable();
   u_trace_pipe_context_init(&ctx->trace_context, pctx,
                             fd_trace_record_ts,
                             fd_trace_read_ts,
                             fd_trace_delete_flush_data);

   fd_autotune_init(&ctx->autotune, screen->dev);

   return pctx;

fail:
   /* NOTE(review): this runs the destroy hook on a partially initialized
    * context (e.g. ctx->node not yet on the screen's list, trace context
    * and autotune not yet initialized) -- confirm the destroy path
    * tolerates that.
    */
   pctx->destroy(pctx);
   return NULL;
}

/**
 * Optionally wrap @pctx in a threaded context.
 *
 * Returns @pctx unchanged unless PIPE_CONTEXT_PREFER_THREADED is set in
 * @flags, and also for compute-only contexts.  Otherwise returns whatever
 * threaded_context_create() returns.
 */
struct pipe_context *
fd_context_init_tc(struct pipe_context *pctx, unsigned flags)
{
   struct fd_context *ctx = fd_context(pctx);

   if (!(flags & PIPE_CONTEXT_PREFER_THREADED))
      return pctx;

   /* Clover (compute-only) is unsupported. */
   if (flags & PIPE_CONTEXT_COMPUTE_ONLY)
      return pctx;

   struct pipe_context *tc = threaded_context_create(
      pctx, &ctx->screen->transfer_pool,
      fd_replace_buffer_storage,
      &(struct threaded_context_options){
         .create_fence = fd_fence_create_unflushed,
         .is_resource_busy = fd_resource_busy,
         .unsynchronized_get_device_reset_status = true,
      },
      &ctx->tc);

   /* Only configure the mapped-bytes limit when a distinct threaded
    * context was actually created:
    */
   if (tc && tc != pctx)
      threaded_context_init_bytes_mapped_limit((struct threaded_context *)tc, 16);

   return tc;
}