/*
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */

#ifndef AMDGPU_CS_H
#define AMDGPU_CS_H

#include "amdgpu_bo.h"
#include "util/u_memory.h"
#include "drm-uapi/amdgpu_drm.h"

/* Smaller submits mean the GPU gets busy sooner and there is less
 * waiting for buffers and fences. Proof:
 * http://www.phoronix.com/scan.php?page=article&item=mesa-111-si&num=1
 */
#define IB_MAX_SUBMIT_DWORDS (20 * 1024)
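
/* A minimal sketch (not part of this header) of how a driver could keep a
 * submission under this limit before emitting `ndw` more dwords; it assumes
 * the usual radeon_cmdbuf fields and a winsys `ws` with cs_flush:
 *
 *    if (rcs->current.cdw + ndw > IB_MAX_SUBMIT_DWORDS)
 *       ws->cs_flush(rcs, PIPE_FLUSH_ASYNC, NULL);
 */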

struct amdgpu_ctx {
   struct amdgpu_winsys *ws;
   amdgpu_context_handle ctx;
   amdgpu_bo_handle user_fence_bo;
   uint64_t *user_fence_cpu_address_base;
   int refcount;
   unsigned initial_num_total_rejected_cs;
   unsigned num_rejected_cs;
};

struct amdgpu_cs_buffer {
   struct amdgpu_winsys_bo *bo;
   unsigned slab_real_idx; /* index of underlying real BO, used by slab buffers only */
   unsigned usage;
};

enum ib_type {
   IB_PREAMBLE,
   IB_MAIN,
   IB_NUM,
};

struct amdgpu_ib {
   struct radeon_cmdbuf *rcs; /* pointer to the driver-owned data */

   /* A buffer out of which new IBs are allocated. */
   struct pb_buffer *big_ib_buffer;
   uint8_t *ib_mapped;
   unsigned used_ib_space;

   /* The maximum size seen from cs_check_space. If the driver does
    * cs_check_space and flush, the newly allocated IB should have at least
    * this size.
    */
   unsigned max_check_space_size;

   unsigned max_ib_size;
   uint32_t *ptr_ib_size;
   bool ptr_ib_size_inside_ib;
   enum ib_type ib_type;
};
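
/* A minimal sketch (not part of this header) of the cs_check_space/flush
 * interaction that max_check_space_size tracks; it assumes the usual
 * radeon_winsys entry points and is only illustrative:
 *
 *    // driver: ensure at least `ndw` more dwords fit, flushing if they don't
 *    if (!ws->cs_check_space(rcs, ndw))
 *       ws->cs_flush(rcs, PIPE_FLUSH_ASYNC, NULL);
 *
 *    // winsys: an IB allocated after such a flush must be at least
 *    // max_check_space_size dwords so the pending request still fits
 */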

struct amdgpu_fence_list {
   struct pipe_fence_handle **list;
   unsigned num;
   unsigned max;
};

struct amdgpu_cs_context {
   struct drm_amdgpu_cs_chunk_ib ib[IB_NUM];
   uint32_t *ib_main_addr; /* the beginning of IB before chaining */

   struct amdgpu_winsys *ws;

   /* Buffers. */
   unsigned max_real_buffers;
   unsigned num_real_buffers;
   struct amdgpu_cs_buffer *real_buffers;

   unsigned num_slab_buffers;
   unsigned max_slab_buffers;
   struct amdgpu_cs_buffer *slab_buffers;

   unsigned num_sparse_buffers;
   unsigned max_sparse_buffers;
   struct amdgpu_cs_buffer *sparse_buffers;

   int16_t *buffer_indices_hashlist;

   struct amdgpu_winsys_bo *last_added_bo;
   unsigned last_added_bo_index;
   unsigned last_added_bo_usage;

   struct amdgpu_fence_list fence_dependencies;
   struct amdgpu_fence_list syncobj_dependencies;
   struct amdgpu_fence_list syncobj_to_signal;

   struct pipe_fence_handle *fence;

   /* the error returned from cs_flush for non-async submissions */
   int error_code;

   /* TMZ: will this command be submitted using the TMZ flag */
   bool secure;
};

/* This high limit is needed for viewperf2020/catia. */
#define BUFFER_HASHLIST_SIZE 32768

struct amdgpu_cs {
   struct amdgpu_ib main; /* must be first because this is inherited */
   struct amdgpu_winsys *ws;
   struct amdgpu_ctx *ctx;
   enum amd_ip_type ip_type;
   struct drm_amdgpu_cs_chunk_fence fence_chunk;

   /* We flip between these two CS. While one is being consumed
    * by the kernel in another thread, the other one is being filled
    * by the pipe driver (see the flush sketch after this struct). */
   struct amdgpu_cs_context csc1;
   struct amdgpu_cs_context csc2;
   /* The currently-used CS. */
   struct amdgpu_cs_context *csc;
   /* The CS currently owned by the other thread. */
   struct amdgpu_cs_context *cst;
   /* buffer_indices_hashlist[hash(bo)] returns -1 if the bo isn't part of
    * any buffer list, or the index where the bo could be found.
    * Since 1) hash collisions of two different BOs can happen and 2) we use
    * a single hashlist for the three buffer lists, this is only a hint.
    * amdgpu_lookup_buffer uses this hint to speed up buffer lookups.
    */
   int16_t buffer_indices_hashlist[BUFFER_HASHLIST_SIZE];

   /* Flush CS. */
   void (*flush_cs)(void *ctx, unsigned flags, struct pipe_fence_handle **fence);
   void *flush_data;
   bool stop_exec_on_failure;
   bool noop;
   bool has_chaining;

   struct util_queue_fence flush_completed;
   struct pipe_fence_handle *next_fence;
   struct pb_buffer *preamble_ib_bo;
};
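
/* A minimal sketch (not part of this header) of the double-buffering scheme
 * described above: at flush time the filled context is handed to the submit
 * thread and the other one becomes current. The helper name is hypothetical:
 *
 *    void example_flush(struct amdgpu_cs *cs)
 *    {
 *       // wait until the other context is done being submitted
 *       util_queue_fence_wait(&cs->flush_completed);
 *
 *       // swap: the just-filled CS goes to the submit thread...
 *       struct amdgpu_cs_context *tmp = cs->csc;
 *       cs->csc = cs->cst;
 *       cs->cst = tmp;
 *
 *       // ...and the now-current context is reset and filled by the driver
 *    }
 */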

struct amdgpu_fence {
   struct pipe_reference reference;
   /* If ctx == NULL, this fence is syncobj-based. */
   uint32_t syncobj;

   struct amdgpu_winsys *ws;
   struct amdgpu_ctx *ctx;  /* submission context */
   struct amdgpu_cs_fence fence;
   uint64_t *user_fence_cpu_address;

   /* Whether the fence has been submitted. This is unsignalled for deferred
    * fences (cs->next_fence) and while an IB is still being submitted in the
    * submit thread. */
   struct util_queue_fence submitted;

   volatile int signalled; /* bool (int for atomicity) */
};

static inline bool amdgpu_fence_is_syncobj(struct amdgpu_fence *fence)
{
   return fence->ctx == NULL;
}

static inline void amdgpu_ctx_unref(struct amdgpu_ctx *ctx)
{
   if (p_atomic_dec_zero(&ctx->refcount)) {
      amdgpu_cs_ctx_free(ctx->ctx);
      amdgpu_bo_free(ctx->user_fence_bo);
      FREE(ctx);
   }
}

static inline void amdgpu_fence_reference(struct pipe_fence_handle **dst,
                                          struct pipe_fence_handle *src)
{
   struct amdgpu_fence **adst = (struct amdgpu_fence **)dst;
   struct amdgpu_fence *asrc = (struct amdgpu_fence *)src;

   if (pipe_reference(&(*adst)->reference, &asrc->reference)) {
      struct amdgpu_fence *fence = *adst;

      if (amdgpu_fence_is_syncobj(fence))
         amdgpu_cs_destroy_syncobj(fence->ws->dev, fence->syncobj);
      else
         amdgpu_ctx_unref(fence->ctx);

      util_queue_fence_destroy(&fence->submitted);
      FREE(fence);
   }
   *adst = asrc;
}

int amdgpu_lookup_buffer_any_type(struct amdgpu_cs_context *cs, struct amdgpu_winsys_bo *bo);
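
/* A minimal sketch (not part of this header) of how the buffer_indices_hashlist
 * hint described in struct amdgpu_cs speeds up this lookup; hash() and
 * linear_search() are hypothetical, and only the real-buffer list is shown:
 *
 *    int16_t i = csc->buffer_indices_hashlist[hash(bo)];
 *    if (i >= 0 && i < (int)csc->num_real_buffers && csc->real_buffers[i].bo == bo)
 *       return i;                      // hint hit: index found directly
 *    return linear_search(csc, bo);    // hint miss or collision: full scan
 */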

static inline struct amdgpu_cs *
amdgpu_cs(struct radeon_cmdbuf *rcs)
{
   struct amdgpu_cs *cs = (struct amdgpu_cs*)rcs->priv;
   assert(!cs || cs->main.ib_type == IB_MAIN);
   return cs;
}

#define get_container(member_ptr, container_type, container_member) \
   (container_type *)((char *)(member_ptr) - offsetof(container_type, container_member))
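
/* Example (a sketch, not used verbatim in this header): given a pointer to the
 * embedded `main` IB, get_container recovers the owning amdgpu_cs:
 *
 *    struct amdgpu_ib *ib = ...;
 *    struct amdgpu_cs *cs = get_container(ib, struct amdgpu_cs, main);
 */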

static inline bool
amdgpu_bo_is_referenced_by_cs(struct amdgpu_cs *cs,
                              struct amdgpu_winsys_bo *bo)
{
   return amdgpu_lookup_buffer_any_type(cs->csc, bo) != -1;
}

static inline bool
amdgpu_bo_is_referenced_by_cs_with_usage(struct amdgpu_cs *cs,
                                         struct amdgpu_winsys_bo *bo,
                                         unsigned usage)
{
   int index;
   struct amdgpu_cs_buffer *buffer;

   index = amdgpu_lookup_buffer_any_type(cs->csc, bo);
   if (index == -1)
      return false;

   buffer = bo->bo ? &cs->csc->real_buffers[index] :
            bo->base.usage & RADEON_FLAG_SPARSE ? &cs->csc->sparse_buffers[index] :
            &cs->csc->slab_buffers[index];

   return (buffer->usage & usage) != 0;
}
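
/* Example (a sketch, not used verbatim in this header): asking whether a BO is
 * referenced for write by the CS currently being filled; RADEON_USAGE_WRITE is
 * assumed to come from the winsys usage flags:
 *
 *    if (amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo, RADEON_USAGE_WRITE)) {
 *       // the current IB will write this BO, e.g. flush before mapping it
 *    }
 */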

bool amdgpu_fence_wait(struct pipe_fence_handle *fence, uint64_t timeout,
                       bool absolute);
void amdgpu_add_fences(struct amdgpu_winsys_bo *bo,
                       unsigned num_fences,
                       struct pipe_fence_handle **fences);
void amdgpu_cs_sync_flush(struct radeon_cmdbuf *rcs);
void amdgpu_cs_init_functions(struct amdgpu_screen_winsys *ws);

#endif