/*
 * Copyright 2021 Google LLC
 * SPDX-License-Identifier: MIT
 */

#include "vkr_ring.h"

#include <stdio.h>
#include <time.h>

#include "virgl_context.h"
#include "vrend_iov.h"

enum vkr_ring_status_flag {
   VKR_RING_STATUS_IDLE = 1u << 0,
};

/* callers must make sure they do not seek to end-of-resource or beyond */
static const struct iovec *
seek_resource(const struct virgl_resource *res,
              int base_iov_index,
              size_t offset,
              int *out_iov_index,
              size_t *out_iov_offset)
{
   const struct iovec *iov = &res->iov[base_iov_index];
   assert(iov - res->iov < res->iov_count);
   while (offset >= iov->iov_len) {
      offset -= iov->iov_len;
      iov++;
      assert(iov - res->iov < res->iov_count);
   }

   *out_iov_index = iov - res->iov;
   *out_iov_offset = offset;

   return iov;
}

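/* resolve an offset within the resource, relative to base_iov_index, to a
 * host pointer
 */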
static void *
get_resource_pointer(const struct virgl_resource *res, int base_iov_index, size_t offset)
{
   const struct iovec *iov =
      seek_resource(res, base_iov_index, offset, &base_iov_index, &offset);
   return (uint8_t *)iov->iov_base + offset;
}

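/* locate the extra region in the resource iovs and rebase it to offset 0 */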
static void
vkr_ring_init_extra(struct vkr_ring *ring, const struct vkr_ring_layout *layout)
{
   struct vkr_ring_extra *extra = &ring->extra;

   seek_resource(layout->resource, 0, layout->extra.begin, &extra->base_iov_index,
                 &extra->base_iov_offset);

   extra->region = vkr_region_make_relative(&layout->extra);
}

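/* locate the buffer region in the resource iovs and reset the current
 * position; the buffer size must be a power of two so the position can wrap
 * with a simple mask
 */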
static void
vkr_ring_init_buffer(struct vkr_ring *ring, const struct vkr_ring_layout *layout)
{
   struct vkr_ring_buffer *buf = &ring->buffer;

   const struct iovec *base_iov =
      seek_resource(layout->resource, 0, layout->buffer.begin, &buf->base_iov_index,
                    &buf->base_iov_offset);

   buf->size = vkr_region_size(&layout->buffer);
   assert(buf->size && util_is_power_of_two(buf->size));
   buf->mask = buf->size - 1;

   buf->cur = 0;
   buf->cur_iov = base_iov;
   buf->cur_iov_index = buf->base_iov_index;
   buf->cur_iov_offset = buf->base_iov_offset;
}

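/* map the head, tail, and status control words and require head and status
 * to start out as zero
 */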
static bool
vkr_ring_init_control(struct vkr_ring *ring, const struct vkr_ring_layout *layout)
{
   struct vkr_ring_control *ctrl = &ring->control;

   ctrl->head = get_resource_pointer(layout->resource, 0, layout->head.begin);
   ctrl->tail = get_resource_pointer(layout->resource, 0, layout->tail.begin);
   ctrl->status = get_resource_pointer(layout->resource, 0, layout->status.begin);

   /* we will manage head and status, and we expect them to be 0 initially */
   if (*ctrl->head || *ctrl->status)
      return false;

   return true;
}

static void
vkr_ring_store_head(struct vkr_ring *ring)
{
   /* the driver is expected to load the head with memory_order_acquire,
    * forming a release-acquire ordering
    */
   atomic_store_explicit(ring->control.head, ring->buffer.cur, memory_order_release);
}

static uint32_t
vkr_ring_load_tail(const struct vkr_ring *ring)
{
   /* the driver is expected to store the tail with memory_order_release,
    * forming a release-acquire ordering
    */
   return atomic_load_explicit(ring->control.tail, memory_order_acquire);
}

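/* publish the ring status, e.g., VKR_RING_STATUS_IDLE */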
static void
vkr_ring_store_status(struct vkr_ring *ring, uint32_t status)
{
   atomic_store_explicit(ring->control.status, status, memory_order_seq_cst);
}

/* TODO consider requiring virgl_resource to be logically contiguous */
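/* copy size bytes from the ring buffer at the current position into data,
 * handling iov boundaries and wrap-around, and advance the current position
 */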
static void
vkr_ring_read_buffer(struct vkr_ring *ring, void *data, uint32_t size)
{
   struct vkr_ring_buffer *buf = &ring->buffer;
   const struct virgl_resource *res = ring->resource;

   assert(size <= buf->size);
   const uint32_t buf_offset = buf->cur & buf->mask;
   const uint32_t buf_avail = buf->size - buf_offset;
   const bool wrap = size >= buf_avail;

   uint32_t read_size;
   uint32_t wrap_size;
   if (!wrap) {
      read_size = size;
      wrap_size = 0;
   } else {
      read_size = buf_avail;
      /* When size == buf_avail, wrap is true but wrap_size is 0. We want to
       * wrap because it seems slightly faster on the next call. Besides,
       * seek_resource does not support seeking to end-of-resource which could
       * happen if we don't wrap and the buffer region end coincides with the
       * resource end.
       */
      wrap_size = size - buf_avail;
   }

   /* do the reads */
   if (read_size <= buf->cur_iov->iov_len - buf->cur_iov_offset) {
      const void *src = (const uint8_t *)buf->cur_iov->iov_base + buf->cur_iov_offset;
      memcpy(data, src, read_size);

      /* fast path */
      if (!wrap) {
         assert(!wrap_size);
         buf->cur += read_size;
         buf->cur_iov_offset += read_size;
         return;
      }
   } else {
      vrend_read_from_iovec(buf->cur_iov, res->iov_count - buf->cur_iov_index,
                            buf->cur_iov_offset, data, read_size);
   }

   if (wrap_size) {
      vrend_read_from_iovec(res->iov + buf->base_iov_index,
                            res->iov_count - buf->base_iov_index, buf->base_iov_offset,
                            (char *)data + read_size, wrap_size);
   }

   /* advance cur */
   buf->cur += size;
   if (!wrap) {
      buf->cur_iov = seek_resource(res, buf->cur_iov_index, buf->cur_iov_offset + size,
                                   &buf->cur_iov_index, &buf->cur_iov_offset);
   } else {
      buf->cur_iov =
         seek_resource(res, buf->base_iov_index, buf->base_iov_offset + wrap_size,
                       &buf->cur_iov_index, &buf->cur_iov_offset);
   }
}

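/* create a ring over the layout's resource; the ring keeps pointers into the
 * resource iovs and does not own them.  Returns NULL on failure.
 */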
struct vkr_ring *
vkr_ring_create(const struct vkr_ring_layout *layout,
                struct virgl_context *ctx,
                uint64_t idle_timeout)
{
   struct vkr_ring *ring;
   int ret;

   ring = calloc(1, sizeof(*ring));
   if (!ring)
      return NULL;

   ring->resource = layout->resource;

   if (!vkr_ring_init_control(ring, layout)) {
      free(ring);
      return NULL;
   }

   vkr_ring_init_buffer(ring, layout);
   vkr_ring_init_extra(ring, layout);

   ring->cmd = malloc(ring->buffer.size);
   if (!ring->cmd) {
      free(ring);
      return NULL;
   }

   ring->context = ctx;
   ring->idle_timeout = idle_timeout;

   ret = mtx_init(&ring->mutex, mtx_plain);
   if (ret != thrd_success) {
      free(ring->cmd);
      free(ring);
      return NULL;
   }
   ret = cnd_init(&ring->cond);
   if (ret != thrd_success) {
      mtx_destroy(&ring->mutex);
      free(ring->cmd);
      free(ring);
      return NULL;
   }

   return ring;
}

void
vkr_ring_destroy(struct vkr_ring *ring)
{
   assert(!ring->started);
   mtx_destroy(&ring->mutex);
   cnd_destroy(&ring->cond);
   free(ring->cmd);
   free(ring);
}

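/* return the current CLOCK_MONOTONIC time in nanoseconds, or 0 on failure */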
static uint64_t
vkr_ring_now(void)
{
   const uint64_t ns_per_sec = 1000000000llu;
   struct timespec now;
   if (clock_gettime(CLOCK_MONOTONIC, &now))
      return 0;
   return ns_per_sec * now.tv_sec + now.tv_nsec;
}

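/* yield for the first few iterations, then sleep for exponentially
 * increasing durations
 */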
static void
vkr_ring_relax(uint32_t *iter)
{
   /* TODO do better */
   const uint32_t busy_wait_order = 4;
   const uint32_t base_sleep_us = 10;

   (*iter)++;
   if (*iter < (1u << busy_wait_order)) {
      thrd_yield();
      return;
   }

   const uint32_t shift = util_last_bit(*iter) - busy_wait_order - 1;
   const uint32_t us = base_sleep_us << shift;
   const struct timespec ts = {
      .tv_sec = us / 1000000,
      .tv_nsec = (us % 1000000) * 1000,
   };
   clock_nanosleep(CLOCK_MONOTONIC, 0, &ts, NULL);
}

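/* main loop of the ring thread: poll the tail, read and submit commands, and
 * mark the ring VKR_RING_STATUS_IDLE and wait on the cond once it has seen no
 * submission for longer than idle_timeout
 */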
static int
vkr_ring_thread(void *arg)
{
   struct vkr_ring *ring = arg;
   struct virgl_context *ctx = ring->context;
   char thread_name[16];

   snprintf(thread_name, ARRAY_SIZE(thread_name), "vkr-ring-%u", ctx->ctx_id);
   pipe_thread_setname(thread_name);

   uint64_t last_submit = vkr_ring_now();
   uint32_t relax_iter = 0;
   int ret = 0;
   while (ring->started) {
      bool wait = false;
      uint32_t cmd_size;

      if (vkr_ring_now() >= last_submit + ring->idle_timeout) {
         ring->pending_notify = false;
         vkr_ring_store_status(ring, VKR_RING_STATUS_IDLE);
         wait = ring->buffer.cur == vkr_ring_load_tail(ring);
         if (!wait)
            vkr_ring_store_status(ring, 0);
      }

      if (wait) {
         TRACE_SCOPE("ring idle");

         mtx_lock(&ring->mutex);
         if (ring->started && !ring->pending_notify)
            cnd_wait(&ring->cond, &ring->mutex);
         vkr_ring_store_status(ring, 0);
         mtx_unlock(&ring->mutex);

         if (!ring->started)
            break;

         last_submit = vkr_ring_now();
         relax_iter = 0;
      }

      cmd_size = vkr_ring_load_tail(ring) - ring->buffer.cur;
      if (cmd_size) {
         if (cmd_size > ring->buffer.size) {
            ret = -EINVAL;
            break;
         }

         vkr_ring_read_buffer(ring, ring->cmd, cmd_size);
         ctx->submit_cmd(ctx, ring->cmd, cmd_size);
         vkr_ring_store_head(ring);

         last_submit = vkr_ring_now();
         relax_iter = 0;
      } else {
         vkr_ring_relax(&relax_iter);
      }
   }

   return ret;
}

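/* start the ring thread; started remains false if thread creation fails */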
void
vkr_ring_start(struct vkr_ring *ring)
{
   int ret;

   assert(!ring->started);
   ring->started = true;
   ret = thrd_create(&ring->thread, vkr_ring_thread, ring);
   if (ret != thrd_success)
      ring->started = false;
}

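/* stop and join the ring thread; returns false when called from the ring
 * thread itself
 */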
bool
vkr_ring_stop(struct vkr_ring *ring)
{
   mtx_lock(&ring->mutex);
   if (thrd_equal(ring->thread, thrd_current())) {
      mtx_unlock(&ring->mutex);
      return false;
   }
   assert(ring->started);
   ring->started = false;
   cnd_signal(&ring->cond);
   mtx_unlock(&ring->mutex);

   thrd_join(ring->thread, NULL);

   return true;
}

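/* wake up the ring thread, typically after the driver has made new commands
 * available
 */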
void
vkr_ring_notify(struct vkr_ring *ring)
{
   mtx_lock(&ring->mutex);
   ring->pending_notify = true;
   cnd_signal(&ring->cond);
   mtx_unlock(&ring->mutex);

   {
      TRACE_SCOPE("ring notify done");
   }
}

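/* write val to the extra region at offset; returns false when the write
 * would be out of bounds
 */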
bool
vkr_ring_write_extra(struct vkr_ring *ring, size_t offset, uint32_t val)
{
   struct vkr_ring_extra *extra = &ring->extra;

   if (unlikely(extra->cached_offset != offset || !extra->cached_data)) {
      const struct vkr_region access = VKR_REGION_INIT(offset, sizeof(val));
      if (!vkr_region_is_valid(&access) || !vkr_region_is_within(&access, &extra->region))
         return false;

      /* Mesa always sets offset to 0 and the cache hit rate will be 100% */
      extra->cached_offset = offset;
      extra->cached_data = get_resource_pointer(ring->resource, extra->base_iov_index,
                                                extra->base_iov_offset + offset);
   }

   atomic_store_explicit(extra->cached_data, val, memory_order_release);

   {
      TRACE_SCOPE("ring extra done");
   }

   return true;
}