1 /*
2 * Copyright © 2021 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "anv_private.h"
25
26 #include "util/os_time.h"
27 #include "util/perf/cpu_trace.h"
28
29 static struct anv_bo_sync *
to_anv_bo_sync(struct vk_sync * sync)30 to_anv_bo_sync(struct vk_sync *sync)
31 {
32 assert(sync->type == &anv_bo_sync_type);
33 return container_of(sync, struct anv_bo_sync, sync);
34 }
35
36 static VkResult
anv_bo_sync_init(struct vk_device * vk_device,struct vk_sync * vk_sync,uint64_t initial_value)37 anv_bo_sync_init(struct vk_device *vk_device,
38 struct vk_sync *vk_sync,
39 uint64_t initial_value)
40 {
41 struct anv_device *device = container_of(vk_device, struct anv_device, vk);
42 struct anv_bo_sync *sync = to_anv_bo_sync(vk_sync);
43
44 sync->state = initial_value ? ANV_BO_SYNC_STATE_SIGNALED :
45 ANV_BO_SYNC_STATE_RESET;
46
47 return anv_device_alloc_bo(device, "bo-sync", 4096,
48 ANV_BO_ALLOC_EXTERNAL |
49 ANV_BO_ALLOC_IMPLICIT_SYNC,
50 0 /* explicit_address */,
51 &sync->bo);
52 }
53
54 static void
anv_bo_sync_finish(struct vk_device * vk_device,struct vk_sync * vk_sync)55 anv_bo_sync_finish(struct vk_device *vk_device,
56 struct vk_sync *vk_sync)
57 {
58 struct anv_device *device = container_of(vk_device, struct anv_device, vk);
59 struct anv_bo_sync *sync = to_anv_bo_sync(vk_sync);
60
61 anv_device_release_bo(device, sync->bo);
62 }
63
64 static VkResult
anv_bo_sync_reset(struct vk_device * vk_device,struct vk_sync * vk_sync)65 anv_bo_sync_reset(struct vk_device *vk_device,
66 struct vk_sync *vk_sync)
67 {
68 struct anv_bo_sync *sync = to_anv_bo_sync(vk_sync);
69
70 sync->state = ANV_BO_SYNC_STATE_RESET;
71
72 return VK_SUCCESS;
73 }
74
/* Convert an absolute timeout into a relative one measured from "now"
 * (os_time_get_nano()).
 *
 * The result is never negative: a deadline that has already passed yields
 * 0, and anything larger than INT64_MAX is clamped to INT64_MAX.
 *
 * Why clamp: DRM_IOCTL_I915_GEM_WAIT takes a signed 64-bit timeout and is
 * supposed to block indefinitely for timeouts < 0.  Unfortunately, this was
 * broken for a couple of kernel releases, and there is no way to tell
 * whether the running kernel is one of the broken ones.  Clamping to
 * INT64_MAX limits the maximum timeout from 584 years to 292 years, which
 * is unlikely to matter.
 */
static int64_t
anv_get_relative_timeout(uint64_t abs_timeout)
{
   const uint64_t current = os_time_get_nano();

   /* Deadline already in the past: do not wait at all. */
   if (current > abs_timeout)
      return 0;

   const uint64_t remaining = abs_timeout - current;

   return remaining > (uint64_t) INT64_MAX ? INT64_MAX
                                           : (int64_t) remaining;
}
99
100 static VkResult
anv_bo_sync_wait(struct vk_device * vk_device,uint32_t wait_count,const struct vk_sync_wait * waits,enum vk_sync_wait_flags wait_flags,uint64_t abs_timeout_ns)101 anv_bo_sync_wait(struct vk_device *vk_device,
102 uint32_t wait_count,
103 const struct vk_sync_wait *waits,
104 enum vk_sync_wait_flags wait_flags,
105 uint64_t abs_timeout_ns)
106 {
107 struct anv_device *device = container_of(vk_device, struct anv_device, vk);
108 VkResult result;
109 MESA_TRACE_FUNC();
110
111 uint32_t pending = wait_count;
112 while (pending) {
113 pending = 0;
114 bool signaled = false;
115 for (uint32_t i = 0; i < wait_count; i++) {
116 struct anv_bo_sync *sync = to_anv_bo_sync(waits[i].sync);
117 switch (sync->state) {
118 case ANV_BO_SYNC_STATE_RESET:
119 /* This fence hasn't been submitted yet, we'll catch it the next
120 * time around. Yes, this may mean we dead-loop but, short of
121 * lots of locking and a condition variable, there's not much that
122 * we can do about that.
123 */
124 assert(!(wait_flags & VK_SYNC_WAIT_PENDING));
125 pending++;
126 continue;
127
128 case ANV_BO_SYNC_STATE_SIGNALED:
129 /* This fence is not pending. If waitAll isn't set, we can return
130 * early. Otherwise, we have to keep going.
131 */
132 if (wait_flags & VK_SYNC_WAIT_ANY)
133 return VK_SUCCESS;
134 continue;
135
136 case ANV_BO_SYNC_STATE_SUBMITTED:
137 /* These are the fences we really care about. Go ahead and wait
138 * on it until we hit a timeout.
139 */
140 if (!(wait_flags & VK_SYNC_WAIT_PENDING)) {
141 uint64_t rel_timeout = anv_get_relative_timeout(abs_timeout_ns);
142 result = anv_device_wait(device, sync->bo, rel_timeout);
143 /* This also covers VK_TIMEOUT */
144 if (result != VK_SUCCESS)
145 return result;
146
147 sync->state = ANV_BO_SYNC_STATE_SIGNALED;
148 signaled = true;
149 }
150 if (wait_flags & VK_SYNC_WAIT_ANY)
151 return VK_SUCCESS;
152 break;
153
154 default:
155 unreachable("Invalid BO sync state");
156 }
157 }
158
159 if (pending && !signaled) {
160 /* If we've hit this then someone decided to vkWaitForFences before
161 * they've actually submitted any of them to a queue. This is a
162 * fairly pessimal case, so it's ok to lock here and use a standard
163 * pthreads condition variable.
164 */
165 pthread_mutex_lock(&device->mutex);
166
167 /* It's possible that some of the fences have changed state since the
168 * last time we checked. Now that we have the lock, check for
169 * pending fences again and don't wait if it's changed.
170 */
171 uint32_t now_pending = 0;
172 for (uint32_t i = 0; i < wait_count; i++) {
173 struct anv_bo_sync *sync = to_anv_bo_sync(waits[i].sync);
174 if (sync->state == ANV_BO_SYNC_STATE_RESET)
175 now_pending++;
176 }
177 assert(now_pending <= pending);
178
179 if (now_pending == pending) {
180 struct timespec abstime = {
181 .tv_sec = abs_timeout_ns / NSEC_PER_SEC,
182 .tv_nsec = abs_timeout_ns % NSEC_PER_SEC,
183 };
184
185 ASSERTED int ret;
186 ret = pthread_cond_timedwait(&device->queue_submit,
187 &device->mutex, &abstime);
188 assert(ret != EINVAL);
189 if (os_time_get_nano() >= abs_timeout_ns) {
190 pthread_mutex_unlock(&device->mutex);
191 return VK_TIMEOUT;
192 }
193 }
194
195 pthread_mutex_unlock(&device->mutex);
196 }
197 }
198
199 return VK_SUCCESS;
200 }
201
202 const struct vk_sync_type anv_bo_sync_type = {
203 .size = sizeof(struct anv_bo_sync),
204 .features = VK_SYNC_FEATURE_BINARY |
205 VK_SYNC_FEATURE_GPU_WAIT |
206 VK_SYNC_FEATURE_GPU_MULTI_WAIT |
207 VK_SYNC_FEATURE_CPU_WAIT |
208 VK_SYNC_FEATURE_CPU_RESET |
209 VK_SYNC_FEATURE_WAIT_ANY |
210 VK_SYNC_FEATURE_WAIT_PENDING,
211 .init = anv_bo_sync_init,
212 .finish = anv_bo_sync_finish,
213 .reset = anv_bo_sync_reset,
214 .wait_many = anv_bo_sync_wait,
215 };
216
217 VkResult
anv_create_sync_for_memory(struct vk_device * device,VkDeviceMemory memory,bool signal_memory,struct vk_sync ** sync_out)218 anv_create_sync_for_memory(struct vk_device *device,
219 VkDeviceMemory memory,
220 bool signal_memory,
221 struct vk_sync **sync_out)
222 {
223 ANV_FROM_HANDLE(anv_device_memory, mem, memory);
224 struct anv_bo_sync *bo_sync;
225
226 bo_sync = vk_zalloc(&device->alloc, sizeof(*bo_sync), 8,
227 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
228 if (bo_sync == NULL)
229 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
230
231 bo_sync->sync.type = &anv_bo_sync_type;
232 bo_sync->state = signal_memory ? ANV_BO_SYNC_STATE_RESET :
233 ANV_BO_SYNC_STATE_SUBMITTED;
234 bo_sync->bo = anv_bo_ref(mem->bo);
235
236 *sync_out = &bo_sync->sync;
237
238 return VK_SUCCESS;
239 }
240