/*
 * Copyright © 2008-2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

/**
 * @file brw_bufmgr.h
 *
 * Public definitions of Intel-specific bufmgr functions.
 */

#ifndef BRW_BUFMGR_H
#define BRW_BUFMGR_H

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#include "c11/threads.h"
#include "util/u_atomic.h"
#include "util/list.h"

#if defined(__cplusplus)
extern "C" {
#endif

struct intel_device_info;
struct brw_context;

/**
 * Memory zones. When allocating a buffer, you can request that it is
 * placed into a specific region of the virtual address space (PPGTT).
 *
 * Most buffers can go anywhere (BRW_MEMZONE_OTHER). Some buffers are
 * accessed via an offset from a base address. STATE_BASE_ADDRESS has
 * a maximum 4GB size for each region, so we need to restrict those
 * buffers to be within 4GB of the base. Each memory zone corresponds
 * to a particular base address.
 *
 * Currently, i965 partitions the address space into two regions:
 *
 * - Low 4GB
 * - Full 48-bit address space
 *
 * Eventually, we hope to carve out 4GB of VMA for each base address.
 */
enum brw_memory_zone {
   BRW_MEMZONE_LOW_4G,
   BRW_MEMZONE_OTHER,

   /* Shaders - Instruction State Base Address */
   BRW_MEMZONE_SHADER  = BRW_MEMZONE_LOW_4G,

   /* Scratch - General State Base Address */
   BRW_MEMZONE_SCRATCH = BRW_MEMZONE_LOW_4G,

   /* Surface State Base Address */
   BRW_MEMZONE_SURFACE = BRW_MEMZONE_LOW_4G,

   /* Dynamic State Base Address */
   BRW_MEMZONE_DYNAMIC = BRW_MEMZONE_LOW_4G,
};

#define BRW_MEMZONE_COUNT (BRW_MEMZONE_OTHER + 1)
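
/*
 * Illustrative sketch (not part of the original header): picking a memory
 * zone at allocation time. 'bufmgr' and 'shader_size' are assumed to come
 * from the caller. Shader kernels are addressed relative to Instruction
 * State Base Address, so they must live in the low-4GB zone:
 *
 *    struct brw_bo *kernel_bo =
 *       brw_bo_alloc(bufmgr, "shader kernel", shader_size, BRW_MEMZONE_SHADER);
 *
 *    // Buffers with no base-address restriction can go anywhere:
 *    struct brw_bo *vbo =
 *       brw_bo_alloc(bufmgr, "vertex data", 4096, BRW_MEMZONE_OTHER);
 */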

struct brw_bo {
   /**
    * Size in bytes of the buffer object.
    *
    * The size may be larger than the size originally requested for the
    * allocation, such as being aligned to page size.
    */
   uint64_t size;

   /** Buffer manager context associated with this buffer object */
   struct brw_bufmgr *bufmgr;

   /** The GEM handle for this buffer object. */
   uint32_t gem_handle;

   /**
    * Offset of the buffer inside the Graphics Translation Table.
    *
    * This is effectively our GPU address for the buffer and we use it
    * as our base for all state pointers into the buffer. However, since the
    * kernel may be forced to move it around during the course of the
    * buffer's lifetime, we can only know where the buffer was on the last
    * execbuf. We presume, and are usually right, that the buffer will not
    * move and so we use that last offset for the next batch and by doing
    * so we can avoid having the kernel perform a relocation fixup pass as
    * our pointers inside the batch will be using the correct base offset.
    *
    * Since we do use it as a base address for the next batch of pointers,
    * the kernel treats our offset as a request, and if possible will
    * arrange for the buffer to be placed at that address (trying to balance
    * the cost of buffer migration versus the cost of performing
    * relocations). Furthermore, by specifying EXEC_OBJECT_PINNED we can
    * force the kernel to place the buffer at our chosen offset, or report
    * a failure if a conflicting offset was specified.
    *
    * Note the GTT may be either per context, or shared globally across the
    * system. On a shared system, our buffers have to contend for address
    * space with both aperture mappings and framebuffers and so are more
    * likely to be moved. On a full ppGTT system, each batch exists in its
    * own GTT, and so each buffer may have its own offset within each
    * context.
    */
   uint64_t gtt_offset;

   /**
    * The validation list index for this buffer, or -1 when not in a batch.
    * Note that a single buffer may be in multiple batches (contexts), and
    * this is a global field, which refers to the last batch using the BO.
    * It should not be considered authoritative, but can be used to avoid a
    * linear walk of the validation list in the common case by guessing that
    * exec_bos[bo->index] == bo and confirming whether that's the case.
    */
   unsigned index;

   /**
    * Boolean of whether the GPU is definitely not accessing the buffer.
    *
    * This is only valid when reusable, since non-reusable
    * buffers are those that have been shared with other
    * processes, so we don't know their state.
    */
   bool idle;

   int refcount;
   const char *name;

   uint64_t kflags;

   /**
    * Kernel-assigned global name for this object
    *
    * List contains both flink-named and prime fd'd objects
    */
   unsigned int global_name;

   /**
    * Current tiling mode
    */
   uint32_t tiling_mode;
   uint32_t swizzle_mode;
   uint32_t stride;

   time_t free_time;

   /** Mapped address for the buffer, saved across map/unmap cycles */
   void *map_cpu;
   /** GTT virtual address for the buffer, saved across map/unmap cycles */
   void *map_gtt;
   /** WC CPU address for the buffer, saved across map/unmap cycles */
   void *map_wc;

   /** BO cache list */
   struct list_head head;

   /**
    * List of GEM handle exports of this buffer (bo_export).
    *
    * Hold bufmgr->lock when using this list.
    */
   struct list_head exports;

   /**
    * Boolean of whether this buffer can be re-used
    */
   bool reusable;

   /**
    * Boolean of whether this buffer has been shared with an external client.
    */
   bool external;

   /**
    * Boolean of whether this buffer is cache coherent
    */
   bool cache_coherent;
};

#define BO_ALLOC_BUSY   (1<<0)
#define BO_ALLOC_ZEROED (1<<1)

/**
 * Allocate a buffer object.
 *
 * Buffer objects are not necessarily initially mapped into CPU virtual
 * address space or graphics device aperture. They must be mapped
 * using brw_bo_map() to be used by the CPU.
 */
struct brw_bo *brw_bo_alloc(struct brw_bufmgr *bufmgr, const char *name,
                            uint64_t size, enum brw_memory_zone memzone);
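
/*
 * Illustrative sketch (not part of the original header): a typical buffer
 * lifetime. 'bufmgr' is assumed to come from the caller; the allocation
 * starts with one reference, which brw_bo_unreference() releases.
 *
 *    struct brw_bo *bo =
 *       brw_bo_alloc(bufmgr, "some data", 8192, BRW_MEMZONE_OTHER);
 *    if (bo) {
 *       ... use the buffer via brw_bo_map() or brw_bo_subdata(), below ...
 *       brw_bo_unreference(bo);
 *    }
 */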

/**
 * Allocate a tiled buffer object.
 *
 * Alignment for tiled objects is set automatically; the 'flags'
 * argument provides a hint about how the object will be used initially.
 *
 * Valid tiling formats are:
 *  I915_TILING_NONE
 *  I915_TILING_X
 *  I915_TILING_Y
 */
struct brw_bo *brw_bo_alloc_tiled(struct brw_bufmgr *bufmgr,
                                  const char *name,
                                  uint64_t size,
                                  enum brw_memory_zone memzone,
                                  uint32_t tiling_mode,
                                  uint32_t pitch,
                                  unsigned flags);

/**
 * Allocate a tiled buffer object.
 *
 * Alignment for tiled objects is set automatically; the 'flags'
 * argument provides a hint about how the object will be used initially.
 *
 * Valid tiling formats are:
 *  I915_TILING_NONE
 *  I915_TILING_X
 *  I915_TILING_Y
 *
 * Note the tiling format may be rejected; callers should check the
 * 'tiling_mode' field on return, as well as the pitch value, which
 * may have been rounded up to accommodate tiling restrictions.
 */
struct brw_bo *brw_bo_alloc_tiled_2d(struct brw_bufmgr *bufmgr,
                                     const char *name,
                                     int x, int y, int cpp,
                                     enum brw_memory_zone memzone,
                                     uint32_t tiling_mode,
                                     uint32_t *pitch,
                                     unsigned flags);
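
/*
 * Illustrative sketch (assumptions: 'bufmgr', 'width', and 'height' come from
 * the caller; cpp is 4 bytes per pixel): allocating a Y-tiled 2D buffer and
 * checking what was actually granted, since both the tiling mode and the
 * pitch may be adjusted.
 *
 *    uint32_t pitch = 0;
 *    struct brw_bo *bo =
 *       brw_bo_alloc_tiled_2d(bufmgr, "miptree", width, height, 4,
 *                             BRW_MEMZONE_OTHER, I915_TILING_Y, &pitch, 0);
 *    if (bo && bo->tiling_mode != I915_TILING_Y) {
 *       ... fall back to a linear or X-tiled layout ...
 *    }
 *    // 'pitch' now holds the actual row stride in bytes.
 */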

/** Takes a reference on a buffer object */
static inline void
brw_bo_reference(struct brw_bo *bo)
{
   p_atomic_inc(&bo->refcount);
}

/**
 * Releases a reference on a buffer object, freeing the data if
 * no references remain.
 */
void brw_bo_unreference(struct brw_bo *bo);
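
/*
 * Illustrative sketch (the 'cache_entry' struct is hypothetical): whenever a
 * second owner stores a pointer to a BO, it takes its own reference and drops
 * it when that pointer goes away.
 *
 *    cache_entry->bo = bo;
 *    brw_bo_reference(bo);                 // the cache entry now co-owns the BO
 *    ...
 *    brw_bo_unreference(cache_entry->bo);  // released when the entry dies
 */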

/* Must match MapBufferRange interface (for convenience) */
#define MAP_READ          GL_MAP_READ_BIT
#define MAP_WRITE         GL_MAP_WRITE_BIT
#define MAP_ASYNC         GL_MAP_UNSYNCHRONIZED_BIT
#define MAP_PERSISTENT    GL_MAP_PERSISTENT_BIT
#define MAP_COHERENT      GL_MAP_COHERENT_BIT
/* internal */
#define MAP_INTERNAL_MASK (0xffu << 24)
#define MAP_RAW           (0x01 << 24)

/**
 * Maps the buffer into userspace.
 *
 * This function will first block, waiting for any existing execution on the
 * buffer to complete. The resulting mapping is returned.
 */
MUST_CHECK void *brw_bo_map(struct brw_context *brw, struct brw_bo *bo, unsigned flags);

/**
 * Reduces the refcount on the userspace mapping of the buffer
 * object.
 */
static inline int brw_bo_unmap(UNUSED struct brw_bo *bo) { return 0; }
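
/*
 * Illustrative sketch (assumptions: 'brw', 'bo', 'data', and 'size' come from
 * the caller): mapping a buffer for a CPU write without stalling on the GPU,
 * by passing MAP_ASYNC alongside MAP_WRITE.
 *
 *    void *map = brw_bo_map(brw, bo, MAP_WRITE | MAP_ASYNC);
 *    if (map) {
 *       memcpy(map, data, size);
 *       brw_bo_unmap(bo);
 *    }
 */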

/** Write data into an object. */
int brw_bo_subdata(struct brw_bo *bo, uint64_t offset,
                   uint64_t size, const void *data);
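
/*
 * Illustrative sketch (assuming 'bo' already exists): uploading a small block
 * of constants without mapping the buffer explicitly.
 *
 *    float constants[4] = { 0.0f, 0.25f, 0.5f, 1.0f };
 *    brw_bo_subdata(bo, 0, sizeof(constants), constants);
 */
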
/**
 * Waits for rendering to an object by the GPU to have completed.
 *
 * This is not required for any access to the BO by bo_map,
 * bo_subdata, etc. It is merely a way for the driver to implement
 * glFinish.
 */
void brw_bo_wait_rendering(struct brw_bo *bo);

/**
 * Unref a buffer manager instance.
 */
void brw_bufmgr_unref(struct brw_bufmgr *bufmgr);

/**
 * Get the current tiling (and resulting swizzling) mode for the bo.
 *
 * \param buf Buffer to get tiling mode for
 * \param tiling_mode returned tiling mode
 * \param swizzle_mode returned swizzling mode
 */
int brw_bo_get_tiling(struct brw_bo *bo, uint32_t *tiling_mode,
                      uint32_t *swizzle_mode);

/**
 * Create a visible name for a buffer which can be used by other apps
 *
 * \param buf Buffer to create a name for
 * \param name Returned name
 */
int brw_bo_flink(struct brw_bo *bo, uint32_t *name);

/**
 * Returns 1 if mapping the buffer for write could cause the process
 * to block, due to the object being active in the GPU.
 */
int brw_bo_busy(struct brw_bo *bo);
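
/*
 * Illustrative sketch (assuming 'bo' holds data we want to overwrite):
 * brw_bo_busy() lets the caller avoid a stall by choosing a different
 * strategy when the GPU is still using the buffer.
 *
 *    if (brw_bo_busy(bo)) {
 *       ... allocate a fresh BO (or map with MAP_ASYNC) instead of stalling ...
 *    } else {
 *       void *map = brw_bo_map(brw, bo, MAP_WRITE);
 *       ...
 *    }
 */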

/**
 * Specify the volatility of the buffer.
 * \param bo Buffer whose volatility to set
 * \param madv The purgeable status
 *
 * Use I915_MADV_DONTNEED to mark the buffer as purgeable, and it will be
 * reclaimed under memory pressure. If you subsequently require the buffer,
 * then you must pass I915_MADV_WILLNEED to mark the buffer as required.
 *
 * Returns 1 if the buffer was retained, or 0 if it was discarded whilst
 * marked as I915_MADV_DONTNEED.
 */
int brw_bo_madvise(struct brw_bo *bo, int madv);
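
/*
 * Illustrative sketch (assuming 'bo' sits in a cache of idle buffers): the
 * DONTNEED/WILLNEED pair lets the kernel reclaim cached buffers under memory
 * pressure, at the cost of re-checking their status on reuse.
 *
 *    // Buffer is idle and cached; let the kernel purge it if it needs to.
 *    brw_bo_madvise(bo, I915_MADV_DONTNEED);
 *    ...
 *    // Taking it back out of the cache:
 *    if (!brw_bo_madvise(bo, I915_MADV_WILLNEED)) {
 *       // Backing pages were discarded; the old contents are gone, so the
 *       // buffer must be treated as uninitialized (or freed and reallocated).
 *    }
 */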

struct brw_bufmgr *brw_bufmgr_get_for_fd(struct intel_device_info *devinfo,
                                         int fd, bool bo_reuse);

struct brw_bo *brw_bo_gem_create_from_name(struct brw_bufmgr *bufmgr,
                                           const char *name,
                                           unsigned int handle);
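
/*
 * Illustrative sketch: sharing a buffer between processes via a flink name.
 * The exporting side publishes the name; the importing side (with its own
 * 'bufmgr') opens it. How the name travels between processes is up to the
 * caller.
 *
 *    // Exporter:
 *    uint32_t name;
 *    if (brw_bo_flink(bo, &name) == 0) {
 *       ... hand 'name' to the other process ...
 *    }
 *
 *    // Importer:
 *    struct brw_bo *shared =
 *       brw_bo_gem_create_from_name(bufmgr, "shared scanout", name);
 */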

int brw_bo_wait(struct brw_bo *bo, int64_t timeout_ns);

uint32_t brw_create_hw_context(struct brw_bufmgr *bufmgr);

int brw_hw_context_set_priority(struct brw_bufmgr *bufmgr,
                                uint32_t ctx_id,
                                int priority);

void brw_destroy_hw_context(struct brw_bufmgr *bufmgr, uint32_t ctx_id);
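
/*
 * Illustrative sketch: creating a hardware context, adjusting its scheduling
 * priority, and destroying it. The priority value is handed to the kernel;
 * 0 is assumed here to request the default priority, and a zero ctx_id is
 * assumed to indicate failure.
 *
 *    uint32_t ctx_id = brw_create_hw_context(bufmgr);
 *    if (ctx_id) {
 *       brw_hw_context_set_priority(bufmgr, ctx_id, 0);
 *       ... submit batches against ctx_id ...
 *       brw_destroy_hw_context(bufmgr, ctx_id);
 *    }
 */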

int brw_bufmgr_get_fd(struct brw_bufmgr *bufmgr);

int brw_bo_gem_export_to_prime(struct brw_bo *bo, int *prime_fd);
struct brw_bo *brw_bo_gem_create_from_prime(struct brw_bufmgr *bufmgr,
                                            int prime_fd);
struct brw_bo *brw_bo_gem_create_from_prime_tiled(struct brw_bufmgr *bufmgr,
                                                  int prime_fd,
                                                  uint32_t tiling_mode,
                                                  uint32_t stride);
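
/*
 * Illustrative sketch: sharing a buffer as a dma-buf (PRIME) file descriptor,
 * the usual path for cross-process and cross-driver sharing. The fd can be
 * passed to another process, e.g. over a UNIX socket.
 *
 *    // Exporter:
 *    int prime_fd = -1;
 *    if (brw_bo_gem_export_to_prime(bo, &prime_fd) == 0) {
 *       ... send prime_fd to the consumer ...
 *    }
 *
 *    // Importer (possibly another process, with its own 'bufmgr'):
 *    struct brw_bo *imported =
 *       brw_bo_gem_create_from_prime(bufmgr, prime_fd);
 */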

uint32_t brw_bo_export_gem_handle(struct brw_bo *bo);

/**
 * Exports a bo as a GEM handle into a given DRM file descriptor
 * \param bo Buffer to export
 * \param drm_fd File descriptor where the new handle is created
 * \param out_handle Pointer to store the new handle
 *
 * Returns 0 if the buffer was successfully exported, a non-zero error code
 * otherwise.
 */
int brw_bo_export_gem_handle_for_device(struct brw_bo *bo, int drm_fd,
                                        uint32_t *out_handle);

int brw_reg_read(struct brw_bufmgr *bufmgr, uint32_t offset,
                 uint64_t *result);

bool brw_using_softpin(struct brw_bufmgr *bufmgr);

/** @{ */

#if defined(__cplusplus)
}
#endif
#endif /* BRW_BUFMGR_H */