/* SPDX-License-Identifier: MIT */
#define _DEFAULT_SOURCE

#include "lib.h"
#include "syscall.h"
#include "liburing.h"
#include "int_flags.h"
#include "setup.h"
#include "liburing/compat.h"
#include "liburing/io_uring.h"

#define KERN_MAX_ENTRIES	32768
#define KERN_MAX_CQ_ENTRIES	(2 * KERN_MAX_ENTRIES)

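/*
 * Return the 1-based index of the most significant set bit in 'x', or 0 if
 * no bits are set.
 */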
static inline int __fls(int x)
{
	if (!x)
		return 0;
	return 8 * sizeof(x) - __builtin_clz(x);
}

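/* Round 'depth' up to the nearest power of two */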
static unsigned roundup_pow2(unsigned depth)
{
	return 1U << __fls(depth - 1);
}

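/*
 * Validate and adjust the requested SQ/CQ sizes: clamp to the kernel limits
 * if IORING_SETUP_CLAMP is set, round up to a power of two, and return the
 * resulting entry counts through 'sq' and 'cq'.
 */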
static int get_sq_cq_entries(unsigned entries, struct io_uring_params *p,
			     unsigned *sq, unsigned *cq)
{
	unsigned cq_entries;

	if (!entries)
		return -EINVAL;
	if (entries > KERN_MAX_ENTRIES) {
		if (!(p->flags & IORING_SETUP_CLAMP))
			return -EINVAL;
		entries = KERN_MAX_ENTRIES;
	}

	entries = roundup_pow2(entries);
	if (p->flags & IORING_SETUP_CQSIZE) {
		if (!p->cq_entries)
			return -EINVAL;
		cq_entries = p->cq_entries;
		if (cq_entries > KERN_MAX_CQ_ENTRIES) {
			if (!(p->flags & IORING_SETUP_CLAMP))
				return -EINVAL;
			cq_entries = KERN_MAX_CQ_ENTRIES;
		}
		cq_entries = roundup_pow2(cq_entries);
		if (cq_entries < entries)
			return -EINVAL;
	} else {
		cq_entries = 2 * entries;
	}

	*sq = entries;
	*cq = cq_entries;
	return 0;
}

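/*
 * Unmap the SQ and CQ rings. If the two rings share a single mapping, only
 * unmap it once.
 */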
static void io_uring_unmap_rings(struct io_uring_sq *sq, struct io_uring_cq *cq)
{
	if (sq->ring_sz)
		__sys_munmap(sq->ring_ptr, sq->ring_sz);
	if (cq->ring_ptr && cq->ring_sz && cq->ring_ptr != sq->ring_ptr)
		__sys_munmap(cq->ring_ptr, cq->ring_sz);
}

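/*
 * Wire up the application-side pointers into the mapped SQ and CQ rings,
 * using the offsets that the kernel filled into 'p'.
 */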
static void io_uring_setup_ring_pointers(struct io_uring_params *p,
					 struct io_uring_sq *sq,
					 struct io_uring_cq *cq)
{
	sq->khead = sq->ring_ptr + p->sq_off.head;
	sq->ktail = sq->ring_ptr + p->sq_off.tail;
	sq->kring_mask = sq->ring_ptr + p->sq_off.ring_mask;
	sq->kring_entries = sq->ring_ptr + p->sq_off.ring_entries;
	sq->kflags = sq->ring_ptr + p->sq_off.flags;
	sq->kdropped = sq->ring_ptr + p->sq_off.dropped;
	if (!(p->flags & IORING_SETUP_NO_SQARRAY))
		sq->array = sq->ring_ptr + p->sq_off.array;

	cq->khead = cq->ring_ptr + p->cq_off.head;
	cq->ktail = cq->ring_ptr + p->cq_off.tail;
	cq->kring_mask = cq->ring_ptr + p->cq_off.ring_mask;
	cq->kring_entries = cq->ring_ptr + p->cq_off.ring_entries;
	cq->koverflow = cq->ring_ptr + p->cq_off.overflow;
	cq->cqes = cq->ring_ptr + p->cq_off.cqes;
	if (p->cq_off.flags)
		cq->kflags = cq->ring_ptr + p->cq_off.flags;

	sq->ring_mask = *sq->kring_mask;
	sq->ring_entries = *sq->kring_entries;
	cq->ring_mask = *cq->kring_mask;
	cq->ring_entries = *cq->kring_entries;
}

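/*
 * Map the SQ and CQ rings and the SQE array for the ring referred to by
 * 'fd'. With IORING_FEAT_SINGLE_MMAP, the two rings share one mapping.
 * Returns 0 on success, -errno on failure.
 */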
static int io_uring_mmap(int fd, struct io_uring_params *p,
			 struct io_uring_sq *sq, struct io_uring_cq *cq)
{
	size_t size;
	int ret;

	size = sizeof(struct io_uring_cqe);
	if (p->flags & IORING_SETUP_CQE32)
		size += sizeof(struct io_uring_cqe);

	sq->ring_sz = p->sq_off.array + p->sq_entries * sizeof(unsigned);
	cq->ring_sz = p->cq_off.cqes + p->cq_entries * size;

	if (p->features & IORING_FEAT_SINGLE_MMAP) {
		if (cq->ring_sz > sq->ring_sz)
			sq->ring_sz = cq->ring_sz;
		cq->ring_sz = sq->ring_sz;
	}
	sq->ring_ptr = __sys_mmap(0, sq->ring_sz, PROT_READ | PROT_WRITE,
				  MAP_SHARED | MAP_POPULATE, fd,
				  IORING_OFF_SQ_RING);
	if (IS_ERR(sq->ring_ptr))
		return PTR_ERR(sq->ring_ptr);

	if (p->features & IORING_FEAT_SINGLE_MMAP) {
		cq->ring_ptr = sq->ring_ptr;
	} else {
		cq->ring_ptr = __sys_mmap(0, cq->ring_sz, PROT_READ | PROT_WRITE,
					  MAP_SHARED | MAP_POPULATE, fd,
					  IORING_OFF_CQ_RING);
		if (IS_ERR(cq->ring_ptr)) {
			ret = PTR_ERR(cq->ring_ptr);
			cq->ring_ptr = NULL;
			goto err;
		}
	}

	size = sizeof(struct io_uring_sqe);
	if (p->flags & IORING_SETUP_SQE128)
		size += 64;
	sq->sqes = __sys_mmap(0, size * p->sq_entries, PROT_READ | PROT_WRITE,
			      MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQES);
	if (IS_ERR(sq->sqes)) {
		ret = PTR_ERR(sq->sqes);
err:
		io_uring_unmap_rings(sq, cq);
		return ret;
	}

	io_uring_setup_ring_pointers(p, sq, cq);
	return 0;
}

/*
 * For users that want to specify sq_thread_cpu or sq_thread_idle, this
 * interface is a convenient helper for mmap()ing the rings.
 * Returns -errno on error, or zero on success.  On success, 'ring'
 * contains the necessary information to read/write to the rings.
 */
__cold int io_uring_queue_mmap(int fd, struct io_uring_params *p,
			       struct io_uring *ring)
{
	memset(ring, 0, sizeof(*ring));
	return io_uring_mmap(fd, p, &ring->sq, &ring->cq);
}

/*
 * Ensure that the mmap'ed rings aren't available to a child after a fork(2).
 * This uses madvise(..., MADV_DONTFORK) on the mmap'ed ranges.
 */
__cold int io_uring_ring_dontfork(struct io_uring *ring)
{
	size_t len;
	int ret;

	if (!ring->sq.ring_ptr || !ring->sq.sqes || !ring->cq.ring_ptr)
		return -EINVAL;

	len = sizeof(struct io_uring_sqe);
	if (ring->flags & IORING_SETUP_SQE128)
		len += 64;
	len *= ring->sq.ring_entries;
	ret = __sys_madvise(ring->sq.sqes, len, MADV_DONTFORK);
	if (ret < 0)
		return ret;

	len = ring->sq.ring_sz;
	ret = __sys_madvise(ring->sq.ring_ptr, len, MADV_DONTFORK);
	if (ret < 0)
		return ret;

	if (ring->cq.ring_ptr != ring->sq.ring_ptr) {
		len = ring->cq.ring_sz;
		ret = __sys_madvise(ring->cq.ring_ptr, len, MADV_DONTFORK);
		if (ret < 0)
			return ret;
	}

	return 0;
}

/* FIXME */
static size_t huge_page_size = 2 * 1024 * 1024;

#define KRING_SIZE	64

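/*
 * Lays out the ring memory for IORING_SETUP_NO_MMAP: the SQEs are placed
 * first, with the SQ/CQ ring following them in the same mapping when it
 * fits, or in a second anonymous mapping otherwise. The memory comes either
 * from the application-provided 'buf' or from anonymous (possibly huge page)
 * mappings created here; the chosen addresses are passed to the kernel via
 * p->sq_off.user_addr and p->cq_off.user_addr.
 */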
/*
 * Returns negative for error, or number of bytes used in the buffer on success
 */
static int io_uring_alloc_huge(unsigned entries, struct io_uring_params *p,
			       struct io_uring_sq *sq, struct io_uring_cq *cq,
			       void *buf, size_t buf_size)
{
	unsigned long page_size = get_page_size();
	unsigned sq_entries, cq_entries;
	size_t ring_mem, sqes_mem, cqes_mem;
	unsigned long mem_used = 0;
	void *ptr;
	int ret;

	ret = get_sq_cq_entries(entries, p, &sq_entries, &cq_entries);
	if (ret)
		return ret;

	ring_mem = KRING_SIZE;

	sqes_mem = sq_entries * sizeof(struct io_uring_sqe);
	if (!(p->flags & IORING_SETUP_NO_SQARRAY))
		sqes_mem += sq_entries * sizeof(unsigned);
	sqes_mem = (sqes_mem + page_size - 1) & ~(page_size - 1);

	cqes_mem = cq_entries * sizeof(struct io_uring_cqe);
	if (p->flags & IORING_SETUP_CQE32)
		cqes_mem *= 2;
	ring_mem += sqes_mem + cqes_mem;
	mem_used = ring_mem;
	mem_used = (mem_used + page_size - 1) & ~(page_size - 1);

	/*
	 * A maxed-out number of CQ entries with IORING_SETUP_CQE32 fills a 2MB
	 * huge page by itself, so the SQ entries won't fit in the same huge
	 * page. For SQEs, that shouldn't be possible given KERN_MAX_ENTRIES,
	 * but check that too to future-proof (e.g. against different huge page
	 * sizes). Bail out early so we don't overrun.
	 */
	if (!buf && (sqes_mem > huge_page_size || ring_mem > huge_page_size))
		return -ENOMEM;

	if (buf) {
		if (mem_used > buf_size)
			return -ENOMEM;
		ptr = buf;
	} else {
		int map_hugetlb = 0;
		if (sqes_mem <= page_size)
			buf_size = page_size;
		else {
			buf_size = huge_page_size;
			map_hugetlb = MAP_HUGETLB;
		}
		ptr = __sys_mmap(NULL, buf_size, PROT_READ|PROT_WRITE,
					MAP_SHARED|MAP_ANONYMOUS|map_hugetlb,
					-1, 0);
		if (IS_ERR(ptr))
			return PTR_ERR(ptr);
	}

	sq->sqes = ptr;
	if (mem_used <= buf_size) {
		sq->ring_ptr = (void *) sq->sqes + sqes_mem;
		/* clear ring sizes, we have just one mmap() to undo */
		cq->ring_sz = 0;
		sq->ring_sz = 0;
	} else {
		int map_hugetlb = 0;
		if (ring_mem <= page_size)
			buf_size = page_size;
		else {
			buf_size = huge_page_size;
			map_hugetlb = MAP_HUGETLB;
		}
		ptr = __sys_mmap(NULL, buf_size, PROT_READ|PROT_WRITE,
					MAP_SHARED|MAP_ANONYMOUS|map_hugetlb,
					-1, 0);
		if (IS_ERR(ptr)) {
			__sys_munmap(sq->sqes, 1);
			return PTR_ERR(ptr);
		}
		sq->ring_ptr = ptr;
		sq->ring_sz = buf_size;
		cq->ring_sz = 0;
	}

	cq->ring_ptr = (void *) sq->ring_ptr;
	p->sq_off.user_addr = (unsigned long) sq->sqes;
	p->cq_off.user_addr = (unsigned long) sq->ring_ptr;
	return (int) mem_used;
}

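/*
 * Common setup path: allocate ring memory up front for IORING_SETUP_NO_MMAP,
 * create the ring with io_uring_setup(2), map (or wire up) the rings, and
 * fill in the SQ array and the 'ring' bookkeeping. Returns -errno on failure;
 * on success, returns 0, or the number of ring memory bytes used when
 * IORING_SETUP_NO_MMAP is set.
 */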
int __io_uring_queue_init_params(unsigned entries, struct io_uring *ring,
				 struct io_uring_params *p, void *buf,
				 size_t buf_size)
{
	int fd, ret = 0;
	unsigned *sq_array;
	unsigned sq_entries, index;

	memset(ring, 0, sizeof(*ring));

	/*
	 * The kernel does this check already, but checking it here allows us
	 * to avoid handling it below.
	 */
	if (p->flags & IORING_SETUP_REGISTERED_FD_ONLY
	    && !(p->flags & IORING_SETUP_NO_MMAP))
		return -EINVAL;

	if (p->flags & IORING_SETUP_NO_MMAP) {
		ret = io_uring_alloc_huge(entries, p, &ring->sq, &ring->cq,
						buf, buf_size);
		if (ret < 0)
			return ret;
		if (buf)
			ring->int_flags |= INT_FLAG_APP_MEM;
	}

	fd = __sys_io_uring_setup(entries, p);
	if (fd < 0) {
		if ((p->flags & IORING_SETUP_NO_MMAP) &&
		    !(ring->int_flags & INT_FLAG_APP_MEM)) {
			__sys_munmap(ring->sq.sqes, 1);
			io_uring_unmap_rings(&ring->sq, &ring->cq);
		}
		return fd;
	}

	if (!(p->flags & IORING_SETUP_NO_MMAP)) {
		ret = io_uring_queue_mmap(fd, p, ring);
		if (ret) {
			__sys_close(fd);
			return ret;
		}
	} else {
		io_uring_setup_ring_pointers(p, &ring->sq, &ring->cq);
	}

	/*
	 * Directly map SQ slots to SQEs
	 */
	sq_entries = ring->sq.ring_entries;

	if (!(p->flags & IORING_SETUP_NO_SQARRAY)) {
		sq_array = ring->sq.array;
		for (index = 0; index < sq_entries; index++)
			sq_array[index] = index;
	}
	ring->features = p->features;
	ring->flags = p->flags;
	ring->enter_ring_fd = fd;
	if (p->flags & IORING_SETUP_REGISTERED_FD_ONLY) {
		ring->ring_fd = -1;
		ring->int_flags |= INT_FLAG_REG_RING | INT_FLAG_REG_REG_RING;
	} else {
		ring->ring_fd = fd;
	}

	return ret;
}

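/*
 * Try the setup with IORING_SETUP_NO_SQARRAY set; if the kernel doesn't
 * support it (-EINVAL) and the caller didn't request it explicitly, retry
 * with the original flags.
 */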
static int io_uring_queue_init_try_nosqarr(unsigned entries, struct io_uring *ring,
					   struct io_uring_params *p, void *buf,
					   size_t buf_size)
{
	unsigned flags = p->flags;
	int ret;

	p->flags |= IORING_SETUP_NO_SQARRAY;
	ret = __io_uring_queue_init_params(entries, ring, p, buf, buf_size);

	/* don't fall back if NOSQARRAY was explicitly asked for */
	if (ret != -EINVAL || (flags & IORING_SETUP_NO_SQARRAY))
		return ret;

	p->flags = flags;
	return __io_uring_queue_init_params(entries, ring, p, buf, buf_size);
}

/*
 * Like io_uring_queue_init_params(), except it allows the application to pass
 * in a pre-allocated memory range that is used for the shared data between
 * the kernel and the application. This includes the sqes array and the two
 * rings. The memory must be contiguous; the expected use case is that the
 * application allocates a huge page and passes it in.
 *
 * Returns the number of bytes used in the buffer; the app can then reuse
 * the buffer with the returned offset to put more rings in the same huge
 * page. Returns -ENOMEM if there's not enough room left in the buffer to
 * host the ring.
 */
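/*
 * Example usage (sketch, error handling mostly elided): place two rings in
 * one application-allocated 2MB huge page, using the returned byte count as
 * the offset for the second ring.
 *
 *	struct io_uring ring1, ring2;
 *	struct io_uring_params p;
 *	size_t len = 2 * 1024 * 1024;
 *	void *buf;
 *	int used;
 *
 *	buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *		   MAP_SHARED | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
 *	memset(&p, 0, sizeof(p));
 *	used = io_uring_queue_init_mem(8, &ring1, &p, buf, len);
 *	if (used < 0)
 *		return used;
 *	memset(&p, 0, sizeof(p));
 *	used = io_uring_queue_init_mem(8, &ring2, &p,
 *				       (char *) buf + used, len - used);
 */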
int io_uring_queue_init_mem(unsigned entries, struct io_uring *ring,
			    struct io_uring_params *p,
			    void *buf, size_t buf_size)
{
	/* should already be set... */
	p->flags |= IORING_SETUP_NO_MMAP;
	return io_uring_queue_init_try_nosqarr(entries, ring, p, buf, buf_size);
}

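/*
 * Like io_uring_queue_init(), but takes a caller-filled io_uring_params.
 * Returns 0 on success, -errno on failure.
 */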
int io_uring_queue_init_params(unsigned entries, struct io_uring *ring,
			       struct io_uring_params *p)
{
	int ret;

	ret = io_uring_queue_init_try_nosqarr(entries, ring, p, NULL, 0);
	return ret >= 0 ? 0 : ret;
}

/*
 * Returns -errno on error, or zero on success. On success, 'ring'
 * contains the necessary information to read/write to the rings.
 */
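/*
 * Example usage (sketch, error handling elided): set up a ring with 8
 * entries, submit a NOP request, and wait for its completion.
 *
 *	struct io_uring ring;
 *	struct io_uring_sqe *sqe;
 *	struct io_uring_cqe *cqe;
 *
 *	if (io_uring_queue_init(8, &ring, 0) < 0)
 *		return -1;
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_nop(sqe);
 *	io_uring_submit(&ring);
 *	io_uring_wait_cqe(&ring, &cqe);
 *	io_uring_cqe_seen(&ring, cqe);
 *	io_uring_queue_exit(&ring);
 */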
__cold int io_uring_queue_init(unsigned entries, struct io_uring *ring,
			       unsigned flags)
{
	struct io_uring_params p;

	memset(&p, 0, sizeof(p));
	p.flags = flags;

	return io_uring_queue_init_params(entries, ring, &p);
}

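/*
 * Tear down a ring set up with one of the io_uring_queue_init*() helpers:
 * unmap the SQEs and rings (unless the memory was provided by the
 * application), unregister the ring fd if it was registered, and close it.
 */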
__cold void io_uring_queue_exit(struct io_uring *ring)
{
	struct io_uring_sq *sq = &ring->sq;
	struct io_uring_cq *cq = &ring->cq;
	size_t sqe_size;

	if (!sq->ring_sz && !(ring->int_flags & INT_FLAG_APP_MEM)) {
		sqe_size = sizeof(struct io_uring_sqe);
		if (ring->flags & IORING_SETUP_SQE128)
			sqe_size += 64;
		__sys_munmap(sq->sqes, sqe_size * sq->ring_entries);
		io_uring_unmap_rings(sq, cq);
	} else {
		if (!(ring->int_flags & INT_FLAG_APP_MEM)) {
			__sys_munmap(sq->sqes,
				*sq->kring_entries * sizeof(struct io_uring_sqe));
			io_uring_unmap_rings(sq, cq);
		}
	}

	/*
	 * Not strictly required, but frees up the slot we used now rather
	 * than at process exit time.
	 */
	if (ring->int_flags & INT_FLAG_REG_RING)
		io_uring_unregister_ring_fd(ring);
	if (ring->ring_fd != -1)
		__sys_close(ring->ring_fd);
}

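/*
 * Allocate an io_uring_probe describing up to 256 opcodes and fill it via
 * the given ring. Returns NULL on failure; the result must be freed with
 * io_uring_free_probe().
 */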
__cold struct io_uring_probe *io_uring_get_probe_ring(struct io_uring *ring)
{
	struct io_uring_probe *probe;
	size_t len;
	int r;

	len = sizeof(*probe) + 256 * sizeof(struct io_uring_probe_op);
	probe = malloc(len);
	if (!probe)
		return NULL;
	memset(probe, 0, len);

	r = io_uring_register_probe(ring, probe, 256);
	if (r >= 0)
		return probe;

	free(probe);
	return NULL;
}

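/*
 * Like io_uring_get_probe_ring(), but creates (and tears down) a small
 * temporary ring to issue the probe.
 */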
__cold struct io_uring_probe *io_uring_get_probe(void)
{
	struct io_uring ring;
	struct io_uring_probe *probe;
	int r;

	r = io_uring_queue_init(2, &ring, 0);
	if (r < 0)
		return NULL;

	probe = io_uring_get_probe_ring(&ring);
	io_uring_queue_exit(&ring);
	return probe;
}

__cold void io_uring_free_probe(struct io_uring_probe *probe)
{
	free(probe);
}

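/*
 * Return log2 of the number of pages (rounded up to a power of two) needed
 * to hold 'size' bytes.
 */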
static size_t npages(size_t size, long page_size)
{
	size--;
	size /= page_size;
	return __fls((int) size);
}

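/*
 * Estimate the bytes of memory the kernel pins for rings of the given SQ
 * and CQ sizes; used below to compute memlock requirements on older kernels.
 */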
static size_t rings_size(struct io_uring_params *p, unsigned entries,
			 unsigned cq_entries, long page_size)
{
	size_t pages, sq_size, cq_size;

	cq_size = sizeof(struct io_uring_cqe);
	if (p->flags & IORING_SETUP_CQE32)
		cq_size += sizeof(struct io_uring_cqe);
	cq_size *= cq_entries;
	cq_size += KRING_SIZE;
	cq_size = (cq_size + 63) & ~63UL;
	pages = (size_t) 1 << npages(cq_size, page_size);

	sq_size = sizeof(struct io_uring_sqe);
	if (p->flags & IORING_SETUP_SQE128)
		sq_size += 64;
	sq_size *= entries;
	pages += (size_t) 1 << npages(sq_size, page_size);
	return pages * page_size;
}

/*
 * Return the ulimit -l memlock memory required for a given ring setup, in
 * bytes. May return -errno on error. On newer (5.12+) kernels, io_uring no
 * longer requires any memlock memory, and hence this function will return
 * 0 for that case. On older (5.11 and prior) kernels, this will return the
 * required memory so that the caller can ensure that enough space is
 * available before setting up a ring with the specified parameters.
 */
__cold ssize_t io_uring_mlock_size_params(unsigned entries,
					  struct io_uring_params *p)
{
	struct io_uring_params lp;
	struct io_uring ring;
	unsigned cq_entries, sq;
	long page_size;
	ssize_t ret;
	int cret;

	memset(&lp, 0, sizeof(lp));

	/*
	 * We only really use this inited ring to see if the kernel is newer
	 * or not. Newer kernels don't require memlocked memory. If we fail,
	 * it's most likely because it's an older kernel and we have no
	 * available memlock space. Just continue on, lp.features will still
	 * be zeroed at this point and we'll do the right thing.
	 */
	ret = io_uring_queue_init_params(entries, &ring, &lp);
	if (!ret)
		io_uring_queue_exit(&ring);

	/*
	 * Native workers imply using cgroup memory accounting, and hence no
	 * memlock memory is needed for the ring allocations.
	 */
	if (lp.features & IORING_FEAT_NATIVE_WORKERS)
		return 0;

	if (!entries)
		return -EINVAL;
	if (entries > KERN_MAX_ENTRIES) {
		if (!(p->flags & IORING_SETUP_CLAMP))
			return -EINVAL;
		entries = KERN_MAX_ENTRIES;
	}

	cret = get_sq_cq_entries(entries, p, &sq, &cq_entries);
	if (cret)
		return cret;

	page_size = get_page_size();
	return rings_size(p, sq, cq_entries, page_size);
}

/*
 * Return required ulimit -l memory space for a given ring setup. See
 * @io_uring_mlock_size_params().
 */
__cold ssize_t io_uring_mlock_size(unsigned entries, unsigned flags)
{
	struct io_uring_params p;

	memset(&p, 0, sizeof(p));
	p.flags = flags;
	return io_uring_mlock_size_params(entries, &p);
}

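/*
 * Allocate and register a provided buffer ring for buffer group 'bgid'. On
 * __hppa__, the ring memory is allocated by the kernel and mmap'ed by the
 * application through the ring fd (IOU_PBUF_RING_MMAP); on other
 * architectures, the application allocates an anonymous mapping and
 * registers its address. Returns the ring, or NULL with *err set to -errno.
 */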
#if defined(__hppa__)
static struct io_uring_buf_ring *br_setup(struct io_uring *ring,
					  unsigned int nentries, int bgid,
					  unsigned int flags, int *err)
{
	struct io_uring_buf_ring *br;
	struct io_uring_buf_reg reg;
	size_t ring_size;
	off_t off;
	int lret;

	memset(&reg, 0, sizeof(reg));
	reg.ring_entries = nentries;
	reg.bgid = bgid;
	reg.flags = IOU_PBUF_RING_MMAP;

	*err = 0;
	lret = io_uring_register_buf_ring(ring, &reg, flags);
	if (lret) {
		*err = lret;
		return NULL;
	}

	off = IORING_OFF_PBUF_RING | (unsigned long long) bgid << IORING_OFF_PBUF_SHIFT;
	ring_size = nentries * sizeof(struct io_uring_buf);
	br = __sys_mmap(NULL, ring_size, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_POPULATE, ring->ring_fd, off);
	if (IS_ERR(br)) {
		*err = PTR_ERR(br);
		return NULL;
	}

	return br;
}
#else
static struct io_uring_buf_ring *br_setup(struct io_uring *ring,
					  unsigned int nentries, int bgid,
					  unsigned int flags, int *err)
{
	struct io_uring_buf_ring *br;
	struct io_uring_buf_reg reg;
	size_t ring_size;
	int lret;

	memset(&reg, 0, sizeof(reg));
	ring_size = nentries * sizeof(struct io_uring_buf);
	br = __sys_mmap(NULL, ring_size, PROT_READ | PROT_WRITE,
			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (IS_ERR(br)) {
		*err = PTR_ERR(br);
		return NULL;
	}

	reg.ring_addr = (unsigned long) (uintptr_t) br;
	reg.ring_entries = nentries;
	reg.bgid = bgid;

	*err = 0;
	lret = io_uring_register_buf_ring(ring, &reg, flags);
	if (lret) {
		__sys_munmap(br, ring_size);
		*err = lret;
		br = NULL;
	}

	return br;
}
#endif

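/*
 * Set up and register a provided buffer ring with 'nentries' entries for
 * buffer group 'bgid'. Returns the initialized buffer ring on success, or
 * NULL on failure with *err set to -errno.
 */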
struct io_uring_buf_ring *io_uring_setup_buf_ring(struct io_uring *ring,
						  unsigned int nentries,
						  int bgid, unsigned int flags,
						  int *err)
{
	struct io_uring_buf_ring *br;

	br = br_setup(ring, nentries, bgid, flags, err);
	if (br)
		io_uring_buf_ring_init(br);

	return br;
}

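/*
 * Unregister and unmap a buffer ring previously set up with
 * io_uring_setup_buf_ring(). Returns 0 on success, -errno on failure.
 */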
int io_uring_free_buf_ring(struct io_uring *ring, struct io_uring_buf_ring *br,
			   unsigned int nentries, int bgid)
{
	int ret;

	ret = io_uring_unregister_buf_ring(ring, bgid);
	if (ret)
		return ret;

	__sys_munmap(br, nentries * sizeof(struct io_uring_buf));
	return 0;
}