/* SPDX-License-Identifier: MIT */
#ifndef LIB_URING_H
#define LIB_URING_H

#ifndef _XOPEN_SOURCE
#define _XOPEN_SOURCE 500 /* Required for glibc to expose sigset_t */
#endif

#ifndef _GNU_SOURCE
#define _GNU_SOURCE /* Required for musl to expose cpu_set_t */
#endif

#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/uio.h>
#include <errno.h>
#include <signal.h>
#include <stdbool.h>
#include <inttypes.h>
#include <time.h>
#include <fcntl.h>
#include <sched.h>
#include <linux/swab.h>
#include "liburing/compat.h"
#include "liburing/io_uring.h"
#include "liburing/barrier.h"

#ifndef uring_unlikely
#define uring_unlikely(cond)	__builtin_expect(!!(cond), 0)
#endif

#ifndef uring_likely
#define uring_likely(cond)	__builtin_expect(!!(cond), 1)
#endif

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Library interface to io_uring
 */
struct io_uring_sq {
	unsigned *khead;
	unsigned *ktail;
	unsigned *kring_mask;
	unsigned *kring_entries;
	unsigned *kflags;
	unsigned *kdropped;
	unsigned *array;
	struct io_uring_sqe *sqes;

	unsigned sqe_head;
	unsigned sqe_tail;

	size_t ring_sz;
	void *ring_ptr;

	unsigned pad[4];
};

struct io_uring_cq {
	unsigned *khead;
	unsigned *ktail;
	unsigned *kring_mask;
	unsigned *kring_entries;
	unsigned *kflags;
	unsigned *koverflow;
	struct io_uring_cqe *cqes;

	size_t ring_sz;
	void *ring_ptr;

	unsigned pad[4];
};

struct io_uring {
	struct io_uring_sq sq;
	struct io_uring_cq cq;
	unsigned flags;
	int ring_fd;

	unsigned features;
	int enter_ring_fd;
	__u8 int_flags;
	__u8 pad[3];
	unsigned pad2;
};

/*
 * Library interface
 */

/*
 * Return an allocated io_uring_probe structure, or NULL if the probe fails
 * (for example, if it is not available). The caller is responsible for
 * freeing it with io_uring_free_probe().
 */
struct io_uring_probe *io_uring_get_probe_ring(struct io_uring *ring);
/* same as io_uring_get_probe_ring, but takes care of ring init and teardown */
struct io_uring_probe *io_uring_get_probe(void);

/*
 * frees a probe allocated through io_uring_get_probe() or
 * io_uring_get_probe_ring()
 */
void io_uring_free_probe(struct io_uring_probe *probe);

static inline int io_uring_opcode_supported(const struct io_uring_probe *p,
					    int op)
{
	if (op > p->last_op)
		return 0;
	return (p->ops[op].flags & IO_URING_OP_SUPPORTED) != 0;
}

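/*
 * Illustrative sketch (not part of this header): probing whether an opcode
 * is available before relying on it. Error handling is omitted for brevity.
 *
 *	struct io_uring_probe *probe = io_uring_get_probe();
 *
 *	if (probe && io_uring_opcode_supported(probe, IORING_OP_OPENAT2))
 *		printf("openat2 is supported by this kernel\n");
 *	io_uring_free_probe(probe);
 */
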
int io_uring_queue_init_params(unsigned entries, struct io_uring *ring,
				struct io_uring_params *p);
int io_uring_queue_init(unsigned entries, struct io_uring *ring,
			unsigned flags);
int io_uring_queue_mmap(int fd, struct io_uring_params *p,
			struct io_uring *ring);
int io_uring_ring_dontfork(struct io_uring *ring);
void io_uring_queue_exit(struct io_uring *ring);
unsigned io_uring_peek_batch_cqe(struct io_uring *ring,
	struct io_uring_cqe **cqes, unsigned count);
int io_uring_wait_cqes(struct io_uring *ring, struct io_uring_cqe **cqe_ptr,
		       unsigned wait_nr, struct __kernel_timespec *ts,
		       sigset_t *sigmask);
int io_uring_wait_cqe_timeout(struct io_uring *ring,
			      struct io_uring_cqe **cqe_ptr,
			      struct __kernel_timespec *ts);
int io_uring_submit(struct io_uring *ring);
int io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr);
int io_uring_submit_and_wait_timeout(struct io_uring *ring,
				     struct io_uring_cqe **cqe_ptr,
				     unsigned wait_nr,
				     struct __kernel_timespec *ts,
				     sigset_t *sigmask);

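/*
 * Illustrative sketch (not part of this header): the basic setup, submit and
 * completion flow using the functions declared above together with the sqe
 * helpers further down. Error handling is minimal and a NOP request is used
 * purely as a placeholder.
 *
 *	struct io_uring ring;
 *	struct io_uring_sqe *sqe;
 *	struct io_uring_cqe *cqe;
 *
 *	if (io_uring_queue_init(8, &ring, 0) < 0)
 *		return;
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_nop(sqe);
 *	io_uring_sqe_set_data64(sqe, 0x1234);
 *	io_uring_submit(&ring);
 *	if (io_uring_wait_cqe(&ring, &cqe) == 0) {
 *		// cqe->res holds the result, cqe->user_data the 0x1234 tag
 *		io_uring_cqe_seen(&ring, cqe);
 *	}
 *	io_uring_queue_exit(&ring);
 */
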
int io_uring_register_buffers(struct io_uring *ring, const struct iovec *iovecs,
			      unsigned nr_iovecs);
int io_uring_register_buffers_tags(struct io_uring *ring,
				   const struct iovec *iovecs,
				   const __u64 *tags, unsigned nr);
int io_uring_register_buffers_sparse(struct io_uring *ring, unsigned nr);
int io_uring_register_buffers_update_tag(struct io_uring *ring,
					 unsigned off,
					 const struct iovec *iovecs,
					 const __u64 *tags, unsigned nr);
int io_uring_unregister_buffers(struct io_uring *ring);

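/*
 * Illustrative sketch (not part of this header): registering one fixed
 * buffer so it can later be used with io_uring_prep_read_fixed() or
 * io_uring_prep_write_fixed(). The buffer size is an arbitrary choice for
 * the example.
 *
 *	static char buf[4096];
 *	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
 *
 *	int ret = io_uring_register_buffers(&ring, &iov, 1);
 *	if (ret < 0)
 *		fprintf(stderr, "register_buffers: %s\n", strerror(-ret));
 */
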
int io_uring_register_files(struct io_uring *ring, const int *files,
			    unsigned nr_files);
int io_uring_register_files_tags(struct io_uring *ring, const int *files,
				 const __u64 *tags, unsigned nr);
int io_uring_register_files_sparse(struct io_uring *ring, unsigned nr);
int io_uring_register_files_update_tag(struct io_uring *ring, unsigned off,
				       const int *files, const __u64 *tags,
				       unsigned nr_files);

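/*
 * Illustrative sketch (not part of this header): registering a small fixed
 * file table. Slots registered this way can be targeted by the *_direct()
 * prep helpers below; 'sock_fd' and 'file_fd' are assumed open descriptors.
 *
 *	int fds[2] = { sock_fd, file_fd };
 *
 *	int ret = io_uring_register_files(&ring, fds, 2);
 *	if (ret < 0)
 *		fprintf(stderr, "register_files: %s\n", strerror(-ret));
 */
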
int io_uring_unregister_files(struct io_uring *ring);
int io_uring_register_files_update(struct io_uring *ring, unsigned off,
				   int *files, unsigned nr_files);
int io_uring_register_eventfd(struct io_uring *ring, int fd);
int io_uring_register_eventfd_async(struct io_uring *ring, int fd);
int io_uring_unregister_eventfd(struct io_uring *ring);
int io_uring_register_probe(struct io_uring *ring, struct io_uring_probe *p,
			    unsigned nr);
int io_uring_register_personality(struct io_uring *ring);
int io_uring_unregister_personality(struct io_uring *ring, int id);
int io_uring_register_restrictions(struct io_uring *ring,
				   struct io_uring_restriction *res,
				   unsigned int nr_res);
int io_uring_enable_rings(struct io_uring *ring);
int __io_uring_sqring_wait(struct io_uring *ring);
int io_uring_register_iowq_aff(struct io_uring *ring, size_t cpusz,
				const cpu_set_t *mask);
int io_uring_unregister_iowq_aff(struct io_uring *ring);
int io_uring_register_iowq_max_workers(struct io_uring *ring,
				       unsigned int *values);
int io_uring_register_ring_fd(struct io_uring *ring);
int io_uring_unregister_ring_fd(struct io_uring *ring);
int io_uring_register_buf_ring(struct io_uring *ring,
			       struct io_uring_buf_reg *reg, unsigned int flags);
int io_uring_unregister_buf_ring(struct io_uring *ring, int bgid);

/*
 * Helper for the peek/wait single cqe functions. Exported because of that,
 * but probably shouldn't be used directly in an application.
 */
int __io_uring_get_cqe(struct io_uring *ring,
			struct io_uring_cqe **cqe_ptr, unsigned submit,
			unsigned wait_nr, sigset_t *sigmask);

#define LIBURING_UDATA_TIMEOUT	((__u64) -1)

/*
 * Calculates the step size for CQE iteration.
 *	For standard CQEs it's 1, for big CQEs it's 2.
 */
#define io_uring_cqe_shift(ring)					\
	(!!((ring)->flags & IORING_SETUP_CQE32))

#define io_uring_cqe_index(ring,ptr,mask)				\
	(((ptr) & (mask)) << io_uring_cqe_shift(ring))

#define io_uring_for_each_cqe(ring, head, cqe)				\
	/*								\
	 * io_uring_smp_load_acquire() enforces the order of tail	\
	 * and CQE reads.						\
	 */								\
	for (head = *(ring)->cq.khead;					\
	     (cqe = (head != io_uring_smp_load_acquire((ring)->cq.ktail) ? \
		&(ring)->cq.cqes[io_uring_cqe_index(ring, head, *(ring)->cq.kring_mask)] : NULL)); \
	     head++)							\

/*
 * Must be called after io_uring_for_each_cqe()
 */
static inline void io_uring_cq_advance(struct io_uring *ring,
				       unsigned nr)
{
	if (nr) {
		struct io_uring_cq *cq = &ring->cq;

		/*
		 * Ensure that the kernel only sees the new value of the head
		 * index after the CQEs have been read.
		 */
		io_uring_smp_store_release(cq->khead, *cq->khead + nr);
	}
}

/*
 * Must be called after io_uring_{peek,wait}_cqe() once the cqe has
 * been processed by the application.
 */
static inline void io_uring_cqe_seen(struct io_uring *ring,
				     struct io_uring_cqe *cqe)
{
	if (cqe)
		io_uring_cq_advance(ring, 1);
}

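/*
 * Illustrative sketch (not part of this header): draining all currently
 * available completions in one pass with io_uring_for_each_cqe() and a
 * single io_uring_cq_advance() at the end. 'struct my_request' and
 * handle_completion() stand in for application code; the per-request
 * pointer was attached earlier with io_uring_sqe_set_data().
 *
 *	struct io_uring_cqe *cqe;
 *	unsigned head, seen = 0;
 *
 *	io_uring_for_each_cqe(&ring, head, cqe) {
 *		struct my_request *req = io_uring_cqe_get_data(cqe);
 *
 *		handle_completion(req, cqe->res);
 *		seen++;
 *	}
 *	io_uring_cq_advance(&ring, seen);
 */
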
/*
 * Command prep helpers
 */

/*
 * Associate pointer @data with the sqe, for later retrieval from the cqe
 * at command completion time with io_uring_cqe_get_data().
 */
static inline void io_uring_sqe_set_data(struct io_uring_sqe *sqe, void *data)
{
	sqe->user_data = (unsigned long) data;
}

static inline void *io_uring_cqe_get_data(const struct io_uring_cqe *cqe)
{
	return (void *) (uintptr_t) cqe->user_data;
}

/*
 * Assign a 64-bit value to this sqe, which can get retrieved at completion
 * time with io_uring_cqe_get_data64. Just like the non-64 variants, except
 * these store a 64-bit type rather than a data pointer.
 */
static inline void io_uring_sqe_set_data64(struct io_uring_sqe *sqe,
					   __u64 data)
{
	sqe->user_data = data;
}

static inline __u64 io_uring_cqe_get_data64(const struct io_uring_cqe *cqe)
{
	return cqe->user_data;
}

/*
 * Tell the app we have the 64-bit variants of the get/set userdata helpers
 */
#define LIBURING_HAVE_DATA64

static inline void io_uring_sqe_set_flags(struct io_uring_sqe *sqe,
					  unsigned flags)
{
	sqe->flags = (__u8) flags;
}

static inline void __io_uring_set_target_fixed_file(struct io_uring_sqe *sqe,
						    unsigned int file_index)
{
	/* 0 means no fixed files, indexes should be encoded as "index + 1" */
	sqe->file_index = file_index + 1;
}

static inline void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd,
				    const void *addr, unsigned len,
				    __u64 offset)
{
	sqe->opcode = (__u8) op;
	sqe->flags = 0;
	sqe->ioprio = 0;
	sqe->fd = fd;
	sqe->off = offset;
	sqe->addr = (unsigned long) addr;
	sqe->len = len;
	sqe->rw_flags = 0;
	sqe->buf_index = 0;
	sqe->personality = 0;
	sqe->file_index = 0;
	sqe->addr3 = 0;
	sqe->__pad2[0] = 0;
}

/**
 * @pre Either fd_in or fd_out must be a pipe.
 * @param off_in If fd_in refers to a pipe, off_in must be (int64_t) -1;
 *		 if fd_in does not refer to a pipe and off_in is (int64_t) -1,
 *		 then bytes are read from fd_in starting from the file offset,
 *		 which is adjusted appropriately;
 *		 if fd_in does not refer to a pipe and off_in is not
 *		 (int64_t) -1, then the starting offset of fd_in will be
 *		 off_in.
 * @param off_out The description of off_in also applies to off_out.
 * @param splice_flags see man splice(2) for a description of the flags.
 *
 * This splice operation can be used to implement sendfile by splicing to an
 * intermediate pipe first, then splicing to the final destination.
 * In fact, the in-kernel implementation of sendfile uses splice internally.
 *
 * NOTE that even if fd_in or fd_out refers to a pipe, the splice operation
 * can still fail with EINVAL if one of the fds does not explicitly support
 * splice, e.g. reading from a terminal is unsupported from kernel 5.7
 * to 5.11.
 * Check issue #291 for more information.
 */
static inline void io_uring_prep_splice(struct io_uring_sqe *sqe,
					int fd_in, int64_t off_in,
					int fd_out, int64_t off_out,
					unsigned int nbytes,
					unsigned int splice_flags)
{
	io_uring_prep_rw(IORING_OP_SPLICE, sqe, fd_out, NULL, nbytes,
				(__u64) off_out);
	sqe->splice_off_in = (__u64) off_in;
	sqe->splice_fd_in = fd_in;
	sqe->splice_flags = splice_flags;
}

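/*
 * Illustrative sketch (not part of this header): the sendfile-style pattern
 * described above, splicing from a file into a pipe and from the pipe into a
 * socket with two linked SQEs. 'file_fd', 'sock_fd', 'pipefds' and 'len' are
 * assumed to exist; completions still need to be reaped afterwards.
 *
 *	struct io_uring_sqe *sqe;
 *
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_splice(sqe, file_fd, 0, pipefds[1], -1, len, 0);
 *	sqe->flags |= IOSQE_IO_LINK;	// run the two splices in order
 *
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_splice(sqe, pipefds[0], -1, sock_fd, -1, len, 0);
 *	io_uring_submit(&ring);
 */
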
static inline void io_uring_prep_tee(struct io_uring_sqe *sqe,
				     int fd_in, int fd_out,
				     unsigned int nbytes,
				     unsigned int splice_flags)
{
	io_uring_prep_rw(IORING_OP_TEE, sqe, fd_out, NULL, nbytes, 0);
	sqe->splice_off_in = 0;
	sqe->splice_fd_in = fd_in;
	sqe->splice_flags = splice_flags;
}

static inline void io_uring_prep_readv(struct io_uring_sqe *sqe, int fd,
				       const struct iovec *iovecs,
				       unsigned nr_vecs, __u64 offset)
{
	io_uring_prep_rw(IORING_OP_READV, sqe, fd, iovecs, nr_vecs, offset);
}

static inline void io_uring_prep_readv2(struct io_uring_sqe *sqe, int fd,
				       const struct iovec *iovecs,
				       unsigned nr_vecs, __u64 offset,
				       int flags)
{
	io_uring_prep_readv(sqe, fd, iovecs, nr_vecs, offset);
	sqe->rw_flags = flags;
}

static inline void io_uring_prep_read_fixed(struct io_uring_sqe *sqe, int fd,
					    void *buf, unsigned nbytes,
					    __u64 offset, int buf_index)
{
	io_uring_prep_rw(IORING_OP_READ_FIXED, sqe, fd, buf, nbytes, offset);
	sqe->buf_index = (__u16) buf_index;
}

static inline void io_uring_prep_writev(struct io_uring_sqe *sqe, int fd,
					const struct iovec *iovecs,
					unsigned nr_vecs, __u64 offset)
{
	io_uring_prep_rw(IORING_OP_WRITEV, sqe, fd, iovecs, nr_vecs, offset);
}

static inline void io_uring_prep_writev2(struct io_uring_sqe *sqe, int fd,
				       const struct iovec *iovecs,
				       unsigned nr_vecs, __u64 offset,
				       int flags)
{
	io_uring_prep_writev(sqe, fd, iovecs, nr_vecs, offset);
	sqe->rw_flags = flags;
}

static inline void io_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd,
					     const void *buf, unsigned nbytes,
					     __u64 offset, int buf_index)
{
	io_uring_prep_rw(IORING_OP_WRITE_FIXED, sqe, fd, buf, nbytes, offset);
	sqe->buf_index = (__u16) buf_index;
}

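/*
 * Illustrative sketch (not part of this header): reading into a buffer that
 * was previously registered with io_uring_register_buffers(). 'buf' must be
 * (part of) the iovec registered at index 0, and 'fd' an open file.
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	io_uring_prep_read_fixed(sqe, fd, buf, sizeof(buf), 0, 0);
 *	io_uring_submit(&ring);
 */
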
static inline void io_uring_prep_recvmsg(struct io_uring_sqe *sqe, int fd,
					 struct msghdr *msg, unsigned flags)
{
	io_uring_prep_rw(IORING_OP_RECVMSG, sqe, fd, msg, 1, 0);
	sqe->msg_flags = flags;
}

static inline void io_uring_prep_sendmsg(struct io_uring_sqe *sqe, int fd,
					 const struct msghdr *msg,
					 unsigned flags)
{
	io_uring_prep_rw(IORING_OP_SENDMSG, sqe, fd, msg, 1, 0);
	sqe->msg_flags = flags;
}

static inline unsigned __io_uring_prep_poll_mask(unsigned poll_mask)
{
#if __BYTE_ORDER == __BIG_ENDIAN
	poll_mask = __swahw32(poll_mask);
#endif
	return poll_mask;
}

static inline void io_uring_prep_poll_add(struct io_uring_sqe *sqe, int fd,
					  unsigned poll_mask)
{
	io_uring_prep_rw(IORING_OP_POLL_ADD, sqe, fd, NULL, 0, 0);
	sqe->poll32_events = __io_uring_prep_poll_mask(poll_mask);
}

static inline void io_uring_prep_poll_multishot(struct io_uring_sqe *sqe,
						int fd, unsigned poll_mask)
{
	io_uring_prep_poll_add(sqe, fd, poll_mask);
	sqe->len = IORING_POLL_ADD_MULTI;
}

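/*
 * Illustrative sketch (not part of this header): a multishot poll request
 * keeps posting completions while it stays armed; each CQE carries
 * IORING_CQE_F_MORE while further completions can be expected. 'fd' is an
 * assumed pollable descriptor.
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	io_uring_prep_poll_multishot(sqe, fd, POLLIN);
 *	io_uring_submit(&ring);
 *
 *	// on completion: if (!(cqe->flags & IORING_CQE_F_MORE)), the poll
 *	// request is no longer active and must be re-armed.
 */
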
static inline void io_uring_prep_poll_remove(struct io_uring_sqe *sqe,
					     __u64 user_data)
{
	io_uring_prep_rw(IORING_OP_POLL_REMOVE, sqe, -1, NULL, 0, 0);
	sqe->addr = user_data;
}

static inline void io_uring_prep_poll_update(struct io_uring_sqe *sqe,
					     __u64 old_user_data,
					     __u64 new_user_data,
					     unsigned poll_mask, unsigned flags)
{
	io_uring_prep_rw(IORING_OP_POLL_REMOVE, sqe, -1, NULL, flags,
			 new_user_data);
	sqe->addr = old_user_data;
	sqe->poll32_events = __io_uring_prep_poll_mask(poll_mask);
}

static inline void io_uring_prep_fsync(struct io_uring_sqe *sqe, int fd,
				       unsigned fsync_flags)
{
	io_uring_prep_rw(IORING_OP_FSYNC, sqe, fd, NULL, 0, 0);
	sqe->fsync_flags = fsync_flags;
}

static inline void io_uring_prep_nop(struct io_uring_sqe *sqe)
{
	io_uring_prep_rw(IORING_OP_NOP, sqe, -1, NULL, 0, 0);
}

static inline void io_uring_prep_timeout(struct io_uring_sqe *sqe,
					 struct __kernel_timespec *ts,
					 unsigned count, unsigned flags)
{
	io_uring_prep_rw(IORING_OP_TIMEOUT, sqe, -1, ts, 1, count);
	sqe->timeout_flags = flags;
}

static inline void io_uring_prep_timeout_remove(struct io_uring_sqe *sqe,
						__u64 user_data, unsigned flags)
{
	io_uring_prep_rw(IORING_OP_TIMEOUT_REMOVE, sqe, -1, NULL, 0, 0);
	sqe->addr = user_data;
	sqe->timeout_flags = flags;
}

static inline void io_uring_prep_timeout_update(struct io_uring_sqe *sqe,
						struct __kernel_timespec *ts,
						__u64 user_data, unsigned flags)
{
	io_uring_prep_rw(IORING_OP_TIMEOUT_REMOVE, sqe, -1, NULL, 0,
				(uintptr_t) ts);
	sqe->addr = user_data;
	sqe->timeout_flags = flags | IORING_TIMEOUT_UPDATE;
}

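/*
 * Illustrative sketch (not part of this header): a standalone timeout that
 * completes with -ETIME after one second unless at least 'count' other
 * completions (here 1) arrive first. The timespec must remain valid until
 * the request completes.
 *
 *	static struct __kernel_timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	io_uring_prep_timeout(sqe, &ts, 1, 0);
 *	io_uring_submit(&ring);
 */
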
static inline void io_uring_prep_accept(struct io_uring_sqe *sqe, int fd,
					struct sockaddr *addr,
					socklen_t *addrlen, int flags)
{
	io_uring_prep_rw(IORING_OP_ACCEPT, sqe, fd, addr, 0,
				(__u64) (unsigned long) addrlen);
	sqe->accept_flags = (__u32) flags;
}

/* accept directly into the fixed file table */
static inline void io_uring_prep_accept_direct(struct io_uring_sqe *sqe, int fd,
					       struct sockaddr *addr,
					       socklen_t *addrlen, int flags,
					       unsigned int file_index)
{
	io_uring_prep_accept(sqe, fd, addr, addrlen, flags);
	__io_uring_set_target_fixed_file(sqe, file_index);
}

static inline void io_uring_prep_multishot_accept(struct io_uring_sqe *sqe,
						  int fd, struct sockaddr *addr,
						  socklen_t *addrlen, int flags)
{
	io_uring_prep_accept(sqe, fd, addr, addrlen, flags);
	sqe->ioprio |= IORING_ACCEPT_MULTISHOT;
}

/* multishot accept directly into the fixed file table */
static inline void io_uring_prep_multishot_accept_direct(struct io_uring_sqe *sqe,
							 int fd,
							 struct sockaddr *addr,
							 socklen_t *addrlen,
							 int flags)
{
	io_uring_prep_multishot_accept(sqe, fd, addr, addrlen, flags);
	__io_uring_set_target_fixed_file(sqe, IORING_FILE_INDEX_ALLOC - 1);
}

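/*
 * Illustrative sketch (not part of this header): arming a multishot accept
 * on a listening socket. Each incoming connection posts its own CQE with the
 * new fd in cqe->res; re-arm when IORING_CQE_F_MORE is no longer set.
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	io_uring_prep_multishot_accept(sqe, listen_fd, NULL, NULL, 0);
 *	io_uring_submit(&ring);
 */
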
static inline void io_uring_prep_cancel64(struct io_uring_sqe *sqe,
					  __u64 user_data, int flags)
{
	io_uring_prep_rw(IORING_OP_ASYNC_CANCEL, sqe, -1, NULL, 0, 0);
	sqe->addr = user_data;
	sqe->cancel_flags = (__u32) flags;
}

static inline void io_uring_prep_cancel(struct io_uring_sqe *sqe,
					void *user_data, int flags)
{
	io_uring_prep_cancel64(sqe, (__u64) (uintptr_t) user_data, flags);
}

static inline void io_uring_prep_cancel_fd(struct io_uring_sqe *sqe, int fd,
					   unsigned int flags)
{
	io_uring_prep_rw(IORING_OP_ASYNC_CANCEL, sqe, fd, NULL, 0, 0);
	sqe->cancel_flags = (__u32) flags | IORING_ASYNC_CANCEL_FD;
}

static inline void io_uring_prep_link_timeout(struct io_uring_sqe *sqe,
					      struct __kernel_timespec *ts,
					      unsigned flags)
{
	io_uring_prep_rw(IORING_OP_LINK_TIMEOUT, sqe, -1, ts, 1, 0);
	sqe->timeout_flags = flags;
}

static inline void io_uring_prep_connect(struct io_uring_sqe *sqe, int fd,
					 const struct sockaddr *addr,
					 socklen_t addrlen)
{
	io_uring_prep_rw(IORING_OP_CONNECT, sqe, fd, addr, 0, addrlen);
}

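/*
 * Illustrative sketch (not part of this header): bounding a connect with a
 * linked timeout. The IOSQE_IO_LINK flag on the connect ties the following
 * LINK_TIMEOUT to it; if the timeout fires first, the connect completes with
 * -ECANCELED. 'sock_fd', 'addr' and 'addrlen' are assumed to be set up.
 *
 *	static struct __kernel_timespec ts = { .tv_sec = 5 };
 *	struct io_uring_sqe *sqe;
 *
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_connect(sqe, sock_fd, (struct sockaddr *) &addr, addrlen);
 *	sqe->flags |= IOSQE_IO_LINK;
 *
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_link_timeout(sqe, &ts, 0);
 *	io_uring_submit(&ring);
 */
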
static inline void io_uring_prep_files_update(struct io_uring_sqe *sqe,
					      int *fds, unsigned nr_fds,
					      int offset)
{
	io_uring_prep_rw(IORING_OP_FILES_UPDATE, sqe, -1, fds, nr_fds,
				(__u64) offset);
}

static inline void io_uring_prep_fallocate(struct io_uring_sqe *sqe, int fd,
					   int mode, off_t offset, off_t len)
{

	io_uring_prep_rw(IORING_OP_FALLOCATE, sqe, fd,
			(const uintptr_t *) (unsigned long) len,
			(unsigned int) mode, (__u64) offset);
}

static inline void io_uring_prep_openat(struct io_uring_sqe *sqe, int dfd,
					const char *path, int flags,
					mode_t mode)
{
	io_uring_prep_rw(IORING_OP_OPENAT, sqe, dfd, path, mode, 0);
	sqe->open_flags = (__u32) flags;
}

/* open directly into the fixed file table */
static inline void io_uring_prep_openat_direct(struct io_uring_sqe *sqe,
					       int dfd, const char *path,
					       int flags, mode_t mode,
					       unsigned file_index)
{
	io_uring_prep_openat(sqe, dfd, path, flags, mode);
	__io_uring_set_target_fixed_file(sqe, file_index);
}

static inline void io_uring_prep_close(struct io_uring_sqe *sqe, int fd)
{
	io_uring_prep_rw(IORING_OP_CLOSE, sqe, fd, NULL, 0, 0);
}

static inline void io_uring_prep_close_direct(struct io_uring_sqe *sqe,
					      unsigned file_index)
{
	io_uring_prep_close(sqe, 0);
	__io_uring_set_target_fixed_file(sqe, file_index);
}

static inline void io_uring_prep_read(struct io_uring_sqe *sqe, int fd,
				      void *buf, unsigned nbytes, __u64 offset)
{
	io_uring_prep_rw(IORING_OP_READ, sqe, fd, buf, nbytes, offset);
}

static inline void io_uring_prep_write(struct io_uring_sqe *sqe, int fd,
				       const void *buf, unsigned nbytes,
				       __u64 offset)
{
	io_uring_prep_rw(IORING_OP_WRITE, sqe, fd, buf, nbytes, offset);
}

struct statx;
static inline void io_uring_prep_statx(struct io_uring_sqe *sqe, int dfd,
				const char *path, int flags, unsigned mask,
				struct statx *statxbuf)
{
	io_uring_prep_rw(IORING_OP_STATX, sqe, dfd, path, mask,
				(__u64) (unsigned long) statxbuf);
	sqe->statx_flags = (__u32) flags;
}

static inline void io_uring_prep_fadvise(struct io_uring_sqe *sqe, int fd,
					 __u64 offset, off_t len, int advice)
{
	io_uring_prep_rw(IORING_OP_FADVISE, sqe, fd, NULL, (__u32) len, offset);
	sqe->fadvise_advice = (__u32) advice;
}

static inline void io_uring_prep_madvise(struct io_uring_sqe *sqe, void *addr,
					 off_t length, int advice)
{
	io_uring_prep_rw(IORING_OP_MADVISE, sqe, -1, addr, (__u32) length, 0);
	sqe->fadvise_advice = (__u32) advice;
}

static inline void io_uring_prep_send(struct io_uring_sqe *sqe, int sockfd,
				      const void *buf, size_t len, int flags)
{
	io_uring_prep_rw(IORING_OP_SEND, sqe, sockfd, buf, (__u32) len, 0);
	sqe->msg_flags = (__u32) flags;
}

static inline void io_uring_prep_recv(struct io_uring_sqe *sqe, int sockfd,
				      void *buf, size_t len, int flags)
{
	io_uring_prep_rw(IORING_OP_RECV, sqe, sockfd, buf, (__u32) len, 0);
	sqe->msg_flags = (__u32) flags;
}

static inline void io_uring_prep_openat2(struct io_uring_sqe *sqe, int dfd,
					const char *path, struct open_how *how)
{
	io_uring_prep_rw(IORING_OP_OPENAT2, sqe, dfd, path, sizeof(*how),
				(uint64_t) (uintptr_t) how);
}

/* open directly into the fixed file table */
static inline void io_uring_prep_openat2_direct(struct io_uring_sqe *sqe,
						int dfd, const char *path,
						struct open_how *how,
						unsigned file_index)
{
	io_uring_prep_openat2(sqe, dfd, path, how);
	__io_uring_set_target_fixed_file(sqe, file_index);
}

struct epoll_event;
static inline void io_uring_prep_epoll_ctl(struct io_uring_sqe *sqe, int epfd,
					   int fd, int op,
					   struct epoll_event *ev)
{
	io_uring_prep_rw(IORING_OP_EPOLL_CTL, sqe, epfd, ev,
				(__u32) op, (__u32) fd);
}

static inline void io_uring_prep_provide_buffers(struct io_uring_sqe *sqe,
						 void *addr, int len, int nr,
						 int bgid, int bid)
{
	io_uring_prep_rw(IORING_OP_PROVIDE_BUFFERS, sqe, nr, addr, (__u32) len,
				(__u64) bid);
	sqe->buf_group = (__u16) bgid;
}

static inline void io_uring_prep_remove_buffers(struct io_uring_sqe *sqe,
						int nr, int bgid)
{
	io_uring_prep_rw(IORING_OP_REMOVE_BUFFERS, sqe, nr, NULL, 0, 0);
	sqe->buf_group = (__u16) bgid;
}

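/*
 * Illustrative sketch (not part of this header): handing a group of buffers
 * to the kernel and letting a recv pick one of them. The request must set
 * IOSQE_BUFFER_SELECT and the buffer group id; the chosen buffer id comes
 * back in cqe->flags >> IORING_CQE_BUFFER_SHIFT. Sizes, ids and 'sock_fd'
 * are example values.
 *
 *	static char bufs[8][1024];
 *	struct io_uring_sqe *sqe;
 *
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_provide_buffers(sqe, bufs, 1024, 8, 0, 0);
 *	io_uring_submit(&ring);
 *
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_recv(sqe, sock_fd, NULL, 1024, 0);
 *	sqe->flags |= IOSQE_BUFFER_SELECT;
 *	sqe->buf_group = 0;
 *	io_uring_submit(&ring);
 */
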
static inline void io_uring_prep_shutdown(struct io_uring_sqe *sqe, int fd,
					  int how)
{
	io_uring_prep_rw(IORING_OP_SHUTDOWN, sqe, fd, NULL, (__u32) how, 0);
}

static inline void io_uring_prep_unlinkat(struct io_uring_sqe *sqe, int dfd,
					  const char *path, int flags)
{
	io_uring_prep_rw(IORING_OP_UNLINKAT, sqe, dfd, path, 0, 0);
	sqe->unlink_flags = (__u32) flags;
}

static inline void io_uring_prep_unlink(struct io_uring_sqe *sqe,
					  const char *path, int flags)
{
	io_uring_prep_unlinkat(sqe, AT_FDCWD, path, flags);
}

static inline void io_uring_prep_renameat(struct io_uring_sqe *sqe, int olddfd,
					  const char *oldpath, int newdfd,
					  const char *newpath, int flags)
{
	io_uring_prep_rw(IORING_OP_RENAMEAT, sqe, olddfd, oldpath,
				(__u32) newdfd,
				(uint64_t) (uintptr_t) newpath);
	sqe->rename_flags = (__u32) flags;
}

static inline void io_uring_prep_rename(struct io_uring_sqe *sqe,
					  const char *oldpath, const char *newpath)
{
	io_uring_prep_renameat(sqe, AT_FDCWD, oldpath, AT_FDCWD, newpath, 0);
}

static inline void io_uring_prep_sync_file_range(struct io_uring_sqe *sqe,
						 int fd, unsigned len,
						 __u64 offset, int flags)
{
	io_uring_prep_rw(IORING_OP_SYNC_FILE_RANGE, sqe, fd, NULL, len, offset);
	sqe->sync_range_flags = (__u32) flags;
}

static inline void io_uring_prep_mkdirat(struct io_uring_sqe *sqe, int dfd,
					const char *path, mode_t mode)
{
	io_uring_prep_rw(IORING_OP_MKDIRAT, sqe, dfd, path, mode, 0);
}

static inline void io_uring_prep_mkdir(struct io_uring_sqe *sqe,
					const char *path, mode_t mode)
{
	io_uring_prep_mkdirat(sqe, AT_FDCWD, path, mode);
}

static inline void io_uring_prep_symlinkat(struct io_uring_sqe *sqe,
					   const char *target, int newdirfd,
					   const char *linkpath)
{
	io_uring_prep_rw(IORING_OP_SYMLINKAT, sqe, newdirfd, target, 0,
				(uint64_t) (uintptr_t) linkpath);
}

static inline void io_uring_prep_symlink(struct io_uring_sqe *sqe,
					   const char *target, const char *linkpath)
{
	io_uring_prep_symlinkat(sqe, target, AT_FDCWD, linkpath);
}

static inline void io_uring_prep_linkat(struct io_uring_sqe *sqe, int olddfd,
					const char *oldpath, int newdfd,
					const char *newpath, int flags)
{
	io_uring_prep_rw(IORING_OP_LINKAT, sqe, olddfd, oldpath, (__u32) newdfd,
				(uint64_t) (uintptr_t) newpath);
	sqe->hardlink_flags = (__u32) flags;
}

static inline void io_uring_prep_link(struct io_uring_sqe *sqe,
					const char *oldpath, const char *newpath, int flags)
{
	io_uring_prep_linkat(sqe, AT_FDCWD, oldpath, AT_FDCWD, newpath, flags);
}

static inline void io_uring_prep_msg_ring(struct io_uring_sqe *sqe, int fd,
					  unsigned int len, __u64 data,
					  unsigned int flags)
{
	io_uring_prep_rw(IORING_OP_MSG_RING, sqe, fd, NULL, len, data);
	sqe->rw_flags = flags;
}

static inline void io_uring_prep_getxattr(struct io_uring_sqe *sqe,
					  const char *name,
					  const char *value,
					  const char *path,
					  size_t len)
{
	io_uring_prep_rw(IORING_OP_GETXATTR, sqe, 0, name, len,
				(__u64) (uintptr_t) value);
	sqe->addr3 = (__u64) (uintptr_t) path;
	sqe->xattr_flags = 0;
}

static inline void io_uring_prep_setxattr(struct io_uring_sqe *sqe,
					  const char *name,
					  const char *value,
					  const char *path,
					  int flags,
					  size_t len)
{
	io_uring_prep_rw(IORING_OP_SETXATTR, sqe, 0, name, len,
				(__u64) (uintptr_t) value);
	sqe->addr3 = (__u64) (uintptr_t) path;
	sqe->xattr_flags = flags;
}

static inline void io_uring_prep_fgetxattr(struct io_uring_sqe *sqe,
					   int         fd,
					   const char *name,
					   const char *value,
					   size_t      len)
{
	io_uring_prep_rw(IORING_OP_FGETXATTR, sqe, fd, name, len,
				(__u64) (uintptr_t) value);
	sqe->xattr_flags = 0;
}

static inline void io_uring_prep_fsetxattr(struct io_uring_sqe *sqe,
					   int         fd,
					   const char *name,
					   const char *value,
					   int         flags,
					   size_t      len)
{
	io_uring_prep_rw(IORING_OP_FSETXATTR, sqe, fd, name, len,
				(__u64) (uintptr_t) value);
	sqe->xattr_flags = flags;
}

static inline void io_uring_prep_socket(struct io_uring_sqe *sqe, int domain,
					int type, int protocol,
					unsigned int flags)
{
	io_uring_prep_rw(IORING_OP_SOCKET, sqe, domain, NULL, protocol, type);
	sqe->rw_flags = flags;
}

static inline void io_uring_prep_socket_direct(struct io_uring_sqe *sqe,
					       int domain, int type,
					       int protocol,
					       unsigned file_index,
					       unsigned int flags)
{
	io_uring_prep_rw(IORING_OP_SOCKET, sqe, domain, NULL, protocol, type);
	sqe->rw_flags = flags;
	__io_uring_set_target_fixed_file(sqe, file_index);
}

static inline void io_uring_prep_socket_direct_alloc(struct io_uring_sqe *sqe,
				int domain, int type, int protocol,
				unsigned int flags)
{
	io_uring_prep_rw(IORING_OP_SOCKET, sqe, domain, NULL, protocol, type);
	sqe->rw_flags = flags;
	__io_uring_set_target_fixed_file(sqe, IORING_FILE_INDEX_ALLOC - 1);
}

/*
 * Returns the number of unconsumed (if SQPOLL) or unsubmitted entries that
 * exist in the SQ ring
 */
static inline unsigned io_uring_sq_ready(const struct io_uring *ring)
{
	unsigned khead = *ring->sq.khead;

	/*
	 * Without a barrier, we could miss an update and think the SQ wasn't
	 * ready. We don't need the load acquire for non-SQPOLL since then we
	 * drive updates.
	 */
	if (ring->flags & IORING_SETUP_SQPOLL)
		khead = io_uring_smp_load_acquire(ring->sq.khead);

	/* always use real head, to avoid losing sync for short submit */
	return ring->sq.sqe_tail - khead;
}

/*
 * Returns how much space is left in the SQ ring.
 */
static inline unsigned io_uring_sq_space_left(const struct io_uring *ring)
{
	return *ring->sq.kring_entries - io_uring_sq_ready(ring);
}

/*
 * Only applicable when using SQPOLL - allows the caller to wait for space
 * to free up in the SQ ring, which happens when the kernel side thread has
 * consumed one or more entries. If the SQ ring is currently non-full, no
 * action is taken. Note: may return -EINVAL if the kernel doesn't support
 * this feature.
 */
static inline int io_uring_sqring_wait(struct io_uring *ring)
{
	if (!(ring->flags & IORING_SETUP_SQPOLL))
		return 0;
	if (io_uring_sq_space_left(ring))
		return 0;

	return __io_uring_sqring_wait(ring);
}

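/*
 * Illustrative sketch (not part of this header): on an SQPOLL ring, waiting
 * for a free SQ slot before grabbing an sqe, instead of spinning on
 * io_uring_get_sqe() returning NULL.
 *
 *	if (!io_uring_sq_space_left(&ring))
 *		io_uring_sqring_wait(&ring);
 *	sqe = io_uring_get_sqe(&ring);
 */
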
/*
 * Returns how many unconsumed entries are ready in the CQ ring
 */
static inline unsigned io_uring_cq_ready(const struct io_uring *ring)
{
	return io_uring_smp_load_acquire(ring->cq.ktail) - *ring->cq.khead;
}

/*
 * Returns true if the eventfd notification is currently enabled
 */
static inline bool io_uring_cq_eventfd_enabled(const struct io_uring *ring)
{
	if (!ring->cq.kflags)
		return true;

	return !(*ring->cq.kflags & IORING_CQ_EVENTFD_DISABLED);
}

/*
 * Toggle eventfd notification on or off, if an eventfd is registered with
 * the ring.
 */
static inline int io_uring_cq_eventfd_toggle(struct io_uring *ring,
					     bool enabled)
{
	uint32_t flags;

	if (!!enabled == io_uring_cq_eventfd_enabled(ring))
		return 0;

	if (!ring->cq.kflags)
		return -EOPNOTSUPP;

	flags = *ring->cq.kflags;

	if (enabled)
		flags &= ~IORING_CQ_EVENTFD_DISABLED;
	else
		flags |= IORING_CQ_EVENTFD_DISABLED;

	IO_URING_WRITE_ONCE(*ring->cq.kflags, flags);

	return 0;
}

/*
 * Return an IO completion, waiting for 'wait_nr' completions if one isn't
 * readily available. Returns 0 with cqe_ptr filled in on success, -errno on
 * failure.
 */
static inline int io_uring_wait_cqe_nr(struct io_uring *ring,
				      struct io_uring_cqe **cqe_ptr,
				      unsigned wait_nr)
{
	return __io_uring_get_cqe(ring, cqe_ptr, 0, wait_nr, NULL);
}

/*
 * Internal helper, don't use directly in applications. Use one of the
 * "official" versions of this, io_uring_peek_cqe(), io_uring_wait_cqe(),
 * or io_uring_wait_cqes*().
 */
static inline int __io_uring_peek_cqe(struct io_uring *ring,
				      struct io_uring_cqe **cqe_ptr,
				      unsigned *nr_available)
{
	struct io_uring_cqe *cqe;
	int err = 0;
	unsigned available;
	unsigned mask = *ring->cq.kring_mask;
	int shift = 0;

	if (ring->flags & IORING_SETUP_CQE32)
		shift = 1;

	do {
		unsigned tail = io_uring_smp_load_acquire(ring->cq.ktail);
		unsigned head = *ring->cq.khead;

		cqe = NULL;
		available = tail - head;
		if (!available)
			break;

		cqe = &ring->cq.cqes[(head & mask) << shift];
		if (!(ring->features & IORING_FEAT_EXT_ARG) &&
				cqe->user_data == LIBURING_UDATA_TIMEOUT) {
			if (cqe->res < 0)
				err = cqe->res;
			io_uring_cq_advance(ring, 1);
			if (!err)
				continue;
			cqe = NULL;
		}

		break;
	} while (1);

	*cqe_ptr = cqe;
	if (nr_available)
		*nr_available = available;
	return err;
}

/*
 * Return an IO completion, if one is readily available. Returns 0 with
 * cqe_ptr filled in on success, -errno on failure.
 */
static inline int io_uring_peek_cqe(struct io_uring *ring,
				    struct io_uring_cqe **cqe_ptr)
{
	if (!__io_uring_peek_cqe(ring, cqe_ptr, NULL) && *cqe_ptr)
		return 0;

	return io_uring_wait_cqe_nr(ring, cqe_ptr, 0);
}

/*
 * Return an IO completion, waiting for it if necessary. Returns 0 with
 * cqe_ptr filled in on success, -errno on failure.
 */
static inline int io_uring_wait_cqe(struct io_uring *ring,
				    struct io_uring_cqe **cqe_ptr)
{
	if (!__io_uring_peek_cqe(ring, cqe_ptr, NULL) && *cqe_ptr)
		return 0;

	return io_uring_wait_cqe_nr(ring, cqe_ptr, 1);
}

/*
 * Return an sqe to fill. Application must later call io_uring_submit()
 * when it's ready to tell the kernel about it. The caller may call this
 * function multiple times before calling io_uring_submit().
 *
 * Returns a vacant sqe, or NULL if we're full.
 */
static inline struct io_uring_sqe *_io_uring_get_sqe(struct io_uring *ring)
{
	struct io_uring_sq *sq = &ring->sq;
	unsigned int head = io_uring_smp_load_acquire(sq->khead);
	unsigned int next = sq->sqe_tail + 1;
	int shift = 0;

	if (ring->flags & IORING_SETUP_SQE128)
		shift = 1;

	if (next - head <= *sq->kring_entries) {
		struct io_uring_sqe *sqe;

		sqe = &sq->sqes[(sq->sqe_tail & *sq->kring_mask) << shift];
		sq->sqe_tail = next;
		return sqe;
	}

	return NULL;
}

/*
 * Return the appropriate mask for a buffer ring of size 'ring_entries'
 */
static inline int io_uring_buf_ring_mask(__u32 ring_entries)
{
	return ring_entries - 1;
}

static inline void io_uring_buf_ring_init(struct io_uring_buf_ring *br)
{
	br->tail = 0;
}

/*
 * Assign 'buf' with the addr/len/buffer ID supplied
 */
static inline void io_uring_buf_ring_add(struct io_uring_buf_ring *br,
					 void *addr, unsigned int len,
					 unsigned short bid, int mask,
					 int buf_offset)
{
	struct io_uring_buf *buf = &br->bufs[(br->tail + buf_offset) & mask];

	buf->addr = (unsigned long) (uintptr_t) addr;
	buf->len = len;
	buf->bid = bid;
}

/*
 * Make 'count' new buffers visible to the kernel. Called after
 * io_uring_buf_ring_add() has been called 'count' times to fill in new
 * buffers.
 */
static inline void io_uring_buf_ring_advance(struct io_uring_buf_ring *br,
					     int count)
{
	unsigned short new_tail = br->tail + count;

	io_uring_smp_store_release(&br->tail, new_tail);
}

/*
 * Make 'count' new buffers visible to the kernel while at the same time
 * advancing the CQ ring seen entries. This can be used when the application
 * is using ring provided buffers and returns buffers while processing CQEs,
 * avoiding an extra atomic when needing to increment both the CQ ring and
 * the ring buffer index at the same time.
 */
static inline void io_uring_buf_ring_cq_advance(struct io_uring *ring,
						struct io_uring_buf_ring *br,
						int count)
{
	br->tail += count;
	io_uring_cq_advance(ring, count);
}

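/*
 * Illustrative sketch (not part of this header): setting up a registered
 * buffer ring of 8 entries for buffer group 0 and making its buffers
 * available to the kernel. The posix_memalign() allocation and the backing
 * 'bufs' array are assumptions of the example; error handling is omitted.
 *
 *	struct io_uring_buf_ring *br;
 *	struct io_uring_buf_reg reg = { };
 *	static char bufs[8][4096];
 *	int i;
 *
 *	posix_memalign((void **) &br, 4096, 8 * sizeof(struct io_uring_buf));
 *	reg.ring_addr = (unsigned long) br;
 *	reg.ring_entries = 8;
 *	reg.bgid = 0;
 *	io_uring_register_buf_ring(&ring, &reg, 0);
 *
 *	io_uring_buf_ring_init(br);
 *	for (i = 0; i < 8; i++)
 *		io_uring_buf_ring_add(br, bufs[i], sizeof(bufs[i]), i,
 *				      io_uring_buf_ring_mask(8), i);
 *	io_uring_buf_ring_advance(br, 8);
 */
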
#ifndef LIBURING_INTERNAL
static inline struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
{
	return _io_uring_get_sqe(ring);
}
#else
struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring);
#endif

ssize_t io_uring_mlock_size(unsigned entries, unsigned flags);
ssize_t io_uring_mlock_size_params(unsigned entries, struct io_uring_params *p);

#ifdef __cplusplus
}
#endif

#endif