1 /* SPDX-License-Identifier: MIT */
2 #ifndef LIB_URING_H
3 #define LIB_URING_H
4 
5 #include <sys/socket.h>
6 #include <sys/stat.h>
7 #include <sys/uio.h>
8 #include <errno.h>
9 #include <signal.h>
10 #include <stdbool.h>
11 #include <inttypes.h>
12 #include <time.h>
13 #include <fcntl.h>
14 #include <sched.h>
15 #include <linux/swab.h>
16 #include <sys/wait.h>
17 #include "liburing/compat.h"
18 #include "liburing/io_uring.h"
19 #include "liburing/io_uring_version.h"
20 #include "liburing/barrier.h"
21 
22 
23 #ifndef uring_unlikely
24 #define uring_unlikely(cond)	__builtin_expect(!!(cond), 0)
25 #endif
26 
27 #ifndef uring_likely
28 #define uring_likely(cond)	__builtin_expect(!!(cond), 1)
29 #endif
30 
31 #ifndef IOURINGINLINE
32 #define IOURINGINLINE static inline
33 #endif
34 
35 #ifdef __alpha__
36 /*
37  * alpha and mips are the exceptions, all other architectures have
38  * common numbers for new system calls.
39  */
40 #ifndef __NR_io_uring_setup
41 #define __NR_io_uring_setup		535
42 #endif
43 #ifndef __NR_io_uring_enter
44 #define __NR_io_uring_enter		536
45 #endif
46 #ifndef __NR_io_uring_register
47 #define __NR_io_uring_register		537
48 #endif
49 #elif defined __mips__
50 #ifndef __NR_io_uring_setup
51 #define __NR_io_uring_setup		(__NR_Linux + 425)
52 #endif
53 #ifndef __NR_io_uring_enter
54 #define __NR_io_uring_enter		(__NR_Linux + 426)
55 #endif
56 #ifndef __NR_io_uring_register
57 #define __NR_io_uring_register		(__NR_Linux + 427)
58 #endif
59 #else /* !__alpha__ and !__mips__ */
60 #ifndef __NR_io_uring_setup
61 #define __NR_io_uring_setup		425
62 #endif
63 #ifndef __NR_io_uring_enter
64 #define __NR_io_uring_enter		426
65 #endif
66 #ifndef __NR_io_uring_register
67 #define __NR_io_uring_register		427
68 #endif
69 #endif
70 
71 #ifdef __cplusplus
72 extern "C" {
73 #endif
74 
75 /*
76  * Library interface to io_uring
77  */
78 struct io_uring_sq {
79 	unsigned *khead;
80 	unsigned *ktail;
81 	// Deprecated: use `ring_mask` instead of `*kring_mask`
82 	unsigned *kring_mask;
83 	// Deprecated: use `ring_entries` instead of `*kring_entries`
84 	unsigned *kring_entries;
85 	unsigned *kflags;
86 	unsigned *kdropped;
87 	unsigned *array;
88 	struct io_uring_sqe *sqes;
89 
90 	unsigned sqe_head;
91 	unsigned sqe_tail;
92 
93 	size_t ring_sz;
94 	void *ring_ptr;
95 
96 	unsigned ring_mask;
97 	unsigned ring_entries;
98 
99 	unsigned pad[2];
100 };
101 
102 struct io_uring_cq {
103 	unsigned *khead;
104 	unsigned *ktail;
105 	// Deprecated: use `ring_mask` instead of `*kring_mask`
106 	unsigned *kring_mask;
107 	// Deprecated: use `ring_entries` instead of `*kring_entries`
108 	unsigned *kring_entries;
109 	unsigned *kflags;
110 	unsigned *koverflow;
111 	struct io_uring_cqe *cqes;
112 
113 	size_t ring_sz;
114 	void *ring_ptr;
115 
116 	unsigned ring_mask;
117 	unsigned ring_entries;
118 
119 	unsigned pad[2];
120 };
121 
122 struct io_uring {
123 	struct io_uring_sq sq;
124 	struct io_uring_cq cq;
125 	unsigned flags;
126 	int ring_fd;
127 
128 	unsigned features;
129 	int enter_ring_fd;
130 	__u8 int_flags;
131 	__u8 pad[3];
132 	unsigned pad2;
133 };
134 
135 /*
136  * Library interface
137  */
138 
139 /*
140  * Return an allocated io_uring_probe structure, or NULL if the probe fails
141  * (for example, if it is not available). The caller is responsible for freeing it.
142  */
143 struct io_uring_probe *io_uring_get_probe_ring(struct io_uring *ring);
144 /* same as io_uring_get_probe_ring, but takes care of ring init and teardown */
145 struct io_uring_probe *io_uring_get_probe(void);
146 
147 /*
148  * frees a probe allocated through io_uring_get_probe() or
149  * io_uring_get_probe_ring()
150  */
151 void io_uring_free_probe(struct io_uring_probe *probe);
152 
153 IOURINGINLINE int io_uring_opcode_supported(const struct io_uring_probe *p,
154 					    int op)
155 {
156 	if (op > p->last_op)
157 		return 0;
158 	return (p->ops[op].flags & IO_URING_OP_SUPPORTED) != 0;
159 }
160 
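/*
 * Illustrative sketch, not part of the upstream header: checking whether an
 * opcode is supported on the running kernel via the probe interface. The
 * io_uring_example_* helper name is made up for illustration.
 */
static inline bool io_uring_example_opcode_available(int op)
{
	struct io_uring_probe *probe = io_uring_get_probe();
	bool avail = false;

	if (probe) {
		avail = io_uring_opcode_supported(probe, op) != 0;
		io_uring_free_probe(probe);
	}
	return avail;
}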
161 int io_uring_queue_init_mem(unsigned entries, struct io_uring *ring,
162 				struct io_uring_params *p,
163 				void *buf, size_t buf_size);
164 int io_uring_queue_init_params(unsigned entries, struct io_uring *ring,
165 				struct io_uring_params *p);
166 int io_uring_queue_init(unsigned entries, struct io_uring *ring,
167 			unsigned flags);
168 int io_uring_queue_mmap(int fd, struct io_uring_params *p,
169 			struct io_uring *ring);
170 int io_uring_ring_dontfork(struct io_uring *ring);
171 void io_uring_queue_exit(struct io_uring *ring);
172 unsigned io_uring_peek_batch_cqe(struct io_uring *ring,
173 	struct io_uring_cqe **cqes, unsigned count);
174 int io_uring_wait_cqes(struct io_uring *ring, struct io_uring_cqe **cqe_ptr,
175 		       unsigned wait_nr, struct __kernel_timespec *ts,
176 		       sigset_t *sigmask);
177 int io_uring_wait_cqes_min_timeout(struct io_uring *ring,
178 				   struct io_uring_cqe **cqe_ptr,
179 				   unsigned wait_nr,
180 				   struct __kernel_timespec *ts,
181 				   unsigned int min_ts_usec,
182 				   sigset_t *sigmask);
183 int io_uring_wait_cqe_timeout(struct io_uring *ring,
184 			      struct io_uring_cqe **cqe_ptr,
185 			      struct __kernel_timespec *ts);
186 int io_uring_submit(struct io_uring *ring);
187 int io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr);
188 int io_uring_submit_and_wait_timeout(struct io_uring *ring,
189 				     struct io_uring_cqe **cqe_ptr,
190 				     unsigned wait_nr,
191 				     struct __kernel_timespec *ts,
192 				     sigset_t *sigmask);
193 int io_uring_submit_and_wait_min_timeout(struct io_uring *ring,
194 					 struct io_uring_cqe **cqe_ptr,
195 					 unsigned wait_nr,
196 					 struct __kernel_timespec *ts,
197 					 unsigned min_wait,
198 					 sigset_t *sigmask);
199 
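/*
 * Illustrative sketch, not part of the upstream header: minimal ring setup
 * and teardown around the declarations above. The queue depth of 8 and the
 * io_uring_example_* name are arbitrary.
 */
static inline int io_uring_example_setup_teardown(void)
{
	struct io_uring ring;
	int ret;

	ret = io_uring_queue_init(8, &ring, 0);
	if (ret < 0)
		return ret;
	/* ... prepare SQEs, io_uring_submit(&ring), reap CQEs ... */
	io_uring_queue_exit(&ring);
	return 0;
}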
200 int io_uring_clone_buffers(struct io_uring *dst, struct io_uring *src);
201 int io_uring_register_buffers(struct io_uring *ring, const struct iovec *iovecs,
202 			      unsigned nr_iovecs);
203 int io_uring_register_buffers_tags(struct io_uring *ring,
204 				   const struct iovec *iovecs,
205 				   const __u64 *tags, unsigned nr);
206 int io_uring_register_buffers_sparse(struct io_uring *ring, unsigned nr);
207 int io_uring_register_buffers_update_tag(struct io_uring *ring,
208 					 unsigned off,
209 					 const struct iovec *iovecs,
210 					 const __u64 *tags, unsigned nr);
211 int io_uring_unregister_buffers(struct io_uring *ring);
212 
213 int io_uring_register_files(struct io_uring *ring, const int *files,
214 			    unsigned nr_files);
215 int io_uring_register_files_tags(struct io_uring *ring, const int *files,
216 				 const __u64 *tags, unsigned nr);
217 int io_uring_register_files_sparse(struct io_uring *ring, unsigned nr);
218 int io_uring_register_files_update_tag(struct io_uring *ring, unsigned off,
219 				       const int *files, const __u64 *tags,
220 				       unsigned nr_files);
221 
222 int io_uring_unregister_files(struct io_uring *ring);
223 int io_uring_register_files_update(struct io_uring *ring, unsigned off,
224 				   const int *files, unsigned nr_files);
225 int io_uring_register_eventfd(struct io_uring *ring, int fd);
226 int io_uring_register_eventfd_async(struct io_uring *ring, int fd);
227 int io_uring_unregister_eventfd(struct io_uring *ring);
228 int io_uring_register_probe(struct io_uring *ring, struct io_uring_probe *p,
229 			    unsigned nr);
230 int io_uring_register_personality(struct io_uring *ring);
231 int io_uring_unregister_personality(struct io_uring *ring, int id);
232 int io_uring_register_restrictions(struct io_uring *ring,
233 				   struct io_uring_restriction *res,
234 				   unsigned int nr_res);
235 int io_uring_enable_rings(struct io_uring *ring);
236 int __io_uring_sqring_wait(struct io_uring *ring);
237 #ifdef _GNU_SOURCE
238 int io_uring_register_iowq_aff(struct io_uring *ring, size_t cpusz,
239 				const cpu_set_t *mask);
240 #endif
241 int io_uring_unregister_iowq_aff(struct io_uring *ring);
242 int io_uring_register_iowq_max_workers(struct io_uring *ring,
243 				       unsigned int *values);
244 int io_uring_register_ring_fd(struct io_uring *ring);
245 int io_uring_unregister_ring_fd(struct io_uring *ring);
246 int io_uring_close_ring_fd(struct io_uring *ring);
247 int io_uring_register_buf_ring(struct io_uring *ring,
248 			       struct io_uring_buf_reg *reg, unsigned int flags);
249 int io_uring_unregister_buf_ring(struct io_uring *ring, int bgid);
250 int io_uring_buf_ring_head(struct io_uring *ring, int buf_group, uint16_t *head);
251 int io_uring_register_sync_cancel(struct io_uring *ring,
252 				 struct io_uring_sync_cancel_reg *reg);
253 
254 int io_uring_register_file_alloc_range(struct io_uring *ring,
255 					unsigned off, unsigned len);
256 
257 int io_uring_register_napi(struct io_uring *ring, struct io_uring_napi *napi);
258 int io_uring_unregister_napi(struct io_uring *ring, struct io_uring_napi *napi);
259 
260 int io_uring_register_clock(struct io_uring *ring,
261 			    struct io_uring_clock_register *arg);
262 
263 int io_uring_get_events(struct io_uring *ring);
264 int io_uring_submit_and_get_events(struct io_uring *ring);
265 
266 /*
267  * io_uring syscalls.
268  */
269 int io_uring_enter(unsigned int fd, unsigned int to_submit,
270 		   unsigned int min_complete, unsigned int flags, sigset_t *sig);
271 int io_uring_enter2(unsigned int fd, unsigned int to_submit,
272 		    unsigned int min_complete, unsigned int flags,
273 		    sigset_t *sig, size_t sz);
274 int io_uring_setup(unsigned int entries, struct io_uring_params *p);
275 int io_uring_register(unsigned int fd, unsigned int opcode, const void *arg,
276 		      unsigned int nr_args);
277 
278 /*
279  * Mapped buffer ring alloc/register + unregister/free helpers
280  */
281 struct io_uring_buf_ring *io_uring_setup_buf_ring(struct io_uring *ring,
282 						  unsigned int nentries,
283 						  int bgid, unsigned int flags,
284 						  int *err);
285 int io_uring_free_buf_ring(struct io_uring *ring, struct io_uring_buf_ring *br,
286 			   unsigned int nentries, int bgid);
287 
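/*
 * Illustrative sketch, not part of the upstream header: allocate and register
 * a provided-buffer ring, then free it again. Filling it with entries
 * (io_uring_buf_ring_add()/io_uring_buf_ring_advance()) is left out; the
 * io_uring_example_* name is made up.
 */
static inline int io_uring_example_buf_ring_cycle(struct io_uring *ring,
						  unsigned int nentries, int bgid)
{
	struct io_uring_buf_ring *br;
	int err;

	br = io_uring_setup_buf_ring(ring, nentries, bgid, 0, &err);
	if (!br)
		return err;
	return io_uring_free_buf_ring(ring, br, nentries, bgid);
}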
288 /*
289  * Helper for the peek/wait single cqe functions. Exported because of that,
290  * but probably shouldn't be used directly in an application.
291  */
292 int __io_uring_get_cqe(struct io_uring *ring,
293 			struct io_uring_cqe **cqe_ptr, unsigned submit,
294 			unsigned wait_nr, sigset_t *sigmask);
295 
296 #define LIBURING_UDATA_TIMEOUT	((__u64) -1)
297 
298 /*
299  * Calculates the step size for CQE iteration.
300  * For standard CQEs it's 1, for big CQEs it's 2.
301  */
302 #define io_uring_cqe_shift(ring)					\
303 	(!!((ring)->flags & IORING_SETUP_CQE32))
304 
305 #define io_uring_cqe_index(ring,ptr,mask)				\
306 	(((ptr) & (mask)) << io_uring_cqe_shift(ring))
307 
308 /*
309  * NOTE: we should just get rid of the 'head' being passed in here, it doesn't
310  * serve a purpose anymore. The below is a bit of a work-around to ensure that
311  * the compiler doesn't complain about 'head' being unused (or only written,
312  * never read), as we use a local iterator for both the head and tail tracking.
313  */
314 #define io_uring_for_each_cqe(ring, head, cqe)				\
315 	/*								\
316 	 * io_uring_smp_load_acquire() enforces the order of tail	\
317 	 * and CQE reads.						\
318 	 */								\
319 	for (__u32 __HEAD__ = (head) = *(ring)->cq.khead,		\
320 	     __TAIL__ = io_uring_smp_load_acquire((ring)->cq.ktail);	\
321 	     (cqe = ((head) != __TAIL__ ?				\
322 	     &(ring)->cq.cqes[io_uring_cqe_index(ring, __HEAD__, (ring)->cq.ring_mask)] : NULL)); \
323 	     (head) = ++__HEAD__)
324 
325 /*
326  * Must be called after io_uring_for_each_cqe()
327  */
328 IOURINGINLINE void io_uring_cq_advance(struct io_uring *ring, unsigned nr)
329 {
330 	if (nr) {
331 		struct io_uring_cq *cq = &ring->cq;
332 
333 		/*
334 		 * Ensure that the kernel only sees the new value of the head
335 		 * index after the CQEs have been read.
336 		 */
337 		io_uring_smp_store_release(cq->khead, *cq->khead + nr);
338 	}
339 }
340 
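/*
 * Illustrative sketch, not part of the upstream header: reap everything
 * currently in the CQ ring with io_uring_for_each_cqe(), then retire the
 * entries in one go with io_uring_cq_advance(). The io_uring_example_* name
 * is made up.
 */
static inline unsigned io_uring_example_drain_cq(struct io_uring *ring)
{
	struct io_uring_cqe *cqe;
	unsigned head, seen = 0;

	io_uring_for_each_cqe(ring, head, cqe) {
		/* a real application would act on cqe->res / cqe->user_data */
		(void) cqe;
		seen++;
	}
	io_uring_cq_advance(ring, seen);
	return seen;
}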
341 /*
342  * Must be called after io_uring_{peek,wait}_cqe() after the cqe has
343  * been processed by the application.
344  */
345 IOURINGINLINE void io_uring_cqe_seen(struct io_uring *ring,
346 				     struct io_uring_cqe *cqe)
347 {
348 	if (cqe)
349 		io_uring_cq_advance(ring, 1);
350 }
351 
352 /*
353  * Command prep helpers
354  */
355 
356 /*
357  * Associate pointer @data with the sqe, for later retrieval from the cqe
358  * at command completion time with io_uring_cqe_get_data().
359  */
360 IOURINGINLINE void io_uring_sqe_set_data(struct io_uring_sqe *sqe, void *data)
361 {
362 	sqe->user_data = (unsigned long) data;
363 }
364 
365 IOURINGINLINE void *io_uring_cqe_get_data(const struct io_uring_cqe *cqe)
366 {
367 	return (void *) (uintptr_t) cqe->user_data;
368 }
369 
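/*
 * Illustrative sketch, not part of the upstream header: the user_data round
 * trip. The submit side tags an SQE with io_uring_sqe_set_data(sqe, ptr);
 * the completion side below recovers that pointer and marks the CQE seen.
 * The io_uring_example_* name is made up.
 */
static inline void *io_uring_example_reap_tag(struct io_uring *ring,
					      struct __kernel_timespec *ts)
{
	struct io_uring_cqe *cqe;
	void *tag = NULL;

	if (!io_uring_wait_cqe_timeout(ring, &cqe, ts)) {
		tag = io_uring_cqe_get_data(cqe);
		io_uring_cqe_seen(ring, cqe);
	}
	return tag;
}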
370 /*
371  * Assign a 64-bit value to this sqe, which can get retrieved at completion
372  * time with io_uring_cqe_get_data64. Just like the non-64 variants, except
373  * these store a 64-bit type rather than a data pointer.
374  */
375 IOURINGINLINE void io_uring_sqe_set_data64(struct io_uring_sqe *sqe,
376 					   __u64 data)
377 {
378 	sqe->user_data = data;
379 }
380 
381 IOURINGINLINE __u64 io_uring_cqe_get_data64(const struct io_uring_cqe *cqe)
382 {
383 	return cqe->user_data;
384 }
385 
386 /*
387  * Tell the app we have the 64-bit variants of the get/set userdata helpers
388  */
389 #define LIBURING_HAVE_DATA64
390 
391 IOURINGINLINE void io_uring_sqe_set_flags(struct io_uring_sqe *sqe,
392 					  unsigned flags)
393 {
394 	sqe->flags = (__u8) flags;
395 }
396 
397 IOURINGINLINE void __io_uring_set_target_fixed_file(struct io_uring_sqe *sqe,
398 						    unsigned int file_index)
399 {
400 	/* 0 means no fixed files, indexes should be encoded as "index + 1" */
401 	sqe->file_index = file_index + 1;
402 }
403 
404 IOURINGINLINE void io_uring_initialize_sqe(struct io_uring_sqe *sqe)
405 {
406 	sqe->flags = 0;
407 	sqe->ioprio = 0;
408 	sqe->rw_flags = 0;
409 	sqe->buf_index = 0;
410 	sqe->personality = 0;
411 	sqe->file_index = 0;
412 	sqe->addr3 = 0;
413 	sqe->__pad2[0] = 0;
414 }
415 
416 IOURINGINLINE void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd,
417 				    const void *addr, unsigned len,
418 				    __u64 offset)
419 {
420 	sqe->opcode = (__u8) op;
421 	sqe->fd = fd;
422 	sqe->off = offset;
423 	sqe->addr = (unsigned long) addr;
424 	sqe->len = len;
425 }
426 
427 /*
428  * io_uring_prep_splice() - Either @fd_in or @fd_out must be a pipe.
429  *
430  * - If @fd_in refers to a pipe, @off_in is ignored and must be set to -1.
431  *
432  * - If @fd_in does not refer to a pipe and @off_in is -1, then @nbytes are read
433  *   from @fd_in starting from the file offset, which is incremented by the
434  *   number of bytes read.
435  *
436  * - If @fd_in does not refer to a pipe and @off_in is not -1, then the starting
437  *   offset of @fd_in will be @off_in.
438  *
439  * This splice operation can be used to implement sendfile by splicing to an
440  * intermediate pipe first, then splicing to the final destination.
441  * In fact, the in-kernel implementation of sendfile uses splice internally.
442  *
443  * NOTE that even if fd_in or fd_out refers to a pipe, the splice operation
444  * can still fail with EINVAL if one of the fds doesn't explicitly support
445  * splice, e.g. reading from a terminal is unsupported from kernels 5.7 to 5.11.
446  * Check issue #291 for more information.
447  */
448 IOURINGINLINE void io_uring_prep_splice(struct io_uring_sqe *sqe,
449 					int fd_in, int64_t off_in,
450 					int fd_out, int64_t off_out,
451 					unsigned int nbytes,
452 					unsigned int splice_flags)
453 {
454 	io_uring_prep_rw(IORING_OP_SPLICE, sqe, fd_out, NULL, nbytes,
455 				(__u64) off_out);
456 	sqe->splice_off_in = (__u64) off_in;
457 	sqe->splice_fd_in = fd_in;
458 	sqe->splice_flags = splice_flags;
459 }
460 
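/*
 * Illustrative sketch, not part of the upstream header: the sendfile-style
 * pattern described above, built from two linked splices through a pipe the
 * caller has already created. Both SQEs are assumed to come from the same
 * ring (e.g. via io_uring_get_sqe()); the io_uring_example_* name is made up.
 */
static inline void io_uring_example_prep_sendfile(struct io_uring_sqe *to_pipe,
						  struct io_uring_sqe *to_sock,
						  int file_fd, int sock_fd,
						  const int pipe_fds[2],
						  unsigned int nbytes)
{
	/* file -> pipe write end; off_in == -1 uses and advances the file offset */
	io_uring_prep_splice(to_pipe, file_fd, -1, pipe_fds[1], -1, nbytes, 0);
	to_pipe->flags |= IOSQE_IO_LINK;
	/* pipe read end -> socket; pipe offsets must be -1 */
	io_uring_prep_splice(to_sock, pipe_fds[0], -1, sock_fd, -1, nbytes, 0);
}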
461 IOURINGINLINE void io_uring_prep_tee(struct io_uring_sqe *sqe,
462 				     int fd_in, int fd_out,
463 				     unsigned int nbytes,
464 				     unsigned int splice_flags)
465 {
466 	io_uring_prep_rw(IORING_OP_TEE, sqe, fd_out, NULL, nbytes, 0);
467 	sqe->splice_off_in = 0;
468 	sqe->splice_fd_in = fd_in;
469 	sqe->splice_flags = splice_flags;
470 }
471 
472 IOURINGINLINE void io_uring_prep_readv(struct io_uring_sqe *sqe, int fd,
473 				       const struct iovec *iovecs,
474 				       unsigned nr_vecs, __u64 offset)
475 {
476 	io_uring_prep_rw(IORING_OP_READV, sqe, fd, iovecs, nr_vecs, offset);
477 }
478 
479 IOURINGINLINE void io_uring_prep_readv2(struct io_uring_sqe *sqe, int fd,
480 				       const struct iovec *iovecs,
481 				       unsigned nr_vecs, __u64 offset,
482 				       int flags)
483 {
484 	io_uring_prep_readv(sqe, fd, iovecs, nr_vecs, offset);
485 	sqe->rw_flags = flags;
486 }
487 
488 IOURINGINLINE void io_uring_prep_read_fixed(struct io_uring_sqe *sqe, int fd,
489 					    void *buf, unsigned nbytes,
490 					    __u64 offset, int buf_index)
491 {
492 	io_uring_prep_rw(IORING_OP_READ_FIXED, sqe, fd, buf, nbytes, offset);
493 	sqe->buf_index = (__u16) buf_index;
494 }
495 
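/*
 * Illustrative sketch, not part of the upstream header: register a single
 * buffer (normally done once at startup) and prepare a fixed read into it;
 * buf_index 0 refers to the iovec registered below. The io_uring_example_*
 * name is made up and the SQE is assumed to come from io_uring_get_sqe().
 */
static inline int io_uring_example_fixed_read(struct io_uring *ring,
					      struct io_uring_sqe *sqe,
					      int fd, void *buf, unsigned len)
{
	struct iovec iov = { .iov_base = buf, .iov_len = len };
	int ret;

	ret = io_uring_register_buffers(ring, &iov, 1);
	if (ret < 0)
		return ret;
	io_uring_prep_read_fixed(sqe, fd, buf, len, 0, 0);
	return 0;
}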
496 IOURINGINLINE void io_uring_prep_writev(struct io_uring_sqe *sqe, int fd,
497 					const struct iovec *iovecs,
498 					unsigned nr_vecs, __u64 offset)
499 {
500 	io_uring_prep_rw(IORING_OP_WRITEV, sqe, fd, iovecs, nr_vecs, offset);
501 }
502 
503 IOURINGINLINE void io_uring_prep_writev2(struct io_uring_sqe *sqe, int fd,
504 				       const struct iovec *iovecs,
505 				       unsigned nr_vecs, __u64 offset,
506 				       int flags)
507 {
508 	io_uring_prep_writev(sqe, fd, iovecs, nr_vecs, offset);
509 	sqe->rw_flags = flags;
510 }
511 
512 IOURINGINLINE void io_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd,
513 					     const void *buf, unsigned nbytes,
514 					     __u64 offset, int buf_index)
515 {
516 	io_uring_prep_rw(IORING_OP_WRITE_FIXED, sqe, fd, buf, nbytes, offset);
517 	sqe->buf_index = (__u16) buf_index;
518 }
519 
520 IOURINGINLINE void io_uring_prep_recvmsg(struct io_uring_sqe *sqe, int fd,
521 					 struct msghdr *msg, unsigned flags)
522 {
523 	io_uring_prep_rw(IORING_OP_RECVMSG, sqe, fd, msg, 1, 0);
524 	sqe->msg_flags = flags;
525 }
526 
527 IOURINGINLINE void io_uring_prep_recvmsg_multishot(struct io_uring_sqe *sqe,
528 						   int fd, struct msghdr *msg,
529 						   unsigned flags)
530 {
531 	io_uring_prep_recvmsg(sqe, fd, msg, flags);
532 	sqe->ioprio |= IORING_RECV_MULTISHOT;
533 }
534 
535 IOURINGINLINE void io_uring_prep_sendmsg(struct io_uring_sqe *sqe, int fd,
536 					 const struct msghdr *msg,
537 					 unsigned flags)
538 {
539 	io_uring_prep_rw(IORING_OP_SENDMSG, sqe, fd, msg, 1, 0);
540 	sqe->msg_flags = flags;
541 }
542 
543 IOURINGINLINE unsigned __io_uring_prep_poll_mask(unsigned poll_mask)
544 {
545 #if __BYTE_ORDER == __BIG_ENDIAN
546 	poll_mask = __swahw32(poll_mask);
547 #endif
548 	return poll_mask;
549 }
550 
551 IOURINGINLINE void io_uring_prep_poll_add(struct io_uring_sqe *sqe, int fd,
552 					  unsigned poll_mask)
553 {
554 	io_uring_prep_rw(IORING_OP_POLL_ADD, sqe, fd, NULL, 0, 0);
555 	sqe->poll32_events = __io_uring_prep_poll_mask(poll_mask);
556 }
557 
558 IOURINGINLINE void io_uring_prep_poll_multishot(struct io_uring_sqe *sqe,
559 						int fd, unsigned poll_mask)
560 {
561 	io_uring_prep_poll_add(sqe, fd, poll_mask);
562 	sqe->len = IORING_POLL_ADD_MULTI;
563 }
564 
565 IOURINGINLINE void io_uring_prep_poll_remove(struct io_uring_sqe *sqe,
566 					     __u64 user_data)
567 {
568 	io_uring_prep_rw(IORING_OP_POLL_REMOVE, sqe, -1, NULL, 0, 0);
569 	sqe->addr = user_data;
570 }
571 
572 IOURINGINLINE void io_uring_prep_poll_update(struct io_uring_sqe *sqe,
573 					     __u64 old_user_data,
574 					     __u64 new_user_data,
575 					     unsigned poll_mask, unsigned flags)
576 {
577 	io_uring_prep_rw(IORING_OP_POLL_REMOVE, sqe, -1, NULL, flags,
578 			 new_user_data);
579 	sqe->addr = old_user_data;
580 	sqe->poll32_events = __io_uring_prep_poll_mask(poll_mask);
581 }
582 
583 IOURINGINLINE void io_uring_prep_fsync(struct io_uring_sqe *sqe, int fd,
584 				       unsigned fsync_flags)
585 {
586 	io_uring_prep_rw(IORING_OP_FSYNC, sqe, fd, NULL, 0, 0);
587 	sqe->fsync_flags = fsync_flags;
588 }
589 
590 IOURINGINLINE void io_uring_prep_nop(struct io_uring_sqe *sqe)
591 {
592 	io_uring_prep_rw(IORING_OP_NOP, sqe, -1, NULL, 0, 0);
593 }
594 
595 IOURINGINLINE void io_uring_prep_timeout(struct io_uring_sqe *sqe,
596 					 struct __kernel_timespec *ts,
597 					 unsigned count, unsigned flags)
598 {
599 	io_uring_prep_rw(IORING_OP_TIMEOUT, sqe, -1, ts, 1, count);
600 	sqe->timeout_flags = flags;
601 }
602 
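/*
 * Illustrative sketch, not part of the upstream header: arm a standalone
 * one-second timeout. With count == 0 the request completes only when the
 * timer fires (-ETIME in cqe->res); ts must stay valid until then. The
 * io_uring_example_* name is made up.
 */
static inline void io_uring_example_prep_1s_timeout(struct io_uring_sqe *sqe,
						    struct __kernel_timespec *ts)
{
	ts->tv_sec = 1;
	ts->tv_nsec = 0;
	io_uring_prep_timeout(sqe, ts, 0, 0);
}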
603 IOURINGINLINE void io_uring_prep_timeout_remove(struct io_uring_sqe *sqe,
604 						__u64 user_data, unsigned flags)
605 {
606 	io_uring_prep_rw(IORING_OP_TIMEOUT_REMOVE, sqe, -1, NULL, 0, 0);
607 	sqe->addr = user_data;
608 	sqe->timeout_flags = flags;
609 }
610 
611 IOURINGINLINE void io_uring_prep_timeout_update(struct io_uring_sqe *sqe,
612 						struct __kernel_timespec *ts,
613 						__u64 user_data, unsigned flags)
614 {
615 	io_uring_prep_rw(IORING_OP_TIMEOUT_REMOVE, sqe, -1, NULL, 0,
616 				(uintptr_t) ts);
617 	sqe->addr = user_data;
618 	sqe->timeout_flags = flags | IORING_TIMEOUT_UPDATE;
619 }
620 
621 IOURINGINLINE void io_uring_prep_accept(struct io_uring_sqe *sqe, int fd,
622 					struct sockaddr *addr,
623 					socklen_t *addrlen, int flags)
624 {
625 	io_uring_prep_rw(IORING_OP_ACCEPT, sqe, fd, addr, 0,
626 				(__u64) (unsigned long) addrlen);
627 	sqe->accept_flags = (__u32) flags;
628 }
629 
630 /* accept directly into the fixed file table */
631 IOURINGINLINE void io_uring_prep_accept_direct(struct io_uring_sqe *sqe, int fd,
632 					       struct sockaddr *addr,
633 					       socklen_t *addrlen, int flags,
634 					       unsigned int file_index)
635 {
636 	io_uring_prep_accept(sqe, fd, addr, addrlen, flags);
637 	/* offset by 1 for allocation */
638 	if (file_index == IORING_FILE_INDEX_ALLOC)
639 		file_index--;
640 	__io_uring_set_target_fixed_file(sqe, file_index);
641 }
642 
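/*
 * Illustrative sketch, not part of the upstream header: let the kernel pick a
 * free slot in a sparse fixed file table for the accepted socket; on success
 * the chosen slot is reported in cqe->res. The table size of 16 and the
 * io_uring_example_* name are arbitrary.
 */
static inline int io_uring_example_accept_into_table(struct io_uring *ring,
						     struct io_uring_sqe *sqe,
						     int listen_fd)
{
	int ret = io_uring_register_files_sparse(ring, 16);

	if (ret < 0)
		return ret;
	io_uring_prep_accept_direct(sqe, listen_fd, NULL, NULL, 0,
				    IORING_FILE_INDEX_ALLOC);
	return 0;
}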
643 IOURINGINLINE void io_uring_prep_multishot_accept(struct io_uring_sqe *sqe,
644 						  int fd, struct sockaddr *addr,
645 						  socklen_t *addrlen, int flags)
646 {
647 	io_uring_prep_accept(sqe, fd, addr, addrlen, flags);
648 	sqe->ioprio |= IORING_ACCEPT_MULTISHOT;
649 }
650 
651 /* multishot accept directly into the fixed file table */
652 IOURINGINLINE void io_uring_prep_multishot_accept_direct(struct io_uring_sqe *sqe,
653 							 int fd,
654 							 struct sockaddr *addr,
655 							 socklen_t *addrlen,
656 							 int flags)
657 {
658 	io_uring_prep_multishot_accept(sqe, fd, addr, addrlen, flags);
659 	__io_uring_set_target_fixed_file(sqe, IORING_FILE_INDEX_ALLOC - 1);
660 }
661 
662 IOURINGINLINE void io_uring_prep_cancel64(struct io_uring_sqe *sqe,
663 					  __u64 user_data, int flags)
664 {
665 	io_uring_prep_rw(IORING_OP_ASYNC_CANCEL, sqe, -1, NULL, 0, 0);
666 	sqe->addr = user_data;
667 	sqe->cancel_flags = (__u32) flags;
668 }
669 
670 IOURINGINLINE void io_uring_prep_cancel(struct io_uring_sqe *sqe,
671 					void *user_data, int flags)
672 {
673 	io_uring_prep_cancel64(sqe, (__u64) (uintptr_t) user_data, flags);
674 }
675 
676 IOURINGINLINE void io_uring_prep_cancel_fd(struct io_uring_sqe *sqe, int fd,
677 					   unsigned int flags)
678 {
679 	io_uring_prep_rw(IORING_OP_ASYNC_CANCEL, sqe, fd, NULL, 0, 0);
680 	sqe->cancel_flags = (__u32) flags | IORING_ASYNC_CANCEL_FD;
681 }
682 
683 IOURINGINLINE void io_uring_prep_link_timeout(struct io_uring_sqe *sqe,
684 					      struct __kernel_timespec *ts,
685 					      unsigned flags)
686 {
687 	io_uring_prep_rw(IORING_OP_LINK_TIMEOUT, sqe, -1, ts, 1, 0);
688 	sqe->timeout_flags = flags;
689 }
690 
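/*
 * Illustrative sketch, not part of the upstream header: bound a readv by a
 * timeout. The timeout SQE must immediately follow the request it guards and
 * is attached by setting IOSQE_IO_LINK on that request. The io_uring_example_*
 * name is made up.
 */
static inline void io_uring_example_readv_with_timeout(struct io_uring_sqe *read_sqe,
						       struct io_uring_sqe *timeout_sqe,
						       int fd, const struct iovec *iov,
						       unsigned nr_vecs,
						       struct __kernel_timespec *ts)
{
	io_uring_prep_readv(read_sqe, fd, iov, nr_vecs, 0);
	read_sqe->flags |= IOSQE_IO_LINK;
	io_uring_prep_link_timeout(timeout_sqe, ts, 0);
}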
691 IOURINGINLINE void io_uring_prep_connect(struct io_uring_sqe *sqe, int fd,
692 					 const struct sockaddr *addr,
693 					 socklen_t addrlen)
694 {
695 	io_uring_prep_rw(IORING_OP_CONNECT, sqe, fd, addr, 0, addrlen);
696 }
697 
698 IOURINGINLINE void io_uring_prep_bind(struct io_uring_sqe *sqe, int fd,
699 				      struct sockaddr *addr,
700 				      socklen_t addrlen)
701 {
702 	io_uring_prep_rw(IORING_OP_BIND, sqe, fd, addr, 0, addrlen);
703 }
704 
705 IOURINGINLINE void io_uring_prep_listen(struct io_uring_sqe *sqe, int fd,
706 				      int backlog)
707 {
708 	io_uring_prep_rw(IORING_OP_LISTEN, sqe, fd, 0, backlog, 0);
709 }
710 
711 IOURINGINLINE void io_uring_prep_files_update(struct io_uring_sqe *sqe,
712 					      int *fds, unsigned nr_fds,
713 					      int offset)
714 {
715 	io_uring_prep_rw(IORING_OP_FILES_UPDATE, sqe, -1, fds, nr_fds,
716 				(__u64) offset);
717 }
718 
719 IOURINGINLINE void io_uring_prep_fallocate(struct io_uring_sqe *sqe, int fd,
720 					   int mode, __u64 offset, __u64 len)
721 {
722 	io_uring_prep_rw(IORING_OP_FALLOCATE, sqe, fd,
723 			0, (unsigned int) mode, (__u64) offset);
724 	sqe->addr = (__u64) len;
725 }
726 
727 IOURINGINLINE void io_uring_prep_openat(struct io_uring_sqe *sqe, int dfd,
728 					const char *path, int flags,
729 					mode_t mode)
730 {
731 	io_uring_prep_rw(IORING_OP_OPENAT, sqe, dfd, path, mode, 0);
732 	sqe->open_flags = (__u32) flags;
733 }
734 
735 /* open directly into the fixed file table */
736 IOURINGINLINE void io_uring_prep_openat_direct(struct io_uring_sqe *sqe,
737 					       int dfd, const char *path,
738 					       int flags, mode_t mode,
739 					       unsigned file_index)
740 {
741 	io_uring_prep_openat(sqe, dfd, path, flags, mode);
742 	/* offset by 1 for allocation */
743 	if (file_index == IORING_FILE_INDEX_ALLOC)
744 		file_index--;
745 	__io_uring_set_target_fixed_file(sqe, file_index);
746 }
747 
748 IOURINGINLINE void io_uring_prep_open(struct io_uring_sqe *sqe,
749 					const char *path, int flags, mode_t mode)
750 {
751 	io_uring_prep_openat(sqe, AT_FDCWD, path, flags, mode);
752 }
753 
754 /* open directly into the fixed file table */
755 IOURINGINLINE void io_uring_prep_open_direct(struct io_uring_sqe *sqe,
756 							const char *path, int flags, mode_t mode,
757 							unsigned file_index)
758 {
759 	io_uring_prep_openat_direct(sqe, AT_FDCWD, path, flags, mode, file_index);
760 }
761 
762 IOURINGINLINE void io_uring_prep_close(struct io_uring_sqe *sqe, int fd)
763 {
764 	io_uring_prep_rw(IORING_OP_CLOSE, sqe, fd, NULL, 0, 0);
765 }
766 
767 IOURINGINLINE void io_uring_prep_close_direct(struct io_uring_sqe *sqe,
768 					      unsigned file_index)
769 {
770 	io_uring_prep_close(sqe, 0);
771 	__io_uring_set_target_fixed_file(sqe, file_index);
772 }
773 
774 IOURINGINLINE void io_uring_prep_read(struct io_uring_sqe *sqe, int fd,
775 				      void *buf, unsigned nbytes, __u64 offset)
776 {
777 	io_uring_prep_rw(IORING_OP_READ, sqe, fd, buf, nbytes, offset);
778 }
779 
780 IOURINGINLINE void io_uring_prep_read_multishot(struct io_uring_sqe *sqe,
781 						int fd, unsigned nbytes,
782 						__u64 offset, int buf_group)
783 {
784 	io_uring_prep_rw(IORING_OP_READ_MULTISHOT, sqe, fd, NULL, nbytes,
785 			 offset);
786 	sqe->buf_group = buf_group;
787 	sqe->flags = IOSQE_BUFFER_SELECT;
788 }
789 
790 IOURINGINLINE void io_uring_prep_write(struct io_uring_sqe *sqe, int fd,
791 				       const void *buf, unsigned nbytes,
792 				       __u64 offset)
793 {
794 	io_uring_prep_rw(IORING_OP_WRITE, sqe, fd, buf, nbytes, offset);
795 }
796 
797 struct statx;
798 IOURINGINLINE void io_uring_prep_statx(struct io_uring_sqe *sqe, int dfd,
799 				       const char *path, int flags,
800 				       unsigned mask, struct statx *statxbuf)
801 {
802 	io_uring_prep_rw(IORING_OP_STATX, sqe, dfd, path, mask,
803 				(__u64) (unsigned long) statxbuf);
804 	sqe->statx_flags = (__u32) flags;
805 }
806 
807 IOURINGINLINE void io_uring_prep_fadvise(struct io_uring_sqe *sqe, int fd,
808 					 __u64 offset, __u32 len, int advice)
809 {
810 	io_uring_prep_rw(IORING_OP_FADVISE, sqe, fd, NULL, (__u32) len, offset);
811 	sqe->fadvise_advice = (__u32) advice;
812 }
813 
814 IOURINGINLINE void io_uring_prep_madvise(struct io_uring_sqe *sqe, void *addr,
815 					 __u32 length, int advice)
816 {
817 	io_uring_prep_rw(IORING_OP_MADVISE, sqe, -1, addr, (__u32) length, 0);
818 	sqe->fadvise_advice = (__u32) advice;
819 }
820 
821 IOURINGINLINE void io_uring_prep_fadvise64(struct io_uring_sqe *sqe, int fd,
822 					 __u64 offset, off_t len, int advice)
823 {
824 	io_uring_prep_rw(IORING_OP_FADVISE, sqe, fd, NULL, 0, offset);
825 	sqe->addr = len;
826 	sqe->fadvise_advice = (__u32) advice;
827 }
828 
829 IOURINGINLINE void io_uring_prep_madvise64(struct io_uring_sqe *sqe, void *addr,
830 					 off_t length, int advice)
831 {
832 	io_uring_prep_rw(IORING_OP_MADVISE, sqe, -1, addr, 0, length);
833 	sqe->fadvise_advice = (__u32) advice;
834 }
835 
836 IOURINGINLINE void io_uring_prep_send(struct io_uring_sqe *sqe, int sockfd,
837 				      const void *buf, size_t len, int flags)
838 {
839 	io_uring_prep_rw(IORING_OP_SEND, sqe, sockfd, buf, (__u32) len, 0);
840 	sqe->msg_flags = (__u32) flags;
841 }
842 
843 IOURINGINLINE void io_uring_prep_send_bundle(struct io_uring_sqe *sqe,
844 					     int sockfd, size_t len, int flags)
845 {
846 	io_uring_prep_send(sqe, sockfd, NULL, len, flags);
847 	sqe->ioprio |= IORING_RECVSEND_BUNDLE;
848 }
849 
850 IOURINGINLINE void io_uring_prep_send_set_addr(struct io_uring_sqe *sqe,
851 						const struct sockaddr *dest_addr,
852 						__u16 addr_len)
853 {
854 	sqe->addr2 = (unsigned long)(const void *)dest_addr;
855 	sqe->addr_len = addr_len;
856 }
857 
858 IOURINGINLINE void io_uring_prep_sendto(struct io_uring_sqe *sqe, int sockfd,
859 					const void *buf, size_t len, int flags,
860 					const struct sockaddr *addr,
861 					socklen_t addrlen)
862 {
863 	io_uring_prep_send(sqe, sockfd, buf, len, flags);
864 	io_uring_prep_send_set_addr(sqe, addr, addrlen);
865 }
866 
867 IOURINGINLINE void io_uring_prep_send_zc(struct io_uring_sqe *sqe, int sockfd,
868 					 const void *buf, size_t len, int flags,
869 					 unsigned zc_flags)
870 {
871 	io_uring_prep_rw(IORING_OP_SEND_ZC, sqe, sockfd, buf, (__u32) len, 0);
872 	sqe->msg_flags = (__u32) flags;
873 	sqe->ioprio = zc_flags;
874 }
875 
876 IOURINGINLINE void io_uring_prep_send_zc_fixed(struct io_uring_sqe *sqe,
877 						int sockfd, const void *buf,
878 						size_t len, int flags,
879 						unsigned zc_flags,
880 						unsigned buf_index)
881 {
882 	io_uring_prep_send_zc(sqe, sockfd, buf, len, flags, zc_flags);
883 	sqe->ioprio |= IORING_RECVSEND_FIXED_BUF;
884 	sqe->buf_index = buf_index;
885 }
886 
887 IOURINGINLINE void io_uring_prep_sendmsg_zc(struct io_uring_sqe *sqe, int fd,
888 					    const struct msghdr *msg,
889 					    unsigned flags)
890 {
891 	io_uring_prep_sendmsg(sqe, fd, msg, flags);
892 	sqe->opcode = IORING_OP_SENDMSG_ZC;
893 }
894 
895 IOURINGINLINE void io_uring_prep_recv(struct io_uring_sqe *sqe, int sockfd,
896 				      void *buf, size_t len, int flags)
897 {
898 	io_uring_prep_rw(IORING_OP_RECV, sqe, sockfd, buf, (__u32) len, 0);
899 	sqe->msg_flags = (__u32) flags;
900 }
901 
902 IOURINGINLINE void io_uring_prep_recv_multishot(struct io_uring_sqe *sqe,
903 						int sockfd, void *buf,
904 						size_t len, int flags)
905 {
906 	io_uring_prep_recv(sqe, sockfd, buf, len, flags);
907 	sqe->ioprio |= IORING_RECV_MULTISHOT;
908 }
909 
910 IOURINGINLINE struct io_uring_recvmsg_out *
911 io_uring_recvmsg_validate(void *buf, int buf_len, struct msghdr *msgh)
912 {
913 	unsigned long header = msgh->msg_controllen + msgh->msg_namelen +
914 				sizeof(struct io_uring_recvmsg_out);
915 	if (buf_len < 0 || (unsigned long)buf_len < header)
916 		return NULL;
917 	return (struct io_uring_recvmsg_out *)buf;
918 }
919 
920 IOURINGINLINE void *io_uring_recvmsg_name(struct io_uring_recvmsg_out *o)
921 {
922 	return (void *) &o[1];
923 }
924 
925 IOURINGINLINE struct cmsghdr *
926 io_uring_recvmsg_cmsg_firsthdr(struct io_uring_recvmsg_out *o,
927 			       struct msghdr *msgh)
928 {
929 	if (o->controllen < sizeof(struct cmsghdr))
930 		return NULL;
931 
932 	return (struct cmsghdr *)((unsigned char *) io_uring_recvmsg_name(o) +
933 			msgh->msg_namelen);
934 }
935 
936 IOURINGINLINE struct cmsghdr *
937 io_uring_recvmsg_cmsg_nexthdr(struct io_uring_recvmsg_out *o, struct msghdr *msgh,
938 			      struct cmsghdr *cmsg)
939 {
940 	unsigned char *end;
941 
942 	if (cmsg->cmsg_len < sizeof(struct cmsghdr))
943 		return NULL;
944 	end = (unsigned char *) io_uring_recvmsg_cmsg_firsthdr(o, msgh) +
945 		o->controllen;
946 	cmsg = (struct cmsghdr *)((unsigned char *) cmsg +
947 			CMSG_ALIGN(cmsg->cmsg_len));
948 
949 	if ((unsigned char *) (cmsg + 1) > end)
950 		return NULL;
951 	if (((unsigned char *) cmsg) + CMSG_ALIGN(cmsg->cmsg_len) > end)
952 		return NULL;
953 
954 	return cmsg;
955 }
956 
957 IOURINGINLINE void *io_uring_recvmsg_payload(struct io_uring_recvmsg_out *o,
958 					     struct msghdr *msgh)
959 {
960 	return (void *)((unsigned char *)io_uring_recvmsg_name(o) +
961 			msgh->msg_namelen + msgh->msg_controllen);
962 }
963 
964 IOURINGINLINE unsigned int
965 io_uring_recvmsg_payload_length(struct io_uring_recvmsg_out *o,
966 				int buf_len, struct msghdr *msgh)
967 {
968 	unsigned long payload_start, payload_end;
969 
970 	payload_start = (unsigned long) io_uring_recvmsg_payload(o, msgh);
971 	payload_end = (unsigned long) o + buf_len;
972 	return (unsigned int) (payload_end - payload_start);
973 }
974 
975 IOURINGINLINE void io_uring_prep_openat2(struct io_uring_sqe *sqe, int dfd,
976 					const char *path, struct open_how *how)
977 {
978 	io_uring_prep_rw(IORING_OP_OPENAT2, sqe, dfd, path, sizeof(*how),
979 				(uint64_t) (uintptr_t) how);
980 }
981 
982 /* open directly into the fixed file table */
983 IOURINGINLINE void io_uring_prep_openat2_direct(struct io_uring_sqe *sqe,
984 						int dfd, const char *path,
985 						struct open_how *how,
986 						unsigned file_index)
987 {
988 	io_uring_prep_openat2(sqe, dfd, path, how);
989 	/* offset by 1 for allocation */
990 	if (file_index == IORING_FILE_INDEX_ALLOC)
991 		file_index--;
992 	__io_uring_set_target_fixed_file(sqe, file_index);
993 }
994 
995 struct epoll_event;
996 IOURINGINLINE void io_uring_prep_epoll_ctl(struct io_uring_sqe *sqe, int epfd,
997 					   int fd, int op,
998 					   struct epoll_event *ev)
999 {
1000 	io_uring_prep_rw(IORING_OP_EPOLL_CTL, sqe, epfd, ev,
1001 				(__u32) op, (__u32) fd);
1002 }
1003 
1004 IOURINGINLINE void io_uring_prep_provide_buffers(struct io_uring_sqe *sqe,
1005 						 void *addr, int len, int nr,
1006 						 int bgid, int bid)
1007 {
1008 	io_uring_prep_rw(IORING_OP_PROVIDE_BUFFERS, sqe, nr, addr, (__u32) len,
1009 				(__u64) bid);
1010 	sqe->buf_group = (__u16) bgid;
1011 }
1012 
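/*
 * Illustrative sketch, not part of the upstream header: hand a pool of
 * equally sized buffers to the kernel, then arm a receive that picks one of
 * them via IOSQE_BUFFER_SELECT; the chosen buffer ID is reported in the CQE
 * flags. The io_uring_example_* name is made up.
 */
static inline void io_uring_example_recv_provided(struct io_uring_sqe *provide_sqe,
						  struct io_uring_sqe *recv_sqe,
						  int sockfd, void *pool,
						  int buf_len, int nr_bufs, int bgid)
{
	/* nr_bufs buffers of buf_len bytes each, buffer IDs starting at 0 */
	io_uring_prep_provide_buffers(provide_sqe, pool, buf_len, nr_bufs, bgid, 0);
	io_uring_prep_recv(recv_sqe, sockfd, NULL, buf_len, 0);
	recv_sqe->flags |= IOSQE_BUFFER_SELECT;
	recv_sqe->buf_group = (__u16) bgid;
}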
1013 IOURINGINLINE void io_uring_prep_remove_buffers(struct io_uring_sqe *sqe,
1014 						int nr, int bgid)
1015 {
1016 	io_uring_prep_rw(IORING_OP_REMOVE_BUFFERS, sqe, nr, NULL, 0, 0);
1017 	sqe->buf_group = (__u16) bgid;
1018 }
1019 
1020 IOURINGINLINE void io_uring_prep_shutdown(struct io_uring_sqe *sqe, int fd,
1021 					  int how)
1022 {
1023 	io_uring_prep_rw(IORING_OP_SHUTDOWN, sqe, fd, NULL, (__u32) how, 0);
1024 }
1025 
1026 IOURINGINLINE void io_uring_prep_unlinkat(struct io_uring_sqe *sqe, int dfd,
1027 					  const char *path, int flags)
1028 {
1029 	io_uring_prep_rw(IORING_OP_UNLINKAT, sqe, dfd, path, 0, 0);
1030 	sqe->unlink_flags = (__u32) flags;
1031 }
1032 
1033 IOURINGINLINE void io_uring_prep_unlink(struct io_uring_sqe *sqe,
1034 					  const char *path, int flags)
1035 {
1036 	io_uring_prep_unlinkat(sqe, AT_FDCWD, path, flags);
1037 }
1038 
1039 IOURINGINLINE void io_uring_prep_renameat(struct io_uring_sqe *sqe, int olddfd,
1040 					  const char *oldpath, int newdfd,
1041 					  const char *newpath, unsigned int flags)
1042 {
1043 	io_uring_prep_rw(IORING_OP_RENAMEAT, sqe, olddfd, oldpath,
1044 				(__u32) newdfd,
1045 				(uint64_t) (uintptr_t) newpath);
1046 	sqe->rename_flags = (__u32) flags;
1047 }
1048 
1049 IOURINGINLINE void io_uring_prep_rename(struct io_uring_sqe *sqe,
1050 					const char *oldpath,
1051 					const char *newpath)
1052 {
1053 	io_uring_prep_renameat(sqe, AT_FDCWD, oldpath, AT_FDCWD, newpath, 0);
1054 }
1055 
1056 IOURINGINLINE void io_uring_prep_sync_file_range(struct io_uring_sqe *sqe,
1057 						 int fd, unsigned len,
1058 						 __u64 offset, int flags)
1059 {
1060 	io_uring_prep_rw(IORING_OP_SYNC_FILE_RANGE, sqe, fd, NULL, len, offset);
1061 	sqe->sync_range_flags = (__u32) flags;
1062 }
1063 
1064 IOURINGINLINE void io_uring_prep_mkdirat(struct io_uring_sqe *sqe, int dfd,
1065 					const char *path, mode_t mode)
1066 {
1067 	io_uring_prep_rw(IORING_OP_MKDIRAT, sqe, dfd, path, mode, 0);
1068 }
1069 
1070 IOURINGINLINE void io_uring_prep_mkdir(struct io_uring_sqe *sqe,
1071 					const char *path, mode_t mode)
1072 {
1073 	io_uring_prep_mkdirat(sqe, AT_FDCWD, path, mode);
1074 }
1075 
1076 IOURINGINLINE void io_uring_prep_symlinkat(struct io_uring_sqe *sqe,
1077 					   const char *target, int newdirfd,
1078 					   const char *linkpath)
1079 {
1080 	io_uring_prep_rw(IORING_OP_SYMLINKAT, sqe, newdirfd, target, 0,
1081 				(uint64_t) (uintptr_t) linkpath);
1082 }
1083 
1084 IOURINGINLINE void io_uring_prep_symlink(struct io_uring_sqe *sqe,
1085 					 const char *target,
1086 					 const char *linkpath)
1087 {
1088 	io_uring_prep_symlinkat(sqe, target, AT_FDCWD, linkpath);
1089 }
1090 
1091 IOURINGINLINE void io_uring_prep_linkat(struct io_uring_sqe *sqe, int olddfd,
1092 					const char *oldpath, int newdfd,
1093 					const char *newpath, int flags)
1094 {
1095 	io_uring_prep_rw(IORING_OP_LINKAT, sqe, olddfd, oldpath, (__u32) newdfd,
1096 				(uint64_t) (uintptr_t) newpath);
1097 	sqe->hardlink_flags = (__u32) flags;
1098 }
1099 
1100 IOURINGINLINE void io_uring_prep_link(struct io_uring_sqe *sqe,
1101 				      const char *oldpath, const char *newpath,
1102 				      int flags)
1103 {
1104 	io_uring_prep_linkat(sqe, AT_FDCWD, oldpath, AT_FDCWD, newpath, flags);
1105 }
1106 
1107 IOURINGINLINE void io_uring_prep_msg_ring_cqe_flags(struct io_uring_sqe *sqe,
1108 					  int fd, unsigned int len, __u64 data,
1109 					  unsigned int flags, unsigned int cqe_flags)
1110 {
1111 	io_uring_prep_rw(IORING_OP_MSG_RING, sqe, fd, NULL, len, data);
1112 	sqe->msg_ring_flags = IORING_MSG_RING_FLAGS_PASS | flags;
1113 	sqe->file_index = cqe_flags;
1114 }
1115 
1116 IOURINGINLINE void io_uring_prep_msg_ring(struct io_uring_sqe *sqe, int fd,
1117 					  unsigned int len, __u64 data,
1118 					  unsigned int flags)
1119 {
1120 	io_uring_prep_rw(IORING_OP_MSG_RING, sqe, fd, NULL, len, data);
1121 	sqe->msg_ring_flags = flags;
1122 }
1123 
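/*
 * Illustrative sketch, not part of the upstream header: post a CQE into
 * another ring's completion queue, e.g. to wake a peer thread. The receiver
 * sees 'data' as cqe->user_data and 'len' as cqe->res; the 0xcafe tag and the
 * io_uring_example_* name are arbitrary.
 */
static inline void io_uring_example_wake_other_ring(struct io_uring_sqe *sqe,
						    struct io_uring *target)
{
	io_uring_prep_msg_ring(sqe, target->ring_fd, 0, 0xcafe, 0);
}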
1124 IOURINGINLINE void io_uring_prep_msg_ring_fd(struct io_uring_sqe *sqe, int fd,
1125 					     int source_fd, int target_fd,
1126 					     __u64 data, unsigned int flags)
1127 {
1128 	io_uring_prep_rw(IORING_OP_MSG_RING, sqe, fd,
1129 			 (void *) (uintptr_t) IORING_MSG_SEND_FD, 0, data);
1130 	sqe->addr3 = source_fd;
1131 	/* offset by 1 for allocation */
1132 	if ((unsigned int) target_fd == IORING_FILE_INDEX_ALLOC)
1133 		target_fd--;
1134 	__io_uring_set_target_fixed_file(sqe, target_fd);
1135 	sqe->msg_ring_flags = flags;
1136 }
1137 
1138 IOURINGINLINE void io_uring_prep_msg_ring_fd_alloc(struct io_uring_sqe *sqe,
1139 						   int fd, int source_fd,
1140 						   __u64 data, unsigned int flags)
1141 {
1142 	io_uring_prep_msg_ring_fd(sqe, fd, source_fd, IORING_FILE_INDEX_ALLOC,
1143 				  data, flags);
1144 }
1145 
1146 IOURINGINLINE void io_uring_prep_getxattr(struct io_uring_sqe *sqe,
1147 					  const char *name, char *value,
1148 					  const char *path, unsigned int len)
1149 {
1150 	io_uring_prep_rw(IORING_OP_GETXATTR, sqe, 0, name, len,
1151 				(__u64) (uintptr_t) value);
1152 	sqe->addr3 = (__u64) (uintptr_t) path;
1153 	sqe->xattr_flags = 0;
1154 }
1155 
1156 IOURINGINLINE void io_uring_prep_setxattr(struct io_uring_sqe *sqe,
1157 					  const char *name, const char *value,
1158 					  const char *path, int flags,
1159 					  unsigned int len)
1160 {
1161 	io_uring_prep_rw(IORING_OP_SETXATTR, sqe, 0, name, len,
1162 				(__u64) (uintptr_t) value);
1163 	sqe->addr3 = (__u64) (uintptr_t) path;
1164 	sqe->xattr_flags = flags;
1165 }
1166 
1167 IOURINGINLINE void io_uring_prep_fgetxattr(struct io_uring_sqe *sqe,
1168 					   int fd, const char *name,
1169 					   char *value, unsigned int len)
1170 {
1171 	io_uring_prep_rw(IORING_OP_FGETXATTR, sqe, fd, name, len,
1172 				(__u64) (uintptr_t) value);
1173 	sqe->xattr_flags = 0;
1174 }
1175 
1176 IOURINGINLINE void io_uring_prep_fsetxattr(struct io_uring_sqe *sqe, int fd,
1177 					   const char *name, const char	*value,
1178 					   int flags, unsigned int len)
1179 {
1180 	io_uring_prep_rw(IORING_OP_FSETXATTR, sqe, fd, name, len,
1181 				(__u64) (uintptr_t) value);
1182 	sqe->xattr_flags = flags;
1183 }
1184 
1185 IOURINGINLINE void io_uring_prep_socket(struct io_uring_sqe *sqe, int domain,
1186 					int type, int protocol,
1187 					unsigned int flags)
1188 {
1189 	io_uring_prep_rw(IORING_OP_SOCKET, sqe, domain, NULL, protocol, type);
1190 	sqe->rw_flags = flags;
1191 }
1192 
1193 IOURINGINLINE void io_uring_prep_socket_direct(struct io_uring_sqe *sqe,
1194 					       int domain, int type,
1195 					       int protocol,
1196 					       unsigned file_index,
1197 					       unsigned int flags)
1198 {
1199 	io_uring_prep_rw(IORING_OP_SOCKET, sqe, domain, NULL, protocol, type);
1200 	sqe->rw_flags = flags;
1201 	/* offset by 1 for allocation */
1202 	if (file_index == IORING_FILE_INDEX_ALLOC)
1203 		file_index--;
1204 	__io_uring_set_target_fixed_file(sqe, file_index);
1205 }
1206 
1207 IOURINGINLINE void io_uring_prep_socket_direct_alloc(struct io_uring_sqe *sqe,
1208 						     int domain, int type,
1209 						     int protocol,
1210 						     unsigned int flags)
1211 {
1212 	io_uring_prep_rw(IORING_OP_SOCKET, sqe, domain, NULL, protocol, type);
1213 	sqe->rw_flags = flags;
1214 	__io_uring_set_target_fixed_file(sqe, IORING_FILE_INDEX_ALLOC - 1);
1215 }
1216 
1217 /*
1218  * Prepare commands for sockets
1219  */
1220 IOURINGINLINE void io_uring_prep_cmd_sock(struct io_uring_sqe *sqe,
1221 					  int cmd_op,
1222 					  int fd,
1223 					  int level,
1224 					  int optname,
1225 					  void *optval,
1226 					  int optlen)
1227 {
1228 	io_uring_prep_rw(IORING_OP_URING_CMD, sqe, fd, NULL, 0, 0);
1229 	sqe->optval = (unsigned long) (uintptr_t) optval;
1230 	sqe->optname = optname;
1231 	sqe->optlen = optlen;
1232 	sqe->cmd_op = cmd_op;
1233 	sqe->level = level;
1234 }
1235 
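/*
 * Illustrative sketch, not part of the upstream header: query how many bytes
 * sit unsent in a socket's send queue (assuming SOCKET_URING_OP_SIOCOUTQ is
 * available in the bundled io_uring.h); the count comes back in cqe->res. No
 * option buffer is needed, and the io_uring_example_* name is made up.
 */
static inline void io_uring_example_prep_siocoutq(struct io_uring_sqe *sqe,
						  int sockfd)
{
	io_uring_prep_cmd_sock(sqe, SOCKET_URING_OP_SIOCOUTQ, sockfd, 0, 0,
			       NULL, 0);
}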
1236 IOURINGINLINE void io_uring_prep_waitid(struct io_uring_sqe *sqe,
1237 					idtype_t idtype,
1238 					id_t id,
1239 					siginfo_t *infop,
1240 					int options, unsigned int flags)
1241 {
1242 	io_uring_prep_rw(IORING_OP_WAITID, sqe, id, NULL, (unsigned) idtype, 0);
1243 	sqe->waitid_flags = flags;
1244 	sqe->file_index = options;
1245 	sqe->addr2 = (unsigned long) infop;
1246 }
1247 
1248 IOURINGINLINE void io_uring_prep_futex_wake(struct io_uring_sqe *sqe,
1249 					    uint32_t *futex, uint64_t val,
1250 					    uint64_t mask, uint32_t futex_flags,
1251 					    unsigned int flags)
1252 {
1253 	io_uring_prep_rw(IORING_OP_FUTEX_WAKE, sqe, futex_flags, futex, 0, val);
1254 	sqe->futex_flags = flags;
1255 	sqe->addr3 = mask;
1256 }
1257 
1258 IOURINGINLINE void io_uring_prep_futex_wait(struct io_uring_sqe *sqe,
1259 					    uint32_t *futex, uint64_t val,
1260 					    uint64_t mask, uint32_t futex_flags,
1261 					    unsigned int flags)
1262 {
1263 	io_uring_prep_rw(IORING_OP_FUTEX_WAIT, sqe, futex_flags, futex, 0, val);
1264 	sqe->futex_flags = flags;
1265 	sqe->addr3 = mask;
1266 }
1267 
1268 struct futex_waitv;
1269 IOURINGINLINE void io_uring_prep_futex_waitv(struct io_uring_sqe *sqe,
1270 					     struct futex_waitv *futex,
1271 					     uint32_t nr_futex,
1272 					     unsigned int flags)
1273 {
1274 	io_uring_prep_rw(IORING_OP_FUTEX_WAITV, sqe, 0, futex, nr_futex, 0);
1275 	sqe->futex_flags = flags;
1276 }
1277 
1278 IOURINGINLINE void io_uring_prep_fixed_fd_install(struct io_uring_sqe *sqe,
1279 						  int fd,
1280 						  unsigned int flags)
1281 {
1282 	io_uring_prep_rw(IORING_OP_FIXED_FD_INSTALL, sqe, fd, NULL, 0, 0);
1283 	sqe->flags = IOSQE_FIXED_FILE;
1284 	sqe->install_fd_flags = flags;
1285 }
1286 
1287 #ifdef _GNU_SOURCE
1288 IOURINGINLINE void io_uring_prep_ftruncate(struct io_uring_sqe *sqe,
1289 				       int fd, loff_t len)
1290 {
1291 	io_uring_prep_rw(IORING_OP_FTRUNCATE, sqe, fd, 0, 0, len);
1292 }
1293 #endif
1294 
1295 IOURINGINLINE void io_uring_prep_cmd_discard(struct io_uring_sqe *sqe,
1296 					     int fd,
1297 					     uint64_t offset, uint64_t nbytes)
1298 {
1299 	io_uring_prep_rw(IORING_OP_URING_CMD, sqe, fd, 0, 0, 0);
1300 	sqe->cmd_op = BLOCK_URING_CMD_DISCARD;
1301 	sqe->addr = offset;
1302 	sqe->addr3 = nbytes;
1303 }
1304 
1305 /*
1306  * Returns the number of unconsumed (if SQPOLL) or unsubmitted entries that
1307  * exist in the SQ ring.
1308  */
1309 IOURINGINLINE unsigned io_uring_sq_ready(const struct io_uring *ring)
1310 {
1311 	unsigned khead;
1312 
1313 	/*
1314 	 * Without a barrier, we could miss an update and think the SQ wasn't
1315 	 * ready. We don't need the load acquire for non-SQPOLL since then we
1316 	 * drive updates.
1317 	 */
1318 	if (ring->flags & IORING_SETUP_SQPOLL)
1319 		khead = io_uring_smp_load_acquire(ring->sq.khead);
1320 	else
1321 		khead = *ring->sq.khead;
1322 
1323 	/* always use real head, to avoid losing sync for short submit */
1324 	return ring->sq.sqe_tail - khead;
1325 }
1326 
1327 /*
1328  * Returns how much space is left in the SQ ring.
1329  */
1330 IOURINGINLINE unsigned io_uring_sq_space_left(const struct io_uring *ring)
1331 {
1332 	return ring->sq.ring_entries - io_uring_sq_ready(ring);
1333 }
1334 
1335 /*
1336  * Only applicable when using SQPOLL - allows the caller to wait for space
1337  * to free up in the SQ ring, which happens when the kernel side thread has
1338  * consumed one or more entries. If the SQ ring is currently non-full, no
1339  * action is taken. Note: may return -EINVAL if the kernel doesn't support
1340  * this feature.
1341  */
io_uring_sqring_wait(struct io_uring * ring)1342 IOURINGINLINE int io_uring_sqring_wait(struct io_uring *ring)
1343 {
1344 	if (!(ring->flags & IORING_SETUP_SQPOLL))
1345 		return 0;
1346 	if (io_uring_sq_space_left(ring))
1347 		return 0;
1348 
1349 	return __io_uring_sqring_wait(ring);
1350 }
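
/*
 * Usage sketch for SQPOLL rings: make sure there is room before grabbing an
 * SQE, sleeping until the kernel thread has consumed entries if necessary.
 * Assumes a ring created with IORING_SETUP_SQPOLL:
 *
 *	if (!io_uring_sq_space_left(&ring))
 *		io_uring_sqring_wait(&ring);	// no-op without SQPOLL
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 */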

/*
 * Returns how many unconsumed entries are ready in the CQ ring
 */
IOURINGINLINE unsigned io_uring_cq_ready(const struct io_uring *ring)
{
	return io_uring_smp_load_acquire(ring->cq.ktail) - *ring->cq.khead;
}
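
/*
 * Usage sketch: drain whatever is already ready without blocking.
 * handle_completion() stands in for application code; io_uring_for_each_cqe()
 * and io_uring_cq_advance() are defined earlier in this header:
 *
 *	if (io_uring_cq_ready(&ring)) {
 *		struct io_uring_cqe *cqe;
 *		unsigned head, seen = 0;
 *
 *		io_uring_for_each_cqe(&ring, head, cqe) {
 *			handle_completion(cqe);
 *			seen++;
 *		}
 *		io_uring_cq_advance(&ring, seen);
 *	}
 */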

/*
 * Returns true if there are overflow entries waiting to be flushed onto
 * the CQ ring
 */
IOURINGINLINE bool io_uring_cq_has_overflow(const struct io_uring *ring)
{
	return IO_URING_READ_ONCE(*ring->sq.kflags) & IORING_SQ_CQ_OVERFLOW;
}

/*
 * Returns true if the eventfd notification is currently enabled
 */
IOURINGINLINE bool io_uring_cq_eventfd_enabled(const struct io_uring *ring)
{
	if (!ring->cq.kflags)
		return true;

	return !(*ring->cq.kflags & IORING_CQ_EVENTFD_DISABLED);
}

/*
 * Toggle eventfd notification on or off, if an eventfd is registered with
 * the ring.
 */
IOURINGINLINE int io_uring_cq_eventfd_toggle(struct io_uring *ring,
					     bool enabled)
{
	uint32_t flags;

	if (!!enabled == io_uring_cq_eventfd_enabled(ring))
		return 0;

	if (!ring->cq.kflags)
		return -EOPNOTSUPP;

	flags = *ring->cq.kflags;

	if (enabled)
		flags &= ~IORING_CQ_EVENTFD_DISABLED;
	else
		flags |= IORING_CQ_EVENTFD_DISABLED;

	IO_URING_WRITE_ONCE(*ring->cq.kflags, flags);

	return 0;
}
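
/*
 * Usage sketch: register an eventfd for completion notifications, then
 * temporarily silence it around a burst of work. Assumes <sys/eventfd.h>:
 *
 *	int efd = eventfd(0, 0);
 *
 *	io_uring_register_eventfd(&ring, efd);
 *	io_uring_cq_eventfd_toggle(&ring, false);	// suppress notifications
 *	// ... submit and reap a burst of requests ...
 *	io_uring_cq_eventfd_toggle(&ring, true);	// re-enable
 */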

/*
 * Return an IO completion, waiting for 'wait_nr' completions if one isn't
 * readily available. Returns 0 with cqe_ptr filled in on success, -errno on
 * failure.
 */
IOURINGINLINE int io_uring_wait_cqe_nr(struct io_uring *ring,
				      struct io_uring_cqe **cqe_ptr,
				      unsigned wait_nr)
{
	return __io_uring_get_cqe(ring, cqe_ptr, 0, wait_nr, NULL);
}
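
/*
 * Usage sketch: submit a batch and block until at least eight completions
 * are available; 'cqe' then points at the first one:
 *
 *	struct io_uring_cqe *cqe;
 *
 *	io_uring_submit(&ring);
 *	int ret = io_uring_wait_cqe_nr(&ring, &cqe, 8);
 *	if (!ret) {
 *		// at least 8 CQEs are ready, starting at 'cqe'
 *	}
 */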

/*
 * Internal helper, don't use directly in applications. Use one of the
 * "official" versions of this, io_uring_peek_cqe(), io_uring_wait_cqe(),
 * or io_uring_wait_cqes*().
 */
IOURINGINLINE int __io_uring_peek_cqe(struct io_uring *ring,
				      struct io_uring_cqe **cqe_ptr,
				      unsigned *nr_available)
{
	struct io_uring_cqe *cqe;
	int err = 0;
	unsigned available;
	unsigned mask = ring->cq.ring_mask;
	int shift = 0;

	if (ring->flags & IORING_SETUP_CQE32)
		shift = 1;

	do {
		unsigned tail = io_uring_smp_load_acquire(ring->cq.ktail);
		unsigned head = *ring->cq.khead;

		cqe = NULL;
		available = tail - head;
		if (!available)
			break;

		cqe = &ring->cq.cqes[(head & mask) << shift];
		if (!(ring->features & IORING_FEAT_EXT_ARG) &&
				cqe->user_data == LIBURING_UDATA_TIMEOUT) {
			if (cqe->res < 0)
				err = cqe->res;
			io_uring_cq_advance(ring, 1);
			if (!err)
				continue;
			cqe = NULL;
		}

		break;
	} while (1);

	*cqe_ptr = cqe;
	if (nr_available)
		*nr_available = available;
	return err;
}

/*
 * Return an IO completion, if one is readily available. Returns 0 with
 * cqe_ptr filled in on success, -errno on failure.
 */
IOURINGINLINE int io_uring_peek_cqe(struct io_uring *ring,
				    struct io_uring_cqe **cqe_ptr)
{
	if (!__io_uring_peek_cqe(ring, cqe_ptr, NULL) && *cqe_ptr)
		return 0;

	return io_uring_wait_cqe_nr(ring, cqe_ptr, 0);
}

/*
 * Return an IO completion, waiting for it if necessary. Returns 0 with
 * cqe_ptr filled in on success, -errno on failure.
 */
IOURINGINLINE int io_uring_wait_cqe(struct io_uring *ring,
				    struct io_uring_cqe **cqe_ptr)
{
	if (!__io_uring_peek_cqe(ring, cqe_ptr, NULL) && *cqe_ptr)
		return 0;

	return io_uring_wait_cqe_nr(ring, cqe_ptr, 1);
}
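
/*
 * Usage sketch: the canonical prepare/submit/wait/seen cycle for a single
 * read. 'fd' and 'buf' are assumed to be an open file descriptor and a
 * suitably sized buffer:
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *	struct io_uring_cqe *cqe;
 *
 *	io_uring_prep_read(sqe, fd, buf, sizeof(buf), 0);
 *	io_uring_sqe_set_data64(sqe, 1);
 *	io_uring_submit(&ring);
 *
 *	if (!io_uring_wait_cqe(&ring, &cqe)) {
 *		// cqe->res holds bytes read, or -errno on failure
 *		io_uring_cqe_seen(&ring, cqe);
 *	}
 */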

/*
 * Return an sqe to fill. Application must later call io_uring_submit()
 * when it's ready to tell the kernel about it. The caller may call this
 * function multiple times before calling io_uring_submit().
 *
 * Returns a vacant sqe, or NULL if we're full.
 */
IOURINGINLINE struct io_uring_sqe *_io_uring_get_sqe(struct io_uring *ring)
{
	struct io_uring_sq *sq = &ring->sq;
	unsigned int head, next = sq->sqe_tail + 1;
	int shift = 0;

	if (ring->flags & IORING_SETUP_SQE128)
		shift = 1;
	if (!(ring->flags & IORING_SETUP_SQPOLL))
		head = *sq->khead;
	else
		head = io_uring_smp_load_acquire(sq->khead);

	if (next - head <= sq->ring_entries) {
		struct io_uring_sqe *sqe;

		sqe = &sq->sqes[(sq->sqe_tail & sq->ring_mask) << shift];
		sq->sqe_tail = next;
		io_uring_initialize_sqe(sqe);
		return sqe;
	}

	return NULL;
}
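
/*
 * Usage sketch: a NULL return means the SQ ring is full of unsubmitted
 * entries; submitting first frees the slots:
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	if (!sqe) {
 *		io_uring_submit(&ring);		// flush what has been queued
 *		sqe = io_uring_get_sqe(&ring);	// retry
 *	}
 */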

/*
 * Return the appropriate mask for a buffer ring of size 'ring_entries'
 */
IOURINGINLINE int io_uring_buf_ring_mask(__u32 ring_entries)
{
	return ring_entries - 1;
}

IOURINGINLINE void io_uring_buf_ring_init(struct io_uring_buf_ring *br)
{
	br->tail = 0;
}

/*
 * Assign 'buf' with the addr/len/buffer ID supplied
 */
IOURINGINLINE void io_uring_buf_ring_add(struct io_uring_buf_ring *br,
					 void *addr, unsigned int len,
					 unsigned short bid, int mask,
					 int buf_offset)
{
	struct io_uring_buf *buf = &br->bufs[(br->tail + buf_offset) & mask];

	buf->addr = (unsigned long) (uintptr_t) addr;
	buf->len = len;
	buf->bid = bid;
}

/*
 * Make 'count' new buffers visible to the kernel. Called after
 * io_uring_buf_ring_add() has been called 'count' times to fill in new
 * buffers.
 */
IOURINGINLINE void io_uring_buf_ring_advance(struct io_uring_buf_ring *br,
					     int count)
{
	unsigned short new_tail = br->tail + count;

	io_uring_smp_store_release(&br->tail, new_tail);
}

IOURINGINLINE void __io_uring_buf_ring_cq_advance(struct io_uring *ring,
						  struct io_uring_buf_ring *br,
						  int cq_count, int buf_count)
{
	io_uring_buf_ring_advance(br, buf_count);
	io_uring_cq_advance(ring, cq_count);
}

/*
 * Make 'count' new buffers visible to the kernel while at the same time
 * advancing the CQ ring seen entries. This can be used when the application
 * is using ring provided buffers and returns buffers while processing CQEs,
 * avoiding an extra atomic when needing to increment both the CQ ring and
 * the ring buffer index at the same time.
 */
IOURINGINLINE void io_uring_buf_ring_cq_advance(struct io_uring *ring,
						struct io_uring_buf_ring *br,
						int count)
{
	__io_uring_buf_ring_cq_advance(ring, br, count, count);
}
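
/*
 * Usage sketch: set up a provided buffer ring (buffer group 0), publish all
 * buffers, then later recycle one buffer while retiring its CQE in a single
 * step. 'bufs' and BUF_SIZE are placeholders for application-owned memory;
 * io_uring_setup_buf_ring() is declared elsewhere in liburing:
 *
 *	int ret, i;
 *	struct io_uring_buf_ring *br;
 *
 *	br = io_uring_setup_buf_ring(&ring, 8, 0, 0, &ret);
 *	for (i = 0; i < 8; i++)
 *		io_uring_buf_ring_add(br, bufs[i], BUF_SIZE, i,
 *				      io_uring_buf_ring_mask(8), i);
 *	io_uring_buf_ring_advance(br, 8);
 *
 * After reaping a CQE whose buffer ID 'bid' was taken from cqe->flags:
 *
 *	io_uring_buf_ring_add(br, bufs[bid], BUF_SIZE, bid,
 *			      io_uring_buf_ring_mask(8), 0);
 *	io_uring_buf_ring_cq_advance(&ring, br, 1);
 */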

IOURINGINLINE int io_uring_buf_ring_available(struct io_uring *ring,
					      struct io_uring_buf_ring *br,
					      unsigned short bgid)
{
	uint16_t head;
	int ret;

	ret = io_uring_buf_ring_head(ring, bgid, &head);
	if (ret)
		return ret;

	return (uint16_t) (br->tail - head);
}

#ifndef LIBURING_INTERNAL
IOURINGINLINE struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
{
	return _io_uring_get_sqe(ring);
}
#else
struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring);
#endif

ssize_t io_uring_mlock_size(unsigned entries, unsigned flags);
ssize_t io_uring_mlock_size_params(unsigned entries, struct io_uring_params *p);

/*
 * Versioning information for liburing.
 *
 * Use IO_URING_CHECK_VERSION() for compile time checks including from
 * preprocessor directives.
 *
 * Use io_uring_check_version() for runtime checks of the version of
 * liburing that was loaded by the dynamic linker.
 */
int io_uring_major_version(void);
int io_uring_minor_version(void);
bool io_uring_check_version(int major, int minor);

#define IO_URING_CHECK_VERSION(major,minor) \
  (major > IO_URING_VERSION_MAJOR ||        \
   (major == IO_URING_VERSION_MAJOR &&      \
    minor > IO_URING_VERSION_MINOR))
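
/*
 * Usage sketch: IO_URING_CHECK_VERSION() evaluates to true when the requested
 * version is newer than the headers being compiled against, so it can gate a
 * hypothetical build-time guard or fallback path:
 *
 *	#if IO_URING_CHECK_VERSION(2, 6)
 *	#error "liburing 2.6 or newer headers required"
 *	#endif
 *
 * io_uring_check_version() is the runtime counterpart noted in the comment
 * above, checking the library actually loaded by the dynamic linker.
 */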

#ifdef __cplusplus
}
#endif

#ifdef IOURINGINLINE
#undef IOURINGINLINE
#endif

#endif