1 /* SPDX-License-Identifier: MIT */
2 #ifndef LIB_URING_H
3 #define LIB_URING_H
4 
5 #include <sys/socket.h>
6 #include <sys/stat.h>
7 #include <sys/uio.h>
8 #include <errno.h>
9 #include <signal.h>
10 #include <stdbool.h>
11 #include <inttypes.h>
12 #include <time.h>
13 #include <fcntl.h>
14 #include <sched.h>
15 #include <linux/swab.h>
16 #include <sys/wait.h>
17 #include "liburing/compat.h"
18 #include "liburing/io_uring.h"
19 #include "liburing/io_uring_version.h"
20 #include "liburing/barrier.h"
21 
22 #ifndef uring_unlikely
23 #define uring_unlikely(cond)	__builtin_expect(!!(cond), 0)
24 #endif
25 
26 #ifndef uring_likely
27 #define uring_likely(cond)	__builtin_expect(!!(cond), 1)
28 #endif
29 
30 #ifndef IOURINGINLINE
31 #define IOURINGINLINE static inline
32 #endif
33 
34 #ifdef __alpha__
35 /*
36  * alpha and mips are the exceptions; all other architectures have
37  * common numbers for new system calls.
38  */
39 #ifndef __NR_io_uring_setup
40 #define __NR_io_uring_setup		535
41 #endif
42 #ifndef __NR_io_uring_enter
43 #define __NR_io_uring_enter		536
44 #endif
45 #ifndef __NR_io_uring_register
46 #define __NR_io_uring_register		537
47 #endif
48 #elif defined __mips__
49 #ifndef __NR_io_uring_setup
50 #define __NR_io_uring_setup		(__NR_Linux + 425)
51 #endif
52 #ifndef __NR_io_uring_enter
53 #define __NR_io_uring_enter		(__NR_Linux + 426)
54 #endif
55 #ifndef __NR_io_uring_register
56 #define __NR_io_uring_register		(__NR_Linux + 427)
57 #endif
58 #else /* !__alpha__ and !__mips__ */
59 #ifndef __NR_io_uring_setup
60 #define __NR_io_uring_setup		425
61 #endif
62 #ifndef __NR_io_uring_enter
63 #define __NR_io_uring_enter		426
64 #endif
65 #ifndef __NR_io_uring_register
66 #define __NR_io_uring_register		427
67 #endif
68 #endif
69 
70 #ifdef __cplusplus
71 extern "C" {
72 #endif
73 
74 /*
75  * Library interface to io_uring
76  */
77 struct io_uring_sq {
78 	unsigned *khead;
79 	unsigned *ktail;
80 	// Deprecated: use `ring_mask` instead of `*kring_mask`
81 	unsigned *kring_mask;
82 	// Deprecated: use `ring_entries` instead of `*kring_entries`
83 	unsigned *kring_entries;
84 	unsigned *kflags;
85 	unsigned *kdropped;
86 	unsigned *array;
87 	struct io_uring_sqe *sqes;
88 
89 	unsigned sqe_head;
90 	unsigned sqe_tail;
91 
92 	size_t ring_sz;
93 	void *ring_ptr;
94 
95 	unsigned ring_mask;
96 	unsigned ring_entries;
97 
98 	unsigned pad[2];
99 };
100 
101 struct io_uring_cq {
102 	unsigned *khead;
103 	unsigned *ktail;
104 	// Deprecated: use `ring_mask` instead of `*kring_mask`
105 	unsigned *kring_mask;
106 	// Deprecated: use `ring_entries` instead of `*kring_entries`
107 	unsigned *kring_entries;
108 	unsigned *kflags;
109 	unsigned *koverflow;
110 	struct io_uring_cqe *cqes;
111 
112 	size_t ring_sz;
113 	void *ring_ptr;
114 
115 	unsigned ring_mask;
116 	unsigned ring_entries;
117 
118 	unsigned pad[2];
119 };
120 
121 struct io_uring {
122 	struct io_uring_sq sq;
123 	struct io_uring_cq cq;
124 	unsigned flags;
125 	int ring_fd;
126 
127 	unsigned features;
128 	int enter_ring_fd;
129 	__u8 int_flags;
130 	__u8 pad[3];
131 	unsigned pad2;
132 };
133 
134 /*
135  * Library interface
136  */
137 
138 /*
139  * Returns an allocated io_uring_probe structure, or NULL if the probe fails
140  * (e.g. if io_uring is not available). The caller is responsible for freeing it
141  */
142 struct io_uring_probe *io_uring_get_probe_ring(struct io_uring *ring);
143 /* same as io_uring_get_probe_ring, but takes care of ring init and teardown */
144 struct io_uring_probe *io_uring_get_probe(void);
145 
146 /*
147  * frees a probe allocated through io_uring_get_probe() or
148  * io_uring_get_probe_ring()
149  */
150 void io_uring_free_probe(struct io_uring_probe *probe);
151 
152 IOURINGINLINE int io_uring_opcode_supported(const struct io_uring_probe *p,
153 					    int op)
154 {
155 	if (op > p->last_op)
156 		return 0;
157 	return (p->ops[op].flags & IO_URING_OP_SUPPORTED) != 0;
158 }
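/*
 * Editorial example (a sketch, not part of the upstream header): checking
 * whether the running kernel supports IORING_OP_READ via the probe API
 * declared above. use_io_uring_read() stands in for application logic:
 *
 *	struct io_uring_probe *probe = io_uring_get_probe();
 *
 *	if (probe) {
 *		if (io_uring_opcode_supported(probe, IORING_OP_READ))
 *			use_io_uring_read();
 *		io_uring_free_probe(probe);
 *	}
 */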
159 
160 int io_uring_queue_init_mem(unsigned entries, struct io_uring *ring,
161 				struct io_uring_params *p,
162 				void *buf, size_t buf_size);
163 int io_uring_queue_init_params(unsigned entries, struct io_uring *ring,
164 				struct io_uring_params *p);
165 int io_uring_queue_init(unsigned entries, struct io_uring *ring,
166 			unsigned flags);
167 int io_uring_queue_mmap(int fd, struct io_uring_params *p,
168 			struct io_uring *ring);
169 int io_uring_ring_dontfork(struct io_uring *ring);
170 void io_uring_queue_exit(struct io_uring *ring);
171 unsigned io_uring_peek_batch_cqe(struct io_uring *ring,
172 	struct io_uring_cqe **cqes, unsigned count);
173 int io_uring_wait_cqes(struct io_uring *ring, struct io_uring_cqe **cqe_ptr,
174 		       unsigned wait_nr, struct __kernel_timespec *ts,
175 		       sigset_t *sigmask);
176 int io_uring_wait_cqe_timeout(struct io_uring *ring,
177 			      struct io_uring_cqe **cqe_ptr,
178 			      struct __kernel_timespec *ts);
179 int io_uring_submit(struct io_uring *ring);
180 int io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr);
181 int io_uring_submit_and_wait_timeout(struct io_uring *ring,
182 				     struct io_uring_cqe **cqe_ptr,
183 				     unsigned wait_nr,
184 				     struct __kernel_timespec *ts,
185 				     sigset_t *sigmask);
186 
187 int io_uring_register_buffers(struct io_uring *ring, const struct iovec *iovecs,
188 			      unsigned nr_iovecs);
189 int io_uring_register_buffers_tags(struct io_uring *ring,
190 				   const struct iovec *iovecs,
191 				   const __u64 *tags, unsigned nr);
192 int io_uring_register_buffers_sparse(struct io_uring *ring, unsigned nr);
193 int io_uring_register_buffers_update_tag(struct io_uring *ring,
194 					 unsigned off,
195 					 const struct iovec *iovecs,
196 					 const __u64 *tags, unsigned nr);
197 int io_uring_unregister_buffers(struct io_uring *ring);
198 
199 int io_uring_register_files(struct io_uring *ring, const int *files,
200 			    unsigned nr_files);
201 int io_uring_register_files_tags(struct io_uring *ring, const int *files,
202 				 const __u64 *tags, unsigned nr);
203 int io_uring_register_files_sparse(struct io_uring *ring, unsigned nr);
204 int io_uring_register_files_update_tag(struct io_uring *ring, unsigned off,
205 				       const int *files, const __u64 *tags,
206 				       unsigned nr_files);
207 
208 int io_uring_unregister_files(struct io_uring *ring);
209 int io_uring_register_files_update(struct io_uring *ring, unsigned off,
210 				   const int *files, unsigned nr_files);
211 int io_uring_register_eventfd(struct io_uring *ring, int fd);
212 int io_uring_register_eventfd_async(struct io_uring *ring, int fd);
213 int io_uring_unregister_eventfd(struct io_uring *ring);
214 int io_uring_register_probe(struct io_uring *ring, struct io_uring_probe *p,
215 			    unsigned nr);
216 int io_uring_register_personality(struct io_uring *ring);
217 int io_uring_unregister_personality(struct io_uring *ring, int id);
218 int io_uring_register_restrictions(struct io_uring *ring,
219 				   struct io_uring_restriction *res,
220 				   unsigned int nr_res);
221 int io_uring_enable_rings(struct io_uring *ring);
222 int __io_uring_sqring_wait(struct io_uring *ring);
223 int io_uring_register_iowq_aff(struct io_uring *ring, size_t cpusz,
224 				const cpu_set_t *mask);
225 int io_uring_unregister_iowq_aff(struct io_uring *ring);
226 int io_uring_register_iowq_max_workers(struct io_uring *ring,
227 				       unsigned int *values);
228 int io_uring_register_ring_fd(struct io_uring *ring);
229 int io_uring_unregister_ring_fd(struct io_uring *ring);
230 int io_uring_close_ring_fd(struct io_uring *ring);
231 int io_uring_register_buf_ring(struct io_uring *ring,
232 			       struct io_uring_buf_reg *reg, unsigned int flags);
233 int io_uring_unregister_buf_ring(struct io_uring *ring, int bgid);
234 int io_uring_buf_ring_head(struct io_uring *ring, int buf_group, uint16_t *head);
235 int io_uring_register_sync_cancel(struct io_uring *ring,
236 				 struct io_uring_sync_cancel_reg *reg);
237 
238 int io_uring_register_file_alloc_range(struct io_uring *ring,
239 					unsigned off, unsigned len);
240 
241 int io_uring_register_napi(struct io_uring *ring, struct io_uring_napi *napi);
242 int io_uring_unregister_napi(struct io_uring *ring, struct io_uring_napi *napi);
243 
244 int io_uring_get_events(struct io_uring *ring);
245 int io_uring_submit_and_get_events(struct io_uring *ring);
246 
247 /*
248  * io_uring syscalls.
249  */
250 int io_uring_enter(unsigned int fd, unsigned int to_submit,
251 		   unsigned int min_complete, unsigned int flags, sigset_t *sig);
252 int io_uring_enter2(unsigned int fd, unsigned int to_submit,
253 		    unsigned int min_complete, unsigned int flags,
254 		    sigset_t *sig, size_t sz);
255 int io_uring_setup(unsigned int entries, struct io_uring_params *p);
256 int io_uring_register(unsigned int fd, unsigned int opcode, const void *arg,
257 		      unsigned int nr_args);
258 
259 /*
260  * Mapped buffer ring alloc/register + unregister/free helpers
261  */
262 struct io_uring_buf_ring *io_uring_setup_buf_ring(struct io_uring *ring,
263 						  unsigned int nentries,
264 						  int bgid, unsigned int flags,
265 						  int *ret);
266 int io_uring_free_buf_ring(struct io_uring *ring, struct io_uring_buf_ring *br,
267 			   unsigned int nentries, int bgid);
268 
269 /*
270  * Helper for the peek/wait single cqe functions. Exported because of that,
271  * but probably shouldn't be used directly in an application.
272  */
273 int __io_uring_get_cqe(struct io_uring *ring,
274 			struct io_uring_cqe **cqe_ptr, unsigned submit,
275 			unsigned wait_nr, sigset_t *sigmask);
276 
277 #define LIBURING_UDATA_TIMEOUT	((__u64) -1)
278 
279 /*
280  * Calculates the step size for CQE iteration.
281  * 	For standard CQEs it's 1, for big CQEs it's 2.
282  */
283 #define io_uring_cqe_shift(ring)					\
284 	(!!((ring)->flags & IORING_SETUP_CQE32))
285 
286 #define io_uring_cqe_index(ring,ptr,mask)				\
287 	(((ptr) & (mask)) << io_uring_cqe_shift(ring))
288 
289 #define io_uring_for_each_cqe(ring, head, cqe)				\
290 	/*								\
291 	 * io_uring_smp_load_acquire() enforces the order of tail	\
292 	 * and CQE reads.						\
293 	 */								\
294 	for (head = *(ring)->cq.khead;					\
295 	     (cqe = (head != io_uring_smp_load_acquire((ring)->cq.ktail) ? \
296 		&(ring)->cq.cqes[io_uring_cqe_index(ring, head, (ring)->cq.ring_mask)] : NULL)); \
297 	     head++)							\
298 
299 /*
300  * Must be called after io_uring_for_each_cqe()
301  */
302 IOURINGINLINE void io_uring_cq_advance(struct io_uring *ring, unsigned nr)
303 {
304 	if (nr) {
305 		struct io_uring_cq *cq = &ring->cq;
306 
307 		/*
308 		 * Ensure that the kernel only sees the new value of the head
309 		 * index after the CQEs have been read.
310 		 */
311 		io_uring_smp_store_release(cq->khead, *cq->khead + nr);
312 	}
313 }
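/*
 * Editorial example (a sketch, not upstream documentation): draining the CQ
 * ring in a batch with io_uring_for_each_cqe() and a single
 * io_uring_cq_advance(). 'ring' is an initialized ring and
 * handle_completion() is a hypothetical application callback:
 *
 *	struct io_uring_cqe *cqe;
 *	unsigned head, seen = 0;
 *
 *	io_uring_for_each_cqe(&ring, head, cqe) {
 *		handle_completion(cqe);
 *		seen++;
 *	}
 *	io_uring_cq_advance(&ring, seen);
 */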
314 
315 /*
316  * Must be called after io_uring_{peek,wait}_cqe() once the cqe has
317  * been processed by the application.
318  */
319 IOURINGINLINE void io_uring_cqe_seen(struct io_uring *ring,
320 				     struct io_uring_cqe *cqe)
321 {
322 	if (cqe)
323 		io_uring_cq_advance(ring, 1);
324 }
325 
326 /*
327  * Command prep helpers
328  */
329 
330 /*
331  * Associate pointer @data with the sqe, for later retrieval from the cqe
332  * at command completion time with io_uring_cqe_get_data().
333  */
334 IOURINGINLINE void io_uring_sqe_set_data(struct io_uring_sqe *sqe, void *data)
335 {
336 	sqe->user_data = (unsigned long) data;
337 }
338 
339 IOURINGINLINE void *io_uring_cqe_get_data(const struct io_uring_cqe *cqe)
340 {
341 	return (void *) (uintptr_t) cqe->user_data;
342 }
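/*
 * Editorial example (sketch): carrying per-request state through user_data.
 * 'ring', 'sqe', new_request() and 'struct my_request' are assumed
 * application state:
 *
 *	struct my_request *req = new_request();
 *	struct io_uring_cqe *cqe;
 *
 *	io_uring_sqe_set_data(sqe, req);
 *	io_uring_submit(&ring);
 *
 *	io_uring_wait_cqe(&ring, &cqe);
 *	req = io_uring_cqe_get_data(cqe);
 *	io_uring_cqe_seen(&ring, cqe);
 */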
343 
344 /*
345  * Assign a 64-bit value to this sqe, which can be retrieved at completion
346  * time with io_uring_cqe_get_data64. Just like the non-64 variants, except
347  * these store a 64-bit type rather than a data pointer.
348  */
349 IOURINGINLINE void io_uring_sqe_set_data64(struct io_uring_sqe *sqe,
350 					   __u64 data)
351 {
352 	sqe->user_data = data;
353 }
354 
355 IOURINGINLINE __u64 io_uring_cqe_get_data64(const struct io_uring_cqe *cqe)
356 {
357 	return cqe->user_data;
358 }
359 
360 /*
361  * Tell the app we have the 64-bit variants of the get/set userdata
362  */
363 #define LIBURING_HAVE_DATA64
364 
365 IOURINGINLINE void io_uring_sqe_set_flags(struct io_uring_sqe *sqe,
366 					  unsigned flags)
367 {
368 	sqe->flags = (__u8) flags;
369 }
370 
371 IOURINGINLINE void __io_uring_set_target_fixed_file(struct io_uring_sqe *sqe,
372 						    unsigned int file_index)
373 {
374 	/* 0 means no fixed files, indexes should be encoded as "index + 1" */
375 	sqe->file_index = file_index + 1;
376 }
377 
378 IOURINGINLINE void io_uring_initialize_sqe(struct io_uring_sqe *sqe)
379 {
380 	sqe->flags = 0;
381 	sqe->ioprio = 0;
382 	sqe->rw_flags = 0;
383 	sqe->buf_index = 0;
384 	sqe->personality = 0;
385 	sqe->file_index = 0;
386 	sqe->addr3 = 0;
387 	sqe->__pad2[0] = 0;
388 }
389 
390 IOURINGINLINE void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd,
391 				    const void *addr, unsigned len,
392 				    __u64 offset)
393 {
394 	sqe->opcode = (__u8) op;
395 	sqe->fd = fd;
396 	sqe->off = offset;
397 	sqe->addr = (unsigned long) addr;
398 	sqe->len = len;
399 }
400 
401 /*
402  * io_uring_prep_splice() - Either @fd_in or @fd_out must be a pipe.
403  *
404  * - If @fd_in refers to a pipe, @off_in is ignored and must be set to -1.
405  *
406  * - If @fd_in does not refer to a pipe and @off_in is -1, then @nbytes are read
407  *   from @fd_in starting from the file offset, which is incremented by the
408  *   number of bytes read.
409  *
410  * - If @fd_in does not refer to a pipe and @off_in is not -1, then the starting
411  *   offset of @fd_in will be @off_in.
412  *
413  * This splice operation can be used to implement sendfile by splicing to an
414  * intermediate pipe first, then splicing to the final destination.
415  * In fact, the kernel's implementation of sendfile uses splice internally.
416  *
417  * NOTE that even if fd_in or fd_out refers to a pipe, the splice operation
418  * can still fail with EINVAL if one of the fds doesn't explicitly support
419  * splice, e.g. reading from a terminal is unsupported from kernels 5.7 to 5.11.
420  * See issue #291 for more information.
421  */
422 IOURINGINLINE void io_uring_prep_splice(struct io_uring_sqe *sqe,
423 					int fd_in, int64_t off_in,
424 					int fd_out, int64_t off_out,
425 					unsigned int nbytes,
426 					unsigned int splice_flags)
427 {
428 	io_uring_prep_rw(IORING_OP_SPLICE, sqe, fd_out, NULL, nbytes,
429 				(__u64) off_out);
430 	sqe->splice_off_in = (__u64) off_in;
431 	sqe->splice_fd_in = fd_in;
432 	sqe->splice_flags = splice_flags;
433 }
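/*
 * Editorial example (sketch): a sendfile-like copy of 'len' bytes from
 * 'file_fd' to 'sock_fd' through the pipe 'p[2]', using two linked splice
 * SQEs as described above. All descriptors, 'len' and 'ring' are assumed to
 * be set up by the application:
 *
 *	struct io_uring_sqe *sqe;
 *
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_splice(sqe, file_fd, 0, p[1], -1, len, 0);
 *	sqe->flags |= IOSQE_IO_LINK;
 *
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_splice(sqe, p[0], -1, sock_fd, -1, len, 0);
 *
 *	io_uring_submit(&ring);
 */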
434 
435 IOURINGINLINE void io_uring_prep_tee(struct io_uring_sqe *sqe,
436 				     int fd_in, int fd_out,
437 				     unsigned int nbytes,
438 				     unsigned int splice_flags)
439 {
440 	io_uring_prep_rw(IORING_OP_TEE, sqe, fd_out, NULL, nbytes, 0);
441 	sqe->splice_off_in = 0;
442 	sqe->splice_fd_in = fd_in;
443 	sqe->splice_flags = splice_flags;
444 }
445 
446 IOURINGINLINE void io_uring_prep_readv(struct io_uring_sqe *sqe, int fd,
447 				       const struct iovec *iovecs,
448 				       unsigned nr_vecs, __u64 offset)
449 {
450 	io_uring_prep_rw(IORING_OP_READV, sqe, fd, iovecs, nr_vecs, offset);
451 }
452 
453 IOURINGINLINE void io_uring_prep_readv2(struct io_uring_sqe *sqe, int fd,
454 				       const struct iovec *iovecs,
455 				       unsigned nr_vecs, __u64 offset,
456 				       int flags)
457 {
458 	io_uring_prep_readv(sqe, fd, iovecs, nr_vecs, offset);
459 	sqe->rw_flags = flags;
460 }
461 
462 IOURINGINLINE void io_uring_prep_read_fixed(struct io_uring_sqe *sqe, int fd,
463 					    void *buf, unsigned nbytes,
464 					    __u64 offset, int buf_index)
465 {
466 	io_uring_prep_rw(IORING_OP_READ_FIXED, sqe, fd, buf, nbytes, offset);
467 	sqe->buf_index = (__u16) buf_index;
468 }
469 
470 IOURINGINLINE void io_uring_prep_writev(struct io_uring_sqe *sqe, int fd,
471 					const struct iovec *iovecs,
472 					unsigned nr_vecs, __u64 offset)
473 {
474 	io_uring_prep_rw(IORING_OP_WRITEV, sqe, fd, iovecs, nr_vecs, offset);
475 }
476 
477 IOURINGINLINE void io_uring_prep_writev2(struct io_uring_sqe *sqe, int fd,
478 				       const struct iovec *iovecs,
479 				       unsigned nr_vecs, __u64 offset,
480 				       int flags)
481 {
482 	io_uring_prep_writev(sqe, fd, iovecs, nr_vecs, offset);
483 	sqe->rw_flags = flags;
484 }
485 
486 IOURINGINLINE void io_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd,
487 					     const void *buf, unsigned nbytes,
488 					     __u64 offset, int buf_index)
489 {
490 	io_uring_prep_rw(IORING_OP_WRITE_FIXED, sqe, fd, buf, nbytes, offset);
491 	sqe->buf_index = (__u16) buf_index;
492 }
493 
494 IOURINGINLINE void io_uring_prep_recvmsg(struct io_uring_sqe *sqe, int fd,
495 					 struct msghdr *msg, unsigned flags)
496 {
497 	io_uring_prep_rw(IORING_OP_RECVMSG, sqe, fd, msg, 1, 0);
498 	sqe->msg_flags = flags;
499 }
500 
501 IOURINGINLINE void io_uring_prep_recvmsg_multishot(struct io_uring_sqe *sqe,
502 						   int fd, struct msghdr *msg,
503 						   unsigned flags)
504 {
505 	io_uring_prep_recvmsg(sqe, fd, msg, flags);
506 	sqe->ioprio |= IORING_RECV_MULTISHOT;
507 }
508 
509 IOURINGINLINE void io_uring_prep_sendmsg(struct io_uring_sqe *sqe, int fd,
510 					 const struct msghdr *msg,
511 					 unsigned flags)
512 {
513 	io_uring_prep_rw(IORING_OP_SENDMSG, sqe, fd, msg, 1, 0);
514 	sqe->msg_flags = flags;
515 }
516 
517 IOURINGINLINE unsigned __io_uring_prep_poll_mask(unsigned poll_mask)
518 {
519 #if __BYTE_ORDER == __BIG_ENDIAN
520 	poll_mask = __swahw32(poll_mask);
521 #endif
522 	return poll_mask;
523 }
524 
525 IOURINGINLINE void io_uring_prep_poll_add(struct io_uring_sqe *sqe, int fd,
526 					  unsigned poll_mask)
527 {
528 	io_uring_prep_rw(IORING_OP_POLL_ADD, sqe, fd, NULL, 0, 0);
529 	sqe->poll32_events = __io_uring_prep_poll_mask(poll_mask);
530 }
531 
532 IOURINGINLINE void io_uring_prep_poll_multishot(struct io_uring_sqe *sqe,
533 						int fd, unsigned poll_mask)
534 {
535 	io_uring_prep_poll_add(sqe, fd, poll_mask);
536 	sqe->len = IORING_POLL_ADD_MULTI;
537 }
538 
539 IOURINGINLINE void io_uring_prep_poll_remove(struct io_uring_sqe *sqe,
540 					     __u64 user_data)
541 {
542 	io_uring_prep_rw(IORING_OP_POLL_REMOVE, sqe, -1, NULL, 0, 0);
543 	sqe->addr = user_data;
544 }
545 
546 IOURINGINLINE void io_uring_prep_poll_update(struct io_uring_sqe *sqe,
547 					     __u64 old_user_data,
548 					     __u64 new_user_data,
549 					     unsigned poll_mask, unsigned flags)
550 {
551 	io_uring_prep_rw(IORING_OP_POLL_REMOVE, sqe, -1, NULL, flags,
552 			 new_user_data);
553 	sqe->addr = old_user_data;
554 	sqe->poll32_events = __io_uring_prep_poll_mask(poll_mask);
555 }
556 
557 IOURINGINLINE void io_uring_prep_fsync(struct io_uring_sqe *sqe, int fd,
558 				       unsigned fsync_flags)
559 {
560 	io_uring_prep_rw(IORING_OP_FSYNC, sqe, fd, NULL, 0, 0);
561 	sqe->fsync_flags = fsync_flags;
562 }
563 
564 IOURINGINLINE void io_uring_prep_nop(struct io_uring_sqe *sqe)
565 {
566 	io_uring_prep_rw(IORING_OP_NOP, sqe, -1, NULL, 0, 0);
567 }
568 
569 IOURINGINLINE void io_uring_prep_timeout(struct io_uring_sqe *sqe,
570 					 struct __kernel_timespec *ts,
571 					 unsigned count, unsigned flags)
572 {
573 	io_uring_prep_rw(IORING_OP_TIMEOUT, sqe, -1, ts, 1, count);
574 	sqe->timeout_flags = flags;
575 }
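/*
 * Editorial example (sketch): arming a 1 second timeout with count == 1, so
 * the timeout CQE completes with 0 once another CQE is posted, or with
 * -ETIME if the timer expires first. 'ring' is an initialized ring and 'ts'
 * must stay valid until the timeout completes:
 *
 *	struct __kernel_timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	io_uring_prep_timeout(sqe, &ts, 1, 0);
 *	io_uring_submit(&ring);
 */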
576 
577 IOURINGINLINE void io_uring_prep_timeout_remove(struct io_uring_sqe *sqe,
578 						__u64 user_data, unsigned flags)
579 {
580 	io_uring_prep_rw(IORING_OP_TIMEOUT_REMOVE, sqe, -1, NULL, 0, 0);
581 	sqe->addr = user_data;
582 	sqe->timeout_flags = flags;
583 }
584 
585 IOURINGINLINE void io_uring_prep_timeout_update(struct io_uring_sqe *sqe,
586 						struct __kernel_timespec *ts,
587 						__u64 user_data, unsigned flags)
588 {
589 	io_uring_prep_rw(IORING_OP_TIMEOUT_REMOVE, sqe, -1, NULL, 0,
590 				(uintptr_t) ts);
591 	sqe->addr = user_data;
592 	sqe->timeout_flags = flags | IORING_TIMEOUT_UPDATE;
593 }
594 
595 IOURINGINLINE void io_uring_prep_accept(struct io_uring_sqe *sqe, int fd,
596 					struct sockaddr *addr,
597 					socklen_t *addrlen, int flags)
598 {
599 	io_uring_prep_rw(IORING_OP_ACCEPT, sqe, fd, addr, 0,
600 				(__u64) (unsigned long) addrlen);
601 	sqe->accept_flags = (__u32) flags;
602 }
603 
604 /* accept directly into the fixed file table */
605 IOURINGINLINE void io_uring_prep_accept_direct(struct io_uring_sqe *sqe, int fd,
606 					       struct sockaddr *addr,
607 					       socklen_t *addrlen, int flags,
608 					       unsigned int file_index)
609 {
610 	io_uring_prep_accept(sqe, fd, addr, addrlen, flags);
611 	/* offset by 1 for allocation */
612 	if (file_index == IORING_FILE_INDEX_ALLOC)
613 		file_index--;
614 	__io_uring_set_target_fixed_file(sqe, file_index);
615 }
616 
617 IOURINGINLINE void io_uring_prep_multishot_accept(struct io_uring_sqe *sqe,
618 						  int fd, struct sockaddr *addr,
619 						  socklen_t *addrlen, int flags)
620 {
621 	io_uring_prep_accept(sqe, fd, addr, addrlen, flags);
622 	sqe->ioprio |= IORING_ACCEPT_MULTISHOT;
623 }
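/*
 * Editorial example (sketch): a multishot accept posts one CQE per accepted
 * connection, with cqe->res holding the new descriptor. While
 * IORING_CQE_F_MORE is set in cqe->flags the request stays armed; once a
 * CQE arrives without it, the accept must be re-armed. 'ring', 'listen_fd'
 * and handle_client() are assumed application state:
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *	struct io_uring_cqe *cqe;
 *
 *	io_uring_prep_multishot_accept(sqe, listen_fd, NULL, NULL, 0);
 *	io_uring_submit(&ring);
 *
 *	io_uring_wait_cqe(&ring, &cqe);
 *	if (cqe->res >= 0)
 *		handle_client(cqe->res);
 *	io_uring_cqe_seen(&ring, cqe);
 */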
624 
625 /* multishot accept directly into the fixed file table */
626 IOURINGINLINE void io_uring_prep_multishot_accept_direct(struct io_uring_sqe *sqe,
627 							 int fd,
628 							 struct sockaddr *addr,
629 							 socklen_t *addrlen,
630 							 int flags)
631 {
632 	io_uring_prep_multishot_accept(sqe, fd, addr, addrlen, flags);
633 	__io_uring_set_target_fixed_file(sqe, IORING_FILE_INDEX_ALLOC - 1);
634 }
635 
636 IOURINGINLINE void io_uring_prep_cancel64(struct io_uring_sqe *sqe,
637 					  __u64 user_data, int flags)
638 {
639 	io_uring_prep_rw(IORING_OP_ASYNC_CANCEL, sqe, -1, NULL, 0, 0);
640 	sqe->addr = user_data;
641 	sqe->cancel_flags = (__u32) flags;
642 }
643 
644 IOURINGINLINE void io_uring_prep_cancel(struct io_uring_sqe *sqe,
645 					void *user_data, int flags)
646 {
647 	io_uring_prep_cancel64(sqe, (__u64) (uintptr_t) user_data, flags);
648 }
649 
650 IOURINGINLINE void io_uring_prep_cancel_fd(struct io_uring_sqe *sqe, int fd,
651 					   unsigned int flags)
652 {
653 	io_uring_prep_rw(IORING_OP_ASYNC_CANCEL, sqe, fd, NULL, 0, 0);
654 	sqe->cancel_flags = (__u32) flags | IORING_ASYNC_CANCEL_FD;
655 }
656 
657 IOURINGINLINE void io_uring_prep_link_timeout(struct io_uring_sqe *sqe,
658 					      struct __kernel_timespec *ts,
659 					      unsigned flags)
660 {
661 	io_uring_prep_rw(IORING_OP_LINK_TIMEOUT, sqe, -1, ts, 1, 0);
662 	sqe->timeout_flags = flags;
663 }
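/*
 * Editorial example (sketch): bounding a recv with a per-request timeout by
 * linking it to an IORING_OP_LINK_TIMEOUT SQE; if the timer fires first, the
 * recv completes with -ECANCELED. 'ring', 'sockfd' and 'buf' are assumed
 * application state:
 *
 *	struct __kernel_timespec ts = { .tv_sec = 5, .tv_nsec = 0 };
 *	struct io_uring_sqe *sqe;
 *
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_recv(sqe, sockfd, buf, sizeof(buf), 0);
 *	sqe->flags |= IOSQE_IO_LINK;
 *
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_link_timeout(sqe, &ts, 0);
 *
 *	io_uring_submit(&ring);
 */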
664 
665 IOURINGINLINE void io_uring_prep_connect(struct io_uring_sqe *sqe, int fd,
666 					 const struct sockaddr *addr,
667 					 socklen_t addrlen)
668 {
669 	io_uring_prep_rw(IORING_OP_CONNECT, sqe, fd, addr, 0, addrlen);
670 }
671 
672 IOURINGINLINE void io_uring_prep_bind(struct io_uring_sqe *sqe, int fd,
673 				      struct sockaddr *addr,
674 				      socklen_t addrlen)
675 {
676 	io_uring_prep_rw(IORING_OP_BIND, sqe, fd, addr, 0, addrlen);
677 }
678 
679 IOURINGINLINE void io_uring_prep_listen(struct io_uring_sqe *sqe, int fd,
680 				      int backlog)
681 {
682 	io_uring_prep_rw(IORING_OP_LISTEN, sqe, fd, 0, backlog, 0);
683 }
684 
685 IOURINGINLINE void io_uring_prep_files_update(struct io_uring_sqe *sqe,
686 					      int *fds, unsigned nr_fds,
687 					      int offset)
688 {
689 	io_uring_prep_rw(IORING_OP_FILES_UPDATE, sqe, -1, fds, nr_fds,
690 				(__u64) offset);
691 }
692 
693 IOURINGINLINE void io_uring_prep_fallocate(struct io_uring_sqe *sqe, int fd,
694 					   int mode, __u64 offset, __u64 len)
695 {
696 	io_uring_prep_rw(IORING_OP_FALLOCATE, sqe, fd,
697 			0, (unsigned int) mode, (__u64) offset);
698 	sqe->addr = (__u64) len;
699 }
700 
701 IOURINGINLINE void io_uring_prep_openat(struct io_uring_sqe *sqe, int dfd,
702 					const char *path, int flags,
703 					mode_t mode)
704 {
705 	io_uring_prep_rw(IORING_OP_OPENAT, sqe, dfd, path, mode, 0);
706 	sqe->open_flags = (__u32) flags;
707 }
708 
709 /* open directly into the fixed file table */
710 IOURINGINLINE void io_uring_prep_openat_direct(struct io_uring_sqe *sqe,
711 					       int dfd, const char *path,
712 					       int flags, mode_t mode,
713 					       unsigned file_index)
714 {
715 	io_uring_prep_openat(sqe, dfd, path, flags, mode);
716 	/* offset by 1 for allocation */
717 	if (file_index == IORING_FILE_INDEX_ALLOC)
718 		file_index--;
719 	__io_uring_set_target_fixed_file(sqe, file_index);
720 }
721 
722 IOURINGINLINE void io_uring_prep_close(struct io_uring_sqe *sqe, int fd)
723 {
724 	io_uring_prep_rw(IORING_OP_CLOSE, sqe, fd, NULL, 0, 0);
725 }
726 
727 IOURINGINLINE void io_uring_prep_close_direct(struct io_uring_sqe *sqe,
728 					      unsigned file_index)
729 {
730 	io_uring_prep_close(sqe, 0);
731 	__io_uring_set_target_fixed_file(sqe, file_index);
732 }
733 
734 IOURINGINLINE void io_uring_prep_read(struct io_uring_sqe *sqe, int fd,
735 				      void *buf, unsigned nbytes, __u64 offset)
736 {
737 	io_uring_prep_rw(IORING_OP_READ, sqe, fd, buf, nbytes, offset);
738 }
739 
740 IOURINGINLINE void io_uring_prep_read_multishot(struct io_uring_sqe *sqe,
741 						int fd, unsigned nbytes,
742 						__u64 offset, int buf_group)
743 {
744 	io_uring_prep_rw(IORING_OP_READ_MULTISHOT, sqe, fd, NULL, nbytes,
745 			 offset);
746 	sqe->buf_group = buf_group;
747 	sqe->flags = IOSQE_BUFFER_SELECT;
748 }
749 
750 IOURINGINLINE void io_uring_prep_write(struct io_uring_sqe *sqe, int fd,
751 				       const void *buf, unsigned nbytes,
752 				       __u64 offset)
753 {
754 	io_uring_prep_rw(IORING_OP_WRITE, sqe, fd, buf, nbytes, offset);
755 }
756 
757 struct statx;
758 IOURINGINLINE void io_uring_prep_statx(struct io_uring_sqe *sqe, int dfd,
759 				       const char *path, int flags,
760 				       unsigned mask, struct statx *statxbuf)
761 {
762 	io_uring_prep_rw(IORING_OP_STATX, sqe, dfd, path, mask,
763 				(__u64) (unsigned long) statxbuf);
764 	sqe->statx_flags = (__u32) flags;
765 }
766 
767 IOURINGINLINE void io_uring_prep_fadvise(struct io_uring_sqe *sqe, int fd,
768 					 __u64 offset, __u32 len, int advice)
769 {
770 	io_uring_prep_rw(IORING_OP_FADVISE, sqe, fd, NULL, (__u32) len, offset);
771 	sqe->fadvise_advice = (__u32) advice;
772 }
773 
774 IOURINGINLINE void io_uring_prep_madvise(struct io_uring_sqe *sqe, void *addr,
775 					 __u32 length, int advice)
776 {
777 	io_uring_prep_rw(IORING_OP_MADVISE, sqe, -1, addr, (__u32) length, 0);
778 	sqe->fadvise_advice = (__u32) advice;
779 }
780 
781 IOURINGINLINE void io_uring_prep_fadvise64(struct io_uring_sqe *sqe, int fd,
782 					 __u64 offset, off_t len, int advice)
783 {
784 	io_uring_prep_rw(IORING_OP_FADVISE, sqe, fd, NULL, 0, offset);
785 	sqe->addr = len;
786 	sqe->fadvise_advice = (__u32) advice;
787 }
788 
789 IOURINGINLINE void io_uring_prep_madvise64(struct io_uring_sqe *sqe, void *addr,
790 					 off_t length, int advice)
791 {
792 	io_uring_prep_rw(IORING_OP_MADVISE, sqe, -1, addr, 0, length);
793 	sqe->fadvise_advice = (__u32) advice;
794 }
795 
796 IOURINGINLINE void io_uring_prep_send(struct io_uring_sqe *sqe, int sockfd,
797 				      const void *buf, size_t len, int flags)
798 {
799 	io_uring_prep_rw(IORING_OP_SEND, sqe, sockfd, buf, (__u32) len, 0);
800 	sqe->msg_flags = (__u32) flags;
801 }
802 
803 IOURINGINLINE void io_uring_prep_send_bundle(struct io_uring_sqe *sqe,
804 					     int sockfd, size_t len, int flags)
805 {
806 	io_uring_prep_send(sqe, sockfd, NULL, len, flags);
807 	sqe->ioprio |= IORING_RECVSEND_BUNDLE;
808 }
809 
810 IOURINGINLINE void io_uring_prep_send_set_addr(struct io_uring_sqe *sqe,
811 						const struct sockaddr *dest_addr,
812 						__u16 addr_len)
813 {
814 	sqe->addr2 = (unsigned long)(const void *)dest_addr;
815 	sqe->addr_len = addr_len;
816 }
817 
818 IOURINGINLINE void io_uring_prep_sendto(struct io_uring_sqe *sqe, int sockfd,
819 					const void *buf, size_t len, int flags,
820 					const struct sockaddr *addr,
821 					socklen_t addrlen)
822 {
823 	io_uring_prep_send(sqe, sockfd, buf, len, flags);
824 	io_uring_prep_send_set_addr(sqe, addr, addrlen);
825 }
826 
827 IOURINGINLINE void io_uring_prep_send_zc(struct io_uring_sqe *sqe, int sockfd,
828 					 const void *buf, size_t len, int flags,
829 					 unsigned zc_flags)
830 {
831 	io_uring_prep_rw(IORING_OP_SEND_ZC, sqe, sockfd, buf, (__u32) len, 0);
832 	sqe->msg_flags = (__u32) flags;
833 	sqe->ioprio = zc_flags;
834 }
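/*
 * Editorial note with a sketch: a zero-copy send typically produces two CQEs
 * for one SQE: the first carries the send result (flagged IORING_CQE_F_MORE),
 * the second (flagged IORING_CQE_F_NOTIF) signals that the kernel no longer
 * references 'buf', which must therefore stay alive until that notification
 * arrives. 'ring', 'sockfd', 'buf' and 'len' are assumed application state:
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	io_uring_prep_send_zc(sqe, sockfd, buf, len, 0, 0);
 *	io_uring_submit(&ring);
 */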
835 
836 IOURINGINLINE void io_uring_prep_send_zc_fixed(struct io_uring_sqe *sqe,
837 						int sockfd, const void *buf,
838 						size_t len, int flags,
839 						unsigned zc_flags,
840 						unsigned buf_index)
841 {
842 	io_uring_prep_send_zc(sqe, sockfd, buf, len, flags, zc_flags);
843 	sqe->ioprio |= IORING_RECVSEND_FIXED_BUF;
844 	sqe->buf_index = buf_index;
845 }
846 
847 IOURINGINLINE void io_uring_prep_sendmsg_zc(struct io_uring_sqe *sqe, int fd,
848 					    const struct msghdr *msg,
849 					    unsigned flags)
850 {
851 	io_uring_prep_sendmsg(sqe, fd, msg, flags);
852 	sqe->opcode = IORING_OP_SENDMSG_ZC;
853 }
854 
855 IOURINGINLINE void io_uring_prep_recv(struct io_uring_sqe *sqe, int sockfd,
856 				      void *buf, size_t len, int flags)
857 {
858 	io_uring_prep_rw(IORING_OP_RECV, sqe, sockfd, buf, (__u32) len, 0);
859 	sqe->msg_flags = (__u32) flags;
860 }
861 
862 IOURINGINLINE void io_uring_prep_recv_multishot(struct io_uring_sqe *sqe,
863 						int sockfd, void *buf,
864 						size_t len, int flags)
865 {
866 	io_uring_prep_recv(sqe, sockfd, buf, len, flags);
867 	sqe->ioprio |= IORING_RECV_MULTISHOT;
868 }
869 
870 IOURINGINLINE struct io_uring_recvmsg_out *
871 io_uring_recvmsg_validate(void *buf, int buf_len, struct msghdr *msgh)
872 {
873 	unsigned long header = msgh->msg_controllen + msgh->msg_namelen +
874 				sizeof(struct io_uring_recvmsg_out);
875 	if (buf_len < 0 || (unsigned long)buf_len < header)
876 		return NULL;
877 	return (struct io_uring_recvmsg_out *)buf;
878 }
879 
880 IOURINGINLINE void *io_uring_recvmsg_name(struct io_uring_recvmsg_out *o)
881 {
882 	return (void *) &o[1];
883 }
884 
885 IOURINGINLINE struct cmsghdr *
886 io_uring_recvmsg_cmsg_firsthdr(struct io_uring_recvmsg_out *o,
887 			       struct msghdr *msgh)
888 {
889 	if (o->controllen < sizeof(struct cmsghdr))
890 		return NULL;
891 
892 	return (struct cmsghdr *)((unsigned char *) io_uring_recvmsg_name(o) +
893 			msgh->msg_namelen);
894 }
895 
896 IOURINGINLINE struct cmsghdr *
897 io_uring_recvmsg_cmsg_nexthdr(struct io_uring_recvmsg_out *o, struct msghdr *msgh,
898 			      struct cmsghdr *cmsg)
899 {
900 	unsigned char *end;
901 
902 	if (cmsg->cmsg_len < sizeof(struct cmsghdr))
903 		return NULL;
904 	end = (unsigned char *) io_uring_recvmsg_cmsg_firsthdr(o, msgh) +
905 		o->controllen;
906 	cmsg = (struct cmsghdr *)((unsigned char *) cmsg +
907 			CMSG_ALIGN(cmsg->cmsg_len));
908 
909 	if ((unsigned char *) (cmsg + 1) > end)
910 		return NULL;
911 	if (((unsigned char *) cmsg) + CMSG_ALIGN(cmsg->cmsg_len) > end)
912 		return NULL;
913 
914 	return cmsg;
915 }
916 
917 IOURINGINLINE void *io_uring_recvmsg_payload(struct io_uring_recvmsg_out *o,
918 					     struct msghdr *msgh)
919 {
920 	return (void *)((unsigned char *)io_uring_recvmsg_name(o) +
921 			msgh->msg_namelen + msgh->msg_controllen);
922 }
923 
924 IOURINGINLINE unsigned int
925 io_uring_recvmsg_payload_length(struct io_uring_recvmsg_out *o,
926 				int buf_len, struct msghdr *msgh)
927 {
928 	unsigned long payload_start, payload_end;
929 
930 	payload_start = (unsigned long) io_uring_recvmsg_payload(o, msgh);
931 	payload_end = (unsigned long) o + buf_len;
932 	return (unsigned int) (payload_end - payload_start);
933 }
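/*
 * Editorial example (sketch): parsing one multishot recvmsg completion. 'buf'
 * is the provided buffer the kernel filled, cqe->res its length, 'msg' the
 * struct msghdr passed at prep time, and consume() a hypothetical consumer:
 *
 *	struct io_uring_recvmsg_out *out;
 *
 *	out = io_uring_recvmsg_validate(buf, cqe->res, &msg);
 *	if (out) {
 *		void *payload = io_uring_recvmsg_payload(out, &msg);
 *		unsigned int plen = io_uring_recvmsg_payload_length(out,
 *							cqe->res, &msg);
 *		consume(payload, plen);
 *	}
 */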
934 
935 IOURINGINLINE void io_uring_prep_openat2(struct io_uring_sqe *sqe, int dfd,
936 					const char *path, struct open_how *how)
937 {
938 	io_uring_prep_rw(IORING_OP_OPENAT2, sqe, dfd, path, sizeof(*how),
939 				(uint64_t) (uintptr_t) how);
940 }
941 
942 /* open directly into the fixed file table */
943 IOURINGINLINE void io_uring_prep_openat2_direct(struct io_uring_sqe *sqe,
944 						int dfd, const char *path,
945 						struct open_how *how,
946 						unsigned file_index)
947 {
948 	io_uring_prep_openat2(sqe, dfd, path, how);
949 	/* offset by 1 for allocation */
950 	if (file_index == IORING_FILE_INDEX_ALLOC)
951 		file_index--;
952 	__io_uring_set_target_fixed_file(sqe, file_index);
953 }
954 
955 struct epoll_event;
956 IOURINGINLINE void io_uring_prep_epoll_ctl(struct io_uring_sqe *sqe, int epfd,
957 					   int fd, int op,
958 					   struct epoll_event *ev)
959 {
960 	io_uring_prep_rw(IORING_OP_EPOLL_CTL, sqe, epfd, ev,
961 				(__u32) op, (__u32) fd);
962 }
963 
964 IOURINGINLINE void io_uring_prep_provide_buffers(struct io_uring_sqe *sqe,
965 						 void *addr, int len, int nr,
966 						 int bgid, int bid)
967 {
968 	io_uring_prep_rw(IORING_OP_PROVIDE_BUFFERS, sqe, nr, addr, (__u32) len,
969 				(__u64) bid);
970 	sqe->buf_group = (__u16) bgid;
971 }
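/*
 * Editorial example (sketch): providing a group of 8 equally sized buffers
 * (group id 0) and letting a recv pick one via IOSQE_BUFFER_SELECT; the
 * chosen buffer id is reported in cqe->flags >> IORING_CQE_BUFFER_SHIFT.
 * 'ring', 'bufs', BUF_SIZE and 'sockfd' are assumed application state:
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	io_uring_prep_provide_buffers(sqe, bufs, BUF_SIZE, 8, 0, 0);
 *	io_uring_submit(&ring);
 *
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_recv(sqe, sockfd, NULL, BUF_SIZE, 0);
 *	sqe->flags |= IOSQE_BUFFER_SELECT;
 *	sqe->buf_group = 0;
 *	io_uring_submit(&ring);
 */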
972 
973 IOURINGINLINE void io_uring_prep_remove_buffers(struct io_uring_sqe *sqe,
974 						int nr, int bgid)
975 {
976 	io_uring_prep_rw(IORING_OP_REMOVE_BUFFERS, sqe, nr, NULL, 0, 0);
977 	sqe->buf_group = (__u16) bgid;
978 }
979 
980 IOURINGINLINE void io_uring_prep_shutdown(struct io_uring_sqe *sqe, int fd,
981 					  int how)
982 {
983 	io_uring_prep_rw(IORING_OP_SHUTDOWN, sqe, fd, NULL, (__u32) how, 0);
984 }
985 
986 IOURINGINLINE void io_uring_prep_unlinkat(struct io_uring_sqe *sqe, int dfd,
987 					  const char *path, int flags)
988 {
989 	io_uring_prep_rw(IORING_OP_UNLINKAT, sqe, dfd, path, 0, 0);
990 	sqe->unlink_flags = (__u32) flags;
991 }
992 
993 IOURINGINLINE void io_uring_prep_unlink(struct io_uring_sqe *sqe,
994 					  const char *path, int flags)
995 {
996 	io_uring_prep_unlinkat(sqe, AT_FDCWD, path, flags);
997 }
998 
999 IOURINGINLINE void io_uring_prep_renameat(struct io_uring_sqe *sqe, int olddfd,
1000 					  const char *oldpath, int newdfd,
1001 					  const char *newpath, unsigned int flags)
1002 {
1003 	io_uring_prep_rw(IORING_OP_RENAMEAT, sqe, olddfd, oldpath,
1004 				(__u32) newdfd,
1005 				(uint64_t) (uintptr_t) newpath);
1006 	sqe->rename_flags = (__u32) flags;
1007 }
1008 
1009 IOURINGINLINE void io_uring_prep_rename(struct io_uring_sqe *sqe,
1010 					const char *oldpath,
1011 					const char *newpath)
1012 {
1013 	io_uring_prep_renameat(sqe, AT_FDCWD, oldpath, AT_FDCWD, newpath, 0);
1014 }
1015 
1016 IOURINGINLINE void io_uring_prep_sync_file_range(struct io_uring_sqe *sqe,
1017 						 int fd, unsigned len,
1018 						 __u64 offset, int flags)
1019 {
1020 	io_uring_prep_rw(IORING_OP_SYNC_FILE_RANGE, sqe, fd, NULL, len, offset);
1021 	sqe->sync_range_flags = (__u32) flags;
1022 }
1023 
1024 IOURINGINLINE void io_uring_prep_mkdirat(struct io_uring_sqe *sqe, int dfd,
1025 					const char *path, mode_t mode)
1026 {
1027 	io_uring_prep_rw(IORING_OP_MKDIRAT, sqe, dfd, path, mode, 0);
1028 }
1029 
1030 IOURINGINLINE void io_uring_prep_mkdir(struct io_uring_sqe *sqe,
1031 					const char *path, mode_t mode)
1032 {
1033 	io_uring_prep_mkdirat(sqe, AT_FDCWD, path, mode);
1034 }
1035 
1036 IOURINGINLINE void io_uring_prep_symlinkat(struct io_uring_sqe *sqe,
1037 					   const char *target, int newdirfd,
1038 					   const char *linkpath)
1039 {
1040 	io_uring_prep_rw(IORING_OP_SYMLINKAT, sqe, newdirfd, target, 0,
1041 				(uint64_t) (uintptr_t) linkpath);
1042 }
1043 
1044 IOURINGINLINE void io_uring_prep_symlink(struct io_uring_sqe *sqe,
1045 					 const char *target,
1046 					 const char *linkpath)
1047 {
1048 	io_uring_prep_symlinkat(sqe, target, AT_FDCWD, linkpath);
1049 }
1050 
1051 IOURINGINLINE void io_uring_prep_linkat(struct io_uring_sqe *sqe, int olddfd,
1052 					const char *oldpath, int newdfd,
1053 					const char *newpath, int flags)
1054 {
1055 	io_uring_prep_rw(IORING_OP_LINKAT, sqe, olddfd, oldpath, (__u32) newdfd,
1056 				(uint64_t) (uintptr_t) newpath);
1057 	sqe->hardlink_flags = (__u32) flags;
1058 }
1059 
1060 IOURINGINLINE void io_uring_prep_link(struct io_uring_sqe *sqe,
1061 				      const char *oldpath, const char *newpath,
1062 				      int flags)
1063 {
1064 	io_uring_prep_linkat(sqe, AT_FDCWD, oldpath, AT_FDCWD, newpath, flags);
1065 }
1066 
1067 IOURINGINLINE void io_uring_prep_msg_ring_cqe_flags(struct io_uring_sqe *sqe,
1068 					  int fd, unsigned int len, __u64 data,
1069 					  unsigned int flags, unsigned int cqe_flags)
1070 {
1071 	io_uring_prep_rw(IORING_OP_MSG_RING, sqe, fd, NULL, len, data);
1072 	sqe->msg_ring_flags = IORING_MSG_RING_FLAGS_PASS | flags;
1073 	sqe->file_index = cqe_flags;
1074 }
1075 
1076 IOURINGINLINE void io_uring_prep_msg_ring(struct io_uring_sqe *sqe, int fd,
1077 					  unsigned int len, __u64 data,
1078 					  unsigned int flags)
1079 {
1080 	io_uring_prep_rw(IORING_OP_MSG_RING, sqe, fd, NULL, len, data);
1081 	sqe->msg_ring_flags = flags;
1082 }
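/*
 * Editorial example (sketch): posting a CQE onto another ring, e.g. to wake
 * a sibling thread. 'target' is assumed to point to that thread's
 * struct io_uring; the 64-bit payload shows up in the target CQE's
 * user_data and 'len' (0 here) in its res field:
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	io_uring_prep_msg_ring(sqe, target->ring_fd, 0, 0x1234, 0);
 *	io_uring_submit(&ring);
 */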
1083 
1084 IOURINGINLINE void io_uring_prep_msg_ring_fd(struct io_uring_sqe *sqe, int fd,
1085 					     int source_fd, int target_fd,
1086 					     __u64 data, unsigned int flags)
1087 {
1088 	io_uring_prep_rw(IORING_OP_MSG_RING, sqe, fd,
1089 			 (void *) (uintptr_t) IORING_MSG_SEND_FD, 0, data);
1090 	sqe->addr3 = source_fd;
1091 	/* offset by 1 for allocation */
1092 	if ((unsigned int) target_fd == IORING_FILE_INDEX_ALLOC)
1093 		target_fd--;
1094 	__io_uring_set_target_fixed_file(sqe, target_fd);
1095 	sqe->msg_ring_flags = flags;
1096 }
1097 
1098 IOURINGINLINE void io_uring_prep_msg_ring_fd_alloc(struct io_uring_sqe *sqe,
1099 						   int fd, int source_fd,
1100 						   __u64 data, unsigned int flags)
1101 {
1102 	io_uring_prep_msg_ring_fd(sqe, fd, source_fd, IORING_FILE_INDEX_ALLOC,
1103 				  data, flags);
1104 }
1105 
1106 IOURINGINLINE void io_uring_prep_getxattr(struct io_uring_sqe *sqe,
1107 					  const char *name, char *value,
1108 					  const char *path, unsigned int len)
1109 {
1110 	io_uring_prep_rw(IORING_OP_GETXATTR, sqe, 0, name, len,
1111 				(__u64) (uintptr_t) value);
1112 	sqe->addr3 = (__u64) (uintptr_t) path;
1113 	sqe->xattr_flags = 0;
1114 }
1115 
1116 IOURINGINLINE void io_uring_prep_setxattr(struct io_uring_sqe *sqe,
1117 					  const char *name, const char *value,
1118 					  const char *path, int flags,
1119 					  unsigned int len)
1120 {
1121 	io_uring_prep_rw(IORING_OP_SETXATTR, sqe, 0, name, len,
1122 				(__u64) (uintptr_t) value);
1123 	sqe->addr3 = (__u64) (uintptr_t) path;
1124 	sqe->xattr_flags = flags;
1125 }
1126 
1127 IOURINGINLINE void io_uring_prep_fgetxattr(struct io_uring_sqe *sqe,
1128 					   int fd, const char *name,
1129 					   char *value, unsigned int len)
1130 {
1131 	io_uring_prep_rw(IORING_OP_FGETXATTR, sqe, fd, name, len,
1132 				(__u64) (uintptr_t) value);
1133 	sqe->xattr_flags = 0;
1134 }
1135 
1136 IOURINGINLINE void io_uring_prep_fsetxattr(struct io_uring_sqe *sqe, int fd,
1137 					   const char *name, const char	*value,
1138 					   int flags, unsigned int len)
1139 {
1140 	io_uring_prep_rw(IORING_OP_FSETXATTR, sqe, fd, name, len,
1141 				(__u64) (uintptr_t) value);
1142 	sqe->xattr_flags = flags;
1143 }
1144 
1145 IOURINGINLINE void io_uring_prep_socket(struct io_uring_sqe *sqe, int domain,
1146 					int type, int protocol,
1147 					unsigned int flags)
1148 {
1149 	io_uring_prep_rw(IORING_OP_SOCKET, sqe, domain, NULL, protocol, type);
1150 	sqe->rw_flags = flags;
1151 }
1152 
1153 IOURINGINLINE void io_uring_prep_socket_direct(struct io_uring_sqe *sqe,
1154 					       int domain, int type,
1155 					       int protocol,
1156 					       unsigned file_index,
1157 					       unsigned int flags)
1158 {
1159 	io_uring_prep_rw(IORING_OP_SOCKET, sqe, domain, NULL, protocol, type);
1160 	sqe->rw_flags = flags;
1161 	/* offset by 1 for allocation */
1162 	if (file_index == IORING_FILE_INDEX_ALLOC)
1163 		file_index--;
1164 	__io_uring_set_target_fixed_file(sqe, file_index);
1165 }
1166 
1167 IOURINGINLINE void io_uring_prep_socket_direct_alloc(struct io_uring_sqe *sqe,
1168 						     int domain, int type,
1169 						     int protocol,
1170 						     unsigned int flags)
1171 {
1172 	io_uring_prep_rw(IORING_OP_SOCKET, sqe, domain, NULL, protocol, type);
1173 	sqe->rw_flags = flags;
1174 	__io_uring_set_target_fixed_file(sqe, IORING_FILE_INDEX_ALLOC - 1);
1175 }
1176 
1177 /*
1178  * Prepare commands for sockets
1179  */
1180 IOURINGINLINE void io_uring_prep_cmd_sock(struct io_uring_sqe *sqe,
1181 					  int cmd_op,
1182 					  int fd,
1183 					  int level,
1184 					  int optname,
1185 					  void *optval,
1186 					  int optlen)
1187 {
1188 	io_uring_prep_rw(IORING_OP_URING_CMD, sqe, fd, NULL, 0, 0);
1189 	sqe->optval = (unsigned long) (uintptr_t) optval;
1190 	sqe->optname = optname;
1191 	sqe->optlen = optlen;
1192 	sqe->cmd_op = cmd_op;
1193 	sqe->level = level;
1194 }
1195 
1196 IOURINGINLINE void io_uring_prep_waitid(struct io_uring_sqe *sqe,
1197 					idtype_t idtype,
1198 					id_t id,
1199 					siginfo_t *infop,
1200 					int options, unsigned int flags)
1201 {
1202 	io_uring_prep_rw(IORING_OP_WAITID, sqe, id, NULL, (unsigned) idtype, 0);
1203 	sqe->waitid_flags = flags;
1204 	sqe->file_index = options;
1205 	sqe->addr2 = (unsigned long) infop;
1206 }
1207 
1208 IOURINGINLINE void io_uring_prep_futex_wake(struct io_uring_sqe *sqe,
1209 					    uint32_t *futex, uint64_t val,
1210 					    uint64_t mask, uint32_t futex_flags,
1211 					    unsigned int flags)
1212 {
1213 	io_uring_prep_rw(IORING_OP_FUTEX_WAKE, sqe, futex_flags, futex, 0, val);
1214 	sqe->futex_flags = flags;
1215 	sqe->addr3 = mask;
1216 }
1217 
1218 IOURINGINLINE void io_uring_prep_futex_wait(struct io_uring_sqe *sqe,
1219 					    uint32_t *futex, uint64_t val,
1220 					    uint64_t mask, uint32_t futex_flags,
1221 					    unsigned int flags)
1222 {
1223 	io_uring_prep_rw(IORING_OP_FUTEX_WAIT, sqe, futex_flags, futex, 0, val);
1224 	sqe->futex_flags = flags;
1225 	sqe->addr3 = mask;
1226 }
1227 
1228 struct futex_waitv;
1229 IOURINGINLINE void io_uring_prep_futex_waitv(struct io_uring_sqe *sqe,
1230 					     struct futex_waitv *futex,
1231 					     uint32_t nr_futex,
1232 					     unsigned int flags)
1233 {
1234 	io_uring_prep_rw(IORING_OP_FUTEX_WAITV, sqe, 0, futex, nr_futex, 0);
1235 	sqe->futex_flags = flags;
1236 }
1237 
1238 IOURINGINLINE void io_uring_prep_fixed_fd_install(struct io_uring_sqe *sqe,
1239 						  int fd,
1240 						  unsigned int flags)
1241 {
1242 	io_uring_prep_rw(IORING_OP_FIXED_FD_INSTALL, sqe, fd, NULL, 0, 0);
1243 	sqe->flags = IOSQE_FIXED_FILE;
1244 	sqe->install_fd_flags = flags;
1245 }
1246 
1247 IOURINGINLINE void io_uring_prep_ftruncate(struct io_uring_sqe *sqe,
1248 				       int fd, loff_t len)
1249 {
1250 	io_uring_prep_rw(IORING_OP_FTRUNCATE, sqe, fd, 0, 0, len);
1251 }
1252 
1253 /*
1254  * Returns the number of unconsumed (if SQPOLL) or unsubmitted entries in
1255  * the SQ ring
1256  */
1257 IOURINGINLINE unsigned io_uring_sq_ready(const struct io_uring *ring)
1258 {
1259 	unsigned khead;
1260 
1261 	/*
1262 	 * Without a barrier, we could miss an update and think the SQ wasn't
1263 	 * ready. We don't need the load acquire for non-SQPOLL since then we
1264 	 * drive updates.
1265 	 */
1266 	if (ring->flags & IORING_SETUP_SQPOLL)
1267 		khead = io_uring_smp_load_acquire(ring->sq.khead);
1268 	else
1269 		khead = *ring->sq.khead;
1270 
1271 	/* always use real head, to avoid losing sync for short submit */
1272 	return ring->sq.sqe_tail - khead;
1273 }
1274 
1275 /*
1276  * Returns how much space is left in the SQ ring.
1277  */
1278 IOURINGINLINE unsigned io_uring_sq_space_left(const struct io_uring *ring)
1279 {
1280 	return ring->sq.ring_entries - io_uring_sq_ready(ring);
1281 }
1282 
1283 /*
1284  * Only applicable when using SQPOLL - allows the caller to wait for space
1285  * to free up in the SQ ring, which happens when the kernel side thread has
1286  * consumed one or more entries. If the SQ ring is currently non-full, no
1287  * action is taken. Note: may return -EINVAL if the kernel doesn't support
1288  * this feature.
1289  */
1290 IOURINGINLINE int io_uring_sqring_wait(struct io_uring *ring)
1291 {
1292 	if (!(ring->flags & IORING_SETUP_SQPOLL))
1293 		return 0;
1294 	if (io_uring_sq_space_left(ring))
1295 		return 0;
1296 
1297 	return __io_uring_sqring_wait(ring);
1298 }
1299 
1300 /*
1301  * Returns how many unconsumed entries are ready in the CQ ring
1302  */
1303 IOURINGINLINE unsigned io_uring_cq_ready(const struct io_uring *ring)
1304 {
1305 	return io_uring_smp_load_acquire(ring->cq.ktail) - *ring->cq.khead;
1306 }
1307 
1308 /*
1309  * Returns true if there are overflow entries waiting to be flushed onto
1310  * the CQ ring
1311  */
1312 IOURINGINLINE bool io_uring_cq_has_overflow(const struct io_uring *ring)
1313 {
1314 	return IO_URING_READ_ONCE(*ring->sq.kflags) & IORING_SQ_CQ_OVERFLOW;
1315 }
1316 
1317 /*
1318  * Returns true if the eventfd notification is currently enabled
1319  */
1320 IOURINGINLINE bool io_uring_cq_eventfd_enabled(const struct io_uring *ring)
1321 {
1322 	if (!ring->cq.kflags)
1323 		return true;
1324 
1325 	return !(*ring->cq.kflags & IORING_CQ_EVENTFD_DISABLED);
1326 }
1327 
1328 /*
1329  * Toggle eventfd notification on or off, if an eventfd is registered with
1330  * the ring.
1331  */
1332 IOURINGINLINE int io_uring_cq_eventfd_toggle(struct io_uring *ring,
1333 					     bool enabled)
1334 {
1335 	uint32_t flags;
1336 
1337 	if (!!enabled == io_uring_cq_eventfd_enabled(ring))
1338 		return 0;
1339 
1340 	if (!ring->cq.kflags)
1341 		return -EOPNOTSUPP;
1342 
1343 	flags = *ring->cq.kflags;
1344 
1345 	if (enabled)
1346 		flags &= ~IORING_CQ_EVENTFD_DISABLED;
1347 	else
1348 		flags |= IORING_CQ_EVENTFD_DISABLED;
1349 
1350 	IO_URING_WRITE_ONCE(*ring->cq.kflags, flags);
1351 
1352 	return 0;
1353 }
1354 
1355 /*
1356  * Return an IO completion, waiting for 'wait_nr' completions if one isn't
1357  * readily available. Returns 0 with cqe_ptr filled in on success, -errno on
1358  * failure.
1359  */
1360 IOURINGINLINE int io_uring_wait_cqe_nr(struct io_uring *ring,
1361 				      struct io_uring_cqe **cqe_ptr,
1362 				      unsigned wait_nr)
1363 {
1364 	return __io_uring_get_cqe(ring, cqe_ptr, 0, wait_nr, NULL);
1365 }
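
/*
 * Usage sketch (illustrative only): block until at least 8 completions are
 * available; on success 'cqe' points at the first one and the rest can be
 * reaped with the peek/for_each helpers.
 *
 *	struct io_uring_cqe *cqe;
 *	int ret = io_uring_wait_cqe_nr(&ring, &cqe, 8);
 *
 *	if (!ret) {
 *		... process and mark completions seen ...
 *	}
 */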

/*
 * Internal helper, don't use directly in applications. Use one of the
 * "official" versions of this, io_uring_peek_cqe(), io_uring_wait_cqe(),
 * or io_uring_wait_cqes*().
 */
IOURINGINLINE int __io_uring_peek_cqe(struct io_uring *ring,
				      struct io_uring_cqe **cqe_ptr,
				      unsigned *nr_available)
{
	struct io_uring_cqe *cqe;
	int err = 0;
	unsigned available;
	unsigned mask = ring->cq.ring_mask;
	int shift = 0;

	if (ring->flags & IORING_SETUP_CQE32)
		shift = 1;

	do {
		unsigned tail = io_uring_smp_load_acquire(ring->cq.ktail);
		unsigned head = *ring->cq.khead;

		cqe = NULL;
		available = tail - head;
		if (!available)
			break;

		cqe = &ring->cq.cqes[(head & mask) << shift];
		if (!(ring->features & IORING_FEAT_EXT_ARG) &&
				cqe->user_data == LIBURING_UDATA_TIMEOUT) {
			if (cqe->res < 0)
				err = cqe->res;
			io_uring_cq_advance(ring, 1);
			if (!err)
				continue;
			cqe = NULL;
		}

		break;
	} while (1);

	*cqe_ptr = cqe;
	if (nr_available)
		*nr_available = available;
	return err;
}

/*
 * Return an IO completion, if one is readily available. Returns 0 with
 * cqe_ptr filled in on success, -errno on failure.
 */
IOURINGINLINE int io_uring_peek_cqe(struct io_uring *ring,
				    struct io_uring_cqe **cqe_ptr)
{
	if (!__io_uring_peek_cqe(ring, cqe_ptr, NULL) && *cqe_ptr)
		return 0;

	return io_uring_wait_cqe_nr(ring, cqe_ptr, 0);
}

/*
 * Return an IO completion, waiting for it if necessary. Returns 0 with
 * cqe_ptr filled in on success, -errno on failure.
 */
IOURINGINLINE int io_uring_wait_cqe(struct io_uring *ring,
				    struct io_uring_cqe **cqe_ptr)
{
	if (!__io_uring_peek_cqe(ring, cqe_ptr, NULL) && *cqe_ptr)
		return 0;

	return io_uring_wait_cqe_nr(ring, cqe_ptr, 1);
}
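
/*
 * Usage sketch (illustrative only): the common submit-and-reap pattern built
 * on io_uring_wait_cqe(). 'fd', 'buf' and full error handling are
 * application-side assumptions left out here.
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *	struct io_uring_cqe *cqe;
 *
 *	io_uring_prep_read(sqe, fd, buf, sizeof(buf), 0);
 *	io_uring_submit(&ring);
 *
 *	if (!io_uring_wait_cqe(&ring, &cqe)) {
 *		if (cqe->res < 0)
 *			... handle error ...
 *		io_uring_cqe_seen(&ring, cqe);
 *	}
 */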

/*
 * Return an sqe to fill. Application must later call io_uring_submit()
 * when it's ready to tell the kernel about it. The caller may call this
 * function multiple times before calling io_uring_submit().
 *
 * Returns a vacant sqe, or NULL if we're full.
 */
IOURINGINLINE struct io_uring_sqe *_io_uring_get_sqe(struct io_uring *ring)
{
	struct io_uring_sq *sq = &ring->sq;
	unsigned int head, next = sq->sqe_tail + 1;
	int shift = 0;

	if (ring->flags & IORING_SETUP_SQE128)
		shift = 1;
	if (!(ring->flags & IORING_SETUP_SQPOLL))
		head = *sq->khead;
	else
		head = io_uring_smp_load_acquire(sq->khead);

	if (next - head <= sq->ring_entries) {
		struct io_uring_sqe *sqe;

		sqe = &sq->sqes[(sq->sqe_tail & sq->ring_mask) << shift];
		sq->sqe_tail = next;
		io_uring_initialize_sqe(sqe);
		return sqe;
	}

	return NULL;
}
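
/*
 * Usage sketch (illustrative only): several SQEs may be filled before a
 * single io_uring_submit(); a NULL return means the SQ ring is full and the
 * entries obtained so far should be submitted first. 'i' and 'nr_requests'
 * are hypothetical application-side names.
 *
 *	for (i = 0; i < nr_requests; i++) {
 *		struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *		if (!sqe) {
 *			io_uring_submit(&ring);
 *			sqe = io_uring_get_sqe(&ring);
 *		}
 *		io_uring_prep_nop(sqe);
 *		io_uring_sqe_set_data64(sqe, i);
 *	}
 *	io_uring_submit(&ring);
 */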

/*
 * Return the appropriate mask for a buffer ring of size 'ring_entries'
 */
IOURINGINLINE int io_uring_buf_ring_mask(__u32 ring_entries)
{
	return ring_entries - 1;
}

IOURINGINLINE void io_uring_buf_ring_init(struct io_uring_buf_ring *br)
{
	br->tail = 0;
}

/*
 * Assign 'buf' with the addr/len/buffer ID supplied
 */
IOURINGINLINE void io_uring_buf_ring_add(struct io_uring_buf_ring *br,
					 void *addr, unsigned int len,
					 unsigned short bid, int mask,
					 int buf_offset)
{
	struct io_uring_buf *buf = &br->bufs[(br->tail + buf_offset) & mask];

	buf->addr = (unsigned long) (uintptr_t) addr;
	buf->len = len;
	buf->bid = bid;
}

/*
 * Make 'count' new buffers visible to the kernel. Called after
 * io_uring_buf_ring_add() has been called 'count' times to fill in new
 * buffers.
 */
IOURINGINLINE void io_uring_buf_ring_advance(struct io_uring_buf_ring *br,
					     int count)
{
	unsigned short new_tail = br->tail + count;

	io_uring_smp_store_release(&br->tail, new_tail);
}
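
/*
 * Usage sketch (illustrative only): populate a registered buffer ring with
 * 'nbufs' equally sized buffers and publish them in one go. 'br', 'bufs',
 * 'nbufs' and BUF_SIZE are application-side assumptions; 'br' would
 * typically come from io_uring_setup_buf_ring().
 *
 *	int mask = io_uring_buf_ring_mask(nbufs);
 *
 *	for (i = 0; i < nbufs; i++)
 *		io_uring_buf_ring_add(br, bufs[i], BUF_SIZE, i, mask, i);
 *	io_uring_buf_ring_advance(br, nbufs);
 */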

IOURINGINLINE void __io_uring_buf_ring_cq_advance(struct io_uring *ring,
						  struct io_uring_buf_ring *br,
						  int cq_count, int buf_count)
{
	io_uring_buf_ring_advance(br, buf_count);
	io_uring_cq_advance(ring, cq_count);
}

/*
 * Make 'count' new buffers visible to the kernel while at the same time
 * advancing the CQ ring seen entries. This can be used when the application
 * is using ring provided buffers and returns buffers while processing CQEs,
 * avoiding an extra atomic when needing to increment both the CQ ring and
 * the buffer ring index at the same time.
 */
IOURINGINLINE void io_uring_buf_ring_cq_advance(struct io_uring *ring,
						struct io_uring_buf_ring *br,
						int count)
{
	__io_uring_buf_ring_cq_advance(ring, br, count, count);
}
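
/*
 * Usage sketch (illustrative only): when each reaped CQE hands one provided
 * buffer straight back to the ring, both indices can be advanced together
 * after the processing loop. 'br', 'count', 'head' and 'cqe' are
 * application-side assumptions.
 *
 *	io_uring_for_each_cqe(&ring, head, cqe) {
 *		... process cqe, recycle its buffer via io_uring_buf_ring_add() ...
 *		count++;
 *	}
 *	io_uring_buf_ring_cq_advance(&ring, br, count);
 */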

IOURINGINLINE int io_uring_buf_ring_available(struct io_uring *ring,
					      struct io_uring_buf_ring *br,
					      unsigned short bgid)
{
	uint16_t head;
	int ret;

	ret = io_uring_buf_ring_head(ring, bgid, &head);
	if (ret)
		return ret;

	return (uint16_t) (br->tail - head);
}

#ifndef LIBURING_INTERNAL
IOURINGINLINE struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
{
	return _io_uring_get_sqe(ring);
}
#else
struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring);
#endif

ssize_t io_uring_mlock_size(unsigned entries, unsigned flags);
ssize_t io_uring_mlock_size_params(unsigned entries, struct io_uring_params *p);

/*
 * Versioning information for liburing.
 *
 * Use IO_URING_CHECK_VERSION() for compile time checks including from
 * preprocessor directives.
 *
 * Use io_uring_check_version() for runtime checks of the version of
 * liburing that was loaded by the dynamic linker.
 */
int io_uring_major_version(void);
int io_uring_minor_version(void);
bool io_uring_check_version(int major, int minor);

#define IO_URING_CHECK_VERSION(major,minor) \
  (major > IO_URING_VERSION_MAJOR ||        \
   (major == IO_URING_VERSION_MAJOR &&      \
    minor > IO_URING_VERSION_MINOR))
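
/*
 * Usage sketch (illustrative only): gate on the liburing version at compile
 * time and at runtime. Per the macro above, IO_URING_CHECK_VERSION()
 * evaluates to true when the requested version is newer than the headers
 * being compiled against; io_uring_check_version() is assumed to mirror
 * that for the library loaded by the dynamic linker. The version 2.6 below
 * is just an example value.
 *
 *	#if IO_URING_CHECK_VERSION(2, 6)
 *	... headers older than 2.6, use a fallback path ...
 *	#else
 *	... 2.6 interfaces available at compile time ...
 *	#endif
 *
 *	if (io_uring_check_version(2, 6))
 *		... loaded library older than 2.6, use a fallback path ...
 */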

#ifdef __cplusplus
}
#endif

#ifdef IOURINGINLINE
#undef IOURINGINLINE
#endif

#endif