/* SPDX-License-Identifier: MIT */
#ifndef LIB_URING_H
#define LIB_URING_H

#ifndef _XOPEN_SOURCE
#define _XOPEN_SOURCE 500 /* Required for glibc to expose sigset_t */
#endif

#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/stat.h>
#include <errno.h>
#include <signal.h>
#include <stdbool.h>
#include <inttypes.h>
#include <time.h>
#include <linux/swab.h>
#include "liburing/compat.h"
#include "liburing/io_uring.h"
#include "liburing/barrier.h"

#ifndef uring_unlikely
#  define uring_unlikely(cond)      __builtin_expect(!!(cond), 0)
#endif

#ifndef uring_likely
#  define uring_likely(cond)        __builtin_expect(!!(cond), 1)
#endif

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Library interface to io_uring
 */
struct io_uring_sq {
	unsigned *khead;
	unsigned *ktail;
	unsigned *kring_mask;
	unsigned *kring_entries;
	unsigned *kflags;
	unsigned *kdropped;
	unsigned *array;
	struct io_uring_sqe *sqes;

	unsigned sqe_head;
	unsigned sqe_tail;

	size_t ring_sz;
	void *ring_ptr;

	unsigned pad[4];
};

struct io_uring_cq {
	unsigned *khead;
	unsigned *ktail;
	unsigned *kring_mask;
	unsigned *kring_entries;
	unsigned *kflags;
	unsigned *koverflow;
	struct io_uring_cqe *cqes;

	size_t ring_sz;
	void *ring_ptr;

	unsigned pad[4];
};

struct io_uring {
	struct io_uring_sq sq;
	struct io_uring_cq cq;
	unsigned flags;
	int ring_fd;

	unsigned features;
	unsigned pad[3];
};

/*
 * Library interface
 */

/*
 * return an allocated io_uring_probe structure, or NULL if probe fails (for
 * example, if it is not available). The caller is responsible for freeing it
 */
extern struct io_uring_probe *io_uring_get_probe_ring(struct io_uring *ring);
/* same as io_uring_get_probe_ring, but takes care of ring init and teardown */
extern struct io_uring_probe *io_uring_get_probe(void);

/*
 * frees a probe allocated through io_uring_get_probe() or
 * io_uring_get_probe_ring()
 */
extern void io_uring_free_probe(struct io_uring_probe *probe);

static inline int io_uring_opcode_supported(const struct io_uring_probe *p, int op)
{
	if (op > p->last_op)
		return 0;
	return (p->ops[op].flags & IO_URING_OP_SUPPORTED) != 0;
}
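
/*
 * Example (illustrative sketch, not part of the liburing API itself): probing
 * whether an opcode is available before relying on it. Here use_openat2 is a
 * hypothetical application flag.
 *
 *	struct io_uring_probe *probe = io_uring_get_probe();
 *
 *	if (probe) {
 *		if (io_uring_opcode_supported(probe, IORING_OP_OPENAT2))
 *			use_openat2 = true;
 *		io_uring_free_probe(probe);
 *	}
 */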

extern int io_uring_queue_init_params(unsigned entries, struct io_uring *ring,
	struct io_uring_params *p);
extern int io_uring_queue_init(unsigned entries, struct io_uring *ring,
	unsigned flags);
extern int io_uring_queue_mmap(int fd, struct io_uring_params *p,
	struct io_uring *ring);
extern int io_uring_ring_dontfork(struct io_uring *ring);
extern void io_uring_queue_exit(struct io_uring *ring);
unsigned io_uring_peek_batch_cqe(struct io_uring *ring,
	struct io_uring_cqe **cqes, unsigned count);
extern int io_uring_wait_cqes(struct io_uring *ring,
	struct io_uring_cqe **cqe_ptr, unsigned wait_nr,
	struct __kernel_timespec *ts, sigset_t *sigmask);
extern int io_uring_wait_cqe_timeout(struct io_uring *ring,
	struct io_uring_cqe **cqe_ptr, struct __kernel_timespec *ts);
extern int io_uring_submit(struct io_uring *ring);
extern int io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr);
extern struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring);

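/*
 * Example (illustrative sketch, not prescriptive): minimal ring lifecycle
 * using the prep and completion helpers defined further below. A single NOP
 * is submitted and its completion reaped; error handling is reduced to early
 * returns for brevity.
 *
 *	struct io_uring ring;
 *	struct io_uring_sqe *sqe;
 *	struct io_uring_cqe *cqe;
 *	int ret;
 *
 *	ret = io_uring_queue_init(8, &ring, 0);
 *	if (ret < 0)
 *		return ret;
 *
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_nop(sqe);
 *	io_uring_sqe_set_data(sqe, (void *) 0x1234);
 *
 *	ret = io_uring_submit(&ring);
 *	if (ret < 0)
 *		return ret;
 *
 *	ret = io_uring_wait_cqe(&ring, &cqe);
 *	if (!ret) {
 *		(cqe->res holds the operation's result)
 *		io_uring_cqe_seen(&ring, cqe);
 *	}
 *	io_uring_queue_exit(&ring);
 */
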
extern int io_uring_register_buffers(struct io_uring *ring,
					const struct iovec *iovecs,
					unsigned nr_iovecs);
extern int io_uring_unregister_buffers(struct io_uring *ring);
extern int io_uring_register_files(struct io_uring *ring, const int *files,
					unsigned nr_files);
extern int io_uring_unregister_files(struct io_uring *ring);
extern int io_uring_register_files_update(struct io_uring *ring, unsigned off,
					int *files, unsigned nr_files);
extern int io_uring_register_eventfd(struct io_uring *ring, int fd);
extern int io_uring_register_eventfd_async(struct io_uring *ring, int fd);
extern int io_uring_unregister_eventfd(struct io_uring *ring);
extern int io_uring_register_probe(struct io_uring *ring,
					struct io_uring_probe *p, unsigned nr);
extern int io_uring_register_personality(struct io_uring *ring);
extern int io_uring_unregister_personality(struct io_uring *ring, int id);
extern int io_uring_register_restrictions(struct io_uring *ring,
					  struct io_uring_restriction *res,
					  unsigned int nr_res);
extern int io_uring_enable_rings(struct io_uring *ring);
extern int __io_uring_sqring_wait(struct io_uring *ring);

/*
 * Helper for the peek/wait single cqe functions. Exported because of that,
 * but probably shouldn't be used directly in an application.
 */
extern int __io_uring_get_cqe(struct io_uring *ring,
			      struct io_uring_cqe **cqe_ptr, unsigned submit,
			      unsigned wait_nr, sigset_t *sigmask);

#define LIBURING_UDATA_TIMEOUT	((__u64) -1)

#define io_uring_for_each_cqe(ring, head, cqe)				\
	/*								\
	 * io_uring_smp_load_acquire() enforces the order of tail	\
	 * and CQE reads.						\
	 */								\
	for (head = *(ring)->cq.khead;					\
	     (cqe = (head != io_uring_smp_load_acquire((ring)->cq.ktail) ? \
		&(ring)->cq.cqes[head & (*(ring)->cq.kring_mask)] : NULL)); \
	     head++)							\

/*
 * Must be called after io_uring_for_each_cqe()
 */
static inline void io_uring_cq_advance(struct io_uring *ring,
				       unsigned nr)
{
	if (nr) {
		struct io_uring_cq *cq = &ring->cq;

		/*
		 * Ensure that the kernel only sees the new value of the head
		 * index after the CQEs have been read.
		 */
		io_uring_smp_store_release(cq->khead, *cq->khead + nr);
	}
}
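
/*
 * Example (illustrative sketch): draining all currently available CQEs with
 * io_uring_for_each_cqe() and acknowledging them in one go with
 * io_uring_cq_advance(). handle_completion() stands in for application code.
 *
 *	struct io_uring_cqe *cqe;
 *	unsigned head, seen = 0;
 *
 *	io_uring_for_each_cqe(&ring, head, cqe) {
 *		handle_completion(io_uring_cqe_get_data(cqe), cqe->res);
 *		seen++;
 *	}
 *	io_uring_cq_advance(&ring, seen);
 */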

/*
 * Must be called after io_uring_{peek,wait}_cqe() after the cqe has
 * been processed by the application.
 */
static inline void io_uring_cqe_seen(struct io_uring *ring,
				     struct io_uring_cqe *cqe)
{
	if (cqe)
		io_uring_cq_advance(ring, 1);
}

/*
 * Command prep helpers
 */
static inline void io_uring_sqe_set_data(struct io_uring_sqe *sqe, void *data)
{
	sqe->user_data = (unsigned long) data;
}

static inline void *io_uring_cqe_get_data(const struct io_uring_cqe *cqe)
{
	return (void *) (uintptr_t) cqe->user_data;
}

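/*
 * Example (illustrative sketch): carrying per-request context through a ring.
 * The kernel echoes user_data back untouched in the CQE, so a pointer to an
 * application-defined request struct (here a hypothetical "struct my_req"
 * with a buf member) survives the round trip.
 *
 *	struct my_req *req = malloc(sizeof(*req));
 *
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_read(sqe, fd, req->buf, sizeof(req->buf), 0);
 *	io_uring_sqe_set_data(sqe, req);
 *	io_uring_submit(&ring);
 *
 *	io_uring_wait_cqe(&ring, &cqe);
 *	req = io_uring_cqe_get_data(cqe);
 *	io_uring_cqe_seen(&ring, cqe);
 */
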
static inline void io_uring_sqe_set_flags(struct io_uring_sqe *sqe,
					  unsigned flags)
{
	sqe->flags = flags;
}

static inline void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd,
				    const void *addr, unsigned len,
				    __u64 offset)
{
	sqe->opcode = op;
	sqe->flags = 0;
	sqe->ioprio = 0;
	sqe->fd = fd;
	sqe->off = offset;
	sqe->addr = (unsigned long) addr;
	sqe->len = len;
	sqe->rw_flags = 0;
	sqe->user_data = 0;
	sqe->__pad2[0] = sqe->__pad2[1] = sqe->__pad2[2] = 0;
}

/**
 * @pre Either fd_in or fd_out must be a pipe.
 * @param off_in If fd_in refers to a pipe, off_in must be (int64_t) -1;
 *               if fd_in does not refer to a pipe and off_in is (int64_t) -1, then bytes are read
 *               from fd_in starting at the file offset, which is adjusted appropriately;
 *               if fd_in does not refer to a pipe and off_in is not (int64_t) -1, then the
 *               starting offset of fd_in will be off_in.
 * @param off_out The description of off_in also applies to off_out.
 * @param splice_flags See splice(2) for a description of the flags.
 *
 * This splice operation can be used to implement sendfile by splicing to an intermediate pipe
 * first, then splicing to the final destination.
 * In fact, the kernel implementation of sendfile uses splice internally.
 *
 * NOTE that even if fd_in or fd_out refers to a pipe, the splice operation can still fail with
 * EINVAL if one of the fds does not explicitly support splice operations; e.g. reading from a
 * terminal is unsupported from kernel 5.7 to 5.11.
 * Check issue #291 for more information.
 */
static inline void io_uring_prep_splice(struct io_uring_sqe *sqe,
					int fd_in, int64_t off_in,
					int fd_out, int64_t off_out,
					unsigned int nbytes,
					unsigned int splice_flags)
{
	io_uring_prep_rw(IORING_OP_SPLICE, sqe, fd_out, NULL, nbytes, off_out);
	sqe->splice_off_in = off_in;
	sqe->splice_fd_in = fd_in;
	sqe->splice_flags = splice_flags;
}

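/*
 * Example (illustrative sketch): a sendfile-style copy that splices from a
 * file into a pipe, then from the pipe into a socket. The two SQEs are linked
 * with IOSQE_IO_LINK so the second only runs after the first succeeds;
 * pipefd[], file_fd, sock_fd and len are assumed to be set up by the
 * application.
 *
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_splice(sqe, file_fd, 0, pipefd[1], -1, len, 0);
 *	io_uring_sqe_set_flags(sqe, IOSQE_IO_LINK);
 *
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_splice(sqe, pipefd[0], -1, sock_fd, -1, len, 0);
 *
 *	io_uring_submit(&ring);
 */
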
static inline void io_uring_prep_tee(struct io_uring_sqe *sqe,
				     int fd_in, int fd_out,
				     unsigned int nbytes,
				     unsigned int splice_flags)
{
	io_uring_prep_rw(IORING_OP_TEE, sqe, fd_out, NULL, nbytes, 0);
	sqe->splice_off_in = 0;
	sqe->splice_fd_in = fd_in;
	sqe->splice_flags = splice_flags;
}

static inline void io_uring_prep_readv(struct io_uring_sqe *sqe, int fd,
				       const struct iovec *iovecs,
				       unsigned nr_vecs, off_t offset)
{
	io_uring_prep_rw(IORING_OP_READV, sqe, fd, iovecs, nr_vecs, offset);
}

static inline void io_uring_prep_read_fixed(struct io_uring_sqe *sqe, int fd,
					    void *buf, unsigned nbytes,
					    off_t offset, int buf_index)
{
	io_uring_prep_rw(IORING_OP_READ_FIXED, sqe, fd, buf, nbytes, offset);
	sqe->buf_index = buf_index;
}

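/*
 * Example (illustrative sketch): pairing io_uring_register_buffers() with the
 * *_fixed prep helpers. The buffer handed to io_uring_prep_read_fixed() must
 * lie within the registered iovec selected by buf_index; buf, BUF_SIZE and fd
 * are assumed to be provided by the application.
 *
 *	struct iovec iov = { .iov_base = buf, .iov_len = BUF_SIZE };
 *
 *	io_uring_register_buffers(&ring, &iov, 1);
 *
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_read_fixed(sqe, fd, buf, BUF_SIZE, 0, 0);
 *	io_uring_submit(&ring);
 */
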
static inline void io_uring_prep_writev(struct io_uring_sqe *sqe, int fd,
					const struct iovec *iovecs,
					unsigned nr_vecs, off_t offset)
{
	io_uring_prep_rw(IORING_OP_WRITEV, sqe, fd, iovecs, nr_vecs, offset);
}

static inline void io_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd,
					     const void *buf, unsigned nbytes,
					     off_t offset, int buf_index)
{
	io_uring_prep_rw(IORING_OP_WRITE_FIXED, sqe, fd, buf, nbytes, offset);
	sqe->buf_index = buf_index;
}

static inline void io_uring_prep_recvmsg(struct io_uring_sqe *sqe, int fd,
					 struct msghdr *msg, unsigned flags)
{
	io_uring_prep_rw(IORING_OP_RECVMSG, sqe, fd, msg, 1, 0);
	sqe->msg_flags = flags;
}

static inline void io_uring_prep_sendmsg(struct io_uring_sqe *sqe, int fd,
					 const struct msghdr *msg, unsigned flags)
{
	io_uring_prep_rw(IORING_OP_SENDMSG, sqe, fd, msg, 1, 0);
	sqe->msg_flags = flags;
}

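/*
 * Example (illustrative sketch): an asynchronous recvmsg. The msghdr and
 * iovec must stay valid until the request completes, so in real code they
 * would live in per-request state rather than on a stack frame that may
 * unwind first; buf and sock_fd are assumed to come from the application.
 *
 *	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
 *	struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1 };
 *
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_recvmsg(sqe, sock_fd, &msg, 0);
 *	io_uring_submit(&ring);
 */
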
static inline void io_uring_prep_poll_add(struct io_uring_sqe *sqe, int fd,
					  unsigned poll_mask)
{
	io_uring_prep_rw(IORING_OP_POLL_ADD, sqe, fd, NULL, 0, 0);
#if __BYTE_ORDER == __BIG_ENDIAN
	poll_mask = __swahw32(poll_mask);
#endif
	sqe->poll32_events = poll_mask;
}

static inline void io_uring_prep_poll_remove(struct io_uring_sqe *sqe,
					     void *user_data)
{
	io_uring_prep_rw(IORING_OP_POLL_REMOVE, sqe, -1, user_data, 0, 0);
}

static inline void io_uring_prep_poll_update(struct io_uring_sqe *sqe,
					     void *old_user_data,
					     void *new_user_data,
					     unsigned poll_mask, unsigned flags)
{
	io_uring_prep_rw(IORING_OP_POLL_REMOVE, sqe, -1, old_user_data, flags,
			 (__u64)new_user_data);
#if __BYTE_ORDER == __BIG_ENDIAN
	poll_mask = __swahw32(poll_mask);
#endif
	sqe->poll32_events = poll_mask;
}

static inline void io_uring_prep_fsync(struct io_uring_sqe *sqe, int fd,
				       unsigned fsync_flags)
{
	io_uring_prep_rw(IORING_OP_FSYNC, sqe, fd, NULL, 0, 0);
	sqe->fsync_flags = fsync_flags;
}

static inline void io_uring_prep_nop(struct io_uring_sqe *sqe)
{
	io_uring_prep_rw(IORING_OP_NOP, sqe, -1, NULL, 0, 0);
}

static inline void io_uring_prep_timeout(struct io_uring_sqe *sqe,
					 struct __kernel_timespec *ts,
					 unsigned count, unsigned flags)
{
	io_uring_prep_rw(IORING_OP_TIMEOUT, sqe, -1, ts, 1, count);
	sqe->timeout_flags = flags;
}

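/*
 * Example (illustrative sketch): a standalone 1-second timeout. With a count
 * of 0 it only completes on expiration, posting -ETIME in cqe->res. (liburing
 * itself reserves the LIBURING_UDATA_TIMEOUT user_data for the internal
 * timeouts it posts on behalf of io_uring_wait_cqes().)
 *
 *	struct __kernel_timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
 *
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_timeout(sqe, &ts, 0, 0);
 *	io_uring_submit(&ring);
 */
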
static inline void io_uring_prep_timeout_remove(struct io_uring_sqe *sqe,
						__u64 user_data, unsigned flags)
{
	io_uring_prep_rw(IORING_OP_TIMEOUT_REMOVE, sqe, -1,
				(void *)(unsigned long)user_data, 0, 0);
	sqe->timeout_flags = flags;
}

static inline void io_uring_prep_timeout_update(struct io_uring_sqe *sqe,
						struct __kernel_timespec *ts,
						__u64 user_data, unsigned flags)
{
	io_uring_prep_rw(IORING_OP_TIMEOUT_REMOVE, sqe, -1,
				(void *)(unsigned long)user_data, 0,
				(uintptr_t)ts);
	sqe->timeout_flags = flags | IORING_TIMEOUT_UPDATE;
}

static inline void io_uring_prep_accept(struct io_uring_sqe *sqe, int fd,
					struct sockaddr *addr,
					socklen_t *addrlen, int flags)
{
	io_uring_prep_rw(IORING_OP_ACCEPT, sqe, fd, addr, 0,
				(__u64) (unsigned long) addrlen);
	sqe->accept_flags = flags;
}

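/*
 * Example (illustrative sketch): accepting one connection asynchronously.
 * On success cqe->res is the new socket's file descriptor; addr/addrlen must
 * remain valid until the request completes. listen_fd is assumed to be a
 * listening socket set up by the application, and sockaddr_in needs
 * <netinet/in.h>.
 *
 *	struct sockaddr_in peer;
 *	socklen_t peer_len = sizeof(peer);
 *
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_accept(sqe, listen_fd, (struct sockaddr *) &peer,
 *			     &peer_len, SOCK_CLOEXEC);
 *	io_uring_submit(&ring);
 */
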
static inline void io_uring_prep_cancel(struct io_uring_sqe *sqe, void *user_data,
					int flags)
{
	io_uring_prep_rw(IORING_OP_ASYNC_CANCEL, sqe, -1, user_data, 0, 0);
	sqe->cancel_flags = flags;
}

static inline void io_uring_prep_link_timeout(struct io_uring_sqe *sqe,
					      struct __kernel_timespec *ts,
					      unsigned flags)
{
	io_uring_prep_rw(IORING_OP_LINK_TIMEOUT, sqe, -1, ts, 1, 0);
	sqe->timeout_flags = flags;
}

static inline void io_uring_prep_connect(struct io_uring_sqe *sqe, int fd,
					 const struct sockaddr *addr,
					 socklen_t addrlen)
{
	io_uring_prep_rw(IORING_OP_CONNECT, sqe, fd, addr, 0, addrlen);
}

static inline void io_uring_prep_files_update(struct io_uring_sqe *sqe,
					      int *fds, unsigned nr_fds,
					      int offset)
{
	io_uring_prep_rw(IORING_OP_FILES_UPDATE, sqe, -1, fds, nr_fds, offset);
}

static inline void io_uring_prep_fallocate(struct io_uring_sqe *sqe, int fd,
					   int mode, off_t offset, off_t len)
{

	io_uring_prep_rw(IORING_OP_FALLOCATE, sqe, fd,
			(const uintptr_t *) (unsigned long) len, mode, offset);
}

static inline void io_uring_prep_openat(struct io_uring_sqe *sqe, int dfd,
					const char *path, int flags, mode_t mode)
{
	io_uring_prep_rw(IORING_OP_OPENAT, sqe, dfd, path, mode, 0);
	sqe->open_flags = flags;
}

static inline void io_uring_prep_close(struct io_uring_sqe *sqe, int fd)
{
	io_uring_prep_rw(IORING_OP_CLOSE, sqe, fd, NULL, 0, 0);
}

static inline void io_uring_prep_read(struct io_uring_sqe *sqe, int fd,
				      void *buf, unsigned nbytes, off_t offset)
{
	io_uring_prep_rw(IORING_OP_READ, sqe, fd, buf, nbytes, offset);
}

static inline void io_uring_prep_write(struct io_uring_sqe *sqe, int fd,
				       const void *buf, unsigned nbytes, off_t offset)
{
	io_uring_prep_rw(IORING_OP_WRITE, sqe, fd, buf, nbytes, offset);
}

struct statx;
static inline void io_uring_prep_statx(struct io_uring_sqe *sqe, int dfd,
				const char *path, int flags, unsigned mask,
				struct statx *statxbuf)
{
	io_uring_prep_rw(IORING_OP_STATX, sqe, dfd, path, mask,
				(__u64) (unsigned long) statxbuf);
	sqe->statx_flags = flags;
}

static inline void io_uring_prep_fadvise(struct io_uring_sqe *sqe, int fd,
					 off_t offset, off_t len, int advice)
{
	io_uring_prep_rw(IORING_OP_FADVISE, sqe, fd, NULL, len, offset);
	sqe->fadvise_advice = advice;
}

static inline void io_uring_prep_madvise(struct io_uring_sqe *sqe, void *addr,
					 off_t length, int advice)
{
	io_uring_prep_rw(IORING_OP_MADVISE, sqe, -1, addr, length, 0);
	sqe->fadvise_advice = advice;
}

static inline void io_uring_prep_send(struct io_uring_sqe *sqe, int sockfd,
				      const void *buf, size_t len, int flags)
{
	io_uring_prep_rw(IORING_OP_SEND, sqe, sockfd, buf, len, 0);
	sqe->msg_flags = flags;
}

static inline void io_uring_prep_recv(struct io_uring_sqe *sqe, int sockfd,
				      void *buf, size_t len, int flags)
{
	io_uring_prep_rw(IORING_OP_RECV, sqe, sockfd, buf, len, 0);
	sqe->msg_flags = flags;
}

static inline void io_uring_prep_openat2(struct io_uring_sqe *sqe, int dfd,
					const char *path, struct open_how *how)
{
	io_uring_prep_rw(IORING_OP_OPENAT2, sqe, dfd, path, sizeof(*how),
				(uint64_t) (uintptr_t) how);
}

struct epoll_event;
static inline void io_uring_prep_epoll_ctl(struct io_uring_sqe *sqe, int epfd,
					   int fd, int op,
					   struct epoll_event *ev)
{
	io_uring_prep_rw(IORING_OP_EPOLL_CTL, sqe, epfd, ev, op, fd);
}

static inline void io_uring_prep_provide_buffers(struct io_uring_sqe *sqe,
						 void *addr, int len, int nr,
						 int bgid, int bid)
{
	io_uring_prep_rw(IORING_OP_PROVIDE_BUFFERS, sqe, nr, addr, len, bid);
	sqe->buf_group = bgid;
}

static inline void io_uring_prep_remove_buffers(struct io_uring_sqe *sqe,
						int nr, int bgid)
{
	io_uring_prep_rw(IORING_OP_REMOVE_BUFFERS, sqe, nr, NULL, 0, 0);
	sqe->buf_group = bgid;
}

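/*
 * Example (illustrative sketch): provided buffers with kernel-side selection.
 * A group of NR_BUFS buffers of BUF_LEN bytes each is handed to the kernel
 * under group id 1; a later recv marked IOSQE_BUFFER_SELECT lets the kernel
 * pick one, reporting its id in the upper bits of cqe->flags when
 * IORING_CQE_F_BUFFER is set. NR_BUFS, BUF_LEN, pool, sock_fd and bid are
 * assumed to be defined by the application.
 *
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_provide_buffers(sqe, pool, BUF_LEN, NR_BUFS, 1, 0);
 *	io_uring_submit(&ring);
 *
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_recv(sqe, sock_fd, NULL, BUF_LEN, 0);
 *	io_uring_sqe_set_flags(sqe, IOSQE_BUFFER_SELECT);
 *	sqe->buf_group = 1;
 *	io_uring_submit(&ring);
 *
 *	io_uring_wait_cqe(&ring, &cqe);
 *	if (cqe->flags & IORING_CQE_F_BUFFER)
 *		bid = cqe->flags >> IORING_CQE_BUFFER_SHIFT;
 */
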
static inline void io_uring_prep_shutdown(struct io_uring_sqe *sqe, int fd,
					  int how)
{
	io_uring_prep_rw(IORING_OP_SHUTDOWN, sqe, fd, NULL, how, 0);
}

static inline void io_uring_prep_unlinkat(struct io_uring_sqe *sqe, int dfd,
					  const char *path, int flags)
{
	io_uring_prep_rw(IORING_OP_UNLINKAT, sqe, dfd, path, 0, 0);
	sqe->unlink_flags = flags;
}

static inline void io_uring_prep_renameat(struct io_uring_sqe *sqe, int olddfd,
					  const char *oldpath, int newdfd,
					  const char *newpath, int flags)
{
	io_uring_prep_rw(IORING_OP_RENAMEAT, sqe, olddfd, oldpath, newdfd,
				(uint64_t) (uintptr_t) newpath);
	sqe->rename_flags = flags;
}

static inline void io_uring_prep_sync_file_range(struct io_uring_sqe *sqe,
						 int fd, unsigned len,
						 off_t offset, int flags)
{
	io_uring_prep_rw(IORING_OP_SYNC_FILE_RANGE, sqe, fd, NULL, len, offset);
	sqe->sync_range_flags = flags;
}

static inline void io_uring_prep_mkdirat(struct io_uring_sqe *sqe, int dfd,
					const char *path, mode_t mode)
{
	io_uring_prep_rw(IORING_OP_MKDIRAT, sqe, dfd, path, mode, 0);
}

static inline void io_uring_prep_symlinkat(struct io_uring_sqe *sqe,
					const char *target, int newdirfd, const char *linkpath)
{
	io_uring_prep_rw(IORING_OP_SYMLINKAT, sqe, newdirfd, target, 0,
				(uint64_t) (uintptr_t) linkpath);
}

static inline void io_uring_prep_linkat(struct io_uring_sqe *sqe, int olddfd,
					const char *oldpath, int newdfd,
					const char *newpath, int flags)
{
	io_uring_prep_rw(IORING_OP_LINKAT, sqe, olddfd, oldpath, newdfd,
				(uint64_t) (uintptr_t) newpath);
	sqe->hardlink_flags = flags;
}

/*
 * Returns the number of unconsumed (if SQPOLL) or unsubmitted entries
 * in the SQ ring
 */
static inline unsigned io_uring_sq_ready(const struct io_uring *ring)
{
	/*
	 * Without a barrier, we could miss an update and think the SQ wasn't ready.
	 * We don't need the load acquire for non-SQPOLL since then we drive updates.
	 */
	if (ring->flags & IORING_SETUP_SQPOLL)
		return ring->sq.sqe_tail - io_uring_smp_load_acquire(ring->sq.khead);

	/* always use real head, to avoid losing sync for short submit */
	return ring->sq.sqe_tail - *ring->sq.khead;
}

/*
 * Returns how much space is left in the SQ ring.
 */
static inline unsigned io_uring_sq_space_left(const struct io_uring *ring)
{
	return *ring->sq.kring_entries - io_uring_sq_ready(ring);
}

/*
 * Only applicable when using SQPOLL - allows the caller to wait for space
 * to free up in the SQ ring, which happens when the kernel side thread has
 * consumed one or more entries. If the SQ ring is currently non-full, no
 * action is taken. Note: may return -EINVAL if the kernel doesn't support
 * this feature.
 */
static inline int io_uring_sqring_wait(struct io_uring *ring)
{
	if (!(ring->flags & IORING_SETUP_SQPOLL))
		return 0;
	if (io_uring_sq_space_left(ring))
		return 0;

	return __io_uring_sqring_wait(ring);
}

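/*
 * Example (illustrative sketch): a submission path for SQPOLL rings that
 * waits for free SQ space instead of spinning when the kernel thread falls
 * behind. For non-SQPOLL rings, or when space is already available, the wait
 * is a no-op. prep_next_request() stands in for hypothetical application
 * prep code.
 *
 *	int ret = io_uring_sqring_wait(&ring);
 *
 *	if (ret < 0)
 *		return ret;
 *	sqe = io_uring_get_sqe(&ring);
 *	if (sqe) {
 *		prep_next_request(sqe);
 *		io_uring_submit(&ring);
 *	}
 */
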
/*
 * Returns how many unconsumed entries are ready in the CQ ring
 */
static inline unsigned io_uring_cq_ready(const struct io_uring *ring)
{
	return io_uring_smp_load_acquire(ring->cq.ktail) - *ring->cq.khead;
}

/*
 * Returns true if the eventfd notification is currently enabled
 */
static inline bool io_uring_cq_eventfd_enabled(const struct io_uring *ring)
{
	if (!ring->cq.kflags)
		return true;

	return !(*ring->cq.kflags & IORING_CQ_EVENTFD_DISABLED);
}

/*
 * Toggle eventfd notification on or off, if an eventfd is registered with
 * the ring.
 */
static inline int io_uring_cq_eventfd_toggle(struct io_uring *ring,
					     bool enabled)
{
	uint32_t flags;

	if (!!enabled == io_uring_cq_eventfd_enabled(ring))
		return 0;

	if (!ring->cq.kflags)
		return -EOPNOTSUPP;

	flags = *ring->cq.kflags;

	if (enabled)
		flags &= ~IORING_CQ_EVENTFD_DISABLED;
	else
		flags |= IORING_CQ_EVENTFD_DISABLED;

	IO_URING_WRITE_ONCE(*ring->cq.kflags, flags);

	return 0;
}

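/*
 * Example (illustrative sketch): registering an eventfd so completions can be
 * picked up from an existing poll/epoll loop, and pausing notifications
 * around a burst of submissions with io_uring_cq_eventfd_toggle().
 * queue_many_requests() is a hypothetical application helper, and eventfd()
 * needs <sys/eventfd.h>.
 *
 *	int efd = eventfd(0, EFD_CLOEXEC);
 *
 *	io_uring_register_eventfd(&ring, efd);
 *
 *	io_uring_cq_eventfd_toggle(&ring, false);
 *	queue_many_requests(&ring);
 *	io_uring_cq_eventfd_toggle(&ring, true);
 */
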
/*
 * Return an IO completion, waiting for 'wait_nr' completions if one isn't
 * readily available. Returns 0 with cqe_ptr filled in on success, -errno on
 * failure.
 */
static inline int io_uring_wait_cqe_nr(struct io_uring *ring,
				      struct io_uring_cqe **cqe_ptr,
				      unsigned wait_nr)
{
	return __io_uring_get_cqe(ring, cqe_ptr, 0, wait_nr, NULL);
}

/*
 * Return an IO completion, if one is readily available. Returns 0 with
 * cqe_ptr filled in on success, -errno on failure.
 */
static inline int io_uring_peek_cqe(struct io_uring *ring,
				    struct io_uring_cqe **cqe_ptr)
{
	return io_uring_wait_cqe_nr(ring, cqe_ptr, 0);
}

/*
 * Return an IO completion, waiting for it if necessary. Returns 0 with
 * cqe_ptr filled in on success, -errno on failure.
 */
static inline int io_uring_wait_cqe(struct io_uring *ring,
				    struct io_uring_cqe **cqe_ptr)
{
	return io_uring_wait_cqe_nr(ring, cqe_ptr, 1);
}

ssize_t io_uring_mlock_size(unsigned entries, unsigned flags);
ssize_t io_uring_mlock_size_params(unsigned entries, struct io_uring_params *p);

#ifdef __cplusplus
}
#endif

#endif