1 /* SPDX-License-Identifier: MIT */
2 /*
3 * Description: uring_cmd based ublk
4 *
5 * Covers cancellable uring_cmd feature.
6 */
7 #include <unistd.h>
8 #include <stdlib.h>
9 #include <assert.h>
10 #include <stdio.h>
11 #include <stdarg.h>
12 #include <string.h>
13 #include <pthread.h>
14 #include <limits.h>
15 #include <poll.h>
16 #include <sys/syscall.h>
17 #include <sys/mman.h>
18 #include <sys/ioctl.h>
19 #include <sys/inotify.h>
20 #include <sys/wait.h>
21
22 #include "liburing.h"
23 #include "helpers.h"
24 #ifdef CONFIG_HAVE_UBLK_HEADER
25 #include <linux/ublk_cmd.h>
26
27 /****************** part 1: libublk ********************/
28
29 #define CTRL_DEV "/dev/ublk-control"
30 #define UBLKC_DEV "/dev/ublkc"
31 #define UBLKB_DEV "/dev/ublkb"
32 #define UBLK_CTRL_RING_DEPTH 32
33
34 /* queue idle timeout */
35 #define UBLKSRV_IO_IDLE_SECS 20
36
37 #define UBLK_IO_MAX_BYTES 65536
38 #define UBLK_MAX_QUEUES 4
39 #define UBLK_QUEUE_DEPTH 128
40
41 #define UBLK_DBG_DEV (1U << 0)
42 #define UBLK_DBG_QUEUE (1U << 1)
43 #define UBLK_DBG_IO_CMD (1U << 2)
44 #define UBLK_DBG_IO (1U << 3)
45 #define UBLK_DBG_CTRL_CMD (1U << 4)
46 #define UBLK_LOG (1U << 5)
47
48 struct ublk_dev;
49 struct ublk_queue;
50
/* Argument bundle describing one ublk control uring_cmd (see __ublk_ctrl_cmd()). */
struct ublk_ctrl_cmd_data {
	__u32 cmd_op;		/* UBLK_U_CMD_* ioctl-encoded opcode */
#define CTRL_CMD_HAS_DATA 1
#define CTRL_CMD_HAS_BUF 2
	__u32 flags;		/* which of data[]/addr+len below are valid */

	__u64 data[2];		/* inline payload (used when CTRL_CMD_HAS_DATA) */
	__u64 addr;		/* user buffer address (used when CTRL_CMD_HAS_BUF) */
	__u32 len;		/* user buffer length (used when CTRL_CMD_HAS_BUF) */
};
61
/* Per-tag io state tracked by the server side of one queue. */
struct ublk_io {
	char *buf_addr;		/* page-aligned data buffer for this tag */

#define UBLKSRV_NEED_FETCH_RQ (1UL << 0)
#define UBLKSRV_NEED_COMMIT_RQ_COMP (1UL << 1)
#define UBLKSRV_IO_FREE (1UL << 2)
	unsigned int flags;	/* UBLKSRV_* state bits defined above */

	unsigned int result;	/* completion result to commit back to driver */
};
72
/* Target implementation hooks; only "null" is registered in this test. */
struct ublk_tgt_ops {
	const char *name;				/* lookup key for ublk_find_tgt() */
	int (*init_tgt)(struct ublk_dev *);		/* fill dev->tgt (params, size) */
	void (*deinit_tgt)(struct ublk_dev *);		/* optional teardown */

	int (*queue_io)(struct ublk_queue *, int tag);	/* serve one request */
	void (*tgt_io_done)(struct ublk_queue *,
			int tag, const struct io_uring_cqe *);	/* target CQE callback */
};
82
/* Target state attached to a device. */
struct ublk_tgt {
	unsigned long dev_size;		/* advertised capacity in bytes */
	const struct ublk_tgt_ops *ops;	/* hooks for this target type */
	struct ublk_params params;	/* parameters sent via SET_PARAMS */
};
88
/* One hardware queue: owns its io_uring and is served by one thread. */
struct ublk_queue {
	int q_id;			/* queue index within the device */
	int q_depth;			/* number of tags (<= UBLK_QUEUE_DEPTH) */
	unsigned int cmd_inflight;	/* outstanding FETCH/COMMIT uring_cmds */
	unsigned int io_inflight;	/* outstanding target io */
	struct ublk_dev *dev;		/* owning device */
	const struct ublk_tgt_ops *tgt_ops;
	char *io_cmd_buf;		/* mmap'ed ublksrv_io_desc array from driver */
	struct io_uring ring;		/* per-queue ring for io commands */
	struct ublk_io ios[UBLK_QUEUE_DEPTH];
#define UBLKSRV_QUEUE_STOPPING (1U << 0)
#define UBLKSRV_QUEUE_IDLE (1U << 1)
	unsigned state;			/* UBLKSRV_QUEUE_* bits */
	pid_t tid;			/* serving thread's kernel tid */
	pthread_t thread;		/* serving thread handle */
};
105
/* One ublk device: control-side ring plus its hardware queues. */
struct ublk_dev {
	struct ublk_tgt tgt;				/* target state/hooks */
	struct ublksrv_ctrl_dev_info dev_info;		/* driver-visible device info */
	struct ublk_queue q[UBLK_MAX_QUEUES];

	int fds[2];	/* fds[0] points to /dev/ublkcN */
	int nr_fds;	/* number of valid entries in fds[] */
	int ctrl_fd;	/* /dev/ublk-control */
	struct io_uring ring;	/* ring used for control commands only */
};
116
/* offsetof/container_of fallbacks for toolchains that don't provide them */
#ifndef offsetof
#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER)
#endif

#ifndef container_of
#define container_of(ptr, type, member) ({ \
	unsigned long __mptr = (unsigned long)(ptr); \
	((type *)(__mptr - offsetof(type, member))); })
#endif

/* round val up to the next multiple of rnd (rnd must be a power of two) */
#define round_up(val, rnd) \
	(((val) + ((rnd) - 1)) & ~((rnd) - 1))

/* bitmask of UBLK_DBG_ / UBLK_LOG bits enabled at runtime (0 = silent) */
static unsigned int ublk_dbg_mask = 0;

static const struct ublk_tgt_ops *ublk_find_tgt(const char *name);
133
/* True iff bit 63 of @user_data flags this CQE as target io. */
static inline int is_target_io(__u64 user_data)
{
	return !!(user_data >> 63);
}
138
/*
 * Pack tag (16 bits), command opcode (8 bits), target data (16 bits) and
 * the target-io flag (bit 63) into a CQE user_data value.
 */
static inline __u64 build_user_data(unsigned tag, unsigned op,
			unsigned tgt_data, unsigned is_target_io)
{
	assert(!(tag >> 16) && !(op >> 8) && !(tgt_data >> 16));

	/*
	 * Widen before shifting: "tgt_data << 24" in 32-bit int overflows
	 * (undefined behavior) for any tgt_data > 0x7f.
	 */
	return tag | ((__u64)op << 16) | ((__u64)tgt_data << 24) |
		(__u64)is_target_io << 63;
}
146
/* Recover the low 16-bit tag from a CQE user_data value. */
static inline unsigned int user_data_to_tag(__u64 user_data)
{
	return (unsigned int)(user_data & 0xffff);
}
151
/* Recover the 8-bit command opcode (bits 16..23) from user_data. */
static inline unsigned int user_data_to_op(__u64 user_data)
{
	return (unsigned int)((user_data >> 16) & 0xff);
}
156
/* Unconditionally print an error message to stderr. */
static void ublk_err(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);	/* was missing: every va_start needs a matching va_end */
}
164
ublk_dbg(int level,const char * fmt,...)165 static void ublk_dbg(int level, const char *fmt, ...)
166 {
167 if (level & ublk_dbg_mask) {
168 va_list ap;
169 va_start(ap, fmt);
170 vfprintf(stdout, fmt, ap);
171 }
172 }
173
ublk_get_sqe_cmd(const struct io_uring_sqe * sqe)174 static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe)
175 {
176 return (void *)&sqe->cmd;
177 }
178
ublk_mark_io_done(struct ublk_io * io,int res)179 static inline void ublk_mark_io_done(struct ublk_io *io, int res)
180 {
181 io->flags |= (UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_IO_FREE);
182 io->result = res;
183 }
184
ublk_get_iod(const struct ublk_queue * q,int tag)185 static inline const struct ublksrv_io_desc *ublk_get_iod(
186 const struct ublk_queue *q, int tag)
187 {
188 return (struct ublksrv_io_desc *)
189 &(q->io_cmd_buf[tag * sizeof(struct ublksrv_io_desc)]);
190 }
191
/* Store the 32-bit ioctl-encoded opcode in the low word of sqe->off. */
static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe,
		__u32 cmd_op)
{
	__u32 *cmd_op_slot = (__u32 *)&sqe->off;

	cmd_op_slot[0] = cmd_op;	/* low word: the opcode itself */
	cmd_op_slot[1] = 0;		/* high word: must stay zero */
}
200
ublk_setup_ring(struct io_uring * r,int depth,int cq_depth,unsigned flags)201 static inline int ublk_setup_ring(struct io_uring *r, int depth,
202 int cq_depth, unsigned flags)
203 {
204 struct io_uring_params p;
205
206 memset(&p, 0, sizeof(p));
207 p.flags = flags | IORING_SETUP_CQSIZE;
208 p.cq_entries = cq_depth;
209
210 return io_uring_queue_init_params(depth, r, &p);
211 }
212
/*
 * Fill @sqe as a ublk control uring_cmd described by @data.
 *
 * The ublksrv_ctrl_cmd payload lives inside the big (128-byte) SQE; the
 * addr/len pair and data[0] are copied only when the matching flag is set.
 */
static void ublk_ctrl_init_cmd(struct ublk_dev *dev,
		struct io_uring_sqe *sqe,
		struct ublk_ctrl_cmd_data *data)
{
	struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
	struct ublksrv_ctrl_cmd *cmd = (struct ublksrv_ctrl_cmd *)ublk_get_sqe_cmd(sqe);

	sqe->fd = dev->ctrl_fd;
	sqe->opcode = IORING_OP_URING_CMD;
	sqe->ioprio = 0;

	if (data->flags & CTRL_CMD_HAS_BUF) {
		cmd->addr = data->addr;
		cmd->len = data->len;
	}

	if (data->flags & CTRL_CMD_HAS_DATA)
		cmd->data[0] = data->data[0];

	cmd->dev_id = info->dev_id;
	cmd->queue_id = -1;	/* control commands are not bound to a queue */

	ublk_set_sqe_cmd_op(sqe, data->cmd_op);

	io_uring_sqe_set_data(sqe, cmd);
}
239
__ublk_ctrl_cmd(struct ublk_dev * dev,struct ublk_ctrl_cmd_data * data)240 static int __ublk_ctrl_cmd(struct ublk_dev *dev,
241 struct ublk_ctrl_cmd_data *data)
242 {
243 struct io_uring_sqe *sqe;
244 struct io_uring_cqe *cqe;
245 int ret = -EINVAL;
246
247 sqe = io_uring_get_sqe(&dev->ring);
248 if (!sqe) {
249 ublk_err("%s: can't get sqe ret %d\n", __func__, ret);
250 return ret;
251 }
252
253 ublk_ctrl_init_cmd(dev, sqe, data);
254
255 ret = io_uring_submit(&dev->ring);
256 if (ret < 0) {
257 ublk_err("uring submit ret %d\n", ret);
258 return ret;
259 }
260
261 ret = io_uring_wait_cqe(&dev->ring, &cqe);
262 if (ret < 0) {
263 ublk_err("wait cqe: %s\n", strerror(-ret));
264 return ret;
265 }
266 io_uring_cqe_seen(&dev->ring, cqe);
267
268 return cqe->res;
269 }
270
ublk_ctrl_start_dev(struct ublk_dev * dev,int daemon_pid)271 static int ublk_ctrl_start_dev(struct ublk_dev *dev,
272 int daemon_pid)
273 {
274 struct ublk_ctrl_cmd_data data = {
275 .cmd_op = UBLK_U_CMD_START_DEV,
276 .flags = CTRL_CMD_HAS_DATA,
277 };
278
279 dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid;
280
281 return __ublk_ctrl_cmd(dev, &data);
282 }
283
ublk_ctrl_add_dev(struct ublk_dev * dev)284 static int ublk_ctrl_add_dev(struct ublk_dev *dev)
285 {
286 struct ublk_ctrl_cmd_data data = {
287 .cmd_op = UBLK_U_CMD_ADD_DEV,
288 .flags = CTRL_CMD_HAS_BUF,
289 .addr = (__u64) (uintptr_t) &dev->dev_info,
290 .len = sizeof(struct ublksrv_ctrl_dev_info),
291 };
292
293 return __ublk_ctrl_cmd(dev, &data);
294 }
295
ublk_ctrl_del_dev(struct ublk_dev * dev)296 static int ublk_ctrl_del_dev(struct ublk_dev *dev)
297 {
298 struct ublk_ctrl_cmd_data data = {
299 .cmd_op = UBLK_U_CMD_DEL_DEV,
300 .flags = 0,
301 };
302
303 return __ublk_ctrl_cmd(dev, &data);
304 }
305
ublk_ctrl_get_info(struct ublk_dev * dev)306 static int ublk_ctrl_get_info(struct ublk_dev *dev)
307 {
308 struct ublk_ctrl_cmd_data data = {
309 .cmd_op = UBLK_U_CMD_GET_DEV_INFO,
310 .flags = CTRL_CMD_HAS_BUF,
311 .addr = (__u64) (uintptr_t) &dev->dev_info,
312 .len = sizeof(struct ublksrv_ctrl_dev_info),
313 };
314
315 return __ublk_ctrl_cmd(dev, &data);
316 }
317
ublk_ctrl_set_params(struct ublk_dev * dev,struct ublk_params * params)318 static int ublk_ctrl_set_params(struct ublk_dev *dev,
319 struct ublk_params *params)
320 {
321 struct ublk_ctrl_cmd_data data = {
322 .cmd_op = UBLK_U_CMD_SET_PARAMS,
323 .flags = CTRL_CMD_HAS_BUF,
324 .addr = (__u64) (uintptr_t) params,
325 .len = sizeof(*params),
326 };
327 params->len = sizeof(*params);
328 return __ublk_ctrl_cmd(dev, &data);
329 }
330
/* GET_FEATURES: read the driver's UBLK_F_* feature bitmask into @features. */
static int ublk_ctrl_get_features(struct ublk_dev *dev,
		__u64 *features)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_GET_FEATURES,
		.flags = CTRL_CMD_HAS_BUF,
		.len = sizeof(*features),
	};

	data.addr = (__u64) (uintptr_t) features;

	return __ublk_ctrl_cmd(dev, &data);
}
343
ublk_ctrl_deinit(struct ublk_dev * dev)344 static void ublk_ctrl_deinit(struct ublk_dev *dev)
345 {
346 close(dev->ctrl_fd);
347 free(dev);
348 }
349
ublk_ctrl_init(void)350 static struct ublk_dev *ublk_ctrl_init(void)
351 {
352 struct ublk_dev *dev = (struct ublk_dev *)calloc(1, sizeof(*dev));
353 struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
354 int ret;
355
356 dev->ctrl_fd = open(CTRL_DEV, O_RDWR);
357 if (dev->ctrl_fd < 0) {
358 free(dev);
359 return NULL;
360 }
361
362 info->max_io_buf_bytes = UBLK_IO_MAX_BYTES;
363
364 ret = ublk_setup_ring(&dev->ring, UBLK_CTRL_RING_DEPTH,
365 UBLK_CTRL_RING_DEPTH, IORING_SETUP_SQE128);
366 if (ret < 0) {
367 ublk_err("queue_init: %s\n", strerror(-ret));
368 free(dev);
369 return NULL;
370 }
371 dev->nr_fds = 1;
372
373 return dev;
374 }
375
ublk_queue_cmd_buf_sz(struct ublk_queue * q)376 static int ublk_queue_cmd_buf_sz(struct ublk_queue *q)
377 {
378 int size = q->q_depth * sizeof(struct ublksrv_io_desc);
379 unsigned int page_sz = getpagesize();
380
381 return round_up(size, page_sz);
382 }
383
ublk_queue_deinit(struct ublk_queue * q)384 static void ublk_queue_deinit(struct ublk_queue *q)
385 {
386 int i;
387 int nr_ios = q->q_depth;
388
389 io_uring_unregister_ring_fd(&q->ring);
390
391 if (q->ring.ring_fd > 0) {
392 io_uring_unregister_files(&q->ring);
393 close(q->ring.ring_fd);
394 q->ring.ring_fd = -1;
395 }
396
397 if (q->io_cmd_buf)
398 munmap(q->io_cmd_buf, ublk_queue_cmd_buf_sz(q));
399
400 for (i = 0; i < nr_ios; i++)
401 free(q->ios[i].buf_addr);
402 }
403
ublk_queue_init(struct ublk_queue * q)404 static int ublk_queue_init(struct ublk_queue *q)
405 {
406 struct ublk_dev *dev = q->dev;
407 int depth = dev->dev_info.queue_depth;
408 int i, ret = -1;
409 int cmd_buf_size, io_buf_size;
410 unsigned long off;
411 int ring_depth = depth, cq_depth = depth;
412
413 q->tgt_ops = dev->tgt.ops;
414 q->state = 0;
415 q->q_depth = depth;
416 q->cmd_inflight = 0;
417 q->tid = gettid();
418
419 cmd_buf_size = ublk_queue_cmd_buf_sz(q);
420 off = UBLKSRV_CMD_BUF_OFFSET +
421 q->q_id * (UBLK_MAX_QUEUE_DEPTH * sizeof(struct ublksrv_io_desc));
422 q->io_cmd_buf = (char *)mmap(0, cmd_buf_size, PROT_READ,
423 MAP_SHARED | MAP_POPULATE, dev->fds[0], off);
424 if (q->io_cmd_buf == MAP_FAILED) {
425 ublk_err("ublk dev %d queue %d map io_cmd_buf failed %m\n",
426 q->dev->dev_info.dev_id, q->q_id);
427 goto fail;
428 }
429
430 io_buf_size = dev->dev_info.max_io_buf_bytes;
431 for (i = 0; i < q->q_depth; i++) {
432 q->ios[i].buf_addr = NULL;
433
434 if (posix_memalign((void **)&q->ios[i].buf_addr,
435 getpagesize(), io_buf_size)) {
436 ublk_err("ublk dev %d queue %d io %d posix_memalign failed %m\n",
437 dev->dev_info.dev_id, q->q_id, i);
438 goto fail;
439 }
440 q->ios[i].flags = UBLKSRV_NEED_FETCH_RQ | UBLKSRV_IO_FREE;
441 }
442
443 ret = ublk_setup_ring(&q->ring, ring_depth, cq_depth,
444 IORING_SETUP_COOP_TASKRUN);
445 if (ret < 0) {
446 ublk_err("ublk dev %d queue %d setup io_uring failed %d\n",
447 q->dev->dev_info.dev_id, q->q_id, ret);
448 goto fail;
449 }
450
451 io_uring_register_ring_fd(&q->ring);
452
453 ret = io_uring_register_files(&q->ring, dev->fds, dev->nr_fds);
454 if (ret) {
455 ublk_err("ublk dev %d queue %d register files failed %d\n",
456 q->dev->dev_info.dev_id, q->q_id, ret);
457 goto fail;
458 }
459
460 return 0;
461 fail:
462 ublk_queue_deinit(q);
463 ublk_err("ublk dev %d queue %d failed\n",
464 dev->dev_info.dev_id, q->q_id);
465 return -ENOMEM;
466 }
467
ublk_dev_prep(struct ublk_dev * dev)468 static int ublk_dev_prep(struct ublk_dev *dev)
469 {
470 int dev_id = dev->dev_info.dev_id;
471 char buf[64];
472 int ret = 0;
473
474 snprintf(buf, 64, "%s%d", UBLKC_DEV, dev_id);
475 dev->fds[0] = open(buf, O_RDWR);
476 if (dev->fds[0] < 0) {
477 ret = -EBADF;
478 ublk_err("can't open %s, ret %d\n", buf, dev->fds[0]);
479 goto fail;
480 }
481
482 if (dev->tgt.ops->init_tgt)
483 ret = dev->tgt.ops->init_tgt(dev);
484
485 return ret;
486 fail:
487 close(dev->fds[0]);
488 return ret;
489 }
490
ublk_dev_unprep(struct ublk_dev * dev)491 static void ublk_dev_unprep(struct ublk_dev *dev)
492 {
493 if (dev->tgt.ops->deinit_tgt)
494 dev->tgt.ops->deinit_tgt(dev);
495 close(dev->fds[0]);
496 }
497
ublk_queue_io_cmd(struct ublk_queue * q,struct ublk_io * io,unsigned tag)498 static int ublk_queue_io_cmd(struct ublk_queue *q,
499 struct ublk_io *io, unsigned tag)
500 {
501 struct ublksrv_io_cmd *cmd;
502 struct io_uring_sqe *sqe;
503 unsigned int cmd_op = 0;
504 __u64 user_data;
505
506 /* only freed io can be issued */
507 if (!(io->flags & UBLKSRV_IO_FREE))
508 return 0;
509
510 /* we issue because we need either fetching or committing */
511 if (!(io->flags &
512 (UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP)))
513 return 0;
514
515 if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP)
516 cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ;
517 else if (io->flags & UBLKSRV_NEED_FETCH_RQ)
518 cmd_op = UBLK_U_IO_FETCH_REQ;
519
520 sqe = io_uring_get_sqe(&q->ring);
521 if (!sqe) {
522 ublk_err("%s: run out of sqe %d, tag %d\n",
523 __func__, q->q_id, tag);
524 return -1;
525 }
526
527 cmd = (struct ublksrv_io_cmd *)ublk_get_sqe_cmd(sqe);
528
529 if (cmd_op == UBLK_U_IO_COMMIT_AND_FETCH_REQ)
530 cmd->result = io->result;
531
532 /* These fields should be written once, never change */
533 ublk_set_sqe_cmd_op(sqe, cmd_op);
534 sqe->fd = 0; /* dev->fds[0] */
535 sqe->opcode = IORING_OP_URING_CMD;
536 sqe->flags = IOSQE_FIXED_FILE;
537 sqe->rw_flags = 0;
538 cmd->tag = tag;
539 cmd->addr = (__u64) (uintptr_t) io->buf_addr;
540 cmd->q_id = q->q_id;
541
542 user_data = build_user_data(tag, _IOC_NR(cmd_op), 0, 0);
543 io_uring_sqe_set_data64(sqe, user_data);
544
545 io->flags = 0;
546
547 q->cmd_inflight += 1;
548
549 ublk_dbg(UBLK_DBG_IO_CMD, "%s: (qid %d tag %u cmd_op %u) iof %x stopping %d\n",
550 __func__, q->q_id, tag, cmd_op,
551 io->flags, !!(q->state & UBLKSRV_QUEUE_STOPPING));
552 return 1;
553 }
554
ublk_complete_io(struct ublk_queue * q,unsigned tag,int res)555 static int ublk_complete_io(struct ublk_queue *q,
556 unsigned tag, int res)
557 {
558 struct ublk_io *io = &q->ios[tag];
559
560 ublk_mark_io_done(io, res);
561
562 return ublk_queue_io_cmd(q, io, tag);
563 }
564
ublk_submit_fetch_commands(struct ublk_queue * q)565 static void ublk_submit_fetch_commands(struct ublk_queue *q)
566 {
567 int i = 0;
568
569 for (i = 0; i < q->q_depth; i++)
570 ublk_queue_io_cmd(q, &q->ios[i], i);
571 }
572
ublk_queue_is_idle(struct ublk_queue * q)573 static int ublk_queue_is_idle(struct ublk_queue *q)
574 {
575 return !io_uring_sq_ready(&q->ring) && !q->io_inflight;
576 }
577
ublk_queue_is_done(struct ublk_queue * q)578 static int ublk_queue_is_done(struct ublk_queue *q)
579 {
580 return (q->state & UBLKSRV_QUEUE_STOPPING) && ublk_queue_is_idle(q);
581 }
582
ublksrv_handle_tgt_cqe(struct ublk_queue * q,struct io_uring_cqe * cqe)583 static inline void ublksrv_handle_tgt_cqe(struct ublk_queue *q,
584 struct io_uring_cqe *cqe)
585 {
586 unsigned tag = user_data_to_tag(cqe->user_data);
587
588 if (cqe->res < 0 && cqe->res != -EAGAIN)
589 ublk_err("%s: failed tgt io: res %d qid %u tag %u, cmd_op %u\n",
590 __func__, cqe->res, q->q_id,
591 user_data_to_tag(cqe->user_data),
592 user_data_to_op(cqe->user_data));
593
594 if (q->tgt_ops->tgt_io_done)
595 q->tgt_ops->tgt_io_done(q, tag, cqe);
596 }
597
/*
 * Dispatch one CQE.  Target-io completions go to the target handler;
 * ublk command completions drive the per-tag fetch/commit state machine.
 */
static void ublk_handle_cqe(struct io_uring *r,
		struct io_uring_cqe *cqe, void *data)
{
	struct ublk_queue *q = container_of(r, struct ublk_queue, ring);
	unsigned tag = user_data_to_tag(cqe->user_data);
	unsigned cmd_op = user_data_to_op(cqe->user_data);
	/* keep fetching unless the driver aborted us or we are stopping */
	int fetch = (cqe->res != UBLK_IO_RES_ABORT) &&
		!(q->state & UBLKSRV_QUEUE_STOPPING);
	struct ublk_io *io;

	ublk_dbg(UBLK_DBG_IO_CMD, "%s: res %d (qid %d tag %u cmd_op %u target %d) stopping %d\n",
			__func__, cqe->res, q->q_id, tag, cmd_op,
			is_target_io(cqe->user_data),
			(q->state & UBLKSRV_QUEUE_STOPPING));

	/* Don't retrieve io in case of target io */
	if (is_target_io(cqe->user_data)) {
		ublksrv_handle_tgt_cqe(q, cqe);
		return;
	}

	io = &q->ios[tag];
	q->cmd_inflight--;

	if (!fetch) {
		/* stop issuing FETCH_REQ for this tag; queue winds down */
		q->state |= UBLKSRV_QUEUE_STOPPING;
		io->flags &= ~UBLKSRV_NEED_FETCH_RQ;
	}

	if (cqe->res == UBLK_IO_RES_OK) {
		/* driver delivered a request for this tag: serve it now */
		assert(tag < q->q_depth);
		q->tgt_ops->queue_io(q, tag);
	} else {
		/*
		 * COMMIT_REQ will be completed immediately since no fetching
		 * piggyback is required.
		 *
		 * Marking IO_FREE only, then this io won't be issued since
		 * we only issue io with (UBLKSRV_IO_FREE | UBLKSRV_NEED_*)
		 *
		 * */
		io->flags = UBLKSRV_IO_FREE;
	}
}
642
ublk_reap_events_uring(struct io_uring * r)643 static int ublk_reap_events_uring(struct io_uring *r)
644 {
645 struct io_uring_cqe *cqe;
646 unsigned head;
647 int count = 0;
648
649 io_uring_for_each_cqe(r, head, cqe) {
650 ublk_handle_cqe(r, cqe, NULL);
651 count += 1;
652 }
653 io_uring_cq_advance(r, count);
654
655 return count;
656 }
657
ublk_process_io(struct ublk_queue * q)658 static int ublk_process_io(struct ublk_queue *q)
659 {
660 int ret, reapped;
661
662 ublk_dbg(UBLK_DBG_QUEUE, "dev%d-q%d: to_submit %d inflight cmd %u stopping %d\n",
663 q->dev->dev_info.dev_id,
664 q->q_id, io_uring_sq_ready(&q->ring),
665 q->cmd_inflight,
666 (q->state & UBLKSRV_QUEUE_STOPPING));
667
668 if (ublk_queue_is_done(q))
669 return -ENODEV;
670
671 ret = io_uring_submit_and_wait(&q->ring, 1);
672 reapped = ublk_reap_events_uring(&q->ring);
673
674 ublk_dbg(UBLK_DBG_QUEUE, "submit result %d, reapped %d stop %d idle %d\n",
675 ret, reapped, (q->state & UBLKSRV_QUEUE_STOPPING),
676 (q->state & UBLKSRV_QUEUE_IDLE));
677
678 return reapped;
679 }
680
ublk_io_handler_fn(void * data)681 static void *ublk_io_handler_fn(void *data)
682 {
683 struct ublk_queue *q = data;
684 int dev_id = q->dev->dev_info.dev_id;
685 int ret;
686
687 ret = ublk_queue_init(q);
688 if (ret) {
689 ublk_err("ublk dev %d queue %d init queue failed\n",
690 dev_id, q->q_id);
691 return NULL;
692 }
693 ublk_dbg(UBLK_DBG_QUEUE, "tid %d: ublk dev %d queue %d started\n",
694 q->tid, dev_id, q->q_id);
695
696 /* submit all io commands to ublk driver */
697 ublk_submit_fetch_commands(q);
698 do {
699 if (ublk_process_io(q) < 0)
700 break;
701 } while (1);
702
703 ublk_dbg(UBLK_DBG_QUEUE, "ublk dev %d queue %d exited\n", dev_id, q->q_id);
704 ublk_queue_deinit(q);
705 return NULL;
706 }
707
ublk_set_parameters(struct ublk_dev * dev)708 static void ublk_set_parameters(struct ublk_dev *dev)
709 {
710 int ret;
711
712 ret = ublk_ctrl_set_params(dev, &dev->tgt.params);
713 if (ret)
714 ublk_err("dev %d set basic parameter failed %d\n",
715 dev->dev_info.dev_id, ret);
716 }
717
/*
 * Daemonize, open the char device, spawn one io thread per hardware queue,
 * then START_DEV and block until every queue thread exits.
 *
 * NOTE(review): pthread_create() results are unchecked, and the fail:
 * path does not join already-started threads before ublk_dev_unprep() -
 * acceptable for a test fixture, worth confirming if reused elsewhere.
 */
static int ublk_start_daemon(struct ublk_dev *dev)
{
	int ret, i;
	void *thread_ret;
	const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info;

	/* detach from the test process; keep cwd and std fds */
	if (daemon(1, 1) < 0)
		return -errno;

	ublk_dbg(UBLK_DBG_DEV, "%s enter\n", __func__);

	ret = ublk_dev_prep(dev);
	if (ret)
		return ret;

	for (i = 0; i < dinfo->nr_hw_queues; i++) {
		dev->q[i].dev = dev;
		dev->q[i].q_id = i;
		pthread_create(&dev->q[i].thread, NULL,
				ublk_io_handler_fn,
				&dev->q[i]);
	}

	/* everything is fine now, start us */
	ublk_set_parameters(dev);
	ret = ublk_ctrl_start_dev(dev, getpid());
	if (ret < 0) {
		ublk_err("%s: ublk_ctrl_start_dev failed: %d\n", __func__, ret);
		goto fail;
	}

	/* wait until we are terminated */
	for (i = 0; i < dinfo->nr_hw_queues; i++)
		pthread_join(dev->q[i].thread, &thread_ret);
fail:
	ublk_dev_unprep(dev);
	ublk_dbg(UBLK_DBG_DEV, "%s exit\n", __func__);

	return ret;
}
758
wait_ublk_dev(char * dev_name,int evt_mask,unsigned timeout)759 static int wait_ublk_dev(char *dev_name, int evt_mask, unsigned timeout)
760 {
761 #define EV_SIZE (sizeof(struct inotify_event))
762 #define EV_BUF_LEN (128 * (EV_SIZE + 16))
763 struct pollfd pfd;
764 int fd, wd;
765 int ret = -EINVAL;
766
767 fd = inotify_init();
768 if (fd < 0) {
769 ublk_dbg(UBLK_DBG_DEV, "%s: inotify init failed\n", __func__);
770 return fd;
771 }
772
773 wd = inotify_add_watch(fd, "/dev", evt_mask);
774 if (wd == -1) {
775 ublk_dbg(UBLK_DBG_DEV, "%s: add watch for /dev failed\n", __func__);
776 goto fail;
777 }
778
779 pfd.fd = fd;
780 pfd.events = POLL_IN;
781 while (1) {
782 int i = 0;
783 char buffer[EV_BUF_LEN];
784 ret = poll(&pfd, 1, 1000 * timeout);
785
786 if (ret == -1) {
787 ublk_err("%s: poll inotify failed: %d\n", __func__, ret);
788 goto rm_watch;
789 } else if (ret == 0) {
790 ublk_err("%s: poll inotify timeout\n", __func__);
791 ret = -ENOENT;
792 goto rm_watch;
793 }
794
795 ret = read(fd, buffer, EV_BUF_LEN);
796 if (ret < 0) {
797 ublk_err("%s: read inotify fd failed\n", __func__);
798 goto rm_watch;
799 }
800
801 while (i < ret) {
802 struct inotify_event *event = (struct inotify_event *)&buffer[i];
803
804 ublk_dbg(UBLK_DBG_DEV, "%s: inotify event %x %s\n",
805 __func__, event->mask, event->name);
806 if (event->mask & evt_mask) {
807 if (!strcmp(event->name, dev_name)) {
808 ret = 0;
809 goto rm_watch;
810 }
811 }
812 i += EV_SIZE + event->len;
813 }
814 }
815 rm_watch:
816 inotify_rm_watch(fd, wd);
817 fail:
818 close(fd);
819 return ret;
820 }
821
ublk_stop_io_daemon(const struct ublk_dev * dev)822 static int ublk_stop_io_daemon(const struct ublk_dev *dev)
823 {
824 int daemon_pid = dev->dev_info.ublksrv_pid;
825 int dev_id = dev->dev_info.dev_id;
826 char ublkc[64];
827 int ret;
828
829 /*
830 * Wait until ublk char device is closed, when our daemon is shutdown
831 */
832 snprintf(ublkc, sizeof(ublkc), "%s%d", "ublkc", dev_id);
833 ret = wait_ublk_dev(ublkc, IN_CLOSE_WRITE, 10);
834 waitpid(dev->dev_info.ublksrv_pid, NULL, 0);
835 ublk_dbg(UBLK_DBG_DEV, "%s: pid %d dev_id %d ret %d\n",
836 __func__, daemon_pid, dev_id, ret);
837
838 return ret;
839 }
840
cmd_dev_add(char * tgt_type,int * exp_id,unsigned nr_queues,unsigned depth)841 static int cmd_dev_add(char *tgt_type, int *exp_id, unsigned nr_queues,
842 unsigned depth)
843 {
844 const struct ublk_tgt_ops *ops;
845 struct ublksrv_ctrl_dev_info *info;
846 struct ublk_dev *dev;
847 int dev_id = *exp_id;
848 char ublkb[64];
849 int ret;
850
851 ops = ublk_find_tgt(tgt_type);
852 if (!ops) {
853 ublk_err("%s: no such tgt type, type %s\n",
854 __func__, tgt_type);
855 return -ENODEV;
856 }
857
858 if (nr_queues > UBLK_MAX_QUEUES || depth > UBLK_QUEUE_DEPTH) {
859 ublk_err("%s: invalid nr_queues or depth queues %u depth %u\n",
860 __func__, nr_queues, depth);
861 return -EINVAL;
862 }
863
864 dev = ublk_ctrl_init();
865 if (!dev) {
866 ublk_err("%s: can't alloc dev id %d, type %s\n",
867 __func__, dev_id, tgt_type);
868 return -ENOMEM;
869 }
870
871 info = &dev->dev_info;
872 info->dev_id = dev_id;
873 info->nr_hw_queues = nr_queues;
874 info->queue_depth = depth;
875 dev->tgt.ops = ops;
876
877 ret = ublk_ctrl_add_dev(dev);
878 if (ret < 0) {
879 ublk_err("%s: can't add dev id %d, type %s ret %d\n",
880 __func__, dev_id, tgt_type, ret);
881 goto fail;
882 }
883
884 switch (fork()) {
885 case -1:
886 goto fail;
887 case 0:
888 ublk_start_daemon(dev);
889 return 0;
890 }
891
892 /*
893 * Wait until ublk disk is added, when our daemon is started
894 * successfully
895 */
896 snprintf(ublkb, sizeof(ublkb), "%s%u", "ublkb", dev->dev_info.dev_id);
897 ret = wait_ublk_dev(ublkb, IN_CREATE, 3);
898 if (ret < 0) {
899 ublk_err("%s: can't start daemon id %d, type %s\n",
900 __func__, dev_id, tgt_type);
901 ublk_ctrl_del_dev(dev);
902 } else {
903 *exp_id = dev->dev_info.dev_id;
904 }
905 fail:
906 ublk_ctrl_deinit(dev);
907 return ret;
908 }
909
cmd_dev_del_by_kill(int number)910 static int cmd_dev_del_by_kill(int number)
911 {
912 struct ublk_dev *dev;
913 int ret;
914
915 dev = ublk_ctrl_init();
916 dev->dev_info.dev_id = number;
917
918 ret = ublk_ctrl_get_info(dev);
919 if (ret < 0)
920 goto fail;
921
922 /* simulate one ublk daemon panic */
923 kill(dev->dev_info.ublksrv_pid, 9);
924
925 ret = ublk_stop_io_daemon(dev);
926 if (ret < 0)
927 ublk_err("%s: can't stop daemon id %d\n", __func__, number);
928 ublk_ctrl_del_dev(dev);
929 fail:
930 if (ret >= 0)
931 ret = ublk_ctrl_get_info(dev);
932 ublk_ctrl_deinit(dev);
933
934 return (ret != 0) ? 0 : -EIO;
935 }
936
937 /****************** part 2: target implementation ********************/
938
ublk_null_tgt_init(struct ublk_dev * dev)939 static int ublk_null_tgt_init(struct ublk_dev *dev)
940 {
941 const struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
942 unsigned long dev_size = 250UL << 30;
943
944 dev->tgt.dev_size = dev_size;
945 dev->tgt.params = (struct ublk_params) {
946 .types = UBLK_PARAM_TYPE_BASIC,
947 .basic = {
948 .logical_bs_shift = 9,
949 .physical_bs_shift = 12,
950 .io_opt_shift = 12,
951 .io_min_shift = 9,
952 .max_sectors = info->max_io_buf_bytes >> 9,
953 .dev_sectors = dev_size >> 9,
954 },
955 };
956
957 return 0;
958 }
959
ublk_null_queue_io(struct ublk_queue * q,int tag)960 static int ublk_null_queue_io(struct ublk_queue *q, int tag)
961 {
962 const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
963
964 ublk_complete_io(q, tag, iod->nr_sectors << 9);
965
966 return 0;
967 }
968
/* Registry of built-in targets, looked up by name via ublk_find_tgt(). */
static const struct ublk_tgt_ops tgt_ops_list[] = {
	{
		.name = "null",
		.init_tgt = ublk_null_tgt_init,
		.queue_io = ublk_null_queue_io,
	},
};
976
ublk_find_tgt(const char * name)977 static const struct ublk_tgt_ops *ublk_find_tgt(const char *name)
978 {
979 const struct ublk_tgt_ops *ops;
980 int i;
981
982 if (name == NULL)
983 return NULL;
984
985 for (i = 0; sizeof(tgt_ops_list) / sizeof(*ops); i++)
986 if (strcmp(tgt_ops_list[i].name, name) == 0)
987 return &tgt_ops_list[i];
988 return NULL;
989 }
990
991
992 /****************** part 3: IO test over ublk disk ********************/
993
994 #include "helpers.h"
995 #include "liburing.h"
996 #define BS 4096
997 #define BUFFERS 128
998
/* Arguments and result slot for one io worker thread (see test_io_fn()). */
struct io_ctx {
	int dev_id;	/* exercise /dev/ublkb<dev_id> */
	int write;	/* 1 = write test, 0 = read test */
	int seq;	/* 1 = sequential offsets, 0 = random offsets */

	/* output */
	int res;	/* test_io() result, 0 on success */
	pthread_t handle;	/* worker thread handle */
};
1008
/*
 * Submit BUFFERS fixed-buffer reads or writes of BS bytes each starting
 * at @start (sequential offsets when @seq, random within the window
 * otherwise), then check every completion against @exp_len (-1 means
 * "match the iovec length").  Returns 0 on success, 1 on any failure.
 */
static int __test_io(struct io_uring *ring, int fd, int write,
		int seq, struct iovec *vecs, int exp_len, off_t start)
{
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int i, ret;
	off_t offset;

	offset = start;
	for (i = 0; i < BUFFERS; i++) {
		sqe = io_uring_get_sqe(ring);
		if (!sqe) {
			fprintf(stderr, "sqe get failed\n");
			goto err;
		}
		if (!seq)
			offset = start + BS * (rand() % BUFFERS);
		if (write) {
			io_uring_prep_write_fixed(sqe, fd, vecs[i].iov_base,
							vecs[i].iov_len,
							offset, i);
		} else {
			io_uring_prep_read_fixed(sqe, fd, vecs[i].iov_base,
							vecs[i].iov_len,
							offset, i);
		}
		/* user_data records the buffer index for the check below */
		sqe->user_data = i;
		if (seq)
			offset += BS;
	}

	ret = io_uring_submit(ring);
	if (ret != BUFFERS) {
		fprintf(stderr, "submit got %d, wanted %d\n", ret, BUFFERS);
		goto err;
	}

	for (i = 0; i < BUFFERS; i++) {
		ret = io_uring_wait_cqe(ring, &cqe);
		if (ret) {
			fprintf(stderr, "wait_cqe=%d\n", ret);
			goto err;
		}
		if (exp_len == -1) {
			/* expect each completion to match its own iovec */
			int iov_len = vecs[cqe->user_data].iov_len;

			if (cqe->res != iov_len) {
				fprintf(stderr, "cqe res %d, wanted %d\n",
						cqe->res, iov_len);
				goto err;
			}
		} else if (cqe->res != exp_len) {
			fprintf(stderr, "cqe res %d, wanted %d\n", cqe->res, exp_len);
			goto err;
		}
		io_uring_cqe_seen(ring, cqe);
	}

	return 0;
err:
	return 1;
}
1071
1072 /* Run IO over ublk block device */
test_io(struct io_ctx * ctx)1073 static int test_io(struct io_ctx *ctx)
1074 {
1075 struct io_uring ring;
1076 int ret, ring_flags = 0;
1077 char buf[256];
1078 int fd = -1;
1079 off_t offset = 0;
1080 unsigned long long bytes;
1081 int open_flags = O_DIRECT;
1082 struct iovec *vecs = t_create_buffers(BUFFERS, BS);
1083
1084 ret = t_create_ring(BUFFERS, &ring, ring_flags);
1085 if (ret == T_SETUP_SKIP)
1086 return 0;
1087 if (ret != T_SETUP_OK) {
1088 fprintf(stderr, "ring create failed: %d\n", ret);
1089 return 1;
1090 }
1091
1092 snprintf(buf, sizeof(buf), "%s%d", UBLKB_DEV, ctx->dev_id);
1093
1094 if (ctx->write)
1095 open_flags |= O_WRONLY;
1096 else
1097 open_flags |= O_RDONLY;
1098 fd = open(buf, open_flags);
1099 if (fd < 0) {
1100 if (errno == EINVAL)
1101 return 0;
1102 return 1;
1103 }
1104
1105 if (ioctl(fd, BLKGETSIZE64, &bytes) != 0)
1106 return 1;
1107
1108 ret = t_register_buffers(&ring, vecs, BUFFERS);
1109 if (ret == T_SETUP_SKIP)
1110 return 0;
1111 if (ret != T_SETUP_OK) {
1112 fprintf(stderr, "buffer reg failed: %d\n", ret);
1113 return 1;
1114 }
1115
1116 for (offset = 0; offset < bytes; offset += BS * BUFFERS) {
1117 ret = __test_io(&ring, fd, ctx->write, ctx->seq, vecs, BS,
1118 offset);
1119 if (ret != T_SETUP_OK) {
1120 fprintf(stderr, "/dev/ublkb%d read failed: offset %lu ret %d\n",
1121 ctx->dev_id, (unsigned long) offset, ret);
1122 break;
1123 }
1124 }
1125
1126 close(fd);
1127 io_uring_unregister_buffers(&ring);
1128 io_uring_queue_exit(&ring);
1129
1130 return ret;
1131 }
1132
test_io_fn(void * data)1133 static void *test_io_fn(void *data)
1134 {
1135 struct io_ctx *ctx = data;
1136
1137 ctx->res = test_io(ctx);
1138
1139 return data;
1140 }
1141
/* Redirect stderr to /dev/null; io errors are expected in the child. */
static void ignore_stderr(void)
{
	int nullfd = open("/dev/null", O_WRONLY);

	if (nullfd < 0)
		return;
	dup2(nullfd, fileno(stderr));
	close(nullfd);
}
1151
test_io_worker(int dev_id)1152 static int test_io_worker(int dev_id)
1153 {
1154 const int nr_jobs = 4;
1155 struct io_ctx ctx[nr_jobs];
1156 int i, ret = 0;
1157
1158 for (i = 0; i < nr_jobs; i++) {
1159 ctx[i].dev_id = dev_id;
1160 ctx[i].write = (i & 0x1) ? 0 : 1;
1161 ctx[i].seq = 1;
1162
1163 pthread_create(&ctx[i].handle, NULL, test_io_fn, &ctx[i]);
1164 }
1165
1166 for (i = 0; i < nr_jobs; i++) {
1167 pthread_join(ctx[i].handle, NULL);
1168
1169 if (!ret && ctx[i].res)
1170 ret = ctx[i].res;
1171 }
1172
1173 return ret;
1174 }
1175
1176 /*
1177 * Run IO over created ublk device, meantime delete this ublk device
1178 *
1179 * Cover cancellable uring_cmd
1180 * */
test_del_ublk_with_io(void)1181 static int test_del_ublk_with_io(void)
1182 {
1183 const unsigned wait_ms = 200;
1184 char *tgt_type = "null";
1185 int dev_id = -1;
1186 int ret, pid;
1187
1188 ret = cmd_dev_add(tgt_type, &dev_id, 2, BUFFERS);
1189 if (ret != T_SETUP_OK) {
1190 fprintf(stderr, "buffer reg failed: %d\n", ret);
1191 return T_EXIT_FAIL;
1192 }
1193
1194 switch ((pid = fork())) {
1195 case -1:
1196 fprintf(stderr, "fork failed\n");
1197 return T_EXIT_FAIL;
1198 case 0:
1199 /* io error is expected since the parent is killing ublk */
1200 ignore_stderr();
1201 test_io_worker(dev_id);
1202 return 0;
1203 default:
1204 /*
1205 * Wait a little while until ublk IO pipeline is warm up,
1206 * then try to shutdown ublk device by `kill -9 $ublk_daemon_pid`.
1207 *
1208 * cancellable uring_cmd code path can be covered in this way.
1209 */
1210 usleep(wait_ms * 1000);
1211 ret = cmd_dev_del_by_kill(dev_id);
1212 waitpid(pid, NULL, 0);
1213 return ret;
1214 }
1215 }
1216
main(int argc,char * argv[])1217 int main(int argc, char *argv[])
1218 {
1219 const int nr_loop = 4;
1220 struct ublk_dev *dev;
1221 __u64 features;
1222 int ret, i;
1223
1224 if (argc > 1)
1225 return T_EXIT_SKIP;
1226
1227 dev = ublk_ctrl_init();
1228 /* ublk isn't supported or the module isn't loaded */
1229 if (!dev)
1230 return T_EXIT_SKIP;
1231
1232 /* kernel doesn't support get_features */
1233 ret = ublk_ctrl_get_features(dev, &features);
1234 if (ret < 0)
1235 return T_EXIT_SKIP;
1236
1237 if (!(features & UBLK_F_CMD_IOCTL_ENCODE))
1238 return T_EXIT_SKIP;
1239
1240 for (i = 0; i < nr_loop; i++) {
1241 if (test_del_ublk_with_io())
1242 return T_EXIT_FAIL;
1243 }
1244 ublk_ctrl_deinit(dev);
1245 return T_EXIT_PASS;
1246 }
1247 #else
/* ublk kernel headers unavailable at build time: report the test skipped. */
int main(int argc, char *argv[])
{
	return T_EXIT_SKIP;
}
1252 #endif
1253