1 /* SPDX-License-Identifier: MIT */
2 /*
3  * Description: uring_cmd based ublk
4  *
5  * Covers cancellable uring_cmd feature.
6  */
7 #include <unistd.h>
8 #include <stdlib.h>
9 #include <assert.h>
10 #include <stdio.h>
11 #include <stdarg.h>
12 #include <string.h>
13 #include <pthread.h>
14 #include <limits.h>
15 #include <poll.h>
16 #include <sys/syscall.h>
17 #include <sys/mman.h>
18 #include <sys/ioctl.h>
19 #include <sys/inotify.h>
20 #include <sys/wait.h>
21 
22 #include "liburing.h"
23 #include "helpers.h"
24 #ifdef CONFIG_HAVE_UBLK_HEADER
25 #include <linux/ublk_cmd.h>
26 
27 /****************** part 1: libublk ********************/
28 
29 #define CTRL_DEV		"/dev/ublk-control"
30 #define UBLKC_DEV		"/dev/ublkc"
31 #define UBLKB_DEV		"/dev/ublkb"
32 #define UBLK_CTRL_RING_DEPTH            32
33 
34 /* queue idle timeout */
35 #define UBLKSRV_IO_IDLE_SECS		20
36 
37 #define UBLK_IO_MAX_BYTES               65536
38 #define UBLK_MAX_QUEUES                 4
39 #define UBLK_QUEUE_DEPTH                128
40 
41 #define UBLK_DBG_DEV            (1U << 0)
42 #define UBLK_DBG_QUEUE          (1U << 1)
43 #define UBLK_DBG_IO_CMD         (1U << 2)
44 #define UBLK_DBG_IO             (1U << 3)
45 #define UBLK_DBG_CTRL_CMD       (1U << 4)
46 #define UBLK_LOG                (1U << 5)
47 
48 struct ublk_dev;
49 struct ublk_queue;
50 
51 struct ublk_ctrl_cmd_data {
52 	__u32 cmd_op;
53 #define CTRL_CMD_HAS_DATA	1
54 #define CTRL_CMD_HAS_BUF	2
55 	__u32 flags;
56 
57 	__u64 data[2];
58 	__u64 addr;
59 	__u32 len;
60 };
61 
62 struct ublk_io {
63 	char *buf_addr;
64 
65 #define UBLKSRV_NEED_FETCH_RQ		(1UL << 0)
66 #define UBLKSRV_NEED_COMMIT_RQ_COMP	(1UL << 1)
67 #define UBLKSRV_IO_FREE			(1UL << 2)
68 	unsigned int flags;
69 
70 	unsigned int result;
71 };
72 
73 struct ublk_tgt_ops {
74 	const char *name;
75 	int (*init_tgt)(struct ublk_dev *);
76 	void (*deinit_tgt)(struct ublk_dev *);
77 
78 	int (*queue_io)(struct ublk_queue *, int tag);
79 	void (*tgt_io_done)(struct ublk_queue *,
80 			int tag, const struct io_uring_cqe *);
81 };
82 
83 struct ublk_tgt {
84 	unsigned long dev_size;
85 	const struct ublk_tgt_ops *ops;
86 	struct ublk_params params;
87 };
88 
89 struct ublk_queue {
90 	int q_id;
91 	int q_depth;
92 	unsigned int cmd_inflight;
93 	unsigned int io_inflight;
94 	struct ublk_dev *dev;
95 	const struct ublk_tgt_ops *tgt_ops;
96 	char *io_cmd_buf;
97 	struct io_uring ring;
98 	struct ublk_io ios[UBLK_QUEUE_DEPTH];
99 #define UBLKSRV_QUEUE_STOPPING	(1U << 0)
100 #define UBLKSRV_QUEUE_IDLE	(1U << 1)
101 	unsigned state;
102 	pid_t tid;
103 	pthread_t thread;
104 };
105 
106 struct ublk_dev {
107 	struct ublk_tgt tgt;
108 	struct ublksrv_ctrl_dev_info  dev_info;
109 	struct ublk_queue q[UBLK_MAX_QUEUES];
110 
111 	int fds[2];	/* fds[0] points to /dev/ublkcN */
112 	int nr_fds;
113 	int ctrl_fd;
114 	struct io_uring ring;
115 };
116 
117 #ifndef offsetof
118 #define offsetof(TYPE, MEMBER)  ((size_t)&((TYPE *)0)->MEMBER)
119 #endif
120 
121 #ifndef container_of
122 #define container_of(ptr, type, member) ({                              \
123 	unsigned long __mptr = (unsigned long)(ptr);                    \
124 	((type *)(__mptr - offsetof(type, member))); })
125 #endif
126 
127 #define round_up(val, rnd) \
128 	(((val) + ((rnd) - 1)) & ~((rnd) - 1))
129 
130 static unsigned int ublk_dbg_mask = 0;
131 
132 static const struct ublk_tgt_ops *ublk_find_tgt(const char *name);
133 
134 static inline int is_target_io(__u64 user_data)
135 {
136 	return (user_data & (1ULL << 63)) != 0;
137 }
138 
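/*
 * Layout of the 64-bit SQE user_data used below: tag in bits 0-15,
 * command op in bits 16-23, target data from bit 24, and bit 63 set
 * for target I/O so completions can be told apart from ublk commands.
 */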
139 static inline __u64 build_user_data(unsigned tag, unsigned op,
140 		unsigned tgt_data, unsigned is_target_io)
141 {
142 	assert(!(tag >> 16) && !(op >> 8) && !(tgt_data >> 16));
143 
144 	return tag | (op << 16) | (tgt_data << 24) | (__u64)is_target_io << 63;
145 }
146 
147 static inline unsigned int user_data_to_tag(__u64 user_data)
148 {
149 	return user_data & 0xffff;
150 }
151 
152 static inline unsigned int user_data_to_op(__u64 user_data)
153 {
154 	return (user_data >> 16) & 0xff;
155 }
156 
157 static void ublk_err(const char *fmt, ...)
158 {
159 	va_list ap;
160 
161 	va_start(ap, fmt);
162 	vfprintf(stderr, fmt, ap);
	va_end(ap);
163 }
164 
165 static void ublk_dbg(int level, const char *fmt, ...)
166 {
167 	if (level & ublk_dbg_mask) {
168 		va_list ap;
169 		va_start(ap, fmt);
170 		vfprintf(stdout, fmt, ap);
		va_end(ap);
171 	}
172 }
173 
174 static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe)
175 {
176 	return (void *)&sqe->cmd;
177 }
178 
179 static inline void ublk_mark_io_done(struct ublk_io *io, int res)
180 {
181 	io->flags |= (UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_IO_FREE);
182 	io->result = res;
183 }
184 
185 static inline const struct ublksrv_io_desc *ublk_get_iod(
186 		const struct ublk_queue *q, int tag)
187 {
188 	return (struct ublksrv_io_desc *)
189 		&(q->io_cmd_buf[tag * sizeof(struct ublksrv_io_desc)]);
190 }
191 
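/*
 * For IORING_OP_URING_CMD the 32-bit command opcode overlays the
 * sqe->off field in the SQE union; store cmd_op there and clear the
 * adjacent padding word.
 */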
192 static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe,
193 		__u32 cmd_op)
194 {
195 	__u32 *addr = (__u32 *)&sqe->off;
196 
197 	addr[0] = cmd_op;
198 	addr[1] = 0;
199 }
200 
201 static inline int ublk_setup_ring(struct io_uring *r, int depth,
202 		int cq_depth, unsigned flags)
203 {
204 	struct io_uring_params p;
205 
206 	memset(&p, 0, sizeof(p));
207 	p.flags = flags | IORING_SETUP_CQSIZE;
208 	p.cq_entries = cq_depth;
209 
210 	return io_uring_queue_init_params(depth, r, &p);
211 }
212 
213 static void ublk_ctrl_init_cmd(struct ublk_dev *dev,
214 		struct io_uring_sqe *sqe,
215 		struct ublk_ctrl_cmd_data *data)
216 {
217 	struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
218 	struct ublksrv_ctrl_cmd *cmd = (struct ublksrv_ctrl_cmd *)ublk_get_sqe_cmd(sqe);
219 
220 	sqe->fd = dev->ctrl_fd;
221 	sqe->opcode = IORING_OP_URING_CMD;
222 	sqe->ioprio = 0;
223 
224 	if (data->flags & CTRL_CMD_HAS_BUF) {
225 		cmd->addr = data->addr;
226 		cmd->len = data->len;
227 	}
228 
229 	if (data->flags & CTRL_CMD_HAS_DATA)
230 		cmd->data[0] = data->data[0];
231 
232 	cmd->dev_id = info->dev_id;
233 	cmd->queue_id = -1;
234 
235 	ublk_set_sqe_cmd_op(sqe, data->cmd_op);
236 
237 	io_uring_sqe_set_data(sqe, cmd);
238 }
239 
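/*
 * Issue one control uring_cmd against /dev/ublk-control synchronously:
 * prepare a single SQE, submit it, wait for its CQE and return the
 * command status carried in cqe->res.
 */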
240 static int __ublk_ctrl_cmd(struct ublk_dev *dev,
241 		struct ublk_ctrl_cmd_data *data)
242 {
243 	struct io_uring_sqe *sqe;
244 	struct io_uring_cqe *cqe;
245 	int ret = -EINVAL;
246 
247 	sqe = io_uring_get_sqe(&dev->ring);
248 	if (!sqe) {
249 		ublk_err("%s: can't get sqe ret %d\n", __func__, ret);
250 		return ret;
251 	}
252 
253 	ublk_ctrl_init_cmd(dev, sqe, data);
254 
255 	ret = io_uring_submit(&dev->ring);
256 	if (ret < 0) {
257 		ublk_err("uring submit ret %d\n", ret);
258 		return ret;
259 	}
260 
261 	ret = io_uring_wait_cqe(&dev->ring, &cqe);
262 	if (ret < 0) {
263 		ublk_err("wait cqe: %s\n", strerror(-ret));
264 		return ret;
265 	}
266 	io_uring_cqe_seen(&dev->ring, cqe);
267 
268 	return cqe->res;
269 }
270 
271 static int ublk_ctrl_start_dev(struct ublk_dev *dev,
272 		int daemon_pid)
273 {
274 	struct ublk_ctrl_cmd_data data = {
275 		.cmd_op	= UBLK_U_CMD_START_DEV,
276 		.flags	= CTRL_CMD_HAS_DATA,
277 	};
278 
279 	dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid;
280 
281 	return __ublk_ctrl_cmd(dev, &data);
282 }
283 
284 static int ublk_ctrl_add_dev(struct ublk_dev *dev)
285 {
286 	struct ublk_ctrl_cmd_data data = {
287 		.cmd_op	= UBLK_U_CMD_ADD_DEV,
288 		.flags	= CTRL_CMD_HAS_BUF,
289 		.addr = (__u64) (uintptr_t) &dev->dev_info,
290 		.len = sizeof(struct ublksrv_ctrl_dev_info),
291 	};
292 
293 	return __ublk_ctrl_cmd(dev, &data);
294 }
295 
296 static int ublk_ctrl_del_dev(struct ublk_dev *dev)
297 {
298 	struct ublk_ctrl_cmd_data data = {
299 		.cmd_op = UBLK_U_CMD_DEL_DEV,
300 		.flags = 0,
301 	};
302 
303 	return __ublk_ctrl_cmd(dev, &data);
304 }
305 
306 static int ublk_ctrl_get_info(struct ublk_dev *dev)
307 {
308 	struct ublk_ctrl_cmd_data data = {
309 		.cmd_op	= UBLK_U_CMD_GET_DEV_INFO,
310 		.flags	= CTRL_CMD_HAS_BUF,
311 		.addr = (__u64) (uintptr_t) &dev->dev_info,
312 		.len = sizeof(struct ublksrv_ctrl_dev_info),
313 	};
314 
315 	return __ublk_ctrl_cmd(dev, &data);
316 }
317 
318 static int ublk_ctrl_set_params(struct ublk_dev *dev,
319 		struct ublk_params *params)
320 {
321 	struct ublk_ctrl_cmd_data data = {
322 		.cmd_op	= UBLK_U_CMD_SET_PARAMS,
323 		.flags	= CTRL_CMD_HAS_BUF,
324 		.addr = (__u64) (uintptr_t) params,
325 		.len = sizeof(*params),
326 	};
327 	params->len = sizeof(*params);
328 	return __ublk_ctrl_cmd(dev, &data);
329 }
330 
331 static int ublk_ctrl_get_features(struct ublk_dev *dev,
332 		__u64 *features)
333 {
334 	struct ublk_ctrl_cmd_data data = {
335 		.cmd_op	= UBLK_U_CMD_GET_FEATURES,
336 		.flags	= CTRL_CMD_HAS_BUF,
337 		.addr = (__u64) (uintptr_t) features,
338 		.len = sizeof(*features),
339 	};
340 
341 	return __ublk_ctrl_cmd(dev, &data);
342 }
343 
344 static void ublk_ctrl_deinit(struct ublk_dev *dev)
345 {
346 	close(dev->ctrl_fd);
347 	free(dev);
348 }
349 
350 static struct ublk_dev *ublk_ctrl_init(void)
351 {
352 	struct ublk_dev *dev = (struct ublk_dev *)calloc(1, sizeof(*dev));
353 	struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
354 	int ret;
355 
356 	dev->ctrl_fd = open(CTRL_DEV, O_RDWR);
357 	if (dev->ctrl_fd < 0) {
358 		free(dev);
359 		return NULL;
360 	}
361 
362 	info->max_io_buf_bytes = UBLK_IO_MAX_BYTES;
363 
364 	ret = ublk_setup_ring(&dev->ring, UBLK_CTRL_RING_DEPTH,
365 			UBLK_CTRL_RING_DEPTH, IORING_SETUP_SQE128);
366 	if (ret < 0) {
367 		ublk_err("queue_init: %s\n", strerror(-ret));
368 		free(dev);
369 		return NULL;
370 	}
371 	dev->nr_fds = 1;
372 
373 	return dev;
374 }
375 
376 static int ublk_queue_cmd_buf_sz(struct ublk_queue *q)
377 {
378 	int size =  q->q_depth * sizeof(struct ublksrv_io_desc);
379 	unsigned int page_sz = getpagesize();
380 
381 	return round_up(size, page_sz);
382 }
383 
384 static void ublk_queue_deinit(struct ublk_queue *q)
385 {
386 	int i;
387 	int nr_ios = q->q_depth;
388 
389 	io_uring_unregister_ring_fd(&q->ring);
390 
391 	if (q->ring.ring_fd > 0) {
392 		io_uring_unregister_files(&q->ring);
393 		close(q->ring.ring_fd);
394 		q->ring.ring_fd = -1;
395 	}
396 
397 	if (q->io_cmd_buf)
398 		munmap(q->io_cmd_buf, ublk_queue_cmd_buf_sz(q));
399 
400 	for (i = 0; i < nr_ios; i++)
401 		free(q->ios[i].buf_addr);
402 }
403 
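/*
 * Per-queue setup: mmap this queue's read-only I/O descriptor array
 * from the ublk char device, allocate one page-aligned data buffer per
 * tag, create the queue's io_uring and register the char device fd as
 * fixed file 0.
 */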
404 static int ublk_queue_init(struct ublk_queue *q)
405 {
406 	struct ublk_dev *dev = q->dev;
407 	int depth = dev->dev_info.queue_depth;
408 	int i, ret = -1;
409 	int cmd_buf_size, io_buf_size;
410 	unsigned long off;
411 	int ring_depth = depth, cq_depth = depth;
412 
413 	q->tgt_ops = dev->tgt.ops;
414 	q->state = 0;
415 	q->q_depth = depth;
416 	q->cmd_inflight = 0;
417 	q->tid = gettid();
418 
419 	cmd_buf_size = ublk_queue_cmd_buf_sz(q);
420 	off = UBLKSRV_CMD_BUF_OFFSET +
421 		q->q_id * (UBLK_MAX_QUEUE_DEPTH * sizeof(struct ublksrv_io_desc));
422 	q->io_cmd_buf = (char *)mmap(0, cmd_buf_size, PROT_READ,
423 			MAP_SHARED | MAP_POPULATE, dev->fds[0], off);
424 	if (q->io_cmd_buf == MAP_FAILED) {
425 		ublk_err("ublk dev %d queue %d map io_cmd_buf failed %m\n",
426 				q->dev->dev_info.dev_id, q->q_id);
427 		goto fail;
428 	}
429 
430 	io_buf_size = dev->dev_info.max_io_buf_bytes;
431 	for (i = 0; i < q->q_depth; i++) {
432 		q->ios[i].buf_addr = NULL;
433 
434 		if (posix_memalign((void **)&q->ios[i].buf_addr,
435 					getpagesize(), io_buf_size)) {
436 			ublk_err("ublk dev %d queue %d io %d posix_memalign failed %m\n",
437 					dev->dev_info.dev_id, q->q_id, i);
438 			goto fail;
439 		}
440 		q->ios[i].flags = UBLKSRV_NEED_FETCH_RQ | UBLKSRV_IO_FREE;
441 	}
442 
443 	ret = ublk_setup_ring(&q->ring, ring_depth, cq_depth,
444 			IORING_SETUP_COOP_TASKRUN);
445 	if (ret < 0) {
446 		ublk_err("ublk dev %d queue %d setup io_uring failed %d\n",
447 				q->dev->dev_info.dev_id, q->q_id, ret);
448 		goto fail;
449 	}
450 
451 	io_uring_register_ring_fd(&q->ring);
452 
453 	ret = io_uring_register_files(&q->ring, dev->fds, dev->nr_fds);
454 	if (ret) {
455 		ublk_err("ublk dev %d queue %d register files failed %d\n",
456 				q->dev->dev_info.dev_id, q->q_id, ret);
457 		goto fail;
458 	}
459 
460 	return 0;
461  fail:
462 	ublk_queue_deinit(q);
463 	ublk_err("ublk dev %d queue %d failed\n",
464 			dev->dev_info.dev_id, q->q_id);
465 	return -ENOMEM;
466 }
467 
468 static int ublk_dev_prep(struct ublk_dev *dev)
469 {
470 	int dev_id = dev->dev_info.dev_id;
471 	char buf[64];
472 	int ret = 0;
473 
474 	snprintf(buf, 64, "%s%d", UBLKC_DEV, dev_id);
475 	dev->fds[0] = open(buf, O_RDWR);
476 	if (dev->fds[0] < 0) {
477 		ret = -EBADF;
478 		ublk_err("can't open %s, ret %d\n", buf, dev->fds[0]);
479 		goto fail;
480 	}
481 
482 	if (dev->tgt.ops->init_tgt)
483 		ret = dev->tgt.ops->init_tgt(dev);
484 
485 	return ret;
486 fail:
487 	close(dev->fds[0]);
488 	return ret;
489 }
490 
491 static void ublk_dev_unprep(struct ublk_dev *dev)
492 {
493 	if (dev->tgt.ops->deinit_tgt)
494 		dev->tgt.ops->deinit_tgt(dev);
495 	close(dev->fds[0]);
496 }
497 
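/*
 * (Re)arm one tag with a ublk uring_cmd. Only a free io that needs
 * fetching or committing is issued: FETCH_REQ hands the per-tag buffer
 * to the driver and waits for a request, COMMIT_AND_FETCH_REQ commits
 * the previous result and fetches the next request in one command.
 */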
498 static int ublk_queue_io_cmd(struct ublk_queue *q,
499 		struct ublk_io *io, unsigned tag)
500 {
501 	struct ublksrv_io_cmd *cmd;
502 	struct io_uring_sqe *sqe;
503 	unsigned int cmd_op = 0;
504 	__u64 user_data;
505 
506 	/* only freed io can be issued */
507 	if (!(io->flags & UBLKSRV_IO_FREE))
508 		return 0;
509 
510 	/* we issue because we need either fetching or committing */
511 	if (!(io->flags &
512 		(UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP)))
513 		return 0;
514 
515 	if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP)
516 		cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ;
517 	else if (io->flags & UBLKSRV_NEED_FETCH_RQ)
518 		cmd_op = UBLK_U_IO_FETCH_REQ;
519 
520 	sqe = io_uring_get_sqe(&q->ring);
521 	if (!sqe) {
522 		ublk_err("%s: run out of sqe %d, tag %d\n",
523 				__func__, q->q_id, tag);
524 		return -1;
525 	}
526 
527 	cmd = (struct ublksrv_io_cmd *)ublk_get_sqe_cmd(sqe);
528 
529 	if (cmd_op == UBLK_U_IO_COMMIT_AND_FETCH_REQ)
530 		cmd->result = io->result;
531 
532 	/* These fields are written once and never change */
533 	ublk_set_sqe_cmd_op(sqe, cmd_op);
534 	sqe->fd		= 0;	/* dev->fds[0] */
535 	sqe->opcode	= IORING_OP_URING_CMD;
536 	sqe->flags	= IOSQE_FIXED_FILE;
537 	sqe->rw_flags	= 0;
538 	cmd->tag	= tag;
539 	cmd->addr	= (__u64) (uintptr_t) io->buf_addr;
540 	cmd->q_id	= q->q_id;
541 
542 	user_data = build_user_data(tag, _IOC_NR(cmd_op), 0, 0);
543 	io_uring_sqe_set_data64(sqe, user_data);
544 
545 	io->flags = 0;
546 
547 	q->cmd_inflight += 1;
548 
549 	ublk_dbg(UBLK_DBG_IO_CMD, "%s: (qid %d tag %u cmd_op %u) iof %x stopping %d\n",
550 			__func__, q->q_id, tag, cmd_op,
551 			io->flags, !!(q->state & UBLKSRV_QUEUE_STOPPING));
552 	return 1;
553 }
554 
555 static int ublk_complete_io(struct ublk_queue *q,
556 		unsigned tag, int res)
557 {
558 	struct ublk_io *io = &q->ios[tag];
559 
560 	ublk_mark_io_done(io, res);
561 
562 	return ublk_queue_io_cmd(q, io, tag);
563 }
564 
565 static void ublk_submit_fetch_commands(struct ublk_queue *q)
566 {
567 	int i = 0;
568 
569 	for (i = 0; i < q->q_depth; i++)
570 		ublk_queue_io_cmd(q, &q->ios[i], i);
571 }
572 
573 static int ublk_queue_is_idle(struct ublk_queue *q)
574 {
575 	return !io_uring_sq_ready(&q->ring) && !q->io_inflight;
576 }
577 
578 static int ublk_queue_is_done(struct ublk_queue *q)
579 {
580 	return (q->state & UBLKSRV_QUEUE_STOPPING) && ublk_queue_is_idle(q);
581 }
582 
583 static inline void ublksrv_handle_tgt_cqe(struct ublk_queue *q,
584 		struct io_uring_cqe *cqe)
585 {
586 	unsigned tag = user_data_to_tag(cqe->user_data);
587 
588 	if (cqe->res < 0 && cqe->res != -EAGAIN)
589 		ublk_err("%s: failed tgt io: res %d qid %u tag %u, cmd_op %u\n",
590 			__func__, cqe->res, q->q_id,
591 			user_data_to_tag(cqe->user_data),
592 			user_data_to_op(cqe->user_data));
593 
594 	if (q->tgt_ops->tgt_io_done)
595 		q->tgt_ops->tgt_io_done(q, tag, cqe);
596 }
597 
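/*
 * Completion dispatch: target I/O (bit 63 of user_data set) goes to the
 * target handler. For ublk commands, UBLK_IO_RES_ABORT (or an already
 * stopping queue) switches the queue to stopping mode; UBLK_IO_RES_OK
 * means a new request is ready for this tag and is handed to the
 * target's queue_io(), otherwise the io is only marked free.
 */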
598 static void ublk_handle_cqe(struct io_uring *r,
599 		struct io_uring_cqe *cqe, void *data)
600 {
601 	struct ublk_queue *q = container_of(r, struct ublk_queue, ring);
602 	unsigned tag = user_data_to_tag(cqe->user_data);
603 	unsigned cmd_op = user_data_to_op(cqe->user_data);
604 	int fetch = (cqe->res != UBLK_IO_RES_ABORT) &&
605 		!(q->state & UBLKSRV_QUEUE_STOPPING);
606 	struct ublk_io *io;
607 
608 	ublk_dbg(UBLK_DBG_IO_CMD, "%s: res %d (qid %d tag %u cmd_op %u target %d) stopping %d\n",
609 			__func__, cqe->res, q->q_id, tag, cmd_op,
610 			is_target_io(cqe->user_data),
611 			(q->state & UBLKSRV_QUEUE_STOPPING));
612 
613 	/* target io completion: don't touch the ublk io state */
614 	if (is_target_io(cqe->user_data)) {
615 		ublksrv_handle_tgt_cqe(q, cqe);
616 		return;
617 	}
618 
619 	io = &q->ios[tag];
620 	q->cmd_inflight--;
621 
622 	if (!fetch) {
623 		q->state |= UBLKSRV_QUEUE_STOPPING;
624 		io->flags &= ~UBLKSRV_NEED_FETCH_RQ;
625 	}
626 
627 	if (cqe->res == UBLK_IO_RES_OK) {
628 		assert(tag < q->q_depth);
629 		q->tgt_ops->queue_io(q, tag);
630 	} else {
631 		/*
632 		 * COMMIT_REQ completes immediately since no fetch needs to
633 		 * be piggybacked on it.
634 		 *
635 		 * Mark the io as IO_FREE only; it won't be reissued because
636 		 * we only issue ios whose flags contain UBLKSRV_IO_FREE plus
637 		 * one of the UBLKSRV_NEED_* bits.
638 		 */
639 		io->flags = UBLKSRV_IO_FREE;
640 	}
641 }
642 
643 static int ublk_reap_events_uring(struct io_uring *r)
644 {
645 	struct io_uring_cqe *cqe;
646 	unsigned head;
647 	int count = 0;
648 
649 	io_uring_for_each_cqe(r, head, cqe) {
650 		ublk_handle_cqe(r, cqe, NULL);
651 		count += 1;
652 	}
653 	io_uring_cq_advance(r, count);
654 
655 	return count;
656 }
657 
658 static int ublk_process_io(struct ublk_queue *q)
659 {
660 	int ret, reapped;
661 
662 	ublk_dbg(UBLK_DBG_QUEUE, "dev%d-q%d: to_submit %d inflight cmd %u stopping %d\n",
663 				q->dev->dev_info.dev_id,
664 				q->q_id, io_uring_sq_ready(&q->ring),
665 				q->cmd_inflight,
666 				(q->state & UBLKSRV_QUEUE_STOPPING));
667 
668 	if (ublk_queue_is_done(q))
669 		return -ENODEV;
670 
671 	ret = io_uring_submit_and_wait(&q->ring, 1);
672 	reapped = ublk_reap_events_uring(&q->ring);
673 
674 	ublk_dbg(UBLK_DBG_QUEUE, "submit result %d, reapped %d stop %d idle %d\n",
675 			ret, reapped, (q->state & UBLKSRV_QUEUE_STOPPING),
676 			(q->state & UBLKSRV_QUEUE_IDLE));
677 
678 	return reapped;
679 }
680 
681 static void *ublk_io_handler_fn(void *data)
682 {
683 	struct ublk_queue *q = data;
684 	int dev_id = q->dev->dev_info.dev_id;
685 	int ret;
686 
687 	ret = ublk_queue_init(q);
688 	if (ret) {
689 		ublk_err("ublk dev %d queue %d init queue failed\n",
690 				dev_id, q->q_id);
691 		return NULL;
692 	}
693 	ublk_dbg(UBLK_DBG_QUEUE, "tid %d: ublk dev %d queue %d started\n",
694 			q->tid, dev_id, q->q_id);
695 
696 	/* submit all io commands to ublk driver */
697 	ublk_submit_fetch_commands(q);
698 	do {
699 		if (ublk_process_io(q) < 0)
700 			break;
701 	} while (1);
702 
703 	ublk_dbg(UBLK_DBG_QUEUE, "ublk dev %d queue %d exited\n", dev_id, q->q_id);
704 	ublk_queue_deinit(q);
705 	return NULL;
706 }
707 
708 static void ublk_set_parameters(struct ublk_dev *dev)
709 {
710 	int ret;
711 
712 	ret = ublk_ctrl_set_params(dev, &dev->tgt.params);
713 	if (ret)
714 		ublk_err("dev %d set basic parameter failed %d\n",
715 				dev->dev_info.dev_id, ret);
716 }
717 
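/*
 * Daemon side of the test: detach, open the ublk char device, start one
 * pthread per hardware queue, push the target parameters and send
 * START_DEV with our pid, then wait for the queue threads to exit once
 * the device goes away.
 */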
718 static int ublk_start_daemon(struct ublk_dev *dev)
719 {
720 	int ret, i;
721 	void *thread_ret;
722 	const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info;
723 
724 	if (daemon(1, 1) < 0)
725 		return -errno;
726 
727 	ublk_dbg(UBLK_DBG_DEV, "%s enter\n", __func__);
728 
729 	ret = ublk_dev_prep(dev);
730 	if (ret)
731 		return ret;
732 
733 	for (i = 0; i < dinfo->nr_hw_queues; i++) {
734 		dev->q[i].dev = dev;
735 		dev->q[i].q_id = i;
736 		pthread_create(&dev->q[i].thread, NULL,
737 				ublk_io_handler_fn,
738 				&dev->q[i]);
739 	}
740 
741 	/* everything is fine now, start us */
742 	ublk_set_parameters(dev);
743 	ret = ublk_ctrl_start_dev(dev, getpid());
744 	if (ret < 0) {
745 		ublk_err("%s: ublk_ctrl_start_dev failed: %d\n", __func__, ret);
746 		goto fail;
747 	}
748 
749 	/* wait until we are terminated */
750 	for (i = 0; i < dinfo->nr_hw_queues; i++)
751 		pthread_join(dev->q[i].thread, &thread_ret);
752  fail:
753 	ublk_dev_unprep(dev);
754 	ublk_dbg(UBLK_DBG_DEV, "%s exit\n", __func__);
755 
756 	return ret;
757 }
758 
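/*
 * Watch /dev with inotify and wait up to 'timeout' seconds for an event
 * matching evt_mask (e.g. IN_CREATE or IN_CLOSE_WRITE) on the named
 * ublk node.
 */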
759 static int wait_ublk_dev(char *dev_name, int evt_mask, unsigned timeout)
760 {
761 #define EV_SIZE (sizeof(struct inotify_event))
762 #define EV_BUF_LEN (128 * (EV_SIZE + 16))
763 	struct pollfd pfd;
764 	int fd, wd;
765 	int ret = -EINVAL;
766 
767 	fd = inotify_init();
768 	if (fd < 0) {
769 		ublk_dbg(UBLK_DBG_DEV, "%s: inotify init failed\n", __func__);
770 		return fd;
771 	}
772 
773 	wd = inotify_add_watch(fd, "/dev", evt_mask);
774 	if (wd == -1) {
775 		ublk_dbg(UBLK_DBG_DEV, "%s: add watch for /dev failed\n", __func__);
776 		goto fail;
777 	}
778 
779 	pfd.fd = fd;
780 	pfd.events = POLLIN;
781 	while (1) {
782 		int i = 0;
783 		char buffer[EV_BUF_LEN];
784 		ret = poll(&pfd, 1, 1000 * timeout);
785 
786 		if (ret == -1) {
787 			ublk_err("%s: poll inotify failed: %d\n", __func__, ret);
788 			goto rm_watch;
789 		} else if (ret == 0) {
790 			ublk_err("%s: poll inotify timeout\n", __func__);
791 			ret = -ENOENT;
792 			goto rm_watch;
793 		}
794 
795 		ret = read(fd, buffer, EV_BUF_LEN);
796 		if (ret < 0) {
797 			ublk_err("%s: read inotify fd failed\n", __func__);
798 			goto rm_watch;
799 		}
800 
801 		while (i < ret) {
802 			struct inotify_event *event = (struct inotify_event *)&buffer[i];
803 
804 			ublk_dbg(UBLK_DBG_DEV, "%s: inotify event %x %s\n",
805 					__func__, event->mask, event->name);
806 			if (event->mask & evt_mask) {
807 				if (!strcmp(event->name, dev_name)) {
808 					ret = 0;
809 					goto rm_watch;
810 				}
811 			}
812 			i += EV_SIZE + event->len;
813 		}
814 	}
815 rm_watch:
816 	inotify_rm_watch(fd, wd);
817 fail:
818 	close(fd);
819 	return ret;
820 }
821 
822 static int ublk_stop_io_daemon(const struct ublk_dev *dev)
823 {
824 	int daemon_pid = dev->dev_info.ublksrv_pid;
825 	int dev_id = dev->dev_info.dev_id;
826 	char ublkc[64];
827 	int ret;
828 
829 	/*
830 	 * Wait until the ublk char device is closed, which happens once our daemon has shut down
831 	 */
832 	snprintf(ublkc, sizeof(ublkc), "%s%d", "ublkc", dev_id);
833 	ret = wait_ublk_dev(ublkc, IN_CLOSE_WRITE, 10);
834 	waitpid(dev->dev_info.ublksrv_pid, NULL, 0);
835 	ublk_dbg(UBLK_DBG_DEV, "%s: pid %d dev_id %d ret %d\n",
836 			__func__, daemon_pid, dev_id, ret);
837 
838 	return ret;
839 }
840 
841 static int cmd_dev_add(char *tgt_type, int *exp_id, unsigned nr_queues,
842 		       unsigned depth)
843 {
844 	const struct ublk_tgt_ops *ops;
845 	struct ublksrv_ctrl_dev_info *info;
846 	struct ublk_dev *dev;
847 	int dev_id = *exp_id;
848 	char ublkb[64];
849 	int ret;
850 
851 	ops = ublk_find_tgt(tgt_type);
852 	if (!ops) {
853 		ublk_err("%s: no such tgt type, type %s\n",
854 				__func__, tgt_type);
855 		return -ENODEV;
856 	}
857 
858 	if (nr_queues > UBLK_MAX_QUEUES || depth > UBLK_QUEUE_DEPTH) {
859 		ublk_err("%s: invalid nr_queues or depth queues %u depth %u\n",
860 				__func__, nr_queues, depth);
861 		return -EINVAL;
862 	}
863 
864 	dev = ublk_ctrl_init();
865 	if (!dev) {
866 		ublk_err("%s: can't alloc dev id %d, type %s\n",
867 				__func__, dev_id, tgt_type);
868 		return -ENOMEM;
869 	}
870 
871 	info = &dev->dev_info;
872 	info->dev_id = dev_id;
873 	info->nr_hw_queues = nr_queues;
874 	info->queue_depth = depth;
875 	dev->tgt.ops = ops;
876 
877 	ret = ublk_ctrl_add_dev(dev);
878 	if (ret < 0) {
879 		ublk_err("%s: can't add dev id %d, type %s ret %d\n",
880 				__func__, dev_id, tgt_type, ret);
881 		goto fail;
882 	}
883 
884 	switch (fork()) {
885 	case -1:
886 		goto fail;
887 	case 0:
888 		ublk_start_daemon(dev);
889 		return 0;
890 	}
891 
892 	/*
893 	 * Wait until the ublk disk is added, which indicates that our daemon
894 	 * started successfully
895 	 */
896 	snprintf(ublkb, sizeof(ublkb), "%s%u", "ublkb", dev->dev_info.dev_id);
897 	ret = wait_ublk_dev(ublkb, IN_CREATE, 3);
898 	if (ret < 0) {
899 		ublk_err("%s: can't start daemon id %d, type %s\n",
900 				__func__, dev_id, tgt_type);
901 		ublk_ctrl_del_dev(dev);
902 	} else {
903 		*exp_id = dev->dev_info.dev_id;
904 	}
905 fail:
906 	ublk_ctrl_deinit(dev);
907 	return ret;
908 }
909 
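/*
 * Delete a device the hard way: SIGKILL the daemon to simulate a ublk
 * server crash, which exercises the driver's cancellation of inflight
 * uring_cmds (the cancellable uring_cmd path this test covers), then
 * wait for the char device to be closed and delete the device.
 */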
910 static int cmd_dev_del_by_kill(int number)
911 {
912 	struct ublk_dev *dev;
913 	int ret;
914 
915 	dev = ublk_ctrl_init();
916 	dev->dev_info.dev_id = number;
917 
918 	ret = ublk_ctrl_get_info(dev);
919 	if (ret < 0)
920 		goto fail;
921 
922 	/* simulate a ublk daemon panic */
923 	kill(dev->dev_info.ublksrv_pid, 9);
924 
925 	ret = ublk_stop_io_daemon(dev);
926 	if (ret < 0)
927 		ublk_err("%s: can't stop daemon id %d\n", __func__, number);
928 	ublk_ctrl_del_dev(dev);
929 fail:
930 	if (ret >= 0)
931 		ret = ublk_ctrl_get_info(dev);
932 	ublk_ctrl_deinit(dev);
933 
934 	return (ret != 0) ? 0 : -EIO;
935 }
936 
937 /****************** part 2: target implementation ********************/
938 
939 static int ublk_null_tgt_init(struct ublk_dev *dev)
940 {
941 	const struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
942 	unsigned long dev_size = 250UL << 30;
943 
944 	dev->tgt.dev_size = dev_size;
945 	dev->tgt.params = (struct ublk_params) {
946 		.types = UBLK_PARAM_TYPE_BASIC,
947 		.basic = {
948 			.logical_bs_shift	= 9,
949 			.physical_bs_shift	= 12,
950 			.io_opt_shift		= 12,
951 			.io_min_shift		= 9,
952 			.max_sectors		= info->max_io_buf_bytes >> 9,
953 			.dev_sectors		= dev_size >> 9,
954 		},
955 	};
956 
957 	return 0;
958 }
959 
960 static int ublk_null_queue_io(struct ublk_queue *q, int tag)
961 {
962 	const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
963 
964 	ublk_complete_io(q, tag, iod->nr_sectors << 9);
965 
966 	return 0;
967 }
968 
969 static const struct ublk_tgt_ops tgt_ops_list[] = {
970 	{
971 		.name = "null",
972 		.init_tgt = ublk_null_tgt_init,
973 		.queue_io = ublk_null_queue_io,
974 	},
975 };
976 
977 static const struct ublk_tgt_ops *ublk_find_tgt(const char *name)
978 {
979 	const struct ublk_tgt_ops *ops;
980 	int i;
981 
982 	if (name == NULL)
983 		return NULL;
984 
985 	for (i = 0; i < sizeof(tgt_ops_list) / sizeof(*ops); i++)
986 		if (strcmp(tgt_ops_list[i].name, name) == 0)
987 			return &tgt_ops_list[i];
988 	return NULL;
989 }
990 
991 
992 /****************** part 3: IO test over ublk disk ********************/
993 
994 #include "helpers.h"
995 #include "liburing.h"
996 #define BS		4096
997 #define BUFFERS		128
998 
999 struct io_ctx {
1000 	int dev_id;
1001 	int write;
1002 	int seq;
1003 
1004 	/* output */
1005 	int res;
1006 	pthread_t handle;
1007 };
1008 
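/*
 * Submit BUFFERS fixed-buffer reads or writes starting at 'start',
 * either sequentially or at random offsets within the window, then
 * check that every CQE reports the expected length.
 */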
1009 static int __test_io(struct io_uring *ring, int fd, int write,
1010 		int seq, struct iovec *vecs, int exp_len, off_t start)
1011 {
1012 	struct io_uring_sqe *sqe;
1013 	struct io_uring_cqe *cqe;
1014 	int i, ret;
1015 	off_t offset;
1016 
1017 	offset = start;
1018 	for (i = 0; i < BUFFERS; i++) {
1019 		sqe = io_uring_get_sqe(ring);
1020 		if (!sqe) {
1021 			fprintf(stderr, "sqe get failed\n");
1022 			goto err;
1023 		}
1024 		if (!seq)
1025 			offset = start + BS * (rand() % BUFFERS);
1026 		if (write) {
1027 			io_uring_prep_write_fixed(sqe, fd, vecs[i].iov_base,
1028 						  vecs[i].iov_len,
1029 						  offset, i);
1030 		} else {
1031 			io_uring_prep_read_fixed(sqe, fd, vecs[i].iov_base,
1032 						 vecs[i].iov_len,
1033 						 offset, i);
1034 		}
1035 		sqe->user_data = i;
1036 		if (seq)
1037 			offset += BS;
1038 	}
1039 
1040 	ret = io_uring_submit(ring);
1041 	if (ret != BUFFERS) {
1042 		fprintf(stderr, "submit got %d, wanted %d\n", ret, BUFFERS);
1043 		goto err;
1044 	}
1045 
1046 	for (i = 0; i < BUFFERS; i++) {
1047 		ret = io_uring_wait_cqe(ring, &cqe);
1048 		if (ret) {
1049 			fprintf(stderr, "wait_cqe=%d\n", ret);
1050 			goto err;
1051 		}
1052 		if (exp_len == -1) {
1053 			int iov_len = vecs[cqe->user_data].iov_len;
1054 
1055 			if (cqe->res != iov_len) {
1056 				fprintf(stderr, "cqe res %d, wanted %d\n",
1057 					cqe->res, iov_len);
1058 				goto err;
1059 			}
1060 		} else if (cqe->res != exp_len) {
1061 			fprintf(stderr, "cqe res %d, wanted %d\n", cqe->res, exp_len);
1062 			goto err;
1063 		}
1064 		io_uring_cqe_seen(ring, cqe);
1065 	}
1066 
1067 	return 0;
1068 err:
1069 	return 1;
1070 }
1071 
1072 /* Run IO over ublk block device  */
1073 static int test_io(struct io_ctx *ctx)
1074 {
1075 	struct io_uring ring;
1076 	int ret, ring_flags = 0;
1077 	char buf[256];
1078 	int fd = -1;
1079 	off_t offset = 0;
1080 	unsigned long long bytes;
1081 	int open_flags = O_DIRECT;
1082 	struct iovec *vecs = t_create_buffers(BUFFERS, BS);
1083 
1084 	ret = t_create_ring(BUFFERS, &ring, ring_flags);
1085 	if (ret == T_SETUP_SKIP)
1086 		return 0;
1087 	if (ret != T_SETUP_OK) {
1088 		fprintf(stderr, "ring create failed: %d\n", ret);
1089 		return 1;
1090 	}
1091 
1092 	snprintf(buf, sizeof(buf), "%s%d", UBLKB_DEV, ctx->dev_id);
1093 
1094 	if (ctx->write)
1095 		open_flags |= O_WRONLY;
1096 	else
1097 		open_flags |= O_RDONLY;
1098 	fd = open(buf, open_flags);
1099 	if (fd < 0) {
1100 		if (errno == EINVAL)
1101 			return 0;
1102 		return 1;
1103 	}
1104 
1105 	if (ioctl(fd, BLKGETSIZE64, &bytes) != 0)
1106 		return 1;
1107 
1108 	ret = t_register_buffers(&ring, vecs, BUFFERS);
1109 	if (ret == T_SETUP_SKIP)
1110 		return 0;
1111 	if (ret != T_SETUP_OK) {
1112 		fprintf(stderr, "buffer reg failed: %d\n", ret);
1113 		return 1;
1114 	}
1115 
1116 	for (offset = 0; offset < bytes; offset += BS * BUFFERS) {
1117 		ret = __test_io(&ring, fd, ctx->write, ctx->seq, vecs, BS,
1118 				offset);
1119 		if (ret != T_SETUP_OK) {
1120 			fprintf(stderr, "/dev/ublkb%d read failed: offset %lu ret %d\n",
1121 					ctx->dev_id, (unsigned long) offset, ret);
1122 			break;
1123 		}
1124 	}
1125 
1126 	close(fd);
1127 	io_uring_unregister_buffers(&ring);
1128 	io_uring_queue_exit(&ring);
1129 
1130 	return ret;
1131 }
1132 
1133 static void *test_io_fn(void *data)
1134 {
1135 	struct io_ctx *ctx = data;
1136 
1137 	ctx->res = test_io(ctx);
1138 
1139 	return data;
1140 }
1141 
1142 static void ignore_stderr(void)
1143 {
1144 	int devnull = open("/dev/null", O_WRONLY);
1145 
1146 	if (devnull >= 0) {
1147 		dup2(devnull, fileno(stderr));
1148 		close(devnull);
1149 	}
1150 }
1151 
1152 static int test_io_worker(int dev_id)
1153 {
1154 	const int nr_jobs = 4;
1155 	struct io_ctx ctx[nr_jobs];
1156 	int i, ret = 0;
1157 
1158 	for (i = 0; i < nr_jobs; i++) {
1159 		ctx[i].dev_id = dev_id;
1160 		ctx[i].write = (i & 0x1) ? 0 : 1;
1161 		ctx[i].seq = 1;
1162 
1163 		pthread_create(&ctx[i].handle, NULL, test_io_fn, &ctx[i]);
1164 	}
1165 
1166 	for (i = 0; i < nr_jobs; i++) {
1167 		pthread_join(ctx[i].handle, NULL);
1168 
1169 		if (!ret && ctx[i].res)
1170 			ret = ctx[i].res;
1171 	}
1172 
1173 	return ret;
1174 }
1175 
1176 /*
1177  * Run IO over the created ublk device while deleting it at the same time.
1178  *
1179  * This covers cancellable uring_cmd.
1180  */
1181 static int test_del_ublk_with_io(void)
1182 {
1183 	const unsigned wait_ms = 200;
1184 	char *tgt_type = "null";
1185 	int dev_id = -1;
1186 	int ret, pid;
1187 
1188 	ret = cmd_dev_add(tgt_type, &dev_id, 2, BUFFERS);
1189 	if (ret != T_SETUP_OK) {
1190 		fprintf(stderr, "cmd_dev_add failed: %d\n", ret);
1191 		return T_EXIT_FAIL;
1192 	}
1193 
1194 	switch ((pid = fork())) {
1195 	case -1:
1196 		fprintf(stderr, "fork failed\n");
1197 		return T_EXIT_FAIL;
1198 	case 0:
1199 		/* io error is expected since the parent is killing ublk */
1200 		ignore_stderr();
1201 		test_io_worker(dev_id);
1202 		return 0;
1203 	default:
1204 		/*
1205 		 * Wait a little while until the ublk IO pipeline has warmed up,
1206 		 * then shut down the ublk device via `kill -9 $ublk_daemon_pid`.
1207 		 *
1208 		 * This covers the cancellable uring_cmd code path.
1209 		 */
1210 		usleep(wait_ms * 1000);
1211 		ret = cmd_dev_del_by_kill(dev_id);
1212 		waitpid(pid, NULL, 0);
1213 		return ret;
1214 	}
1215 }
1216 
1217 int main(int argc, char *argv[])
1218 {
1219 	const int nr_loop = 4;
1220 	struct ublk_dev *dev;
1221 	__u64 features;
1222 	int ret, i;
1223 
1224 	if (argc > 1)
1225 		return T_EXIT_SKIP;
1226 
1227 	dev = ublk_ctrl_init();
1228 	/* ublk isn't supported or the module isn't loaded */
1229 	if (!dev)
1230 		return T_EXIT_SKIP;
1231 
1232 	/* kernel doesn't support get_features */
1233 	ret = ublk_ctrl_get_features(dev, &features);
1234 	if (ret < 0)
1235 		return T_EXIT_SKIP;
1236 
1237 	if (!(features & UBLK_F_CMD_IOCTL_ENCODE))
1238 		return T_EXIT_SKIP;
1239 
1240 	for (i = 0; i < nr_loop; i++) {
1241 		if (test_del_ublk_with_io())
1242 			return T_EXIT_FAIL;
1243 	}
1244 	ublk_ctrl_deinit(dev);
1245 	return T_EXIT_PASS;
1246 }
1247 #else
1248 int main(int argc, char *argv[])
1249 {
1250 	return T_EXIT_SKIP;
1251 }
1252 #endif
1253