// SPDX-License-Identifier: MIT or GPL-2.0-only

#include <config.h>

#include <poll.h>
#include <sys/epoll.h>
#include "ublksrv_tgt.h"

static bool user_copy;
static bool block_device;

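/*
 * Probe /sys/block/<dev>/queue/discard_max_hw_bytes to decide whether a
 * backing block device supports discard (non-zero hardware limit).
 */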
static bool backing_supports_discard(char *name)
{
	int fd;
	char buf[512];
	int len;

	len = snprintf(buf, sizeof(buf), "/sys/block/%s/queue/discard_max_hw_bytes",
			basename(name));
	if (len < 0 || len >= (int)sizeof(buf))
		return false;
	fd = open(buf, O_RDONLY);
	if (fd >= 0) {
		char val[128];
		int ret = pread(fd, val, sizeof(val) - 1, 0);
		unsigned long long bytes = 0;

		close(fd);
		if (ret > 0) {
			/* sysfs data isn't NUL terminated; terminate before parsing */
			val[ret] = 0;
			bytes = strtoull(val, NULL, 10);
		}

		if (bytes > 0)
			return true;
	}
	return false;
}

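/*
 * Common setup shared by init and recovery: read the backing file path,
 * direct_io flag and ublk params back out of the JSON buffer, open the
 * backing file as tgt->fds[1], and size the target ring. With
 * UBLK_F_USER_COPY the ring depth is doubled because each io may need a
 * pair of SQEs (see the read/write paths below).
 */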
static int loop_setup_tgt(struct ublksrv_dev *dev, int type, bool recovery,
		const char *jbuf)
{
	struct ublksrv_tgt_info *tgt = &dev->tgt;
	const struct ublksrv_ctrl_dev_info *info =
		ublksrv_ctrl_get_dev_info(ublksrv_get_ctrl_dev(dev));
	int fd, ret;
	long direct_io = 0;
	struct ublk_params p;
	char file[PATH_MAX];

	ublk_assert(jbuf);

	ret = ublksrv_json_read_target_str_info(jbuf, PATH_MAX, "backing_file", file);
	if (ret < 0) {
		ublk_err("%s: backing file can't be retrieved from jbuf %d\n",
				__func__, ret);
		return ret;
	}

	ret = ublksrv_json_read_target_ulong_info(jbuf, "direct_io",
			&direct_io);
	if (ret) {
		ublk_err("%s: read target direct_io failed %d\n",
				__func__, ret);
		return ret;
	}

	ret = ublksrv_json_read_params(&p, jbuf);
	if (ret) {
		ublk_err("%s: read ublk params failed %d\n",
				__func__, ret);
		return ret;
	}

	fd = open(file, O_RDWR);
	if (fd < 0) {
		ublk_err("%s: backing file %s can't be opened\n",
				__func__, file);
		return fd;
	}

	if (direct_io)
		fcntl(fd, F_SETFL, O_DIRECT);

	ublksrv_tgt_set_io_data_size(tgt);
	tgt->dev_size = p.basic.dev_sectors << 9;
	tgt->tgt_ring_depth = info->queue_depth;
	tgt->nr_fds = 1;
	tgt->fds[1] = fd;
	user_copy = info->flags & UBLK_F_USER_COPY;
	if (user_copy)
		tgt->tgt_ring_depth *= 2;

	return 0;
}

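/*
 * Recovery path: rerun the common setup from the JSON buffer saved by
 * the previous daemon instance.
 */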
static int loop_recovery_tgt(struct ublksrv_dev *dev, int type)
{
	const struct ublksrv_ctrl_dev *cdev = ublksrv_get_ctrl_dev(dev);
	const char *jbuf = ublksrv_ctrl_get_recovery_jbuf(cdev);

	ublk_assert(type == UBLKSRV_TGT_TYPE_LOOP);

	return loop_setup_tgt(dev, type, true, jbuf);
}

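/*
 * Fresh setup: parse -f/--file and --buffered_io, probe the backing
 * file or block device for size, block sizes and discard support, write
 * everything into the per-device JSON buffer, then run the common setup.
 */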
static int loop_init_tgt(struct ublksrv_dev *dev, int type, int argc, char
		*argv[])
{
	int buffered_io = 0;
	const struct ublksrv_ctrl_dev_info *info =
		ublksrv_ctrl_get_dev_info(ublksrv_get_ctrl_dev(dev));
	static const struct option lo_longopts[] = {
		{ "file",		1,	NULL, 'f' },
		{ "buffered_io",	no_argument, &buffered_io, 1},
		{ NULL }
	};
	unsigned long long bytes;
	struct stat st;
	int fd, opt;
	char *file = NULL;
	int jbuf_size;
	char *jbuf;
	struct ublksrv_tgt_base_json tgt_json = {
		.type = type,
	};
	struct ublk_params p = {
		.types = UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD,
		.basic = {
			.logical_bs_shift	= 9,
			.physical_bs_shift	= 12,
			.io_opt_shift		= 12,
			.io_min_shift		= 9,
			.max_sectors		= info->max_io_buf_bytes >> 9,
		},

		.discard = {
			.max_discard_sectors	= UINT_MAX >> 9,
			.max_discard_segments	= 1,
		},
	};
	bool can_discard = false;

	strcpy(tgt_json.name, "loop");

	if (type != UBLKSRV_TGT_TYPE_LOOP)
		return -1;

	while ((opt = getopt_long(argc, argv, "-:f:",
				  lo_longopts, NULL)) != -1) {
		switch (opt) {
		case 'f':
			file = strdup(optarg);
			break;
		}
	}

	if (!file)
		return -1;

	fd = open(file, O_RDWR);
	if (fd < 0) {
		ublk_err("%s: backing file %s can't be opened\n",
				__func__, file);
		return -2;
	}

	if (fstat(fd, &st) < 0)
		return -2;

	if (S_ISBLK(st.st_mode)) {
		unsigned int bs, pbs;

		if (ioctl(fd, BLKGETSIZE64, &bytes) != 0)
			return -1;
		if (ioctl(fd, BLKSSZGET, &bs) != 0)
			return -1;
		if (ioctl(fd, BLKPBSZGET, &pbs) != 0)
			return -1;
		block_device = true;
		p.basic.logical_bs_shift = ilog2(bs);
		p.basic.physical_bs_shift = ilog2(pbs);
		can_discard = backing_supports_discard(file);
	} else if (S_ISREG(st.st_mode)) {
		block_device = false;
		bytes = st.st_size;
		can_discard = true;
		p.basic.logical_bs_shift = ilog2(st.st_blksize);
		p.basic.physical_bs_shift = ilog2(st.st_blksize);
	} else {
		bytes = 0;
	}

	/*
	 * In case of buffered I/O, use a common bs/pbs so that any FS
	 * image can be supported.
	 */
	if (buffered_io || !ublk_param_is_valid(&p) ||
			fcntl(fd, F_SETFL, O_DIRECT)) {
		p.basic.logical_bs_shift = 9;
		p.basic.physical_bs_shift = 12;
		buffered_io = 1;
	}

	tgt_json.dev_size = bytes;
	p.basic.dev_sectors = bytes >> 9;

	if (st.st_blksize && can_discard)
		p.discard.discard_granularity = st.st_blksize;
	else
		p.types &= ~UBLK_PARAM_TYPE_DISCARD;

	jbuf = ublksrv_tgt_realloc_json_buf(dev, &jbuf_size);
	ublk_json_write_dev_info(dev, &jbuf, &jbuf_size);
	ublk_json_write_target_base(dev, &jbuf, &jbuf_size, &tgt_json);
	ublk_json_write_tgt_str(dev, &jbuf, &jbuf_size, "backing_file", file);
	ublk_json_write_tgt_long(dev, &jbuf, &jbuf_size, "direct_io", !buffered_io);
	ublk_json_write_params(dev, &jbuf, &jbuf_size, &p);

	close(fd);

	return loop_setup_tgt(dev, type, false, jbuf);
}

static void loop_usage_for_add(void)
{
	printf("           loop: -f backing_file [--buffered_io]\n");
	printf("           	default is direct IO to backing file\n");
}

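/*
 * Map a discard/write_zeroes request onto fallocate() mode flags, the
 * same way the kernel loop driver does.
 */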
static inline int loop_fallocate_mode(const struct ublksrv_io_desc *iod)
{
	__u16 ublk_op = ublksrv_get_op(iod);
	__u32 flags = ublksrv_get_flags(iod);
	int mode = FALLOC_FL_KEEP_SIZE;

	/* follow logic of linux kernel loop */
	if (ublk_op == UBLK_IO_OP_DISCARD) {
		mode |= FALLOC_FL_PUNCH_HOLE;
	} else if (ublk_op == UBLK_IO_OP_WRITE_ZEROES) {
		if (flags & UBLK_IO_F_NOUNMAP)
			mode |= FALLOC_FL_ZERO_RANGE;
		else
			mode |= FALLOC_FL_PUNCH_HOLE;
	} else {
		mode |= FALLOC_FL_ZERO_RANGE;
	}

	return mode;
}

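/*
 * Queue a READ against the backing file. With UBLK_F_USER_COPY the data
 * goes through the ublk char device (fds[0]): a linked SQE pair first
 * reads from the backing file into the per-tag io buffer, then writes
 * that buffer to the char device at ublk_pos(). Otherwise a single read
 * into iod->addr is enough.
 */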
static void loop_queue_tgt_read(const struct ublksrv_queue *q,
		const struct ublksrv_io_desc *iod, int tag)
{
	unsigned ublk_op = ublksrv_get_op(iod);

	if (user_copy) {
		struct io_uring_sqe *sqe, *sqe2;
		__u64 pos = ublk_pos(q->q_id, tag, 0);
		void *buf = ublksrv_queue_get_io_buf(q, tag);

		ublk_get_sqe_pair(q->ring_ptr, &sqe, &sqe2);
		io_uring_prep_read(sqe, 1 /*fds[1]*/,
				buf,
				iod->nr_sectors << 9,
				iod->start_sector << 9);
		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE | IOSQE_IO_LINK);
		sqe->user_data = build_user_data(tag, ublk_op, 1, 1);

		io_uring_prep_write(sqe2, 0 /*fds[0]*/,
				buf, iod->nr_sectors << 9, pos);
		io_uring_sqe_set_flags(sqe2, IOSQE_FIXED_FILE);
		/* bit63 marks us as tgt io */
		sqe2->user_data = build_user_data(tag, ublk_op, 0, 1);
	} else {
		struct io_uring_sqe *sqe;
		void *buf = (void *)iod->addr;

		ublk_get_sqe_pair(q->ring_ptr, &sqe, NULL);
		io_uring_prep_read(sqe, 1 /*fds[1]*/,
			buf,
			iod->nr_sectors << 9,
			iod->start_sector << 9);
		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
		sqe->user_data = build_user_data(tag, ublk_op, 0, 1);
	}
}

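/*
 * Queue a WRITE, the mirror of the read path: with user copy the data
 * is first read from the char device into the io buffer, then written
 * to the backing file. RWF_DSYNC makes each backing-file write behave
 * like O_DSYNC.
 */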
static void loop_queue_tgt_write(const struct ublksrv_queue *q,
		const struct ublksrv_io_desc *iod, int tag)
{
	unsigned ublk_op = ublksrv_get_op(iod);

	if (user_copy) {
		struct io_uring_sqe *sqe, *sqe2;
		__u64 pos = ublk_pos(q->q_id, tag, 0);
		void *buf = ublksrv_queue_get_io_buf(q, tag);

		ublk_get_sqe_pair(q->ring_ptr, &sqe, &sqe2);
		io_uring_prep_read(sqe, 0 /*fds[0]*/,
			buf, iod->nr_sectors << 9, pos);
		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE | IOSQE_IO_LINK);
		sqe->user_data = build_user_data(tag, ublk_op, 1, 1);

		io_uring_prep_write(sqe2, 1 /*fds[1]*/,
			buf, iod->nr_sectors << 9,
			iod->start_sector << 9);
		io_uring_sqe_set_flags(sqe2, IOSQE_FIXED_FILE);
		sqe2->rw_flags |= RWF_DSYNC;
		/* bit63 marks us as tgt io */
		sqe2->user_data = build_user_data(tag, ublk_op, 0, 1);
	} else {
		struct io_uring_sqe *sqe;
		void *buf = (void *)iod->addr;

		ublk_get_sqe_pair(q->ring_ptr, &sqe, NULL);
		io_uring_prep_write(sqe, 1 /*fds[1]*/,
			buf,
			iod->nr_sectors << 9,
			iod->start_sector << 9);
		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
		sqe->rw_flags |= RWF_DSYNC;
		/* bit63 marks us as tgt io */
		sqe->user_data = build_user_data(tag, ublk_op, 0, 1);
	}
}

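/*
 * Translate one ublk io descriptor into backing-file SQE(s): FLUSH maps
 * to sync_file_range, DISCARD/WRITE_ZEROES to fallocate, READ/WRITE to
 * the helpers above. Returns 1 once queued, -EINVAL for unknown ops.
 */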
static int loop_queue_tgt_io(const struct ublksrv_queue *q,
		const struct ublk_io_data *data, int tag)
{
	const struct ublksrv_io_desc *iod = data->iod;
	struct io_uring_sqe *sqe;
	unsigned ublk_op = ublksrv_get_op(iod);

	switch (ublk_op) {
	case UBLK_IO_OP_FLUSH:
		ublk_get_sqe_pair(q->ring_ptr, &sqe, NULL);
		io_uring_prep_sync_file_range(sqe, 1 /*fds[1]*/,
				iod->nr_sectors << 9,
				iod->start_sector << 9,
				IORING_FSYNC_DATASYNC);
		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
		/* bit63 marks us as tgt io */
		sqe->user_data = build_user_data(tag, ublk_op, 0, 1);
		break;
	case UBLK_IO_OP_WRITE_ZEROES:
	case UBLK_IO_OP_DISCARD:
		ublk_get_sqe_pair(q->ring_ptr, &sqe, NULL);
		io_uring_prep_fallocate(sqe, 1 /*fds[1]*/,
				loop_fallocate_mode(iod),
				iod->start_sector << 9,
				iod->nr_sectors << 9);
		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
		/* bit63 marks us as tgt io */
		sqe->user_data = build_user_data(tag, ublk_op, 0, 1);
		break;
	case UBLK_IO_OP_READ:
		loop_queue_tgt_read(q, iod, tag);
		break;
	case UBLK_IO_OP_WRITE:
		loop_queue_tgt_write(q, iod, tag);
		break;
	default:
		return -EINVAL;
	}

	ublk_dbg(UBLK_DBG_IO, "%s: tag %d ublk io %x %llx %u\n", __func__, tag,
			iod->op_flags, iod->start_sector, iod->nr_sectors << 9);

	return 1;
}

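/*
 * Per-io coroutine: queue the target SQE(s), suspend until the CQE is
 * delivered via loop_tgt_io_done(), retry on -EAGAIN, then complete the
 * ublk io with the result.
 */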
static co_io_job __loop_handle_io_async(const struct ublksrv_queue *q,
		const struct ublk_io_data *data, int tag)
{
	int ret;
	struct ublk_io_tgt *io = __ublk_get_io_tgt_data(data);

	io->queued_tgt_io = 0;
 again:
	ret = loop_queue_tgt_io(q, data, tag);
	if (ret > 0) {
		if (io->queued_tgt_io)
			ublk_err("bad queued_tgt_io %d\n", io->queued_tgt_io);
		io->queued_tgt_io += 1;

		co_await__suspend_always(tag);
		io->queued_tgt_io -= 1;

		if (io->tgt_io_cqe->res == -EAGAIN)
			goto again;

		ublksrv_complete_io(q, tag, io->tgt_io_cqe->res);
	} else if (ret < 0) {
		ublk_err("fail to queue io %d, ret %d\n", tag, ret);
	} else {
		ublk_err("no sqe %d\n", tag);
	}
}

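/*
 * Per-io entry point. For a block-device backing file, DISCARD is done
 * synchronously with the BLKDISCARD ioctl; pending SQEs are submitted
 * first since the ioctl can block. Everything else runs through the
 * coroutine above.
 */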
static int loop_handle_io_async(const struct ublksrv_queue *q,
		const struct ublk_io_data *data)
{
	struct ublk_io_tgt *io = __ublk_get_io_tgt_data(data);

	if (block_device && ublksrv_get_op(data->iod) == UBLK_IO_OP_DISCARD) {
		__u64 r[2];
		int res;

		io_uring_submit(q->ring_ptr);

		r[0] = data->iod->start_sector << 9;
		r[1] = data->iod->nr_sectors << 9;
		res = ioctl(q->dev->tgt.fds[1], BLKDISCARD, &r);
		ublksrv_complete_io(q, data->tag, res);
	} else {
		io->co = __loop_handle_io_async(q, data, data->tag);
	}
	return 0;
}

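/*
 * io_uring completion hook: skip the intermediate CQE of a linked
 * user-copy pair (tgt_data set in user_data), then stash the final CQE
 * and resume the coroutine.
 */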
static void loop_tgt_io_done(const struct ublksrv_queue *q,
		const struct ublk_io_data *data,
		const struct io_uring_cqe *cqe)
{
	int tag = user_data_to_tag(cqe->user_data);
	struct ublk_io_tgt *io = __ublk_get_io_tgt_data(data);

	if (user_data_to_tgt_data(cqe->user_data))
		return;

	ublk_assert(tag == data->tag);
	if (!io->queued_tgt_io)
		ublk_err("%s: wrong queued_tgt_io: res %d qid %u tag %u, cmd_op %u\n",
			__func__, cqe->res, q->q_id,
			user_data_to_tag(cqe->user_data),
			user_data_to_op(cqe->user_data));
	io->tgt_io_cqe = cqe;
	io->co.resume();
}

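/* Flush and close the backing file when the device is torn down. */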
static void loop_deinit_tgt(const struct ublksrv_dev *dev)
{
	fsync(dev->tgt.fds[1]);
	close(dev->tgt.fds[1]);
}

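/* Loop target callbacks, registered at load time by the constructor below. */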
struct ublksrv_tgt_type loop_tgt_type = {
	.handle_io_async = loop_handle_io_async,
	.tgt_io_done = loop_tgt_io_done,
	.usage_for_add = loop_usage_for_add,
	.init_tgt = loop_init_tgt,
	.deinit_tgt = loop_deinit_tgt,
	.type = UBLKSRV_TGT_TYPE_LOOP,
	.name = "loop",
	.recovery_tgt = loop_recovery_tgt,
};

static void tgt_loop_init() __attribute__((constructor));

static void tgt_loop_init(void)
{
	ublksrv_register_tgt_type(&loop_tgt_type);
}