// SPDX-License-Identifier: MIT or GPL-2.0-only

#include <config.h>

#include <poll.h>
#include <sys/epoll.h>
#include "ublksrv_tgt.h"

/* set when the device was created with UBLK_F_USER_COPY */
static bool user_copy;
/* set when the backing file is a block device rather than a regular file */
static bool block_device;

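/*
 * Check whether the backing block device advertises discard support by
 * reading /sys/block/<name>/queue/discard_max_hw_bytes: a non-zero value
 * means discard is supported.
 */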
static bool backing_supports_discard(char *name)
{
	int fd;
	char buf[512];
	int len;

	len = snprintf(buf, sizeof(buf), "/sys/block/%s/queue/discard_max_hw_bytes",
			basename(name));
	if (len <= 0)
		return false;
	fd = open(buf, O_RDONLY);
	if (fd > 0) {
		char val[128];
		int ret = pread(fd, val, sizeof(val) - 1, 0);
		unsigned long long bytes = 0;

		close(fd);
		if (ret > 0) {
			/* make sure the sysfs read is NUL-terminated before parsing */
			val[ret] = 0;
			bytes = strtoull(val, NULL, 10);
		}

		if (bytes > 0)
			return true;
	}
	return false;
}

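/*
 * Configure the target from the JSON device buffer: read back the
 * backing_file path, the direct_io setting and the ublk parameters,
 * open the backing file as tgt->fds[1], and size the target ring.
 * With UBLK_F_USER_COPY every request needs two SQEs (backing-file I/O
 * plus a copy to/from the ublk char device), so the ring depth is doubled.
 */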
static int loop_setup_tgt(struct ublksrv_dev *dev, int type, bool recovery,
		const char *jbuf)
{
	struct ublksrv_tgt_info *tgt = &dev->tgt;
	const struct ublksrv_ctrl_dev_info *info =
		ublksrv_ctrl_get_dev_info(ublksrv_get_ctrl_dev(dev));
	int fd, ret;
	long direct_io = 0;
	struct ublk_params p;
	char file[PATH_MAX];

	ublk_assert(jbuf);

	ret = ublksrv_json_read_target_str_info(jbuf, PATH_MAX, "backing_file", file);
	if (ret < 0) {
		ublk_err("%s: backing file can't be retrieved from jbuf %d\n",
				__func__, ret);
		return ret;
	}

	ret = ublksrv_json_read_target_ulong_info(jbuf, "direct_io",
			&direct_io);
	if (ret) {
		ublk_err("%s: read target direct_io failed %d\n",
				__func__, ret);
		return ret;
	}

	ret = ublksrv_json_read_params(&p, jbuf);
	if (ret) {
		ublk_err("%s: read ublk params failed %d\n",
				__func__, ret);
		return ret;
	}

	fd = open(file, O_RDWR);
	if (fd < 0) {
		ublk_err("%s: backing file %s can't be opened\n",
				__func__, file);
		return fd;
	}

	if (direct_io)
		fcntl(fd, F_SETFL, O_DIRECT);

	ublksrv_tgt_set_io_data_size(tgt);
	tgt->dev_size = p.basic.dev_sectors << 9;
	tgt->tgt_ring_depth = info->queue_depth;
	tgt->nr_fds = 1;
	tgt->fds[1] = fd;
	user_copy = info->flags & UBLK_F_USER_COPY;
	if (user_copy)
		tgt->tgt_ring_depth *= 2;

	return 0;
}

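/* Recovery path: rebuild the target from the JSON saved by the previous daemon. */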
static int loop_recovery_tgt(struct ublksrv_dev *dev, int type)
{
	const struct ublksrv_ctrl_dev *cdev = ublksrv_get_ctrl_dev(dev);
	const char *jbuf = ublksrv_ctrl_get_recovery_jbuf(cdev);

	ublk_assert(type == UBLKSRV_TGT_TYPE_LOOP);

	return loop_setup_tgt(dev, type, true, jbuf);
}

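/*
 * Parse the "add" command line (-f <backing_file>, optional --buffered_io),
 * probe the backing file or block device for size/block-size/discard
 * support, fill in the ublk parameters, persist everything into the JSON
 * device buffer and finally call loop_setup_tgt().
 *
 * With the ublksrv command-line front end this is typically reached via
 * something like the following (exact tool name and flags depend on the
 * build; only -f and --buffered_io are defined here):
 *
 *	ublk add -t loop -f /path/to/backing.img [--buffered_io]
 */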
static int loop_init_tgt(struct ublksrv_dev *dev, int type, int argc,
		char *argv[])
{
	int buffered_io = 0;
	const struct ublksrv_ctrl_dev_info *info =
		ublksrv_ctrl_get_dev_info(ublksrv_get_ctrl_dev(dev));
	static const struct option lo_longopts[] = {
		{ "file", required_argument, NULL, 'f' },
		{ "buffered_io", no_argument, &buffered_io, 1 },
		{ NULL }
	};
	unsigned long long bytes;
	struct stat st;
	int fd, opt;
	char *file = NULL;
	int jbuf_size;
	char *jbuf;
	struct ublksrv_tgt_base_json tgt_json = {
		.type = type,
	};
	struct ublk_params p = {
		.types = UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD,
		.basic = {
			.logical_bs_shift = 9,
			.physical_bs_shift = 12,
			.io_opt_shift = 12,
			.io_min_shift = 9,
			.max_sectors = info->max_io_buf_bytes >> 9,
		},

		.discard = {
			.max_discard_sectors = UINT_MAX >> 9,
			.max_discard_segments = 1,
		},
	};
	bool can_discard = false;

	strcpy(tgt_json.name, "loop");

	if (type != UBLKSRV_TGT_TYPE_LOOP)
		return -1;

	while ((opt = getopt_long(argc, argv, "-:f:",
				  lo_longopts, NULL)) != -1) {
		switch (opt) {
		case 'f':
			file = strdup(optarg);
			break;
		}
	}

	if (!file)
		return -1;

	fd = open(file, O_RDWR);
	if (fd < 0) {
		ublk_err("%s: backing file %s can't be opened\n",
				__func__, file);
		return -2;
	}

	if (fstat(fd, &st) < 0) {
		close(fd);
		return -2;
	}

	if (S_ISBLK(st.st_mode)) {
		unsigned int bs, pbs;

		if (ioctl(fd, BLKGETSIZE64, &bytes) != 0 ||
				ioctl(fd, BLKSSZGET, &bs) != 0 ||
				ioctl(fd, BLKPBSZGET, &pbs) != 0) {
			close(fd);
			return -1;
		}
		block_device = true;
		p.basic.logical_bs_shift = ilog2(bs);
		p.basic.physical_bs_shift = ilog2(pbs);
		can_discard = backing_supports_discard(file);
	} else if (S_ISREG(st.st_mode)) {
		block_device = false;
		bytes = st.st_size;
		can_discard = true;
		p.basic.logical_bs_shift = ilog2(st.st_blksize);
		p.basic.physical_bs_shift = ilog2(st.st_blksize);
	} else {
		bytes = 0;
	}

	/*
	 * For buffered I/O, or when O_DIRECT can't be enabled on the backing
	 * file or the probed parameters are invalid, fall back to a common
	 * logical/physical block size so that any filesystem image can be
	 * supported.
	 */
	if (buffered_io || !ublk_param_is_valid(&p) ||
			fcntl(fd, F_SETFL, O_DIRECT)) {
		p.basic.logical_bs_shift = 9;
		p.basic.physical_bs_shift = 12;
		buffered_io = 1;
	}

	tgt_json.dev_size = bytes;
	p.basic.dev_sectors = bytes >> 9;

	if (st.st_blksize && can_discard)
		p.discard.discard_granularity = st.st_blksize;
	else
		p.types &= ~UBLK_PARAM_TYPE_DISCARD;

	jbuf = ublksrv_tgt_realloc_json_buf(dev, &jbuf_size);
	ublk_json_write_dev_info(dev, &jbuf, &jbuf_size);
	ublk_json_write_target_base(dev, &jbuf, &jbuf_size, &tgt_json);
	ublk_json_write_tgt_str(dev, &jbuf, &jbuf_size, "backing_file", file);
	ublk_json_write_tgt_long(dev, &jbuf, &jbuf_size, "direct_io", !buffered_io);
	ublk_json_write_params(dev, &jbuf, &jbuf_size, &p);

	close(fd);

	return loop_setup_tgt(dev, type, false, jbuf);
}

static void loop_usage_for_add(void)
{
	printf(" loop: -f backing_file [--buffered_io]\n");
	printf(" default is direct IO to backing file\n");
}

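/*
 * Map a DISCARD/WRITE_ZEROES request to an fallocate() mode, mirroring the
 * logic of the kernel loop driver.
 */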
static inline int loop_fallocate_mode(const struct ublksrv_io_desc *iod)
{
	__u16 ublk_op = ublksrv_get_op(iod);
	__u32 flags = ublksrv_get_flags(iod);
	int mode = FALLOC_FL_KEEP_SIZE;

	/* follow logic of linux kernel loop */
	if (ublk_op == UBLK_IO_OP_DISCARD) {
		mode |= FALLOC_FL_PUNCH_HOLE;
	} else if (ublk_op == UBLK_IO_OP_WRITE_ZEROES) {
		if (flags & UBLK_IO_F_NOUNMAP)
			mode |= FALLOC_FL_ZERO_RANGE;
		else
			mode |= FALLOC_FL_PUNCH_HOLE;
	} else {
		mode |= FALLOC_FL_ZERO_RANGE;
	}

	return mode;
}

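/*
 * Queue a READ.  Without user copy the data is read from the backing file
 * straight into the request buffer at iod->addr.  With UBLK_F_USER_COPY two
 * linked SQEs are used: the first reads from the backing file (fds[1]) into
 * the per-tag buffer, the second writes that buffer to the ublk char device
 * (fds[0]) at ublk_pos(q_id, tag, 0) so the kernel copies it into the
 * original request pages.
 */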
static void loop_queue_tgt_read(const struct ublksrv_queue *q,
		const struct ublksrv_io_desc *iod, int tag)
{
	unsigned ublk_op = ublksrv_get_op(iod);

	if (user_copy) {
		struct io_uring_sqe *sqe, *sqe2;
		__u64 pos = ublk_pos(q->q_id, tag, 0);
		void *buf = ublksrv_queue_get_io_buf(q, tag);

		ublk_get_sqe_pair(q->ring_ptr, &sqe, &sqe2);
		io_uring_prep_read(sqe, 1 /*fds[1]*/,
				buf,
				iod->nr_sectors << 9,
				iod->start_sector << 9);
		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE | IOSQE_IO_LINK);
		sqe->user_data = build_user_data(tag, ublk_op, 1, 1);

		io_uring_prep_write(sqe2, 0 /*fds[0]*/,
				buf, iod->nr_sectors << 9, pos);
		io_uring_sqe_set_flags(sqe2, IOSQE_FIXED_FILE);
		/* bit63 marks us as tgt io */
		sqe2->user_data = build_user_data(tag, ublk_op, 0, 1);
	} else {
		struct io_uring_sqe *sqe;
		void *buf = (void *)iod->addr;

		ublk_get_sqe_pair(q->ring_ptr, &sqe, NULL);
		io_uring_prep_read(sqe, 1 /*fds[1]*/,
				buf,
				iod->nr_sectors << 9,
				iod->start_sector << 9);
		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
		sqe->user_data = build_user_data(tag, ublk_op, 0, 1);
	}
}

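/*
 * Queue a WRITE, the mirror image of the read path: with UBLK_F_USER_COPY
 * the request payload is first read from the ublk char device (fds[0]) at
 * the per-tag offset, then a linked write (with RWF_DSYNC) pushes it to the
 * backing file; otherwise a single write from iod->addr is issued.
 */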
static void loop_queue_tgt_write(const struct ublksrv_queue *q,
		const struct ublksrv_io_desc *iod, int tag)
{
	unsigned ublk_op = ublksrv_get_op(iod);

	if (user_copy) {
		struct io_uring_sqe *sqe, *sqe2;
		__u64 pos = ublk_pos(q->q_id, tag, 0);
		void *buf = ublksrv_queue_get_io_buf(q, tag);

		ublk_get_sqe_pair(q->ring_ptr, &sqe, &sqe2);
		io_uring_prep_read(sqe, 0 /*fds[0]*/,
				buf, iod->nr_sectors << 9, pos);
		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE | IOSQE_IO_LINK);
		sqe->user_data = build_user_data(tag, ublk_op, 1, 1);

		io_uring_prep_write(sqe2, 1 /*fds[1]*/,
				buf, iod->nr_sectors << 9,
				iod->start_sector << 9);
		io_uring_sqe_set_flags(sqe2, IOSQE_FIXED_FILE);
		sqe2->rw_flags |= RWF_DSYNC;
		/* bit63 marks us as tgt io */
		sqe2->user_data = build_user_data(tag, ublk_op, 0, 1);
	} else {
		struct io_uring_sqe *sqe;
		void *buf = (void *)iod->addr;

		ublk_get_sqe_pair(q->ring_ptr, &sqe, NULL);
		io_uring_prep_write(sqe, 1 /*fds[1]*/,
				buf,
				iod->nr_sectors << 9,
				iod->start_sector << 9);
		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
		sqe->rw_flags |= RWF_DSYNC;
		/* bit63 marks us as tgt io */
		sqe->user_data = build_user_data(tag, ublk_op, 0, 1);
	}
}

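/*
 * Dispatch one ublk request to the backing file: FLUSH becomes
 * sync_file_range(), DISCARD/WRITE_ZEROES become fallocate(), READ/WRITE go
 * through the helpers above.  Returns 1 when an SQE has been queued and
 * -EINVAL for unknown ops.
 */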
static int loop_queue_tgt_io(const struct ublksrv_queue *q,
		const struct ublk_io_data *data, int tag)
{
	const struct ublksrv_io_desc *iod = data->iod;
	struct io_uring_sqe *sqe;
	unsigned ublk_op = ublksrv_get_op(iod);

	switch (ublk_op) {
	case UBLK_IO_OP_FLUSH:
		ublk_get_sqe_pair(q->ring_ptr, &sqe, NULL);
		io_uring_prep_sync_file_range(sqe, 1 /*fds[1]*/,
				iod->nr_sectors << 9,
				iod->start_sector << 9,
				IORING_FSYNC_DATASYNC);
		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
		/* bit63 marks us as tgt io */
		sqe->user_data = build_user_data(tag, ublk_op, 0, 1);
		break;
	case UBLK_IO_OP_WRITE_ZEROES:
	case UBLK_IO_OP_DISCARD:
		ublk_get_sqe_pair(q->ring_ptr, &sqe, NULL);
		io_uring_prep_fallocate(sqe, 1 /*fds[1]*/,
				loop_fallocate_mode(iod),
				iod->start_sector << 9,
				iod->nr_sectors << 9);
		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
		/* bit63 marks us as tgt io */
		sqe->user_data = build_user_data(tag, ublk_op, 0, 1);
		break;
	case UBLK_IO_OP_READ:
		loop_queue_tgt_read(q, iod, tag);
		break;
	case UBLK_IO_OP_WRITE:
		loop_queue_tgt_write(q, iod, tag);
		break;
	default:
		return -EINVAL;
	}

	ublk_dbg(UBLK_DBG_IO, "%s: tag %d ublk io %x %llx %u\n", __func__, tag,
			iod->op_flags, iod->start_sector, iod->nr_sectors << 9);

	return 1;
}

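/*
 * Per-request coroutine: queue the target I/O, suspend until the CQE is
 * delivered by loop_tgt_io_done(), retry on -EAGAIN, then complete the ublk
 * request with the result.
 */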
static co_io_job __loop_handle_io_async(const struct ublksrv_queue *q,
		const struct ublk_io_data *data, int tag)
{
	int ret;
	struct ublk_io_tgt *io = __ublk_get_io_tgt_data(data);

	io->queued_tgt_io = 0;
again:
	ret = loop_queue_tgt_io(q, data, tag);
	if (ret > 0) {
		if (io->queued_tgt_io)
			ublk_err("bad queued_tgt_io %d\n", io->queued_tgt_io);
		io->queued_tgt_io += 1;

		co_await__suspend_always(tag);
		io->queued_tgt_io -= 1;

		if (io->tgt_io_cqe->res == -EAGAIN)
			goto again;

		ublksrv_complete_io(q, tag, io->tgt_io_cqe->res);
	} else if (ret < 0) {
		ublk_err("fail to queue io %d, ret %d\n", tag, ret);
	} else {
		ublk_err("no sqe %d\n", tag);
	}
}

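/*
 * Entry point for each incoming ublk request.  DISCARD on a block-device
 * backing is handled synchronously with the BLKDISCARD ioctl (after
 * submitting any pending SQEs); everything else runs through the coroutine
 * above.
 */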
static int loop_handle_io_async(const struct ublksrv_queue *q,
		const struct ublk_io_data *data)
{
	struct ublk_io_tgt *io = __ublk_get_io_tgt_data(data);

	if (block_device && ublksrv_get_op(data->iod) == UBLK_IO_OP_DISCARD) {
		__u64 r[2];
		int res;

		io_uring_submit(q->ring_ptr);

		r[0] = data->iod->start_sector << 9;
		r[1] = data->iod->nr_sectors << 9;
		res = ioctl(q->dev->tgt.fds[1], BLKDISCARD, &r);
		ublksrv_complete_io(q, data->tag, res);
	} else {
		io->co = __loop_handle_io_async(q, data, data->tag);
	}
	return 0;
}

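/*
 * io_uring completion for a target SQE.  CQEs from the first SQE of a
 * user-copy pair (tgt_data set in user_data) are ignored; only the final
 * linked SQE resumes the coroutine with the result.
 */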
static void loop_tgt_io_done(const struct ublksrv_queue *q,
		const struct ublk_io_data *data,
		const struct io_uring_cqe *cqe)
{
	int tag = user_data_to_tag(cqe->user_data);
	struct ublk_io_tgt *io = __ublk_get_io_tgt_data(data);

	if (user_data_to_tgt_data(cqe->user_data))
		return;

	ublk_assert(tag == data->tag);
	if (!io->queued_tgt_io)
		ublk_err("%s: wrong queued_tgt_io: res %d qid %u tag %u, cmd_op %u\n",
				__func__, cqe->res, q->q_id,
				user_data_to_tag(cqe->user_data),
				user_data_to_op(cqe->user_data));
	io->tgt_io_cqe = cqe;
	io->co.resume();
}

static void loop_deinit_tgt(const struct ublksrv_dev *dev)
{
	fsync(dev->tgt.fds[1]);
	close(dev->tgt.fds[1]);
}

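/*
 * Target type descriptor; registered with libublksrv by the constructor
 * below so the "loop" target is available as soon as the binary is loaded.
 */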
struct ublksrv_tgt_type loop_tgt_type = {
	.handle_io_async = loop_handle_io_async,
	.tgt_io_done = loop_tgt_io_done,
	.usage_for_add = loop_usage_for_add,
	.init_tgt = loop_init_tgt,
	.deinit_tgt = loop_deinit_tgt,
	.type = UBLKSRV_TGT_TYPE_LOOP,
	.name = "loop",
	.recovery_tgt = loop_recovery_tgt,
};

static void tgt_loop_init() __attribute__((constructor));

static void tgt_loop_init(void)
{
	ublksrv_register_tgt_type(&loop_tgt_type);
}