1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * bio-integrity.c - bio data integrity extensions
4 *
5 * Copyright (C) 2007, 2008, 2009 Oracle Corporation
6 * Written by: Martin K. Petersen <martin.petersen@oracle.com>
7 */
8
9 #include <linux/blk-integrity.h>
10 #include <linux/mempool.h>
11 #include <linux/export.h>
12 #include <linux/bio.h>
13 #include <linux/workqueue.h>
14 #include <linux/slab.h>
15 #include "blk.h"
16
/*
 * Slab cache for struct bio_integrity_payload plus BIO_INLINE_VECS inline
 * bio_vecs; created in bio_integrity_init() and used to back the per-bio_set
 * integrity mempools set up by bioset_integrity_create().
 */
17 static struct kmem_cache *bip_slab;
/*
 * Workqueue on which bio_integrity_verify_fn() is queued by
 * __bio_integrity_endio(); drained by blk_flush_integrity().
 */
18 static struct workqueue_struct *kintegrityd_wq;
19
blk_flush_integrity(void)20 void blk_flush_integrity(void)
21 {
22 flush_workqueue(kintegrityd_wq);
23 }
24
25 /**
26 * bio_integrity_free - Free bio integrity payload
27 * @bio: bio containing bip to be freed
28 *
29 * Description: Free the integrity portion of a bio.
30 */
bio_integrity_free(struct bio * bio)31 void bio_integrity_free(struct bio *bio)
32 {
33 struct bio_integrity_payload *bip = bio_integrity(bio);
34 struct bio_set *bs = bio->bi_pool;
35
36 if (bs && mempool_initialized(&bs->bio_integrity_pool)) {
37 if (bip->bip_vec)
38 bvec_free(&bs->bvec_integrity_pool, bip->bip_vec,
39 bip->bip_max_vcnt);
40 mempool_free(bip, &bs->bio_integrity_pool);
41 } else {
42 kfree(bip);
43 }
44 bio->bi_integrity = NULL;
45 bio->bi_opf &= ~REQ_INTEGRITY;
46 }
47
48 /**
49 * bio_integrity_alloc - Allocate integrity payload and attach it to bio
50 * @bio: bio to attach integrity metadata to
51 * @gfp_mask: Memory allocation mask
52 * @nr_vecs: Number of integrity metadata scatter-gather elements
53 *
54 * Description: This function prepares a bio for attaching integrity
55 * metadata. nr_vecs specifies the maximum number of pages containing
56 * integrity metadata that can be attached.
57 */
bio_integrity_alloc(struct bio * bio,gfp_t gfp_mask,unsigned int nr_vecs)58 struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
59 gfp_t gfp_mask,
60 unsigned int nr_vecs)
61 {
62 struct bio_integrity_payload *bip;
63 struct bio_set *bs = bio->bi_pool;
64 unsigned inline_vecs;
65
66 if (WARN_ON_ONCE(bio_has_crypt_ctx(bio)))
67 return ERR_PTR(-EOPNOTSUPP);
68
69 if (!bs || !mempool_initialized(&bs->bio_integrity_pool)) {
70 bip = kmalloc(struct_size(bip, bip_inline_vecs, nr_vecs), gfp_mask);
71 inline_vecs = nr_vecs;
72 } else {
73 bip = mempool_alloc(&bs->bio_integrity_pool, gfp_mask);
74 inline_vecs = BIO_INLINE_VECS;
75 }
76
77 if (unlikely(!bip))
78 return ERR_PTR(-ENOMEM);
79
80 memset(bip, 0, sizeof(*bip));
81
82 /* always report as many vecs as asked explicitly, not inline vecs */
83 bip->bip_max_vcnt = nr_vecs;
84 if (nr_vecs > inline_vecs) {
85 bip->bip_vec = bvec_alloc(&bs->bvec_integrity_pool,
86 &bip->bip_max_vcnt, gfp_mask);
87 if (!bip->bip_vec)
88 goto err;
89 } else if (nr_vecs) {
90 bip->bip_vec = bip->bip_inline_vecs;
91 }
92
93 bip->bip_bio = bio;
94 bio->bi_integrity = bip;
95 bio->bi_opf |= REQ_INTEGRITY;
96
97 return bip;
98 err:
99 if (bs && mempool_initialized(&bs->bio_integrity_pool))
100 mempool_free(bip, &bs->bio_integrity_pool);
101 else
102 kfree(bip);
103 return ERR_PTR(-ENOMEM);
104 }
105 EXPORT_SYMBOL(bio_integrity_alloc);
106
bio_integrity_unpin_bvec(struct bio_vec * bv,int nr_vecs)107 static void bio_integrity_unpin_bvec(struct bio_vec *bv, int nr_vecs)
108 {
109 int i;
110
111 for (i = 0; i < nr_vecs; i++)
112 unpin_user_page(bv[i].bv_page);
113 }
114
bio_integrity_uncopy_user(struct bio_integrity_payload * bip)115 static void bio_integrity_uncopy_user(struct bio_integrity_payload *bip)
116 {
117 unsigned short orig_nr_vecs = bip->bip_max_vcnt - 1;
118 struct bio_vec *orig_bvecs = &bip->bip_vec[1];
119 struct bio_vec *bounce_bvec = &bip->bip_vec[0];
120 size_t bytes = bounce_bvec->bv_len;
121 struct iov_iter orig_iter;
122 int ret;
123
124 iov_iter_bvec(&orig_iter, ITER_DEST, orig_bvecs, orig_nr_vecs, bytes);
125 ret = copy_to_iter(bvec_virt(bounce_bvec), bytes, &orig_iter);
126 WARN_ON_ONCE(ret != bytes);
127
128 bio_integrity_unpin_bvec(orig_bvecs, orig_nr_vecs);
129 }
130
131 /**
132 * bio_integrity_unmap_user - Unmap user integrity payload
133 * @bio: bio containing bip to be unmapped
134 *
135 * Unmap the user mapped integrity portion of a bio.
136 */
bio_integrity_unmap_user(struct bio * bio)137 void bio_integrity_unmap_user(struct bio *bio)
138 {
139 struct bio_integrity_payload *bip = bio_integrity(bio);
140
141 if (bip->bip_flags & BIP_COPY_USER) {
142 if (bio_data_dir(bio) == READ)
143 bio_integrity_uncopy_user(bip);
144 kfree(bvec_virt(bip->bip_vec));
145 return;
146 }
147
148 bio_integrity_unpin_bvec(bip->bip_vec, bip->bip_max_vcnt);
149 }
150
151 /**
152 * bio_integrity_add_page - Attach integrity metadata
153 * @bio: bio to update
154 * @page: page containing integrity metadata
155 * @len: number of bytes of integrity metadata in page
156 * @offset: start offset within page
157 *
158 * Description: Attach a page containing integrity metadata to bio.
159 */
bio_integrity_add_page(struct bio * bio,struct page * page,unsigned int len,unsigned int offset)160 int bio_integrity_add_page(struct bio *bio, struct page *page,
161 unsigned int len, unsigned int offset)
162 {
163 struct request_queue *q = bdev_get_queue(bio->bi_bdev);
164 struct bio_integrity_payload *bip = bio_integrity(bio);
165
166 if (bip->bip_vcnt > 0) {
167 struct bio_vec *bv = &bip->bip_vec[bip->bip_vcnt - 1];
168 bool same_page = false;
169
170 if (bvec_try_merge_hw_page(q, bv, page, len, offset,
171 &same_page)) {
172 bip->bip_iter.bi_size += len;
173 return len;
174 }
175
176 if (bip->bip_vcnt >=
177 min(bip->bip_max_vcnt, queue_max_integrity_segments(q)))
178 return 0;
179
180 /*
181 * If the queue doesn't support SG gaps and adding this segment
182 * would create a gap, disallow it.
183 */
184 if (bvec_gap_to_prev(&q->limits, bv, offset))
185 return 0;
186 }
187
188 bvec_set_page(&bip->bip_vec[bip->bip_vcnt], page, len, offset);
189 bip->bip_vcnt++;
190 bip->bip_iter.bi_size += len;
191
192 return len;
193 }
194 EXPORT_SYMBOL(bio_integrity_add_page);
195
/*
 * Attach user integrity metadata to @bio through a kernel bounce buffer.
 *
 * @bio:	bio to attach the payload to
 * @bvec:	bvecs describing the pinned user pages
 * @nr_vecs:	number of entries in @bvec
 * @len:	total metadata length in bytes
 * @direction:	ITER_SOURCE for a write (user data is copied into the bounce
 *		buffer now), otherwise a read (the bounce buffer is zeroed and
 *		the original bvecs are saved in bip_vec[1..] so completion can
 *		copy the data back)
 * @seed:	value stored in bip_iter.bi_sector
 *
 * Ownership of the page pins: on success for a write, the user pages are
 * unpinned here because their contents now live in the bounce buffer. On
 * success for a read, the pins stay in place and are dropped at completion
 * by bio_integrity_uncopy_user(). On ANY failure the pins are left untouched
 * so the caller's error path can release them exactly once.
 *
 * Return: 0 on success, -ENOMEM or -EFAULT (or the error from
 * bio_integrity_alloc()) on failure.
 */
static int bio_integrity_copy_user(struct bio *bio, struct bio_vec *bvec,
				   int nr_vecs, unsigned int len,
				   unsigned int direction, u32 seed)
{
	bool write = direction == ITER_SOURCE;
	struct bio_integrity_payload *bip;
	struct iov_iter iter;
	void *buf;
	int ret;

	buf = kmalloc(len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (write) {
		iov_iter_bvec(&iter, direction, bvec, nr_vecs, len);
		if (!copy_from_iter_full(buf, len, &iter)) {
			ret = -EFAULT;
			goto free_buf;
		}

		bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);
	} else {
		memset(buf, 0, len);

		/*
		 * We need to preserve the original bvec and the number of vecs
		 * in it for completion handling
		 */
		bip = bio_integrity_alloc(bio, GFP_KERNEL, nr_vecs + 1);
	}

	if (IS_ERR(bip)) {
		ret = PTR_ERR(bip);
		goto free_buf;
	}

	if (!write)
		memcpy(&bip->bip_vec[1], bvec, nr_vecs * sizeof(*bvec));

	ret = bio_integrity_add_page(bio, virt_to_page(buf), len,
				     offset_in_page(buf));
	if (ret != len) {
		ret = -ENOMEM;
		goto free_bip;
	}

	/*
	 * Nothing below can fail, so it is now safe to give up the pins on
	 * the user pages for a write. Doing this before the last fallible
	 * step would make the caller's error path unpin them a second time.
	 */
	if (write)
		bio_integrity_unpin_bvec(bvec, nr_vecs);

	bip->bip_flags |= BIP_COPY_USER;
	bip->bip_iter.bi_sector = seed;
	bip->bip_vcnt = nr_vecs;
	return 0;
free_bip:
	bio_integrity_free(bio);
free_buf:
	kfree(buf);
	return ret;
}
255
/*
 * Attach the pinned user pages described by @bvec directly to @bio as its
 * integrity payload (no bounce buffer).
 *
 * Allocates a payload with room for @nr_vecs bvecs, copies @bvec into it and
 * records @seed as the starting bi_sector and @len as the payload size.
 *
 * Return: 0 on success or the error from bio_integrity_alloc().
 */
static int bio_integrity_init_user(struct bio *bio, struct bio_vec *bvec,
				   int nr_vecs, unsigned int len, u32 seed)
{
	struct bio_integrity_payload *bip =
		bio_integrity_alloc(bio, GFP_KERNEL, nr_vecs);

	if (IS_ERR(bip))
		return PTR_ERR(bip);

	memcpy(bip->bip_vec, bvec, nr_vecs * sizeof(*bvec));
	bip->bip_vcnt = nr_vecs;
	bip->bip_iter.bi_size = len;
	bip->bip_iter.bi_sector = seed;

	return 0;
}
271
/*
 * Build bvecs from an array of pinned pages, coalescing runs of pages that
 * are adjacent in the page array, physically consecutive (page + 1) and part
 * of the same folio into a single bvec.
 *
 * @bvec:	output array; must have room for up to @nr_vecs entries
 * @pages:	pinned pages
 * @nr_vecs:	number of entries in @pages
 * @bytes:	total number of bytes covered by @pages
 * @offset:	offset of the data within the first page
 *
 * Every page merged into a preceding bvec is unpinned here, so exactly one
 * pin remains per resulting bvec (on the page stored in bv_page), matching
 * what bio_integrity_unpin_bvec() later releases.
 *
 * Return: the number of bvecs written to @bvec.
 */
static unsigned int bvec_from_pages(struct bio_vec *bvec, struct page **pages,
				    int nr_vecs, ssize_t bytes, ssize_t offset)
{
	unsigned int nr_bvecs = 0;
	int first = 0;

	while (first < nr_vecs) {
		struct folio *folio = page_folio(pages[first]);
		size_t seg_len = min_t(size_t, bytes, PAGE_SIZE - offset);
		int next = first + 1;

		bytes -= seg_len;

		/* Extend the segment over contiguous pages of this folio. */
		while (next < nr_vecs &&
		       page_folio(pages[next]) == folio &&
		       pages[next] == pages[next - 1] + 1) {
			size_t chunk = min_t(size_t, PAGE_SIZE, bytes);

			unpin_user_page(pages[next]);
			seg_len += chunk;
			bytes -= chunk;
			next++;
		}

		bvec_set_page(&bvec[nr_bvecs], pages[first], seg_len, offset);
		nr_bvecs++;

		/* Only the very first page starts at a non-zero offset. */
		offset = 0;
		first = next;
	}

	return nr_bvecs;
}
301
/**
 * bio_integrity_map_user - Attach a user-space buffer as integrity metadata
 * @bio:	bio to attach the metadata to; must not have a payload yet
 * @ubuf:	user-space address of the metadata buffer
 * @bytes:	length of the metadata buffer in bytes
 * @seed:	value stored as the payload's starting bip_iter.bi_sector
 *
 * Description: Pins the user pages backing @ubuf and attaches them to @bio
 * as its integrity payload. The pages are mapped directly when the buffer
 * satisfies the queue's DMA alignment and fits within the queue's integrity
 * segment limit; otherwise the metadata goes through a kernel bounce buffer.
 *
 * Return: 0 on success; -EINVAL if @bio already has an integrity payload;
 * -E2BIG if the buffer is too large; -ENOMEM on allocation failure; -EFAULT
 * if the whole buffer could not be pinned; or the error reported by page
 * extraction or payload setup.
 */
int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t bytes,
			   u32 seed)
{
	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
	unsigned int align = blk_lim_dma_alignment_and_pad(&q->limits);
	struct page *stack_pages[UIO_FASTIOV], **pages = stack_pages;
	struct bio_vec stack_vec[UIO_FASTIOV], *bvec = stack_vec;
	unsigned int direction, nr_bvecs;
	struct iov_iter iter;
	int ret, nr_vecs;
	size_t offset;
	bool copy;

	if (bio_integrity(bio))
		return -EINVAL;
	if (bytes >> SECTOR_SHIFT > queue_max_hw_sectors(q))
		return -E2BIG;

	if (bio_data_dir(bio) == READ)
		direction = ITER_DEST;
	else
		direction = ITER_SOURCE;

	iov_iter_ubuf(&iter, direction, ubuf, bytes);
	nr_vecs = iov_iter_npages(&iter, BIO_MAX_VECS + 1);
	if (nr_vecs > BIO_MAX_VECS)
		return -E2BIG;
	if (nr_vecs > UIO_FASTIOV) {
		bvec = kcalloc(nr_vecs, sizeof(*bvec), GFP_KERNEL);
		if (!bvec)
			return -ENOMEM;
		/* Let iov_iter_extract_pages() allocate the page array. */
		pages = NULL;
	}

	copy = !iov_iter_is_aligned(&iter, align, align);
	ret = iov_iter_extract_pages(&iter, &pages, bytes, nr_vecs, 0, &offset);
	if (unlikely(ret < 0))
		goto free_pages;
	if (unlikely(ret != bytes)) {
		/*
		 * Short extraction: only the first pages of the array are
		 * valid and pinned. Release those and fail rather than let
		 * bvec_from_pages() walk entries that were never filled in.
		 */
		unsigned int nr_pinned = 0;
		unsigned int i;

		if (ret)
			nr_pinned = (offset + ret + PAGE_SIZE - 1) / PAGE_SIZE;
		for (i = 0; i < nr_pinned; i++)
			unpin_user_page(pages[i]);
		ret = -EFAULT;
		goto free_pages;
	}

	nr_bvecs = bvec_from_pages(bvec, pages, nr_vecs, bytes, offset);
	if (pages != stack_pages)
		kvfree(pages);
	if (nr_bvecs > queue_max_integrity_segments(q))
		copy = true;

	if (copy)
		ret = bio_integrity_copy_user(bio, bvec, nr_bvecs, bytes,
					      direction, seed);
	else
		ret = bio_integrity_init_user(bio, bvec, nr_bvecs, bytes, seed);
	if (ret)
		goto release_pages;
	if (bvec != stack_vec)
		kfree(bvec);

	return 0;

release_pages:
	bio_integrity_unpin_bvec(bvec, nr_bvecs);
	goto free_bvec;
free_pages:
	/* The page array may have been allocated even though pinning failed. */
	if (pages != stack_pages)
		kvfree(pages);
free_bvec:
	if (bvec != stack_vec)
		kfree(bvec);
	return ret;
}
366
367 /**
368 * bio_integrity_prep - Prepare bio for integrity I/O
369 * @bio: bio to prepare
370 *
371 * Description: Checks if the bio already has an integrity payload attached.
372 * If it does, the payload has been generated by another kernel subsystem,
373 * and we just pass it through. Otherwise allocates integrity payload.
374 * The bio must have data direction, target device and start sector set priot
375 * to calling. In the WRITE case, integrity metadata will be generated using
376 * the block device's integrity function. In the READ case, the buffer
377 * will be prepared for DMA and a suitable end_io handler set up.
378 */
bio_integrity_prep(struct bio * bio)379 bool bio_integrity_prep(struct bio *bio)
380 {
381 struct bio_integrity_payload *bip;
382 struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
383 unsigned int len;
384 void *buf;
385 gfp_t gfp = GFP_NOIO;
386
387 if (!bi)
388 return true;
389
390 if (!bio_sectors(bio))
391 return true;
392
393 /* Already protected? */
394 if (bio_integrity(bio))
395 return true;
396
397 switch (bio_op(bio)) {
398 case REQ_OP_READ:
399 if (bi->flags & BLK_INTEGRITY_NOVERIFY)
400 return true;
401 break;
402 case REQ_OP_WRITE:
403 if (bi->flags & BLK_INTEGRITY_NOGENERATE)
404 return true;
405
406 /*
407 * Zero the memory allocated to not leak uninitialized kernel
408 * memory to disk for non-integrity metadata where nothing else
409 * initializes the memory.
410 */
411 if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE)
412 gfp |= __GFP_ZERO;
413 break;
414 default:
415 return true;
416 }
417
418 /* Allocate kernel buffer for protection data */
419 len = bio_integrity_bytes(bi, bio_sectors(bio));
420 buf = kmalloc(len, gfp);
421 if (unlikely(buf == NULL)) {
422 goto err_end_io;
423 }
424
425 bip = bio_integrity_alloc(bio, GFP_NOIO, 1);
426 if (IS_ERR(bip)) {
427 kfree(buf);
428 goto err_end_io;
429 }
430
431 bip->bip_flags |= BIP_BLOCK_INTEGRITY;
432 bip_set_seed(bip, bio->bi_iter.bi_sector);
433
434 if (bi->csum_type == BLK_INTEGRITY_CSUM_IP)
435 bip->bip_flags |= BIP_IP_CHECKSUM;
436
437 if (bio_integrity_add_page(bio, virt_to_page(buf), len,
438 offset_in_page(buf)) < len) {
439 printk(KERN_ERR "could not attach integrity payload\n");
440 goto err_end_io;
441 }
442
443 /* Auto-generate integrity metadata if this is a write */
444 if (bio_data_dir(bio) == WRITE)
445 blk_integrity_generate(bio);
446 else
447 bip->bio_iter = bio->bi_iter;
448 return true;
449
450 err_end_io:
451 bio->bi_status = BLK_STS_RESOURCE;
452 bio_endio(bio);
453 return false;
454 }
455 EXPORT_SYMBOL(bio_integrity_prep);
456
457 /**
458 * bio_integrity_verify_fn - Integrity I/O completion worker
459 * @work: Work struct stored in bio to be verified
460 *
461 * Description: This workqueue function is called to complete a READ
462 * request. The function verifies the transferred integrity metadata
463 * and then calls the original bio end_io function.
464 */
bio_integrity_verify_fn(struct work_struct * work)465 static void bio_integrity_verify_fn(struct work_struct *work)
466 {
467 struct bio_integrity_payload *bip =
468 container_of(work, struct bio_integrity_payload, bip_work);
469 struct bio *bio = bip->bip_bio;
470
471 blk_integrity_verify(bio);
472
473 kfree(bvec_virt(bip->bip_vec));
474 bio_integrity_free(bio);
475 bio_endio(bio);
476 }
477
478 /**
479 * __bio_integrity_endio - Integrity I/O completion function
480 * @bio: Protected bio
481 *
482 * Description: Completion for integrity I/O
483 *
484 * Normally I/O completion is done in interrupt context. However,
485 * verifying I/O integrity is a time-consuming task which must be run
486 * in process context. This function postpones completion
487 * accordingly.
488 */
__bio_integrity_endio(struct bio * bio)489 bool __bio_integrity_endio(struct bio *bio)
490 {
491 struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
492 struct bio_integrity_payload *bip = bio_integrity(bio);
493
494 if (bio_op(bio) == REQ_OP_READ && !bio->bi_status && bi->csum_type) {
495 INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
496 queue_work(kintegrityd_wq, &bip->bip_work);
497 return false;
498 }
499
500 kfree(bvec_virt(bip->bip_vec));
501 bio_integrity_free(bio);
502 return true;
503 }
504
505 /**
506 * bio_integrity_advance - Advance integrity vector
507 * @bio: bio whose integrity vector to update
508 * @bytes_done: number of data bytes that have been completed
509 *
510 * Description: This function calculates how many integrity bytes the
511 * number of completed data bytes correspond to and advances the
512 * integrity vector accordingly.
513 */
bio_integrity_advance(struct bio * bio,unsigned int bytes_done)514 void bio_integrity_advance(struct bio *bio, unsigned int bytes_done)
515 {
516 struct bio_integrity_payload *bip = bio_integrity(bio);
517 struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
518 unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9);
519
520 bip->bip_iter.bi_sector += bio_integrity_intervals(bi, bytes_done >> 9);
521 bvec_iter_advance(bip->bip_vec, &bip->bip_iter, bytes);
522 }
523
524 /**
525 * bio_integrity_trim - Trim integrity vector
526 * @bio: bio whose integrity vector to update
527 *
528 * Description: Used to trim the integrity vector in a cloned bio.
529 */
bio_integrity_trim(struct bio * bio)530 void bio_integrity_trim(struct bio *bio)
531 {
532 struct bio_integrity_payload *bip = bio_integrity(bio);
533 struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
534
535 bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio));
536 }
537 EXPORT_SYMBOL(bio_integrity_trim);
538
539 /**
540 * bio_integrity_clone - Callback for cloning bios with integrity metadata
541 * @bio: New bio
542 * @bio_src: Original bio
543 * @gfp_mask: Memory allocation mask
544 *
545 * Description: Called to allocate a bip when cloning a bio
546 */
bio_integrity_clone(struct bio * bio,struct bio * bio_src,gfp_t gfp_mask)547 int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
548 gfp_t gfp_mask)
549 {
550 struct bio_integrity_payload *bip_src = bio_integrity(bio_src);
551 struct bio_integrity_payload *bip;
552
553 BUG_ON(bip_src == NULL);
554
555 bip = bio_integrity_alloc(bio, gfp_mask, 0);
556 if (IS_ERR(bip))
557 return PTR_ERR(bip);
558
559 bip->bip_vec = bip_src->bip_vec;
560 bip->bip_iter = bip_src->bip_iter;
561 bip->bip_flags = bip_src->bip_flags & ~BIP_BLOCK_INTEGRITY;
562
563 return 0;
564 }
565
bioset_integrity_create(struct bio_set * bs,int pool_size)566 int bioset_integrity_create(struct bio_set *bs, int pool_size)
567 {
568 if (mempool_initialized(&bs->bio_integrity_pool))
569 return 0;
570
571 if (mempool_init_slab_pool(&bs->bio_integrity_pool,
572 pool_size, bip_slab))
573 return -1;
574
575 if (biovec_init_pool(&bs->bvec_integrity_pool, pool_size)) {
576 mempool_exit(&bs->bio_integrity_pool);
577 return -1;
578 }
579
580 return 0;
581 }
582 EXPORT_SYMBOL(bioset_integrity_create);
583
bioset_integrity_free(struct bio_set * bs)584 void bioset_integrity_free(struct bio_set *bs)
585 {
586 mempool_exit(&bs->bio_integrity_pool);
587 mempool_exit(&bs->bvec_integrity_pool);
588 }
589
bio_integrity_init(void)590 void __init bio_integrity_init(void)
591 {
592 /*
593 * kintegrityd won't block much but may burn a lot of CPU cycles.
594 * Make it highpri CPU intensive wq with max concurrency of 1.
595 */
596 kintegrityd_wq = alloc_workqueue("kintegrityd", WQ_MEM_RECLAIM |
597 WQ_HIGHPRI | WQ_CPU_INTENSIVE, 1);
598 if (!kintegrityd_wq)
599 panic("Failed to create kintegrityd\n");
600
601 bip_slab = kmem_cache_create("bio_integrity_payload",
602 sizeof(struct bio_integrity_payload) +
603 sizeof(struct bio_vec) * BIO_INLINE_VECS,
604 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
605 }
606