/* SPDX-License-Identifier: GPL-2.0 */
#ifndef BLK_INTERNAL_H
#define BLK_INTERNAL_H

#include <linux/blk-crypto.h>
#include <linux/memblock.h>	/* for max_pfn/max_low_pfn */
#include <xen/xen.h>
#include "blk-crypto-internal.h"

struct elevator_type;

/* Max future timer expiry for timeouts */
#define BLK_MAX_TIMEOUT		(5 * HZ)

extern struct dentry *blk_debugfs_root;
DECLARE_STATIC_KEY_FALSE(blk_sub_page_limits);

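/*
 * Sketch of how the state below is used (based on blk-flush.c): flush_queue[]
 * is double buffered. Requests waiting for a flush are queued on
 * flush_queue[flush_pending_idx]; when a flush is issued, the one-bit index
 * is toggled so the in-flight flush drains flush_queue[flush_running_idx]
 * while new arrivals accumulate on the other list. flush_rq is the
 * preallocated request used to issue the flush itself.
 */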
struct blk_flush_queue {
	spinlock_t		mq_flush_lock;
	unsigned int		flush_pending_idx:1;
	unsigned int		flush_running_idx:1;
	blk_status_t		rq_status;
	unsigned long		flush_pending_since;
	struct list_head	flush_queue[2];
	unsigned long		flush_data_in_flight;
	struct request		*flush_rq;
};

bool is_flush_rq(struct request *req);

struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
					      gfp_t flags);
void blk_free_flush_queue(struct blk_flush_queue *q);

static inline bool blk_queue_sub_page_limits(const struct queue_limits *lim)
{
	return static_branch_unlikely(&blk_sub_page_limits) &&
		lim->sub_page_limits;
}

int blk_sub_page_limit_queues_get(void *data, u64 *val);
void blk_disable_sub_page_limits(struct queue_limits *q);

void blk_freeze_queue(struct request_queue *q);
void __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic);
void blk_queue_start_drain(struct request_queue *q);
int __bio_queue_enter(struct request_queue *q, struct bio *bio);
void submit_bio_noacct_nocheck(struct bio *bio);

static inline bool blk_try_enter_queue(struct request_queue *q, bool pm)
{
	rcu_read_lock();
	if (!percpu_ref_tryget_live_rcu(&q->q_usage_counter))
		goto fail;

	/*
	 * The code that increments the pm_only counter must ensure that the
	 * counter is globally visible before the queue is unfrozen.
	 */
	if (blk_queue_pm_only(q) &&
	    (!pm || queue_rpm_status(q) == RPM_SUSPENDED))
		goto fail_put;

	rcu_read_unlock();
	return true;

fail_put:
	blk_queue_exit(q);
fail:
	rcu_read_unlock();
	return false;
}

static inline int bio_queue_enter(struct bio *bio)
{
	struct request_queue *q = bdev_get_queue(bio->bi_bdev);

	if (blk_try_enter_queue(q, false))
		return 0;
	return __bio_queue_enter(q, bio);
}
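/*
 * Usage sketch: a successful bio_queue_enter() holds a reference on
 * q->q_usage_counter that must be released with blk_queue_exit() once the
 * bio has been handed off, roughly as __submit_bio() does for bio-based
 * drivers:
 *
 *	if (likely(bio_queue_enter(bio) == 0)) {
 *		disk->fops->submit_bio(bio);
 *		blk_queue_exit(disk->queue);
 *	}
 */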

#define BIO_INLINE_VECS 4
struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
		gfp_t gfp_mask);
void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned short nr_vecs);

/* Number of DMA segments required to transfer @bytes data. */
static inline unsigned int blk_segments(const struct queue_limits *limits,
					unsigned int bytes)
{
	if (!blk_queue_sub_page_limits(limits))
		return 1;

	{
		const unsigned int mss = limits->max_segment_size;

		if (bytes <= mss)
			return 1;
		if (is_power_of_2(mss))
			return round_up(bytes, mss) >> ilog2(mss);
		return (bytes + mss - 1) / mss;
	}
}
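/*
 * Worked example for blk_segments(), with illustrative numbers: if sub-page
 * limits are enabled and max_segment_size == 4096, a 10000 byte transfer
 * needs round_up(10000, 4096) >> ilog2(4096) == 12288 >> 12 == 3 segments;
 * with a non-power-of-two limit such as 3000, the fallback computes
 * (10000 + 2999) / 3000 == 4 segments.
 */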

bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,
		struct page *page, unsigned len, unsigned offset,
		bool *same_page);

static inline bool biovec_phys_mergeable(struct request_queue *q,
		struct bio_vec *vec1, struct bio_vec *vec2)
{
	unsigned long mask = queue_segment_boundary(q);
	phys_addr_t addr1 = page_to_phys(vec1->bv_page) + vec1->bv_offset;
	phys_addr_t addr2 = page_to_phys(vec2->bv_page) + vec2->bv_offset;

	/*
	 * Merging adjacent physical pages may not work correctly under KMSAN
	 * if their metadata pages aren't adjacent. Just disable merging.
	 */
	if (IS_ENABLED(CONFIG_KMSAN))
		return false;

	if (addr1 + vec1->bv_len != addr2)
		return false;
	if (xen_domain() && !xen_biovec_phys_mergeable(vec1, vec2->bv_page))
		return false;
	if ((addr1 | mask) != ((addr2 + vec2->bv_len - 1) | mask))
		return false;
	return true;
}
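/*
 * Example for the segment boundary check above, with illustrative values:
 * assume queue_segment_boundary(q) == 0xfff. vec1 at physical address
 * 0x1f00 with bv_len 0x100 and vec2 at 0x2000 are physically contiguous,
 * but (0x1f00 | 0xfff) == 0x1fff while ((0x2000 + 0x100 - 1) | 0xfff) ==
 * 0x2fff, so the combined segment would straddle a 4 KiB boundary and the
 * two bvecs are not merged.
 */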

static inline bool __bvec_gap_to_prev(const struct queue_limits *lim,
		struct bio_vec *bprv, unsigned int offset)
{
	return (offset & lim->virt_boundary_mask) ||
		((bprv->bv_offset + bprv->bv_len) & lim->virt_boundary_mask);
}

/*
 * Check if adding a bio_vec after bprv with offset would create a gap in
 * the SG list. Most drivers don't care about this, but some do.
 */
static inline bool bvec_gap_to_prev(const struct queue_limits *lim,
		struct bio_vec *bprv, unsigned int offset)
{
	if (!lim->virt_boundary_mask)
		return false;
	return __bvec_gap_to_prev(lim, bprv, offset);
}
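/*
 * Example, assuming a driver that sets virt_boundary_mask to PAGE_SIZE - 1
 * (0xfff with 4 KiB pages): every element appended after the first must
 * start on a page boundary and the previous element must end on one. A
 * previous bvec with bv_offset == 0 and bv_len == 0x800 ends mid-page, so
 * adding any further bio_vec after it is reported as a gap and the bio has
 * to be split (or the merge rejected) instead.
 */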

static inline bool rq_mergeable(struct request *rq)
{
	if (blk_rq_is_passthrough(rq))
		return false;

	if (req_op(rq) == REQ_OP_FLUSH)
		return false;

	if (req_op(rq) == REQ_OP_WRITE_ZEROES)
		return false;

	if (req_op(rq) == REQ_OP_ZONE_APPEND)
		return false;

	if (rq->cmd_flags & REQ_NOMERGE_FLAGS)
		return false;
	if (rq->rq_flags & RQF_NOMERGE_FLAGS)
		return false;

	return true;
}

/*
 * There are two different ways to handle DISCARD merges:
 *  1) If max_discard_segments > 1, the driver treats every bio as a range and
 *     sends the bios to the controller together. The ranges don't need to be
 *     contiguous.
 *  2) Otherwise, the request is handled as a normal read/write request, so
 *     the ranges need to be contiguous.
 */
static inline bool blk_discard_mergable(struct request *req)
{
	if (req_op(req) == REQ_OP_DISCARD &&
	    queue_max_discard_segments(req->q) > 1)
		return true;
	return false;
}
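/*
 * Case 1) above corresponds to drivers such as NVMe, which advertise
 * max_discard_segments > 1 and accept several discontiguous ranges in one
 * command; drivers that leave max_discard_segments at 1 only get the
 * contiguous merging of case 2).
 */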

static inline unsigned int blk_rq_get_max_segments(struct request *rq)
{
	if (req_op(rq) == REQ_OP_DISCARD)
		return queue_max_discard_segments(rq->q);
	return queue_max_segments(rq->q);
}

static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
						     enum req_op op)
{
	if (unlikely(op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE))
		return min(q->limits.max_discard_sectors,
			   UINT_MAX >> SECTOR_SHIFT);

	if (unlikely(op == REQ_OP_WRITE_ZEROES))
		return q->limits.max_write_zeroes_sectors;

	return q->limits.max_sectors;
}
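/*
 * Note: the UINT_MAX >> SECTOR_SHIFT clamp above keeps the discard limit
 * small enough that the corresponding byte count (sectors << SECTOR_SHIFT)
 * still fits in a 32-bit quantity such as bio->bi_iter.bi_size.
 */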

#ifdef CONFIG_BLK_DEV_INTEGRITY
void blk_flush_integrity(void);
bool __bio_integrity_endio(struct bio *);
void bio_integrity_free(struct bio *bio);
static inline bool bio_integrity_endio(struct bio *bio)
{
	if (bio_integrity(bio))
		return __bio_integrity_endio(bio);
	return true;
}

bool blk_integrity_merge_rq(struct request_queue *, struct request *,
		struct request *);
bool blk_integrity_merge_bio(struct request_queue *, struct request *,
		struct bio *);

static inline bool integrity_req_gap_back_merge(struct request *req,
		struct bio *next)
{
	struct bio_integrity_payload *bip = bio_integrity(req->bio);
	struct bio_integrity_payload *bip_next = bio_integrity(next);

	return bvec_gap_to_prev(&req->q->limits,
				&bip->bip_vec[bip->bip_vcnt - 1],
				bip_next->bip_vec[0].bv_offset);
}

static inline bool integrity_req_gap_front_merge(struct request *req,
		struct bio *bio)
{
	struct bio_integrity_payload *bip = bio_integrity(bio);
	struct bio_integrity_payload *bip_next = bio_integrity(req->bio);

	return bvec_gap_to_prev(&req->q->limits,
				&bip->bip_vec[bip->bip_vcnt - 1],
				bip_next->bip_vec[0].bv_offset);
}

extern const struct attribute_group blk_integrity_attr_group;
#else /* CONFIG_BLK_DEV_INTEGRITY */
static inline bool blk_integrity_merge_rq(struct request_queue *rq,
		struct request *r1, struct request *r2)
{
	return true;
}
static inline bool blk_integrity_merge_bio(struct request_queue *rq,
		struct request *r, struct bio *b)
{
	return true;
}
static inline bool integrity_req_gap_back_merge(struct request *req,
		struct bio *next)
{
	return false;
}
static inline bool integrity_req_gap_front_merge(struct request *req,
		struct bio *bio)
{
	return false;
}

static inline void blk_flush_integrity(void)
{
}
static inline bool bio_integrity_endio(struct bio *bio)
{
	return true;
}
static inline void bio_integrity_free(struct bio *bio)
{
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */

unsigned long blk_rq_timeout(unsigned long timeout);
void blk_add_timer(struct request *req);

bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
		unsigned int nr_segs);
bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
		struct bio *bio, unsigned int nr_segs);

/*
 * Plug flush limits
 */
#define BLK_MAX_REQUEST_COUNT	32
#define BLK_PLUG_FLUSH_SIZE	(128 * 1024)

/*
 * Internal elevator interface
 */
#define ELV_ON_HASH(rq) ((rq)->rq_flags & RQF_HASHED)

bool blk_insert_flush(struct request *rq);

int elevator_switch(struct request_queue *q, struct elevator_type *new_e);
void elevator_disable(struct request_queue *q);
void elevator_exit(struct request_queue *q);
int elv_register_queue(struct request_queue *q, bool uevent);
void elv_unregister_queue(struct request_queue *q);

ssize_t part_size_show(struct device *dev, struct device_attribute *attr,
		char *buf);
ssize_t part_stat_show(struct device *dev, struct device_attribute *attr,
		char *buf);
ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
		char *buf);
ssize_t part_fail_show(struct device *dev, struct device_attribute *attr,
		char *buf);
ssize_t part_fail_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t count);
ssize_t part_timeout_show(struct device *, struct device_attribute *, char *);
ssize_t part_timeout_store(struct device *, struct device_attribute *,
		const char *, size_t);

static inline bool bio_may_exceed_limits(struct bio *bio,
		const struct queue_limits *lim)
{
	switch (bio_op(bio)) {
	case REQ_OP_DISCARD:
	case REQ_OP_SECURE_ERASE:
	case REQ_OP_WRITE_ZEROES:
		return true; /* non-trivial splitting decisions */
	default:
		break;
	}

	/*
	 * Check whether bio splitting should be performed. This check may
	 * trigger the bio splitting code even if splitting is not necessary.
	 */
	if (blk_queue_sub_page_limits(lim) && bio->bi_io_vec &&
	    bio->bi_io_vec->bv_len > lim->max_segment_size)
		return true;
	return lim->chunk_sectors || bio->bi_vcnt != 1 ||
		bio->bi_io_vec->bv_len + bio->bi_io_vec->bv_offset > PAGE_SIZE;
}
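/*
 * The check above is deliberately cheap and conservative: a false return
 * means the bio is a single bvec contained in one page with no
 * chunk_sectors restriction and therefore needs no splitting, while a true
 * return only means that __bio_split_to_limits() has to take a closer look
 * and may still conclude that no split is necessary.
 */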

struct bio *__bio_split_to_limits(struct bio *bio,
		const struct queue_limits *lim,
		unsigned int *nr_segs);
int ll_back_merge_fn(struct request *req, struct bio *bio,
		unsigned int nr_segs);
bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
		struct request *next);
unsigned int blk_recalc_rq_segments(struct request *rq);
void blk_rq_set_mixed_merge(struct request *rq);
bool blk_rq_merge_ok(struct request *rq, struct bio *bio);
enum elv_merge blk_try_merge(struct request *rq, struct bio *bio);

void blk_set_default_limits(struct queue_limits *lim);
int blk_dev_init(void);

/*
 * Contribute to IO statistics IFF:
 *
 *	a) it's attached to a gendisk, and
 *	b) the queue had IO stats enabled when this request was started
 */
static inline bool blk_do_io_stat(struct request *rq)
{
	return (rq->rq_flags & RQF_IO_STAT) && !blk_rq_is_passthrough(rq);
}

void update_io_ticks(struct block_device *part, unsigned long now, bool end);
unsigned int part_in_flight(struct block_device *part);

static inline void req_set_nomerge(struct request_queue *q, struct request *req)
{
	req->cmd_flags |= REQ_NOMERGE;
	if (req == q->last_merge)
		q->last_merge = NULL;
}

/*
 * Internal io_context interface
 */
struct io_cq *ioc_find_get_icq(struct request_queue *q);
struct io_cq *ioc_lookup_icq(struct request_queue *q);
#ifdef CONFIG_BLK_ICQ
void ioc_clear_queue(struct request_queue *q);
#else
static inline void ioc_clear_queue(struct request_queue *q)
{
}
#endif /* CONFIG_BLK_ICQ */

#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
extern ssize_t blk_throtl_sample_time_show(struct request_queue *q, char *page);
extern ssize_t blk_throtl_sample_time_store(struct request_queue *q,
	const char *page, size_t count);
extern void blk_throtl_bio_endio(struct bio *bio);
extern void blk_throtl_stat_add(struct request *rq, u64 time);
#else
static inline void blk_throtl_bio_endio(struct bio *bio) { }
static inline void blk_throtl_stat_add(struct request *rq, u64 time) { }
#endif

struct bio *__blk_queue_bounce(struct bio *bio, struct request_queue *q);

static inline bool blk_queue_may_bounce(struct request_queue *q)
{
	return IS_ENABLED(CONFIG_BOUNCE) &&
		q->limits.bounce == BLK_BOUNCE_HIGH &&
		max_low_pfn >= max_pfn;
}

static inline struct bio *blk_queue_bounce(struct bio *bio,
		struct request_queue *q)
{
	if (unlikely(blk_queue_may_bounce(q) && bio_has_data(bio)))
		return __blk_queue_bounce(bio, q);
	return bio;
}

#ifdef CONFIG_BLK_DEV_ZONED
void disk_free_zone_bitmaps(struct gendisk *disk);
void disk_clear_zone_settings(struct gendisk *disk);
int blkdev_report_zones_ioctl(struct block_device *bdev, unsigned int cmd,
		unsigned long arg);
int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode,
		unsigned int cmd, unsigned long arg);
#else /* CONFIG_BLK_DEV_ZONED */
static inline void disk_free_zone_bitmaps(struct gendisk *disk) {}
static inline void disk_clear_zone_settings(struct gendisk *disk) {}
static inline int blkdev_report_zones_ioctl(struct block_device *bdev,
		unsigned int cmd, unsigned long arg)
{
	return -ENOTTY;
}
static inline int blkdev_zone_mgmt_ioctl(struct block_device *bdev,
		blk_mode_t mode, unsigned int cmd, unsigned long arg)
{
	return -ENOTTY;
}
#endif /* CONFIG_BLK_DEV_ZONED */

struct block_device *bdev_alloc(struct gendisk *disk, u8 partno);
void bdev_add(struct block_device *bdev, dev_t dev);

int blk_alloc_ext_minor(void);
void blk_free_ext_minor(unsigned int minor);
#define ADDPART_FLAG_NONE	0
#define ADDPART_FLAG_RAID	1
#define ADDPART_FLAG_WHOLEDISK	2
int bdev_add_partition(struct gendisk *disk, int partno, sector_t start,
		sector_t length);
int bdev_del_partition(struct gendisk *disk, int partno);
int bdev_resize_partition(struct gendisk *disk, int partno, sector_t start,
		sector_t length);
void drop_partition(struct block_device *part);

void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors);

struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
		struct lock_class_key *lkclass);

int bio_add_hw_page(struct request_queue *q, struct bio *bio,
		struct page *page, unsigned int len, unsigned int offset,
		unsigned int max_sectors, bool *same_page);

/*
 * Clean up a page appropriately, where the page may be pinned, may have a
 * ref taken on it or neither.
 */
static inline void bio_release_page(struct bio *bio, struct page *page)
{
	if (bio_flagged(bio, BIO_PAGE_PINNED))
		unpin_user_page(page);
}
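/*
 * BIO_PAGE_PINNED is set when the bio's pages were pinned (FOLL_PIN) via
 * iov_iter_extract_pages(); pages added by other means are left alone here,
 * and any reference the caller took remains the caller's to drop.
 */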

struct request_queue *blk_alloc_queue(int node_id);

int disk_scan_partitions(struct gendisk *disk, blk_mode_t mode);

int disk_alloc_events(struct gendisk *disk);
void disk_add_events(struct gendisk *disk);
void disk_del_events(struct gendisk *disk);
void disk_release_events(struct gendisk *disk);
void disk_block_events(struct gendisk *disk);
void disk_unblock_events(struct gendisk *disk);
void disk_flush_events(struct gendisk *disk, unsigned int mask);
extern struct device_attribute dev_attr_events;
extern struct device_attribute dev_attr_events_async;
extern struct device_attribute dev_attr_events_poll_msecs;

extern struct attribute_group blk_trace_attr_group;

blk_mode_t file_to_blk_mode(struct file *file);
int truncate_bdev_range(struct block_device *bdev, blk_mode_t mode,
		loff_t lstart, loff_t lend);
long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg);
long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg);

extern const struct address_space_operations def_blk_aops;

int disk_register_independent_access_ranges(struct gendisk *disk);
void disk_unregister_independent_access_ranges(struct gendisk *disk);

#ifdef CONFIG_FAIL_MAKE_REQUEST
bool should_fail_request(struct block_device *part, unsigned int bytes);
#else /* CONFIG_FAIL_MAKE_REQUEST */
static inline bool should_fail_request(struct block_device *part,
		unsigned int bytes)
{
	return false;
}
#endif /* CONFIG_FAIL_MAKE_REQUEST */

/*
 * Optimized request reference counting. Ideally we'd make timeouts be more
 * clever, as that's the only reason we need references at all... But until
 * this happens, this is faster than using refcount_t. Also see:
 *
 * abc54d634334 ("io_uring: switch to atomic_t for io_kiocb reference count")
 */
#define req_ref_zero_or_close_to_overflow(req)	\
	((unsigned int) atomic_read(&(req->ref)) + 127u <= 127u)

static inline bool req_ref_inc_not_zero(struct request *req)
{
	return atomic_inc_not_zero(&req->ref);
}

static inline bool req_ref_put_and_test(struct request *req)
{
	WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req));
	return atomic_dec_and_test(&req->ref);
}

static inline void req_ref_set(struct request *req, int value)
{
	atomic_set(&req->ref, value);
}

static inline int req_ref_read(struct request *req)
{
	return atomic_read(&req->ref);
}
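/*
 * Usage sketch, mirroring blk-mq.c: the submission path owns the initial
 * reference installed with req_ref_set(rq, 1); code that looks up an
 * in-flight request by tag (e.g. the timeout handler) takes an extra
 * reference and drops it again once it is done:
 *
 *	if (!req_ref_inc_not_zero(rq))
 *		return;		// request already being freed
 *	...inspect rq...
 *	if (req_ref_put_and_test(rq))
 *		__blk_mq_free_request(rq);
 */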

#endif /* BLK_INTERNAL_H */