// SPDX-License-Identifier: GPL-2.0
/*
 * Functions related to setting various queue properties from drivers
 */

#define pr_fmt(fmt) "%s: " fmt, __func__

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/pagemap.h>
#include <linux/backing-dev-defs.h>
#include <linux/gcd.h>
#include <linux/lcm.h>
#include <linux/jiffies.h>
#include <linux/gfp.h>
#include <linux/dma-mapping.h>

#include "blk.h"
#include "blk-rq-qos.h"
#include "blk-wbt.h"

/* Protects blk_nr_sub_page_limit_queues and blk_sub_page_limits changes. */
static DEFINE_MUTEX(blk_sub_page_limit_lock);
static uint32_t blk_nr_sub_page_limit_queues;
DEFINE_STATIC_KEY_FALSE(blk_sub_page_limits);

void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout)
{
	q->rq_timeout = timeout;
}
EXPORT_SYMBOL_GPL(blk_queue_rq_timeout);

/**
 * blk_set_default_limits - reset limits to default values
 * @lim: the queue_limits structure to reset
 *
 * Description:
 * Returns a queue_limit struct to its default state.
 */
void blk_set_default_limits(struct queue_limits *lim)
{
	lim->max_segments = BLK_MAX_SEGMENTS;
	lim->max_discard_segments = 1;
	lim->max_integrity_segments = 0;
	lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
	lim->virt_boundary_mask = 0;
	lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
	lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
	lim->max_user_sectors = lim->max_dev_sectors = 0;
	lim->chunk_sectors = 0;
	lim->max_write_zeroes_sectors = 0;
	lim->max_zone_append_sectors = 0;
	lim->max_discard_sectors = 0;
	lim->max_hw_discard_sectors = 0;
	lim->max_secure_erase_sectors = 0;
	lim->discard_granularity = 0;
	lim->discard_alignment = 0;
	lim->discard_misaligned = 0;
	lim->logical_block_size = lim->physical_block_size = lim->io_min = 512;
	lim->bounce = BLK_BOUNCE_NONE;
	lim->alignment_offset = 0;
	lim->io_opt = 0;
	lim->misaligned = 0;
	lim->zoned = BLK_ZONED_NONE;
	lim->zone_write_granularity = 0;
	lim->dma_alignment = 511;
	lim->sub_page_limits = false;
}

/**
 * blk_set_stacking_limits - set default limits for stacking devices
 * @lim: the queue_limits structure to reset
 *
 * Description:
 * Returns a queue_limit struct to its default state. Should be used
 * by stacking drivers like DM that have no internal limits.
 */
void blk_set_stacking_limits(struct queue_limits *lim)
{
	blk_set_default_limits(lim);

	/* Inherit limits from component devices */
	lim->max_segments = USHRT_MAX;
	lim->max_discard_segments = USHRT_MAX;
	lim->max_hw_sectors = UINT_MAX;
	lim->max_segment_size = UINT_MAX;
	lim->max_sectors = UINT_MAX;
	lim->max_dev_sectors = UINT_MAX;
	lim->max_write_zeroes_sectors = UINT_MAX;
	lim->max_zone_append_sectors = UINT_MAX;
}
EXPORT_SYMBOL(blk_set_stacking_limits);
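
/*
 * Illustrative sketch, not part of the original file: a hypothetical stacking
 * driver (MD/DM style) would typically reset its limits with
 * blk_set_stacking_limits() and then fold in each component device with
 * blk_stack_limits(). The "example_" name, the member array and its count are
 * assumptions made up for this sketch.
 */
static inline int example_build_stacked_limits(struct queue_limits *lim,
					       struct block_device **members,
					       int nr_members)
{
	int i, ret = 0;

	/* Start from the permissive stacking defaults... */
	blk_set_stacking_limits(lim);

	/* ...then tighten them with each component device's limits. */
	for (i = 0; i < nr_members; i++)
		if (blk_stack_limits(lim, &bdev_get_queue(members[i])->limits,
				     get_start_sect(members[i])))
			ret = -1;

	return ret;
}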

/**
 * blk_queue_bounce_limit - set bounce buffer limit for queue
 * @q: the request queue for the device
 * @bounce: bounce limit to enforce
 *
 * Description:
 * Force bouncing for ISA DMA ranges or highmem.
 *
 * DEPRECATED, don't use in new code.
 **/
void blk_queue_bounce_limit(struct request_queue *q, enum blk_bounce bounce)
{
	q->limits.bounce = bounce;
}
EXPORT_SYMBOL(blk_queue_bounce_limit);

/* For debugfs. */
int blk_sub_page_limit_queues_get(void *data, u64 *val)
{
	*val = READ_ONCE(blk_nr_sub_page_limit_queues);

	return 0;
}

/**
 * blk_enable_sub_page_limits - enable support for limits below the page size
 * @lim: request queue limits for which to enable support of these features.
 *
 * Enable support for max_segment_size values smaller than PAGE_SIZE and for
 * max_hw_sectors values below PAGE_SIZE >> SECTOR_SHIFT. Support for these
 * features is not always enabled because of their runtime overhead.
 */
static void blk_enable_sub_page_limits(struct queue_limits *lim)
{
	if (lim->sub_page_limits)
		return;

	lim->sub_page_limits = true;

	mutex_lock(&blk_sub_page_limit_lock);
	if (++blk_nr_sub_page_limit_queues == 1)
		static_branch_enable(&blk_sub_page_limits);
	mutex_unlock(&blk_sub_page_limit_lock);
}

/**
 * blk_disable_sub_page_limits - disable support for limits below the page size
 * @lim: request queue limits for which to disable support of these features.
 *
 * Disable support for max_segment_size values smaller than PAGE_SIZE and for
 * max_hw_sectors values below PAGE_SIZE >> SECTOR_SHIFT. Support for these
 * features is not always enabled because of their runtime overhead.
 */
void blk_disable_sub_page_limits(struct queue_limits *lim)
{
	if (!lim->sub_page_limits)
		return;

	lim->sub_page_limits = false;

	mutex_lock(&blk_sub_page_limit_lock);
	WARN_ON_ONCE(blk_nr_sub_page_limit_queues <= 0);
	if (--blk_nr_sub_page_limit_queues == 0)
		static_branch_disable(&blk_sub_page_limits);
	mutex_unlock(&blk_sub_page_limit_lock);
}

/**
 * blk_queue_max_hw_sectors - set max sectors for a request for this queue
 * @q: the request queue for the device
 * @max_hw_sectors: max hardware sectors in the usual 512b unit
 *
 * Description:
 * Enables a low level driver to set a hard upper limit,
 * max_hw_sectors, on the size of requests. max_hw_sectors is set by
 * the device driver based upon the capabilities of the I/O
 * controller.
 *
 * max_dev_sectors is a hard limit imposed by the storage device for
 * READ/WRITE requests. It is set by the disk driver.
 *
 * max_sectors is a soft limit imposed by the block layer for
 * filesystem type requests. This value can be overridden on a
 * per-device basis in /sys/block/<device>/queue/max_sectors_kb.
 * The soft limit can not exceed max_hw_sectors.
 **/
void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors)
{
	struct queue_limits *limits = &q->limits;
	unsigned int min_max_hw_sectors = PAGE_SIZE >> SECTOR_SHIFT;
	unsigned int max_sectors;

	if (max_hw_sectors < min_max_hw_sectors) {
		blk_enable_sub_page_limits(limits);
		min_max_hw_sectors = 1;
	}

	if (max_hw_sectors < min_max_hw_sectors) {
		max_hw_sectors = min_max_hw_sectors;
		pr_info("set to minimum %u\n", max_hw_sectors);
	}

	max_hw_sectors = round_down(max_hw_sectors,
				    limits->logical_block_size >> SECTOR_SHIFT);
	limits->max_hw_sectors = max_hw_sectors;

	max_sectors = min_not_zero(max_hw_sectors, limits->max_dev_sectors);

	if (limits->max_user_sectors)
		max_sectors = min(max_sectors, limits->max_user_sectors);
	else
		max_sectors = min(max_sectors, BLK_DEF_MAX_SECTORS);

	max_sectors = round_down(max_sectors,
				 limits->logical_block_size >> SECTOR_SHIFT);
	limits->max_sectors = max_sectors;

	if (!q->disk)
		return;
	q->disk->bdi->io_pages = max_sectors >> (PAGE_SHIFT - 9);
}
EXPORT_SYMBOL(blk_queue_max_hw_sectors);
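
/*
 * Illustrative sketch, not part of the original file: a hypothetical
 * controller driver that can transfer at most 256 KiB per request might
 * advertise that limit as shown below; max_sectors is then derived from it
 * by blk_queue_max_hw_sectors() above. The 256 KiB figure is an assumption.
 */
static inline void example_cap_transfer_size(struct request_queue *q)
{
	/* 256 KiB expressed in 512-byte sectors. */
	blk_queue_max_hw_sectors(q, (256 * 1024) >> SECTOR_SHIFT);
}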

/**
 * blk_queue_chunk_sectors - set size of the chunk for this queue
 * @q: the request queue for the device
 * @chunk_sectors: chunk sectors in the usual 512b unit
 *
 * Description:
 * If a driver doesn't want IOs to cross a given chunk size, it can set
 * this limit and prevent merging across chunks. Note that the block layer
 * must accept a page worth of data at any offset. So if the crossing of
 * chunks is a hard limitation in the driver, it must still be prepared
 * to split single page bios.
 **/
void blk_queue_chunk_sectors(struct request_queue *q, unsigned int chunk_sectors)
{
	q->limits.chunk_sectors = chunk_sectors;
}
EXPORT_SYMBOL(blk_queue_chunk_sectors);

/**
 * blk_queue_max_discard_sectors - set max sectors for a single discard
 * @q: the request queue for the device
 * @max_discard_sectors: maximum number of sectors to discard
 **/
void blk_queue_max_discard_sectors(struct request_queue *q,
				   unsigned int max_discard_sectors)
{
	q->limits.max_hw_discard_sectors = max_discard_sectors;
	q->limits.max_discard_sectors = max_discard_sectors;
}
EXPORT_SYMBOL(blk_queue_max_discard_sectors);

/**
 * blk_queue_max_secure_erase_sectors - set max sectors for a secure erase
 * @q: the request queue for the device
 * @max_sectors: maximum number of sectors to secure_erase
 **/
void blk_queue_max_secure_erase_sectors(struct request_queue *q,
					unsigned int max_sectors)
{
	q->limits.max_secure_erase_sectors = max_sectors;
}
EXPORT_SYMBOL(blk_queue_max_secure_erase_sectors);

/**
 * blk_queue_max_write_zeroes_sectors - set max sectors for a single
 *                                      write zeroes
 * @q: the request queue for the device
 * @max_write_zeroes_sectors: maximum number of sectors to write per command
 **/
void blk_queue_max_write_zeroes_sectors(struct request_queue *q,
					unsigned int max_write_zeroes_sectors)
{
	q->limits.max_write_zeroes_sectors = max_write_zeroes_sectors;
}
EXPORT_SYMBOL(blk_queue_max_write_zeroes_sectors);

/**
 * blk_queue_max_zone_append_sectors - set max sectors for a single zone append
 * @q: the request queue for the device
 * @max_zone_append_sectors: maximum number of sectors to write per command
 **/
void blk_queue_max_zone_append_sectors(struct request_queue *q,
				       unsigned int max_zone_append_sectors)
{
	unsigned int max_sectors;

	if (WARN_ON(!blk_queue_is_zoned(q)))
		return;

	max_sectors = min(q->limits.max_hw_sectors, max_zone_append_sectors);
	max_sectors = min(q->limits.chunk_sectors, max_sectors);

	/*
	 * Warn about likely driver bugs that would make the
	 * max_zone_append_sectors limit 0: a zero argument, an unset
	 * chunk_sectors limit (zone size) or an unset max_hw_sectors limit.
	 */
	WARN_ON(!max_sectors);

	q->limits.max_zone_append_sectors = max_sectors;
}
EXPORT_SYMBOL_GPL(blk_queue_max_zone_append_sectors);
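
/*
 * Illustrative sketch, not part of the original file: a hypothetical
 * host-managed zoned driver with 256 MiB zones might wire up the zone
 * related limits as follows. The zone size is an assumption; note that
 * chunk_sectors (the zone size) must be set before the zone append limit,
 * since blk_queue_max_zone_append_sectors() above clamps against it.
 */
static inline void example_setup_zoned_limits(struct gendisk *disk)
{
	struct request_queue *q = disk->queue;
	unsigned int zone_sectors = (256 * 1024 * 1024) >> SECTOR_SHIFT;

	disk_set_zoned(disk, BLK_ZONED_HM);
	blk_queue_chunk_sectors(q, zone_sectors);
	/* Allow zone append writes up to a full zone. */
	blk_queue_max_zone_append_sectors(q, zone_sectors);
}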

/**
 * blk_queue_max_segments - set max hw segments for a request for this queue
 * @q: the request queue for the device
 * @max_segments: max number of segments
 *
 * Description:
 * Enables a low level driver to set an upper limit on the number of
 * hw data segments in a request.
 **/
void blk_queue_max_segments(struct request_queue *q, unsigned short max_segments)
{
	if (!max_segments) {
		max_segments = 1;
		pr_info("set to minimum %u\n", max_segments);
	}

	q->limits.max_segments = max_segments;
}
EXPORT_SYMBOL(blk_queue_max_segments);

/**
 * blk_queue_max_discard_segments - set max segments for discard requests
 * @q: the request queue for the device
 * @max_segments: max number of segments
 *
 * Description:
 * Enables a low level driver to set an upper limit on the number of
 * segments in a discard request.
 **/
void blk_queue_max_discard_segments(struct request_queue *q,
				    unsigned short max_segments)
{
	q->limits.max_discard_segments = max_segments;
}
EXPORT_SYMBOL_GPL(blk_queue_max_discard_segments);

/**
 * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg
 * @q: the request queue for the device
 * @max_size: max size of segment in bytes
 *
 * Description:
 * Enables a low level driver to set an upper limit on the size of a
 * coalesced segment
 **/
void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size)
{
	unsigned int min_max_segment_size = PAGE_SIZE;

	if (max_size < min_max_segment_size) {
		blk_enable_sub_page_limits(&q->limits);
		min_max_segment_size = SECTOR_SIZE;
	}

	if (max_size < min_max_segment_size) {
		max_size = min_max_segment_size;
		pr_info("set to minimum %u\n", max_size);
	}

	/* see blk_queue_virt_boundary() for the explanation */
	WARN_ON_ONCE(q->limits.virt_boundary_mask);

	q->limits.max_segment_size = max_size;
}
EXPORT_SYMBOL(blk_queue_max_segment_size);
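
/*
 * Illustrative sketch, not part of the original file: a hypothetical HBA with
 * a 128-entry scatter/gather table whose entries can each map up to 64 KiB
 * might advertise those limits as follows. Both values are assumptions.
 */
static inline void example_set_sg_limits(struct request_queue *q)
{
	blk_queue_max_segments(q, 128);
	blk_queue_max_segment_size(q, 64 * 1024);
}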

/**
 * blk_queue_logical_block_size - set logical block size for the queue
 * @q: the request queue for the device
 * @size: the logical block size, in bytes
 *
 * Description:
 * This should be set to the lowest possible block size that the
 * storage device can address. The default of 512 covers most
 * hardware.
 **/
void blk_queue_logical_block_size(struct request_queue *q, unsigned int size)
{
	struct queue_limits *limits = &q->limits;

	limits->logical_block_size = size;

	if (limits->physical_block_size < size)
		limits->physical_block_size = size;

	if (limits->io_min < limits->physical_block_size)
		limits->io_min = limits->physical_block_size;

	limits->max_hw_sectors =
		round_down(limits->max_hw_sectors, size >> SECTOR_SHIFT);
	limits->max_sectors =
		round_down(limits->max_sectors, size >> SECTOR_SHIFT);
}
EXPORT_SYMBOL(blk_queue_logical_block_size);

/**
 * blk_queue_physical_block_size - set physical block size for the queue
 * @q: the request queue for the device
 * @size: the physical block size, in bytes
 *
 * Description:
 * This should be set to the lowest possible sector size that the
 * hardware can operate on without reverting to read-modify-write
 * operations.
 */
void blk_queue_physical_block_size(struct request_queue *q, unsigned int size)
{
	q->limits.physical_block_size = size;

	if (q->limits.physical_block_size < q->limits.logical_block_size)
		q->limits.physical_block_size = q->limits.logical_block_size;

	if (q->limits.io_min < q->limits.physical_block_size)
		q->limits.io_min = q->limits.physical_block_size;
}
EXPORT_SYMBOL(blk_queue_physical_block_size);
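
/*
 * Illustrative sketch, not part of the original file: a hypothetical 512e
 * drive (4 KiB physical sectors addressed as 512-byte logical sectors) might
 * report its geometry like this. The sizes are assumptions for the example.
 */
static inline void example_set_block_sizes(struct request_queue *q)
{
	blk_queue_logical_block_size(q, 512);
	/* Writes smaller than 4 KiB cause a read-modify-write in the device. */
	blk_queue_physical_block_size(q, 4096);
}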

/**
 * blk_queue_zone_write_granularity - set zone write granularity for the queue
 * @q: the request queue for the zoned device
 * @size: the zone write granularity size, in bytes
 *
 * Description:
 * This should be set to the lowest possible size allowing writes in the
 * sequential zones of a zoned block device.
 */
void blk_queue_zone_write_granularity(struct request_queue *q,
				      unsigned int size)
{
	if (WARN_ON_ONCE(!blk_queue_is_zoned(q)))
		return;

	q->limits.zone_write_granularity = size;

	if (q->limits.zone_write_granularity < q->limits.logical_block_size)
		q->limits.zone_write_granularity = q->limits.logical_block_size;
}
EXPORT_SYMBOL_GPL(blk_queue_zone_write_granularity);

/**
 * blk_queue_alignment_offset - set physical block alignment offset
 * @q: the request queue for the device
 * @offset: alignment offset in bytes
 *
 * Description:
 * Some devices are naturally misaligned to compensate for things like
 * the legacy DOS partition table 63-sector offset. Low-level drivers
 * should call this function for devices whose first sector is not
 * naturally aligned.
 */
void blk_queue_alignment_offset(struct request_queue *q, unsigned int offset)
{
	q->limits.alignment_offset =
		offset & (q->limits.physical_block_size - 1);
	q->limits.misaligned = 0;
}
EXPORT_SYMBOL(blk_queue_alignment_offset);

void disk_update_readahead(struct gendisk *disk)
{
	struct request_queue *q = disk->queue;

	/*
	 * For read-ahead of large files to be effective, we need to read ahead
	 * at least twice the optimal I/O size.
	 */
	disk->bdi->ra_pages =
		max(queue_io_opt(q) * 2 / PAGE_SIZE, VM_READAHEAD_PAGES);
	disk->bdi->io_pages = queue_max_sectors(q) >> (PAGE_SHIFT - 9);
}
EXPORT_SYMBOL_GPL(disk_update_readahead);

/**
 * blk_limits_io_min - set minimum request size for a device
 * @limits: the queue limits
 * @min: smallest I/O size in bytes
 *
 * Description:
 * Some devices have an internal block size bigger than the reported
 * hardware sector size. This function can be used to signal the
 * smallest I/O the device can perform without incurring a performance
 * penalty.
 */
void blk_limits_io_min(struct queue_limits *limits, unsigned int min)
{
	limits->io_min = min;

	if (limits->io_min < limits->logical_block_size)
		limits->io_min = limits->logical_block_size;

	if (limits->io_min < limits->physical_block_size)
		limits->io_min = limits->physical_block_size;
}
EXPORT_SYMBOL(blk_limits_io_min);

/**
 * blk_queue_io_min - set minimum request size for the queue
 * @q: the request queue for the device
 * @min: smallest I/O size in bytes
 *
 * Description:
 * Storage devices may report a granularity or preferred minimum I/O
 * size which is the smallest request the device can perform without
 * incurring a performance penalty. For disk drives this is often the
 * physical block size. For RAID arrays it is often the stripe chunk
 * size. A properly aligned multiple of minimum_io_size is the
 * preferred request size for workloads where a high number of I/O
 * operations is desired.
 */
void blk_queue_io_min(struct request_queue *q, unsigned int min)
{
	blk_limits_io_min(&q->limits, min);
}
EXPORT_SYMBOL(blk_queue_io_min);

/**
 * blk_limits_io_opt - set optimal request size for a device
 * @limits: the queue limits
 * @opt: optimal request size in bytes
 *
 * Description:
 * Storage devices may report an optimal I/O size, which is the
 * device's preferred unit for sustained I/O. This is rarely reported
 * for disk drives. For RAID arrays it is usually the stripe width or
 * the internal track size. A properly aligned multiple of
 * optimal_io_size is the preferred request size for workloads where
 * sustained throughput is desired.
 */
void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt)
{
	limits->io_opt = opt;
}
EXPORT_SYMBOL(blk_limits_io_opt);

/**
 * blk_queue_io_opt - set optimal request size for the queue
 * @q: the request queue for the device
 * @opt: optimal request size in bytes
 *
 * Description:
 * Storage devices may report an optimal I/O size, which is the
 * device's preferred unit for sustained I/O. This is rarely reported
 * for disk drives. For RAID arrays it is usually the stripe width or
 * the internal track size. A properly aligned multiple of
 * optimal_io_size is the preferred request size for workloads where
 * sustained throughput is desired.
 */
void blk_queue_io_opt(struct request_queue *q, unsigned int opt)
{
	blk_limits_io_opt(&q->limits, opt);
	if (!q->disk)
		return;
	q->disk->bdi->ra_pages =
		max(queue_io_opt(q) * 2 / PAGE_SIZE, VM_READAHEAD_PAGES);
}
EXPORT_SYMBOL(blk_queue_io_opt);
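
/*
 * Illustrative sketch, not part of the original file: a hypothetical RAID5
 * array with a 64 KiB chunk size and four data drives might export its I/O
 * hints like this. The geometry is an assumption made up for the example.
 */
static inline void example_set_raid_io_hints(struct request_queue *q)
{
	/* Smallest I/O that avoids a read-modify-write of a stripe chunk. */
	blk_queue_io_min(q, 64 * 1024);
	/* Full stripe width: chunk size times the number of data drives. */
	blk_queue_io_opt(q, 4 * 64 * 1024);
}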

static int queue_limit_alignment_offset(const struct queue_limits *lim,
					sector_t sector)
{
	unsigned int granularity = max(lim->physical_block_size, lim->io_min);
	unsigned int alignment = sector_div(sector, granularity >> SECTOR_SHIFT)
		<< SECTOR_SHIFT;

	return (granularity + lim->alignment_offset - alignment) % granularity;
}

static unsigned int queue_limit_discard_alignment(
		const struct queue_limits *lim, sector_t sector)
{
	unsigned int alignment, granularity, offset;

	if (!lim->max_discard_sectors)
		return 0;

	/* Why are these in bytes, not sectors? */
	alignment = lim->discard_alignment >> SECTOR_SHIFT;
	granularity = lim->discard_granularity >> SECTOR_SHIFT;
	if (!granularity)
		return 0;

	/* Offset of the partition start in 'granularity' sectors */
	offset = sector_div(sector, granularity);

	/* And why do we do this modulus *again* in blkdev_issue_discard()? */
	offset = (granularity + alignment - offset) % granularity;

	/* Turn it back into bytes, gaah */
	return offset << SECTOR_SHIFT;
}

static unsigned int blk_round_down_sectors(unsigned int sectors, unsigned int lbs)
{
	sectors = round_down(sectors, lbs >> SECTOR_SHIFT);
	if (sectors < PAGE_SIZE >> SECTOR_SHIFT)
		sectors = PAGE_SIZE >> SECTOR_SHIFT;
	return sectors;
}

/**
 * blk_stack_limits - adjust queue_limits for stacked devices
 * @t: the stacking driver limits (top device)
 * @b: the underlying queue limits (bottom, component device)
 * @start: first data sector within component device
 *
 * Description:
 * This function is used by stacking drivers like MD and DM to ensure
 * that all component devices have compatible block sizes and
 * alignments. The stacking driver must provide a queue_limits
 * struct (top) and then iteratively call the stacking function for
 * all component (bottom) devices. The stacking function will
 * attempt to combine the values and ensure proper alignment.
 *
 * Returns 0 if the top and bottom queue_limits are compatible. The
 * top device's block sizes and alignment offsets may be adjusted to
 * ensure alignment with the bottom device. If no compatible sizes
 * and alignments exist, -1 is returned and the resulting top
 * queue_limits will have the misaligned flag set to indicate that
 * the alignment_offset is undefined.
 */
int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
		     sector_t start)
{
	unsigned int top, bottom, alignment, ret = 0;

	t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
	t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
	t->max_dev_sectors = min_not_zero(t->max_dev_sectors, b->max_dev_sectors);
	t->max_write_zeroes_sectors = min(t->max_write_zeroes_sectors,
					  b->max_write_zeroes_sectors);
	t->max_zone_append_sectors = min(t->max_zone_append_sectors,
					 b->max_zone_append_sectors);
	t->bounce = max(t->bounce, b->bounce);

	t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
					    b->seg_boundary_mask);
	t->virt_boundary_mask = min_not_zero(t->virt_boundary_mask,
					     b->virt_boundary_mask);

	t->max_segments = min_not_zero(t->max_segments, b->max_segments);
	t->max_discard_segments = min_not_zero(t->max_discard_segments,
					       b->max_discard_segments);
	t->max_integrity_segments = min_not_zero(t->max_integrity_segments,
						 b->max_integrity_segments);

	t->max_segment_size = min_not_zero(t->max_segment_size,
					   b->max_segment_size);

	t->misaligned |= b->misaligned;

	alignment = queue_limit_alignment_offset(b, start);

	/* Bottom device has different alignment. Check that it is
	 * compatible with the current top alignment.
	 */
	if (t->alignment_offset != alignment) {

		top = max(t->physical_block_size, t->io_min)
			+ t->alignment_offset;
		bottom = max(b->physical_block_size, b->io_min) + alignment;

		/* Verify that top and bottom intervals line up */
		if (max(top, bottom) % min(top, bottom)) {
			t->misaligned = 1;
			ret = -1;
		}
	}

	t->logical_block_size = max(t->logical_block_size,
				    b->logical_block_size);

	t->physical_block_size = max(t->physical_block_size,
				     b->physical_block_size);

	t->io_min = max(t->io_min, b->io_min);
	t->io_opt = lcm_not_zero(t->io_opt, b->io_opt);
	t->dma_alignment = max(t->dma_alignment, b->dma_alignment);

	/* Set non-power-of-2 compatible chunk_sectors boundary */
	if (b->chunk_sectors)
		t->chunk_sectors = gcd(t->chunk_sectors, b->chunk_sectors);

	/* Physical block size a multiple of the logical block size? */
	if (t->physical_block_size & (t->logical_block_size - 1)) {
		t->physical_block_size = t->logical_block_size;
		t->misaligned = 1;
		ret = -1;
	}

	/* Minimum I/O a multiple of the physical block size? */
	if (t->io_min & (t->physical_block_size - 1)) {
		t->io_min = t->physical_block_size;
		t->misaligned = 1;
		ret = -1;
	}

	/* Optimal I/O a multiple of the physical block size? */
	if (t->io_opt & (t->physical_block_size - 1)) {
		t->io_opt = 0;
		t->misaligned = 1;
		ret = -1;
	}

	/* chunk_sectors a multiple of the physical block size? */
	if ((t->chunk_sectors << 9) & (t->physical_block_size - 1)) {
		t->chunk_sectors = 0;
		t->misaligned = 1;
		ret = -1;
	}

	t->raid_partial_stripes_expensive =
		max(t->raid_partial_stripes_expensive,
		    b->raid_partial_stripes_expensive);

	/* Find lowest common alignment_offset */
	t->alignment_offset = lcm_not_zero(t->alignment_offset, alignment)
		% max(t->physical_block_size, t->io_min);

	/* Verify that new alignment_offset is on a logical block boundary */
	if (t->alignment_offset & (t->logical_block_size - 1)) {
		t->misaligned = 1;
		ret = -1;
	}

	t->max_sectors = blk_round_down_sectors(t->max_sectors, t->logical_block_size);
	t->max_hw_sectors = blk_round_down_sectors(t->max_hw_sectors, t->logical_block_size);
	t->max_dev_sectors = blk_round_down_sectors(t->max_dev_sectors, t->logical_block_size);

	/* Discard alignment and granularity */
	if (b->discard_granularity) {
		alignment = queue_limit_discard_alignment(b, start);

		if (t->discard_granularity != 0 &&
		    t->discard_alignment != alignment) {
			top = t->discard_granularity + t->discard_alignment;
			bottom = b->discard_granularity + alignment;

			/* Verify that top and bottom intervals line up */
			if ((max(top, bottom) % min(top, bottom)) != 0)
				t->discard_misaligned = 1;
		}

		t->max_discard_sectors = min_not_zero(t->max_discard_sectors,
						      b->max_discard_sectors);
		t->max_hw_discard_sectors = min_not_zero(t->max_hw_discard_sectors,
							 b->max_hw_discard_sectors);
		t->discard_granularity = max(t->discard_granularity,
					     b->discard_granularity);
		t->discard_alignment = lcm_not_zero(t->discard_alignment, alignment) %
			t->discard_granularity;
	}
	t->max_secure_erase_sectors = min_not_zero(t->max_secure_erase_sectors,
						   b->max_secure_erase_sectors);
	t->zone_write_granularity = max(t->zone_write_granularity,
					b->zone_write_granularity);
	t->zoned = max(t->zoned, b->zoned);
	if (!t->zoned) {
		t->zone_write_granularity = 0;
		t->max_zone_append_sectors = 0;
	}
	return ret;
}
EXPORT_SYMBOL(blk_stack_limits);

/**
 * disk_stack_limits - adjust queue limits for stacked drivers
 * @disk: MD/DM gendisk (top)
 * @bdev: the underlying block device (bottom)
 * @offset: offset to beginning of data within component device
 *
 * Description:
 * Merges the limits for a top level gendisk and a bottom level
 * block_device.
 */
void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
		       sector_t offset)
{
	struct request_queue *t = disk->queue;

	if (blk_stack_limits(&t->limits, &bdev_get_queue(bdev)->limits,
			get_start_sect(bdev) + (offset >> 9)) < 0)
		pr_notice("%s: Warning: Device %pg is misaligned\n",
			disk->disk_name, bdev);

	disk_update_readahead(disk);
}
EXPORT_SYMBOL(disk_stack_limits);
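
/*
 * Illustrative sketch, not part of the original file: a hypothetical MD/DM
 * style driver whose data area starts one megabyte into each member device
 * could stack a member like this. As the conversion above shows,
 * disk_stack_limits() expects the data offset in bytes, not sectors; the
 * names and the offset are assumptions made up for this sketch.
 */
static inline void example_stack_one_member(struct gendisk *disk,
					    struct block_device *member)
{
	disk_stack_limits(disk, member, 1024 * 1024 /* bytes */);
}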

/**
 * blk_queue_update_dma_pad - update pad mask
 * @q: the request queue for the device
 * @mask: pad mask
 *
 * Update dma pad mask.
 *
 * Appending a pad buffer to a request modifies the last entry of the
 * scatter list so that it includes the pad buffer.
 **/
void blk_queue_update_dma_pad(struct request_queue *q, unsigned int mask)
{
	if (mask > q->dma_pad_mask)
		q->dma_pad_mask = mask;
}
EXPORT_SYMBOL(blk_queue_update_dma_pad);

/**
 * blk_queue_segment_boundary - set boundary rules for segment merging
 * @q: the request queue for the device
 * @mask: the memory boundary mask
 **/
void blk_queue_segment_boundary(struct request_queue *q, unsigned long mask)
{
	if (mask < PAGE_SIZE - 1) {
		mask = PAGE_SIZE - 1;
		pr_info("set to minimum %lx\n", mask);
	}

	q->limits.seg_boundary_mask = mask;
}
EXPORT_SYMBOL(blk_queue_segment_boundary);

/**
 * blk_queue_virt_boundary - set boundary rules for bio merging
 * @q: the request queue for the device
 * @mask: the memory boundary mask
 **/
void blk_queue_virt_boundary(struct request_queue *q, unsigned long mask)
{
	q->limits.virt_boundary_mask = mask;

	/*
	 * Devices that require a virtual boundary do not support scatter/gather
	 * I/O natively, but instead require a descriptor list entry for each
	 * page (which might not be identical to the Linux PAGE_SIZE). Because
	 * of that they are not limited by our notion of "segment size".
	 */
	if (mask)
		q->limits.max_segment_size = UINT_MAX;
}
EXPORT_SYMBOL(blk_queue_virt_boundary);
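
/*
 * Illustrative sketch, not part of the original file: a hypothetical NVMe
 * style controller whose descriptors each map a 4 KiB chunk requires every
 * bio vector except the first to start and end on a 4 KiB boundary, which is
 * expressed as a virtual boundary mask. The 4 KiB size is an assumption.
 */
static inline void example_set_descriptor_boundary(struct request_queue *q)
{
	/* Mask is (descriptor size - 1); also lifts max_segment_size above. */
	blk_queue_virt_boundary(q, 4096 - 1);
}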

/**
 * blk_queue_dma_alignment - set dma length and memory alignment
 * @q: the request queue for the device
 * @mask: alignment mask
 *
 * Description:
 * Set required memory and length alignment for direct dma transactions.
 * This is used when building direct io requests for the queue.
 *
 **/
void blk_queue_dma_alignment(struct request_queue *q, int mask)
{
	q->limits.dma_alignment = mask;
}
EXPORT_SYMBOL(blk_queue_dma_alignment);

/**
 * blk_queue_update_dma_alignment - update dma length and memory alignment
 * @q: the request queue for the device
 * @mask: alignment mask
 *
 * Description:
 * Update required memory and length alignment for direct dma transactions.
 * If the requested alignment is larger than the current alignment, then
 * the current queue alignment is updated to the new value, otherwise it
 * is left alone. The design of this is to allow multiple objects
 * (driver, device, transport etc) to set their respective
 * alignments without having them interfere.
 *
 **/
void blk_queue_update_dma_alignment(struct request_queue *q, int mask)
{
	BUG_ON(mask > PAGE_SIZE);

	if (mask > q->limits.dma_alignment)
		q->limits.dma_alignment = mask;
}
EXPORT_SYMBOL(blk_queue_update_dma_alignment);
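
/*
 * Illustrative sketch, not part of the original file: several layers (e.g. a
 * transport and a device driver) may each have DMA alignment requirements;
 * each can call blk_queue_update_dma_alignment() and the queue keeps the
 * largest mask. The 4-byte and 2 KiB requirements below are assumptions.
 */
static inline void example_update_dma_alignment(struct request_queue *q)
{
	/* A transport that needs 4-byte aligned buffers. */
	blk_queue_update_dma_alignment(q, 4 - 1);
	/* A device that needs 2 KiB aligned buffers; the larger mask is kept. */
	blk_queue_update_dma_alignment(q, 2048 - 1);
}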

/**
 * blk_set_queue_depth - tell the block layer about the device queue depth
 * @q: the request queue for the device
 * @depth: queue depth
 *
 */
void blk_set_queue_depth(struct request_queue *q, unsigned int depth)
{
	q->queue_depth = depth;
	rq_qos_queue_depth_changed(q);
}
EXPORT_SYMBOL(blk_set_queue_depth);

/**
 * blk_queue_write_cache - configure queue's write cache
 * @q: the request queue for the device
 * @wc: write back cache on or off
 * @fua: device supports FUA writes, if true
 *
 * Tell the block layer about the write cache of @q.
 */
void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua)
{
	if (wc) {
		blk_queue_flag_set(QUEUE_FLAG_HW_WC, q);
		blk_queue_flag_set(QUEUE_FLAG_WC, q);
	} else {
		blk_queue_flag_clear(QUEUE_FLAG_HW_WC, q);
		blk_queue_flag_clear(QUEUE_FLAG_WC, q);
	}
	if (fua)
		blk_queue_flag_set(QUEUE_FLAG_FUA, q);
	else
		blk_queue_flag_clear(QUEUE_FLAG_FUA, q);

	wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags));
}
EXPORT_SYMBOL_GPL(blk_queue_write_cache);
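
/*
 * Illustrative sketch, not part of the original file: a hypothetical driver
 * that has detected a volatile write back cache without FUA support would
 * report it like this, so that the block layer issues flushes when needed.
 */
static inline void example_report_write_cache(struct request_queue *q)
{
	/* Volatile write back cache present, no FUA support. */
	blk_queue_write_cache(q, true, false);
}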

/**
 * blk_queue_required_elevator_features - set the required elevator features for a queue
 * @q: the request queue for the target device
 * @features: required elevator features OR'ed together
 *
 * Tell the block layer that for the device controlled through @q, only
 * elevators that implement at least the set of features specified by
 * @features can be used.
 */
void blk_queue_required_elevator_features(struct request_queue *q,
					  unsigned int features)
{
	q->required_elevator_features = features;
}
EXPORT_SYMBOL_GPL(blk_queue_required_elevator_features);

/**
 * blk_queue_can_use_dma_map_merging - configure queue for merging segments.
 * @q: the request queue for the device
 * @dev: the device pointer for dma
 *
 * Tell the block layer that the segments of @q may be merged according to
 * the DMA merge boundary returned by dma_get_merge_boundary() for @dev.
 */
bool blk_queue_can_use_dma_map_merging(struct request_queue *q,
				       struct device *dev)
{
	unsigned long boundary = dma_get_merge_boundary(dev);

	if (!boundary)
		return false;

	/* No need to update max_segment_size. see blk_queue_virt_boundary() */
	blk_queue_virt_boundary(q, boundary);

	return true;
}
EXPORT_SYMBOL_GPL(blk_queue_can_use_dma_map_merging);
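
/*
 * Illustrative sketch, not part of the original file: a hypothetical driver
 * for a device behind an IOMMU can ask whether the DMA layer merges mappings
 * and fall back to its own hardware segment size limit otherwise. The 64 KiB
 * fallback value is an assumption made up for this example.
 */
static inline void example_setup_segment_merging(struct request_queue *q,
						 struct device *dma_dev)
{
	if (!blk_queue_can_use_dma_map_merging(q, dma_dev))
		blk_queue_max_segment_size(q, 64 * 1024);
}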

static bool disk_has_partitions(struct gendisk *disk)
{
	unsigned long idx;
	struct block_device *part;
	bool ret = false;

	rcu_read_lock();
	xa_for_each(&disk->part_tbl, idx, part) {
		if (bdev_is_partition(part)) {
			ret = true;
			break;
		}
	}
	rcu_read_unlock();

	return ret;
}

/**
 * disk_set_zoned - configure the zoned model for a disk
 * @disk: the gendisk of the queue to configure
 * @model: the zoned model to set
 *
 * Set the zoned model of @disk to @model.
 *
 * When @model is BLK_ZONED_HM (host managed), this should be called only
 * if zoned block device support is enabled (CONFIG_BLK_DEV_ZONED option).
 * If @model specifies BLK_ZONED_HA (host aware), the effective model used
 * depends on CONFIG_BLK_DEV_ZONED settings and on the existence of partitions
 * on the disk.
 */
void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model)
{
	struct request_queue *q = disk->queue;
	unsigned int old_model = q->limits.zoned;

	switch (model) {
	case BLK_ZONED_HM:
		/*
		 * Host managed devices are supported only if
		 * CONFIG_BLK_DEV_ZONED is enabled.
		 */
		WARN_ON_ONCE(!IS_ENABLED(CONFIG_BLK_DEV_ZONED));
		break;
	case BLK_ZONED_HA:
		/*
		 * Host aware devices can be treated either as regular block
		 * devices (similar to drive managed devices) or as zoned block
		 * devices to take advantage of the zone command set, similarly
		 * to host managed devices. We try the latter if there are no
		 * partitions and zoned block device support is enabled, else
		 * we do nothing special as far as the block layer is concerned.
		 */
		if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) ||
		    disk_has_partitions(disk))
			model = BLK_ZONED_NONE;
		break;
	case BLK_ZONED_NONE:
	default:
		if (WARN_ON_ONCE(model != BLK_ZONED_NONE))
			model = BLK_ZONED_NONE;
		break;
	}

	q->limits.zoned = model;
	if (model != BLK_ZONED_NONE) {
		/*
		 * Set the zone write granularity to the device logical block
		 * size by default. The driver can change this value if needed.
		 */
		blk_queue_zone_write_granularity(q,
						queue_logical_block_size(q));
	} else if (old_model != BLK_ZONED_NONE) {
		disk_clear_zone_settings(disk);
	}
}
EXPORT_SYMBOL_GPL(disk_set_zoned);

int bdev_alignment_offset(struct block_device *bdev)
{
	struct request_queue *q = bdev_get_queue(bdev);

	if (q->limits.misaligned)
		return -1;
	if (bdev_is_partition(bdev))
		return queue_limit_alignment_offset(&q->limits,
				bdev->bd_start_sect);
	return q->limits.alignment_offset;
}
EXPORT_SYMBOL_GPL(bdev_alignment_offset);

unsigned int bdev_discard_alignment(struct block_device *bdev)
{
	struct request_queue *q = bdev_get_queue(bdev);

	if (bdev_is_partition(bdev))
		return queue_limit_discard_alignment(&q->limits,
				bdev->bd_start_sect);
	return q->limits.discard_alignment;
}
EXPORT_SYMBOL_GPL(bdev_discard_alignment);