Lines Matching +full:entry +full:- +full:latency in block/kyber-iosched.c
1 // SPDX-License-Identifier: GPL-2.0
3 * The Kyber I/O scheduler. Controls latency by throttling queue depths using
11 #include <linux/blk-mq.h>
17 #include "blk-mq.h"
18 #include "blk-mq-debugfs.h"
19 #include "blk-mq-sched.h"
20 #include "blk-mq-tag.h"
54 * Maximum device-wide depth for each scheduling domain.
68 * Default latency targets for each scheduling domain.
89 * to the target latency:
91 * <= 1/4 * target latency
92 * <= 1/2 * target latency
93 * <= 3/4 * target latency
94 * <= target latency
95 * <= 1 1/4 * target latency
96 * <= 1 1/2 * target latency
97 * <= 1 3/4 * target latency
98 * > 1 3/4 * target latency
102 * The width of the latency histogram buckets is
103 * 1 / (1 << KYBER_LATENCY_SHIFT) * target latency.
107 * The first (1 << KYBER_LATENCY_SHIFT) buckets are <= target latency,
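The thresholds above step in quarters of the target, so the bucket width works out to target / (1 << KYBER_LATENCY_SHIFT), with twice that many buckets covering latencies up to and beyond the target. A minimal user-space sketch of that mapping follows; the constant and function names are illustrative, not the kernel's:

#include <stdio.h>
#include <stdint.h>

/* Inferred from the thresholds listed above: bucket width is
 * target / (1 << SHIFT), and there are twice that many buckets so the
 * upper half covers latencies past the target. */
#define SHIFT   2
#define BUCKETS (1 << (SHIFT + 1))      /* 8 */

/* Map one latency sample to a histogram bucket, mirroring the layout
 * "<= 1/4 target" ... "> 1 3/4 target". */
static unsigned int latency_bucket(uint64_t target_ns, uint64_t latency_ns)
{
        uint64_t divisor = target_ns >> SHIFT;  /* bucket width */
        uint64_t b;

        if (divisor == 0)
                divisor = 1;
        if (latency_ns == 0)                    /* zero samples are ignored */
                return 0;
        b = (latency_ns - 1) / divisor;
        return b < BUCKETS - 1 ? (unsigned int)b : BUCKETS - 1;
}

int main(void)
{
        uint64_t target = 2 * 1000 * 1000;      /* e.g. a 2 ms target */

        printf("%u\n", latency_bucket(target, 400 * 1000));      /* 0: <= 1/4 */
        printf("%u\n", latency_bucket(target, 1900 * 1000));     /* 3: <= target */
        printf("%u\n", latency_bucket(target, 9 * 1000 * 1000)); /* 7: > 1 3/4 */
        return 0;
}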
116 * We measure both the total latency and the I/O latency (i.e., latency after
130 * Per-cpu latency histograms: total latency and I/O latency for each scheduling
139 * we use request->mq_ctx->index_hw to index the kcq in khd.
154 * Each scheduling domain has a limited number of in-flight requests
155 * device-wide, limited by these tokens.
160 * Async request percentage, converted to per-word depth for
213 unsigned int *buckets = kqd->latency_buckets[sched_domain][type]; in flush_latency_buckets()
214 atomic_t *cpu_buckets = cpu_latency->buckets[sched_domain][type]; in flush_latency_buckets()
222 * Calculate the histogram bucket with the given percentile rank, or -1 if there
229 unsigned int *buckets = kqd->latency_buckets[sched_domain][type]; in calculate_percentile()
236 return -1; in calculate_percentile()
242 if (!kqd->latency_timeout[sched_domain]) in calculate_percentile()
243 kqd->latency_timeout[sched_domain] = max(jiffies + HZ, 1UL); in calculate_percentile()
245 time_is_after_jiffies(kqd->latency_timeout[sched_domain])) { in calculate_percentile()
246 return -1; in calculate_percentile()
248 kqd->latency_timeout[sched_domain] = 0; in calculate_percentile()
251 for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS - 1; bucket++) { in calculate_percentile()
254 percentile_samples -= buckets[bucket]; in calculate_percentile()
256 memset(buckets, 0, sizeof(kqd->latency_buckets[sched_domain][type])); in calculate_percentile()
258 trace_kyber_latency(kqd->q, kyber_domain_names[sched_domain], in calculate_percentile()
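The loop at lines 251-254 walks the summed histogram, dropping whole buckets from the front until the requested rank falls inside the current bucket, and the function returns -1 when there are not yet enough samples. A stand-alone sketch of the same percentile walk, with hypothetical names; it models only the sample-count half of the "enough samples" check and omits the jiffies-based timeout seen above:

#include <stdio.h>

#define BUCKETS 8

/*
 * Return the index of the bucket containing the sample with the given
 * percentile rank, or -1 if there are not enough samples yet.
 */
static int histogram_percentile(const unsigned int buckets[BUCKETS],
                                unsigned int percentile,
                                unsigned int min_samples)
{
        unsigned int samples = 0, rank;
        int b;

        for (b = 0; b < BUCKETS; b++)
                samples += buckets[b];

        if (samples < min_samples)
                return -1;

        /* Number of samples at or below the requested percentile, rounded up. */
        rank = (samples * percentile + 99) / 100;

        for (b = 0; b < BUCKETS - 1; b++) {
                if (rank <= buckets[b])
                        break;
                rank -= buckets[b];
        }
        return b;
}

int main(void)
{
        unsigned int h[BUCKETS] = { 40, 30, 15, 8, 4, 2, 1, 0 };

        printf("p90 bucket: %d\n", histogram_percentile(h, 90, 100));  /* 3 */
        printf("p99 bucket: %d\n", histogram_percentile(h, 99, 100));  /* 5 */
        return 0;
}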
269 if (depth != kqd->domain_tokens[sched_domain].sb.depth) { in kyber_resize_domain()
270 sbitmap_queue_resize(&kqd->domain_tokens[sched_domain], depth); in kyber_resize_domain()
271 trace_kyber_adjust(kqd->q, kyber_domain_names[sched_domain], in kyber_resize_domain()
283 /* Sum all of the per-cpu latency histograms. */ in kyber_timer_fn()
287 cpu_latency = per_cpu_ptr(kqd->cpu_latency, cpu); in kyber_timer_fn()
297 * Check if any domains have a high I/O latency, which might indicate in kyber_timer_fn()
323 * necessarily have enough samples to calculate the latency in kyber_timer_fn()
327 * reset it to -1. in kyber_timer_fn()
331 p99 = kqd->domain_p99[sched_domain]; in kyber_timer_fn()
332 kqd->domain_p99[sched_domain] = -1; in kyber_timer_fn()
334 kqd->domain_p99[sched_domain] = p99; in kyber_timer_fn()
340 * If this domain has bad latency, throttle less. Otherwise, in kyber_timer_fn()
343 * The new depth is scaled linearly with the p99 latency vs the in kyber_timer_fn()
344 * latency target. E.g., if the p99 is 3/4 of the target, then in kyber_timer_fn()
349 orig_depth = kqd->domain_tokens[sched_domain].sb.depth; in kyber_timer_fn()
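The truncated comment at lines 343-344 describes scaling the domain depth linearly with the measured p99 relative to the target: a p99 around 3/4 of the target throttles to roughly 3/4 of the current depth, while a p99 around twice the target roughly doubles it. Below is a hedged sketch of that proportional resize, assuming the quarter-target bucket width from the histogram above; the names and exact rounding are mine, not taken from the lines shown here:

#include <stdio.h>

#define SHIFT 2   /* bucket width = target / 4, as in the histogram above */

/*
 * Scale a domain's queue depth linearly with the p99 bucket: bucket 3 is
 * "<= target", so a p99 of 3 keeps the depth roughly unchanged, a lower
 * p99 shrinks it and a higher p99 grows it. Illustrative only.
 */
static unsigned int scale_depth(unsigned int cur_depth, int p99)
{
        unsigned int depth;

        if (p99 < 0)            /* not enough samples: leave depth alone */
                return cur_depth;

        depth = (cur_depth * (unsigned int)(p99 + 1)) >> SHIFT;
        return depth ? depth : 1;       /* never throttle to zero */
}

int main(void)
{
        /* p99 in bucket 2 (<= 3/4 target): throttle to ~3/4 of the depth. */
        printf("%u\n", scale_depth(64, 2));     /* 48 */
        /* p99 in bucket 7 (> 1 3/4 target): roughly double the depth. */
        printf("%u\n", scale_depth(64, 7));     /* 128 */
        return 0;
}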
359 int ret = -ENOMEM; in kyber_queue_data_alloc()
362 kqd = kzalloc_node(sizeof(*kqd), GFP_KERNEL, q->node); in kyber_queue_data_alloc()
366 kqd->q = q; in kyber_queue_data_alloc()
368 kqd->cpu_latency = alloc_percpu_gfp(struct kyber_cpu_latency, in kyber_queue_data_alloc()
370 if (!kqd->cpu_latency) in kyber_queue_data_alloc()
373 timer_setup(&kqd->timer, kyber_timer_fn, 0); in kyber_queue_data_alloc()
378 ret = sbitmap_queue_init_node(&kqd->domain_tokens[i], in kyber_queue_data_alloc()
379 kyber_depth[i], -1, false, in kyber_queue_data_alloc()
380 GFP_KERNEL, q->node); in kyber_queue_data_alloc()
382 while (--i >= 0) in kyber_queue_data_alloc()
383 sbitmap_queue_free(&kqd->domain_tokens[i]); in kyber_queue_data_alloc()
389 kqd->domain_p99[i] = -1; in kyber_queue_data_alloc()
390 kqd->latency_targets[i] = kyber_latency_targets[i]; in kyber_queue_data_alloc()
396 free_percpu(kqd->cpu_latency); in kyber_queue_data_alloc()
410 return -ENOMEM; in kyber_init_sched()
414 kobject_put(&eq->kobj); in kyber_init_sched()
420 eq->elevator_data = kqd; in kyber_init_sched()
421 q->elevator = eq; in kyber_init_sched()
428 struct kyber_queue_data *kqd = e->elevator_data; in kyber_exit_sched()
431 del_timer_sync(&kqd->timer); in kyber_exit_sched()
434 sbitmap_queue_free(&kqd->domain_tokens[i]); in kyber_exit_sched()
435 free_percpu(kqd->cpu_latency); in kyber_exit_sched()
443 spin_lock_init(&kcq->lock); in kyber_ctx_queue_init()
445 INIT_LIST_HEAD(&kcq->rq_list[i]); in kyber_ctx_queue_init()
450 struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data; in kyber_depth_updated()
451 struct blk_mq_tags *tags = hctx->sched_tags; in kyber_depth_updated()
452 unsigned int shift = tags->bitmap_tags->sb.shift; in kyber_depth_updated()
454 kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U; in kyber_depth_updated()
456 sbitmap_queue_min_shallow_depth(tags->bitmap_tags, kqd->async_depth); in kyber_depth_updated()
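Line 454 converts the async percentage into a per-word shallow depth: each sbitmap word holds (1 << shift) tags, and only that fraction of them may be taken by async requests, so synchronous I/O always has tags left. A small sketch of the arithmetic; the 75% figure is illustrative, since the percentage constant itself is not among the lines shown here:

#include <stdio.h>

/* Fraction of each sbitmap word that async requests may consume. */
#define ASYNC_PERCENT 75

static unsigned int async_depth(unsigned int word_shift)
{
        /* Same "(1 << shift) * percent / 100" per-word depth as above. */
        return (1U << word_shift) * ASYNC_PERCENT / 100U;
}

int main(void)
{
        /* A 64-bit sbitmap word (shift == 6) leaves 48 of 64 bits for async. */
        printf("%u\n", async_depth(6));
        return 0;
}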
464 khd = kmalloc_node(sizeof(*khd), GFP_KERNEL, hctx->numa_node); in kyber_init_hctx()
466 return -ENOMEM; in kyber_init_hctx()
468 khd->kcqs = kmalloc_array_node(hctx->nr_ctx, in kyber_init_hctx()
470 GFP_KERNEL, hctx->numa_node); in kyber_init_hctx()
471 if (!khd->kcqs) in kyber_init_hctx()
474 for (i = 0; i < hctx->nr_ctx; i++) in kyber_init_hctx()
475 kyber_ctx_queue_init(&khd->kcqs[i]); in kyber_init_hctx()
478 if (sbitmap_init_node(&khd->kcq_map[i], hctx->nr_ctx, in kyber_init_hctx()
479 ilog2(8), GFP_KERNEL, hctx->numa_node)) { in kyber_init_hctx()
480 while (--i >= 0) in kyber_init_hctx()
481 sbitmap_free(&khd->kcq_map[i]); in kyber_init_hctx()
486 spin_lock_init(&khd->lock); in kyber_init_hctx()
489 INIT_LIST_HEAD(&khd->rqs[i]); in kyber_init_hctx()
490 khd->domain_wait[i].sbq = NULL; in kyber_init_hctx()
491 init_waitqueue_func_entry(&khd->domain_wait[i].wait, in kyber_init_hctx()
493 khd->domain_wait[i].wait.private = hctx; in kyber_init_hctx()
494 INIT_LIST_HEAD(&khd->domain_wait[i].wait.entry); in kyber_init_hctx()
495 atomic_set(&khd->wait_index[i], 0); in kyber_init_hctx()
498 khd->cur_domain = 0; in kyber_init_hctx()
499 khd->batching = 0; in kyber_init_hctx()
501 hctx->sched_data = khd; in kyber_init_hctx()
507 kfree(khd->kcqs); in kyber_init_hctx()
510 return -ENOMEM; in kyber_init_hctx()
515 struct kyber_hctx_data *khd = hctx->sched_data; in kyber_exit_hctx()
519 sbitmap_free(&khd->kcq_map[i]); in kyber_exit_hctx()
520 kfree(khd->kcqs); in kyber_exit_hctx()
521 kfree(hctx->sched_data); in kyber_exit_hctx()
526 return (long)rq->elv.priv[0]; in rq_get_domain_token()
531 rq->elv.priv[0] = (void *)(long)token; in rq_set_domain_token()
541 if (nr != -1) { in rq_clear_domain_token()
542 sched_domain = kyber_sched_domain(rq->cmd_flags); in rq_clear_domain_token()
543 sbitmap_queue_clear(&kqd->domain_tokens[sched_domain], nr, in rq_clear_domain_token()
544 rq->mq_ctx->cpu); in rq_clear_domain_token()
551 * We use the scheduler tags as per-hardware queue queueing tokens. in kyber_limit_depth()
555 struct kyber_queue_data *kqd = data->q->elevator->elevator_data; in kyber_limit_depth()
557 data->shallow_depth = kqd->async_depth; in kyber_limit_depth()
565 struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx); in kyber_bio_merge()
566 struct kyber_hctx_data *khd = hctx->sched_data; in kyber_bio_merge()
567 struct kyber_ctx_queue *kcq = &khd->kcqs[ctx->index_hw[hctx->type]]; in kyber_bio_merge()
568 unsigned int sched_domain = kyber_sched_domain(bio->bi_opf); in kyber_bio_merge()
569 struct list_head *rq_list = &kcq->rq_list[sched_domain]; in kyber_bio_merge()
572 spin_lock(&kcq->lock); in kyber_bio_merge()
573 merged = blk_bio_list_merge(hctx->queue, rq_list, bio, nr_segs); in kyber_bio_merge()
574 spin_unlock(&kcq->lock); in kyber_bio_merge()
581 rq_set_domain_token(rq, -1); in kyber_prepare_request()
587 struct kyber_hctx_data *khd = hctx->sched_data; in kyber_insert_requests()
591 unsigned int sched_domain = kyber_sched_domain(rq->cmd_flags); in kyber_insert_requests()
592 struct kyber_ctx_queue *kcq = &khd->kcqs[rq->mq_ctx->index_hw[hctx->type]]; in kyber_insert_requests()
593 struct list_head *head = &kcq->rq_list[sched_domain]; in kyber_insert_requests()
595 spin_lock(&kcq->lock); in kyber_insert_requests()
597 list_move(&rq->queuelist, head); in kyber_insert_requests()
599 list_move_tail(&rq->queuelist, head); in kyber_insert_requests()
600 sbitmap_set_bit(&khd->kcq_map[sched_domain], in kyber_insert_requests()
601 rq->mq_ctx->index_hw[hctx->type]); in kyber_insert_requests()
603 spin_unlock(&kcq->lock); in kyber_insert_requests()
609 struct kyber_queue_data *kqd = rq->q->elevator->elevator_data; in kyber_finish_request()
616 u64 target, u64 latency) in add_latency_sample() argument
621 if (latency > 0) { in add_latency_sample()
623 bucket = min_t(unsigned int, div64_u64(latency - 1, divisor), in add_latency_sample()
624 KYBER_LATENCY_BUCKETS - 1); in add_latency_sample()
629 atomic_inc(&cpu_latency->buckets[sched_domain][type][bucket]); in add_latency_sample()
634 struct kyber_queue_data *kqd = rq->q->elevator->elevator_data; in kyber_completed_request()
639 sched_domain = kyber_sched_domain(rq->cmd_flags); in kyber_completed_request()
643 cpu_latency = get_cpu_ptr(kqd->cpu_latency); in kyber_completed_request()
644 target = kqd->latency_targets[sched_domain]; in kyber_completed_request()
646 target, now - rq->start_time_ns); in kyber_completed_request()
648 now - rq->io_start_time_ns); in kyber_completed_request()
649 put_cpu_ptr(kqd->cpu_latency); in kyber_completed_request()
651 timer_reduce(&kqd->timer, jiffies + HZ / 10); in kyber_completed_request()
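kyber_completed_request records two samples per request against the same target: total latency measured from request allocation (start_time_ns) and I/O latency measured from dispatch to the device (io_start_time_ns), which lets the timer tell device congestion apart from time spent queued in the scheduler. A toy sketch of keeping the two histograms side by side, reusing the bucket mapping from the earlier sketch; names are mine:

#include <stdint.h>
#include <stdio.h>

enum { TOTAL_LATENCY, IO_LATENCY, NUM_TYPES };
#define BUCKETS 8

static unsigned int hist[NUM_TYPES][BUCKETS];

/* Same quarter-target bucket mapping as the earlier sketch. */
static void record(int type, uint64_t target, uint64_t latency)
{
        uint64_t divisor = target >> 2 ? target >> 2 : 1;
        uint64_t b;

        if (!latency)
                return;
        b = (latency - 1) / divisor;
        hist[type][b < BUCKETS - 1 ? b : BUCKETS - 1]++;
}

int main(void)
{
        uint64_t target = 2000000;              /* 2 ms */
        uint64_t alloc = 100, dispatch = 900000, done = 2600000;

        /* Total latency includes time spent queued in the scheduler... */
        record(TOTAL_LATENCY, target, done - alloc);
        /* ...while I/O latency only covers time on the device. */
        record(IO_LATENCY, target, done - dispatch);

        printf("total[5]=%u io[3]=%u\n",
               hist[TOTAL_LATENCY][5], hist[IO_LATENCY][3]);
        return 0;
}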
663 struct kyber_ctx_queue *kcq = &flush_data->khd->kcqs[bitnr]; in flush_busy_kcq()
665 spin_lock(&kcq->lock); in flush_busy_kcq()
666 list_splice_tail_init(&kcq->rq_list[flush_data->sched_domain], in flush_busy_kcq()
667 flush_data->list); in flush_busy_kcq()
669 spin_unlock(&kcq->lock); in flush_busy_kcq()
684 sbitmap_for_each_set(&khd->kcq_map[sched_domain], in kyber_flush_busy_kcqs()
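Taken together, kyber_insert_requests sets a bit in kcq_map for the software context it queued onto (lines 600-601), and kyber_flush_busy_kcqs visits only the contexts whose bit is set (line 684), so dispatch never scans idle per-cpu queues. A user-space model of that mark-then-flush pattern, with a plain bitmask and counters standing in for the sbitmap and the locked kcq lists:

#include <stdio.h>

#define NUM_CTX 4

/* Pending request counts per software context stand in for the kcq lists. */
static unsigned int pending[NUM_CTX];
static unsigned long busy_map;          /* one bit per context with work */

static void insert(unsigned int ctx)
{
        pending[ctx]++;
        busy_map |= 1UL << ctx;         /* cheap "this kcq has work" hint */
}

/* Flush only the contexts whose bit is set. */
static unsigned int flush_busy(void)
{
        unsigned int ctx, moved = 0;

        for (ctx = 0; ctx < NUM_CTX; ctx++) {
                if (!(busy_map & (1UL << ctx)))
                        continue;
                moved += pending[ctx];
                pending[ctx] = 0;
                busy_map &= ~(1UL << ctx);
        }
        return moved;
}

int main(void)
{
        insert(1);
        insert(1);
        insert(3);
        printf("flushed %u requests\n", flush_busy());  /* 3 */
        return 0;
}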
691 struct blk_mq_hw_ctx *hctx = READ_ONCE(wqe->private); in kyber_domain_wake()
703 unsigned int sched_domain = khd->cur_domain; in kyber_get_domain_token()
704 struct sbitmap_queue *domain_tokens = &kqd->domain_tokens[sched_domain]; in kyber_get_domain_token()
705 struct sbq_wait *wait = &khd->domain_wait[sched_domain]; in kyber_get_domain_token()
714 * khd->lock, but we still need to be careful about the waker. in kyber_get_domain_token()
716 if (nr < 0 && list_empty_careful(&wait->wait.entry)) { in kyber_get_domain_token()
718 &khd->wait_index[sched_domain]); in kyber_get_domain_token()
719 khd->domain_ws[sched_domain] = ws; in kyber_get_domain_token()
732 * progress. It's possible that the waker already deleted the entry in kyber_get_domain_token()
736 if (nr >= 0 && !list_empty_careful(&wait->wait.entry)) { in kyber_get_domain_token()
737 ws = khd->domain_ws[sched_domain]; in kyber_get_domain_token()
738 spin_lock_irq(&ws->wait.lock); in kyber_get_domain_token()
740 spin_unlock_irq(&ws->wait.lock); in kyber_get_domain_token()
755 rqs = &khd->rqs[khd->cur_domain]; in kyber_dispatch_cur_domain()
762 * khd->lock serializes the flushes, so if we observed any bit set in in kyber_dispatch_cur_domain()
769 khd->batching++; in kyber_dispatch_cur_domain()
771 list_del_init(&rq->queuelist); in kyber_dispatch_cur_domain()
774 trace_kyber_throttled(kqd->q, in kyber_dispatch_cur_domain()
775 kyber_domain_names[khd->cur_domain]); in kyber_dispatch_cur_domain()
777 } else if (sbitmap_any_bit_set(&khd->kcq_map[khd->cur_domain])) { in kyber_dispatch_cur_domain()
780 kyber_flush_busy_kcqs(khd, khd->cur_domain, rqs); in kyber_dispatch_cur_domain()
782 khd->batching++; in kyber_dispatch_cur_domain()
784 list_del_init(&rq->queuelist); in kyber_dispatch_cur_domain()
787 trace_kyber_throttled(kqd->q, in kyber_dispatch_cur_domain()
788 kyber_domain_names[khd->cur_domain]); in kyber_dispatch_cur_domain()
798 struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data; in kyber_dispatch_request()
799 struct kyber_hctx_data *khd = hctx->sched_data; in kyber_dispatch_request()
803 spin_lock(&khd->lock); in kyber_dispatch_request()
809 if (khd->batching < kyber_batch_size[khd->cur_domain]) { in kyber_dispatch_request()
824 khd->batching = 0; in kyber_dispatch_request()
826 if (khd->cur_domain == KYBER_NUM_DOMAINS - 1) in kyber_dispatch_request()
827 khd->cur_domain = 0; in kyber_dispatch_request()
829 khd->cur_domain++; in kyber_dispatch_request()
838 spin_unlock(&khd->lock); in kyber_dispatch_request()
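kyber_dispatch_request keeps pulling from the current domain until its batch quota is spent (line 809), then resets the batch and advances round-robin through the domains (lines 824-829); the real code also skips domains with no work or no tokens, which this compact model leaves out. The batch sizes below are illustrative, not the kernel's:

#include <stdio.h>

#define NUM_DOMAINS 3

/* Illustrative per-domain batch sizes; reads get the largest share. */
static const unsigned int batch_size[NUM_DOMAINS] = { 16, 8, 4 };

struct sched_state {
        unsigned int cur_domain;
        unsigned int batching;
};

/* Pick the domain the next request should come from. */
static unsigned int next_domain(struct sched_state *s)
{
        if (s->batching < batch_size[s->cur_domain]) {
                s->batching++;
                return s->cur_domain;
        }

        /* Batch exhausted: restart the batch on the next domain. */
        s->batching = 1;
        s->cur_domain = (s->cur_domain + 1) % NUM_DOMAINS;
        return s->cur_domain;
}

int main(void)
{
        struct sched_state s = { 0, 0 };
        unsigned int i;

        for (i = 0; i < 30; i++)
                printf("%u ", next_domain(&s));
        printf("\n");
        return 0;
}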
844 struct kyber_hctx_data *khd = hctx->sched_data; in kyber_has_work()
848 if (!list_empty_careful(&khd->rqs[i]) || in kyber_has_work()
849 sbitmap_any_bit_set(&khd->kcq_map[i])) in kyber_has_work()
860 struct kyber_queue_data *kqd = e->elevator_data; \
862 return sprintf(page, "%llu\n", kqd->latency_targets[domain]); \
868 struct kyber_queue_data *kqd = e->elevator_data; \
876 kqd->latency_targets[domain] = nsec; \
897 struct kyber_queue_data *kqd = q->elevator->elevator_data; \
899 sbitmap_queue_show(&kqd->domain_tokens[domain], m); \
904 __acquires(&khd->lock) \
906 struct blk_mq_hw_ctx *hctx = m->private; \
907 struct kyber_hctx_data *khd = hctx->sched_data; \
909 spin_lock(&khd->lock); \
910 return seq_list_start(&khd->rqs[domain], *pos); \
916 struct blk_mq_hw_ctx *hctx = m->private; \
917 struct kyber_hctx_data *khd = hctx->sched_data; \
919 return seq_list_next(v, &khd->rqs[domain], pos); \
923 __releases(&khd->lock) \
925 struct blk_mq_hw_ctx *hctx = m->private; \
926 struct kyber_hctx_data *khd = hctx->sched_data; \
928 spin_unlock(&khd->lock); \
941 struct kyber_hctx_data *khd = hctx->sched_data; \
942 wait_queue_entry_t *wait = &khd->domain_wait[domain].wait; \
944 seq_printf(m, "%d\n", !list_empty_careful(&wait->entry)); \
956 struct kyber_queue_data *kqd = q->elevator->elevator_data; in KYBER_DEBUGFS_DOMAIN_ATTRS()
958 seq_printf(m, "%u\n", kqd->async_depth); in KYBER_DEBUGFS_DOMAIN_ATTRS()
965 struct kyber_hctx_data *khd = hctx->sched_data; in kyber_cur_domain_show()
967 seq_printf(m, "%s\n", kyber_domain_names[khd->cur_domain]); in kyber_cur_domain_show()
974 struct kyber_hctx_data *khd = hctx->sched_data; in kyber_batching_show()
976 seq_printf(m, "%u\n", khd->batching); in kyber_batching_show()