blk-throttle.c - OpenGrok cross reference for /kernel/linux/linux-5.10/block/blk-throttle.c

Lines Matching +full:low +full:- +full:latency
1 // SPDX-License-Identifier: GPL-2.0
13 #include <linux/blk-cgroup.h>
16 #include "blk-cgroup-rwstat.h"
31 #define DFL_LATENCY_TARGET (-1L)
36  * For HD, very small latency comes from sequential IO. Such IO is helpless to
56  * the parent, they're popped in round-robin order so that no single source
61  * throtl_service_queue and then dispatched in round-robin order.
70 	struct list_head	node;		/* service_queue->queued[] */
87 	 * their ->disptime.
97 	THROTL_TG_WAS_EMPTY	= 1 << 1,	/* bio_lists[] became non-empty */
179 	unsigned int bad_bio_cnt; /* bios exceeding latency threshold */
189 /* We measure latency for request size from <= 4k to >= 1M */
198 	unsigned long latency; /* ns / 1024 */  member
247 	return pd_to_blkg(&tg->pd);  in tg_to_blkg()
251  * sq_to_tg - return the throtl_grp the specified service queue belongs to
254  * Return the throtl_grp @sq belongs to.  If @sq is the top-level one
259 	if (sq && sq->parent_sq)  in sq_to_tg()
266  * sq_to_td - return throtl_data the specified service queue belongs to
277 		return tg->td;  in sq_to_td()
283  * cgroup's limit in LIMIT_MAX is scaled if low limit is set. This scale is to
286  *           every throtl_slice, the limit scales up 1/2 .low limit till the
288  * Scale down: exponentially scale down if a cgroup doesn't hit its .low limit
290 static uint64_t throtl_adjusted_limit(uint64_t low, struct throtl_data *td)  in throtl_adjusted_limit()  argument
293 	if (td->scale < 4096 && time_after_eq(jiffies,  in throtl_adjusted_limit()
294 	    td->low_upgrade_time + td->scale * td->throtl_slice))  in throtl_adjusted_limit()
295 		td->scale = (jiffies - td->low_upgrade_time) / td->throtl_slice;  in throtl_adjusted_limit()
297 	return low + (low >> 1) * td->scale;  in throtl_adjusted_limit()
306 	if (cgroup_subsys_on_dfl(io_cgrp_subsys) && !blkg->parent)  in tg_bps_limit()
309 	td = tg->td;  in tg_bps_limit()
310 	ret = tg->bps[rw][td->limit_index];  in tg_bps_limit()
311 	if (ret == 0 && td->limit_index == LIMIT_LOW) {  in tg_bps_limit()
313 		if (!list_empty(&blkg->blkcg->css.children) ||  in tg_bps_limit()
314 		    tg->iops[rw][td->limit_index])  in tg_bps_limit()
320 	if (td->limit_index == LIMIT_MAX && tg->bps[rw][LIMIT_LOW] &&  in tg_bps_limit()
321 	    tg->bps[rw][LIMIT_LOW] != tg->bps[rw][LIMIT_MAX]) {  in tg_bps_limit()
324 		adjusted = throtl_adjusted_limit(tg->bps[rw][LIMIT_LOW], td);  in tg_bps_limit()
325 		ret = min(tg->bps[rw][LIMIT_MAX], adjusted);  in tg_bps_limit()
336 	if (cgroup_subsys_on_dfl(io_cgrp_subsys) && !blkg->parent)  in tg_iops_limit()
339 	td = tg->td;  in tg_iops_limit()
340 	ret = tg->iops[rw][td->limit_index];  in tg_iops_limit()
341 	if (ret == 0 && tg->td->limit_index == LIMIT_LOW) {  in tg_iops_limit()
343 		if (!list_empty(&blkg->blkcg->css.children) ||  in tg_iops_limit()
344 		    tg->bps[rw][td->limit_index])  in tg_iops_limit()
350 	if (td->limit_index == LIMIT_MAX && tg->iops[rw][LIMIT_LOW] &&  in tg_iops_limit()
351 	    tg->iops[rw][LIMIT_LOW] != tg->iops[rw][LIMIT_MAX]) {  in tg_iops_limit()
354 		adjusted = throtl_adjusted_limit(tg->iops[rw][LIMIT_LOW], td);  in tg_iops_limit()
357 		ret = min_t(unsigned int, tg->iops[rw][LIMIT_MAX], adjusted);  in tg_iops_limit()
363 	clamp_t(int, order_base_2(sectors) - 3, 0, LATENCY_BUCKET_SIZE - 1)
366  * throtl_log - log debug message via blktrace
379 	if (likely(!blk_trace_note_message_enabled(__td->queue)))	\
382 		blk_add_cgroup_trace_msg(__td->queue,			\
383 			tg_to_blkg(__tg)->blkcg, "throtl " fmt, ##args);\
385 		blk_add_trace_msg(__td->queue, "throtl " fmt, ##args);	\
394 	return bio->bi_iter.bi_size;  in throtl_bio_data_size()
399 	INIT_LIST_HEAD(&qn->node);  in throtl_qnode_init()
400 	bio_list_init(&qn->bios);  in throtl_qnode_init()
401 	qn->tg = tg;  in throtl_qnode_init()
405  * throtl_qnode_add_bio - add a bio to a throtl_qnode and activate it
408  * @queued: the service_queue->queued[] list @qn belongs to
411  * @qn->tg's reference count is bumped when @qn is activated.  See the
417 	bio_list_add(&qn->bios, bio);  in throtl_qnode_add_bio()
418 	if (list_empty(&qn->node)) {  in throtl_qnode_add_bio()
419 		list_add_tail(&qn->node, queued);  in throtl_qnode_add_bio()
420 		blkg_get(tg_to_blkg(qn->tg));  in throtl_qnode_add_bio()
425  * throtl_peek_queued - peek the first bio on a qnode list
437 	bio = bio_list_peek(&qn->bios);  in throtl_peek_queued()
443  * throtl_pop_queued - pop the first bio from a qnode list
449  * that the popping order is round-robin.
466 	bio = bio_list_pop(&qn->bios);  in throtl_pop_queued()
469 	if (bio_list_empty(&qn->bios)) {  in throtl_pop_queued()
470 		list_del_init(&qn->node);  in throtl_pop_queued()
472 			*tg_to_put = qn->tg;  in throtl_pop_queued()
474 			blkg_put(tg_to_blkg(qn->tg));  in throtl_pop_queued()
476 		list_move_tail(&qn->node, queued);  in throtl_pop_queued()
485 	INIT_LIST_HEAD(&sq->queued[0]);  in throtl_service_queue_init()
486 	INIT_LIST_HEAD(&sq->queued[1]);  in throtl_service_queue_init()
487 	sq->pending_tree = RB_ROOT_CACHED;  in throtl_service_queue_init()
488 	timer_setup(&sq->pending_timer, throtl_pending_timer_fn, 0);  in throtl_service_queue_init()
498 	tg = kzalloc_node(sizeof(*tg), gfp, q->node);  in throtl_pd_alloc()
502 	if (blkg_rwstat_init(&tg->stat_bytes, gfp))  in throtl_pd_alloc()
505 	if (blkg_rwstat_init(&tg->stat_ios, gfp))  in throtl_pd_alloc()
508 	throtl_service_queue_init(&tg->service_queue);  in throtl_pd_alloc()
511 		throtl_qnode_init(&tg->qnode_on_self[rw], tg);  in throtl_pd_alloc()
512 		throtl_qnode_init(&tg->qnode_on_parent[rw], tg);  in throtl_pd_alloc()
515 	RB_CLEAR_NODE(&tg->rb_node);  in throtl_pd_alloc()
516 	tg->bps[READ][LIMIT_MAX] = U64_MAX;  in throtl_pd_alloc()
517 	tg->bps[WRITE][LIMIT_MAX] = U64_MAX;  in throtl_pd_alloc()
518 	tg->iops[READ][LIMIT_MAX] = UINT_MAX;  in throtl_pd_alloc()
519 	tg->iops[WRITE][LIMIT_MAX] = UINT_MAX;  in throtl_pd_alloc()
520 	tg->bps_conf[READ][LIMIT_MAX] = U64_MAX;  in throtl_pd_alloc()
521 	tg->bps_conf[WRITE][LIMIT_MAX] = U64_MAX;  in throtl_pd_alloc()
522 	tg->iops_conf[READ][LIMIT_MAX] = UINT_MAX;  in throtl_pd_alloc()
523 	tg->iops_conf[WRITE][LIMIT_MAX] = UINT_MAX;  in throtl_pd_alloc()
526 	tg->latency_target = DFL_LATENCY_TARGET;  in throtl_pd_alloc()
527 	tg->latency_target_conf = DFL_LATENCY_TARGET;  in throtl_pd_alloc()
528 	tg->idletime_threshold = DFL_IDLE_THRESHOLD;  in throtl_pd_alloc()
529 	tg->idletime_threshold_conf = DFL_IDLE_THRESHOLD;  in throtl_pd_alloc()
531 	return &tg->pd;  in throtl_pd_alloc()
534 	blkg_rwstat_exit(&tg->stat_bytes);  in throtl_pd_alloc()
544 	struct throtl_data *td = blkg->q->td;  in throtl_pd_init()
545 	struct throtl_service_queue *sq = &tg->service_queue;  in throtl_pd_init()
560 	sq->parent_sq = &td->service_queue;  in throtl_pd_init()
561 	if (cgroup_subsys_on_dfl(io_cgrp_subsys) && blkg->parent)  in throtl_pd_init()
562 		sq->parent_sq = &blkg_to_tg(blkg->parent)->service_queue;  in throtl_pd_init()
563 	tg->td = td;  in throtl_pd_init()
573 	struct throtl_grp *parent_tg = sq_to_tg(tg->service_queue.parent_sq);  in tg_update_has_rules()
574 	struct throtl_data *td = tg->td;  in tg_update_has_rules()
578 		tg->has_rules[rw] = (parent_tg && parent_tg->has_rules[rw]) ||  in tg_update_has_rules()
579 			(td->limit_valid[td->limit_index] &&  in tg_update_has_rules()
601 	blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) {  in blk_throtl_update_limit_valid()
604 		if (tg->bps[READ][LIMIT_LOW] || tg->bps[WRITE][LIMIT_LOW] ||  in blk_throtl_update_limit_valid()
605 		    tg->iops[READ][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW]) {  in blk_throtl_update_limit_valid()
612 	td->limit_valid[LIMIT_LOW] = low_valid;  in blk_throtl_update_limit_valid()
620 	tg->bps[READ][LIMIT_LOW] = 0;  in throtl_pd_offline()
621 	tg->bps[WRITE][LIMIT_LOW] = 0;  in throtl_pd_offline()
622 	tg->iops[READ][LIMIT_LOW] = 0;  in throtl_pd_offline()
623 	tg->iops[WRITE][LIMIT_LOW] = 0;  in throtl_pd_offline()
625 	blk_throtl_update_limit_valid(tg->td);  in throtl_pd_offline()
627 	if (!tg->td->limit_valid[tg->td->limit_index])  in throtl_pd_offline()
628 		throtl_upgrade_state(tg->td);  in throtl_pd_offline()
635 	del_timer_sync(&tg->service_queue.pending_timer);  in throtl_pd_free()
636 	blkg_rwstat_exit(&tg->stat_bytes);  in throtl_pd_free()
637 	blkg_rwstat_exit(&tg->stat_ios);  in throtl_pd_free()
646 	n = rb_first_cached(&parent_sq->pending_tree);  in throtl_rb_first()
656 	rb_erase_cached(n, &parent_sq->pending_tree);  in throtl_rb_erase()
658 	--parent_sq->nr_pending;  in throtl_rb_erase()
669 	parent_sq->first_pending_disptime = tg->disptime;  in update_min_dispatch_time()
674 	struct throtl_service_queue *parent_sq = tg->service_queue.parent_sq;  in tg_service_queue_add()
675 	struct rb_node **node = &parent_sq->pending_tree.rb_root.rb_node;  in tg_service_queue_add()
678 	unsigned long key = tg->disptime;  in tg_service_queue_add()
685 		if (time_before(key, __tg->disptime))  in tg_service_queue_add()
686 			node = &parent->rb_left;  in tg_service_queue_add()
688 			node = &parent->rb_right;  in tg_service_queue_add()
693 	rb_link_node(&tg->rb_node, parent, node);  in tg_service_queue_add()
694 	rb_insert_color_cached(&tg->rb_node, &parent_sq->pending_tree,  in tg_service_queue_add()
700 	if (!(tg->flags & THROTL_TG_PENDING)) {  in throtl_enqueue_tg()
702 		tg->flags |= THROTL_TG_PENDING;  in throtl_enqueue_tg()
703 		tg->service_queue.parent_sq->nr_pending++;  in throtl_enqueue_tg()
709 	if (tg->flags & THROTL_TG_PENDING) {  in throtl_dequeue_tg()
710 		throtl_rb_erase(&tg->rb_node, tg->service_queue.parent_sq);  in throtl_dequeue_tg()
711 		tg->flags &= ~THROTL_TG_PENDING;  in throtl_dequeue_tg()
719 	unsigned long max_expire = jiffies + 8 * sq_to_td(sq)->throtl_slice;  in throtl_schedule_pending_timer()
730 	mod_timer(&sq->pending_timer, expires);  in throtl_schedule_pending_timer()
732 		   expires - jiffies, jiffies);  in throtl_schedule_pending_timer()
736  * throtl_schedule_next_dispatch - schedule the next dispatch cycle
740  * Arm @sq->pending_timer so that the next dispatch cycle starts on the
750  * delay before dispatch starts even if @sq->first_pending_disptime is not
757 	if (!sq->nr_pending)  in throtl_schedule_next_dispatch()
763 	if (force || time_after(sq->first_pending_disptime, jiffies)) {  in throtl_schedule_next_dispatch()
764 		throtl_schedule_pending_timer(sq, sq->first_pending_disptime);  in throtl_schedule_next_dispatch()
775 	tg->bytes_disp[rw] = 0;  in throtl_start_new_slice_with_credit()
776 	tg->io_disp[rw] = 0;  in throtl_start_new_slice_with_credit()
778 	atomic_set(&tg->io_split_cnt[rw], 0);  in throtl_start_new_slice_with_credit()
786 	if (time_after_eq(start, tg->slice_start[rw]))  in throtl_start_new_slice_with_credit()
787 		tg->slice_start[rw] = start;  in throtl_start_new_slice_with_credit()
789 	tg->slice_end[rw] = jiffies + tg->td->throtl_slice;  in throtl_start_new_slice_with_credit()
790 	throtl_log(&tg->service_queue,  in throtl_start_new_slice_with_credit()
792 		   rw == READ ? 'R' : 'W', tg->slice_start[rw],  in throtl_start_new_slice_with_credit()
793 		   tg->slice_end[rw], jiffies);  in throtl_start_new_slice_with_credit()
798 	tg->bytes_disp[rw] = 0;  in throtl_start_new_slice()
799 	tg->io_disp[rw] = 0;  in throtl_start_new_slice()
800 	tg->slice_start[rw] = jiffies;  in throtl_start_new_slice()
801 	tg->slice_end[rw] = jiffies + tg->td->throtl_slice;  in throtl_start_new_slice()
803 	atomic_set(&tg->io_split_cnt[rw], 0);  in throtl_start_new_slice()
805 	throtl_log(&tg->service_queue,  in throtl_start_new_slice()
807 		   rw == READ ? 'R' : 'W', tg->slice_start[rw],  in throtl_start_new_slice()
808 		   tg->slice_end[rw], jiffies);  in throtl_start_new_slice()
814 	tg->slice_end[rw] = roundup(jiffy_end, tg->td->throtl_slice);  in throtl_set_slice_end()
821 	throtl_log(&tg->service_queue,  in throtl_extend_slice()
823 		   rw == READ ? 'R' : 'W', tg->slice_start[rw],  in throtl_extend_slice()
824 		   tg->slice_end[rw], jiffies);  in throtl_extend_slice()
830 	if (time_in_range(jiffies, tg->slice_start[rw], tg->slice_end[rw]))  in throtl_slice_used()
842 	BUG_ON(time_before(tg->slice_end[rw], tg->slice_start[rw]));  in throtl_trim_slice()
845 	 * If bps are unlimited (-1), then time slice don't get  in throtl_trim_slice()
854 	 * that initially cgroup limit was very low resulting in high  in throtl_trim_slice()
860 	throtl_set_slice_end(tg, rw, jiffies + tg->td->throtl_slice);  in throtl_trim_slice()
862 	time_elapsed = jiffies - tg->slice_start[rw];  in throtl_trim_slice()
864 	nr_slices = time_elapsed / tg->td->throtl_slice;  in throtl_trim_slice()
868 	tmp = tg_bps_limit(tg, rw) * tg->td->throtl_slice * nr_slices;  in throtl_trim_slice()
872 	io_trim = (tg_iops_limit(tg, rw) * tg->td->throtl_slice * nr_slices) /  in throtl_trim_slice()
878 	if (tg->bytes_disp[rw] >= bytes_trim)  in throtl_trim_slice()
879 		tg->bytes_disp[rw] -= bytes_trim;  in throtl_trim_slice()
881 		tg->bytes_disp[rw] = 0;  in throtl_trim_slice()
883 	if (tg->io_disp[rw] >= io_trim)  in throtl_trim_slice()
884 		tg->io_disp[rw] -= io_trim;  in throtl_trim_slice()
886 		tg->io_disp[rw] = 0;  in throtl_trim_slice()
888 	tg->slice_start[rw] += nr_slices * tg->td->throtl_slice;  in throtl_trim_slice()
890 	throtl_log(&tg->service_queue,  in throtl_trim_slice()
893 		   tg->slice_start[rw], tg->slice_end[rw], jiffies);  in throtl_trim_slice()
910 	jiffy_elapsed = jiffies - tg->slice_start[rw];  in tg_with_in_iops_limit()
913 	jiffy_elapsed_rnd = roundup(jiffy_elapsed + 1, tg->td->throtl_slice);  in tg_with_in_iops_limit()
930 	if (tg->io_disp[rw] + 1 <= io_allowed) {  in tg_with_in_iops_limit()
937 	jiffy_wait = jiffy_elapsed_rnd - jiffy_elapsed;  in tg_with_in_iops_limit()
958 	jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[rw];  in tg_with_in_bps_limit()
962 		jiffy_elapsed_rnd = tg->td->throtl_slice;  in tg_with_in_bps_limit()
964 	jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, tg->td->throtl_slice);  in tg_with_in_bps_limit()
968 	if (tg->bytes_disp[rw] + bio_size <= bytes_allowed) {  in tg_with_in_bps_limit()
975 	extra_bytes = tg->bytes_disp[rw] + bio_size - bytes_allowed;  in tg_with_in_bps_limit()
985 	jiffy_wait = jiffy_wait + (jiffy_elapsed_rnd - jiffy_elapsed);  in tg_with_in_bps_limit()
993  * of jiffies to wait before this bio is with-in IO rate and can be dispatched
1009 	BUG_ON(tg->service_queue.nr_queued[rw] &&  in tg_may_dispatch()
1010 	       bio != throtl_peek_queued(&tg->service_queue.queued[rw]));  in tg_may_dispatch()
1012 	/* If tg->bps = -1, then BW is unlimited */  in tg_may_dispatch()
1026 	if (throtl_slice_used(tg, rw) && !(tg->service_queue.nr_queued[rw]))  in tg_may_dispatch()
1029 		if (time_before(tg->slice_end[rw],  in tg_may_dispatch()
1030 		    jiffies + tg->td->throtl_slice))  in tg_may_dispatch()
1032 				jiffies + tg->td->throtl_slice);  in tg_may_dispatch()
1036 		tg->io_disp[rw] += atomic_xchg(&tg->io_split_cnt[rw], 0);  in tg_may_dispatch()
1050 	if (time_before(tg->slice_end[rw], jiffies + max_wait))  in tg_may_dispatch()
1062 	tg->bytes_disp[rw] += bio_size;  in throtl_charge_bio()
1063 	tg->io_disp[rw]++;  in throtl_charge_bio()
1064 	tg->last_bytes_disp[rw] += bio_size;  in throtl_charge_bio()
1065 	tg->last_io_disp[rw]++;  in throtl_charge_bio()
1069 	 * more than once as a throttled bio will go through blk-throtl the  in throtl_charge_bio()
1078  * throtl_add_bio_tg - add a bio to the specified throtl_grp
1084  * tg->qnode_on_self[] is used.
1089 	struct throtl_service_queue *sq = &tg->service_queue;  in throtl_add_bio_tg()
1093 		qn = &tg->qnode_on_self[rw];  in throtl_add_bio_tg()
1101 	if (!sq->nr_queued[rw])  in throtl_add_bio_tg()
1102 		tg->flags |= THROTL_TG_WAS_EMPTY;  in throtl_add_bio_tg()
1104 	throtl_qnode_add_bio(bio, qn, &sq->queued[rw]);  in throtl_add_bio_tg()
1106 	sq->nr_queued[rw]++;  in throtl_add_bio_tg()
1112 	struct throtl_service_queue *sq = &tg->service_queue;  in tg_update_disptime()
1113 	unsigned long read_wait = -1, write_wait = -1, min_wait = -1, disptime;  in tg_update_disptime()
1116 	bio = throtl_peek_queued(&sq->queued[READ]);  in tg_update_disptime()
1120 	bio = throtl_peek_queued(&sq->queued[WRITE]);  in tg_update_disptime()
1129 	tg->disptime = disptime;  in tg_update_disptime()
1133 	tg->flags &= ~THROTL_TG_WAS_EMPTY;  in tg_update_disptime()
1141 				child_tg->slice_start[rw]);  in start_parent_slice_with_credit()
1148 	struct throtl_service_queue *sq = &tg->service_queue;  in tg_dispatch_one_bio()
1149 	struct throtl_service_queue *parent_sq = sq->parent_sq;  in tg_dispatch_one_bio()
1160 	bio = throtl_pop_queued(&sq->queued[rw], &tg_to_put);  in tg_dispatch_one_bio()
1161 	sq->nr_queued[rw]--;  in tg_dispatch_one_bio()
1168 	 * @td->service_queue, @bio is ready to be issued.  Put it on its  in tg_dispatch_one_bio()
1173 		throtl_add_bio_tg(bio, &tg->qnode_on_parent[rw], parent_tg);  in tg_dispatch_one_bio()
1176 		throtl_qnode_add_bio(bio, &tg->qnode_on_parent[rw],  in tg_dispatch_one_bio()
1177 				     &parent_sq->queued[rw]);  in tg_dispatch_one_bio()
1178 		BUG_ON(tg->td->nr_queued[rw] <= 0);  in tg_dispatch_one_bio()
1179 		tg->td->nr_queued[rw]--;  in tg_dispatch_one_bio()
1190 	struct throtl_service_queue *sq = &tg->service_queue;  in throtl_dispatch_tg()
1193 	unsigned int max_nr_writes = THROTL_GRP_QUANTUM - max_nr_reads;  in throtl_dispatch_tg()
1198 	while ((bio = throtl_peek_queued(&sq->queued[READ])) &&  in throtl_dispatch_tg()
1208 	while ((bio = throtl_peek_queued(&sq->queued[WRITE])) &&  in throtl_dispatch_tg()
1229 		if (!parent_sq->nr_pending)  in throtl_select_dispatch()
1236 		if (time_before(jiffies, tg->disptime))  in throtl_select_dispatch()
1243 		sq = &tg->service_queue;  in throtl_select_dispatch()
1244 		if (sq->nr_queued[0] || sq->nr_queued[1])  in throtl_select_dispatch()
1257  * throtl_pending_timer_fn - timer function for service_queue->pending_timer
1268  * the top-level service_tree is reached, throtl_data->dispatch_work is
1276 	struct request_queue *q = td->queue;  in throtl_pending_timer_fn()
1281 	spin_lock_irq(&q->queue_lock);  in throtl_pending_timer_fn()
1286 	parent_sq = sq->parent_sq;  in throtl_pending_timer_fn()
1291 			   sq->nr_queued[READ] + sq->nr_queued[WRITE],  in throtl_pending_timer_fn()
1292 			   sq->nr_queued[READ], sq->nr_queued[WRITE]);  in throtl_pending_timer_fn()
1304 		spin_unlock_irq(&q->queue_lock);  in throtl_pending_timer_fn()
1306 		spin_lock_irq(&q->queue_lock);  in throtl_pending_timer_fn()
1314 		if (tg->flags & THROTL_TG_WAS_EMPTY) {  in throtl_pending_timer_fn()
1324 		/* reached the top-level, queue issuing */  in throtl_pending_timer_fn()
1325 		queue_work(kthrotld_workqueue, &td->dispatch_work);  in throtl_pending_timer_fn()
1328 	spin_unlock_irq(&q->queue_lock);  in throtl_pending_timer_fn()
1332  * blk_throtl_dispatch_work_fn - work function for throtl_data->dispatch_work
1336  * of throtl_data->service_queue.  Those bios are ready and issued by this
1343 	struct throtl_service_queue *td_sq = &td->service_queue;  in blk_throtl_dispatch_work_fn()
1344 	struct request_queue *q = td->queue;  in blk_throtl_dispatch_work_fn()
1352 	spin_lock_irq(&q->queue_lock);  in blk_throtl_dispatch_work_fn()
1354 		while ((bio = throtl_pop_queued(&td_sq->queued[rw], NULL)))  in blk_throtl_dispatch_work_fn()
1356 	spin_unlock_irq(&q->queue_lock);  in blk_throtl_dispatch_work_fn()
1391 			  &blkcg_policy_throtl, seq_cft(sf)->private, false);  in tg_print_conf_u64()
1398 			  &blkcg_policy_throtl, seq_cft(sf)->private, false);  in tg_print_conf_uint()
1404 	struct throtl_service_queue *sq = &tg->service_queue;  in tg_conf_updated()
1408 	throtl_log(&tg->service_queue,  in tg_conf_updated()
1419 	 * blk-throttle.  in tg_conf_updated()
1422 			global ? tg->td->queue->root_blkg : tg_to_blkg(tg)) {  in tg_conf_updated()
1428 		if (!cgroup_subsys_on_dfl(io_cgrp_subsys) || !blkg->parent ||  in tg_conf_updated()
1429 		    !blkg->parent->parent)  in tg_conf_updated()
1431 		parent_tg = blkg_to_tg(blkg->parent);  in tg_conf_updated()
1434 		 * higher latency target  in tg_conf_updated()
1436 		this_tg->idletime_threshold = min(this_tg->idletime_threshold,  in tg_conf_updated()
1437 				parent_tg->idletime_threshold);  in tg_conf_updated()
1438 		this_tg->latency_target = max(this_tg->latency_target,  in tg_conf_updated()
1439 				parent_tg->latency_target);  in tg_conf_updated()
1449 	 * account recently dispatched IO with new low rate.  in tg_conf_updated()
1454 	if (tg->flags & THROTL_TG_PENDING) {  in tg_conf_updated()
1456 		throtl_schedule_next_dispatch(sq->parent_sq, true);  in tg_conf_updated()
1462 	if (test_bit(QUEUE_FLAG_THROTL_INIT_DONE, &q->queue_flags))  in throtl_check_init_done()
1465 	return blk_queue_dying(q) ? -ENODEV : -EBUSY;  in throtl_check_init_done()
1469  * If throtl_check_init_done() returns -EBUSY, we should retry after a short
1477 	if (ret == -EBUSY) {  in throtl_restart_syscall_when_busy()
1498 	ret = throtl_check_init_done(ctx.disk->queue);  in tg_set_conf()
1502 	ret = -EINVAL;  in tg_set_conf()
1511 		*(u64 *)((void *)tg + of_cft(of)->private) = v;  in tg_set_conf()
1513 		*(unsigned int *)((void *)tg + of_cft(of)->private) = v;  in tg_set_conf()
1539 			  seq_cft(sf)->private, true);  in tg_print_rwstat()
1557 			  seq_cft(sf)->private, true);  in tg_print_rwstat_recursive()
1613 	const char *dname = blkg_dev_name(pd->blkg);  in tg_prfill_limit()
1631 	if (tg->bps_conf[READ][off] == bps_dft &&  in tg_prfill_limit()
1632 	    tg->bps_conf[WRITE][off] == bps_dft &&  in tg_prfill_limit()
1633 	    tg->iops_conf[READ][off] == iops_dft &&  in tg_prfill_limit()
1634 	    tg->iops_conf[WRITE][off] == iops_dft &&  in tg_prfill_limit()
1636 	     (tg->idletime_threshold_conf == DFL_IDLE_THRESHOLD &&  in tg_prfill_limit()
1637 	      tg->latency_target_conf == DFL_LATENCY_TARGET)))  in tg_prfill_limit()
1640 	if (tg->bps_conf[READ][off] != U64_MAX)  in tg_prfill_limit()
1642 			tg->bps_conf[READ][off]);  in tg_prfill_limit()
1643 	if (tg->bps_conf[WRITE][off] != U64_MAX)  in tg_prfill_limit()
1645 			tg->bps_conf[WRITE][off]);  in tg_prfill_limit()
1646 	if (tg->iops_conf[READ][off] != UINT_MAX)  in tg_prfill_limit()
1648 			tg->iops_conf[READ][off]);  in tg_prfill_limit()
1649 	if (tg->iops_conf[WRITE][off] != UINT_MAX)  in tg_prfill_limit()
1651 			tg->iops_conf[WRITE][off]);  in tg_prfill_limit()
1653 		if (tg->idletime_threshold_conf == ULONG_MAX)  in tg_prfill_limit()
1657 				tg->idletime_threshold_conf);  in tg_prfill_limit()
1659 		if (tg->latency_target_conf == ULONG_MAX)  in tg_prfill_limit()
1660 			strcpy(latency_time, " latency=max");  in tg_prfill_limit()
1663 				" latency=%lu", tg->latency_target_conf);  in tg_prfill_limit()
1675 			  &blkcg_policy_throtl, seq_cft(sf)->private, false);  in tg_print_limit()
1689 	int index = of_cft(of)->private;  in tg_set_limit()
1695 	ret = throtl_check_init_done(ctx.disk->queue);  in tg_set_limit()
1700 	v[0] = tg->bps_conf[READ][index];  in tg_set_limit()
1701 	v[1] = tg->bps_conf[WRITE][index];  in tg_set_limit()
1702 	v[2] = tg->iops_conf[READ][index];  in tg_set_limit()
1703 	v[3] = tg->iops_conf[WRITE][index];  in tg_set_limit()
1705 	idle_time = tg->idletime_threshold_conf;  in tg_set_limit()
1706 	latency_time = tg->latency_target_conf;  in tg_set_limit()
1719 		ret = -EINVAL;  in tg_set_limit()
1725 		ret = -ERANGE;  in tg_set_limit()
1729 		ret = -EINVAL;  in tg_set_limit()
1740 		else if (off == LIMIT_LOW && !strcmp(tok, "latency"))  in tg_set_limit()
1746 	tg->bps_conf[READ][index] = v[0];  in tg_set_limit()
1747 	tg->bps_conf[WRITE][index] = v[1];  in tg_set_limit()
1748 	tg->iops_conf[READ][index] = v[2];  in tg_set_limit()
1749 	tg->iops_conf[WRITE][index] = v[3];  in tg_set_limit()
1752 		tg->bps[READ][index] = v[0];  in tg_set_limit()
1753 		tg->bps[WRITE][index] = v[1];  in tg_set_limit()
1754 		tg->iops[READ][index] = v[2];  in tg_set_limit()
1755 		tg->iops[WRITE][index] = v[3];  in tg_set_limit()
1757 	tg->bps[READ][LIMIT_LOW] = min(tg->bps_conf[READ][LIMIT_LOW],  in tg_set_limit()
1758 		tg->bps_conf[READ][LIMIT_MAX]);  in tg_set_limit()
1759 	tg->bps[WRITE][LIMIT_LOW] = min(tg->bps_conf[WRITE][LIMIT_LOW],  in tg_set_limit()
1760 		tg->bps_conf[WRITE][LIMIT_MAX]);  in tg_set_limit()
1761 	tg->iops[READ][LIMIT_LOW] = min(tg->iops_conf[READ][LIMIT_LOW],  in tg_set_limit()
1762 		tg->iops_conf[READ][LIMIT_MAX]);  in tg_set_limit()
1763 	tg->iops[WRITE][LIMIT_LOW] = min(tg->iops_conf[WRITE][LIMIT_LOW],  in tg_set_limit()
1764 		tg->iops_conf[WRITE][LIMIT_MAX]);  in tg_set_limit()
1765 	tg->idletime_threshold_conf = idle_time;  in tg_set_limit()
1766 	tg->latency_target_conf = latency_time;  in tg_set_limit()
1768 	/* force user to configure all settings for low limit  */  in tg_set_limit()
1769 	if (!(tg->bps[READ][LIMIT_LOW] || tg->iops[READ][LIMIT_LOW] ||  in tg_set_limit()
1770 	      tg->bps[WRITE][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW]) ||  in tg_set_limit()
1771 	    tg->idletime_threshold_conf == DFL_IDLE_THRESHOLD ||  in tg_set_limit()
1772 	    tg->latency_target_conf == DFL_LATENCY_TARGET) {  in tg_set_limit()
1773 		tg->bps[READ][LIMIT_LOW] = 0;  in tg_set_limit()
1774 		tg->bps[WRITE][LIMIT_LOW] = 0;  in tg_set_limit()
1775 		tg->iops[READ][LIMIT_LOW] = 0;  in tg_set_limit()
1776 		tg->iops[WRITE][LIMIT_LOW] = 0;  in tg_set_limit()
1777 		tg->idletime_threshold = DFL_IDLE_THRESHOLD;  in tg_set_limit()
1778 		tg->latency_target = DFL_LATENCY_TARGET;  in tg_set_limit()
1780 		tg->idletime_threshold = tg->idletime_threshold_conf;  in tg_set_limit()
1781 		tg->latency_target = tg->latency_target_conf;  in tg_set_limit()
1784 	blk_throtl_update_limit_valid(tg->td);  in tg_set_limit()
1785 	if (tg->td->limit_valid[LIMIT_LOW]) {  in tg_set_limit()
1787 			tg->td->limit_index = LIMIT_LOW;  in tg_set_limit()
1789 		tg->td->limit_index = LIMIT_MAX;  in tg_set_limit()
1791 		tg->td->limit_valid[LIMIT_LOW]);  in tg_set_limit()
1802 		.name = "low",
1821 	struct throtl_data *td = q->td;  in throtl_shutdown_wq()
1823 	cancel_work_sync(&td->dispatch_work);  in throtl_shutdown_wq()
1841 	if (tg->bps[READ][LIMIT_LOW] || tg->iops[READ][LIMIT_LOW])  in __tg_last_low_overflow_time()
1842 		rtime = tg->last_low_overflow_time[READ];  in __tg_last_low_overflow_time()
1843 	if (tg->bps[WRITE][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW])  in __tg_last_low_overflow_time()
1844 		wtime = tg->last_low_overflow_time[WRITE];  in __tg_last_low_overflow_time()
1856 		parent_sq = parent->service_queue.parent_sq;  in tg_last_low_overflow_time()
1862 		 * The parent doesn't have low limit, it always reaches low  in tg_last_low_overflow_time()
1865 		if (!parent->bps[READ][LIMIT_LOW] &&  in tg_last_low_overflow_time()
1866 		    !parent->iops[READ][LIMIT_LOW] &&  in tg_last_low_overflow_time()
1867 		    !parent->bps[WRITE][LIMIT_LOW] &&  in tg_last_low_overflow_time()
1868 		    !parent->iops[WRITE][LIMIT_LOW])  in tg_last_low_overflow_time()
1880 	 * - single idle is too long, longer than a fixed value (in case user  in throtl_tg_is_idle()
1882 	 * - average think time is more than threshold  in throtl_tg_is_idle()
1883 	 * - IO latency is largely below threshold  in throtl_tg_is_idle()
1888 	time = min_t(unsigned long, MAX_IDLE_TIME, 4 * tg->idletime_threshold);  in throtl_tg_is_idle()
1889 	ret = tg->latency_target == DFL_LATENCY_TARGET ||  in throtl_tg_is_idle()
1890 	      tg->idletime_threshold == DFL_IDLE_THRESHOLD ||  in throtl_tg_is_idle()
1891 	      (ktime_get_ns() >> 10) - tg->last_finish_time > time ||  in throtl_tg_is_idle()
1892 	      tg->avg_idletime > tg->idletime_threshold ||  in throtl_tg_is_idle()
1893 	      (tg->latency_target && tg->bio_cnt &&  in throtl_tg_is_idle()
1894 		tg->bad_bio_cnt * 5 < tg->bio_cnt);  in throtl_tg_is_idle()
1895 	throtl_log(&tg->service_queue,  in throtl_tg_is_idle()
1897 		tg->avg_idletime, tg->idletime_threshold, tg->bad_bio_cnt,  in throtl_tg_is_idle()
1898 		tg->bio_cnt, ret, tg->td->scale);  in throtl_tg_is_idle()
1904 	struct throtl_service_queue *sq = &tg->service_queue;  in throtl_tg_can_upgrade()
1908 	 * if cgroup reaches low limit (if low limit is 0, the cgroup always  in throtl_tg_can_upgrade()
1911 	read_limit = tg->bps[READ][LIMIT_LOW] || tg->iops[READ][LIMIT_LOW];  in throtl_tg_can_upgrade()
1912 	write_limit = tg->bps[WRITE][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW];  in throtl_tg_can_upgrade()
1915 	if (read_limit && sq->nr_queued[READ] &&  in throtl_tg_can_upgrade()
1916 	    (!write_limit || sq->nr_queued[WRITE]))  in throtl_tg_can_upgrade()
1918 	if (write_limit && sq->nr_queued[WRITE] &&  in throtl_tg_can_upgrade()
1919 	    (!read_limit || sq->nr_queued[READ]))  in throtl_tg_can_upgrade()
1923 		tg_last_low_overflow_time(tg) + tg->td->throtl_slice) &&  in throtl_tg_can_upgrade()
1934 		tg = sq_to_tg(tg->service_queue.parent_sq);  in throtl_hierarchy_can_upgrade()
1935 		if (!tg || !tg_to_blkg(tg)->parent)  in throtl_hierarchy_can_upgrade()
1947 	if (td->limit_index != LIMIT_LOW)  in throtl_can_upgrade()
1950 	if (time_before(jiffies, td->low_downgrade_time + td->throtl_slice))  in throtl_can_upgrade()
1954 	blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) {  in throtl_can_upgrade()
1959 		if (!list_empty(&tg_to_blkg(tg)->blkcg->css.children))  in throtl_can_upgrade()
1974 	if (tg->td->limit_index != LIMIT_LOW)  in throtl_upgrade_check()
1977 	if (time_after(tg->last_check_time + tg->td->throtl_slice, now))  in throtl_upgrade_check()
1980 	tg->last_check_time = now;  in throtl_upgrade_check()
1983 	     __tg_last_low_overflow_time(tg) + tg->td->throtl_slice))  in throtl_upgrade_check()
1986 	if (throtl_can_upgrade(tg->td, NULL))  in throtl_upgrade_check()
1987 		throtl_upgrade_state(tg->td);  in throtl_upgrade_check()
1995 	throtl_log(&td->service_queue, "upgrade to max");  in throtl_upgrade_state()
1996 	td->limit_index = LIMIT_MAX;  in throtl_upgrade_state()
1997 	td->low_upgrade_time = jiffies;  in throtl_upgrade_state()
1998 	td->scale = 0;  in throtl_upgrade_state()
2000 	blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) {  in throtl_upgrade_state()
2002 		struct throtl_service_queue *sq = &tg->service_queue;  in throtl_upgrade_state()
2004 		tg->disptime = jiffies - 1;  in throtl_upgrade_state()
2009 	throtl_select_dispatch(&td->service_queue);  in throtl_upgrade_state()
2010 	throtl_schedule_next_dispatch(&td->service_queue, true);  in throtl_upgrade_state()
2011 	queue_work(kthrotld_workqueue, &td->dispatch_work);  in throtl_upgrade_state()
2016 	td->scale /= 2;  in throtl_downgrade_state()
2018 	throtl_log(&td->service_queue, "downgrade, scale %d", td->scale);  in throtl_downgrade_state()
2019 	if (td->scale) {  in throtl_downgrade_state()
2020 		td->low_upgrade_time = jiffies - td->scale * td->throtl_slice;  in throtl_downgrade_state()
2024 	td->limit_index = LIMIT_LOW;  in throtl_downgrade_state()
2025 	td->low_downgrade_time = jiffies;  in throtl_downgrade_state()
2030 	struct throtl_data *td = tg->td;  in throtl_tg_can_downgrade()
2034 	 * If cgroup is below low limit, consider downgrade and throttle other  in throtl_tg_can_downgrade()
2037 	if (time_after_eq(now, td->low_upgrade_time + td->throtl_slice) &&  in throtl_tg_can_downgrade()
2039 					td->throtl_slice) &&  in throtl_tg_can_downgrade()
2041 	     !list_empty(&tg_to_blkg(tg)->blkcg->css.children)))  in throtl_tg_can_downgrade()
2051 		tg = sq_to_tg(tg->service_queue.parent_sq);  in throtl_hierarchy_can_downgrade()
2052 		if (!tg || !tg_to_blkg(tg)->parent)  in throtl_hierarchy_can_downgrade()
2065 	if (tg->td->limit_index != LIMIT_MAX ||  in throtl_downgrade_check()
2066 	    !tg->td->limit_valid[LIMIT_LOW])  in throtl_downgrade_check()
2068 	if (!list_empty(&tg_to_blkg(tg)->blkcg->css.children))  in throtl_downgrade_check()
2070 	if (time_after(tg->last_check_time + tg->td->throtl_slice, now))  in throtl_downgrade_check()
2073 	elapsed_time = now - tg->last_check_time;  in throtl_downgrade_check()
2074 	tg->last_check_time = now;  in throtl_downgrade_check()
2077 			tg->td->throtl_slice))  in throtl_downgrade_check()
2080 	if (tg->bps[READ][LIMIT_LOW]) {  in throtl_downgrade_check()
2081 		bps = tg->last_bytes_disp[READ] * HZ;  in throtl_downgrade_check()
2083 		if (bps >= tg->bps[READ][LIMIT_LOW])  in throtl_downgrade_check()
2084 			tg->last_low_overflow_time[READ] = now;  in throtl_downgrade_check()
2087 	if (tg->bps[WRITE][LIMIT_LOW]) {  in throtl_downgrade_check()
2088 		bps = tg->last_bytes_disp[WRITE] * HZ;  in throtl_downgrade_check()
2090 		if (bps >= tg->bps[WRITE][LIMIT_LOW])  in throtl_downgrade_check()
2091 			tg->last_low_overflow_time[WRITE] = now;  in throtl_downgrade_check()
2094 	if (tg->iops[READ][LIMIT_LOW]) {  in throtl_downgrade_check()
2095 		tg->last_io_disp[READ] += atomic_xchg(&tg->last_io_split_cnt[READ], 0);  in throtl_downgrade_check()
2096 		iops = tg->last_io_disp[READ] * HZ / elapsed_time;  in throtl_downgrade_check()
2097 		if (iops >= tg->iops[READ][LIMIT_LOW])  in throtl_downgrade_check()
2098 			tg->last_low_overflow_time[READ] = now;  in throtl_downgrade_check()
2101 	if (tg->iops[WRITE][LIMIT_LOW]) {  in throtl_downgrade_check()
2102 		tg->last_io_disp[WRITE] += atomic_xchg(&tg->last_io_split_cnt[WRITE], 0);  in throtl_downgrade_check()
2103 		iops = tg->last_io_disp[WRITE] * HZ / elapsed_time;  in throtl_downgrade_check()
2104 		if (iops >= tg->iops[WRITE][LIMIT_LOW])  in throtl_downgrade_check()
2105 			tg->last_low_overflow_time[WRITE] = now;  in throtl_downgrade_check()
2109 	 * If cgroup is below low limit, consider downgrade and throttle other  in throtl_downgrade_check()
2113 		throtl_downgrade_state(tg->td);  in throtl_downgrade_check()
2115 	tg->last_bytes_disp[READ] = 0;  in throtl_downgrade_check()
2116 	tg->last_bytes_disp[WRITE] = 0;  in throtl_downgrade_check()
2117 	tg->last_io_disp[READ] = 0;  in throtl_downgrade_check()
2118 	tg->last_io_disp[WRITE] = 0;  in throtl_downgrade_check()
2124 	unsigned long last_finish_time = tg->last_finish_time;  in blk_throtl_update_idletime()
2131 	    last_finish_time == tg->checked_last_finish_time)  in blk_throtl_update_idletime()
2134 	tg->avg_idletime = (tg->avg_idletime * 7 + now - last_finish_time) >> 3;  in blk_throtl_update_idletime()
2135 	tg->checked_last_finish_time = last_finish_time;  in blk_throtl_update_idletime()
2144 	unsigned long latency[2];  in throtl_update_latency_buckets()  local
2146 	if (!blk_queue_nonrot(td->queue) || !td->limit_valid[LIMIT_LOW])  in throtl_update_latency_buckets()
2148 	if (time_before(jiffies, td->last_calculate_time + HZ))  in throtl_update_latency_buckets()
2150 	td->last_calculate_time = jiffies;  in throtl_update_latency_buckets()
2155 			struct latency_bucket *tmp = &td->tmp_buckets[rw][i];  in throtl_update_latency_buckets()
2161 				bucket = per_cpu_ptr(td->latency_buckets[rw],  in throtl_update_latency_buckets()
2163 				tmp->total_latency += bucket[i].total_latency;  in throtl_update_latency_buckets()
2164 				tmp->samples += bucket[i].samples;  in throtl_update_latency_buckets()
2169 			if (tmp->samples >= 32) {  in throtl_update_latency_buckets()
2170 				int samples = tmp->samples;  in throtl_update_latency_buckets()
2172 				latency[rw] = tmp->total_latency;  in throtl_update_latency_buckets()
2174 				tmp->total_latency = 0;  in throtl_update_latency_buckets()
2175 				tmp->samples = 0;  in throtl_update_latency_buckets()
2176 				latency[rw] /= samples;  in throtl_update_latency_buckets()
2177 				if (latency[rw] == 0)  in throtl_update_latency_buckets()
2179 				avg_latency[rw][i].latency = latency[rw];  in throtl_update_latency_buckets()
2186 			if (!avg_latency[rw][i].latency) {  in throtl_update_latency_buckets()
2187 				if (td->avg_buckets[rw][i].latency < last_latency[rw])  in throtl_update_latency_buckets()
2188 					td->avg_buckets[rw][i].latency =  in throtl_update_latency_buckets()
2193 			if (!td->avg_buckets[rw][i].valid)  in throtl_update_latency_buckets()
2194 				latency[rw] = avg_latency[rw][i].latency;  in throtl_update_latency_buckets()
2196 				latency[rw] = (td->avg_buckets[rw][i].latency * 7 +  in throtl_update_latency_buckets()
2197 					avg_latency[rw][i].latency) >> 3;  in throtl_update_latency_buckets()
2199 			td->avg_buckets[rw][i].latency = max(latency[rw],  in throtl_update_latency_buckets()
2201 			td->avg_buckets[rw][i].valid = true;  in throtl_update_latency_buckets()
2202 			last_latency[rw] = td->avg_buckets[rw][i].latency;  in throtl_update_latency_buckets()
2207 		throtl_log(&td->service_queue,  in throtl_update_latency_buckets()
2208 			"Latency bucket %d: read latency=%ld, read valid=%d, "  in throtl_update_latency_buckets()
2209 			"write latency=%ld, write valid=%d", i,  in throtl_update_latency_buckets()
2210 			td->avg_buckets[READ][i].latency,  in throtl_update_latency_buckets()
2211 			td->avg_buckets[READ][i].valid,  in throtl_update_latency_buckets()
2212 			td->avg_buckets[WRITE][i].latency,  in throtl_update_latency_buckets()
2213 			td->avg_buckets[WRITE][i].valid);  in throtl_update_latency_buckets()
2223 	struct blkcg_gq *blkg = bio->bi_blkg;  in blk_throtl_charge_bio_split()
2229 		if (!parent->has_rules[rw])  in blk_throtl_charge_bio_split()
2232 		atomic_inc(&parent->io_split_cnt[rw]);  in blk_throtl_charge_bio_split()
2233 		atomic_inc(&parent->last_io_split_cnt[rw]);  in blk_throtl_charge_bio_split()
2235 		parent_sq = parent->service_queue.parent_sq;  in blk_throtl_charge_bio_split()
2242 	struct request_queue *q = bio->bi_disk->queue;  in blk_throtl_bio()
2243 	struct blkcg_gq *blkg = bio->bi_blkg;  in blk_throtl_bio()
2249 	struct throtl_data *td = tg->td;  in blk_throtl_bio()
2258 		blkg_rwstat_add(&tg->stat_bytes, bio->bi_opf,  in blk_throtl_bio()
2259 				bio->bi_iter.bi_size);  in blk_throtl_bio()
2260 		blkg_rwstat_add(&tg->stat_ios, bio->bi_opf, 1);  in blk_throtl_bio()
2263 	if (!tg->has_rules[rw])  in blk_throtl_bio()
2266 	spin_lock_irq(&q->queue_lock);  in blk_throtl_bio()
2272 	sq = &tg->service_queue;  in blk_throtl_bio()
2276 		if (tg->last_low_overflow_time[rw] == 0)  in blk_throtl_bio()
2277 			tg->last_low_overflow_time[rw] = jiffies;  in blk_throtl_bio()
2280 		/* throtl is FIFO - if bios are already queued, should queue */  in blk_throtl_bio()
2281 		if (sq->nr_queued[rw])  in blk_throtl_bio()
2286 			tg->last_low_overflow_time[rw] = jiffies;  in blk_throtl_bio()
2303 		 * low rate and newly queued IO gets a really long dispatch  in blk_throtl_bio()
2315 		qn = &tg->qnode_on_parent[rw];  in blk_throtl_bio()
2316 		sq = sq->parent_sq;  in blk_throtl_bio()
2322 	/* out-of-limit, queue to @tg */  in blk_throtl_bio()
2325 		   tg->bytes_disp[rw], bio->bi_iter.bi_size,  in blk_throtl_bio()
2327 		   tg->io_disp[rw], tg_iops_limit(tg, rw),  in blk_throtl_bio()
2328 		   sq->nr_queued[READ], sq->nr_queued[WRITE]);  in blk_throtl_bio()
2330 	tg->last_low_overflow_time[rw] = jiffies;  in blk_throtl_bio()
2332 	td->nr_queued[rw]++;  in blk_throtl_bio()
2342 	if (tg->flags & THROTL_TG_WAS_EMPTY) {  in blk_throtl_bio()
2344 		throtl_schedule_next_dispatch(tg->service_queue.parent_sq, true);  in blk_throtl_bio()
2348 	spin_unlock_irq(&q->queue_lock);  in blk_throtl_bio()
2353 	if (throttled || !td->track_bio_latency)  in blk_throtl_bio()
2354 		bio->bi_issue.value |= BIO_ISSUE_THROTL_SKIP_LATENCY;  in blk_throtl_bio()
2364 	struct latency_bucket *latency;  in throtl_track_latency()  local
2367 	if (!td || td->limit_index != LIMIT_LOW ||  in throtl_track_latency()
2369 	    !blk_queue_nonrot(td->queue))  in throtl_track_latency()
2374 	latency = get_cpu_ptr(td->latency_buckets[op]);  in throtl_track_latency()
2375 	latency[index].total_latency += time;  in throtl_track_latency()
2376 	latency[index].samples++;  in throtl_track_latency()
2377 	put_cpu_ptr(td->latency_buckets[op]);  in throtl_track_latency()
2382 	struct request_queue *q = rq->q;  in blk_throtl_stat_add()
2383 	struct throtl_data *td = q->td;  in blk_throtl_stat_add()
2399 	blkg = bio->bi_blkg;  in blk_throtl_bio_endio()
2403 	if (!tg->td->limit_valid[LIMIT_LOW])  in blk_throtl_bio_endio()
2407 	tg->last_finish_time = finish_time_ns >> 10;  in blk_throtl_bio_endio()
2409 	start_time = bio_issue_time(&bio->bi_issue) >> 10;  in blk_throtl_bio_endio()
2414 	lat = finish_time - start_time;  in blk_throtl_bio_endio()
2416 	if (!(bio->bi_issue.value & BIO_ISSUE_THROTL_SKIP_LATENCY))  in blk_throtl_bio_endio()
2417 		throtl_track_latency(tg->td, bio_issue_size(&bio->bi_issue),  in blk_throtl_bio_endio()
2420 	if (tg->latency_target && lat >= tg->td->filtered_latency) {  in blk_throtl_bio_endio()
2424 		bucket = request_bucket_index(bio_issue_size(&bio->bi_issue));  in blk_throtl_bio_endio()
2425 		threshold = tg->td->avg_buckets[rw][bucket].latency +  in blk_throtl_bio_endio()
2426 			tg->latency_target;  in blk_throtl_bio_endio()
2428 			tg->bad_bio_cnt++;  in blk_throtl_bio_endio()
2433 		tg->bio_cnt++;  in blk_throtl_bio_endio()
2436 	if (time_after(jiffies, tg->bio_cnt_reset_time) || tg->bio_cnt > 1024) {  in blk_throtl_bio_endio()
2437 		tg->bio_cnt_reset_time = tg->td->throtl_slice + jiffies;  in blk_throtl_bio_endio()
2438 		tg->bio_cnt /= 2;  in blk_throtl_bio_endio()
2439 		tg->bad_bio_cnt /= 2;  in blk_throtl_bio_endio()
2449 	td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node);  in blk_throtl_init()
2451 		return -ENOMEM;  in blk_throtl_init()
2452 	td->latency_buckets[READ] = __alloc_percpu(sizeof(struct latency_bucket) *  in blk_throtl_init()
2454 	if (!td->latency_buckets[READ]) {  in blk_throtl_init()
2456 		return -ENOMEM;  in blk_throtl_init()
2458 	td->latency_buckets[WRITE] = __alloc_percpu(sizeof(struct latency_bucket) *  in blk_throtl_init()
2460 	if (!td->latency_buckets[WRITE]) {  in blk_throtl_init()
2461 		free_percpu(td->latency_buckets[READ]);  in blk_throtl_init()
2463 		return -ENOMEM;  in blk_throtl_init()
2466 	INIT_WORK(&td->dispatch_work, blk_throtl_dispatch_work_fn);  in blk_throtl_init()
2467 	throtl_service_queue_init(&td->service_queue);  in blk_throtl_init()
2469 	q->td = td;  in blk_throtl_init()
2470 	td->queue = q;  in blk_throtl_init()
2472 	td->limit_valid[LIMIT_MAX] = true;  in blk_throtl_init()
2473 	td->limit_index = LIMIT_MAX;  in blk_throtl_init()
2474 	td->low_upgrade_time = jiffies;  in blk_throtl_init()
2475 	td->low_downgrade_time = jiffies;  in blk_throtl_init()
2480 		free_percpu(td->latency_buckets[READ]);  in blk_throtl_init()
2481 		free_percpu(td->latency_buckets[WRITE]);  in blk_throtl_init()
2489 	BUG_ON(!q->td);  in blk_throtl_exit()
2490 	del_timer_sync(&q->td->service_queue.pending_timer);  in blk_throtl_exit()
2493 	free_percpu(q->td->latency_buckets[READ]);  in blk_throtl_exit()
2494 	free_percpu(q->td->latency_buckets[WRITE]);  in blk_throtl_exit()
2495 	kfree(q->td);  in blk_throtl_exit()
2503 	td = q->td;  in blk_throtl_register_queue()
2507 		td->throtl_slice = DFL_THROTL_SLICE_SSD;  in blk_throtl_register_queue()
2508 		td->filtered_latency = LATENCY_FILTERED_SSD;  in blk_throtl_register_queue()
2510 		td->throtl_slice = DFL_THROTL_SLICE_HD;  in blk_throtl_register_queue()
2511 		td->filtered_latency = LATENCY_FILTERED_HD;  in blk_throtl_register_queue()
2513 			td->avg_buckets[READ][i].latency = DFL_HD_BASELINE_LATENCY;  in blk_throtl_register_queue()
2514 			td->avg_buckets[WRITE][i].latency = DFL_HD_BASELINE_LATENCY;  in blk_throtl_register_queue()
2518 	/* if no low limit, use previous default */  in blk_throtl_register_queue()
2519 	td->throtl_slice = DFL_THROTL_SLICE_HD;  in blk_throtl_register_queue()
2522 	td->track_bio_latency = !queue_is_mq(q);  in blk_throtl_register_queue()
2523 	if (!td->track_bio_latency)  in blk_throtl_register_queue()
2530 	if (!q->td)  in blk_throtl_sample_time_show()
2531 		return -EINVAL;  in blk_throtl_sample_time_show()
2532 	return sprintf(page, "%u\n", jiffies_to_msecs(q->td->throtl_slice));  in blk_throtl_sample_time_show()
2541 	if (!q->td)  in blk_throtl_sample_time_store()
2542 		return -EINVAL;  in blk_throtl_sample_time_store()
2544 		return -EINVAL;  in blk_throtl_sample_time_store()
2547 		return -EINVAL;  in blk_throtl_sample_time_store()
2548 	q->td->throtl_slice = t;  in blk_throtl_sample_time_store()