1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * net/sched/sch_htb.c	Hierarchical token bucket, feed tree version
4  *
5  * Authors:	Martin Devera, <devik@cdi.cz>
6  *
7  * Credits (in time order) for older HTB versions:
8  *              Stef Coene <stef.coene@docum.org>
9  *			HTB support at LARTC mailing list
10  *		Ondrej Kraus, <krauso@barr.cz>
11  *			found missing INIT_QDISC(htb)
12  *		Vladimir Smelhaus, Aamer Akhter, Bert Hubert
13  *			helped a lot to locate nasty class stall bug
14  *		Andi Kleen, Jamal Hadi, Bert Hubert
15  *			code review and helpful comments on shaping
16  *		Tomasz Wrona, <tw@eter.tym.pl>
17  *			created test case so that I was able to fix nasty bug
18  *		Wilfried Weissmann
19  *			spotted bug in dequeue code and helped with fix
20  *		Jiri Fojtasek
21  *			fixed requeue routine
22  *		and many others. thanks.
23  */
24 #include <linux/module.h>
25 #include <linux/moduleparam.h>
26 #include <linux/types.h>
27 #include <linux/kernel.h>
28 #include <linux/string.h>
29 #include <linux/errno.h>
30 #include <linux/skbuff.h>
31 #include <linux/list.h>
32 #include <linux/compiler.h>
33 #include <linux/rbtree.h>
34 #include <linux/workqueue.h>
35 #include <linux/slab.h>
36 #include <net/netlink.h>
37 #include <net/sch_generic.h>
38 #include <net/pkt_sched.h>
39 #include <net/pkt_cls.h>
40 
41 /* HTB algorithm.
42     Author: devik@cdi.cz
43     ========================================================================
44     HTB is like TBF with multiple classes. It is also similar to CBQ because
45     it allows assigning a priority to each class in the hierarchy.
46     In fact it is another implementation of Floyd's formal sharing.
47 
48     Levels:
49     Each class is assigned a level. A leaf ALWAYS has level 0 and root
50     classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
51     one less than their parent.
52 */
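/* Editor's illustrative sketch (not part of the original file), assuming
 * TC_HTB_MAXDEPTH is 8 as defined in include/uapi/linux/pkt_sched.h:
 *
 *	1:1    root class             -> level TC_HTB_MAXDEPTH-1 = 7
 *	1:10   inner class under 1:1  -> level 6 (one less than its parent)
 *	1:100  leaf under 1:10        -> level 0 (leaves are ALWAYS level 0)
 *	1:101  leaf under 1:10        -> level 0
 */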
53 
54 static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
55 #define HTB_VER 0x30011		/* major must be matched with number supplied by TC as version */
56 
57 #if HTB_VER >> 16 != TC_HTB_PROTOVER
58 #error "Mismatched sch_htb.c and pkt_sch.h"
59 #endif
60 
61 /* Module parameter and sysfs export */
62 module_param    (htb_hysteresis, int, 0640);
63 MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
64 
65 static int htb_rate_est = 0; /* htb classes have a default rate estimator */
66 module_param(htb_rate_est, int, 0640);
67 MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");
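/* Editor's usage sketch (not part of the original file): given the 0640
 * permissions above, both knobs can be set at load time or flipped later
 * through sysfs, e.g.:
 *
 *	modprobe sch_htb htb_hysteresis=1
 *	echo 1 > /sys/module/sch_htb/parameters/htb_hysteresis
 */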
68 
69 /* used internally to keep the status of a single class */
70 enum htb_cmode {
71 	HTB_CANT_SEND,		/* class can't send and can't borrow */
72 	HTB_MAY_BORROW,		/* class can't send but may borrow */
73 	HTB_CAN_SEND		/* class can send */
74 };
75 
76 struct htb_prio {
77 	union {
78 		struct rb_root	row;
79 		struct rb_root	feed;
80 	};
81 	struct rb_node	*ptr;
82 	/* When a class changes from state 1->2 and disconnects from
83 	 * its parent's feed, we lose the ptr value and start from the
84 	 * first child again. Here we store the classid of the
85 	 * last valid ptr (used when ptr is NULL).
86 	 */
87 	u32		last_ptr_id;
88 };
89 
90 /* interior & leaf nodes; props specific to leaves are marked L:
91  * To reduce false sharing, place mostly read fields at beginning,
92  * and mostly written ones at the end.
93  */
94 struct htb_class {
95 	struct Qdisc_class_common common;
96 	struct psched_ratecfg	rate;
97 	struct psched_ratecfg	ceil;
98 	s64			buffer, cbuffer;/* token bucket depth/rate */
99 	s64			mbuffer;	/* max wait time */
100 	u32			prio;		/* these two are used only by leaves... */
101 	int			quantum;	/* but stored for parent-to-leaf return */
102 
103 	struct tcf_proto __rcu	*filter_list;	/* class attached filters */
104 	struct tcf_block	*block;
105 	int			filter_cnt;
106 
107 	int			level;		/* our level (see above) */
108 	unsigned int		children;
109 	struct htb_class	*parent;	/* parent class */
110 
111 	struct net_rate_estimator __rcu *rate_est;
112 
113 	/*
114 	 * Often-written fields
115 	 */
116 	struct gnet_stats_basic_packed bstats;
117 	struct gnet_stats_basic_packed bstats_bias;
118 	struct tc_htb_xstats	xstats;	/* our special stats */
119 
120 	/* token bucket parameters */
121 	s64			tokens, ctokens;/* current number of tokens */
122 	s64			t_c;		/* checkpoint time */
123 
124 	union {
125 		struct htb_class_leaf {
126 			int		deficit[TC_HTB_MAXDEPTH];
127 			struct Qdisc	*q;
128 			struct netdev_queue *offload_queue;
129 		} leaf;
130 		struct htb_class_inner {
131 			struct htb_prio clprio[TC_HTB_NUMPRIO];
132 		} inner;
133 	};
134 	s64			pq_key;
135 
136 	int			prio_activity;	/* for which prios are we active */
137 	enum htb_cmode		cmode;		/* current mode of the class */
138 	struct rb_node		pq_node;	/* node for event queue */
139 	struct rb_node		node[TC_HTB_NUMPRIO];	/* node for self or feed tree */
140 
141 	unsigned int drops ____cacheline_aligned_in_smp;
142 	unsigned int		overlimits;
143 };
144 
145 struct htb_level {
146 	struct rb_root	wait_pq;
147 	struct htb_prio hprio[TC_HTB_NUMPRIO];
148 };
149 
150 struct htb_sched {
151 	struct Qdisc_class_hash clhash;
152 	int			defcls;		/* class where unclassified flows go to */
153 	int			rate2quantum;	/* quant = rate / rate2quantum */
154 
155 	/* filters for qdisc itself */
156 	struct tcf_proto __rcu	*filter_list;
157 	struct tcf_block	*block;
158 
159 #define HTB_WARN_TOOMANYEVENTS	0x1
160 	unsigned int		warned;	/* only one warning */
161 	int			direct_qlen;
162 	struct work_struct	work;
163 
164 	/* non shaped skbs; let them go directly thru */
165 	struct qdisc_skb_head	direct_queue;
166 	u32			direct_pkts;
167 	u32			overlimits;
168 
169 	struct qdisc_watchdog	watchdog;
170 
171 	s64			now;	/* cached dequeue time */
172 
173 	/* time of nearest event per level (row) */
174 	s64			near_ev_cache[TC_HTB_MAXDEPTH];
175 
176 	int			row_mask[TC_HTB_MAXDEPTH];
177 
178 	struct htb_level	hlevel[TC_HTB_MAXDEPTH];
179 
180 	struct Qdisc		**direct_qdiscs;
181 	unsigned int            num_direct_qdiscs;
182 
183 	bool			offload;
184 };
185 
186 /* find class in global hash table using given handle */
187 static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
188 {
189 	struct htb_sched *q = qdisc_priv(sch);
190 	struct Qdisc_class_common *clc;
191 
192 	clc = qdisc_class_find(&q->clhash, handle);
193 	if (clc == NULL)
194 		return NULL;
195 	return container_of(clc, struct htb_class, common);
196 }
197 
198 static unsigned long htb_search(struct Qdisc *sch, u32 handle)
199 {
200 	return (unsigned long)htb_find(handle, sch);
201 }
202 /**
203  * htb_classify - classify a packet into class
204  *
205  * It returns NULL if the packet should be dropped or -1 if the packet
206  * should be passed directly thru. In all other cases a leaf class is returned.
207  * We allow direct class selection by classid in priority. Then we examine
208  * filters in qdisc and in inner nodes (if higher filter points to the inner
209  * node). If we end up with classid MAJOR:0 we enqueue the skb into the special
210  * internal fifo (direct). These packets then go directly thru. If we still
211  * have no valid leaf we try to use the MAJOR:default leaf. If that is still
212  * unsuccessful, we finish and return the direct queue.
213  */
214 #define HTB_DIRECT ((struct htb_class *)-1L)
215 
216 static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
217 				      int *qerr)
218 {
219 	struct htb_sched *q = qdisc_priv(sch);
220 	struct htb_class *cl;
221 	struct tcf_result res;
222 	struct tcf_proto *tcf;
223 	int result;
224 
225 	/* allow to select class by setting skb->priority to valid classid;
226 	 * note that nfmark can be used too by attaching filter fw with no
227 	 * rules in it
228 	 */
229 	if (skb->priority == sch->handle)
230 		return HTB_DIRECT;	/* X:0 (direct flow) selected */
231 	cl = htb_find(skb->priority, sch);
232 	if (cl) {
233 		if (cl->level == 0)
234 			return cl;
235 		/* Start with inner filter chain if a non-leaf class is selected */
236 		tcf = rcu_dereference_bh(cl->filter_list);
237 	} else {
238 		tcf = rcu_dereference_bh(q->filter_list);
239 	}
240 
241 	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
242 	while (tcf && (result = tcf_classify(skb, NULL, tcf, &res, false)) >= 0) {
243 #ifdef CONFIG_NET_CLS_ACT
244 		switch (result) {
245 		case TC_ACT_QUEUED:
246 		case TC_ACT_STOLEN:
247 		case TC_ACT_TRAP:
248 			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
249 			fallthrough;
250 		case TC_ACT_SHOT:
251 			return NULL;
252 		}
253 #endif
254 		cl = (void *)res.class;
255 		if (!cl) {
256 			if (res.classid == sch->handle)
257 				return HTB_DIRECT;	/* X:0 (direct flow) */
258 			cl = htb_find(res.classid, sch);
259 			if (!cl)
260 				break;	/* filter selected invalid classid */
261 		}
262 		if (!cl->level)
263 			return cl;	/* we hit leaf; return it */
264 
265 		/* we have got inner class; apply inner filter chain */
266 		tcf = rcu_dereference_bh(cl->filter_list);
267 	}
268 	/* classification failed; try to use default class */
269 	cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
270 	if (!cl || cl->level)
271 		return HTB_DIRECT;	/* bad default .. this is safe bet */
272 	return cl;
273 }
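/* Editor's illustrative example (not part of the original file): with a
 * hypothetical setup such as
 *
 *	tc qdisc add dev eth0 root handle 1: htb default 20
 *	tc class add dev eth0 parent 1: classid 1:20 htb rate 1mbit
 *
 * a packet whose skb->priority equals the qdisc handle 1:0 is returned as
 * HTB_DIRECT, one whose skb->priority equals classid 1:20 selects that leaf
 * directly, and a packet matching no filter falls back to the default class
 * 1:20; were the default missing or an inner class, it too would end up in
 * the direct queue.
 */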
274 
275 /**
276  * htb_add_to_id_tree - adds class to the round robin list
277  * @root: the root of the tree
278  * @cl: the class to add
279  * @prio: the given prio in class
280  *
281  * Routine adds class to the list (actually tree) sorted by classid.
282  * Make sure that class is not already on such list for given prio.
283  */
284 static void htb_add_to_id_tree(struct rb_root *root,
285 			       struct htb_class *cl, int prio)
286 {
287 	struct rb_node **p = &root->rb_node, *parent = NULL;
288 
289 	while (*p) {
290 		struct htb_class *c;
291 		parent = *p;
292 		c = rb_entry(parent, struct htb_class, node[prio]);
293 
294 		if (cl->common.classid > c->common.classid)
295 			p = &parent->rb_right;
296 		else
297 			p = &parent->rb_left;
298 	}
299 	rb_link_node(&cl->node[prio], parent, p);
300 	rb_insert_color(&cl->node[prio], root);
301 }
302 
303 /**
304  * htb_add_to_wait_tree - adds class to the event queue with delay
305  * @q: the priority event queue
306  * @cl: the class to add
307  * @delay: delay in microseconds
308  *
309  * The class is added to priority event queue to indicate that class will
310  * change its mode in cl->pq_key microseconds. Make sure that class is not
311  * already in the queue.
312  */
313 static void htb_add_to_wait_tree(struct htb_sched *q,
314 				 struct htb_class *cl, s64 delay)
315 {
316 	struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL;
317 
318 	cl->pq_key = q->now + delay;
319 	if (cl->pq_key == q->now)
320 		cl->pq_key++;
321 
322 	/* update the nearest event cache */
323 	if (q->near_ev_cache[cl->level] > cl->pq_key)
324 		q->near_ev_cache[cl->level] = cl->pq_key;
325 
326 	while (*p) {
327 		struct htb_class *c;
328 		parent = *p;
329 		c = rb_entry(parent, struct htb_class, pq_node);
330 		if (cl->pq_key >= c->pq_key)
331 			p = &parent->rb_right;
332 		else
333 			p = &parent->rb_left;
334 	}
335 	rb_link_node(&cl->pq_node, parent, p);
336 	rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
337 }
338 
339 /**
340  * htb_next_rb_node - finds next node in binary tree
341  * @n: the current node in binary tree
342  *
343  * When we are past last key we return NULL.
344  * Average complexity is 2 steps per call.
345  */
346 static inline void htb_next_rb_node(struct rb_node **n)
347 {
348 	*n = rb_next(*n);
349 }
350 
351 /**
352  * htb_add_class_to_row - add class to its row
353  * @q: the priority event queue
354  * @cl: the class to add
355  * @mask: the given priorities in class in bitmap
356  *
357  * The class is added to row at priorities marked in mask.
358  * It does nothing if mask == 0.
359  */
360 static inline void htb_add_class_to_row(struct htb_sched *q,
361 					struct htb_class *cl, int mask)
362 {
363 	q->row_mask[cl->level] |= mask;
364 	while (mask) {
365 		int prio = ffz(~mask);
366 		mask &= ~(1 << prio);
367 		htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio);
368 	}
369 }
370 
371 /* If this triggers, it is a bug in this code, but it need not be fatal */
372 static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
373 {
374 	if (RB_EMPTY_NODE(rb)) {
375 		WARN_ON(1);
376 	} else {
377 		rb_erase(rb, root);
378 		RB_CLEAR_NODE(rb);
379 	}
380 }
381 
382 
383 /**
384  * htb_remove_class_from_row - removes class from its row
385  * @q: the priority event queue
386  * @cl: the class to remove
387  * @mask: the given priorities in class in bitmap
388  *
389  * The class is removed from row at priorities marked in mask.
390  * It does nothing if mask == 0.
391  */
392 static inline void htb_remove_class_from_row(struct htb_sched *q,
393 						 struct htb_class *cl, int mask)
394 {
395 	int m = 0;
396 	struct htb_level *hlevel = &q->hlevel[cl->level];
397 
398 	while (mask) {
399 		int prio = ffz(~mask);
400 		struct htb_prio *hprio = &hlevel->hprio[prio];
401 
402 		mask &= ~(1 << prio);
403 		if (hprio->ptr == cl->node + prio)
404 			htb_next_rb_node(&hprio->ptr);
405 
406 		htb_safe_rb_erase(cl->node + prio, &hprio->row);
407 		if (!hprio->row.rb_node)
408 			m |= 1 << prio;
409 	}
410 	q->row_mask[cl->level] &= ~m;
411 }
412 
413 /**
414  * htb_activate_prios - creates active class's feed chain
415  * @q: the priority event queue
416  * @cl: the class to activate
417  *
418  * The class is connected to ancestors and/or appropriate rows
419  * for the priorities it participates in. cl->cmode must be the new
420  * (activated) mode. It does nothing if cl->prio_activity == 0.
421  */
422 static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
423 {
424 	struct htb_class *p = cl->parent;
425 	long m, mask = cl->prio_activity;
426 
427 	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
428 		m = mask;
429 		while (m) {
430 			unsigned int prio = ffz(~m);
431 
432 			if (WARN_ON_ONCE(prio >= ARRAY_SIZE(p->inner.clprio)))
433 				break;
434 			m &= ~(1 << prio);
435 
436 			if (p->inner.clprio[prio].feed.rb_node)
437 				/* parent already has its feed in use, so
438 				 * reset the bit in mask as the parent is already ok
439 				 */
440 				mask &= ~(1 << prio);
441 
442 			htb_add_to_id_tree(&p->inner.clprio[prio].feed, cl, prio);
443 		}
444 		p->prio_activity |= mask;
445 		cl = p;
446 		p = cl->parent;
447 
448 	}
449 	if (cl->cmode == HTB_CAN_SEND && mask)
450 		htb_add_class_to_row(q, cl, mask);
451 }
452 
453 /**
454  * htb_deactivate_prios - remove class from feed chain
455  * @q: the priority event queue
456  * @cl: the class to deactivate
457  *
458  * cl->cmode must represent old mode (before deactivation). It does
459  * nothing if cl->prio_activity == 0. Class is removed from all feed
460  * chains and rows.
461  */
462 static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
463 {
464 	struct htb_class *p = cl->parent;
465 	long m, mask = cl->prio_activity;
466 
467 	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
468 		m = mask;
469 		mask = 0;
470 		while (m) {
471 			int prio = ffz(~m);
472 			m &= ~(1 << prio);
473 
474 			if (p->inner.clprio[prio].ptr == cl->node + prio) {
475 				/* we are removing child which is pointed to from
476 				 * parent feed - forget the pointer but remember
477 				 * classid
478 				 */
479 				p->inner.clprio[prio].last_ptr_id = cl->common.classid;
480 				p->inner.clprio[prio].ptr = NULL;
481 			}
482 
483 			htb_safe_rb_erase(cl->node + prio,
484 					  &p->inner.clprio[prio].feed);
485 
486 			if (!p->inner.clprio[prio].feed.rb_node)
487 				mask |= 1 << prio;
488 		}
489 
490 		p->prio_activity &= ~mask;
491 		cl = p;
492 		p = cl->parent;
493 
494 	}
495 	if (cl->cmode == HTB_CAN_SEND && mask)
496 		htb_remove_class_from_row(q, cl, mask);
497 }
498 
499 static inline s64 htb_lowater(const struct htb_class *cl)
500 {
501 	if (htb_hysteresis)
502 		return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
503 	else
504 		return 0;
505 }
506 static inline s64 htb_hiwater(const struct htb_class *cl)
507 {
508 	if (htb_hysteresis)
509 		return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
510 	else
511 		return 0;
512 }
513 
514 
515 /**
516  * htb_class_mode - computes and returns current class mode
517  * @cl: the target class
518  * @diff: diff time in microseconds
519  *
520  * It computes cl's mode at time cl->t_c+diff and returns it. If mode
521  * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
522  * from now to time when cl will change its state.
523  * Also it is worth noting that the class mode doesn't change simply
524  * at cl->{c,}tokens == 0 but rather there can be hysteresis in the
525  * 0 .. -cl->{c,}buffer range. It is meant to limit the number of
526  * mode transitions per time unit. The speed gain is about 1/6.
527  */
528 static inline enum htb_cmode
529 htb_class_mode(struct htb_class *cl, s64 *diff)
530 {
531 	s64 toks;
532 
533 	if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
534 		*diff = -toks;
535 		return HTB_CANT_SEND;
536 	}
537 
538 	if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
539 		return HTB_CAN_SEND;
540 
541 	*diff = -toks;
542 	return HTB_MAY_BORROW;
543 }
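/* Editor's worked example (not part of the original file), with
 * htb_hysteresis disabled so both watermarks are 0: if cl->ctokens + *diff
 * is -5000 ns, the class cannot send even at ceil, so the mode is
 * HTB_CANT_SEND and *diff becomes 5000, the shortfall later used as the
 * wait-tree delay. If the ceil bucket is fine but cl->tokens + *diff is
 * -3000 ns, the class may only borrow (HTB_MAY_BORROW, *diff = 3000).
 * Only when both buckets are at or above their watermarks is HTB_CAN_SEND
 * reported.
 */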
544 
545 /**
546  * htb_change_class_mode - changes class's mode
547  * @q: the priority event queue
548  * @cl: the target class
549  * @diff: diff time in microseconds
550  *
551  * This should be the only way to change a class's mode under normal
552  * circumstances. Routine will update feed lists linkage, change mode
553  * and add class to the wait event queue if appropriate. New mode should
554  * be different from old one and cl->pq_key has to be valid if changing
555  * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
556  */
557 static void
558 htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
559 {
560 	enum htb_cmode new_mode = htb_class_mode(cl, diff);
561 
562 	if (new_mode == cl->cmode)
563 		return;
564 
565 	if (new_mode == HTB_CANT_SEND) {
566 		cl->overlimits++;
567 		q->overlimits++;
568 	}
569 
570 	if (cl->prio_activity) {	/* not necessary: speed optimization */
571 		if (cl->cmode != HTB_CANT_SEND)
572 			htb_deactivate_prios(q, cl);
573 		cl->cmode = new_mode;
574 		if (new_mode != HTB_CANT_SEND)
575 			htb_activate_prios(q, cl);
576 	} else
577 		cl->cmode = new_mode;
578 }
579 
580 /**
581  * htb_activate - inserts leaf cl into appropriate active feeds
582  * @q: the priority event queue
583  * @cl: the target class
584  *
585  * Routine learns (new) priority of leaf and activates feed chain
586  * for the prio. It can be called on already active leaf safely.
587  * It also adds leaf into droplist.
588  */
589 static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
590 {
591 	WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen);
592 
593 	if (!cl->prio_activity) {
594 		cl->prio_activity = 1 << cl->prio;
595 		htb_activate_prios(q, cl);
596 	}
597 }
598 
599 /**
600  * htb_deactivate - remove leaf cl from active feeds
601  * @q: the priority event queue
602  * @cl: the target class
603  *
604  * Make sure that leaf is active. In other words it can't be called
605  * with non-active leaf. It also removes class from the drop list.
606  */
607 static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
608 {
609 	WARN_ON(!cl->prio_activity);
610 
611 	htb_deactivate_prios(q, cl);
612 	cl->prio_activity = 0;
613 }
614 
615 static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
616 		       struct sk_buff **to_free)
617 {
618 	int ret;
619 	unsigned int len = qdisc_pkt_len(skb);
620 	struct htb_sched *q = qdisc_priv(sch);
621 	struct htb_class *cl = htb_classify(skb, sch, &ret);
622 
623 	if (cl == HTB_DIRECT) {
624 		/* enqueue to helper queue */
625 		if (q->direct_queue.qlen < q->direct_qlen) {
626 			__qdisc_enqueue_tail(skb, &q->direct_queue);
627 			q->direct_pkts++;
628 		} else {
629 			return qdisc_drop(skb, sch, to_free);
630 		}
631 #ifdef CONFIG_NET_CLS_ACT
632 	} else if (!cl) {
633 		if (ret & __NET_XMIT_BYPASS)
634 			qdisc_qstats_drop(sch);
635 		__qdisc_drop(skb, to_free);
636 		return ret;
637 #endif
638 	} else if ((ret = qdisc_enqueue(skb, cl->leaf.q,
639 					to_free)) != NET_XMIT_SUCCESS) {
640 		if (net_xmit_drop_count(ret)) {
641 			qdisc_qstats_drop(sch);
642 			cl->drops++;
643 		}
644 		return ret;
645 	} else {
646 		htb_activate(q, cl);
647 	}
648 
649 	sch->qstats.backlog += len;
650 	sch->q.qlen++;
651 	return NET_XMIT_SUCCESS;
652 }
653 
654 static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
655 {
656 	s64 toks = diff + cl->tokens;
657 
658 	if (toks > cl->buffer)
659 		toks = cl->buffer;
660 	toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
661 	if (toks <= -cl->mbuffer)
662 		toks = 1 - cl->mbuffer;
663 
664 	cl->tokens = toks;
665 }
666 
667 static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
668 {
669 	s64 toks = diff + cl->ctokens;
670 
671 	if (toks > cl->cbuffer)
672 		toks = cl->cbuffer;
673 	toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
674 	if (toks <= -cl->mbuffer)
675 		toks = 1 - cl->mbuffer;
676 
677 	cl->ctokens = toks;
678 }
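/* Editor's worked arithmetic (not part of the original file): for a class
 * with rate 1 Mbit/s, psched_l2t_ns() charges a 1500-byte packet
 * 1500 * 8 / 10^6 s = 12 ms = 12,000,000 ns worth of tokens. If diff (idle
 * time since the last checkpoint) is 5 ms and tokens were 0, the new balance
 * is 5,000,000 - 12,000,000 = -7,000,000 ns, clamped by the code above to
 * the (1 - mbuffer) .. buffer range.
 */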
679 
680 /**
681  * htb_charge_class - charges amount "bytes" to leaf and ancestors
682  * @q: the priority event queue
683  * @cl: the class to start iterate
684  * @level: the minimum level to account
685  * @skb: the socket buffer
686  *
687  * Routine assumes that packet "bytes" long was dequeued from leaf cl
688  * borrowing from "level". It accounts bytes to ceil leaky bucket for
689  * leaf and all ancestors and to rate bucket for ancestors at levels
690  * "level" and higher. It also handles possible change of mode resulting
691  * from the update. Note that mode can also increase here (MAY_BORROW to
692  * CAN_SEND) because we can use a more precise clock than the event queue here.
693  * In such a case we remove the class from the event queue first.
694  */
695 static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
696 			     int level, struct sk_buff *skb)
697 {
698 	int bytes = qdisc_pkt_len(skb);
699 	enum htb_cmode old_mode;
700 	s64 diff;
701 
702 	while (cl) {
703 		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
704 		if (cl->level >= level) {
705 			if (cl->level == level)
706 				cl->xstats.lends++;
707 			htb_accnt_tokens(cl, bytes, diff);
708 		} else {
709 			cl->xstats.borrows++;
710 			cl->tokens += diff;	/* we moved t_c; update tokens */
711 		}
712 		htb_accnt_ctokens(cl, bytes, diff);
713 		cl->t_c = q->now;
714 
715 		old_mode = cl->cmode;
716 		diff = 0;
717 		htb_change_class_mode(q, cl, &diff);
718 		if (old_mode != cl->cmode) {
719 			if (old_mode != HTB_CAN_SEND)
720 				htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
721 			if (cl->cmode != HTB_CAN_SEND)
722 				htb_add_to_wait_tree(q, cl, diff);
723 		}
724 
725 		/* update basic stats except for leaves which are already updated */
726 		if (cl->level)
727 			bstats_update(&cl->bstats, skb);
728 
729 		cl = cl->parent;
730 	}
731 }
732 
733 /**
734  * htb_do_events - make mode changes to classes at the level
735  * @q: the priority event queue
736  * @level: which wait_pq in 'q->hlevel'
737  * @start: start jiffies
738  *
739  * Scans event queue for pending events and applies them. Returns time of
740  * next pending event (0 for no event in pq, q->now for too many events).
741  * Note: Only events with cl->pq_key <= q->now are applied.
742  */
743 static s64 htb_do_events(struct htb_sched *q, const int level,
744 			 unsigned long start)
745 {
746 	/* don't run for longer than 2 jiffies; 2 is used instead of
747 	 * 1 to simplify things when jiffy is going to be incremented
748 	 * too soon
749 	 */
750 	unsigned long stop_at = start + 2;
751 	struct rb_root *wait_pq = &q->hlevel[level].wait_pq;
752 
753 	while (time_before(jiffies, stop_at)) {
754 		struct htb_class *cl;
755 		s64 diff;
756 		struct rb_node *p = rb_first(wait_pq);
757 
758 		if (!p)
759 			return 0;
760 
761 		cl = rb_entry(p, struct htb_class, pq_node);
762 		if (cl->pq_key > q->now)
763 			return cl->pq_key;
764 
765 		htb_safe_rb_erase(p, wait_pq);
766 		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
767 		htb_change_class_mode(q, cl, &diff);
768 		if (cl->cmode != HTB_CAN_SEND)
769 			htb_add_to_wait_tree(q, cl, diff);
770 	}
771 
772 	/* too much load - let's continue after a break for scheduling */
773 	if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
774 		pr_warn("htb: too many events!\n");
775 		q->warned |= HTB_WARN_TOOMANYEVENTS;
776 	}
777 
778 	return q->now;
779 }
780 
781 /* Returns class->node+prio from the id-tree where the class's id is >= id,
782  * or NULL if no such one exists.
783  */
784 static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
785 					      u32 id)
786 {
787 	struct rb_node *r = NULL;
788 	while (n) {
789 		struct htb_class *cl =
790 		    rb_entry(n, struct htb_class, node[prio]);
791 
792 		if (id > cl->common.classid) {
793 			n = n->rb_right;
794 		} else if (id < cl->common.classid) {
795 			r = n;
796 			n = n->rb_left;
797 		} else {
798 			return n;
799 		}
800 	}
801 	return r;
802 }
803 
804 /**
805  * htb_lookup_leaf - returns next leaf class in DRR order
806  * @hprio: the current one
807  * @prio: which prio in class
808  *
809  * Find the leaf the current feed pointer points to.
810  */
811 static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
812 {
813 	int i;
814 	struct {
815 		struct rb_node *root;
816 		struct rb_node **pptr;
817 		u32 *pid;
818 	} stk[TC_HTB_MAXDEPTH], *sp = stk;
819 
820 	BUG_ON(!hprio->row.rb_node);
821 	sp->root = hprio->row.rb_node;
822 	sp->pptr = &hprio->ptr;
823 	sp->pid = &hprio->last_ptr_id;
824 
825 	for (i = 0; i < 65535; i++) {
826 		if (!*sp->pptr && *sp->pid) {
827 			/* ptr was invalidated but id is valid - try to recover
828 			 * the original or next ptr
829 			 */
830 			*sp->pptr =
831 			    htb_id_find_next_upper(prio, sp->root, *sp->pid);
832 		}
833 		*sp->pid = 0;	/* ptr is valid now, so remove this hint as it
834 				 * can become out of date quickly
835 				 */
836 		if (!*sp->pptr) {	/* we are at right end; rewind & go up */
837 			*sp->pptr = sp->root;
838 			while ((*sp->pptr)->rb_left)
839 				*sp->pptr = (*sp->pptr)->rb_left;
840 			if (sp > stk) {
841 				sp--;
842 				if (!*sp->pptr) {
843 					WARN_ON(1);
844 					return NULL;
845 				}
846 				htb_next_rb_node(sp->pptr);
847 			}
848 		} else {
849 			struct htb_class *cl;
850 			struct htb_prio *clp;
851 
852 			cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
853 			if (!cl->level)
854 				return cl;
855 			clp = &cl->inner.clprio[prio];
856 			(++sp)->root = clp->feed.rb_node;
857 			sp->pptr = &clp->ptr;
858 			sp->pid = &clp->last_ptr_id;
859 		}
860 	}
861 	WARN_ON(1);
862 	return NULL;
863 }
864 
865 /* dequeues packet at given priority and level; call only if
866  * you are sure that there is active class at prio/level
867  */
868 static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
869 					const int level)
870 {
871 	struct sk_buff *skb = NULL;
872 	struct htb_class *cl, *start;
873 	struct htb_level *hlevel = &q->hlevel[level];
874 	struct htb_prio *hprio = &hlevel->hprio[prio];
875 
876 	/* look initial class up in the row */
877 	start = cl = htb_lookup_leaf(hprio, prio);
878 
879 	do {
880 next:
881 		if (unlikely(!cl))
882 			return NULL;
883 
884 		/* class can be empty - it is unlikely but can be true if leaf
885 		 * qdisc drops packets in enqueue routine or if someone used
886 		 * graft operation on the leaf since last dequeue;
887 		 * simply deactivate and skip such class
888 		 */
889 		if (unlikely(cl->leaf.q->q.qlen == 0)) {
890 			struct htb_class *next;
891 			htb_deactivate(q, cl);
892 
893 			/* row/level might become empty */
894 			if ((q->row_mask[level] & (1 << prio)) == 0)
895 				return NULL;
896 
897 			next = htb_lookup_leaf(hprio, prio);
898 
899 			if (cl == start)	/* fix start if we just deleted it */
900 				start = next;
901 			cl = next;
902 			goto next;
903 		}
904 
905 		skb = cl->leaf.q->dequeue(cl->leaf.q);
906 		if (likely(skb != NULL))
907 			break;
908 
909 		qdisc_warn_nonwc("htb", cl->leaf.q);
910 		htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr:
911 					 &q->hlevel[0].hprio[prio].ptr);
912 		cl = htb_lookup_leaf(hprio, prio);
913 
914 	} while (cl != start);
915 
916 	if (likely(skb != NULL)) {
917 		bstats_update(&cl->bstats, skb);
918 		cl->leaf.deficit[level] -= qdisc_pkt_len(skb);
919 		if (cl->leaf.deficit[level] < 0) {
920 			cl->leaf.deficit[level] += cl->quantum;
921 			htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr :
922 						 &q->hlevel[0].hprio[prio].ptr);
923 		}
924 		/* this used to be after charge_class but this constellation
925 		 * gives us slightly better performance
926 		 */
927 		if (!cl->leaf.q->q.qlen)
928 			htb_deactivate(q, cl);
929 		htb_charge_class(q, cl, level, skb);
930 	}
931 	return skb;
932 }
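/* Editor's note on the DRR step above (not part of the original file): each
 * call dequeues one packet and the per-level deficit decides whether the
 * round-robin pointer advances. E.g. with quantum 1514 and deficit 0, a
 * 1000-byte packet drives the deficit to -1000, so one quantum is added
 * (deficit 514) and the pointer moves on; while the deficit stays
 * non-negative the same class is served again first, so each active class
 * gets roughly "quantum" bytes per round on average.
 */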
933 
934 static struct sk_buff *htb_dequeue(struct Qdisc *sch)
935 {
936 	struct sk_buff *skb;
937 	struct htb_sched *q = qdisc_priv(sch);
938 	int level;
939 	s64 next_event;
940 	unsigned long start_at;
941 
942 	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
943 	skb = __qdisc_dequeue_head(&q->direct_queue);
944 	if (skb != NULL) {
945 ok:
946 		qdisc_bstats_update(sch, skb);
947 		qdisc_qstats_backlog_dec(sch, skb);
948 		sch->q.qlen--;
949 		return skb;
950 	}
951 
952 	if (!sch->q.qlen)
953 		goto fin;
954 	q->now = ktime_get_ns();
955 	start_at = jiffies;
956 
957 	next_event = q->now + 5LLU * NSEC_PER_SEC;
958 
959 	for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
960 		/* common case optimization - skip event handler quickly */
961 		int m;
962 		s64 event = q->near_ev_cache[level];
963 
964 		if (q->now >= event) {
965 			event = htb_do_events(q, level, start_at);
966 			if (!event)
967 				event = q->now + NSEC_PER_SEC;
968 			q->near_ev_cache[level] = event;
969 		}
970 
971 		if (next_event > event)
972 			next_event = event;
973 
974 		m = ~q->row_mask[level];
975 		while (m != (int)(-1)) {
976 			int prio = ffz(m);
977 
978 			m |= 1 << prio;
979 			skb = htb_dequeue_tree(q, prio, level);
980 			if (likely(skb != NULL))
981 				goto ok;
982 		}
983 	}
984 	if (likely(next_event > q->now))
985 		qdisc_watchdog_schedule_ns(&q->watchdog, next_event);
986 	else
987 		schedule_work(&q->work);
988 fin:
989 	return skb;
990 }
991 
992 /* reset all classes */
993 /* always called under BH & queue lock */
994 static void htb_reset(struct Qdisc *sch)
995 {
996 	struct htb_sched *q = qdisc_priv(sch);
997 	struct htb_class *cl;
998 	unsigned int i;
999 
1000 	for (i = 0; i < q->clhash.hashsize; i++) {
1001 		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1002 			if (cl->level)
1003 				memset(&cl->inner, 0, sizeof(cl->inner));
1004 			else {
1005 				if (cl->leaf.q && !q->offload)
1006 					qdisc_reset(cl->leaf.q);
1007 			}
1008 			cl->prio_activity = 0;
1009 			cl->cmode = HTB_CAN_SEND;
1010 		}
1011 	}
1012 	qdisc_watchdog_cancel(&q->watchdog);
1013 	__qdisc_reset_queue(&q->direct_queue);
1014 	memset(q->hlevel, 0, sizeof(q->hlevel));
1015 	memset(q->row_mask, 0, sizeof(q->row_mask));
1016 }
1017 
1018 static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
1019 	[TCA_HTB_PARMS]	= { .len = sizeof(struct tc_htb_opt) },
1020 	[TCA_HTB_INIT]	= { .len = sizeof(struct tc_htb_glob) },
1021 	[TCA_HTB_CTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1022 	[TCA_HTB_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1023 	[TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
1024 	[TCA_HTB_RATE64] = { .type = NLA_U64 },
1025 	[TCA_HTB_CEIL64] = { .type = NLA_U64 },
1026 	[TCA_HTB_OFFLOAD] = { .type = NLA_FLAG },
1027 };
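/* Editor's note (not part of the original file): an iproute2 command such as
 *
 *	tc qdisc add dev eth0 root handle 1: htb default 10 direct_qlen 50
 *
 * reaches htb_init() as a TCA_HTB_INIT attribute (struct tc_htb_glob carrying
 * defcls and rate2quantum) plus TCA_HTB_DIRECT_QLEN, while per-class rates or
 * ceilings whose bytes-per-second value does not fit in 32 bits are passed in
 * the 64-bit TCA_HTB_RATE64/TCA_HTB_CEIL64 attributes.
 */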
1028 
1029 static void htb_work_func(struct work_struct *work)
1030 {
1031 	struct htb_sched *q = container_of(work, struct htb_sched, work);
1032 	struct Qdisc *sch = q->watchdog.qdisc;
1033 
1034 	rcu_read_lock();
1035 	__netif_schedule(qdisc_root(sch));
1036 	rcu_read_unlock();
1037 }
1038 
1039 static void htb_set_lockdep_class_child(struct Qdisc *q)
1040 {
1041 	static struct lock_class_key child_key;
1042 
1043 	lockdep_set_class(qdisc_lock(q), &child_key);
1044 }
1045 
1046 static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt)
1047 {
1048 	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt);
1049 }
1050 
1051 static int htb_init(struct Qdisc *sch, struct nlattr *opt,
1052 		    struct netlink_ext_ack *extack)
1053 {
1054 	struct net_device *dev = qdisc_dev(sch);
1055 	struct tc_htb_qopt_offload offload_opt;
1056 	struct htb_sched *q = qdisc_priv(sch);
1057 	struct nlattr *tb[TCA_HTB_MAX + 1];
1058 	struct tc_htb_glob *gopt;
1059 	unsigned int ntx;
1060 	bool offload;
1061 	int err;
1062 
1063 	qdisc_watchdog_init(&q->watchdog, sch);
1064 	INIT_WORK(&q->work, htb_work_func);
1065 
1066 	if (!opt)
1067 		return -EINVAL;
1068 
1069 	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
1070 	if (err)
1071 		return err;
1072 
1073 	err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1074 					  NULL);
1075 	if (err < 0)
1076 		return err;
1077 
1078 	if (!tb[TCA_HTB_INIT])
1079 		return -EINVAL;
1080 
1081 	gopt = nla_data(tb[TCA_HTB_INIT]);
1082 	if (gopt->version != HTB_VER >> 16)
1083 		return -EINVAL;
1084 
1085 	offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]);
1086 
1087 	if (offload) {
1088 		if (sch->parent != TC_H_ROOT)
1089 			return -EOPNOTSUPP;
1090 
1091 		if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
1092 			return -EOPNOTSUPP;
1093 
1094 		q->num_direct_qdiscs = dev->real_num_tx_queues;
1095 		q->direct_qdiscs = kcalloc(q->num_direct_qdiscs,
1096 					   sizeof(*q->direct_qdiscs),
1097 					   GFP_KERNEL);
1098 		if (!q->direct_qdiscs)
1099 			return -ENOMEM;
1100 	}
1101 
1102 	err = qdisc_class_hash_init(&q->clhash);
1103 	if (err < 0)
1104 		goto err_free_direct_qdiscs;
1105 
1106 	qdisc_skb_head_init(&q->direct_queue);
1107 
1108 	if (tb[TCA_HTB_DIRECT_QLEN])
1109 		q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
1110 	else
1111 		q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
1112 
1113 	if ((q->rate2quantum = gopt->rate2quantum) < 1)
1114 		q->rate2quantum = 1;
1115 	q->defcls = gopt->defcls;
1116 
1117 	if (!offload)
1118 		return 0;
1119 
1120 	for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1121 		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1122 		struct Qdisc *qdisc;
1123 
1124 		qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1125 					  TC_H_MAKE(sch->handle, 0), extack);
1126 		if (!qdisc) {
1127 			err = -ENOMEM;
1128 			goto err_free_qdiscs;
1129 		}
1130 
1131 		htb_set_lockdep_class_child(qdisc);
1132 		q->direct_qdiscs[ntx] = qdisc;
1133 		qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1134 	}
1135 
1136 	sch->flags |= TCQ_F_MQROOT;
1137 
1138 	offload_opt = (struct tc_htb_qopt_offload) {
1139 		.command = TC_HTB_CREATE,
1140 		.parent_classid = TC_H_MAJ(sch->handle) >> 16,
1141 		.classid = TC_H_MIN(q->defcls),
1142 		.extack = extack,
1143 	};
1144 	err = htb_offload(dev, &offload_opt);
1145 	if (err)
1146 		goto err_free_qdiscs;
1147 
1148 	/* Defer this assignment, so that htb_destroy skips offload-related
1149 	 * parts (especially calling ndo_setup_tc) on errors.
1150 	 */
1151 	q->offload = true;
1152 
1153 	return 0;
1154 
1155 err_free_qdiscs:
1156 	for (ntx = 0; ntx < q->num_direct_qdiscs && q->direct_qdiscs[ntx];
1157 	     ntx++)
1158 		qdisc_put(q->direct_qdiscs[ntx]);
1159 
1160 	qdisc_class_hash_destroy(&q->clhash);
1161 	/* Prevent use-after-free and double-free when htb_destroy gets called.
1162 	 */
1163 	q->clhash.hash = NULL;
1164 	q->clhash.hashsize = 0;
1165 
1166 err_free_direct_qdiscs:
1167 	kfree(q->direct_qdiscs);
1168 	q->direct_qdiscs = NULL;
1169 	return err;
1170 }
1171 
1172 static void htb_attach_offload(struct Qdisc *sch)
1173 {
1174 	struct net_device *dev = qdisc_dev(sch);
1175 	struct htb_sched *q = qdisc_priv(sch);
1176 	unsigned int ntx;
1177 
1178 	for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1179 		struct Qdisc *old, *qdisc = q->direct_qdiscs[ntx];
1180 
1181 		old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
1182 		qdisc_put(old);
1183 		qdisc_hash_add(qdisc, false);
1184 	}
1185 	for (ntx = q->num_direct_qdiscs; ntx < dev->num_tx_queues; ntx++) {
1186 		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1187 		struct Qdisc *old = dev_graft_qdisc(dev_queue, NULL);
1188 
1189 		qdisc_put(old);
1190 	}
1191 
1192 	kfree(q->direct_qdiscs);
1193 	q->direct_qdiscs = NULL;
1194 }
1195 
1196 static void htb_attach_software(struct Qdisc *sch)
1197 {
1198 	struct net_device *dev = qdisc_dev(sch);
1199 	unsigned int ntx;
1200 
1201 	/* Resemble qdisc_graft behavior. */
1202 	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
1203 		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1204 		struct Qdisc *old = dev_graft_qdisc(dev_queue, sch);
1205 
1206 		qdisc_refcount_inc(sch);
1207 
1208 		qdisc_put(old);
1209 	}
1210 }
1211 
1212 static void htb_attach(struct Qdisc *sch)
1213 {
1214 	struct htb_sched *q = qdisc_priv(sch);
1215 
1216 	if (q->offload)
1217 		htb_attach_offload(sch);
1218 	else
1219 		htb_attach_software(sch);
1220 }
1221 
1222 static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1223 {
1224 	struct htb_sched *q = qdisc_priv(sch);
1225 	struct nlattr *nest;
1226 	struct tc_htb_glob gopt;
1227 
1228 	if (q->offload)
1229 		sch->flags |= TCQ_F_OFFLOADED;
1230 	else
1231 		sch->flags &= ~TCQ_F_OFFLOADED;
1232 
1233 	sch->qstats.overlimits = q->overlimits;
1234 	/* It's safe not to acquire the qdisc lock. As we hold RTNL,
1235 	 * no change can happen on the qdisc parameters.
1236 	 */
1237 
1238 	gopt.direct_pkts = q->direct_pkts;
1239 	gopt.version = HTB_VER;
1240 	gopt.rate2quantum = q->rate2quantum;
1241 	gopt.defcls = q->defcls;
1242 	gopt.debug = 0;
1243 
1244 	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1245 	if (nest == NULL)
1246 		goto nla_put_failure;
1247 	if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
1248 	    nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
1249 		goto nla_put_failure;
1250 	if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1251 		goto nla_put_failure;
1252 
1253 	return nla_nest_end(skb, nest);
1254 
1255 nla_put_failure:
1256 	nla_nest_cancel(skb, nest);
1257 	return -1;
1258 }
1259 
1260 static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1261 			  struct sk_buff *skb, struct tcmsg *tcm)
1262 {
1263 	struct htb_class *cl = (struct htb_class *)arg;
1264 	struct htb_sched *q = qdisc_priv(sch);
1265 	struct nlattr *nest;
1266 	struct tc_htb_opt opt;
1267 
1268 	/* It's safe not to acquire the qdisc lock. As we hold RTNL,
1269 	 * no change can happen on the class parameters.
1270 	 */
1271 	tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1272 	tcm->tcm_handle = cl->common.classid;
1273 	if (!cl->level && cl->leaf.q)
1274 		tcm->tcm_info = cl->leaf.q->handle;
1275 
1276 	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1277 	if (nest == NULL)
1278 		goto nla_put_failure;
1279 
1280 	memset(&opt, 0, sizeof(opt));
1281 
1282 	psched_ratecfg_getrate(&opt.rate, &cl->rate);
1283 	opt.buffer = PSCHED_NS2TICKS(cl->buffer);
1284 	psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
1285 	opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
1286 	opt.quantum = cl->quantum;
1287 	opt.prio = cl->prio;
1288 	opt.level = cl->level;
1289 	if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
1290 		goto nla_put_failure;
1291 	if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1292 		goto nla_put_failure;
1293 	if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
1294 	    nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps,
1295 			      TCA_HTB_PAD))
1296 		goto nla_put_failure;
1297 	if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) &&
1298 	    nla_put_u64_64bit(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps,
1299 			      TCA_HTB_PAD))
1300 		goto nla_put_failure;
1301 
1302 	return nla_nest_end(skb, nest);
1303 
1304 nla_put_failure:
1305 	nla_nest_cancel(skb, nest);
1306 	return -1;
1307 }
1308 
1309 static void htb_offload_aggregate_stats(struct htb_sched *q,
1310 					struct htb_class *cl)
1311 {
1312 	struct htb_class *c;
1313 	unsigned int i;
1314 
1315 	memset(&cl->bstats, 0, sizeof(cl->bstats));
1316 
1317 	for (i = 0; i < q->clhash.hashsize; i++) {
1318 		hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
1319 			struct htb_class *p = c;
1320 
1321 			while (p && p->level < cl->level)
1322 				p = p->parent;
1323 
1324 			if (p != cl)
1325 				continue;
1326 
1327 			cl->bstats.bytes += c->bstats_bias.bytes;
1328 			cl->bstats.packets += c->bstats_bias.packets;
1329 			if (c->level == 0) {
1330 				cl->bstats.bytes += c->leaf.q->bstats.bytes;
1331 				cl->bstats.packets += c->leaf.q->bstats.packets;
1332 			}
1333 		}
1334 	}
1335 }
1336 
1337 static int
1338 htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1339 {
1340 	struct htb_class *cl = (struct htb_class *)arg;
1341 	struct htb_sched *q = qdisc_priv(sch);
1342 	struct gnet_stats_queue qs = {
1343 		.drops = cl->drops,
1344 		.overlimits = cl->overlimits,
1345 	};
1346 	__u32 qlen = 0;
1347 
1348 	if (!cl->level && cl->leaf.q)
1349 		qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog);
1350 
1351 	cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens),
1352 				    INT_MIN, INT_MAX);
1353 	cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
1354 				     INT_MIN, INT_MAX);
1355 
1356 	if (q->offload) {
1357 		if (!cl->level) {
1358 			if (cl->leaf.q)
1359 				cl->bstats = cl->leaf.q->bstats;
1360 			else
1361 				memset(&cl->bstats, 0, sizeof(cl->bstats));
1362 			cl->bstats.bytes += cl->bstats_bias.bytes;
1363 			cl->bstats.packets += cl->bstats_bias.packets;
1364 		} else {
1365 			htb_offload_aggregate_stats(q, cl);
1366 		}
1367 	}
1368 
1369 	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
1370 				  d, NULL, &cl->bstats) < 0 ||
1371 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1372 	    gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
1373 		return -1;
1374 
1375 	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1376 }
1377 
1378 static struct netdev_queue *
1379 htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm)
1380 {
1381 	struct net_device *dev = qdisc_dev(sch);
1382 	struct tc_htb_qopt_offload offload_opt;
1383 	struct htb_sched *q = qdisc_priv(sch);
1384 	int err;
1385 
1386 	if (!q->offload)
1387 		return sch->dev_queue;
1388 
1389 	offload_opt = (struct tc_htb_qopt_offload) {
1390 		.command = TC_HTB_LEAF_QUERY_QUEUE,
1391 		.classid = TC_H_MIN(tcm->tcm_parent),
1392 	};
1393 	err = htb_offload(dev, &offload_opt);
1394 	if (err || offload_opt.qid >= dev->num_tx_queues)
1395 		return NULL;
1396 	return netdev_get_tx_queue(dev, offload_opt.qid);
1397 }
1398 
1399 static struct Qdisc *
1400 htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q)
1401 {
1402 	struct net_device *dev = dev_queue->dev;
1403 	struct Qdisc *old_q;
1404 
1405 	if (dev->flags & IFF_UP)
1406 		dev_deactivate(dev);
1407 	old_q = dev_graft_qdisc(dev_queue, new_q);
1408 	if (new_q)
1409 		new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1410 	if (dev->flags & IFF_UP)
1411 		dev_activate(dev);
1412 
1413 	return old_q;
1414 }
1415 
1416 static struct netdev_queue *htb_offload_get_queue(struct htb_class *cl)
1417 {
1418 	struct netdev_queue *queue;
1419 
1420 	queue = cl->leaf.offload_queue;
1421 	if (!(cl->leaf.q->flags & TCQ_F_BUILTIN))
1422 		WARN_ON(cl->leaf.q->dev_queue != queue);
1423 
1424 	return queue;
1425 }
1426 
1427 static void htb_offload_move_qdisc(struct Qdisc *sch, struct htb_class *cl_old,
1428 				   struct htb_class *cl_new, bool destroying)
1429 {
1430 	struct netdev_queue *queue_old, *queue_new;
1431 	struct net_device *dev = qdisc_dev(sch);
1432 
1433 	queue_old = htb_offload_get_queue(cl_old);
1434 	queue_new = htb_offload_get_queue(cl_new);
1435 
1436 	if (!destroying) {
1437 		struct Qdisc *qdisc;
1438 
1439 		if (dev->flags & IFF_UP)
1440 			dev_deactivate(dev);
1441 		qdisc = dev_graft_qdisc(queue_old, NULL);
1442 		WARN_ON(qdisc != cl_old->leaf.q);
1443 	}
1444 
1445 	if (!(cl_old->leaf.q->flags & TCQ_F_BUILTIN))
1446 		cl_old->leaf.q->dev_queue = queue_new;
1447 	cl_old->leaf.offload_queue = queue_new;
1448 
1449 	if (!destroying) {
1450 		struct Qdisc *qdisc;
1451 
1452 		qdisc = dev_graft_qdisc(queue_new, cl_old->leaf.q);
1453 		if (dev->flags & IFF_UP)
1454 			dev_activate(dev);
1455 		WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN));
1456 	}
1457 }
1458 
1459 static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1460 		     struct Qdisc **old, struct netlink_ext_ack *extack)
1461 {
1462 	struct netdev_queue *dev_queue = sch->dev_queue;
1463 	struct htb_class *cl = (struct htb_class *)arg;
1464 	struct htb_sched *q = qdisc_priv(sch);
1465 	struct Qdisc *old_q;
1466 
1467 	if (cl->level)
1468 		return -EINVAL;
1469 
1470 	if (q->offload)
1471 		dev_queue = htb_offload_get_queue(cl);
1472 
1473 	if (!new) {
1474 		new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1475 					cl->common.classid, extack);
1476 		if (!new)
1477 			return -ENOBUFS;
1478 	}
1479 
1480 	if (q->offload) {
1481 		htb_set_lockdep_class_child(new);
1482 		/* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1483 		qdisc_refcount_inc(new);
1484 		old_q = htb_graft_helper(dev_queue, new);
1485 	}
1486 
1487 	*old = qdisc_replace(sch, new, &cl->leaf.q);
1488 
1489 	if (q->offload) {
1490 		WARN_ON(old_q != *old);
1491 		qdisc_put(old_q);
1492 	}
1493 
1494 	return 0;
1495 }
1496 
1497 static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1498 {
1499 	struct htb_class *cl = (struct htb_class *)arg;
1500 	return !cl->level ? cl->leaf.q : NULL;
1501 }
1502 
1503 static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1504 {
1505 	struct htb_class *cl = (struct htb_class *)arg;
1506 
1507 	htb_deactivate(qdisc_priv(sch), cl);
1508 }
1509 
1510 static inline int htb_parent_last_child(struct htb_class *cl)
1511 {
1512 	if (!cl->parent)
1513 		/* the root class */
1514 		return 0;
1515 	if (cl->parent->children > 1)
1516 		/* not the last child */
1517 		return 0;
1518 	return 1;
1519 }
1520 
1521 static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl,
1522 			       struct Qdisc *new_q)
1523 {
1524 	struct htb_sched *q = qdisc_priv(sch);
1525 	struct htb_class *parent = cl->parent;
1526 
1527 	WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity);
1528 
1529 	if (parent->cmode != HTB_CAN_SEND)
1530 		htb_safe_rb_erase(&parent->pq_node,
1531 				  &q->hlevel[parent->level].wait_pq);
1532 
1533 	parent->level = 0;
1534 	memset(&parent->inner, 0, sizeof(parent->inner));
1535 	parent->leaf.q = new_q ? new_q : &noop_qdisc;
1536 	parent->tokens = parent->buffer;
1537 	parent->ctokens = parent->cbuffer;
1538 	parent->t_c = ktime_get_ns();
1539 	parent->cmode = HTB_CAN_SEND;
1540 	if (q->offload)
1541 		parent->leaf.offload_queue = cl->leaf.offload_queue;
1542 }
1543 
1544 static void htb_parent_to_leaf_offload(struct Qdisc *sch,
1545 				       struct netdev_queue *dev_queue,
1546 				       struct Qdisc *new_q)
1547 {
1548 	struct Qdisc *old_q;
1549 
1550 	/* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1551 	if (new_q)
1552 		qdisc_refcount_inc(new_q);
1553 	old_q = htb_graft_helper(dev_queue, new_q);
1554 	WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1555 }
1556 
1557 static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
1558 				     bool last_child, bool destroying,
1559 				     struct netlink_ext_ack *extack)
1560 {
1561 	struct tc_htb_qopt_offload offload_opt;
1562 	struct netdev_queue *dev_queue;
1563 	struct Qdisc *q = cl->leaf.q;
1564 	struct Qdisc *old;
1565 	int err;
1566 
1567 	if (cl->level)
1568 		return -EINVAL;
1569 
1570 	WARN_ON(!q);
1571 	dev_queue = htb_offload_get_queue(cl);
1572 	/* When destroying, caller qdisc_graft grafts the new qdisc and invokes
1573 	 * qdisc_put for the qdisc being destroyed. htb_destroy_class_offload
1574 	 * does not need to graft or qdisc_put the qdisc being destroyed.
1575 	 */
1576 	if (!destroying) {
1577 		old = htb_graft_helper(dev_queue, NULL);
1578 		/* Last qdisc grafted should be the same as cl->leaf.q when
1579 		 * calling htb_delete.
1580 		 */
1581 		WARN_ON(old != q);
1582 	}
1583 
1584 	if (cl->parent) {
1585 		cl->parent->bstats_bias.bytes += q->bstats.bytes;
1586 		cl->parent->bstats_bias.packets += q->bstats.packets;
1587 	}
1588 
1589 	offload_opt = (struct tc_htb_qopt_offload) {
1590 		.command = !last_child ? TC_HTB_LEAF_DEL :
1591 			   destroying ? TC_HTB_LEAF_DEL_LAST_FORCE :
1592 			   TC_HTB_LEAF_DEL_LAST,
1593 		.classid = cl->common.classid,
1594 		.extack = extack,
1595 	};
1596 	err = htb_offload(qdisc_dev(sch), &offload_opt);
1597 
1598 	if (!destroying) {
1599 		if (!err)
1600 			qdisc_put(old);
1601 		else
1602 			htb_graft_helper(dev_queue, old);
1603 	}
1604 
1605 	if (last_child)
1606 		return err;
1607 
1608 	if (!err && offload_opt.classid != TC_H_MIN(cl->common.classid)) {
1609 		u32 classid = TC_H_MAJ(sch->handle) |
1610 			      TC_H_MIN(offload_opt.classid);
1611 		struct htb_class *moved_cl = htb_find(classid, sch);
1612 
1613 		htb_offload_move_qdisc(sch, moved_cl, cl, destroying);
1614 	}
1615 
1616 	return err;
1617 }
1618 
1619 static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1620 {
1621 	if (!cl->level) {
1622 		WARN_ON(!cl->leaf.q);
1623 		qdisc_put(cl->leaf.q);
1624 	}
1625 	gen_kill_estimator(&cl->rate_est);
1626 	tcf_block_put(cl->block);
1627 	kfree(cl);
1628 }
1629 
1630 static void htb_destroy(struct Qdisc *sch)
1631 {
1632 	struct net_device *dev = qdisc_dev(sch);
1633 	struct tc_htb_qopt_offload offload_opt;
1634 	struct htb_sched *q = qdisc_priv(sch);
1635 	struct hlist_node *next;
1636 	bool nonempty, changed;
1637 	struct htb_class *cl;
1638 	unsigned int i;
1639 
1640 	cancel_work_sync(&q->work);
1641 	qdisc_watchdog_cancel(&q->watchdog);
1642 	/* This line used to be after htb_destroy_class call below
1643 	 * and surprisingly it worked in 2.4. But it must precede it
1644 	 * because filters need their target class alive to be able to call
1645 	 * unbind_filter on it (without Oops).
1646 	 */
1647 	tcf_block_put(q->block);
1648 
1649 	for (i = 0; i < q->clhash.hashsize; i++) {
1650 		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1651 			tcf_block_put(cl->block);
1652 			cl->block = NULL;
1653 		}
1654 	}
1655 
1656 	do {
1657 		nonempty = false;
1658 		changed = false;
1659 		for (i = 0; i < q->clhash.hashsize; i++) {
1660 			hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
1661 						  common.hnode) {
1662 				bool last_child;
1663 
1664 				if (!q->offload) {
1665 					htb_destroy_class(sch, cl);
1666 					continue;
1667 				}
1668 
1669 				nonempty = true;
1670 
1671 				if (cl->level)
1672 					continue;
1673 
1674 				changed = true;
1675 
1676 				last_child = htb_parent_last_child(cl);
1677 				htb_destroy_class_offload(sch, cl, last_child,
1678 							  true, NULL);
1679 				qdisc_class_hash_remove(&q->clhash,
1680 							&cl->common);
1681 				if (cl->parent)
1682 					cl->parent->children--;
1683 				if (last_child)
1684 					htb_parent_to_leaf(sch, cl, NULL);
1685 				htb_destroy_class(sch, cl);
1686 			}
1687 		}
1688 	} while (changed);
1689 	WARN_ON(nonempty);
1690 
1691 	qdisc_class_hash_destroy(&q->clhash);
1692 	__qdisc_reset_queue(&q->direct_queue);
1693 
1694 	if (!q->offload)
1695 		return;
1696 
1697 	offload_opt = (struct tc_htb_qopt_offload) {
1698 		.command = TC_HTB_DESTROY,
1699 	};
1700 	htb_offload(dev, &offload_opt);
1701 
1702 	if (!q->direct_qdiscs)
1703 		return;
1704 	for (i = 0; i < q->num_direct_qdiscs && q->direct_qdiscs[i]; i++)
1705 		qdisc_put(q->direct_qdiscs[i]);
1706 	kfree(q->direct_qdiscs);
1707 }
1708 
1709 static int htb_delete(struct Qdisc *sch, unsigned long arg,
1710 		      struct netlink_ext_ack *extack)
1711 {
1712 	struct htb_sched *q = qdisc_priv(sch);
1713 	struct htb_class *cl = (struct htb_class *)arg;
1714 	struct Qdisc *new_q = NULL;
1715 	int last_child = 0;
1716 	int err;
1717 
1718 	/* TODO: why don't we allow deleting a subtree ? references ? does
1719 	 * tc subsys guarantee us that in htb_destroy it holds no class
1720 	 * refs so that we can remove children safely there ?
1721 	 */
1722 	if (cl->children || cl->filter_cnt)
1723 		return -EBUSY;
1724 
1725 	if (!cl->level && htb_parent_last_child(cl))
1726 		last_child = 1;
1727 
1728 	if (q->offload) {
1729 		err = htb_destroy_class_offload(sch, cl, last_child, false,
1730 						extack);
1731 		if (err)
1732 			return err;
1733 	}
1734 
1735 	if (last_child) {
1736 		struct netdev_queue *dev_queue = sch->dev_queue;
1737 
1738 		if (q->offload)
1739 			dev_queue = htb_offload_get_queue(cl);
1740 
1741 		new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1742 					  cl->parent->common.classid,
1743 					  NULL);
1744 		if (q->offload) {
1745 			if (new_q)
1746 				htb_set_lockdep_class_child(new_q);
1747 			htb_parent_to_leaf_offload(sch, dev_queue, new_q);
1748 		}
1749 	}
1750 
1751 	sch_tree_lock(sch);
1752 
1753 	if (!cl->level)
1754 		qdisc_purge_queue(cl->leaf.q);
1755 
1756 	/* delete from hash and active; remainder in destroy_class */
1757 	qdisc_class_hash_remove(&q->clhash, &cl->common);
1758 	if (cl->parent)
1759 		cl->parent->children--;
1760 
1761 	if (cl->prio_activity)
1762 		htb_deactivate(q, cl);
1763 
1764 	if (cl->cmode != HTB_CAN_SEND)
1765 		htb_safe_rb_erase(&cl->pq_node,
1766 				  &q->hlevel[cl->level].wait_pq);
1767 
1768 	if (last_child)
1769 		htb_parent_to_leaf(sch, cl, new_q);
1770 
1771 	sch_tree_unlock(sch);
1772 
1773 	htb_destroy_class(sch, cl);
1774 	return 0;
1775 }
1776 
1777 static int htb_change_class(struct Qdisc *sch, u32 classid,
1778 			    u32 parentid, struct nlattr **tca,
1779 			    unsigned long *arg, struct netlink_ext_ack *extack)
1780 {
1781 	int err = -EINVAL;
1782 	struct htb_sched *q = qdisc_priv(sch);
1783 	struct htb_class *cl = (struct htb_class *)*arg, *parent;
1784 	struct tc_htb_qopt_offload offload_opt;
1785 	struct nlattr *opt = tca[TCA_OPTIONS];
1786 	struct nlattr *tb[TCA_HTB_MAX + 1];
1787 	struct Qdisc *parent_qdisc = NULL;
1788 	struct netdev_queue *dev_queue;
1789 	struct tc_htb_opt *hopt;
1790 	u64 rate64, ceil64;
1791 	int warn = 0;
1792 
1793 	/* extract all subattrs from opt attr */
1794 	if (!opt)
1795 		goto failure;
1796 
1797 	err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1798 					  NULL);
1799 	if (err < 0)
1800 		goto failure;
1801 
1802 	err = -EINVAL;
1803 	if (tb[TCA_HTB_PARMS] == NULL)
1804 		goto failure;
1805 
1806 	parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1807 
1808 	hopt = nla_data(tb[TCA_HTB_PARMS]);
1809 	if (!hopt->rate.rate || !hopt->ceil.rate)
1810 		goto failure;
1811 
1812 	if (q->offload) {
1813 		/* Options not supported by the offload. */
1814 		if (hopt->rate.overhead || hopt->ceil.overhead) {
1815 			NL_SET_ERR_MSG(extack, "HTB offload doesn't support the overhead parameter");
1816 			goto failure;
1817 		}
1818 		if (hopt->rate.mpu || hopt->ceil.mpu) {
1819 			NL_SET_ERR_MSG(extack, "HTB offload doesn't support the mpu parameter");
1820 			goto failure;
1821 		}
1822 		if (hopt->quantum) {
1823 			NL_SET_ERR_MSG(extack, "HTB offload doesn't support the quantum parameter");
1824 			goto failure;
1825 		}
1826 		if (hopt->prio) {
1827 			NL_SET_ERR_MSG(extack, "HTB offload doesn't support the prio parameter");
1828 			goto failure;
1829 		}
1830 	}
1831 
1832 	/* Keep backward compatibility with rate_table based iproute2 tc */
1833 	if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
1834 		qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
1835 					      NULL));
1836 
1837 	if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE)
1838 		qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
1839 					      NULL));
1840 
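	/* TCA_HTB_RATE64/CEIL64 carry rates that do not fit in the 32-bit
	 * tc_ratespec fields; the larger of the two values wins below.
	 */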
1841 	rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
1842 	ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
1843 
1844 	if (!cl) {		/* new class */
1845 		struct net_device *dev = qdisc_dev(sch);
1846 		struct Qdisc *new_q, *old_q;
1847 		int prio;
1848 		struct {
1849 			struct nlattr		nla;
1850 			struct gnet_estimator	opt;
1851 		} est = {
1852 			.nla = {
1853 				.nla_len	= nla_attr_size(sizeof(est.opt)),
1854 				.nla_type	= TCA_RATE,
1855 			},
1856 			.opt = {
1857 				/* 4s interval, 16s averaging constant */
1858 				.interval	= 2,
1859 				.ewma_log	= 2,
1860 			},
1861 		};
1862 
1863 		/* check for valid classid */
1864 		if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
1865 		    htb_find(classid, sch))
1866 			goto failure;
1867 
1868 		/* check maximal depth */
1869 		if (parent && parent->parent && parent->parent->level < 2) {
1870 			pr_err("htb: tree is too deep\n");
1871 			goto failure;
1872 		}
1873 		err = -ENOBUFS;
1874 		cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1875 		if (!cl)
1876 			goto failure;
1877 
1878 		err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
1879 		if (err) {
1880 			kfree(cl);
1881 			goto failure;
1882 		}
1883 		if (htb_rate_est || tca[TCA_RATE]) {
1884 			err = gen_new_estimator(&cl->bstats, NULL,
1885 						&cl->rate_est,
1886 						NULL,
1887 						qdisc_root_sleeping_running(sch),
1888 						tca[TCA_RATE] ? : &est.nla);
1889 			if (err)
1890 				goto err_block_put;
1891 		}
1892 
1893 		cl->children = 0;
1894 		RB_CLEAR_NODE(&cl->pq_node);
1895 
1896 		for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1897 			RB_CLEAR_NODE(&cl->node[prio]);
1898 
1899 		cl->common.classid = classid;
1900 
1901 		/* Make sure nothing interrupts us between the two
1902 		 * ndo_setup_tc calls.
1903 		 */
1904 		ASSERT_RTNL();
1905 
1906 		/* create the leaf qdisc early because it uses kmalloc(GFP_KERNEL),
1907 		 * which can't be used inside sch_tree_lock
1908 		 * -- thanks to Karlis Peisenieks
1909 		 */
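		/* Three cases for the leaf's dev_queue: without offload, use the
		 * qdisc's own queue; with offload, a class whose parent is not
		 * currently a leaf gets a freshly allocated TX queue
		 * (TC_HTB_LEAF_ALLOC_QUEUE); a first child instead reuses the
		 * parent's queue and converts the parent via TC_HTB_LEAF_TO_INNER.
		 */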
1910 		if (!q->offload) {
1911 			dev_queue = sch->dev_queue;
1912 		} else if (!(parent && !parent->level)) {
1913 			/* Assign a dev_queue to this classid. */
1914 			offload_opt = (struct tc_htb_qopt_offload) {
1915 				.command = TC_HTB_LEAF_ALLOC_QUEUE,
1916 				.classid = cl->common.classid,
1917 				.parent_classid = parent ?
1918 					TC_H_MIN(parent->common.classid) :
1919 					TC_HTB_CLASSID_ROOT,
1920 				.rate = max_t(u64, hopt->rate.rate, rate64),
1921 				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
1922 				.extack = extack,
1923 			};
1924 			err = htb_offload(dev, &offload_opt);
1925 			if (err) {
1926 				pr_err("htb: TC_HTB_LEAF_ALLOC_QUEUE failed with err = %d\n",
1927 				       err);
1928 				goto err_kill_estimator;
1929 			}
1930 			dev_queue = netdev_get_tx_queue(dev, offload_opt.qid);
1931 		} else { /* First child. */
1932 			dev_queue = htb_offload_get_queue(parent);
1933 			old_q = htb_graft_helper(dev_queue, NULL);
1934 			WARN_ON(old_q != parent->leaf.q);
1935 			offload_opt = (struct tc_htb_qopt_offload) {
1936 				.command = TC_HTB_LEAF_TO_INNER,
1937 				.classid = cl->common.classid,
1938 				.parent_classid =
1939 					TC_H_MIN(parent->common.classid),
1940 				.rate = max_t(u64, hopt->rate.rate, rate64),
1941 				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
1942 				.extack = extack,
1943 			};
1944 			err = htb_offload(dev, &offload_opt);
1945 			if (err) {
1946 				pr_err("htb: TC_HTB_LEAF_TO_INNER failed with err = %d\n",
1947 				       err);
1948 				htb_graft_helper(dev_queue, old_q);
1949 				goto err_kill_estimator;
1950 			}
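			/* Fold the departing leaf qdisc's counters into the
			 * parent's bias so its accumulated stats survive the
			 * qdisc_put() below.
			 */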
1951 			parent->bstats_bias.bytes += old_q->bstats.bytes;
1952 			parent->bstats_bias.packets += old_q->bstats.packets;
1953 			qdisc_put(old_q);
1954 		}
1955 		new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1956 					  classid, NULL);
1957 		if (q->offload) {
1958 			if (new_q) {
1959 				htb_set_lockdep_class_child(new_q);
1960 				/* One ref for cl->leaf.q, the other for
1961 				 * dev_queue->qdisc.
1962 				 */
1963 				qdisc_refcount_inc(new_q);
1964 			}
1965 			old_q = htb_graft_helper(dev_queue, new_q);
1966 			/* No qdisc_put needed. */
1967 			WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1968 		}
1969 		sch_tree_lock(sch);
1970 		if (parent && !parent->level) {
1971 			/* turn parent into inner node */
1972 			qdisc_purge_queue(parent->leaf.q);
1973 			parent_qdisc = parent->leaf.q;
1974 			if (parent->prio_activity)
1975 				htb_deactivate(q, parent);
1976 
1977 			/* remove from evt list because of level change */
1978 			if (parent->cmode != HTB_CAN_SEND) {
1979 				htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq);
1980 				parent->cmode = HTB_CAN_SEND;
1981 			}
1982 			parent->level = (parent->parent ? parent->parent->level
1983 					 : TC_HTB_MAXDEPTH) - 1;
1984 			memset(&parent->inner, 0, sizeof(parent->inner));
1985 		}
1986 
1987 		/* the leaf (this class) needs an elementary qdisc */
1988 		cl->leaf.q = new_q ? new_q : &noop_qdisc;
1989 		if (q->offload)
1990 			cl->leaf.offload_queue = dev_queue;
1991 
1992 		cl->parent = parent;
1993 
1994 		/* set class to be in HTB_CAN_SEND state */
1995 		cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
1996 		cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
1997 		cl->mbuffer = 60ULL * NSEC_PER_SEC;	/* 1min */
1998 		cl->t_c = ktime_get_ns();
1999 		cl->cmode = HTB_CAN_SEND;
2000 
2001 		/* attach to the hash list and parent's family */
2002 		qdisc_class_hash_insert(&q->clhash, &cl->common);
2003 		if (parent)
2004 			parent->children++;
2005 		if (cl->leaf.q != &noop_qdisc)
2006 			qdisc_hash_add(cl->leaf.q, true);
2007 	} else {
2008 		if (tca[TCA_RATE]) {
2009 			err = gen_replace_estimator(&cl->bstats, NULL,
2010 						    &cl->rate_est,
2011 						    NULL,
2012 						    qdisc_root_sleeping_running(sch),
2013 						    tca[TCA_RATE]);
2014 			if (err)
2015 				return err;
2016 		}
2017 
2018 		if (q->offload) {
2019 			struct net_device *dev = qdisc_dev(sch);
2020 
2021 			offload_opt = (struct tc_htb_qopt_offload) {
2022 				.command = TC_HTB_NODE_MODIFY,
2023 				.classid = cl->common.classid,
2024 				.rate = max_t(u64, hopt->rate.rate, rate64),
2025 				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
2026 				.extack = extack,
2027 			};
2028 			err = htb_offload(dev, &offload_opt);
2029 			if (err)
2030 				/* Estimator was replaced, and rollback may fail
2031 				 * as well, so we don't try to recover it, and
2032 				 * the estimator won't work properly with the
2033 				 * offload anyway, because bstats are updated
2034 				 * only when the stats are queried.
2035 				 */
2036 				return err;
2037 		}
2038 
2039 		sch_tree_lock(sch);
2040 	}
2041 
2042 	psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
2043 	psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
2044 
2045 	/* there used to be a nasty bug here: we have to check that the node
2046 	 * is really a leaf before changing cl->leaf!
2047 	 */
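	/* The quantum used for DRR among leaves defaults to rate / r2q,
	 * clamped to the [1000, 200000] byte range unless set explicitly.
	 */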
2048 	if (!cl->level) {
2049 		u64 quantum = cl->rate.rate_bytes_ps;
2050 
2051 		do_div(quantum, q->rate2quantum);
2052 		cl->quantum = min_t(u64, quantum, INT_MAX);
2053 
2054 		if (!hopt->quantum && cl->quantum < 1000) {
2055 			warn = -1;
2056 			cl->quantum = 1000;
2057 		}
2058 		if (!hopt->quantum && cl->quantum > 200000) {
2059 			warn = 1;
2060 			cl->quantum = 200000;
2061 		}
2062 		if (hopt->quantum)
2063 			cl->quantum = hopt->quantum;
2064 		if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
2065 			cl->prio = TC_HTB_NUMPRIO - 1;
2066 	}
2067 
2068 	cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
2069 	cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
2070 
2071 	sch_tree_unlock(sch);
2072 	qdisc_put(parent_qdisc);
2073 
2074 	if (warn)
2075 		pr_warn("HTB: quantum of class %X is %s. Consider r2q change.\n",
2076 			    cl->common.classid, (warn == -1 ? "small" : "big"));
2077 
2078 	qdisc_class_hash_grow(sch, &q->clhash);
2079 
2080 	*arg = (unsigned long)cl;
2081 	return 0;
2082 
2083 err_kill_estimator:
2084 	gen_kill_estimator(&cl->rate_est);
2085 err_block_put:
2086 	tcf_block_put(cl->block);
2087 	kfree(cl);
2088 failure:
2089 	return err;
2090 }
2091 
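/* Return the per-class filter block when a class is given, otherwise the
 * qdisc-level block used for filters attached directly to the qdisc.
 */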
2092 static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg,
2093 				       struct netlink_ext_ack *extack)
2094 {
2095 	struct htb_sched *q = qdisc_priv(sch);
2096 	struct htb_class *cl = (struct htb_class *)arg;
2097 
2098 	return cl ? cl->block : q->block;
2099 }
2100 
2101 static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
2102 				     u32 classid)
2103 {
2104 	struct htb_class *cl = htb_find(classid, sch);
2105 
2106 	/*if (cl && !cl->level) return 0;
2107 	 * The line above used to be there to prevent attaching filters to
2108 	 * leaves. But at least the tc_index filter uses this just to get the
2109 	 * class for other reasons, so we have to allow it.
2110 	 * ----
2111 	 * 19.6.2002 As Werner explained it is ok - bind filter is just
2112 	 * another way to "lock" the class - unlike "get" this lock can
2113 	 * be broken by class during destroy IIUC.
2114 	 */
2115 	if (cl)
2116 		cl->filter_cnt++;
2117 	return (unsigned long)cl;
2118 }
2119 
2120 static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
2121 {
2122 	struct htb_class *cl = (struct htb_class *)arg;
2123 
2124 	if (cl)
2125 		cl->filter_cnt--;
2126 }
2127 
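/* Standard class walker: skip the first arg->skip classes, then call
 * arg->fn() for each remaining class until it asks us to stop.
 */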
2128 static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2129 {
2130 	struct htb_sched *q = qdisc_priv(sch);
2131 	struct htb_class *cl;
2132 	unsigned int i;
2133 
2134 	if (arg->stop)
2135 		return;
2136 
2137 	for (i = 0; i < q->clhash.hashsize; i++) {
2138 		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
2139 			if (arg->count < arg->skip) {
2140 				arg->count++;
2141 				continue;
2142 			}
2143 			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2144 				arg->stop = 1;
2145 				return;
2146 			}
2147 			arg->count++;
2148 		}
2149 	}
2150 }
2151 
2152 static const struct Qdisc_class_ops htb_class_ops = {
2153 	.select_queue	=	htb_select_queue,
2154 	.graft		=	htb_graft,
2155 	.leaf		=	htb_leaf,
2156 	.qlen_notify	=	htb_qlen_notify,
2157 	.find		=	htb_search,
2158 	.change		=	htb_change_class,
2159 	.delete		=	htb_delete,
2160 	.walk		=	htb_walk,
2161 	.tcf_block	=	htb_tcf_block,
2162 	.bind_tcf	=	htb_bind_filter,
2163 	.unbind_tcf	=	htb_unbind_filter,
2164 	.dump		=	htb_dump_class,
2165 	.dump_stats	=	htb_dump_class_stats,
2166 };
2167 
2168 static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
2169 	.cl_ops		=	&htb_class_ops,
2170 	.id		=	"htb",
2171 	.priv_size	=	sizeof(struct htb_sched),
2172 	.enqueue	=	htb_enqueue,
2173 	.dequeue	=	htb_dequeue,
2174 	.peek		=	qdisc_peek_dequeued,
2175 	.init		=	htb_init,
2176 	.attach		=	htb_attach,
2177 	.reset		=	htb_reset,
2178 	.destroy	=	htb_destroy,
2179 	.dump		=	htb_dump,
2180 	.owner		=	THIS_MODULE,
2181 };
2182 
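/* Illustrative userspace configuration (iproute2 syntax; the device name and
 * rates are arbitrary examples, not taken from this file):
 *
 *   tc qdisc add dev eth0 root handle 1: htb default 20
 *   tc class add dev eth0 parent 1: classid 1:1 htb rate 100mbit ceil 100mbit
 *   tc class add dev eth0 parent 1:1 classid 1:20 htb rate 30mbit ceil 100mbit
 *
 * Each such "tc class add/change" ends up in htb_change_class() above.
 */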
2183 static int __init htb_module_init(void)
2184 {
2185 	return register_qdisc(&htb_qdisc_ops);
2186 }
2187 static void __exit htb_module_exit(void)
2188 {
2189 	unregister_qdisc(&htb_qdisc_ops);
2190 }
2191 
2192 module_init(htb_module_init)
2193 module_exit(htb_module_exit)
2194 MODULE_LICENSE("GPL");
2195