1 /*
2  * net/sched/sch_htb.c	Hierarchical token bucket, feed tree version
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  * Authors:	Martin Devera, <devik@cdi.cz>
10  *
11  * Credits (in time order) for older HTB versions:
12  *              Stef Coene <stef.coene@docum.org>
13  *			HTB support at LARTC mailing list
14  *		Ondrej Kraus, <krauso@barr.cz>
15  *			found missing INIT_QDISC(htb)
16  *		Vladimir Smelhaus, Aamer Akhter, Bert Hubert
17  *			helped a lot to locate nasty class stall bug
18  *		Andi Kleen, Jamal Hadi, Bert Hubert
19  *			code review and helpful comments on shaping
20  *		Tomasz Wrona, <tw@eter.tym.pl>
21  *			created test case so that I was able to fix nasty bug
22  *		Wilfried Weissmann
23  *			spotted bug in dequeue code and helped with fix
24  *		Jiri Fojtasek
25  *			fixed requeue routine
26  *		and many others. thanks.
27  */
28 #include <linux/module.h>
29 #include <linux/moduleparam.h>
30 #include <linux/types.h>
31 #include <linux/kernel.h>
32 #include <linux/string.h>
33 #include <linux/errno.h>
34 #include <linux/skbuff.h>
35 #include <linux/list.h>
36 #include <linux/compiler.h>
37 #include <linux/rbtree.h>
38 #include <net/netlink.h>
39 #include <net/pkt_sched.h>
40 
41 /* HTB algorithm.
42     Author: devik@cdi.cz
43     ========================================================================
44     HTB is like TBF with multiple classes. It is also similar to CBQ because
45     it allows assigning a priority to each class in the hierarchy.
46     In fact it is another implementation of Floyd's formal sharing.
47 
48     Levels:
49     Each class is assigned a level. A leaf ALWAYS has level 0 and root
50     classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
51     one less than their parent.
52 */
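/* For illustration (assuming TC_HTB_MAXDEPTH is 8, its usual value): in a
   hierarchy root 1:1 -> inner 1:10 -> leaf 1:100, the root class 1:1 sits at
   level TC_HTB_MAXDEPTH-1 = 7, its non-leaf child 1:10 at level 6, while the
   leaf 1:100 is at level 0 no matter how deep it hangs in the tree (see how
   htb_change_class() recomputes parent->level when a leaf gains children). */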
53 
54 static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
55 #define HTB_VER 0x30011		/* major must be matched with number supplied by TC as version */
56 
57 #if HTB_VER >> 16 != TC_HTB_PROTOVER
58 #error "Mismatched sch_htb.c and pkt_sch.h"
59 #endif
60 
61 /* Module parameter and sysfs export */
62 module_param    (htb_hysteresis, int, 0640);
63 MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
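/* Example usage (assuming this scheduler is built as the sch_htb module and
 * sysfs is mounted at /sys):
 *   modprobe sch_htb htb_hysteresis=1
 *   echo 1 > /sys/module/sch_htb/parameters/htb_hysteresis
 * This trades a little shaping accuracy for fewer mode transitions; see
 * htb_lowater()/htb_hiwater() below. */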
64 
65 /* used internally to keep the status of a single class */
66 enum htb_cmode {
67 	HTB_CANT_SEND,		/* class can't send and can't borrow */
68 	HTB_MAY_BORROW,		/* class can't send but may borrow */
69 	HTB_CAN_SEND		/* class can send */
70 };
71 
72 /* interior & leaf nodes; props specific to leaves are marked L: */
73 struct htb_class {
74 	struct Qdisc_class_common common;
75 	/* general class parameters */
76 	struct gnet_stats_basic bstats;
77 	struct gnet_stats_queue qstats;
78 	struct gnet_stats_rate_est rate_est;
79 	struct tc_htb_xstats xstats;	/* our special stats */
80 	int refcnt;		/* usage count of this class */
81 
82 	/* topology */
83 	int level;		/* our level (see above) */
84 	unsigned int children;
85 	struct htb_class *parent;	/* parent class */
86 
87 	int prio;		/* these two are used only by leaves... */
88 	int quantum;		/* but stored for parent-to-leaf return */
89 
90 	union {
91 		struct htb_class_leaf {
92 			struct Qdisc *q;
93 			int deficit[TC_HTB_MAXDEPTH];
94 			struct list_head drop_list;
95 		} leaf;
96 		struct htb_class_inner {
97 			struct rb_root feed[TC_HTB_NUMPRIO];	/* feed trees */
98 			struct rb_node *ptr[TC_HTB_NUMPRIO];	/* current class ptr */
99 			/* When class changes from state 1->2 and disconnects from
100 			   parent's feed then we lose the ptr value and start from the
101 			   first child again. Here we store the classid of the
102 			   last valid ptr (used when ptr is NULL). */
103 			u32 last_ptr_id[TC_HTB_NUMPRIO];
104 		} inner;
105 	} un;
106 	struct rb_node node[TC_HTB_NUMPRIO];	/* node for self or feed tree */
107 	struct rb_node pq_node;	/* node for event queue */
108 	psched_time_t pq_key;
109 
110 	int prio_activity;	/* for which prios are we active */
111 	enum htb_cmode cmode;	/* current mode of the class */
112 
113 	/* class attached filters */
114 	struct tcf_proto *filter_list;
115 	int filter_cnt;
116 
117 	int warned;		/* only one warning about non work conserving .. */
118 
119 	/* token bucket parameters */
120 	struct qdisc_rate_table *rate;	/* rate table of the class itself */
121 	struct qdisc_rate_table *ceil;	/* ceiling rate (limits borrows too) */
122 	long buffer, cbuffer;	/* token bucket depth/rate */
123 	psched_tdiff_t mbuffer;	/* max wait time */
124 	long tokens, ctokens;	/* current number of tokens */
125 	psched_time_t t_c;	/* checkpoint time */
126 };
127 
128 struct htb_sched {
129 	struct Qdisc_class_hash clhash;
130 	struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
131 
132 	/* self list - roots of self generating tree */
133 	struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
134 	int row_mask[TC_HTB_MAXDEPTH];
135 	struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
136 	u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
137 
138 	/* self wait list - roots of wait PQs per row */
139 	struct rb_root wait_pq[TC_HTB_MAXDEPTH];
140 
141 	/* time of nearest event per level (row) */
142 	psched_time_t near_ev_cache[TC_HTB_MAXDEPTH];
143 
144 	int defcls;		/* class where unclassified flows go to */
145 
146 	/* filters for qdisc itself */
147 	struct tcf_proto *filter_list;
148 
149 	int rate2quantum;	/* quant = rate / rate2quantum */
150 	psched_time_t now;	/* cached dequeue time */
151 	struct qdisc_watchdog watchdog;
152 
153 	/* non shaped skbs; let them go directly thru */
154 	struct sk_buff_head direct_queue;
155 	int direct_qlen;	/* max qlen of above */
156 
157 	long direct_pkts;
158 };
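/* Relationship of the two feed structures above: a class in HTB_CAN_SEND mode
   is linked into q->row[its level][prio] and is served directly, while a class
   in HTB_MAY_BORROW mode is linked into its parent's un.inner.feed[prio]
   instead, so it is only reached through a lending ancestor (see
   htb_activate_prios()). */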
159 
160 /* find class in global hash table using given handle */
161 static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
162 {
163 	struct htb_sched *q = qdisc_priv(sch);
164 	struct Qdisc_class_common *clc;
165 
166 	clc = qdisc_class_find(&q->clhash, handle);
167 	if (clc == NULL)
168 		return NULL;
169 	return container_of(clc, struct htb_class, common);
170 }
171 
172 /**
173  * htb_classify - classify a packet into class
174  *
175  * It returns NULL if the packet should be dropped or -1 if the packet
176  * should be passed directly thru. In all other cases leaf class is returned.
177  * We allow direct class selection by classid in skb->priority. Then we examine
178  * filters in qdisc and in inner nodes (if higher filter points to the inner
179  * node). If we end up with classid MAJOR:0 we enqueue the skb into special
180  * internal fifo (direct). These packets then go directly thru. If we still
181  * have no valid leaf we try to use the MAJOR:default leaf. If that is still
182  * unsuccessful, we finish and return the direct queue.
183  */
184 #define HTB_DIRECT (struct htb_class*)-1
185 
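/* Illustrative flow (hypothetical setup, not taken from this file): with the
 * qdisc installed as handle 1: and q->defcls naming class 1:20, a packet whose
 * skb->priority equals classid 1:0 is returned as HTB_DIRECT, one whose
 * skb->priority equals an existing leaf classid is queued to that leaf without
 * consulting filters, and a packet that no filter matches falls back to the
 * 1:20 default leaf (or to HTB_DIRECT if that class is absent or not a leaf).
 */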
186 static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
187 				      int *qerr)
188 {
189 	struct htb_sched *q = qdisc_priv(sch);
190 	struct htb_class *cl;
191 	struct tcf_result res;
192 	struct tcf_proto *tcf;
193 	int result;
194 
195 	/* allow selecting a class by setting skb->priority to a valid classid;
196 	   note that nfmark can be used too by attaching filter fw with no
197 	   rules in it */
198 	if (skb->priority == sch->handle)
199 		return HTB_DIRECT;	/* X:0 (direct flow) selected */
200 	if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0)
201 		return cl;
202 
203 	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
204 	tcf = q->filter_list;
205 	while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
206 #ifdef CONFIG_NET_CLS_ACT
207 		switch (result) {
208 		case TC_ACT_QUEUED:
209 		case TC_ACT_STOLEN:
210 			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
211 		case TC_ACT_SHOT:
212 			return NULL;
213 		}
214 #endif
215 		if ((cl = (void *)res.class) == NULL) {
216 			if (res.classid == sch->handle)
217 				return HTB_DIRECT;	/* X:0 (direct flow) */
218 			if ((cl = htb_find(res.classid, sch)) == NULL)
219 				break;	/* filter selected invalid classid */
220 		}
221 		if (!cl->level)
222 			return cl;	/* we hit leaf; return it */
223 
224 		/* we have got inner class; apply inner filter chain */
225 		tcf = cl->filter_list;
226 	}
227 	/* classification failed; try to use default class */
228 	cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
229 	if (!cl || cl->level)
230 		return HTB_DIRECT;	/* bad default .. this is safe bet */
231 	return cl;
232 }
233 
234 /**
235  * htb_add_to_id_tree - adds class to the round robin list
236  *
237  * Routine adds the class to the list (actually a tree) sorted by classid.
238  * Make sure that the class is not already on such a list for the given prio.
239  */
240 static void htb_add_to_id_tree(struct rb_root *root,
241 			       struct htb_class *cl, int prio)
242 {
243 	struct rb_node **p = &root->rb_node, *parent = NULL;
244 
245 	while (*p) {
246 		struct htb_class *c;
247 		parent = *p;
248 		c = rb_entry(parent, struct htb_class, node[prio]);
249 
250 		if (cl->common.classid > c->common.classid)
251 			p = &parent->rb_right;
252 		else
253 			p = &parent->rb_left;
254 	}
255 	rb_link_node(&cl->node[prio], parent, p);
256 	rb_insert_color(&cl->node[prio], root);
257 }
258 
259 /**
260  * htb_add_to_wait_tree - adds class to the event queue with delay
261  *
262  * The class is added to the priority event queue to indicate that the class
263  * will change its mode at time cl->pq_key (q->now plus delay). Make sure that
264  * the class is not already in the queue.
265  */
266 static void htb_add_to_wait_tree(struct htb_sched *q,
267 				 struct htb_class *cl, long delay)
268 {
269 	struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;
270 
271 	cl->pq_key = q->now + delay;
272 	if (cl->pq_key == q->now)
273 		cl->pq_key++;
274 
275 	/* update the nearest event cache */
276 	if (q->near_ev_cache[cl->level] > cl->pq_key)
277 		q->near_ev_cache[cl->level] = cl->pq_key;
278 
279 	while (*p) {
280 		struct htb_class *c;
281 		parent = *p;
282 		c = rb_entry(parent, struct htb_class, pq_node);
283 		if (cl->pq_key >= c->pq_key)
284 			p = &parent->rb_right;
285 		else
286 			p = &parent->rb_left;
287 	}
288 	rb_link_node(&cl->pq_node, parent, p);
289 	rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]);
290 }
291 
292 /**
293  * htb_next_rb_node - finds next node in binary tree
294  *
295  * When we are past last key we return NULL.
296  * Average complexity is 2 steps per call.
297  */
298 static inline void htb_next_rb_node(struct rb_node **n)
299 {
300 	*n = rb_next(*n);
301 }
302 
303 /**
304  * htb_add_class_to_row - add class to its row
305  *
306  * The class is added to row at priorities marked in mask.
307  * It does nothing if mask == 0.
308  */
309 static inline void htb_add_class_to_row(struct htb_sched *q,
310 					struct htb_class *cl, int mask)
311 {
312 	q->row_mask[cl->level] |= mask;
313 	while (mask) {
314 		int prio = ffz(~mask);
315 		mask &= ~(1 << prio);
316 		htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio);
317 	}
318 }
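/* ffz(~mask) picks the lowest set bit of mask: e.g. mask 0x6 (prios 1 and 2)
   yields prio 1 first, that bit is then cleared, and the next iteration yields
   prio 2. The same idiom drives the other mask walks in this file. */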
319 
320 /* If this triggers, it is a bug in this code, but it need not be fatal */
321 static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
322 {
323 	if (RB_EMPTY_NODE(rb)) {
324 		WARN_ON(1);
325 	} else {
326 		rb_erase(rb, root);
327 		RB_CLEAR_NODE(rb);
328 	}
329 }
330 
331 
332 /**
333  * htb_remove_class_from_row - removes class from its row
334  *
335  * The class is removed from row at priorities marked in mask.
336  * It does nothing if mask == 0.
337  */
338 static inline void htb_remove_class_from_row(struct htb_sched *q,
339 						 struct htb_class *cl, int mask)
340 {
341 	int m = 0;
342 
343 	while (mask) {
344 		int prio = ffz(~mask);
345 
346 		mask &= ~(1 << prio);
347 		if (q->ptr[cl->level][prio] == cl->node + prio)
348 			htb_next_rb_node(q->ptr[cl->level] + prio);
349 
350 		htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio);
351 		if (!q->row[cl->level][prio].rb_node)
352 			m |= 1 << prio;
353 	}
354 	q->row_mask[cl->level] &= ~m;
355 }
356 
357 /**
358  * htb_activate_prios - creates active class's feed chain
359  *
360  * The class is connected to ancestors and/or appropriate rows
361  * for the priorities it participates in. cl->cmode must be the new
362  * (activated) mode. It does nothing if cl->prio_activity == 0.
363  */
364 static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
365 {
366 	struct htb_class *p = cl->parent;
367 	long m, mask = cl->prio_activity;
368 
369 	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
370 		m = mask;
371 		while (m) {
372 			int prio = ffz(~m);
373 			m &= ~(1 << prio);
374 
375 			if (p->un.inner.feed[prio].rb_node)
376 				/* parent already has its feed in use so
377 				   reset the bit in mask as parent is already ok */
378 				mask &= ~(1 << prio);
379 
380 			htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
381 		}
382 		p->prio_activity |= mask;
383 		cl = p;
384 		p = cl->parent;
385 
386 	}
387 	if (cl->cmode == HTB_CAN_SEND && mask)
388 		htb_add_class_to_row(q, cl, mask);
389 }
390 
391 /**
392  * htb_deactivate_prios - remove class from feed chain
393  *
394  * cl->cmode must represent old mode (before deactivation). It does
395  * nothing if cl->prio_activity == 0. Class is removed from all feed
396  * chains and rows.
397  */
398 static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
399 {
400 	struct htb_class *p = cl->parent;
401 	long m, mask = cl->prio_activity;
402 
403 	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
404 		m = mask;
405 		mask = 0;
406 		while (m) {
407 			int prio = ffz(~m);
408 			m &= ~(1 << prio);
409 
410 			if (p->un.inner.ptr[prio] == cl->node + prio) {
411 				/* we are removing child which is pointed to from
412 				   parent feed - forget the pointer but remember
413 				   classid */
414 				p->un.inner.last_ptr_id[prio] = cl->common.classid;
415 				p->un.inner.ptr[prio] = NULL;
416 			}
417 
418 			htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio);
419 
420 			if (!p->un.inner.feed[prio].rb_node)
421 				mask |= 1 << prio;
422 		}
423 
424 		p->prio_activity &= ~mask;
425 		cl = p;
426 		p = cl->parent;
427 
428 	}
429 	if (cl->cmode == HTB_CAN_SEND && mask)
430 		htb_remove_class_from_row(q, cl, mask);
431 }
432 
433 static inline long htb_lowater(const struct htb_class *cl)
434 {
435 	if (htb_hysteresis)
436 		return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
437 	else
438 		return 0;
439 }
440 static inline long htb_hiwater(const struct htb_class *cl)
441 {
442 	if (htb_hysteresis)
443 		return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
444 	else
445 		return 0;
446 }
447 
448 
449 /**
450  * htb_class_mode - computes and returns current class mode
451  *
452  * It computes cl's mode at time cl->t_c+diff and returns it. If mode
453  * is not HTB_CAN_SEND then *diff is updated to the time difference
454  * from now to the time when cl will change its state.
455  * Also it is worth noting that class mode doesn't change simply
456  * at cl->{c,}tokens == 0 but rather there is hysteresis over the
457  * 0 .. -cl->{c,}buffer range. It is meant to limit the number of
458  * mode transitions per time unit. The speed gain is about 1/6.
459  */
460 static inline enum htb_cmode
461 htb_class_mode(struct htb_class *cl, long *diff)
462 {
463 	long toks;
464 
465 	if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
466 		*diff = -toks;
467 		return HTB_CANT_SEND;
468 	}
469 
470 	if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
471 		return HTB_CAN_SEND;
472 
473 	*diff = -toks;
474 	return HTB_MAY_BORROW;
475 }
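/* Worked example (made-up numbers): suppose htb_hysteresis is enabled and a
   class with buffer = 10000 and cbuffer = 20000 is currently in HTB_CAN_SEND.
   It stays in HTB_CAN_SEND until cl->tokens + *diff drops below
   htb_hiwater() == -10000, and it only enters HTB_CANT_SEND once
   cl->ctokens + *diff drops below htb_lowater() == -20000; with hysteresis
   off both thresholds are 0, so the mode flips as soon as a bucket empties. */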
476 
477 /**
478  * htb_change_class_mode - changes class's mode
479  *
480  * This should be the only way to change a class's mode under normal
481  * circumstances. Routine will update feed lists linkage, change mode
482  * and add class to the wait event queue if appropriate. New mode should
483  * be different from old one and cl->pq_key has to be valid if changing
484  * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
485  */
486 static void
487 htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
488 {
489 	enum htb_cmode new_mode = htb_class_mode(cl, diff);
490 
491 	if (new_mode == cl->cmode)
492 		return;
493 
494 	if (cl->prio_activity) {	/* not necessary: speed optimization */
495 		if (cl->cmode != HTB_CANT_SEND)
496 			htb_deactivate_prios(q, cl);
497 		cl->cmode = new_mode;
498 		if (new_mode != HTB_CANT_SEND)
499 			htb_activate_prios(q, cl);
500 	} else
501 		cl->cmode = new_mode;
502 }
503 
504 /**
505  * htb_activate - inserts leaf cl into appropriate active feeds
506  *
507  * Routine learns (new) priority of leaf and activates feed chain
508  * for the prio. It can be called safely on an already active leaf.
509  * It also adds the leaf to the drop list.
510  */
511 static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
512 {
513 	WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);
514 
515 	if (!cl->prio_activity) {
516 		cl->prio_activity = 1 << cl->prio;
517 		htb_activate_prios(q, cl);
518 		list_add_tail(&cl->un.leaf.drop_list,
519 			      q->drops + cl->prio);
520 	}
521 }
522 
523 /**
524  * htb_deactivate - remove leaf cl from active feeds
525  *
526  * Make sure that the leaf is active. In other words, it can't be called
527  * with a non-active leaf. It also removes the class from the drop list.
528  */
529 static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
530 {
531 	WARN_ON(!cl->prio_activity);
532 
533 	htb_deactivate_prios(q, cl);
534 	cl->prio_activity = 0;
535 	list_del_init(&cl->un.leaf.drop_list);
536 }
537 
538 static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
539 {
540 	int uninitialized_var(ret);
541 	struct htb_sched *q = qdisc_priv(sch);
542 	struct htb_class *cl = htb_classify(skb, sch, &ret);
543 
544 	if (cl == HTB_DIRECT) {
545 		/* enqueue to helper queue */
546 		if (q->direct_queue.qlen < q->direct_qlen) {
547 			__skb_queue_tail(&q->direct_queue, skb);
548 			q->direct_pkts++;
549 		} else {
550 			kfree_skb(skb);
551 			sch->qstats.drops++;
552 			return NET_XMIT_DROP;
553 		}
554 #ifdef CONFIG_NET_CLS_ACT
555 	} else if (!cl) {
556 		if (ret & __NET_XMIT_BYPASS)
557 			sch->qstats.drops++;
558 		kfree_skb(skb);
559 		return ret;
560 #endif
561 	} else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) {
562 		if (net_xmit_drop_count(ret)) {
563 			sch->qstats.drops++;
564 			cl->qstats.drops++;
565 		}
566 		return ret;
567 	} else {
568 		cl->bstats.packets +=
569 			skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
570 		cl->bstats.bytes += qdisc_pkt_len(skb);
571 		htb_activate(q, cl);
572 	}
573 
574 	sch->q.qlen++;
575 	sch->bstats.packets += skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
576 	sch->bstats.bytes += qdisc_pkt_len(skb);
577 	return NET_XMIT_SUCCESS;
578 }
579 
580 static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, long diff)
581 {
582 	long toks = diff + cl->tokens;
583 
584 	if (toks > cl->buffer)
585 		toks = cl->buffer;
586 	toks -= (long) qdisc_l2t(cl->rate, bytes);
587 	if (toks <= -cl->mbuffer)
588 		toks = 1 - cl->mbuffer;
589 
590 	cl->tokens = toks;
591 }
592 
593 static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, long diff)
594 {
595 	long toks = diff + cl->ctokens;
596 
597 	if (toks > cl->cbuffer)
598 		toks = cl->cbuffer;
599 	toks -= (long) qdisc_l2t(cl->ceil, bytes);
600 	if (toks <= -cl->mbuffer)
601 		toks = 1 - cl->mbuffer;
602 
603 	cl->ctokens = toks;
604 }
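/* Example of the accounting above (illustrative numbers): if cl->tokens is
   already at cl->buffer, a dequeued packet simply costs qdisc_l2t(cl->rate,
   bytes) worth of tokens, so tokens become cl->buffer minus that cost; the
   result is never allowed above cl->buffer (the burst cap) nor below
   1 - cl->mbuffer, which keeps a badly backlogged class from accumulating an
   unbounded debt. */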
605 
606 /**
607  * htb_charge_class - charges amount "bytes" to leaf and ancestors
608  *
609  * Routine assumes that packet "bytes" long was dequeued from leaf cl
610  * borrowing from "level". It accounts bytes to ceil leaky bucket for
611  * leaf and all ancestors and to rate bucket for ancestors at levels
612  * "level" and higher. It also handles possible change of mode resulting
613  * from the update. Note that mode can also increase here (MAY_BORROW to
614  * CAN_SEND) because we can use more precise clock that event queue here.
615  * In such case we remove class from event queue first.
616  */
617 static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
618 			     int level, struct sk_buff *skb)
619 {
620 	int bytes = qdisc_pkt_len(skb);
621 	enum htb_cmode old_mode;
622 	long diff;
623 
624 	while (cl) {
625 		diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
626 		if (cl->level >= level) {
627 			if (cl->level == level)
628 				cl->xstats.lends++;
629 			htb_accnt_tokens(cl, bytes, diff);
630 		} else {
631 			cl->xstats.borrows++;
632 			cl->tokens += diff;	/* we moved t_c; update tokens */
633 		}
634 		htb_accnt_ctokens(cl, bytes, diff);
635 		cl->t_c = q->now;
636 
637 		old_mode = cl->cmode;
638 		diff = 0;
639 		htb_change_class_mode(q, cl, &diff);
640 		if (old_mode != cl->cmode) {
641 			if (old_mode != HTB_CAN_SEND)
642 				htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
643 			if (cl->cmode != HTB_CAN_SEND)
644 				htb_add_to_wait_tree(q, cl, diff);
645 		}
646 
647 		/* update byte stats except for leaves which are already updated */
648 		if (cl->level) {
649 			cl->bstats.bytes += bytes;
650 			cl->bstats.packets += skb_is_gso(skb)?
651 					skb_shinfo(skb)->gso_segs:1;
652 		}
653 		cl = cl->parent;
654 	}
655 }
656 
657 /**
658  * htb_do_events - make mode changes to classes at the level
659  *
660  * Scans event queue for pending events and applies them. Returns time of
661  * next pending event (0 for no event in pq).
662  * Note: Applied are events which have cl->pq_key <= q->now.
663  */
664 static psched_time_t htb_do_events(struct htb_sched *q, int level,
665 				   unsigned long start)
666 {
667 	/* don't run for longer than 2 jiffies; 2 is used instead of
668 	   1 to simplify things when jiffy is going to be incremented
669 	   too soon */
670 	unsigned long stop_at = start + 2;
671 	while (time_before(jiffies, stop_at)) {
672 		struct htb_class *cl;
673 		long diff;
674 		struct rb_node *p = rb_first(&q->wait_pq[level]);
675 
676 		if (!p)
677 			return 0;
678 
679 		cl = rb_entry(p, struct htb_class, pq_node);
680 		if (cl->pq_key > q->now)
681 			return cl->pq_key;
682 
683 		htb_safe_rb_erase(p, q->wait_pq + level);
684 		diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
685 		htb_change_class_mode(q, cl, &diff);
686 		if (cl->cmode != HTB_CAN_SEND)
687 			htb_add_to_wait_tree(q, cl, diff);
688 	}
689 	/* too much load - let's continue on the next jiffy (including the above) */
690 	return q->now + 2 * PSCHED_TICKS_PER_SEC / HZ;
691 }
692 
693 /* Returns class->node+prio from the id-tree where the class's id is >= id.
694    NULL if no such one exists. */
695 static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
696 					      u32 id)
697 {
698 	struct rb_node *r = NULL;
699 	while (n) {
700 		struct htb_class *cl =
701 		    rb_entry(n, struct htb_class, node[prio]);
702 
703 		if (id > cl->common.classid) {
704 			n = n->rb_right;
705 		} else if (id < cl->common.classid) {
706 			r = n;
707 			n = n->rb_left;
708 		} else {
709 			return n;
710 		}
711 	}
712 	return r;
713 }
714 
715 /**
716  * htb_lookup_leaf - returns next leaf class in DRR order
717  *
718  * Find the leaf where the current feed pointer points.
719  */
720 static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
721 					 struct rb_node **pptr, u32 * pid)
722 {
723 	int i;
724 	struct {
725 		struct rb_node *root;
726 		struct rb_node **pptr;
727 		u32 *pid;
728 	} stk[TC_HTB_MAXDEPTH], *sp = stk;
729 
730 	BUG_ON(!tree->rb_node);
731 	sp->root = tree->rb_node;
732 	sp->pptr = pptr;
733 	sp->pid = pid;
734 
735 	for (i = 0; i < 65535; i++) {
736 		if (!*sp->pptr && *sp->pid) {
737 			/* ptr was invalidated but id is valid - try to recover
738 			   the original or next ptr */
739 			*sp->pptr =
740 			    htb_id_find_next_upper(prio, sp->root, *sp->pid);
741 		}
742 		*sp->pid = 0;	/* ptr is valid now, so remove this hint as it
743 				   can become out of date quickly */
744 		if (!*sp->pptr) {	/* we are at right end; rewind & go up */
745 			*sp->pptr = sp->root;
746 			while ((*sp->pptr)->rb_left)
747 				*sp->pptr = (*sp->pptr)->rb_left;
748 			if (sp > stk) {
749 				sp--;
750 				if (!*sp->pptr) {
751 					WARN_ON(1);
752 					return NULL;
753 				}
754 				htb_next_rb_node(sp->pptr);
755 			}
756 		} else {
757 			struct htb_class *cl;
758 			cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
759 			if (!cl->level)
760 				return cl;
761 			(++sp)->root = cl->un.inner.feed[prio].rb_node;
762 			sp->pptr = cl->un.inner.ptr + prio;
763 			sp->pid = cl->un.inner.last_ptr_id + prio;
764 		}
765 	}
766 	WARN_ON(1);
767 	return NULL;
768 }
769 
770 /* dequeues packet at given priority and level; call only if
771    you are sure that there is an active class at prio/level */
772 static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
773 					int level)
774 {
775 	struct sk_buff *skb = NULL;
776 	struct htb_class *cl, *start;
777 	/* look initial class up in the row */
778 	start = cl = htb_lookup_leaf(q->row[level] + prio, prio,
779 				     q->ptr[level] + prio,
780 				     q->last_ptr_id[level] + prio);
781 
782 	do {
783 next:
784 		if (unlikely(!cl))
785 			return NULL;
786 
787 		/* class can be empty - it is unlikely but can be true if leaf
788 		   qdisc drops packets in enqueue routine or if someone used
789 		   graft operation on the leaf since last dequeue;
790 		   simply deactivate and skip such class */
791 		if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
792 			struct htb_class *next;
793 			htb_deactivate(q, cl);
794 
795 			/* row/level might become empty */
796 			if ((q->row_mask[level] & (1 << prio)) == 0)
797 				return NULL;
798 
799 			next = htb_lookup_leaf(q->row[level] + prio,
800 					       prio, q->ptr[level] + prio,
801 					       q->last_ptr_id[level] + prio);
802 
803 			if (cl == start)	/* fix start if we just deleted it */
804 				start = next;
805 			cl = next;
806 			goto next;
807 		}
808 
809 		skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
810 		if (likely(skb != NULL))
811 			break;
812 		if (!cl->warned) {
813 			printk(KERN_WARNING
814 			       "htb: class %X isn't work conserving ?!\n",
815 			       cl->common.classid);
816 			cl->warned = 1;
817 		}
818 
819 		htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
820 				  ptr[0]) + prio);
821 		cl = htb_lookup_leaf(q->row[level] + prio, prio,
822 				     q->ptr[level] + prio,
823 				     q->last_ptr_id[level] + prio);
824 
825 	} while (cl != start);
826 
827 	if (likely(skb != NULL)) {
828 		cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
829 		if (cl->un.leaf.deficit[level] < 0) {
830 			cl->un.leaf.deficit[level] += cl->quantum;
831 			htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
832 					  ptr[0]) + prio);
833 		}
834 		/* this used to be after charge_class but this constellation
835 		   gives us slightly better performance */
836 		if (!cl->un.leaf.q->q.qlen)
837 			htb_deactivate(q, cl);
838 		htb_charge_class(q, cl, level, skb);
839 	}
840 	return skb;
841 }
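/* DRR note: cl->quantum is derived in htb_change_class() (rate / r2q, possibly
   clamped or overridden), and the code above lets a leaf keep the round-robin
   pointer until its deficit[level] goes negative; the deficit is then topped
   up by one quantum and the pointer advances to the next class, so bandwidth
   within one prio/level is shared roughly in proportion to the quanta. */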
842 
843 static struct sk_buff *htb_dequeue(struct Qdisc *sch)
844 {
845 	struct sk_buff *skb = NULL;
846 	struct htb_sched *q = qdisc_priv(sch);
847 	int level;
848 	psched_time_t next_event;
849 	unsigned long start_at;
850 
851 	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
852 	skb = __skb_dequeue(&q->direct_queue);
853 	if (skb != NULL) {
854 		sch->flags &= ~TCQ_F_THROTTLED;
855 		sch->q.qlen--;
856 		return skb;
857 	}
858 
859 	if (!sch->q.qlen)
860 		goto fin;
861 	q->now = psched_get_time();
862 	start_at = jiffies;
863 
864 	next_event = q->now + 5 * PSCHED_TICKS_PER_SEC;
865 
866 	for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
867 		/* common case optimization - skip event handler quickly */
868 		int m;
869 		psched_time_t event;
870 
871 		if (q->now >= q->near_ev_cache[level]) {
872 			event = htb_do_events(q, level, start_at);
873 			if (!event)
874 				event = q->now + PSCHED_TICKS_PER_SEC;
875 			q->near_ev_cache[level] = event;
876 		} else
877 			event = q->near_ev_cache[level];
878 
879 		if (next_event > event)
880 			next_event = event;
881 
882 		m = ~q->row_mask[level];
883 		while (m != (int)(-1)) {
884 			int prio = ffz(m);
885 			m |= 1 << prio;
886 			skb = htb_dequeue_tree(q, prio, level);
887 			if (likely(skb != NULL)) {
888 				sch->q.qlen--;
889 				sch->flags &= ~TCQ_F_THROTTLED;
890 				goto fin;
891 			}
892 		}
893 	}
894 	sch->qstats.overlimits++;
895 	qdisc_watchdog_schedule(&q->watchdog, next_event);
896 fin:
897 	return skb;
898 }
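/* Note on the scan order above: levels are tried from 0 upwards, so leaves
   that can send on their own tokens win over leaves that have to borrow from
   an ancestor, and within a level the row_mask walk serves lower prio values
   first (prio 0 before prio 1), matching HTB's "lower prio number = higher
   priority" semantics. */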
899 
900 /* try to drop from each class (by prio) until one succeeds */
901 static unsigned int htb_drop(struct Qdisc *sch)
902 {
903 	struct htb_sched *q = qdisc_priv(sch);
904 	int prio;
905 
906 	for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
907 		struct list_head *p;
908 		list_for_each(p, q->drops + prio) {
909 			struct htb_class *cl = list_entry(p, struct htb_class,
910 							  un.leaf.drop_list);
911 			unsigned int len;
912 			if (cl->un.leaf.q->ops->drop &&
913 			    (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
914 				sch->q.qlen--;
915 				if (!cl->un.leaf.q->q.qlen)
916 					htb_deactivate(q, cl);
917 				return len;
918 			}
919 		}
920 	}
921 	return 0;
922 }
923 
924 /* reset all classes */
925 /* always called under BH & queue lock */
926 static void htb_reset(struct Qdisc *sch)
927 {
928 	struct htb_sched *q = qdisc_priv(sch);
929 	struct htb_class *cl;
930 	struct hlist_node *n;
931 	unsigned int i;
932 
933 	for (i = 0; i < q->clhash.hashsize; i++) {
934 		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
935 			if (cl->level)
936 				memset(&cl->un.inner, 0, sizeof(cl->un.inner));
937 			else {
938 				if (cl->un.leaf.q)
939 					qdisc_reset(cl->un.leaf.q);
940 				INIT_LIST_HEAD(&cl->un.leaf.drop_list);
941 			}
942 			cl->prio_activity = 0;
943 			cl->cmode = HTB_CAN_SEND;
944 
945 		}
946 	}
947 	qdisc_watchdog_cancel(&q->watchdog);
948 	__skb_queue_purge(&q->direct_queue);
949 	sch->q.qlen = 0;
950 	memset(q->row, 0, sizeof(q->row));
951 	memset(q->row_mask, 0, sizeof(q->row_mask));
952 	memset(q->wait_pq, 0, sizeof(q->wait_pq));
953 	memset(q->ptr, 0, sizeof(q->ptr));
954 	for (i = 0; i < TC_HTB_NUMPRIO; i++)
955 		INIT_LIST_HEAD(q->drops + i);
956 }
957 
958 static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
959 	[TCA_HTB_PARMS]	= { .len = sizeof(struct tc_htb_opt) },
960 	[TCA_HTB_INIT]	= { .len = sizeof(struct tc_htb_glob) },
961 	[TCA_HTB_CTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
962 	[TCA_HTB_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
963 };
964 
965 static int htb_init(struct Qdisc *sch, struct nlattr *opt)
966 {
967 	struct htb_sched *q = qdisc_priv(sch);
968 	struct nlattr *tb[TCA_HTB_INIT + 1];
969 	struct tc_htb_glob *gopt;
970 	int err;
971 	int i;
972 
973 	if (!opt)
974 		return -EINVAL;
975 
976 	err = nla_parse_nested(tb, TCA_HTB_INIT, opt, htb_policy);
977 	if (err < 0)
978 		return err;
979 
980 	if (tb[TCA_HTB_INIT] == NULL) {
981 		printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
982 		return -EINVAL;
983 	}
984 	gopt = nla_data(tb[TCA_HTB_INIT]);
985 	if (gopt->version != HTB_VER >> 16) {
986 		printk(KERN_ERR
987 		       "HTB: need tc/htb version %d (minor is %d), you have %d\n",
988 		       HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
989 		return -EINVAL;
990 	}
991 
992 	err = qdisc_class_hash_init(&q->clhash);
993 	if (err < 0)
994 		return err;
995 	for (i = 0; i < TC_HTB_NUMPRIO; i++)
996 		INIT_LIST_HEAD(q->drops + i);
997 
998 	qdisc_watchdog_init(&q->watchdog, sch);
999 	skb_queue_head_init(&q->direct_queue);
1000 
1001 	q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
1002 	if (q->direct_qlen < 2)	/* some devices have zero tx_queue_len */
1003 		q->direct_qlen = 2;
1004 
1005 	if ((q->rate2quantum = gopt->rate2quantum) < 1)
1006 		q->rate2quantum = 1;
1007 	q->defcls = gopt->defcls;
1008 
1009 	return 0;
1010 }
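/* Hypothetical userspace counterpart (tc-htb syntax, not part of this file):
 *   tc qdisc add dev eth0 root handle 1: htb r2q 10 default 20
 * arrives here with gopt->rate2quantum == 10 and gopt->defcls naming the
 * 1:<defcls> leaf that htb_classify() falls back to when no filter matches. */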
1011 
1012 static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1013 {
1014 	spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1015 	struct htb_sched *q = qdisc_priv(sch);
1016 	struct nlattr *nest;
1017 	struct tc_htb_glob gopt;
1018 
1019 	spin_lock_bh(root_lock);
1020 
1021 	gopt.direct_pkts = q->direct_pkts;
1022 	gopt.version = HTB_VER;
1023 	gopt.rate2quantum = q->rate2quantum;
1024 	gopt.defcls = q->defcls;
1025 	gopt.debug = 0;
1026 
1027 	nest = nla_nest_start(skb, TCA_OPTIONS);
1028 	if (nest == NULL)
1029 		goto nla_put_failure;
1030 	NLA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
1031 	nla_nest_end(skb, nest);
1032 
1033 	spin_unlock_bh(root_lock);
1034 	return skb->len;
1035 
1036 nla_put_failure:
1037 	spin_unlock_bh(root_lock);
1038 	nla_nest_cancel(skb, nest);
1039 	return -1;
1040 }
1041 
1042 static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1043 			  struct sk_buff *skb, struct tcmsg *tcm)
1044 {
1045 	struct htb_class *cl = (struct htb_class *)arg;
1046 	spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1047 	struct nlattr *nest;
1048 	struct tc_htb_opt opt;
1049 
1050 	spin_lock_bh(root_lock);
1051 	tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1052 	tcm->tcm_handle = cl->common.classid;
1053 	if (!cl->level && cl->un.leaf.q)
1054 		tcm->tcm_info = cl->un.leaf.q->handle;
1055 
1056 	nest = nla_nest_start(skb, TCA_OPTIONS);
1057 	if (nest == NULL)
1058 		goto nla_put_failure;
1059 
1060 	memset(&opt, 0, sizeof(opt));
1061 
1062 	opt.rate = cl->rate->rate;
1063 	opt.buffer = cl->buffer;
1064 	opt.ceil = cl->ceil->rate;
1065 	opt.cbuffer = cl->cbuffer;
1066 	opt.quantum = cl->quantum;
1067 	opt.prio = cl->prio;
1068 	opt.level = cl->level;
1069 	NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
1070 
1071 	nla_nest_end(skb, nest);
1072 	spin_unlock_bh(root_lock);
1073 	return skb->len;
1074 
1075 nla_put_failure:
1076 	spin_unlock_bh(root_lock);
1077 	nla_nest_cancel(skb, nest);
1078 	return -1;
1079 }
1080 
1081 static int
1082 htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1083 {
1084 	struct htb_class *cl = (struct htb_class *)arg;
1085 
1086 	if (!cl->level && cl->un.leaf.q)
1087 		cl->qstats.qlen = cl->un.leaf.q->q.qlen;
1088 	cl->xstats.tokens = cl->tokens;
1089 	cl->xstats.ctokens = cl->ctokens;
1090 
1091 	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1092 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1093 	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
1094 		return -1;
1095 
1096 	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1097 }
1098 
1099 static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1100 		     struct Qdisc **old)
1101 {
1102 	struct htb_class *cl = (struct htb_class *)arg;
1103 
1104 	if (cl && !cl->level) {
1105 		if (new == NULL &&
1106 		    (new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1107 					     &pfifo_qdisc_ops,
1108 					     cl->common.classid))
1109 		    == NULL)
1110 			return -ENOBUFS;
1111 		sch_tree_lock(sch);
1112 		*old = cl->un.leaf.q;
1113 		cl->un.leaf.q = new;
1114 		if (*old != NULL) {
1115 			qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1116 			qdisc_reset(*old);
1117 		}
1118 		sch_tree_unlock(sch);
1119 		return 0;
1120 	}
1121 	return -ENOENT;
1122 }
1123 
1124 static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1125 {
1126 	struct htb_class *cl = (struct htb_class *)arg;
1127 	return (cl && !cl->level) ? cl->un.leaf.q : NULL;
1128 }
1129 
1130 static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1131 {
1132 	struct htb_class *cl = (struct htb_class *)arg;
1133 
1134 	if (cl->un.leaf.q->q.qlen == 0)
1135 		htb_deactivate(qdisc_priv(sch), cl);
1136 }
1137 
1138 static unsigned long htb_get(struct Qdisc *sch, u32 classid)
1139 {
1140 	struct htb_class *cl = htb_find(classid, sch);
1141 	if (cl)
1142 		cl->refcnt++;
1143 	return (unsigned long)cl;
1144 }
1145 
1146 static inline int htb_parent_last_child(struct htb_class *cl)
1147 {
1148 	if (!cl->parent)
1149 		/* the root class */
1150 		return 0;
1151 	if (cl->parent->children > 1)
1152 		/* not the last child */
1153 		return 0;
1154 	return 1;
1155 }
1156 
1157 static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
1158 			       struct Qdisc *new_q)
1159 {
1160 	struct htb_class *parent = cl->parent;
1161 
1162 	WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity);
1163 
1164 	if (parent->cmode != HTB_CAN_SEND)
1165 		htb_safe_rb_erase(&parent->pq_node, q->wait_pq + parent->level);
1166 
1167 	parent->level = 0;
1168 	memset(&parent->un.inner, 0, sizeof(parent->un.inner));
1169 	INIT_LIST_HEAD(&parent->un.leaf.drop_list);
1170 	parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
1171 	parent->tokens = parent->buffer;
1172 	parent->ctokens = parent->cbuffer;
1173 	parent->t_c = psched_get_time();
1174 	parent->cmode = HTB_CAN_SEND;
1175 }
1176 
1177 static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1178 {
1179 	if (!cl->level) {
1180 		WARN_ON(!cl->un.leaf.q);
1181 		qdisc_destroy(cl->un.leaf.q);
1182 	}
1183 	gen_kill_estimator(&cl->bstats, &cl->rate_est);
1184 	qdisc_put_rtab(cl->rate);
1185 	qdisc_put_rtab(cl->ceil);
1186 
1187 	tcf_destroy_chain(&cl->filter_list);
1188 	kfree(cl);
1189 }
1190 
1191 /* always called under BH & queue lock */
1192 static void htb_destroy(struct Qdisc *sch)
1193 {
1194 	struct htb_sched *q = qdisc_priv(sch);
1195 	struct hlist_node *n, *next;
1196 	struct htb_class *cl;
1197 	unsigned int i;
1198 
1199 	qdisc_watchdog_cancel(&q->watchdog);
1200 	/* This line used to be after htb_destroy_class call below
1201 	   and surprisingly it worked in 2.4. But it must precede it
1202 	   because filters need their target class alive to be able to call
1203 	   unbind_filter on it (without Oops). */
1204 	tcf_destroy_chain(&q->filter_list);
1205 
1206 	for (i = 0; i < q->clhash.hashsize; i++) {
1207 		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode)
1208 			tcf_destroy_chain(&cl->filter_list);
1209 	}
1210 	for (i = 0; i < q->clhash.hashsize; i++) {
1211 		hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i],
1212 					  common.hnode)
1213 			htb_destroy_class(sch, cl);
1214 	}
1215 	qdisc_class_hash_destroy(&q->clhash);
1216 	__skb_queue_purge(&q->direct_queue);
1217 }
1218 
1219 static int htb_delete(struct Qdisc *sch, unsigned long arg)
1220 {
1221 	struct htb_sched *q = qdisc_priv(sch);
1222 	struct htb_class *cl = (struct htb_class *)arg;
1223 	unsigned int qlen;
1224 	struct Qdisc *new_q = NULL;
1225 	int last_child = 0;
1226 
1227 	// TODO: why not allow deleting a subtree ? references ? does the
1228 	// tc subsys guarantee us that in htb_destroy it holds no class
1229 	// refs so that we can remove children safely there ?
1230 	if (cl->children || cl->filter_cnt)
1231 		return -EBUSY;
1232 
1233 	if (!cl->level && htb_parent_last_child(cl)) {
1234 		new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1235 					  &pfifo_qdisc_ops,
1236 					  cl->parent->common.classid);
1237 		last_child = 1;
1238 	}
1239 
1240 	sch_tree_lock(sch);
1241 
1242 	if (!cl->level) {
1243 		qlen = cl->un.leaf.q->q.qlen;
1244 		qdisc_reset(cl->un.leaf.q);
1245 		qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
1246 	}
1247 
1248 	/* delete from hash and active; remainder in destroy_class */
1249 	qdisc_class_hash_remove(&q->clhash, &cl->common);
1250 	if (cl->parent)
1251 		cl->parent->children--;
1252 
1253 	if (cl->prio_activity)
1254 		htb_deactivate(q, cl);
1255 
1256 	if (cl->cmode != HTB_CAN_SEND)
1257 		htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
1258 
1259 	if (last_child)
1260 		htb_parent_to_leaf(q, cl, new_q);
1261 
1262 	if (--cl->refcnt == 0)
1263 		htb_destroy_class(sch, cl);
1264 
1265 	sch_tree_unlock(sch);
1266 	return 0;
1267 }
1268 
1269 static void htb_put(struct Qdisc *sch, unsigned long arg)
1270 {
1271 	struct htb_class *cl = (struct htb_class *)arg;
1272 
1273 	if (--cl->refcnt == 0)
1274 		htb_destroy_class(sch, cl);
1275 }
1276 
1277 static int htb_change_class(struct Qdisc *sch, u32 classid,
1278 			    u32 parentid, struct nlattr **tca,
1279 			    unsigned long *arg)
1280 {
1281 	int err = -EINVAL;
1282 	struct htb_sched *q = qdisc_priv(sch);
1283 	struct htb_class *cl = (struct htb_class *)*arg, *parent;
1284 	struct nlattr *opt = tca[TCA_OPTIONS];
1285 	struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
1286 	struct nlattr *tb[TCA_HTB_RTAB + 1];
1287 	struct tc_htb_opt *hopt;
1288 
1289 	/* extract all subattrs from opt attr */
1290 	if (!opt)
1291 		goto failure;
1292 
1293 	err = nla_parse_nested(tb, TCA_HTB_RTAB, opt, htb_policy);
1294 	if (err < 0)
1295 		goto failure;
1296 
1297 	err = -EINVAL;
1298 	if (tb[TCA_HTB_PARMS] == NULL)
1299 		goto failure;
1300 
1301 	parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1302 
1303 	hopt = nla_data(tb[TCA_HTB_PARMS]);
1304 
1305 	rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]);
1306 	ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]);
1307 	if (!rtab || !ctab)
1308 		goto failure;
1309 
1310 	if (!cl) {		/* new class */
1311 		struct Qdisc *new_q;
1312 		int prio;
1313 		struct {
1314 			struct nlattr		nla;
1315 			struct gnet_estimator	opt;
1316 		} est = {
1317 			.nla = {
1318 				.nla_len	= nla_attr_size(sizeof(est.opt)),
1319 				.nla_type	= TCA_RATE,
1320 			},
1321 			.opt = {
1322 				/* 4s interval, 16s averaging constant */
1323 				.interval	= 2,
1324 				.ewma_log	= 2,
1325 			},
1326 		};
1327 
1328 		/* check for valid classid */
1329 		if (!classid || TC_H_MAJ(classid ^ sch->handle)
1330 		    || htb_find(classid, sch))
1331 			goto failure;
1332 
1333 		/* check maximal depth */
1334 		if (parent && parent->parent && parent->parent->level < 2) {
1335 			printk(KERN_ERR "htb: tree is too deep\n");
1336 			goto failure;
1337 		}
1338 		err = -ENOBUFS;
1339 		if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
1340 			goto failure;
1341 
1342 		err = gen_new_estimator(&cl->bstats, &cl->rate_est,
1343 					qdisc_root_sleeping_lock(sch),
1344 					tca[TCA_RATE] ? : &est.nla);
1345 		if (err) {
1346 			kfree(cl);
1347 			goto failure;
1348 		}
1349 
1350 		cl->refcnt = 1;
1351 		cl->children = 0;
1352 		INIT_LIST_HEAD(&cl->un.leaf.drop_list);
1353 		RB_CLEAR_NODE(&cl->pq_node);
1354 
1355 		for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1356 			RB_CLEAR_NODE(&cl->node[prio]);
1357 
1358 		/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
1359 		   so it can't be used inside of sch_tree_lock
1360 		   -- thanks to Karlis Peisenieks */
1361 		new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1362 					  &pfifo_qdisc_ops, classid);
1363 		sch_tree_lock(sch);
1364 		if (parent && !parent->level) {
1365 			unsigned int qlen = parent->un.leaf.q->q.qlen;
1366 
1367 			/* turn parent into inner node */
1368 			qdisc_reset(parent->un.leaf.q);
1369 			qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen);
1370 			qdisc_destroy(parent->un.leaf.q);
1371 			if (parent->prio_activity)
1372 				htb_deactivate(q, parent);
1373 
1374 			/* remove from evt list because of level change */
1375 			if (parent->cmode != HTB_CAN_SEND) {
1376 				htb_safe_rb_erase(&parent->pq_node, q->wait_pq);
1377 				parent->cmode = HTB_CAN_SEND;
1378 			}
1379 			parent->level = (parent->parent ? parent->parent->level
1380 					 : TC_HTB_MAXDEPTH) - 1;
1381 			memset(&parent->un.inner, 0, sizeof(parent->un.inner));
1382 		}
1383 		/* leaf (we) needs elementary qdisc */
1384 		cl->un.leaf.q = new_q ? new_q : &noop_qdisc;
1385 
1386 		cl->common.classid = classid;
1387 		cl->parent = parent;
1388 
1389 		/* set class to be in HTB_CAN_SEND state */
1390 		cl->tokens = hopt->buffer;
1391 		cl->ctokens = hopt->cbuffer;
1392 		cl->mbuffer = 60 * PSCHED_TICKS_PER_SEC;	/* 1min */
1393 		cl->t_c = psched_get_time();
1394 		cl->cmode = HTB_CAN_SEND;
1395 
1396 		/* attach to the hash list and parent's family */
1397 		qdisc_class_hash_insert(&q->clhash, &cl->common);
1398 		if (parent)
1399 			parent->children++;
1400 	} else {
1401 		if (tca[TCA_RATE]) {
1402 			err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
1403 						    qdisc_root_sleeping_lock(sch),
1404 						    tca[TCA_RATE]);
1405 			if (err)
1406 				return err;
1407 		}
1408 		sch_tree_lock(sch);
1409 	}
1410 
1411 	/* there used to be a nasty bug here; we have to check that the node
1412 	   is really a leaf before changing cl->un.leaf ! */
1413 	if (!cl->level) {
1414 		cl->quantum = rtab->rate.rate / q->rate2quantum;
1415 		if (!hopt->quantum && cl->quantum < 1000) {
1416 			printk(KERN_WARNING
1417 			       "HTB: quantum of class %X is small. Consider r2q change.\n",
1418 			       cl->common.classid);
1419 			cl->quantum = 1000;
1420 		}
1421 		if (!hopt->quantum && cl->quantum > 200000) {
1422 			printk(KERN_WARNING
1423 			       "HTB: quantum of class %X is big. Consider r2q change.\n",
1424 			       cl->common.classid);
1425 			cl->quantum = 200000;
1426 		}
1427 		if (hopt->quantum)
1428 			cl->quantum = hopt->quantum;
1429 		if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
1430 			cl->prio = TC_HTB_NUMPRIO - 1;
1431 	}
1432 
1433 	cl->buffer = hopt->buffer;
1434 	cl->cbuffer = hopt->cbuffer;
1435 	if (cl->rate)
1436 		qdisc_put_rtab(cl->rate);
1437 	cl->rate = rtab;
1438 	if (cl->ceil)
1439 		qdisc_put_rtab(cl->ceil);
1440 	cl->ceil = ctab;
1441 	sch_tree_unlock(sch);
1442 
1443 	qdisc_class_hash_grow(sch, &q->clhash);
1444 
1445 	*arg = (unsigned long)cl;
1446 	return 0;
1447 
1448 failure:
1449 	if (rtab)
1450 		qdisc_put_rtab(rtab);
1451 	if (ctab)
1452 		qdisc_put_rtab(ctab);
1453 	return err;
1454 }
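/* Worked example for the quantum computed above (illustrative numbers): a leaf
   with rate 1 Mbit/s has rtab->rate.rate == 125000 bytes/s, so with r2q 10
   (the value tc commonly defaults to) its DRR quantum becomes 12500 bytes;
   rates below 80 kbit/s would hit the 1000-byte floor and rates above
   16 Mbit/s the 200000-byte cap, both accompanied by the "Consider r2q change"
   warning, unless an explicit quantum is given. */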
1455 
1456 static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
1457 {
1458 	struct htb_sched *q = qdisc_priv(sch);
1459 	struct htb_class *cl = (struct htb_class *)arg;
1460 	struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;
1461 
1462 	return fl;
1463 }
1464 
1465 static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
1466 				     u32 classid)
1467 {
1468 	struct htb_class *cl = htb_find(classid, sch);
1469 
1470 	/*if (cl && !cl->level) return 0;
1471 	   The line above used to be there to prevent attaching filters to
1472 	   leaves. But at least the tc_index filter uses this just to get the
1473 	   class for other reasons, so we have to allow it.
1474 	   ----
1475 	   19.6.2002 As Werner explained it is ok - bind filter is just
1476 	   another way to "lock" the class - unlike "get" this lock can
1477 	   be broken by class during destroy IIUC.
1478 	 */
1479 	if (cl)
1480 		cl->filter_cnt++;
1481 	return (unsigned long)cl;
1482 }
1483 
1484 static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
1485 {
1486 	struct htb_class *cl = (struct htb_class *)arg;
1487 
1488 	if (cl)
1489 		cl->filter_cnt--;
1490 }
1491 
1492 static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1493 {
1494 	struct htb_sched *q = qdisc_priv(sch);
1495 	struct htb_class *cl;
1496 	struct hlist_node *n;
1497 	unsigned int i;
1498 
1499 	if (arg->stop)
1500 		return;
1501 
1502 	for (i = 0; i < q->clhash.hashsize; i++) {
1503 		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
1504 			if (arg->count < arg->skip) {
1505 				arg->count++;
1506 				continue;
1507 			}
1508 			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
1509 				arg->stop = 1;
1510 				return;
1511 			}
1512 			arg->count++;
1513 		}
1514 	}
1515 }
1516 
1517 static const struct Qdisc_class_ops htb_class_ops = {
1518 	.graft		=	htb_graft,
1519 	.leaf		=	htb_leaf,
1520 	.qlen_notify	=	htb_qlen_notify,
1521 	.get		=	htb_get,
1522 	.put		=	htb_put,
1523 	.change		=	htb_change_class,
1524 	.delete		=	htb_delete,
1525 	.walk		=	htb_walk,
1526 	.tcf_chain	=	htb_find_tcf,
1527 	.bind_tcf	=	htb_bind_filter,
1528 	.unbind_tcf	=	htb_unbind_filter,
1529 	.dump		=	htb_dump_class,
1530 	.dump_stats	=	htb_dump_class_stats,
1531 };
1532 
1533 static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
1534 	.next		=	NULL,
1535 	.cl_ops		=	&htb_class_ops,
1536 	.id		=	"htb",
1537 	.priv_size	=	sizeof(struct htb_sched),
1538 	.enqueue	=	htb_enqueue,
1539 	.dequeue	=	htb_dequeue,
1540 	.peek		=	qdisc_peek_dequeued,
1541 	.drop		=	htb_drop,
1542 	.init		=	htb_init,
1543 	.reset		=	htb_reset,
1544 	.destroy	=	htb_destroy,
1545 	.change		=	NULL /* htb_change */,
1546 	.dump		=	htb_dump,
1547 	.owner		=	THIS_MODULE,
1548 };
1549 
1550 static int __init htb_module_init(void)
1551 {
1552 	return register_qdisc(&htb_qdisc_ops);
1553 }
1554 static void __exit htb_module_exit(void)
1555 {
1556 	unregister_qdisc(&htb_qdisc_ops);
1557 }
1558 
1559 module_init(htb_module_init)
1560 module_exit(htb_module_exit)
1561 MODULE_LICENSE("GPL");
1562