// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_api.c	Packet scheduler API.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/hrtimer.h>
#include <linux/slab.h>
#include <linux/hashtable.h>

#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>

#include <trace/events/qdisc.h>

/*

   Short review.
   -------------

   This file consists of two interrelated parts:

   1. The queueing discipline manager frontend.
   2. The traffic class manager frontend.

   Generally, a queueing discipline ("qdisc") is a black box,
   which is able to enqueue packets and to dequeue them (when
   the device is ready to send something) in order and at times
   determined by the algorithm hidden inside it.

   qdiscs are divided into two categories:
   - "queues", which have no internal structure visible from outside.
   - "schedulers", which split all packets into "traffic classes",
     using "packet classifiers" (see cls_api.c)

   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them, and so on recursively.

   The goal of the routines in this file is to translate
   the information supplied by the user in the form of handles
   into a form more intelligible to the kernel, to perform some
   sanity checks and the part of the work that is common to all
   qdiscs, and to provide rtnetlink notifications.

   All the real intelligent work is done inside the qdisc modules.



   Every discipline has two major routines: enqueue and dequeue.

   ---dequeue

   dequeue usually returns an skb to send. It is allowed to return NULL,
   but that does not mean the queue is empty; it only means that the
   discipline does not want to send anything at this time.
   The queue is really empty only if q->q.qlen == 0.
   For complicated disciplines with multiple queues, q->q is not
   the real packet queue, but q->q.qlen must still be valid.

   ---enqueue

   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code:
   NET_XMIT_DROP 	- this packet was dropped
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN	 	- this packet was probably enqueued, but another one was dropped.
     Expected action: back off or ignore

   Auxiliary routines:

   ---peek

   like dequeue but without removing a packet from the queue

   ---reset

   returns the qdisc to its initial state: purge all buffers, clear all
   timers, counters (except for statistics) etc.

   ---init

   initializes a newly created qdisc.

   ---destroy

   destroys the resources allocated by init and during the lifetime of the qdisc.

   ---change

   changes qdisc parameters.
 */
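
/*
 * Illustrative sketch (not a qdisc from this file): a dequeue that
 * honours the contract above. The foo_* names are hypothetical, while
 * qdisc_dequeue_head() is the real helper that pops an skb off sch->q
 * and keeps sch->q.qlen consistent. Returning NULL while q.qlen > 0
 * simply means "nothing to send right now".
 *
 *	static struct sk_buff *foo_dequeue(struct Qdisc *sch)
 *	{
 *		if (!foo_gate_open(sch))
 *			return NULL;
 *		return qdisc_dequeue_head(sch);
 *	}
 */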

/* Protects list of registered TC modules. It is pure SMP lock. */
static DEFINE_RWLOCK(qdisc_mod_lock);


/************************************************
 *	Queueing disciplines manipulation.	*
 ************************************************/


/* The list of all installed queueing disciplines. */

static struct Qdisc_ops *qdisc_base;

/* Register/unregister queueing discipline */

int register_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int rc = -EEXIST;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (!strcmp(qops->id, q->id))
			goto out;

	if (qops->enqueue == NULL)
		qops->enqueue = noop_qdisc_ops.enqueue;
	if (qops->peek == NULL) {
		if (qops->dequeue == NULL)
			qops->peek = noop_qdisc_ops.peek;
		else
			goto out_einval;
	}
	if (qops->dequeue == NULL)
		qops->dequeue = noop_qdisc_ops.dequeue;

	if (qops->cl_ops) {
		const struct Qdisc_class_ops *cops = qops->cl_ops;

		if (!(cops->find && cops->walk && cops->leaf))
			goto out_einval;

		if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
			goto out_einval;
	}

	qops->next = NULL;
	*qp = qops;
	rc = 0;
out:
	write_unlock(&qdisc_mod_lock);
	return rc;

out_einval:
	rc = -EINVAL;
	goto out;
}
EXPORT_SYMBOL(register_qdisc);

void unregister_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int err = -ENOENT;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (q == qops)
			break;
	if (q) {
		*qp = q->next;
		q->next = NULL;
		err = 0;
	}
	write_unlock(&qdisc_mod_lock);

	WARN(err, "unregister qdisc(%s) failed\n", qops->id);
}
EXPORT_SYMBOL(unregister_qdisc);
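
/*
 * Example (sketch, hypothetical "foo" scheduler): the usual pattern for
 * a modular qdisc is to register its Qdisc_ops on module load and
 * unregister on unload. All foo_* names below are placeholders.
 *
 *	static struct Qdisc_ops foo_qdisc_ops __read_mostly = {
 *		.id		= "foo",
 *		.priv_size	= sizeof(struct foo_sched_data),
 *		.enqueue	= foo_enqueue,
 *		.dequeue	= foo_dequeue,
 *		.peek		= qdisc_peek_dequeued,
 *		.init		= foo_init,
 *		.destroy	= foo_destroy,
 *		.owner		= THIS_MODULE,
 *	};
 *
 *	static int __init foo_module_init(void)
 *	{
 *		return register_qdisc(&foo_qdisc_ops);
 *	}
 *
 *	static void __exit foo_module_exit(void)
 *	{
 *		unregister_qdisc(&foo_qdisc_ops);
 *	}
 */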

/* Get default qdisc if not otherwise specified */
void qdisc_get_default(char *name, size_t len)
{
	read_lock(&qdisc_mod_lock);
	strscpy(name, default_qdisc_ops->id, len);
	read_unlock(&qdisc_mod_lock);
}

static struct Qdisc_ops *qdisc_lookup_default(const char *name)
{
	struct Qdisc_ops *q = NULL;

	for (q = qdisc_base; q; q = q->next) {
		if (!strcmp(name, q->id)) {
			if (!try_module_get(q->owner))
				q = NULL;
			break;
		}
	}

	return q;
}

/* Set new default qdisc to use */
int qdisc_set_default(const char *name)
{
	const struct Qdisc_ops *ops;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	write_lock(&qdisc_mod_lock);
	ops = qdisc_lookup_default(name);
	if (!ops) {
		/* Not found, drop lock and try to load module */
		write_unlock(&qdisc_mod_lock);
		request_module("sch_%s", name);
		write_lock(&qdisc_mod_lock);

		ops = qdisc_lookup_default(name);
	}

	if (ops) {
		/* Set new default */
		module_put(default_qdisc_ops->owner);
		default_qdisc_ops = ops;
	}
	write_unlock(&qdisc_mod_lock);

	return ops ? 0 : -ENOENT;
}

#ifdef CONFIG_NET_SCH_DEFAULT
/* Set default value from kernel config */
static int __init sch_default_qdisc(void)
{
	return qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
}
late_initcall(sch_default_qdisc);
#endif

/* We know the handle. Find the qdisc among all qdiscs attached to the
 * device (root qdisc, all its children, children of children etc.)
 * Note: caller either uses rtnl or rcu_read_lock()
 */

static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
{
	struct Qdisc *q;

	if (!qdisc_dev(root))
		return (root->handle == handle ? root : NULL);

	if (!(root->flags & TCQ_F_BUILTIN) &&
	    root->handle == handle)
		return root;

	hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle,
				   lockdep_rtnl_is_held()) {
		if (q->handle == handle)
			return q;
	}
	return NULL;
}

void qdisc_hash_add(struct Qdisc *q, bool invisible)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		ASSERT_RTNL();
		hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
		if (invisible)
			q->flags |= TCQ_F_INVISIBLE;
	}
}
EXPORT_SYMBOL(qdisc_hash_add);

void qdisc_hash_del(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		ASSERT_RTNL();
		hash_del_rcu(&q->hash);
	}
}
EXPORT_SYMBOL(qdisc_hash_del);

struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
	struct Qdisc *q;

	if (!handle)
		return NULL;
	q = qdisc_match_from_root(rtnl_dereference(dev->qdisc), handle);
	if (q)
		goto out;

	if (dev_ingress_queue(dev))
		q = qdisc_match_from_root(
			rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping),
			handle);
out:
	return q;
}

struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
{
	struct netdev_queue *nq;
	struct Qdisc *q;

	if (!handle)
		return NULL;
	q = qdisc_match_from_root(rcu_dereference(dev->qdisc), handle);
	if (q)
		goto out;

	nq = dev_ingress_queue_rcu(dev);
	if (nq)
		q = qdisc_match_from_root(rcu_dereference(nq->qdisc_sleeping),
					  handle);
out:
	return q;
}

static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
	unsigned long cl;
	const struct Qdisc_class_ops *cops = p->ops->cl_ops;

	if (cops == NULL)
		return NULL;
	cl = cops->find(p, classid);

	if (cl == 0)
		return NULL;
	return cops->leaf(p, cl);
}

/* Find queueing discipline by name */

static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
{
	struct Qdisc_ops *q = NULL;

	if (kind) {
		read_lock(&qdisc_mod_lock);
		for (q = qdisc_base; q; q = q->next) {
			if (nla_strcmp(kind, q->id) == 0) {
				if (!try_module_get(q->owner))
					q = NULL;
				break;
			}
		}
		read_unlock(&qdisc_mod_lock);
	}
	return q;
}

/* The linklayer setting was not transferred from iproute2, in older
 * versions, and the rate table lookup system has been dropped from
 * the kernel. To stay backward compatible with older iproute2 tc
 * utilities, we detect the linklayer setting by detecting whether the
 * rate table was modified.
 *
 * For linklayer ATM table entries, the rate table will be aligned to
 * 48 bytes, thus some table entries will contain the same value.  The
 * mpu (min packet unit) is also encoded into the old rate table, thus
 * starting from the mpu, we find low and high table entries for
 * mapping this cell.  If these entries contain the same value, then
 * the rate table has been modified for linklayer ATM.
 *
 * This is done by rounding mpu to the nearest 48-byte cell/entry,
 * then rounding up to the next cell, calculating the table entry one
 * below, and comparing the two.
 */
static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
{
	int low       = roundup(r->mpu, 48);
	int high      = roundup(low+1, 48);
	int cell_low  = low >> r->cell_log;
	int cell_high = (high >> r->cell_log) - 1;

	/* rtab is too inaccurate at rates > 100Mbit/s */
	if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
		pr_debug("TC linklayer: Giving up ATM detection\n");
		return TC_LINKLAYER_ETHERNET;
	}

	if ((cell_high > cell_low) && (cell_high < 256)
	    && (rtab[cell_low] == rtab[cell_high])) {
		pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
			 cell_low, cell_high, rtab[cell_high]);
		return TC_LINKLAYER_ATM;
	}
	return TC_LINKLAYER_ETHERNET;
}
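
/*
 * Worked example (illustrative, assuming cell_log = 3 and mpu = 0):
 * low = roundup(0, 48) = 0 and high = roundup(1, 48) = 48, giving
 * cell_low = 0 >> 3 = 0 and cell_high = (48 >> 3) - 1 = 5. An
 * ATM-modified table repeats one value across each 48-byte cell, so
 * rtab[0] == rtab[5] indicates ATM; a plain Ethernet table grows with
 * every byte, so the two entries differ.
 */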

static struct qdisc_rate_table *qdisc_rtab_list;

struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
					struct nlattr *tab,
					struct netlink_ext_ack *extack)
{
	struct qdisc_rate_table *rtab;

	if (tab == NULL || r->rate == 0 ||
	    r->cell_log == 0 || r->cell_log >= 32 ||
	    nla_len(tab) != TC_RTAB_SIZE) {
		NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
		return NULL;
	}

	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
		if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
		    !memcmp(&rtab->data, nla_data(tab), 1024)) {
			rtab->refcnt++;
			return rtab;
		}
	}

	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
	if (rtab) {
		rtab->rate = *r;
		rtab->refcnt = 1;
		memcpy(rtab->data, nla_data(tab), 1024);
		if (r->linklayer == TC_LINKLAYER_UNAWARE)
			r->linklayer = __detect_linklayer(r, rtab->data);
		rtab->next = qdisc_rtab_list;
		qdisc_rtab_list = rtab;
	} else {
		NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table");
	}
	return rtab;
}
EXPORT_SYMBOL(qdisc_get_rtab);

void qdisc_put_rtab(struct qdisc_rate_table *tab)
{
	struct qdisc_rate_table *rtab, **rtabp;

	if (!tab || --tab->refcnt)
		return;

	for (rtabp = &qdisc_rtab_list;
	     (rtab = *rtabp) != NULL;
	     rtabp = &rtab->next) {
		if (rtab == tab) {
			*rtabp = rtab->next;
			kfree(rtab);
			return;
		}
	}
}
EXPORT_SYMBOL(qdisc_put_rtab);
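
/*
 * Typical use (sketch; TCA_FOO_RTAB is a hypothetical attribute name,
 * the real ones are per-qdisc): a rate-limited qdisc looks the table
 * up while parsing its options and drops the reference on destroy:
 *
 *	rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_FOO_RTAB], extack);
 *	if (!rtab)
 *		return -EINVAL;
 *	...
 *	qdisc_put_rtab(rtab);
 */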

static LIST_HEAD(qdisc_stab_list);

static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
	[TCA_STAB_DATA] = { .type = NLA_BINARY },
};

static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
					       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_STAB_MAX + 1];
	struct qdisc_size_table *stab;
	struct tc_sizespec *s;
	unsigned int tsize = 0;
	u16 *tab = NULL;
	int err;

	err = nla_parse_nested_deprecated(tb, TCA_STAB_MAX, opt, stab_policy,
					  extack);
	if (err < 0)
		return ERR_PTR(err);
	if (!tb[TCA_STAB_BASE]) {
		NL_SET_ERR_MSG(extack, "Size table base attribute is missing");
		return ERR_PTR(-EINVAL);
	}

	s = nla_data(tb[TCA_STAB_BASE]);

	if (s->tsize > 0) {
		if (!tb[TCA_STAB_DATA]) {
			NL_SET_ERR_MSG(extack, "Size table data attribute is missing");
			return ERR_PTR(-EINVAL);
		}
		tab = nla_data(tb[TCA_STAB_DATA]);
		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
	}

	if (tsize != s->tsize || (!tab && tsize > 0)) {
		NL_SET_ERR_MSG(extack, "Invalid size of size table");
		return ERR_PTR(-EINVAL);
	}

	list_for_each_entry(stab, &qdisc_stab_list, list) {
		if (memcmp(&stab->szopts, s, sizeof(*s)))
			continue;
		if (tsize > 0 &&
		    memcmp(stab->data, tab, flex_array_size(stab, data, tsize)))
			continue;
		stab->refcnt++;
		return stab;
	}

	if (s->size_log > STAB_SIZE_LOG_MAX ||
	    s->cell_log > STAB_SIZE_LOG_MAX) {
		NL_SET_ERR_MSG(extack, "Invalid logarithmic size of size table");
		return ERR_PTR(-EINVAL);
	}

	stab = kmalloc(struct_size(stab, data, tsize), GFP_KERNEL);
	if (!stab)
		return ERR_PTR(-ENOMEM);

	stab->refcnt = 1;
	stab->szopts = *s;
	if (tsize > 0)
		memcpy(stab->data, tab, flex_array_size(stab, data, tsize));

	list_add_tail(&stab->list, &qdisc_stab_list);

	return stab;
}

void qdisc_put_stab(struct qdisc_size_table *tab)
{
	if (!tab)
		return;

	if (--tab->refcnt == 0) {
		list_del(&tab->list);
		kfree_rcu(tab, rcu);
	}
}
EXPORT_SYMBOL(qdisc_put_stab);

static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
{
	struct nlattr *nest;

	nest = nla_nest_start_noflag(skb, TCA_STAB);
	if (nest == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
		goto nla_put_failure;
	nla_nest_end(skb, nest);

	return skb->len;

nla_put_failure:
	return -1;
}

void __qdisc_calculate_pkt_len(struct sk_buff *skb,
			       const struct qdisc_size_table *stab)
{
	int pkt_len, slot;

	pkt_len = skb->len + stab->szopts.overhead;
	if (unlikely(!stab->szopts.tsize))
		goto out;

	slot = pkt_len + stab->szopts.cell_align;
	if (unlikely(slot < 0))
		slot = 0;

	slot >>= stab->szopts.cell_log;
	if (likely(slot < stab->szopts.tsize))
		pkt_len = stab->data[slot];
	else
		pkt_len = stab->data[stab->szopts.tsize - 1] *
				(slot / stab->szopts.tsize) +
				stab->data[slot % stab->szopts.tsize];

	pkt_len <<= stab->szopts.size_log;
out:
	if (unlikely(pkt_len < 1))
		pkt_len = 1;
	qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
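
/*
 * Worked example (illustrative numbers): with overhead = 24,
 * cell_align = 0, cell_log = 6, size_log = 6 and tsize = 512, a
 * 40-byte ACK gives pkt_len = 40 + 24 = 64, slot = 64 >> 6 = 1, and
 * the accounted length becomes stab->data[1] << 6. This is how, e.g.,
 * ATM/ADSL framing overhead can be charged to a packet without
 * touching the skb itself.
 */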

void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
{
	if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
		pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
			txt, qdisc->ops->id, qdisc->handle >> 16);
		qdisc->flags |= TCQ_F_WARN_NONWC;
	}
}
EXPORT_SYMBOL(qdisc_warn_nonwc);

static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
						 timer);

	rcu_read_lock();
	__netif_schedule(qdisc_root(wd->qdisc));
	rcu_read_unlock();

	return HRTIMER_NORESTART;
}

void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
				 clockid_t clockid)
{
	hrtimer_init(&wd->timer, clockid, HRTIMER_MODE_ABS_PINNED);
	wd->timer.function = qdisc_watchdog;
	wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init_clockid);

void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
	qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC);
}
EXPORT_SYMBOL(qdisc_watchdog_init);

void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires,
				      u64 delta_ns)
{
	bool deactivated;

	rcu_read_lock();
	deactivated = test_bit(__QDISC_STATE_DEACTIVATED,
			       &qdisc_root_sleeping(wd->qdisc)->state);
	rcu_read_unlock();
	if (deactivated)
		return;

	if (hrtimer_is_queued(&wd->timer)) {
		/* If timer is already set in [expires, expires + delta_ns],
		 * do not reprogram it.
		 */
		if (wd->last_expires - expires <= delta_ns)
			return;
	}

	wd->last_expires = expires;
	hrtimer_start_range_ns(&wd->timer,
			       ns_to_ktime(expires),
			       delta_ns,
			       HRTIMER_MODE_ABS_PINNED);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule_range_ns);

void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
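
/*
 * Usage sketch (hypothetical non-work-conserving shaper): arm the
 * watchdog from ->dequeue() when the head packet is not yet eligible,
 * so the device gets rescheduled once the gate opens; q, next_tx_time
 * and now are placeholders for the qdisc's private state:
 *
 *	if (next_tx_time > now) {
 *		qdisc_watchdog_schedule_range_ns(&q->watchdog,
 *						 next_tx_time, 0);
 *		return NULL;
 *	}
 */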

static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
{
	struct hlist_head *h;
	unsigned int i;

	h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);

	if (h != NULL) {
		for (i = 0; i < n; i++)
			INIT_HLIST_HEAD(&h[i]);
	}
	return h;
}

void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
	struct Qdisc_class_common *cl;
	struct hlist_node *next;
	struct hlist_head *nhash, *ohash;
	unsigned int nsize, nmask, osize;
	unsigned int i, h;

	/* Rehash when load factor exceeds 0.75 */
	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
		return;
	nsize = clhash->hashsize * 2;
	nmask = nsize - 1;
	nhash = qdisc_class_hash_alloc(nsize);
	if (nhash == NULL)
		return;

	ohash = clhash->hash;
	osize = clhash->hashsize;

	sch_tree_lock(sch);
	for (i = 0; i < osize; i++) {
		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
			h = qdisc_class_hash(cl->classid, nmask);
			hlist_add_head(&cl->hnode, &nhash[h]);
		}
	}
	clhash->hash     = nhash;
	clhash->hashsize = nsize;
	clhash->hashmask = nmask;
	sch_tree_unlock(sch);

	kvfree(ohash);
}
EXPORT_SYMBOL(qdisc_class_hash_grow);

int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
{
	unsigned int size = 4;

	clhash->hash = qdisc_class_hash_alloc(size);
	if (!clhash->hash)
		return -ENOMEM;
	clhash->hashsize  = size;
	clhash->hashmask  = size - 1;
	clhash->hashelems = 0;
	return 0;
}
EXPORT_SYMBOL(qdisc_class_hash_init);

void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
	kvfree(clhash->hash);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);

void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	unsigned int h;

	INIT_HLIST_NODE(&cl->hnode);
	h = qdisc_class_hash(cl->classid, clhash->hashmask);
	hlist_add_head(&cl->hnode, &clhash->hash[h]);
	clhash->hashelems++;
}
EXPORT_SYMBOL(qdisc_class_hash_insert);

void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	hlist_del(&cl->hnode);
	clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);
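
/*
 * Usage sketch: a classful qdisc embeds a struct Qdisc_class_common
 * (keyed by classid) in each class and drives the helpers above
 * roughly like this; q and cl stand for hypothetical private structs:
 *
 *	err = qdisc_class_hash_init(&q->clhash);	(in ->init())
 *	...
 *	cl->common.classid = classid;
 *	qdisc_class_hash_insert(&q->clhash, &cl->common);
 *	qdisc_class_hash_grow(sch, &q->clhash);		(amortized rehash)
 *	...
 *	qdisc_class_hash_remove(&q->clhash, &cl->common);
 *	qdisc_class_hash_destroy(&q->clhash);		(in ->destroy())
 */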

/* Allocate a unique handle from the space managed by the kernel.
 * Possible range is [8000-FFFF]:0000 (0x8000 values)
 */
static u32 qdisc_alloc_handle(struct net_device *dev)
{
	int i = 0x8000;
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

	do {
		autohandle += TC_H_MAKE(0x10000U, 0);
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
		if (!qdisc_lookup(dev, autohandle))
			return autohandle;
		cond_resched();
	} while	(--i > 0);

	return 0;
}
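
/*
 * Example: on a device with no auto-assigned qdiscs yet, the first
 * handle produced by the generator above is 0x80010000, i.e. "8001:"
 * in tc notation, then 8002:, and so on; handles already in use on
 * the device are skipped and the counter wraps within the reserved
 * range.
 */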

void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
{
	bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	u32 parentid;
	bool notify;
	int drops;

	if (n == 0 && len == 0)
		return;
	drops = max_t(int, n, 0);
	rcu_read_lock();
	while ((parentid = sch->parent)) {
		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
			break;

		if (sch->flags & TCQ_F_NOPARENT)
			break;
		/* Notify parent qdisc only if child qdisc becomes empty.
		 *
		 * If child was empty even before update then backlog
		 * counter is screwed and we skip notification because
		 * parent class is already passive.
		 *
		 * If the original child was offloaded then it is allowed
		 * to be seen as empty, so the parent is notified anyway.
		 */
		notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
						       !qdisc_is_offloaded);
		/* TODO: perform the search on a per txq basis */
		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
		if (sch == NULL) {
			WARN_ON_ONCE(parentid != TC_H_ROOT);
			break;
		}
		cops = sch->ops->cl_ops;
		if (notify && cops->qlen_notify) {
			cl = cops->find(sch, parentid);
			cops->qlen_notify(sch, cl);
		}
		sch->q.qlen -= n;
		sch->qstats.backlog -= len;
		__qdisc_qstats_drop(sch, drops);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
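
/*
 * Usage sketch: a child qdisc that drops packets outside its normal
 * enqueue/dequeue path (e.g. from a timer or a ->change() handler)
 * reports the delta upwards so parent qlen/backlog counters and class
 * activity stay consistent; dropped_pkts/dropped_bytes are whatever
 * the caller just removed:
 *
 *	qdisc_tree_reduce_backlog(sch, dropped_pkts, dropped_bytes);
 */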

int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type,
			      void *type_data)
{
	struct net_device *dev = qdisc_dev(sch);
	int err;

	sch->flags &= ~TCQ_F_OFFLOADED;
	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return 0;

	err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
	if (err == -EOPNOTSUPP)
		return 0;

	if (!err)
		sch->flags |= TCQ_F_OFFLOADED;

	return err;
}
EXPORT_SYMBOL(qdisc_offload_dump_helper);

void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
				struct Qdisc *new, struct Qdisc *old,
				enum tc_setup_type type, void *type_data,
				struct netlink_ext_ack *extack)
{
	bool any_qdisc_is_offloaded;
	int err;

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return;

	err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);

	/* Don't report error if the graft is part of destroy operation. */
	if (!err || !new || new == &noop_qdisc)
		return;

	/* Don't report error if the parent, the old child and the new
	 * one are not offloaded.
	 */
	any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED;
	any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED;
	any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED;

	if (any_qdisc_is_offloaded)
		NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
}
EXPORT_SYMBOL(qdisc_offload_graft_helper);

void qdisc_offload_query_caps(struct net_device *dev,
			      enum tc_setup_type type,
			      void *caps, size_t caps_len)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	struct tc_query_caps_base base = {
		.type = type,
		.caps = caps,
	};

	memset(caps, 0, caps_len);

	if (ops->ndo_setup_tc)
		ops->ndo_setup_tc(dev, TC_QUERY_CAPS, &base);
}
EXPORT_SYMBOL(qdisc_offload_query_caps);

static void qdisc_offload_graft_root(struct net_device *dev,
				     struct Qdisc *new, struct Qdisc *old,
				     struct netlink_ext_ack *extack)
{
	struct tc_root_qopt_offload graft_offload = {
		.command	= TC_ROOT_GRAFT,
		.handle		= new ? new->handle : 0,
		.ingress	= (new && new->flags & TCQ_F_INGRESS) ||
				  (old && old->flags & TCQ_F_INGRESS),
	};

	qdisc_offload_graft_helper(dev, NULL, new, old,
				   TC_SETUP_ROOT_QDISC, &graft_offload, extack);
}

static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 portid, u32 seq, u16 flags, int event,
			 struct netlink_ext_ack *extack)
{
	struct gnet_stats_basic_sync __percpu *cpu_bstats = NULL;
	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
	struct tcmsg *tcm;
	struct nlmsghdr  *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	struct qdisc_size_table *stab;
	u32 block_index;
	__u32 qlen;

	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = refcount_read(&q->refcnt);
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (q->ops->ingress_block_get) {
		block_index = q->ops->ingress_block_get(q);
		if (block_index &&
		    nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index))
			goto nla_put_failure;
	}
	if (q->ops->egress_block_get) {
		block_index = q->ops->egress_block_get(q);
		if (block_index &&
		    nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index))
			goto nla_put_failure;
	}
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
		goto nla_put_failure;
	qlen = qdisc_qlen_sum(q);

	stab = rtnl_dereference(q->stab);
	if (stab && qdisc_dump_stab(skb, stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	if (qdisc_is_percpu_stats(q)) {
		cpu_bstats = q->cpu_bstats;
		cpu_qstats = q->cpu_qstats;
	}

	if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats, true) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	if (extack && extack->_msg &&
	    nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
		goto out_nlmsg_trim;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;

	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
{
	if (q->flags & TCQ_F_BUILTIN)
		return true;
	if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
		return true;

	return false;
}

static int qdisc_notify(struct net *net, struct sk_buff *oskb,
			struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new,
			struct netlink_ext_ack *extack)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (old && !tc_qdisc_dump_ignore(old, false)) {
		if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
				  0, RTM_DELQDISC, extack) < 0)
			goto err_out;
	}
	if (new && !tc_qdisc_dump_ignore(new, false)) {
		if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
				  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC, extack) < 0)
			goto err_out;
	}

	if (skb->len)
		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				      n->nlmsg_flags & NLM_F_ECHO);

err_out:
	kfree_skb(skb);
	return -EINVAL;
}

static void notify_and_destroy(struct net *net, struct sk_buff *skb,
			       struct nlmsghdr *n, u32 clid,
			       struct Qdisc *old, struct Qdisc *new,
			       struct netlink_ext_ack *extack)
{
	if (new || old)
		qdisc_notify(net, skb, n, clid, old, new, extack);

	if (old)
		qdisc_put(old);
}

static void qdisc_clear_nolock(struct Qdisc *sch)
{
	sch->flags &= ~TCQ_F_NOLOCK;
	if (!(sch->flags & TCQ_F_CPUSTATS))
		return;

	free_percpu(sch->cpu_bstats);
	free_percpu(sch->cpu_qstats);
	sch->cpu_bstats = NULL;
	sch->cpu_qstats = NULL;
	sch->flags &= ~TCQ_F_CPUSTATS;
}

/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate, send a netlink notification using "skb"
 * and "n".
 *
 * On success, destroy the old qdisc.
 */

static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old,
		       struct netlink_ext_ack *extack)
{
	struct Qdisc *q = old;
	struct net *net = dev_net(dev);

	if (parent == NULL) {
		unsigned int i, num_q, ingress;
		struct netdev_queue *dev_queue;

		ingress = 0;
		num_q = dev->num_tx_queues;
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			ingress = 1;
			dev_queue = dev_ingress_queue(dev);
			if (!dev_queue) {
				NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
				return -ENOENT;
			}

			q = rtnl_dereference(dev_queue->qdisc_sleeping);

			/* This is the counterpart of that qdisc_refcount_inc_nz() call in
			 * __tcf_qdisc_find() for filter requests.
			 */
			if (!qdisc_refcount_dec_if_one(q)) {
				NL_SET_ERR_MSG(extack,
					       "Current ingress or clsact Qdisc has ongoing filter requests");
				return -EBUSY;
			}
		}

		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		qdisc_offload_graft_root(dev, new, old, extack);

		if (new && new->ops->attach && !ingress)
			goto skip;

		if (!ingress) {
			for (i = 0; i < num_q; i++) {
				dev_queue = netdev_get_tx_queue(dev, i);
				old = dev_graft_qdisc(dev_queue, new);

				if (new && i > 0)
					qdisc_refcount_inc(new);
				qdisc_put(old);
			}
		} else {
			old = dev_graft_qdisc(dev_queue, NULL);

			/* {ingress,clsact}_destroy() @old before grafting @new to avoid
			 * unprotected concurrent accesses to net_device::miniq_{in,e}gress
			 * pointer(s) in mini_qdisc_pair_swap().
			 */
			qdisc_notify(net, skb, n, classid, old, new, extack);
			qdisc_destroy(old);

			dev_graft_qdisc(dev_queue, new);
		}

skip:
		if (!ingress) {
			old = rtnl_dereference(dev->qdisc);
			if (new && !new->ops->attach)
				qdisc_refcount_inc(new);
			rcu_assign_pointer(dev->qdisc, new ? : &noop_qdisc);

			notify_and_destroy(net, skb, n, classid, old, new, extack);

			if (new && new->ops->attach)
				new->ops->attach(new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
		unsigned long cl;
		int err;

		/* Only support running class lockless if parent is lockless */
		if (new && (new->flags & TCQ_F_NOLOCK) && !(parent->flags & TCQ_F_NOLOCK))
			qdisc_clear_nolock(new);

		if (!cops || !cops->graft)
			return -EOPNOTSUPP;

		cl = cops->find(parent, classid);
		if (!cl) {
			NL_SET_ERR_MSG(extack, "Specified class not found");
			return -ENOENT;
		}

		if (new && new->ops == &noqueue_qdisc_ops) {
			NL_SET_ERR_MSG(extack, "Cannot assign noqueue to a class");
			return -EINVAL;
		}

		err = cops->graft(parent, cl, new, &old, extack);
		if (err)
			return err;
		notify_and_destroy(net, skb, n, classid, old, new, extack);
	}
	return 0;
}

static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
				   struct netlink_ext_ack *extack)
{
	u32 block_index;

	if (tca[TCA_INGRESS_BLOCK]) {
		block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]);

		if (!block_index) {
			NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0");
			return -EINVAL;
		}
		if (!sch->ops->ingress_block_set) {
			NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported");
			return -EOPNOTSUPP;
		}
		sch->ops->ingress_block_set(sch, block_index);
	}
	if (tca[TCA_EGRESS_BLOCK]) {
		block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]);

		if (!block_index) {
			NL_SET_ERR_MSG(extack, "Egress block index cannot be 0");
			return -EINVAL;
		}
		if (!sch->ops->egress_block_set) {
			NL_SET_ERR_MSG(extack, "Egress block sharing is not supported");
			return -EOPNOTSUPP;
		}
		sch->ops->egress_block_set(sch, block_index);
	}
	return 0;
}

/*
   Allocate and initialize a new qdisc.

   Parameters are passed via opt.
 */

static struct Qdisc *qdisc_create(struct net_device *dev,
				  struct netdev_queue *dev_queue,
				  u32 parent, u32 handle,
				  struct nlattr **tca, int *errp,
				  struct netlink_ext_ack *extack)
{
	int err;
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;
	struct qdisc_size_table *stab;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_MODULES
	if (ops == NULL && kind != NULL) {
		char name[IFNAMSIZ];
		if (nla_strscpy(name, kind, IFNAMSIZ) >= 0) {
			/* We dropped the RTNL semaphore in order to
			 * perform the module load.  So, even if we
			 * succeeded in loading the module we have to
			 * tell the caller to replay the request.  We
			 * indicate this using -EAGAIN.
			 * We replay the request because the device may
			 * go away in the meantime.
			 */
			rtnl_unlock();
			request_module("sch_%s", name);
			rtnl_lock();
			ops = qdisc_lookup_ops(kind);
			if (ops != NULL) {
				/* We will try qdisc_lookup_ops again,
				 * so don't keep a reference.
				 */
				module_put(ops->owner);
				err = -EAGAIN;
				goto err_out;
			}
		}
	}
#endif

	err = -ENOENT;
	if (!ops) {
		NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown");
		goto err_out;
	}

	sch = qdisc_alloc(dev_queue, ops, extack);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	sch->parent = parent;

	if (handle == TC_H_INGRESS) {
		if (!(sch->flags & TCQ_F_INGRESS)) {
			NL_SET_ERR_MSG(extack,
				       "Specified parent ID is reserved for ingress and clsact Qdiscs");
			err = -EINVAL;
			goto err_out3;
		}
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
	} else {
		if (handle == 0) {
			handle = qdisc_alloc_handle(dev);
			if (handle == 0) {
				NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
				err = -ENOSPC;
				goto err_out3;
			}
		}
		if (!netif_is_multiqueue(dev))
			sch->flags |= TCQ_F_ONETXQUEUE;
	}

	sch->handle = handle;

	/* This exists to keep backward compatibility with a userspace
	 * loophole that allowed userspace to get the IFF_NO_QUEUE
	 * facility on older kernels by setting tx_queue_len=0 (prior
	 * to qdisc init), and then forgetting to reinit tx_queue_len
	 * before attaching a qdisc again.
	 */
	if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
		dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
		netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
	}

	err = qdisc_block_indexes_set(sch, tca, extack);
	if (err)
		goto err_out3;

	if (ops->init) {
		err = ops->init(sch, tca[TCA_OPTIONS], extack);
		if (err != 0)
			goto err_out5;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB], extack);
		if (IS_ERR(stab)) {
			err = PTR_ERR(stab);
			goto err_out4;
		}
		rcu_assign_pointer(sch->stab, stab);
	}
	if (tca[TCA_RATE]) {
		err = -EOPNOTSUPP;
		if (sch->flags & TCQ_F_MQROOT) {
			NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
			goto err_out4;
		}

		err = gen_new_estimator(&sch->bstats,
					sch->cpu_bstats,
					&sch->rate_est,
					NULL,
					true,
					tca[TCA_RATE]);
		if (err) {
			NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
			goto err_out4;
		}
	}

	qdisc_hash_add(sch, false);
	trace_qdisc_create(ops, dev, parent);

	return sch;

err_out5:
	/* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
	if (ops->destroy)
		ops->destroy(sch);
err_out3:
	netdev_put(dev, &sch->dev_tracker);
	qdisc_free(sch);
err_out2:
	module_put(ops->owner);
err_out:
	*errp = err;
	return NULL;

err_out4:
	/*
	 * Any broken qdiscs that would require a ops->reset() here?
	 * The qdisc was never in action so it shouldn't be necessary.
	 */
	qdisc_put_stab(rtnl_dereference(sch->stab));
	if (ops->destroy)
		ops->destroy(sch);
	goto err_out3;
}

static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
			struct netlink_ext_ack *extack)
{
	struct qdisc_size_table *ostab, *stab = NULL;
	int err = 0;

	if (tca[TCA_OPTIONS]) {
		if (!sch->ops->change) {
			NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc");
			return -EINVAL;
		}
		if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
			NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
			return -EOPNOTSUPP;
		}
		err = sch->ops->change(sch, tca[TCA_OPTIONS], extack);
		if (err)
			return err;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB], extack);
		if (IS_ERR(stab))
			return PTR_ERR(stab);
	}

	ostab = rtnl_dereference(sch->stab);
	rcu_assign_pointer(sch->stab, stab);
	qdisc_put_stab(ostab);

	if (tca[TCA_RATE]) {
		/* NB: ignores errors from replace_estimator
		   because change can't be undone. */
		if (sch->flags & TCQ_F_MQROOT)
			goto out;
		gen_replace_estimator(&sch->bstats,
				      sch->cpu_bstats,
				      &sch->rate_est,
				      NULL,
				      true,
				      tca[TCA_RATE]);
	}
out:
	return 0;
}

struct check_loop_arg {
	struct qdisc_walker	w;
	struct Qdisc		*p;
	int			depth;
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl,
			 struct qdisc_walker *w);

static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
{
	struct check_loop_arg	arg;

	if (q->ops->cl_ops == NULL)
		return 0;

	arg.w.stop = arg.w.skip = arg.w.count = 0;
	arg.w.fn = check_loop_fn;
	arg.depth = depth;
	arg.p = p;
	q->ops->cl_ops->walk(q, &arg.w);
	return arg.w.stop ? -ELOOP : 0;
}

static int
check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
{
	struct Qdisc *leaf;
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct check_loop_arg *arg = (struct check_loop_arg *)w;

	leaf = cops->leaf(q, cl);
	if (leaf) {
		if (leaf == arg->p || arg->depth > 7)
			return -ELOOP;
		return check_loop(leaf, arg->p, arg->depth + 1);
	}
	return 0;
}

const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
	[TCA_KIND]		= { .type = NLA_STRING },
	[TCA_RATE]		= { .type = NLA_BINARY,
				    .len = sizeof(struct tc_estimator) },
	[TCA_STAB]		= { .type = NLA_NESTED },
	[TCA_DUMP_INVISIBLE]	= { .type = NLA_FLAG },
	[TCA_CHAIN]		= { .type = NLA_U32 },
	[TCA_INGRESS_BLOCK]	= { .type = NLA_U32 },
	[TCA_EGRESS_BLOCK]	= { .type = NLA_U32 },
};

/*
 * Delete/get qdisc.
 */

static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	clid = tcm->tcm_parent;
	if (clid) {
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p) {
					NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
					return -ENOENT;
				}
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue(dev)) {
				q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping);
			}
		} else {
			q = rtnl_dereference(dev->qdisc);
		}
		if (!q) {
			NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
			return -ENOENT;
		}

		if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
			NL_SET_ERR_MSG(extack, "Invalid handle");
			return -EINVAL;
		}
	} else {
		q = qdisc_lookup(dev, tcm->tcm_handle);
		if (!q) {
			NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle");
			return -ENOENT;
		}
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
		NL_SET_ERR_MSG(extack, "Invalid qdisc name");
		return -EINVAL;
	}

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid) {
			NL_SET_ERR_MSG(extack, "Classid cannot be zero");
			return -EINVAL;
		}
		if (q->handle == 0) {
			NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero");
			return -ENOENT;
		}
		err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack);
		if (err != 0)
			return err;
	} else {
		qdisc_notify(net, skb, n, clid, NULL, q, NULL);
	}
	return 0;
}

static bool req_create_or_replace(struct nlmsghdr *n)
{
	return (n->nlmsg_flags & NLM_F_CREATE &&
		n->nlmsg_flags & NLM_F_REPLACE);
}

static bool req_create_exclusive(struct nlmsghdr *n)
{
	return (n->nlmsg_flags & NLM_F_CREATE &&
		n->nlmsg_flags & NLM_F_EXCL);
}

static bool req_change(struct nlmsghdr *n)
{
	return (!(n->nlmsg_flags & NLM_F_CREATE) &&
		!(n->nlmsg_flags & NLM_F_REPLACE) &&
		!(n->nlmsg_flags & NLM_F_EXCL));
}
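
/*
 * For reference (an assumption about userspace behaviour, not enforced
 * here): iproute2's tc maps its qdisc subcommands onto these flag
 * combinations roughly as follows, which is what the three helpers
 * above discriminate:
 *
 *	tc qdisc add	 -> NLM_F_CREATE | NLM_F_EXCL
 *	tc qdisc replace -> NLM_F_CREATE | NLM_F_REPLACE
 *	tc qdisc change	 -> (none of the three flags)
 */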

/*
 * Create/change qdisc.
 */
static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			   struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm;
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q, *p;
	int err;

replay:
	/* Reinit, just in case something touches this. */
	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	tcm = nlmsg_data(n);
	clid = tcm->tcm_parent;
	q = p = NULL;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;


	if (clid) {
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p) {
					NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
					return -ENOENT;
				}
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue_create(dev)) {
				q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping);
			}
		} else {
			q = rtnl_dereference(dev->qdisc);
		}

		/* It may be default qdisc, ignore it */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) {
					NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override");
					return -EEXIST;
				}
				if (TC_H_MIN(tcm->tcm_handle)) {
					NL_SET_ERR_MSG(extack, "Invalid minor handle");
					return -EINVAL;
				}
				q = qdisc_lookup(dev, tcm->tcm_handle);
				if (!q)
					goto create_n_graft;
				if (n->nlmsg_flags & NLM_F_EXCL) {
					NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
					return -EEXIST;
				}
				if (tca[TCA_KIND] &&
				    nla_strcmp(tca[TCA_KIND], q->ops->id)) {
					NL_SET_ERR_MSG(extack, "Invalid qdisc name");
					return -EINVAL;
				}
				if (q->flags & TCQ_F_INGRESS) {
					NL_SET_ERR_MSG(extack,
						       "Cannot regraft ingress or clsact Qdiscs");
					return -EINVAL;
				}
				if (q == p ||
				    (p && check_loop(q, p, 0))) {
					NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
					return -ELOOP;
				}
				if (clid == TC_H_INGRESS) {
					NL_SET_ERR_MSG(extack, "Ingress cannot graft directly");
					return -EINVAL;
				}
				qdisc_refcount_inc(q);
				goto graft;
			} else {
				if (!q)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 *   We know that some child q is already
				 *   attached to this parent and have a choice:
				 *   1) change it or 2) create/graft a new one.
				 *   If the requested qdisc kind differs from
				 *   the existing one, then we choose graft.
				 *   If they are the same then this is a
				 *   "change" operation - just let it fall
				 *   through..
				 *
				 *   1. We are allowed to create/graft only
				 *   if the request explicitly states
				 *   "please create if it doesn't exist".
				 *
				 *   2. If the request is to exclusively create
				 *   then the qdisc tcm_handle is not expected
				 *   to exist, so we choose create/graft too.
				 *
				 *   3. The last case is when no flags are set.
				 *   This will happen when, for example, the tc
				 *   utility issues a "change" command.
				 *   Alas, it is sort of a hole in the API; we
				 *   cannot decide what to do unambiguously.
				 *   For now we select create/graft.
				 */
				if (tca[TCA_KIND] &&
				    nla_strcmp(tca[TCA_KIND], q->ops->id)) {
					if (req_create_or_replace(n) ||
					    req_create_exclusive(n))
						goto create_n_graft;
					else if (req_change(n))
						goto create_n_graft2;
				}
			}
		}
	} else {
		if (!tcm->tcm_handle) {
			NL_SET_ERR_MSG(extack, "Handle cannot be zero");
			return -EINVAL;
		}
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (!q) {
		NL_SET_ERR_MSG(extack, "Specified qdisc not found");
		return -ENOENT;
	}
	if (n->nlmsg_flags & NLM_F_EXCL) {
		NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify");
		return -EEXIST;
	}
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
		NL_SET_ERR_MSG(extack, "Invalid qdisc name");
		return -EINVAL;
	}
	err = qdisc_change(q, tca, extack);
	if (err == 0)
		qdisc_notify(net, skb, n, clid, NULL, q, extack);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags & NLM_F_CREATE)) {
		NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
		return -ENOENT;
	}
create_n_graft2:
	if (clid == TC_H_INGRESS) {
		if (dev_ingress_queue(dev)) {
			q = qdisc_create(dev, dev_ingress_queue(dev),
					 tcm->tcm_parent, tcm->tcm_parent,
					 tca, &err, extack);
		} else {
			NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device");
			err = -ENOENT;
		}
	} else {
		struct netdev_queue *dev_queue;

		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
		else if (p)
			dev_queue = p->dev_queue;
		else
			dev_queue = netdev_get_tx_queue(dev, 0);

		q = qdisc_create(dev, dev_queue,
				 tcm->tcm_parent, tcm->tcm_handle,
				 tca, &err, extack);
	}
	if (q == NULL) {
		if (err == -EAGAIN)
			goto replay;
		return err;
	}

graft:
	err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
	if (err) {
		if (q)
			qdisc_put(q);
		return err;
	}

	return 0;
}
1770 
tc_dump_qdisc_root(struct Qdisc * root,struct sk_buff * skb,struct netlink_callback * cb,int * q_idx_p,int s_q_idx,bool recur,bool dump_invisible)1771 static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1772 			      struct netlink_callback *cb,
1773 			      int *q_idx_p, int s_q_idx, bool recur,
1774 			      bool dump_invisible)
1775 {
1776 	int ret = 0, q_idx = *q_idx_p;
1777 	struct Qdisc *q;
1778 	int b;
1779 
1780 	if (!root)
1781 		return 0;
1782 
1783 	q = root;
1784 	if (q_idx < s_q_idx) {
1785 		q_idx++;
1786 	} else {
1787 		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1788 		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1789 				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
1790 				  RTM_NEWQDISC, NULL) <= 0)
1791 			goto done;
1792 		q_idx++;
1793 	}
1794 
1795 	/* If dumping singletons, there is no qdisc_dev(root) and the singleton
1796 	 * itself has already been dumped.
1797 	 *
1798 	 * If we've already dumped the top-level (ingress) qdisc above and the global
1799 	 * qdisc hashtable, we don't want to hit it again.
1800 	 */
1801 	if (!qdisc_dev(root) || !recur)
1802 		goto out;
1803 
1804 	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
1805 		if (q_idx < s_q_idx) {
1806 			q_idx++;
1807 			continue;
1808 		}
1809 		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1810 		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1811 				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
1812 				  RTM_NEWQDISC, NULL) <= 0)
1813 			goto done;
1814 		q_idx++;
1815 	}
1816 
1817 out:
1818 	*q_idx_p = q_idx;
1819 	return ret;
1820 done:
1821 	ret = -1;
1822 	goto out;
1823 }
1824 
1825 static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1826 {
1827 	struct net *net = sock_net(skb->sk);
1828 	int idx, q_idx;
1829 	int s_idx, s_q_idx;
1830 	struct net_device *dev;
1831 	const struct nlmsghdr *nlh = cb->nlh;
1832 	struct nlattr *tca[TCA_MAX + 1];
1833 	int err;
1834 
1835 	s_idx = cb->args[0];
1836 	s_q_idx = q_idx = cb->args[1];
1837 
1838 	idx = 0;
1839 	ASSERT_RTNL();
1840 
1841 	err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
1842 				     rtm_tca_policy, cb->extack);
1843 	if (err < 0)
1844 		return err;
1845 
1846 	for_each_netdev(net, dev) {
1847 		struct netdev_queue *dev_queue;
1848 
1849 		if (idx < s_idx)
1850 			goto cont;
1851 		if (idx > s_idx)
1852 			s_q_idx = 0;
1853 		q_idx = 0;
1854 
1855 		if (tc_dump_qdisc_root(rtnl_dereference(dev->qdisc),
1856 				       skb, cb, &q_idx, s_q_idx,
1857 				       true, tca[TCA_DUMP_INVISIBLE]) < 0)
1858 			goto done;
1859 
1860 		dev_queue = dev_ingress_queue(dev);
1861 		if (dev_queue &&
1862 		    tc_dump_qdisc_root(rtnl_dereference(dev_queue->qdisc_sleeping),
1863 				       skb, cb, &q_idx, s_q_idx, false,
1864 				       tca[TCA_DUMP_INVISIBLE]) < 0)
1865 			goto done;
1866 
1867 cont:
1868 		idx++;
1869 	}
1870 
1871 done:
1872 	cb->args[0] = idx;
1873 	cb->args[1] = q_idx;
1874 
1875 	return skb->len;
1876 }
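/* Resume bookkeeping for multi-part dumps: the device index is kept in
 * cb->args[0] and the per-device qdisc index in cb->args[1].  When the
 * dump is re-entered after an skb fills up, everything below (s_idx,
 * s_q_idx) is skipped and the walk continues where it left off.
 */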
1877 
1878 
1879 
1880 /************************************************
1881  *	Traffic classes manipulation.		*
1882  ************************************************/
1883 
1884 static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1885 			  unsigned long cl, u32 portid, u32 seq, u16 flags,
1886 			  int event, struct netlink_ext_ack *extack)
1887 {
1888 	struct tcmsg *tcm;
1889 	struct nlmsghdr  *nlh;
1890 	unsigned char *b = skb_tail_pointer(skb);
1891 	struct gnet_dump d;
1892 	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1893 
1894 	cond_resched();
1895 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1896 	if (!nlh)
1897 		goto out_nlmsg_trim;
1898 	tcm = nlmsg_data(nlh);
1899 	tcm->tcm_family = AF_UNSPEC;
1900 	tcm->tcm__pad1 = 0;
1901 	tcm->tcm__pad2 = 0;
1902 	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1903 	tcm->tcm_parent = q->handle;
1904 	tcm->tcm_handle = q->handle;
1905 	tcm->tcm_info = 0;
1906 	if (nla_put_string(skb, TCA_KIND, q->ops->id))
1907 		goto nla_put_failure;
1908 	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1909 		goto nla_put_failure;
1910 
1911 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1912 					 NULL, &d, TCA_PAD) < 0)
1913 		goto nla_put_failure;
1914 
1915 	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1916 		goto nla_put_failure;
1917 
1918 	if (gnet_stats_finish_copy(&d) < 0)
1919 		goto nla_put_failure;
1920 
1921 	if (extack && extack->_msg &&
1922 	    nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
1923 		goto out_nlmsg_trim;
1924 
1925 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1926 
1927 	return skb->len;
1928 
1929 out_nlmsg_trim:
1930 nla_put_failure:
1931 	nlmsg_trim(skb, b);
1932 	return -1;
1933 }
1934 
1935 static int tclass_notify(struct net *net, struct sk_buff *oskb,
1936 			 struct nlmsghdr *n, struct Qdisc *q,
1937 			 unsigned long cl, int event, struct netlink_ext_ack *extack)
1938 {
1939 	struct sk_buff *skb;
1940 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1941 
1942 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1943 	if (!skb)
1944 		return -ENOBUFS;
1945 
1946 	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event, extack) < 0) {
1947 		kfree_skb(skb);
1948 		return -EINVAL;
1949 	}
1950 
1951 	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1952 			      n->nlmsg_flags & NLM_F_ECHO);
1953 }
1954 
1955 static int tclass_del_notify(struct net *net,
1956 			     const struct Qdisc_class_ops *cops,
1957 			     struct sk_buff *oskb, struct nlmsghdr *n,
1958 			     struct Qdisc *q, unsigned long cl,
1959 			     struct netlink_ext_ack *extack)
1960 {
1961 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1962 	struct sk_buff *skb;
1963 	int err = 0;
1964 
1965 	if (!cops->delete)
1966 		return -EOPNOTSUPP;
1967 
1968 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1969 	if (!skb)
1970 		return -ENOBUFS;
1971 
1972 	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
1973 			   RTM_DELTCLASS, extack) < 0) {
1974 		kfree_skb(skb);
1975 		return -EINVAL;
1976 	}
1977 
1978 	err = cops->delete(q, cl, extack);
1979 	if (err) {
1980 		kfree_skb(skb);
1981 		return err;
1982 	}
1983 
1984 	err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1985 			     n->nlmsg_flags & NLM_F_ECHO);
1986 	return err;
1987 }
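/* Note the ordering above: the notification skb is filled in *before*
 * cops->delete() runs, because the class and its statistics must still
 * exist to be dumped.  If the delete itself fails, the prepared skb is
 * simply freed and the error returned.
 */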
1988 
1989 #ifdef CONFIG_NET_CLS
1990 
1991 struct tcf_bind_args {
1992 	struct tcf_walker w;
1993 	unsigned long base;
1994 	unsigned long cl;
1995 	u32 classid;
1996 };
1997 
1998 static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
1999 {
2000 	struct tcf_bind_args *a = (void *)arg;
2001 
2002 	if (n && tp->ops->bind_class) {
2003 		struct Qdisc *q = tcf_block_q(tp->chain->block);
2004 
2005 		sch_tree_lock(q);
2006 		tp->ops->bind_class(n, a->classid, a->cl, q, a->base);
2007 		sch_tree_unlock(q);
2008 	}
2009 	return 0;
2010 }
2011 
2012 struct tc_bind_class_args {
2013 	struct qdisc_walker w;
2014 	unsigned long new_cl;
2015 	u32 portid;
2016 	u32 clid;
2017 };
2018 
2019 static int tc_bind_class_walker(struct Qdisc *q, unsigned long cl,
2020 				struct qdisc_walker *w)
2021 {
2022 	struct tc_bind_class_args *a = (struct tc_bind_class_args *)w;
2023 	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
2024 	struct tcf_block *block;
2025 	struct tcf_chain *chain;
2026 
2027 	block = cops->tcf_block(q, cl, NULL);
2028 	if (!block)
2029 		return 0;
2030 	for (chain = tcf_get_next_chain(block, NULL);
2031 	     chain;
2032 	     chain = tcf_get_next_chain(block, chain)) {
2033 		struct tcf_proto *tp;
2034 
2035 		for (tp = tcf_get_next_proto(chain, NULL);
2036 		     tp; tp = tcf_get_next_proto(chain, tp)) {
2037 			struct tcf_bind_args arg = {};
2038 
2039 			arg.w.fn = tcf_node_bind;
2040 			arg.classid = a->clid;
2041 			arg.base = cl;
2042 			arg.cl = a->new_cl;
2043 			tp->ops->walk(tp, &arg.w, true);
2044 		}
2045 	}
2046 
2047 	return 0;
2048 }
2049 
2050 static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
2051 			   unsigned long new_cl)
2052 {
2053 	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
2054 	struct tc_bind_class_args args = {};
2055 
2056 	if (!cops->tcf_block)
2057 		return;
2058 	args.portid = portid;
2059 	args.clid = clid;
2060 	args.new_cl = new_cl;
2061 	args.w.fn = tc_bind_class_walker;
2062 	q->ops->cl_ops->walk(q, &args.w);
2063 }
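/* tc_bind_tclass() is a two-level walk: tc_bind_class_walker() visits
 * every class of the qdisc, and tcf_node_bind() then visits every filter
 * on every chain of that class's tcf block, rebinding filters that
 * referenced the class to new_cl (0 when the class is being deleted).
 */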
2064 
2065 #else
2066 
2067 static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
2068 			   unsigned long new_cl)
2069 {
2070 }
2071 
2072 #endif
2073 
2074 static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
2075 			 struct netlink_ext_ack *extack)
2076 {
2077 	struct net *net = sock_net(skb->sk);
2078 	struct tcmsg *tcm = nlmsg_data(n);
2079 	struct nlattr *tca[TCA_MAX + 1];
2080 	struct net_device *dev;
2081 	struct Qdisc *q = NULL;
2082 	const struct Qdisc_class_ops *cops;
2083 	unsigned long cl = 0;
2084 	unsigned long new_cl;
2085 	u32 portid;
2086 	u32 clid;
2087 	u32 qid;
2088 	int err;
2089 
2090 	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
2091 				     rtm_tca_policy, extack);
2092 	if (err < 0)
2093 		return err;
2094 
2095 	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
2096 	if (!dev)
2097 		return -ENODEV;
2098 
2099 	/*
2100 	   parent == TC_H_UNSPEC - unspecified parent.
2101 	   parent == TC_H_ROOT   - class is root, which has no parent.
2102 	   parent == X:0	 - parent is root class.
2103 	   parent == X:Y	 - parent is a node in hierarchy.
2104 	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.
2105 
2106 	   handle == 0:0	 - generate handle from kernel pool.
2107 	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
2108 	   handle == X:Y	 - class is X:Y (fully specified).
2109 	   handle == X:0	 - root class.
2110 	 */
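	/* Illustrative encoding (a handle is a 16-bit major:minor pair, in
	 * hex): a hypothetical "tc class add dev eth0 parent 1: classid 1:1"
	 * arrives with tcm_parent == 0x00010000 (1:0) and tcm_handle ==
	 * 0x00010001 (1:1), so both majors below resolve to qdisc 1:0.
	 */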
2111 
2112 	/* Step 1. Determine qdisc handle X:0 */
2113 
2114 	portid = tcm->tcm_parent;
2115 	clid = tcm->tcm_handle;
2116 	qid = TC_H_MAJ(clid);
2117 
2118 	if (portid != TC_H_ROOT) {
2119 		u32 qid1 = TC_H_MAJ(portid);
2120 
2121 		if (qid && qid1) {
2122 			/* If both majors are known, they must be identical. */
2123 			if (qid != qid1)
2124 				return -EINVAL;
2125 		} else if (qid1) {
2126 			qid = qid1;
2127 		} else if (qid == 0)
2128 			qid = rtnl_dereference(dev->qdisc)->handle;
2129 
2130 		/* Now qid is a genuine qdisc handle consistent
2131 		 * with both parent and child.
2132 		 *
2133 		 * TC_H_MAJ(portid) still may be unspecified, complete it now.
2134 		 */
2135 		if (portid)
2136 			portid = TC_H_MAKE(qid, portid);
2137 	} else {
2138 		if (qid == 0)
2139 			qid = rtnl_dereference(dev->qdisc)->handle;
2140 	}
2141 
2142 	/* OK. Locate qdisc */
2143 	q = qdisc_lookup(dev, qid);
2144 	if (!q)
2145 		return -ENOENT;
2146 
2147 	/* And check that it supports classes */
2148 	cops = q->ops->cl_ops;
2149 	if (cops == NULL)
2150 		return -EINVAL;
2151 
2152 	/* Now try to get class */
2153 	if (clid == 0) {
2154 		if (portid == TC_H_ROOT)
2155 			clid = qid;
2156 	} else
2157 		clid = TC_H_MAKE(qid, clid);
2158 
2159 	if (clid)
2160 		cl = cops->find(q, clid);
2161 
2162 	if (cl == 0) {
2163 		err = -ENOENT;
2164 		if (n->nlmsg_type != RTM_NEWTCLASS ||
2165 		    !(n->nlmsg_flags & NLM_F_CREATE))
2166 			goto out;
2167 	} else {
2168 		switch (n->nlmsg_type) {
2169 		case RTM_NEWTCLASS:
2170 			err = -EEXIST;
2171 			if (n->nlmsg_flags & NLM_F_EXCL)
2172 				goto out;
2173 			break;
2174 		case RTM_DELTCLASS:
2175 			err = tclass_del_notify(net, cops, skb, n, q, cl, extack);
2176 			/* Unbind the class from its filters by rebinding them to class 0 */
2177 			tc_bind_tclass(q, portid, clid, 0);
2178 			goto out;
2179 		case RTM_GETTCLASS:
2180 			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS, extack);
2181 			goto out;
2182 		default:
2183 			err = -EINVAL;
2184 			goto out;
2185 		}
2186 	}
2187 
2188 	if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
2189 		NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes");
2190 		return -EOPNOTSUPP;
2191 	}
2192 
2193 	new_cl = cl;
2194 	err = -EOPNOTSUPP;
2195 	if (cops->change)
2196 		err = cops->change(q, clid, portid, tca, &new_cl, extack);
2197 	if (err == 0) {
2198 		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS, extack);
2199 		/* We just created a new class; do the reverse binding. */
2200 		if (cl != new_cl)
2201 			tc_bind_tclass(q, portid, clid, new_cl);
2202 	}
2203 out:
2204 	return err;
2205 }
2206 
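/* Example (illustrative, assuming standard iproute2 flag usage): "tc class
 * add" sends RTM_NEWTCLASS with NLM_F_CREATE|NLM_F_EXCL, "tc class replace"
 * sends NLM_F_CREATE alone, "tc class change" sends no creation flags, and
 * "tc class del" sends RTM_DELTCLASS; all of them are dispatched through
 * tc_ctl_tclass() above.
 */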
2207 struct qdisc_dump_args {
2208 	struct qdisc_walker	w;
2209 	struct sk_buff		*skb;
2210 	struct netlink_callback	*cb;
2211 };
2212 
2213 static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
2214 			    struct qdisc_walker *arg)
2215 {
2216 	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
2217 
2218 	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
2219 			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
2220 			      RTM_NEWTCLASS, NULL);
2221 }
2222 
2223 static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
2224 				struct tcmsg *tcm, struct netlink_callback *cb,
2225 				int *t_p, int s_t)
2226 {
2227 	struct qdisc_dump_args arg;
2228 
2229 	if (tc_qdisc_dump_ignore(q, false) ||
2230 	    *t_p < s_t || !q->ops->cl_ops ||
2231 	    (tcm->tcm_parent &&
2232 	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
2233 		(*t_p)++;
2234 		return 0;
2235 	}
2236 	if (*t_p > s_t)
2237 		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
2238 	arg.w.fn = qdisc_class_dump;
2239 	arg.skb = skb;
2240 	arg.cb = cb;
2241 	arg.w.stop  = 0;
2242 	arg.w.skip = cb->args[1];
2243 	arg.w.count = 0;
2244 	q->ops->cl_ops->walk(q, &arg.w);
2245 	cb->args[1] = arg.w.count;
2246 	if (arg.w.stop)
2247 		return -1;
2248 	(*t_p)++;
2249 	return 0;
2250 }
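/* The memset in tc_dump_tclass_qdisc() clears cb->args[1..] once the walk
 * advances past the qdisc where a previous partial dump stopped, so stale
 * per-qdisc resume state cannot leak into the next qdisc's class walk.
 */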
2251 
2252 static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
2253 			       struct tcmsg *tcm, struct netlink_callback *cb,
2254 			       int *t_p, int s_t, bool recur)
2255 {
2256 	struct Qdisc *q;
2257 	int b;
2258 
2259 	if (!root)
2260 		return 0;
2261 
2262 	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
2263 		return -1;
2264 
2265 	if (!qdisc_dev(root) || !recur)
2266 		return 0;
2267 
2268 	if (tcm->tcm_parent) {
2269 		q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
2270 		if (q && q != root &&
2271 		    tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
2272 			return -1;
2273 		return 0;
2274 	}
2275 	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
2276 		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
2277 			return -1;
2278 	}
2279 
2280 	return 0;
2281 }
2282 
2283 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
2284 {
2285 	struct tcmsg *tcm = nlmsg_data(cb->nlh);
2286 	struct net *net = sock_net(skb->sk);
2287 	struct netdev_queue *dev_queue;
2288 	struct net_device *dev;
2289 	int t, s_t;
2290 
2291 	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
2292 		return 0;
2293 	dev = dev_get_by_index(net, tcm->tcm_ifindex);
2294 	if (!dev)
2295 		return 0;
2296 
2297 	s_t = cb->args[0];
2298 	t = 0;
2299 
2300 	if (tc_dump_tclass_root(rtnl_dereference(dev->qdisc),
2301 				skb, tcm, cb, &t, s_t, true) < 0)
2302 		goto done;
2303 
2304 	dev_queue = dev_ingress_queue(dev);
2305 	if (dev_queue &&
2306 	    tc_dump_tclass_root(rtnl_dereference(dev_queue->qdisc_sleeping),
2307 				skb, tcm, cb, &t, s_t, false) < 0)
2308 		goto done;
2309 
2310 done:
2311 	cb->args[0] = t;
2312 
2313 	dev_put(dev);
2314 	return skb->len;
2315 }
2316 
2317 #ifdef CONFIG_PROC_FS
2318 static int psched_show(struct seq_file *seq, void *v)
2319 {
2320 	seq_printf(seq, "%08x %08x %08x %08x\n",
2321 		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
2322 		   1000000,
2323 		   (u32)NSEC_PER_SEC / hrtimer_resolution);
2324 
2325 	return 0;
2326 }
2327 
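/* On a system with high-resolution timers this typically reads, e.g.:
 *
 *	000003e8 00000040 000f4240 3b9aca00
 *
 * i.e. 1000 ns per usec, 64 ns per psched tick (assuming PSCHED_SHIFT == 6),
 * the historical constant 1000000, and NSEC_PER_SEC divided by the hrtimer
 * resolution.  Userspace (iproute2) parses these fields to set up its
 * tick<->time conversion.
 */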
2328 static int __net_init psched_net_init(struct net *net)
2329 {
2330 	struct proc_dir_entry *e;
2331 
2332 	e = proc_create_single("psched", 0, net->proc_net, psched_show);
2333 	if (e == NULL)
2334 		return -ENOMEM;
2335 
2336 	return 0;
2337 }
2338 
2339 static void __net_exit psched_net_exit(struct net *net)
2340 {
2341 	remove_proc_entry("psched", net->proc_net);
2342 }
2343 #else
2344 static int __net_init psched_net_init(struct net *net)
2345 {
2346 	return 0;
2347 }
2348 
2349 static void __net_exit psched_net_exit(struct net *net)
2350 {
2351 }
2352 #endif
2353 
2354 static struct pernet_operations psched_net_ops = {
2355 	.init = psched_net_init,
2356 	.exit = psched_net_exit,
2357 };
2358 
2359 static int __init pktsched_init(void)
2360 {
2361 	int err;
2362 
2363 	err = register_pernet_subsys(&psched_net_ops);
2364 	if (err) {
2365 		pr_err("pktsched_init: "
2366 		       "cannot initialize per netns operations\n");
2367 		return err;
2368 	}
2369 
2370 	register_qdisc(&pfifo_fast_ops);
2371 	register_qdisc(&pfifo_qdisc_ops);
2372 	register_qdisc(&bfifo_qdisc_ops);
2373 	register_qdisc(&pfifo_head_drop_qdisc_ops);
2374 	register_qdisc(&mq_qdisc_ops);
2375 	register_qdisc(&noqueue_qdisc_ops);
2376 
2377 	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
2378 	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
2379 	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
2380 		      0);
2381 	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
2382 	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
2383 	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
2384 		      0);
2385 
2386 	return 0;
2387 }
2388 
2389 subsys_initcall(pktsched_init);
2390