• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * net/sched/sch_api.c	Packet scheduler API.
4  *
5  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
6  *
7  * Fixes:
8  *
9  * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
10  * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
11  * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
12  */
13 
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/kernel.h>
17 #include <linux/string.h>
18 #include <linux/errno.h>
19 #include <linux/skbuff.h>
20 #include <linux/init.h>
21 #include <linux/proc_fs.h>
22 #include <linux/seq_file.h>
23 #include <linux/kmod.h>
24 #include <linux/list.h>
25 #include <linux/hrtimer.h>
26 #include <linux/slab.h>
27 #include <linux/hashtable.h>
28 
29 #include <net/net_namespace.h>
30 #include <net/sock.h>
31 #include <net/netlink.h>
32 #include <net/pkt_sched.h>
33 #include <net/pkt_cls.h>
34 #include <net/tc_wrapper.h>
35 
36 #include <trace/events/qdisc.h>
37 
38 /*
39 
40    Short review.
41    -------------
42 
43    This file consists of two interrelated parts:
44 
45    1. queueing disciplines manager frontend.
46    2. traffic classes manager frontend.
47 
48    Generally, queueing discipline ("qdisc") is a black box,
49    which is able to enqueue packets and to dequeue them (when
50    device is ready to send something) in order and at times
51    determined by algorithm hidden in it.
52 
53    Qdiscs are divided into two categories:
54    - "queues", which have no internal structure visible from outside.
55    - "schedulers", which split all the packets to "traffic classes",
56      using "packet classifiers" (look at cls_api.c)
57 
58    In turn, classes may have child qdiscs (as rule, queues)
59    attached to them etc. etc. etc.
60 
61    The goal of the routines in this file is to translate
62    information supplied by the user in the form of handles
63    into a form more intelligible to the kernel, to perform sanity
64    checks and the part of the work that is common to all qdiscs,
65    and to provide rtnetlink notifications.
66 
67    All real intelligent work is done inside qdisc modules.
68 
69 
70 
71    Every discipline has two major routines: enqueue and dequeue.
72 
73    ---dequeue
74 
75    dequeue usually returns a skb to send. It is allowed to return NULL,
76    but it does not mean that queue is empty, it just means that
77    discipline does not want to send anything this time.
78    Queue is really empty if q->q.qlen == 0.
79    For complicated disciplines with multiple queues q->q is not
80    real packet queue, but however q->q.qlen must be valid.
81 
82    ---enqueue
83 
84    enqueue returns 0, if packet was enqueued successfully.
85    If a packet (this one or another one) was dropped, it returns
86    a non-zero error code.
87    NET_XMIT_DROP 	- this packet dropped
88      Expected action: do not backoff, but wait until queue will clear.
89    NET_XMIT_CN	 	- probably this packet enqueued, but another one dropped.
90      Expected action: backoff or ignore
91 
92    Auxiliary routines:
93 
94    ---peek
95 
96    like dequeue but without removing a packet from the queue
97 
98    ---reset
99 
100    returns qdisc to initial state: purge all buffers, clear all
101    timers, counters (except for statistics) etc.
102 
103    ---init
104 
105    initializes newly created qdisc.
106 
107    ---destroy
108 
109    destroys resources allocated by init and during lifetime of qdisc.
110 
111    ---change
112 
113    changes qdisc parameters.
114  */
115 
116 /* Protects the list of registered TC modules (qdisc_base) and default_qdisc_ops. It is a pure SMP lock. */
117 static DEFINE_RWLOCK(qdisc_mod_lock);
118 
119 
120 /************************************************
121  *	Queueing disciplines manipulation.	*
122  ************************************************/
123 
124 
125 /* Head of the list of all installed queueing disciplines, chained through ->next. */
126 
127 static struct Qdisc_ops *qdisc_base;
128 
129 /* Register/unregister queueing discipline */
130 
register_qdisc(struct Qdisc_ops * qops)131 int register_qdisc(struct Qdisc_ops *qops)
132 {
133 	struct Qdisc_ops *q, **qp;
134 	int rc = -EEXIST;
135 
136 	write_lock(&qdisc_mod_lock);
137 	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
138 		if (!strcmp(qops->id, q->id))
139 			goto out;
140 
141 	if (qops->enqueue == NULL)
142 		qops->enqueue = noop_qdisc_ops.enqueue;
143 	if (qops->peek == NULL) {
144 		if (qops->dequeue == NULL)
145 			qops->peek = noop_qdisc_ops.peek;
146 		else
147 			goto out_einval;
148 	}
149 	if (qops->dequeue == NULL)
150 		qops->dequeue = noop_qdisc_ops.dequeue;
151 
152 	if (qops->cl_ops) {
153 		const struct Qdisc_class_ops *cops = qops->cl_ops;
154 
155 		if (!(cops->find && cops->walk && cops->leaf))
156 			goto out_einval;
157 
158 		if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
159 			goto out_einval;
160 	}
161 
162 	qops->next = NULL;
163 	*qp = qops;
164 	rc = 0;
165 out:
166 	write_unlock(&qdisc_mod_lock);
167 	return rc;
168 
169 out_einval:
170 	rc = -EINVAL;
171 	goto out;
172 }
173 EXPORT_SYMBOL(register_qdisc);
174 
unregister_qdisc(struct Qdisc_ops * qops)175 void unregister_qdisc(struct Qdisc_ops *qops)
176 {
177 	struct Qdisc_ops *q, **qp;
178 	int err = -ENOENT;
179 
180 	write_lock(&qdisc_mod_lock);
181 	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
182 		if (q == qops)
183 			break;
184 	if (q) {
185 		*qp = q->next;
186 		q->next = NULL;
187 		err = 0;
188 	}
189 	write_unlock(&qdisc_mod_lock);
190 
191 	WARN(err, "unregister qdisc(%s) failed\n", qops->id);
192 }
193 EXPORT_SYMBOL(unregister_qdisc);
194 
195 /* Get default qdisc if not otherwise specified */
qdisc_get_default(char * name,size_t len)196 void qdisc_get_default(char *name, size_t len)
197 {
198 	read_lock(&qdisc_mod_lock);
199 	strscpy(name, default_qdisc_ops->id, len);
200 	read_unlock(&qdisc_mod_lock);
201 }
202 
qdisc_lookup_default(const char * name)203 static struct Qdisc_ops *qdisc_lookup_default(const char *name)
204 {
205 	struct Qdisc_ops *q = NULL;
206 
207 	for (q = qdisc_base; q; q = q->next) {
208 		if (!strcmp(name, q->id)) {
209 			if (!try_module_get(q->owner))
210 				q = NULL;
211 			break;
212 		}
213 	}
214 
215 	return q;
216 }
217 
218 /* Set new default qdisc to use */
qdisc_set_default(const char * name)219 int qdisc_set_default(const char *name)
220 {
221 	const struct Qdisc_ops *ops;
222 
223 	if (!capable(CAP_NET_ADMIN))
224 		return -EPERM;
225 
226 	write_lock(&qdisc_mod_lock);
227 	ops = qdisc_lookup_default(name);
228 	if (!ops) {
229 		/* Not found, drop lock and try to load module */
230 		write_unlock(&qdisc_mod_lock);
231 		request_module(NET_SCH_ALIAS_PREFIX "%s", name);
232 		write_lock(&qdisc_mod_lock);
233 
234 		ops = qdisc_lookup_default(name);
235 	}
236 
237 	if (ops) {
238 		/* Set new default */
239 		module_put(default_qdisc_ops->owner);
240 		default_qdisc_ops = ops;
241 	}
242 	write_unlock(&qdisc_mod_lock);
243 
244 	return ops ? 0 : -ENOENT;
245 }
246 
#ifdef CONFIG_NET_SCH_DEFAULT
/*
 * Late initcall that installs the default qdisc named by the kernel
 * configuration (CONFIG_DEFAULT_NET_SCH). Returns whatever
 * qdisc_set_default() returns.
 */
static int __init sch_default_qdisc(void)
{
	const char *name = CONFIG_DEFAULT_NET_SCH;

	return qdisc_set_default(name);
}
late_initcall(sch_default_qdisc);
#endif
255 
256 /* We know handle. Find qdisc among all qdisc's attached to device
257  * (root qdisc, all its children, children of children etc.)
258  * Note: caller either uses rtnl or rcu_read_lock()
259  */
260 
qdisc_match_from_root(struct Qdisc * root,u32 handle)261 static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
262 {
263 	struct Qdisc *q;
264 
265 	if (!qdisc_dev(root))
266 		return (root->handle == handle ? root : NULL);
267 
268 	if (!(root->flags & TCQ_F_BUILTIN) &&
269 	    root->handle == handle)
270 		return root;
271 
272 	hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle,
273 				   lockdep_rtnl_is_held()) {
274 		if (q->handle == handle)
275 			return q;
276 	}
277 	return NULL;
278 }
279 
/*
 * Insert @q into its device's qdisc hash table, keyed by handle.
 *
 * @q:         qdisc to add.
 * @invisible: when true, also mark the qdisc TCQ_F_INVISIBLE.
 *
 * Nothing is done (not even the flag update) for a qdisc whose parent is
 * TC_H_ROOT or that has TCQ_F_INGRESS set.
 */
void qdisc_hash_add(struct Qdisc *q, bool invisible)
{
	if (q->parent == TC_H_ROOT || (q->flags & TCQ_F_INGRESS))
		return;

	ASSERT_RTNL();
	hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);

	if (invisible)
		q->flags |= TCQ_F_INVISIBLE;
}
EXPORT_SYMBOL(qdisc_hash_add);
290 
qdisc_hash_del(struct Qdisc * q)291 void qdisc_hash_del(struct Qdisc *q)
292 {
293 	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
294 		ASSERT_RTNL();
295 		hash_del_rcu(&q->hash);
296 	}
297 }
298 EXPORT_SYMBOL(qdisc_hash_del);
299 
/*
 * Look up a qdisc on @dev by handle, using rtnl_dereference() accessors.
 *
 * @dev:    device whose qdiscs are searched.
 * @handle: handle to find; 0 never matches.
 *
 * The tree under dev->qdisc is searched first; if nothing matches and the
 * device has an ingress queue, the tree under that queue's qdisc_sleeping
 * is searched as well.
 *
 * Returns the qdisc or NULL.
 */
struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
	struct netdev_queue *ingress;
	struct Qdisc *q;

	if (!handle)
		return NULL;

	q = qdisc_match_from_root(rtnl_dereference(dev->qdisc), handle);
	if (q)
		return q;

	ingress = dev_ingress_queue(dev);
	if (!ingress)
		return NULL;

	return qdisc_match_from_root(rtnl_dereference(ingress->qdisc_sleeping),
				     handle);
}
317 
/*
 * RCU flavour of qdisc_lookup(): same search order, but the root pointers
 * are read with rcu_dereference() and the ingress queue is obtained via
 * dev_ingress_queue_rcu().
 *
 * @dev:    device whose qdiscs are searched.
 * @handle: handle to find; 0 never matches.
 *
 * Returns the qdisc or NULL.
 */
struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
{
	struct netdev_queue *ingress;
	struct Qdisc *q;

	if (!handle)
		return NULL;

	q = qdisc_match_from_root(rcu_dereference(dev->qdisc), handle);
	if (q)
		return q;

	ingress = dev_ingress_queue_rcu(dev);
	if (!ingress)
		return NULL;

	return qdisc_match_from_root(rcu_dereference(ingress->qdisc_sleeping),
				     handle);
}
336 
/*
 * Resolve the qdisc attached to class @classid of parent qdisc @p.
 *
 * @p:       parent qdisc.
 * @classid: class to look up through the parent's class ops.
 * @extack:  receives an error message on failure.
 *
 * Returns the result of the parent's ->leaf() callback,
 * ERR_PTR(-EOPNOTSUPP) if @p has no class ops, or ERR_PTR(-ENOENT) if
 * ->find() returns 0 for @classid.
 */
static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid,
				struct netlink_ext_ack *extack)
{
	const struct Qdisc_class_ops *cops = p->ops->cl_ops;
	unsigned long cl;

	if (!cops) {
		NL_SET_ERR_MSG(extack, "Parent qdisc is not classful");
		return ERR_PTR(-EOPNOTSUPP);
	}

	cl = cops->find(p, classid);
	if (!cl) {
		NL_SET_ERR_MSG(extack, "Specified class not found");
		return ERR_PTR(-ENOENT);
	}

	return cops->leaf(p, cl);
}
355 
356 /* Find queueing discipline by name */
357 
qdisc_lookup_ops(struct nlattr * kind)358 static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
359 {
360 	struct Qdisc_ops *q = NULL;
361 
362 	if (kind) {
363 		read_lock(&qdisc_mod_lock);
364 		for (q = qdisc_base; q; q = q->next) {
365 			if (nla_strcmp(kind, q->id) == 0) {
366 				if (!try_module_get(q->owner))
367 					q = NULL;
368 				break;
369 			}
370 		}
371 		read_unlock(&qdisc_mod_lock);
372 	}
373 	return q;
374 }
375 
376 /* The linklayer setting were not transferred from iproute2, in older
377  * versions, and the rate tables lookup systems have been dropped in
378  * the kernel. To keep backward compatible with older iproute2 tc
379  * utils, we detect the linklayer setting by detecting if the rate
380  * table were modified.
381  *
382  * For linklayer ATM table entries, the rate table will be aligned to
383  * 48 bytes, thus some table entries will contain the same value.  The
384  * mpu (min packet unit) is also encoded into the old rate table, thus
385  * starting from the mpu, we find low and high table entries for
386  * mapping this cell.  If these entries contain the same value, when
387  * the rate tables have been modified for linklayer ATM.
388  *
389  * This is done by rounding mpu to the nearest 48 bytes cell/entry,
390  * and then roundup to the next cell, calc the table entry one below,
391  * and compare.
392  */
__detect_linklayer(struct tc_ratespec * r,__u32 * rtab)393 static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
394 {
395 	int low       = roundup(r->mpu, 48);
396 	int high      = roundup(low+1, 48);
397 	int cell_low  = low >> r->cell_log;
398 	int cell_high = (high >> r->cell_log) - 1;
399 
400 	/* rtab is too inaccurate at rates > 100Mbit/s */
401 	if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
402 		pr_debug("TC linklayer: Giving up ATM detection\n");
403 		return TC_LINKLAYER_ETHERNET;
404 	}
405 
406 	if ((cell_high > cell_low) && (cell_high < 256)
407 	    && (rtab[cell_low] == rtab[cell_high])) {
408 		pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
409 			 cell_low, cell_high, rtab[cell_high]);
410 		return TC_LINKLAYER_ATM;
411 	}
412 	return TC_LINKLAYER_ETHERNET;
413 }
414 
/* Head of the list of shared rate tables, chained through ->next. */
415 static struct qdisc_rate_table *qdisc_rtab_list;
416 
qdisc_get_rtab(struct tc_ratespec * r,struct nlattr * tab,struct netlink_ext_ack * extack)417 struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
418 					struct nlattr *tab,
419 					struct netlink_ext_ack *extack)
420 {
421 	struct qdisc_rate_table *rtab;
422 
423 	if (tab == NULL || r->rate == 0 ||
424 	    r->cell_log == 0 || r->cell_log >= 32 ||
425 	    nla_len(tab) != TC_RTAB_SIZE) {
426 		NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
427 		return NULL;
428 	}
429 
430 	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
431 		if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
432 		    !memcmp(&rtab->data, nla_data(tab), 1024)) {
433 			rtab->refcnt++;
434 			return rtab;
435 		}
436 	}
437 
438 	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
439 	if (rtab) {
440 		rtab->rate = *r;
441 		rtab->refcnt = 1;
442 		memcpy(rtab->data, nla_data(tab), 1024);
443 		if (r->linklayer == TC_LINKLAYER_UNAWARE)
444 			r->linklayer = __detect_linklayer(r, rtab->data);
445 		rtab->next = qdisc_rtab_list;
446 		qdisc_rtab_list = rtab;
447 	} else {
448 		NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table");
449 	}
450 	return rtab;
451 }
452 EXPORT_SYMBOL(qdisc_get_rtab);
453 
qdisc_put_rtab(struct qdisc_rate_table * tab)454 void qdisc_put_rtab(struct qdisc_rate_table *tab)
455 {
456 	struct qdisc_rate_table *rtab, **rtabp;
457 
458 	if (!tab || --tab->refcnt)
459 		return;
460 
461 	for (rtabp = &qdisc_rtab_list;
462 	     (rtab = *rtabp) != NULL;
463 	     rtabp = &rtab->next) {
464 		if (rtab == tab) {
465 			*rtabp = rtab->next;
466 			kfree(rtab);
467 			return;
468 		}
469 	}
470 }
471 EXPORT_SYMBOL(qdisc_put_rtab);
472 
/* List of size tables in use; identical tables are shared and refcounted. */
473 static LIST_HEAD(qdisc_stab_list);
474 
/* Netlink policy used by qdisc_get_stab() to parse the nested size-table attributes. */
475 static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
476 	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
477 	[TCA_STAB_DATA] = { .type = NLA_BINARY },
478 };
479 
qdisc_get_stab(struct nlattr * opt,struct netlink_ext_ack * extack)480 static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
481 					       struct netlink_ext_ack *extack)
482 {
483 	struct nlattr *tb[TCA_STAB_MAX + 1];
484 	struct qdisc_size_table *stab;
485 	struct tc_sizespec *s;
486 	unsigned int tsize = 0;
487 	u16 *tab = NULL;
488 	int err;
489 
490 	err = nla_parse_nested_deprecated(tb, TCA_STAB_MAX, opt, stab_policy,
491 					  extack);
492 	if (err < 0)
493 		return ERR_PTR(err);
494 	if (!tb[TCA_STAB_BASE]) {
495 		NL_SET_ERR_MSG(extack, "Size table base attribute is missing");
496 		return ERR_PTR(-EINVAL);
497 	}
498 
499 	s = nla_data(tb[TCA_STAB_BASE]);
500 
501 	if (s->tsize > 0) {
502 		if (!tb[TCA_STAB_DATA]) {
503 			NL_SET_ERR_MSG(extack, "Size table data attribute is missing");
504 			return ERR_PTR(-EINVAL);
505 		}
506 		tab = nla_data(tb[TCA_STAB_DATA]);
507 		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
508 	}
509 
510 	if (tsize != s->tsize || (!tab && tsize > 0)) {
511 		NL_SET_ERR_MSG(extack, "Invalid size of size table");
512 		return ERR_PTR(-EINVAL);
513 	}
514 
515 	list_for_each_entry(stab, &qdisc_stab_list, list) {
516 		if (memcmp(&stab->szopts, s, sizeof(*s)))
517 			continue;
518 		if (tsize > 0 &&
519 		    memcmp(stab->data, tab, flex_array_size(stab, data, tsize)))
520 			continue;
521 		stab->refcnt++;
522 		return stab;
523 	}
524 
525 	if (s->size_log > STAB_SIZE_LOG_MAX ||
526 	    s->cell_log > STAB_SIZE_LOG_MAX) {
527 		NL_SET_ERR_MSG(extack, "Invalid logarithmic size of size table");
528 		return ERR_PTR(-EINVAL);
529 	}
530 
531 	stab = kmalloc(struct_size(stab, data, tsize), GFP_KERNEL);
532 	if (!stab)
533 		return ERR_PTR(-ENOMEM);
534 
535 	stab->refcnt = 1;
536 	stab->szopts = *s;
537 	if (tsize > 0)
538 		memcpy(stab->data, tab, flex_array_size(stab, data, tsize));
539 
540 	list_add_tail(&stab->list, &qdisc_stab_list);
541 
542 	return stab;
543 }
544 
qdisc_put_stab(struct qdisc_size_table * tab)545 void qdisc_put_stab(struct qdisc_size_table *tab)
546 {
547 	if (!tab)
548 		return;
549 
550 	if (--tab->refcnt == 0) {
551 		list_del(&tab->list);
552 		kfree_rcu(tab, rcu);
553 	}
554 }
555 EXPORT_SYMBOL(qdisc_put_stab);
556 
qdisc_dump_stab(struct sk_buff * skb,struct qdisc_size_table * stab)557 static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
558 {
559 	struct nlattr *nest;
560 
561 	nest = nla_nest_start_noflag(skb, TCA_STAB);
562 	if (nest == NULL)
563 		goto nla_put_failure;
564 	if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
565 		goto nla_put_failure;
566 	nla_nest_end(skb, nest);
567 
568 	return skb->len;
569 
570 nla_put_failure:
571 	return -1;
572 }
573 
__qdisc_calculate_pkt_len(struct sk_buff * skb,const struct qdisc_size_table * stab)574 void __qdisc_calculate_pkt_len(struct sk_buff *skb,
575 			       const struct qdisc_size_table *stab)
576 {
577 	int pkt_len, slot;
578 
579 	pkt_len = skb->len + stab->szopts.overhead;
580 	if (unlikely(!stab->szopts.tsize))
581 		goto out;
582 
583 	slot = pkt_len + stab->szopts.cell_align;
584 	if (unlikely(slot < 0))
585 		slot = 0;
586 
587 	slot >>= stab->szopts.cell_log;
588 	if (likely(slot < stab->szopts.tsize))
589 		pkt_len = stab->data[slot];
590 	else
591 		pkt_len = stab->data[stab->szopts.tsize - 1] *
592 				(slot / stab->szopts.tsize) +
593 				stab->data[slot % stab->szopts.tsize];
594 
595 	pkt_len <<= stab->szopts.size_log;
596 out:
597 	if (unlikely(pkt_len < 1))
598 		pkt_len = 1;
599 	qdisc_skb_cb(skb)->pkt_len = pkt_len;
600 }
601 EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
602 
qdisc_warn_nonwc(const char * txt,struct Qdisc * qdisc)603 void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
604 {
605 	if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
606 		pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
607 			txt, qdisc->ops->id, qdisc->handle >> 16);
608 		qdisc->flags |= TCQ_F_WARN_NONWC;
609 	}
610 }
611 EXPORT_SYMBOL(qdisc_warn_nonwc);
612 
qdisc_watchdog(struct hrtimer * timer)613 static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
614 {
615 	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
616 						 timer);
617 
618 	rcu_read_lock();
619 	__netif_schedule(qdisc_root(wd->qdisc));
620 	rcu_read_unlock();
621 
622 	return HRTIMER_NORESTART;
623 }
624 
/*
 * Initialise a qdisc watchdog on the given clock.
 *
 * @wd:      watchdog to set up.
 * @qdisc:   qdisc the watchdog belongs to.
 * @clockid: clock for the underlying hrtimer, which is initialised in
 *           HRTIMER_MODE_ABS_PINNED mode with qdisc_watchdog() as its
 *           callback.
 */
void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
				 clockid_t clockid)
{
	struct hrtimer *timer = &wd->timer;

	hrtimer_init(timer, clockid, HRTIMER_MODE_ABS_PINNED);
	timer->function = qdisc_watchdog;
	wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init_clockid);
633 
qdisc_watchdog_init(struct qdisc_watchdog * wd,struct Qdisc * qdisc)634 void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
635 {
636 	qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC);
637 }
638 EXPORT_SYMBOL(qdisc_watchdog_init);
639 
/*
 * Arm the watchdog timer to expire within [expires, expires + delta_ns].
 *
 * @wd:       watchdog to arm.
 * @expires:  absolute expiry time in nanoseconds.
 * @delta_ns: allowed slack after @expires.
 *
 * Nothing is done when the sleeping root qdisc has
 * __QDISC_STATE_DEACTIVATED set, or when the timer is already queued with
 * a soft expiry inside the requested window.
 */
void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires,
				      u64 delta_ns)
{
	bool deactivated;

	rcu_read_lock();
	deactivated = test_bit(__QDISC_STATE_DEACTIVATED,
			       &qdisc_root_sleeping(wd->qdisc)->state);
	rcu_read_unlock();

	if (deactivated)
		return;

	if (hrtimer_is_queued(&wd->timer)) {
		u64 queued_ns = ktime_to_ns(hrtimer_get_softexpires(&wd->timer));

		/* If timer is already set in [expires, expires + delta_ns],
		 * do not reprogram it.
		 */
		if (queued_ns - expires <= delta_ns)
			return;
	}

	hrtimer_start_range_ns(&wd->timer, ns_to_ktime(expires), delta_ns,
			       HRTIMER_MODE_ABS_PINNED);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule_range_ns);
669 
qdisc_watchdog_cancel(struct qdisc_watchdog * wd)670 void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
671 {
672 	hrtimer_cancel(&wd->timer);
673 }
674 EXPORT_SYMBOL(qdisc_watchdog_cancel);
675 
qdisc_class_hash_alloc(unsigned int n)676 static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
677 {
678 	struct hlist_head *h;
679 	unsigned int i;
680 
681 	h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);
682 
683 	if (h != NULL) {
684 		for (i = 0; i < n; i++)
685 			INIT_HLIST_HEAD(&h[i]);
686 	}
687 	return h;
688 }
689 
qdisc_class_hash_grow(struct Qdisc * sch,struct Qdisc_class_hash * clhash)690 void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
691 {
692 	struct Qdisc_class_common *cl;
693 	struct hlist_node *next;
694 	struct hlist_head *nhash, *ohash;
695 	unsigned int nsize, nmask, osize;
696 	unsigned int i, h;
697 
698 	/* Rehash when load factor exceeds 0.75 */
699 	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
700 		return;
701 	nsize = clhash->hashsize * 2;
702 	nmask = nsize - 1;
703 	nhash = qdisc_class_hash_alloc(nsize);
704 	if (nhash == NULL)
705 		return;
706 
707 	ohash = clhash->hash;
708 	osize = clhash->hashsize;
709 
710 	sch_tree_lock(sch);
711 	for (i = 0; i < osize; i++) {
712 		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
713 			h = qdisc_class_hash(cl->classid, nmask);
714 			hlist_add_head(&cl->hnode, &nhash[h]);
715 		}
716 	}
717 	clhash->hash     = nhash;
718 	clhash->hashsize = nsize;
719 	clhash->hashmask = nmask;
720 	sch_tree_unlock(sch);
721 
722 	kvfree(ohash);
723 }
724 EXPORT_SYMBOL(qdisc_class_hash_grow);
725 
qdisc_class_hash_init(struct Qdisc_class_hash * clhash)726 int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
727 {
728 	unsigned int size = 4;
729 
730 	clhash->hash = qdisc_class_hash_alloc(size);
731 	if (!clhash->hash)
732 		return -ENOMEM;
733 	clhash->hashsize  = size;
734 	clhash->hashmask  = size - 1;
735 	clhash->hashelems = 0;
736 	return 0;
737 }
738 EXPORT_SYMBOL(qdisc_class_hash_init);
739 
qdisc_class_hash_destroy(struct Qdisc_class_hash * clhash)740 void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
741 {
742 	kvfree(clhash->hash);
743 }
744 EXPORT_SYMBOL(qdisc_class_hash_destroy);
745 
qdisc_class_hash_insert(struct Qdisc_class_hash * clhash,struct Qdisc_class_common * cl)746 void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
747 			     struct Qdisc_class_common *cl)
748 {
749 	unsigned int h;
750 
751 	INIT_HLIST_NODE(&cl->hnode);
752 	h = qdisc_class_hash(cl->classid, clhash->hashmask);
753 	hlist_add_head(&cl->hnode, &clhash->hash[h]);
754 	clhash->hashelems++;
755 }
756 EXPORT_SYMBOL(qdisc_class_hash_insert);
757 
qdisc_class_hash_remove(struct Qdisc_class_hash * clhash,struct Qdisc_class_common * cl)758 void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
759 			     struct Qdisc_class_common *cl)
760 {
761 	hlist_del(&cl->hnode);
762 	clhash->hashelems--;
763 }
764 EXPORT_SYMBOL(qdisc_class_hash_remove);
765 
766 /* Allocate an unique handle from space managed by kernel
767  * Possible range is [8000-FFFF]:0000 (0x8000 values)
768  */
qdisc_alloc_handle(struct net_device * dev)769 static u32 qdisc_alloc_handle(struct net_device *dev)
770 {
771 	int i = 0x8000;
772 	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
773 
774 	do {
775 		autohandle += TC_H_MAKE(0x10000U, 0);
776 		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
777 			autohandle = TC_H_MAKE(0x80000000U, 0);
778 		if (!qdisc_lookup(dev, autohandle))
779 			return autohandle;
780 		cond_resched();
781 	} while	(--i > 0);
782 
783 	return 0;
784 }
785 
qdisc_tree_reduce_backlog(struct Qdisc * sch,int n,int len)786 void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
787 {
788 	const struct Qdisc_class_ops *cops;
789 	unsigned long cl;
790 	u32 parentid;
791 	bool notify;
792 	int drops;
793 
794 	drops = max_t(int, n, 0);
795 	rcu_read_lock();
796 	while ((parentid = sch->parent)) {
797 		if (parentid == TC_H_ROOT)
798 			break;
799 
800 		if (sch->flags & TCQ_F_NOPARENT)
801 			break;
802 		/* Notify parent qdisc only if child qdisc becomes empty. */
803 		notify = !sch->q.qlen;
804 		/* TODO: perform the search on a per txq basis */
805 		sch = qdisc_lookup_rcu(qdisc_dev(sch), TC_H_MAJ(parentid));
806 		if (sch == NULL) {
807 			WARN_ON_ONCE(parentid != TC_H_ROOT);
808 			break;
809 		}
810 		cops = sch->ops->cl_ops;
811 		if (notify && cops->qlen_notify) {
812 			/* Note that qlen_notify must be idempotent as it may get called
813 			 * multiple times.
814 			 */
815 			cl = cops->find(sch, parentid);
816 			cops->qlen_notify(sch, cl);
817 		}
818 		sch->q.qlen -= n;
819 		sch->qstats.backlog -= len;
820 		__qdisc_qstats_drop(sch, drops);
821 	}
822 	rcu_read_unlock();
823 }
824 EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
825 
qdisc_offload_dump_helper(struct Qdisc * sch,enum tc_setup_type type,void * type_data)826 int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type,
827 			      void *type_data)
828 {
829 	struct net_device *dev = qdisc_dev(sch);
830 	int err;
831 
832 	sch->flags &= ~TCQ_F_OFFLOADED;
833 	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
834 		return 0;
835 
836 	err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
837 	if (err == -EOPNOTSUPP)
838 		return 0;
839 
840 	if (!err)
841 		sch->flags |= TCQ_F_OFFLOADED;
842 
843 	return err;
844 }
845 EXPORT_SYMBOL(qdisc_offload_dump_helper);
846 
qdisc_offload_graft_helper(struct net_device * dev,struct Qdisc * sch,struct Qdisc * new,struct Qdisc * old,enum tc_setup_type type,void * type_data,struct netlink_ext_ack * extack)847 void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
848 				struct Qdisc *new, struct Qdisc *old,
849 				enum tc_setup_type type, void *type_data,
850 				struct netlink_ext_ack *extack)
851 {
852 	bool any_qdisc_is_offloaded;
853 	int err;
854 
855 	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
856 		return;
857 
858 	err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
859 
860 	/* Don't report error if the graft is part of destroy operation. */
861 	if (!err || !new || new == &noop_qdisc)
862 		return;
863 
864 	/* Don't report error if the parent, the old child and the new
865 	 * one are not offloaded.
866 	 */
867 	any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED;
868 	any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED;
869 	any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED;
870 
871 	if (any_qdisc_is_offloaded)
872 		NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
873 }
874 EXPORT_SYMBOL(qdisc_offload_graft_helper);
875 
/*
 * Ask the driver for its offload capabilities for a given setup type.
 *
 * @dev:      device to query.
 * @type:     setup type the capabilities refer to.
 * @caps:     caller's capability structure; zeroed before the query.
 * @caps_len: size of @caps in bytes.
 *
 * If the device has no ndo_setup_tc hook, @caps is left zeroed. The
 * driver's return value is not checked.
 */
void qdisc_offload_query_caps(struct net_device *dev,
			      enum tc_setup_type type,
			      void *caps, size_t caps_len)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	struct tc_query_caps_base base = {
		.type = type,
		.caps = caps,
	};

	memset(caps, 0, caps_len);

	if (!ops->ndo_setup_tc)
		return;

	ops->ndo_setup_tc(dev, TC_QUERY_CAPS, &base);
}
EXPORT_SYMBOL(qdisc_offload_query_caps);
892 
qdisc_offload_graft_root(struct net_device * dev,struct Qdisc * new,struct Qdisc * old,struct netlink_ext_ack * extack)893 static void qdisc_offload_graft_root(struct net_device *dev,
894 				     struct Qdisc *new, struct Qdisc *old,
895 				     struct netlink_ext_ack *extack)
896 {
897 	struct tc_root_qopt_offload graft_offload = {
898 		.command	= TC_ROOT_GRAFT,
899 		.handle		= new ? new->handle : 0,
900 		.ingress	= (new && new->flags & TCQ_F_INGRESS) ||
901 				  (old && old->flags & TCQ_F_INGRESS),
902 	};
903 
904 	qdisc_offload_graft_helper(dev, NULL, new, old,
905 				   TC_SETUP_ROOT_QDISC, &graft_offload, extack);
906 }
907 
/*
 * Append one netlink message describing qdisc @q to @skb.
 *
 * @skb:    message buffer being filled.
 * @q:      qdisc to describe.
 * @clid:   value stored in tcm_parent.
 * @portid: netlink port id for the message header.
 * @seq:    netlink sequence number for the message header.
 * @flags:  netlink message flags.
 * @event:  netlink message type.
 * @extack: if it carries a message, that text is appended as
 *          TCA_EXT_WARN_MSG; may be NULL.
 *
 * Returns skb->len on success. On any failure the skb is trimmed back to
 * its length on entry and -1 is returned.
 */
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 portid, u32 seq, u16 flags, int event,
			 struct netlink_ext_ack *extack)
{
	struct gnet_stats_basic_sync __percpu *cpu_bstats = NULL;
	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
	unsigned char *start = skb_tail_pointer(skb);
	struct qdisc_size_table *stab;
	const struct Qdisc_ops *ops;
	struct nlmsghdr *nlh;
	struct gnet_dump d;
	struct tcmsg *tcm;
	u32 block_index;
	__u32 qlen;

	cond_resched();

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto cancel;

	/* Fixed tcmsg header. */
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = refcount_read(&q->refcnt);

	ops = q->ops;
	if (nla_put_string(skb, TCA_KIND, ops->id))
		goto cancel;

	/* Block indices are only emitted when they are non-zero. */
	if (ops->ingress_block_get) {
		block_index = ops->ingress_block_get(q);
		if (block_index &&
		    nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index))
			goto cancel;
	}
	if (ops->egress_block_get) {
		block_index = ops->egress_block_get(q);
		if (block_index &&
		    nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index))
			goto cancel;
	}

	if (ops->dump && ops->dump(q, skb) < 0)
		goto cancel;
	if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
		goto cancel;
	qlen = qdisc_qlen_sum(q);

	stab = rtnl_dereference(q->stab);
	if (stab && qdisc_dump_stab(skb, stab) < 0)
		goto cancel;

	/* Statistics section. */
	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto cancel;

	if (ops->dump_stats && ops->dump_stats(q, &d) < 0)
		goto cancel;

	if (qdisc_is_percpu_stats(q)) {
		cpu_bstats = q->cpu_bstats;
		cpu_qstats = q->cpu_qstats;
	}

	if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats, true) < 0)
		goto cancel;
	if (gnet_stats_copy_rate_est(&d, &q->rate_est) < 0)
		goto cancel;
	if (gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
		goto cancel;

	if (gnet_stats_finish_copy(&d) < 0)
		goto cancel;

	if (extack && extack->_msg &&
	    nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
		goto cancel;

	nlh->nlmsg_len = skb_tail_pointer(skb) - start;

	return skb->len;

cancel:
	nlmsg_trim(skb, start);
	return -1;
}
991 
/*
 * Decide whether qdisc @q should be left out of a dump or notification.
 *
 * Builtin qdiscs are always skipped; invisible ones are skipped unless
 * @dump_invisible is true.
 */
static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
{
	const bool builtin = q->flags & TCQ_F_BUILTIN;
	const bool hidden = (q->flags & TCQ_F_INVISIBLE) && !dump_invisible;

	return builtin || hidden;
}
1001 
/*
 * Send an RTM_NEWQDISC message describing @q.
 *
 * @net:    network namespace to send in.
 * @oskb:   request skb supplying the destination port id, or NULL for 0.
 * @n:      request header supplying the sequence number and NLM_F_ECHO.
 * @clid:   parent id placed in the message.
 * @q:      qdisc to report.
 * @extack: forwarded to tc_fill_qdisc().
 *
 * Returns the rtnetlink_send() result, -ENOBUFS if no skb could be
 * allocated, or -EINVAL when filling fails or nothing was written
 * because @q is ignored for dumps.
 */
static int qdisc_get_notify(struct net *net, struct sk_buff *oskb,
			    struct nlmsghdr *n, u32 clid, struct Qdisc *q,
			    struct netlink_ext_ack *extack)
{
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	struct sk_buff *skb;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (!tc_qdisc_dump_ignore(q, false) &&
	    tc_fill_qdisc(skb, q, clid, portid, n->nlmsg_seq, 0,
			  RTM_NEWQDISC, extack) < 0)
		goto err_out;

	/* An empty skb means there is nothing to send. */
	if (!skb->len)
		goto err_out;

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			      n->nlmsg_flags & NLM_F_ECHO);

err_out:
	kfree_skb(skb);
	return -EINVAL;
}
1027 
/* Send a notification about a qdisc change to RTNLGRP_TC.
 *
 * An RTM_DELQDISC record is emitted for @old and an RTM_NEWQDISC record
 * for @new (flagged NLM_F_REPLACE when @old is also given); either may be
 * NULL, and qdiscs ignored for dumps are left out.
 *
 * Returns 0 when rtnl_notify_needed() says no notification is wanted,
 * -ENOBUFS on allocation failure, -EINVAL if a record cannot be filled or
 * nothing was written, otherwise the result of rtnetlink_send().
 */
static int qdisc_notify(struct net *net, struct sk_buff *oskb,
			struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new,
			struct netlink_ext_ack *extack)
{
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	struct sk_buff *skb;

	if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC))
		return 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (old && !tc_qdisc_dump_ignore(old, false) &&
	    tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
			  0, RTM_DELQDISC, extack) < 0)
		goto free_skb;

	if (new && !tc_qdisc_dump_ignore(new, false) &&
	    tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
			  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC, extack) < 0)
		goto free_skb;

	/* Neither record was written: nothing to send. */
	if (!skb->len)
		goto free_skb;

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			      n->nlmsg_flags & NLM_F_ECHO);

free_skb:
	kfree_skb(skb);
	return -EINVAL;
}
1062 
/* Notify about the old -> new transition, then drop the reference on
 * @old. Does nothing when both qdiscs are NULL. The result of
 * qdisc_notify() is not propagated.
 */
static void notify_and_destroy(struct net *net, struct sk_buff *skb,
			       struct nlmsghdr *n, u32 clid,
			       struct Qdisc *old, struct Qdisc *new,
			       struct netlink_ext_ack *extack)
{
	if (!old && !new)
		return;

	qdisc_notify(net, skb, n, clid, old, new, extack);

	if (old)
		qdisc_put(old);
}
1074 
qdisc_clear_nolock(struct Qdisc * sch)1075 static void qdisc_clear_nolock(struct Qdisc *sch)
1076 {
1077 	sch->flags &= ~TCQ_F_NOLOCK;
1078 	if (!(sch->flags & TCQ_F_CPUSTATS))
1079 		return;
1080 
1081 	free_percpu(sch->cpu_bstats);
1082 	free_percpu(sch->cpu_qstats);
1083 	sch->cpu_bstats = NULL;
1084 	sch->cpu_qstats = NULL;
1085 	sch->flags &= ~TCQ_F_CPUSTATS;
1086 }
1087 
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate send a netlink notification using 'skb'
 * and "n".
 *
 * On success, destroy old qdisc.
 *
 * @dev:     device the qdisc is grafted on
 * @parent:  parent qdisc; NULL selects the device-level (root or ingress)
 *           path, non-NULL selects the class path via parent's cl_ops
 * @skb:     request skb, only forwarded to the notification helpers
 * @n:       request netlink header, only forwarded to the notification
 *           helpers
 * @classid: class id looked up in @parent and reported in notifications
 * @new:     qdisc to attach, may be NULL
 * @old:     qdisc being replaced, may be NULL
 * @extack:  extended ack used for error messages
 *
 * Returns 0 on success. Errors visible here: -ENOENT (no ingress queue on
 * the device, or class not found), -EBUSY (ingress/clsact qdisc still has
 * references besides ours), -EOPNOTSUPP (parent has no graft operation),
 * -EINVAL (noqueue on a class, or a size table on a non-MQROOT parent),
 * or whatever cops->graft() returns.
 */

static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old,
		       struct netlink_ext_ack *extack)
{
	struct Qdisc *q = old;
	struct net *net = dev_net(dev);

	if (parent == NULL) {
		unsigned int i, num_q, ingress;
		struct netdev_queue *dev_queue;

		ingress = 0;
		num_q = dev->num_tx_queues;
		/* Either side being an ingress qdisc selects the ingress queue. */
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			ingress = 1;
			dev_queue = dev_ingress_queue(dev);
			if (!dev_queue) {
				NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
				return -ENOENT;
			}

			q = rtnl_dereference(dev_queue->qdisc_sleeping);

			/* This is the counterpart of that qdisc_refcount_inc_nz() call in
			 * __tcf_qdisc_find() for filter requests.
			 */
			if (!qdisc_refcount_dec_if_one(q)) {
				NL_SET_ERR_MSG(extack,
					       "Current ingress or clsact Qdisc has ongoing filter requests");
				return -EBUSY;
			}
		}

		/* The device is deactivated while the qdisc is swapped and
		 * re-activated at the end of this branch.
		 */
		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		qdisc_offload_graft_root(dev, new, old, extack);

		/* A qdisc with its own attach() op skips the per-queue graft
		 * below; attach() is called after the skip label instead.
		 */
		if (new && new->ops->attach && !ingress)
			goto skip;

		if (!ingress) {
			for (i = 0; i < num_q; i++) {
				dev_queue = netdev_get_tx_queue(dev, i);
				old = dev_graft_qdisc(dev_queue, new);

				/* One extra reference on new per tx queue beyond the first. */
				if (new && i > 0)
					qdisc_refcount_inc(new);
				qdisc_put(old);
			}
		} else {
			old = dev_graft_qdisc(dev_queue, NULL);

			/* {ingress,clsact}_destroy() @old before grafting @new to avoid
			 * unprotected concurrent accesses to net_device::miniq_{in,e}gress
			 * pointer(s) in mini_qdisc_pair_swap().
			 */
			qdisc_notify(net, skb, n, classid, old, new, extack);
			qdisc_destroy(old);

			dev_graft_qdisc(dev_queue, new);
		}

skip:
		if (!ingress) {
			old = rtnl_dereference(dev->qdisc);
			/* dev->qdisc takes its own reference unless attach() is used. */
			if (new && !new->ops->attach)
				qdisc_refcount_inc(new);
			/* With no new qdisc the device root falls back to noop_qdisc. */
			rcu_assign_pointer(dev->qdisc, new ? : &noop_qdisc);

			notify_and_destroy(net, skb, n, classid, old, new, extack);

			if (new && new->ops->attach)
				new->ops->attach(new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
		unsigned long cl;
		int err;

		/* Only support running class lockless if parent is lockless */
		if (new && (new->flags & TCQ_F_NOLOCK) && !(parent->flags & TCQ_F_NOLOCK))
			qdisc_clear_nolock(new);

		if (!cops || !cops->graft)
			return -EOPNOTSUPP;

		cl = cops->find(parent, classid);
		if (!cl) {
			NL_SET_ERR_MSG(extack, "Specified class not found");
			return -ENOENT;
		}

		if (new && new->ops == &noqueue_qdisc_ops) {
			NL_SET_ERR_MSG(extack, "Cannot assign noqueue to a class");
			return -EINVAL;
		}

		/* A size table on new is only accepted when the parent is
		 * flagged TCQ_F_MQROOT.
		 */
		if (new &&
		    !(parent->flags & TCQ_F_MQROOT) &&
		    rcu_access_pointer(new->stab)) {
			NL_SET_ERR_MSG(extack, "STAB not supported on a non root");
			return -EINVAL;
		}
		/* graft() reports the qdisc it replaced through &old. */
		err = cops->graft(parent, cl, new, &old, extack);
		if (err)
			return err;
		notify_and_destroy(net, skb, n, classid, old, new, extack);
	}
	return 0;
}
1213 
qdisc_block_indexes_set(struct Qdisc * sch,struct nlattr ** tca,struct netlink_ext_ack * extack)1214 static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
1215 				   struct netlink_ext_ack *extack)
1216 {
1217 	u32 block_index;
1218 
1219 	if (tca[TCA_INGRESS_BLOCK]) {
1220 		block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]);
1221 
1222 		if (!block_index) {
1223 			NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0");
1224 			return -EINVAL;
1225 		}
1226 		if (!sch->ops->ingress_block_set) {
1227 			NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported");
1228 			return -EOPNOTSUPP;
1229 		}
1230 		sch->ops->ingress_block_set(sch, block_index);
1231 	}
1232 	if (tca[TCA_EGRESS_BLOCK]) {
1233 		block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]);
1234 
1235 		if (!block_index) {
1236 			NL_SET_ERR_MSG(extack, "Egress block index cannot be 0");
1237 			return -EINVAL;
1238 		}
1239 		if (!sch->ops->egress_block_set) {
1240 			NL_SET_ERR_MSG(extack, "Egress block sharing is not supported");
1241 			return -EOPNOTSUPP;
1242 		}
1243 		sch->ops->egress_block_set(sch, block_index);
1244 	}
1245 	return 0;
1246 }
1247 
/*
 * Allocate and initialize a new qdisc.
 *
 * The qdisc kind and its parameters are taken from the parsed netlink
 * attributes in @tca (TCA_KIND, TCA_OPTIONS, TCA_STAB, TCA_RATE and the
 * ingress/egress block indexes).
 *
 * @dev:       device the qdisc is created for
 * @dev_queue: queue handed to qdisc_alloc()
 * @parent:    stored in sch->parent
 * @handle:    requested handle; 0 asks for an automatically allocated one,
 *             TC_H_INGRESS is only accepted for TCQ_F_INGRESS qdiscs
 * @tca:       parsed attribute table
 * @errp:      receives the negative errno when NULL is returned
 * @extack:    extended ack used for error messages
 *
 * Returns the new qdisc on success, or NULL with *errp set. *errp ==
 * -EAGAIN means a module providing the kind was loaded after dropping
 * RTNL and the caller has to replay the request.
 */

static struct Qdisc *qdisc_create(struct net_device *dev,
				  struct netdev_queue *dev_queue,
				  u32 parent, u32 handle,
				  struct nlattr **tca, int *errp,
				  struct netlink_ext_ack *extack)
{
	int err;
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;
	struct qdisc_size_table *stab;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_MODULES
	if (ops == NULL && kind != NULL) {
		char name[IFNAMSIZ];
		if (nla_strscpy(name, kind, IFNAMSIZ) >= 0) {
			/* We dropped the RTNL semaphore in order to
			 * perform the module load.  So, even if we
			 * succeeded in loading the module we have to
			 * tell the caller to replay the request.  We
			 * indicate this using -EAGAIN.
			 * We replay the request because the device may
			 * go away in the mean time.
			 */
			rtnl_unlock();
			request_module(NET_SCH_ALIAS_PREFIX "%s", name);
			rtnl_lock();
			ops = qdisc_lookup_ops(kind);
			if (ops != NULL) {
				/* We will try again qdisc_lookup_ops,
				 * so don't keep a reference.
				 */
				module_put(ops->owner);
				err = -EAGAIN;
				goto err_out;
			}
		}
	}
#endif

	err = -ENOENT;
	if (!ops) {
		NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown");
		goto err_out;
	}

	sch = qdisc_alloc(dev_queue, ops, extack);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	sch->parent = parent;

	if (handle == TC_H_INGRESS) {
		if (!(sch->flags & TCQ_F_INGRESS)) {
			NL_SET_ERR_MSG(extack,
				       "Specified parent ID is reserved for ingress and clsact Qdiscs");
			err = -EINVAL;
			goto err_out3;
		}
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
	} else {
		if (handle == 0) {
			handle = qdisc_alloc_handle(dev);
			if (handle == 0) {
				NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
				err = -ENOSPC;
				goto err_out3;
			}
		}
		if (!netif_is_multiqueue(dev))
			sch->flags |= TCQ_F_ONETXQUEUE;
	}

	sch->handle = handle;

	/* This exists to stay backward compatible with a userspace
	 * loophole that allowed userspace to get the IFF_NO_QUEUE
	 * facility on older kernels by setting tx_queue_len=0 (prior
	 * to qdisc init) and then forgetting to reinit tx_queue_len
	 * before attaching a qdisc again.
	 */
	if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
		WRITE_ONCE(dev->tx_queue_len, DEFAULT_TX_QUEUE_LEN);
		netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
	}

	err = qdisc_block_indexes_set(sch, tca, extack);
	if (err)
		goto err_out3;

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB], extack);
		if (IS_ERR(stab)) {
			err = PTR_ERR(stab);
			goto err_out3;
		}
		rcu_assign_pointer(sch->stab, stab);
	}

	if (ops->init) {
		err = ops->init(sch, tca[TCA_OPTIONS], extack);
		if (err != 0)
			goto err_out4;
	}

	if (tca[TCA_RATE]) {
		err = -EOPNOTSUPP;
		if (sch->flags & TCQ_F_MQROOT) {
			NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
			goto err_out4;
		}

		err = gen_new_estimator(&sch->bstats,
					sch->cpu_bstats,
					&sch->rate_est,
					NULL,
					true,
					tca[TCA_RATE]);
		if (err) {
			NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
			goto err_out4;
		}
	}

	qdisc_hash_add(sch, false);
	trace_qdisc_create(ops, dev, parent);

	return sch;

	/* Error unwinding: each label undoes one more setup step than the
	 * label below it.
	 */
err_out4:
	/* Even if ops->init() failed, we call ops->destroy()
	 * like qdisc_create_dflt().
	 */
	if (ops->destroy)
		ops->destroy(sch);
	qdisc_put_stab(rtnl_dereference(sch->stab));
err_out3:
	lockdep_unregister_key(&sch->root_lock_key);
	netdev_put(dev, &sch->dev_tracker);
	qdisc_free(sch);
err_out2:
	module_put(ops->owner);
err_out:
	*errp = err;
	return NULL;
}
1403 
qdisc_change(struct Qdisc * sch,struct nlattr ** tca,struct netlink_ext_ack * extack)1404 static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
1405 			struct netlink_ext_ack *extack)
1406 {
1407 	struct qdisc_size_table *ostab, *stab = NULL;
1408 	int err = 0;
1409 
1410 	if (tca[TCA_OPTIONS]) {
1411 		if (!sch->ops->change) {
1412 			NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc");
1413 			return -EINVAL;
1414 		}
1415 		if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
1416 			NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
1417 			return -EOPNOTSUPP;
1418 		}
1419 		err = sch->ops->change(sch, tca[TCA_OPTIONS], extack);
1420 		if (err)
1421 			return err;
1422 	}
1423 
1424 	if (tca[TCA_STAB]) {
1425 		stab = qdisc_get_stab(tca[TCA_STAB], extack);
1426 		if (IS_ERR(stab))
1427 			return PTR_ERR(stab);
1428 	}
1429 
1430 	ostab = rtnl_dereference(sch->stab);
1431 	rcu_assign_pointer(sch->stab, stab);
1432 	qdisc_put_stab(ostab);
1433 
1434 	if (tca[TCA_RATE]) {
1435 		/* NB: ignores errors from replace_estimator
1436 		   because change can't be undone. */
1437 		if (sch->flags & TCQ_F_MQROOT)
1438 			goto out;
1439 		gen_replace_estimator(&sch->bstats,
1440 				      sch->cpu_bstats,
1441 				      &sch->rate_est,
1442 				      NULL,
1443 				      true,
1444 				      tca[TCA_RATE]);
1445 	}
1446 out:
1447 	return 0;
1448 }
1449 
/* Walker context used by check_loop() / check_loop_fn().
 *
 * The qdisc_walker must stay the first member: check_loop_fn() casts the
 * walker pointer it receives back to this structure.
 */
struct check_loop_arg {
	struct qdisc_walker	w;	/* walker handed to cl_ops->walk() */
	struct Qdisc		*p;	/* qdisc that must not appear as a leaf */
	int			depth;	/* current recursion depth */
};

/* Forward declaration: check_loop() and check_loop_fn() call each other. */
static int check_loop_fn(struct Qdisc *q, unsigned long cl,
			 struct qdisc_walker *w);
1458 
check_loop(struct Qdisc * q,struct Qdisc * p,int depth)1459 static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1460 {
1461 	struct check_loop_arg	arg;
1462 
1463 	if (q->ops->cl_ops == NULL)
1464 		return 0;
1465 
1466 	arg.w.stop = arg.w.skip = arg.w.count = 0;
1467 	arg.w.fn = check_loop_fn;
1468 	arg.depth = depth;
1469 	arg.p = p;
1470 	q->ops->cl_ops->walk(q, &arg.w);
1471 	return arg.w.stop ? -ELOOP : 0;
1472 }
1473 
1474 static int
check_loop_fn(struct Qdisc * q,unsigned long cl,struct qdisc_walker * w)1475 check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1476 {
1477 	struct Qdisc *leaf;
1478 	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1479 	struct check_loop_arg *arg = (struct check_loop_arg *)w;
1480 
1481 	leaf = cops->leaf(q, cl);
1482 	if (leaf) {
1483 		if (leaf == arg->p || arg->depth > 7)
1484 			return -ELOOP;
1485 		return check_loop(leaf, arg->p, arg->depth + 1);
1486 	}
1487 	return 0;
1488 }
1489 
/* Netlink attribute policy for the TCA_* attributes. In this file it is
 * passed to nlmsg_parse_deprecated() by tc_get_qdisc(), tc_modify_qdisc()
 * and tc_dump_qdisc(). Attributes without an entry here carry no type
 * constraint from this table.
 */
const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
	[TCA_KIND]		= { .type = NLA_STRING },
	[TCA_RATE]		= { .type = NLA_BINARY,
				    .len = sizeof(struct tc_estimator) },
	[TCA_STAB]		= { .type = NLA_NESTED },
	[TCA_DUMP_INVISIBLE]	= { .type = NLA_FLAG },
	[TCA_CHAIN]		= { .type = NLA_U32 },
	[TCA_INGRESS_BLOCK]	= { .type = NLA_U32 },
	[TCA_EGRESS_BLOCK]	= { .type = NLA_U32 },
};
1500 
/*
 * Delete/get qdisc.
 *
 * Handles both request types: for RTM_DELQDISC the located qdisc is
 * removed by grafting NULL in its place, for any other message type the
 * qdisc is reported back via qdisc_get_notify().
 *
 * The qdisc is located either through tcm_parent (device root, ingress
 * queue, or the leaf of a class of the parent qdisc) or, when tcm_parent
 * is 0, directly through tcm_handle.
 *
 * Returns 0 on success or a negative errno. The result of
 * qdisc_get_notify() is not propagated.
 */

static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	clid = tcm->tcm_parent;
	if (clid) {
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				/* Parent is a regular qdisc: take the leaf of its class. */
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p) {
					NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
					return -ENOENT;
				}
				q = qdisc_leaf(p, clid, extack);
			} else if (dev_ingress_queue(dev)) {
				q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping);
			}
		} else {
			q = rtnl_dereference(dev->qdisc);
		}
		if (!q) {
			NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
			return -ENOENT;
		}
		/* qdisc_leaf() may have returned an error pointer. */
		if (IS_ERR(q))
			return PTR_ERR(q);

		/* A non-zero tcm_handle must match the qdisc found via the parent. */
		if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
			NL_SET_ERR_MSG(extack, "Invalid handle");
			return -EINVAL;
		}
	} else {
		q = qdisc_lookup(dev, tcm->tcm_handle);
		if (!q) {
			NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle");
			return -ENOENT;
		}
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
		NL_SET_ERR_MSG(extack, "Invalid qdisc name");
		return -EINVAL;
	}

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid) {
			NL_SET_ERR_MSG(extack, "Classid cannot be zero");
			return -EINVAL;
		}
		if (q->handle == 0) {
			NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero");
			return -ENOENT;
		}
		/* Deleting is grafting "no qdisc" over the existing one. */
		err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack);
		if (err != 0)
			return err;
	} else {
		qdisc_get_notify(net, skb, n, clid, q, NULL);
	}
	return 0;
}
1583 
req_create_or_replace(struct nlmsghdr * n)1584 static bool req_create_or_replace(struct nlmsghdr *n)
1585 {
1586 	return (n->nlmsg_flags & NLM_F_CREATE &&
1587 		n->nlmsg_flags & NLM_F_REPLACE);
1588 }
1589 
req_create_exclusive(struct nlmsghdr * n)1590 static bool req_create_exclusive(struct nlmsghdr *n)
1591 {
1592 	return (n->nlmsg_flags & NLM_F_CREATE &&
1593 		n->nlmsg_flags & NLM_F_EXCL);
1594 }
1595 
req_change(struct nlmsghdr * n)1596 static bool req_change(struct nlmsghdr *n)
1597 {
1598 	return (!(n->nlmsg_flags & NLM_F_CREATE) &&
1599 		!(n->nlmsg_flags & NLM_F_REPLACE) &&
1600 		!(n->nlmsg_flags & NLM_F_EXCL));
1601 }
1602 
/*
 * Create/change qdisc.
 *
 * Depending on tcm_parent, tcm_handle and the NLM_F_* flags of the
 * request, one of three things happens:
 *  - an existing qdisc is changed in place through qdisc_change();
 *  - a new qdisc is created and grafted (create_n_graft labels);
 *  - an already existing qdisc, found by handle, is grafted at the
 *    requested parent (graft label).
 *
 * The request is parsed again from the "replay" label when
 * qdisc_create() reports -EAGAIN.
 *
 * Returns 0 on success or a negative errno.
 */
static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			   struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm;
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q, *p;
	int err;

replay:
	/* Reinit, just in case something touches this. */
	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	tcm = nlmsg_data(n);
	clid = tcm->tcm_parent;
	q = p = NULL;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;


	if (clid) {
		/* Find the qdisc currently sitting at the requested parent. */
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p) {
					NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
					return -ENOENT;
				}
				q = qdisc_leaf(p, clid, extack);
				if (IS_ERR(q))
					return PTR_ERR(q);
			} else if (dev_ingress_queue_create(dev)) {
				q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping);
			}
		} else {
			q = rtnl_dereference(dev->qdisc);
		}

		/* It may be default qdisc, ignore it */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) {
					NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override");
					return -EEXIST;
				}
				if (TC_H_MIN(tcm->tcm_handle)) {
					NL_SET_ERR_MSG(extack, "Invalid minor handle");
					return -EINVAL;
				}
				/* From here on q is the qdisc owning the requested handle. */
				q = qdisc_lookup(dev, tcm->tcm_handle);
				if (!q)
					goto create_n_graft;
				if (q->parent != tcm->tcm_parent) {
					NL_SET_ERR_MSG(extack, "Cannot move an existing qdisc to a different parent");
					return -EINVAL;
				}
				if (n->nlmsg_flags & NLM_F_EXCL) {
					NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
					return -EEXIST;
				}
				if (tca[TCA_KIND] &&
				    nla_strcmp(tca[TCA_KIND], q->ops->id)) {
					NL_SET_ERR_MSG(extack, "Invalid qdisc name");
					return -EINVAL;
				}
				if (q->flags & TCQ_F_INGRESS) {
					NL_SET_ERR_MSG(extack,
						       "Cannot regraft ingress or clsact Qdiscs");
					return -EINVAL;
				}
				if (q == p ||
				    (p && check_loop(q, p, 0))) {
					NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
					return -ELOOP;
				}
				if (clid == TC_H_INGRESS) {
					NL_SET_ERR_MSG(extack, "Ingress cannot graft directly");
					return -EINVAL;
				}
				/* Reference taken for the graft; dropped below if it fails. */
				qdisc_refcount_inc(q);
				goto graft;
			} else {
				if (!q)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 *   We know, that some child q is already
				 *   attached to this parent and have choice:
				 *   1) change it or 2) create/graft new one.
				 *   If the requested qdisc kind is different
				 *   than the existing one, then we choose graft.
				 *   If they are the same then this is "change"
				 *   operation - just let it fallthrough..
				 *
				 *   1. We are allowed to create/graft only
				 *   if the request is explicitly stating
				 *   "please create if it doesn't exist".
				 *
				 *   2. If the request is to exclusive create
				 *   then the qdisc tcm_handle is not expected
				 *   to exist, so that we choose create/graft too.
				 *
				 *   3. The last case is when no flags are set.
				 *   This will happen when for example tc
				 *   utility issues a "change" command.
				 *   Alas, it is sort of hole in API, we
				 *   cannot decide what to do unambiguously.
				 *   For now we select create/graft.
				 */
				if (tca[TCA_KIND] &&
				    nla_strcmp(tca[TCA_KIND], q->ops->id)) {
					if (req_create_or_replace(n) ||
					    req_create_exclusive(n))
						goto create_n_graft;
					else if (req_change(n))
						goto create_n_graft2;
				}
			}
		}
	} else {
		if (!tcm->tcm_handle) {
			NL_SET_ERR_MSG(extack, "Handle cannot be zero");
			return -EINVAL;
		}
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (!q) {
		NL_SET_ERR_MSG(extack, "Specified qdisc not found");
		return -ENOENT;
	}
	if (n->nlmsg_flags & NLM_F_EXCL) {
		NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify");
		return -EEXIST;
	}
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
		NL_SET_ERR_MSG(extack, "Invalid qdisc name");
		return -EINVAL;
	}
	err = qdisc_change(q, tca, extack);
	if (err == 0)
		qdisc_notify(net, skb, n, clid, NULL, q, extack);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags & NLM_F_CREATE)) {
		NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
		return -ENOENT;
	}
	/* create_n_graft2 is entered directly by the flag-less "change" case
	 * above, which bypasses the NLM_F_CREATE check.
	 */
create_n_graft2:
	if (clid == TC_H_INGRESS) {
		if (dev_ingress_queue(dev)) {
			q = qdisc_create(dev, dev_ingress_queue(dev),
					 tcm->tcm_parent, tcm->tcm_parent,
					 tca, &err, extack);
		} else {
			NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device");
			err = -ENOENT;
		}
	} else {
		struct netdev_queue *dev_queue;

		/* Queue choice: the parent's select_queue() op if it has one,
		 * else the parent's own queue, else tx queue 0 of the device.
		 */
		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
		else if (p)
			dev_queue = p->dev_queue;
		else
			dev_queue = netdev_get_tx_queue(dev, 0);

		q = qdisc_create(dev, dev_queue,
				 tcm->tcm_parent, tcm->tcm_handle,
				 tca, &err, extack);
	}
	if (q == NULL) {
		/* -EAGAIN from qdisc_create(): start over from the parse step. */
		if (err == -EAGAIN)
			goto replay;
		return err;
	}

graft:
	err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
	if (err) {
		if (q)
			qdisc_put(q);
		return err;
	}

	return 0;
}
1807 
/* Dump @root and, when @recur is set, every qdisc in its device's qdisc
 * hash table into @skb.
 *
 * *@q_idx_p is the running per-device qdisc index; entries with an index
 * below @s_q_idx are counted but not dumped, and the index is written
 * back on every exit path except the NULL-root early return.
 *
 * Returns 0 when everything fit (or @root is NULL), -1 when
 * tc_fill_qdisc() returned a value <= 0 for some qdisc.
 */
static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
			      struct netlink_callback *cb,
			      int *q_idx_p, int s_q_idx, bool recur,
			      bool dump_invisible)
{
	int q_idx = *q_idx_p;
	struct Qdisc *q;
	int ret = 0;
	int b;

	if (!root)
		return 0;

	q = root;
	if (q_idx >= s_q_idx &&
	    !tc_qdisc_dump_ignore(q, dump_invisible) &&
	    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
			  cb->nlh->nlmsg_seq, NLM_F_MULTI,
			  RTM_NEWQDISC, NULL) <= 0) {
		ret = -1;
		goto out;
	}
	q_idx++;

	/* If dumping singletons, there is no qdisc_dev(root) and the singleton
	 * itself has already been dumped.
	 *
	 * If we've already dumped the top-level (ingress) qdisc above and the
	 * global qdisc hashtable, we don't want to hit it again.
	 */
	if (qdisc_dev(root) && recur) {
		hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
			if (q_idx >= s_q_idx &&
			    !tc_qdisc_dump_ignore(q, dump_invisible) &&
			    tc_fill_qdisc(skb, q, q->parent,
					  NETLINK_CB(cb->skb).portid,
					  cb->nlh->nlmsg_seq, NLM_F_MULTI,
					  RTM_NEWQDISC, NULL) <= 0) {
				ret = -1;
				goto out;
			}
			q_idx++;
		}
	}

out:
	*q_idx_p = q_idx;
	return ret;
}
1861 
tc_dump_qdisc(struct sk_buff * skb,struct netlink_callback * cb)1862 static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1863 {
1864 	struct net *net = sock_net(skb->sk);
1865 	int idx, q_idx;
1866 	int s_idx, s_q_idx;
1867 	struct net_device *dev;
1868 	const struct nlmsghdr *nlh = cb->nlh;
1869 	struct nlattr *tca[TCA_MAX + 1];
1870 	int err;
1871 
1872 	s_idx = cb->args[0];
1873 	s_q_idx = q_idx = cb->args[1];
1874 
1875 	idx = 0;
1876 	ASSERT_RTNL();
1877 
1878 	err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
1879 				     rtm_tca_policy, cb->extack);
1880 	if (err < 0)
1881 		return err;
1882 
1883 	for_each_netdev(net, dev) {
1884 		struct netdev_queue *dev_queue;
1885 
1886 		if (idx < s_idx)
1887 			goto cont;
1888 		if (idx > s_idx)
1889 			s_q_idx = 0;
1890 		q_idx = 0;
1891 
1892 		if (tc_dump_qdisc_root(rtnl_dereference(dev->qdisc),
1893 				       skb, cb, &q_idx, s_q_idx,
1894 				       true, tca[TCA_DUMP_INVISIBLE]) < 0)
1895 			goto done;
1896 
1897 		dev_queue = dev_ingress_queue(dev);
1898 		if (dev_queue &&
1899 		    tc_dump_qdisc_root(rtnl_dereference(dev_queue->qdisc_sleeping),
1900 				       skb, cb, &q_idx, s_q_idx, false,
1901 				       tca[TCA_DUMP_INVISIBLE]) < 0)
1902 			goto done;
1903 
1904 cont:
1905 		idx++;
1906 	}
1907 
1908 done:
1909 	cb->args[0] = idx;
1910 	cb->args[1] = q_idx;
1911 
1912 	return skb->len;
1913 }
1914 
1915 
1916 
1917 /************************************************
1918  *	Traffic classes manipulation.		*
1919  ************************************************/
1920 
/* Append one traffic-class message for class @cl of qdisc @q to @skb.
 *
 * The message carries a tcmsg header, TCA_KIND, whatever the class ops'
 * dump() and dump_stats() add, and, when @extack holds a message, a
 * TCA_EXT_WARN_MSG string.
 *
 * Returns skb->len on success. On any failure the skb is trimmed back to
 * where it was on entry and -1 is returned.
 */
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl, u32 portid, u32 seq, u16 flags,
			  int event, struct netlink_ext_ack *extack)
{
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
	unsigned char *start = skb_tail_pointer(skb);
	struct nlmsghdr *nlh;
	struct gnet_dump d;
	struct tcmsg *tcm;

	cond_resched();

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto trim;

	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	/* Both start out as the qdisc handle, before cl_ops->dump() runs. */
	tcm->tcm_handle = q->handle;
	tcm->tcm_parent = q->handle;
	tcm->tcm_info = 0;

	if (nla_put_string(skb, TCA_KIND, q->ops->id) ||
	    (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0))
		goto trim;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto trim;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto trim;

	if (gnet_stats_finish_copy(&d) < 0)
		goto trim;

	if (extack && extack->_msg &&
	    nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
		goto trim;

	nlh->nlmsg_len = skb_tail_pointer(skb) - start;

	return skb->len;

trim:
	nlmsg_trim(skb, start);
	return -1;
}
1971 
tclass_notify(struct net * net,struct sk_buff * oskb,struct nlmsghdr * n,struct Qdisc * q,unsigned long cl,int event,struct netlink_ext_ack * extack)1972 static int tclass_notify(struct net *net, struct sk_buff *oskb,
1973 			 struct nlmsghdr *n, struct Qdisc *q,
1974 			 unsigned long cl, int event, struct netlink_ext_ack *extack)
1975 {
1976 	struct sk_buff *skb;
1977 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1978 
1979 	if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC))
1980 		return 0;
1981 
1982 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1983 	if (!skb)
1984 		return -ENOBUFS;
1985 
1986 	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event, extack) < 0) {
1987 		kfree_skb(skb);
1988 		return -EINVAL;
1989 	}
1990 
1991 	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1992 			      n->nlmsg_flags & NLM_F_ECHO);
1993 }
1994 
tclass_get_notify(struct net * net,struct sk_buff * oskb,struct nlmsghdr * n,struct Qdisc * q,unsigned long cl,struct netlink_ext_ack * extack)1995 static int tclass_get_notify(struct net *net, struct sk_buff *oskb,
1996 			     struct nlmsghdr *n, struct Qdisc *q,
1997 			     unsigned long cl, struct netlink_ext_ack *extack)
1998 {
1999 	struct sk_buff *skb;
2000 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
2001 
2002 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2003 	if (!skb)
2004 		return -ENOBUFS;
2005 
2006 	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, RTM_NEWTCLASS,
2007 			   extack) < 0) {
2008 		kfree_skb(skb);
2009 		return -EINVAL;
2010 	}
2011 
2012 	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
2013 			      n->nlmsg_flags & NLM_F_ECHO);
2014 }
2015 
tclass_del_notify(struct net * net,const struct Qdisc_class_ops * cops,struct sk_buff * oskb,struct nlmsghdr * n,struct Qdisc * q,unsigned long cl,struct netlink_ext_ack * extack)2016 static int tclass_del_notify(struct net *net,
2017 			     const struct Qdisc_class_ops *cops,
2018 			     struct sk_buff *oskb, struct nlmsghdr *n,
2019 			     struct Qdisc *q, unsigned long cl,
2020 			     struct netlink_ext_ack *extack)
2021 {
2022 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
2023 	struct sk_buff *skb;
2024 	int err = 0;
2025 
2026 	if (!cops->delete)
2027 		return -EOPNOTSUPP;
2028 
2029 	if (rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) {
2030 		skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2031 		if (!skb)
2032 			return -ENOBUFS;
2033 
2034 		if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
2035 				   RTM_DELTCLASS, extack) < 0) {
2036 			kfree_skb(skb);
2037 			return -EINVAL;
2038 		}
2039 	} else {
2040 		skb = NULL;
2041 	}
2042 
2043 	err = cops->delete(q, cl, extack);
2044 	if (err) {
2045 		kfree_skb(skb);
2046 		return err;
2047 	}
2048 
2049 	err = rtnetlink_maybe_send(skb, net, portid, RTNLGRP_TC,
2050 				   n->nlmsg_flags & NLM_F_ECHO);
2051 	return err;
2052 }
2053 
2054 #ifdef CONFIG_NET_CLS
2055 
/*
 * Arguments carried through a filter walk by tc_bind_class_walker().
 * tcf_node_bind() casts the struct tcf_walker pointer it receives back to
 * this structure, so @w has to remain the first member.
 */
struct tcf_bind_args {
	struct tcf_walker w;	/* embedded walker; w.fn is the per-node callback */
	unsigned long base;	/* class being walked, passed last to ->bind_class() */
	unsigned long cl;	/* class value handed to ->bind_class() */
	u32 classid;		/* class id handed to ->bind_class() */
};
2062 
tcf_node_bind(struct tcf_proto * tp,void * n,struct tcf_walker * arg)2063 static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
2064 {
2065 	struct tcf_bind_args *a = (void *)arg;
2066 
2067 	if (n && tp->ops->bind_class) {
2068 		struct Qdisc *q = tcf_block_q(tp->chain->block);
2069 
2070 		sch_tree_lock(q);
2071 		tp->ops->bind_class(n, a->classid, a->cl, q, a->base);
2072 		sch_tree_unlock(q);
2073 	}
2074 	return 0;
2075 }
2076 
/*
 * Arguments carried through a class walk by tc_bind_tclass().
 * tc_bind_class_walker() casts the struct qdisc_walker pointer it receives
 * back to this structure, so @w has to remain the first member.
 */
struct tc_bind_class_args {
	struct qdisc_walker w;	/* embedded walker; w.fn is the per-class callback */
	unsigned long new_cl;	/* class value to bind filters to (0 is used on delete) */
	u32 portid;		/* stored by tc_bind_tclass(); not read by the walker */
	u32 clid;		/* class id whose filter bindings are updated */
};
2083 
tc_bind_class_walker(struct Qdisc * q,unsigned long cl,struct qdisc_walker * w)2084 static int tc_bind_class_walker(struct Qdisc *q, unsigned long cl,
2085 				struct qdisc_walker *w)
2086 {
2087 	struct tc_bind_class_args *a = (struct tc_bind_class_args *)w;
2088 	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
2089 	struct tcf_block *block;
2090 	struct tcf_chain *chain;
2091 
2092 	block = cops->tcf_block(q, cl, NULL);
2093 	if (!block)
2094 		return 0;
2095 	for (chain = tcf_get_next_chain(block, NULL);
2096 	     chain;
2097 	     chain = tcf_get_next_chain(block, chain)) {
2098 		struct tcf_proto *tp;
2099 
2100 		for (tp = tcf_get_next_proto(chain, NULL);
2101 		     tp; tp = tcf_get_next_proto(chain, tp)) {
2102 			struct tcf_bind_args arg = {};
2103 
2104 			arg.w.fn = tcf_node_bind;
2105 			arg.classid = a->clid;
2106 			arg.base = cl;
2107 			arg.cl = a->new_cl;
2108 			tp->ops->walk(tp, &arg.w, true);
2109 		}
2110 	}
2111 
2112 	return 0;
2113 }
2114 
/*
 * Walk all classes of @q and rebind filters that reference class id @clid
 * to @new_cl (callers pass 0 when the class has been deleted).
 *
 * Does nothing when the qdisc's class operations provide no ->tcf_block().
 */
static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct tc_bind_class_args args = {
		.w.fn = tc_bind_class_walker,
		.new_cl = new_cl,
		.portid = portid,
		.clid = clid,
	};

	if (cops->tcf_block)
		cops->walk(q, &args.w);
}
2129 
2130 #else
2131 
/*
 * Variant built when CONFIG_NET_CLS is not defined: there is nothing to
 * rebind, so this is intentionally a no-op.
 */
static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
}
2136 
2137 #endif
2138 
tc_ctl_tclass(struct sk_buff * skb,struct nlmsghdr * n,struct netlink_ext_ack * extack)2139 static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
2140 			 struct netlink_ext_ack *extack)
2141 {
2142 	struct net *net = sock_net(skb->sk);
2143 	struct tcmsg *tcm = nlmsg_data(n);
2144 	struct nlattr *tca[TCA_MAX + 1];
2145 	struct net_device *dev;
2146 	struct Qdisc *q = NULL;
2147 	const struct Qdisc_class_ops *cops;
2148 	unsigned long cl = 0;
2149 	unsigned long new_cl;
2150 	u32 portid;
2151 	u32 clid;
2152 	u32 qid;
2153 	int err;
2154 
2155 	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
2156 				     rtm_tca_policy, extack);
2157 	if (err < 0)
2158 		return err;
2159 
2160 	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
2161 	if (!dev)
2162 		return -ENODEV;
2163 
2164 	/*
2165 	   parent == TC_H_UNSPEC - unspecified parent.
2166 	   parent == TC_H_ROOT   - class is root, which has no parent.
2167 	   parent == X:0	 - parent is root class.
2168 	   parent == X:Y	 - parent is a node in hierarchy.
2169 	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.
2170 
2171 	   handle == 0:0	 - generate handle from kernel pool.
2172 	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
2173 	   handle == X:Y	 - clear.
2174 	   handle == X:0	 - root class.
2175 	 */
2176 
2177 	/* Step 1. Determine qdisc handle X:0 */
2178 
2179 	portid = tcm->tcm_parent;
2180 	clid = tcm->tcm_handle;
2181 	qid = TC_H_MAJ(clid);
2182 
2183 	if (portid != TC_H_ROOT) {
2184 		u32 qid1 = TC_H_MAJ(portid);
2185 
2186 		if (qid && qid1) {
2187 			/* If both majors are known, they must be identical. */
2188 			if (qid != qid1)
2189 				return -EINVAL;
2190 		} else if (qid1) {
2191 			qid = qid1;
2192 		} else if (qid == 0)
2193 			qid = rtnl_dereference(dev->qdisc)->handle;
2194 
2195 		/* Now qid is genuine qdisc handle consistent
2196 		 * both with parent and child.
2197 		 *
2198 		 * TC_H_MAJ(portid) still may be unspecified, complete it now.
2199 		 */
2200 		if (portid)
2201 			portid = TC_H_MAKE(qid, portid);
2202 	} else {
2203 		if (qid == 0)
2204 			qid = rtnl_dereference(dev->qdisc)->handle;
2205 	}
2206 
2207 	/* OK. Locate qdisc */
2208 	q = qdisc_lookup(dev, qid);
2209 	if (!q)
2210 		return -ENOENT;
2211 
2212 	/* An check that it supports classes */
2213 	cops = q->ops->cl_ops;
2214 	if (cops == NULL)
2215 		return -EINVAL;
2216 
2217 	/* Now try to get class */
2218 	if (clid == 0) {
2219 		if (portid == TC_H_ROOT)
2220 			clid = qid;
2221 	} else
2222 		clid = TC_H_MAKE(qid, clid);
2223 
2224 	if (clid)
2225 		cl = cops->find(q, clid);
2226 
2227 	if (cl == 0) {
2228 		err = -ENOENT;
2229 		if (n->nlmsg_type != RTM_NEWTCLASS ||
2230 		    !(n->nlmsg_flags & NLM_F_CREATE))
2231 			goto out;
2232 	} else {
2233 		switch (n->nlmsg_type) {
2234 		case RTM_NEWTCLASS:
2235 			err = -EEXIST;
2236 			if (n->nlmsg_flags & NLM_F_EXCL)
2237 				goto out;
2238 			break;
2239 		case RTM_DELTCLASS:
2240 			err = tclass_del_notify(net, cops, skb, n, q, cl, extack);
2241 			/* Unbind the class with flilters with 0 */
2242 			tc_bind_tclass(q, portid, clid, 0);
2243 			goto out;
2244 		case RTM_GETTCLASS:
2245 			err = tclass_get_notify(net, skb, n, q, cl, extack);
2246 			goto out;
2247 		default:
2248 			err = -EINVAL;
2249 			goto out;
2250 		}
2251 	}
2252 
2253 	if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
2254 		NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes");
2255 		return -EOPNOTSUPP;
2256 	}
2257 
2258 	/* Prevent creation of traffic classes with classid TC_H_ROOT */
2259 	if (clid == TC_H_ROOT) {
2260 		NL_SET_ERR_MSG(extack, "Cannot create traffic class with classid TC_H_ROOT");
2261 		return -EINVAL;
2262 	}
2263 
2264 	new_cl = cl;
2265 	err = -EOPNOTSUPP;
2266 	if (cops->change)
2267 		err = cops->change(q, clid, portid, tca, &new_cl, extack);
2268 	if (err == 0) {
2269 		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS, extack);
2270 		/* We just create a new class, need to do reverse binding. */
2271 		if (cl != new_cl)
2272 			tc_bind_tclass(q, portid, clid, new_cl);
2273 	}
2274 out:
2275 	return err;
2276 }
2277 
/*
 * Arguments carried through a class walk while dumping classes.
 * qdisc_class_dump() casts the struct qdisc_walker pointer it receives back
 * to this structure, so @w has to remain the first member.
 */
struct qdisc_dump_args {
	struct qdisc_walker	w;	/* embedded walker state (fn/stop/skip/count) */
	struct sk_buff		*skb;	/* message buffer the classes are written to */
	struct netlink_callback	*cb;	/* dump callback; supplies port id and sequence */
};
2283 
qdisc_class_dump(struct Qdisc * q,unsigned long cl,struct qdisc_walker * arg)2284 static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
2285 			    struct qdisc_walker *arg)
2286 {
2287 	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
2288 
2289 	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
2290 			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
2291 			      RTM_NEWTCLASS, NULL);
2292 }
2293 
tc_dump_tclass_qdisc(struct Qdisc * q,struct sk_buff * skb,struct tcmsg * tcm,struct netlink_callback * cb,int * t_p,int s_t)2294 static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
2295 				struct tcmsg *tcm, struct netlink_callback *cb,
2296 				int *t_p, int s_t)
2297 {
2298 	struct qdisc_dump_args arg;
2299 
2300 	if (tc_qdisc_dump_ignore(q, false) ||
2301 	    *t_p < s_t || !q->ops->cl_ops ||
2302 	    (tcm->tcm_parent &&
2303 	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
2304 		(*t_p)++;
2305 		return 0;
2306 	}
2307 	if (*t_p > s_t)
2308 		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
2309 	arg.w.fn = qdisc_class_dump;
2310 	arg.skb = skb;
2311 	arg.cb = cb;
2312 	arg.w.stop  = 0;
2313 	arg.w.skip = cb->args[1];
2314 	arg.w.count = 0;
2315 	q->ops->cl_ops->walk(q, &arg.w);
2316 	cb->args[1] = arg.w.count;
2317 	if (arg.w.stop)
2318 		return -1;
2319 	(*t_p)++;
2320 	return 0;
2321 }
2322 
/*
 * Dump the classes of @root and, when @recur is set and @root has a device,
 * of further qdiscs of that device:
 *   - with a parent filter in @tcm, only the qdisc matching the filter's
 *     major (looked up from @root), unless that is @root itself;
 *   - without a filter, every qdisc found in the device's qdisc hash.
 *
 * A NULL @root is accepted and treated as "nothing to dump".
 *
 * Returns -1 as soon as tc_dump_tclass_qdisc() reports a stop, 0 otherwise.
 */
static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
			       struct tcmsg *tcm, struct netlink_callback *cb,
			       int *t_p, int s_t, bool recur)
{
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
		return -1;

	if (!qdisc_dev(root) || !recur)
		return 0;

	if (tcm->tcm_parent) {
		q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
		/* The root itself has already been handled above. */
		if (!q || q == root)
			return 0;

		return tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0 ? -1 : 0;
	}

	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
	}

	return 0;
}
2353 
tc_dump_tclass(struct sk_buff * skb,struct netlink_callback * cb)2354 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
2355 {
2356 	struct tcmsg *tcm = nlmsg_data(cb->nlh);
2357 	struct net *net = sock_net(skb->sk);
2358 	struct netdev_queue *dev_queue;
2359 	struct net_device *dev;
2360 	int t, s_t;
2361 
2362 	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
2363 		return 0;
2364 	dev = dev_get_by_index(net, tcm->tcm_ifindex);
2365 	if (!dev)
2366 		return 0;
2367 
2368 	s_t = cb->args[0];
2369 	t = 0;
2370 
2371 	if (tc_dump_tclass_root(rtnl_dereference(dev->qdisc),
2372 				skb, tcm, cb, &t, s_t, true) < 0)
2373 		goto done;
2374 
2375 	dev_queue = dev_ingress_queue(dev);
2376 	if (dev_queue &&
2377 	    tc_dump_tclass_root(rtnl_dereference(dev_queue->qdisc_sleeping),
2378 				skb, tcm, cb, &t, s_t, false) < 0)
2379 		goto done;
2380 
2381 done:
2382 	cb->args[0] = t;
2383 
2384 	dev_put(dev);
2385 	return skb->len;
2386 }
2387 
2388 #ifdef CONFIG_PROC_FS
psched_show(struct seq_file * seq,void * v)2389 static int psched_show(struct seq_file *seq, void *v)
2390 {
2391 	seq_printf(seq, "%08x %08x %08x %08x\n",
2392 		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
2393 		   1000000,
2394 		   (u32)NSEC_PER_SEC / hrtimer_resolution);
2395 
2396 	return 0;
2397 }
2398 
psched_net_init(struct net * net)2399 static int __net_init psched_net_init(struct net *net)
2400 {
2401 	struct proc_dir_entry *e;
2402 
2403 	e = proc_create_single("psched", 0, net->proc_net, psched_show);
2404 	if (e == NULL)
2405 		return -ENOMEM;
2406 
2407 	return 0;
2408 }
2409 
/*
 * Per-namespace exit: remove the "psched" entry that psched_net_init()
 * created under the namespace's proc_net directory.
 */
static void __net_exit psched_net_exit(struct net *net)
{
	remove_proc_entry("psched", net->proc_net);
}
2414 #else
/*
 * Variant built without CONFIG_PROC_FS: there is no proc entry to create,
 * so per-namespace init trivially succeeds.
 */
static int __net_init psched_net_init(struct net *net)
{
	return 0;
}
2419 
/* Variant built without CONFIG_PROC_FS: nothing was created, nothing to remove. */
static void __net_exit psched_net_exit(struct net *net)
{
}
2423 #endif
2424 
/*
 * Per-network-namespace hooks for the packet scheduler; registered from
 * pktsched_init() via register_pernet_subsys().
 */
static struct pernet_operations psched_net_ops = {
	.init = psched_net_init,
	.exit = psched_net_exit,
};
2429 
#if IS_ENABLED(CONFIG_MITIGATION_RETPOLINE)
/*
 * Static key, initially false, defined only for retpoline-enabled builds.
 * NOTE(review): presumably consumed by the helpers in <net/tc_wrapper.h>
 * and configured by tc_wrapper_init() - confirm there.
 */
DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper);
#endif
2433 
pktsched_init(void)2434 static int __init pktsched_init(void)
2435 {
2436 	int err;
2437 
2438 	err = register_pernet_subsys(&psched_net_ops);
2439 	if (err) {
2440 		pr_err("pktsched_init: "
2441 		       "cannot initialize per netns operations\n");
2442 		return err;
2443 	}
2444 
2445 	register_qdisc(&pfifo_fast_ops);
2446 	register_qdisc(&pfifo_qdisc_ops);
2447 	register_qdisc(&bfifo_qdisc_ops);
2448 	register_qdisc(&pfifo_head_drop_qdisc_ops);
2449 	register_qdisc(&mq_qdisc_ops);
2450 	register_qdisc(&noqueue_qdisc_ops);
2451 
2452 	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
2453 	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
2454 	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
2455 		      0);
2456 	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
2457 	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
2458 	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
2459 		      0);
2460 
2461 	tc_wrapper_init();
2462 
2463 	return 0;
2464 }
2465 
2466 subsys_initcall(pktsched_init);
2467