// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_red.c	Random Early Detection queue.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914:	computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816:  ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>


/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length, should be chosen >qth_max
	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than RAM size).
	Really, this limit will never be reached
	if RED works correctly.
 */
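
/* Illustrative only: with iproute2, a configuration honouring the constraint
 * above might look like (the values here are made up for the example)
 *
 *	tc qdisc add dev eth0 root red limit 400000 min 30000 max 90000 \
 *		avpkt 1000 burst 55 ecn adaptive bandwidth 10Mbit
 *
 * i.e. limit (400 kB) comfortably exceeds max (90 kB) plus the expected
 * burst.
 */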

struct red_sched_data {
	u32			limit;		/* HARD maximal queue length */
	unsigned char		flags;		/* TC_RED_ECN, TC_RED_HARDDROP, ... */
	struct timer_list	adapt_timer;	/* adaptive RED: periodic max_P update */
	struct Qdisc		*sch;		/* back-pointer for the timer callback */
	struct red_parms	parms;		/* configured RED parameters */
	struct red_vars		vars;		/* current RED state (qavg etc.) */
	struct red_stats	stats;
	struct Qdisc		*qdisc;		/* child qdisc, bfifo by default */
};

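/* Helpers testing the user-visible TC_RED_* flag bits configured on this
 * qdisc.
 */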
static inline int red_use_ecn(struct red_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}

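/* Update qavg, the EWMA of the child backlog (qavg = (1-W)*qavg + W*backlog,
 * with W = 2^-Wlog), then let red_action() decide whether to pass the packet,
 * ECN-mark it, or drop it. Surviving packets are enqueued to the child
 * (bfifo) qdisc, which enforces the hard byte limit.
 */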
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;
	unsigned int len;
	int ret;

	q->vars.qavg = red_calc_qavg(&q->parms,
				     &q->vars,
				     child->qstats.backlog);

	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
	case RED_DONT_MARK:
		break;

	case RED_PROB_MARK:
		qdisc_qstats_overlimit(sch);
		if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		q->stats.prob_mark++;
		break;

	case RED_HARD_MARK:
		qdisc_qstats_overlimit(sch);
		if (red_use_harddrop(q) || !red_use_ecn(q) ||
		    !INET_ECN_set_ce(skb)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}

		q->stats.forced_mark++;
		break;
	}

	len = qdisc_pkt_len(skb);
	ret = qdisc_enqueue(skb, child, to_free);
	if (likely(ret == NET_XMIT_SUCCESS)) {
		sch->qstats.backlog += len;
		sch->q.qlen++;
	} else if (net_xmit_drop_count(ret)) {
		q->stats.pdrop++;
		qdisc_qstats_drop(sch);
	}
	return ret;

congestion_drop:
	qdisc_drop(skb, sch, to_free);
	return NET_XMIT_CN;
}

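/* Pull the next packet from the child qdisc; when the child runs empty,
 * start an idle period so qavg is aged correctly while the link is unused.
 */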
static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	skb = child->dequeue(child);
	if (skb) {
		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;
	} else {
		if (!red_is_idling(&q->vars))
			red_start_of_idle_period(&q->vars);
	}
	return skb;
}

static struct sk_buff *red_peek(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	return child->ops->peek(child);
}

static void red_reset(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->qstats.backlog = 0;
	sch->q.qlen = 0;
	red_restart(&q->vars);
}

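/* Install or remove an equivalent RED configuration in hardware via
 * ndo_setup_tc(TC_SETUP_QDISC_RED), if the device supports it.
 */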
static int red_offload(struct Qdisc *sch, bool enable)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload opt = {
		.handle = sch->handle,
		.parent = sch->parent,
	};

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return -EOPNOTSUPP;

	if (enable) {
		opt.command = TC_RED_REPLACE;
		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
		opt.set.probability = q->parms.max_P;
		opt.set.limit = q->limit;
		opt.set.is_ecn = red_use_ecn(q);
		opt.set.is_harddrop = red_use_harddrop(q);
		opt.set.qstats = &sch->qstats;
	} else {
		opt.command = TC_RED_DESTROY;
	}

	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
}

static void red_destroy(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	del_timer_sync(&q->adapt_timer);
	red_offload(sch, false);
	qdisc_put(q->qdisc);
}

static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_RED_MAX_P] = { .type = NLA_U32 },
};

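/* Parse and validate the netlink configuration, (re)create the bfifo child
 * that provides the hard limit, and apply the new RED parameters. Used both
 * at creation time (from red_init()) and for later changes.
 */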
static int red_change(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	struct Qdisc *old_child = NULL, *child = NULL;
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	struct tc_red_qopt *ctl;
	int err;
	u32 max_P;
	u8 *stab;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
					  NULL);
	if (err < 0)
		return err;

	if (tb[TCA_RED_PARMS] == NULL ||
	    tb[TCA_RED_STAB] == NULL)
		return -EINVAL;

	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;

	ctl = nla_data(tb[TCA_RED_PARMS]);
	stab = nla_data(tb[TCA_RED_STAB]);
	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog,
			      ctl->Scell_log, stab))
		return -EINVAL;

	if (ctl->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
					 extack);
		if (IS_ERR(child))
			return PTR_ERR(child);

		/* child is fifo, no need to check for noop_qdisc */
		qdisc_hash_add(child, true);
	}

	sch_tree_lock(sch);
	q->flags = ctl->flags;
	q->limit = ctl->limit;
	if (child) {
		qdisc_tree_flush_backlog(q->qdisc);
		old_child = q->qdisc;
		q->qdisc = child;
	}

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      stab,
		      max_P);
	red_set_vars(&q->vars);

	del_timer(&q->adapt_timer);
	if (ctl->flags & TC_RED_ADAPTATIVE)
		mod_timer(&q->adapt_timer, jiffies + HZ/2);

	if (!q->qdisc->q.qlen)
		red_start_of_idle_period(&q->vars);

	sch_tree_unlock(sch);

	red_offload(sch, true);

	if (old_child)
		qdisc_put(old_child);
	return 0;
}

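/* Adaptive RED: fires every 500 ms and re-tunes max_P (see
 * red_adaptative_algo()) so that qavg tends to stay between the thresholds.
 */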
static inline void red_adaptative_timer(struct timer_list *t)
{
	struct red_sched_data *q = from_timer(q, t, adapt_timer);
	struct Qdisc *sch = q->sch;
	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

	spin_lock(root_lock);
	red_adaptative_algo(&q->parms, &q->vars);
	mod_timer(&q->adapt_timer, jiffies + HZ/2);
	spin_unlock(root_lock);
}

static int red_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);

	q->qdisc = &noop_qdisc;
	q->sch = sch;
	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
	return red_change(sch, opt, extack);
}

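/* When offloaded, fold the hardware byte/packet and queue counters into the
 * software bstats/qstats before dumping.
 */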
static int red_dump_offload_stats(struct Qdisc *sch)
{
	struct tc_red_qopt_offload hw_stats = {
		.command = TC_RED_STATS,
		.handle = sch->handle,
		.parent = sch->parent,
		{
			.stats.bstats = &sch->bstats,
			.stats.qstats = &sch->qstats,
		},
	};

	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_RED, &hw_stats);
}

static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= q->flags,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};
	int err;

	err = red_dump_offload_stats(sch);
	if (err)
		goto nla_put_failure;

	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
		goto nla_put_failure;
	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

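/* Export the RED-specific counters; if the qdisc is offloaded, fetch the
 * xstats from the driver first so hardware marks and drops are included.
 */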
static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_xstats st = {0};

	if (sch->flags & TCQ_F_OFFLOADED) {
		struct tc_red_qopt_offload hw_stats_request = {
			.command = TC_RED_XSTATS,
			.handle = sch->handle,
			.parent = sch->parent,
			{
				.xstats = &q->stats,
			},
		};
		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
					      &hw_stats_request);
	}
	st.early = q->stats.prob_drop + q->stats.forced_drop;
	st.pdrop = q->stats.pdrop;
	st.other = q->stats.other;
	st.marked = q->stats.prob_mark + q->stats.forced_mark;

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

static int red_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;
	return 0;
}

static void red_graft_offload(struct Qdisc *sch,
			      struct Qdisc *new, struct Qdisc *old,
			      struct netlink_ext_ack *extack)
{
	struct tc_red_qopt_offload graft_offload = {
		.handle		= sch->handle,
		.parent		= sch->parent,
		.child_handle	= new->handle,
		.command	= TC_RED_GRAFT,
	};

	qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
				   TC_SETUP_QDISC_RED, &graft_offload, extack);
}

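/* Replace the child qdisc and notify an offloading driver of the new
 * child's handle.
 */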
static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old, struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	*old = qdisc_replace(sch, new, &q->qdisc);

	red_graft_offload(sch, new, *old, extack);
	return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct red_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}

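/* Walk the single pseudo-class (minor 1) that holds the child qdisc. */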
static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static const struct Qdisc_class_ops red_class_ops = {
	.graft		=	red_graft,
	.leaf		=	red_leaf,
	.find		=	red_find,
	.walk		=	red_walk,
	.dump		=	red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id		=	"red",
	.priv_size	=	sizeof(struct red_sched_data),
	.cl_ops		=	&red_class_ops,
	.enqueue	=	red_enqueue,
	.dequeue	=	red_dequeue,
	.peek		=	red_peek,
	.init		=	red_init,
	.reset		=	red_reset,
	.destroy	=	red_destroy,
	.change		=	red_change,
	.dump		=	red_dump,
	.dump_stats	=	red_dump_stats,
	.owner		=	THIS_MODULE,
};

static int __init red_module_init(void)
{
	return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
	unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");