1 /*
2 * net/sched/sch_red.c Random Early Detection queue.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 * Changes:
12 * J Hadi Salim 980914: computation fixes
13 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
14 * J Hadi Salim 980816: ECN support
15 */
16
17 #include <linux/module.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/skbuff.h>
21 #include <net/pkt_sched.h>
22 #include <net/inet_ecn.h>
23 #include <net/red.h>
24
25
26 /* Parameters, settable by user:
27 -----------------------------
28
29 limit - bytes (must be > qth_max + burst)
30
31 Hard limit on queue length, should be chosen >qth_max
32 to allow packet bursts. This parameter does not
33 affect the algorithm's behaviour and can be chosen
34 arbitrarily high (well, less than ram size)
35 Really, this limit will never be reached
36 if RED works correctly.
37 */
38
39 struct red_sched_data {
40 u32 limit; /* HARD maximal queue length */
41 unsigned char flags;
42 struct timer_list adapt_timer;
43 struct red_parms parms;
44 struct red_vars vars;
45 struct red_stats stats;
46 struct Qdisc *qdisc;
47 };
48
red_use_ecn(struct red_sched_data * q)49 static inline int red_use_ecn(struct red_sched_data *q)
50 {
51 return q->flags & TC_RED_ECN;
52 }
53
red_use_harddrop(struct red_sched_data * q)54 static inline int red_use_harddrop(struct red_sched_data *q)
55 {
56 return q->flags & TC_RED_HARDDROP;
57 }
58
red_enqueue(struct sk_buff * skb,struct Qdisc * sch,struct sk_buff ** to_free)59 static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
60 struct sk_buff **to_free)
61 {
62 struct red_sched_data *q = qdisc_priv(sch);
63 struct Qdisc *child = q->qdisc;
64 int ret;
65
66 q->vars.qavg = red_calc_qavg(&q->parms,
67 &q->vars,
68 child->qstats.backlog);
69
70 if (red_is_idling(&q->vars))
71 red_end_of_idle_period(&q->vars);
72
73 switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
74 case RED_DONT_MARK:
75 break;
76
77 case RED_PROB_MARK:
78 qdisc_qstats_overlimit(sch);
79 if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
80 q->stats.prob_drop++;
81 goto congestion_drop;
82 }
83
84 q->stats.prob_mark++;
85 break;
86
87 case RED_HARD_MARK:
88 qdisc_qstats_overlimit(sch);
89 if (red_use_harddrop(q) || !red_use_ecn(q) ||
90 !INET_ECN_set_ce(skb)) {
91 q->stats.forced_drop++;
92 goto congestion_drop;
93 }
94
95 q->stats.forced_mark++;
96 break;
97 }
98
99 ret = qdisc_enqueue(skb, child, to_free);
100 if (likely(ret == NET_XMIT_SUCCESS)) {
101 qdisc_qstats_backlog_inc(sch, skb);
102 sch->q.qlen++;
103 } else if (net_xmit_drop_count(ret)) {
104 q->stats.pdrop++;
105 qdisc_qstats_drop(sch);
106 }
107 return ret;
108
109 congestion_drop:
110 qdisc_drop(skb, sch, to_free);
111 return NET_XMIT_CN;
112 }
113
red_dequeue(struct Qdisc * sch)114 static struct sk_buff *red_dequeue(struct Qdisc *sch)
115 {
116 struct sk_buff *skb;
117 struct red_sched_data *q = qdisc_priv(sch);
118 struct Qdisc *child = q->qdisc;
119
120 skb = child->dequeue(child);
121 if (skb) {
122 qdisc_bstats_update(sch, skb);
123 qdisc_qstats_backlog_dec(sch, skb);
124 sch->q.qlen--;
125 } else {
126 if (!red_is_idling(&q->vars))
127 red_start_of_idle_period(&q->vars);
128 }
129 return skb;
130 }
131
red_peek(struct Qdisc * sch)132 static struct sk_buff *red_peek(struct Qdisc *sch)
133 {
134 struct red_sched_data *q = qdisc_priv(sch);
135 struct Qdisc *child = q->qdisc;
136
137 return child->ops->peek(child);
138 }
139
red_reset(struct Qdisc * sch)140 static void red_reset(struct Qdisc *sch)
141 {
142 struct red_sched_data *q = qdisc_priv(sch);
143
144 qdisc_reset(q->qdisc);
145 sch->qstats.backlog = 0;
146 sch->q.qlen = 0;
147 red_restart(&q->vars);
148 }
149
red_destroy(struct Qdisc * sch)150 static void red_destroy(struct Qdisc *sch)
151 {
152 struct red_sched_data *q = qdisc_priv(sch);
153
154 del_timer_sync(&q->adapt_timer);
155 qdisc_destroy(q->qdisc);
156 }
157
158 static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
159 [TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) },
160 [TCA_RED_STAB] = { .len = RED_STAB_SIZE },
161 [TCA_RED_MAX_P] = { .type = NLA_U32 },
162 };
163
red_change(struct Qdisc * sch,struct nlattr * opt)164 static int red_change(struct Qdisc *sch, struct nlattr *opt)
165 {
166 struct red_sched_data *q = qdisc_priv(sch);
167 struct nlattr *tb[TCA_RED_MAX + 1];
168 struct tc_red_qopt *ctl;
169 struct Qdisc *child = NULL;
170 int err;
171 u32 max_P;
172
173 if (opt == NULL)
174 return -EINVAL;
175
176 err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy, NULL);
177 if (err < 0)
178 return err;
179
180 if (tb[TCA_RED_PARMS] == NULL ||
181 tb[TCA_RED_STAB] == NULL)
182 return -EINVAL;
183
184 max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
185
186 ctl = nla_data(tb[TCA_RED_PARMS]);
187 if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
188 return -EINVAL;
189
190 if (ctl->limit > 0) {
191 child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit);
192 if (IS_ERR(child))
193 return PTR_ERR(child);
194
195 /* child is fifo, no need to check for noop_qdisc */
196 qdisc_hash_add(child, true);
197 }
198
199 sch_tree_lock(sch);
200 q->flags = ctl->flags;
201 q->limit = ctl->limit;
202 if (child) {
203 qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
204 q->qdisc->qstats.backlog);
205 qdisc_destroy(q->qdisc);
206 q->qdisc = child;
207 }
208
209 red_set_parms(&q->parms,
210 ctl->qth_min, ctl->qth_max, ctl->Wlog,
211 ctl->Plog, ctl->Scell_log,
212 nla_data(tb[TCA_RED_STAB]),
213 max_P);
214 red_set_vars(&q->vars);
215
216 del_timer(&q->adapt_timer);
217 if (ctl->flags & TC_RED_ADAPTATIVE)
218 mod_timer(&q->adapt_timer, jiffies + HZ/2);
219
220 if (!q->qdisc->q.qlen)
221 red_start_of_idle_period(&q->vars);
222
223 sch_tree_unlock(sch);
224 return 0;
225 }
226
red_adaptative_timer(unsigned long arg)227 static inline void red_adaptative_timer(unsigned long arg)
228 {
229 struct Qdisc *sch = (struct Qdisc *)arg;
230 struct red_sched_data *q = qdisc_priv(sch);
231 spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
232
233 spin_lock(root_lock);
234 red_adaptative_algo(&q->parms, &q->vars);
235 mod_timer(&q->adapt_timer, jiffies + HZ/2);
236 spin_unlock(root_lock);
237 }
238
red_init(struct Qdisc * sch,struct nlattr * opt)239 static int red_init(struct Qdisc *sch, struct nlattr *opt)
240 {
241 struct red_sched_data *q = qdisc_priv(sch);
242
243 q->qdisc = &noop_qdisc;
244 setup_timer(&q->adapt_timer, red_adaptative_timer, (unsigned long)sch);
245 return red_change(sch, opt);
246 }
247
red_dump(struct Qdisc * sch,struct sk_buff * skb)248 static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
249 {
250 struct red_sched_data *q = qdisc_priv(sch);
251 struct nlattr *opts = NULL;
252 struct tc_red_qopt opt = {
253 .limit = q->limit,
254 .flags = q->flags,
255 .qth_min = q->parms.qth_min >> q->parms.Wlog,
256 .qth_max = q->parms.qth_max >> q->parms.Wlog,
257 .Wlog = q->parms.Wlog,
258 .Plog = q->parms.Plog,
259 .Scell_log = q->parms.Scell_log,
260 };
261
262 sch->qstats.backlog = q->qdisc->qstats.backlog;
263 opts = nla_nest_start(skb, TCA_OPTIONS);
264 if (opts == NULL)
265 goto nla_put_failure;
266 if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
267 nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
268 goto nla_put_failure;
269 return nla_nest_end(skb, opts);
270
271 nla_put_failure:
272 nla_nest_cancel(skb, opts);
273 return -EMSGSIZE;
274 }
275
red_dump_stats(struct Qdisc * sch,struct gnet_dump * d)276 static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
277 {
278 struct red_sched_data *q = qdisc_priv(sch);
279 struct tc_red_xstats st = {
280 .early = q->stats.prob_drop + q->stats.forced_drop,
281 .pdrop = q->stats.pdrop,
282 .other = q->stats.other,
283 .marked = q->stats.prob_mark + q->stats.forced_mark,
284 };
285
286 return gnet_stats_copy_app(d, &st, sizeof(st));
287 }
288
red_dump_class(struct Qdisc * sch,unsigned long cl,struct sk_buff * skb,struct tcmsg * tcm)289 static int red_dump_class(struct Qdisc *sch, unsigned long cl,
290 struct sk_buff *skb, struct tcmsg *tcm)
291 {
292 struct red_sched_data *q = qdisc_priv(sch);
293
294 tcm->tcm_handle |= TC_H_MIN(1);
295 tcm->tcm_info = q->qdisc->handle;
296 return 0;
297 }
298
red_graft(struct Qdisc * sch,unsigned long arg,struct Qdisc * new,struct Qdisc ** old)299 static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
300 struct Qdisc **old)
301 {
302 struct red_sched_data *q = qdisc_priv(sch);
303
304 if (new == NULL)
305 new = &noop_qdisc;
306
307 *old = qdisc_replace(sch, new, &q->qdisc);
308 return 0;
309 }
310
red_leaf(struct Qdisc * sch,unsigned long arg)311 static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
312 {
313 struct red_sched_data *q = qdisc_priv(sch);
314 return q->qdisc;
315 }
316
/* Every @classid resolves to the same value, 1; neither argument is used. */
static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
	const unsigned long only_class = 1;

	return only_class;
}
321
red_walk(struct Qdisc * sch,struct qdisc_walker * walker)322 static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
323 {
324 if (!walker->stop) {
325 if (walker->count >= walker->skip)
326 if (walker->fn(sch, 1, walker) < 0) {
327 walker->stop = 1;
328 return;
329 }
330 walker->count++;
331 }
332 }
333
334 static const struct Qdisc_class_ops red_class_ops = {
335 .graft = red_graft,
336 .leaf = red_leaf,
337 .find = red_find,
338 .walk = red_walk,
339 .dump = red_dump_class,
340 };
341
342 static struct Qdisc_ops red_qdisc_ops __read_mostly = {
343 .id = "red",
344 .priv_size = sizeof(struct red_sched_data),
345 .cl_ops = &red_class_ops,
346 .enqueue = red_enqueue,
347 .dequeue = red_dequeue,
348 .peek = red_peek,
349 .init = red_init,
350 .reset = red_reset,
351 .destroy = red_destroy,
352 .change = red_change,
353 .dump = red_dump,
354 .dump_stats = red_dump_stats,
355 .owner = THIS_MODULE,
356 };
357
red_module_init(void)358 static int __init red_module_init(void)
359 {
360 return register_qdisc(&red_qdisc_ops);
361 }
362
red_module_exit(void)363 static void __exit red_module_exit(void)
364 {
365 unregister_qdisc(&red_qdisc_ops);
366 }
367
368 module_init(red_module_init)
369 module_exit(red_module_exit)
370
371 MODULE_LICENSE("GPL");
372