• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * net/sched/sch_choke.c	CHOKE scheduler
3  *
4  * Copyright (c) 2011 Stephen Hemminger <shemminger@vyatta.com>
5  * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com>
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License
9  * version 2 as published by the Free Software Foundation.
10  *
11  */
12 
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/kernel.h>
16 #include <linux/skbuff.h>
17 #include <linux/vmalloc.h>
18 #include <net/pkt_sched.h>
19 #include <net/pkt_cls.h>
20 #include <net/inet_ecn.h>
21 #include <net/red.h>
22 #include <net/flow_dissector.h>
23 
24 /*
25    CHOKe stateless AQM for fair bandwidth allocation
26    =================================================
27 
28    CHOKe (CHOose and Keep for responsive flows, CHOose and Kill for
29    unresponsive flows) is a variant of RED that penalizes misbehaving flows but
30    maintains no flow state. The difference from RED is an additional step
31    during the enqueuing process. If average queue size is over the
32    low threshold (qmin), a packet is chosen at random from the queue.
33    If both the new and chosen packet are from the same flow, both
34    are dropped. Unlike RED, CHOKe is not really a "classful" qdisc because it
35    needs to access packets in queue randomly. It has a minimal class
36    interface to allow overriding the builtin flow classifier with
37    filters.
38 
39    Source:
40    R. Pan, B. Prabhakar, and K. Psounis, "CHOKe, A Stateless
41    Active Queue Management Scheme for Approximating Fair Bandwidth Allocation",
42    IEEE INFOCOM, 2000.
43 
44    A. Tang, J. Wang, S. Low, "Understanding CHOKe: Throughput and Spatial
45    Characteristics", IEEE/ACM Transactions on Networking, 2004
46 
47  */
48 
49 /* Upper bound on size of sk_buff table (packets) */
50 #define CHOKE_MAX_QUEUE	(128*1024 - 1)
51 
52 struct choke_sched_data {
53 /* Parameters */
54 	u32		 limit;
55 	unsigned char	 flags;
56 
57 	struct red_parms parms;
58 
59 /* Variables */
60 	struct red_vars  vars;
61 	struct {
62 		u32	prob_drop;	/* Early probability drops */
63 		u32	prob_mark;	/* Early probability marks */
64 		u32	forced_drop;	/* Forced drops, qavg > max_thresh */
65 		u32	forced_mark;	/* Forced marks, qavg > max_thresh */
66 		u32	pdrop;          /* Drops due to queue limits */
67 		u32	other;          /* Drops due to drop() calls */
68 		u32	matched;	/* Drops to flow match */
69 	} stats;
70 
71 	unsigned int	 head;
72 	unsigned int	 tail;
73 
74 	unsigned int	 tab_mask; /* size - 1 */
75 
76 	struct sk_buff **tab;
77 };
78 
79 /* number of elements in queue including holes */
choke_len(const struct choke_sched_data * q)80 static unsigned int choke_len(const struct choke_sched_data *q)
81 {
82 	return (q->tail - q->head) & q->tab_mask;
83 }
84 
85 /* Is ECN parameter configured */
use_ecn(const struct choke_sched_data * q)86 static int use_ecn(const struct choke_sched_data *q)
87 {
88 	return q->flags & TC_RED_ECN;
89 }
90 
91 /* Should packets over max just be dropped (versus marked) */
use_harddrop(const struct choke_sched_data * q)92 static int use_harddrop(const struct choke_sched_data *q)
93 {
94 	return q->flags & TC_RED_HARDDROP;
95 }
96 
97 /* Move head pointer forward to skip over holes */
choke_zap_head_holes(struct choke_sched_data * q)98 static void choke_zap_head_holes(struct choke_sched_data *q)
99 {
100 	do {
101 		q->head = (q->head + 1) & q->tab_mask;
102 		if (q->head == q->tail)
103 			break;
104 	} while (q->tab[q->head] == NULL);
105 }
106 
107 /* Move tail pointer backwards to reuse holes */
choke_zap_tail_holes(struct choke_sched_data * q)108 static void choke_zap_tail_holes(struct choke_sched_data *q)
109 {
110 	do {
111 		q->tail = (q->tail - 1) & q->tab_mask;
112 		if (q->head == q->tail)
113 			break;
114 	} while (q->tab[q->tail] == NULL);
115 }
116 
117 /* Drop packet from queue array by creating a "hole" */
choke_drop_by_idx(struct Qdisc * sch,unsigned int idx,struct sk_buff ** to_free)118 static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx,
119 			      struct sk_buff **to_free)
120 {
121 	struct choke_sched_data *q = qdisc_priv(sch);
122 	struct sk_buff *skb = q->tab[idx];
123 
124 	q->tab[idx] = NULL;
125 
126 	if (idx == q->head)
127 		choke_zap_head_holes(q);
128 	if (idx == q->tail)
129 		choke_zap_tail_holes(q);
130 
131 	qdisc_qstats_backlog_dec(sch, skb);
132 	qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
133 	qdisc_drop(skb, sch, to_free);
134 	--sch->q.qlen;
135 }
136 
137 struct choke_skb_cb {
138 	u16			classid;
139 	u8			keys_valid;
140 	struct			flow_keys_digest keys;
141 };
142 
choke_skb_cb(const struct sk_buff * skb)143 static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
144 {
145 	qdisc_cb_private_validate(skb, sizeof(struct choke_skb_cb));
146 	return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data;
147 }
148 
choke_set_classid(struct sk_buff * skb,u16 classid)149 static inline void choke_set_classid(struct sk_buff *skb, u16 classid)
150 {
151 	choke_skb_cb(skb)->classid = classid;
152 }
153 
154 /*
155  * Compare flow of two packets
156  *  Returns true only if source and destination address and port match.
157  *          false for special cases
158  */
choke_match_flow(struct sk_buff * skb1,struct sk_buff * skb2)159 static bool choke_match_flow(struct sk_buff *skb1,
160 			     struct sk_buff *skb2)
161 {
162 	struct flow_keys temp;
163 
164 	if (skb1->protocol != skb2->protocol)
165 		return false;
166 
167 	if (!choke_skb_cb(skb1)->keys_valid) {
168 		choke_skb_cb(skb1)->keys_valid = 1;
169 		skb_flow_dissect_flow_keys(skb1, &temp, 0);
170 		make_flow_keys_digest(&choke_skb_cb(skb1)->keys, &temp);
171 	}
172 
173 	if (!choke_skb_cb(skb2)->keys_valid) {
174 		choke_skb_cb(skb2)->keys_valid = 1;
175 		skb_flow_dissect_flow_keys(skb2, &temp, 0);
176 		make_flow_keys_digest(&choke_skb_cb(skb2)->keys, &temp);
177 	}
178 
179 	return !memcmp(&choke_skb_cb(skb1)->keys,
180 		       &choke_skb_cb(skb2)->keys,
181 		       sizeof(choke_skb_cb(skb1)->keys));
182 }
183 
184 /*
185  * Select a packet at random from queue
186  * HACK: since queue can have holes from previous deletion; retry several
187  *   times to find a random skb but then just give up and return the head
188  * Will return NULL if queue is empty (q->head == q->tail)
189  */
choke_peek_random(const struct choke_sched_data * q,unsigned int * pidx)190 static struct sk_buff *choke_peek_random(const struct choke_sched_data *q,
191 					 unsigned int *pidx)
192 {
193 	struct sk_buff *skb;
194 	int retrys = 3;
195 
196 	do {
197 		*pidx = (q->head + prandom_u32_max(choke_len(q))) & q->tab_mask;
198 		skb = q->tab[*pidx];
199 		if (skb)
200 			return skb;
201 	} while (--retrys > 0);
202 
203 	return q->tab[*pidx = q->head];
204 }
205 
206 /*
207  * Compare new packet with random packet in queue
208  * returns true if matched and sets *pidx
209  */
choke_match_random(const struct choke_sched_data * q,struct sk_buff * nskb,unsigned int * pidx)210 static bool choke_match_random(const struct choke_sched_data *q,
211 			       struct sk_buff *nskb,
212 			       unsigned int *pidx)
213 {
214 	struct sk_buff *oskb;
215 
216 	if (q->head == q->tail)
217 		return false;
218 
219 	oskb = choke_peek_random(q, pidx);
220 	return choke_match_flow(oskb, nskb);
221 }
222 
choke_enqueue(struct sk_buff * skb,struct Qdisc * sch,struct sk_buff ** to_free)223 static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch,
224 			 struct sk_buff **to_free)
225 {
226 	struct choke_sched_data *q = qdisc_priv(sch);
227 	const struct red_parms *p = &q->parms;
228 
229 	choke_skb_cb(skb)->keys_valid = 0;
230 	/* Compute average queue usage (see RED) */
231 	q->vars.qavg = red_calc_qavg(p, &q->vars, sch->q.qlen);
232 	if (red_is_idling(&q->vars))
233 		red_end_of_idle_period(&q->vars);
234 
235 	/* Is queue small? */
236 	if (q->vars.qavg <= p->qth_min)
237 		q->vars.qcount = -1;
238 	else {
239 		unsigned int idx;
240 
241 		/* Draw a packet at random from queue and compare flow */
242 		if (choke_match_random(q, skb, &idx)) {
243 			q->stats.matched++;
244 			choke_drop_by_idx(sch, idx, to_free);
245 			goto congestion_drop;
246 		}
247 
248 		/* Queue is large, always mark/drop */
249 		if (q->vars.qavg > p->qth_max) {
250 			q->vars.qcount = -1;
251 
252 			qdisc_qstats_overlimit(sch);
253 			if (use_harddrop(q) || !use_ecn(q) ||
254 			    !INET_ECN_set_ce(skb)) {
255 				q->stats.forced_drop++;
256 				goto congestion_drop;
257 			}
258 
259 			q->stats.forced_mark++;
260 		} else if (++q->vars.qcount) {
261 			if (red_mark_probability(p, &q->vars, q->vars.qavg)) {
262 				q->vars.qcount = 0;
263 				q->vars.qR = red_random(p);
264 
265 				qdisc_qstats_overlimit(sch);
266 				if (!use_ecn(q) || !INET_ECN_set_ce(skb)) {
267 					q->stats.prob_drop++;
268 					goto congestion_drop;
269 				}
270 
271 				q->stats.prob_mark++;
272 			}
273 		} else
274 			q->vars.qR = red_random(p);
275 	}
276 
277 	/* Admit new packet */
278 	if (sch->q.qlen < q->limit) {
279 		q->tab[q->tail] = skb;
280 		q->tail = (q->tail + 1) & q->tab_mask;
281 		++sch->q.qlen;
282 		qdisc_qstats_backlog_inc(sch, skb);
283 		return NET_XMIT_SUCCESS;
284 	}
285 
286 	q->stats.pdrop++;
287 	return qdisc_drop(skb, sch, to_free);
288 
289 congestion_drop:
290 	qdisc_drop(skb, sch, to_free);
291 	return NET_XMIT_CN;
292 }
293 
choke_dequeue(struct Qdisc * sch)294 static struct sk_buff *choke_dequeue(struct Qdisc *sch)
295 {
296 	struct choke_sched_data *q = qdisc_priv(sch);
297 	struct sk_buff *skb;
298 
299 	if (q->head == q->tail) {
300 		if (!red_is_idling(&q->vars))
301 			red_start_of_idle_period(&q->vars);
302 		return NULL;
303 	}
304 
305 	skb = q->tab[q->head];
306 	q->tab[q->head] = NULL;
307 	choke_zap_head_holes(q);
308 	--sch->q.qlen;
309 	qdisc_qstats_backlog_dec(sch, skb);
310 	qdisc_bstats_update(sch, skb);
311 
312 	return skb;
313 }
314 
choke_reset(struct Qdisc * sch)315 static void choke_reset(struct Qdisc *sch)
316 {
317 	struct choke_sched_data *q = qdisc_priv(sch);
318 
319 	while (q->head != q->tail) {
320 		struct sk_buff *skb = q->tab[q->head];
321 
322 		q->head = (q->head + 1) & q->tab_mask;
323 		if (!skb)
324 			continue;
325 		rtnl_qdisc_drop(skb, sch);
326 	}
327 
328 	sch->q.qlen = 0;
329 	sch->qstats.backlog = 0;
330 	memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
331 	q->head = q->tail = 0;
332 	red_restart(&q->vars);
333 }
334 
335 static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
336 	[TCA_CHOKE_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
337 	[TCA_CHOKE_STAB]	= { .len = RED_STAB_SIZE },
338 	[TCA_CHOKE_MAX_P]	= { .type = NLA_U32 },
339 };
340 
341 
choke_free(void * addr)342 static void choke_free(void *addr)
343 {
344 	kvfree(addr);
345 }
346 
choke_change(struct Qdisc * sch,struct nlattr * opt)347 static int choke_change(struct Qdisc *sch, struct nlattr *opt)
348 {
349 	struct choke_sched_data *q = qdisc_priv(sch);
350 	struct nlattr *tb[TCA_CHOKE_MAX + 1];
351 	const struct tc_red_qopt *ctl;
352 	int err;
353 	struct sk_buff **old = NULL;
354 	unsigned int mask;
355 	u32 max_P;
356 
357 	if (opt == NULL)
358 		return -EINVAL;
359 
360 	err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy, NULL);
361 	if (err < 0)
362 		return err;
363 
364 	if (tb[TCA_CHOKE_PARMS] == NULL ||
365 	    tb[TCA_CHOKE_STAB] == NULL)
366 		return -EINVAL;
367 
368 	max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0;
369 
370 	ctl = nla_data(tb[TCA_CHOKE_PARMS]);
371 
372 	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
373 		return -EINVAL;
374 
375 	if (ctl->limit > CHOKE_MAX_QUEUE)
376 		return -EINVAL;
377 
378 	mask = roundup_pow_of_two(ctl->limit + 1) - 1;
379 	if (mask != q->tab_mask) {
380 		struct sk_buff **ntab;
381 
382 		ntab = kvmalloc_array((mask + 1), sizeof(struct sk_buff *), GFP_KERNEL | __GFP_ZERO);
383 		if (!ntab)
384 			return -ENOMEM;
385 
386 		sch_tree_lock(sch);
387 		old = q->tab;
388 		if (old) {
389 			unsigned int oqlen = sch->q.qlen, tail = 0;
390 			unsigned dropped = 0;
391 
392 			while (q->head != q->tail) {
393 				struct sk_buff *skb = q->tab[q->head];
394 
395 				q->head = (q->head + 1) & q->tab_mask;
396 				if (!skb)
397 					continue;
398 				if (tail < mask) {
399 					ntab[tail++] = skb;
400 					continue;
401 				}
402 				dropped += qdisc_pkt_len(skb);
403 				qdisc_qstats_backlog_dec(sch, skb);
404 				--sch->q.qlen;
405 				rtnl_qdisc_drop(skb, sch);
406 			}
407 			qdisc_tree_reduce_backlog(sch, oqlen - sch->q.qlen, dropped);
408 			q->head = 0;
409 			q->tail = tail;
410 		}
411 
412 		q->tab_mask = mask;
413 		q->tab = ntab;
414 	} else
415 		sch_tree_lock(sch);
416 
417 	q->flags = ctl->flags;
418 	q->limit = ctl->limit;
419 
420 	red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
421 		      ctl->Plog, ctl->Scell_log,
422 		      nla_data(tb[TCA_CHOKE_STAB]),
423 		      max_P);
424 	red_set_vars(&q->vars);
425 
426 	if (q->head == q->tail)
427 		red_end_of_idle_period(&q->vars);
428 
429 	sch_tree_unlock(sch);
430 	choke_free(old);
431 	return 0;
432 }
433 
choke_init(struct Qdisc * sch,struct nlattr * opt)434 static int choke_init(struct Qdisc *sch, struct nlattr *opt)
435 {
436 	return choke_change(sch, opt);
437 }
438 
choke_dump(struct Qdisc * sch,struct sk_buff * skb)439 static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
440 {
441 	struct choke_sched_data *q = qdisc_priv(sch);
442 	struct nlattr *opts = NULL;
443 	struct tc_red_qopt opt = {
444 		.limit		= q->limit,
445 		.flags		= q->flags,
446 		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
447 		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
448 		.Wlog		= q->parms.Wlog,
449 		.Plog		= q->parms.Plog,
450 		.Scell_log	= q->parms.Scell_log,
451 	};
452 
453 	opts = nla_nest_start(skb, TCA_OPTIONS);
454 	if (opts == NULL)
455 		goto nla_put_failure;
456 
457 	if (nla_put(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt) ||
458 	    nla_put_u32(skb, TCA_CHOKE_MAX_P, q->parms.max_P))
459 		goto nla_put_failure;
460 	return nla_nest_end(skb, opts);
461 
462 nla_put_failure:
463 	nla_nest_cancel(skb, opts);
464 	return -EMSGSIZE;
465 }
466 
choke_dump_stats(struct Qdisc * sch,struct gnet_dump * d)467 static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
468 {
469 	struct choke_sched_data *q = qdisc_priv(sch);
470 	struct tc_choke_xstats st = {
471 		.early	= q->stats.prob_drop + q->stats.forced_drop,
472 		.marked	= q->stats.prob_mark + q->stats.forced_mark,
473 		.pdrop	= q->stats.pdrop,
474 		.other	= q->stats.other,
475 		.matched = q->stats.matched,
476 	};
477 
478 	return gnet_stats_copy_app(d, &st, sizeof(st));
479 }
480 
choke_destroy(struct Qdisc * sch)481 static void choke_destroy(struct Qdisc *sch)
482 {
483 	struct choke_sched_data *q = qdisc_priv(sch);
484 
485 	choke_free(q->tab);
486 }
487 
choke_peek_head(struct Qdisc * sch)488 static struct sk_buff *choke_peek_head(struct Qdisc *sch)
489 {
490 	struct choke_sched_data *q = qdisc_priv(sch);
491 
492 	return (q->head != q->tail) ? q->tab[q->head] : NULL;
493 }
494 
495 static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
496 	.id		=	"choke",
497 	.priv_size	=	sizeof(struct choke_sched_data),
498 
499 	.enqueue	=	choke_enqueue,
500 	.dequeue	=	choke_dequeue,
501 	.peek		=	choke_peek_head,
502 	.init		=	choke_init,
503 	.destroy	=	choke_destroy,
504 	.reset		=	choke_reset,
505 	.change		=	choke_change,
506 	.dump		=	choke_dump,
507 	.dump_stats	=	choke_dump_stats,
508 	.owner		=	THIS_MODULE,
509 };
510 
choke_module_init(void)511 static int __init choke_module_init(void)
512 {
513 	return register_qdisc(&choke_qdisc_ops);
514 }
515 
choke_module_exit(void)516 static void __exit choke_module_exit(void)
517 {
518 	unregister_qdisc(&choke_qdisc_ops);
519 }
520 
521 module_init(choke_module_init)
522 module_exit(choke_module_exit)
523 
524 MODULE_LICENSE("GPL");
525