1 /* (C) 1999-2001 Paul `Rusty' Russell
2  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3  * (C) 2002-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
4  * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10 
11 #include <linux/types.h>
12 #include <linux/timer.h>
13 #include <linux/module.h>
14 #include <linux/in.h>
15 #include <linux/tcp.h>
16 #include <linux/spinlock.h>
17 #include <linux/skbuff.h>
18 #include <linux/ipv6.h>
19 #include <net/ip6_checksum.h>
20 #include <asm/unaligned.h>
21 
22 #include <net/tcp.h>
23 
24 #include <linux/netfilter.h>
25 #include <linux/netfilter_ipv4.h>
26 #include <linux/netfilter_ipv6.h>
27 #include <net/netfilter/nf_conntrack.h>
28 #include <net/netfilter/nf_conntrack_l4proto.h>
29 #include <net/netfilter/nf_conntrack_ecache.h>
30 #include <net/netfilter/nf_log.h>
31 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
32 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
33 
34 /* "Be conservative in what you do,
35     be liberal in what you accept from others."
36     If it's non-zero, we mark only out-of-window RST segments as INVALID. */
37 static int nf_ct_tcp_be_liberal __read_mostly = 0;
38 
39 /* If it is set to zero, we disable picking up already established
40    connections. */
41 static int nf_ct_tcp_loose __read_mostly = 1;
42 
43 /* Max number of the retransmitted packets without receiving an (acceptable)
44    ACK from the destination. If this number is reached, a shorter timer
45    will be started. */
46 static int nf_ct_tcp_max_retrans __read_mostly = 3;
47 
48   /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
49      closely.  They're more complex. --RR */
50 
51 static const char *const tcp_conntrack_names[] = {
52 	"NONE",
53 	"SYN_SENT",
54 	"SYN_RECV",
55 	"ESTABLISHED",
56 	"FIN_WAIT",
57 	"CLOSE_WAIT",
58 	"LAST_ACK",
59 	"TIME_WAIT",
60 	"CLOSE",
61 	"SYN_SENT2",
62 };
63 
64 #define SECS * HZ
65 #define MINS * 60 SECS
66 #define HOURS * 60 MINS
67 #define DAYS * 24 HOURS
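/* For example, "2 MINS" below expands to 2 * 60 * HZ jiffies. */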
68 
69 static unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] __read_mostly = {
70 	[TCP_CONNTRACK_SYN_SENT]	= 2 MINS,
71 	[TCP_CONNTRACK_SYN_RECV]	= 60 SECS,
72 	[TCP_CONNTRACK_ESTABLISHED]	= 5 DAYS,
73 	[TCP_CONNTRACK_FIN_WAIT]	= 2 MINS,
74 	[TCP_CONNTRACK_CLOSE_WAIT]	= 60 SECS,
75 	[TCP_CONNTRACK_LAST_ACK]	= 30 SECS,
76 	[TCP_CONNTRACK_TIME_WAIT]	= 2 MINS,
77 	[TCP_CONNTRACK_CLOSE]		= 10 SECS,
78 	[TCP_CONNTRACK_SYN_SENT2]	= 2 MINS,
79 /* RFC1122 says the R2 limit should be at least 100 seconds.
80    Linux uses 15 packets as limit, which corresponds
81    to ~13-30min depending on RTO. */
82 	[TCP_CONNTRACK_RETRANS]		= 5 MINS,
83 	[TCP_CONNTRACK_UNACK]		= 5 MINS,
84 };
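/* Once a connection exceeds tcp_max_retrans (default 3) retransmissions
   without an acceptable ACK, tcp_packet() below uses the RETRANS timeout
   when it is shorter than the current state's timeout; likewise, data
   left unacknowledged switches the connection to the UNACK timeout. */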
85 
86 #define sNO TCP_CONNTRACK_NONE
87 #define sSS TCP_CONNTRACK_SYN_SENT
88 #define sSR TCP_CONNTRACK_SYN_RECV
89 #define sES TCP_CONNTRACK_ESTABLISHED
90 #define sFW TCP_CONNTRACK_FIN_WAIT
91 #define sCW TCP_CONNTRACK_CLOSE_WAIT
92 #define sLA TCP_CONNTRACK_LAST_ACK
93 #define sTW TCP_CONNTRACK_TIME_WAIT
94 #define sCL TCP_CONNTRACK_CLOSE
95 #define sS2 TCP_CONNTRACK_SYN_SENT2
96 #define sIV TCP_CONNTRACK_MAX
97 #define sIG TCP_CONNTRACK_IGNORE
98 
99 /* What TCP flags are set from RST/SYN/FIN/ACK. */
100 enum tcp_bit_set {
101 	TCP_SYN_SET,
102 	TCP_SYNACK_SET,
103 	TCP_FIN_SET,
104 	TCP_ACK_SET,
105 	TCP_RST_SET,
106 	TCP_NONE_SET,
107 };
108 
109 /*
110  * The TCP state transition table needs a few words...
111  *
112  * We are the man in the middle. All the packets go through us
113  * but might get lost in transit to the destination.
114  * It is assumed that the destinations can't receive segments
115  * we haven't seen.
116  *
117  * The checked segment is in window, but our windows are *not*
118  * equivalent with the ones of the sender/receiver. We always
119  * try to guess the state of the current sender.
120  *
121  * The meaning of the states are:
122  *
123  * NONE:	initial state
124  * SYN_SENT:	SYN-only packet seen
125  * SYN_SENT2:	SYN-only packet seen from reply dir, simultaneous open
126  * SYN_RECV:	SYN-ACK packet seen
127  * ESTABLISHED:	ACK packet seen
128  * FIN_WAIT:	FIN packet seen
129  * CLOSE_WAIT:	ACK seen (after FIN)
130  * LAST_ACK:	FIN seen (after FIN)
131  * TIME_WAIT:	last ACK seen
132  * CLOSE:	closed connection (RST)
133  *
134  * Packets marked as IGNORED (sIG):
135  *	if they may be either invalid or valid
136  *	and the receiver may send back a connection
137  *	closing RST or a SYN/ACK.
138  *
139  * Packets marked as INVALID (sIV):
140  *	if we regard them as truly invalid packets
141  */
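/* The table is indexed as tcp_conntracks[dir][segment type][current state],
 * see tcp_packet(). For example, a SYN/ACK seen in the reply direction while
 * the original SYN left us in SYN_SENT follows the sSS -> sSR entry in the
 * synack row of the REPLY block, i.e. the lookup yields TCP_CONNTRACK_SYN_RECV. */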
142 static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
143 	{
144 /* ORIGINAL */
145 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
146 /*syn*/	   { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 },
147 /*
148  *	sNO -> sSS	Initialize a new connection
149  *	sSS -> sSS	Retransmitted SYN
150  *	sS2 -> sS2	Late retransmitted SYN
151  *	sSR -> sIG
152  *	sES -> sIG	Error: SYNs in window outside the SYN_SENT state
153  *			are errors. Receiver will reply with RST
154  *			and close the connection.
155  *			Or we are not in sync and hold a dead connection.
156  *	sFW -> sIG
157  *	sCW -> sIG
158  *	sLA -> sIG
159  *	sTW -> sSS	Reopened connection (RFC 1122).
160  *	sCL -> sSS
161  */
162 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
163 /*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR },
164 /*
165  *	sNO -> sIV	Too late and no reason to do anything
166  *	sSS -> sIV	Client can't send SYN and then SYN/ACK
167  *	sS2 -> sSR	SYN/ACK sent to SYN2 in simultaneous open
168  *	sSR -> sSR	Late retransmitted SYN/ACK in simultaneous open
169  *	sES -> sIV	Invalid SYN/ACK packets sent by the client
170  *	sFW -> sIV
171  *	sCW -> sIV
172  *	sLA -> sIV
173  *	sTW -> sIV
174  *	sCL -> sIV
175  */
176 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
177 /*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
178 /*
179  *	sNO -> sIV	Too late and no reason to do anything...
180  *	sSS -> sIV	Client might not send FIN in this state:
181  *			we enforce waiting for a SYN/ACK reply first.
182  *	sS2 -> sIV
183  *	sSR -> sFW	Close started.
184  *	sES -> sFW
185  *	sFW -> sLA	FIN seen in both directions, waiting for
186  *			the last ACK.
187  *			Might be a retransmitted FIN as well...
188  *	sCW -> sLA
189  *	sLA -> sLA	Retransmitted FIN. Remain in the same state.
190  *	sTW -> sTW
191  *	sCL -> sCL
192  */
193 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
194 /*ack*/	   { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
195 /*
196  *	sNO -> sES	Assumed.
197  *	sSS -> sIV	ACK is invalid: we haven't seen a SYN/ACK yet.
198  *	sS2 -> sIV
199  *	sSR -> sES	Established state is reached.
200  *	sES -> sES	:-)
201  *	sFW -> sCW	Normal close request answered by ACK.
202  *	sCW -> sCW
203  *	sLA -> sTW	Last ACK detected.
204  *	sTW -> sTW	Retransmitted last ACK. Remain in the same state.
205  *	sCL -> sCL
206  */
207 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
208 /*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
209 /*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
210 	},
211 	{
212 /* REPLY */
213 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
214 /*syn*/	   { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sS2 },
215 /*
216  *	sNO -> sIV	Never reached.
217  *	sSS -> sS2	Simultaneous open
218  *	sS2 -> sS2	Retransmitted simultaneous SYN
219  *	sSR -> sIV	Invalid SYN packets sent by the server
220  *	sES -> sIV
221  *	sFW -> sIV
222  *	sCW -> sIV
223  *	sLA -> sIV
224  *	sTW -> sIV	Reopened connection, but server may not do it.
225  *	sCL -> sIV
226  */
227 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
228 /*synack*/ { sIV, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
229 /*
230  *	sSS -> sSR	Standard open.
231  *	sS2 -> sSR	Simultaneous open
232  *	sSR -> sIG	Retransmitted SYN/ACK, ignore it.
233  *	sES -> sIG	Late retransmitted SYN/ACK?
234  *	sFW -> sIG	Might be SYN/ACK answering ignored SYN
235  *	sCW -> sIG
236  *	sLA -> sIG
237  *	sTW -> sIG
238  *	sCL -> sIG
239  */
240 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
241 /*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
242 /*
243  *	sSS -> sIV	Server might not send FIN in this state.
244  *	sS2 -> sIV
245  *	sSR -> sFW	Close started.
246  *	sES -> sFW
247  *	sFW -> sLA	FIN seen in both directions.
248  *	sCW -> sLA
249  *	sLA -> sLA	Retransmitted FIN.
250  *	sTW -> sTW
251  *	sCL -> sCL
252  */
253 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
254 /*ack*/	   { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG },
255 /*
256  *	sSS -> sIG	Might be a half-open connection.
257  *	sS2 -> sIG
258  *	sSR -> sSR	Might answer late resent SYN.
259  *	sES -> sES	:-)
260  *	sFW -> sCW	Normal close request answered by ACK.
261  *	sCW -> sCW
262  *	sLA -> sTW	Last ACK detected.
263  *	sTW -> sTW	Retransmitted last ACK.
264  *	sCL -> sCL
265  */
266 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
267 /*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
268 /*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
269 	}
270 };
271 
272 static inline struct nf_tcp_net *tcp_pernet(struct net *net)
273 {
274 	return &net->ct.nf_ct_proto.tcp;
275 }
276 
277 static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
278 			     struct nf_conntrack_tuple *tuple)
279 {
280 	const struct tcphdr *hp;
281 	struct tcphdr _hdr;
282 
283 	/* Actually only need first 8 bytes. */
284 	hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
285 	if (hp == NULL)
286 		return false;
287 
288 	tuple->src.u.tcp.port = hp->source;
289 	tuple->dst.u.tcp.port = hp->dest;
290 
291 	return true;
292 }
293 
294 static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
295 			     const struct nf_conntrack_tuple *orig)
296 {
297 	tuple->src.u.tcp.port = orig->dst.u.tcp.port;
298 	tuple->dst.u.tcp.port = orig->src.u.tcp.port;
299 	return true;
300 }
301 
302 /* Print out the per-protocol part of the tuple. */
303 static int tcp_print_tuple(struct seq_file *s,
304 			   const struct nf_conntrack_tuple *tuple)
305 {
306 	return seq_printf(s, "sport=%hu dport=%hu ",
307 			  ntohs(tuple->src.u.tcp.port),
308 			  ntohs(tuple->dst.u.tcp.port));
309 }
310 
311 /* Print out the private part of the conntrack. */
312 static int tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
313 {
314 	enum tcp_conntrack state;
315 
316 	spin_lock_bh(&ct->lock);
317 	state = ct->proto.tcp.state;
318 	spin_unlock_bh(&ct->lock);
319 
320 	return seq_printf(s, "%s ", tcp_conntrack_names[state]);
321 }
322 
323 static unsigned int get_conntrack_index(const struct tcphdr *tcph)
324 {
325 	if (tcph->rst) return TCP_RST_SET;
326 	else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
327 	else if (tcph->fin) return TCP_FIN_SET;
328 	else if (tcph->ack) return TCP_ACK_SET;
329 	else return TCP_NONE_SET;
330 }
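/* e.g. a segment with both SYN and ACK set maps to TCP_SYNACK_SET;
 * RST takes precedence over every other flag. */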
331 
332 /* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
333    in IP Filter' by Guido van Rooij.
334 
335    http://www.sane.nl/events/sane2000/papers.html
336    http://www.darkart.com/mirrors/www.obfuscation.org/ipf/
337 
338    The boundaries and the conditions are changed according to RFC793:
339    the packet must intersect the window (i.e. segments may be
340    after the right or before the left edge) and thus receivers may ACK
341    segments after the right edge of the window.
342 
343 	td_maxend = max(sack + max(win,1)) seen in reply packets
344 	td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
345 	td_maxwin += seq + len - sender.td_maxend
346 			if seq + len > sender.td_maxend
347 	td_end    = max(seq + len) seen in sent packets
348 
349    I.   Upper bound for valid data:	seq <= sender.td_maxend
350    II.  Lower bound for valid data:	seq + len >= sender.td_end - receiver.td_maxwin
351    III.	Upper bound for valid (s)ack:   sack <= receiver.td_end
352    IV.	Lower bound for valid (s)ack:	sack >= receiver.td_end - MAXACKWINDOW
353 
354    where sack is the highest right edge of sack block found in the packet
355    or ack in the case of packet without SACK option.
356 
357    The upper bound limit for a valid (s)ack is not ignored -
358    we don't have to deal with fragments.
359 */
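/* Worked example with hypothetical numbers: if sender.td_end = 3000,
   sender.td_maxend = 4000 and receiver.td_maxwin = 1000, a segment with
   seq = 3500 carrying 200 bytes passes I (3500 <= 4000) and
   II (3700 >= 3000 - 1000), while a segment starting at seq = 4200
   violates I and is treated as out of window. */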
360 
361 static inline __u32 segment_seq_plus_len(__u32 seq,
362 					 size_t len,
363 					 unsigned int dataoff,
364 					 const struct tcphdr *tcph)
365 {
366 	/* XXX Should I use payload length field in IP/IPv6 header ?
367 	 * - YK */
368 	return (seq + len - dataoff - tcph->doff*4
369 		+ (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
370 }
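/* e.g. for a bare SYN (no payload, no FIN) this returns seq + 1,
 * i.e. the sequence number right after the one the SYN occupies. */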
371 
372 /* Fixme: what about big packets? */
373 #define MAXACKWINCONST			66000
374 #define MAXACKWINDOW(sender)						\
375 	((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin	\
376 					      : MAXACKWINCONST)
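/* e.g. a sender whose largest (scaled) window seen so far is 8192 is
 * still allowed the constant 66000 slack; only windows above 66000
 * raise the bound further. */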
377 
378 /*
379  * Simplified tcp_parse_options routine from tcp_input.c
380  */
381 static void tcp_options(const struct sk_buff *skb,
382 			unsigned int dataoff,
383 			const struct tcphdr *tcph,
384 			struct ip_ct_tcp_state *state)
385 {
386 	unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
387 	const unsigned char *ptr;
388 	int length = (tcph->doff*4) - sizeof(struct tcphdr);
389 
390 	if (!length)
391 		return;
392 
393 	ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
394 				 length, buff);
395 	BUG_ON(ptr == NULL);
396 
397 	state->td_scale =
398 	state->flags = 0;
399 
400 	while (length > 0) {
401 		int opcode=*ptr++;
402 		int opsize;
403 
404 		switch (opcode) {
405 		case TCPOPT_EOL:
406 			return;
407 		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
408 			length--;
409 			continue;
410 		default:
411 			opsize=*ptr++;
412 			if (opsize < 2) /* "silly options" */
413 				return;
414 			if (opsize > length)
415 				return;	/* don't parse partial options */
416 
417 			if (opcode == TCPOPT_SACK_PERM
418 			    && opsize == TCPOLEN_SACK_PERM)
419 				state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
420 			else if (opcode == TCPOPT_WINDOW
421 				 && opsize == TCPOLEN_WINDOW) {
422 				state->td_scale = *(u_int8_t *)ptr;
423 
424 				if (state->td_scale > 14) {
425 					/* See RFC1323 */
426 					state->td_scale = 14;
427 				}
428 				state->flags |=
429 					IP_CT_TCP_FLAG_WINDOW_SCALE;
430 			}
431 			ptr += opsize - 2;
432 			length -= opsize;
433 		}
434 	}
435 }
436 
437 static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
438                      const struct tcphdr *tcph, __u32 *sack)
439 {
440 	unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
441 	const unsigned char *ptr;
442 	int length = (tcph->doff*4) - sizeof(struct tcphdr);
443 	__u32 tmp;
444 
445 	if (!length)
446 		return;
447 
448 	ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
449 				 length, buff);
450 	BUG_ON(ptr == NULL);
451 
452 	/* Fast path for timestamp-only option */
453 	if (length == TCPOLEN_TSTAMP_ALIGNED
454 	    && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
455 				       | (TCPOPT_NOP << 16)
456 				       | (TCPOPT_TIMESTAMP << 8)
457 				       | TCPOLEN_TIMESTAMP))
458 		return;
459 
460 	while (length > 0) {
461 		int opcode = *ptr++;
462 		int opsize, i;
463 
464 		switch (opcode) {
465 		case TCPOPT_EOL:
466 			return;
467 		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
468 			length--;
469 			continue;
470 		default:
471 			opsize = *ptr++;
472 			if (opsize < 2) /* "silly options" */
473 				return;
474 			if (opsize > length)
475 				return;	/* don't parse partial options */
476 
477 			if (opcode == TCPOPT_SACK
478 			    && opsize >= (TCPOLEN_SACK_BASE
479 					  + TCPOLEN_SACK_PERBLOCK)
480 			    && !((opsize - TCPOLEN_SACK_BASE)
481 				 % TCPOLEN_SACK_PERBLOCK)) {
482 				for (i = 0;
483 				     i < (opsize - TCPOLEN_SACK_BASE);
484 				     i += TCPOLEN_SACK_PERBLOCK) {
485 					tmp = get_unaligned_be32((__be32 *)(ptr+i)+1);
486 
487 					if (after(tmp, *sack))
488 						*sack = tmp;
489 				}
490 				return;
491 			}
492 			ptr += opsize - 2;
493 			length -= opsize;
494 		}
495 	}
496 }
497 
498 #ifdef CONFIG_NF_NAT_NEEDED
499 static inline s16 nat_offset(const struct nf_conn *ct,
500 			     enum ip_conntrack_dir dir,
501 			     u32 seq)
502 {
503 	typeof(nf_ct_nat_offset) get_offset = rcu_dereference(nf_ct_nat_offset);
504 
505 	return get_offset != NULL ? get_offset(ct, dir, seq) : 0;
506 }
507 #define NAT_OFFSET(ct, dir, seq) \
508 	(nat_offset(ct, dir, seq))
509 #else
510 #define NAT_OFFSET(ct, dir, seq)	0
511 #endif
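/* Without CONFIG_NF_NAT_NEEDED, NAT_OFFSET() is constant 0 and the
 * ack/sack values in tcp_in_window() are used unmodified. */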
512 
513 static bool tcp_in_window(const struct nf_conn *ct,
514 			  struct ip_ct_tcp *state,
515 			  enum ip_conntrack_dir dir,
516 			  unsigned int index,
517 			  const struct sk_buff *skb,
518 			  unsigned int dataoff,
519 			  const struct tcphdr *tcph,
520 			  u_int8_t pf)
521 {
522 	struct net *net = nf_ct_net(ct);
523 	struct nf_tcp_net *tn = tcp_pernet(net);
524 	struct ip_ct_tcp_state *sender = &state->seen[dir];
525 	struct ip_ct_tcp_state *receiver = &state->seen[!dir];
526 	const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
527 	__u32 seq, ack, sack, end, win, swin;
528 	s16 receiver_offset;
529 	bool res;
530 
531 	/*
532 	 * Get the required data from the packet.
533 	 */
534 	seq = ntohl(tcph->seq);
535 	ack = sack = ntohl(tcph->ack_seq);
536 	win = ntohs(tcph->window);
537 	end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
538 
539 	if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
540 		tcp_sack(skb, dataoff, tcph, &sack);
541 
542 	/* Take into account NAT sequence number mangling */
543 	receiver_offset = NAT_OFFSET(ct, !dir, ack - 1);
544 	ack -= receiver_offset;
545 	sack -= receiver_offset;
546 
547 	pr_debug("tcp_in_window: START\n");
548 	pr_debug("tcp_in_window: ");
549 	nf_ct_dump_tuple(tuple);
550 	pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
551 		 seq, ack, receiver_offset, sack, receiver_offset, win, end);
552 	pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
553 		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
554 		 sender->td_end, sender->td_maxend, sender->td_maxwin,
555 		 sender->td_scale,
556 		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
557 		 receiver->td_scale);
558 
559 	if (sender->td_maxwin == 0) {
560 		/*
561 		 * Initialize sender data.
562 		 */
563 		if (tcph->syn) {
564 			/*
565 			 * SYN-ACK in reply to a SYN
566 			 * or SYN from reply direction in simultaneous open.
567 			 */
568 			sender->td_end =
569 			sender->td_maxend = end;
570 			sender->td_maxwin = (win == 0 ? 1 : win);
571 
572 			tcp_options(skb, dataoff, tcph, sender);
573 			/*
574 			 * RFC 1323:
575 			 * Both sides must send the Window Scale option
576 			 * to enable window scaling in either direction.
577 			 */
578 			if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
579 			      && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
580 				sender->td_scale =
581 				receiver->td_scale = 0;
582 			if (!tcph->ack)
583 				/* Simultaneous open */
584 				return true;
585 		} else {
586 			/*
587 			 * We are in the middle of a connection,
588 			 * its history is lost for us.
589 			 * Let's try to use the data from the packet.
590 			 */
591 			sender->td_end = end;
592 			swin = win << sender->td_scale;
593 			sender->td_maxwin = (swin == 0 ? 1 : swin);
594 			sender->td_maxend = end + sender->td_maxwin;
595 			/*
596 			 * We haven't seen traffic in the other direction yet
597 			 * but we have to tweak window tracking to pass III
598 			 * and IV until that happens.
599 			 */
600 			if (receiver->td_maxwin == 0)
601 				receiver->td_end = receiver->td_maxend = sack;
602 		}
603 	} else if (((state->state == TCP_CONNTRACK_SYN_SENT
604 		     && dir == IP_CT_DIR_ORIGINAL)
605 		   || (state->state == TCP_CONNTRACK_SYN_RECV
606 		     && dir == IP_CT_DIR_REPLY))
607 		   && after(end, sender->td_end)) {
608 		/*
609 		 * RFC 793: "if a TCP is reinitialized ... then it need
610 		 * not wait at all; it must only be sure to use sequence
611 		 * numbers larger than those recently used."
612 		 */
613 		sender->td_end =
614 		sender->td_maxend = end;
615 		sender->td_maxwin = (win == 0 ? 1 : win);
616 
617 		tcp_options(skb, dataoff, tcph, sender);
618 	}
619 
620 	if (!(tcph->ack)) {
621 		/*
622 		 * If there is no ACK, just pretend it was set and OK.
623 		 */
624 		ack = sack = receiver->td_end;
625 	} else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
626 		    (TCP_FLAG_ACK|TCP_FLAG_RST))
627 		   && (ack == 0)) {
628 		/*
629 		 * Broken TCP stacks that set ACK in RST packets,
630 		 * but with a zero ack value.
631 		 */
632 		ack = sack = receiver->td_end;
633 	}
634 
635 	if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)
636 		/*
637 		 * RST sent answering SYN.
638 		 */
639 		seq = end = sender->td_end;
640 
641 	pr_debug("tcp_in_window: ");
642 	nf_ct_dump_tuple(tuple);
643 	pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
644 		 seq, ack, receiver_offset, sack, receiver_offset, win, end);
645 	pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
646 		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
647 		 sender->td_end, sender->td_maxend, sender->td_maxwin,
648 		 sender->td_scale,
649 		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
650 		 receiver->td_scale);
651 
652 	pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
653 		 before(seq, sender->td_maxend + 1),
654 		 after(end, sender->td_end - receiver->td_maxwin - 1),
655 		 before(sack, receiver->td_end + 1),
656 		 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));
657 
658 	if (before(seq, sender->td_maxend + 1) &&
659 	    after(end, sender->td_end - receiver->td_maxwin - 1) &&
660 	    before(sack, receiver->td_end + 1) &&
661 	    after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
662 		/*
663 		 * Take into account window scaling (RFC 1323).
664 		 */
665 		if (!tcph->syn)
666 			win <<= sender->td_scale;
667 
668 		/*
669 		 * Update sender data.
670 		 */
671 		swin = win + (sack - ack);
672 		if (sender->td_maxwin < swin)
673 			sender->td_maxwin = swin;
674 		if (after(end, sender->td_end)) {
675 			sender->td_end = end;
676 			sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
677 		}
678 		if (tcph->ack) {
679 			if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
680 				sender->td_maxack = ack;
681 				sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
682 			} else if (after(ack, sender->td_maxack))
683 				sender->td_maxack = ack;
684 		}
685 
686 		/*
687 		 * Update receiver data.
688 		 */
689 		if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
690 			receiver->td_maxwin += end - sender->td_maxend;
691 		if (after(sack + win, receiver->td_maxend - 1)) {
692 			receiver->td_maxend = sack + win;
693 			if (win == 0)
694 				receiver->td_maxend++;
695 		}
696 		if (ack == receiver->td_end)
697 			receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
698 
699 		/*
700 		 * Check retransmissions.
701 		 */
702 		if (index == TCP_ACK_SET) {
703 			if (state->last_dir == dir
704 			    && state->last_seq == seq
705 			    && state->last_ack == ack
706 			    && state->last_end == end
707 			    && state->last_win == win)
708 				state->retrans++;
709 			else {
710 				state->last_dir = dir;
711 				state->last_seq = seq;
712 				state->last_ack = ack;
713 				state->last_end = end;
714 				state->last_win = win;
715 				state->retrans = 0;
716 			}
717 		}
718 		res = true;
719 	} else {
720 		res = false;
721 		if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
722 		    tn->tcp_be_liberal)
723 			res = true;
724 		if (!res && LOG_INVALID(net, IPPROTO_TCP))
725 			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
726 			"nf_ct_tcp: %s ",
727 			before(seq, sender->td_maxend + 1) ?
728 			after(end, sender->td_end - receiver->td_maxwin - 1) ?
729 			before(sack, receiver->td_end + 1) ?
730 			after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
731 			: "ACK is under the lower bound (possible overly delayed ACK)"
732 			: "ACK is over the upper bound (ACKed data not seen yet)"
733 			: "SEQ is under the lower bound (already ACKed data retransmitted)"
734 			: "SEQ is over the upper bound (over the window of the receiver)");
735 	}
736 
737 	pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
738 		 "receiver end=%u maxend=%u maxwin=%u\n",
739 		 res, sender->td_end, sender->td_maxend, sender->td_maxwin,
740 		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
741 
742 	return res;
743 }
744 
745 /* table of valid flag combinations - PUSH, ECE and CWR are always valid */
746 static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
747 				 TCPHDR_URG) + 1] =
748 {
749 	[TCPHDR_SYN]				= 1,
750 	[TCPHDR_SYN|TCPHDR_URG]			= 1,
751 	[TCPHDR_SYN|TCPHDR_ACK]			= 1,
752 	[TCPHDR_RST]				= 1,
753 	[TCPHDR_RST|TCPHDR_ACK]			= 1,
754 	[TCPHDR_FIN|TCPHDR_ACK]			= 1,
755 	[TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG]	= 1,
756 	[TCPHDR_ACK]				= 1,
757 	[TCPHDR_ACK|TCPHDR_URG]			= 1,
758 };
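/* Any combination not listed above (e.g. FIN without ACK, or SYN|FIN|RST)
 * indexes a zero entry and is rejected as invalid by tcp_error() below. */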
759 
760 /* Protect conntrack against broken packets. Code taken from ipt_unclean.c.  */
761 static int tcp_error(struct net *net, struct nf_conn *tmpl,
762 		     struct sk_buff *skb,
763 		     unsigned int dataoff,
764 		     enum ip_conntrack_info *ctinfo,
765 		     u_int8_t pf,
766 		     unsigned int hooknum)
767 {
768 	const struct tcphdr *th;
769 	struct tcphdr _tcph;
770 	unsigned int tcplen = skb->len - dataoff;
771 	u_int8_t tcpflags;
772 
773 	/* Smaller than minimal TCP header? */
774 	th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
775 	if (th == NULL) {
776 		if (LOG_INVALID(net, IPPROTO_TCP))
777 			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
778 				"nf_ct_tcp: short packet ");
779 		return -NF_ACCEPT;
780 	}
781 
782 	/* Not whole TCP header or malformed packet */
783 	if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
784 		if (LOG_INVALID(net, IPPROTO_TCP))
785 			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
786 				"nf_ct_tcp: truncated/malformed packet ");
787 		return -NF_ACCEPT;
788 	}
789 
790 	/* Checksum invalid? Ignore.
791 	 * We skip checking packets on the outgoing path
792 	 * because the checksum is assumed to be correct.
793 	 */
794 	/* FIXME: Source route IP option packets --RR */
795 	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
796 	    nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
797 		if (LOG_INVALID(net, IPPROTO_TCP))
798 			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
799 				  "nf_ct_tcp: bad TCP checksum ");
800 		return -NF_ACCEPT;
801 	}
802 
803 	/* Check TCP flags. */
804 	tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
805 	if (!tcp_valid_flags[tcpflags]) {
806 		if (LOG_INVALID(net, IPPROTO_TCP))
807 			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
808 				  "nf_ct_tcp: invalid TCP flag combination ");
809 		return -NF_ACCEPT;
810 	}
811 
812 	return NF_ACCEPT;
813 }
814 
815 static unsigned int *tcp_get_timeouts(struct net *net)
816 {
817 	return tcp_pernet(net)->timeouts;
818 }
819 
820 /* Returns verdict for packet, or -1 for invalid. */
821 static int tcp_packet(struct nf_conn *ct,
822 		      const struct sk_buff *skb,
823 		      unsigned int dataoff,
824 		      enum ip_conntrack_info ctinfo,
825 		      u_int8_t pf,
826 		      unsigned int hooknum,
827 		      unsigned int *timeouts)
828 {
829 	struct net *net = nf_ct_net(ct);
830 	struct nf_tcp_net *tn = tcp_pernet(net);
831 	struct nf_conntrack_tuple *tuple;
832 	enum tcp_conntrack new_state, old_state;
833 	enum ip_conntrack_dir dir;
834 	const struct tcphdr *th;
835 	struct tcphdr _tcph;
836 	unsigned long timeout;
837 	unsigned int index;
838 
839 	th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
840 	BUG_ON(th == NULL);
841 
842 	spin_lock_bh(&ct->lock);
843 	old_state = ct->proto.tcp.state;
844 	dir = CTINFO2DIR(ctinfo);
845 	index = get_conntrack_index(th);
846 	new_state = tcp_conntracks[dir][index][old_state];
847 	tuple = &ct->tuplehash[dir].tuple;
848 
849 	switch (new_state) {
850 	case TCP_CONNTRACK_SYN_SENT:
851 		if (old_state < TCP_CONNTRACK_TIME_WAIT)
852 			break;
853 		/* RFC 1122: "When a connection is closed actively,
854 		 * it MUST linger in TIME-WAIT state for a time 2xMSL
855 		 * (Maximum Segment Lifetime). However, it MAY accept
856 		 * a new SYN from the remote TCP to reopen the connection
857 		 * directly from TIME-WAIT state, if..."
858 		 * We ignore the conditions because we are in the
859 		 * TIME-WAIT state anyway.
860 		 *
861 		 * Handle aborted connections: we and the server
862 		 * think there is an existing connection but the client
863 		 * aborts it and starts a new one.
864 		 */
865 		if (((ct->proto.tcp.seen[dir].flags
866 		      | ct->proto.tcp.seen[!dir].flags)
867 		     & IP_CT_TCP_FLAG_CLOSE_INIT)
868 		    || (ct->proto.tcp.last_dir == dir
869 		        && ct->proto.tcp.last_index == TCP_RST_SET)) {
870 			/* Attempt to reopen a closed/aborted connection.
871 			 * Delete this connection and look up again. */
872 			spin_unlock_bh(&ct->lock);
873 
874 			/* Only repeat if we can actually remove the timer.
875 			 * Destruction may already be in progress in process
876 			 * context and we must give it a chance to terminate.
877 			 */
878 			if (nf_ct_kill(ct))
879 				return -NF_REPEAT;
880 			return NF_DROP;
881 		}
882 		/* Fall through */
883 	case TCP_CONNTRACK_IGNORE:
884 		/* Ignored packets:
885 		 *
886 		 * Our connection entry may be out of sync, so ignore
887 		 * packets which may signal the real connection between
888 		 * the client and the server.
889 		 *
890 		 * a) SYN in ORIGINAL
891 		 * b) SYN/ACK in REPLY
892 		 * c) ACK in reply direction after initial SYN in original.
893 		 *
894 		 * If the ignored packet is invalid, the receiver will send
895 		 * a RST we'll catch below.
896 		 */
897 		if (index == TCP_SYNACK_SET
898 		    && ct->proto.tcp.last_index == TCP_SYN_SET
899 		    && ct->proto.tcp.last_dir != dir
900 		    && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
901 			/* b) This SYN/ACK acknowledges a SYN that we earlier
902 			 * ignored as invalid. This means that the client and
903 			 * the server are both in sync, while the firewall is
904 			 * not. We get in sync from the previously annotated
905 			 * values.
906 			 */
907 			old_state = TCP_CONNTRACK_SYN_SENT;
908 			new_state = TCP_CONNTRACK_SYN_RECV;
909 			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end =
910 				ct->proto.tcp.last_end;
911 			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend =
912 				ct->proto.tcp.last_end;
913 			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin =
914 				ct->proto.tcp.last_win == 0 ?
915 					1 : ct->proto.tcp.last_win;
916 			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale =
917 				ct->proto.tcp.last_wscale;
918 			ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
919 				ct->proto.tcp.last_flags;
920 			memset(&ct->proto.tcp.seen[dir], 0,
921 			       sizeof(struct ip_ct_tcp_state));
922 			break;
923 		}
924 		ct->proto.tcp.last_index = index;
925 		ct->proto.tcp.last_dir = dir;
926 		ct->proto.tcp.last_seq = ntohl(th->seq);
927 		ct->proto.tcp.last_end =
928 		    segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
929 		ct->proto.tcp.last_win = ntohs(th->window);
930 
931 		/* a) This is a SYN in ORIGINAL. The client and the server
932 		 * may be in sync but we are not. In that case, we annotate
933 		 * the TCP options and let the packet go through. If it is a
934 		 * valid SYN packet, the server will reply with a SYN/ACK, and
935 		 * then we'll get in sync. Otherwise, the server ignores it. */
936 		if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) {
937 			struct ip_ct_tcp_state seen = {};
938 
939 			ct->proto.tcp.last_flags =
940 			ct->proto.tcp.last_wscale = 0;
941 			tcp_options(skb, dataoff, th, &seen);
942 			if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
943 				ct->proto.tcp.last_flags |=
944 					IP_CT_TCP_FLAG_WINDOW_SCALE;
945 				ct->proto.tcp.last_wscale = seen.td_scale;
946 			}
947 			if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) {
948 				ct->proto.tcp.last_flags |=
949 					IP_CT_TCP_FLAG_SACK_PERM;
950 			}
951 		}
952 		spin_unlock_bh(&ct->lock);
953 		if (LOG_INVALID(net, IPPROTO_TCP))
954 			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
955 				  "nf_ct_tcp: invalid packet ignored in "
956 				  "state %s ", tcp_conntrack_names[old_state]);
957 		return NF_ACCEPT;
958 	case TCP_CONNTRACK_MAX:
959 		/* Invalid packet */
960 		pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
961 			 dir, get_conntrack_index(th), old_state);
962 		spin_unlock_bh(&ct->lock);
963 		if (LOG_INVALID(net, IPPROTO_TCP))
964 			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
965 				  "nf_ct_tcp: invalid state ");
966 		return -NF_ACCEPT;
967 	case TCP_CONNTRACK_CLOSE:
968 		if (index == TCP_RST_SET
969 		    && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
970 		    && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
971 			/* Invalid RST  */
972 			spin_unlock_bh(&ct->lock);
973 			if (LOG_INVALID(net, IPPROTO_TCP))
974 				nf_log_packet(net, pf, 0, skb, NULL, NULL,
975 					      NULL, "nf_ct_tcp: invalid RST ");
976 			return -NF_ACCEPT;
977 		}
978 		if (index == TCP_RST_SET
979 		    && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
980 			 && ct->proto.tcp.last_index == TCP_SYN_SET)
981 			|| (!test_bit(IPS_ASSURED_BIT, &ct->status)
982 			    && ct->proto.tcp.last_index == TCP_ACK_SET))
983 		    && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
984 			/* RST sent to invalid SYN or ACK we had let through
985 			 * at a) and c) above:
986 			 *
987 			 * a) SYN was in window then
988 			 * c) we hold a half-open connection.
989 			 *
990 			 * Delete our connection entry.
991 			 * We skip window checking, because packet might ACK
992 			 * segments we ignored. */
993 			goto in_window;
994 		}
995 		/* Just fall through */
996 	default:
997 		/* Keep compilers happy. */
998 		break;
999 	}
1000 
1001 	if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
1002 			   skb, dataoff, th, pf)) {
1003 		spin_unlock_bh(&ct->lock);
1004 		return -NF_ACCEPT;
1005 	}
1006      in_window:
1007 	/* From now on we have got in-window packets */
1008 	ct->proto.tcp.last_index = index;
1009 	ct->proto.tcp.last_dir = dir;
1010 
1011 	pr_debug("tcp_conntracks: ");
1012 	nf_ct_dump_tuple(tuple);
1013 	pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
1014 		 (th->syn ? 1 : 0), (th->ack ? 1 : 0),
1015 		 (th->fin ? 1 : 0), (th->rst ? 1 : 0),
1016 		 old_state, new_state);
1017 
1018 	ct->proto.tcp.state = new_state;
1019 	if (old_state != new_state
1020 	    && new_state == TCP_CONNTRACK_FIN_WAIT)
1021 		ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
1022 
1023 	if (ct->proto.tcp.retrans >= tn->tcp_max_retrans &&
1024 	    timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
1025 		timeout = timeouts[TCP_CONNTRACK_RETRANS];
1026 	else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
1027 		 IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
1028 		 timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
1029 		timeout = timeouts[TCP_CONNTRACK_UNACK];
1030 	else
1031 		timeout = timeouts[new_state];
1032 	spin_unlock_bh(&ct->lock);
1033 
1034 	if (new_state != old_state)
1035 		nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
1036 
1037 	if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
1038 		/* If the only reply is a RST, we can consider ourselves not to
1039 		   have an established connection: this is a fairly common
1040 		   problem case, so we can delete the conntrack
1041 		   immediately.  --RR */
1042 		if (th->rst) {
1043 			nf_ct_kill_acct(ct, ctinfo, skb);
1044 			return NF_ACCEPT;
1045 		}
1046 	} else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
1047 		   && (old_state == TCP_CONNTRACK_SYN_RECV
1048 		       || old_state == TCP_CONNTRACK_ESTABLISHED)
1049 		   && new_state == TCP_CONNTRACK_ESTABLISHED) {
1050 		/* Set ASSURED if we see a valid ack in ESTABLISHED
1051 		   after SYN_RECV or a valid answer for a picked up
1052 		   connection. */
1053 		set_bit(IPS_ASSURED_BIT, &ct->status);
1054 		nf_conntrack_event_cache(IPCT_ASSURED, ct);
1055 	}
1056 	nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
1057 
1058 	return NF_ACCEPT;
1059 }
1060 
1061 /* Called when a new connection for this protocol found. */
1062 static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1063 		    unsigned int dataoff, unsigned int *timeouts)
1064 {
1065 	enum tcp_conntrack new_state;
1066 	const struct tcphdr *th;
1067 	struct tcphdr _tcph;
1068 	struct net *net = nf_ct_net(ct);
1069 	struct nf_tcp_net *tn = tcp_pernet(net);
1070 	const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
1071 	const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
1072 
1073 	th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
1074 	BUG_ON(th == NULL);
1075 
1076 	/* Don't need lock here: this conntrack not in circulation yet */
1077 	new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
1078 
1079 	/* Invalid: delete conntrack */
1080 	if (new_state >= TCP_CONNTRACK_MAX) {
1081 		pr_debug("nf_ct_tcp: invalid new deleting.\n");
1082 		return false;
1083 	}
1084 
1085 	if (new_state == TCP_CONNTRACK_SYN_SENT) {
1086 		memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1087 		/* SYN packet */
1088 		ct->proto.tcp.seen[0].td_end =
1089 			segment_seq_plus_len(ntohl(th->seq), skb->len,
1090 					     dataoff, th);
1091 		ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1092 		if (ct->proto.tcp.seen[0].td_maxwin == 0)
1093 			ct->proto.tcp.seen[0].td_maxwin = 1;
1094 		ct->proto.tcp.seen[0].td_maxend =
1095 			ct->proto.tcp.seen[0].td_end;
1096 
1097 		tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
1098 	} else if (tn->tcp_loose == 0) {
1099 		/* Don't try to pick up connections. */
1100 		return false;
1101 	} else {
1102 		memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1103 		/*
1104 		 * We are in the middle of a connection,
1105 		 * its history is lost for us.
1106 		 * Let's try to use the data from the packet.
1107 		 */
1108 		ct->proto.tcp.seen[0].td_end =
1109 			segment_seq_plus_len(ntohl(th->seq), skb->len,
1110 					     dataoff, th);
1111 		ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1112 		if (ct->proto.tcp.seen[0].td_maxwin == 0)
1113 			ct->proto.tcp.seen[0].td_maxwin = 1;
1114 		ct->proto.tcp.seen[0].td_maxend =
1115 			ct->proto.tcp.seen[0].td_end +
1116 			ct->proto.tcp.seen[0].td_maxwin;
1117 
1118 		/* We assume SACK and liberal window checking to handle
1119 		 * window scaling */
1120 		ct->proto.tcp.seen[0].flags =
1121 		ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
1122 					      IP_CT_TCP_FLAG_BE_LIBERAL;
1123 	}
1124 
1125 	/* tcp_packet will set them */
1126 	ct->proto.tcp.last_index = TCP_NONE_SET;
1127 
1128 	pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
1129 		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
1130 		 sender->td_end, sender->td_maxend, sender->td_maxwin,
1131 		 sender->td_scale,
1132 		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
1133 		 receiver->td_scale);
1134 	return true;
1135 }
1136 
1137 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1138 
1139 #include <linux/netfilter/nfnetlink.h>
1140 #include <linux/netfilter/nfnetlink_conntrack.h>
1141 
1142 static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
1143 			 struct nf_conn *ct)
1144 {
1145 	struct nlattr *nest_parms;
1146 	struct nf_ct_tcp_flags tmp = {};
1147 
1148 	spin_lock_bh(&ct->lock);
1149 	nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
1150 	if (!nest_parms)
1151 		goto nla_put_failure;
1152 
1153 	if (nla_put_u8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state) ||
1154 	    nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
1155 		       ct->proto.tcp.seen[0].td_scale) ||
1156 	    nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
1157 		       ct->proto.tcp.seen[1].td_scale))
1158 		goto nla_put_failure;
1159 
1160 	tmp.flags = ct->proto.tcp.seen[0].flags;
1161 	if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
1162 		    sizeof(struct nf_ct_tcp_flags), &tmp))
1163 		goto nla_put_failure;
1164 
1165 	tmp.flags = ct->proto.tcp.seen[1].flags;
1166 	if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
1167 		    sizeof(struct nf_ct_tcp_flags), &tmp))
1168 		goto nla_put_failure;
1169 	spin_unlock_bh(&ct->lock);
1170 
1171 	nla_nest_end(skb, nest_parms);
1172 
1173 	return 0;
1174 
1175 nla_put_failure:
1176 	spin_unlock_bh(&ct->lock);
1177 	return -1;
1178 }
1179 
1180 static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
1181 	[CTA_PROTOINFO_TCP_STATE]	    = { .type = NLA_U8 },
1182 	[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
1183 	[CTA_PROTOINFO_TCP_WSCALE_REPLY]    = { .type = NLA_U8 },
1184 	[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]  = { .len = sizeof(struct nf_ct_tcp_flags) },
1185 	[CTA_PROTOINFO_TCP_FLAGS_REPLY]	    = { .len =  sizeof(struct nf_ct_tcp_flags) },
1186 };
1187 
1188 static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
1189 {
1190 	struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
1191 	struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1];
1192 	int err;
1193 
1194 	/* an update might not contain anything about the private
1195 	 * protocol info; in that case skip the parsing */
1196 	if (!pattr)
1197 		return 0;
1198 
1199 	err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr, tcp_nla_policy);
1200 	if (err < 0)
1201 		return err;
1202 
1203 	if (tb[CTA_PROTOINFO_TCP_STATE] &&
1204 	    nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
1205 		return -EINVAL;
1206 
1207 	spin_lock_bh(&ct->lock);
1208 	if (tb[CTA_PROTOINFO_TCP_STATE])
1209 		ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
1210 
1211 	if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
1212 		struct nf_ct_tcp_flags *attr =
1213 			nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]);
1214 		ct->proto.tcp.seen[0].flags &= ~attr->mask;
1215 		ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
1216 	}
1217 
1218 	if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
1219 		struct nf_ct_tcp_flags *attr =
1220 			nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]);
1221 		ct->proto.tcp.seen[1].flags &= ~attr->mask;
1222 		ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
1223 	}
1224 
1225 	if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] &&
1226 	    tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
1227 	    ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
1228 	    ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
1229 		ct->proto.tcp.seen[0].td_scale =
1230 			nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
1231 		ct->proto.tcp.seen[1].td_scale =
1232 			nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
1233 	}
1234 	spin_unlock_bh(&ct->lock);
1235 
1236 	return 0;
1237 }
1238 
1239 static int tcp_nlattr_size(void)
1240 {
1241 	return nla_total_size(0)	   /* CTA_PROTOINFO_TCP */
1242 		+ nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1);
1243 }
1244 
1245 static int tcp_nlattr_tuple_size(void)
1246 {
1247 	return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1248 }
1249 #endif
1250 
1251 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1252 
1253 #include <linux/netfilter/nfnetlink.h>
1254 #include <linux/netfilter/nfnetlink_cttimeout.h>
1255 
1256 static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
1257 				     struct net *net, void *data)
1258 {
1259 	unsigned int *timeouts = data;
1260 	struct nf_tcp_net *tn = tcp_pernet(net);
1261 	int i;
1262 
1263 	/* set default TCP timeouts. */
1264 	for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++)
1265 		timeouts[i] = tn->timeouts[i];
1266 
1267 	if (tb[CTA_TIMEOUT_TCP_SYN_SENT]) {
1268 		timeouts[TCP_CONNTRACK_SYN_SENT] =
1269 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT]))*HZ;
1270 	}
1271 	if (tb[CTA_TIMEOUT_TCP_SYN_RECV]) {
1272 		timeouts[TCP_CONNTRACK_SYN_RECV] =
1273 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_RECV]))*HZ;
1274 	}
1275 	if (tb[CTA_TIMEOUT_TCP_ESTABLISHED]) {
1276 		timeouts[TCP_CONNTRACK_ESTABLISHED] =
1277 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_ESTABLISHED]))*HZ;
1278 	}
1279 	if (tb[CTA_TIMEOUT_TCP_FIN_WAIT]) {
1280 		timeouts[TCP_CONNTRACK_FIN_WAIT] =
1281 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_FIN_WAIT]))*HZ;
1282 	}
1283 	if (tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]) {
1284 		timeouts[TCP_CONNTRACK_CLOSE_WAIT] =
1285 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]))*HZ;
1286 	}
1287 	if (tb[CTA_TIMEOUT_TCP_LAST_ACK]) {
1288 		timeouts[TCP_CONNTRACK_LAST_ACK] =
1289 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_LAST_ACK]))*HZ;
1290 	}
1291 	if (tb[CTA_TIMEOUT_TCP_TIME_WAIT]) {
1292 		timeouts[TCP_CONNTRACK_TIME_WAIT] =
1293 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_TIME_WAIT]))*HZ;
1294 	}
1295 	if (tb[CTA_TIMEOUT_TCP_CLOSE]) {
1296 		timeouts[TCP_CONNTRACK_CLOSE] =
1297 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE]))*HZ;
1298 	}
1299 	if (tb[CTA_TIMEOUT_TCP_SYN_SENT2]) {
1300 		timeouts[TCP_CONNTRACK_SYN_SENT2] =
1301 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT2]))*HZ;
1302 	}
1303 	if (tb[CTA_TIMEOUT_TCP_RETRANS]) {
1304 		timeouts[TCP_CONNTRACK_RETRANS] =
1305 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_RETRANS]))*HZ;
1306 	}
1307 	if (tb[CTA_TIMEOUT_TCP_UNACK]) {
1308 		timeouts[TCP_CONNTRACK_UNACK] =
1309 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_UNACK]))*HZ;
1310 	}
1311 	return 0;
1312 }
1313 
1314 static int
1315 tcp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
1316 {
1317 	const unsigned int *timeouts = data;
1318 
1319 	if (nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT,
1320 			htonl(timeouts[TCP_CONNTRACK_SYN_SENT] / HZ)) ||
1321 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_RECV,
1322 			 htonl(timeouts[TCP_CONNTRACK_SYN_RECV] / HZ)) ||
1323 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_ESTABLISHED,
1324 			 htonl(timeouts[TCP_CONNTRACK_ESTABLISHED] / HZ)) ||
1325 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_FIN_WAIT,
1326 			 htonl(timeouts[TCP_CONNTRACK_FIN_WAIT] / HZ)) ||
1327 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE_WAIT,
1328 			 htonl(timeouts[TCP_CONNTRACK_CLOSE_WAIT] / HZ)) ||
1329 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_LAST_ACK,
1330 			 htonl(timeouts[TCP_CONNTRACK_LAST_ACK] / HZ)) ||
1331 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_TIME_WAIT,
1332 			 htonl(timeouts[TCP_CONNTRACK_TIME_WAIT] / HZ)) ||
1333 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE,
1334 			 htonl(timeouts[TCP_CONNTRACK_CLOSE] / HZ)) ||
1335 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT2,
1336 			 htonl(timeouts[TCP_CONNTRACK_SYN_SENT2] / HZ)) ||
1337 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_RETRANS,
1338 			 htonl(timeouts[TCP_CONNTRACK_RETRANS] / HZ)) ||
1339 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_UNACK,
1340 			 htonl(timeouts[TCP_CONNTRACK_UNACK] / HZ)))
1341 		goto nla_put_failure;
1342 	return 0;
1343 
1344 nla_put_failure:
1345 	return -ENOSPC;
1346 }
1347 
1348 static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = {
1349 	[CTA_TIMEOUT_TCP_SYN_SENT]	= { .type = NLA_U32 },
1350 	[CTA_TIMEOUT_TCP_SYN_RECV]	= { .type = NLA_U32 },
1351 	[CTA_TIMEOUT_TCP_ESTABLISHED]	= { .type = NLA_U32 },
1352 	[CTA_TIMEOUT_TCP_FIN_WAIT]	= { .type = NLA_U32 },
1353 	[CTA_TIMEOUT_TCP_CLOSE_WAIT]	= { .type = NLA_U32 },
1354 	[CTA_TIMEOUT_TCP_LAST_ACK]	= { .type = NLA_U32 },
1355 	[CTA_TIMEOUT_TCP_TIME_WAIT]	= { .type = NLA_U32 },
1356 	[CTA_TIMEOUT_TCP_CLOSE]		= { .type = NLA_U32 },
1357 	[CTA_TIMEOUT_TCP_SYN_SENT2]	= { .type = NLA_U32 },
1358 	[CTA_TIMEOUT_TCP_RETRANS]	= { .type = NLA_U32 },
1359 	[CTA_TIMEOUT_TCP_UNACK]		= { .type = NLA_U32 },
1360 };
1361 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1362 
1363 #ifdef CONFIG_SYSCTL
1364 static struct ctl_table tcp_sysctl_table[] = {
1365 	{
1366 		.procname	= "nf_conntrack_tcp_timeout_syn_sent",
1367 		.maxlen		= sizeof(unsigned int),
1368 		.mode		= 0644,
1369 		.proc_handler	= proc_dointvec_jiffies,
1370 	},
1371 	{
1372 		.procname	= "nf_conntrack_tcp_timeout_syn_recv",
1373 		.maxlen		= sizeof(unsigned int),
1374 		.mode		= 0644,
1375 		.proc_handler	= proc_dointvec_jiffies,
1376 	},
1377 	{
1378 		.procname	= "nf_conntrack_tcp_timeout_established",
1379 		.maxlen		= sizeof(unsigned int),
1380 		.mode		= 0644,
1381 		.proc_handler	= proc_dointvec_jiffies,
1382 	},
1383 	{
1384 		.procname	= "nf_conntrack_tcp_timeout_fin_wait",
1385 		.maxlen		= sizeof(unsigned int),
1386 		.mode		= 0644,
1387 		.proc_handler	= proc_dointvec_jiffies,
1388 	},
1389 	{
1390 		.procname	= "nf_conntrack_tcp_timeout_close_wait",
1391 		.maxlen		= sizeof(unsigned int),
1392 		.mode		= 0644,
1393 		.proc_handler	= proc_dointvec_jiffies,
1394 	},
1395 	{
1396 		.procname	= "nf_conntrack_tcp_timeout_last_ack",
1397 		.maxlen		= sizeof(unsigned int),
1398 		.mode		= 0644,
1399 		.proc_handler	= proc_dointvec_jiffies,
1400 	},
1401 	{
1402 		.procname	= "nf_conntrack_tcp_timeout_time_wait",
1403 		.maxlen		= sizeof(unsigned int),
1404 		.mode		= 0644,
1405 		.proc_handler	= proc_dointvec_jiffies,
1406 	},
1407 	{
1408 		.procname	= "nf_conntrack_tcp_timeout_close",
1409 		.maxlen		= sizeof(unsigned int),
1410 		.mode		= 0644,
1411 		.proc_handler	= proc_dointvec_jiffies,
1412 	},
1413 	{
1414 		.procname	= "nf_conntrack_tcp_timeout_max_retrans",
1415 		.maxlen		= sizeof(unsigned int),
1416 		.mode		= 0644,
1417 		.proc_handler	= proc_dointvec_jiffies,
1418 	},
1419 	{
1420 		.procname	= "nf_conntrack_tcp_timeout_unacknowledged",
1421 		.maxlen		= sizeof(unsigned int),
1422 		.mode		= 0644,
1423 		.proc_handler	= proc_dointvec_jiffies,
1424 	},
1425 	{
1426 		.procname	= "nf_conntrack_tcp_loose",
1427 		.maxlen		= sizeof(unsigned int),
1428 		.mode		= 0644,
1429 		.proc_handler	= proc_dointvec,
1430 	},
1431 	{
1432 		.procname       = "nf_conntrack_tcp_be_liberal",
1433 		.maxlen         = sizeof(unsigned int),
1434 		.mode           = 0644,
1435 		.proc_handler   = proc_dointvec,
1436 	},
1437 	{
1438 		.procname	= "nf_conntrack_tcp_max_retrans",
1439 		.maxlen		= sizeof(unsigned int),
1440 		.mode		= 0644,
1441 		.proc_handler	= proc_dointvec,
1442 	},
1443 	{ }
1444 };
1445 
1446 #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
1447 static struct ctl_table tcp_compat_sysctl_table[] = {
1448 	{
1449 		.procname	= "ip_conntrack_tcp_timeout_syn_sent",
1450 		.maxlen		= sizeof(unsigned int),
1451 		.mode		= 0644,
1452 		.proc_handler	= proc_dointvec_jiffies,
1453 	},
1454 	{
1455 		.procname	= "ip_conntrack_tcp_timeout_syn_sent2",
1456 		.maxlen		= sizeof(unsigned int),
1457 		.mode		= 0644,
1458 		.proc_handler	= proc_dointvec_jiffies,
1459 	},
1460 	{
1461 		.procname	= "ip_conntrack_tcp_timeout_syn_recv",
1462 		.maxlen		= sizeof(unsigned int),
1463 		.mode		= 0644,
1464 		.proc_handler	= proc_dointvec_jiffies,
1465 	},
1466 	{
1467 		.procname	= "ip_conntrack_tcp_timeout_established",
1468 		.maxlen		= sizeof(unsigned int),
1469 		.mode		= 0644,
1470 		.proc_handler	= proc_dointvec_jiffies,
1471 	},
1472 	{
1473 		.procname	= "ip_conntrack_tcp_timeout_fin_wait",
1474 		.maxlen		= sizeof(unsigned int),
1475 		.mode		= 0644,
1476 		.proc_handler	= proc_dointvec_jiffies,
1477 	},
1478 	{
1479 		.procname	= "ip_conntrack_tcp_timeout_close_wait",
1480 		.maxlen		= sizeof(unsigned int),
1481 		.mode		= 0644,
1482 		.proc_handler	= proc_dointvec_jiffies,
1483 	},
1484 	{
1485 		.procname	= "ip_conntrack_tcp_timeout_last_ack",
1486 		.maxlen		= sizeof(unsigned int),
1487 		.mode		= 0644,
1488 		.proc_handler	= proc_dointvec_jiffies,
1489 	},
1490 	{
1491 		.procname	= "ip_conntrack_tcp_timeout_time_wait",
1492 		.maxlen		= sizeof(unsigned int),
1493 		.mode		= 0644,
1494 		.proc_handler	= proc_dointvec_jiffies,
1495 	},
1496 	{
1497 		.procname	= "ip_conntrack_tcp_timeout_close",
1498 		.maxlen		= sizeof(unsigned int),
1499 		.mode		= 0644,
1500 		.proc_handler	= proc_dointvec_jiffies,
1501 	},
1502 	{
1503 		.procname	= "ip_conntrack_tcp_timeout_max_retrans",
1504 		.maxlen		= sizeof(unsigned int),
1505 		.mode		= 0644,
1506 		.proc_handler	= proc_dointvec_jiffies,
1507 	},
1508 	{
1509 		.procname	= "ip_conntrack_tcp_loose",
1510 		.maxlen		= sizeof(unsigned int),
1511 		.mode		= 0644,
1512 		.proc_handler	= proc_dointvec,
1513 	},
1514 	{
1515 		.procname	= "ip_conntrack_tcp_be_liberal",
1516 		.maxlen		= sizeof(unsigned int),
1517 		.mode		= 0644,
1518 		.proc_handler	= proc_dointvec,
1519 	},
1520 	{
1521 		.procname	= "ip_conntrack_tcp_max_retrans",
1522 		.maxlen		= sizeof(unsigned int),
1523 		.mode		= 0644,
1524 		.proc_handler	= proc_dointvec,
1525 	},
1526 	{ }
1527 };
1528 #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
1529 #endif /* CONFIG_SYSCTL */
1530 
1531 static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
1532 				    struct nf_tcp_net *tn)
1533 {
1534 #ifdef CONFIG_SYSCTL
1535 	if (pn->ctl_table)
1536 		return 0;
1537 
1538 	pn->ctl_table = kmemdup(tcp_sysctl_table,
1539 				sizeof(tcp_sysctl_table),
1540 				GFP_KERNEL);
1541 	if (!pn->ctl_table)
1542 		return -ENOMEM;
1543 
1544 	pn->ctl_table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT];
1545 	pn->ctl_table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV];
1546 	pn->ctl_table[2].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
1547 	pn->ctl_table[3].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT];
1548 	pn->ctl_table[4].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT];
1549 	pn->ctl_table[5].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK];
1550 	pn->ctl_table[6].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT];
1551 	pn->ctl_table[7].data = &tn->timeouts[TCP_CONNTRACK_CLOSE];
1552 	pn->ctl_table[8].data = &tn->timeouts[TCP_CONNTRACK_RETRANS];
1553 	pn->ctl_table[9].data = &tn->timeouts[TCP_CONNTRACK_UNACK];
1554 	pn->ctl_table[10].data = &tn->tcp_loose;
1555 	pn->ctl_table[11].data = &tn->tcp_be_liberal;
1556 	pn->ctl_table[12].data = &tn->tcp_max_retrans;
1557 #endif
1558 	return 0;
1559 }
1560 
1561 static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
1562 					   struct nf_tcp_net *tn)
1563 {
1564 #ifdef CONFIG_SYSCTL
1565 #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
1566 	pn->ctl_compat_table = kmemdup(tcp_compat_sysctl_table,
1567 				       sizeof(tcp_compat_sysctl_table),
1568 				       GFP_KERNEL);
1569 	if (!pn->ctl_compat_table)
1570 		return -ENOMEM;
1571 
1572 	pn->ctl_compat_table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT];
1573 	pn->ctl_compat_table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT2];
1574 	pn->ctl_compat_table[2].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV];
1575 	pn->ctl_compat_table[3].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
1576 	pn->ctl_compat_table[4].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT];
1577 	pn->ctl_compat_table[5].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT];
1578 	pn->ctl_compat_table[6].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK];
1579 	pn->ctl_compat_table[7].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT];
1580 	pn->ctl_compat_table[8].data = &tn->timeouts[TCP_CONNTRACK_CLOSE];
1581 	pn->ctl_compat_table[9].data = &tn->timeouts[TCP_CONNTRACK_RETRANS];
1582 	pn->ctl_compat_table[10].data = &tn->tcp_loose;
1583 	pn->ctl_compat_table[11].data = &tn->tcp_be_liberal;
1584 	pn->ctl_compat_table[12].data = &tn->tcp_max_retrans;
1585 #endif
1586 #endif
1587 	return 0;
1588 }
1589 
1590 static int tcp_init_net(struct net *net, u_int16_t proto)
1591 {
1592 	int ret;
1593 	struct nf_tcp_net *tn = tcp_pernet(net);
1594 	struct nf_proto_net *pn = &tn->pn;
1595 
1596 	if (!pn->users) {
1597 		int i;
1598 
1599 		for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
1600 			tn->timeouts[i] = tcp_timeouts[i];
1601 
1602 		tn->tcp_loose = nf_ct_tcp_loose;
1603 		tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
1604 		tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
1605 	}
1606 
1607 	if (proto == AF_INET) {
1608 		ret = tcp_kmemdup_compat_sysctl_table(pn, tn);
1609 		if (ret < 0)
1610 			return ret;
1611 
1612 		ret = tcp_kmemdup_sysctl_table(pn, tn);
1613 		if (ret < 0)
1614 			nf_ct_kfree_compat_sysctl_table(pn);
1615 	} else
1616 		ret = tcp_kmemdup_sysctl_table(pn, tn);
1617 
1618 	return ret;
1619 }
1620 
1621 static struct nf_proto_net *tcp_get_net_proto(struct net *net)
1622 {
1623 	return &net->ct.nf_ct_proto.tcp.pn;
1624 }
1625 
1626 struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
1627 {
1628 	.l3proto		= PF_INET,
1629 	.l4proto 		= IPPROTO_TCP,
1630 	.name 			= "tcp",
1631 	.pkt_to_tuple 		= tcp_pkt_to_tuple,
1632 	.invert_tuple 		= tcp_invert_tuple,
1633 	.print_tuple 		= tcp_print_tuple,
1634 	.print_conntrack 	= tcp_print_conntrack,
1635 	.packet 		= tcp_packet,
1636 	.get_timeouts		= tcp_get_timeouts,
1637 	.new 			= tcp_new,
1638 	.error			= tcp_error,
1639 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1640 	.to_nlattr		= tcp_to_nlattr,
1641 	.nlattr_size		= tcp_nlattr_size,
1642 	.from_nlattr		= nlattr_to_tcp,
1643 	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
1644 	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
1645 	.nlattr_tuple_size	= tcp_nlattr_tuple_size,
1646 	.nla_policy		= nf_ct_port_nla_policy,
1647 #endif
1648 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1649 	.ctnl_timeout		= {
1650 		.nlattr_to_obj	= tcp_timeout_nlattr_to_obj,
1651 		.obj_to_nlattr	= tcp_timeout_obj_to_nlattr,
1652 		.nlattr_max	= CTA_TIMEOUT_TCP_MAX,
1653 		.obj_size	= sizeof(unsigned int) *
1654 					TCP_CONNTRACK_TIMEOUT_MAX,
1655 		.nla_policy	= tcp_timeout_nla_policy,
1656 	},
1657 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1658 	.init_net		= tcp_init_net,
1659 	.get_net_proto		= tcp_get_net_proto,
1660 };
1661 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
1662 
1663 struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
1664 {
1665 	.l3proto		= PF_INET6,
1666 	.l4proto 		= IPPROTO_TCP,
1667 	.name 			= "tcp",
1668 	.pkt_to_tuple 		= tcp_pkt_to_tuple,
1669 	.invert_tuple 		= tcp_invert_tuple,
1670 	.print_tuple 		= tcp_print_tuple,
1671 	.print_conntrack 	= tcp_print_conntrack,
1672 	.packet 		= tcp_packet,
1673 	.get_timeouts		= tcp_get_timeouts,
1674 	.new 			= tcp_new,
1675 	.error			= tcp_error,
1676 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1677 	.to_nlattr		= tcp_to_nlattr,
1678 	.nlattr_size		= tcp_nlattr_size,
1679 	.from_nlattr		= nlattr_to_tcp,
1680 	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
1681 	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
1682 	.nlattr_tuple_size	= tcp_nlattr_tuple_size,
1683 	.nla_policy		= nf_ct_port_nla_policy,
1684 #endif
1685 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1686 	.ctnl_timeout		= {
1687 		.nlattr_to_obj	= tcp_timeout_nlattr_to_obj,
1688 		.obj_to_nlattr	= tcp_timeout_obj_to_nlattr,
1689 		.nlattr_max	= CTA_TIMEOUT_TCP_MAX,
1690 		.obj_size	= sizeof(unsigned int) *
1691 					TCP_CONNTRACK_TIMEOUT_MAX,
1692 		.nla_policy	= tcp_timeout_nla_policy,
1693 	},
1694 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1695 	.init_net		= tcp_init_net,
1696 	.get_net_proto		= tcp_get_net_proto,
1697 };
1698 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6);
1699