1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Implementation of the Transmission Control Protocol(TCP).
7 *
8 * IPv4 specific functions
9 *
10 *
11 * code split from:
12 * linux/ipv4/tcp.c
13 * linux/ipv4/tcp_input.c
14 * linux/ipv4/tcp_output.c
15 *
16 * See tcp.c for author information
17 *
18 * This program is free software; you can redistribute it and/or
19 * modify it under the terms of the GNU General Public License
20 * as published by the Free Software Foundation; either version
21 * 2 of the License, or (at your option) any later version.
22 */
23
24 /*
25 * Changes:
26 * David S. Miller : New socket lookup architecture.
27 * This code is dedicated to John Dyson.
28 * David S. Miller : Change semantics of established hash,
29 * half is devoted to TIME_WAIT sockets
30 * and the rest go in the other half.
31 * Andi Kleen : Add support for syncookies and fixed
32 * some bugs: ip options weren't passed to
33 * the TCP layer, missed a check for an
34 * ACK bit.
35 * Andi Kleen : Implemented fast path mtu discovery.
36 * Fixed many serious bugs in the
37 * request_sock handling and moved
38 * most of it into the af independent code.
39 * Added tail drop and some other bugfixes.
40 * Added new listen semantics.
41 * Mike McLagan : Routing by source
42 * Juan Jose Ciarlante: ip_dynaddr bits
43 * Andi Kleen: various fixes.
44 * Vitaly E. Lavrov : Transparent proxy revived after a
45 * year-long coma.
46 * Andi Kleen : Fix new listen.
47 * Andi Kleen : Fix accept error reporting.
48 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
49 * Alexey Kuznetsov allows both IPv4 and IPv6 sockets to bind
50 * a single port at the same time.
51 */
52
53 #define pr_fmt(fmt) "TCP: " fmt
54
55 #include <linux/bottom_half.h>
56 #include <linux/types.h>
57 #include <linux/fcntl.h>
58 #include <linux/module.h>
59 #include <linux/random.h>
60 #include <linux/cache.h>
61 #include <linux/jhash.h>
62 #include <linux/init.h>
63 #include <linux/times.h>
64 #include <linux/slab.h>
65
66 #include <net/net_namespace.h>
67 #include <net/icmp.h>
68 #include <net/inet_hashtables.h>
69 #include <net/tcp.h>
70 #include <net/transp_v6.h>
71 #include <net/ipv6.h>
72 #include <net/inet_common.h>
73 #include <net/timewait_sock.h>
74 #include <net/xfrm.h>
75 #include <net/netdma.h>
76 #include <net/secure_seq.h>
77 #include <net/tcp_memcontrol.h>
78
79 #include <linux/inet.h>
80 #include <linux/ipv6.h>
81 #include <linux/stddef.h>
82 #include <linux/proc_fs.h>
83 #include <linux/seq_file.h>
84
85 #include <linux/crypto.h>
86 #include <linux/scatterlist.h>
87
88 int sysctl_tcp_tw_reuse __read_mostly;
89 int sysctl_tcp_low_latency __read_mostly;
90 EXPORT_SYMBOL(sysctl_tcp_low_latency);
91
92
93 #ifdef CONFIG_TCP_MD5SIG
94 static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
95 __be32 daddr, __be32 saddr, const struct tcphdr *th);
96 #endif
97
98 struct inet_hashinfo tcp_hashinfo;
99 EXPORT_SYMBOL(tcp_hashinfo);
100
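/* Generate the initial sequence number for a passive connection from the
 * addresses and ports of the received segment.
 */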
101 static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
102 {
103 return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
104 ip_hdr(skb)->saddr,
105 tcp_hdr(skb)->dest,
106 tcp_hdr(skb)->source);
107 }
108
109 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
110 {
111 const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
112 struct tcp_sock *tp = tcp_sk(sk);
113
114 /* With PAWS, it is safe from the viewpoint
115 of data integrity. Even without PAWS it is safe provided sequence
116 spaces do not overlap i.e. at data rates <= 80Mbit/sec.
117
118 Actually, the idea is close to VJ's one, only the timestamp cache is
119 held not per host but per port pair, and the TW bucket is used as the
120 state holder.
121
122 If the TW bucket has already been destroyed we fall back to VJ's scheme
123 and use the initial timestamp retrieved from the peer table.
124 */
125 if (tcptw->tw_ts_recent_stamp &&
126 (twp == NULL || (sysctl_tcp_tw_reuse &&
127 get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
128 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
129 if (tp->write_seq == 0)
130 tp->write_seq = 1;
131 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
132 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
133 sock_hold(sktw);
134 return 1;
135 }
136
137 return 0;
138 }
139 EXPORT_SYMBOL_GPL(tcp_twsk_unique);
140
141 /* This will initiate an outgoing connection. */
142 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
143 {
144 struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
145 struct inet_sock *inet = inet_sk(sk);
146 struct tcp_sock *tp = tcp_sk(sk);
147 __be16 orig_sport, orig_dport;
148 __be32 daddr, nexthop;
149 struct flowi4 *fl4;
150 struct rtable *rt;
151 int err;
152 struct ip_options_rcu *inet_opt;
153
154 if (addr_len < sizeof(struct sockaddr_in))
155 return -EINVAL;
156
157 if (usin->sin_family != AF_INET)
158 return -EAFNOSUPPORT;
159
160 nexthop = daddr = usin->sin_addr.s_addr;
161 inet_opt = rcu_dereference_protected(inet->inet_opt,
162 sock_owned_by_user(sk));
163 if (inet_opt && inet_opt->opt.srr) {
164 if (!daddr)
165 return -EINVAL;
166 nexthop = inet_opt->opt.faddr;
167 }
168
169 orig_sport = inet->inet_sport;
170 orig_dport = usin->sin_port;
171 fl4 = &inet->cork.fl.u.ip4;
172 rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
173 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
174 IPPROTO_TCP,
175 orig_sport, orig_dport, sk, true);
176 if (IS_ERR(rt)) {
177 err = PTR_ERR(rt);
178 if (err == -ENETUNREACH)
179 IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
180 return err;
181 }
182
183 if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
184 ip_rt_put(rt);
185 return -ENETUNREACH;
186 }
187
188 if (!inet_opt || !inet_opt->opt.srr)
189 daddr = fl4->daddr;
190
191 if (!inet->inet_saddr)
192 inet->inet_saddr = fl4->saddr;
193 inet->inet_rcv_saddr = inet->inet_saddr;
194
195 if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
196 /* Reset inherited state */
197 tp->rx_opt.ts_recent = 0;
198 tp->rx_opt.ts_recent_stamp = 0;
199 if (likely(!tp->repair))
200 tp->write_seq = 0;
201 }
202
203 if (tcp_death_row.sysctl_tw_recycle &&
204 !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
205 tcp_fetch_timewait_stamp(sk, &rt->dst);
206
207 inet->inet_dport = usin->sin_port;
208 inet->inet_daddr = daddr;
209
210 inet_csk(sk)->icsk_ext_hdr_len = 0;
211 if (inet_opt)
212 inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
213
214 tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
215
216 /* Socket identity is still unknown (sport may be zero).
217 * However we set state to SYN-SENT and, without releasing the socket
218 * lock, select a source port, enter ourselves into the hash tables and
219 * complete initialization after this.
220 */
221 tcp_set_state(sk, TCP_SYN_SENT);
222 err = inet_hash_connect(&tcp_death_row, sk);
223 if (err)
224 goto failure;
225
226 rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
227 inet->inet_sport, inet->inet_dport, sk);
228 if (IS_ERR(rt)) {
229 err = PTR_ERR(rt);
230 rt = NULL;
231 goto failure;
232 }
233 /* OK, now commit destination to socket. */
234 sk->sk_gso_type = SKB_GSO_TCPV4;
235 sk_setup_caps(sk, &rt->dst);
236
237 if (!tp->write_seq && likely(!tp->repair))
238 tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
239 inet->inet_daddr,
240 inet->inet_sport,
241 usin->sin_port);
242
243 inet->inet_id = tp->write_seq ^ jiffies;
244
245 err = tcp_connect(sk);
246
247 rt = NULL;
248 if (err)
249 goto failure;
250
251 return 0;
252
253 failure:
254 /*
255 * This unhashes the socket and releases the local port,
256 * if necessary.
257 */
258 tcp_set_state(sk, TCP_CLOSE);
259 ip_rt_put(rt);
260 sk->sk_route_caps = 0;
261 inet->inet_dport = 0;
262 return err;
263 }
264 EXPORT_SYMBOL(tcp_v4_connect);
265
266 /*
267 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
268 * It can be called through tcp_release_cb() if socket was owned by user
269 * at the time tcp_v4_err() was called to handle ICMP message.
270 */
271 static void tcp_v4_mtu_reduced(struct sock *sk)
272 {
273 struct dst_entry *dst;
274 struct inet_sock *inet = inet_sk(sk);
275 u32 mtu = tcp_sk(sk)->mtu_info;
276
277 dst = inet_csk_update_pmtu(sk, mtu);
278 if (!dst)
279 return;
280
281 /* Something is about to go wrong... Remember the soft error
282 * in case this connection is not able to recover.
283 */
284 if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
285 sk->sk_err_soft = EMSGSIZE;
286
287 mtu = dst_mtu(dst);
288
289 if (inet->pmtudisc != IP_PMTUDISC_DONT &&
290 inet_csk(sk)->icsk_pmtu_cookie > mtu) {
291 tcp_sync_mss(sk, mtu);
292
293 /* Resend the TCP packet because it's
294 * clear that the old packet has been
295 * dropped. This is the new "fast" path mtu
296 * discovery.
297 */
298 tcp_simple_retransmit(sk);
299 } /* else let the usual retransmit timer handle it */
300 }
301
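/* Handle an ICMP redirect by asking the cached route to update its next hop. */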
302 static void do_redirect(struct sk_buff *skb, struct sock *sk)
303 {
304 struct dst_entry *dst = __sk_dst_check(sk, 0);
305
306 if (dst)
307 dst->ops->redirect(dst, sk, skb);
308 }
309
310 /*
311 * This routine is called by the ICMP module when it gets some
312 * sort of error condition. If err < 0 then the socket should
313 * be closed and the error returned to the user. If err > 0
314 * it's just the icmp type << 8 | icmp code. After adjustment
315 * header points to the first 8 bytes of the tcp header. We need
316 * to find the appropriate port.
317 *
318 * The locking strategy used here is very "optimistic". When
319 * someone else accesses the socket the ICMP is just dropped
320 * and for some paths there is no check at all.
321 * A more general error queue to queue errors for later handling
322 * is probably better.
323 *
324 */
325
326 void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
327 {
328 const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
329 struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
330 struct inet_connection_sock *icsk;
331 struct tcp_sock *tp;
332 struct inet_sock *inet;
333 const int type = icmp_hdr(icmp_skb)->type;
334 const int code = icmp_hdr(icmp_skb)->code;
335 struct sock *sk;
336 struct sk_buff *skb;
337 struct request_sock *req;
338 __u32 seq;
339 __u32 remaining;
340 int err;
341 struct net *net = dev_net(icmp_skb->dev);
342
343 if (icmp_skb->len < (iph->ihl << 2) + 8) {
344 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
345 return;
346 }
347
348 sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
349 iph->saddr, th->source, inet_iif(icmp_skb));
350 if (!sk) {
351 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
352 return;
353 }
354 if (sk->sk_state == TCP_TIME_WAIT) {
355 inet_twsk_put(inet_twsk(sk));
356 return;
357 }
358
359 bh_lock_sock(sk);
360 /* If too many ICMPs get dropped on busy
361 * servers this needs to be solved differently.
362 * We do take care of PMTU discovery (RFC1191) special case :
363 * we can receive locally generated ICMP messages while socket is held.
364 */
365 if (sock_owned_by_user(sk)) {
366 if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
367 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
368 }
369 if (sk->sk_state == TCP_CLOSE)
370 goto out;
371
372 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
373 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
374 goto out;
375 }
376
377 icsk = inet_csk(sk);
378 tp = tcp_sk(sk);
379 req = tp->fastopen_rsk;
380 seq = ntohl(th->seq);
381 if (sk->sk_state != TCP_LISTEN &&
382 !between(seq, tp->snd_una, tp->snd_nxt) &&
383 (req == NULL || seq != tcp_rsk(req)->snt_isn)) {
384 /* For a Fast Open socket, allow seq to be snt_isn. */
385 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
386 goto out;
387 }
388
389 switch (type) {
390 case ICMP_REDIRECT:
391 do_redirect(icmp_skb, sk);
392 goto out;
393 case ICMP_SOURCE_QUENCH:
394 /* Just silently ignore these. */
395 goto out;
396 case ICMP_PARAMETERPROB:
397 err = EPROTO;
398 break;
399 case ICMP_DEST_UNREACH:
400 if (code > NR_ICMP_UNREACH)
401 goto out;
402
403 if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
404 /* We are not interested in TCP_LISTEN and open_requests
405 * (SYN-ACKs sent out by Linux are always <576 bytes so
406 * they should go through unfragmented).
407 */
408 if (sk->sk_state == TCP_LISTEN)
409 goto out;
410
411 tp->mtu_info = info;
412 if (!sock_owned_by_user(sk)) {
413 tcp_v4_mtu_reduced(sk);
414 } else {
415 if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
416 sock_hold(sk);
417 }
418 goto out;
419 }
420
421 err = icmp_err_convert[code].errno;
422 /* check if icmp_skb allows revert of backoff
423 * (see draft-zimmermann-tcp-lcd) */
424 if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
425 break;
426 if (seq != tp->snd_una || !icsk->icsk_retransmits ||
427 !icsk->icsk_backoff)
428 break;
429
430 /* XXX (TFO) - revisit the following logic for TFO */
431
432 if (sock_owned_by_user(sk))
433 break;
434
435 icsk->icsk_backoff--;
436 inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
437 TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
438 tcp_bound_rto(sk);
439
440 skb = tcp_write_queue_head(sk);
441 BUG_ON(!skb);
442
443 remaining = icsk->icsk_rto - min(icsk->icsk_rto,
444 tcp_time_stamp - TCP_SKB_CB(skb)->when);
445
446 if (remaining) {
447 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
448 remaining, TCP_RTO_MAX);
449 } else {
450 /* RTO revert clocked out retransmission.
451 * Will retransmit now */
452 tcp_retransmit_timer(sk);
453 }
454
455 break;
456 case ICMP_TIME_EXCEEDED:
457 err = EHOSTUNREACH;
458 break;
459 default:
460 goto out;
461 }
462
463 /* XXX (TFO) - if it's a TFO socket and has been accepted, rather
464 * than following the TCP_SYN_RECV case and closing the socket,
465 * we ignore the ICMP error and keep trying like a fully established
466 * socket. Is this the right thing to do?
467 */
468 if (req && req->sk == NULL)
469 goto out;
470
471 switch (sk->sk_state) {
472 struct request_sock *req, **prev;
473 case TCP_LISTEN:
474 if (sock_owned_by_user(sk))
475 goto out;
476
477 req = inet_csk_search_req(sk, &prev, th->dest,
478 iph->daddr, iph->saddr);
479 if (!req)
480 goto out;
481
482 /* ICMPs are not backlogged, hence we cannot get
483 an established socket here.
484 */
485 WARN_ON(req->sk);
486
487 if (seq != tcp_rsk(req)->snt_isn) {
488 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
489 goto out;
490 }
491
492 /*
493 * Still in SYN_RECV, just remove it silently.
494 * There is no good way to pass the error to the newly
495 * created socket, and POSIX does not want network
496 * errors returned from accept().
497 */
498 inet_csk_reqsk_queue_drop(sk, req, prev);
499 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
500 goto out;
501
502 case TCP_SYN_SENT:
503 case TCP_SYN_RECV: /* Normally cannot happen.
504 It can, e.g., if SYNs crossed,
505 or with Fast Open.
506 */
507 if (!sock_owned_by_user(sk)) {
508 sk->sk_err = err;
509
510 sk->sk_error_report(sk);
511
512 tcp_done(sk);
513 } else {
514 sk->sk_err_soft = err;
515 }
516 goto out;
517 }
518
519 /* If we've already connected we will keep trying
520 * until we time out, or the user gives up.
521 *
522 * rfc1122 4.2.3.9 allows considering as hard errors
523 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
524 * but it is obsoleted by pmtu discovery).
525 *
526 * Note that in the modern internet, where routing is unreliable
527 * and broken firewalls sit in every dark corner sending random
528 * errors ordered by their masters, even these two messages finally
529 * lose their original sense (even Linux sends invalid PORT_UNREACHs)
530 *
531 * Now we are in compliance with RFCs.
532 * --ANK (980905)
533 */
534
535 inet = inet_sk(sk);
536 if (!sock_owned_by_user(sk) && inet->recverr) {
537 sk->sk_err = err;
538 sk->sk_error_report(sk);
539 } else { /* Only an error on timeout */
540 sk->sk_err_soft = err;
541 }
542
543 out:
544 bh_unlock_sock(sk);
545 sock_put(sk);
546 }
547
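/* Fill in the TCP checksum of an outgoing skb: set up a partial checksum for
 * hardware offload (CHECKSUM_PARTIAL), or compute the full checksum in software.
 */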
548 static void __tcp_v4_send_check(struct sk_buff *skb,
549 __be32 saddr, __be32 daddr)
550 {
551 struct tcphdr *th = tcp_hdr(skb);
552
553 if (skb->ip_summed == CHECKSUM_PARTIAL) {
554 th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
555 skb->csum_start = skb_transport_header(skb) - skb->head;
556 skb->csum_offset = offsetof(struct tcphdr, check);
557 } else {
558 th->check = tcp_v4_check(skb->len, saddr, daddr,
559 csum_partial(th,
560 th->doff << 2,
561 skb->csum));
562 }
563 }
564
565 /* This routine computes an IPv4 TCP checksum. */
566 void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
567 {
568 const struct inet_sock *inet = inet_sk(sk);
569
570 __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
571 }
572 EXPORT_SYMBOL(tcp_v4_send_check);
573
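/* Prepare a GSO skb for segmentation by seeding a partial checksum over the
 * IPv4 pseudo-header.
 */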
574 int tcp_v4_gso_send_check(struct sk_buff *skb)
575 {
576 const struct iphdr *iph;
577 struct tcphdr *th;
578
579 if (!pskb_may_pull(skb, sizeof(*th)))
580 return -EINVAL;
581
582 iph = ip_hdr(skb);
583 th = tcp_hdr(skb);
584
585 th->check = 0;
586 skb->ip_summed = CHECKSUM_PARTIAL;
587 __tcp_v4_send_check(skb, iph->saddr, iph->daddr);
588 return 0;
589 }
590
591 /*
592 * This routine will send an RST to the other tcp.
593 *
594 * Someone asks: why do I NEVER use socket parameters (TOS, TTL etc.)
595 * for the reset?
596 * Answer: if a packet caused an RST, it is not for a socket
597 * existing in our system; if it is matched to a socket,
598 * it is just a duplicate segment or a bug in the other side's TCP.
599 * So we build the reply based only on the parameters
600 * that arrived with the segment.
601 * Exception: precedence violation. We do not implement it in any case.
602 */
603
604 static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
605 {
606 const struct tcphdr *th = tcp_hdr(skb);
607 struct {
608 struct tcphdr th;
609 #ifdef CONFIG_TCP_MD5SIG
610 __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
611 #endif
612 } rep;
613 struct ip_reply_arg arg;
614 #ifdef CONFIG_TCP_MD5SIG
615 struct tcp_md5sig_key *key;
616 const __u8 *hash_location = NULL;
617 unsigned char newhash[16];
618 int genhash;
619 struct sock *sk1 = NULL;
620 #endif
621 struct net *net;
622
623 /* Never send a reset in response to a reset. */
624 if (th->rst)
625 return;
626
627 if (skb_rtable(skb)->rt_type != RTN_LOCAL)
628 return;
629
630 /* Swap the send and the receive. */
631 memset(&rep, 0, sizeof(rep));
632 rep.th.dest = th->source;
633 rep.th.source = th->dest;
634 rep.th.doff = sizeof(struct tcphdr) / 4;
635 rep.th.rst = 1;
636
637 if (th->ack) {
638 rep.th.seq = th->ack_seq;
639 } else {
640 rep.th.ack = 1;
641 rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
642 skb->len - (th->doff << 2));
643 }
644
645 memset(&arg, 0, sizeof(arg));
646 arg.iov[0].iov_base = (unsigned char *)&rep;
647 arg.iov[0].iov_len = sizeof(rep.th);
648
649 net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
650 #ifdef CONFIG_TCP_MD5SIG
651 hash_location = tcp_parse_md5sig_option(th);
652 if (!sk && hash_location) {
653 /*
654 * active side is lost. Try to find listening socket through
655 * source port, and then find md5 key through listening socket.
656 * We do not loosen security here:
657 * the incoming packet is checked against the md5 hash of the found key;
658 * no RST is generated if the md5 hash doesn't match.
659 */
660 sk1 = __inet_lookup_listener(net,
661 &tcp_hashinfo, ip_hdr(skb)->saddr,
662 th->source, ip_hdr(skb)->daddr,
663 ntohs(th->source), inet_iif(skb));
664 /* don't send rst if it can't find key */
665 if (!sk1)
666 return;
667 rcu_read_lock();
668 key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
669 &ip_hdr(skb)->saddr, AF_INET);
670 if (!key)
671 goto release_sk1;
672
673 genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
674 if (genhash || memcmp(hash_location, newhash, 16) != 0)
675 goto release_sk1;
676 } else {
677 key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
678 &ip_hdr(skb)->saddr,
679 AF_INET) : NULL;
680 }
681
682 if (key) {
683 rep.opt[0] = htonl((TCPOPT_NOP << 24) |
684 (TCPOPT_NOP << 16) |
685 (TCPOPT_MD5SIG << 8) |
686 TCPOLEN_MD5SIG);
687 /* Update length and the length the header thinks exists */
688 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
689 rep.th.doff = arg.iov[0].iov_len / 4;
690
691 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
692 key, ip_hdr(skb)->saddr,
693 ip_hdr(skb)->daddr, &rep.th);
694 }
695 #endif
696 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
697 ip_hdr(skb)->saddr, /* XXX */
698 arg.iov[0].iov_len, IPPROTO_TCP, 0);
699 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
700 arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
701 /* When socket is gone, all binding information is lost.
702 * Routing might fail in this case. No choice here: if we choose to force
703 * input interface, we will misroute in case of asymmetric route.
704 */
705 if (sk)
706 arg.bound_dev_if = sk->sk_bound_dev_if;
707
708 arg.tos = ip_hdr(skb)->tos;
709 arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
710 ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
711 skb, ip_hdr(skb)->saddr,
712 ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
713
714 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
715 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
716
717 #ifdef CONFIG_TCP_MD5SIG
718 release_sk1:
719 if (sk1) {
720 rcu_read_unlock();
721 sock_put(sk1);
722 }
723 #endif
724 }
725
726 /* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
727 outside socket context, is certainly ugly. What can I do?
728 */
729
730 static void tcp_v4_send_ack(const struct sock *sk, struct sk_buff *skb,
731 u32 seq, u32 ack,
732 u32 win, u32 tsval, u32 tsecr, int oif,
733 struct tcp_md5sig_key *key,
734 int reply_flags, u8 tos)
735 {
736 const struct tcphdr *th = tcp_hdr(skb);
737 struct {
738 struct tcphdr th;
739 __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
740 #ifdef CONFIG_TCP_MD5SIG
741 + (TCPOLEN_MD5SIG_ALIGNED >> 2)
742 #endif
743 ];
744 } rep;
745 struct ip_reply_arg arg;
746 struct net *net = sock_net(sk);
747
748 memset(&rep.th, 0, sizeof(struct tcphdr));
749 memset(&arg, 0, sizeof(arg));
750
751 arg.iov[0].iov_base = (unsigned char *)&rep;
752 arg.iov[0].iov_len = sizeof(rep.th);
753 if (tsecr) {
754 rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
755 (TCPOPT_TIMESTAMP << 8) |
756 TCPOLEN_TIMESTAMP);
757 rep.opt[1] = htonl(tsval);
758 rep.opt[2] = htonl(tsecr);
759 arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
760 }
761
762 /* Swap the send and the receive. */
763 rep.th.dest = th->source;
764 rep.th.source = th->dest;
765 rep.th.doff = arg.iov[0].iov_len / 4;
766 rep.th.seq = htonl(seq);
767 rep.th.ack_seq = htonl(ack);
768 rep.th.ack = 1;
769 rep.th.window = htons(win);
770
771 #ifdef CONFIG_TCP_MD5SIG
772 if (key) {
773 int offset = (tsecr) ? 3 : 0;
774
775 rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
776 (TCPOPT_NOP << 16) |
777 (TCPOPT_MD5SIG << 8) |
778 TCPOLEN_MD5SIG);
779 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
780 rep.th.doff = arg.iov[0].iov_len/4;
781
782 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
783 key, ip_hdr(skb)->saddr,
784 ip_hdr(skb)->daddr, &rep.th);
785 }
786 #endif
787 arg.flags = reply_flags;
788 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
789 ip_hdr(skb)->saddr, /* XXX */
790 arg.iov[0].iov_len, IPPROTO_TCP, 0);
791 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
792 if (oif)
793 arg.bound_dev_if = oif;
794 arg.tos = tos;
795 arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
796 ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
797 skb, ip_hdr(skb)->saddr,
798 ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
799
800 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
801 }
802
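/* Send an ACK on behalf of a TIME-WAIT socket, echoing the timestamp and
 * MD5 key stored in the timewait bucket.
 */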
803 static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
804 {
805 struct inet_timewait_sock *tw = inet_twsk(sk);
806 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
807
808 tcp_v4_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
809 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
810 tcp_time_stamp + tcptw->tw_ts_offset,
811 tcptw->tw_ts_recent,
812 tw->tw_bound_dev_if,
813 tcp_twsk_md5_key(tcptw),
814 tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
815 tw->tw_tos
816 );
817
818 inet_twsk_put(tw);
819 }
820
821 static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
822 struct request_sock *req)
823 {
824 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
825 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
826 */
827 tcp_v4_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
828 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
829 tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
830 tcp_time_stamp,
831 req->ts_recent,
832 0,
833 tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
834 AF_INET),
835 inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
836 ip_hdr(skb)->tos);
837 }
838
839 /*
840 * Send a SYN-ACK after having received a SYN.
841 * This still operates on a request_sock only, not on a big
842 * socket.
843 */
844 static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
845 struct request_sock *req,
846 u16 queue_mapping,
847 bool nocache)
848 {
849 const struct inet_request_sock *ireq = inet_rsk(req);
850 struct flowi4 fl4;
851 int err = -1;
852 struct sk_buff * skb;
853
854 /* First, grab a route. */
855 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
856 return -1;
857
858 skb = tcp_make_synack(sk, dst, req, NULL);
859
860 if (skb) {
861 __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
862
863 skb_set_queue_mapping(skb, queue_mapping);
864 err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
865 ireq->rmt_addr,
866 ireq->opt);
867 err = net_xmit_eval(err);
868 if (!tcp_rsk(req)->snt_synack && !err)
869 tcp_rsk(req)->snt_synack = tcp_time_stamp;
870 }
871
872 return err;
873 }
874
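/* Retransmit a SYN-ACK for a pending request and count it as a retransmitted
 * segment.
 */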
875 static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
876 {
877 int res = tcp_v4_send_synack(sk, NULL, req, 0, false);
878
879 if (!res)
880 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
881 return res;
882 }
883
884 /*
885 * IPv4 request_sock destructor.
886 */
887 static void tcp_v4_reqsk_destructor(struct request_sock *req)
888 {
889 kfree(inet_rsk(req)->opt);
890 }
891
892 /*
893 * Return true if a syncookie should be sent
894 */
895 bool tcp_syn_flood_action(struct sock *sk,
896 const struct sk_buff *skb,
897 const char *proto)
898 {
899 const char *msg = "Dropping request";
900 bool want_cookie = false;
901 struct listen_sock *lopt;
902
903
904
905 #ifdef CONFIG_SYN_COOKIES
906 if (sysctl_tcp_syncookies) {
907 msg = "Sending cookies";
908 want_cookie = true;
909 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
910 } else
911 #endif
912 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
913
914 lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
915 if (!lopt->synflood_warned) {
916 lopt->synflood_warned = 1;
917 pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
918 proto, ntohs(tcp_hdr(skb)->dest), msg);
919 }
920 return want_cookie;
921 }
922 EXPORT_SYMBOL(tcp_syn_flood_action);
923
924 /*
925 * Save and compile IPv4 options into the request_sock if needed.
926 */
927 static struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
928 {
929 const struct ip_options *opt = &(IPCB(skb)->opt);
930 struct ip_options_rcu *dopt = NULL;
931
932 if (opt && opt->optlen) {
933 int opt_size = sizeof(*dopt) + opt->optlen;
934
935 dopt = kmalloc(opt_size, GFP_ATOMIC);
936 if (dopt) {
937 if (ip_options_echo(&dopt->opt, skb)) {
938 kfree(dopt);
939 dopt = NULL;
940 }
941 }
942 }
943 return dopt;
944 }
945
946 #ifdef CONFIG_TCP_MD5SIG
947 /*
948 * RFC2385 MD5 checksumming requires a mapping of
949 * IP address->MD5 Key.
950 * We need to maintain these in the sk structure.
951 */
952
953 /* Find the Key structure for an address. */
954 struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
955 const union tcp_md5_addr *addr,
956 int family)
957 {
958 struct tcp_sock *tp = tcp_sk(sk);
959 struct tcp_md5sig_key *key;
960 unsigned int size = sizeof(struct in_addr);
961 struct tcp_md5sig_info *md5sig;
962
963 /* caller either holds rcu_read_lock() or socket lock */
964 md5sig = rcu_dereference_check(tp->md5sig_info,
965 sock_owned_by_user(sk) ||
966 lockdep_is_held(&sk->sk_lock.slock));
967 if (!md5sig)
968 return NULL;
969 #if IS_ENABLED(CONFIG_IPV6)
970 if (family == AF_INET6)
971 size = sizeof(struct in6_addr);
972 #endif
973 hlist_for_each_entry_rcu(key, &md5sig->head, node) {
974 if (key->family != family)
975 continue;
976 if (!memcmp(&key->addr, addr, size))
977 return key;
978 }
979 return NULL;
980 }
981 EXPORT_SYMBOL(tcp_md5_do_lookup);
982
983 struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
984 struct sock *addr_sk)
985 {
986 union tcp_md5_addr *addr;
987
988 addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
989 return tcp_md5_do_lookup(sk, addr, AF_INET);
990 }
991 EXPORT_SYMBOL(tcp_v4_md5_lookup);
992
993 static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
994 struct request_sock *req)
995 {
996 union tcp_md5_addr *addr;
997
998 addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr;
999 return tcp_md5_do_lookup(sk, addr, AF_INET);
1000 }
1001
1002 /* This can be called on a newly created socket, from other files */
1003 int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
1004 int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
1005 {
1006 /* Add Key to the list */
1007 struct tcp_md5sig_key *key;
1008 struct tcp_sock *tp = tcp_sk(sk);
1009 struct tcp_md5sig_info *md5sig;
1010
1011 key = tcp_md5_do_lookup(sk, addr, family);
1012 if (key) {
1013 /* Pre-existing entry - just update that one. */
1014 memcpy(key->key, newkey, newkeylen);
1015 key->keylen = newkeylen;
1016 return 0;
1017 }
1018
1019 md5sig = rcu_dereference_protected(tp->md5sig_info,
1020 sock_owned_by_user(sk));
1021 if (!md5sig) {
1022 md5sig = kmalloc(sizeof(*md5sig), gfp);
1023 if (!md5sig)
1024 return -ENOMEM;
1025
1026 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
1027 INIT_HLIST_HEAD(&md5sig->head);
1028 rcu_assign_pointer(tp->md5sig_info, md5sig);
1029 }
1030
1031 key = sock_kmalloc(sk, sizeof(*key), gfp);
1032 if (!key)
1033 return -ENOMEM;
1034 if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) {
1035 sock_kfree_s(sk, key, sizeof(*key));
1036 return -ENOMEM;
1037 }
1038
1039 memcpy(key->key, newkey, newkeylen);
1040 key->keylen = newkeylen;
1041 key->family = family;
1042 memcpy(&key->addr, addr,
1043 (family == AF_INET6) ? sizeof(struct in6_addr) :
1044 sizeof(struct in_addr));
1045 hlist_add_head_rcu(&key->node, &md5sig->head);
1046 return 0;
1047 }
1048 EXPORT_SYMBOL(tcp_md5_do_add);
1049
1050 int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
1051 {
1052 struct tcp_sock *tp = tcp_sk(sk);
1053 struct tcp_md5sig_key *key;
1054 struct tcp_md5sig_info *md5sig;
1055
1056 key = tcp_md5_do_lookup(sk, addr, family);
1057 if (!key)
1058 return -ENOENT;
1059 hlist_del_rcu(&key->node);
1060 atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1061 kfree_rcu(key, rcu);
1062 md5sig = rcu_dereference_protected(tp->md5sig_info,
1063 sock_owned_by_user(sk));
1064 if (hlist_empty(&md5sig->head))
1065 tcp_free_md5sig_pool();
1066 return 0;
1067 }
1068 EXPORT_SYMBOL(tcp_md5_do_del);
1069
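/* Release every MD5 key attached to the socket. */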
1070 static void tcp_clear_md5_list(struct sock *sk)
1071 {
1072 struct tcp_sock *tp = tcp_sk(sk);
1073 struct tcp_md5sig_key *key;
1074 struct hlist_node *n;
1075 struct tcp_md5sig_info *md5sig;
1076
1077 md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
1078
1079 if (!hlist_empty(&md5sig->head))
1080 tcp_free_md5sig_pool();
1081 hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
1082 hlist_del_rcu(&key->node);
1083 atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1084 kfree_rcu(key, rcu);
1085 }
1086 }
1087
1088 static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1089 int optlen)
1090 {
1091 struct tcp_md5sig cmd;
1092 struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
1093
1094 if (optlen < sizeof(cmd))
1095 return -EINVAL;
1096
1097 if (copy_from_user(&cmd, optval, sizeof(cmd)))
1098 return -EFAULT;
1099
1100 if (sin->sin_family != AF_INET)
1101 return -EINVAL;
1102
1103 if (!cmd.tcpm_key || !cmd.tcpm_keylen)
1104 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1105 AF_INET);
1106
1107 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1108 return -EINVAL;
1109
1110 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1111 AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
1112 GFP_KERNEL);
1113 }
1114
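/* Feed the TCP pseudo-header (source address, destination address, protocol
 * and segment length) into the MD5 hash, as RFC 2385 requires.
 */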
1115 static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1116 __be32 daddr, __be32 saddr, int nbytes)
1117 {
1118 struct tcp4_pseudohdr *bp;
1119 struct scatterlist sg;
1120
1121 bp = &hp->md5_blk.ip4;
1122
1123 /*
1124 * 1. the TCP pseudo-header (in the order: source IP address,
1125 * destination IP address, zero-padded protocol number, and
1126 * segment length)
1127 */
1128 bp->saddr = saddr;
1129 bp->daddr = daddr;
1130 bp->pad = 0;
1131 bp->protocol = IPPROTO_TCP;
1132 bp->len = cpu_to_be16(nbytes);
1133
1134 sg_init_one(&sg, bp, sizeof(*bp));
1135 return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1136 }
1137
1138 static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
1139 __be32 daddr, __be32 saddr, const struct tcphdr *th)
1140 {
1141 struct tcp_md5sig_pool *hp;
1142 struct hash_desc *desc;
1143
1144 hp = tcp_get_md5sig_pool();
1145 if (!hp)
1146 goto clear_hash_noput;
1147 desc = &hp->md5_desc;
1148
1149 if (crypto_hash_init(desc))
1150 goto clear_hash;
1151 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1152 goto clear_hash;
1153 if (tcp_md5_hash_header(hp, th))
1154 goto clear_hash;
1155 if (tcp_md5_hash_key(hp, key))
1156 goto clear_hash;
1157 if (crypto_hash_final(desc, md5_hash))
1158 goto clear_hash;
1159
1160 tcp_put_md5sig_pool();
1161 return 0;
1162
1163 clear_hash:
1164 tcp_put_md5sig_pool();
1165 clear_hash_noput:
1166 memset(md5_hash, 0, 16);
1167 return 1;
1168 }
1169
1170 int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1171 const struct sock *sk, const struct request_sock *req,
1172 const struct sk_buff *skb)
1173 {
1174 struct tcp_md5sig_pool *hp;
1175 struct hash_desc *desc;
1176 const struct tcphdr *th = tcp_hdr(skb);
1177 __be32 saddr, daddr;
1178
1179 if (sk) {
1180 saddr = inet_sk(sk)->inet_saddr;
1181 daddr = inet_sk(sk)->inet_daddr;
1182 } else if (req) {
1183 saddr = inet_rsk(req)->loc_addr;
1184 daddr = inet_rsk(req)->rmt_addr;
1185 } else {
1186 const struct iphdr *iph = ip_hdr(skb);
1187 saddr = iph->saddr;
1188 daddr = iph->daddr;
1189 }
1190
1191 hp = tcp_get_md5sig_pool();
1192 if (!hp)
1193 goto clear_hash_noput;
1194 desc = &hp->md5_desc;
1195
1196 if (crypto_hash_init(desc))
1197 goto clear_hash;
1198
1199 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1200 goto clear_hash;
1201 if (tcp_md5_hash_header(hp, th))
1202 goto clear_hash;
1203 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1204 goto clear_hash;
1205 if (tcp_md5_hash_key(hp, key))
1206 goto clear_hash;
1207 if (crypto_hash_final(desc, md5_hash))
1208 goto clear_hash;
1209
1210 tcp_put_md5sig_pool();
1211 return 0;
1212
1213 clear_hash:
1214 tcp_put_md5sig_pool();
1215 clear_hash_noput:
1216 memset(md5_hash, 0, 16);
1217 return 1;
1218 }
1219 EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1220
1221 static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
1222 {
1223 /*
1224 * This gets called for each TCP segment that arrives
1225 * so we want to be efficient.
1226 * We have 3 drop cases:
1227 * o No MD5 hash and one expected.
1228 * o MD5 hash and we're not expecting one.
1229 * o MD5 hash and it's wrong.
1230 */
1231 const __u8 *hash_location = NULL;
1232 struct tcp_md5sig_key *hash_expected;
1233 const struct iphdr *iph = ip_hdr(skb);
1234 const struct tcphdr *th = tcp_hdr(skb);
1235 int genhash;
1236 unsigned char newhash[16];
1237
1238 hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
1239 AF_INET);
1240 hash_location = tcp_parse_md5sig_option(th);
1241
1242 /* We've parsed the options - do we have a hash? */
1243 if (!hash_expected && !hash_location)
1244 return false;
1245
1246 if (hash_expected && !hash_location) {
1247 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1248 return true;
1249 }
1250
1251 if (!hash_expected && hash_location) {
1252 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1253 return true;
1254 }
1255
1256 /* Okay, so this is hash_expected and hash_location -
1257 * so we need to calculate the checksum.
1258 */
1259 genhash = tcp_v4_md5_hash_skb(newhash,
1260 hash_expected,
1261 NULL, NULL, skb);
1262
1263 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1264 net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1265 &iph->saddr, ntohs(th->source),
1266 &iph->daddr, ntohs(th->dest),
1267 genhash ? " tcp_v4_calc_md5_hash failed"
1268 : "");
1269 return true;
1270 }
1271 return false;
1272 }
1273
1274 #endif
1275
1276 struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1277 .family = PF_INET,
1278 .obj_size = sizeof(struct tcp_request_sock),
1279 .rtx_syn_ack = tcp_v4_rtx_synack,
1280 .send_ack = tcp_v4_reqsk_send_ack,
1281 .destructor = tcp_v4_reqsk_destructor,
1282 .send_reset = tcp_v4_send_reset,
1283 .syn_ack_timeout = tcp_syn_ack_timeout,
1284 };
1285
1286 #ifdef CONFIG_TCP_MD5SIG
1287 static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1288 .md5_lookup = tcp_v4_reqsk_md5_lookup,
1289 .calc_md5_hash = tcp_v4_md5_hash_skb,
1290 };
1291 #endif
1292
1293 static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
1294 struct request_sock *req,
1295 struct tcp_fastopen_cookie *foc,
1296 struct tcp_fastopen_cookie *valid_foc)
1297 {
1298 bool skip_cookie = false;
1299 struct fastopen_queue *fastopenq;
1300
1301 if (likely(!fastopen_cookie_present(foc))) {
1302 /* See include/net/tcp.h for the meaning of these knobs */
1303 if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) ||
1304 ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) &&
1305 (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1)))
1306 skip_cookie = true; /* no cookie to validate */
1307 else
1308 return false;
1309 }
1310 fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq;
1311 /* A FO option is present; bump the counter. */
1312 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE);
1313
1314 /* Make sure the listener has enabled fastopen, and we don't
1315 * exceed the max # of pending TFO requests allowed before trying
1316 * to validate the cookie, in order to avoid burning CPU cycles
1317 * unnecessarily.
1318 *
1319 * XXX (TFO) - The implication of checking the max_qlen before
1320 * processing a cookie request is that clients can't differentiate
1321 * between qlen overflow causing Fast Open to be disabled
1322 * temporarily vs a server not supporting Fast Open at all.
1323 */
1324 if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 ||
1325 fastopenq == NULL || fastopenq->max_qlen == 0)
1326 return false;
1327
1328 if (fastopenq->qlen >= fastopenq->max_qlen) {
1329 struct request_sock *req1;
1330 spin_lock(&fastopenq->lock);
1331 req1 = fastopenq->rskq_rst_head;
1332 if ((req1 == NULL) || time_after(req1->expires, jiffies)) {
1333 spin_unlock(&fastopenq->lock);
1334 NET_INC_STATS_BH(sock_net(sk),
1335 LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
1336 /* Avoid bumping LINUX_MIB_TCPFASTOPENPASSIVEFAIL*/
1337 foc->len = -1;
1338 return false;
1339 }
1340 fastopenq->rskq_rst_head = req1->dl_next;
1341 fastopenq->qlen--;
1342 spin_unlock(&fastopenq->lock);
1343 reqsk_free(req1);
1344 }
1345 if (skip_cookie) {
1346 tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
1347 return true;
1348 }
1349 if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) {
1350 if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) {
1351 tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
1352 if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) ||
1353 memcmp(&foc->val[0], &valid_foc->val[0],
1354 TCP_FASTOPEN_COOKIE_SIZE) != 0)
1355 return false;
1356 valid_foc->len = -1;
1357 }
1358 /* Acknowledge the data received from the peer. */
1359 tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
1360 return true;
1361 } else if (foc->len == 0) { /* Client requesting a cookie */
1362 tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
1363 NET_INC_STATS_BH(sock_net(sk),
1364 LINUX_MIB_TCPFASTOPENCOOKIEREQD);
1365 } else {
1366 /* Client sent a cookie with wrong size. Treat it
1367 * the same as invalid and return a valid one.
1368 */
1369 tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
1370 }
1371 return false;
1372 }
1373
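/* Complete a passive Fast Open: create the child socket directly from the SYN,
 * send the SYN-ACK, queue any data carried in the SYN for the child and add it
 * straight to the accept queue.
 */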
1374 static int tcp_v4_conn_req_fastopen(struct sock *sk,
1375 struct sk_buff *skb,
1376 struct sk_buff *skb_synack,
1377 struct request_sock *req)
1378 {
1379 struct tcp_sock *tp = tcp_sk(sk);
1380 struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
1381 const struct inet_request_sock *ireq = inet_rsk(req);
1382 struct sock *child;
1383 int err;
1384
1385 req->num_retrans = 0;
1386 req->num_timeout = 0;
1387 req->sk = NULL;
1388
1389 child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
1390 if (child == NULL) {
1391 NET_INC_STATS_BH(sock_net(sk),
1392 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
1393 kfree_skb(skb_synack);
1394 return -1;
1395 }
1396 err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
1397 ireq->rmt_addr, ireq->opt);
1398 err = net_xmit_eval(err);
1399 if (!err)
1400 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1401 /* XXX (TFO) - is it ok to ignore error and continue? */
1402
1403 spin_lock(&queue->fastopenq->lock);
1404 queue->fastopenq->qlen++;
1405 spin_unlock(&queue->fastopenq->lock);
1406
1407 /* Initialize the child socket. Have to fix some values to take
1408 * into account the child is a Fast Open socket and is created
1409 * only out of the bits carried in the SYN packet.
1410 */
1411 tp = tcp_sk(child);
1412
1413 tp->fastopen_rsk = req;
1414 /* Do a hold on the listener sk so that if the listener is being
1415 * closed, the child that has been accepted can live on and still
1416 * access listen_lock.
1417 */
1418 sock_hold(sk);
1419 tcp_rsk(req)->listener = sk;
1420
1421 /* RFC1323: The window in SYN & SYN/ACK segments is never
1422 * scaled. So correct it appropriately.
1423 */
1424 tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
1425
1426 /* Activate the retrans timer so that SYNACK can be retransmitted.
1427 * The request socket is not added to the SYN table of the parent
1428 * because it's been added to the accept queue directly.
1429 */
1430 inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
1431 TCP_TIMEOUT_INIT, TCP_RTO_MAX);
1432
1433 /* Add the child socket directly into the accept queue */
1434 inet_csk_reqsk_queue_add(sk, req, child);
1435
1436 /* Now finish processing the fastopen child socket. */
1437 inet_csk(child)->icsk_af_ops->rebuild_header(child);
1438 tcp_init_congestion_control(child);
1439 tcp_mtup_init(child);
1440 tcp_init_buffer_space(child);
1441 tcp_init_metrics(child);
1442
1443 /* Queue the data carried in the SYN packet. We need to first
1444 * bump skb's refcnt because the caller will attempt to free it.
1445 *
1446 * XXX (TFO) - we honor a zero-payload TFO request for now.
1447 * (Any reason not to?)
1448 */
1449 if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) {
1450 /* Don't queue the skb if there is no payload in SYN.
1451 * XXX (TFO) - How about SYN+FIN?
1452 */
1453 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
1454 } else {
1455 skb = skb_get(skb);
1456 skb_dst_drop(skb);
1457 __skb_pull(skb, tcp_hdr(skb)->doff * 4);
1458 skb_set_owner_r(skb, child);
1459 __skb_queue_tail(&child->sk_receive_queue, skb);
1460 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
1461 tp->syn_data_acked = 1;
1462 }
1463 sk->sk_data_ready(sk, 0);
1464 bh_unlock_sock(child);
1465 sock_put(child);
1466 WARN_ON(req->sk == NULL);
1467 return 0;
1468 }
1469
1470 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1471 {
1472 struct tcp_options_received tmp_opt;
1473 struct request_sock *req;
1474 struct inet_request_sock *ireq;
1475 struct tcp_sock *tp = tcp_sk(sk);
1476 struct dst_entry *dst = NULL;
1477 __be32 saddr = ip_hdr(skb)->saddr;
1478 __be32 daddr = ip_hdr(skb)->daddr;
1479 __u32 isn = TCP_SKB_CB(skb)->when;
1480 bool want_cookie = false;
1481 struct flowi4 fl4;
1482 struct tcp_fastopen_cookie foc = { .len = -1 };
1483 struct tcp_fastopen_cookie valid_foc = { .len = -1 };
1484 struct sk_buff *skb_synack;
1485 int do_fastopen;
1486
1487 /* Never answer SYNs sent to broadcast or multicast */
1488 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1489 goto drop;
1490
1491 /* TW buckets are converted to open requests without
1492 * limitations; they conserve resources and the peer is
1493 * evidently a real one.
1494 */
1495 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1496 want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
1497 if (!want_cookie)
1498 goto drop;
1499 }
1500
1501 /* Accept backlog is full. If we have already queued enough
1502 * of warm entries in the syn queue, drop the request. It is better than
1503 * clogging syn queue with openreqs with exponentially increasing
1504 * timeout.
1505 */
1506 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
1507 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1508 goto drop;
1509 }
1510
1511 req = inet_reqsk_alloc(&tcp_request_sock_ops);
1512 if (!req)
1513 goto drop;
1514
1515 #ifdef CONFIG_TCP_MD5SIG
1516 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1517 #endif
1518
1519 tcp_clear_options(&tmp_opt);
1520 tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
1521 tmp_opt.user_mss = tp->rx_opt.user_mss;
1522 tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
1523
1524 if (want_cookie && !tmp_opt.saw_tstamp)
1525 tcp_clear_options(&tmp_opt);
1526
1527 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1528 tcp_openreq_init(req, &tmp_opt, skb);
1529
1530 ireq = inet_rsk(req);
1531 ireq->loc_addr = daddr;
1532 ireq->rmt_addr = saddr;
1533 ireq->no_srccheck = inet_sk(sk)->transparent;
1534 ireq->opt = tcp_v4_save_options(skb);
1535 ireq->ir_mark = inet_request_mark(sk, skb);
1536
1537 if (security_inet_conn_request(sk, skb, req))
1538 goto drop_and_free;
1539
1540 if (!want_cookie || tmp_opt.tstamp_ok)
1541 TCP_ECN_create_request(req, skb, sock_net(sk));
1542
1543 if (want_cookie) {
1544 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1545 req->cookie_ts = tmp_opt.tstamp_ok;
1546 } else if (!isn) {
1547 /* VJ's idea. We save last timestamp seen
1548 * from the destination in peer table, when entering
1549 * state TIME-WAIT, and check against it before
1550 * accepting new connection request.
1551 *
1552 * If "isn" is not zero, this request hit alive
1553 * timewait bucket, so that all the necessary checks
1554 * are made in the function processing timewait state.
1555 */
1556 if (tmp_opt.saw_tstamp &&
1557 tcp_death_row.sysctl_tw_recycle &&
1558 (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
1559 fl4.daddr == saddr) {
1560 if (!tcp_peer_is_proven(req, dst, true)) {
1561 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1562 goto drop_and_release;
1563 }
1564 }
1565 /* Kill the following clause, if you dislike this way. */
1566 else if (!sysctl_tcp_syncookies &&
1567 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1568 (sysctl_max_syn_backlog >> 2)) &&
1569 !tcp_peer_is_proven(req, dst, false)) {
1570 /* Without syncookies the last quarter of the
1571 * backlog is filled with destinations
1572 * proven to be alive.
1573 * It means that we continue to communicate
1574 * with destinations already remembered
1575 * at the moment of the synflood.
1576 */
1577 LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
1578 &saddr, ntohs(tcp_hdr(skb)->source));
1579 goto drop_and_release;
1580 }
1581
1582 isn = tcp_v4_init_sequence(skb);
1583 }
1584 tcp_rsk(req)->snt_isn = isn;
1585
1586 if (dst == NULL) {
1587 dst = inet_csk_route_req(sk, &fl4, req);
1588 if (dst == NULL)
1589 goto drop_and_free;
1590 }
1591 do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc);
1592
1593 /* We don't call tcp_v4_send_synack() directly because we need
1594 * to make sure a child socket can be created successfully before
1595 * sending back synack!
1596 *
1597 * XXX (TFO) - Ideally one would simply call tcp_v4_send_synack()
1598 * (or better yet, call tcp_send_synack() in the child context
1599 * directly, but will have to fix bunch of other code first)
1600 * after syn_recv_sock() except one will need to first fix the
1601 * latter to remove its dependency on the current implementation
1602 * of tcp_v4_send_synack()->tcp_select_initial_window().
1603 */
1604 skb_synack = tcp_make_synack(sk, dst, req,
1605 fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL);
1606
1607 if (skb_synack) {
1608 __tcp_v4_send_check(skb_synack, ireq->loc_addr, ireq->rmt_addr);
1609 skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb));
1610 } else
1611 goto drop_and_free;
1612
1613 if (likely(!do_fastopen)) {
1614 int err;
1615 err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
1616 ireq->rmt_addr, ireq->opt);
1617 err = net_xmit_eval(err);
1618 if (err || want_cookie)
1619 goto drop_and_free;
1620
1621 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1622 tcp_rsk(req)->listener = NULL;
1623 /* Add the request_sock to the SYN table */
1624 inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1625 if (fastopen_cookie_present(&foc) && foc.len != 0)
1626 NET_INC_STATS_BH(sock_net(sk),
1627 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
1628 } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req))
1629 goto drop_and_free;
1630
1631 return 0;
1632
1633 drop_and_release:
1634 dst_release(dst);
1635 drop_and_free:
1636 reqsk_free(req);
1637 drop:
1638 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1639 return 0;
1640 }
1641 EXPORT_SYMBOL(tcp_v4_conn_request);
1642
1643
1644 /*
1645 * The three way handshake has completed - we got a valid synack -
1646 * now create the new socket.
1647 */
1648 struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1649 struct request_sock *req,
1650 struct dst_entry *dst)
1651 {
1652 struct inet_request_sock *ireq;
1653 struct inet_sock *newinet;
1654 struct tcp_sock *newtp;
1655 struct sock *newsk;
1656 #ifdef CONFIG_TCP_MD5SIG
1657 struct tcp_md5sig_key *key;
1658 #endif
1659 struct ip_options_rcu *inet_opt;
1660
1661 if (sk_acceptq_is_full(sk))
1662 goto exit_overflow;
1663
1664 newsk = tcp_create_openreq_child(sk, req, skb);
1665 if (!newsk)
1666 goto exit_nonewsk;
1667
1668 newsk->sk_gso_type = SKB_GSO_TCPV4;
1669 inet_sk_rx_dst_set(newsk, skb);
1670
1671 newtp = tcp_sk(newsk);
1672 newinet = inet_sk(newsk);
1673 ireq = inet_rsk(req);
1674 newinet->inet_daddr = ireq->rmt_addr;
1675 newinet->inet_rcv_saddr = ireq->loc_addr;
1676 newinet->inet_saddr = ireq->loc_addr;
1677 inet_opt = ireq->opt;
1678 rcu_assign_pointer(newinet->inet_opt, inet_opt);
1679 ireq->opt = NULL;
1680 newinet->mc_index = inet_iif(skb);
1681 newinet->mc_ttl = ip_hdr(skb)->ttl;
1682 newinet->rcv_tos = ip_hdr(skb)->tos;
1683 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1684 if (inet_opt)
1685 inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1686 newinet->inet_id = newtp->write_seq ^ jiffies;
1687
1688 if (!dst) {
1689 dst = inet_csk_route_child_sock(sk, newsk, req);
1690 if (!dst)
1691 goto put_and_exit;
1692 } else {
1693 /* syncookie case : see end of cookie_v4_check() */
1694 }
1695 sk_setup_caps(newsk, dst);
1696
1697 tcp_mtup_init(newsk);
1698 tcp_sync_mss(newsk, dst_mtu(dst));
1699 newtp->advmss = dst_metric_advmss(dst);
1700 if (tcp_sk(sk)->rx_opt.user_mss &&
1701 tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1702 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1703
1704 tcp_initialize_rcv_mss(newsk);
1705 tcp_synack_rtt_meas(newsk, req);
1706 newtp->total_retrans = req->num_retrans;
1707
1708 #ifdef CONFIG_TCP_MD5SIG
1709 /* Copy over the MD5 key from the original socket */
1710 key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1711 AF_INET);
1712 if (key != NULL) {
1713 /*
1714 * We're using one, so create a matching key
1715 * on the newsk structure. If we fail to get
1716 * memory, then we end up not copying the key
1717 * across. Shucks.
1718 */
1719 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
1720 AF_INET, key->key, key->keylen, GFP_ATOMIC);
1721 sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1722 }
1723 #endif
1724
1725 if (__inet_inherit_port(sk, newsk) < 0)
1726 goto put_and_exit;
1727 __inet_hash_nolisten(newsk, NULL);
1728
1729 return newsk;
1730
1731 exit_overflow:
1732 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1733 exit_nonewsk:
1734 dst_release(dst);
1735 exit:
1736 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1737 return NULL;
1738 put_and_exit:
1739 inet_csk_prepare_forced_close(newsk);
1740 tcp_done(newsk);
1741 goto exit;
1742 }
1743 EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1744
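/* Match an incoming segment on a listening socket against pending connection
 * requests, an already established child, or (if enabled) a SYN cookie.
 */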
1745 static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1746 {
1747 struct tcphdr *th = tcp_hdr(skb);
1748 const struct iphdr *iph = ip_hdr(skb);
1749 struct sock *nsk;
1750 struct request_sock **prev;
1751 /* Find possible connection requests. */
1752 struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1753 iph->saddr, iph->daddr);
1754 if (req)
1755 return tcp_check_req(sk, skb, req, prev, false);
1756
1757 nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1758 th->source, iph->daddr, th->dest, inet_iif(skb));
1759
1760 if (nsk) {
1761 if (nsk->sk_state != TCP_TIME_WAIT) {
1762 bh_lock_sock(nsk);
1763 return nsk;
1764 }
1765 inet_twsk_put(inet_twsk(nsk));
1766 return NULL;
1767 }
1768
1769 #ifdef CONFIG_SYN_COOKIES
1770 if (!th->syn)
1771 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1772 #endif
1773 return sk;
1774 }
1775
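/* Verify the TCP checksum of a received skb: accept a verified
 * CHECKSUM_COMPLETE, fully check short packets now, and defer the rest by
 * seeding the pseudo-header sum.
 */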
1776 static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1777 {
1778 const struct iphdr *iph = ip_hdr(skb);
1779
1780 if (skb->ip_summed == CHECKSUM_COMPLETE) {
1781 if (!tcp_v4_check(skb->len, iph->saddr,
1782 iph->daddr, skb->csum)) {
1783 skb->ip_summed = CHECKSUM_UNNECESSARY;
1784 return 0;
1785 }
1786 }
1787
1788 skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1789 skb->len, IPPROTO_TCP, 0);
1790
1791 if (skb->len <= 76) {
1792 return __skb_checksum_complete(skb);
1793 }
1794 return 0;
1795 }
1796
1797
1798 /* The socket must have its spinlock held when we get
1799 * here.
1800 *
1801 * We have a potential double-lock case here, so even when
1802 * doing backlog processing we use the BH locking scheme.
1803 * This is because we cannot sleep with the original spinlock
1804 * held.
1805 */
1806 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1807 {
1808 struct sock *rsk;
1809 #ifdef CONFIG_TCP_MD5SIG
1810 /*
1811 * We really want to reject the packet as early as possible
1812 * if:
1813 	 * o We're expecting an MD5'd packet and there is no MD5 TCP option
1814 * o There is an MD5 option and we're not expecting one
1815 */
1816 if (tcp_v4_inbound_md5_hash(sk, skb))
1817 goto discard;
1818 #endif
1819
1820 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1821 struct dst_entry *dst = sk->sk_rx_dst;
1822
1823 sock_rps_save_rxhash(sk, skb);
1824 if (dst) {
1825 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1826 dst->ops->check(dst, 0) == NULL) {
1827 dst_release(dst);
1828 sk->sk_rx_dst = NULL;
1829 }
1830 }
1831 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1832 rsk = sk;
1833 goto reset;
1834 }
1835 return 0;
1836 }
1837
1838 if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1839 goto csum_err;
1840
1841 if (sk->sk_state == TCP_LISTEN) {
1842 struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1843 if (!nsk)
1844 goto discard;
1845
1846 if (nsk != sk) {
1847 sock_rps_save_rxhash(nsk, skb);
1848 if (tcp_child_process(sk, nsk, skb)) {
1849 rsk = nsk;
1850 goto reset;
1851 }
1852 return 0;
1853 }
1854 } else
1855 sock_rps_save_rxhash(sk, skb);
1856
1857 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1858 rsk = sk;
1859 goto reset;
1860 }
1861 return 0;
1862
1863 reset:
1864 tcp_v4_send_reset(rsk, skb);
1865 discard:
1866 kfree_skb(skb);
1867 /* Be careful here. If this function gets more complicated and
1868 * gcc suffers from register pressure on the x86, sk (in %ebx)
1869 * might be destroyed here. This current version compiles correctly,
1870 * but you have been warned.
1871 */
1872 return 0;
1873
1874 csum_err:
1875 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
1876 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1877 goto discard;
1878 }
1879 EXPORT_SYMBOL(tcp_v4_do_rcv);
1880
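/* Early demux, invoked from the IPv4 receive path before the routing
 * decision: if the segment belongs to an established socket, attach
 * that socket to the skb (and, when the input interface still matches,
 * its cached rx dst) so the regular lookup and route resolution can be
 * skipped later on.
 */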
1881 void tcp_v4_early_demux(struct sk_buff *skb)
1882 {
1883 const struct iphdr *iph;
1884 const struct tcphdr *th;
1885 struct sock *sk;
1886
1887 if (skb->pkt_type != PACKET_HOST)
1888 return;
1889
1890 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1891 return;
1892
1893 iph = ip_hdr(skb);
1894 th = tcp_hdr(skb);
1895
1896 if (th->doff < sizeof(struct tcphdr) / 4)
1897 return;
1898
1899 sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1900 iph->saddr, th->source,
1901 iph->daddr, ntohs(th->dest),
1902 skb->skb_iif);
1903 if (sk) {
1904 skb->sk = sk;
1905 skb->destructor = sock_edemux;
1906 if (sk->sk_state != TCP_TIME_WAIT) {
1907 struct dst_entry *dst = sk->sk_rx_dst;
1908
1909 if (dst)
1910 dst = dst_check(dst, 0);
1911 if (dst &&
1912 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1913 skb_dst_set_noref(skb, dst);
1914 }
1915 }
1916 }
1917
1918 /* Packet is added to VJ-style prequeue for processing in process
1919 * context, if a reader task is waiting. Apparently, this exciting
1920 * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
1921 * failed somewhere. Latency? Burstiness? Well, at least now we will
1922  * see why it failed. 8)8) --ANK
1923 *
1924 */
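/* In short: prequeueing is skipped when tcp_low_latency is set or no
 * reader is blocked in recvmsg() (tp->ucopy.task). If the queued data
 * would overflow sk_rcvbuf the prequeue is drained straight through
 * sk_backlog_rcv(); otherwise the first queued skb wakes the reader and
 * arms the delayed-ACK timer so the sender is not starved of ACKs.
 */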
1925 bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
1926 {
1927 struct tcp_sock *tp = tcp_sk(sk);
1928
1929 if (sysctl_tcp_low_latency || !tp->ucopy.task)
1930 return false;
1931
1932 if (skb->len <= tcp_hdrlen(skb) &&
1933 skb_queue_len(&tp->ucopy.prequeue) == 0)
1934 return false;
1935
1936 skb_dst_force(skb);
1937 __skb_queue_tail(&tp->ucopy.prequeue, skb);
1938 tp->ucopy.memory += skb->truesize;
1939 if (tp->ucopy.memory > sk->sk_rcvbuf) {
1940 struct sk_buff *skb1;
1941
1942 BUG_ON(sock_owned_by_user(sk));
1943
1944 while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
1945 sk_backlog_rcv(sk, skb1);
1946 NET_INC_STATS_BH(sock_net(sk),
1947 LINUX_MIB_TCPPREQUEUEDROPPED);
1948 }
1949
1950 tp->ucopy.memory = 0;
1951 } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
1952 wake_up_interruptible_sync_poll(sk_sleep(sk),
1953 POLLIN | POLLRDNORM | POLLRDBAND);
1954 if (!inet_csk_ack_scheduled(sk))
1955 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
1956 (3 * tcp_rto_min(sk)) / 4,
1957 TCP_RTO_MAX);
1958 }
1959 return true;
1960 }
1961 EXPORT_SYMBOL(tcp_prequeue);
1962
1963 /*
1964 * From tcp_input.c
1965 */
1966
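/* Main IPv4 receive entry point for TCP. In outline: validate the
 * header and checksum, fill in TCP_SKB_CB(), look the segment up in the
 * established/listening hashes, apply min-TTL, XFRM policy and socket
 * filter checks, and then either process it directly (possibly via the
 * prequeue) when the socket is not owned by user context, or queue it
 * on the socket backlog.
 */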
1967 int tcp_v4_rcv(struct sk_buff *skb)
1968 {
1969 const struct iphdr *iph;
1970 const struct tcphdr *th;
1971 struct sock *sk;
1972 int ret;
1973 struct net *net = dev_net(skb->dev);
1974
1975 if (skb->pkt_type != PACKET_HOST)
1976 goto discard_it;
1977
1978 /* Count it even if it's bad */
1979 TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1980
1981 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1982 goto discard_it;
1983
1984 th = tcp_hdr(skb);
1985
1986 if (th->doff < sizeof(struct tcphdr) / 4)
1987 goto bad_packet;
1988 if (!pskb_may_pull(skb, th->doff * 4))
1989 goto discard_it;
1990
1991 /* An explanation is required here, I think.
1992 * Packet length and doff are validated by header prediction,
1993 	 * provided the case of th->doff==0 is eliminated.
1994 * So, we defer the checks. */
1995 if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
1996 goto csum_error;
1997
1998 th = tcp_hdr(skb);
1999 iph = ip_hdr(skb);
2000 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
2001 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
2002 skb->len - th->doff * 4);
2003 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
2004 TCP_SKB_CB(skb)->when = 0;
2005 TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
2006 TCP_SKB_CB(skb)->sacked = 0;
2007
2008 sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
2009 if (!sk)
2010 goto no_tcp_socket;
2011
2012 process:
2013 if (sk->sk_state == TCP_TIME_WAIT)
2014 goto do_time_wait;
2015
2016 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
2017 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
2018 goto discard_and_relse;
2019 }
2020
2021 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
2022 goto discard_and_relse;
2023 nf_reset(skb);
2024
2025 if (sk_filter(sk, skb))
2026 goto discard_and_relse;
2027
2028 skb->dev = NULL;
2029
2030 bh_lock_sock_nested(sk);
2031 ret = 0;
2032 if (!sock_owned_by_user(sk)) {
2033 #ifdef CONFIG_NET_DMA
2034 struct tcp_sock *tp = tcp_sk(sk);
2035 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
2036 tp->ucopy.dma_chan = net_dma_find_channel();
2037 if (tp->ucopy.dma_chan)
2038 ret = tcp_v4_do_rcv(sk, skb);
2039 else
2040 #endif
2041 {
2042 if (!tcp_prequeue(sk, skb))
2043 ret = tcp_v4_do_rcv(sk, skb);
2044 }
2045 } else if (unlikely(sk_add_backlog(sk, skb,
2046 sk->sk_rcvbuf + sk->sk_sndbuf))) {
2047 bh_unlock_sock(sk);
2048 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
2049 goto discard_and_relse;
2050 }
2051 bh_unlock_sock(sk);
2052
2053 sock_put(sk);
2054
2055 return ret;
2056
2057 no_tcp_socket:
2058 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
2059 goto discard_it;
2060
2061 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
2062 csum_error:
2063 TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
2064 bad_packet:
2065 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
2066 } else {
2067 tcp_v4_send_reset(NULL, skb);
2068 }
2069
2070 discard_it:
2071 /* Discard frame. */
2072 kfree_skb(skb);
2073 return 0;
2074
2075 discard_and_relse:
2076 sock_put(sk);
2077 goto discard_it;
2078
2079 do_time_wait:
2080 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
2081 inet_twsk_put(inet_twsk(sk));
2082 goto discard_it;
2083 }
2084
2085 if (skb->len < (th->doff << 2)) {
2086 inet_twsk_put(inet_twsk(sk));
2087 goto bad_packet;
2088 }
2089 if (tcp_checksum_complete(skb)) {
2090 inet_twsk_put(inet_twsk(sk));
2091 goto csum_error;
2092 }
2093 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
2094 case TCP_TW_SYN: {
2095 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
2096 &tcp_hashinfo,
2097 iph->saddr, th->source,
2098 iph->daddr, th->dest,
2099 inet_iif(skb));
2100 if (sk2) {
2101 inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
2102 inet_twsk_put(inet_twsk(sk));
2103 sk = sk2;
2104 goto process;
2105 }
2106 /* Fall through to ACK */
2107 }
2108 case TCP_TW_ACK:
2109 tcp_v4_timewait_ack(sk, skb);
2110 break;
2111 case TCP_TW_RST:
2112 goto no_tcp_socket;
2113 case TCP_TW_SUCCESS:;
2114 }
2115 goto discard_it;
2116 }
2117
2118 static struct timewait_sock_ops tcp_timewait_sock_ops = {
2119 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
2120 .twsk_unique = tcp_twsk_unique,
2121 .twsk_destructor= tcp_twsk_destructor,
2122 };
2123
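/* Cache the input route of this skb on the socket so the established
 * fast path in tcp_v4_do_rcv() can reuse it: take a reference on the
 * dst and remember the incoming interface for later validation.
 */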
2124 void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
2125 {
2126 struct dst_entry *dst = skb_dst(skb);
2127
2128 dst_hold(dst);
2129 sk->sk_rx_dst = dst;
2130 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
2131 }
2132 EXPORT_SYMBOL(inet_sk_rx_dst_set);
2133
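/* Address-family specific operations used by the AF-independent
 * connection code; tcp_ipv6.c provides the equivalent table for v6,
 * which is what keeps most of the TCP state machine family-agnostic.
 */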
2134 const struct inet_connection_sock_af_ops ipv4_specific = {
2135 .queue_xmit = ip_queue_xmit,
2136 .send_check = tcp_v4_send_check,
2137 .rebuild_header = inet_sk_rebuild_header,
2138 .sk_rx_dst_set = inet_sk_rx_dst_set,
2139 .conn_request = tcp_v4_conn_request,
2140 .syn_recv_sock = tcp_v4_syn_recv_sock,
2141 .net_header_len = sizeof(struct iphdr),
2142 .setsockopt = ip_setsockopt,
2143 .getsockopt = ip_getsockopt,
2144 .addr2sockaddr = inet_csk_addr2sockaddr,
2145 .sockaddr_len = sizeof(struct sockaddr_in),
2146 .bind_conflict = inet_csk_bind_conflict,
2147 #ifdef CONFIG_COMPAT
2148 .compat_setsockopt = compat_ip_setsockopt,
2149 .compat_getsockopt = compat_ip_getsockopt,
2150 #endif
2151 };
2152 EXPORT_SYMBOL(ipv4_specific);
2153
2154 #ifdef CONFIG_TCP_MD5SIG
2155 static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
2156 .md5_lookup = tcp_v4_md5_lookup,
2157 .calc_md5_hash = tcp_v4_md5_hash_skb,
2158 .md5_parse = tcp_v4_parse_md5_keys,
2159 };
2160 #endif
2161
2162 /* NOTE: A lot of things are set to zero explicitly by the call to
2163  * sk_alloc(), so they need not be done here.
2164  */
2165 static int tcp_v4_init_sock(struct sock *sk)
2166 {
2167 struct inet_connection_sock *icsk = inet_csk(sk);
2168
2169 tcp_init_sock(sk);
2170
2171 icsk->icsk_af_ops = &ipv4_specific;
2172
2173 #ifdef CONFIG_TCP_MD5SIG
2174 tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
2175 #endif
2176
2177 return 0;
2178 }
2179
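/* Per-socket teardown: stop the retransmit/delayed-ACK timers, release
 * congestion control state, purge the write, out-of-order and prequeue
 * queues, free any MD5 keys, drop the bind bucket reference and finally
 * release the memory accounting / cgroup references.
 */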
2180 void tcp_v4_destroy_sock(struct sock *sk)
2181 {
2182 struct tcp_sock *tp = tcp_sk(sk);
2183
2184 tcp_clear_xmit_timers(sk);
2185
2186 tcp_cleanup_congestion_control(sk);
2187
2188 	/* Clean up the write buffer. */
2189 tcp_write_queue_purge(sk);
2190
2191 /* Cleans up our, hopefully empty, out_of_order_queue. */
2192 __skb_queue_purge(&tp->out_of_order_queue);
2193
2194 #ifdef CONFIG_TCP_MD5SIG
2195 /* Clean up the MD5 key list, if any */
2196 if (tp->md5sig_info) {
2197 tcp_clear_md5_list(sk);
2198 kfree_rcu(tp->md5sig_info, rcu);
2199 tp->md5sig_info = NULL;
2200 }
2201 #endif
2202
2203 #ifdef CONFIG_NET_DMA
2204 /* Cleans up our sk_async_wait_queue */
2205 __skb_queue_purge(&sk->sk_async_wait_queue);
2206 #endif
2207
2208 	/* Clean up the prequeue; it really should be empty. */
2209 __skb_queue_purge(&tp->ucopy.prequeue);
2210
2211 /* Clean up a referenced TCP bind bucket. */
2212 if (inet_csk(sk)->icsk_bind_hash)
2213 inet_put_port(sk);
2214
2215 BUG_ON(tp->fastopen_rsk != NULL);
2216
2217 /* If socket is aborted during connect operation */
2218 tcp_free_fastopen_req(tp);
2219
2220 sk_sockets_allocated_dec(sk);
2221 sock_release_memcg(sk);
2222 }
2223 EXPORT_SYMBOL(tcp_v4_destroy_sock);
2224
2225 #ifdef CONFIG_PROC_FS
2226 /* Proc filesystem TCP sock list dumping. */
2227
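/* The seq_file iterator below walks /proc/net/tcp in four states:
 * LISTENING (listening hash buckets), OPENREQ (a listener's SYN table),
 * ESTABLISHED (ehash chains) and TIME_WAIT (the twchain of the same
 * ehash bucket). st->bucket, st->offset and st->last_pos let a partial
 * read resume roughly where the previous one stopped.
 */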
2228 static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
2229 {
2230 return hlist_nulls_empty(head) ? NULL :
2231 list_entry(head->first, struct inet_timewait_sock, tw_node);
2232 }
2233
2234 static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
2235 {
2236 return !is_a_nulls(tw->tw_node.next) ?
2237 hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
2238 }
2239
2240 /*
2241  * Get the next listener socket following cur. If cur is NULL, get the
2242  * first socket starting from the bucket given in st->bucket; when
2243  * st->bucket is zero, the very first socket in the hash table is returned.
2244  */
2245 static void *listening_get_next(struct seq_file *seq, void *cur)
2246 {
2247 struct inet_connection_sock *icsk;
2248 struct hlist_nulls_node *node;
2249 struct sock *sk = cur;
2250 struct inet_listen_hashbucket *ilb;
2251 struct tcp_iter_state *st = seq->private;
2252 struct net *net = seq_file_net(seq);
2253
2254 if (!sk) {
2255 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2256 spin_lock_bh(&ilb->lock);
2257 sk = sk_nulls_head(&ilb->head);
2258 st->offset = 0;
2259 goto get_sk;
2260 }
2261 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2262 ++st->num;
2263 ++st->offset;
2264
2265 if (st->state == TCP_SEQ_STATE_OPENREQ) {
2266 struct request_sock *req = cur;
2267
2268 icsk = inet_csk(st->syn_wait_sk);
2269 req = req->dl_next;
2270 while (1) {
2271 while (req) {
2272 if (req->rsk_ops->family == st->family) {
2273 cur = req;
2274 goto out;
2275 }
2276 req = req->dl_next;
2277 }
2278 if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
2279 break;
2280 get_req:
2281 req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
2282 }
2283 sk = sk_nulls_next(st->syn_wait_sk);
2284 st->state = TCP_SEQ_STATE_LISTENING;
2285 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2286 } else {
2287 icsk = inet_csk(sk);
2288 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2289 if (reqsk_queue_len(&icsk->icsk_accept_queue))
2290 goto start_req;
2291 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2292 sk = sk_nulls_next(sk);
2293 }
2294 get_sk:
2295 sk_nulls_for_each_from(sk, node) {
2296 if (!net_eq(sock_net(sk), net))
2297 continue;
2298 if (sk->sk_family == st->family) {
2299 cur = sk;
2300 goto out;
2301 }
2302 icsk = inet_csk(sk);
2303 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2304 if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
2305 start_req:
2306 st->uid = sock_i_uid(sk);
2307 st->syn_wait_sk = sk;
2308 st->state = TCP_SEQ_STATE_OPENREQ;
2309 st->sbucket = 0;
2310 goto get_req;
2311 }
2312 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2313 }
2314 spin_unlock_bh(&ilb->lock);
2315 st->offset = 0;
2316 if (++st->bucket < INET_LHTABLE_SIZE) {
2317 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2318 spin_lock_bh(&ilb->lock);
2319 sk = sk_nulls_head(&ilb->head);
2320 goto get_sk;
2321 }
2322 cur = NULL;
2323 out:
2324 return cur;
2325 }
2326
2327 static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2328 {
2329 struct tcp_iter_state *st = seq->private;
2330 void *rc;
2331
2332 st->bucket = 0;
2333 st->offset = 0;
2334 rc = listening_get_next(seq, NULL);
2335
2336 while (rc && *pos) {
2337 rc = listening_get_next(seq, rc);
2338 --*pos;
2339 }
2340 return rc;
2341 }
2342
2343 static inline bool empty_bucket(struct tcp_iter_state *st)
2344 {
2345 return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
2346 hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
2347 }
2348
2349 /*
2350  * Get the first established socket, starting from the bucket given in
2351  * st->bucket. If st->bucket is zero, the very first socket in the hash is returned.
2352  */
2353 static void *established_get_first(struct seq_file *seq)
2354 {
2355 struct tcp_iter_state *st = seq->private;
2356 struct net *net = seq_file_net(seq);
2357 void *rc = NULL;
2358
2359 st->offset = 0;
2360 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
2361 struct sock *sk;
2362 struct hlist_nulls_node *node;
2363 struct inet_timewait_sock *tw;
2364 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
2365
2366 /* Lockless fast path for the common case of empty buckets */
2367 if (empty_bucket(st))
2368 continue;
2369
2370 spin_lock_bh(lock);
2371 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2372 if (sk->sk_family != st->family ||
2373 !net_eq(sock_net(sk), net)) {
2374 continue;
2375 }
2376 rc = sk;
2377 goto out;
2378 }
2379 st->state = TCP_SEQ_STATE_TIME_WAIT;
2380 inet_twsk_for_each(tw, node,
2381 &tcp_hashinfo.ehash[st->bucket].twchain) {
2382 if (tw->tw_family != st->family ||
2383 !net_eq(twsk_net(tw), net)) {
2384 continue;
2385 }
2386 rc = tw;
2387 goto out;
2388 }
2389 spin_unlock_bh(lock);
2390 st->state = TCP_SEQ_STATE_ESTABLISHED;
2391 }
2392 out:
2393 return rc;
2394 }
2395
2396 static void *established_get_next(struct seq_file *seq, void *cur)
2397 {
2398 struct sock *sk = cur;
2399 struct inet_timewait_sock *tw;
2400 struct hlist_nulls_node *node;
2401 struct tcp_iter_state *st = seq->private;
2402 struct net *net = seq_file_net(seq);
2403
2404 ++st->num;
2405 ++st->offset;
2406
2407 if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2408 tw = cur;
2409 tw = tw_next(tw);
2410 get_tw:
2411 while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
2412 tw = tw_next(tw);
2413 }
2414 if (tw) {
2415 cur = tw;
2416 goto out;
2417 }
2418 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2419 st->state = TCP_SEQ_STATE_ESTABLISHED;
2420
2421 		/* Look for the next non-empty bucket */
2422 st->offset = 0;
2423 while (++st->bucket <= tcp_hashinfo.ehash_mask &&
2424 empty_bucket(st))
2425 ;
2426 if (st->bucket > tcp_hashinfo.ehash_mask)
2427 return NULL;
2428
2429 spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2430 sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
2431 } else
2432 sk = sk_nulls_next(sk);
2433
2434 sk_nulls_for_each_from(sk, node) {
2435 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
2436 goto found;
2437 }
2438
2439 st->state = TCP_SEQ_STATE_TIME_WAIT;
2440 tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
2441 goto get_tw;
2442 found:
2443 cur = sk;
2444 out:
2445 return cur;
2446 }
2447
2448 static void *established_get_idx(struct seq_file *seq, loff_t pos)
2449 {
2450 struct tcp_iter_state *st = seq->private;
2451 void *rc;
2452
2453 st->bucket = 0;
2454 rc = established_get_first(seq);
2455
2456 while (rc && pos) {
2457 rc = established_get_next(seq, rc);
2458 --pos;
2459 }
2460 return rc;
2461 }
2462
2463 static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2464 {
2465 void *rc;
2466 struct tcp_iter_state *st = seq->private;
2467
2468 st->state = TCP_SEQ_STATE_LISTENING;
2469 rc = listening_get_idx(seq, &pos);
2470
2471 if (!rc) {
2472 st->state = TCP_SEQ_STATE_ESTABLISHED;
2473 rc = established_get_idx(seq, pos);
2474 }
2475
2476 return rc;
2477 }
2478
2479 static void *tcp_seek_last_pos(struct seq_file *seq)
2480 {
2481 struct tcp_iter_state *st = seq->private;
2482 int offset = st->offset;
2483 int orig_num = st->num;
2484 void *rc = NULL;
2485
2486 switch (st->state) {
2487 case TCP_SEQ_STATE_OPENREQ:
2488 case TCP_SEQ_STATE_LISTENING:
2489 if (st->bucket >= INET_LHTABLE_SIZE)
2490 break;
2491 st->state = TCP_SEQ_STATE_LISTENING;
2492 rc = listening_get_next(seq, NULL);
2493 while (offset-- && rc)
2494 rc = listening_get_next(seq, rc);
2495 if (rc)
2496 break;
2497 st->bucket = 0;
2498 /* Fallthrough */
2499 case TCP_SEQ_STATE_ESTABLISHED:
2500 case TCP_SEQ_STATE_TIME_WAIT:
2501 st->state = TCP_SEQ_STATE_ESTABLISHED;
2502 if (st->bucket > tcp_hashinfo.ehash_mask)
2503 break;
2504 rc = established_get_first(seq);
2505 while (offset-- && rc)
2506 rc = established_get_next(seq, rc);
2507 }
2508
2509 st->num = orig_num;
2510
2511 return rc;
2512 }
2513
2514 static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2515 {
2516 struct tcp_iter_state *st = seq->private;
2517 void *rc;
2518
2519 if (*pos && *pos == st->last_pos) {
2520 rc = tcp_seek_last_pos(seq);
2521 if (rc)
2522 goto out;
2523 }
2524
2525 st->state = TCP_SEQ_STATE_LISTENING;
2526 st->num = 0;
2527 st->bucket = 0;
2528 st->offset = 0;
2529 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2530
2531 out:
2532 st->last_pos = *pos;
2533 return rc;
2534 }
2535
2536 static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2537 {
2538 struct tcp_iter_state *st = seq->private;
2539 void *rc = NULL;
2540
2541 if (v == SEQ_START_TOKEN) {
2542 rc = tcp_get_idx(seq, 0);
2543 goto out;
2544 }
2545
2546 switch (st->state) {
2547 case TCP_SEQ_STATE_OPENREQ:
2548 case TCP_SEQ_STATE_LISTENING:
2549 rc = listening_get_next(seq, v);
2550 if (!rc) {
2551 st->state = TCP_SEQ_STATE_ESTABLISHED;
2552 st->bucket = 0;
2553 st->offset = 0;
2554 rc = established_get_first(seq);
2555 }
2556 break;
2557 case TCP_SEQ_STATE_ESTABLISHED:
2558 case TCP_SEQ_STATE_TIME_WAIT:
2559 rc = established_get_next(seq, v);
2560 break;
2561 }
2562 out:
2563 ++*pos;
2564 st->last_pos = *pos;
2565 return rc;
2566 }
2567
2568 static void tcp_seq_stop(struct seq_file *seq, void *v)
2569 {
2570 struct tcp_iter_state *st = seq->private;
2571
2572 switch (st->state) {
2573 case TCP_SEQ_STATE_OPENREQ:
2574 if (v) {
2575 struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2576 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2577 }
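		/* fall through: release the listening bucket lock too */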
2578 case TCP_SEQ_STATE_LISTENING:
2579 if (v != SEQ_START_TOKEN)
2580 spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
2581 break;
2582 case TCP_SEQ_STATE_TIME_WAIT:
2583 case TCP_SEQ_STATE_ESTABLISHED:
2584 if (v)
2585 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2586 break;
2587 }
2588 }
2589
2590 int tcp_seq_open(struct inode *inode, struct file *file)
2591 {
2592 struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
2593 struct tcp_iter_state *s;
2594 int err;
2595
2596 err = seq_open_net(inode, file, &afinfo->seq_ops,
2597 sizeof(struct tcp_iter_state));
2598 if (err < 0)
2599 return err;
2600
2601 s = ((struct seq_file *)file->private_data)->private;
2602 s->family = afinfo->family;
2603 s->last_pos = 0;
2604 return 0;
2605 }
2606 EXPORT_SYMBOL(tcp_seq_open);
2607
2608 int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2609 {
2610 int rc = 0;
2611 struct proc_dir_entry *p;
2612
2613 afinfo->seq_ops.start = tcp_seq_start;
2614 afinfo->seq_ops.next = tcp_seq_next;
2615 afinfo->seq_ops.stop = tcp_seq_stop;
2616
2617 p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2618 afinfo->seq_fops, afinfo);
2619 if (!p)
2620 rc = -ENOMEM;
2621 return rc;
2622 }
2623 EXPORT_SYMBOL(tcp_proc_register);
2624
2625 void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2626 {
2627 remove_proc_entry(afinfo->name, net->proc_net);
2628 }
2629 EXPORT_SYMBOL(tcp_proc_unregister);
2630
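/* Formatting helpers for the /proc/net/tcp dump. Each record is emitted
 * as a single fixed-width line; a purely illustrative example (the
 * values below are made up) of an established socket looks like:
 *
 *   0: 0100007F:0CEA 0100007F:9C40 01 00000000:00000000 00:00000000 00000000  1000  0 12345 1 ffff880012345678 20 4 30 10 -1
 *
 * i.e. slot, local and remote address:port in hex, state, tx/rx queue
 * bytes, timer type:expiry, retransmits, uid, probe/timeout count,
 * inode, refcount, socket pointer and a few congestion-related fields.
 */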
2631 static void get_openreq4(const struct sock *sk, const struct request_sock *req,
2632 struct seq_file *f, int i, kuid_t uid, int *len)
2633 {
2634 const struct inet_request_sock *ireq = inet_rsk(req);
2635 long delta = req->expires - jiffies;
2636
2637 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2638 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
2639 i,
2640 ireq->loc_addr,
2641 ntohs(inet_sk(sk)->inet_sport),
2642 ireq->rmt_addr,
2643 ntohs(ireq->rmt_port),
2644 TCP_SYN_RECV,
2645 0, 0, /* could print option size, but that is af dependent. */
2646 1, /* timers active (only the expire timer) */
2647 jiffies_delta_to_clock_t(delta),
2648 req->num_timeout,
2649 from_kuid_munged(seq_user_ns(f), uid),
2650 0, /* non standard timer */
2651 0, /* open_requests have no inode */
2652 atomic_read(&sk->sk_refcnt),
2653 req,
2654 len);
2655 }
2656
2657 static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
2658 {
2659 int timer_active;
2660 unsigned long timer_expires;
2661 const struct tcp_sock *tp = tcp_sk(sk);
2662 const struct inet_connection_sock *icsk = inet_csk(sk);
2663 const struct inet_sock *inet = inet_sk(sk);
2664 struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
2665 __be32 dest = inet->inet_daddr;
2666 __be32 src = inet->inet_rcv_saddr;
2667 __u16 destp = ntohs(inet->inet_dport);
2668 __u16 srcp = ntohs(inet->inet_sport);
2669 int rx_queue;
2670
2671 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2672 icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
2673 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2674 timer_active = 1;
2675 timer_expires = icsk->icsk_timeout;
2676 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2677 timer_active = 4;
2678 timer_expires = icsk->icsk_timeout;
2679 } else if (timer_pending(&sk->sk_timer)) {
2680 timer_active = 2;
2681 timer_expires = sk->sk_timer.expires;
2682 } else {
2683 timer_active = 0;
2684 timer_expires = jiffies;
2685 }
2686
2687 if (sk->sk_state == TCP_LISTEN)
2688 rx_queue = sk->sk_ack_backlog;
2689 else
2690 /*
2691 		 * because we don't lock the socket, we might find a transient negative value
2692 */
2693 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2694
2695 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2696 "%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
2697 i, src, srcp, dest, destp, sk->sk_state,
2698 tp->write_seq - tp->snd_una,
2699 rx_queue,
2700 timer_active,
2701 jiffies_delta_to_clock_t(timer_expires - jiffies),
2702 icsk->icsk_retransmits,
2703 from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
2704 icsk->icsk_probes_out,
2705 sock_i_ino(sk),
2706 atomic_read(&sk->sk_refcnt), sk,
2707 jiffies_to_clock_t(icsk->icsk_rto),
2708 jiffies_to_clock_t(icsk->icsk_ack.ato),
2709 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2710 tp->snd_cwnd,
2711 sk->sk_state == TCP_LISTEN ?
2712 (fastopenq ? fastopenq->max_qlen : 0) :
2713 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh),
2714 len);
2715 }
2716
2717 static void get_timewait4_sock(const struct inet_timewait_sock *tw,
2718 struct seq_file *f, int i, int *len)
2719 {
2720 __be32 dest, src;
2721 __u16 destp, srcp;
2722 long delta = tw->tw_ttd - jiffies;
2723
2724 dest = tw->tw_daddr;
2725 src = tw->tw_rcv_saddr;
2726 destp = ntohs(tw->tw_dport);
2727 srcp = ntohs(tw->tw_sport);
2728
2729 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2730 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
2731 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2732 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2733 atomic_read(&tw->tw_refcnt), tw, len);
2734 }
2735
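/* Each /proc/net/tcp record is padded with spaces to TMPSZ - 1
 * characters so readers can treat the file as fixed-width lines;
 * tcp4_seq_show() uses the %n-captured length from the helpers above to
 * compute the padding.
 */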
2736 #define TMPSZ 150
2737
2738 static int tcp4_seq_show(struct seq_file *seq, void *v)
2739 {
2740 struct tcp_iter_state *st;
2741 int len;
2742
2743 if (v == SEQ_START_TOKEN) {
2744 seq_printf(seq, "%-*s\n", TMPSZ - 1,
2745 " sl local_address rem_address st tx_queue "
2746 "rx_queue tr tm->when retrnsmt uid timeout "
2747 "inode");
2748 goto out;
2749 }
2750 st = seq->private;
2751
2752 switch (st->state) {
2753 case TCP_SEQ_STATE_LISTENING:
2754 case TCP_SEQ_STATE_ESTABLISHED:
2755 get_tcp4_sock(v, seq, st->num, &len);
2756 break;
2757 case TCP_SEQ_STATE_OPENREQ:
2758 get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
2759 break;
2760 case TCP_SEQ_STATE_TIME_WAIT:
2761 get_timewait4_sock(v, seq, st->num, &len);
2762 break;
2763 }
2764 seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
2765 out:
2766 return 0;
2767 }
2768
2769 static const struct file_operations tcp_afinfo_seq_fops = {
2770 .owner = THIS_MODULE,
2771 .open = tcp_seq_open,
2772 .read = seq_read,
2773 .llseek = seq_lseek,
2774 .release = seq_release_net
2775 };
2776
2777 static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2778 .name = "tcp",
2779 .family = AF_INET,
2780 .seq_fops = &tcp_afinfo_seq_fops,
2781 .seq_ops = {
2782 .show = tcp4_seq_show,
2783 },
2784 };
2785
2786 static int __net_init tcp4_proc_init_net(struct net *net)
2787 {
2788 return tcp_proc_register(net, &tcp4_seq_afinfo);
2789 }
2790
2791 static void __net_exit tcp4_proc_exit_net(struct net *net)
2792 {
2793 tcp_proc_unregister(net, &tcp4_seq_afinfo);
2794 }
2795
2796 static struct pernet_operations tcp4_net_ops = {
2797 .init = tcp4_proc_init_net,
2798 .exit = tcp4_proc_exit_net,
2799 };
2800
2801 int __init tcp4_proc_init(void)
2802 {
2803 return register_pernet_subsys(&tcp4_net_ops);
2804 }
2805
2806 void tcp4_proc_exit(void)
2807 {
2808 unregister_pernet_subsys(&tcp4_net_ops);
2809 }
2810 #endif /* CONFIG_PROC_FS */
2811
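/* GRO receive/complete hooks for TCP over IPv4. Segments are only
 * handed to the generic tcp_gro_receive() once their checksum has been
 * verified here: CHECKSUM_COMPLETE values are folded against the
 * pseudo-header, CHECKSUM_NONE packets are checksummed in software, and
 * a failure marks the skb so the GRO chain is flushed instead of merged.
 */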
2812 struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2813 {
2814 const struct iphdr *iph = skb_gro_network_header(skb);
2815 __wsum wsum;
2816 __sum16 sum;
2817
2818 switch (skb->ip_summed) {
2819 case CHECKSUM_COMPLETE:
2820 if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
2821 skb->csum)) {
2822 skb->ip_summed = CHECKSUM_UNNECESSARY;
2823 break;
2824 }
2825 flush:
2826 NAPI_GRO_CB(skb)->flush = 1;
2827 return NULL;
2828
2829 case CHECKSUM_NONE:
2830 wsum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
2831 skb_gro_len(skb), IPPROTO_TCP, 0);
2832 sum = csum_fold(skb_checksum(skb,
2833 skb_gro_offset(skb),
2834 skb_gro_len(skb),
2835 wsum));
2836 if (sum)
2837 goto flush;
2838
2839 skb->ip_summed = CHECKSUM_UNNECESSARY;
2840 break;
2841 }
2842
2843 return tcp_gro_receive(head, skb);
2844 }
2845
2846 int tcp4_gro_complete(struct sk_buff *skb)
2847 {
2848 const struct iphdr *iph = ip_hdr(skb);
2849 struct tcphdr *th = tcp_hdr(skb);
2850
2851 th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2852 iph->saddr, iph->daddr, 0);
2853 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2854
2855 return tcp_gro_complete(skb);
2856 }
2857
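/* The struct proto instance for IPv4 TCP. It is registered with the
 * socket layer (through the inetsw table in af_inet.c) and maps generic
 * socket operations such as connect(), sendmsg() and recvmsg() onto the
 * TCP implementations above.
 */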
2858 struct proto tcp_prot = {
2859 .name = "TCP",
2860 .owner = THIS_MODULE,
2861 .close = tcp_close,
2862 .connect = tcp_v4_connect,
2863 .disconnect = tcp_disconnect,
2864 .accept = inet_csk_accept,
2865 .ioctl = tcp_ioctl,
2866 .init = tcp_v4_init_sock,
2867 .destroy = tcp_v4_destroy_sock,
2868 .shutdown = tcp_shutdown,
2869 .setsockopt = tcp_setsockopt,
2870 .getsockopt = tcp_getsockopt,
2871 .recvmsg = tcp_recvmsg,
2872 .sendmsg = tcp_sendmsg,
2873 .sendpage = tcp_sendpage,
2874 .backlog_rcv = tcp_v4_do_rcv,
2875 .release_cb = tcp_release_cb,
2876 .mtu_reduced = tcp_v4_mtu_reduced,
2877 .hash = inet_hash,
2878 .unhash = inet_unhash,
2879 .get_port = inet_csk_get_port,
2880 .enter_memory_pressure = tcp_enter_memory_pressure,
2881 .sockets_allocated = &tcp_sockets_allocated,
2882 .orphan_count = &tcp_orphan_count,
2883 .memory_allocated = &tcp_memory_allocated,
2884 .memory_pressure = &tcp_memory_pressure,
2885 .sysctl_wmem = sysctl_tcp_wmem,
2886 .sysctl_rmem = sysctl_tcp_rmem,
2887 .max_header = MAX_TCP_HEADER,
2888 .obj_size = sizeof(struct tcp_sock),
2889 .slab_flags = SLAB_DESTROY_BY_RCU,
2890 .twsk_prot = &tcp_timewait_sock_ops,
2891 .rsk_prot = &tcp_request_sock_ops,
2892 .h.hashinfo = &tcp_hashinfo,
2893 .no_autobind = true,
2894 #ifdef CONFIG_COMPAT
2895 .compat_setsockopt = compat_tcp_setsockopt,
2896 .compat_getsockopt = compat_tcp_getsockopt,
2897 #endif
2898 #ifdef CONFIG_MEMCG_KMEM
2899 .init_cgroup = tcp_init_cgroup,
2900 .destroy_cgroup = tcp_destroy_cgroup,
2901 .proto_cgroup = tcp_proto_cgroup,
2902 #endif
2903 .diag_destroy = tcp_abort,
2904 };
2905 EXPORT_SYMBOL(tcp_prot);
2906
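/* Per-network-namespace init/exit. tcp_sk_init() creates one kernel
 * control socket per possible CPU (used when TCP has to transmit
 * without a local socket, e.g. RST or timewait ACK replies) and sets
 * the namespace's sysctl defaults such as tcp_ecn; tcp_sk_exit()
 * releases them again.
 */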
2907 static void __net_exit tcp_sk_exit(struct net *net)
2908 {
2909 int cpu;
2910
2911 for_each_possible_cpu(cpu)
2912 inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
2913 free_percpu(net->ipv4.tcp_sk);
2914 }
2915
2916 static int __net_init tcp_sk_init(struct net *net)
2917 {
2918 int res, cpu;
2919
2920 net->ipv4.tcp_sk = alloc_percpu(struct sock *);
2921 if (!net->ipv4.tcp_sk)
2922 return -ENOMEM;
2923
2924 for_each_possible_cpu(cpu) {
2925 struct sock *sk;
2926
2927 res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
2928 IPPROTO_TCP, net);
2929 if (res)
2930 goto fail;
2931 *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
2932 }
2933 net->ipv4.sysctl_tcp_ecn = 2;
2934 return 0;
2935
2936 fail:
2937 tcp_sk_exit(net);
2938
2939 return res;
2940 }
2941
2942 static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2943 {
2944 inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2945 }
2946
2947 static struct pernet_operations __net_initdata tcp_sk_ops = {
2948 .init = tcp_sk_init,
2949 .exit = tcp_sk_exit,
2950 .exit_batch = tcp_sk_exit_batch,
2951 };
2952
2953 void __init tcp_v4_init(void)
2954 {
2955 inet_hashinfo_init(&tcp_hashinfo);
2956 if (register_pernet_subsys(&tcp_sk_ops))
2957 panic("Failed to create the TCP control socket.\n");
2958 }
2959