// SPDX-License-Identifier: GPL-2.0-only
/*
 * Based on net/ipv4/tcp_timer.c
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Mark Evans, <evansmp@uhura.aston.ac.uk>
 *		Corey Minyard <wf-rch!minyard@relay.EU.net>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
 *		Linus Torvalds, <torvalds@cs.helsinki.fi>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Matthew Dillon, <dillon@apollo.west.oic.com>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 *
 * NewIP INET
 * An implementation of the TCP/IP protocol suite for the LINUX
 * operating system. NewIP INET is implemented using the BSD Socket
 * interface as the means of communication with the user level.
 *
 * Implementation of the Transmission Control Protocol (TCP).
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": [%s:%d] " fmt, __func__, __LINE__

#include <net/tcp_nip.h>
#include <linux/module.h>
#include "tcp_nip_parameter.h"

#define TCP_ORPHAN_RETRIES 8
/**
 *  tcp_nip_orphan_retries() - Returns maximal number of retries on an orphaned socket
 *  @sk:    Pointer to the current socket.
 *  @alive: bool, socket alive state
 */
static int tcp_nip_orphan_retries(struct sock *sk, bool alive)
{
	int retries = sock_net(sk)->ipv4.sysctl_tcp_orphan_retries; /* May be zero. */

	/* We know from an ICMP that something is wrong. */
	if (sk->sk_err_soft && !alive)
		retries = 0;

	/* However, if socket sent something recently, select some safe
	 * number of retries. 8 corresponds to >100 seconds with minimal
	 * RTO of 200msec.
	 */
	if (retries == 0 && alive)
		retries = TCP_ORPHAN_RETRIES;
	return retries;
}

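/* Delayed-ACK timer handler, run from the timer callback or when deferred
 * work is processed. Re-arms the timer if it fired early; otherwise, if an
 * ACK is still scheduled, sends it immediately with the minimum ATO.
 */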
void tcp_nip_delack_timer_handler(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
	    !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
		goto out;

	if (time_after(icsk->icsk_ack.timeout, jiffies)) {
		sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout);
		goto out;
	}
	icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;

	if (inet_csk_ack_scheduled(sk)) {
		icsk->icsk_ack.ato      = TCP_ATO_MIN;
		tcp_mstamp_refresh(tcp_sk(sk));
		tcp_nip_send_ack(sk);
		__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS);
	}

out:;
}

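/* Report a fatal timeout on the socket (the pending soft error if set,
 * ETIMEDOUT otherwise) and release all NewIP TCP resources.
 */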
static void tcp_nip_write_err(struct sock *sk)
{
	sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
	sk->sk_error_report(sk);
	/* Releasing TCP resources */
	tcp_nip_done(sk);
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT);
}

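/* Delayed-ACK timer callback. Runs the handler directly when the socket is
 * not owned by user context; otherwise defers the work to tcp_release_cb()
 * via the TCP_NIP_DELACK_TIMER_DEFERRED flag.
 */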
static void tcp_nip_delack_timer(struct timer_list *t)
{
	struct inet_connection_sock *icsk =
			from_timer(icsk, t, icsk_delack_timer);
	struct sock *sk = &icsk->icsk_inet.sk;

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		tcp_nip_delack_timer_handler(sk);
	} else {
		__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
		/* delegate our work to tcp_release_cb() */
		if (!test_and_set_bit(TCP_NIP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags))
			sock_hold(sk);
	}
	bh_unlock_sock(sk);
	sock_put(sk);
}

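/* Note: the timeout and syn_set arguments are currently unused; only the
 * retransmission count is compared against the boundary.
 */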
static bool retransmits_nip_timed_out(struct sock *sk,
				      unsigned int boundary,
				      unsigned int timeout,
				      bool syn_set)
{
	/* NewIP does not support calculating the timeout based on timestamps.
	 * Currently, the timeout decision is made only from the number of
	 * retransmissions.
	 */
	nip_dbg("icsk->retransmits=%u, boundary=%u",
		inet_csk(sk)->icsk_retransmits, boundary);
	return inet_csk(sk)->icsk_retransmits > boundary;
}

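/* Decide whether the connection has used up its retransmission budget.
 * SYN_SENT/SYN_RECV sockets use the SYN retry limit; other sockets use
 * sysctl_tcp_retries2, reduced via tcp_nip_orphan_retries() once the socket
 * is orphaned (SOCK_DEAD). Returns 1 after aborting the connection, else 0.
 */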
static int tcp_nip_write_timeout(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct net *net = sock_net(sk);
	int retry_until;
	bool syn_set = false;

	if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
		retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
		syn_set = true;
	} else {
		retry_until = READ_ONCE(net->ipv4.sysctl_tcp_retries2);
		if (sock_flag(sk, SOCK_DEAD)) {
			const bool alive = icsk->icsk_rto < TCP_RTO_MAX;

			/* In the case of SOCK_DEAD, the retry_until value is smaller */
			retry_until = tcp_nip_orphan_retries(sk, alive);
		}
	}

	if (retransmits_nip_timed_out(sk, retry_until,
				      syn_set ? 0 : icsk->icsk_user_timeout, syn_set)) {
		nip_dbg("tcp retransmit time out");
		tcp_nip_write_err(sk);
		return 1;
	}
	return 0;
}

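/* Retransmission timer. Retransmits the head of the write queue. If the
 * retransmit itself fails, re-arms the timer with a short resource-probe
 * interval without backing off further; otherwise increments the backoff
 * and retransmit counters, applies linear timeouts for thin streams or
 * exponential backoff otherwise, drops nip_ssthresh to its low value and
 * re-arms the timer with the new RTO.
 */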
void tcp_nip_retransmit_timer(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_nip_common *ntp = &tcp_nip_sk(sk)->common;
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct sk_buff *skb = tcp_write_queue_head(sk);
	struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
	struct net *net = sock_net(sk);
	u32 icsk_rto_last;

	if (!tp->packets_out)
		return;

	if (tcp_nip_write_queue_empty(sk))
		return;

	tp->tlp_high_seq = 0;

	if (tcp_nip_write_timeout(sk))
		return;

	if (tcp_nip_retransmit_skb(sk, skb, 1) > 0) {
		if (!icsk->icsk_retransmits)
			icsk->icsk_retransmits = 1;
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
					  min(icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL),
					  TCP_RTO_MAX);

		nip_dbg("seq %u retransmit fail, win=%u, rto=%u, pkt_out=%u, icsk_backoff=%u",
			scb->seq, ntp->nip_ssthresh,
			icsk->icsk_rto, tp->packets_out, icsk->icsk_backoff);
		return;
	}

	if (icsk->icsk_backoff < READ_ONCE(net->ipv4.sysctl_tcp_retries2))
		icsk->icsk_backoff++;
	icsk->icsk_retransmits++;

	icsk_rto_last = icsk->icsk_rto;
	/* If the stream is thin, use linear timeouts. Since 'icsk_backoff' is
	 * used to reset the timer, set it to 0. Recalculate 'icsk_rto' as this
	 * might be increased if the stream oscillates between thin and thick,
	 * thus the old value might already be too high compared to the value
	 * set by 'tcp_set_rto' in tcp_input.c which resets the rto without
	 * backoff. Limit to TCP_THIN_LINEAR_RETRIES before initiating
	 * exponential backoff behaviour to avoid continuing to hammer
	 * linear-timeout retransmissions into a black hole.
	 */
	if (sk->sk_state == TCP_ESTABLISHED &&
	    (tp->thin_lto || net->ipv4.sysctl_tcp_thin_linear_timeouts) &&
	    tcp_stream_is_thin(tp) &&
	    icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
		icsk->icsk_backoff = 0;
		icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX);
	} else {
		/* Use normal (exponential) backoff */
		icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
	}

	nip_dbg("seq %u, win[%u-%u] rto[%u-%u] pkt_out=%u, icsk_backoff=%u, retransmits=%u",
		scb->seq, ntp->nip_ssthresh, get_ssthresh_low(),
		icsk_rto_last, icsk->icsk_rto, tp->packets_out, icsk->icsk_backoff,
		icsk->icsk_retransmits);

	ntp->nip_ssthresh = get_ssthresh_low();

	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
}

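/* Zero-window probe (probe0) timer. Stops probing once data is in flight or
 * there is nothing left to send; otherwise either sends another probe or,
 * when the probe limit is exceeded (or an orphaned socket is no longer
 * alive), aborts the connection.
 */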
void tcp_nip_probe_timer(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	int max_probes;
	int icsk_backoff;
	int icsk_probes_out;

	if (tp->packets_out || !tcp_nip_send_head(sk)) {
		icsk->icsk_probes_out = 0;
		icsk->icsk_probes_tstamp = 0;
		icsk->icsk_backoff = 0;  /* IPv4 does not modify this field here */
		nip_dbg("packets_out(%u) not 0 or send_head is NULL, cancel probe0 timer",
			tp->packets_out);
		return;
	}

	/* default: sock_net(sk)->ipv4.sysctl_tcp_retries2 */
	max_probes = get_nip_probe_max(); /* fix session auto close */

	if (sock_flag(sk, SOCK_DEAD)) {
		const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;

		max_probes = tcp_nip_orphan_retries(sk, alive);
		nip_dbg("sock dead, icsk_backoff=%u, max_probes=%u, alive=%u",
			icsk->icsk_backoff, max_probes, alive);
		if (!alive && icsk->icsk_backoff >= max_probes) {
			nip_dbg("will close session, icsk_backoff=%u, max_probes=%u",
				icsk->icsk_backoff, max_probes);
			goto abort;
		}
	}

	if (icsk->icsk_probes_out >= max_probes) {
abort:		icsk_backoff = icsk->icsk_backoff;
		icsk_probes_out = icsk->icsk_probes_out;
		nip_dbg("close session, probes_out=%u, icsk_backoff=%u, max_probes=%u",
			icsk_probes_out, icsk_backoff, max_probes);
		tcp_nip_write_err(sk);
	} else {
		icsk_backoff = icsk->icsk_backoff;
		icsk_probes_out = icsk->icsk_probes_out;
		nip_dbg("will send probe0, probes_out=%u, icsk_backoff=%u, max_probes=%u",
			icsk_probes_out, icsk_backoff, max_probes);
		/* Only send another probe if we didn't close things up. */
		tcp_nip_send_probe0(sk);
	}
}

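/* Shared handler for the ICSK write timer. Returns early if the socket is
 * closed/listening or no event is pending, re-arms the timer if it fired
 * early, and otherwise dispatches ICSK_TIME_RETRANS or ICSK_TIME_PROBE0 to
 * the matching handler.
 */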
void tcp_nip_write_timer_handler(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	int event;

	if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) || !icsk->icsk_pending)
		return;

	if (time_after(icsk->icsk_timeout, jiffies)) {
		sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout);
		return;
	}
	tcp_mstamp_refresh(tcp_sk(sk));
	event = icsk->icsk_pending;

	switch (event) {
	case ICSK_TIME_RETRANS:
		icsk->icsk_pending = 0;
		tcp_nip_retransmit_timer(sk);
		break;
	case ICSK_TIME_PROBE0:
		icsk->icsk_pending = 0;
		tcp_nip_probe_timer(sk);
		break;
	default:
		break;
	}
}

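/* Write timer callback. Runs the handler directly when the socket is not
 * owned by user context; otherwise defers the work to tcp_release_cb() via
 * the TCP_NIP_WRITE_TIMER_DEFERRED flag.
 */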
static void tcp_nip_write_timer(struct timer_list *t)
{
	struct inet_connection_sock *icsk =
			from_timer(icsk, t, icsk_retransmit_timer);
	struct sock *sk = &icsk->icsk_inet.sk;

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		tcp_nip_write_timer_handler(sk);
	} else {
		/* delegate our work to tcp_release_cb() */
		if (!test_and_set_bit(TCP_NIP_WRITE_TIMER_DEFERRED, &sk->sk_tsq_flags))
			sock_hold(sk);
	}
	bh_unlock_sock(sk);
	sock_put(sk);
}

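/* Check whether keepalive probing has gone unanswered for too long. Only
 * evaluated when the configured keepalive time exceeds one second (HZ).
 * Honours TCP_USER_TIMEOUT when set; otherwise compares the number of
 * probes sent against keepalive_probes(tp). Aborts the socket and returns
 * true on timeout.
 */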
static bool tcp_nip_keepalive_is_timeout(struct sock *sk, u32 elapsed)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_nip_common *ntp = &tcp_nip_sk(sk)->common;
	u32 keepalive_time = keepalive_time_when(tp);
	bool is_timeout = false;

	/* keepalive set by setsockopt */
	if (keepalive_time > HZ) {
		/* If the TCP_USER_TIMEOUT option is enabled, use that
		 * to determine when to timeout instead.
		 */
		if ((icsk->icsk_user_timeout != 0 &&
		     elapsed >= msecs_to_jiffies(icsk->icsk_user_timeout) &&
		     ntp->nip_keepalive_out > 0) ||
		     (icsk->icsk_user_timeout == 0 &&
		      ntp->nip_keepalive_out >= keepalive_probes(tp))) {
			nip_dbg("normal keepalive timeout, keepalive_out=%u",
				ntp->nip_keepalive_out);
			tcp_nip_write_err(sk);
			is_timeout = true;
		}
	}

	return is_timeout;
}

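/* Keepalive timer callback. Retries shortly if the socket is owned by user
 * context, and also releases sockets stuck in FIN_WAIT2 or CLOSING since
 * NewIP TCP has no TIME_WAIT state. Once the connection has been idle
 * longer than the keepalive time, either aborts on timeout or sends another
 * keepalive probe; otherwise re-arms the timer for the remaining idle time.
 */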
static void tcp_nip_keepalive_timer(struct timer_list *t)
{
	struct sock *sk = from_timer(sk, t, sk_timer);
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_nip_common *ntp = &tcp_nip_sk(sk)->common;
	u32 elapsed;

	/* Only process if socket is not in use. */
	bh_lock_sock(sk);
	if (sock_owned_by_user(sk)) {
		/* Try again later. */
		inet_csk_reset_keepalive_timer(sk, HZ / TCP_NIP_KEEPALIVE_CYCLE_MS_DIVISOR);
		goto out;
	}

	if (sk->sk_state == TCP_LISTEN) {
		nip_dbg("keepalive on a LISTEN");
		goto out;
	}
	tcp_mstamp_refresh(tp);
	/* 2022-02-18
	 * NewIP TCP doesn't have a TIME_WAIT state, so a socket in TCP_CLOSING
	 * uses the keepalive timer to release the socket.
	 */
	if ((sk->sk_state == TCP_FIN_WAIT2 || sk->sk_state == TCP_CLOSING) &&
	    sock_flag(sk, SOCK_DEAD)) {
		nip_dbg("finish wait, close sock, sk_state=%u", sk->sk_state);
		goto death;
	}

	if (!sock_flag(sk, SOCK_KEEPOPEN) ||
	    ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)))
		goto out;

	elapsed = keepalive_time_when(tp);

	/* It is alive without keepalive 8) */
	if (tp->packets_out || !tcp_write_queue_empty(sk))
		goto resched;

	elapsed = keepalive_time_elapsed(tp);
	if (elapsed >= keepalive_time_when(tp)) {
		if (tcp_nip_keepalive_is_timeout(sk, elapsed))
			goto out;

		if (tcp_nip_write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) {
			ntp->nip_keepalive_out++;
			ntp->idle_ka_probes_out++;
			elapsed = keepalive_intvl_when(tp);
		} else {
			/* If keepalive was lost due to local congestion,
			 * try harder.
			 */
			elapsed = TCP_RESOURCE_PROBE_INTERVAL;
		}
	} else {
		/* It is tp->rcv_tstamp + keepalive_time_when(tp) */
		elapsed = keepalive_time_when(tp) - elapsed;
	}

	sk_mem_reclaim(sk);

resched:
	inet_csk_reset_keepalive_timer(sk, elapsed);
	goto out;

death:
	tcp_nip_done(sk);

out:
	tcp_nip_keepalive_disable(sk);
	bh_unlock_sock(sk);
	sock_put(sk);
}

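/* Install the NewIP retransmit, delayed-ACK and keepalive timer callbacks
 * on the socket.
 */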
void tcp_nip_init_xmit_timers(struct sock *sk)
{
	inet_csk_init_xmit_timers(sk, &tcp_nip_write_timer, &tcp_nip_delack_timer,
				  &tcp_nip_keepalive_timer);
}

void tcp_nip_clear_xmit_timers(struct sock *sk)
{
	inet_csk_clear_xmit_timers(sk);
}