• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*-
2  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 4. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *	@(#)tcp_subr.c	8.2 (Berkeley) 5/24/95
30  */
31 
32 #include <string.h>
33 
34 #include "tcp.h"
35 #include "tcp_fsm.h"
36 #include "tcp_seq.h"
37 #include "tcp_timer.h"
38 #include "tcp_var.h"
39 
40 #include "tcp_const.h"
41 #include <openthread/ip6.h>
42 #include <openthread/message.h>
43 
44 /*
45  * samkumar: The V_nolocaltimewait variable corresponds to the
46  * net.inet.tcp.nolocaltimewait option in FreeBSD. When set to 1, it skips the
47  * TIME-WAIT state for TCP connections where both endpoints are local IP
48  * addresses, to save resources on HTTP accelerators, database servers/clients,
49  * etc. In TCPlp, I eliminated support for this feature, but I have kept the
50  * code for it, commented out with "#if 0", in case we choose to bring it back
51  * at a later time.
52  *
53  * See also the "#if 0" block in tcp_twstart.
54  */
#if 0
/*
 * Compiled out. V_nolocaltimewait stands in for FreeBSD's
 * net.inet.tcp.nolocaltimewait option and is pinned to 0 here; the only
 * code that reads it is the matching "#if 0" block in tcp_twstart.
 */
enum tcp_timewait_consts {
	V_nolocaltimewait = 0
};
#endif
60 
61 /*
62  * samkumar: The FreeBSD code used a separate, smaller structure, called
 * struct tcptw, to represent connections in the TIME-WAIT state. In TCPlp,
64  * we use the full struct tcpcb structure even in the TIME-WAIT state. This
65  * consumes more memory, but switching to a different structure like
66  * struct tcptw to save memory would be difficult because the host system or
67  * application has allocated these structures; we can't simply "free" the
68  * struct tcpcb. It would have to have been done via a callback or something,
69  * and in the common case of statically allocated sockets, this would actually
70  * result in more memory (since an application would need to allocate both the
71  * struct tcpcb and the struct tcptw, if it uses a static allocation approach).
72  *
73  * Below, I've changed the function signatures to accept "struct tcpcb* tp"
74  * instead of "struct tcptw *tw" and I have reimplemented the functions
75  * to work using tp (of type struct tcpcb) instead of tw (of type
76  * struct tcptw).
77  *
78  * Conceptually, the biggest change is in how timers are handled. The FreeBSD
79  * code had a 2MSL timer, which was set for sockets that enter certain
80  * "closing" states of the TCP state machine. But when the TIME-WAIT state was
81  * entered, the state is transferred from struct tcpcb into struct tcptw.
82  * The final timeout is handled as follows; the function tcp_tw_2msl_scan is
83  * called periodically on the slow timer, and it iterates over a linked list
84  * of all the struct tcptw and checks the tw->tw_time field to identify which
85  * TIME-WAIT sockets have expired.
86  *
87  * In our switch to using struct tcpcb even in the TIME-WAIT state, we rely on
88  * the timer system for struct tcpcb. I modified the 2msl callback in
89  * tcp_timer.c to check for the TIME-WAIT case and handle it correctly.
90  */
91 
92 static void
tcp_tw_2msl_reset(struct tcpcb * tp,int rearm)93 tcp_tw_2msl_reset(struct tcpcb* tp, int rearm)
94 {
95 	/*
96 	 * samkumar: This function used to set tw->tw_time to ticks + 2 * tcp_msl
97 	 * and insert tw into the linked list V_twq_2msl. I've replaced this, along
98 	 * with the associated locking logic, with the following call, which uses
99 	 * the timer system in place for full TCBs.
100 	 */
101 	tcp_timer_activate(tp, TT_2MSL, 2 * tcp_msl);
102 }
103 
104 /*
 * samkumar: I've rewritten this code since I need to send out packets via the
 * host system for TCPlp: allocating buffers from the host system, populating
 * them, and then passing them back to the host system. I simplified the code by
108  * only using the logic that was fully necessary, eliminating the code for IPv4
109  * packets and keeping only the code for IPv6 packets. I also removed all of
110  * the mbuf logic, instead using the logic for using the host system's
111  * buffering.
112  *
113  * This rewritten code always returns 0. The original code would return
114  * whatever is returned by ip_output or ip6_output (FreeBSD's functions for
115  * sending out IP packets). I believe 0 indicates success, and a nonzero
116  * value represents an error code. It seems that the return value of
117  * tcp_twrespond is ignored by all instances of its use in TCPlp (maybe even
118  * in all of FreeBSD), so this is a moot point.
119  */
120 static int
tcp_twrespond(struct tcpcb * tp,int flags)121 tcp_twrespond(struct tcpcb* tp, int flags)
122 {
123 	struct tcphdr* nth;
124 	struct tcpopt to;
125 	uint32_t optlen = 0;
126 	uint8_t opt[TCP_MAXOLEN];
127 
128 	to.to_flags = 0;
129 
130 	/*
131 	 * Send a timestamp and echo-reply if both our side and our peer
132 	 * have sent timestamps in our SYN's and this is not a RST.
133 	 */
134 	if ((tp->t_flags & TF_RCVD_TSTMP) && flags == TH_ACK) {
135 		to.to_flags |= TOF_TS;
136 		to.to_tsval = tcp_ts_getticks() + tp->ts_offset;
137 		to.to_tsecr = tp->ts_recent;
138 	}
139 	optlen = tcp_addoptions(&to, opt);
140 
141 	otMessage* message = tcplp_sys_new_message(tp->instance);
142 	if (message == NULL) {
143 		return 0; // drop the message
144 	}
145 	if (otMessageSetLength(message, sizeof(struct tcphdr) + optlen) != OT_ERROR_NONE) {
146 		tcplp_sys_free_message(tp->instance, message);
147 		return 0; // drop the message
148 	}
149 
150 	char outbuf[sizeof(struct tcphdr) + optlen];
151 	nth = (struct tcphdr*) &outbuf[0];
152 	otMessageInfo ip6info;
153 	memset(&ip6info, 0x00, sizeof(ip6info));
154 
155 	memcpy(&ip6info.mSockAddr, &tp->laddr, sizeof(ip6info.mSockAddr));
156 	memcpy(&ip6info.mPeerAddr, &tp->faddr, sizeof(ip6info.mPeerAddr));
157 	nth->th_sport = tp->lport;
158 	nth->th_dport = tp->fport;
159 	nth->th_seq = htonl(tp->snd_nxt);
160 	nth->th_ack = htonl(tp->rcv_nxt);
161 	nth->th_off_x2 = ((sizeof(struct tcphdr) + optlen) >> 2) << TH_OFF_SHIFT;
162 	nth->th_flags = flags;
163 	nth->th_win = htons(tp->tw_last_win);
164 	nth->th_urp = 0;
165 	nth->th_sum = 0;
166 
167 	memcpy(nth + 1, opt, optlen);
168 	otMessageWrite(message, 0, outbuf, sizeof(struct tcphdr) + optlen);
169 	tcplp_sys_send_message(tp->instance, message, &ip6info);
170 
171 	return 0;
172 }
173 
174 /*
175  * Move a TCP connection into TIME_WAIT state.
176  *    tcbinfo is locked.
177  *    inp is locked, and is unlocked before returning.
178  */
179 /*
 * samkumar: Locking is removed (so the above comments regarding locks are no
 * longer relevant for TCPlp). Rather than allocating a struct tcptw and
182  * discarding the struct tcpcb, this function just switches the tcpcb state
183  * to correspond to TIME-WAIT (updating variables as appropriate). We also
184  * eliminate the "V_nolocaltimewait" optimization.
185  */
186 void
tcp_twstart(struct tcpcb * tp)187 tcp_twstart(struct tcpcb *tp)
188 {
189 	int acknow;
190 
191 	/*
192 	 * samkumar: The following code, commented out using "#if 0", handles the
193 	 * net.inet.tcp.nolocaltimewait option in FreeBSD. The option skips the
194 	 * TIME-WAIT state for TCP connections where both endpoints are local.
195 	 * I'm removing this optimization for TCPlp, but I've left the code
196 	 * commented out as it's a potentially useful feature that we may choose
197 	 * to restore later.
198 	 *
199 	 * See also the "#if 0" block near the top of this file.
200 	 */
201 #if 0
202 	if (V_nolocaltimewait) {
203 		int error = 0;
204 #ifdef INET6
205 		if (isipv6)
206 			error = in6_localaddr(&inp->in6p_faddr);
207 #endif
208 #if defined(INET6) && defined(INET)
209 		else
210 #endif
211 #ifdef INET
212 			error = in_localip(inp->inp_faddr);
213 #endif
214 		if (error) {
215 			tp = tcp_close(tp);
216 			if (tp != NULL)
217 				INP_WUNLOCK(inp);
218 			return;
219 		}
220 	}
221 #endif
222 
223 	/*
224 	 * For use only by DTrace.  We do not reference the state
225 	 * after this point so modifying it in place is not a problem.
226 	 */
227 	/*
228 	 * samkumar: The above comment is not true anymore. I use this state, since
229 	 * I don't associate every struct tcpcb with a struct inpcb.
230 	 */
231 	tcp_state_change(tp, TCPS_TIME_WAIT);
232 
233 	/*
234 	 * samkumar: There used to be code here to allocate a struct tcptw
235 	 * using "tw = uma_zalloc(V_tcptw_zone, M_NOWAIT);" and if it fails, close
236 	 * an existing TIME-WAIT connection, in LRU fashion, to allocate memory.
237 	 */
238 
239 	/*
240 	 * Recover last window size sent.
241 	 */
242 	if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt))
243 		tp->tw_last_win = (tp->rcv_adv - tp->rcv_nxt) >> tp->rcv_scale;
244 	else
245 		tp->tw_last_win = 0;
246 
247 	/*
248 	 * Set t_recent if timestamps are used on the connection.
249 	 */
250 	if ((tp->t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP|TF_NOOPT)) ==
251 	    (TF_REQ_TSTMP|TF_RCVD_TSTMP)) {
252 		/*
253 		 * samkumar: This used to do:
254 		 * tw->t_recent = tp->ts_recent;
255 		 * tw->ts_offset = tp->ts_offset;
256 		 * But since we're keeping the state in tp, we don't need to do this
257 		 * anymore. */
258 	} else {
259 		tp->ts_recent = 0;
260 		tp->ts_offset = 0;
261 	}
262 
263 	/*
264 	 * samkumar: There used to be code here to populate various fields in
265 	 * tw based on their values in tp, but there's no need for that now since
266 	 * we can just read the values from tp. tw->tw_time was set to 0, but we
267 	 * don't need to do that either since we're relying on the old timer system
268 	 * anyway.
269 	 */
270 
271 /* XXX
272  * If this code will
273  * be used for fin-wait-2 state also, then we may need
274  * a ts_recent from the last segment.
275  */
276 	acknow = tp->t_flags & TF_ACKNOW;
277 
278 	/*
279 	 * First, discard tcpcb state, which includes stopping its timers and
280 	 * freeing it.  tcp_discardcb() used to also release the inpcb, but
281 	 * that work is now done in the caller.
282 	 *
283 	 * Note: soisdisconnected() call used to be made in tcp_discardcb(),
284 	 * and might not be needed here any longer.
285 	 */
286 	/*
287 	 * samkumar: Below, I removed the code to discard tp, update inpcb and
288 	 * release a reference to socket, but kept the rest. I also added a call
289 	 * to cancel any pending timers on the TCB (which discarding it, as the
290 	 * original code did, would have done).
291 	 */
292 	tcp_cancel_timers(tp);
293 	if (acknow)
294 		tcp_twrespond(tp, TH_ACK);
295 	tcp_tw_2msl_reset(tp, 0);
296 }
297 
298 /*
299  * Returns 1 if the TIME_WAIT state was killed and we should start over,
300  * looking for a pcb in the listen state.  Returns 0 otherwise.
301  */
302 /*
303  * samkumar: Old signature was
304  * int
305  * tcp_twcheck(struct inpcb *inp, struct tcpopt *to, struct tcphdr *th,
306  *    struct mbuf *m, int tlen)
307  */
308 int
tcp_twcheck(struct tcpcb * tp,struct tcphdr * th,int tlen)309 tcp_twcheck(struct tcpcb* tp, struct tcphdr *th, int tlen)
310 {
311 	int thflags;
312 	tcp_seq seq;
313 
314 	/*
315 	 * samkumar: There used to be code here that obtains the struct tcptw from
316 	 * the inpcb, and does "goto drop" if that fails.
317 	 */
318 
319 	thflags = th->th_flags;
320 
321 	/*
322 	 * NOTE: for FIN_WAIT_2 (to be added later),
323 	 * must validate sequence number before accepting RST
324 	 */
325 
326 	/*
327 	 * If the segment contains RST:
328 	 *	Drop the segment - see Stevens, vol. 2, p. 964 and
329 	 *      RFC 1337.
330 	 */
331 	if (thflags & TH_RST)
332 		goto drop;
333 
334 	/*
335 	 * samkumar: This was commented out (using #if 0) in the original FreeBSD
336 	 * code.
337 	 */
338 #if 0
339 /* PAWS not needed at the moment */
340 	/*
341 	 * RFC 1323 PAWS: If we have a timestamp reply on this segment
342 	 * and it's less than ts_recent, drop it.
343 	 */
344 	if ((to.to_flags & TOF_TS) != 0 && tp->ts_recent &&
345 	    TSTMP_LT(to.to_tsval, tp->ts_recent)) {
346 		if ((thflags & TH_ACK) == 0)
347 			goto drop;
348 		goto ack;
349 	}
350 	/*
351 	 * ts_recent is never updated because we never accept new segments.
352 	 */
353 #endif
354 
355 	/*
356 	 * If a new connection request is received
357 	 * while in TIME_WAIT, drop the old connection
358 	 * and start over if the sequence numbers
359 	 * are above the previous ones.
360 	 */
361 	if ((thflags & TH_SYN) && SEQ_GT(th->th_seq, tp->rcv_nxt)) {
362 		/*
363 		 * samkumar: The FreeBSD code would call tcp_twclose(tw, 0); but we
364 		 * do it as below since TCPlp represents TIME-WAIT connects as
365 		 * struct tcpcb's.
366 		 */
367 		tcp_close(tp);
368 		tcplp_sys_connection_lost(tp, CONN_LOST_NORMAL);
369 		return (1);
370 	}
371 
372 	/*
373 	 * Drop the segment if it does not contain an ACK.
374 	 */
375 	if ((thflags & TH_ACK) == 0)
376 		goto drop;
377 
378 	/*
379 	 * Reset the 2MSL timer if this is a duplicate FIN.
380 	 */
381 	if (thflags & TH_FIN) {
382 		seq = th->th_seq + tlen + (thflags & TH_SYN ? 1 : 0);
383 		if (seq + 1 == tp->rcv_nxt)
384 			tcp_tw_2msl_reset(tp, 1);
385 	}
386 
387 	/*
388 	 * Acknowledge the segment if it has data or is not a duplicate ACK.
389 	 */
390 	if (thflags != TH_ACK || tlen != 0 ||
391 	    th->th_seq != tp->rcv_nxt || th->th_ack != tp->snd_nxt)
392 		tcp_twrespond(tp, TH_ACK);
393 drop:
394 	return (0);
395 }
396