1 | /* |
---|
2 | * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 |
---|
3 | * The Regents of the University of California. All rights reserved. |
---|
4 | * |
---|
5 | * Redistribution and use in source and binary forms, with or without |
---|
6 | * modification, are permitted provided that the following conditions |
---|
7 | * are met: |
---|
8 | * 1. Redistributions of source code must retain the above copyright |
---|
9 | * notice, this list of conditions and the following disclaimer. |
---|
10 | * 2. Redistributions in binary form must reproduce the above copyright |
---|
11 | * notice, this list of conditions and the following disclaimer in the |
---|
12 | * documentation and/or other materials provided with the distribution. |
---|
13 | * 3. All advertising materials mentioning features or use of this software |
---|
14 | * must display the following acknowledgement: |
---|
15 | * This product includes software developed by the University of |
---|
16 | * California, Berkeley and its contributors. |
---|
17 | * 4. Neither the name of the University nor the names of its contributors |
---|
18 | * may be used to endorse or promote products derived from this software |
---|
19 | * without specific prior written permission. |
---|
20 | * |
---|
21 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
---|
22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
---|
23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
---|
24 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
---|
25 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
---|
26 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
---|
27 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
---|
28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
---|
29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
---|
30 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
---|
31 | * SUCH DAMAGE. |
---|
32 | * |
---|
33 | * @(#)tcp_output.c 8.4 (Berkeley) 5/24/95 |
---|
34 | * $Id$ |
---|
35 | */ |
---|
36 | |
---|
37 | #include "opt_tcpdebug.h" |
---|
38 | |
---|
39 | #include <sys/param.h> |
---|
40 | #include <sys/queue.h> |
---|
41 | #include <sys/systm.h> |
---|
42 | #include <sys/malloc.h> |
---|
43 | #include <sys/mbuf.h> |
---|
44 | #include <sys/protosw.h> |
---|
45 | #include <sys/socket.h> |
---|
46 | #include <sys/socketvar.h> |
---|
47 | #include <sys/errno.h> |
---|
48 | |
---|
49 | #include <net/route.h> |
---|
50 | |
---|
51 | #include <netinet/in.h> |
---|
52 | #include <netinet/in_systm.h> |
---|
53 | #include <netinet/ip.h> |
---|
54 | #include <netinet/in_pcb.h> |
---|
55 | #include <netinet/ip_var.h> |
---|
56 | #include <netinet/tcp.h> |
---|
57 | #define TCPOUTFLAGS |
---|
58 | #include <netinet/tcp_fsm.h> |
---|
59 | #include <netinet/tcp_seq.h> |
---|
60 | #include <netinet/tcp_timer.h> |
---|
61 | #include <netinet/tcp_var.h> |
---|
62 | #include <netinet/tcpip.h> |
---|
63 | #ifdef TCPDEBUG |
---|
64 | #include <netinet/tcp_debug.h> |
---|
65 | #endif |
---|
66 | |
---|
67 | #ifdef notyet |
---|
68 | extern struct mbuf *m_copypack(); |
---|
69 | #endif |
---|
70 | |
---|
71 | |
---|
72 | /* |
---|
73 | * Tcp output routine: figure out what should be sent and send it. |
---|
74 | */ |
---|
75 | int |
---|
76 | tcp_output(tp) |
---|
77 | register struct tcpcb *tp; |
---|
78 | { |
---|
79 | register struct socket *so = tp->t_inpcb->inp_socket; |
---|
80 | register long len, win; |
---|
81 | int off, flags, error; |
---|
82 | register struct mbuf *m; |
---|
83 | register struct tcpiphdr *ti; |
---|
84 | u_char opt[TCP_MAXOLEN]; |
---|
85 | unsigned optlen, hdrlen; |
---|
86 | int idle, sendalot; |
---|
87 | struct rmxp_tao *taop; |
---|
88 | struct rmxp_tao tao_noncached; |
---|
89 | |
---|
90 | /* |
---|
91 | * Determine length of data that should be transmitted, |
---|
92 | * and flags that will be used. |
---|
93 | * If there is some data or critical controls (SYN, RST) |
---|
94 | * to send, then transmit; otherwise, investigate further. |
---|
95 | */ |
---|
96 | idle = (tp->snd_max == tp->snd_una); |
---|
97 | if (idle && tp->t_idle >= tp->t_rxtcur) |
---|
98 | /* |
---|
99 | * We have been idle for "a while" and no acks are |
---|
100 | * expected to clock out any data we send -- |
---|
101 | * slow start to get ack "clock" running again. |
---|
102 | */ |
---|
103 | tp->snd_cwnd = tp->t_maxseg; |
---|
104 | again: |
---|
105 | sendalot = 0; |
---|
106 | off = tp->snd_nxt - tp->snd_una; |
---|
107 | win = min(tp->snd_wnd, tp->snd_cwnd); |
---|
108 | |
---|
109 | flags = tcp_outflags[tp->t_state]; |
---|
110 | /* |
---|
111 | * Get standard flags, and add SYN or FIN if requested by 'hidden' |
---|
112 | * state flags. |
---|
113 | */ |
---|
114 | if (tp->t_flags & TF_NEEDFIN) |
---|
115 | flags |= TH_FIN; |
---|
116 | if (tp->t_flags & TF_NEEDSYN) |
---|
117 | flags |= TH_SYN; |
---|
118 | |
---|
119 | /* |
---|
120 | * If in persist timeout with window of 0, send 1 byte. |
---|
121 | * Otherwise, if window is small but nonzero |
---|
122 | * and timer expired, we will send what we can |
---|
123 | * and go to transmit state. |
---|
124 | */ |
---|
125 | if (tp->t_force) { |
---|
126 | if (win == 0) { |
---|
127 | /* |
---|
128 | * If we still have some data to send, then |
---|
129 | * clear the FIN bit. Usually this would |
---|
130 | * happen below when it realizes that we |
---|
131 | * aren't sending all the data. However, |
---|
132 | * if we have exactly 1 byte of unset data, |
---|
133 | * then it won't clear the FIN bit below, |
---|
134 | * and if we are in persist state, we wind |
---|
135 | * up sending the packet without recording |
---|
136 | * that we sent the FIN bit. |
---|
137 | * |
---|
138 | * We can't just blindly clear the FIN bit, |
---|
139 | * because if we don't have any more data |
---|
140 | * to send then the probe will be the FIN |
---|
141 | * itself. |
---|
142 | */ |
---|
143 | if (off < so->so_snd.sb_cc) |
---|
144 | flags &= ~TH_FIN; |
---|
145 | win = 1; |
---|
146 | } else { |
---|
147 | tp->t_timer[TCPT_PERSIST] = 0; |
---|
148 | tp->t_rxtshift = 0; |
---|
149 | } |
---|
150 | } |
---|
151 | |
---|
152 | len = min(so->so_snd.sb_cc, win) - off; |
---|
153 | |
---|
154 | if ((taop = tcp_gettaocache(tp->t_inpcb)) == NULL) { |
---|
155 | taop = &tao_noncached; |
---|
156 | bzero(taop, sizeof(*taop)); |
---|
157 | } |
---|
158 | |
---|
159 | /* |
---|
160 | * Lop off SYN bit if it has already been sent. However, if this |
---|
161 | * is SYN-SENT state and if segment contains data and if we don't |
---|
162 | * know that foreign host supports TAO, suppress sending segment. |
---|
163 | */ |
---|
164 | if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) { |
---|
165 | flags &= ~TH_SYN; |
---|
166 | off--, len++; |
---|
167 | if (len > 0 && tp->t_state == TCPS_SYN_SENT && |
---|
168 | taop->tao_ccsent == 0) |
---|
169 | return 0; |
---|
170 | } |
---|
171 | |
---|
172 | /* |
---|
173 | * Be careful not to send data and/or FIN on SYN segments |
---|
174 | * in cases when no CC option will be sent. |
---|
175 | * This measure is needed to prevent interoperability problems |
---|
176 | * with not fully conformant TCP implementations. |
---|
177 | */ |
---|
178 | if ((flags & TH_SYN) && |
---|
179 | ((tp->t_flags & TF_NOOPT) || !(tp->t_flags & TF_REQ_CC) || |
---|
180 | ((flags & TH_ACK) && !(tp->t_flags & TF_RCVD_CC)))) { |
---|
181 | len = 0; |
---|
182 | flags &= ~TH_FIN; |
---|
183 | } |
---|
184 | |
---|
185 | if (len < 0) { |
---|
186 | /* |
---|
187 | * If FIN has been sent but not acked, |
---|
188 | * but we haven't been called to retransmit, |
---|
189 | * len will be -1. Otherwise, window shrank |
---|
190 | * after we sent into it. If window shrank to 0, |
---|
191 | * cancel pending retransmit, pull snd_nxt back |
---|
192 | * to (closed) window, and set the persist timer |
---|
193 | * if it isn't already going. If the window didn't |
---|
194 | * close completely, just wait for an ACK. |
---|
195 | */ |
---|
196 | len = 0; |
---|
197 | if (win == 0) { |
---|
198 | tp->t_timer[TCPT_REXMT] = 0; |
---|
199 | tp->t_rxtshift = 0; |
---|
200 | tp->snd_nxt = tp->snd_una; |
---|
201 | if (tp->t_timer[TCPT_PERSIST] == 0) |
---|
202 | tcp_setpersist(tp); |
---|
203 | } |
---|
204 | } |
---|
205 | if (len > tp->t_maxseg) { |
---|
206 | len = tp->t_maxseg; |
---|
207 | sendalot = 1; |
---|
208 | } |
---|
209 | if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc)) |
---|
210 | flags &= ~TH_FIN; |
---|
211 | |
---|
212 | win = sbspace(&so->so_rcv); |
---|
213 | |
---|
214 | /* |
---|
215 | * Sender silly window avoidance. If connection is idle |
---|
216 | * and can send all data, a maximum segment, |
---|
217 | * at least a maximum default-size segment do it, |
---|
218 | * or are forced, do it; otherwise don't bother. |
---|
219 | * If peer's buffer is tiny, then send |
---|
220 | * when window is at least half open. |
---|
221 | * If retransmitting (possibly after persist timer forced us |
---|
222 | * to send into a small window), then must resend. |
---|
223 | */ |
---|
224 | if (len) { |
---|
225 | if (len == tp->t_maxseg) |
---|
226 | goto send; |
---|
227 | if ((idle || tp->t_flags & TF_NODELAY) && |
---|
228 | (tp->t_flags & TF_NOPUSH) == 0 && |
---|
229 | len + off >= so->so_snd.sb_cc) |
---|
230 | goto send; |
---|
231 | if (tp->t_force) |
---|
232 | goto send; |
---|
233 | if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0) |
---|
234 | goto send; |
---|
235 | if (SEQ_LT(tp->snd_nxt, tp->snd_max)) |
---|
236 | goto send; |
---|
237 | } |
---|
238 | |
---|
239 | /* |
---|
240 | * Compare available window to amount of window |
---|
241 | * known to peer (as advertised window less |
---|
242 | * next expected input). If the difference is at least two |
---|
243 | * max size segments, or at least 50% of the maximum possible |
---|
244 | * window, then want to send a window update to peer. |
---|
245 | */ |
---|
246 | if (win > 0) { |
---|
247 | /* |
---|
248 | * "adv" is the amount we can increase the window, |
---|
249 | * taking into account that we are limited by |
---|
250 | * TCP_MAXWIN << tp->rcv_scale. |
---|
251 | */ |
---|
252 | long adv = min(win, (long)TCP_MAXWIN << tp->rcv_scale) - |
---|
253 | (tp->rcv_adv - tp->rcv_nxt); |
---|
254 | |
---|
255 | if (adv >= (long) (2 * tp->t_maxseg)) |
---|
256 | goto send; |
---|
257 | if (2 * adv >= (long) so->so_rcv.sb_hiwat) |
---|
258 | goto send; |
---|
259 | } |
---|
260 | |
---|
261 | /* |
---|
262 | * Send if we owe peer an ACK. |
---|
263 | */ |
---|
264 | if (tp->t_flags & TF_ACKNOW) |
---|
265 | goto send; |
---|
266 | if ((flags & TH_RST) || |
---|
267 | ((flags & TH_SYN) && (tp->t_flags & TF_NEEDSYN) == 0)) |
---|
268 | goto send; |
---|
269 | if (SEQ_GT(tp->snd_up, tp->snd_una)) |
---|
270 | goto send; |
---|
271 | /* |
---|
272 | * If our state indicates that FIN should be sent |
---|
273 | * and we have not yet done so, or we're retransmitting the FIN, |
---|
274 | * then we need to send. |
---|
275 | */ |
---|
276 | if (flags & TH_FIN && |
---|
277 | ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una)) |
---|
278 | goto send; |
---|
279 | |
---|
280 | /* |
---|
281 | * TCP window updates are not reliable, rather a polling protocol |
---|
282 | * using ``persist'' packets is used to insure receipt of window |
---|
283 | * updates. The three ``states'' for the output side are: |
---|
284 | * idle not doing retransmits or persists |
---|
285 | * persisting to move a small or zero window |
---|
286 | * (re)transmitting and thereby not persisting |
---|
287 | * |
---|
288 | * tp->t_timer[TCPT_PERSIST] |
---|
289 | * is set when we are in persist state. |
---|
290 | * tp->t_force |
---|
291 | * is set when we are called to send a persist packet. |
---|
292 | * tp->t_timer[TCPT_REXMT] |
---|
293 | * is set when we are retransmitting |
---|
294 | * The output side is idle when both timers are zero. |
---|
295 | * |
---|
296 | * If send window is too small, there is data to transmit, and no |
---|
297 | * retransmit or persist is pending, then go to persist state. |
---|
298 | * If nothing happens soon, send when timer expires: |
---|
299 | * if window is nonzero, transmit what we can, |
---|
300 | * otherwise force out a byte. |
---|
301 | */ |
---|
302 | if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 && |
---|
303 | tp->t_timer[TCPT_PERSIST] == 0) { |
---|
304 | tp->t_rxtshift = 0; |
---|
305 | tcp_setpersist(tp); |
---|
306 | } |
---|
307 | |
---|
308 | /* |
---|
309 | * No reason to send a segment, just return. |
---|
310 | */ |
---|
311 | return (0); |
---|
312 | |
---|
313 | send: |
---|
314 | /* |
---|
315 | * Before ESTABLISHED, force sending of initial options |
---|
316 | * unless TCP set not to do any options. |
---|
317 | * NOTE: we assume that the IP/TCP header plus TCP options |
---|
318 | * always fit in a single mbuf, leaving room for a maximum |
---|
319 | * link header, i.e. |
---|
320 | * max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MHLEN |
---|
321 | */ |
---|
322 | optlen = 0; |
---|
323 | hdrlen = sizeof (struct tcpiphdr); |
---|
324 | if (flags & TH_SYN) { |
---|
325 | tp->snd_nxt = tp->iss; |
---|
326 | if ((tp->t_flags & TF_NOOPT) == 0) { |
---|
327 | u_short mss; |
---|
328 | |
---|
329 | opt[0] = TCPOPT_MAXSEG; |
---|
330 | opt[1] = TCPOLEN_MAXSEG; |
---|
331 | mss = htons((u_short) tcp_mssopt(tp)); |
---|
332 | (void)memcpy(opt + 2, &mss, sizeof(mss)); |
---|
333 | optlen = TCPOLEN_MAXSEG; |
---|
334 | |
---|
335 | if ((tp->t_flags & TF_REQ_SCALE) && |
---|
336 | ((flags & TH_ACK) == 0 || |
---|
337 | (tp->t_flags & TF_RCVD_SCALE))) { |
---|
338 | *((u_long *) (opt + optlen)) = htonl( |
---|
339 | TCPOPT_NOP << 24 | |
---|
340 | TCPOPT_WINDOW << 16 | |
---|
341 | TCPOLEN_WINDOW << 8 | |
---|
342 | tp->request_r_scale); |
---|
343 | optlen += 4; |
---|
344 | } |
---|
345 | } |
---|
346 | } |
---|
347 | |
---|
348 | /* |
---|
349 | * Send a timestamp and echo-reply if this is a SYN and our side |
---|
350 | * wants to use timestamps (TF_REQ_TSTMP is set) or both our side |
---|
351 | * and our peer have sent timestamps in our SYN's. |
---|
352 | */ |
---|
353 | if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP && |
---|
354 | (flags & TH_RST) == 0 && |
---|
355 | ((flags & TH_ACK) == 0 || |
---|
356 | (tp->t_flags & TF_RCVD_TSTMP))) { |
---|
357 | u_long *lp = (u_long *)(opt + optlen); |
---|
358 | |
---|
359 | /* Form timestamp option as shown in appendix A of RFC 1323. */ |
---|
360 | *lp++ = htonl(TCPOPT_TSTAMP_HDR); |
---|
361 | *lp++ = htonl(tcp_now); |
---|
362 | *lp = htonl(tp->ts_recent); |
---|
363 | optlen += TCPOLEN_TSTAMP_APPA; |
---|
364 | } |
---|
365 | |
---|
366 | /* |
---|
367 | * Send `CC-family' options if our side wants to use them (TF_REQ_CC), |
---|
368 | * options are allowed (!TF_NOOPT) and it's not a RST. |
---|
369 | */ |
---|
370 | if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC && |
---|
371 | (flags & TH_RST) == 0) { |
---|
372 | switch (flags & (TH_SYN|TH_ACK)) { |
---|
373 | /* |
---|
374 | * This is a normal ACK, send CC if we received CC before |
---|
375 | * from our peer. |
---|
376 | */ |
---|
377 | case TH_ACK: |
---|
378 | if (!(tp->t_flags & TF_RCVD_CC)) |
---|
379 | break; |
---|
380 | /*FALLTHROUGH*/ |
---|
381 | |
---|
382 | /* |
---|
383 | * We can only get here in T/TCP's SYN_SENT* state, when |
---|
384 | * we're a sending a non-SYN segment without waiting for |
---|
385 | * the ACK of our SYN. A check above assures that we only |
---|
386 | * do this if our peer understands T/TCP. |
---|
387 | */ |
---|
388 | case 0: |
---|
389 | opt[optlen++] = TCPOPT_NOP; |
---|
390 | opt[optlen++] = TCPOPT_NOP; |
---|
391 | opt[optlen++] = TCPOPT_CC; |
---|
392 | opt[optlen++] = TCPOLEN_CC; |
---|
393 | *(u_int32_t *)&opt[optlen] = htonl(tp->cc_send); |
---|
394 | |
---|
395 | optlen += 4; |
---|
396 | break; |
---|
397 | |
---|
398 | /* |
---|
399 | * This is our initial SYN, check whether we have to use |
---|
400 | * CC or CC.new. |
---|
401 | */ |
---|
402 | case TH_SYN: |
---|
403 | opt[optlen++] = TCPOPT_NOP; |
---|
404 | opt[optlen++] = TCPOPT_NOP; |
---|
405 | opt[optlen++] = tp->t_flags & TF_SENDCCNEW ? |
---|
406 | TCPOPT_CCNEW : TCPOPT_CC; |
---|
407 | opt[optlen++] = TCPOLEN_CC; |
---|
408 | *(u_int32_t *)&opt[optlen] = htonl(tp->cc_send); |
---|
409 | optlen += 4; |
---|
410 | break; |
---|
411 | |
---|
412 | /* |
---|
413 | * This is a SYN,ACK; send CC and CC.echo if we received |
---|
414 | * CC from our peer. |
---|
415 | */ |
---|
416 | case (TH_SYN|TH_ACK): |
---|
417 | if (tp->t_flags & TF_RCVD_CC) { |
---|
418 | opt[optlen++] = TCPOPT_NOP; |
---|
419 | opt[optlen++] = TCPOPT_NOP; |
---|
420 | opt[optlen++] = TCPOPT_CC; |
---|
421 | opt[optlen++] = TCPOLEN_CC; |
---|
422 | *(u_int32_t *)&opt[optlen] = |
---|
423 | htonl(tp->cc_send); |
---|
424 | optlen += 4; |
---|
425 | opt[optlen++] = TCPOPT_NOP; |
---|
426 | opt[optlen++] = TCPOPT_NOP; |
---|
427 | opt[optlen++] = TCPOPT_CCECHO; |
---|
428 | opt[optlen++] = TCPOLEN_CC; |
---|
429 | *(u_int32_t *)&opt[optlen] = |
---|
430 | htonl(tp->cc_recv); |
---|
431 | optlen += 4; |
---|
432 | } |
---|
433 | break; |
---|
434 | } |
---|
435 | } |
---|
436 | |
---|
437 | hdrlen += optlen; |
---|
438 | |
---|
439 | /* |
---|
440 | * Adjust data length if insertion of options will |
---|
441 | * bump the packet length beyond the t_maxopd length. |
---|
442 | * Clear the FIN bit because we cut off the tail of |
---|
443 | * the segment. |
---|
444 | */ |
---|
445 | if (len + optlen > tp->t_maxopd) { |
---|
446 | /* |
---|
447 | * If there is still more to send, don't close the connection. |
---|
448 | */ |
---|
449 | flags &= ~TH_FIN; |
---|
450 | len = tp->t_maxopd - optlen; |
---|
451 | sendalot = 1; |
---|
452 | } |
---|
453 | |
---|
454 | /*#ifdef DIAGNOSTIC*/ |
---|
455 | if (max_linkhdr + hdrlen > MHLEN) |
---|
456 | panic("tcphdr too big"); |
---|
457 | /*#endif*/ |
---|
458 | |
---|
459 | /* |
---|
460 | * Grab a header mbuf, attaching a copy of data to |
---|
461 | * be transmitted, and initialize the header from |
---|
462 | * the template for sends on this connection. |
---|
463 | */ |
---|
464 | if (len) { |
---|
465 | if (tp->t_force && len == 1) |
---|
466 | tcpstat.tcps_sndprobe++; |
---|
467 | else if (SEQ_LT(tp->snd_nxt, tp->snd_max)) { |
---|
468 | tcpstat.tcps_sndrexmitpack++; |
---|
469 | tcpstat.tcps_sndrexmitbyte += len; |
---|
470 | } else { |
---|
471 | tcpstat.tcps_sndpack++; |
---|
472 | tcpstat.tcps_sndbyte += len; |
---|
473 | } |
---|
474 | #ifdef notyet |
---|
475 | if ((m = m_copypack(so->so_snd.sb_mb, off, |
---|
476 | (int)len, max_linkhdr + hdrlen)) == 0) { |
---|
477 | error = ENOBUFS; |
---|
478 | goto out; |
---|
479 | } |
---|
480 | /* |
---|
481 | * m_copypack left space for our hdr; use it. |
---|
482 | */ |
---|
483 | m->m_len += hdrlen; |
---|
484 | m->m_data -= hdrlen; |
---|
485 | #else |
---|
486 | MGETHDR(m, M_DONTWAIT, MT_HEADER); |
---|
487 | if (m == NULL) { |
---|
488 | error = ENOBUFS; |
---|
489 | goto out; |
---|
490 | } |
---|
491 | m->m_data += max_linkhdr; |
---|
492 | m->m_len = hdrlen; |
---|
493 | if (len <= MHLEN - hdrlen - max_linkhdr) { |
---|
494 | m_copydata(so->so_snd.sb_mb, off, (int) len, |
---|
495 | mtod(m, caddr_t) + hdrlen); |
---|
496 | m->m_len += len; |
---|
497 | } else { |
---|
498 | m->m_next = m_copy(so->so_snd.sb_mb, off, (int) len); |
---|
499 | if (m->m_next == 0) { |
---|
500 | (void) m_free(m); |
---|
501 | error = ENOBUFS; |
---|
502 | goto out; |
---|
503 | } |
---|
504 | } |
---|
505 | #endif |
---|
506 | /* |
---|
507 | * If we're sending everything we've got, set PUSH. |
---|
508 | * (This will keep happy those implementations which only |
---|
509 | * give data to the user when a buffer fills or |
---|
510 | * a PUSH comes in.) |
---|
511 | */ |
---|
512 | if (off + len == so->so_snd.sb_cc) |
---|
513 | flags |= TH_PUSH; |
---|
514 | } else { |
---|
515 | if (tp->t_flags & TF_ACKNOW) |
---|
516 | tcpstat.tcps_sndacks++; |
---|
517 | else if (flags & (TH_SYN|TH_FIN|TH_RST)) |
---|
518 | tcpstat.tcps_sndctrl++; |
---|
519 | else if (SEQ_GT(tp->snd_up, tp->snd_una)) |
---|
520 | tcpstat.tcps_sndurg++; |
---|
521 | else |
---|
522 | tcpstat.tcps_sndwinup++; |
---|
523 | |
---|
524 | MGETHDR(m, M_DONTWAIT, MT_HEADER); |
---|
525 | if (m == NULL) { |
---|
526 | error = ENOBUFS; |
---|
527 | goto out; |
---|
528 | } |
---|
529 | m->m_data += max_linkhdr; |
---|
530 | m->m_len = hdrlen; |
---|
531 | } |
---|
532 | m->m_pkthdr.rcvif = (struct ifnet *)0; |
---|
533 | ti = mtod(m, struct tcpiphdr *); |
---|
534 | if (tp->t_template == 0) |
---|
535 | panic("tcp_output"); |
---|
536 | (void)memcpy(ti, tp->t_template, sizeof (struct tcpiphdr)); |
---|
537 | |
---|
538 | /* |
---|
539 | * Fill in fields, remembering maximum advertised |
---|
540 | * window for use in delaying messages about window sizes. |
---|
541 | * If resending a FIN, be sure not to use a new sequence number. |
---|
542 | */ |
---|
543 | if (flags & TH_FIN && tp->t_flags & TF_SENTFIN && |
---|
544 | tp->snd_nxt == tp->snd_max) |
---|
545 | tp->snd_nxt--; |
---|
546 | /* |
---|
547 | * If we are doing retransmissions, then snd_nxt will |
---|
548 | * not reflect the first unsent octet. For ACK only |
---|
549 | * packets, we do not want the sequence number of the |
---|
550 | * retransmitted packet, we want the sequence number |
---|
551 | * of the next unsent octet. So, if there is no data |
---|
552 | * (and no SYN or FIN), use snd_max instead of snd_nxt |
---|
553 | * when filling in ti_seq. But if we are in persist |
---|
554 | * state, snd_max might reflect one byte beyond the |
---|
555 | * right edge of the window, so use snd_nxt in that |
---|
556 | * case, since we know we aren't doing a retransmission. |
---|
557 | * (retransmit and persist are mutually exclusive...) |
---|
558 | */ |
---|
559 | if (len || (flags & (TH_SYN|TH_FIN)) || tp->t_timer[TCPT_PERSIST]) |
---|
560 | ti->ti_seq = htonl(tp->snd_nxt); |
---|
561 | else |
---|
562 | ti->ti_seq = htonl(tp->snd_max); |
---|
563 | ti->ti_ack = htonl(tp->rcv_nxt); |
---|
564 | if (optlen) { |
---|
565 | bcopy(opt, ti + 1, optlen); |
---|
566 | ti->ti_off = (sizeof (struct tcphdr) + optlen) >> 2; |
---|
567 | } |
---|
568 | ti->ti_flags = flags; |
---|
569 | /* |
---|
570 | * Calculate receive window. Don't shrink window, |
---|
571 | * but avoid silly window syndrome. |
---|
572 | */ |
---|
573 | if (win < (long)(so->so_rcv.sb_hiwat / 4) && win < (long)tp->t_maxseg) |
---|
574 | win = 0; |
---|
575 | if (win < (long)(tp->rcv_adv - tp->rcv_nxt)) |
---|
576 | win = (long)(tp->rcv_adv - tp->rcv_nxt); |
---|
577 | if (win > (long)TCP_MAXWIN << tp->rcv_scale) |
---|
578 | win = (long)TCP_MAXWIN << tp->rcv_scale; |
---|
579 | ti->ti_win = htons((u_short) (win>>tp->rcv_scale)); |
---|
580 | if (SEQ_GT(tp->snd_up, tp->snd_nxt)) { |
---|
581 | ti->ti_urp = htons((u_short)(tp->snd_up - tp->snd_nxt)); |
---|
582 | ti->ti_flags |= TH_URG; |
---|
583 | } else |
---|
584 | /* |
---|
585 | * If no urgent pointer to send, then we pull |
---|
586 | * the urgent pointer to the left edge of the send window |
---|
587 | * so that it doesn't drift into the send window on sequence |
---|
588 | * number wraparound. |
---|
589 | */ |
---|
590 | tp->snd_up = tp->snd_una; /* drag it along */ |
---|
591 | |
---|
592 | /* |
---|
593 | * Put TCP length in extended header, and then |
---|
594 | * checksum extended header and data. |
---|
595 | */ |
---|
596 | if (len + optlen) |
---|
597 | ti->ti_len = htons((u_short)(sizeof (struct tcphdr) + |
---|
598 | optlen + len)); |
---|
599 | ti->ti_sum = in_cksum(m, (int)(hdrlen + len)); |
---|
600 | |
---|
601 | /* |
---|
602 | * In transmit state, time the transmission and arrange for |
---|
603 | * the retransmit. In persist state, just set snd_max. |
---|
604 | */ |
---|
605 | if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) { |
---|
606 | tcp_seq startseq = tp->snd_nxt; |
---|
607 | |
---|
608 | /* |
---|
609 | * Advance snd_nxt over sequence space of this segment. |
---|
610 | */ |
---|
611 | if (flags & (TH_SYN|TH_FIN)) { |
---|
612 | if (flags & TH_SYN) |
---|
613 | tp->snd_nxt++; |
---|
614 | if (flags & TH_FIN) { |
---|
615 | tp->snd_nxt++; |
---|
616 | tp->t_flags |= TF_SENTFIN; |
---|
617 | } |
---|
618 | } |
---|
619 | tp->snd_nxt += len; |
---|
620 | if (SEQ_GT(tp->snd_nxt, tp->snd_max)) { |
---|
621 | tp->snd_max = tp->snd_nxt; |
---|
622 | /* |
---|
623 | * Time this transmission if not a retransmission and |
---|
624 | * not currently timing anything. |
---|
625 | */ |
---|
626 | if (tp->t_rtt == 0) { |
---|
627 | tp->t_rtt = 1; |
---|
628 | tp->t_rtseq = startseq; |
---|
629 | tcpstat.tcps_segstimed++; |
---|
630 | } |
---|
631 | } |
---|
632 | |
---|
633 | /* |
---|
634 | * Set retransmit timer if not currently set, |
---|
635 | * and not doing an ack or a keep-alive probe. |
---|
636 | * Initial value for retransmit timer is smoothed |
---|
637 | * round-trip time + 2 * round-trip time variance. |
---|
638 | * Initialize shift counter which is used for backoff |
---|
639 | * of retransmit time. |
---|
640 | */ |
---|
641 | if (tp->t_timer[TCPT_REXMT] == 0 && |
---|
642 | tp->snd_nxt != tp->snd_una) { |
---|
643 | tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; |
---|
644 | if (tp->t_timer[TCPT_PERSIST]) { |
---|
645 | tp->t_timer[TCPT_PERSIST] = 0; |
---|
646 | tp->t_rxtshift = 0; |
---|
647 | } |
---|
648 | } |
---|
649 | } else |
---|
650 | if (SEQ_GT(tp->snd_nxt + len, tp->snd_max)) |
---|
651 | tp->snd_max = tp->snd_nxt + len; |
---|
652 | |
---|
653 | #ifdef TCPDEBUG |
---|
654 | /* |
---|
655 | * Trace. |
---|
656 | */ |
---|
657 | if (so->so_options & SO_DEBUG) |
---|
658 | tcp_trace(TA_OUTPUT, tp->t_state, tp, ti, 0); |
---|
659 | #endif |
---|
660 | |
---|
661 | /* |
---|
662 | * Fill in IP length and desired time to live and |
---|
663 | * send to IP level. There should be a better way |
---|
664 | * to handle ttl and tos; we could keep them in |
---|
665 | * the template, but need a way to checksum without them. |
---|
666 | */ |
---|
667 | m->m_pkthdr.len = hdrlen + len; |
---|
668 | #ifdef TUBA |
---|
669 | if (tp->t_tuba_pcb) |
---|
670 | error = tuba_output(m, tp); |
---|
671 | else |
---|
672 | #endif |
---|
673 | { |
---|
674 | #if 1 |
---|
675 | struct rtentry *rt; |
---|
676 | #endif |
---|
677 | ((struct ip *)ti)->ip_len = m->m_pkthdr.len; |
---|
678 | ((struct ip *)ti)->ip_ttl = tp->t_inpcb->inp_ip_ttl; /* XXX */ |
---|
679 | ((struct ip *)ti)->ip_tos = tp->t_inpcb->inp_ip_tos; /* XXX */ |
---|
680 | #if 1 |
---|
681 | /* |
---|
682 | * See if we should do MTU discovery. We do it only if the following |
---|
683 | * are true: |
---|
684 | * 1) we have a valid route to the destination |
---|
685 | * 2) the MTU is not locked (if it is, then discovery has been |
---|
686 | * disabled) |
---|
687 | */ |
---|
688 | if ((rt = tp->t_inpcb->inp_route.ro_rt) |
---|
689 | && rt->rt_flags & RTF_UP |
---|
690 | && !(rt->rt_rmx.rmx_locks & RTV_MTU)) { |
---|
691 | ((struct ip *)ti)->ip_off |= IP_DF; |
---|
692 | } |
---|
693 | #endif |
---|
694 | error = ip_output(m, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route, |
---|
695 | so->so_options & SO_DONTROUTE, 0); |
---|
696 | } |
---|
697 | if (error) { |
---|
698 | out: |
---|
699 | if (error == ENOBUFS) { |
---|
700 | tcp_quench(tp->t_inpcb, 0); |
---|
701 | return (0); |
---|
702 | } |
---|
703 | #if 1 |
---|
704 | if (error == EMSGSIZE) { |
---|
705 | /* |
---|
706 | * ip_output() will have already fixed the route |
---|
707 | * for us. tcp_mtudisc() will, as its last action, |
---|
708 | * initiate retransmission, so it is important to |
---|
709 | * not do so here. |
---|
710 | */ |
---|
711 | tcp_mtudisc(tp->t_inpcb, 0); |
---|
712 | return 0; |
---|
713 | } |
---|
714 | #endif |
---|
715 | if ((error == EHOSTUNREACH || error == ENETDOWN) |
---|
716 | && TCPS_HAVERCVDSYN(tp->t_state)) { |
---|
717 | tp->t_softerror = error; |
---|
718 | return (0); |
---|
719 | } |
---|
720 | return (error); |
---|
721 | } |
---|
722 | tcpstat.tcps_sndtotal++; |
---|
723 | |
---|
724 | /* |
---|
725 | * Data sent (as far as we can tell). |
---|
726 | * If this advertises a larger window than any other segment, |
---|
727 | * then remember the size of the advertised window. |
---|
728 | * Any pending ACK has now been sent. |
---|
729 | */ |
---|
730 | if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv)) |
---|
731 | tp->rcv_adv = tp->rcv_nxt + win; |
---|
732 | tp->last_ack_sent = tp->rcv_nxt; |
---|
733 | tp->t_flags &= ~(TF_ACKNOW|TF_DELACK); |
---|
734 | if (sendalot) |
---|
735 | goto again; |
---|
736 | return (0); |
---|
737 | } |
---|
738 | |
---|
739 | void |
---|
740 | tcp_setpersist(tp) |
---|
741 | register struct tcpcb *tp; |
---|
742 | { |
---|
743 | register int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1; |
---|
744 | |
---|
745 | if (tp->t_timer[TCPT_REXMT]) |
---|
746 | panic("tcp_output REXMT"); |
---|
747 | /* |
---|
748 | * Start/restart persistance timer. |
---|
749 | */ |
---|
750 | TCPT_RANGESET(tp->t_timer[TCPT_PERSIST], |
---|
751 | t * tcp_backoff[tp->t_rxtshift], |
---|
752 | TCPTV_PERSMIN, TCPTV_PERSMAX); |
---|
753 | if (tp->t_rxtshift < TCP_MAXRXTSHIFT) |
---|
754 | tp->t_rxtshift++; |
---|
755 | } |
---|