[39e6e65a] | 1 | /* |
---|
| 2 | * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 |
---|
| 3 | * The Regents of the University of California. All rights reserved. |
---|
| 4 | * |
---|
| 5 | * Redistribution and use in source and binary forms, with or without |
---|
| 6 | * modification, are permitted provided that the following conditions |
---|
| 7 | * are met: |
---|
| 8 | * 1. Redistributions of source code must retain the above copyright |
---|
| 9 | * notice, this list of conditions and the following disclaimer. |
---|
| 10 | * 2. Redistributions in binary form must reproduce the above copyright |
---|
| 11 | * notice, this list of conditions and the following disclaimer in the |
---|
| 12 | * documentation and/or other materials provided with the distribution. |
---|
| 13 | * 4. Neither the name of the University nor the names of its contributors |
---|
| 14 | * may be used to endorse or promote products derived from this software |
---|
| 15 | * without specific prior written permission. |
---|
| 16 | * |
---|
| 17 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
---|
| 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
---|
| 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
---|
| 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
---|
| 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
---|
| 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
---|
| 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
---|
| 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
---|
| 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
---|
| 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
---|
| 27 | * SUCH DAMAGE. |
---|
| 28 | * |
---|
| 29 | * @(#)tcp_output.c 8.4 (Berkeley) 5/24/95 |
---|
| 30 | * $Id$ |
---|
| 31 | */ |
---|
| 32 | |
---|
[b25b88e7] | 33 | #ifdef HAVE_CONFIG_H |
---|
| 34 | #include "config.h" |
---|
| 35 | #endif |
---|
| 36 | |
---|
[39e6e65a] | 37 | #include "opt_tcpdebug.h" |
---|
| 38 | |
---|
| 39 | #include <sys/param.h> |
---|
[c301570] | 40 | #include <rtems/bsd/sys/queue.h> |
---|
[39e6e65a] | 41 | #include <sys/systm.h> |
---|
| 42 | #include <sys/malloc.h> |
---|
| 43 | #include <sys/mbuf.h> |
---|
| 44 | #include <sys/protosw.h> |
---|
| 45 | #include <sys/socket.h> |
---|
| 46 | #include <sys/socketvar.h> |
---|
[b3f8c9e1] | 47 | #include <errno.h> |
---|
[39e6e65a] | 48 | |
---|
| 49 | #include <net/route.h> |
---|
| 50 | |
---|
| 51 | #include <netinet/in.h> |
---|
| 52 | #include <netinet/in_systm.h> |
---|
| 53 | #include <netinet/ip.h> |
---|
| 54 | #include <netinet/in_pcb.h> |
---|
| 55 | #include <netinet/ip_var.h> |
---|
| 56 | #include <netinet/tcp.h> |
---|
| 57 | #define TCPOUTFLAGS |
---|
| 58 | #include <netinet/tcp_fsm.h> |
---|
| 59 | #include <netinet/tcp_seq.h> |
---|
| 60 | #include <netinet/tcp_timer.h> |
---|
| 61 | #include <netinet/tcp_var.h> |
---|
| 62 | #include <netinet/tcpip.h> |
---|
| 63 | #ifdef TCPDEBUG |
---|
| 64 | #include <netinet/tcp_debug.h> |
---|
| 65 | #endif |
---|
| 66 | |
---|
| 67 | #ifdef notyet |
---|
| 68 | extern struct mbuf *m_copypack(); |
---|
| 69 | #endif |
---|
| 70 | |
---|
| 71 | |
---|
| 72 | /* |
---|
| 73 | * Tcp output routine: figure out what should be sent and send it. |
---|
| 74 | */ |
---|
| 75 | int |
---|
[dd967330] | 76 | tcp_output( |
---|
| 77 | register struct tcpcb *tp) |
---|
[39e6e65a] | 78 | { |
---|
| 79 | register struct socket *so = tp->t_inpcb->inp_socket; |
---|
| 80 | register long len, win; |
---|
| 81 | int off, flags, error; |
---|
| 82 | register struct mbuf *m; |
---|
| 83 | register struct tcpiphdr *ti; |
---|
| 84 | u_char opt[TCP_MAXOLEN]; |
---|
| 85 | unsigned optlen, hdrlen; |
---|
| 86 | int idle, sendalot; |
---|
| 87 | struct rmxp_tao *taop; |
---|
| 88 | struct rmxp_tao tao_noncached; |
---|
| 89 | |
---|
| 90 | /* |
---|
| 91 | * Determine length of data that should be transmitted, |
---|
| 92 | * and flags that will be used. |
---|
| 93 | * If there is some data or critical controls (SYN, RST) |
---|
| 94 | * to send, then transmit; otherwise, investigate further. |
---|
| 95 | */ |
---|
| 96 | idle = (tp->snd_max == tp->snd_una); |
---|
| 97 | if (idle && tp->t_idle >= tp->t_rxtcur) |
---|
| 98 | /* |
---|
| 99 | * We have been idle for "a while" and no acks are |
---|
| 100 | * expected to clock out any data we send -- |
---|
| 101 | * slow start to get ack "clock" running again. |
---|
| 102 | */ |
---|
| 103 | tp->snd_cwnd = tp->t_maxseg; |
---|
| 104 | again: |
---|
| 105 | sendalot = 0; |
---|
| 106 | off = tp->snd_nxt - tp->snd_una; |
---|
| 107 | win = min(tp->snd_wnd, tp->snd_cwnd); |
---|
| 108 | |
---|
| 109 | flags = tcp_outflags[tp->t_state]; |
---|
| 110 | /* |
---|
| 111 | * Get standard flags, and add SYN or FIN if requested by 'hidden' |
---|
| 112 | * state flags. |
---|
| 113 | */ |
---|
| 114 | if (tp->t_flags & TF_NEEDFIN) |
---|
| 115 | flags |= TH_FIN; |
---|
| 116 | if (tp->t_flags & TF_NEEDSYN) |
---|
| 117 | flags |= TH_SYN; |
---|
| 118 | |
---|
| 119 | /* |
---|
| 120 | * If in persist timeout with window of 0, send 1 byte. |
---|
| 121 | * Otherwise, if window is small but nonzero |
---|
| 122 | * and timer expired, we will send what we can |
---|
| 123 | * and go to transmit state. |
---|
| 124 | */ |
---|
| 125 | if (tp->t_force) { |
---|
| 126 | if (win == 0) { |
---|
| 127 | /* |
---|
| 128 | * If we still have some data to send, then |
---|
| 129 | * clear the FIN bit. Usually this would |
---|
| 130 | * happen below when it realizes that we |
---|
| 131 | * aren't sending all the data. However, |
---|
| 132 | * if we have exactly 1 byte of unset data, |
---|
| 133 | * then it won't clear the FIN bit below, |
---|
| 134 | * and if we are in persist state, we wind |
---|
| 135 | * up sending the packet without recording |
---|
| 136 | * that we sent the FIN bit. |
---|
| 137 | * |
---|
| 138 | * We can't just blindly clear the FIN bit, |
---|
| 139 | * because if we don't have any more data |
---|
| 140 | * to send then the probe will be the FIN |
---|
| 141 | * itself. |
---|
| 142 | */ |
---|
| 143 | if (off < so->so_snd.sb_cc) |
---|
| 144 | flags &= ~TH_FIN; |
---|
| 145 | win = 1; |
---|
| 146 | } else { |
---|
| 147 | tp->t_timer[TCPT_PERSIST] = 0; |
---|
| 148 | tp->t_rxtshift = 0; |
---|
| 149 | } |
---|
| 150 | } |
---|
| 151 | |
---|
| 152 | len = min(so->so_snd.sb_cc, win) - off; |
---|
| 153 | |
---|
| 154 | if ((taop = tcp_gettaocache(tp->t_inpcb)) == NULL) { |
---|
| 155 | taop = &tao_noncached; |
---|
| 156 | bzero(taop, sizeof(*taop)); |
---|
| 157 | } |
---|
| 158 | |
---|
| 159 | /* |
---|
| 160 | * Lop off SYN bit if it has already been sent. However, if this |
---|
| 161 | * is SYN-SENT state and if segment contains data and if we don't |
---|
| 162 | * know that foreign host supports TAO, suppress sending segment. |
---|
| 163 | */ |
---|
| 164 | if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) { |
---|
| 165 | flags &= ~TH_SYN; |
---|
| 166 | off--, len++; |
---|
| 167 | if (len > 0 && tp->t_state == TCPS_SYN_SENT && |
---|
| 168 | taop->tao_ccsent == 0) |
---|
| 169 | return 0; |
---|
| 170 | } |
---|
| 171 | |
---|
| 172 | /* |
---|
| 173 | * Be careful not to send data and/or FIN on SYN segments |
---|
| 174 | * in cases when no CC option will be sent. |
---|
| 175 | * This measure is needed to prevent interoperability problems |
---|
| 176 | * with not fully conformant TCP implementations. |
---|
| 177 | */ |
---|
| 178 | if ((flags & TH_SYN) && |
---|
| 179 | ((tp->t_flags & TF_NOOPT) || !(tp->t_flags & TF_REQ_CC) || |
---|
| 180 | ((flags & TH_ACK) && !(tp->t_flags & TF_RCVD_CC)))) { |
---|
| 181 | len = 0; |
---|
| 182 | flags &= ~TH_FIN; |
---|
| 183 | } |
---|
| 184 | |
---|
| 185 | if (len < 0) { |
---|
| 186 | /* |
---|
| 187 | * If FIN has been sent but not acked, |
---|
| 188 | * but we haven't been called to retransmit, |
---|
| 189 | * len will be -1. Otherwise, window shrank |
---|
| 190 | * after we sent into it. If window shrank to 0, |
---|
| 191 | * cancel pending retransmit, pull snd_nxt back |
---|
| 192 | * to (closed) window, and set the persist timer |
---|
| 193 | * if it isn't already going. If the window didn't |
---|
| 194 | * close completely, just wait for an ACK. |
---|
| 195 | */ |
---|
| 196 | len = 0; |
---|
| 197 | if (win == 0) { |
---|
| 198 | tp->t_timer[TCPT_REXMT] = 0; |
---|
| 199 | tp->t_rxtshift = 0; |
---|
| 200 | tp->snd_nxt = tp->snd_una; |
---|
| 201 | if (tp->t_timer[TCPT_PERSIST] == 0) |
---|
| 202 | tcp_setpersist(tp); |
---|
| 203 | } |
---|
| 204 | } |
---|
| 205 | if (len > tp->t_maxseg) { |
---|
| 206 | len = tp->t_maxseg; |
---|
| 207 | sendalot = 1; |
---|
| 208 | } |
---|
| 209 | if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc)) |
---|
| 210 | flags &= ~TH_FIN; |
---|
| 211 | |
---|
| 212 | win = sbspace(&so->so_rcv); |
---|
| 213 | |
---|
| 214 | /* |
---|
| 215 | * Sender silly window avoidance. If connection is idle |
---|
| 216 | * and can send all data, a maximum segment, |
---|
| 217 | * at least a maximum default-size segment do it, |
---|
| 218 | * or are forced, do it; otherwise don't bother. |
---|
| 219 | * If peer's buffer is tiny, then send |
---|
| 220 | * when window is at least half open. |
---|
| 221 | * If retransmitting (possibly after persist timer forced us |
---|
| 222 | * to send into a small window), then must resend. |
---|
| 223 | */ |
---|
| 224 | if (len) { |
---|
| 225 | if (len == tp->t_maxseg) |
---|
| 226 | goto send; |
---|
| 227 | if ((idle || tp->t_flags & TF_NODELAY) && |
---|
| 228 | (tp->t_flags & TF_NOPUSH) == 0 && |
---|
| 229 | len + off >= so->so_snd.sb_cc) |
---|
| 230 | goto send; |
---|
| 231 | if (tp->t_force) |
---|
| 232 | goto send; |
---|
| 233 | if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0) |
---|
| 234 | goto send; |
---|
| 235 | if (SEQ_LT(tp->snd_nxt, tp->snd_max)) |
---|
| 236 | goto send; |
---|
| 237 | } |
---|
| 238 | |
---|
| 239 | /* |
---|
| 240 | * Compare available window to amount of window |
---|
| 241 | * known to peer (as advertised window less |
---|
| 242 | * next expected input). If the difference is at least two |
---|
| 243 | * max size segments, or at least 50% of the maximum possible |
---|
| 244 | * window, then want to send a window update to peer. |
---|
| 245 | */ |
---|
| 246 | if (win > 0) { |
---|
| 247 | /* |
---|
| 248 | * "adv" is the amount we can increase the window, |
---|
| 249 | * taking into account that we are limited by |
---|
| 250 | * TCP_MAXWIN << tp->rcv_scale. |
---|
| 251 | */ |
---|
| 252 | long adv = min(win, (long)TCP_MAXWIN << tp->rcv_scale) - |
---|
| 253 | (tp->rcv_adv - tp->rcv_nxt); |
---|
| 254 | |
---|
| 255 | if (adv >= (long) (2 * tp->t_maxseg)) |
---|
| 256 | goto send; |
---|
| 257 | if (2 * adv >= (long) so->so_rcv.sb_hiwat) |
---|
| 258 | goto send; |
---|
| 259 | } |
---|
| 260 | |
---|
| 261 | /* |
---|
| 262 | * Send if we owe peer an ACK. |
---|
| 263 | */ |
---|
| 264 | if (tp->t_flags & TF_ACKNOW) |
---|
| 265 | goto send; |
---|
| 266 | if ((flags & TH_RST) || |
---|
| 267 | ((flags & TH_SYN) && (tp->t_flags & TF_NEEDSYN) == 0)) |
---|
| 268 | goto send; |
---|
| 269 | if (SEQ_GT(tp->snd_up, tp->snd_una)) |
---|
| 270 | goto send; |
---|
| 271 | /* |
---|
| 272 | * If our state indicates that FIN should be sent |
---|
| 273 | * and we have not yet done so, or we're retransmitting the FIN, |
---|
| 274 | * then we need to send. |
---|
| 275 | */ |
---|
| 276 | if (flags & TH_FIN && |
---|
| 277 | ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una)) |
---|
| 278 | goto send; |
---|
| 279 | |
---|
| 280 | /* |
---|
| 281 | * TCP window updates are not reliable, rather a polling protocol |
---|
| 282 | * using ``persist'' packets is used to insure receipt of window |
---|
| 283 | * updates. The three ``states'' for the output side are: |
---|
| 284 | * idle not doing retransmits or persists |
---|
| 285 | * persisting to move a small or zero window |
---|
| 286 | * (re)transmitting and thereby not persisting |
---|
| 287 | * |
---|
| 288 | * tp->t_timer[TCPT_PERSIST] |
---|
| 289 | * is set when we are in persist state. |
---|
| 290 | * tp->t_force |
---|
| 291 | * is set when we are called to send a persist packet. |
---|
| 292 | * tp->t_timer[TCPT_REXMT] |
---|
| 293 | * is set when we are retransmitting |
---|
| 294 | * The output side is idle when both timers are zero. |
---|
| 295 | * |
---|
| 296 | * If send window is too small, there is data to transmit, and no |
---|
| 297 | * retransmit or persist is pending, then go to persist state. |
---|
| 298 | * If nothing happens soon, send when timer expires: |
---|
| 299 | * if window is nonzero, transmit what we can, |
---|
| 300 | * otherwise force out a byte. |
---|
| 301 | */ |
---|
| 302 | if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 && |
---|
| 303 | tp->t_timer[TCPT_PERSIST] == 0) { |
---|
| 304 | tp->t_rxtshift = 0; |
---|
| 305 | tcp_setpersist(tp); |
---|
| 306 | } |
---|
| 307 | |
---|
| 308 | /* |
---|
| 309 | * No reason to send a segment, just return. |
---|
| 310 | */ |
---|
| 311 | return (0); |
---|
| 312 | |
---|
| 313 | send: |
---|
| 314 | /* |
---|
| 315 | * Before ESTABLISHED, force sending of initial options |
---|
| 316 | * unless TCP set not to do any options. |
---|
| 317 | * NOTE: we assume that the IP/TCP header plus TCP options |
---|
| 318 | * always fit in a single mbuf, leaving room for a maximum |
---|
| 319 | * link header, i.e. |
---|
| 320 | * max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MHLEN |
---|
| 321 | */ |
---|
| 322 | optlen = 0; |
---|
| 323 | hdrlen = sizeof (struct tcpiphdr); |
---|
| 324 | if (flags & TH_SYN) { |
---|
| 325 | tp->snd_nxt = tp->iss; |
---|
| 326 | if ((tp->t_flags & TF_NOOPT) == 0) { |
---|
| 327 | u_short mss; |
---|
| 328 | |
---|
| 329 | opt[0] = TCPOPT_MAXSEG; |
---|
| 330 | opt[1] = TCPOLEN_MAXSEG; |
---|
| 331 | mss = htons((u_short) tcp_mssopt(tp)); |
---|
| 332 | (void)memcpy(opt + 2, &mss, sizeof(mss)); |
---|
| 333 | optlen = TCPOLEN_MAXSEG; |
---|
| 334 | |
---|
| 335 | if ((tp->t_flags & TF_REQ_SCALE) && |
---|
| 336 | ((flags & TH_ACK) == 0 || |
---|
| 337 | (tp->t_flags & TF_RCVD_SCALE))) { |
---|
| 338 | *((u_long *) (opt + optlen)) = htonl( |
---|
| 339 | TCPOPT_NOP << 24 | |
---|
| 340 | TCPOPT_WINDOW << 16 | |
---|
| 341 | TCPOLEN_WINDOW << 8 | |
---|
| 342 | tp->request_r_scale); |
---|
| 343 | optlen += 4; |
---|
| 344 | } |
---|
| 345 | } |
---|
| 346 | } |
---|
| 347 | |
---|
| 348 | /* |
---|
| 349 | * Send a timestamp and echo-reply if this is a SYN and our side |
---|
| 350 | * wants to use timestamps (TF_REQ_TSTMP is set) or both our side |
---|
| 351 | * and our peer have sent timestamps in our SYN's. |
---|
| 352 | */ |
---|
| 353 | if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP && |
---|
| 354 | (flags & TH_RST) == 0 && |
---|
| 355 | ((flags & TH_ACK) == 0 || |
---|
| 356 | (tp->t_flags & TF_RCVD_TSTMP))) { |
---|
| 357 | u_long *lp = (u_long *)(opt + optlen); |
---|
| 358 | |
---|
| 359 | /* Form timestamp option as shown in appendix A of RFC 1323. */ |
---|
| 360 | *lp++ = htonl(TCPOPT_TSTAMP_HDR); |
---|
| 361 | *lp++ = htonl(tcp_now); |
---|
| 362 | *lp = htonl(tp->ts_recent); |
---|
| 363 | optlen += TCPOLEN_TSTAMP_APPA; |
---|
| 364 | } |
---|
| 365 | |
---|
| 366 | /* |
---|
| 367 | * Send `CC-family' options if our side wants to use them (TF_REQ_CC), |
---|
| 368 | * options are allowed (!TF_NOOPT) and it's not a RST. |
---|
| 369 | */ |
---|
| 370 | if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC && |
---|
| 371 | (flags & TH_RST) == 0) { |
---|
| 372 | switch (flags & (TH_SYN|TH_ACK)) { |
---|
| 373 | /* |
---|
| 374 | * This is a normal ACK, send CC if we received CC before |
---|
| 375 | * from our peer. |
---|
| 376 | */ |
---|
| 377 | case TH_ACK: |
---|
| 378 | if (!(tp->t_flags & TF_RCVD_CC)) |
---|
| 379 | break; |
---|
| 380 | /*FALLTHROUGH*/ |
---|
| 381 | |
---|
| 382 | /* |
---|
| 383 | * We can only get here in T/TCP's SYN_SENT* state, when |
---|
| 384 | * we're a sending a non-SYN segment without waiting for |
---|
| 385 | * the ACK of our SYN. A check above assures that we only |
---|
| 386 | * do this if our peer understands T/TCP. |
---|
| 387 | */ |
---|
| 388 | case 0: |
---|
| 389 | opt[optlen++] = TCPOPT_NOP; |
---|
| 390 | opt[optlen++] = TCPOPT_NOP; |
---|
| 391 | opt[optlen++] = TCPOPT_CC; |
---|
| 392 | opt[optlen++] = TCPOLEN_CC; |
---|
| 393 | *(u_int32_t *)&opt[optlen] = htonl(tp->cc_send); |
---|
| 394 | |
---|
| 395 | optlen += 4; |
---|
| 396 | break; |
---|
| 397 | |
---|
| 398 | /* |
---|
| 399 | * This is our initial SYN, check whether we have to use |
---|
| 400 | * CC or CC.new. |
---|
| 401 | */ |
---|
| 402 | case TH_SYN: |
---|
| 403 | opt[optlen++] = TCPOPT_NOP; |
---|
| 404 | opt[optlen++] = TCPOPT_NOP; |
---|
| 405 | opt[optlen++] = tp->t_flags & TF_SENDCCNEW ? |
---|
| 406 | TCPOPT_CCNEW : TCPOPT_CC; |
---|
| 407 | opt[optlen++] = TCPOLEN_CC; |
---|
| 408 | *(u_int32_t *)&opt[optlen] = htonl(tp->cc_send); |
---|
| 409 | optlen += 4; |
---|
| 410 | break; |
---|
| 411 | |
---|
| 412 | /* |
---|
| 413 | * This is a SYN,ACK; send CC and CC.echo if we received |
---|
| 414 | * CC from our peer. |
---|
| 415 | */ |
---|
| 416 | case (TH_SYN|TH_ACK): |
---|
| 417 | if (tp->t_flags & TF_RCVD_CC) { |
---|
| 418 | opt[optlen++] = TCPOPT_NOP; |
---|
| 419 | opt[optlen++] = TCPOPT_NOP; |
---|
| 420 | opt[optlen++] = TCPOPT_CC; |
---|
| 421 | opt[optlen++] = TCPOLEN_CC; |
---|
| 422 | *(u_int32_t *)&opt[optlen] = |
---|
| 423 | htonl(tp->cc_send); |
---|
| 424 | optlen += 4; |
---|
| 425 | opt[optlen++] = TCPOPT_NOP; |
---|
| 426 | opt[optlen++] = TCPOPT_NOP; |
---|
| 427 | opt[optlen++] = TCPOPT_CCECHO; |
---|
| 428 | opt[optlen++] = TCPOLEN_CC; |
---|
| 429 | *(u_int32_t *)&opt[optlen] = |
---|
| 430 | htonl(tp->cc_recv); |
---|
| 431 | optlen += 4; |
---|
| 432 | } |
---|
| 433 | break; |
---|
| 434 | } |
---|
| 435 | } |
---|
| 436 | |
---|
| 437 | hdrlen += optlen; |
---|
| 438 | |
---|
| 439 | /* |
---|
| 440 | * Adjust data length if insertion of options will |
---|
| 441 | * bump the packet length beyond the t_maxopd length. |
---|
| 442 | * Clear the FIN bit because we cut off the tail of |
---|
| 443 | * the segment. |
---|
| 444 | */ |
---|
| 445 | if (len + optlen > tp->t_maxopd) { |
---|
| 446 | /* |
---|
| 447 | * If there is still more to send, don't close the connection. |
---|
| 448 | */ |
---|
| 449 | flags &= ~TH_FIN; |
---|
| 450 | len = tp->t_maxopd - optlen; |
---|
| 451 | sendalot = 1; |
---|
| 452 | } |
---|
| 453 | |
---|
| 454 | /*#ifdef DIAGNOSTIC*/ |
---|
| 455 | if (max_linkhdr + hdrlen > MHLEN) |
---|
| 456 | panic("tcphdr too big"); |
---|
| 457 | /*#endif*/ |
---|
| 458 | |
---|
| 459 | /* |
---|
| 460 | * Grab a header mbuf, attaching a copy of data to |
---|
| 461 | * be transmitted, and initialize the header from |
---|
| 462 | * the template for sends on this connection. |
---|
| 463 | */ |
---|
| 464 | if (len) { |
---|
| 465 | if (tp->t_force && len == 1) |
---|
| 466 | tcpstat.tcps_sndprobe++; |
---|
| 467 | else if (SEQ_LT(tp->snd_nxt, tp->snd_max)) { |
---|
| 468 | tcpstat.tcps_sndrexmitpack++; |
---|
| 469 | tcpstat.tcps_sndrexmitbyte += len; |
---|
| 470 | } else { |
---|
| 471 | tcpstat.tcps_sndpack++; |
---|
| 472 | tcpstat.tcps_sndbyte += len; |
---|
| 473 | } |
---|
| 474 | #ifdef notyet |
---|
| 475 | if ((m = m_copypack(so->so_snd.sb_mb, off, |
---|
| 476 | (int)len, max_linkhdr + hdrlen)) == 0) { |
---|
| 477 | error = ENOBUFS; |
---|
| 478 | goto out; |
---|
| 479 | } |
---|
| 480 | /* |
---|
| 481 | * m_copypack left space for our hdr; use it. |
---|
| 482 | */ |
---|
| 483 | m->m_len += hdrlen; |
---|
| 484 | m->m_data -= hdrlen; |
---|
| 485 | #else |
---|
| 486 | MGETHDR(m, M_DONTWAIT, MT_HEADER); |
---|
| 487 | if (m == NULL) { |
---|
| 488 | error = ENOBUFS; |
---|
| 489 | goto out; |
---|
| 490 | } |
---|
| 491 | m->m_data += max_linkhdr; |
---|
| 492 | m->m_len = hdrlen; |
---|
| 493 | if (len <= MHLEN - hdrlen - max_linkhdr) { |
---|
| 494 | m_copydata(so->so_snd.sb_mb, off, (int) len, |
---|
| 495 | mtod(m, caddr_t) + hdrlen); |
---|
| 496 | m->m_len += len; |
---|
| 497 | } else { |
---|
| 498 | m->m_next = m_copy(so->so_snd.sb_mb, off, (int) len); |
---|
| 499 | if (m->m_next == 0) { |
---|
| 500 | (void) m_free(m); |
---|
| 501 | error = ENOBUFS; |
---|
| 502 | goto out; |
---|
| 503 | } |
---|
| 504 | } |
---|
| 505 | #endif |
---|
| 506 | /* |
---|
| 507 | * If we're sending everything we've got, set PUSH. |
---|
| 508 | * (This will keep happy those implementations which only |
---|
| 509 | * give data to the user when a buffer fills or |
---|
| 510 | * a PUSH comes in.) |
---|
| 511 | */ |
---|
| 512 | if (off + len == so->so_snd.sb_cc) |
---|
| 513 | flags |= TH_PUSH; |
---|
| 514 | } else { |
---|
| 515 | if (tp->t_flags & TF_ACKNOW) |
---|
| 516 | tcpstat.tcps_sndacks++; |
---|
| 517 | else if (flags & (TH_SYN|TH_FIN|TH_RST)) |
---|
| 518 | tcpstat.tcps_sndctrl++; |
---|
| 519 | else if (SEQ_GT(tp->snd_up, tp->snd_una)) |
---|
| 520 | tcpstat.tcps_sndurg++; |
---|
| 521 | else |
---|
| 522 | tcpstat.tcps_sndwinup++; |
---|
| 523 | |
---|
| 524 | MGETHDR(m, M_DONTWAIT, MT_HEADER); |
---|
| 525 | if (m == NULL) { |
---|
| 526 | error = ENOBUFS; |
---|
| 527 | goto out; |
---|
| 528 | } |
---|
| 529 | m->m_data += max_linkhdr; |
---|
| 530 | m->m_len = hdrlen; |
---|
| 531 | } |
---|
| 532 | m->m_pkthdr.rcvif = (struct ifnet *)0; |
---|
| 533 | ti = mtod(m, struct tcpiphdr *); |
---|
| 534 | if (tp->t_template == 0) |
---|
| 535 | panic("tcp_output"); |
---|
| 536 | (void)memcpy(ti, tp->t_template, sizeof (struct tcpiphdr)); |
---|
| 537 | |
---|
| 538 | /* |
---|
| 539 | * Fill in fields, remembering maximum advertised |
---|
| 540 | * window for use in delaying messages about window sizes. |
---|
| 541 | * If resending a FIN, be sure not to use a new sequence number. |
---|
| 542 | */ |
---|
| 543 | if (flags & TH_FIN && tp->t_flags & TF_SENTFIN && |
---|
| 544 | tp->snd_nxt == tp->snd_max) |
---|
| 545 | tp->snd_nxt--; |
---|
| 546 | /* |
---|
| 547 | * If we are doing retransmissions, then snd_nxt will |
---|
| 548 | * not reflect the first unsent octet. For ACK only |
---|
| 549 | * packets, we do not want the sequence number of the |
---|
| 550 | * retransmitted packet, we want the sequence number |
---|
| 551 | * of the next unsent octet. So, if there is no data |
---|
| 552 | * (and no SYN or FIN), use snd_max instead of snd_nxt |
---|
| 553 | * when filling in ti_seq. But if we are in persist |
---|
| 554 | * state, snd_max might reflect one byte beyond the |
---|
| 555 | * right edge of the window, so use snd_nxt in that |
---|
| 556 | * case, since we know we aren't doing a retransmission. |
---|
| 557 | * (retransmit and persist are mutually exclusive...) |
---|
| 558 | */ |
---|
| 559 | if (len || (flags & (TH_SYN|TH_FIN)) || tp->t_timer[TCPT_PERSIST]) |
---|
| 560 | ti->ti_seq = htonl(tp->snd_nxt); |
---|
| 561 | else |
---|
| 562 | ti->ti_seq = htonl(tp->snd_max); |
---|
| 563 | ti->ti_ack = htonl(tp->rcv_nxt); |
---|
| 564 | if (optlen) { |
---|
| 565 | bcopy(opt, ti + 1, optlen); |
---|
| 566 | ti->ti_off = (sizeof (struct tcphdr) + optlen) >> 2; |
---|
| 567 | } |
---|
| 568 | ti->ti_flags = flags; |
---|
| 569 | /* |
---|
| 570 | * Calculate receive window. Don't shrink window, |
---|
| 571 | * but avoid silly window syndrome. |
---|
| 572 | */ |
---|
| 573 | if (win < (long)(so->so_rcv.sb_hiwat / 4) && win < (long)tp->t_maxseg) |
---|
| 574 | win = 0; |
---|
| 575 | if (win < (long)(tp->rcv_adv - tp->rcv_nxt)) |
---|
| 576 | win = (long)(tp->rcv_adv - tp->rcv_nxt); |
---|
| 577 | if (win > (long)TCP_MAXWIN << tp->rcv_scale) |
---|
| 578 | win = (long)TCP_MAXWIN << tp->rcv_scale; |
---|
| 579 | ti->ti_win = htons((u_short) (win>>tp->rcv_scale)); |
---|
| 580 | if (SEQ_GT(tp->snd_up, tp->snd_nxt)) { |
---|
| 581 | ti->ti_urp = htons((u_short)(tp->snd_up - tp->snd_nxt)); |
---|
| 582 | ti->ti_flags |= TH_URG; |
---|
| 583 | } else |
---|
| 584 | /* |
---|
| 585 | * If no urgent pointer to send, then we pull |
---|
| 586 | * the urgent pointer to the left edge of the send window |
---|
| 587 | * so that it doesn't drift into the send window on sequence |
---|
| 588 | * number wraparound. |
---|
| 589 | */ |
---|
| 590 | tp->snd_up = tp->snd_una; /* drag it along */ |
---|
| 591 | |
---|
| 592 | /* |
---|
| 593 | * Put TCP length in extended header, and then |
---|
| 594 | * checksum extended header and data. |
---|
| 595 | */ |
---|
| 596 | if (len + optlen) |
---|
| 597 | ti->ti_len = htons((u_short)(sizeof (struct tcphdr) + |
---|
| 598 | optlen + len)); |
---|
| 599 | ti->ti_sum = in_cksum(m, (int)(hdrlen + len)); |
---|
| 600 | |
---|
| 601 | /* |
---|
| 602 | * In transmit state, time the transmission and arrange for |
---|
| 603 | * the retransmit. In persist state, just set snd_max. |
---|
| 604 | */ |
---|
| 605 | if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) { |
---|
| 606 | tcp_seq startseq = tp->snd_nxt; |
---|
| 607 | |
---|
| 608 | /* |
---|
| 609 | * Advance snd_nxt over sequence space of this segment. |
---|
| 610 | */ |
---|
| 611 | if (flags & (TH_SYN|TH_FIN)) { |
---|
| 612 | if (flags & TH_SYN) |
---|
| 613 | tp->snd_nxt++; |
---|
| 614 | if (flags & TH_FIN) { |
---|
| 615 | tp->snd_nxt++; |
---|
| 616 | tp->t_flags |= TF_SENTFIN; |
---|
| 617 | } |
---|
| 618 | } |
---|
| 619 | tp->snd_nxt += len; |
---|
| 620 | if (SEQ_GT(tp->snd_nxt, tp->snd_max)) { |
---|
| 621 | tp->snd_max = tp->snd_nxt; |
---|
| 622 | /* |
---|
| 623 | * Time this transmission if not a retransmission and |
---|
| 624 | * not currently timing anything. |
---|
| 625 | */ |
---|
| 626 | if (tp->t_rtt == 0) { |
---|
| 627 | tp->t_rtt = 1; |
---|
| 628 | tp->t_rtseq = startseq; |
---|
| 629 | tcpstat.tcps_segstimed++; |
---|
| 630 | } |
---|
| 631 | } |
---|
| 632 | |
---|
| 633 | /* |
---|
| 634 | * Set retransmit timer if not currently set, |
---|
| 635 | * and not doing an ack or a keep-alive probe. |
---|
| 636 | * Initial value for retransmit timer is smoothed |
---|
| 637 | * round-trip time + 2 * round-trip time variance. |
---|
| 638 | * Initialize shift counter which is used for backoff |
---|
| 639 | * of retransmit time. |
---|
| 640 | */ |
---|
| 641 | if (tp->t_timer[TCPT_REXMT] == 0 && |
---|
| 642 | tp->snd_nxt != tp->snd_una) { |
---|
| 643 | tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; |
---|
| 644 | if (tp->t_timer[TCPT_PERSIST]) { |
---|
| 645 | tp->t_timer[TCPT_PERSIST] = 0; |
---|
| 646 | tp->t_rxtshift = 0; |
---|
| 647 | } |
---|
| 648 | } |
---|
| 649 | } else |
---|
| 650 | if (SEQ_GT(tp->snd_nxt + len, tp->snd_max)) |
---|
| 651 | tp->snd_max = tp->snd_nxt + len; |
---|
| 652 | |
---|
| 653 | #ifdef TCPDEBUG |
---|
| 654 | /* |
---|
| 655 | * Trace. |
---|
| 656 | */ |
---|
| 657 | if (so->so_options & SO_DEBUG) |
---|
| 658 | tcp_trace(TA_OUTPUT, tp->t_state, tp, ti, 0); |
---|
| 659 | #endif |
---|
| 660 | |
---|
| 661 | /* |
---|
| 662 | * Fill in IP length and desired time to live and |
---|
| 663 | * send to IP level. There should be a better way |
---|
| 664 | * to handle ttl and tos; we could keep them in |
---|
| 665 | * the template, but need a way to checksum without them. |
---|
| 666 | */ |
---|
| 667 | m->m_pkthdr.len = hdrlen + len; |
---|
| 668 | #ifdef TUBA |
---|
| 669 | if (tp->t_tuba_pcb) |
---|
| 670 | error = tuba_output(m, tp); |
---|
| 671 | else |
---|
| 672 | #endif |
---|
| 673 | { |
---|
| 674 | #if 1 |
---|
| 675 | struct rtentry *rt; |
---|
| 676 | #endif |
---|
| 677 | ((struct ip *)ti)->ip_len = m->m_pkthdr.len; |
---|
| 678 | ((struct ip *)ti)->ip_ttl = tp->t_inpcb->inp_ip_ttl; /* XXX */ |
---|
| 679 | ((struct ip *)ti)->ip_tos = tp->t_inpcb->inp_ip_tos; /* XXX */ |
---|
| 680 | #if 1 |
---|
| 681 | /* |
---|
| 682 | * See if we should do MTU discovery. We do it only if the following |
---|
| 683 | * are true: |
---|
| 684 | * 1) we have a valid route to the destination |
---|
| 685 | * 2) the MTU is not locked (if it is, then discovery has been |
---|
| 686 | * disabled) |
---|
| 687 | */ |
---|
| 688 | if ((rt = tp->t_inpcb->inp_route.ro_rt) |
---|
| 689 | && rt->rt_flags & RTF_UP |
---|
| 690 | && !(rt->rt_rmx.rmx_locks & RTV_MTU)) { |
---|
| 691 | ((struct ip *)ti)->ip_off |= IP_DF; |
---|
| 692 | } |
---|
| 693 | #endif |
---|
| 694 | error = ip_output(m, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route, |
---|
| 695 | so->so_options & SO_DONTROUTE, 0); |
---|
| 696 | } |
---|
| 697 | if (error) { |
---|
| 698 | out: |
---|
| 699 | if (error == ENOBUFS) { |
---|
| 700 | tcp_quench(tp->t_inpcb, 0); |
---|
| 701 | return (0); |
---|
| 702 | } |
---|
| 703 | #if 1 |
---|
| 704 | if (error == EMSGSIZE) { |
---|
| 705 | /* |
---|
| 706 | * ip_output() will have already fixed the route |
---|
| 707 | * for us. tcp_mtudisc() will, as its last action, |
---|
| 708 | * initiate retransmission, so it is important to |
---|
| 709 | * not do so here. |
---|
| 710 | */ |
---|
| 711 | tcp_mtudisc(tp->t_inpcb, 0); |
---|
| 712 | return 0; |
---|
| 713 | } |
---|
| 714 | #endif |
---|
| 715 | if ((error == EHOSTUNREACH || error == ENETDOWN) |
---|
| 716 | && TCPS_HAVERCVDSYN(tp->t_state)) { |
---|
| 717 | tp->t_softerror = error; |
---|
| 718 | return (0); |
---|
| 719 | } |
---|
| 720 | return (error); |
---|
| 721 | } |
---|
| 722 | tcpstat.tcps_sndtotal++; |
---|
| 723 | |
---|
| 724 | /* |
---|
| 725 | * Data sent (as far as we can tell). |
---|
| 726 | * If this advertises a larger window than any other segment, |
---|
| 727 | * then remember the size of the advertised window. |
---|
| 728 | * Any pending ACK has now been sent. |
---|
| 729 | */ |
---|
| 730 | if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv)) |
---|
| 731 | tp->rcv_adv = tp->rcv_nxt + win; |
---|
| 732 | tp->last_ack_sent = tp->rcv_nxt; |
---|
| 733 | tp->t_flags &= ~(TF_ACKNOW|TF_DELACK); |
---|
| 734 | if (sendalot) |
---|
| 735 | goto again; |
---|
| 736 | return (0); |
---|
| 737 | } |
---|
| 738 | |
---|
| 739 | void |
---|
[dd967330] | 740 | tcp_setpersist( |
---|
| 741 | register struct tcpcb *tp) |
---|
[39e6e65a] | 742 | { |
---|
| 743 | register int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1; |
---|
| 744 | |
---|
| 745 | if (tp->t_timer[TCPT_REXMT]) |
---|
| 746 | panic("tcp_output REXMT"); |
---|
| 747 | /* |
---|
| 748 | * Start/restart persistance timer. |
---|
| 749 | */ |
---|
| 750 | TCPT_RANGESET(tp->t_timer[TCPT_PERSIST], |
---|
| 751 | t * tcp_backoff[tp->t_rxtshift], |
---|
| 752 | TCPTV_PERSMIN, TCPTV_PERSMAX); |
---|
| 753 | if (tp->t_rxtshift < TCP_MAXRXTSHIFT) |
---|
| 754 | tp->t_rxtshift++; |
---|
| 755 | } |
---|