source: rtems/cpukit/libnetworking/netinet/tcp_output.c @ b3f8c9e1

4.104.115
Last change on this file since b3f8c9e1 was b3f8c9e1, checked in by Ralf Corsepius <ralf.corsepius@…>, on 12/22/08 at 07:47:28

Include <errno.h> (POSIX,C99) instead of <sys/errno.h> (BSD'ism).

  • Property mode set to 100644
File size: 20.8 KB
Line 
1/*
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
3 *      The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *      @(#)tcp_output.c        8.4 (Berkeley) 5/24/95
30 *      $Id$
31 */
32
33#include "opt_tcpdebug.h"
34
35#include <sys/param.h>
36#include <rtems/bsd/sys/queue.h>
37#include <sys/systm.h>
38#include <sys/malloc.h>
39#include <sys/mbuf.h>
40#include <sys/protosw.h>
41#include <sys/socket.h>
42#include <sys/socketvar.h>
43#include <errno.h>
44
45#include <net/route.h>
46
47#include <netinet/in.h>
48#include <netinet/in_systm.h>
49#include <netinet/ip.h>
50#include <netinet/in_pcb.h>
51#include <netinet/ip_var.h>
52#include <netinet/tcp.h>
53#define TCPOUTFLAGS
54#include <netinet/tcp_fsm.h>
55#include <netinet/tcp_seq.h>
56#include <netinet/tcp_timer.h>
57#include <netinet/tcp_var.h>
58#include <netinet/tcpip.h>
59#ifdef TCPDEBUG
60#include <netinet/tcp_debug.h>
61#endif
62
63#ifdef notyet
64extern struct mbuf *m_copypack();
65#endif
66
67
68/*
69 * Tcp output routine: figure out what should be sent and send it.
70 */
71int
72tcp_output(
73        register struct tcpcb *tp)
74{
75        register struct socket *so = tp->t_inpcb->inp_socket;
76        register long len, win;
77        int off, flags, error;
78        register struct mbuf *m;
79        register struct tcpiphdr *ti;
80        u_char opt[TCP_MAXOLEN];
81        unsigned optlen, hdrlen;
82        int idle, sendalot;
83        struct rmxp_tao *taop;
84        struct rmxp_tao tao_noncached;
85
86        /*
87         * Determine length of data that should be transmitted,
88         * and flags that will be used.
89         * If there is some data or critical controls (SYN, RST)
90         * to send, then transmit; otherwise, investigate further.
91         */
92        idle = (tp->snd_max == tp->snd_una);
93        if (idle && tp->t_idle >= tp->t_rxtcur)
94                /*
95                 * We have been idle for "a while" and no acks are
96                 * expected to clock out any data we send --
97                 * slow start to get ack "clock" running again.
98                 */
99                tp->snd_cwnd = tp->t_maxseg;
100again:
101        sendalot = 0;
102        off = tp->snd_nxt - tp->snd_una;
103        win = min(tp->snd_wnd, tp->snd_cwnd);
104
105        flags = tcp_outflags[tp->t_state];
106        /*
107         * Get standard flags, and add SYN or FIN if requested by 'hidden'
108         * state flags.
109         */
110        if (tp->t_flags & TF_NEEDFIN)
111                flags |= TH_FIN;
112        if (tp->t_flags & TF_NEEDSYN)
113                flags |= TH_SYN;
114
115        /*
116         * If in persist timeout with window of 0, send 1 byte.
117         * Otherwise, if window is small but nonzero
118         * and timer expired, we will send what we can
119         * and go to transmit state.
120         */
121        if (tp->t_force) {
122                if (win == 0) {
123                        /*
124                         * If we still have some data to send, then
125                         * clear the FIN bit.  Usually this would
126                         * happen below when it realizes that we
127                         * aren't sending all the data.  However,
128                         * if we have exactly 1 byte of unset data,
129                         * then it won't clear the FIN bit below,
130                         * and if we are in persist state, we wind
131                         * up sending the packet without recording
132                         * that we sent the FIN bit.
133                         *
134                         * We can't just blindly clear the FIN bit,
135                         * because if we don't have any more data
136                         * to send then the probe will be the FIN
137                         * itself.
138                         */
139                        if (off < so->so_snd.sb_cc)
140                                flags &= ~TH_FIN;
141                        win = 1;
142                } else {
143                        tp->t_timer[TCPT_PERSIST] = 0;
144                        tp->t_rxtshift = 0;
145                }
146        }
147
148        len = min(so->so_snd.sb_cc, win) - off;
149
150        if ((taop = tcp_gettaocache(tp->t_inpcb)) == NULL) {
151                taop = &tao_noncached;
152                bzero(taop, sizeof(*taop));
153        }
154
155        /*
156         * Lop off SYN bit if it has already been sent.  However, if this
157         * is SYN-SENT state and if segment contains data and if we don't
158         * know that foreign host supports TAO, suppress sending segment.
159         */
160        if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) {
161                flags &= ~TH_SYN;
162                off--, len++;
163                if (len > 0 && tp->t_state == TCPS_SYN_SENT &&
164                    taop->tao_ccsent == 0)
165                        return 0;
166        }
167
168        /*
169         * Be careful not to send data and/or FIN on SYN segments
170         * in cases when no CC option will be sent.
171         * This measure is needed to prevent interoperability problems
172         * with not fully conformant TCP implementations.
173         */
174        if ((flags & TH_SYN) &&
175            ((tp->t_flags & TF_NOOPT) || !(tp->t_flags & TF_REQ_CC) ||
176             ((flags & TH_ACK) && !(tp->t_flags & TF_RCVD_CC)))) {
177                len = 0;
178                flags &= ~TH_FIN;
179        }
180
181        if (len < 0) {
182                /*
183                 * If FIN has been sent but not acked,
184                 * but we haven't been called to retransmit,
185                 * len will be -1.  Otherwise, window shrank
186                 * after we sent into it.  If window shrank to 0,
187                 * cancel pending retransmit, pull snd_nxt back
188                 * to (closed) window, and set the persist timer
189                 * if it isn't already going.  If the window didn't
190                 * close completely, just wait for an ACK.
191                 */
192                len = 0;
193                if (win == 0) {
194                        tp->t_timer[TCPT_REXMT] = 0;
195                        tp->t_rxtshift = 0;
196                        tp->snd_nxt = tp->snd_una;
197                        if (tp->t_timer[TCPT_PERSIST] == 0)
198                                tcp_setpersist(tp);
199                }
200        }
201        if (len > tp->t_maxseg) {
202                len = tp->t_maxseg;
203                sendalot = 1;
204        }
205        if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc))
206                flags &= ~TH_FIN;
207
208        win = sbspace(&so->so_rcv);
209
210        /*
211         * Sender silly window avoidance.  If connection is idle
212         * and can send all data, a maximum segment,
213         * at least a maximum default-size segment do it,
214         * or are forced, do it; otherwise don't bother.
215         * If peer's buffer is tiny, then send
216         * when window is at least half open.
217         * If retransmitting (possibly after persist timer forced us
218         * to send into a small window), then must resend.
219         */
220        if (len) {
221                if (len == tp->t_maxseg)
222                        goto send;
223                if ((idle || tp->t_flags & TF_NODELAY) &&
224                    (tp->t_flags & TF_NOPUSH) == 0 &&
225                    len + off >= so->so_snd.sb_cc)
226                        goto send;
227                if (tp->t_force)
228                        goto send;
229                if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0)
230                        goto send;
231                if (SEQ_LT(tp->snd_nxt, tp->snd_max))
232                        goto send;
233        }
234
235        /*
236         * Compare available window to amount of window
237         * known to peer (as advertised window less
238         * next expected input).  If the difference is at least two
239         * max size segments, or at least 50% of the maximum possible
240         * window, then want to send a window update to peer.
241         */
242        if (win > 0) {
243                /*
244                 * "adv" is the amount we can increase the window,
245                 * taking into account that we are limited by
246                 * TCP_MAXWIN << tp->rcv_scale.
247                 */
248                long adv = min(win, (long)TCP_MAXWIN << tp->rcv_scale) -
249                        (tp->rcv_adv - tp->rcv_nxt);
250
251                if (adv >= (long) (2 * tp->t_maxseg))
252                        goto send;
253                if (2 * adv >= (long) so->so_rcv.sb_hiwat)
254                        goto send;
255        }
256
257        /*
258         * Send if we owe peer an ACK.
259         */
260        if (tp->t_flags & TF_ACKNOW)
261                goto send;
262        if ((flags & TH_RST) ||
263            ((flags & TH_SYN) && (tp->t_flags & TF_NEEDSYN) == 0))
264                goto send;
265        if (SEQ_GT(tp->snd_up, tp->snd_una))
266                goto send;
267        /*
268         * If our state indicates that FIN should be sent
269         * and we have not yet done so, or we're retransmitting the FIN,
270         * then we need to send.
271         */
272        if (flags & TH_FIN &&
273            ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una))
274                goto send;
275
276        /*
277         * TCP window updates are not reliable, rather a polling protocol
278         * using ``persist'' packets is used to insure receipt of window
279         * updates.  The three ``states'' for the output side are:
280         *      idle                    not doing retransmits or persists
281         *      persisting              to move a small or zero window
282         *      (re)transmitting        and thereby not persisting
283         *
284         * tp->t_timer[TCPT_PERSIST]
285         *      is set when we are in persist state.
286         * tp->t_force
287         *      is set when we are called to send a persist packet.
288         * tp->t_timer[TCPT_REXMT]
289         *      is set when we are retransmitting
290         * The output side is idle when both timers are zero.
291         *
292         * If send window is too small, there is data to transmit, and no
293         * retransmit or persist is pending, then go to persist state.
294         * If nothing happens soon, send when timer expires:
295         * if window is nonzero, transmit what we can,
296         * otherwise force out a byte.
297         */
298        if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 &&
299            tp->t_timer[TCPT_PERSIST] == 0) {
300                tp->t_rxtshift = 0;
301                tcp_setpersist(tp);
302        }
303
304        /*
305         * No reason to send a segment, just return.
306         */
307        return (0);
308
309send:
310        /*
311         * Before ESTABLISHED, force sending of initial options
312         * unless TCP set not to do any options.
313         * NOTE: we assume that the IP/TCP header plus TCP options
314         * always fit in a single mbuf, leaving room for a maximum
315         * link header, i.e.
316         *      max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MHLEN
317         */
318        optlen = 0;
319        hdrlen = sizeof (struct tcpiphdr);
320        if (flags & TH_SYN) {
321                tp->snd_nxt = tp->iss;
322                if ((tp->t_flags & TF_NOOPT) == 0) {
323                        u_short mss;
324
325                        opt[0] = TCPOPT_MAXSEG;
326                        opt[1] = TCPOLEN_MAXSEG;
327                        mss = htons((u_short) tcp_mssopt(tp));
328                        (void)memcpy(opt + 2, &mss, sizeof(mss));
329                        optlen = TCPOLEN_MAXSEG;
330
331                        if ((tp->t_flags & TF_REQ_SCALE) &&
332                            ((flags & TH_ACK) == 0 ||
333                            (tp->t_flags & TF_RCVD_SCALE))) {
334                                *((u_long *) (opt + optlen)) = htonl(
335                                        TCPOPT_NOP << 24 |
336                                        TCPOPT_WINDOW << 16 |
337                                        TCPOLEN_WINDOW << 8 |
338                                        tp->request_r_scale);
339                                optlen += 4;
340                        }
341                }
342        }
343
344        /*
345         * Send a timestamp and echo-reply if this is a SYN and our side
346         * wants to use timestamps (TF_REQ_TSTMP is set) or both our side
347         * and our peer have sent timestamps in our SYN's.
348         */
349        if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
350            (flags & TH_RST) == 0 &&
351            ((flags & TH_ACK) == 0 ||
352             (tp->t_flags & TF_RCVD_TSTMP))) {
353                u_long *lp = (u_long *)(opt + optlen);
354
355                /* Form timestamp option as shown in appendix A of RFC 1323. */
356                *lp++ = htonl(TCPOPT_TSTAMP_HDR);
357                *lp++ = htonl(tcp_now);
358                *lp   = htonl(tp->ts_recent);
359                optlen += TCPOLEN_TSTAMP_APPA;
360        }
361
362        /*
363         * Send `CC-family' options if our side wants to use them (TF_REQ_CC),
364         * options are allowed (!TF_NOOPT) and it's not a RST.
365         */
366        if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC &&
367             (flags & TH_RST) == 0) {
368                switch (flags & (TH_SYN|TH_ACK)) {
369                /*
370                 * This is a normal ACK, send CC if we received CC before
371                 * from our peer.
372                 */
373                case TH_ACK:
374                        if (!(tp->t_flags & TF_RCVD_CC))
375                                break;
376                        /*FALLTHROUGH*/
377
378                /*
379                 * We can only get here in T/TCP's SYN_SENT* state, when
380                 * we're a sending a non-SYN segment without waiting for
381                 * the ACK of our SYN.  A check above assures that we only
382                 * do this if our peer understands T/TCP.
383                 */
384                case 0:
385                        opt[optlen++] = TCPOPT_NOP;
386                        opt[optlen++] = TCPOPT_NOP;
387                        opt[optlen++] = TCPOPT_CC;
388                        opt[optlen++] = TCPOLEN_CC;
389                        *(u_int32_t *)&opt[optlen] = htonl(tp->cc_send);
390
391                        optlen += 4;
392                        break;
393
394                /*
395                 * This is our initial SYN, check whether we have to use
396                 * CC or CC.new.
397                 */
398                case TH_SYN:
399                        opt[optlen++] = TCPOPT_NOP;
400                        opt[optlen++] = TCPOPT_NOP;
401                        opt[optlen++] = tp->t_flags & TF_SENDCCNEW ?
402                                                TCPOPT_CCNEW : TCPOPT_CC;
403                        opt[optlen++] = TCPOLEN_CC;
404                        *(u_int32_t *)&opt[optlen] = htonl(tp->cc_send);
405                        optlen += 4;
406                        break;
407
408                /*
409                 * This is a SYN,ACK; send CC and CC.echo if we received
410                 * CC from our peer.
411                 */
412                case (TH_SYN|TH_ACK):
413                        if (tp->t_flags & TF_RCVD_CC) {
414                                opt[optlen++] = TCPOPT_NOP;
415                                opt[optlen++] = TCPOPT_NOP;
416                                opt[optlen++] = TCPOPT_CC;
417                                opt[optlen++] = TCPOLEN_CC;
418                                *(u_int32_t *)&opt[optlen] =
419                                        htonl(tp->cc_send);
420                                optlen += 4;
421                                opt[optlen++] = TCPOPT_NOP;
422                                opt[optlen++] = TCPOPT_NOP;
423                                opt[optlen++] = TCPOPT_CCECHO;
424                                opt[optlen++] = TCPOLEN_CC;
425                                *(u_int32_t *)&opt[optlen] =
426                                        htonl(tp->cc_recv);
427                                optlen += 4;
428                        }
429                        break;
430                }
431        }
432
433        hdrlen += optlen;
434
435        /*
436         * Adjust data length if insertion of options will
437         * bump the packet length beyond the t_maxopd length.
438         * Clear the FIN bit because we cut off the tail of
439         * the segment.
440         */
441         if (len + optlen > tp->t_maxopd) {
442                /*
443                 * If there is still more to send, don't close the connection.
444                 */
445                flags &= ~TH_FIN;
446                len = tp->t_maxopd - optlen;
447                sendalot = 1;
448        }
449
450/*#ifdef DIAGNOSTIC*/
451        if (max_linkhdr + hdrlen > MHLEN)
452                panic("tcphdr too big");
453/*#endif*/
454
455        /*
456         * Grab a header mbuf, attaching a copy of data to
457         * be transmitted, and initialize the header from
458         * the template for sends on this connection.
459         */
460        if (len) {
461                if (tp->t_force && len == 1)
462                        tcpstat.tcps_sndprobe++;
463                else if (SEQ_LT(tp->snd_nxt, tp->snd_max)) {
464                        tcpstat.tcps_sndrexmitpack++;
465                        tcpstat.tcps_sndrexmitbyte += len;
466                } else {
467                        tcpstat.tcps_sndpack++;
468                        tcpstat.tcps_sndbyte += len;
469                }
470#ifdef notyet
471                if ((m = m_copypack(so->so_snd.sb_mb, off,
472                    (int)len, max_linkhdr + hdrlen)) == 0) {
473                        error = ENOBUFS;
474                        goto out;
475                }
476                /*
477                 * m_copypack left space for our hdr; use it.
478                 */
479                m->m_len += hdrlen;
480                m->m_data -= hdrlen;
481#else
482                MGETHDR(m, M_DONTWAIT, MT_HEADER);
483                if (m == NULL) {
484                        error = ENOBUFS;
485                        goto out;
486                }
487                m->m_data += max_linkhdr;
488                m->m_len = hdrlen;
489                if (len <= MHLEN - hdrlen - max_linkhdr) {
490                        m_copydata(so->so_snd.sb_mb, off, (int) len,
491                            mtod(m, caddr_t) + hdrlen);
492                        m->m_len += len;
493                } else {
494                        m->m_next = m_copy(so->so_snd.sb_mb, off, (int) len);
495                        if (m->m_next == 0) {
496                                (void) m_free(m);
497                                error = ENOBUFS;
498                                goto out;
499                        }
500                }
501#endif
502                /*
503                 * If we're sending everything we've got, set PUSH.
504                 * (This will keep happy those implementations which only
505                 * give data to the user when a buffer fills or
506                 * a PUSH comes in.)
507                 */
508                if (off + len == so->so_snd.sb_cc)
509                        flags |= TH_PUSH;
510        } else {
511                if (tp->t_flags & TF_ACKNOW)
512                        tcpstat.tcps_sndacks++;
513                else if (flags & (TH_SYN|TH_FIN|TH_RST))
514                        tcpstat.tcps_sndctrl++;
515                else if (SEQ_GT(tp->snd_up, tp->snd_una))
516                        tcpstat.tcps_sndurg++;
517                else
518                        tcpstat.tcps_sndwinup++;
519
520                MGETHDR(m, M_DONTWAIT, MT_HEADER);
521                if (m == NULL) {
522                        error = ENOBUFS;
523                        goto out;
524                }
525                m->m_data += max_linkhdr;
526                m->m_len = hdrlen;
527        }
528        m->m_pkthdr.rcvif = (struct ifnet *)0;
529        ti = mtod(m, struct tcpiphdr *);
530        if (tp->t_template == 0)
531                panic("tcp_output");
532        (void)memcpy(ti, tp->t_template, sizeof (struct tcpiphdr));
533
534        /*
535         * Fill in fields, remembering maximum advertised
536         * window for use in delaying messages about window sizes.
537         * If resending a FIN, be sure not to use a new sequence number.
538         */
539        if (flags & TH_FIN && tp->t_flags & TF_SENTFIN &&
540            tp->snd_nxt == tp->snd_max)
541                tp->snd_nxt--;
542        /*
543         * If we are doing retransmissions, then snd_nxt will
544         * not reflect the first unsent octet.  For ACK only
545         * packets, we do not want the sequence number of the
546         * retransmitted packet, we want the sequence number
547         * of the next unsent octet.  So, if there is no data
548         * (and no SYN or FIN), use snd_max instead of snd_nxt
549         * when filling in ti_seq.  But if we are in persist
550         * state, snd_max might reflect one byte beyond the
551         * right edge of the window, so use snd_nxt in that
552         * case, since we know we aren't doing a retransmission.
553         * (retransmit and persist are mutually exclusive...)
554         */
555        if (len || (flags & (TH_SYN|TH_FIN)) || tp->t_timer[TCPT_PERSIST])
556                ti->ti_seq = htonl(tp->snd_nxt);
557        else
558                ti->ti_seq = htonl(tp->snd_max);
559        ti->ti_ack = htonl(tp->rcv_nxt);
560        if (optlen) {
561                bcopy(opt, ti + 1, optlen);
562                ti->ti_off = (sizeof (struct tcphdr) + optlen) >> 2;
563        }
564        ti->ti_flags = flags;
565        /*
566         * Calculate receive window.  Don't shrink window,
567         * but avoid silly window syndrome.
568         */
569        if (win < (long)(so->so_rcv.sb_hiwat / 4) && win < (long)tp->t_maxseg)
570                win = 0;
571        if (win < (long)(tp->rcv_adv - tp->rcv_nxt))
572                win = (long)(tp->rcv_adv - tp->rcv_nxt);
573        if (win > (long)TCP_MAXWIN << tp->rcv_scale)
574                win = (long)TCP_MAXWIN << tp->rcv_scale;
575        ti->ti_win = htons((u_short) (win>>tp->rcv_scale));
576        if (SEQ_GT(tp->snd_up, tp->snd_nxt)) {
577                ti->ti_urp = htons((u_short)(tp->snd_up - tp->snd_nxt));
578                ti->ti_flags |= TH_URG;
579        } else
580                /*
581                 * If no urgent pointer to send, then we pull
582                 * the urgent pointer to the left edge of the send window
583                 * so that it doesn't drift into the send window on sequence
584                 * number wraparound.
585                 */
586                tp->snd_up = tp->snd_una;               /* drag it along */
587
588        /*
589         * Put TCP length in extended header, and then
590         * checksum extended header and data.
591         */
592        if (len + optlen)
593                ti->ti_len = htons((u_short)(sizeof (struct tcphdr) +
594                    optlen + len));
595        ti->ti_sum = in_cksum(m, (int)(hdrlen + len));
596
597        /*
598         * In transmit state, time the transmission and arrange for
599         * the retransmit.  In persist state, just set snd_max.
600         */
601        if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) {
602                tcp_seq startseq = tp->snd_nxt;
603
604                /*
605                 * Advance snd_nxt over sequence space of this segment.
606                 */
607                if (flags & (TH_SYN|TH_FIN)) {
608                        if (flags & TH_SYN)
609                                tp->snd_nxt++;
610                        if (flags & TH_FIN) {
611                                tp->snd_nxt++;
612                                tp->t_flags |= TF_SENTFIN;
613                        }
614                }
615                tp->snd_nxt += len;
616                if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
617                        tp->snd_max = tp->snd_nxt;
618                        /*
619                         * Time this transmission if not a retransmission and
620                         * not currently timing anything.
621                         */
622                        if (tp->t_rtt == 0) {
623                                tp->t_rtt = 1;
624                                tp->t_rtseq = startseq;
625                                tcpstat.tcps_segstimed++;
626                        }
627                }
628
629                /*
630                 * Set retransmit timer if not currently set,
631                 * and not doing an ack or a keep-alive probe.
632                 * Initial value for retransmit timer is smoothed
633                 * round-trip time + 2 * round-trip time variance.
634                 * Initialize shift counter which is used for backoff
635                 * of retransmit time.
636                 */
637                if (tp->t_timer[TCPT_REXMT] == 0 &&
638                    tp->snd_nxt != tp->snd_una) {
639                        tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
640                        if (tp->t_timer[TCPT_PERSIST]) {
641                                tp->t_timer[TCPT_PERSIST] = 0;
642                                tp->t_rxtshift = 0;
643                        }
644                }
645        } else
646                if (SEQ_GT(tp->snd_nxt + len, tp->snd_max))
647                        tp->snd_max = tp->snd_nxt + len;
648
649#ifdef TCPDEBUG
650        /*
651         * Trace.
652         */
653        if (so->so_options & SO_DEBUG)
654                tcp_trace(TA_OUTPUT, tp->t_state, tp, ti, 0);
655#endif
656
657        /*
658         * Fill in IP length and desired time to live and
659         * send to IP level.  There should be a better way
660         * to handle ttl and tos; we could keep them in
661         * the template, but need a way to checksum without them.
662         */
663        m->m_pkthdr.len = hdrlen + len;
664#ifdef TUBA
665        if (tp->t_tuba_pcb)
666                error = tuba_output(m, tp);
667        else
668#endif
669    {
670#if 1
671        struct rtentry *rt;
672#endif
673        ((struct ip *)ti)->ip_len = m->m_pkthdr.len;
674        ((struct ip *)ti)->ip_ttl = tp->t_inpcb->inp_ip_ttl;    /* XXX */
675        ((struct ip *)ti)->ip_tos = tp->t_inpcb->inp_ip_tos;    /* XXX */
676#if 1
677        /*
678         * See if we should do MTU discovery.  We do it only if the following
679         * are true:
680         *      1) we have a valid route to the destination
681         *      2) the MTU is not locked (if it is, then discovery has been
682         *         disabled)
683         */
684        if ((rt = tp->t_inpcb->inp_route.ro_rt)
685            && rt->rt_flags & RTF_UP
686            && !(rt->rt_rmx.rmx_locks & RTV_MTU)) {
687                ((struct ip *)ti)->ip_off |= IP_DF;
688        }
689#endif
690        error = ip_output(m, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route,
691            so->so_options & SO_DONTROUTE, 0);
692    }
693        if (error) {
694out:
695                if (error == ENOBUFS) {
696                        tcp_quench(tp->t_inpcb, 0);
697                        return (0);
698                }
699#if 1
700                if (error == EMSGSIZE) {
701                        /*
702                         * ip_output() will have already fixed the route
703                         * for us.  tcp_mtudisc() will, as its last action,
704                         * initiate retransmission, so it is important to
705                         * not do so here.
706                         */
707                        tcp_mtudisc(tp->t_inpcb, 0);
708                        return 0;
709                }
710#endif
711                if ((error == EHOSTUNREACH || error == ENETDOWN)
712                    && TCPS_HAVERCVDSYN(tp->t_state)) {
713                        tp->t_softerror = error;
714                        return (0);
715                }
716                return (error);
717        }
718        tcpstat.tcps_sndtotal++;
719
720        /*
721         * Data sent (as far as we can tell).
722         * If this advertises a larger window than any other segment,
723         * then remember the size of the advertised window.
724         * Any pending ACK has now been sent.
725         */
726        if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv))
727                tp->rcv_adv = tp->rcv_nxt + win;
728        tp->last_ack_sent = tp->rcv_nxt;
729        tp->t_flags &= ~(TF_ACKNOW|TF_DELACK);
730        if (sendalot)
731                goto again;
732        return (0);
733}
734
735void
736tcp_setpersist(
737        register struct tcpcb *tp)
738{
739        register int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
740
741        if (tp->t_timer[TCPT_REXMT])
742                panic("tcp_output REXMT");
743        /*
744         * Start/restart persistance timer.
745         */
746        TCPT_RANGESET(tp->t_timer[TCPT_PERSIST],
747            t * tcp_backoff[tp->t_rxtshift],
748            TCPTV_PERSMIN, TCPTV_PERSMAX);
749        if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
750                tp->t_rxtshift++;
751}
Note: See TracBrowser for help on using the repository browser.