source: rtems/cpukit/libnetworking/netinet/tcp_subr.c @ f583bb2

Last change on this file since f583bb2 was f583bb2, checked in by Ralf Corsepius <ralf.corsepius@…>, on 05/14/05 at 07:08:17

2005-05-14 Ralf Corsepius <ralf.corsepius@…>

  • libnetworking/netinet/tcp_usrreq.c: Cosmetics from FreeBSD.
  • libnetworking/netinet/tcp_subr.c: Partial update from FreeBSD.
  • Property mode set to 100644
File size: 19.4 KB
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
 *      The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)tcp_subr.c  8.2 (Berkeley) 5/24/95
 * $FreeBSD: src/sys/netinet/tcp_subr.c,v 1.226 2005/05/07 00:41:36 cperciva Exp $
 */

/*
 *      $Id$
 */

#include "opt_tcpdebug.h"

#include <sys/param.h>
#include <sys/queue.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/protosw.h>
#include <sys/errno.h>

#include <net/route.h>
#include <net/if.h>

#define _IP_VHL
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
#include <netinet/ip_icmp.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcpip.h>
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif

int     tcp_mssdflt = TCP_MSS;
SYSCTL_INT(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt, CTLFLAG_RW,
    &tcp_mssdflt , 0, "Default TCP Maximum Segment Size");

static int      tcp_do_rfc1323 = 1;
#if !defined(__rtems__)
static int      tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ;
SYSCTL_INT(_net_inet_tcp, TCPCTL_RTTDFLT, rttdflt,
        CTLFLAG_RW, &tcp_rttdflt , 0, "");

SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323,
        CTLFLAG_RW, &tcp_do_rfc1323 , 0, "");
#endif

static void     tcp_notify __P((struct inpcb *, int));

/*
 * Target size of TCP PCB hash table. Will be rounded down to a prime
 * number.
 */
#ifndef TCBHASHSIZE
#define TCBHASHSIZE     128
#endif

/*
 * Tcp initialization
 */
void
tcp_init()
{

        tcp_iss = random();     /* wrong, but better than a constant */
        tcp_ccgen = 1;
        LIST_INIT(&tcb);
        tcbinfo.listhead = &tcb;
        tcbinfo.hashbase = hashinit(TCBHASHSIZE, M_PCB, &tcbinfo.hashmask);
        if (max_protohdr < sizeof(struct tcpiphdr))
                max_protohdr = sizeof(struct tcpiphdr);
        if (max_linkhdr + sizeof(struct tcpiphdr) > MHLEN)
                panic("tcp_init");
}
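
/*
 * A note on the panic above (an observation, not part of the original
 * comment): it guards an assumption made at transmit time.  Both
 * tcp_respond() and tcp_output() reserve max_linkhdr bytes in a fresh
 * mbuf and then copy the combined TCP/IP template (struct tcpiphdr)
 * into the same mbuf, so the two together must fit within MHLEN.
 */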

/*
 * Create template to be used to send tcp packets on a connection.
 * Call after host entry created, allocates an mbuf and fills
 * in a skeletal tcp/ip header, minimizing the amount of work
 * necessary when the connection is used.
 */
struct tcpiphdr *
tcp_template(tp)
        struct tcpcb *tp;
{
        register struct inpcb *inp = tp->t_inpcb;
        register struct mbuf *m;
        register struct tcpiphdr *n;

        if ((n = tp->t_template) == 0) {
                m = m_get(M_DONTWAIT, MT_HEADER);
                if (m == NULL)
                        return (0);
                m->m_len = sizeof (struct tcpiphdr);
                n = mtod(m, struct tcpiphdr *);
        }
        n->ti_next = n->ti_prev = 0;
        n->ti_x1 = 0;
        n->ti_pr = IPPROTO_TCP;
        n->ti_len = htons(sizeof (struct tcpiphdr) - sizeof (struct ip));
        n->ti_src = inp->inp_laddr;
        n->ti_dst = inp->inp_faddr;
        n->ti_sport = inp->inp_lport;
        n->ti_dport = inp->inp_fport;
        n->ti_seq = 0;
        n->ti_ack = 0;
        n->ti_x2 = 0;
        n->ti_off = 5;
        n->ti_flags = 0;
        n->ti_win = 0;
        n->ti_sum = 0;
        n->ti_urp = 0;
        return (n);
}

/*
 * Send a single message to the TCP at address specified by
 * the given TCP/IP header.  If m == 0, then we make a copy
 * of the tcpiphdr at ti and send directly to the addressed host.
 * This is used to force keep alive messages out using the TCP
 * template for a connection tp->t_template.  If flags are given
 * then we send a message back to the TCP which originated the
 * segment ti, and discard the mbuf containing it and any other
 * attached mbufs.
 *
 * In any case the ack and sequence number of the transmitted
 * segment are as specified by the parameters.
 *
 * NOTE: If m != NULL, then ti must point to *inside* the mbuf.
 */
void
tcp_respond(tp, ti, m, ack, seq, flags)
        struct tcpcb *tp;
        register struct tcpiphdr *ti;
        register struct mbuf *m;
        tcp_seq ack, seq;
        int flags;
{
        register int tlen;
        int win = 0;
        struct route *ro = 0;
        struct route sro;

        if (tp) {
                win = sbspace(&tp->t_inpcb->inp_socket->so_rcv);
                ro = &tp->t_inpcb->inp_route;
        } else {
                ro = &sro;
                bzero(ro, sizeof *ro);
        }
        if (m == NULL) {
                m = m_gethdr(M_DONTWAIT, MT_HEADER);
                if (m == NULL)
                        return;
#ifdef TCP_COMPAT_42
                tlen = 1;
#else
                tlen = 0;
#endif
                m->m_data += max_linkhdr;
                *mtod(m, struct tcpiphdr *) = *ti;
                ti = mtod(m, struct tcpiphdr *);
                flags = TH_ACK;
        } else {
                m_freem(m->m_next);
                m->m_next = NULL;
                m->m_data = (caddr_t)ti;
                m->m_len = sizeof (struct tcpiphdr);
                tlen = 0;
#define xchg(a,b,type) { type t; t=a; a=b; b=t; }
                xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, u_long);
                xchg(ti->ti_dport, ti->ti_sport, u_short);
#undef xchg
        }
        ti->ti_len = htons((u_short)(sizeof (struct tcphdr) + tlen));
        tlen += sizeof (struct tcpiphdr);
        m->m_len = tlen;
        m->m_pkthdr.len = tlen;
        m->m_pkthdr.rcvif = (struct ifnet *) 0;
        ti->ti_next = ti->ti_prev = 0;
        ti->ti_x1 = 0;
        ti->ti_seq = htonl(seq);
        ti->ti_ack = htonl(ack);
        ti->ti_x2 = 0;
        ti->ti_off = sizeof (struct tcphdr) >> 2;
        ti->ti_flags = flags;
        if (tp)
                ti->ti_win = htons((u_short) (win >> tp->rcv_scale));
        else
                ti->ti_win = htons((u_short)win);
        ti->ti_urp = 0;
        ti->ti_sum = 0;
        ti->ti_sum = in_cksum(m, tlen);
        ((struct ip *)ti)->ip_len = tlen;
        ((struct ip *)ti)->ip_ttl = ip_defttl;
#ifdef TCPDEBUG
        if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
                tcp_trace(TA_OUTPUT, 0, tp, ti, 0);
#endif
        (void) ip_output(m, NULL, ro, 0, NULL);
        if (ro == &sro && ro->ro_rt) {
                RTFREE(ro->ro_rt);
        }
}
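
/*
 * Typical callers, for illustration (the actual call sites are in
 * tcp_timer.c and tcp_input.c): the keepalive timer probes a peer by
 * reusing the connection template with m == NULL,
 *
 *      tcp_respond(tp, tp->t_template, (struct mbuf *)NULL,
 *          tp->rcv_nxt, tp->snd_una - 1, 0);
 *
 * while tcp_input answers an unacceptable segment by passing that
 * segment's own header back with TH_RST set in flags, which takes the
 * m != NULL path and swaps the addresses and ports in place.
 */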

/*
 * Create a new TCP control block, making an
 * empty reassembly queue and hooking it to the argument
 * protocol control block.
 */
struct tcpcb *
tcp_newtcpcb(inp)
        struct inpcb *inp;
{
        struct tcpcb *tp;

        tp = malloc(sizeof(*tp), M_PCB, M_NOWAIT);
        if (tp == NULL)
                return ((struct tcpcb *)0);
        bzero((char *) tp, sizeof(struct tcpcb));
        tp->seg_next = tp->seg_prev = (struct tcpiphdr *)tp;
        tp->t_maxseg = tp->t_maxopd = tcp_mssdflt;

        if (tcp_do_rfc1323)
                tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP);
        tp->t_inpcb = inp;
        /*
         * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
         * rtt estimate.  Set rttvar so that srtt + 4 * rttvar gives
         * reasonable initial retransmit time.
         */
        tp->t_srtt = TCPTV_SRTTBASE;
        tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4;
        tp->t_rttmin = TCPTV_MIN;
        tp->t_rxtcur = TCPTV_RTOBASE;
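        /*
         * Worked example, assuming the stock constants (TCPTV_SRTTBASE = 0,
         * TCPTV_RTOBASE = 3 * PR_SLOWHZ, TCP_RTTVAR_SHIFT = 4, PR_SLOWHZ = 2):
         * t_rttvar becomes (6 << 4) / 4 = 24, i.e. 1.5 ticks once the scale
         * factor of 16 is removed, so srtt + 4 * rttvar = 6 ticks = 3 seconds,
         * matching the t_rxtcur just set above.
         */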
        tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
        tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
        inp->inp_ip_ttl = ip_defttl;
        inp->inp_ppcb = (caddr_t)tp;
        return (tp);
}

/*
 * Drop a TCP connection, reporting
 * the specified error.  If connection is synchronized,
 * then send a RST to peer.
 */
struct tcpcb *
tcp_drop(tp, errnum)
        register struct tcpcb *tp;
        int errnum;
{
        struct socket *so = tp->t_inpcb->inp_socket;

        if (TCPS_HAVERCVDSYN(tp->t_state)) {
                tp->t_state = TCPS_CLOSED;
                (void) tcp_output(tp);
                tcpstat.tcps_drops++;
        } else
                tcpstat.tcps_conndrops++;
        if (errnum == ETIMEDOUT && tp->t_softerror)
                errnum = tp->t_softerror;
        so->so_error = errnum;
        return (tcp_close(tp));
}

/*
 * Close a TCP control block:
 *      discard all space held by the tcp
 *      discard internet protocol block
 *      wake up any sleepers
 */
struct tcpcb *
tcp_close(tp)
        struct tcpcb *tp;
{
        register struct tcpiphdr *t;
        struct inpcb *inp = tp->t_inpcb;
        struct socket *so = inp->inp_socket;
        register struct mbuf *m;
        register struct rtentry *rt;

        /*
         * If we got enough samples through the srtt filter,
         * save the rtt and rttvar in the routing entry.
         * 'Enough' is arbitrarily defined as 16 samples.
         * 16 samples is enough for the srtt filter to converge
         * to within 5% of the correct value; fewer samples and
         * we could save a very bogus rtt.
         *
         * Don't update the default route's characteristics and don't
         * update anything that the user "locked".
         */
        if (tp->t_rttupdated >= 16 &&
            (rt = inp->inp_route.ro_rt) &&
            ((struct sockaddr_in *)rt_key(rt))->sin_addr.s_addr != INADDR_ANY) {
                register u_long i = 0;

                if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) {
                        i = tp->t_srtt *
                            (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE));
                        if (rt->rt_rmx.rmx_rtt && i)
                                /*
                                 * filter this update to half the old & half
                                 * the new values, converting scale.
                                 * See route.h and tcp_var.h for a
                                 * description of the scaling constants.
                                 */
                                rt->rt_rmx.rmx_rtt =
                                    (rt->rt_rmx.rmx_rtt + i) / 2;
                        else
                                rt->rt_rmx.rmx_rtt = i;
                        tcpstat.tcps_cachedrtt++;
                }
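                /*
                 * Scale conversion by the numbers, assuming the stock
                 * constants (RTM_RTTUNIT = 1000000, PR_SLOWHZ = 2,
                 * TCP_RTT_SCALE = 32): a smoothed RTT of one second is
                 * held in t_srtt as 2 ticks * 32 = 64, and the multiply
                 * above (by 1000000 / 64 = 15625) yields 1000000 rmx
                 * units, i.e. the route metrics store microseconds.
                 */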
                if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) {
                        i = tp->t_rttvar *
                            (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE));
                        if (rt->rt_rmx.rmx_rttvar && i)
                                rt->rt_rmx.rmx_rttvar =
                                    (rt->rt_rmx.rmx_rttvar + i) / 2;
                        else
                                rt->rt_rmx.rmx_rttvar = i;
                        tcpstat.tcps_cachedrttvar++;
                }
                /*
                 * update the pipelimit (ssthresh) if it has been updated
                 * already or if a pipesize was specified & the threshold
                 * got below half the pipesize.  I.e., wait for bad news
                 * before we start updating, then update on both good
                 * and bad news.
                 */
                if (((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 &&
                    ((i = tp->snd_ssthresh) != 0) && rt->rt_rmx.rmx_ssthresh) ||
                    i < (rt->rt_rmx.rmx_sendpipe / 2)) {
                        /*
                         * convert the limit from user data bytes to
                         * packets then to packet data bytes.
                         */
                        i = (i + tp->t_maxseg / 2) / tp->t_maxseg;
                        if (i < 2)
                                i = 2;
                        i *= (u_long)(tp->t_maxseg + sizeof (struct tcpiphdr));
                        if (rt->rt_rmx.rmx_ssthresh)
                                rt->rt_rmx.rmx_ssthresh =
                                    (rt->rt_rmx.rmx_ssthresh + i) / 2;
                        else
                                rt->rt_rmx.rmx_ssthresh = i;
                        tcpstat.tcps_cachedssthresh++;
                }
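                /*
                 * Example of the conversion above: with snd_ssthresh =
                 * 8192 user bytes and t_maxseg = 1460, rounding gives
                 * (8192 + 730) / 1460 = 6 packets, which become
                 * 6 * (1460 + 40) = 9000 packet data bytes (40 being
                 * sizeof(struct tcpiphdr) on a 32-bit build).
                 */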
        }
        /* free the reassembly queue, if any */
        t = tp->seg_next;
        while (t != (struct tcpiphdr *)tp) {
                t = (struct tcpiphdr *)t->ti_next;
#if (defined(__GNUC__) && (defined(__arm__) || defined(__mips__)))
                LD32_UNALGN((struct tcpiphdr *)t->ti_prev, m);
#else
                m = REASS_MBUF((struct tcpiphdr *)t->ti_prev);
#endif
                remque(t->ti_prev);
                m_freem(m);
        }
        if (tp->t_template)
                (void) m_free(dtom(tp->t_template));
        free(tp, M_PCB);
        inp->inp_ppcb = 0;
        soisdisconnected(so);
        in_pcbdetach(inp);
        tcpstat.tcps_closed++;
        return ((struct tcpcb *)0);
}

void
tcp_drain()
{

}

/*
 * Notify a tcp user of an asynchronous error;
 * store error as soft error, but wake up user
 * (for now, won't do anything until can select for soft error).
 */
static void
tcp_notify(inp, error)
        struct inpcb *inp;
        int error;
{
        struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb;
        struct socket *so = inp->inp_socket;

        /*
         * Ignore some errors if we are hooked up.
         * If connection hasn't completed, has retransmitted several times,
         * and receives a second error, give up now.  This is better
         * than waiting a long time to establish a connection that
         * can never complete.
         */
        if (tp->t_state == TCPS_ESTABLISHED &&
            (error == EHOSTUNREACH || error == ENETUNREACH ||
             error == EHOSTDOWN)) {
                return;
        } else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 &&
            tp->t_softerror)
                so->so_error = error;
        else
                tp->t_softerror = error;
        soconnwakeup(so);
        sorwakeup(so);
        sowwakeup(so);
}

#ifdef __rtems__
#define INP_INFO_RLOCK(a)
#define INP_INFO_RUNLOCK(a)
#define INP_LOCK(a)
#define INP_UNLOCK(a)
#endif
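
/*
 * On RTEMS the whole stack already runs serialized under the single
 * network semaphore, so the FreeBSD inpcb locking macros above can
 * safely expand to nothing.
 */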

static int
tcp_pcblist(SYSCTL_HANDLER_ARGS)
{
        int error, i, n, s;
        struct inpcb *inp, **inp_list;
        inp_gen_t gencnt;
        struct xinpgen xig;

        /*
         * The process of preparing the TCB list is too time-consuming and
         * resource-intensive to repeat twice on every request.
         */
        if (req->oldptr == NULL) {
                n = tcbinfo.ipi_count;
                req->oldidx = 2 * (sizeof xig)
                        + (n + n/8) * sizeof(struct xtcpcb);
                return (0);
        }
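
        /*
         * The n/8 slack in the size estimate above gives the caller room
         * for roughly 12.5% growth in the connection count between this
         * sizing pass and the later copy-out pass.
         */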

        if (req->newptr != NULL)
                return (EPERM);

        /*
         * OK, now we're committed to doing something.
         */
        s = splnet();
        INP_INFO_RLOCK(&tcbinfo);
        gencnt = tcbinfo.ipi_gencnt;
        n = tcbinfo.ipi_count;
        INP_INFO_RUNLOCK(&tcbinfo);
        splx(s);

        sysctl_wire_old_buffer(req, 2 * (sizeof xig)
                + n * sizeof(struct xtcpcb));

        xig.xig_len = sizeof xig;
        xig.xig_count = n;
        xig.xig_gen = gencnt;
/*      xig.xig_sogen = so_gencnt; removed by ccj */
        error = SYSCTL_OUT(req, &xig, sizeof xig);
        if (error)
                return error;

        /* ccj: exit early if the count is 0 */
        if (!n)
                return error;

        inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
        if (inp_list == 0)
                return ENOMEM;

        s = splnet();
        INP_INFO_RLOCK(&tcbinfo);
        for (inp = LIST_FIRST(tcbinfo.listhead), i = 0; inp && i < n;
             inp = LIST_NEXT(inp, inp_list)) {
                INP_LOCK(inp);
                if (inp->inp_gencnt <= gencnt)
#if 0
      &&
                    cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0)
#endif
                        inp_list[i++] = inp;
                INP_UNLOCK(inp);
        }
        INP_INFO_RUNLOCK(&tcbinfo);
        splx(s);
        n = i;

        error = 0;
        for (i = 0; i < n; i++) {
                inp = inp_list[i];
                INP_LOCK(inp);
                if (inp->inp_gencnt <= gencnt) {
                        struct xtcpcb xt;
                        caddr_t inp_ppcb;
                        xt.xt_len = sizeof xt;
                        /* XXX should avoid extra copy */
                        bcopy(inp, &xt.xt_inp, sizeof *inp);
                        inp_ppcb = inp->inp_ppcb;
                        if (inp_ppcb != NULL)
                                bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp);
                        else
                                bzero((char *) &xt.xt_tp, sizeof xt.xt_tp);
#if 0
                        if (inp->inp_socket)
                                sotoxsocket(inp->inp_socket, &xt.xt_socket);
#endif
                        error = SYSCTL_OUT(req, &xt, sizeof xt);
                }
                INP_UNLOCK(inp);
        }
        if (!error) {
                /*
                 * Give the user an updated idea of our state.
                 * If the generation differs from what we told
                 * her before, she knows that something happened
                 * while we were processing this request, and it
                 * might be necessary to retry.
                 */
                s = splnet();
                INP_INFO_RLOCK(&tcbinfo);
                xig.xig_gen = tcbinfo.ipi_gencnt;
#if 0
                xig.xig_sogen = so_gencnt;
#endif
                xig.xig_count = tcbinfo.ipi_count;
                INP_INFO_RUNLOCK(&tcbinfo);
                splx(s);
                error = SYSCTL_OUT(req, &xig, sizeof xig);
        }
        free(inp_list, M_TEMP);
        return error;
}

SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0,
            tcp_pcblist, "S,xtcpcb", "List of active TCP connections");

void
tcp_ctlinput(cmd, sa, vip)
        int cmd;
        struct sockaddr *sa;
        void *vip;
{
        struct ip *ip = vip;
        struct tcphdr *th;
        void (*notify) __P((struct inpcb *, int)) = tcp_notify;

        if (cmd == PRC_QUENCH)
                notify = tcp_quench;
#if 1
        else if (cmd == PRC_MSGSIZE)
                notify = tcp_mtudisc;
#endif
        else if (!PRC_IS_REDIRECT(cmd) &&
                 ((unsigned)cmd > PRC_NCMDS || inetctlerrmap[cmd] == 0))
                return;
        if (ip) {
                th = (struct tcphdr *)((caddr_t)ip
                                       + (IP_VHL_HL(ip->ip_vhl) << 2));
                in_pcbnotify(&tcb, sa, th->th_dport, ip->ip_src, th->th_sport,
                        cmd, notify);
        } else
                in_pcbnotify(&tcb, sa, 0, zeroin_addr, 0, cmd, notify);
}

/*
 * When a source quench is received, close congestion window
 * to one segment.  We will gradually open it again as we proceed.
 */
void
tcp_quench(inp, errnum)
        struct inpcb *inp;
        int errnum;
{
        struct tcpcb *tp = intotcpcb(inp);

        if (tp)
                tp->snd_cwnd = tp->t_maxseg;
}
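
/*
 * For example, a connection with t_maxseg = 1460 has its congestion
 * window cut to a single 1460-byte segment here; slow start then grows
 * it again by one segment per arriving ACK.
 */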

/*
 * When `need fragmentation' ICMP is received, update our idea of the MSS
 * based on the new value in the route.  Also nudge TCP to send something,
 * since we know the packet we just sent was dropped.
 * This duplicates some code in the tcp_mss() function in tcp_input.c.
 */
void
tcp_mtudisc(inp, errnum)
        struct inpcb *inp;
        int errnum;
{
        struct tcpcb *tp = intotcpcb(inp);
        struct rtentry *rt;
        struct rmxp_tao *taop;
        struct socket *so = inp->inp_socket;
        int offered;
        int mss;

        if (tp) {
                rt = tcp_rtlookup(inp);
                if (!rt || !rt->rt_rmx.rmx_mtu) {
                        tp->t_maxopd = tp->t_maxseg = tcp_mssdflt;
                        return;
                }
                taop = rmx_taop(rt->rt_rmx);
                offered = taop->tao_mssopt;
                mss = rt->rt_rmx.rmx_mtu - sizeof(struct tcpiphdr);
                if (offered)
                        mss = min(mss, offered);
                /*
                 * XXX - The above conditional probably violates the TCP
                 * spec.  The problem is that, since we don't know the
                 * other end's MSS, we are supposed to use a conservative
                 * default.  But, if we do that, then MTU discovery will
                 * never actually take place, because the conservative
                 * default is much less than the MTUs typically seen
                 * on the Internet today.  For the moment, we'll sweep
                 * this under the carpet.
                 *
                 * The conservative default might not actually be a problem
                 * if the only case this occurs is when sending an initial
                 * SYN with options and data to a host we've never talked
                 * to before.  Then, they will reply with an MSS value which
                 * will get recorded and the new parameters should get
                 * recomputed.  For Further Study.
                 */
                if (tp->t_maxopd <= mss)
                        return;
                tp->t_maxopd = mss;

                if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
                    (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP)
                        mss -= TCPOLEN_TSTAMP_APPA;
                if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC &&
                    (tp->t_flags & TF_RCVD_CC) == TF_RCVD_CC)
                        mss -= TCPOLEN_CC_APPA;
#if     (MCLBYTES & (MCLBYTES - 1)) == 0
                if (mss > MCLBYTES)
                        mss &= ~(MCLBYTES-1);
#else
                if (mss > MCLBYTES)
                        mss = mss / MCLBYTES * MCLBYTES;
#endif
                if (so->so_snd.sb_hiwat < mss)
                        mss = so->so_snd.sb_hiwat;

                tp->t_maxseg = mss;

                tcpstat.tcps_mturesent++;
                tp->t_rtt = 0;
                tp->snd_nxt = tp->snd_una;
                tcp_output(tp);
        }
}
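
/*
 * Worked example: an ICMP "need fragmentation" for a route whose
 * rmx_mtu is 576 gives mss = 576 - 40 = 536 bytes of payload; if both
 * sides also negotiated RFC 1323 timestamps, TCPOLEN_TSTAMP_APPA (12)
 * is subtracted as well, leaving t_maxseg = 524.  The retransmission
 * from snd_una then refits the dropped data into the smaller segments.
 */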

/*
 * Look-up the routing entry to the peer of this inpcb.  If no route
 * is found and it cannot be allocated, then return NULL.  This routine
 * is called by TCP routines that access the rmx structure and by tcp_mss
 * to get the interface MTU.
 */
struct rtentry *
tcp_rtlookup(inp)
        struct inpcb *inp;
{
        struct route *ro;
        struct rtentry *rt;

        ro = &inp->inp_route;
        rt = ro->ro_rt;
        if (rt == NULL || !(rt->rt_flags & RTF_UP)) {
                /* No route yet, so try to acquire one */
                if (inp->inp_faddr.s_addr != INADDR_ANY) {
                        ro->ro_dst.sa_family = AF_INET;
                        ro->ro_dst.sa_len = sizeof(ro->ro_dst);
                        ((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
                                inp->inp_faddr;
                        rtalloc(ro);
                        rt = ro->ro_rt;
                }
        }
        return rt;
}

/*
 * Return a pointer to the cached information about the remote host.
 * The cached information is stored in the protocol specific part of
 * the route metrics.
 */
struct rmxp_tao *
tcp_gettaocache(inp)
        struct inpcb *inp;
{
        struct rtentry *rt = tcp_rtlookup(inp);

        /* Make sure this is a host route and is up. */
        if (rt == NULL ||
            (rt->rt_flags & (RTF_UP|RTF_HOST)) != (RTF_UP|RTF_HOST))
                return NULL;

        return rmx_taop(rt->rt_rmx);
}