source: rtems/cpukit/libnetworking/netinet/tcp_subr.c @ 6d25a161

4.104.115
Last change on this file since 6d25a161 was b3f8c9e1, checked in by Ralf Corsepius <ralf.corsepius@…>, on 12/22/08 at 07:47:28

Include <errno.h> (POSIX,C99) instead of <sys/errno.h> (BSD'ism).

  • Property mode set to 100644
File size: 19.3 KB
Line 
1/*
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
3 *      The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *      @(#)tcp_subr.c  8.2 (Berkeley) 5/24/95
30 * $FreeBSD: src/sys/netinet/tcp_subr.c,v 1.226 2005/05/07 00:41:36 cperciva Exp $
31 */
32 
33/*
34 *      $Id$
35 */
36
37#include "opt_tcpdebug.h"
38
39#include <sys/param.h>
40#include <rtems/bsd/sys/queue.h>
41#include <sys/proc.h>
42#include <sys/systm.h>
43#include <sys/kernel.h>
44#include <sys/sysctl.h>
45#include <sys/malloc.h>
46#include <sys/mbuf.h>
47#include <sys/socket.h>
48#include <sys/socketvar.h>
49#include <sys/protosw.h>
50#include <errno.h>
51
52#include <net/route.h>
53#include <net/if.h>
54
55#define _IP_VHL
56#include <netinet/in.h>
57#include <netinet/in_systm.h>
58#include <netinet/ip.h>
59#include <netinet/in_pcb.h>
60#include <netinet/in_var.h>
61#include <netinet/ip_var.h>
62#include <netinet/ip_icmp.h>
63#include <netinet/tcp.h>
64#include <netinet/tcp_fsm.h>
65#include <netinet/tcp_seq.h>
66#include <netinet/tcp_timer.h>
67#include <netinet/tcp_var.h>
68#include <netinet/tcpip.h>
69#ifdef TCPDEBUG
70#include <netinet/tcp_debug.h>
71#endif
72
/* Default TCP maximum segment size, exported as a read-write sysctl. */
int     tcp_mssdflt = TCP_MSS;
SYSCTL_INT(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt, CTLFLAG_RW,
    &tcp_mssdflt , 0, "Default TCP Maximum Segment Size");

/* Non-zero requests the RFC 1323 window-scale and timestamp options. */
static int      tcp_do_rfc1323 = 1;
#if !defined(__rtems__)
/* These sysctl knobs are compiled out on RTEMS. */
static int      tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ;
SYSCTL_INT(_net_inet_tcp, TCPCTL_RTTDFLT, rttdflt,
        CTLFLAG_RW, &tcp_rttdflt , 0, "");

SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323,
        CTLFLAG_RW, &tcp_do_rfc1323 , 0, "");
#endif

static void     tcp_notify(struct inpcb *, int);

/*
 * Target size of TCP PCB hash table. Will be rounded down to a prime
 * number.
 */
#ifndef TCBHASHSIZE
#define TCBHASHSIZE     128
#endif
96
97/*
98 * Tcp initialization
99 */
100void
101tcp_init(void)
102{
103
104        tcp_iss = random();     /* wrong, but better than a constant */
105        tcp_ccgen = 1;
106        LIST_INIT(&tcb);
107        tcbinfo.listhead = &tcb;
108        tcbinfo.hashbase = hashinit(TCBHASHSIZE, M_PCB, &tcbinfo.hashmask);
109        if (max_protohdr < sizeof(struct tcpiphdr))
110                max_protohdr = sizeof(struct tcpiphdr);
111        if (max_linkhdr + sizeof(struct tcpiphdr) > MHLEN)
112                panic("tcp_init");
113}
114
115/*
116 * Create template to be used to send tcp packets on a connection.
117 * Call after host entry created, allocates an mbuf and fills
118 * in a skeletal tcp/ip header, minimizing the amount of work
119 * necessary when the connection is used.
120 */
121struct tcpiphdr *
122tcp_template(struct tcpcb *tp)
123{
124        register struct inpcb *inp = tp->t_inpcb;
125        register struct mbuf *m;
126        register struct tcpiphdr *n;
127
128        if ((n = tp->t_template) == 0) {
129                m = m_get(M_DONTWAIT, MT_HEADER);
130                if (m == NULL)
131                        return (0);
132                m->m_len = sizeof (struct tcpiphdr);
133                n = mtod(m, struct tcpiphdr *);
134        }
135        n->ti_next = n->ti_prev = 0;
136        n->ti_x1 = 0;
137        n->ti_pr = IPPROTO_TCP;
138        n->ti_len = htons(sizeof (struct tcpiphdr) - sizeof (struct ip));
139        n->ti_src = inp->inp_laddr;
140        n->ti_dst = inp->inp_faddr;
141        n->ti_sport = inp->inp_lport;
142        n->ti_dport = inp->inp_fport;
143        n->ti_seq = 0;
144        n->ti_ack = 0;
145        n->ti_x2 = 0;
146        n->ti_off = 5;
147        n->ti_flags = 0;
148        n->ti_win = 0;
149        n->ti_sum = 0;
150        n->ti_urp = 0;
151        return (n);
152}
153
154/*
155 * Send a single message to the TCP at address specified by
156 * the given TCP/IP header.  If m == 0, then we make a copy
157 * of the tcpiphdr at ti and send directly to the addressed host.
158 * This is used to force keep alive messages out using the TCP
159 * template for a connection tp->t_template.  If flags are given
160 * then we send a message back to the TCP which originated the
161 * segment ti, and discard the mbuf containing it and any other
162 * attached mbufs.
163 *
164 * In any case the ack and sequence number of the transmitted
165 * segment are as specified by the parameters.
166 *
167 * NOTE: If m != NULL, then ti must point to *inside* the mbuf.
168 */
/*
 * Send a single message to the TCP at address specified by
 * the given TCP/IP header.  If m == 0, then we make a copy
 * of the tcpiphdr at ti and send directly to the addressed host.
 * This is used to force keep alive messages out using the TCP
 * template for a connection tp->t_template.  If flags are given
 * then we send a message back to the TCP which originated the
 * segment ti, and discard the mbuf containing it and any other
 * attached mbufs.
 *
 * In any case the ack and sequence number of the transmitted
 * segment are as specified by the parameters.
 *
 * NOTE: If m != NULL, then ti must point to *inside* the mbuf.
 */
void
tcp_respond(struct tcpcb *tp, struct tcpiphdr *ti, struct mbuf *m,
    tcp_seq ack, tcp_seq seq, int flags)
{
	register int tlen;
	int win = 0;
	struct route *ro = 0;
	struct route sro;	/* scratch route for the tp == NULL case */

	if (tp) {
		/* Advertise the receive window of the attached socket. */
		win = sbspace(&tp->t_inpcb->inp_socket->so_rcv);
		ro = &tp->t_inpcb->inp_route;
	} else {
		/* No connection: use a zeroed route on the stack. */
		ro = &sro;
		bzero(ro, sizeof *ro);
	}
	if (m == NULL) {
		/* Keepalive case: build a fresh packet from the template. */
		m = m_gethdr(M_DONTWAIT, MT_HEADER);
		if (m == NULL)
			return;
#ifdef TCP_COMPAT_42
		/* 4.2BSD-compatible keepalives carry one byte of data. */
		tlen = 1;
#else
		tlen = 0;
#endif
		m->m_data += max_linkhdr;
		*mtod(m, struct tcpiphdr *) = *ti;
		/* From here on ti aliases the header inside the new mbuf. */
		ti = mtod(m, struct tcpiphdr *);
		flags = TH_ACK;
	} else {
		/*
		 * Reply case: reuse the first mbuf of the offending
		 * segment, drop any chained mbufs, and swap the source
		 * and destination address/port pairs in place.
		 */
		m_freem(m->m_next);
		m->m_next = NULL;
		m->m_data = (caddr_t)ti;
		m->m_len = sizeof (struct tcpiphdr);
		tlen = 0;
#define xchg(a,b,type) { type t; t=a; a=b; b=t; }
		xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, u_long);
		xchg(ti->ti_dport, ti->ti_sport, u_short);
#undef xchg
	}
	/* Fill in the remaining pseudo-header and TCP header fields. */
	ti->ti_len = htons((u_short)(sizeof (struct tcphdr) + tlen));
	tlen += sizeof (struct tcpiphdr);
	m->m_len = tlen;
	m->m_pkthdr.len = tlen;
	m->m_pkthdr.rcvif = (struct ifnet *) 0;
	ti->ti_next = ti->ti_prev = 0;
	ti->ti_x1 = 0;
	ti->ti_seq = htonl(seq);
	ti->ti_ack = htonl(ack);
	ti->ti_x2 = 0;
	ti->ti_off = sizeof (struct tcphdr) >> 2;
	ti->ti_flags = flags;
	if (tp)
		/* Apply the negotiated RFC 1323 window scale. */
		ti->ti_win = htons((u_short) (win >> tp->rcv_scale));
	else
		ti->ti_win = htons((u_short)win);
	ti->ti_urp = 0;
	/* Checksum is computed over the zeroed ti_sum field. */
	ti->ti_sum = 0;
	ti->ti_sum = in_cksum(m, tlen);
	/* Convert the pseudo-header back into a real IP header. */
	((struct ip *)ti)->ip_len = tlen;
	((struct ip *)ti)->ip_ttl = ip_defttl;
#ifdef TCPDEBUG
	if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
		tcp_trace(TA_OUTPUT, 0, tp, ti, 0);
#endif
	(void) ip_output(m, NULL, ro, 0, NULL);
	if (ro == &sro && ro->ro_rt) {
		/* Release any route acquired through the stack-local sro. */
		RTFREE(ro->ro_rt);
	}
}
239
240/*
241 * Create a new TCP control block, making an
242 * empty reassembly queue and hooking it to the argument
243 * protocol control block.
244 */
245struct tcpcb *
246tcp_newtcpcb(struct inpcb *inp)
247{
248        struct tcpcb *tp;
249
250        tp = malloc(sizeof(*tp), M_PCB, M_NOWAIT);
251        if (tp == NULL)
252                return ((struct tcpcb *)0);
253        bzero((char *) tp, sizeof(struct tcpcb));
254        tp->seg_next = tp->seg_prev = (struct tcpiphdr *)tp;
255        tp->t_maxseg = tp->t_maxopd = tcp_mssdflt;
256
257        if (tcp_do_rfc1323)
258                tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP);
259        tp->t_inpcb = inp;
260        /*
261         * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
262         * rtt estimate.  Set rttvar so that srtt + 4 * rttvar gives
263         * reasonable initial retransmit time.
264         */
265        tp->t_srtt = TCPTV_SRTTBASE;
266        tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4;
267        tp->t_rttmin = TCPTV_MIN;
268        tp->t_rxtcur = TCPTV_RTOBASE;
269        tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
270        tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
271        inp->inp_ip_ttl = ip_defttl;
272        inp->inp_ppcb = (caddr_t)tp;
273        return (tp);
274}
275
276/*
277 * Drop a TCP connection, reporting
278 * the specified error.  If connection is synchronized,
279 * then send a RST to peer.
280 */
281struct tcpcb *
282tcp_drop(struct tcpcb *tp, int errnum)
283{
284        struct socket *so = tp->t_inpcb->inp_socket;
285
286        if (TCPS_HAVERCVDSYN(tp->t_state)) {
287                tp->t_state = TCPS_CLOSED;
288                (void) tcp_output(tp);
289                tcpstat.tcps_drops++;
290        } else
291                tcpstat.tcps_conndrops++;
292        if (errnum == ETIMEDOUT && tp->t_softerror)
293                errnum = tp->t_softerror;
294        so->so_error = errnum;
295        return (tcp_close(tp));
296}
297
298/*
299 * Close a TCP control block:
300 *      discard all space held by the tcp
301 *      discard internet protocol block
302 *      wake up any sleepers
303 */
/*
 * Close a TCP control block:
 *      discard all space held by the tcp
 *      discard internet protocol block
 *      wake up any sleepers
 * Returns NULL so callers can write "tp = tcp_close(tp);".
 */
struct tcpcb *
tcp_close(struct tcpcb *tp)
{
	register struct tcpiphdr *t;
	struct inpcb *inp = tp->t_inpcb;
	struct socket *so = inp->inp_socket;
	register struct mbuf *m;
	register struct rtentry *rt;

	/*
	 * If we got enough samples through the srtt filter,
	 * save the rtt and rttvar in the routing entry.
	 * 'Enough' is arbitrarily defined as the 16 samples.
	 * 16 samples is enough for the srtt filter to converge
	 * to within 5% of the correct value; fewer samples and
	 * we could save a very bogus rtt.
	 *
	 * Don't update the default route's characteristics and don't
	 * update anything that the user "locked".
	 */
	if (tp->t_rttupdated >= 16 &&
	    (rt = inp->inp_route.ro_rt) &&
	    ((struct sockaddr_in *)rt_key(rt))->sin_addr.s_addr != INADDR_ANY) {
		register u_long i = 0;

		if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) {
			/* Convert srtt from internal ticks to route units. */
			i = tp->t_srtt *
			    (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE));
			if (rt->rt_rmx.rmx_rtt && i)
				/*
				 * filter this update to half the old & half
				 * the new values, converting scale.
				 * See route.h and tcp_var.h for a
				 * description of the scaling constants.
				 */
				rt->rt_rmx.rmx_rtt =
				    (rt->rt_rmx.rmx_rtt + i) / 2;
			else
				rt->rt_rmx.rmx_rtt = i;
			tcpstat.tcps_cachedrtt++;
		}
		if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) {
			/* Same half-old/half-new filter for the variance. */
			i = tp->t_rttvar *
			    (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE));
			if (rt->rt_rmx.rmx_rttvar && i)
				rt->rt_rmx.rmx_rttvar =
				    (rt->rt_rmx.rmx_rttvar + i) / 2;
			else
				rt->rt_rmx.rmx_rttvar = i;
			tcpstat.tcps_cachedrttvar++;
		}
		/*
		 * update the pipelimit (ssthresh) if it has been updated
		 * already or if a pipesize was specified & the threshhold
		 * got below half the pipesize.  I.e., wait for bad news
		 * before we start updating, then update on both good
		 * and bad news.
		 */
		if (((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 &&
		    ((i = tp->snd_ssthresh) != 0) && rt->rt_rmx.rmx_ssthresh) ||
		    i < (rt->rt_rmx.rmx_sendpipe / 2)) {
			/*
			 * convert the limit from user data bytes to
			 * packets then to packet data bytes.
			 */
			i = (i + tp->t_maxseg / 2) / tp->t_maxseg;
			if (i < 2)
				i = 2;
			i *= (u_long)(tp->t_maxseg + sizeof (struct tcpiphdr));
			if (rt->rt_rmx.rmx_ssthresh)
				rt->rt_rmx.rmx_ssthresh =
				    (rt->rt_rmx.rmx_ssthresh + i) / 2;
			else
				rt->rt_rmx.rmx_ssthresh = i;
			tcpstat.tcps_cachedssthresh++;
		}
	}
	/* free the reassembly queue, if any */
	t = tp->seg_next;
	while (t != (struct tcpiphdr *)tp) {
		/* Advance first; then free the segment we just stepped past. */
		t = (struct tcpiphdr *)t->ti_next;
#if (defined(__GNUC__) && (defined(__arm__) || defined(__mips__)))
	/* Unaligned-safe load of the stashed mbuf pointer on ARM/MIPS. */
	LD32_UNALGN((struct tcpiphdr *)t->ti_prev,m);
#else
		m = REASS_MBUF((struct tcpiphdr *)t->ti_prev);
#endif
		remque(t->ti_prev);
		m_freem(m);
	}
	if (tp->t_template)
		(void) m_free(dtom(tp->t_template));
	free(tp, M_PCB);
	inp->inp_ppcb = 0;
	soisdisconnected(so);
	in_pcbdetach(inp);
	tcpstat.tcps_closed++;
	return ((struct tcpcb *)0);
}
402
/*
 * Reclaim memory under pressure.  This stack keeps no global TCP
 * state worth releasing here, so the routine is intentionally empty.
 */
void
tcp_drain(void)
{
}
408
409/*
410 * Notify a tcp user of an asynchronous error;
411 * store error as soft error, but wake up user
412 * (for now, won't do anything until can select for soft error).
413 */
414static void
415tcp_notify(struct inpcb *inp, int error)
416{
417        struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb;
418        struct socket *so = inp->inp_socket;
419
420        /*
421         * Ignore some errors if we are hooked up.
422         * If connection hasn't completed, has retransmitted several times,
423         * and receives a second error, give up now.  This is better
424         * than waiting a long time to establish a connection that
425         * can never complete.
426         */
427        if (tp->t_state == TCPS_ESTABLISHED &&
428            (error == EHOSTUNREACH || error == ENETUNREACH ||
429             error == EHOSTDOWN)) {
430                return;
431        } else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 &&
432            tp->t_softerror)
433                so->so_error = error;
434        else
435                tp->t_softerror = error;
436        soconnwakeup (so);
437        sorwakeup(so);
438        sowwakeup(so);
439}
440
441#ifdef __rtems__
442#define INP_INFO_RLOCK(a)
443#define INP_INFO_RUNLOCK(a)
444#define INP_LOCK(a)
445#define INP_UNLOCK(a)
446#endif
447
/*
 * Sysctl handler that copies out the list of active TCP PCBs as a
 * sequence of xtcpcb structures bracketed by two xinpgen records.
 * The trailing xinpgen lets the caller detect concurrent changes
 * (generation mismatch) and retry.  Read-only: writes return EPERM.
 */
static int
tcp_pcblist(SYSCTL_HANDLER_ARGS)
{
	int error, i, n, s;
	struct inpcb *inp, **inp_list;
	inp_gen_t gencnt;
	struct xinpgen xig;

	/*
	 * The process of preparing the TCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == NULL) {
		/* Size probe: report a generous estimate (n + n/8 slack). */
		n = tcbinfo.ipi_count;
		req->oldidx = 2 * (sizeof xig)
			+ (n + n/8) * sizeof(struct xtcpcb);
		return (0);
	}

	if (req->newptr != NULL)
		return (EPERM);

	/*
	 * OK, now we're committed to doing something.
	 */
	s = splnet();
	INP_INFO_RLOCK(&tcbinfo);
	/* Snapshot the generation and count under the lock. */
	gencnt = tcbinfo.ipi_gencnt;
	n = tcbinfo.ipi_count;
	INP_INFO_RUNLOCK(&tcbinfo);
	splx(s);

	sysctl_wire_old_buffer(req, 2 * (sizeof xig)
		+ n * sizeof(struct xtcpcb));

	/* Leading generation record. */
	xig.xig_len = sizeof xig;
	xig.xig_count = n;
	xig.xig_gen = gencnt;
/*      xig.xig_sogen = so_gencnt; remove by ccj */
	error = SYSCTL_OUT(req, &xig, sizeof xig);
	if (error)
		return error;

	/* ccj add exit if the count is 0 */
	if (!n)
		return error;

	inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
	if (inp_list == 0)
		return ENOMEM;

	/* Collect (up to n) PCBs not newer than our snapshot generation. */
	s = splnet();
	INP_INFO_RLOCK(&tcbinfo);
	for (inp = LIST_FIRST(tcbinfo.listhead), i = 0; inp && i < n;
	     inp = LIST_NEXT(inp, inp_list)) {
		INP_LOCK(inp);
		if (inp->inp_gencnt <= gencnt)
#if 0
      &&
		    cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0)
#endif
			inp_list[i++] = inp;
		INP_UNLOCK(inp);
	}
	INP_INFO_RUNLOCK(&tcbinfo);
	splx(s);
	n = i;

	/* Copy each collected PCB (and its tcpcb, if any) to userland. */
	error = 0;
	for (i = 0; i < n; i++) {
		inp = inp_list[i];
		INP_LOCK(inp);
		if (inp->inp_gencnt <= gencnt) {
			struct xtcpcb xt;
			caddr_t inp_ppcb;
			xt.xt_len = sizeof xt;
			/* XXX should avoid extra copy */
			bcopy(inp, &xt.xt_inp, sizeof *inp);
			inp_ppcb = inp->inp_ppcb;
			if (inp_ppcb != NULL)
				bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp);
			else
				bzero((char *) &xt.xt_tp, sizeof xt.xt_tp);
#if 0
			if (inp->inp_socket)
				sotoxsocket(inp->inp_socket, &xt.xt_socket);
#endif
			error = SYSCTL_OUT(req, &xt, sizeof xt);
		}
		INP_UNLOCK(inp);
	}
	if (!error) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		s = splnet();
		INP_INFO_RLOCK(&tcbinfo);
		xig.xig_gen = tcbinfo.ipi_gencnt;
#if 0
		xig.xig_sogen = so_gencnt;
#endif
		xig.xig_count = tcbinfo.ipi_count;
		INP_INFO_RUNLOCK(&tcbinfo);
		splx(s);
		error = SYSCTL_OUT(req, &xig, sizeof xig);
	}
	free(inp_list, M_TEMP);
	return error;
}
561
562SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0,
563            tcp_pcblist, "S,xtcpcb", "List of active TCP connections");
564
565void
566tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
567{
568        struct ip *ip = vip;
569        struct tcphdr *th;
570        void (*notify)(struct inpcb *, int) = tcp_notify;
571
572        if (cmd == PRC_QUENCH)
573                notify = tcp_quench;
574#if 1
575        else if (cmd == PRC_MSGSIZE)
576                notify = tcp_mtudisc;
577#endif
578        else if (!PRC_IS_REDIRECT(cmd) &&
579                 ((unsigned)cmd > PRC_NCMDS || inetctlerrmap[cmd] == 0))
580                return;
581        if (ip != NULL) {
582#ifdef _IP_VHL
583                th = (struct tcphdr *)((caddr_t)ip
584                                       + (IP_VHL_HL(ip->ip_vhl) << 2));
585#else
586                th = (struct tcphdr *)((caddr_t)ip
587                                       + (ip->ip_hl << 2));
588#endif
589                in_pcbnotify(&tcb, sa, th->th_dport, ip->ip_src, th->th_sport,
590                        cmd, notify);
591        } else
592                in_pcbnotify(&tcb, sa, 0, zeroin_addr, 0, cmd, notify);
593}
594
595/*
596 * When a source quench is received, close congestion window
597 * to one segment.  We will gradually open it again as we proceed.
598 */
599void
600tcp_quench( struct inpcb *inp, int errnum)
601{
602        struct tcpcb *tp = intotcpcb(inp);
603
604        if (tp)
605                tp->snd_cwnd = tp->t_maxseg;
606}
607
608/*
609 * When `need fragmentation' ICMP is received, update our idea of the MSS
610 * based on the new value in the route.  Also nudge TCP to send something,
611 * since we know the packet we just sent was dropped.
612 * This duplicates some code in the tcp_mss() function in tcp_input.c.
613 */
/*
 * When `need fragmentation' ICMP is received, update our idea of the MSS
 * based on the new value in the route.  Also nudge TCP to send something,
 * since we know the packet we just sent was dropped.
 * This duplicates some code in the tcp_mss() function in tcp_input.c.
 */
void
tcp_mtudisc(struct inpcb *inp, int errnum)
{
	struct tcpcb *tp = intotcpcb(inp);
	struct rtentry *rt;
	struct rmxp_tao *taop;
	struct socket *so = inp->inp_socket;
	int offered;
	int mss;

	if (tp) {
		rt = tcp_rtlookup(inp);
		if (!rt || !rt->rt_rmx.rmx_mtu) {
			/* No route or no MTU info: fall back to default. */
			tp->t_maxopd = tp->t_maxseg = tcp_mssdflt;
			return;
		}
		taop = rmx_taop(rt->rt_rmx);
		/* Cap by the peer's advertised MSS, if we have one cached. */
		offered = taop->tao_mssopt;
		mss = rt->rt_rmx.rmx_mtu - sizeof(struct tcpiphdr);
		if (offered)
			mss = min(mss, offered);
		/*
		 * XXX - The above conditional probably violates the TCP
		 * spec.  The problem is that, since we don't know the
		 * other end's MSS, we are supposed to use a conservative
		 * default.  But, if we do that, then MTU discovery will
		 * never actually take place, because the conservative
		 * default is much less than the MTUs typically seen
		 * on the Internet today.  For the moment, we'll sweep
		 * this under the carpet.
		 *
		 * The conservative default might not actually be a problem
		 * if the only case this occurs is when sending an initial
		 * SYN with options and data to a host we've never talked
		 * to before.  Then, they will reply with an MSS value which
		 * will get recorded and the new parameters should get
		 * recomputed.  For Further Study.
		 */
		if (tp->t_maxopd <= mss)
			return;	/* new MTU is no smaller; nothing to do */
		tp->t_maxopd = mss;

		/* Deduct per-segment option overhead we expect to send. */
		if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
		    (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP)
			mss -= TCPOLEN_TSTAMP_APPA;
		if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC &&
		    (tp->t_flags & TF_RCVD_CC) == TF_RCVD_CC)
			mss -= TCPOLEN_CC_APPA;
		/* Round down to a multiple of MCLBYTES for mbuf efficiency. */
#if     (MCLBYTES & (MCLBYTES - 1)) == 0
		if (mss > MCLBYTES)
			mss &= ~(MCLBYTES-1);
#else
		if (mss > MCLBYTES)
			mss = mss / MCLBYTES * MCLBYTES;
#endif
		/* Never exceed what the send buffer can hold. */
		if (so->so_snd.sb_hiwat < mss)
			mss = so->so_snd.sb_hiwat;

		tp->t_maxseg = mss;

		tcpstat.tcps_mturesent++;
		/* Rewind to the oldest unacked data and retransmit now. */
		tp->t_rtt = 0;
		tp->snd_nxt = tp->snd_una;
		tcp_output(tp);
	}
}
680
681/*
682 * Look-up the routing entry to the peer of this inpcb.  If no route
683 * is found and it cannot be allocated, then return NULL.  This routine
684 * is called by TCP routines that access the rmx structure and by tcp_mss
685 * to get the interface MTU.
686 */
687struct rtentry *
688tcp_rtlookup(struct inpcb *inp)
689{
690        struct route *ro;
691        struct rtentry *rt;
692
693        ro = &inp->inp_route;
694        rt = ro->ro_rt;
695        if (rt == NULL || !(rt->rt_flags & RTF_UP)) {
696                /* No route yet, so try to acquire one */
697                if (inp->inp_faddr.s_addr != INADDR_ANY) {
698                        ro->ro_dst.sa_family = AF_INET;
699                        ro->ro_dst.sa_len = sizeof(ro->ro_dst);
700                        ((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
701                                inp->inp_faddr;
702                        rtalloc(ro);
703                        rt = ro->ro_rt;
704                }
705        }
706        return rt;
707}
708
709/*
710 * Return a pointer to the cached information about the remote host.
711 * The cached information is stored in the protocol specific part of
712 * the route metrics.
713 */
714struct rmxp_tao *
715tcp_gettaocache(struct inpcb *inp)
716{
717        struct rtentry *rt = tcp_rtlookup(inp);
718
719        /* Make sure this is a host route and is up. */
720        if (rt == NULL ||
721            (rt->rt_flags & (RTF_UP|RTF_HOST)) != (RTF_UP|RTF_HOST))
722                return NULL;
723
724        return rmx_taop(rt->rt_rmx);
725}
Note: See TracBrowser for help on using the repository browser.