source: rtems/cpukit/libnetworking/netinet/tcp_subr.c @ 5194a28

Branches: 4.10, 4.11, 4.8, 4.9, 5
Last change on this file since 5194a28 was 5194a28, checked in by Greg Menke <gregory.menke@…>, on 12/06/04 at 20:29:51

PR 730

  • cpu_asm.S: Collected PR 601 changes for commit to cvshead for rtems-4.7
  • Property mode set to 100644
File size: 20.1 KB
Line 
1/*
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
3 *      The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *      This product includes software developed by the University of
16 *      California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *      @(#)tcp_subr.c  8.2 (Berkeley) 5/24/95
34 *      $Id$
35 */
36
37#include "opt_tcpdebug.h"
38
39#include <sys/param.h>
40#include <sys/queue.h>
41#include <sys/proc.h>
42#include <sys/systm.h>
43#include <sys/kernel.h>
44#include <sys/sysctl.h>
45#include <sys/malloc.h>
46#include <sys/mbuf.h>
47#include <sys/socket.h>
48#include <sys/socketvar.h>
49#include <sys/protosw.h>
50#include <sys/errno.h>
51
52#include <net/route.h>
53#include <net/if.h>
54
55#define _IP_VHL
56#include <netinet/in.h>
57#include <netinet/in_systm.h>
58#include <netinet/ip.h>
59#include <netinet/in_pcb.h>
60#include <netinet/in_var.h>
61#include <netinet/ip_var.h>
62#include <netinet/ip_icmp.h>
63#include <netinet/tcp.h>
64#include <netinet/tcp_fsm.h>
65#include <netinet/tcp_seq.h>
66#include <netinet/tcp_timer.h>
67#include <netinet/tcp_var.h>
68#include <netinet/tcpip.h>
69#ifdef TCPDEBUG
70#include <netinet/tcp_debug.h>
71#endif
72
/* Default maximum segment size, used when the peer does not supply one. */
int     tcp_mssdflt = TCP_MSS;
SYSCTL_INT(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt,
        CTLFLAG_RW, &tcp_mssdflt , 0, "");

/* Enable negotiation of RFC 1323 (scale/timestamps) and RFC 1644 (T/TCP). */
static int      tcp_do_rfc1323 = 1;
static int      tcp_do_rfc1644 = 1;
#if !defined(__rtems__)
static int      tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ;
SYSCTL_INT(_net_inet_tcp, TCPCTL_RTTDFLT, rttdflt,
        CTLFLAG_RW, &tcp_rttdflt , 0, "");

SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323,
        CTLFLAG_RW, &tcp_do_rfc1323 , 0, "");

SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1644, rfc1644,
        CTLFLAG_RW, &tcp_do_rfc1644 , 0, "");
#endif

static void     tcp_cleartaocache(void);
static void     tcp_notify __P((struct inpcb *, int));

/*
 * Target size of TCP PCB hash table. Will be rounded down to a prime
 * number.
 */
#ifndef TCBHASHSIZE
#define TCBHASHSIZE     128
#endif
101
/*
 * Tcp initialization: seed the initial send sequence, reset the TAO
 * cache, set up the global TCB list and PCB hash table, and sanity
 * check that a tcp/ip header fits in an mbuf header.
 */
void
tcp_init()
{

	tcp_iss = random();	/* wrong, but better than a constant */
	tcp_ccgen = 1;		/* first connection-count value (RFC 1644) */
	tcp_cleartaocache();
	LIST_INIT(&tcb);
	tcbinfo.listhead = &tcb;
	tcbinfo.hashbase = hashinit(TCBHASHSIZE, M_PCB, &tcbinfo.hashmask);
	/* Reserve protocol-header space; panic early if MHLEN is too small. */
	if (max_protohdr < sizeof(struct tcpiphdr))
		max_protohdr = sizeof(struct tcpiphdr);
	if (max_linkhdr + sizeof(struct tcpiphdr) > MHLEN)
		panic("tcp_init");
}
120
/*
 * Create template to be used to send tcp packets on a connection.
 * Call after host entry created, allocates an mbuf and fills
 * in a skeletal tcp/ip header, minimizing the amount of work
 * necessary when the connection is used.
 *
 * Returns the template, or 0 if no mbuf could be allocated.  The
 * caller is responsible for storing it in tp->t_template.
 */
struct tcpiphdr *
tcp_template(tp)
	struct tcpcb *tp;
{
	register struct inpcb *inp = tp->t_inpcb;
	register struct mbuf *m;
	register struct tcpiphdr *n;

	/* Reuse an existing template; otherwise allocate a fresh mbuf. */
	if ((n = tp->t_template) == 0) {
		m = m_get(M_DONTWAIT, MT_HEADER);
		if (m == NULL)
			return (0);
		m->m_len = sizeof (struct tcpiphdr);
		n = mtod(m, struct tcpiphdr *);
	}
	/* Pseudo-header fields, consumed later by the checksum code. */
	n->ti_next = n->ti_prev = 0;
	n->ti_x1 = 0;
	n->ti_pr = IPPROTO_TCP;
	n->ti_len = htons(sizeof (struct tcpiphdr) - sizeof (struct ip));
	n->ti_src = inp->inp_laddr;
	n->ti_dst = inp->inp_faddr;
	n->ti_sport = inp->inp_lport;
	n->ti_dport = inp->inp_fport;
	/* Per-segment fields start zeroed; they are filled in at send time. */
	n->ti_seq = 0;
	n->ti_ack = 0;
	n->ti_x2 = 0;
	n->ti_off = 5;		/* TCP header length: 5 words, no options */
	n->ti_flags = 0;
	n->ti_win = 0;
	n->ti_sum = 0;
	n->ti_urp = 0;
	return (n);
}
160
/*
 * Send a single message to the TCP at address specified by
 * the given TCP/IP header.  If m == 0, then we make a copy
 * of the tcpiphdr at ti and send directly to the addressed host.
 * This is used to force keep alive messages out using the TCP
 * template for a connection tp->t_template.  If flags are given
 * then we send a message back to the TCP which originated the
 * segment ti, and discard the mbuf containing it and any other
 * attached mbufs.
 *
 * In any case the ack and sequence number of the transmitted
 * segment are as specified by the parameters.
 *
 * NOTE: If m != NULL, then ti must point to *inside* the mbuf.
 */
void
tcp_respond(tp, ti, m, ack, seq, flags)
	struct tcpcb *tp;
	register struct tcpiphdr *ti;
	register struct mbuf *m;
	tcp_seq ack, seq;
	int flags;
{
	register int tlen;
	int win = 0;
	struct route *ro = 0;
	struct route sro;		/* scratch route when tp == NULL */

	if (tp) {
		/* Advertise our receive window and reuse the pcb's route. */
		win = sbspace(&tp->t_inpcb->inp_socket->so_rcv);
		ro = &tp->t_inpcb->inp_route;
	} else {
		ro = &sro;
		bzero(ro, sizeof *ro);
	}
	if (m == 0) {
		/* Keepalive case: build a fresh segment from the template. */
		m = m_gethdr(M_DONTWAIT, MT_HEADER);
		if (m == NULL)
			return;
#ifdef TCP_COMPAT_42
		tlen = 1;	/* 4.2BSD keepalives carry one garbage byte */
#else
		tlen = 0;
#endif
		m->m_data += max_linkhdr;
		*mtod(m, struct tcpiphdr *) = *ti;
		ti = mtod(m, struct tcpiphdr *);
		flags = TH_ACK;
	} else {
		/* Reply case: recycle the received mbuf, swap the endpoints. */
		m_freem(m->m_next);
		m->m_next = 0;
		m->m_data = (caddr_t)ti;
		m->m_len = sizeof (struct tcpiphdr);
		tlen = 0;
#define xchg(a,b,type) { type t; t=a; a=b; b=t; }
		xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, u_long);
		xchg(ti->ti_dport, ti->ti_sport, u_short);
#undef xchg
	}
	ti->ti_len = htons((u_short)(sizeof (struct tcphdr) + tlen));
	tlen += sizeof (struct tcpiphdr);
	m->m_len = tlen;
	m->m_pkthdr.len = tlen;
	m->m_pkthdr.rcvif = (struct ifnet *) 0;
	ti->ti_next = ti->ti_prev = 0;
	ti->ti_x1 = 0;
	ti->ti_seq = htonl(seq);
	ti->ti_ack = htonl(ack);
	ti->ti_x2 = 0;
	ti->ti_off = sizeof (struct tcphdr) >> 2;	/* header length in 32-bit words */
	ti->ti_flags = flags;
	if (tp)
		ti->ti_win = htons((u_short) (win >> tp->rcv_scale));
	else
		ti->ti_win = htons((u_short)win);
	ti->ti_urp = 0;
	ti->ti_sum = 0;
	ti->ti_sum = in_cksum(m, tlen);	/* checksum after zeroing the field */
	((struct ip *)ti)->ip_len = tlen;
	((struct ip *)ti)->ip_ttl = ip_defttl;
#ifdef TCPDEBUG
	if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
		tcp_trace(TA_OUTPUT, 0, tp, ti, 0);
#endif
	(void) ip_output(m, NULL, ro, 0, NULL);
	/* Release the route allocated for the connectionless (sro) case. */
	if (ro == &sro && ro->ro_rt) {
		RTFREE(ro->ro_rt);
	}
}
250
/*
 * Create a new TCP control block, making an
 * empty reassembly queue and hooking it to the argument
 * protocol control block.  Returns NULL if allocation fails.
 */
struct tcpcb *
tcp_newtcpcb(inp)
	struct inpcb *inp;
{
	register struct tcpcb *tp;

	tp = malloc(sizeof(*tp), M_PCB, M_NOWAIT);
	if (tp == NULL)
		return ((struct tcpcb *)0);
	bzero((char *) tp, sizeof(struct tcpcb));
	/* Empty reassembly queue: the circular list head is the tcpcb itself. */
	tp->seg_next = tp->seg_prev = (struct tcpiphdr *)tp;
	tp->t_maxseg = tp->t_maxopd = tcp_mssdflt;

	if (tcp_do_rfc1323)
		tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP);
	if (tcp_do_rfc1644)
		tp->t_flags |= TF_REQ_CC;
	tp->t_inpcb = inp;
	/*
	 * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
	 * rtt estimate.  Set rttvar so that srtt + 4 * rttvar gives
	 * reasonable initial retransmit time.
	 */
	tp->t_srtt = TCPTV_SRTTBASE;
	tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4;
	tp->t_rttmin = TCPTV_MIN;
	tp->t_rxtcur = TCPTV_RTOBASE;
	/* Start with the windows wide open; slow start will trim cwnd. */
	tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
	tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
	inp->inp_ip_ttl = ip_defttl;
	inp->inp_ppcb = (caddr_t)tp;	/* back-pointer from inpcb to tcpcb */
	return (tp);
}
289
290/*
291 * Drop a TCP connection, reporting
292 * the specified error.  If connection is synchronized,
293 * then send a RST to peer.
294 */
295struct tcpcb *
296tcp_drop(tp, errnum)
297        register struct tcpcb *tp;
298        int errnum;
299{
300        struct socket *so = tp->t_inpcb->inp_socket;
301
302        if (TCPS_HAVERCVDSYN(tp->t_state)) {
303                tp->t_state = TCPS_CLOSED;
304                (void) tcp_output(tp);
305                tcpstat.tcps_drops++;
306        } else
307                tcpstat.tcps_conndrops++;
308        if (errnum == ETIMEDOUT && tp->t_softerror)
309                errnum = tp->t_softerror;
310        so->so_error = errnum;
311        return (tcp_close(tp));
312}
313
/*
 * Close a TCP control block:
 *      discard all space held by the tcp
 *      discard internet protocol block
 *      wake up any sleepers
 * Always returns NULL so callers can write tp = tcp_close(tp).
 */
struct tcpcb *
tcp_close(tp)
	register struct tcpcb *tp;
{
	register struct tcpiphdr *t;
	struct inpcb *inp = tp->t_inpcb;
	struct socket *so = inp->inp_socket;
	register struct mbuf *m;
	register struct rtentry *rt;

	/*
	 * If we got enough samples through the srtt filter,
	 * save the rtt and rttvar in the routing entry.
	 * 'Enough' is arbitrarily defined as the 16 samples.
	 * 16 samples is enough for the srtt filter to converge
	 * to within 5% of the correct value; fewer samples and
	 * we could save a very bogus rtt.
	 *
	 * Don't update the default route's characteristics and don't
	 * update anything that the user "locked".
	 */
	if (tp->t_rttupdated >= 16 &&
	    (rt = inp->inp_route.ro_rt) &&
	    ((struct sockaddr_in *)rt_key(rt))->sin_addr.s_addr != INADDR_ANY) {
		register u_long i = 0;

		if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) {
			i = tp->t_srtt *
			    (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE));
			if (rt->rt_rmx.rmx_rtt && i)
				/*
				 * filter this update to half the old & half
				 * the new values, converting scale.
				 * See route.h and tcp_var.h for a
				 * description of the scaling constants.
				 */
				rt->rt_rmx.rmx_rtt =
				    (rt->rt_rmx.rmx_rtt + i) / 2;
			else
				rt->rt_rmx.rmx_rtt = i;
			tcpstat.tcps_cachedrtt++;
		}
		if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) {
			i = tp->t_rttvar *
			    (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE));
			if (rt->rt_rmx.rmx_rttvar && i)
				rt->rt_rmx.rmx_rttvar =
				    (rt->rt_rmx.rmx_rttvar + i) / 2;
			else
				rt->rt_rmx.rmx_rttvar = i;
			tcpstat.tcps_cachedrttvar++;
		}
		/*
		 * update the pipelimit (ssthresh) if it has been updated
		 * already or if a pipesize was specified & the threshold
		 * got below half the pipesize.  I.e., wait for bad news
		 * before we start updating, then update on both good
		 * and bad news.
		 */
		if (((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 &&
		    ((i = tp->snd_ssthresh) != 0) && rt->rt_rmx.rmx_ssthresh) ||
		    i < (rt->rt_rmx.rmx_sendpipe / 2)) {
			/*
			 * convert the limit from user data bytes to
			 * packets then to packet data bytes.
			 */
			i = (i + tp->t_maxseg / 2) / tp->t_maxseg;
			if (i < 2)
				i = 2;
			i *= (u_long)(tp->t_maxseg + sizeof (struct tcpiphdr));
			if (rt->rt_rmx.rmx_ssthresh)
				rt->rt_rmx.rmx_ssthresh =
				    (rt->rt_rmx.rmx_ssthresh + i) / 2;
			else
				rt->rt_rmx.rmx_ssthresh = i;
			tcpstat.tcps_cachedssthresh++;
		}
	}
	/* free the reassembly queue, if any */
	t = tp->seg_next;
	while (t != (struct tcpiphdr *)tp) {
		t = (struct tcpiphdr *)t->ti_next;
#if (defined(__GNUC__) && (defined(__arm__) || defined(__mips__)))
	/* Unaligned-safe fetch of the stashed mbuf pointer on arm/mips. */
	LD32_UNALGN((struct tcpiphdr *)t->ti_prev,m);
#else
		m = REASS_MBUF((struct tcpiphdr *)t->ti_prev);
#endif
		remque(t->ti_prev);
		m_freem(m);
	}
	if (tp->t_template)
		(void) m_free(dtom(tp->t_template));
	free(tp, M_PCB);
	inp->inp_ppcb = 0;	/* detach the tcpcb before freeing the inpcb */
	soisdisconnected(so);
	in_pcbdetach(inp);
	tcpstat.tcps_closed++;
	return ((struct tcpcb *)0);
}
419
/*
 * Protocol drain hook.  TCP keeps nothing here that it can usefully
 * release, so this routine is intentionally a no-op.
 */
void
tcp_drain()
{
}
425
/*
 * Notify a tcp user of an asynchronous error;
 * store error as soft error, but wake up user
 * (for now, won't do anything until can select for soft error).
 */
static void
tcp_notify(inp, error)
	struct inpcb *inp;
	int error;
{
	register struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb;
	register struct socket *so = inp->inp_socket;

	/*
	 * Ignore some errors if we are hooked up.
	 * If connection hasn't completed, has retransmitted several times,
	 * and receives a second error, give up now.  This is better
	 * than waiting a long time to establish a connection that
	 * can never complete.
	 */
	if (tp->t_state == TCPS_ESTABLISHED &&
	     (error == EHOSTUNREACH || error == ENETUNREACH ||
	      error == EHOSTDOWN)) {
		return;
	} else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 &&
	    tp->t_softerror)
		so->so_error = error;	/* repeated pre-connect failure: hard error */
	else
		tp->t_softerror = error;	/* remember as soft error only */
	/* Wake anyone blocked on connect, read, or write. */
	soconnwakeup (so);
	sorwakeup(so);
	sowwakeup(so);
}
459
460#ifdef __rtems__
461#define INP_INFO_RLOCK(a)
462#define INP_INFO_RUNLOCK(a)
463#define INP_LOCK(a)
464#define INP_UNLOCK(a)
465#endif
466
/*
 * Sysctl handler that exports the list of active TCP pcbs: an xinpgen
 * header, one xtcpcb record per pcb, and a trailing xinpgen whose
 * generation count lets the caller detect concurrent changes.
 */
static int
tcp_pcblist(SYSCTL_HANDLER_ARGS)
{
	int error, i, n, s;
	struct inpcb *inp, **inp_list;
	inp_gen_t gencnt;
	struct xinpgen xig;

	/*
	 * The process of preparing the TCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == 0) {
		/* Size probe only: report a generous space estimate. */
		n = tcbinfo.ipi_count;
		req->oldidx = 2 * (sizeof xig)
			+ (n + n/8) * sizeof(struct xtcpcb);
		return 0;
	}

	if (req->newptr != 0)
		return EPERM;	/* read-only sysctl */

	/*
	 * OK, now we're committed to doing something.
	 * Snapshot the count and generation under the list lock.
	 */
	s = splnet();
	INP_INFO_RLOCK(&tcbinfo);
	gencnt = tcbinfo.ipi_gencnt;
	n = tcbinfo.ipi_count;
	INP_INFO_RUNLOCK(&tcbinfo);
	splx(s);

	sysctl_wire_old_buffer(req, 2 * (sizeof xig)
		+ n * sizeof(struct xtcpcb));

	xig.xig_len = sizeof xig;
	xig.xig_count = n;
	xig.xig_gen = gencnt;
/*	xig.xig_sogen = so_gencnt; remove by ccj */
	error = SYSCTL_OUT(req, &xig, sizeof xig);
	if (error)
		return error;

	/* ccj add exit if the count is 0 */
	if (!n)
		return error;

	inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
	if (inp_list == 0)
		return ENOMEM;

	/* First pass: collect up to n pcb pointers under the list lock. */
	s = splnet();
	INP_INFO_RLOCK(&tcbinfo);
	for (inp = LIST_FIRST(tcbinfo.listhead), i = 0; inp && i < n;
	     inp = LIST_NEXT(inp, inp_list)) {
		INP_LOCK(inp);
		/* Skip pcbs created after our snapshot generation. */
		if (inp->inp_gencnt <= gencnt)
#if 0
      &&
		    cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0)
#endif
			inp_list[i++] = inp;
		INP_UNLOCK(inp);
	}
	INP_INFO_RUNLOCK(&tcbinfo);
	splx(s);
	n = i;

	/* Second pass: copy each collected pcb (and its tcpcb) out. */
	error = 0;
	for (i = 0; i < n; i++) {
		inp = inp_list[i];
		INP_LOCK(inp);
		if (inp->inp_gencnt <= gencnt) {
			struct xtcpcb xt;
			caddr_t inp_ppcb;
			xt.xt_len = sizeof xt;
			/* XXX should avoid extra copy */
			bcopy(inp, &xt.xt_inp, sizeof *inp);
			inp_ppcb = inp->inp_ppcb;
			if (inp_ppcb != NULL)
				bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp);
			else
				bzero((char *) &xt.xt_tp, sizeof xt.xt_tp);
#if 0
			if (inp->inp_socket)
				sotoxsocket(inp->inp_socket, &xt.xt_socket);
#endif
			error = SYSCTL_OUT(req, &xt, sizeof xt);
		}
		INP_UNLOCK(inp);
	}
	if (!error) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		s = splnet();
		INP_INFO_RLOCK(&tcbinfo);
		xig.xig_gen = tcbinfo.ipi_gencnt;
#if 0
		xig.xig_sogen = so_gencnt;
#endif
		xig.xig_count = tcbinfo.ipi_count;
		INP_INFO_RUNLOCK(&tcbinfo);
		splx(s);
		error = SYSCTL_OUT(req, &xig, sizeof xig);
	}
	free(inp_list, M_TEMP);
	return error;
}
580
581SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0,
582            tcp_pcblist, "S,xtcpcb", "List of active TCP connections");
583
/*
 * Control input: dispatch an ICMP-derived event to the affected pcbs.
 * PRC_QUENCH shrinks the congestion window, PRC_MSGSIZE triggers MTU
 * rediscovery, and other mapped commands are reported via tcp_notify.
 */
void
tcp_ctlinput(cmd, sa, vip)
	int cmd;
	struct sockaddr *sa;
	void *vip;
{
	register struct ip *ip = vip;
	register struct tcphdr *th;
	void (*notify) __P((struct inpcb *, int)) = tcp_notify;

	if (cmd == PRC_QUENCH)
		notify = tcp_quench;
#if 1
	else if (cmd == PRC_MSGSIZE)
		notify = tcp_mtudisc;
#endif
	else if (!PRC_IS_REDIRECT(cmd) &&
		 ((unsigned)cmd > PRC_NCMDS || inetctlerrmap[cmd] == 0))
		return;		/* unrecognized or uninteresting command */
	if (ip) {
		/* Locate the TCP header inside the quoted IP packet. */
		th = (struct tcphdr *)((caddr_t)ip
				       + (IP_VHL_HL(ip->ip_vhl) << 2));
		in_pcbnotify(&tcb, sa, th->th_dport, ip->ip_src, th->th_sport,
			cmd, notify);
	} else
		/* No packet supplied: notify by destination address only. */
		in_pcbnotify(&tcb, sa, 0, zeroin_addr, 0, cmd, notify);
}
611
612/*
613 * When a source quench is received, close congestion window
614 * to one segment.  We will gradually open it again as we proceed.
615 */
616void
617tcp_quench(inp, errnum)
618        struct inpcb *inp;
619        int errnum;
620{
621        struct tcpcb *tp = intotcpcb(inp);
622
623        if (tp)
624                tp->snd_cwnd = tp->t_maxseg;
625}
626
627#if 1
/*
 * When `need fragmentation' ICMP is received, update our idea of the MSS
 * based on the new value in the route.  Also nudge TCP to send something,
 * since we know the packet we just sent was dropped.
 * This duplicates some code in the tcp_mss() function in tcp_input.c.
 */
void
tcp_mtudisc(inp, errnum)
	struct inpcb *inp;
	int errnum;
{
	struct tcpcb *tp = intotcpcb(inp);
	struct rtentry *rt;
	struct rmxp_tao *taop;
	struct socket *so = inp->inp_socket;
	int offered;
	int mss;

	if (tp) {
		rt = tcp_rtlookup(inp);
		if (!rt || !rt->rt_rmx.rmx_mtu) {
			/* No route or no MTU metric: fall back to the default MSS. */
			tp->t_maxopd = tp->t_maxseg = tcp_mssdflt;
			return;
		}
		taop = rmx_taop(rt->rt_rmx);
		offered = taop->tao_mssopt;	/* MSS the peer last offered */
		mss = rt->rt_rmx.rmx_mtu - sizeof(struct tcpiphdr);
		if (offered)
			mss = min(mss, offered);
		/*
		 * XXX - The above conditional probably violates the TCP
		 * spec.  The problem is that, since we don't know the
		 * other end's MSS, we are supposed to use a conservative
		 * default.  But, if we do that, then MTU discovery will
		 * never actually take place, because the conservative
		 * default is much less than the MTUs typically seen
		 * on the Internet today.  For the moment, we'll sweep
		 * this under the carpet.
		 *
		 * The conservative default might not actually be a problem
		 * if the only case this occurs is when sending an initial
		 * SYN with options and data to a host we've never talked
		 * to before.  Then, they will reply with an MSS value which
		 * will get recorded and the new parameters should get
		 * recomputed.  For Further Study.
		 */
		if (tp->t_maxopd <= mss)
			return;		/* not shrinking; nothing to do */
		tp->t_maxopd = mss;

		/* Deduct per-segment option overhead we expect to send. */
		if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
		    (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP)
			mss -= TCPOLEN_TSTAMP_APPA;
		if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC &&
		    (tp->t_flags & TF_RCVD_CC) == TF_RCVD_CC)
			mss -= TCPOLEN_CC_APPA;
		/* Round down to a multiple of the mbuf cluster size if large. */
#if	(MCLBYTES & (MCLBYTES - 1)) == 0
		if (mss > MCLBYTES)
			mss &= ~(MCLBYTES-1);
#else
		if (mss > MCLBYTES)
			mss = mss / MCLBYTES * MCLBYTES;
#endif
		/* Never use a segment larger than the send buffer. */
		if (so->so_snd.sb_hiwat < mss)
			mss = so->so_snd.sb_hiwat;

		tp->t_maxseg = mss;

		/* Resend from the last acked point with the new segment size. */
		tcpstat.tcps_mturesent++;
		tp->t_rtt = 0;
		tp->snd_nxt = tp->snd_una;
		tcp_output(tp);
	}
}
702#endif
703
704/*
705 * Look-up the routing entry to the peer of this inpcb.  If no route
706 * is found and it cannot be allocated the return NULL.  This routine
707 * is called by TCP routines that access the rmx structure and by tcp_mss
708 * to get the interface MTU.
709 */
710struct rtentry *
711tcp_rtlookup(inp)
712        struct inpcb *inp;
713{
714        struct route *ro;
715        struct rtentry *rt;
716
717        ro = &inp->inp_route;
718        rt = ro->ro_rt;
719        if (rt == NULL || !(rt->rt_flags & RTF_UP)) {
720                /* No route yet, so try to acquire one */
721                if (inp->inp_faddr.s_addr != INADDR_ANY) {
722                        ro->ro_dst.sa_family = AF_INET;
723                        ro->ro_dst.sa_len = sizeof(ro->ro_dst);
724                        ((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
725                                inp->inp_faddr;
726                        rtalloc(ro);
727                        rt = ro->ro_rt;
728                }
729        }
730        return rt;
731}
732
733/*
734 * Return a pointer to the cached information about the remote host.
735 * The cached information is stored in the protocol specific part of
736 * the route metrics.
737 */
738struct rmxp_tao *
739tcp_gettaocache(inp)
740        struct inpcb *inp;
741{
742        struct rtentry *rt = tcp_rtlookup(inp);
743
744        /* Make sure this is a host route and is up. */
745        if (rt == NULL ||
746            (rt->rt_flags & (RTF_UP|RTF_HOST)) != (RTF_UP|RTF_HOST))
747                return NULL;
748
749        return rmx_taop(rt->rt_rmx);
750}
751
/*
 * Clear all the TAO cache entries, called from tcp_init.
 *
 * XXX
 * This routine is just an empty one, because we assume that the
 * routing tables are initialized at the same time as TCP, so there
 * is nothing stale left over in the cache.
 */
static void
tcp_cleartaocache(void)
{ }
Note: See TracBrowser for help on using the repository browser.