source: rtems/cpukit/libnetworking/netinet/in_rmx.c @ 39bad7e8

Last change on this file since 39bad7e8 was c301570, checked in by Ralf Corsepius <ralf.corsepius@…>, on 05/10/07 at 05:12:54

Include <rtems/bsd/sys/queue.h> instead of <sys/queue.h>.

  • Property mode set to 100644
File size: 11.1 KB
/*
 * Copyright 1994, 1995 Massachusetts Institute of Technology
 *
 * Permission to use, copy, modify, and distribute this software and
 * its documentation for any purpose and without fee is hereby
 * granted, provided that both the above copyright notice and this
 * permission notice appear in all copies, that both the above
 * copyright notice and this permission notice appear in all
 * supporting documentation, and that the name of M.I.T. not be used
 * in advertising or publicity pertaining to distribution of the
 * software without specific, written prior permission.  M.I.T. makes
 * no representations about the suitability of this software for any
 * purpose.  It is provided "as is" without express or implied
 * warranty.
 *
 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $Id$
 */

/*
 * This code does two things necessary for the enhanced TCP metrics to
 * function in a useful manner:
 *  1) It marks all non-host routes as `cloning', thus ensuring that
 *     every actual reference to such a route actually gets turned
 *     into a reference to a host route to the specific destination
 *     requested.
 *  2) When such routes lose all their references, it arranges for them
 *     to be deleted in some random collection of circumstances, so that
 *     a large quantity of stale routing data is not kept in kernel memory
 *     indefinitely.  See in_rtqtimo() below for the exact mechanism.
 */
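
/*
 * Illustrative overview of the resulting lifecycle (the addresses below
 * are examples only, not taken from this file):
 *
 *   - adding a network route such as 10.1.0.0/16 passes through
 *     in_addroute(), which marks it RTF_PRCLONING;
 *   - a later lookup of, say, 10.1.2.3 clones a host route for that
 *     destination, and the per-destination TCP metrics (rmx_ssthresh,
 *     rmx_rtt, ...) accumulate on the clone;
 *   - when the clone's last reference is dropped, in_clsroute() marks
 *     it RTPRF_OURS and stamps rmx_expire;
 *   - in_rtqtimo()/in_rtqkill() later walk the tree and delete clones
 *     whose expiry has passed.
 */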

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <rtems/bsd/sys/queue.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/mbuf.h>
#include <sys/syslog.h>

#include <net/if.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>

#include <netinet/ip.h>
#include <netinet/ip_var.h>

#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>

extern int      in_inithead(void **head, int off);

#define RTPRF_OURS              RTF_PROTO3      /* set on routes we manage */

/*
 * Do what we need to do when inserting a route.
 */
static struct radix_node *
in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
            struct radix_node *treenodes)
{
        struct rtentry *rt = (struct rtentry *)treenodes;
        struct sockaddr_in *sin = (struct sockaddr_in *)rt_key(rt);
        struct radix_node *ret;

        /*
         * For IP, all unicast non-host routes are automatically cloning.
         */
        if(IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
                rt->rt_flags |= RTF_MULTICAST;

        if(!(rt->rt_flags & (RTF_HOST | RTF_CLONING | RTF_MULTICAST))) {
                rt->rt_flags |= RTF_PRCLONING;
        }

        /*
         * A little bit of help for both IP output and input:
         *   For host routes, we make sure that RTF_BROADCAST
         *   is set for anything that looks like a broadcast address.
         *   This way, we can avoid an expensive call to in_broadcast()
         *   in ip_output() most of the time (because the route passed
         *   to ip_output() is almost always a host route).
         *
         *   We also do the same for local addresses, with the thought
         *   that this might one day be used to speed up ip_input().
         *
         * We also mark routes to multicast addresses as such, because
         * it's easy to do and might be useful (but this is much more
         * dubious since it's so easy to inspect the address).  (This
         * is done above.)
         */
        if (rt->rt_flags & RTF_HOST) {
                if (in_broadcast(sin->sin_addr, rt->rt_ifp)) {
                        rt->rt_flags |= RTF_BROADCAST;
                } else {
#define satosin(sa) ((struct sockaddr_in *)sa)
                        if (satosin(rt->rt_ifa->ifa_addr)->sin_addr.s_addr
                            == sin->sin_addr.s_addr)
                                rt->rt_flags |= RTF_LOCAL;
#undef satosin
                }
        }

        /*
         * We also specify a send and receive pipe size for every
         * route added, to help TCP a bit.  TCP doesn't actually
         * want a true pipe size, which would be prohibitive in memory
         * costs and is hard to compute anyway; it simply uses these
         * values to size its buffers.  So, we fill them in with the
         * same values that TCP would have used anyway, and allow the
         * installing program or the link layer to override these values
         * as it sees fit.  This will hopefully allow TCP more
         * opportunities to save its ssthresh value.
         */
        if (!rt->rt_rmx.rmx_sendpipe && !(rt->rt_rmx.rmx_locks & RTV_SPIPE))
                rt->rt_rmx.rmx_sendpipe = tcp_sendspace;

        if (!rt->rt_rmx.rmx_recvpipe && !(rt->rt_rmx.rmx_locks & RTV_RPIPE))
                rt->rt_rmx.rmx_recvpipe = tcp_recvspace;

        if (!rt->rt_rmx.rmx_mtu && !(rt->rt_rmx.rmx_locks & RTV_MTU)
            && rt->rt_ifp)
                rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;

        ret = rn_addroute(v_arg, n_arg, head, treenodes);
        if (ret == NULL && rt->rt_flags & RTF_HOST) {
                struct rtentry *rt2;
                /*
                 * We are trying to add a host route, but can't.
                 * Find out if it is because of an
                 * ARP entry and delete it if so.
                 */
                rt2 = rtalloc1((struct sockaddr *)sin, 0,
                                RTF_CLONING | RTF_PRCLONING);
                if (rt2) {
                        if (rt2->rt_flags & RTF_LLINFO &&
                                rt2->rt_flags & RTF_HOST &&
                                rt2->rt_gateway &&
                                rt2->rt_gateway->sa_family == AF_LINK) {
                                rtrequest(RTM_DELETE,
                                          (struct sockaddr *)rt_key(rt2),
                                          rt2->rt_gateway,
                                          rt_mask(rt2), rt2->rt_flags, 0);
                                ret = rn_addroute(v_arg, n_arg, head,
                                        treenodes);
                        }
                        RTFREE(rt2);
                }
        }
        return ret;
}
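
/*
 * Illustrative example (hypothetical values, not part of the original
 * logic): a route installed with an explicit, non-zero pipe size,
 * typically together with the matching metric lock, keeps that value,
 * e.g.
 *
 *      rt->rt_rmx.rmx_sendpipe = 32 * 1024;
 *      rt->rt_rmx.rmx_locks   |= RTV_SPIPE;
 *
 * because in_addroute() above only fills in tcp_sendspace/tcp_recvspace
 * and the interface MTU when the corresponding metric is still zero and
 * not locked.
 */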

/*
 * This code is the inverse of in_clsroute: on first reference, if we
 * were managing the route, stop doing so and set the expiration timer
 * back off again.
 */
static struct radix_node *
in_matroute(void *v_arg, struct radix_node_head *head)
{
        struct radix_node *rn = rn_match(v_arg, head);
        struct rtentry *rt = (struct rtentry *)rn;

        if(rt && rt->rt_refcnt == 0) { /* this is first reference */
                if(rt->rt_flags & RTPRF_OURS) {
                        rt->rt_flags &= ~RTPRF_OURS;
                        rt->rt_rmx.rmx_expire = 0;
                }
        }
        return rn;
}

static int rtq_reallyold = 60*60;
        /* one hour is ``really old'' */
SYSCTL_INT(_net_inet_ip, IPCTL_RTEXPIRE, rtexpire,
        CTLFLAG_RW, &rtq_reallyold , 0, "");

static int rtq_minreallyold = 10;
        /* never automatically crank down to less */
SYSCTL_INT(_net_inet_ip, IPCTL_RTMINEXPIRE, rtminexpire,
        CTLFLAG_RW, &rtq_minreallyold , 0, "");

static int rtq_toomany = 128;
        /* 128 cached routes is ``too many'' */
SYSCTL_INT(_net_inet_ip, IPCTL_RTMAXCACHE, rtmaxcache,
        CTLFLAG_RW, &rtq_toomany , 0, "");

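/*
 * For reference (standard SYSCTL_INT naming; run-time availability under
 * RTEMS is assumed to depend on how the application exposes sysctl, if at
 * all): on a stock BSD kernel the three knobs above appear as
 * net.inet.ip.rtexpire, net.inet.ip.rtminexpire and net.inet.ip.rtmaxcache.
 */
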
/*
 * On last reference drop, mark the route as belonging to us so that it can be
 * timed out.
 */
static void
in_clsroute(struct radix_node *rn, struct radix_node_head *head)
{
        struct rtentry *rt = (struct rtentry *)rn;

        if(!(rt->rt_flags & RTF_UP))
                return;         /* prophylactic measures */

        if((rt->rt_flags & (RTF_LLINFO | RTF_HOST)) != RTF_HOST)
                return;

        if((rt->rt_flags & (RTF_WASCLONED | RTPRF_OURS))
           != RTF_WASCLONED)
                return;

        /*
         * As requested by David Greenman:
         * If rtq_reallyold is 0, just delete the route without
         * waiting for a timeout cycle to kill it.
         */
        if(rtq_reallyold != 0) {
                rt->rt_flags |= RTPRF_OURS;
                rt->rt_rmx.rmx_expire = rtems_bsdnet_seconds_since_boot() + rtq_reallyold;
        } else {
                rtrequest(RTM_DELETE,
                          (struct sockaddr *)rt_key(rt),
                          rt->rt_gateway, rt_mask(rt),
                          rt->rt_flags, 0);
        }
}

struct rtqk_arg {
        struct radix_node_head *rnh;
        int draining;
        int killed;
        int found;
        int updating;
        time_t nextstop;
};

/*
 * Get rid of old routes.  When draining, this deletes everything, even when
 * the timeout is not expired yet.  When updating, this makes sure that
 * nothing has a timeout longer than the current value of rtq_reallyold.
 */
static int
in_rtqkill(struct radix_node *rn, void *rock)
{
        struct rtqk_arg *ap = rock;
        struct rtentry *rt = (struct rtentry *)rn;
        int err;

        if(rt->rt_flags & RTPRF_OURS) {
                ap->found++;

                if(ap->draining || rt->rt_rmx.rmx_expire <= rtems_bsdnet_seconds_since_boot()) {
                        if(rt->rt_refcnt > 0)
                                panic("rtqkill route really not free");

                        err = rtrequest(RTM_DELETE,
                                        (struct sockaddr *)rt_key(rt),
                                        rt->rt_gateway, rt_mask(rt),
                                        rt->rt_flags, 0);
                        if(err) {
                                log(LOG_WARNING, "in_rtqkill: error %d\n", err);
                        } else {
                                ap->killed++;
                        }
                } else {
                        if(ap->updating
                           && (rt->rt_rmx.rmx_expire - rtems_bsdnet_seconds_since_boot()
                               > rtq_reallyold)) {
                                rt->rt_rmx.rmx_expire = rtems_bsdnet_seconds_since_boot()
                                        + rtq_reallyold;
                        }
                        ap->nextstop = lmin(ap->nextstop,
                                            rt->rt_rmx.rmx_expire);
                }
        }

        return 0;
}
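
/*
 * Why the panic above should never fire in normal operation (added
 * reasoning, not original commentary): RTPRF_OURS is only set by
 * in_clsroute() when the last reference to a cloned host route is
 * dropped, and in_matroute() clears it again as soon as the route picks
 * up a new reference, so a route still carrying RTPRF_OURS while
 * rt_refcnt > 0 indicates broken reference counting.
 */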

#define RTQ_TIMEOUT     60*10   /* run no less than once every ten minutes */
static int rtq_timeout = RTQ_TIMEOUT;

static void
in_rtqtimo(void *rock)
{
        struct radix_node_head *rnh = rock;
        struct rtqk_arg arg;
        struct timeval atv;
        static time_t last_adjusted_timeout = 0;
        int s;

        arg.found = arg.killed = 0;
        arg.rnh = rnh;
        arg.nextstop = rtems_bsdnet_seconds_since_boot() + rtq_timeout;
        arg.draining = arg.updating = 0;
        s = splnet();
        rnh->rnh_walktree(rnh, in_rtqkill, &arg);
        splx(s);

        /*
         * Attempt to be somewhat dynamic about this:
         * If there are ``too many'' routes sitting around taking up space,
         * then crank down the timeout, and see if we can't make some more
         * go away.  However, we make sure that we will never adjust more
         * than once in rtq_timeout seconds, to keep from cranking down too
         * hard.
         */
        if((arg.found - arg.killed > rtq_toomany)
           && (rtems_bsdnet_seconds_since_boot() - last_adjusted_timeout >= rtq_timeout)
           && rtq_reallyold > rtq_minreallyold) {
                rtq_reallyold = 2*rtq_reallyold / 3;
                if(rtq_reallyold < rtq_minreallyold) {
                        rtq_reallyold = rtq_minreallyold;
                }

                last_adjusted_timeout = rtems_bsdnet_seconds_since_boot();
#ifdef DIAGNOSTIC
                log(LOG_DEBUG, "in_rtqtimo: adjusted rtq_reallyold to %d\n",
                    rtq_reallyold);
#endif
                arg.found = arg.killed = 0;
                arg.updating = 1;
                s = splnet();
                rnh->rnh_walktree(rnh, in_rtqkill, &arg);
                splx(s);
        }

        atv.tv_usec = 0;
        atv.tv_sec = arg.nextstop;
        timeout(in_rtqtimo, rock, hzto(&atv));
}
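
/*
 * Worked example of the back-off above (illustrative numbers derived from
 * the defaults): the first sweep that leaves more than rtq_toomany (128)
 * managed routes alive drops rtq_reallyold from 3600 s to 2400 s; later
 * sweeps continue 1600, 1066, 710, ... (integer 2/3 steps), clamped at
 * rtq_minreallyold (10 s) and applied at most once per rtq_timeout
 * (600 s) interval.
 */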

void
in_rtqdrain(void)
{
        struct radix_node_head *rnh = rt_tables[AF_INET];
        struct rtqk_arg arg;
        int s;
        arg.found = arg.killed = 0;
        arg.rnh = rnh;
        arg.nextstop = 0;
        arg.draining = 1;
        arg.updating = 0;
        s = splnet();
        rnh->rnh_walktree(rnh, in_rtqkill, &arg);
        splx(s);
}

/*
 * Initialize our routing tree.
 */
int
in_inithead(void **head, int off)
{
        struct radix_node_head *rnh;

        if(!rn_inithead(head, off))
                return 0;

        if(head != (void **)&rt_tables[AF_INET]) /* BOGUS! */
                return 1;       /* only do this for the real routing table */

        rnh = *head;
        rnh->rnh_addaddr = in_addroute;
        rnh->rnh_matchaddr = in_matroute;
        rnh->rnh_close = in_clsroute;
        in_rtqtimo(rnh);        /* kick off timeout first time */
        return 1;
}
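
/*
 * Context note (assumed wiring, consistent with the BSD-derived stack this
 * file was imported from): in_inithead() is registered as the routing
 * attach hook for the Internet domain (dom_rtattach in netinet/in_proto.c),
 * so it runs when the AF_INET radix tree is created at network
 * initialization; everything above then hangs off the three rnh_* hooks it
 * installs plus the self-rescheduling in_rtqtimo() timeout.
 */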