1 | /* |
---|
2 | * Copyright 1994, 1995 Massachusetts Institute of Technology |
---|
3 | * |
---|
4 | * Permission to use, copy, modify, and distribute this software and |
---|
5 | * its documentation for any purpose and without fee is hereby |
---|
6 | * granted, provided that both the above copyright notice and this |
---|
7 | * permission notice appear in all copies, that both the above |
---|
8 | * copyright notice and this permission notice appear in all |
---|
9 | * supporting documentation, and that the name of M.I.T. not be used |
---|
10 | * in advertising or publicity pertaining to distribution of the |
---|
11 | * software without specific, written prior permission. M.I.T. makes |
---|
12 | * no representations about the suitability of this software for any |
---|
13 | * purpose. It is provided "as is" without express or implied |
---|
14 | * warranty. |
---|
15 | * |
---|
16 | * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS |
---|
17 | * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, |
---|
18 | * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF |
---|
19 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT |
---|
20 | * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
---|
21 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
---|
22 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF |
---|
23 | * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
---|
24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
---|
25 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
---|
26 | * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
---|
27 | * SUCH DAMAGE. |
---|
28 | */ |
---|
29 | |
---|
30 | /* |
---|
31 | * This code does two things necessary for the enhanced TCP metrics to |
---|
32 | * function in a useful manner: |
---|
33 | * 1) It marks all non-host routes as `cloning', thus ensuring that |
---|
34 | * every actual reference to such a route actually gets turned |
---|
35 | * into a reference to a host route to the specific destination |
---|
36 | * requested. |
---|
37 | * 2) When such routes lose all their references, it arranges for them |
---|
38 | * to be deleted in some random collection of circumstances, so that |
---|
39 | * a large quantity of stale routing data is not kept in kernel memory |
---|
40 | * indefinitely. See in_rtqtimo() below for the exact mechanism. |
---|
41 | */ |
---|
42 | |
---|
43 | #ifdef HAVE_CONFIG_H |
---|
44 | #include "config.h" |
---|
45 | #endif |
---|
46 | |
---|
47 | #include <sys/param.h> |
---|
48 | #include <sys/systm.h> |
---|
49 | #include <sys/kernel.h> |
---|
50 | #include <sys/sysctl.h> |
---|
51 | #include <sys/queue.h> |
---|
52 | #include <sys/socket.h> |
---|
53 | #include <sys/socketvar.h> |
---|
54 | #include <sys/mbuf.h> |
---|
55 | #include <sys/syslog.h> |
---|
56 | |
---|
57 | #include <net/if.h> |
---|
58 | #include <net/route.h> |
---|
59 | #include <netinet/in.h> |
---|
60 | #include <rtems/rtems_netinet_in.h> |
---|
61 | #include <netinet/in_systm.h> |
---|
62 | #include <netinet/in_var.h> |
---|
63 | |
---|
64 | #include <netinet/ip.h> |
---|
65 | #include <netinet/ip_var.h> |
---|
66 | |
---|
67 | #include <netinet/tcp.h> |
---|
68 | #include <netinet/tcp_seq.h> |
---|
69 | #include <netinet/tcp_timer.h> |
---|
70 | #include <netinet/tcp_var.h> |
---|
71 | |
---|
/* Defined at the bottom of this file; declared here for external users. */
extern int in_inithead(void **head, int off);

/* Flag bit set on the routes whose expiry this file manages. */
#define RTPRF_OURS		RTF_PROTO3
75 | |
---|
76 | /* |
---|
77 | * Do what we need to do when inserting a route. |
---|
78 | */ |
---|
79 | static struct radix_node * |
---|
80 | in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, |
---|
81 | struct radix_node *treenodes) |
---|
82 | { |
---|
83 | struct rtentry *rt = (struct rtentry *)treenodes; |
---|
84 | struct sockaddr_in *sin = (struct sockaddr_in *)rt_key(rt); |
---|
85 | struct radix_node *ret; |
---|
86 | |
---|
87 | /* |
---|
88 | * For IP, all unicast non-host routes are automatically cloning. |
---|
89 | */ |
---|
90 | if(IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) |
---|
91 | rt->rt_flags |= RTF_MULTICAST; |
---|
92 | |
---|
93 | if(!(rt->rt_flags & (RTF_HOST | RTF_CLONING | RTF_MULTICAST))) { |
---|
94 | rt->rt_flags |= RTF_PRCLONING; |
---|
95 | } |
---|
96 | |
---|
97 | /* |
---|
98 | * A little bit of help for both IP output and input: |
---|
99 | * For host routes, we make sure that RTF_BROADCAST |
---|
100 | * is set for anything that looks like a broadcast address. |
---|
101 | * This way, we can avoid an expensive call to in_broadcast() |
---|
102 | * in ip_output() most of the time (because the route passed |
---|
103 | * to ip_output() is almost always a host route). |
---|
104 | * |
---|
105 | * We also do the same for local addresses, with the thought |
---|
106 | * that this might one day be used to speed up ip_input(). |
---|
107 | * |
---|
108 | * We also mark routes to multicast addresses as such, because |
---|
109 | * it's easy to do and might be useful (but this is much more |
---|
110 | * dubious since it's so easy to inspect the address). (This |
---|
111 | * is done above.) |
---|
112 | */ |
---|
113 | if (rt->rt_flags & RTF_HOST) { |
---|
114 | if (in_broadcast(sin->sin_addr, rt->rt_ifp)) { |
---|
115 | rt->rt_flags |= RTF_BROADCAST; |
---|
116 | } else { |
---|
117 | if (satosin(rt->rt_ifa->ifa_addr)->sin_addr.s_addr |
---|
118 | == sin->sin_addr.s_addr) |
---|
119 | rt->rt_flags |= RTF_LOCAL; |
---|
120 | } |
---|
121 | } |
---|
122 | |
---|
123 | /* |
---|
124 | * We also specify a send and receive pipe size for every |
---|
125 | * route added, to help TCP a bit. TCP doesn't actually |
---|
126 | * want a true pipe size, which would be prohibitive in memory |
---|
127 | * costs and is hard to compute anyway; it simply uses these |
---|
128 | * values to size its buffers. So, we fill them in with the |
---|
129 | * same values that TCP would have used anyway, and allow the |
---|
130 | * installing program or the link layer to override these values |
---|
131 | * as it sees fit. This will hopefully allow TCP more |
---|
132 | * opportunities to save its ssthresh value. |
---|
133 | */ |
---|
134 | if (!rt->rt_rmx.rmx_sendpipe && !(rt->rt_rmx.rmx_locks & RTV_SPIPE)) |
---|
135 | rt->rt_rmx.rmx_sendpipe = tcp_sendspace; |
---|
136 | |
---|
137 | if (!rt->rt_rmx.rmx_recvpipe && !(rt->rt_rmx.rmx_locks & RTV_RPIPE)) |
---|
138 | rt->rt_rmx.rmx_recvpipe = tcp_recvspace; |
---|
139 | |
---|
140 | if (!rt->rt_rmx.rmx_mtu && !(rt->rt_rmx.rmx_locks & RTV_MTU) |
---|
141 | && rt->rt_ifp) |
---|
142 | rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; |
---|
143 | |
---|
144 | ret = rn_addroute(v_arg, n_arg, head, treenodes); |
---|
145 | if (ret == NULL && rt->rt_flags & RTF_HOST) { |
---|
146 | struct rtentry *rt2; |
---|
147 | /* |
---|
148 | * We are trying to add a host route, but can't. |
---|
149 | * Find out if it is because of an |
---|
150 | * ARP entry and delete it if so. |
---|
151 | */ |
---|
152 | rt2 = rtalloc1((struct sockaddr *)sin, 0, |
---|
153 | RTF_CLONING | RTF_PRCLONING); |
---|
154 | if (rt2) { |
---|
155 | if (rt2->rt_flags & RTF_LLINFO && |
---|
156 | rt2->rt_flags & RTF_HOST && |
---|
157 | rt2->rt_gateway && |
---|
158 | rt2->rt_gateway->sa_family == AF_LINK) { |
---|
159 | rtrequest(RTM_DELETE, |
---|
160 | (struct sockaddr *)rt_key(rt2), |
---|
161 | rt2->rt_gateway, |
---|
162 | rt_mask(rt2), rt2->rt_flags, 0); |
---|
163 | ret = rn_addroute(v_arg, n_arg, head, |
---|
164 | treenodes); |
---|
165 | } |
---|
166 | RTFREE(rt2); |
---|
167 | } |
---|
168 | } |
---|
169 | return ret; |
---|
170 | } |
---|
171 | |
---|
172 | /* |
---|
173 | * This code is the inverse of in_clsroute: on first reference, if we |
---|
174 | * were managing the route, stop doing so and set the expiration timer |
---|
175 | * back off again. |
---|
176 | */ |
---|
177 | static struct radix_node * |
---|
178 | in_matroute(void *v_arg, struct radix_node_head *head) |
---|
179 | { |
---|
180 | struct radix_node *rn = rn_match(v_arg, head); |
---|
181 | struct rtentry *rt = (struct rtentry *)rn; |
---|
182 | |
---|
183 | if(rt && rt->rt_refcnt == 0) { /* this is first reference */ |
---|
184 | if(rt->rt_flags & RTPRF_OURS) { |
---|
185 | rt->rt_flags &= ~RTPRF_OURS; |
---|
186 | rt->rt_rmx.rmx_expire = 0; |
---|
187 | } |
---|
188 | } |
---|
189 | return rn; |
---|
190 | } |
---|
191 | |
---|
192 | static int rtq_reallyold = 60*60; |
---|
193 | /* one hour is ``really old'' */ |
---|
194 | SYSCTL_INT(_net_inet_ip, IPCTL_RTEXPIRE, rtexpire, |
---|
195 | CTLFLAG_RW, &rtq_reallyold , 0, ""); |
---|
196 | |
---|
197 | static int rtq_minreallyold = 10; |
---|
198 | /* never automatically crank down to less */ |
---|
199 | SYSCTL_INT(_net_inet_ip, IPCTL_RTMINEXPIRE, rtminexpire, |
---|
200 | CTLFLAG_RW, &rtq_minreallyold , 0, ""); |
---|
201 | |
---|
202 | static int rtq_toomany = 128; |
---|
203 | /* 128 cached routes is ``too many'' */ |
---|
204 | SYSCTL_INT(_net_inet_ip, IPCTL_RTMAXCACHE, rtmaxcache, |
---|
205 | CTLFLAG_RW, &rtq_toomany , 0, ""); |
---|
206 | |
---|
207 | |
---|
208 | /* |
---|
209 | * On last reference drop, mark the route as belong to us so that it can be |
---|
210 | * timed out. |
---|
211 | */ |
---|
212 | static void |
---|
213 | in_clsroute(struct radix_node *rn, struct radix_node_head *head) |
---|
214 | { |
---|
215 | struct rtentry *rt = (struct rtentry *)rn; |
---|
216 | |
---|
217 | if(!(rt->rt_flags & RTF_UP)) |
---|
218 | return; /* prophylactic measures */ |
---|
219 | |
---|
220 | if((rt->rt_flags & (RTF_LLINFO | RTF_HOST)) != RTF_HOST) |
---|
221 | return; |
---|
222 | |
---|
223 | if((rt->rt_flags & (RTF_WASCLONED | RTPRF_OURS)) |
---|
224 | != RTF_WASCLONED) |
---|
225 | return; |
---|
226 | |
---|
227 | /* |
---|
228 | * As requested by David Greenman: |
---|
229 | * If rtq_reallyold is 0, just delete the route without |
---|
230 | * waiting for a timeout cycle to kill it. |
---|
231 | */ |
---|
232 | if(rtq_reallyold != 0) { |
---|
233 | rt->rt_flags |= RTPRF_OURS; |
---|
234 | rt->rt_rmx.rmx_expire = rtems_bsdnet_seconds_since_boot() + rtq_reallyold; |
---|
235 | } else { |
---|
236 | rtrequest(RTM_DELETE, |
---|
237 | (struct sockaddr *)rt_key(rt), |
---|
238 | rt->rt_gateway, rt_mask(rt), |
---|
239 | rt->rt_flags, 0); |
---|
240 | } |
---|
241 | } |
---|
242 | |
---|
/*
 * State shared between a tree walk and its in_rtqkill() callback.
 */
struct rtqk_arg {
	struct radix_node_head *rnh;	/* tree being walked (set by the caller) */
	int draining;			/* nonzero: delete even unexpired routes */
	int killed;			/* managed routes successfully deleted */
	int found;			/* managed routes seen during the walk */
	int updating;			/* nonzero: clamp expiry to rtq_reallyold */
	time_t nextstop;		/* earliest expiry among routes kept */
};
251 | |
---|
252 | /* |
---|
253 | * Get rid of old routes. When draining, this deletes everything, even when |
---|
254 | * the timeout is not expired yet. When updating, this makes sure that |
---|
255 | * nothing has a timeout longer than the current value of rtq_reallyold. |
---|
256 | */ |
---|
257 | static int |
---|
258 | in_rtqkill(struct radix_node *rn, void *rock) |
---|
259 | { |
---|
260 | struct rtqk_arg *ap = rock; |
---|
261 | struct rtentry *rt = (struct rtentry *)rn; |
---|
262 | int err; |
---|
263 | |
---|
264 | if(rt->rt_flags & RTPRF_OURS) { |
---|
265 | ap->found++; |
---|
266 | |
---|
267 | if(ap->draining || rt->rt_rmx.rmx_expire <= rtems_bsdnet_seconds_since_boot()) { |
---|
268 | if(rt->rt_refcnt > 0) |
---|
269 | panic("rtqkill route really not free"); |
---|
270 | |
---|
271 | err = rtrequest(RTM_DELETE, |
---|
272 | (struct sockaddr *)rt_key(rt), |
---|
273 | rt->rt_gateway, rt_mask(rt), |
---|
274 | rt->rt_flags, 0); |
---|
275 | if(err) { |
---|
276 | log(LOG_WARNING, "in_rtqkill: error %d\n", err); |
---|
277 | } else { |
---|
278 | ap->killed++; |
---|
279 | } |
---|
280 | } else { |
---|
281 | if(ap->updating |
---|
282 | && (rt->rt_rmx.rmx_expire - rtems_bsdnet_seconds_since_boot() |
---|
283 | > rtq_reallyold)) { |
---|
284 | rt->rt_rmx.rmx_expire = rtems_bsdnet_seconds_since_boot() |
---|
285 | + rtq_reallyold; |
---|
286 | } |
---|
287 | ap->nextstop = lmin(ap->nextstop, |
---|
288 | rt->rt_rmx.rmx_expire); |
---|
289 | } |
---|
290 | } |
---|
291 | |
---|
292 | return 0; |
---|
293 | } |
---|
294 | |
---|
/*
 * Upper bound, in seconds, on the interval between two runs of
 * in_rtqtimo(): run no less than once every ten minutes.
 *
 * The replacement list is parenthesized so that the macro remains a
 * single operand inside a larger expression; unparenthesized,
 * `x / RTQ_TIMEOUT' would evaluate as `(x / 60) * 10'.
 */
#define RTQ_TIMEOUT	(60 * 10)
static int rtq_timeout = RTQ_TIMEOUT;
297 | |
---|
298 | static void |
---|
299 | in_rtqtimo(void *rock) |
---|
300 | { |
---|
301 | struct radix_node_head *rnh = rock; |
---|
302 | struct rtqk_arg arg; |
---|
303 | struct timeval atv; |
---|
304 | static time_t last_adjusted_timeout = 0; |
---|
305 | int s; |
---|
306 | |
---|
307 | arg.found = arg.killed = 0; |
---|
308 | arg.rnh = rnh; |
---|
309 | arg.nextstop = rtems_bsdnet_seconds_since_boot() + rtq_timeout; |
---|
310 | arg.draining = arg.updating = 0; |
---|
311 | s = splnet(); |
---|
312 | rnh->rnh_walktree(rnh, in_rtqkill, &arg); |
---|
313 | splx(s); |
---|
314 | |
---|
315 | /* |
---|
316 | * Attempt to be somewhat dynamic about this: |
---|
317 | * If there are ``too many'' routes sitting around taking up space, |
---|
318 | * then crank down the timeout, and see if we can't make some more |
---|
319 | * go away. However, we make sure that we will never adjust more |
---|
320 | * than once in rtq_timeout seconds, to keep from cranking down too |
---|
321 | * hard. |
---|
322 | */ |
---|
323 | if((arg.found - arg.killed > rtq_toomany) |
---|
324 | && (rtems_bsdnet_seconds_since_boot() - last_adjusted_timeout >= rtq_timeout) |
---|
325 | && rtq_reallyold > rtq_minreallyold) { |
---|
326 | rtq_reallyold = 2*rtq_reallyold / 3; |
---|
327 | if(rtq_reallyold < rtq_minreallyold) { |
---|
328 | rtq_reallyold = rtq_minreallyold; |
---|
329 | } |
---|
330 | |
---|
331 | last_adjusted_timeout = rtems_bsdnet_seconds_since_boot(); |
---|
332 | #ifdef DIAGNOSTIC |
---|
333 | log(LOG_DEBUG, "in_rtqtimo: adjusted rtq_reallyold to %d\n", |
---|
334 | rtq_reallyold); |
---|
335 | #endif |
---|
336 | arg.found = arg.killed = 0; |
---|
337 | arg.updating = 1; |
---|
338 | s = splnet(); |
---|
339 | rnh->rnh_walktree(rnh, in_rtqkill, &arg); |
---|
340 | splx(s); |
---|
341 | } |
---|
342 | |
---|
343 | atv.tv_usec = 0; |
---|
344 | atv.tv_sec = arg.nextstop; |
---|
345 | timeout(in_rtqtimo, rock, hzto(&atv)); |
---|
346 | } |
---|
347 | |
---|
348 | void |
---|
349 | in_rtqdrain(void) |
---|
350 | { |
---|
351 | struct radix_node_head *rnh = rt_tables[AF_INET]; |
---|
352 | struct rtqk_arg arg; |
---|
353 | int s; |
---|
354 | arg.found = arg.killed = 0; |
---|
355 | arg.rnh = rnh; |
---|
356 | arg.nextstop = 0; |
---|
357 | arg.draining = 1; |
---|
358 | arg.updating = 0; |
---|
359 | s = splnet(); |
---|
360 | rnh->rnh_walktree(rnh, in_rtqkill, &arg); |
---|
361 | splx(s); |
---|
362 | } |
---|
363 | |
---|
364 | /* |
---|
365 | * Initialize our routing tree. |
---|
366 | */ |
---|
367 | int |
---|
368 | in_inithead(void **head, int off) |
---|
369 | { |
---|
370 | struct radix_node_head *rnh; |
---|
371 | |
---|
372 | if(!rn_inithead(head, off)) |
---|
373 | return 0; |
---|
374 | |
---|
375 | if(head != (void **)&rt_tables[AF_INET]) /* BOGUS! */ |
---|
376 | return 1; /* only do this for the real routing table */ |
---|
377 | |
---|
378 | rnh = *head; |
---|
379 | rnh->rnh_addaddr = in_addroute; |
---|
380 | rnh->rnh_matchaddr = in_matroute; |
---|
381 | rnh->rnh_close = in_clsroute; |
---|
382 | in_rtqtimo(rnh); /* kick off timeout first time */ |
---|
383 | return 1; |
---|
384 | } |
---|