1 | #include <machine/rtems-bsd-kernel-space.h> |
---|
2 | |
---|
3 | /*- |
---|
4 | * Copyright (c) 1989 Stephen Deering |
---|
5 | * Copyright (c) 1992, 1993 |
---|
6 | * The Regents of the University of California. All rights reserved. |
---|
7 | * |
---|
8 | * This code is derived from software contributed to Berkeley by |
---|
9 | * Stephen Deering of Stanford University. |
---|
10 | * |
---|
11 | * Redistribution and use in source and binary forms, with or without |
---|
12 | * modification, are permitted provided that the following conditions |
---|
13 | * are met: |
---|
14 | * 1. Redistributions of source code must retain the above copyright |
---|
15 | * notice, this list of conditions and the following disclaimer. |
---|
16 | * 2. Redistributions in binary form must reproduce the above copyright |
---|
17 | * notice, this list of conditions and the following disclaimer in the |
---|
18 | * documentation and/or other materials provided with the distribution. |
---|
19 | * 4. Neither the name of the University nor the names of its contributors |
---|
20 | * may be used to endorse or promote products derived from this software |
---|
21 | * without specific prior written permission. |
---|
22 | * |
---|
23 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
---|
24 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
---|
25 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
---|
26 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
---|
27 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
---|
28 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
---|
29 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
---|
30 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
---|
31 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
---|
32 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
---|
33 | * SUCH DAMAGE. |
---|
34 | * |
---|
35 | * @(#)ip_mroute.c 8.2 (Berkeley) 11/15/93 |
---|
36 | */ |
---|
37 | |
---|
38 | /* |
---|
39 | * IP multicast forwarding procedures |
---|
40 | * |
---|
41 | * Written by David Waitzman, BBN Labs, August 1988. |
---|
42 | * Modified by Steve Deering, Stanford, February 1989. |
---|
43 | * Modified by Mark J. Steiglitz, Stanford, May, 1991 |
---|
44 | * Modified by Van Jacobson, LBL, January 1993 |
---|
45 | * Modified by Ajit Thyagarajan, PARC, August 1993 |
---|
46 | * Modified by Bill Fenner, PARC, April 1995 |
---|
47 | * Modified by Ahmed Helmy, SGI, June 1996 |
---|
48 | * Modified by George Edmond Eddy (Rusty), ISI, February 1998 |
---|
49 | * Modified by Pavlin Radoslavov, USC/ISI, May 1998, August 1999, October 2000 |
---|
50 | * Modified by Hitoshi Asaeda, WIDE, August 2000 |
---|
51 | * Modified by Pavlin Radoslavov, ICSI, October 2002 |
---|
52 | * |
---|
53 | * MROUTING Revision: 3.5 |
---|
54 | * and PIM-SMv2 and PIM-DM support, advanced API support, |
---|
55 | * bandwidth metering and signaling |
---|
56 | */ |
---|
57 | |
---|
58 | /* |
---|
59 | * TODO: Prefix functions with ipmf_. |
---|
60 | * TODO: Maintain a refcount on if_allmulti() in ifnet or in the protocol |
---|
61 | * domain attachment (if_afdata) so we can track consumers of that service. |
---|
62 | * TODO: Deprecate routing socket path for SIOCGETSGCNT and SIOCGETVIFCNT, |
---|
63 | * move it to socket options. |
---|
64 | * TODO: Cleanup LSRR removal further. |
---|
65 | * TODO: Push RSVP stubs into raw_ip.c. |
---|
66 | * TODO: Use bitstring.h for vif set. |
---|
67 | * TODO: Fix mrt6_ioctl dangling ref when dynamically loaded. |
---|
68 | * TODO: Sync ip6_mroute.c with this file. |
---|
69 | */ |
---|
70 | |
---|
71 | #include <sys/cdefs.h> |
---|
72 | __FBSDID("$FreeBSD$"); |
---|
73 | |
---|
74 | #include <rtems/bsd/local/opt_inet.h> |
---|
75 | #include <rtems/bsd/local/opt_mrouting.h> |
---|
76 | |
---|
77 | #define _PIM_VT 1 |
---|
78 | |
---|
79 | #include <rtems/bsd/sys/param.h> |
---|
80 | #include <sys/kernel.h> |
---|
81 | #include <sys/stddef.h> |
---|
82 | #include <rtems/bsd/sys/lock.h> |
---|
83 | #include <sys/ktr.h> |
---|
84 | #include <sys/malloc.h> |
---|
85 | #include <sys/mbuf.h> |
---|
86 | #include <sys/module.h> |
---|
87 | #include <sys/priv.h> |
---|
88 | #include <sys/protosw.h> |
---|
89 | #include <sys/signalvar.h> |
---|
90 | #include <sys/socket.h> |
---|
91 | #include <sys/socketvar.h> |
---|
92 | #include <sys/sockio.h> |
---|
93 | #include <sys/sx.h> |
---|
94 | #include <sys/sysctl.h> |
---|
95 | #include <sys/syslog.h> |
---|
96 | #include <sys/systm.h> |
---|
97 | #include <rtems/bsd/sys/time.h> |
---|
98 | |
---|
99 | #include <net/if.h> |
---|
100 | #include <net/netisr.h> |
---|
101 | #include <net/route.h> |
---|
102 | #include <net/vnet.h> |
---|
103 | |
---|
104 | #include <netinet/in.h> |
---|
105 | #include <netinet/igmp.h> |
---|
106 | #include <netinet/in_systm.h> |
---|
107 | #include <netinet/in_var.h> |
---|
108 | #include <netinet/ip.h> |
---|
109 | #include <netinet/ip_encap.h> |
---|
110 | #include <netinet/ip_mroute.h> |
---|
111 | #include <netinet/ip_var.h> |
---|
112 | #include <netinet/ip_options.h> |
---|
113 | #include <netinet/pim.h> |
---|
114 | #include <netinet/pim_var.h> |
---|
115 | #include <netinet/udp.h> |
---|
116 | |
---|
117 | #include <machine/in_cksum.h> |
---|
118 | |
---|
119 | #ifndef KTR_IPMF |
---|
120 | #define KTR_IPMF KTR_INET |
---|
121 | #endif |
---|
122 | |
---|
123 | #define VIFI_INVALID ((vifi_t) -1) |
---|
124 | #define M_HASCL(m) ((m)->m_flags & M_EXT) |
---|
125 | |
---|
126 | static VNET_DEFINE(uint32_t, last_tv_sec); /* last time we processed this */ |
---|
127 | #define V_last_tv_sec VNET(last_tv_sec) |
---|
128 | |
---|
129 | static MALLOC_DEFINE(M_MRTABLE, "mroutetbl", "multicast forwarding cache"); |
---|
130 | |
---|
131 | /* |
---|
132 | * Locking. We use two locks: one for the virtual interface table and |
---|
133 | * one for the forwarding table. These locks may be nested in which case |
---|
134 | * the VIF lock must always be taken first. Note that each lock is used |
---|
135 | * to cover not only the specific data structure but also related data |
---|
136 | * structures. |
---|
137 | */ |
---|
138 | |
---|
139 | static struct mtx mrouter_mtx; |
---|
140 | #define MROUTER_LOCK() mtx_lock(&mrouter_mtx) |
---|
141 | #define MROUTER_UNLOCK() mtx_unlock(&mrouter_mtx) |
---|
142 | #define MROUTER_LOCK_ASSERT() mtx_assert(&mrouter_mtx, MA_OWNED) |
---|
143 | #define MROUTER_LOCK_INIT() \ |
---|
144 | mtx_init(&mrouter_mtx, "IPv4 multicast forwarding", NULL, MTX_DEF) |
---|
145 | #define MROUTER_LOCK_DESTROY() mtx_destroy(&mrouter_mtx) |
---|
146 | |
---|
147 | static int ip_mrouter_cnt; /* # of vnets with active mrouters */ |
---|
148 | static int ip_mrouter_unloading; /* Allow no more V_ip_mrouter sockets */ |
---|
149 | |
---|
150 | static VNET_DEFINE(struct mrtstat, mrtstat); |
---|
151 | #define V_mrtstat VNET(mrtstat) |
---|
152 | SYSCTL_VNET_STRUCT(_net_inet_ip, OID_AUTO, mrtstat, CTLFLAG_RW, |
---|
153 | &VNET_NAME(mrtstat), mrtstat, |
---|
154 | "IPv4 Multicast Forwarding Statistics (struct mrtstat, " |
---|
155 | "netinet/ip_mroute.h)"); |
---|
156 | |
---|
157 | static VNET_DEFINE(u_long, mfchash); |
---|
158 | #define V_mfchash VNET(mfchash) |
---|
159 | #define MFCHASH(a, g) \ |
---|
160 | ((((a).s_addr >> 20) ^ ((a).s_addr >> 10) ^ (a).s_addr ^ \ |
---|
161 | ((g).s_addr >> 20) ^ ((g).s_addr >> 10) ^ (g).s_addr) & V_mfchash) |
---|
162 | #define MFCHASHSIZE 256 |
---|
163 | |
---|
164 | static u_long mfchashsize; /* Hash size */ |
---|
165 | static VNET_DEFINE(u_char *, nexpire); /* 0..mfchashsize-1 */ |
---|
166 | #define V_nexpire VNET(nexpire) |
---|
167 | static VNET_DEFINE(LIST_HEAD(mfchashhdr, mfc)*, mfchashtbl); |
---|
168 | #define V_mfchashtbl VNET(mfchashtbl) |
---|
169 | |
---|
170 | static struct mtx mfc_mtx; |
---|
171 | #define MFC_LOCK() mtx_lock(&mfc_mtx) |
---|
172 | #define MFC_UNLOCK() mtx_unlock(&mfc_mtx) |
---|
173 | #define MFC_LOCK_ASSERT() mtx_assert(&mfc_mtx, MA_OWNED) |
---|
174 | #define MFC_LOCK_INIT() \ |
---|
175 | mtx_init(&mfc_mtx, "IPv4 multicast forwarding cache", NULL, MTX_DEF) |
---|
176 | #define MFC_LOCK_DESTROY() mtx_destroy(&mfc_mtx) |
---|
177 | |
---|
178 | static VNET_DEFINE(vifi_t, numvifs); |
---|
179 | #define V_numvifs VNET(numvifs) |
---|
180 | static VNET_DEFINE(struct vif, viftable[MAXVIFS]); |
---|
181 | #define V_viftable VNET(viftable) |
---|
182 | SYSCTL_VNET_OPAQUE(_net_inet_ip, OID_AUTO, viftable, CTLFLAG_RD, |
---|
183 | &VNET_NAME(viftable), sizeof(V_viftable), "S,vif[MAXVIFS]", |
---|
184 | "IPv4 Multicast Interfaces (struct vif[MAXVIFS], netinet/ip_mroute.h)"); |
---|
185 | |
---|
186 | static struct mtx vif_mtx; |
---|
187 | #define VIF_LOCK() mtx_lock(&vif_mtx) |
---|
188 | #define VIF_UNLOCK() mtx_unlock(&vif_mtx) |
---|
189 | #define VIF_LOCK_ASSERT() mtx_assert(&vif_mtx, MA_OWNED) |
---|
190 | #define VIF_LOCK_INIT() \ |
---|
191 | mtx_init(&vif_mtx, "IPv4 multicast interfaces", NULL, MTX_DEF) |
---|
192 | #define VIF_LOCK_DESTROY() mtx_destroy(&vif_mtx) |
---|
193 | |
---|
194 | static eventhandler_tag if_detach_event_tag = NULL; |
---|
195 | |
---|
196 | static VNET_DEFINE(struct callout, expire_upcalls_ch); |
---|
197 | #define V_expire_upcalls_ch VNET(expire_upcalls_ch) |
---|
198 | |
---|
199 | #define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */ |
---|
200 | #define UPCALL_EXPIRE 6 /* number of timeouts */ |
---|
201 | |
---|
202 | /* |
---|
203 | * Bandwidth meter variables and constants |
---|
204 | */ |
---|
205 | static MALLOC_DEFINE(M_BWMETER, "bwmeter", "multicast upcall bw meters"); |
---|
206 | /* |
---|
207 | * Pending timeouts are stored in a hash table, the key being the |
---|
208 | * expiration time. Periodically, the entries are analysed and processed. |
---|
209 | */ |
---|
210 | #define BW_METER_BUCKETS 1024 |
---|
211 | static VNET_DEFINE(struct bw_meter*, bw_meter_timers[BW_METER_BUCKETS]); |
---|
212 | #define V_bw_meter_timers VNET(bw_meter_timers) |
---|
213 | static VNET_DEFINE(struct callout, bw_meter_ch); |
---|
214 | #define V_bw_meter_ch VNET(bw_meter_ch) |
---|
215 | #define BW_METER_PERIOD (hz) /* periodical handling of bw meters */ |
---|
216 | |
---|
217 | /* |
---|
218 | * Pending upcalls are stored in a vector which is flushed when |
---|
219 | * full, or periodically |
---|
220 | */ |
---|
221 | static VNET_DEFINE(struct bw_upcall, bw_upcalls[BW_UPCALLS_MAX]); |
---|
222 | #define V_bw_upcalls VNET(bw_upcalls) |
---|
223 | static VNET_DEFINE(u_int, bw_upcalls_n); /* # of pending upcalls */ |
---|
224 | #define V_bw_upcalls_n VNET(bw_upcalls_n) |
---|
225 | static VNET_DEFINE(struct callout, bw_upcalls_ch); |
---|
226 | #define V_bw_upcalls_ch VNET(bw_upcalls_ch) |
---|
227 | |
---|
228 | #define BW_UPCALLS_PERIOD (hz) /* periodical flush of bw upcalls */ |
---|
229 | |
---|
230 | static VNET_DEFINE(struct pimstat, pimstat); |
---|
231 | #define V_pimstat VNET(pimstat) |
---|
232 | |
---|
233 | SYSCTL_NODE(_net_inet, IPPROTO_PIM, pim, CTLFLAG_RW, 0, "PIM"); |
---|
234 | SYSCTL_VNET_STRUCT(_net_inet_pim, PIMCTL_STATS, stats, CTLFLAG_RD, |
---|
235 | &VNET_NAME(pimstat), pimstat, |
---|
236 | "PIM Statistics (struct pimstat, netinet/pim_var.h)"); |
---|
237 | |
---|
238 | static u_long pim_squelch_wholepkt = 0; |
---|
239 | SYSCTL_ULONG(_net_inet_pim, OID_AUTO, squelch_wholepkt, CTLFLAG_RW, |
---|
240 | &pim_squelch_wholepkt, 0, |
---|
241 | "Disable IGMP_WHOLEPKT notifications if rendezvous point is unspecified"); |
---|
242 | |
---|
243 | extern struct domain inetdomain; |
---|
244 | static const struct protosw in_pim_protosw = { |
---|
245 | .pr_type = SOCK_RAW, |
---|
246 | .pr_domain = &inetdomain, |
---|
247 | .pr_protocol = IPPROTO_PIM, |
---|
248 | .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, |
---|
249 | .pr_input = pim_input, |
---|
250 | .pr_output = (pr_output_t*)rip_output, |
---|
251 | .pr_ctloutput = rip_ctloutput, |
---|
252 | .pr_usrreqs = &rip_usrreqs |
---|
253 | }; |
---|
254 | static const struct encaptab *pim_encap_cookie; |
---|
255 | |
---|
256 | static int pim_encapcheck(const struct mbuf *, int, int, void *); |
---|
257 | |
---|
258 | /* |
---|
259 | * Note: the PIM Register encapsulation adds the following in front of a |
---|
260 | * data packet: |
---|
261 | * |
---|
262 | * struct pim_encap_hdr { |
---|
263 | * struct ip ip; |
---|
264 | * struct pim_encap_pimhdr pim; |
---|
265 | * } |
---|
266 | * |
---|
267 | */ |
---|
268 | |
---|
269 | struct pim_encap_pimhdr { |
---|
270 | struct pim pim; |
---|
271 | uint32_t flags; |
---|
272 | }; |
---|
273 | #define PIM_ENCAP_TTL 64 |
---|
274 | |
---|
275 | static struct ip pim_encap_iphdr = { |
---|
276 | #if BYTE_ORDER == LITTLE_ENDIAN |
---|
277 | sizeof(struct ip) >> 2, |
---|
278 | IPVERSION, |
---|
279 | #else |
---|
280 | IPVERSION, |
---|
281 | sizeof(struct ip) >> 2, |
---|
282 | #endif |
---|
283 | 0, /* tos */ |
---|
284 | sizeof(struct ip), /* total length */ |
---|
285 | 0, /* id */ |
---|
286 | 0, /* frag offset */ |
---|
287 | PIM_ENCAP_TTL, |
---|
288 | IPPROTO_PIM, |
---|
289 | 0, /* checksum */ |
---|
290 | }; |
---|
291 | |
---|
292 | static struct pim_encap_pimhdr pim_encap_pimhdr = { |
---|
293 | { |
---|
294 | PIM_MAKE_VT(PIM_VERSION, PIM_REGISTER), /* PIM vers and message type */ |
---|
295 | 0, /* reserved */ |
---|
296 | 0, /* checksum */ |
---|
297 | }, |
---|
298 | 0 /* flags */ |
---|
299 | }; |
---|
300 | |
---|
301 | static VNET_DEFINE(vifi_t, reg_vif_num) = VIFI_INVALID; |
---|
302 | #define V_reg_vif_num VNET(reg_vif_num) |
---|
303 | static VNET_DEFINE(struct ifnet, multicast_register_if); |
---|
304 | #define V_multicast_register_if VNET(multicast_register_if) |
---|
305 | |
---|
306 | /* |
---|
307 | * Private variables. |
---|
308 | */ |
---|
309 | |
---|
310 | static u_long X_ip_mcast_src(int); |
---|
311 | static int X_ip_mforward(struct ip *, struct ifnet *, struct mbuf *, |
---|
312 | struct ip_moptions *); |
---|
313 | static int X_ip_mrouter_done(void); |
---|
314 | static int X_ip_mrouter_get(struct socket *, struct sockopt *); |
---|
315 | static int X_ip_mrouter_set(struct socket *, struct sockopt *); |
---|
316 | static int X_legal_vif_num(int); |
---|
317 | static int X_mrt_ioctl(u_long, caddr_t, int); |
---|
318 | |
---|
319 | static int add_bw_upcall(struct bw_upcall *); |
---|
320 | static int add_mfc(struct mfcctl2 *); |
---|
321 | static int add_vif(struct vifctl *); |
---|
322 | static void bw_meter_prepare_upcall(struct bw_meter *, struct timeval *); |
---|
323 | static void bw_meter_process(void); |
---|
324 | static void bw_meter_receive_packet(struct bw_meter *, int, |
---|
325 | struct timeval *); |
---|
326 | static void bw_upcalls_send(void); |
---|
327 | static int del_bw_upcall(struct bw_upcall *); |
---|
328 | static int del_mfc(struct mfcctl2 *); |
---|
329 | static int del_vif(vifi_t); |
---|
330 | static int del_vif_locked(vifi_t); |
---|
331 | static void expire_bw_meter_process(void *); |
---|
332 | static void expire_bw_upcalls_send(void *); |
---|
333 | static void expire_mfc(struct mfc *); |
---|
334 | static void expire_upcalls(void *); |
---|
335 | static void free_bw_list(struct bw_meter *); |
---|
336 | static int get_sg_cnt(struct sioc_sg_req *); |
---|
337 | static int get_vif_cnt(struct sioc_vif_req *); |
---|
338 | static void if_detached_event(void *, struct ifnet *); |
---|
339 | static int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *, vifi_t); |
---|
340 | static int ip_mrouter_init(struct socket *, int); |
---|
341 | static __inline struct mfc * |
---|
342 | mfc_find(struct in_addr *, struct in_addr *); |
---|
343 | static void phyint_send(struct ip *, struct vif *, struct mbuf *); |
---|
344 | static struct mbuf * |
---|
345 | pim_register_prepare(struct ip *, struct mbuf *); |
---|
346 | static int pim_register_send(struct ip *, struct vif *, |
---|
347 | struct mbuf *, struct mfc *); |
---|
348 | static int pim_register_send_rp(struct ip *, struct vif *, |
---|
349 | struct mbuf *, struct mfc *); |
---|
350 | static int pim_register_send_upcall(struct ip *, struct vif *, |
---|
351 | struct mbuf *, struct mfc *); |
---|
352 | static void schedule_bw_meter(struct bw_meter *, struct timeval *); |
---|
353 | static void send_packet(struct vif *, struct mbuf *); |
---|
354 | static int set_api_config(uint32_t *); |
---|
355 | static int set_assert(int); |
---|
356 | static int socket_send(struct socket *, struct mbuf *, |
---|
357 | struct sockaddr_in *); |
---|
358 | static void unschedule_bw_meter(struct bw_meter *); |
---|
359 | |
---|
360 | /* |
---|
361 | * Kernel multicast forwarding API capabilities and setup. |
---|
362 | * If more API capabilities are added to the kernel, they should be |
---|
363 | * recorded in `mrt_api_support'. |
---|
364 | */ |
---|
365 | #define MRT_API_VERSION 0x0305 |
---|
366 | |
---|
367 | static const int mrt_api_version = MRT_API_VERSION; |
---|
368 | static const uint32_t mrt_api_support = (MRT_MFC_FLAGS_DISABLE_WRONGVIF | |
---|
369 | MRT_MFC_FLAGS_BORDER_VIF | |
---|
370 | MRT_MFC_RP | |
---|
371 | MRT_MFC_BW_UPCALL); |
---|
372 | static VNET_DEFINE(uint32_t, mrt_api_config); |
---|
373 | #define V_mrt_api_config VNET(mrt_api_config) |
---|
374 | static VNET_DEFINE(int, pim_assert_enabled); |
---|
375 | #define V_pim_assert_enabled VNET(pim_assert_enabled) |
---|
376 | static struct timeval pim_assert_interval = { 3, 0 }; /* Rate limit */ |
---|
377 | |
---|
378 | /* |
---|
379 | * Find a route for a given origin IP address and multicast group address. |
---|
380 | * Statistics must be updated by the caller. |
---|
381 | */ |
---|
382 | static __inline struct mfc * |
---|
383 | mfc_find(struct in_addr *o, struct in_addr *g) |
---|
384 | { |
---|
385 | struct mfc *rt; |
---|
386 | |
---|
387 | MFC_LOCK_ASSERT(); |
---|
388 | |
---|
389 | LIST_FOREACH(rt, &V_mfchashtbl[MFCHASH(*o, *g)], mfc_hash) { |
---|
390 | if (in_hosteq(rt->mfc_origin, *o) && |
---|
391 | in_hosteq(rt->mfc_mcastgrp, *g) && |
---|
392 | TAILQ_EMPTY(&rt->mfc_stall)) |
---|
393 | break; |
---|
394 | } |
---|
395 | |
---|
396 | return (rt); |
---|
397 | } |
---|
398 | |
---|
399 | /* |
---|
400 | * Handle MRT setsockopt commands to modify the multicast forwarding tables. |
---|
401 | */ |
---|
402 | static int |
---|
403 | X_ip_mrouter_set(struct socket *so, struct sockopt *sopt) |
---|
404 | { |
---|
405 | int error, optval; |
---|
406 | vifi_t vifi; |
---|
407 | struct vifctl vifc; |
---|
408 | struct mfcctl2 mfc; |
---|
409 | struct bw_upcall bw_upcall; |
---|
410 | uint32_t i; |
---|
411 | |
---|
412 | if (so != V_ip_mrouter && sopt->sopt_name != MRT_INIT) |
---|
413 | return EPERM; |
---|
414 | |
---|
415 | error = 0; |
---|
416 | switch (sopt->sopt_name) { |
---|
417 | case MRT_INIT: |
---|
418 | error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); |
---|
419 | if (error) |
---|
420 | break; |
---|
421 | error = ip_mrouter_init(so, optval); |
---|
422 | break; |
---|
423 | |
---|
424 | case MRT_DONE: |
---|
425 | error = ip_mrouter_done(); |
---|
426 | break; |
---|
427 | |
---|
428 | case MRT_ADD_VIF: |
---|
429 | error = sooptcopyin(sopt, &vifc, sizeof vifc, sizeof vifc); |
---|
430 | if (error) |
---|
431 | break; |
---|
432 | error = add_vif(&vifc); |
---|
433 | break; |
---|
434 | |
---|
435 | case MRT_DEL_VIF: |
---|
436 | error = sooptcopyin(sopt, &vifi, sizeof vifi, sizeof vifi); |
---|
437 | if (error) |
---|
438 | break; |
---|
439 | error = del_vif(vifi); |
---|
440 | break; |
---|
441 | |
---|
442 | case MRT_ADD_MFC: |
---|
443 | case MRT_DEL_MFC: |
---|
444 | /* |
---|
445 | * select data size depending on API version. |
---|
446 | */ |
---|
447 | if (sopt->sopt_name == MRT_ADD_MFC && |
---|
448 | V_mrt_api_config & MRT_API_FLAGS_ALL) { |
---|
449 | error = sooptcopyin(sopt, &mfc, sizeof(struct mfcctl2), |
---|
450 | sizeof(struct mfcctl2)); |
---|
451 | } else { |
---|
452 | error = sooptcopyin(sopt, &mfc, sizeof(struct mfcctl), |
---|
453 | sizeof(struct mfcctl)); |
---|
454 | bzero((caddr_t)&mfc + sizeof(struct mfcctl), |
---|
455 | sizeof(mfc) - sizeof(struct mfcctl)); |
---|
456 | } |
---|
457 | if (error) |
---|
458 | break; |
---|
459 | if (sopt->sopt_name == MRT_ADD_MFC) |
---|
460 | error = add_mfc(&mfc); |
---|
461 | else |
---|
462 | error = del_mfc(&mfc); |
---|
463 | break; |
---|
464 | |
---|
465 | case MRT_ASSERT: |
---|
466 | error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); |
---|
467 | if (error) |
---|
468 | break; |
---|
469 | set_assert(optval); |
---|
470 | break; |
---|
471 | |
---|
472 | case MRT_API_CONFIG: |
---|
473 | error = sooptcopyin(sopt, &i, sizeof i, sizeof i); |
---|
474 | if (!error) |
---|
475 | error = set_api_config(&i); |
---|
476 | if (!error) |
---|
477 | error = sooptcopyout(sopt, &i, sizeof i); |
---|
478 | break; |
---|
479 | |
---|
480 | case MRT_ADD_BW_UPCALL: |
---|
481 | case MRT_DEL_BW_UPCALL: |
---|
482 | error = sooptcopyin(sopt, &bw_upcall, sizeof bw_upcall, |
---|
483 | sizeof bw_upcall); |
---|
484 | if (error) |
---|
485 | break; |
---|
486 | if (sopt->sopt_name == MRT_ADD_BW_UPCALL) |
---|
487 | error = add_bw_upcall(&bw_upcall); |
---|
488 | else |
---|
489 | error = del_bw_upcall(&bw_upcall); |
---|
490 | break; |
---|
491 | |
---|
492 | default: |
---|
493 | error = EOPNOTSUPP; |
---|
494 | break; |
---|
495 | } |
---|
496 | return error; |
---|
497 | } |
---|
498 | |
---|
499 | /* |
---|
500 | * Handle MRT getsockopt commands |
---|
501 | */ |
---|
502 | static int |
---|
503 | X_ip_mrouter_get(struct socket *so, struct sockopt *sopt) |
---|
504 | { |
---|
505 | int error; |
---|
506 | |
---|
507 | switch (sopt->sopt_name) { |
---|
508 | case MRT_VERSION: |
---|
509 | error = sooptcopyout(sopt, &mrt_api_version, sizeof mrt_api_version); |
---|
510 | break; |
---|
511 | |
---|
512 | case MRT_ASSERT: |
---|
513 | error = sooptcopyout(sopt, &V_pim_assert_enabled, |
---|
514 | sizeof V_pim_assert_enabled); |
---|
515 | break; |
---|
516 | |
---|
517 | case MRT_API_SUPPORT: |
---|
518 | error = sooptcopyout(sopt, &mrt_api_support, sizeof mrt_api_support); |
---|
519 | break; |
---|
520 | |
---|
521 | case MRT_API_CONFIG: |
---|
522 | error = sooptcopyout(sopt, &V_mrt_api_config, sizeof V_mrt_api_config); |
---|
523 | break; |
---|
524 | |
---|
525 | default: |
---|
526 | error = EOPNOTSUPP; |
---|
527 | break; |
---|
528 | } |
---|
529 | return error; |
---|
530 | } |
---|
531 | |
---|
532 | /* |
---|
533 | * Handle ioctl commands to obtain information from the cache |
---|
534 | */ |
---|
535 | static int |
---|
536 | X_mrt_ioctl(u_long cmd, caddr_t data, int fibnum __unused) |
---|
537 | { |
---|
538 | int error = 0; |
---|
539 | |
---|
540 | /* |
---|
541 | * Currently the only function calling this ioctl routine is rtioctl(). |
---|
542 | * Typically, only root can create the raw socket in order to execute |
---|
543 | * this ioctl method, however the request might be coming from a prison |
---|
544 | */ |
---|
545 | error = priv_check(curthread, PRIV_NETINET_MROUTE); |
---|
546 | if (error) |
---|
547 | return (error); |
---|
548 | switch (cmd) { |
---|
549 | case (SIOCGETVIFCNT): |
---|
550 | error = get_vif_cnt((struct sioc_vif_req *)data); |
---|
551 | break; |
---|
552 | |
---|
553 | case (SIOCGETSGCNT): |
---|
554 | error = get_sg_cnt((struct sioc_sg_req *)data); |
---|
555 | break; |
---|
556 | |
---|
557 | default: |
---|
558 | error = EINVAL; |
---|
559 | break; |
---|
560 | } |
---|
561 | return error; |
---|
562 | } |
---|
563 | |
---|
564 | /* |
---|
565 | * returns the packet, byte, rpf-failure count for the source group provided |
---|
566 | */ |
---|
567 | static int |
---|
568 | get_sg_cnt(struct sioc_sg_req *req) |
---|
569 | { |
---|
570 | struct mfc *rt; |
---|
571 | |
---|
572 | MFC_LOCK(); |
---|
573 | rt = mfc_find(&req->src, &req->grp); |
---|
574 | if (rt == NULL) { |
---|
575 | MFC_UNLOCK(); |
---|
576 | req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff; |
---|
577 | return EADDRNOTAVAIL; |
---|
578 | } |
---|
579 | req->pktcnt = rt->mfc_pkt_cnt; |
---|
580 | req->bytecnt = rt->mfc_byte_cnt; |
---|
581 | req->wrong_if = rt->mfc_wrong_if; |
---|
582 | MFC_UNLOCK(); |
---|
583 | return 0; |
---|
584 | } |
---|
585 | |
---|
586 | /* |
---|
587 | * returns the input and output packet and byte counts on the vif provided |
---|
588 | */ |
---|
589 | static int |
---|
590 | get_vif_cnt(struct sioc_vif_req *req) |
---|
591 | { |
---|
592 | vifi_t vifi = req->vifi; |
---|
593 | |
---|
594 | VIF_LOCK(); |
---|
595 | if (vifi >= V_numvifs) { |
---|
596 | VIF_UNLOCK(); |
---|
597 | return EINVAL; |
---|
598 | } |
---|
599 | |
---|
600 | req->icount = V_viftable[vifi].v_pkt_in; |
---|
601 | req->ocount = V_viftable[vifi].v_pkt_out; |
---|
602 | req->ibytes = V_viftable[vifi].v_bytes_in; |
---|
603 | req->obytes = V_viftable[vifi].v_bytes_out; |
---|
604 | VIF_UNLOCK(); |
---|
605 | |
---|
606 | return 0; |
---|
607 | } |
---|
608 | |
---|
609 | static void |
---|
610 | if_detached_event(void *arg __unused, struct ifnet *ifp) |
---|
611 | { |
---|
612 | vifi_t vifi; |
---|
613 | u_long i; |
---|
614 | |
---|
615 | MROUTER_LOCK(); |
---|
616 | |
---|
617 | if (V_ip_mrouter == NULL) { |
---|
618 | MROUTER_UNLOCK(); |
---|
619 | return; |
---|
620 | } |
---|
621 | |
---|
622 | VIF_LOCK(); |
---|
623 | MFC_LOCK(); |
---|
624 | |
---|
625 | /* |
---|
626 | * Tear down multicast forwarder state associated with this ifnet. |
---|
627 | * 1. Walk the vif list, matching vifs against this ifnet. |
---|
628 | * 2. Walk the multicast forwarding cache (mfc) looking for |
---|
629 | * inner matches with this vif's index. |
---|
630 | * 3. Expire any matching multicast forwarding cache entries. |
---|
631 | * 4. Free vif state. This should disable ALLMULTI on the interface. |
---|
632 | */ |
---|
633 | for (vifi = 0; vifi < V_numvifs; vifi++) { |
---|
634 | if (V_viftable[vifi].v_ifp != ifp) |
---|
635 | continue; |
---|
636 | for (i = 0; i < mfchashsize; i++) { |
---|
637 | struct mfc *rt, *nrt; |
---|
638 | for (rt = LIST_FIRST(&V_mfchashtbl[i]); rt; rt = nrt) { |
---|
639 | nrt = LIST_NEXT(rt, mfc_hash); |
---|
640 | if (rt->mfc_parent == vifi) { |
---|
641 | expire_mfc(rt); |
---|
642 | } |
---|
643 | } |
---|
644 | } |
---|
645 | del_vif_locked(vifi); |
---|
646 | } |
---|
647 | |
---|
648 | MFC_UNLOCK(); |
---|
649 | VIF_UNLOCK(); |
---|
650 | |
---|
651 | MROUTER_UNLOCK(); |
---|
652 | } |
---|
653 | |
---|
654 | /* |
---|
655 | * Enable multicast forwarding. |
---|
656 | */ |
---|
657 | static int |
---|
658 | ip_mrouter_init(struct socket *so, int version) |
---|
659 | { |
---|
660 | |
---|
661 | CTR3(KTR_IPMF, "%s: so_type %d, pr_protocol %d", __func__, |
---|
662 | so->so_type, so->so_proto->pr_protocol); |
---|
663 | |
---|
664 | if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_IGMP) |
---|
665 | return EOPNOTSUPP; |
---|
666 | |
---|
667 | if (version != 1) |
---|
668 | return ENOPROTOOPT; |
---|
669 | |
---|
670 | MROUTER_LOCK(); |
---|
671 | |
---|
672 | if (ip_mrouter_unloading) { |
---|
673 | MROUTER_UNLOCK(); |
---|
674 | return ENOPROTOOPT; |
---|
675 | } |
---|
676 | |
---|
677 | if (V_ip_mrouter != NULL) { |
---|
678 | MROUTER_UNLOCK(); |
---|
679 | return EADDRINUSE; |
---|
680 | } |
---|
681 | |
---|
682 | V_mfchashtbl = hashinit_flags(mfchashsize, M_MRTABLE, &V_mfchash, |
---|
683 | HASH_NOWAIT); |
---|
684 | |
---|
685 | callout_reset(&V_expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls, |
---|
686 | curvnet); |
---|
687 | callout_reset(&V_bw_upcalls_ch, BW_UPCALLS_PERIOD, expire_bw_upcalls_send, |
---|
688 | curvnet); |
---|
689 | callout_reset(&V_bw_meter_ch, BW_METER_PERIOD, expire_bw_meter_process, |
---|
690 | curvnet); |
---|
691 | |
---|
692 | V_ip_mrouter = so; |
---|
693 | ip_mrouter_cnt++; |
---|
694 | |
---|
695 | MROUTER_UNLOCK(); |
---|
696 | |
---|
697 | CTR1(KTR_IPMF, "%s: done", __func__); |
---|
698 | |
---|
699 | return 0; |
---|
700 | } |
---|
701 | |
---|
702 | /* |
---|
703 | * Disable multicast forwarding. |
---|
704 | */ |
---|
705 | static int |
---|
706 | X_ip_mrouter_done(void) |
---|
707 | { |
---|
708 | struct ifnet *ifp; |
---|
709 | u_long i; |
---|
710 | vifi_t vifi; |
---|
711 | |
---|
712 | MROUTER_LOCK(); |
---|
713 | |
---|
714 | if (V_ip_mrouter == NULL) { |
---|
715 | MROUTER_UNLOCK(); |
---|
716 | return EINVAL; |
---|
717 | } |
---|
718 | |
---|
719 | /* |
---|
720 | * Detach/disable hooks to the reset of the system. |
---|
721 | */ |
---|
722 | V_ip_mrouter = NULL; |
---|
723 | ip_mrouter_cnt--; |
---|
724 | V_mrt_api_config = 0; |
---|
725 | |
---|
726 | VIF_LOCK(); |
---|
727 | |
---|
728 | /* |
---|
729 | * For each phyint in use, disable promiscuous reception of all IP |
---|
730 | * multicasts. |
---|
731 | */ |
---|
732 | for (vifi = 0; vifi < V_numvifs; vifi++) { |
---|
733 | if (!in_nullhost(V_viftable[vifi].v_lcl_addr) && |
---|
734 | !(V_viftable[vifi].v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) { |
---|
735 | ifp = V_viftable[vifi].v_ifp; |
---|
736 | if_allmulti(ifp, 0); |
---|
737 | } |
---|
738 | } |
---|
739 | bzero((caddr_t)V_viftable, sizeof(V_viftable)); |
---|
740 | V_numvifs = 0; |
---|
741 | V_pim_assert_enabled = 0; |
---|
742 | |
---|
743 | VIF_UNLOCK(); |
---|
744 | |
---|
745 | callout_stop(&V_expire_upcalls_ch); |
---|
746 | callout_stop(&V_bw_upcalls_ch); |
---|
747 | callout_stop(&V_bw_meter_ch); |
---|
748 | |
---|
749 | MFC_LOCK(); |
---|
750 | |
---|
751 | /* |
---|
752 | * Free all multicast forwarding cache entries. |
---|
753 | * Do not use hashdestroy(), as we must perform other cleanup. |
---|
754 | */ |
---|
755 | for (i = 0; i < mfchashsize; i++) { |
---|
756 | struct mfc *rt, *nrt; |
---|
757 | for (rt = LIST_FIRST(&V_mfchashtbl[i]); rt; rt = nrt) { |
---|
758 | nrt = LIST_NEXT(rt, mfc_hash); |
---|
759 | expire_mfc(rt); |
---|
760 | } |
---|
761 | } |
---|
762 | free(V_mfchashtbl, M_MRTABLE); |
---|
763 | V_mfchashtbl = NULL; |
---|
764 | |
---|
765 | bzero(V_nexpire, sizeof(V_nexpire[0]) * mfchashsize); |
---|
766 | |
---|
767 | V_bw_upcalls_n = 0; |
---|
768 | bzero(V_bw_meter_timers, sizeof(V_bw_meter_timers)); |
---|
769 | |
---|
770 | MFC_UNLOCK(); |
---|
771 | |
---|
772 | V_reg_vif_num = VIFI_INVALID; |
---|
773 | |
---|
774 | MROUTER_UNLOCK(); |
---|
775 | |
---|
776 | CTR1(KTR_IPMF, "%s: done", __func__); |
---|
777 | |
---|
778 | return 0; |
---|
779 | } |
---|
780 | |
---|
781 | /* |
---|
782 | * Set PIM assert processing global |
---|
783 | */ |
---|
784 | static int |
---|
785 | set_assert(int i) |
---|
786 | { |
---|
787 | if ((i != 1) && (i != 0)) |
---|
788 | return EINVAL; |
---|
789 | |
---|
790 | V_pim_assert_enabled = i; |
---|
791 | |
---|
792 | return 0; |
---|
793 | } |
---|
794 | |
---|
795 | /* |
---|
796 | * Configure API capabilities |
---|
797 | */ |
---|
798 | int |
---|
799 | set_api_config(uint32_t *apival) |
---|
800 | { |
---|
801 | u_long i; |
---|
802 | |
---|
803 | /* |
---|
804 | * We can set the API capabilities only if it is the first operation |
---|
805 | * after MRT_INIT. I.e.: |
---|
806 | * - there are no vifs installed |
---|
807 | * - pim_assert is not enabled |
---|
808 | * - the MFC table is empty |
---|
809 | */ |
---|
810 | if (V_numvifs > 0) { |
---|
811 | *apival = 0; |
---|
812 | return EPERM; |
---|
813 | } |
---|
814 | if (V_pim_assert_enabled) { |
---|
815 | *apival = 0; |
---|
816 | return EPERM; |
---|
817 | } |
---|
818 | |
---|
819 | MFC_LOCK(); |
---|
820 | |
---|
821 | for (i = 0; i < mfchashsize; i++) { |
---|
822 | if (LIST_FIRST(&V_mfchashtbl[i]) != NULL) { |
---|
823 | MFC_UNLOCK(); |
---|
824 | *apival = 0; |
---|
825 | return EPERM; |
---|
826 | } |
---|
827 | } |
---|
828 | |
---|
829 | MFC_UNLOCK(); |
---|
830 | |
---|
831 | V_mrt_api_config = *apival & mrt_api_support; |
---|
832 | *apival = V_mrt_api_config; |
---|
833 | |
---|
834 | return 0; |
---|
835 | } |
---|
836 | |
---|
837 | /* |
---|
838 | * Add a vif to the vif table |
---|
839 | */ |
---|
840 | static int |
---|
841 | add_vif(struct vifctl *vifcp) |
---|
842 | { |
---|
843 | struct vif *vifp = V_viftable + vifcp->vifc_vifi; |
---|
844 | struct sockaddr_in sin = {sizeof sin, AF_INET}; |
---|
845 | struct ifaddr *ifa; |
---|
846 | struct ifnet *ifp; |
---|
847 | int error; |
---|
848 | |
---|
849 | VIF_LOCK(); |
---|
850 | if (vifcp->vifc_vifi >= MAXVIFS) { |
---|
851 | VIF_UNLOCK(); |
---|
852 | return EINVAL; |
---|
853 | } |
---|
854 | /* rate limiting is no longer supported by this code */ |
---|
855 | if (vifcp->vifc_rate_limit != 0) { |
---|
856 | log(LOG_ERR, "rate limiting is no longer supported\n"); |
---|
857 | VIF_UNLOCK(); |
---|
858 | return EINVAL; |
---|
859 | } |
---|
860 | if (!in_nullhost(vifp->v_lcl_addr)) { |
---|
861 | VIF_UNLOCK(); |
---|
862 | return EADDRINUSE; |
---|
863 | } |
---|
864 | if (in_nullhost(vifcp->vifc_lcl_addr)) { |
---|
865 | VIF_UNLOCK(); |
---|
866 | return EADDRNOTAVAIL; |
---|
867 | } |
---|
868 | |
---|
869 | /* Find the interface with an address in AF_INET family */ |
---|
870 | if (vifcp->vifc_flags & VIFF_REGISTER) { |
---|
871 | /* |
---|
872 | * XXX: Because VIFF_REGISTER does not really need a valid |
---|
873 | * local interface (e.g. it could be 127.0.0.2), we don't |
---|
874 | * check its address. |
---|
875 | */ |
---|
876 | ifp = NULL; |
---|
877 | } else { |
---|
878 | sin.sin_addr = vifcp->vifc_lcl_addr; |
---|
879 | ifa = ifa_ifwithaddr((struct sockaddr *)&sin); |
---|
880 | if (ifa == NULL) { |
---|
881 | VIF_UNLOCK(); |
---|
882 | return EADDRNOTAVAIL; |
---|
883 | } |
---|
884 | ifp = ifa->ifa_ifp; |
---|
885 | ifa_free(ifa); |
---|
886 | } |
---|
887 | |
---|
888 | if ((vifcp->vifc_flags & VIFF_TUNNEL) != 0) { |
---|
889 | CTR1(KTR_IPMF, "%s: tunnels are no longer supported", __func__); |
---|
890 | VIF_UNLOCK(); |
---|
891 | return EOPNOTSUPP; |
---|
892 | } else if (vifcp->vifc_flags & VIFF_REGISTER) { |
---|
893 | ifp = &V_multicast_register_if; |
---|
894 | CTR2(KTR_IPMF, "%s: add register vif for ifp %p", __func__, ifp); |
---|
895 | if (V_reg_vif_num == VIFI_INVALID) { |
---|
896 | if_initname(&V_multicast_register_if, "register_vif", 0); |
---|
897 | V_multicast_register_if.if_flags = IFF_LOOPBACK; |
---|
898 | V_reg_vif_num = vifcp->vifc_vifi; |
---|
899 | } |
---|
900 | } else { /* Make sure the interface supports multicast */ |
---|
901 | if ((ifp->if_flags & IFF_MULTICAST) == 0) { |
---|
902 | VIF_UNLOCK(); |
---|
903 | return EOPNOTSUPP; |
---|
904 | } |
---|
905 | |
---|
906 | /* Enable promiscuous reception of all IP multicasts from the if */ |
---|
907 | error = if_allmulti(ifp, 1); |
---|
908 | if (error) { |
---|
909 | VIF_UNLOCK(); |
---|
910 | return error; |
---|
911 | } |
---|
912 | } |
---|
913 | |
---|
914 | vifp->v_flags = vifcp->vifc_flags; |
---|
915 | vifp->v_threshold = vifcp->vifc_threshold; |
---|
916 | vifp->v_lcl_addr = vifcp->vifc_lcl_addr; |
---|
917 | vifp->v_rmt_addr = vifcp->vifc_rmt_addr; |
---|
918 | vifp->v_ifp = ifp; |
---|
919 | /* initialize per vif pkt counters */ |
---|
920 | vifp->v_pkt_in = 0; |
---|
921 | vifp->v_pkt_out = 0; |
---|
922 | vifp->v_bytes_in = 0; |
---|
923 | vifp->v_bytes_out = 0; |
---|
924 | |
---|
925 | /* Adjust numvifs up if the vifi is higher than numvifs */ |
---|
926 | if (V_numvifs <= vifcp->vifc_vifi) |
---|
927 | V_numvifs = vifcp->vifc_vifi + 1; |
---|
928 | |
---|
929 | VIF_UNLOCK(); |
---|
930 | |
---|
931 | CTR4(KTR_IPMF, "%s: add vif %d laddr %s thresh %x", __func__, |
---|
932 | (int)vifcp->vifc_vifi, inet_ntoa(vifcp->vifc_lcl_addr), |
---|
933 | (int)vifcp->vifc_threshold); |
---|
934 | |
---|
935 | return 0; |
---|
936 | } |
---|
937 | |
---|
938 | /* |
---|
939 | * Delete a vif from the vif table |
---|
940 | */ |
---|
941 | static int |
---|
942 | del_vif_locked(vifi_t vifi) |
---|
943 | { |
---|
944 | struct vif *vifp; |
---|
945 | |
---|
946 | VIF_LOCK_ASSERT(); |
---|
947 | |
---|
948 | if (vifi >= V_numvifs) { |
---|
949 | return EINVAL; |
---|
950 | } |
---|
951 | vifp = &V_viftable[vifi]; |
---|
952 | if (in_nullhost(vifp->v_lcl_addr)) { |
---|
953 | return EADDRNOTAVAIL; |
---|
954 | } |
---|
955 | |
---|
956 | if (!(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) |
---|
957 | if_allmulti(vifp->v_ifp, 0); |
---|
958 | |
---|
959 | if (vifp->v_flags & VIFF_REGISTER) |
---|
960 | V_reg_vif_num = VIFI_INVALID; |
---|
961 | |
---|
962 | bzero((caddr_t)vifp, sizeof (*vifp)); |
---|
963 | |
---|
964 | CTR2(KTR_IPMF, "%s: delete vif %d", __func__, (int)vifi); |
---|
965 | |
---|
966 | /* Adjust numvifs down */ |
---|
967 | for (vifi = V_numvifs; vifi > 0; vifi--) |
---|
968 | if (!in_nullhost(V_viftable[vifi-1].v_lcl_addr)) |
---|
969 | break; |
---|
970 | V_numvifs = vifi; |
---|
971 | |
---|
972 | return 0; |
---|
973 | } |
---|
974 | |
---|
975 | static int |
---|
976 | del_vif(vifi_t vifi) |
---|
977 | { |
---|
978 | int cc; |
---|
979 | |
---|
980 | VIF_LOCK(); |
---|
981 | cc = del_vif_locked(vifi); |
---|
982 | VIF_UNLOCK(); |
---|
983 | |
---|
984 | return cc; |
---|
985 | } |
---|
986 | |
---|
987 | /* |
---|
988 | * update an mfc entry without resetting counters and S,G addresses. |
---|
989 | */ |
---|
990 | static void |
---|
991 | update_mfc_params(struct mfc *rt, struct mfcctl2 *mfccp) |
---|
992 | { |
---|
993 | int i; |
---|
994 | |
---|
995 | rt->mfc_parent = mfccp->mfcc_parent; |
---|
996 | for (i = 0; i < V_numvifs; i++) { |
---|
997 | rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; |
---|
998 | rt->mfc_flags[i] = mfccp->mfcc_flags[i] & V_mrt_api_config & |
---|
999 | MRT_MFC_FLAGS_ALL; |
---|
1000 | } |
---|
1001 | /* set the RP address */ |
---|
1002 | if (V_mrt_api_config & MRT_MFC_RP) |
---|
1003 | rt->mfc_rp = mfccp->mfcc_rp; |
---|
1004 | else |
---|
1005 | rt->mfc_rp.s_addr = INADDR_ANY; |
---|
1006 | } |
---|
1007 | |
---|
1008 | /* |
---|
1009 | * fully initialize an mfc entry from the parameter. |
---|
1010 | */ |
---|
1011 | static void |
---|
1012 | init_mfc_params(struct mfc *rt, struct mfcctl2 *mfccp) |
---|
1013 | { |
---|
1014 | rt->mfc_origin = mfccp->mfcc_origin; |
---|
1015 | rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; |
---|
1016 | |
---|
1017 | update_mfc_params(rt, mfccp); |
---|
1018 | |
---|
1019 | /* initialize pkt counters per src-grp */ |
---|
1020 | rt->mfc_pkt_cnt = 0; |
---|
1021 | rt->mfc_byte_cnt = 0; |
---|
1022 | rt->mfc_wrong_if = 0; |
---|
1023 | timevalclear(&rt->mfc_last_assert); |
---|
1024 | } |
---|
1025 | |
---|
1026 | static void |
---|
1027 | expire_mfc(struct mfc *rt) |
---|
1028 | { |
---|
1029 | struct rtdetq *rte, *nrte; |
---|
1030 | |
---|
1031 | MFC_LOCK_ASSERT(); |
---|
1032 | |
---|
1033 | free_bw_list(rt->mfc_bw_meter); |
---|
1034 | |
---|
1035 | TAILQ_FOREACH_SAFE(rte, &rt->mfc_stall, rte_link, nrte) { |
---|
1036 | m_freem(rte->m); |
---|
1037 | TAILQ_REMOVE(&rt->mfc_stall, rte, rte_link); |
---|
1038 | free(rte, M_MRTABLE); |
---|
1039 | } |
---|
1040 | |
---|
1041 | LIST_REMOVE(rt, mfc_hash); |
---|
1042 | free(rt, M_MRTABLE); |
---|
1043 | } |
---|
1044 | |
---|
1045 | /* |
---|
1046 | * Add an mfc entry |
---|
1047 | */ |
---|
1048 | static int |
---|
1049 | add_mfc(struct mfcctl2 *mfccp) |
---|
1050 | { |
---|
1051 | struct mfc *rt; |
---|
1052 | struct rtdetq *rte, *nrte; |
---|
1053 | u_long hash = 0; |
---|
1054 | u_short nstl; |
---|
1055 | |
---|
1056 | VIF_LOCK(); |
---|
1057 | MFC_LOCK(); |
---|
1058 | |
---|
1059 | rt = mfc_find(&mfccp->mfcc_origin, &mfccp->mfcc_mcastgrp); |
---|
1060 | |
---|
1061 | /* If an entry already exists, just update the fields */ |
---|
1062 | if (rt) { |
---|
1063 | CTR4(KTR_IPMF, "%s: update mfc orig %s group %lx parent %x", |
---|
1064 | __func__, inet_ntoa(mfccp->mfcc_origin), |
---|
1065 | (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), |
---|
1066 | mfccp->mfcc_parent); |
---|
1067 | update_mfc_params(rt, mfccp); |
---|
1068 | MFC_UNLOCK(); |
---|
1069 | VIF_UNLOCK(); |
---|
1070 | return (0); |
---|
1071 | } |
---|
1072 | |
---|
1073 | /* |
---|
1074 | * Find the entry for which the upcall was made and update |
---|
1075 | */ |
---|
1076 | nstl = 0; |
---|
1077 | hash = MFCHASH(mfccp->mfcc_origin, mfccp->mfcc_mcastgrp); |
---|
1078 | LIST_FOREACH(rt, &V_mfchashtbl[hash], mfc_hash) { |
---|
1079 | if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) && |
---|
1080 | in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp) && |
---|
1081 | !TAILQ_EMPTY(&rt->mfc_stall)) { |
---|
1082 | CTR5(KTR_IPMF, |
---|
1083 | "%s: add mfc orig %s group %lx parent %x qh %p", |
---|
1084 | __func__, inet_ntoa(mfccp->mfcc_origin), |
---|
1085 | (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), |
---|
1086 | mfccp->mfcc_parent, |
---|
1087 | TAILQ_FIRST(&rt->mfc_stall)); |
---|
1088 | if (nstl++) |
---|
1089 | CTR1(KTR_IPMF, "%s: multiple matches", __func__); |
---|
1090 | |
---|
1091 | init_mfc_params(rt, mfccp); |
---|
1092 | rt->mfc_expire = 0; /* Don't clean this guy up */ |
---|
1093 | V_nexpire[hash]--; |
---|
1094 | |
---|
1095 | /* Free queued packets, but attempt to forward them first. */ |
---|
1096 | TAILQ_FOREACH_SAFE(rte, &rt->mfc_stall, rte_link, nrte) { |
---|
1097 | if (rte->ifp != NULL) |
---|
1098 | ip_mdq(rte->m, rte->ifp, rt, -1); |
---|
1099 | m_freem(rte->m); |
---|
1100 | TAILQ_REMOVE(&rt->mfc_stall, rte, rte_link); |
---|
1101 | rt->mfc_nstall--; |
---|
1102 | free(rte, M_MRTABLE); |
---|
1103 | } |
---|
1104 | } |
---|
1105 | } |
---|
1106 | |
---|
1107 | /* |
---|
1108 | * It is possible that an entry is being inserted without an upcall |
---|
1109 | */ |
---|
1110 | if (nstl == 0) { |
---|
1111 | CTR1(KTR_IPMF, "%s: adding mfc w/o upcall", __func__); |
---|
1112 | LIST_FOREACH(rt, &V_mfchashtbl[hash], mfc_hash) { |
---|
1113 | if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) && |
---|
1114 | in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp)) { |
---|
1115 | init_mfc_params(rt, mfccp); |
---|
1116 | if (rt->mfc_expire) |
---|
1117 | V_nexpire[hash]--; |
---|
1118 | rt->mfc_expire = 0; |
---|
1119 | break; /* XXX */ |
---|
1120 | } |
---|
1121 | } |
---|
1122 | |
---|
1123 | if (rt == NULL) { /* no upcall, so make a new entry */ |
---|
1124 | rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT); |
---|
1125 | if (rt == NULL) { |
---|
1126 | MFC_UNLOCK(); |
---|
1127 | VIF_UNLOCK(); |
---|
1128 | return (ENOBUFS); |
---|
1129 | } |
---|
1130 | |
---|
1131 | init_mfc_params(rt, mfccp); |
---|
1132 | TAILQ_INIT(&rt->mfc_stall); |
---|
1133 | rt->mfc_nstall = 0; |
---|
1134 | |
---|
1135 | rt->mfc_expire = 0; |
---|
1136 | rt->mfc_bw_meter = NULL; |
---|
1137 | |
---|
1138 | /* insert new entry at head of hash chain */ |
---|
1139 | LIST_INSERT_HEAD(&V_mfchashtbl[hash], rt, mfc_hash); |
---|
1140 | } |
---|
1141 | } |
---|
1142 | |
---|
1143 | MFC_UNLOCK(); |
---|
1144 | VIF_UNLOCK(); |
---|
1145 | |
---|
1146 | return (0); |
---|
1147 | } |
---|
1148 | |
---|
1149 | /* |
---|
1150 | * Delete an mfc entry |
---|
1151 | */ |
---|
1152 | static int |
---|
1153 | del_mfc(struct mfcctl2 *mfccp) |
---|
1154 | { |
---|
1155 | struct in_addr origin; |
---|
1156 | struct in_addr mcastgrp; |
---|
1157 | struct mfc *rt; |
---|
1158 | |
---|
1159 | origin = mfccp->mfcc_origin; |
---|
1160 | mcastgrp = mfccp->mfcc_mcastgrp; |
---|
1161 | |
---|
1162 | CTR3(KTR_IPMF, "%s: delete mfc orig %s group %lx", __func__, |
---|
1163 | inet_ntoa(origin), (u_long)ntohl(mcastgrp.s_addr)); |
---|
1164 | |
---|
1165 | MFC_LOCK(); |
---|
1166 | |
---|
1167 | rt = mfc_find(&origin, &mcastgrp); |
---|
1168 | if (rt == NULL) { |
---|
1169 | MFC_UNLOCK(); |
---|
1170 | return EADDRNOTAVAIL; |
---|
1171 | } |
---|
1172 | |
---|
1173 | /* |
---|
1174 | * free the bw_meter entries |
---|
1175 | */ |
---|
1176 | free_bw_list(rt->mfc_bw_meter); |
---|
1177 | rt->mfc_bw_meter = NULL; |
---|
1178 | |
---|
1179 | LIST_REMOVE(rt, mfc_hash); |
---|
1180 | free(rt, M_MRTABLE); |
---|
1181 | |
---|
1182 | MFC_UNLOCK(); |
---|
1183 | |
---|
1184 | return (0); |
---|
1185 | } |
---|
1186 | |
---|
1187 | /* |
---|
1188 | * Send a message to the routing daemon on the multicast routing socket. |
---|
1189 | */ |
---|
1190 | static int |
---|
1191 | socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in *src) |
---|
1192 | { |
---|
1193 | if (s) { |
---|
1194 | SOCKBUF_LOCK(&s->so_rcv); |
---|
1195 | if (sbappendaddr_locked(&s->so_rcv, (struct sockaddr *)src, mm, |
---|
1196 | NULL) != 0) { |
---|
1197 | sorwakeup_locked(s); |
---|
1198 | return 0; |
---|
1199 | } |
---|
1200 | SOCKBUF_UNLOCK(&s->so_rcv); |
---|
1201 | } |
---|
1202 | m_freem(mm); |
---|
1203 | return -1; |
---|
1204 | } |
---|
1205 | |
---|
1206 | /* |
---|
1207 | * IP multicast forwarding function. This function assumes that the packet |
---|
1208 | * pointed to by "ip" has arrived on (or is about to be sent to) the interface |
---|
1209 | * pointed to by "ifp", and the packet is to be relayed to other networks |
---|
1210 | * that have members of the packet's destination IP multicast group. |
---|
1211 | * |
---|
1212 | * The packet is returned unscathed to the caller, unless it is |
---|
1213 | * erroneous, in which case a non-zero return value tells the caller to |
---|
1214 | * discard it. |
---|
1215 | */ |
---|
1216 | |
---|
1217 | #define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */ |
---|
1218 | |
---|
1219 | static int |
---|
1220 | X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, |
---|
1221 | struct ip_moptions *imo) |
---|
1222 | { |
---|
1223 | struct mfc *rt; |
---|
1224 | int error; |
---|
1225 | vifi_t vifi; |
---|
1226 | |
---|
1227 | CTR3(KTR_IPMF, "ip_mforward: delete mfc orig %s group %lx ifp %p", |
---|
1228 | inet_ntoa(ip->ip_src), (u_long)ntohl(ip->ip_dst.s_addr), ifp); |
---|
1229 | |
---|
1230 | if (ip->ip_hl < (sizeof(struct ip) + TUNNEL_LEN) >> 2 || |
---|
1231 | ((u_char *)(ip + 1))[1] != IPOPT_LSRR ) { |
---|
1232 | /* |
---|
1233 | * Packet arrived via a physical interface or |
---|
1234 | * an encapsulated tunnel or a register_vif. |
---|
1235 | */ |
---|
1236 | } else { |
---|
1237 | /* |
---|
1238 | * Packet arrived through a source-route tunnel. |
---|
1239 | * Source-route tunnels are no longer supported. |
---|
1240 | */ |
---|
1241 | return (1); |
---|
1242 | } |
---|
1243 | |
---|
1244 | VIF_LOCK(); |
---|
1245 | MFC_LOCK(); |
---|
1246 | if (imo && ((vifi = imo->imo_multicast_vif) < V_numvifs)) { |
---|
1247 | if (ip->ip_ttl < MAXTTL) |
---|
1248 | ip->ip_ttl++; /* compensate for -1 in *_send routines */ |
---|
1249 | error = ip_mdq(m, ifp, NULL, vifi); |
---|
1250 | MFC_UNLOCK(); |
---|
1251 | VIF_UNLOCK(); |
---|
1252 | return error; |
---|
1253 | } |
---|
1254 | |
---|
1255 | /* |
---|
1256 | * Don't forward a packet with time-to-live of zero or one, |
---|
1257 | * or a packet destined to a local-only group. |
---|
1258 | */ |
---|
1259 | if (ip->ip_ttl <= 1 || IN_LOCAL_GROUP(ntohl(ip->ip_dst.s_addr))) { |
---|
1260 | MFC_UNLOCK(); |
---|
1261 | VIF_UNLOCK(); |
---|
1262 | return 0; |
---|
1263 | } |
---|
1264 | |
---|
1265 | /* |
---|
1266 | * Determine forwarding vifs from the forwarding cache table |
---|
1267 | */ |
---|
1268 | MRTSTAT_INC(mrts_mfc_lookups); |
---|
1269 | rt = mfc_find(&ip->ip_src, &ip->ip_dst); |
---|
1270 | |
---|
1271 | /* Entry exists, so forward if necessary */ |
---|
1272 | if (rt != NULL) { |
---|
1273 | error = ip_mdq(m, ifp, rt, -1); |
---|
1274 | MFC_UNLOCK(); |
---|
1275 | VIF_UNLOCK(); |
---|
1276 | return error; |
---|
1277 | } else { |
---|
1278 | /* |
---|
1279 | * If we don't have a route for packet's origin, |
---|
1280 | * Make a copy of the packet & send message to routing daemon |
---|
1281 | */ |
---|
1282 | |
---|
1283 | struct mbuf *mb0; |
---|
1284 | struct rtdetq *rte; |
---|
1285 | u_long hash; |
---|
1286 | int hlen = ip->ip_hl << 2; |
---|
1287 | |
---|
1288 | MRTSTAT_INC(mrts_mfc_misses); |
---|
1289 | MRTSTAT_INC(mrts_no_route); |
---|
1290 | CTR2(KTR_IPMF, "ip_mforward: no mfc for (%s,%lx)", |
---|
1291 | inet_ntoa(ip->ip_src), (u_long)ntohl(ip->ip_dst.s_addr)); |
---|
1292 | |
---|
1293 | /* |
---|
1294 | * Allocate mbufs early so that we don't do extra work if we are |
---|
1295 | * just going to fail anyway. Make sure to pullup the header so |
---|
1296 | * that other people can't step on it. |
---|
1297 | */ |
---|
1298 | rte = (struct rtdetq *)malloc((sizeof *rte), M_MRTABLE, |
---|
1299 | M_NOWAIT|M_ZERO); |
---|
1300 | if (rte == NULL) { |
---|
1301 | MFC_UNLOCK(); |
---|
1302 | VIF_UNLOCK(); |
---|
1303 | return ENOBUFS; |
---|
1304 | } |
---|
1305 | |
---|
1306 | mb0 = m_copypacket(m, M_DONTWAIT); |
---|
1307 | if (mb0 && (M_HASCL(mb0) || mb0->m_len < hlen)) |
---|
1308 | mb0 = m_pullup(mb0, hlen); |
---|
1309 | if (mb0 == NULL) { |
---|
1310 | free(rte, M_MRTABLE); |
---|
1311 | MFC_UNLOCK(); |
---|
1312 | VIF_UNLOCK(); |
---|
1313 | return ENOBUFS; |
---|
1314 | } |
---|
1315 | |
---|
1316 | /* is there an upcall waiting for this flow ? */ |
---|
1317 | hash = MFCHASH(ip->ip_src, ip->ip_dst); |
---|
1318 | LIST_FOREACH(rt, &V_mfchashtbl[hash], mfc_hash) { |
---|
1319 | if (in_hosteq(ip->ip_src, rt->mfc_origin) && |
---|
1320 | in_hosteq(ip->ip_dst, rt->mfc_mcastgrp) && |
---|
1321 | !TAILQ_EMPTY(&rt->mfc_stall)) |
---|
1322 | break; |
---|
1323 | } |
---|
1324 | |
---|
1325 | if (rt == NULL) { |
---|
1326 | int i; |
---|
1327 | struct igmpmsg *im; |
---|
1328 | struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; |
---|
1329 | struct mbuf *mm; |
---|
1330 | |
---|
1331 | /* |
---|
1332 | * Locate the vifi for the incoming interface for this packet. |
---|
1333 | * If none found, drop packet. |
---|
1334 | */ |
---|
1335 | for (vifi = 0; vifi < V_numvifs && |
---|
1336 | V_viftable[vifi].v_ifp != ifp; vifi++) |
---|
1337 | ; |
---|
1338 | if (vifi >= V_numvifs) /* vif not found, drop packet */ |
---|
1339 | goto non_fatal; |
---|
1340 | |
---|
1341 | /* no upcall, so make a new entry */ |
---|
1342 | rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT); |
---|
1343 | if (rt == NULL) |
---|
1344 | goto fail; |
---|
1345 | |
---|
1346 | /* Make a copy of the header to send to the user level process */ |
---|
1347 | mm = m_copy(mb0, 0, hlen); |
---|
1348 | if (mm == NULL) |
---|
1349 | goto fail1; |
---|
1350 | |
---|
1351 | /* |
---|
1352 | * Send message to routing daemon to install |
---|
1353 | * a route into the kernel table |
---|
1354 | */ |
---|
1355 | |
---|
1356 | im = mtod(mm, struct igmpmsg *); |
---|
1357 | im->im_msgtype = IGMPMSG_NOCACHE; |
---|
1358 | im->im_mbz = 0; |
---|
1359 | im->im_vif = vifi; |
---|
1360 | |
---|
1361 | MRTSTAT_INC(mrts_upcalls); |
---|
1362 | |
---|
1363 | k_igmpsrc.sin_addr = ip->ip_src; |
---|
1364 | if (socket_send(V_ip_mrouter, mm, &k_igmpsrc) < 0) { |
---|
1365 | CTR0(KTR_IPMF, "ip_mforward: socket queue full"); |
---|
1366 | MRTSTAT_INC(mrts_upq_sockfull); |
---|
1367 | fail1: |
---|
1368 | free(rt, M_MRTABLE); |
---|
1369 | fail: |
---|
1370 | free(rte, M_MRTABLE); |
---|
1371 | m_freem(mb0); |
---|
1372 | MFC_UNLOCK(); |
---|
1373 | VIF_UNLOCK(); |
---|
1374 | return ENOBUFS; |
---|
1375 | } |
---|
1376 | |
---|
1377 | /* insert new entry at head of hash chain */ |
---|
1378 | rt->mfc_origin.s_addr = ip->ip_src.s_addr; |
---|
1379 | rt->mfc_mcastgrp.s_addr = ip->ip_dst.s_addr; |
---|
1380 | rt->mfc_expire = UPCALL_EXPIRE; |
---|
1381 | V_nexpire[hash]++; |
---|
1382 | for (i = 0; i < V_numvifs; i++) { |
---|
1383 | rt->mfc_ttls[i] = 0; |
---|
1384 | rt->mfc_flags[i] = 0; |
---|
1385 | } |
---|
1386 | rt->mfc_parent = -1; |
---|
1387 | |
---|
1388 | /* clear the RP address */ |
---|
1389 | rt->mfc_rp.s_addr = INADDR_ANY; |
---|
1390 | rt->mfc_bw_meter = NULL; |
---|
1391 | |
---|
1392 | /* initialize pkt counters per src-grp */ |
---|
1393 | rt->mfc_pkt_cnt = 0; |
---|
1394 | rt->mfc_byte_cnt = 0; |
---|
1395 | rt->mfc_wrong_if = 0; |
---|
1396 | timevalclear(&rt->mfc_last_assert); |
---|
1397 | |
---|
1398 | TAILQ_INIT(&rt->mfc_stall); |
---|
1399 | rt->mfc_nstall = 0; |
---|
1400 | |
---|
1401 | /* link into table */ |
---|
1402 | LIST_INSERT_HEAD(&V_mfchashtbl[hash], rt, mfc_hash); |
---|
1403 | TAILQ_INSERT_HEAD(&rt->mfc_stall, rte, rte_link); |
---|
1404 | rt->mfc_nstall++; |
---|
1405 | |
---|
1406 | } else { |
---|
1407 | /* determine if queue has overflowed */ |
---|
1408 | if (rt->mfc_nstall > MAX_UPQ) { |
---|
1409 | MRTSTAT_INC(mrts_upq_ovflw); |
---|
1410 | non_fatal: |
---|
1411 | free(rte, M_MRTABLE); |
---|
1412 | m_freem(mb0); |
---|
1413 | MFC_UNLOCK(); |
---|
1414 | VIF_UNLOCK(); |
---|
1415 | return (0); |
---|
1416 | } |
---|
1417 | TAILQ_INSERT_TAIL(&rt->mfc_stall, rte, rte_link); |
---|
1418 | rt->mfc_nstall++; |
---|
1419 | } |
---|
1420 | |
---|
1421 | rte->m = mb0; |
---|
1422 | rte->ifp = ifp; |
---|
1423 | |
---|
1424 | MFC_UNLOCK(); |
---|
1425 | VIF_UNLOCK(); |
---|
1426 | |
---|
1427 | return 0; |
---|
1428 | } |
---|
1429 | } |
---|
1430 | |
---|
1431 | /* |
---|
1432 | * Clean up the cache entry if upcall is not serviced |
---|
1433 | */ |
---|
1434 | static void |
---|
1435 | expire_upcalls(void *arg) |
---|
1436 | { |
---|
1437 | u_long i; |
---|
1438 | |
---|
1439 | CURVNET_SET((struct vnet *) arg); |
---|
1440 | |
---|
1441 | MFC_LOCK(); |
---|
1442 | |
---|
1443 | for (i = 0; i < mfchashsize; i++) { |
---|
1444 | struct mfc *rt, *nrt; |
---|
1445 | |
---|
1446 | if (V_nexpire[i] == 0) |
---|
1447 | continue; |
---|
1448 | |
---|
1449 | for (rt = LIST_FIRST(&V_mfchashtbl[i]); rt; rt = nrt) { |
---|
1450 | nrt = LIST_NEXT(rt, mfc_hash); |
---|
1451 | |
---|
1452 | if (TAILQ_EMPTY(&rt->mfc_stall)) |
---|
1453 | continue; |
---|
1454 | |
---|
1455 | if (rt->mfc_expire == 0 || --rt->mfc_expire > 0) |
---|
1456 | continue; |
---|
1457 | |
---|
1458 | /* |
---|
1459 | * free the bw_meter entries |
---|
1460 | */ |
---|
1461 | while (rt->mfc_bw_meter != NULL) { |
---|
1462 | struct bw_meter *x = rt->mfc_bw_meter; |
---|
1463 | |
---|
1464 | rt->mfc_bw_meter = x->bm_mfc_next; |
---|
1465 | free(x, M_BWMETER); |
---|
1466 | } |
---|
1467 | |
---|
1468 | MRTSTAT_INC(mrts_cache_cleanups); |
---|
1469 | CTR3(KTR_IPMF, "%s: expire (%lx, %lx)", __func__, |
---|
1470 | (u_long)ntohl(rt->mfc_origin.s_addr), |
---|
1471 | (u_long)ntohl(rt->mfc_mcastgrp.s_addr)); |
---|
1472 | |
---|
1473 | expire_mfc(rt); |
---|
1474 | } |
---|
1475 | } |
---|
1476 | |
---|
1477 | MFC_UNLOCK(); |
---|
1478 | |
---|
1479 | callout_reset(&V_expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls, |
---|
1480 | curvnet); |
---|
1481 | |
---|
1482 | CURVNET_RESTORE(); |
---|
1483 | } |
---|
1484 | |
---|
1485 | /* |
---|
1486 | * Packet forwarding routine once entry in the cache is made |
---|
1487 | */ |
---|
1488 | static int |
---|
1489 | ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, vifi_t xmt_vif) |
---|
1490 | { |
---|
1491 | struct ip *ip = mtod(m, struct ip *); |
---|
1492 | vifi_t vifi; |
---|
1493 | int plen = ip->ip_len; |
---|
1494 | |
---|
1495 | VIF_LOCK_ASSERT(); |
---|
1496 | |
---|
1497 | /* |
---|
1498 | * If xmt_vif is not -1, send on only the requested vif. |
---|
1499 | * |
---|
1500 | * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.) |
---|
1501 | */ |
---|
1502 | if (xmt_vif < V_numvifs) { |
---|
1503 | if (V_viftable[xmt_vif].v_flags & VIFF_REGISTER) |
---|
1504 | pim_register_send(ip, V_viftable + xmt_vif, m, rt); |
---|
1505 | else |
---|
1506 | phyint_send(ip, V_viftable + xmt_vif, m); |
---|
1507 | return 1; |
---|
1508 | } |
---|
1509 | |
---|
1510 | /* |
---|
1511 | * Don't forward if it didn't arrive from the parent vif for its origin. |
---|
1512 | */ |
---|
1513 | vifi = rt->mfc_parent; |
---|
1514 | if ((vifi >= V_numvifs) || (V_viftable[vifi].v_ifp != ifp)) { |
---|
1515 | CTR4(KTR_IPMF, "%s: rx on wrong ifp %p (vifi %d, v_ifp %p)", |
---|
1516 | __func__, ifp, (int)vifi, V_viftable[vifi].v_ifp); |
---|
1517 | MRTSTAT_INC(mrts_wrong_if); |
---|
1518 | ++rt->mfc_wrong_if; |
---|
1519 | /* |
---|
1520 | * If we are doing PIM assert processing, send a message |
---|
1521 | * to the routing daemon. |
---|
1522 | * |
---|
1523 | * XXX: A PIM-SM router needs the WRONGVIF detection so it |
---|
1524 | * can complete the SPT switch, regardless of the type |
---|
1525 | * of the iif (broadcast media, GRE tunnel, etc). |
---|
1526 | */ |
---|
1527 | if (V_pim_assert_enabled && (vifi < V_numvifs) && |
---|
1528 | V_viftable[vifi].v_ifp) { |
---|
1529 | |
---|
1530 | if (ifp == &V_multicast_register_if) |
---|
1531 | PIMSTAT_INC(pims_rcv_registers_wrongiif); |
---|
1532 | |
---|
1533 | /* Get vifi for the incoming packet */ |
---|
1534 | for (vifi = 0; vifi < V_numvifs && V_viftable[vifi].v_ifp != ifp; |
---|
1535 | vifi++) |
---|
1536 | ; |
---|
1537 | if (vifi >= V_numvifs) |
---|
1538 | return 0; /* The iif is not found: ignore the packet. */ |
---|
1539 | |
---|
1540 | if (rt->mfc_flags[vifi] & MRT_MFC_FLAGS_DISABLE_WRONGVIF) |
---|
1541 | return 0; /* WRONGVIF disabled: ignore the packet */ |
---|
1542 | |
---|
1543 | if (ratecheck(&rt->mfc_last_assert, &pim_assert_interval)) { |
---|
1544 | struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; |
---|
1545 | struct igmpmsg *im; |
---|
1546 | int hlen = ip->ip_hl << 2; |
---|
1547 | struct mbuf *mm = m_copy(m, 0, hlen); |
---|
1548 | |
---|
1549 | if (mm && (M_HASCL(mm) || mm->m_len < hlen)) |
---|
1550 | mm = m_pullup(mm, hlen); |
---|
1551 | if (mm == NULL) |
---|
1552 | return ENOBUFS; |
---|
1553 | |
---|
1554 | im = mtod(mm, struct igmpmsg *); |
---|
1555 | im->im_msgtype = IGMPMSG_WRONGVIF; |
---|
1556 | im->im_mbz = 0; |
---|
1557 | im->im_vif = vifi; |
---|
1558 | |
---|
1559 | MRTSTAT_INC(mrts_upcalls); |
---|
1560 | |
---|
1561 | k_igmpsrc.sin_addr = im->im_src; |
---|
1562 | if (socket_send(V_ip_mrouter, mm, &k_igmpsrc) < 0) { |
---|
1563 | CTR1(KTR_IPMF, "%s: socket queue full", __func__); |
---|
1564 | MRTSTAT_INC(mrts_upq_sockfull); |
---|
1565 | return ENOBUFS; |
---|
1566 | } |
---|
1567 | } |
---|
1568 | } |
---|
1569 | return 0; |
---|
1570 | } |
---|
1571 | |
---|
1572 | |
---|
1573 | /* If I sourced this packet, it counts as output, else it was input. */ |
---|
1574 | if (in_hosteq(ip->ip_src, V_viftable[vifi].v_lcl_addr)) { |
---|
1575 | V_viftable[vifi].v_pkt_out++; |
---|
1576 | V_viftable[vifi].v_bytes_out += plen; |
---|
1577 | } else { |
---|
1578 | V_viftable[vifi].v_pkt_in++; |
---|
1579 | V_viftable[vifi].v_bytes_in += plen; |
---|
1580 | } |
---|
1581 | rt->mfc_pkt_cnt++; |
---|
1582 | rt->mfc_byte_cnt += plen; |
---|
1583 | |
---|
1584 | /* |
---|
1585 | * For each vif, decide if a copy of the packet should be forwarded. |
---|
1586 | * Forward if: |
---|
1587 | * - the ttl exceeds the vif's threshold |
---|
1588 | * - there are group members downstream on interface |
---|
1589 | */ |
---|
1590 | for (vifi = 0; vifi < V_numvifs; vifi++) |
---|
1591 | if ((rt->mfc_ttls[vifi] > 0) && (ip->ip_ttl > rt->mfc_ttls[vifi])) { |
---|
1592 | V_viftable[vifi].v_pkt_out++; |
---|
1593 | V_viftable[vifi].v_bytes_out += plen; |
---|
1594 | if (V_viftable[vifi].v_flags & VIFF_REGISTER) |
---|
1595 | pim_register_send(ip, V_viftable + vifi, m, rt); |
---|
1596 | else |
---|
1597 | phyint_send(ip, V_viftable + vifi, m); |
---|
1598 | } |
---|
1599 | |
---|
1600 | /* |
---|
1601 | * Perform upcall-related bw measuring. |
---|
1602 | */ |
---|
1603 | if (rt->mfc_bw_meter != NULL) { |
---|
1604 | struct bw_meter *x; |
---|
1605 | struct timeval now; |
---|
1606 | |
---|
1607 | microtime(&now); |
---|
1608 | MFC_LOCK_ASSERT(); |
---|
1609 | for (x = rt->mfc_bw_meter; x != NULL; x = x->bm_mfc_next) |
---|
1610 | bw_meter_receive_packet(x, plen, &now); |
---|
1611 | } |
---|
1612 | |
---|
1613 | return 0; |
---|
1614 | } |
---|
1615 | |
---|
1616 | /* |
---|
1617 | * Check if a vif number is legal/ok. This is used by in_mcast.c. |
---|
1618 | */ |
---|
1619 | static int |
---|
1620 | X_legal_vif_num(int vif) |
---|
1621 | { |
---|
1622 | int ret; |
---|
1623 | |
---|
1624 | ret = 0; |
---|
1625 | if (vif < 0) |
---|
1626 | return (ret); |
---|
1627 | |
---|
1628 | VIF_LOCK(); |
---|
1629 | if (vif < V_numvifs) |
---|
1630 | ret = 1; |
---|
1631 | VIF_UNLOCK(); |
---|
1632 | |
---|
1633 | return (ret); |
---|
1634 | } |
---|
1635 | |
---|
1636 | /* |
---|
1637 | * Return the local address used by this vif |
---|
1638 | */ |
---|
1639 | static u_long |
---|
1640 | X_ip_mcast_src(int vifi) |
---|
1641 | { |
---|
1642 | in_addr_t addr; |
---|
1643 | |
---|
1644 | addr = INADDR_ANY; |
---|
1645 | if (vifi < 0) |
---|
1646 | return (addr); |
---|
1647 | |
---|
1648 | VIF_LOCK(); |
---|
1649 | if (vifi < V_numvifs) |
---|
1650 | addr = V_viftable[vifi].v_lcl_addr.s_addr; |
---|
1651 | VIF_UNLOCK(); |
---|
1652 | |
---|
1653 | return (addr); |
---|
1654 | } |
---|
1655 | |
---|
1656 | static void |
---|
1657 | phyint_send(struct ip *ip, struct vif *vifp, struct mbuf *m) |
---|
1658 | { |
---|
1659 | struct mbuf *mb_copy; |
---|
1660 | int hlen = ip->ip_hl << 2; |
---|
1661 | |
---|
1662 | VIF_LOCK_ASSERT(); |
---|
1663 | |
---|
1664 | /* |
---|
1665 | * Make a new reference to the packet; make sure that |
---|
1666 | * the IP header is actually copied, not just referenced, |
---|
1667 | * so that ip_output() only scribbles on the copy. |
---|
1668 | */ |
---|
1669 | mb_copy = m_copypacket(m, M_DONTWAIT); |
---|
1670 | if (mb_copy && (M_HASCL(mb_copy) || mb_copy->m_len < hlen)) |
---|
1671 | mb_copy = m_pullup(mb_copy, hlen); |
---|
1672 | if (mb_copy == NULL) |
---|
1673 | return; |
---|
1674 | |
---|
1675 | send_packet(vifp, mb_copy); |
---|
1676 | } |
---|
1677 | |
---|
1678 | static void |
---|
1679 | send_packet(struct vif *vifp, struct mbuf *m) |
---|
1680 | { |
---|
1681 | struct ip_moptions imo; |
---|
1682 | struct in_multi *imm[2]; |
---|
1683 | int error; |
---|
1684 | |
---|
1685 | VIF_LOCK_ASSERT(); |
---|
1686 | |
---|
1687 | imo.imo_multicast_ifp = vifp->v_ifp; |
---|
1688 | imo.imo_multicast_ttl = mtod(m, struct ip *)->ip_ttl - 1; |
---|
1689 | imo.imo_multicast_loop = 1; |
---|
1690 | imo.imo_multicast_vif = -1; |
---|
1691 | imo.imo_num_memberships = 0; |
---|
1692 | imo.imo_max_memberships = 2; |
---|
1693 | imo.imo_membership = &imm[0]; |
---|
1694 | |
---|
1695 | /* |
---|
1696 | * Re-entrancy should not be a problem here, because |
---|
1697 | * the packets that we send out and are looped back at us |
---|
1698 | * should get rejected because they appear to come from |
---|
1699 | * the loopback interface, thus preventing looping. |
---|
1700 | */ |
---|
1701 | error = ip_output(m, NULL, NULL, IP_FORWARDING, &imo, NULL); |
---|
1702 | CTR3(KTR_IPMF, "%s: vif %td err %d", __func__, |
---|
1703 | (ptrdiff_t)(vifp - V_viftable), error); |
---|
1704 | } |
---|
1705 | |
---|
1706 | /* |
---|
1707 | * Stubs for old RSVP socket shim implementation. |
---|
1708 | */ |
---|
1709 | |
---|
1710 | static int |
---|
1711 | X_ip_rsvp_vif(struct socket *so __unused, struct sockopt *sopt __unused) |
---|
1712 | { |
---|
1713 | |
---|
1714 | return (EOPNOTSUPP); |
---|
1715 | } |
---|
1716 | |
---|
1717 | static void |
---|
1718 | X_ip_rsvp_force_done(struct socket *so __unused) |
---|
1719 | { |
---|
1720 | |
---|
1721 | } |
---|
1722 | |
---|
1723 | static void |
---|
1724 | X_rsvp_input(struct mbuf *m, int off __unused) |
---|
1725 | { |
---|
1726 | |
---|
1727 | if (!V_rsvp_on) |
---|
1728 | m_freem(m); |
---|
1729 | } |
---|
1730 | |
---|
1731 | /* |
---|
1732 | * Code for bandwidth monitors |
---|
1733 | */ |
---|
1734 | |
---|
1735 | /* |
---|
1736 | * Define common interface for timeval-related methods |
---|
1737 | */ |
---|
1738 | #define BW_TIMEVALCMP(tvp, uvp, cmp) timevalcmp((tvp), (uvp), cmp) |
---|
1739 | #define BW_TIMEVALDECR(vvp, uvp) timevalsub((vvp), (uvp)) |
---|
1740 | #define BW_TIMEVALADD(vvp, uvp) timevaladd((vvp), (uvp)) |
---|
1741 | |
---|
1742 | static uint32_t |
---|
1743 | compute_bw_meter_flags(struct bw_upcall *req) |
---|
1744 | { |
---|
1745 | uint32_t flags = 0; |
---|
1746 | |
---|
1747 | if (req->bu_flags & BW_UPCALL_UNIT_PACKETS) |
---|
1748 | flags |= BW_METER_UNIT_PACKETS; |
---|
1749 | if (req->bu_flags & BW_UPCALL_UNIT_BYTES) |
---|
1750 | flags |= BW_METER_UNIT_BYTES; |
---|
1751 | if (req->bu_flags & BW_UPCALL_GEQ) |
---|
1752 | flags |= BW_METER_GEQ; |
---|
1753 | if (req->bu_flags & BW_UPCALL_LEQ) |
---|
1754 | flags |= BW_METER_LEQ; |
---|
1755 | |
---|
1756 | return flags; |
---|
1757 | } |
---|
1758 | |
---|
1759 | /* |
---|
1760 | * Add a bw_meter entry |
---|
1761 | */ |
---|
1762 | static int |
---|
1763 | add_bw_upcall(struct bw_upcall *req) |
---|
1764 | { |
---|
1765 | struct mfc *mfc; |
---|
1766 | struct timeval delta = { BW_UPCALL_THRESHOLD_INTERVAL_MIN_SEC, |
---|
1767 | BW_UPCALL_THRESHOLD_INTERVAL_MIN_USEC }; |
---|
1768 | struct timeval now; |
---|
1769 | struct bw_meter *x; |
---|
1770 | uint32_t flags; |
---|
1771 | |
---|
1772 | if (!(V_mrt_api_config & MRT_MFC_BW_UPCALL)) |
---|
1773 | return EOPNOTSUPP; |
---|
1774 | |
---|
1775 | /* Test if the flags are valid */ |
---|
1776 | if (!(req->bu_flags & (BW_UPCALL_UNIT_PACKETS | BW_UPCALL_UNIT_BYTES))) |
---|
1777 | return EINVAL; |
---|
1778 | if (!(req->bu_flags & (BW_UPCALL_GEQ | BW_UPCALL_LEQ))) |
---|
1779 | return EINVAL; |
---|
1780 | if ((req->bu_flags & (BW_UPCALL_GEQ | BW_UPCALL_LEQ)) |
---|
1781 | == (BW_UPCALL_GEQ | BW_UPCALL_LEQ)) |
---|
1782 | return EINVAL; |
---|
1783 | |
---|
1784 | /* Test if the threshold time interval is valid */ |
---|
1785 | if (BW_TIMEVALCMP(&req->bu_threshold.b_time, &delta, <)) |
---|
1786 | return EINVAL; |
---|
1787 | |
---|
1788 | flags = compute_bw_meter_flags(req); |
---|
1789 | |
---|
1790 | /* |
---|
1791 | * Find if we have already same bw_meter entry |
---|
1792 | */ |
---|
1793 | MFC_LOCK(); |
---|
1794 | mfc = mfc_find(&req->bu_src, &req->bu_dst); |
---|
1795 | if (mfc == NULL) { |
---|
1796 | MFC_UNLOCK(); |
---|
1797 | return EADDRNOTAVAIL; |
---|
1798 | } |
---|
1799 | for (x = mfc->mfc_bw_meter; x != NULL; x = x->bm_mfc_next) { |
---|
1800 | if ((BW_TIMEVALCMP(&x->bm_threshold.b_time, |
---|
1801 | &req->bu_threshold.b_time, ==)) && |
---|
1802 | (x->bm_threshold.b_packets == req->bu_threshold.b_packets) && |
---|
1803 | (x->bm_threshold.b_bytes == req->bu_threshold.b_bytes) && |
---|
1804 | (x->bm_flags & BW_METER_USER_FLAGS) == flags) { |
---|
1805 | MFC_UNLOCK(); |
---|
1806 | return 0; /* XXX Already installed */ |
---|
1807 | } |
---|
1808 | } |
---|
1809 | |
---|
1810 | /* Allocate the new bw_meter entry */ |
---|
1811 | x = (struct bw_meter *)malloc(sizeof(*x), M_BWMETER, M_NOWAIT); |
---|
1812 | if (x == NULL) { |
---|
1813 | MFC_UNLOCK(); |
---|
1814 | return ENOBUFS; |
---|
1815 | } |
---|
1816 | |
---|
1817 | /* Set the new bw_meter entry */ |
---|
1818 | x->bm_threshold.b_time = req->bu_threshold.b_time; |
---|
1819 | microtime(&now); |
---|
1820 | x->bm_start_time = now; |
---|
1821 | x->bm_threshold.b_packets = req->bu_threshold.b_packets; |
---|
1822 | x->bm_threshold.b_bytes = req->bu_threshold.b_bytes; |
---|
1823 | x->bm_measured.b_packets = 0; |
---|
1824 | x->bm_measured.b_bytes = 0; |
---|
1825 | x->bm_flags = flags; |
---|
1826 | x->bm_time_next = NULL; |
---|
1827 | x->bm_time_hash = BW_METER_BUCKETS; |
---|
1828 | |
---|
1829 | /* Add the new bw_meter entry to the front of entries for this MFC */ |
---|
1830 | x->bm_mfc = mfc; |
---|
1831 | x->bm_mfc_next = mfc->mfc_bw_meter; |
---|
1832 | mfc->mfc_bw_meter = x; |
---|
1833 | schedule_bw_meter(x, &now); |
---|
1834 | MFC_UNLOCK(); |
---|
1835 | |
---|
1836 | return 0; |
---|
1837 | } |
---|
1838 | |
---|
1839 | static void |
---|
1840 | free_bw_list(struct bw_meter *list) |
---|
1841 | { |
---|
1842 | while (list != NULL) { |
---|
1843 | struct bw_meter *x = list; |
---|
1844 | |
---|
1845 | list = list->bm_mfc_next; |
---|
1846 | unschedule_bw_meter(x); |
---|
1847 | free(x, M_BWMETER); |
---|
1848 | } |
---|
1849 | } |
---|
1850 | |
---|
1851 | /* |
---|
1852 | * Delete one or multiple bw_meter entries |
---|
1853 | */ |
---|
1854 | static int |
---|
1855 | del_bw_upcall(struct bw_upcall *req) |
---|
1856 | { |
---|
1857 | struct mfc *mfc; |
---|
1858 | struct bw_meter *x; |
---|
1859 | |
---|
1860 | if (!(V_mrt_api_config & MRT_MFC_BW_UPCALL)) |
---|
1861 | return EOPNOTSUPP; |
---|
1862 | |
---|
1863 | MFC_LOCK(); |
---|
1864 | |
---|
1865 | /* Find the corresponding MFC entry */ |
---|
1866 | mfc = mfc_find(&req->bu_src, &req->bu_dst); |
---|
1867 | if (mfc == NULL) { |
---|
1868 | MFC_UNLOCK(); |
---|
1869 | return EADDRNOTAVAIL; |
---|
1870 | } else if (req->bu_flags & BW_UPCALL_DELETE_ALL) { |
---|
1871 | /* |
---|
1872 | * Delete all bw_meter entries for this mfc |
---|
1873 | */ |
---|
1874 | struct bw_meter *list; |
---|
1875 | |
---|
1876 | list = mfc->mfc_bw_meter; |
---|
1877 | mfc->mfc_bw_meter = NULL; |
---|
1878 | free_bw_list(list); |
---|
1879 | MFC_UNLOCK(); |
---|
1880 | return 0; |
---|
1881 | } else { /* Delete a single bw_meter entry */ |
---|
1882 | struct bw_meter *prev; |
---|
1883 | uint32_t flags = 0; |
---|
1884 | |
---|
1885 | flags = compute_bw_meter_flags(req); |
---|
1886 | |
---|
1887 | /* Find the bw_meter entry to delete */ |
---|
1888 | for (prev = NULL, x = mfc->mfc_bw_meter; x != NULL; |
---|
1889 | prev = x, x = x->bm_mfc_next) { |
---|
1890 | if ((BW_TIMEVALCMP(&x->bm_threshold.b_time, |
---|
1891 | &req->bu_threshold.b_time, ==)) && |
---|
1892 | (x->bm_threshold.b_packets == req->bu_threshold.b_packets) && |
---|
1893 | (x->bm_threshold.b_bytes == req->bu_threshold.b_bytes) && |
---|
1894 | (x->bm_flags & BW_METER_USER_FLAGS) == flags) |
---|
1895 | break; |
---|
1896 | } |
---|
1897 | if (x != NULL) { /* Delete entry from the list for this MFC */ |
---|
1898 | if (prev != NULL) |
---|
1899 | prev->bm_mfc_next = x->bm_mfc_next; /* remove from middle*/ |
---|
1900 | else |
---|
1901 | x->bm_mfc->mfc_bw_meter = x->bm_mfc_next;/* new head of list */ |
---|
1902 | |
---|
1903 | unschedule_bw_meter(x); |
---|
1904 | MFC_UNLOCK(); |
---|
1905 | /* Free the bw_meter entry */ |
---|
1906 | free(x, M_BWMETER); |
---|
1907 | return 0; |
---|
1908 | } else { |
---|
1909 | MFC_UNLOCK(); |
---|
1910 | return EINVAL; |
---|
1911 | } |
---|
1912 | } |
---|
1913 | /* NOTREACHED */ |
---|
1914 | } |
---|
1915 | |
---|
1916 | /* |
---|
1917 | * Perform bandwidth measurement processing that may result in an upcall |
---|
1918 | */ |
---|
1919 | static void |
---|
1920 | bw_meter_receive_packet(struct bw_meter *x, int plen, struct timeval *nowp) |
---|
1921 | { |
---|
1922 | struct timeval delta; |
---|
1923 | |
---|
1924 | MFC_LOCK_ASSERT(); |
---|
1925 | |
---|
1926 | delta = *nowp; |
---|
1927 | BW_TIMEVALDECR(&delta, &x->bm_start_time); |
---|
1928 | |
---|
1929 | if (x->bm_flags & BW_METER_GEQ) { |
---|
1930 | /* |
---|
1931 | * Processing for ">=" type of bw_meter entry |
---|
1932 | */ |
---|
1933 | if (BW_TIMEVALCMP(&delta, &x->bm_threshold.b_time, >)) { |
---|
1934 | /* Reset the bw_meter entry */ |
---|
1935 | x->bm_start_time = *nowp; |
---|
1936 | x->bm_measured.b_packets = 0; |
---|
1937 | x->bm_measured.b_bytes = 0; |
---|
1938 | x->bm_flags &= ~BW_METER_UPCALL_DELIVERED; |
---|
1939 | } |
---|
1940 | |
---|
1941 | /* Record that a packet is received */ |
---|
1942 | x->bm_measured.b_packets++; |
---|
1943 | x->bm_measured.b_bytes += plen; |
---|
1944 | |
---|
1945 | /* |
---|
1946 | * Test if we should deliver an upcall |
---|
1947 | */ |
---|
1948 | if (!(x->bm_flags & BW_METER_UPCALL_DELIVERED)) { |
---|
1949 | if (((x->bm_flags & BW_METER_UNIT_PACKETS) && |
---|
1950 | (x->bm_measured.b_packets >= x->bm_threshold.b_packets)) || |
---|
1951 | ((x->bm_flags & BW_METER_UNIT_BYTES) && |
---|
1952 | (x->bm_measured.b_bytes >= x->bm_threshold.b_bytes))) { |
---|
1953 | /* Prepare an upcall for delivery */ |
---|
1954 | bw_meter_prepare_upcall(x, nowp); |
---|
1955 | x->bm_flags |= BW_METER_UPCALL_DELIVERED; |
---|
1956 | } |
---|
1957 | } |
---|
1958 | } else if (x->bm_flags & BW_METER_LEQ) { |
---|
1959 | /* |
---|
1960 | * Processing for "<=" type of bw_meter entry |
---|
1961 | */ |
---|
1962 | if (BW_TIMEVALCMP(&delta, &x->bm_threshold.b_time, >)) { |
---|
1963 | /* |
---|
1964 | * We are behind time with the multicast forwarding table |
---|
1965 | * scanning for "<=" type of bw_meter entries, so test now |
---|
1966 | * if we should deliver an upcall. |
---|
1967 | */ |
---|
1968 | if (((x->bm_flags & BW_METER_UNIT_PACKETS) && |
---|
1969 | (x->bm_measured.b_packets <= x->bm_threshold.b_packets)) || |
---|
1970 | ((x->bm_flags & BW_METER_UNIT_BYTES) && |
---|
1971 | (x->bm_measured.b_bytes <= x->bm_threshold.b_bytes))) { |
---|
1972 | /* Prepare an upcall for delivery */ |
---|
1973 | bw_meter_prepare_upcall(x, nowp); |
---|
1974 | } |
---|
1975 | /* Reschedule the bw_meter entry */ |
---|
1976 | unschedule_bw_meter(x); |
---|
1977 | schedule_bw_meter(x, nowp); |
---|
1978 | } |
---|
1979 | |
---|
1980 | /* Record that a packet is received */ |
---|
1981 | x->bm_measured.b_packets++; |
---|
1982 | x->bm_measured.b_bytes += plen; |
---|
1983 | |
---|
1984 | /* |
---|
1985 | * Test if we should restart the measuring interval |
---|
1986 | */ |
---|
1987 | if ((x->bm_flags & BW_METER_UNIT_PACKETS && |
---|
1988 | x->bm_measured.b_packets <= x->bm_threshold.b_packets) || |
---|
1989 | (x->bm_flags & BW_METER_UNIT_BYTES && |
---|
1990 | x->bm_measured.b_bytes <= x->bm_threshold.b_bytes)) { |
---|
1991 | /* Don't restart the measuring interval */ |
---|
1992 | } else { |
---|
1993 | /* Do restart the measuring interval */ |
---|
1994 | /* |
---|
1995 | * XXX: note that we don't unschedule and schedule, because this |
---|
1996 | * might be too much overhead per packet. Instead, when we process |
---|
1997 | * all entries for a given timer hash bin, we check whether it is |
---|
1998 | * really a timeout. If not, we reschedule at that time. |
---|
1999 | */ |
---|
2000 | x->bm_start_time = *nowp; |
---|
2001 | x->bm_measured.b_packets = 0; |
---|
2002 | x->bm_measured.b_bytes = 0; |
---|
2003 | x->bm_flags &= ~BW_METER_UPCALL_DELIVERED; |
---|
2004 | } |
---|
2005 | } |
---|
2006 | } |
---|
2007 | |
---|
2008 | /* |
---|
2009 | * Prepare a bandwidth-related upcall |
---|
2010 | */ |
---|
2011 | static void |
---|
2012 | bw_meter_prepare_upcall(struct bw_meter *x, struct timeval *nowp) |
---|
2013 | { |
---|
2014 | struct timeval delta; |
---|
2015 | struct bw_upcall *u; |
---|
2016 | |
---|
2017 | MFC_LOCK_ASSERT(); |
---|
2018 | |
---|
2019 | /* |
---|
2020 | * Compute the measured time interval |
---|
2021 | */ |
---|
2022 | delta = *nowp; |
---|
2023 | BW_TIMEVALDECR(&delta, &x->bm_start_time); |
---|
2024 | |
---|
2025 | /* |
---|
2026 | * If there are too many pending upcalls, deliver them now |
---|
2027 | */ |
---|
2028 | if (V_bw_upcalls_n >= BW_UPCALLS_MAX) |
---|
2029 | bw_upcalls_send(); |
---|
2030 | |
---|
2031 | /* |
---|
2032 | * Set the bw_upcall entry |
---|
2033 | */ |
---|
2034 | u = &V_bw_upcalls[V_bw_upcalls_n++]; |
---|
2035 | u->bu_src = x->bm_mfc->mfc_origin; |
---|
2036 | u->bu_dst = x->bm_mfc->mfc_mcastgrp; |
---|
2037 | u->bu_threshold.b_time = x->bm_threshold.b_time; |
---|
2038 | u->bu_threshold.b_packets = x->bm_threshold.b_packets; |
---|
2039 | u->bu_threshold.b_bytes = x->bm_threshold.b_bytes; |
---|
2040 | u->bu_measured.b_time = delta; |
---|
2041 | u->bu_measured.b_packets = x->bm_measured.b_packets; |
---|
2042 | u->bu_measured.b_bytes = x->bm_measured.b_bytes; |
---|
2043 | u->bu_flags = 0; |
---|
2044 | if (x->bm_flags & BW_METER_UNIT_PACKETS) |
---|
2045 | u->bu_flags |= BW_UPCALL_UNIT_PACKETS; |
---|
2046 | if (x->bm_flags & BW_METER_UNIT_BYTES) |
---|
2047 | u->bu_flags |= BW_UPCALL_UNIT_BYTES; |
---|
2048 | if (x->bm_flags & BW_METER_GEQ) |
---|
2049 | u->bu_flags |= BW_UPCALL_GEQ; |
---|
2050 | if (x->bm_flags & BW_METER_LEQ) |
---|
2051 | u->bu_flags |= BW_UPCALL_LEQ; |
---|
2052 | } |
---|
2053 | |
---|
2054 | /* |
---|
2055 | * Send the pending bandwidth-related upcalls |
---|
2056 | */ |
---|
2057 | static void |
---|
2058 | bw_upcalls_send(void) |
---|
2059 | { |
---|
2060 | struct mbuf *m; |
---|
2061 | int len = V_bw_upcalls_n * sizeof(V_bw_upcalls[0]); |
---|
2062 | struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; |
---|
2063 | static struct igmpmsg igmpmsg = { 0, /* unused1 */ |
---|
2064 | 0, /* unused2 */ |
---|
2065 | IGMPMSG_BW_UPCALL,/* im_msgtype */ |
---|
2066 | 0, /* im_mbz */ |
---|
2067 | 0, /* im_vif */ |
---|
2068 | 0, /* unused3 */ |
---|
2069 | { 0 }, /* im_src */ |
---|
2070 | { 0 } }; /* im_dst */ |
---|
2071 | |
---|
2072 | MFC_LOCK_ASSERT(); |
---|
2073 | |
---|
2074 | if (V_bw_upcalls_n == 0) |
---|
2075 | return; /* No pending upcalls */ |
---|
2076 | |
---|
2077 | V_bw_upcalls_n = 0; |
---|
2078 | |
---|
2079 | /* |
---|
2080 | * Allocate a new mbuf, initialize it with the header and |
---|
2081 | * the payload for the pending calls. |
---|
2082 | */ |
---|
2083 | MGETHDR(m, M_DONTWAIT, MT_DATA); |
---|
2084 | if (m == NULL) { |
---|
2085 | log(LOG_WARNING, "bw_upcalls_send: cannot allocate mbuf\n"); |
---|
2086 | return; |
---|
2087 | } |
---|
2088 | |
---|
2089 | m->m_len = m->m_pkthdr.len = 0; |
---|
2090 | m_copyback(m, 0, sizeof(struct igmpmsg), (caddr_t)&igmpmsg); |
---|
2091 | m_copyback(m, sizeof(struct igmpmsg), len, (caddr_t)&V_bw_upcalls[0]); |
---|
2092 | |
---|
2093 | /* |
---|
2094 | * Send the upcalls |
---|
2095 | * XXX do we need to set the address in k_igmpsrc ? |
---|
2096 | */ |
---|
2097 | MRTSTAT_INC(mrts_upcalls); |
---|
2098 | if (socket_send(V_ip_mrouter, m, &k_igmpsrc) < 0) { |
---|
2099 | log(LOG_WARNING, "bw_upcalls_send: ip_mrouter socket queue full\n"); |
---|
2100 | MRTSTAT_INC(mrts_upq_sockfull); |
---|
2101 | } |
---|
2102 | } |
---|
2103 | |
---|
/*
 * Compute the timeout hash value (timer bucket index) for a bw_meter
 * entry.
 *
 * The bucket is the second in which the entry's measuring interval ends
 * (bm_start_time + bm_threshold.b_time), rounded up when there is a
 * fractional part so that the entry is never processed early, taken
 * modulo BW_METER_BUCKETS.
 *
 * The seconds value is reduced modulo BW_METER_BUCKETS *before* it is
 * narrowed to the (int) hash variable.  Assigning tv_sec to an int first
 * would produce an implementation-defined (typically negative) value
 * once tv_sec no longer fits in an int, and a negative remainder would
 * index outside the bucket array.  A negative remainder (negative
 * tv_sec) is folded back into range for the same reason.
 *
 * bw_meter  pointer to the entry (evaluated more than once)
 * hash      int lvalue that receives a value in [0, BW_METER_BUCKETS)
 */
#define BW_METER_TIMEHASH(bw_meter, hash)                               \
    do {                                                                \
        struct timeval next_timeval = (bw_meter)->bm_start_time;        \
                                                                        \
        BW_TIMEVALADD(&next_timeval, &(bw_meter)->bm_threshold.b_time); \
        (hash) = (int)(next_timeval.tv_sec % BW_METER_BUCKETS);         \
        if (next_timeval.tv_usec)                                       \
            (hash)++; /* XXX: make sure we don't timeout early */       \
        (hash) %= BW_METER_BUCKETS;                                     \
        if ((hash) < 0)                                                 \
            (hash) += BW_METER_BUCKETS;                                 \
    } while (0)
---|
2117 | |
---|
2118 | /* |
---|
2119 | * Schedule a timer to process periodically bw_meter entry of type "<=" |
---|
2120 | * by linking the entry in the proper hash bucket. |
---|
2121 | */ |
---|
2122 | static void |
---|
2123 | schedule_bw_meter(struct bw_meter *x, struct timeval *nowp) |
---|
2124 | { |
---|
2125 | int time_hash; |
---|
2126 | |
---|
2127 | MFC_LOCK_ASSERT(); |
---|
2128 | |
---|
2129 | if (!(x->bm_flags & BW_METER_LEQ)) |
---|
2130 | return; /* XXX: we schedule timers only for "<=" entries */ |
---|
2131 | |
---|
2132 | /* |
---|
2133 | * Reset the bw_meter entry |
---|
2134 | */ |
---|
2135 | x->bm_start_time = *nowp; |
---|
2136 | x->bm_measured.b_packets = 0; |
---|
2137 | x->bm_measured.b_bytes = 0; |
---|
2138 | x->bm_flags &= ~BW_METER_UPCALL_DELIVERED; |
---|
2139 | |
---|
2140 | /* |
---|
2141 | * Compute the timeout hash value and insert the entry |
---|
2142 | */ |
---|
2143 | BW_METER_TIMEHASH(x, time_hash); |
---|
2144 | x->bm_time_next = V_bw_meter_timers[time_hash]; |
---|
2145 | V_bw_meter_timers[time_hash] = x; |
---|
2146 | x->bm_time_hash = time_hash; |
---|
2147 | } |
---|
2148 | |
---|
2149 | /* |
---|
2150 | * Unschedule the periodic timer that processes bw_meter entry of type "<=" |
---|
2151 | * by removing the entry from the proper hash bucket. |
---|
2152 | */ |
---|
2153 | static void |
---|
2154 | unschedule_bw_meter(struct bw_meter *x) |
---|
2155 | { |
---|
2156 | int time_hash; |
---|
2157 | struct bw_meter *prev, *tmp; |
---|
2158 | |
---|
2159 | MFC_LOCK_ASSERT(); |
---|
2160 | |
---|
2161 | if (!(x->bm_flags & BW_METER_LEQ)) |
---|
2162 | return; /* XXX: we schedule timers only for "<=" entries */ |
---|
2163 | |
---|
2164 | /* |
---|
2165 | * Compute the timeout hash value and delete the entry |
---|
2166 | */ |
---|
2167 | time_hash = x->bm_time_hash; |
---|
2168 | if (time_hash >= BW_METER_BUCKETS) |
---|
2169 | return; /* Entry was not scheduled */ |
---|
2170 | |
---|
2171 | for (prev = NULL, tmp = V_bw_meter_timers[time_hash]; |
---|
2172 | tmp != NULL; prev = tmp, tmp = tmp->bm_time_next) |
---|
2173 | if (tmp == x) |
---|
2174 | break; |
---|
2175 | |
---|
2176 | if (tmp == NULL) |
---|
2177 | panic("unschedule_bw_meter: bw_meter entry not found"); |
---|
2178 | |
---|
2179 | if (prev != NULL) |
---|
2180 | prev->bm_time_next = x->bm_time_next; |
---|
2181 | else |
---|
2182 | V_bw_meter_timers[time_hash] = x->bm_time_next; |
---|
2183 | |
---|
2184 | x->bm_time_next = NULL; |
---|
2185 | x->bm_time_hash = BW_METER_BUCKETS; |
---|
2186 | } |
---|
2187 | |
---|
2188 | |
---|
2189 | /* |
---|
2190 | * Process all "<=" type of bw_meter that should be processed now, |
---|
2191 | * and for each entry prepare an upcall if necessary. Each processed |
---|
2192 | * entry is rescheduled again for the (periodic) processing. |
---|
2193 | * |
---|
2194 | * This is run periodically (once per second normally). On each round, |
---|
2195 | * all the potentially matching entries are in the hash slot that we are |
---|
2196 | * looking at. |
---|
2197 | */ |
---|
2198 | static void |
---|
2199 | bw_meter_process() |
---|
2200 | { |
---|
2201 | uint32_t loops; |
---|
2202 | int i; |
---|
2203 | struct timeval now, process_endtime; |
---|
2204 | |
---|
2205 | microtime(&now); |
---|
2206 | if (V_last_tv_sec == now.tv_sec) |
---|
2207 | return; /* nothing to do */ |
---|
2208 | |
---|
2209 | loops = now.tv_sec - V_last_tv_sec; |
---|
2210 | V_last_tv_sec = now.tv_sec; |
---|
2211 | if (loops > BW_METER_BUCKETS) |
---|
2212 | loops = BW_METER_BUCKETS; |
---|
2213 | |
---|
2214 | MFC_LOCK(); |
---|
2215 | /* |
---|
2216 | * Process all bins of bw_meter entries from the one after the last |
---|
2217 | * processed to the current one. On entry, i points to the last bucket |
---|
2218 | * visited, so we need to increment i at the beginning of the loop. |
---|
2219 | */ |
---|
2220 | for (i = (now.tv_sec - loops) % BW_METER_BUCKETS; loops > 0; loops--) { |
---|
2221 | struct bw_meter *x, *tmp_list; |
---|
2222 | |
---|
2223 | if (++i >= BW_METER_BUCKETS) |
---|
2224 | i = 0; |
---|
2225 | |
---|
2226 | /* Disconnect the list of bw_meter entries from the bin */ |
---|
2227 | tmp_list = V_bw_meter_timers[i]; |
---|
2228 | V_bw_meter_timers[i] = NULL; |
---|
2229 | |
---|
2230 | /* Process the list of bw_meter entries */ |
---|
2231 | while (tmp_list != NULL) { |
---|
2232 | x = tmp_list; |
---|
2233 | tmp_list = tmp_list->bm_time_next; |
---|
2234 | |
---|
2235 | /* Test if the time interval is over */ |
---|
2236 | process_endtime = x->bm_start_time; |
---|
2237 | BW_TIMEVALADD(&process_endtime, &x->bm_threshold.b_time); |
---|
2238 | if (BW_TIMEVALCMP(&process_endtime, &now, >)) { |
---|
2239 | /* Not yet: reschedule, but don't reset */ |
---|
2240 | int time_hash; |
---|
2241 | |
---|
2242 | BW_METER_TIMEHASH(x, time_hash); |
---|
2243 | if (time_hash == i && process_endtime.tv_sec == now.tv_sec) { |
---|
2244 | /* |
---|
2245 | * XXX: somehow the bin processing is a bit ahead of time. |
---|
2246 | * Put the entry in the next bin. |
---|
2247 | */ |
---|
2248 | if (++time_hash >= BW_METER_BUCKETS) |
---|
2249 | time_hash = 0; |
---|
2250 | } |
---|
2251 | x->bm_time_next = V_bw_meter_timers[time_hash]; |
---|
2252 | V_bw_meter_timers[time_hash] = x; |
---|
2253 | x->bm_time_hash = time_hash; |
---|
2254 | |
---|
2255 | continue; |
---|
2256 | } |
---|
2257 | |
---|
2258 | /* |
---|
2259 | * Test if we should deliver an upcall |
---|
2260 | */ |
---|
2261 | if (((x->bm_flags & BW_METER_UNIT_PACKETS) && |
---|
2262 | (x->bm_measured.b_packets <= x->bm_threshold.b_packets)) || |
---|
2263 | ((x->bm_flags & BW_METER_UNIT_BYTES) && |
---|
2264 | (x->bm_measured.b_bytes <= x->bm_threshold.b_bytes))) { |
---|
2265 | /* Prepare an upcall for delivery */ |
---|
2266 | bw_meter_prepare_upcall(x, &now); |
---|
2267 | } |
---|
2268 | |
---|
2269 | /* |
---|
2270 | * Reschedule for next processing |
---|
2271 | */ |
---|
2272 | schedule_bw_meter(x, &now); |
---|
2273 | } |
---|
2274 | } |
---|
2275 | |
---|
2276 | /* Send all upcalls that are pending delivery */ |
---|
2277 | bw_upcalls_send(); |
---|
2278 | |
---|
2279 | MFC_UNLOCK(); |
---|
2280 | } |
---|
2281 | |
---|
2282 | /* |
---|
2283 | * A periodic function for sending all upcalls that are pending delivery |
---|
2284 | */ |
---|
2285 | static void |
---|
2286 | expire_bw_upcalls_send(void *arg) |
---|
2287 | { |
---|
2288 | CURVNET_SET((struct vnet *) arg); |
---|
2289 | |
---|
2290 | MFC_LOCK(); |
---|
2291 | bw_upcalls_send(); |
---|
2292 | MFC_UNLOCK(); |
---|
2293 | |
---|
2294 | callout_reset(&V_bw_upcalls_ch, BW_UPCALLS_PERIOD, expire_bw_upcalls_send, |
---|
2295 | curvnet); |
---|
2296 | CURVNET_RESTORE(); |
---|
2297 | } |
---|
2298 | |
---|
2299 | /* |
---|
2300 | * A periodic function for periodic scanning of the multicast forwarding |
---|
2301 | * table for processing all "<=" bw_meter entries. |
---|
2302 | */ |
---|
2303 | static void |
---|
2304 | expire_bw_meter_process(void *arg) |
---|
2305 | { |
---|
2306 | CURVNET_SET((struct vnet *) arg); |
---|
2307 | |
---|
2308 | if (V_mrt_api_config & MRT_MFC_BW_UPCALL) |
---|
2309 | bw_meter_process(); |
---|
2310 | |
---|
2311 | callout_reset(&V_bw_meter_ch, BW_METER_PERIOD, expire_bw_meter_process, |
---|
2312 | curvnet); |
---|
2313 | CURVNET_RESTORE(); |
---|
2314 | } |
---|
2315 | |
---|
2316 | /* |
---|
2317 | * End of bandwidth monitoring code |
---|
2318 | */ |
---|
2319 | |
---|
2320 | /* |
---|
2321 | * Send the packet up to the user daemon, or eventually do kernel encapsulation |
---|
2322 | * |
---|
2323 | */ |
---|
2324 | static int |
---|
2325 | pim_register_send(struct ip *ip, struct vif *vifp, struct mbuf *m, |
---|
2326 | struct mfc *rt) |
---|
2327 | { |
---|
2328 | struct mbuf *mb_copy, *mm; |
---|
2329 | |
---|
2330 | /* |
---|
2331 | * Do not send IGMP_WHOLEPKT notifications to userland, if the |
---|
2332 | * rendezvous point was unspecified, and we were told not to. |
---|
2333 | */ |
---|
2334 | if (pim_squelch_wholepkt != 0 && (V_mrt_api_config & MRT_MFC_RP) && |
---|
2335 | in_nullhost(rt->mfc_rp)) |
---|
2336 | return 0; |
---|
2337 | |
---|
2338 | mb_copy = pim_register_prepare(ip, m); |
---|
2339 | if (mb_copy == NULL) |
---|
2340 | return ENOBUFS; |
---|
2341 | |
---|
2342 | /* |
---|
2343 | * Send all the fragments. Note that the mbuf for each fragment |
---|
2344 | * is freed by the sending machinery. |
---|
2345 | */ |
---|
2346 | for (mm = mb_copy; mm; mm = mb_copy) { |
---|
2347 | mb_copy = mm->m_nextpkt; |
---|
2348 | mm->m_nextpkt = 0; |
---|
2349 | mm = m_pullup(mm, sizeof(struct ip)); |
---|
2350 | if (mm != NULL) { |
---|
2351 | ip = mtod(mm, struct ip *); |
---|
2352 | if ((V_mrt_api_config & MRT_MFC_RP) && !in_nullhost(rt->mfc_rp)) { |
---|
2353 | pim_register_send_rp(ip, vifp, mm, rt); |
---|
2354 | } else { |
---|
2355 | pim_register_send_upcall(ip, vifp, mm, rt); |
---|
2356 | } |
---|
2357 | } |
---|
2358 | } |
---|
2359 | |
---|
2360 | return 0; |
---|
2361 | } |
---|
2362 | |
---|
2363 | /* |
---|
2364 | * Return a copy of the data packet that is ready for PIM Register |
---|
2365 | * encapsulation. |
---|
2366 | * XXX: Note that in the returned copy the IP header is a valid one. |
---|
2367 | */ |
---|
2368 | static struct mbuf * |
---|
2369 | pim_register_prepare(struct ip *ip, struct mbuf *m) |
---|
2370 | { |
---|
2371 | struct mbuf *mb_copy = NULL; |
---|
2372 | int mtu; |
---|
2373 | |
---|
2374 | /* Take care of delayed checksums */ |
---|
2375 | if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { |
---|
2376 | in_delayed_cksum(m); |
---|
2377 | m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; |
---|
2378 | } |
---|
2379 | |
---|
2380 | /* |
---|
2381 | * Copy the old packet & pullup its IP header into the |
---|
2382 | * new mbuf so we can modify it. |
---|
2383 | */ |
---|
2384 | mb_copy = m_copypacket(m, M_DONTWAIT); |
---|
2385 | if (mb_copy == NULL) |
---|
2386 | return NULL; |
---|
2387 | mb_copy = m_pullup(mb_copy, ip->ip_hl << 2); |
---|
2388 | if (mb_copy == NULL) |
---|
2389 | return NULL; |
---|
2390 | |
---|
2391 | /* take care of the TTL */ |
---|
2392 | ip = mtod(mb_copy, struct ip *); |
---|
2393 | --ip->ip_ttl; |
---|
2394 | |
---|
2395 | /* Compute the MTU after the PIM Register encapsulation */ |
---|
2396 | mtu = 0xffff - sizeof(pim_encap_iphdr) - sizeof(pim_encap_pimhdr); |
---|
2397 | |
---|
2398 | if (ip->ip_len <= mtu) { |
---|
2399 | /* Turn the IP header into a valid one */ |
---|
2400 | ip->ip_len = htons(ip->ip_len); |
---|
2401 | ip->ip_off = htons(ip->ip_off); |
---|
2402 | ip->ip_sum = 0; |
---|
2403 | ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2); |
---|
2404 | } else { |
---|
2405 | /* Fragment the packet */ |
---|
2406 | if (ip_fragment(ip, &mb_copy, mtu, 0, CSUM_DELAY_IP) != 0) { |
---|
2407 | m_freem(mb_copy); |
---|
2408 | return NULL; |
---|
2409 | } |
---|
2410 | } |
---|
2411 | return mb_copy; |
---|
2412 | } |
---|
2413 | |
---|
2414 | /* |
---|
2415 | * Send an upcall with the data packet to the user-level process. |
---|
2416 | */ |
---|
2417 | static int |
---|
2418 | pim_register_send_upcall(struct ip *ip, struct vif *vifp, |
---|
2419 | struct mbuf *mb_copy, struct mfc *rt) |
---|
2420 | { |
---|
2421 | struct mbuf *mb_first; |
---|
2422 | int len = ntohs(ip->ip_len); |
---|
2423 | struct igmpmsg *im; |
---|
2424 | struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; |
---|
2425 | |
---|
2426 | VIF_LOCK_ASSERT(); |
---|
2427 | |
---|
2428 | /* |
---|
2429 | * Add a new mbuf with an upcall header |
---|
2430 | */ |
---|
2431 | MGETHDR(mb_first, M_DONTWAIT, MT_DATA); |
---|
2432 | if (mb_first == NULL) { |
---|
2433 | m_freem(mb_copy); |
---|
2434 | return ENOBUFS; |
---|
2435 | } |
---|
2436 | mb_first->m_data += max_linkhdr; |
---|
2437 | mb_first->m_pkthdr.len = len + sizeof(struct igmpmsg); |
---|
2438 | mb_first->m_len = sizeof(struct igmpmsg); |
---|
2439 | mb_first->m_next = mb_copy; |
---|
2440 | |
---|
2441 | /* Send message to routing daemon */ |
---|
2442 | im = mtod(mb_first, struct igmpmsg *); |
---|
2443 | im->im_msgtype = IGMPMSG_WHOLEPKT; |
---|
2444 | im->im_mbz = 0; |
---|
2445 | im->im_vif = vifp - V_viftable; |
---|
2446 | im->im_src = ip->ip_src; |
---|
2447 | im->im_dst = ip->ip_dst; |
---|
2448 | |
---|
2449 | k_igmpsrc.sin_addr = ip->ip_src; |
---|
2450 | |
---|
2451 | MRTSTAT_INC(mrts_upcalls); |
---|
2452 | |
---|
2453 | if (socket_send(V_ip_mrouter, mb_first, &k_igmpsrc) < 0) { |
---|
2454 | CTR1(KTR_IPMF, "%s: socket queue full", __func__); |
---|
2455 | MRTSTAT_INC(mrts_upq_sockfull); |
---|
2456 | return ENOBUFS; |
---|
2457 | } |
---|
2458 | |
---|
2459 | /* Keep statistics */ |
---|
2460 | PIMSTAT_INC(pims_snd_registers_msgs); |
---|
2461 | PIMSTAT_ADD(pims_snd_registers_bytes, len); |
---|
2462 | |
---|
2463 | return 0; |
---|
2464 | } |
---|
2465 | |
---|
2466 | /* |
---|
2467 | * Encapsulate the data packet in PIM Register message and send it to the RP. |
---|
2468 | */ |
---|
2469 | static int |
---|
2470 | pim_register_send_rp(struct ip *ip, struct vif *vifp, struct mbuf *mb_copy, |
---|
2471 | struct mfc *rt) |
---|
2472 | { |
---|
2473 | struct mbuf *mb_first; |
---|
2474 | struct ip *ip_outer; |
---|
2475 | struct pim_encap_pimhdr *pimhdr; |
---|
2476 | int len = ntohs(ip->ip_len); |
---|
2477 | vifi_t vifi = rt->mfc_parent; |
---|
2478 | |
---|
2479 | VIF_LOCK_ASSERT(); |
---|
2480 | |
---|
2481 | if ((vifi >= V_numvifs) || in_nullhost(V_viftable[vifi].v_lcl_addr)) { |
---|
2482 | m_freem(mb_copy); |
---|
2483 | return EADDRNOTAVAIL; /* The iif vif is invalid */ |
---|
2484 | } |
---|
2485 | |
---|
2486 | /* |
---|
2487 | * Add a new mbuf with the encapsulating header |
---|
2488 | */ |
---|
2489 | MGETHDR(mb_first, M_DONTWAIT, MT_DATA); |
---|
2490 | if (mb_first == NULL) { |
---|
2491 | m_freem(mb_copy); |
---|
2492 | return ENOBUFS; |
---|
2493 | } |
---|
2494 | mb_first->m_data += max_linkhdr; |
---|
2495 | mb_first->m_len = sizeof(pim_encap_iphdr) + sizeof(pim_encap_pimhdr); |
---|
2496 | mb_first->m_next = mb_copy; |
---|
2497 | |
---|
2498 | mb_first->m_pkthdr.len = len + mb_first->m_len; |
---|
2499 | |
---|
2500 | /* |
---|
2501 | * Fill in the encapsulating IP and PIM header |
---|
2502 | */ |
---|
2503 | ip_outer = mtod(mb_first, struct ip *); |
---|
2504 | *ip_outer = pim_encap_iphdr; |
---|
2505 | ip_outer->ip_id = ip_newid(); |
---|
2506 | ip_outer->ip_len = len + sizeof(pim_encap_iphdr) + sizeof(pim_encap_pimhdr); |
---|
2507 | ip_outer->ip_src = V_viftable[vifi].v_lcl_addr; |
---|
2508 | ip_outer->ip_dst = rt->mfc_rp; |
---|
2509 | /* |
---|
2510 | * Copy the inner header TOS to the outer header, and take care of the |
---|
2511 | * IP_DF bit. |
---|
2512 | */ |
---|
2513 | ip_outer->ip_tos = ip->ip_tos; |
---|
2514 | if (ntohs(ip->ip_off) & IP_DF) |
---|
2515 | ip_outer->ip_off |= IP_DF; |
---|
2516 | pimhdr = (struct pim_encap_pimhdr *)((caddr_t)ip_outer |
---|
2517 | + sizeof(pim_encap_iphdr)); |
---|
2518 | *pimhdr = pim_encap_pimhdr; |
---|
2519 | /* If the iif crosses a border, set the Border-bit */ |
---|
2520 | if (rt->mfc_flags[vifi] & MRT_MFC_FLAGS_BORDER_VIF & V_mrt_api_config) |
---|
2521 | pimhdr->flags |= htonl(PIM_BORDER_REGISTER); |
---|
2522 | |
---|
2523 | mb_first->m_data += sizeof(pim_encap_iphdr); |
---|
2524 | pimhdr->pim.pim_cksum = in_cksum(mb_first, sizeof(pim_encap_pimhdr)); |
---|
2525 | mb_first->m_data -= sizeof(pim_encap_iphdr); |
---|
2526 | |
---|
2527 | send_packet(vifp, mb_first); |
---|
2528 | |
---|
2529 | /* Keep statistics */ |
---|
2530 | PIMSTAT_INC(pims_snd_registers_msgs); |
---|
2531 | PIMSTAT_ADD(pims_snd_registers_bytes, len); |
---|
2532 | |
---|
2533 | return 0; |
---|
2534 | } |
---|
2535 | |
---|
/*
 * pim_encapcheck() is called by the encap4_input() path at runtime to
 * determine if a packet is for PIM; allowing PIM to be dynamically loaded
 * into the kernel.
 *
 * Only proto is examined: returns 64 to claim an IPPROTO_PIM datagram
 * and 0 to reject anything else.
 */
static int
pim_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
{
    int claim;

#ifdef DIAGNOSTIC
    KASSERT(proto == IPPROTO_PIM, ("not for IPPROTO_PIM"));
#endif
    claim = (proto == IPPROTO_PIM) ? 64 : 0;

    return claim;
}
---|
2553 | |
---|
2554 | /* |
---|
2555 | * PIM-SMv2 and PIM-DM messages processing. |
---|
2556 | * Receives and verifies the PIM control messages, and passes them |
---|
2557 | * up to the listening socket, using rip_input(). |
---|
2558 | * The only message with special processing is the PIM_REGISTER message |
---|
2559 | * (used by PIM-SM): the PIM header is stripped off, and the inner packet |
---|
2560 | * is passed to if_simloop(). |
---|
2561 | */ |
---|
2562 | void |
---|
2563 | pim_input(struct mbuf *m, int off) |
---|
2564 | { |
---|
2565 | struct ip *ip = mtod(m, struct ip *); |
---|
2566 | struct pim *pim; |
---|
2567 | int minlen; |
---|
2568 | int datalen = ip->ip_len; |
---|
2569 | int ip_tos; |
---|
2570 | int iphlen = off; |
---|
2571 | |
---|
2572 | /* Keep statistics */ |
---|
2573 | PIMSTAT_INC(pims_rcv_total_msgs); |
---|
2574 | PIMSTAT_ADD(pims_rcv_total_bytes, datalen); |
---|
2575 | |
---|
2576 | /* |
---|
2577 | * Validate lengths |
---|
2578 | */ |
---|
2579 | if (datalen < PIM_MINLEN) { |
---|
2580 | PIMSTAT_INC(pims_rcv_tooshort); |
---|
2581 | CTR3(KTR_IPMF, "%s: short packet (%d) from %s", |
---|
2582 | __func__, datalen, inet_ntoa(ip->ip_src)); |
---|
2583 | m_freem(m); |
---|
2584 | return; |
---|
2585 | } |
---|
2586 | |
---|
2587 | /* |
---|
2588 | * If the packet is at least as big as a REGISTER, go agead |
---|
2589 | * and grab the PIM REGISTER header size, to avoid another |
---|
2590 | * possible m_pullup() later. |
---|
2591 | * |
---|
2592 | * PIM_MINLEN == pimhdr + u_int32_t == 4 + 4 = 8 |
---|
2593 | * PIM_REG_MINLEN == pimhdr + reghdr + encap_iphdr == 4 + 4 + 20 = 28 |
---|
2594 | */ |
---|
2595 | minlen = iphlen + (datalen >= PIM_REG_MINLEN ? PIM_REG_MINLEN : PIM_MINLEN); |
---|
2596 | /* |
---|
2597 | * Get the IP and PIM headers in contiguous memory, and |
---|
2598 | * possibly the PIM REGISTER header. |
---|
2599 | */ |
---|
2600 | if ((m->m_flags & M_EXT || m->m_len < minlen) && |
---|
2601 | (m = m_pullup(m, minlen)) == 0) { |
---|
2602 | CTR1(KTR_IPMF, "%s: m_pullup() failed", __func__); |
---|
2603 | return; |
---|
2604 | } |
---|
2605 | |
---|
2606 | /* m_pullup() may have given us a new mbuf so reset ip. */ |
---|
2607 | ip = mtod(m, struct ip *); |
---|
2608 | ip_tos = ip->ip_tos; |
---|
2609 | |
---|
2610 | /* adjust mbuf to point to the PIM header */ |
---|
2611 | m->m_data += iphlen; |
---|
2612 | m->m_len -= iphlen; |
---|
2613 | pim = mtod(m, struct pim *); |
---|
2614 | |
---|
2615 | /* |
---|
2616 | * Validate checksum. If PIM REGISTER, exclude the data packet. |
---|
2617 | * |
---|
2618 | * XXX: some older PIMv2 implementations don't make this distinction, |
---|
2619 | * so for compatibility reason perform the checksum over part of the |
---|
2620 | * message, and if error, then over the whole message. |
---|
2621 | */ |
---|
2622 | if (PIM_VT_T(pim->pim_vt) == PIM_REGISTER && in_cksum(m, PIM_MINLEN) == 0) { |
---|
2623 | /* do nothing, checksum okay */ |
---|
2624 | } else if (in_cksum(m, datalen)) { |
---|
2625 | PIMSTAT_INC(pims_rcv_badsum); |
---|
2626 | CTR1(KTR_IPMF, "%s: invalid checksum", __func__); |
---|
2627 | m_freem(m); |
---|
2628 | return; |
---|
2629 | } |
---|
2630 | |
---|
2631 | /* PIM version check */ |
---|
2632 | if (PIM_VT_V(pim->pim_vt) < PIM_VERSION) { |
---|
2633 | PIMSTAT_INC(pims_rcv_badversion); |
---|
2634 | CTR3(KTR_IPMF, "%s: bad version %d expect %d", __func__, |
---|
2635 | (int)PIM_VT_V(pim->pim_vt), PIM_VERSION); |
---|
2636 | m_freem(m); |
---|
2637 | return; |
---|
2638 | } |
---|
2639 | |
---|
2640 | /* restore mbuf back to the outer IP */ |
---|
2641 | m->m_data -= iphlen; |
---|
2642 | m->m_len += iphlen; |
---|
2643 | |
---|
2644 | if (PIM_VT_T(pim->pim_vt) == PIM_REGISTER) { |
---|
2645 | /* |
---|
2646 | * Since this is a REGISTER, we'll make a copy of the register |
---|
2647 | * headers ip + pim + u_int32 + encap_ip, to be passed up to the |
---|
2648 | * routing daemon. |
---|
2649 | */ |
---|
2650 | struct sockaddr_in dst = { sizeof(dst), AF_INET }; |
---|
2651 | struct mbuf *mcp; |
---|
2652 | struct ip *encap_ip; |
---|
2653 | u_int32_t *reghdr; |
---|
2654 | struct ifnet *vifp; |
---|
2655 | |
---|
2656 | VIF_LOCK(); |
---|
2657 | if ((V_reg_vif_num >= V_numvifs) || (V_reg_vif_num == VIFI_INVALID)) { |
---|
2658 | VIF_UNLOCK(); |
---|
2659 | CTR2(KTR_IPMF, "%s: register vif not set: %d", __func__, |
---|
2660 | (int)V_reg_vif_num); |
---|
2661 | m_freem(m); |
---|
2662 | return; |
---|
2663 | } |
---|
2664 | /* XXX need refcnt? */ |
---|
2665 | vifp = V_viftable[V_reg_vif_num].v_ifp; |
---|
2666 | VIF_UNLOCK(); |
---|
2667 | |
---|
2668 | /* |
---|
2669 | * Validate length |
---|
2670 | */ |
---|
2671 | if (datalen < PIM_REG_MINLEN) { |
---|
2672 | PIMSTAT_INC(pims_rcv_tooshort); |
---|
2673 | PIMSTAT_INC(pims_rcv_badregisters); |
---|
2674 | CTR1(KTR_IPMF, "%s: register packet size too small", __func__); |
---|
2675 | m_freem(m); |
---|
2676 | return; |
---|
2677 | } |
---|
2678 | |
---|
2679 | reghdr = (u_int32_t *)(pim + 1); |
---|
2680 | encap_ip = (struct ip *)(reghdr + 1); |
---|
2681 | |
---|
2682 | CTR3(KTR_IPMF, "%s: register: encap ip src %s len %d", |
---|
2683 | __func__, inet_ntoa(encap_ip->ip_src), ntohs(encap_ip->ip_len)); |
---|
2684 | |
---|
2685 | /* verify the version number of the inner packet */ |
---|
2686 | if (encap_ip->ip_v != IPVERSION) { |
---|
2687 | PIMSTAT_INC(pims_rcv_badregisters); |
---|
2688 | CTR1(KTR_IPMF, "%s: bad encap ip version", __func__); |
---|
2689 | m_freem(m); |
---|
2690 | return; |
---|
2691 | } |
---|
2692 | |
---|
2693 | /* verify the inner packet is destined to a mcast group */ |
---|
2694 | if (!IN_MULTICAST(ntohl(encap_ip->ip_dst.s_addr))) { |
---|
2695 | PIMSTAT_INC(pims_rcv_badregisters); |
---|
2696 | CTR2(KTR_IPMF, "%s: bad encap ip dest %s", __func__, |
---|
2697 | inet_ntoa(encap_ip->ip_dst)); |
---|
2698 | m_freem(m); |
---|
2699 | return; |
---|
2700 | } |
---|
2701 | |
---|
2702 | /* If a NULL_REGISTER, pass it to the daemon */ |
---|
2703 | if ((ntohl(*reghdr) & PIM_NULL_REGISTER)) |
---|
2704 | goto pim_input_to_daemon; |
---|
2705 | |
---|
2706 | /* |
---|
2707 | * Copy the TOS from the outer IP header to the inner IP header. |
---|
2708 | */ |
---|
2709 | if (encap_ip->ip_tos != ip_tos) { |
---|
2710 | /* Outer TOS -> inner TOS */ |
---|
2711 | encap_ip->ip_tos = ip_tos; |
---|
2712 | /* Recompute the inner header checksum. Sigh... */ |
---|
2713 | |
---|
2714 | /* adjust mbuf to point to the inner IP header */ |
---|
2715 | m->m_data += (iphlen + PIM_MINLEN); |
---|
2716 | m->m_len -= (iphlen + PIM_MINLEN); |
---|
2717 | |
---|
2718 | encap_ip->ip_sum = 0; |
---|
2719 | encap_ip->ip_sum = in_cksum(m, encap_ip->ip_hl << 2); |
---|
2720 | |
---|
2721 | /* restore mbuf to point back to the outer IP header */ |
---|
2722 | m->m_data -= (iphlen + PIM_MINLEN); |
---|
2723 | m->m_len += (iphlen + PIM_MINLEN); |
---|
2724 | } |
---|
2725 | |
---|
2726 | /* |
---|
2727 | * Decapsulate the inner IP packet and loopback to forward it |
---|
2728 | * as a normal multicast packet. Also, make a copy of the |
---|
2729 | * outer_iphdr + pimhdr + reghdr + encap_iphdr |
---|
2730 | * to pass to the daemon later, so it can take the appropriate |
---|
2731 | * actions (e.g., send back PIM_REGISTER_STOP). |
---|
2732 | * XXX: here m->m_data points to the outer IP header. |
---|
2733 | */ |
---|
2734 | mcp = m_copy(m, 0, iphlen + PIM_REG_MINLEN); |
---|
2735 | if (mcp == NULL) { |
---|
2736 | CTR1(KTR_IPMF, "%s: m_copy() failed", __func__); |
---|
2737 | m_freem(m); |
---|
2738 | return; |
---|
2739 | } |
---|
2740 | |
---|
2741 | /* Keep statistics */ |
---|
2742 | /* XXX: registers_bytes include only the encap. mcast pkt */ |
---|
2743 | PIMSTAT_INC(pims_rcv_registers_msgs); |
---|
2744 | PIMSTAT_ADD(pims_rcv_registers_bytes, ntohs(encap_ip->ip_len)); |
---|
2745 | |
---|
2746 | /* |
---|
2747 | * forward the inner ip packet; point m_data at the inner ip. |
---|
2748 | */ |
---|
2749 | m_adj(m, iphlen + PIM_MINLEN); |
---|
2750 | |
---|
2751 | CTR4(KTR_IPMF, |
---|
2752 | "%s: forward decap'd REGISTER: src %lx dst %lx vif %d", |
---|
2753 | __func__, |
---|
2754 | (u_long)ntohl(encap_ip->ip_src.s_addr), |
---|
2755 | (u_long)ntohl(encap_ip->ip_dst.s_addr), |
---|
2756 | (int)V_reg_vif_num); |
---|
2757 | |
---|
2758 | /* NB: vifp was collected above; can it change on us? */ |
---|
2759 | if_simloop(vifp, m, dst.sin_family, 0); |
---|
2760 | |
---|
2761 | /* prepare the register head to send to the mrouting daemon */ |
---|
2762 | m = mcp; |
---|
2763 | } |
---|
2764 | |
---|
2765 | pim_input_to_daemon: |
---|
2766 | /* |
---|
2767 | * Pass the PIM message up to the daemon; if it is a Register message, |
---|
2768 | * pass the 'head' only up to the daemon. This includes the |
---|
2769 | * outer IP header, PIM header, PIM-Register header and the |
---|
2770 | * inner IP header. |
---|
2771 | * XXX: the outer IP header pkt size of a Register is not adjust to |
---|
2772 | * reflect the fact that the inner multicast data is truncated. |
---|
2773 | */ |
---|
2774 | rip_input(m, iphlen); |
---|
2775 | |
---|
2776 | return; |
---|
2777 | } |
---|
2778 | |
---|
2779 | static int |
---|
2780 | sysctl_mfctable(SYSCTL_HANDLER_ARGS) |
---|
2781 | { |
---|
2782 | struct mfc *rt; |
---|
2783 | int error, i; |
---|
2784 | |
---|
2785 | if (req->newptr) |
---|
2786 | return (EPERM); |
---|
2787 | if (V_mfchashtbl == NULL) /* XXX unlocked */ |
---|
2788 | return (0); |
---|
2789 | error = sysctl_wire_old_buffer(req, 0); |
---|
2790 | if (error) |
---|
2791 | return (error); |
---|
2792 | |
---|
2793 | MFC_LOCK(); |
---|
2794 | for (i = 0; i < mfchashsize; i++) { |
---|
2795 | LIST_FOREACH(rt, &V_mfchashtbl[i], mfc_hash) { |
---|
2796 | error = SYSCTL_OUT(req, rt, sizeof(struct mfc)); |
---|
2797 | if (error) |
---|
2798 | goto out_locked; |
---|
2799 | } |
---|
2800 | } |
---|
2801 | out_locked: |
---|
2802 | MFC_UNLOCK(); |
---|
2803 | return (error); |
---|
2804 | } |
---|
2805 | |
---|
2806 | static SYSCTL_NODE(_net_inet_ip, OID_AUTO, mfctable, CTLFLAG_RD, |
---|
2807 | sysctl_mfctable, "IPv4 Multicast Forwarding Table " |
---|
2808 | "(struct *mfc[mfchashsize], netinet/ip_mroute.h)"); |
---|
2809 | |
---|
2810 | static void |
---|
2811 | vnet_mroute_init(const void *unused __unused) |
---|
2812 | { |
---|
2813 | |
---|
2814 | MALLOC(V_nexpire, u_char *, mfchashsize, M_MRTABLE, M_WAITOK|M_ZERO); |
---|
2815 | bzero(V_bw_meter_timers, sizeof(V_bw_meter_timers)); |
---|
2816 | callout_init(&V_expire_upcalls_ch, CALLOUT_MPSAFE); |
---|
2817 | callout_init(&V_bw_upcalls_ch, CALLOUT_MPSAFE); |
---|
2818 | callout_init(&V_bw_meter_ch, CALLOUT_MPSAFE); |
---|
2819 | } |
---|
2820 | |
---|
2821 | VNET_SYSINIT(vnet_mroute_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_mroute_init, |
---|
2822 | NULL); |
---|
2823 | |
---|
2824 | static void |
---|
2825 | vnet_mroute_uninit(const void *unused __unused) |
---|
2826 | { |
---|
2827 | |
---|
2828 | FREE(V_nexpire, M_MRTABLE); |
---|
2829 | V_nexpire = NULL; |
---|
2830 | } |
---|
2831 | |
---|
2832 | VNET_SYSUNINIT(vnet_mroute_uninit, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, |
---|
2833 | vnet_mroute_uninit, NULL); |
---|
2834 | |
---|
2835 | static int |
---|
2836 | ip_mroute_modevent(module_t mod, int type, void *unused) |
---|
2837 | { |
---|
2838 | |
---|
2839 | switch (type) { |
---|
2840 | case MOD_LOAD: |
---|
2841 | MROUTER_LOCK_INIT(); |
---|
2842 | |
---|
2843 | if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event, |
---|
2844 | if_detached_event, NULL, EVENTHANDLER_PRI_ANY); |
---|
2845 | if (if_detach_event_tag == NULL) { |
---|
2846 | printf("ip_mroute: unable to register " |
---|
2847 | "ifnet_departure_event handler\n"); |
---|
2848 | MROUTER_LOCK_DESTROY(); |
---|
2849 | return (EINVAL); |
---|
2850 | } |
---|
2851 | |
---|
2852 | MFC_LOCK_INIT(); |
---|
2853 | VIF_LOCK_INIT(); |
---|
2854 | |
---|
2855 | mfchashsize = MFCHASHSIZE; |
---|
2856 | #ifndef __rtems__ |
---|
2857 | if (TUNABLE_ULONG_FETCH("net.inet.ip.mfchashsize", &mfchashsize) && |
---|
2858 | !powerof2(mfchashsize)) { |
---|
2859 | printf("WARNING: %s not a power of 2; using default\n", |
---|
2860 | "net.inet.ip.mfchashsize"); |
---|
2861 | mfchashsize = MFCHASHSIZE; |
---|
2862 | } |
---|
2863 | #endif /* __rtems__ */ |
---|
2864 | |
---|
2865 | pim_squelch_wholepkt = 0; |
---|
2866 | TUNABLE_ULONG_FETCH("net.inet.pim.squelch_wholepkt", |
---|
2867 | &pim_squelch_wholepkt); |
---|
2868 | |
---|
2869 | pim_encap_cookie = encap_attach_func(AF_INET, IPPROTO_PIM, |
---|
2870 | pim_encapcheck, &in_pim_protosw, NULL); |
---|
2871 | if (pim_encap_cookie == NULL) { |
---|
2872 | printf("ip_mroute: unable to attach pim encap\n"); |
---|
2873 | VIF_LOCK_DESTROY(); |
---|
2874 | MFC_LOCK_DESTROY(); |
---|
2875 | MROUTER_LOCK_DESTROY(); |
---|
2876 | return (EINVAL); |
---|
2877 | } |
---|
2878 | |
---|
2879 | ip_mcast_src = X_ip_mcast_src; |
---|
2880 | ip_mforward = X_ip_mforward; |
---|
2881 | ip_mrouter_done = X_ip_mrouter_done; |
---|
2882 | ip_mrouter_get = X_ip_mrouter_get; |
---|
2883 | ip_mrouter_set = X_ip_mrouter_set; |
---|
2884 | |
---|
2885 | ip_rsvp_force_done = X_ip_rsvp_force_done; |
---|
2886 | ip_rsvp_vif = X_ip_rsvp_vif; |
---|
2887 | |
---|
2888 | legal_vif_num = X_legal_vif_num; |
---|
2889 | mrt_ioctl = X_mrt_ioctl; |
---|
2890 | rsvp_input_p = X_rsvp_input; |
---|
2891 | break; |
---|
2892 | |
---|
2893 | case MOD_UNLOAD: |
---|
2894 | /* |
---|
2895 | * Typically module unload happens after the user-level |
---|
2896 | * process has shutdown the kernel services (the check |
---|
2897 | * below insures someone can't just yank the module out |
---|
2898 | * from under a running process). But if the module is |
---|
2899 | * just loaded and then unloaded w/o starting up a user |
---|
2900 | * process we still need to cleanup. |
---|
2901 | */ |
---|
2902 | MROUTER_LOCK(); |
---|
2903 | if (ip_mrouter_cnt != 0) { |
---|
2904 | MROUTER_UNLOCK(); |
---|
2905 | return (EINVAL); |
---|
2906 | } |
---|
2907 | ip_mrouter_unloading = 1; |
---|
2908 | MROUTER_UNLOCK(); |
---|
2909 | |
---|
2910 | EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag); |
---|
2911 | |
---|
2912 | if (pim_encap_cookie) { |
---|
2913 | encap_detach(pim_encap_cookie); |
---|
2914 | pim_encap_cookie = NULL; |
---|
2915 | } |
---|
2916 | |
---|
2917 | ip_mcast_src = NULL; |
---|
2918 | ip_mforward = NULL; |
---|
2919 | ip_mrouter_done = NULL; |
---|
2920 | ip_mrouter_get = NULL; |
---|
2921 | ip_mrouter_set = NULL; |
---|
2922 | |
---|
2923 | ip_rsvp_force_done = NULL; |
---|
2924 | ip_rsvp_vif = NULL; |
---|
2925 | |
---|
2926 | legal_vif_num = NULL; |
---|
2927 | mrt_ioctl = NULL; |
---|
2928 | rsvp_input_p = NULL; |
---|
2929 | |
---|
2930 | VIF_LOCK_DESTROY(); |
---|
2931 | MFC_LOCK_DESTROY(); |
---|
2932 | MROUTER_LOCK_DESTROY(); |
---|
2933 | break; |
---|
2934 | |
---|
2935 | default: |
---|
2936 | return EOPNOTSUPP; |
---|
2937 | } |
---|
2938 | return 0; |
---|
2939 | } |
---|
2940 | |
---|
2941 | static moduledata_t ip_mroutemod = { |
---|
2942 | "ip_mroute", |
---|
2943 | ip_mroute_modevent, |
---|
2944 | 0 |
---|
2945 | }; |
---|
2946 | |
---|
2947 | DECLARE_MODULE(ip_mroute, ip_mroutemod, SI_SUB_PSEUDO, SI_ORDER_MIDDLE); |
---|