Context Navigation

source: rtems-libbsd/freebsd/sys/net/bpf.c @ 21ea762

5

Last change on this file since 21ea762 was 21ea762, checked in by Sebastian Huber <sebastian.huber@…>, on 03/28/19 at 10:34:10
BPF(9): Fix timestamps
Property mode set to `100644`
File size: 75.6 KB

Line
1	#include <machine/rtems-bsd-kernel-space.h>
2
3	/*-
4	* SPDX-License-Identifier: BSD-3-Clause
5	*
6	* Copyright (c) 1990, 1991, 1993
7	* The Regents of the University of California. All rights reserved.
8	*
9	* This code is derived from the Stanford/CMU enet packet filter,
10	* (net/enet.c) distributed as part of 4.3BSD, and code contributed
11	* to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
12	* Berkeley Laboratory.
13	*
14	* Redistribution and use in source and binary forms, with or without
15	* modification, are permitted provided that the following conditions
16	* are met:
17	* 1. Redistributions of source code must retain the above copyright
18	* notice, this list of conditions and the following disclaimer.
19	* 2. Redistributions in binary form must reproduce the above copyright
20	* notice, this list of conditions and the following disclaimer in the
21	* documentation and/or other materials provided with the distribution.
22	* 3. Neither the name of the University nor the names of its contributors
23	* may be used to endorse or promote products derived from this software
24	* without specific prior written permission.
25	*
26	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36	* SUCH DAMAGE.
37	*
38	* @(#)bpf.c 8.4 (Berkeley) 1/9/95
39	*/
40
41	#include <sys/cdefs.h>
42	__FBSDID("$FreeBSD$");
43
44	#include <rtems/bsd/local/opt_bpf.h>
45	#include <rtems/bsd/local/opt_ddb.h>
46	#include <rtems/bsd/local/opt_netgraph.h>
47
48	#include <sys/types.h>
49	#include <sys/param.h>
50	#include <sys/lock.h>
51	#include <sys/rwlock.h>
52	#include <sys/systm.h>
53	#include <sys/conf.h>
54	#include <sys/fcntl.h>
55	#include <sys/jail.h>
56	#include <sys/malloc.h>
57	#include <sys/mbuf.h>
58	#include <sys/time.h>
59	#include <sys/priv.h>
60	#include <sys/proc.h>
61	#include <sys/signalvar.h>
62	#include <sys/filio.h>
63	#include <sys/sockio.h>
64	#include <sys/ttycom.h>
65	#include <sys/uio.h>
66	#include <sys/sysent.h>
67
68	#include <sys/event.h>
69	#include <sys/file.h>
70	#include <sys/poll.h>
71	#include <sys/proc.h>
72
73	#include <sys/socket.h>
74
75	#ifdef DDB
76	#include <ddb/ddb.h>
77	#endif
78
79	#include <net/if.h>
80	#include <net/if_var.h>
81	#include <net/if_dl.h>
82	#include <net/bpf.h>
83	#include <net/bpf_buffer.h>
84	#ifdef BPF_JITTER
85	#include <net/bpf_jitter.h>
86	#endif
87	#include <net/bpf_zerocopy.h>
88	#include <net/bpfdesc.h>
89	#include <net/route.h>
90	#include <net/vnet.h>
91
92	#include <netinet/in.h>
93	#include <netinet/if_ether.h>
94	#include <sys/kernel.h>
95	#include <sys/sysctl.h>
96
97	#include <net80211/ieee80211_freebsd.h>
98
99	#include <security/mac/mac_framework.h>
100	#ifdef __rtems__
101	#include <rtems/imfs.h>
102	#define devfs_get_cdevpriv(x) 0
103	#define devtoname(x) "bpf"
104	#endif /* __rtems__ */
105
106	MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
107
108	static struct bpf_if_ext dead_bpf_if = {
109	.bif_dlist = LIST_HEAD_INITIALIZER()
110	};
111
112	struct bpf_if {
113	#define bif_next bif_ext.bif_next
114	#define bif_dlist bif_ext.bif_dlist
115	struct bpf_if_ext bif_ext; /* public members */
116	u_int bif_dlt; /* link layer type */
117	u_int bif_hdrlen; /* length of link header */
118	struct ifnet bif_ifp; / corresponding interface */
119	struct rwlock bif_lock; /* interface lock */
120	LIST_HEAD(, bpf_d) bif_wlist; /* writer-only list */
121	int bif_flags; /* Interface flags */
122	struct bpf_if *bif_bpf; / Pointer to pointer to us */
123	};
124
125	CTASSERT(offsetof(struct bpf_if, bif_ext) == 0);
126
127	#define BPFIF_RLOCK(bif) rw_rlock(&(bif)->bif_lock)
128	#define BPFIF_RUNLOCK(bif) rw_runlock(&(bif)->bif_lock)
129	#define BPFIF_WLOCK(bif) rw_wlock(&(bif)->bif_lock)
130	#define BPFIF_WUNLOCK(bif) rw_wunlock(&(bif)->bif_lock)
131
132	#if defined(DEV_BPF) \|\| defined(NETGRAPH_BPF)
133
134	#define PRINET 26 /* interruptible */
135
136	#define SIZEOF_BPF_HDR(type) \
137	(offsetof(type, bh_hdrlen) + sizeof(((type *)0)->bh_hdrlen))
138
139	#ifdef COMPAT_FREEBSD32
140	#include <sys/mount.h>
141	#include <compat/freebsd32/freebsd32.h>
142	#define BPF_ALIGNMENT32 sizeof(int32_t)
143	#define BPF_WORDALIGN32(x) roundup2(x, BPF_ALIGNMENT32)
144
145	#ifndef BURN_BRIDGES
146	/*
147	* 32-bit version of structure prepended to each packet. We use this header
148	* instead of the standard one for 32-bit streams. We mark the a stream as
149	* 32-bit the first time we see a 32-bit compat ioctl request.
150	*/
151	struct bpf_hdr32 {
152	struct timeval32 bh_tstamp; /* time stamp */
153	uint32_t bh_caplen; /* length of captured portion */
154	uint32_t bh_datalen; /* original length of packet */
155	uint16_t bh_hdrlen; /* length of bpf header (this struct
156	plus alignment padding) */
157	};
158	#endif
159
160	struct bpf_program32 {
161	u_int bf_len;
162	uint32_t bf_insns;
163	};
164
165	struct bpf_dltlist32 {
166	u_int bfl_len;
167	u_int bfl_list;
168	};
169
170	#define BIOCSETF32 _IOW('B', 103, struct bpf_program32)
171	#define BIOCSRTIMEOUT32 _IOW('B', 109, struct timeval32)
172	#define BIOCGRTIMEOUT32 _IOR('B', 110, struct timeval32)
173	#define BIOCGDLTLIST32 _IOWR('B', 121, struct bpf_dltlist32)
174	#define BIOCSETWF32 _IOW('B', 123, struct bpf_program32)
175	#define BIOCSETFNR32 _IOW('B', 130, struct bpf_program32)
176	#endif
177
178	#define BPF_LOCK() sx_xlock(&bpf_sx)
179	#define BPF_UNLOCK() sx_xunlock(&bpf_sx)
180	#define BPF_LOCK_ASSERT() sx_assert(&bpf_sx, SA_XLOCKED)
181	/*
182	* bpf_iflist is a list of BPF interface structures, each corresponding to a
183	* specific DLT. The same network interface might have several BPF interface
184	* structures registered by different layers in the stack (i.e., 802.11
185	* frames, ethernet frames, etc).
186	*/
187	static LIST_HEAD(, bpf_if) bpf_iflist, bpf_freelist;
188	static struct sx bpf_sx; /* bpf global lock */
189	static int bpf_bpfd_cnt;
190
191	static void bpf_attachd(struct bpf_d , struct bpf_if );
192	static void bpf_detachd(struct bpf_d *);
193	static void bpf_detachd_locked(struct bpf_d *);
194	static void bpf_freed(struct bpf_d *);
195	static int bpf_movein(struct uio , int, struct ifnet , struct mbuf **,
196	struct sockaddr , int , struct bpf_d *);
197	static int bpf_setif(struct bpf_d , struct ifreq );
198	static void bpf_timed_out(void *);
199	static __inline void
200	bpf_wakeup(struct bpf_d *);
201	static void catchpacket(struct bpf_d , u_char , u_int, u_int,
202	void ()(struct bpf_d , caddr_t, u_int, void *, u_int),
203	struct bintime *);
204	static void reset_d(struct bpf_d *);
205	static int bpf_setf(struct bpf_d , struct bpf_program , u_long cmd);
206	static int bpf_getdltlist(struct bpf_d , struct bpf_dltlist );
207	static int bpf_setdlt(struct bpf_d *, u_int);
208	static void filt_bpfdetach(struct knote *);
209	static int filt_bpfread(struct knote *, long);
210	static void bpf_drvinit(void *);
211	static int bpf_stats_sysctl(SYSCTL_HANDLER_ARGS);
212
213	SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW, 0, "bpf sysctl");
214	int bpf_maxinsns = BPF_MAXINSNS;
215	SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
216	&bpf_maxinsns, 0, "Maximum bpf program instructions");
217	#ifndef __rtems__
218	static int bpf_zerocopy_enable = 0;
219	SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW,
220	&bpf_zerocopy_enable, 0, "Enable new zero-copy BPF buffer sessions");
221	#endif /* __rtems__ */
222	static SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_MPSAFE \| CTLFLAG_RW,
223	bpf_stats_sysctl, "bpf statistics portal");
224
225	VNET_DEFINE_STATIC(int, bpf_optimize_writers) = 0;
226	#define V_bpf_optimize_writers VNET(bpf_optimize_writers)
227	SYSCTL_INT(_net_bpf, OID_AUTO, optimize_writers, CTLFLAG_VNET \| CTLFLAG_RW,
228	&VNET_NAME(bpf_optimize_writers), 0,
229	"Do not send packets until BPF program is set");
230
231	#ifndef __rtems__
232	static d_open_t bpfopen;
233	static d_read_t bpfread;
234	static d_write_t bpfwrite;
235	static d_ioctl_t bpfioctl;
236	static d_poll_t bpfpoll;
237	static d_kqfilter_t bpfkqfilter;
238
239	static struct cdevsw bpf_cdevsw = {
240	.d_version = D_VERSION,
241	.d_open = bpfopen,
242	.d_read = bpfread,
243	.d_write = bpfwrite,
244	.d_ioctl = bpfioctl,
245	.d_poll = bpfpoll,
246	.d_name = "bpf",
247	.d_kqfilter = bpfkqfilter,
248	};
249	#endif /* __rtems__ */
250
251	static struct filterops bpfread_filtops = {
252	.f_isfd = 1,
253	.f_detach = filt_bpfdetach,
254	.f_event = filt_bpfread,
255	};
256
257	eventhandler_tag bpf_ifdetach_cookie = NULL;
258
259	/*
260	* LOCKING MODEL USED BY BPF:
261	* Locks:
262	* 1) global lock (BPF_LOCK). Mutex, used to protect interface addition/removal,
263	* some global counters and every bpf_if reference.
264	* 2) Interface lock. Rwlock, used to protect list of BPF descriptors and their filters.
265	* 3) Descriptor lock. Mutex, used to protect BPF buffers and various structure fields
266	* used by bpf_mtap code.
267	*
268	* Lock order:
269	*
270	* Global lock, interface lock, descriptor lock
271	*
272	* We have to acquire interface lock before descriptor main lock due to BPF_MTAP[2]
273	* working model. In many places (like bpf_detachd) we start with BPF descriptor
274	* (and we need to at least rlock it to get reliable interface pointer). This
275	* gives us potential LOR. As a result, we use global lock to protect from bpf_if
276	* change in every such place.
277	*
278	* Changing d->bd_bif is protected by 1) global lock, 2) interface lock and
279	* 3) descriptor main wlock.
280	* Reading bd_bif can be protected by any of these locks, typically global lock.
281	*
282	* Changing read/write BPF filter is protected by the same three locks,
283	* the same applies for reading.
284	*
285	* Sleeping in global lock is not allowed due to bpfdetach() using it.
286	*/
287
288	/*
289	* Wrapper functions for various buffering methods. If the set of buffer
290	* modes expands, we will probably want to introduce a switch data structure
291	* similar to protosw, et.
292	*/
293	static void
294	bpf_append_bytes(struct bpf_d d, caddr_t buf, u_int offset, void src,
295	u_int len)
296	{
297
298	BPFD_LOCK_ASSERT(d);
299
300	switch (d->bd_bufmode) {
301	case BPF_BUFMODE_BUFFER:
302	return (bpf_buffer_append_bytes(d, buf, offset, src, len));
303
304	#ifndef __rtems__
305	case BPF_BUFMODE_ZBUF:
306	counter_u64_add(d->bd_zcopy, 1);
307	return (bpf_zerocopy_append_bytes(d, buf, offset, src, len));
308	#endif /* __rtems__ */
309
310	default:
311	panic("bpf_buf_append_bytes");
312	}
313	}
314
315	static void
316	bpf_append_mbuf(struct bpf_d d, caddr_t buf, u_int offset, void src,
317	u_int len)
318	{
319
320	BPFD_LOCK_ASSERT(d);
321
322	switch (d->bd_bufmode) {
323	case BPF_BUFMODE_BUFFER:
324	return (bpf_buffer_append_mbuf(d, buf, offset, src, len));
325
326	#ifndef __rtems__
327	case BPF_BUFMODE_ZBUF:
328	counter_u64_add(d->bd_zcopy, 1);
329	return (bpf_zerocopy_append_mbuf(d, buf, offset, src, len));
330	#endif /* __rtems__ */
331
332	default:
333	panic("bpf_buf_append_mbuf");
334	}
335	}
336
337	/*
338	* This function gets called when the free buffer is re-assigned.
339	*/
340	static void
341	bpf_buf_reclaimed(struct bpf_d *d)
342	{
343
344	BPFD_LOCK_ASSERT(d);
345
346	switch (d->bd_bufmode) {
347	case BPF_BUFMODE_BUFFER:
348	return;
349
350	#ifndef __rtems__
351	case BPF_BUFMODE_ZBUF:
352	bpf_zerocopy_buf_reclaimed(d);
353	return;
354	#endif /* __rtems__ */
355
356	default:
357	panic("bpf_buf_reclaimed");
358	}
359	}
360
361	/*
362	* If the buffer mechanism has a way to decide that a held buffer can be made
363	* free, then it is exposed via the bpf_canfreebuf() interface. (1) is
364	* returned if the buffer can be discarded, (0) is returned if it cannot.
365	*/
366	static int
367	bpf_canfreebuf(struct bpf_d *d)
368	{
369
370	BPFD_LOCK_ASSERT(d);
371
372	#ifndef __rtems__
373	switch (d->bd_bufmode) {
374	case BPF_BUFMODE_ZBUF:
375	return (bpf_zerocopy_canfreebuf(d));
376	}
377	#endif /* __rtems__ */
378	return (0);
379	}
380
381	/*
382	* Allow the buffer model to indicate that the current store buffer is
383	* immutable, regardless of the appearance of space. Return (1) if the
384	* buffer is writable, and (0) if not.
385	*/
386	static int
387	bpf_canwritebuf(struct bpf_d *d)
388	{
389	BPFD_LOCK_ASSERT(d);
390
391	#ifndef __rtems__
392	switch (d->bd_bufmode) {
393	case BPF_BUFMODE_ZBUF:
394	return (bpf_zerocopy_canwritebuf(d));
395	}
396	#endif /* __rtems__ */
397	return (1);
398	}
399
400	/*
401	* Notify buffer model that an attempt to write to the store buffer has
402	* resulted in a dropped packet, in which case the buffer may be considered
403	* full.
404	*/
405	static void
406	bpf_buffull(struct bpf_d *d)
407	{
408
409	BPFD_LOCK_ASSERT(d);
410
411	#ifndef __rtems__
412	switch (d->bd_bufmode) {
413	case BPF_BUFMODE_ZBUF:
414	bpf_zerocopy_buffull(d);
415	break;
416	}
417	#endif /* __rtems__ */
418	}
419
420	/*
421	* Notify the buffer model that a buffer has moved into the hold position.
422	*/
423	void
424	bpf_bufheld(struct bpf_d *d)
425	{
426
427	BPFD_LOCK_ASSERT(d);
428
429	#ifndef __rtems__
430	switch (d->bd_bufmode) {
431	case BPF_BUFMODE_ZBUF:
432	bpf_zerocopy_bufheld(d);
433	break;
434	}
435	#endif /* __rtems__ */
436	}
437
438	static void
439	bpf_free(struct bpf_d *d)
440	{
441
442	switch (d->bd_bufmode) {
443	case BPF_BUFMODE_BUFFER:
444	return (bpf_buffer_free(d));
445
446	#ifndef __rtems__
447	case BPF_BUFMODE_ZBUF:
448	return (bpf_zerocopy_free(d));
449	#endif /* __rtems__ */
450
451	default:
452	panic("bpf_buf_free");
453	}
454	}
455
456	static int
457	bpf_uiomove(struct bpf_d d, caddr_t buf, u_int len, struct uio uio)
458	{
459
460	if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
461	return (EOPNOTSUPP);
462	return (bpf_buffer_uiomove(d, buf, len, uio));
463	}
464
465	static int
466	bpf_ioctl_sblen(struct bpf_d d, u_int i)
467	{
468
469	if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
470	return (EOPNOTSUPP);
471	return (bpf_buffer_ioctl_sblen(d, i));
472	}
473
474	static int
475	bpf_ioctl_getzmax(struct thread td, struct bpf_d d, size_t *i)
476	{
477
478	#ifndef __rtems__
479	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
480	return (EOPNOTSUPP);
481	return (bpf_zerocopy_ioctl_getzmax(td, d, i));
482	#else /* __rtems__ */
483	return (EOPNOTSUPP);
484	#endif /* __rtems__ */
485	}
486
487	static int
488	bpf_ioctl_rotzbuf(struct thread td, struct bpf_d d, struct bpf_zbuf *bz)
489	{
490
491	#ifndef __rtems__
492	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
493	return (EOPNOTSUPP);
494	return (bpf_zerocopy_ioctl_rotzbuf(td, d, bz));
495	#else /* __rtems__ */
496	return (EOPNOTSUPP);
497	#endif /* __rtems__ */
498	}
499
500	static int
501	bpf_ioctl_setzbuf(struct thread td, struct bpf_d d, struct bpf_zbuf *bz)
502	{
503
504	#ifndef __rtems__
505	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
506	return (EOPNOTSUPP);
507	return (bpf_zerocopy_ioctl_setzbuf(td, d, bz));
508	#else /* __rtems__ */
509	return (EOPNOTSUPP);
510	#endif /* __rtems__ */
511	}
512
513	/*
514	* General BPF functions.
515	*/
516	static int
517	bpf_movein(struct uio uio, int linktype, struct ifnet ifp, struct mbuf **mp,
518	struct sockaddr sockp, int hdrlen, struct bpf_d *d)
519	{
520	const struct ieee80211_bpf_params *p;
521	struct ether_header *eh;
522	struct mbuf *m;
523	int error;
524	int len;
525	int hlen;
526	int slen;
527
528	/*
529	* Build a sockaddr based on the data link layer type.
530	* We do this at this level because the ethernet header
531	* is copied directly into the data field of the sockaddr.
532	* In the case of SLIP, there is no header and the packet
533	* is forwarded as is.
534	* Also, we are careful to leave room at the front of the mbuf
535	* for the link level header.
536	*/
537	switch (linktype) {
538
539	case DLT_SLIP:
540	sockp->sa_family = AF_INET;
541	hlen = 0;
542	break;
543
544	case DLT_EN10MB:
545	sockp->sa_family = AF_UNSPEC;
546	/* XXX Would MAXLINKHDR be better? */
547	hlen = ETHER_HDR_LEN;
548	break;
549
550	case DLT_FDDI:
551	sockp->sa_family = AF_IMPLINK;
552	hlen = 0;
553	break;
554
555	case DLT_RAW:
556	sockp->sa_family = AF_UNSPEC;
557	hlen = 0;
558	break;
559
560	case DLT_NULL:
561	/*
562	* null interface types require a 4 byte pseudo header which
563	* corresponds to the address family of the packet.
564	*/
565	sockp->sa_family = AF_UNSPEC;
566	hlen = 4;
567	break;
568
569	case DLT_ATM_RFC1483:
570	/*
571	* en atm driver requires 4-byte atm pseudo header.
572	* though it isn't standard, vpi:vci needs to be
573	* specified anyway.
574	*/
575	sockp->sa_family = AF_UNSPEC;
576	hlen = 12; /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
577	break;
578
579	case DLT_PPP:
580	sockp->sa_family = AF_UNSPEC;
581	hlen = 4; /* This should match PPP_HDRLEN */
582	break;
583
584	case DLT_IEEE802_11: /* IEEE 802.11 wireless */
585	sockp->sa_family = AF_IEEE80211;
586	hlen = 0;
587	break;
588
589	case DLT_IEEE802_11_RADIO: /* IEEE 802.11 wireless w/ phy params */
590	sockp->sa_family = AF_IEEE80211;
591	sockp->sa_len = 12; /* XXX != 0 */
592	hlen = sizeof(struct ieee80211_bpf_params);
593	break;
594
595	default:
596	return (EIO);
597	}
598
599	len = uio->uio_resid;
600	if (len < hlen \|\| len - hlen > ifp->if_mtu)
601	return (EMSGSIZE);
602
603	m = m_get2(len, M_WAITOK, MT_DATA, M_PKTHDR);
604	if (m == NULL)
605	return (EIO);
606	m->m_pkthdr.len = m->m_len = len;
607	*mp = m;
608
609	error = uiomove(mtod(m, u_char *), len, uio);
610	if (error)
611	goto bad;
612
613	slen = bpf_filter(d->bd_wfilter, mtod(m, u_char *), len, len);
614	if (slen == 0) {
615	error = EPERM;
616	goto bad;
617	}
618
619	/* Check for multicast destination */
620	switch (linktype) {
621	case DLT_EN10MB:
622	eh = mtod(m, struct ether_header *);
623	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
624	if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost,
625	ETHER_ADDR_LEN) == 0)
626	m->m_flags \|= M_BCAST;
627	else
628	m->m_flags \|= M_MCAST;
629	}
630	if (d->bd_hdrcmplt == 0) {
631	memcpy(eh->ether_shost, IF_LLADDR(ifp),
632	sizeof(eh->ether_shost));
633	}
634	break;
635	}
636
637	/*
638	* Make room for link header, and copy it to sockaddr
639	*/
640	if (hlen != 0) {
641	if (sockp->sa_family == AF_IEEE80211) {
642	/*
643	* Collect true length from the parameter header
644	* NB: sockp is known to be zero'd so if we do a
645	* short copy unspecified parameters will be
646	* zero.
647	* NB: packet may not be aligned after stripping
648	* bpf params
649	* XXX check ibp_vers
650	*/
651	p = mtod(m, const struct ieee80211_bpf_params *);
652	hlen = p->ibp_len;
653	if (hlen > sizeof(sockp->sa_data)) {
654	error = EINVAL;
655	goto bad;
656	}
657	}
658	bcopy(mtod(m, const void *), sockp->sa_data, hlen);
659	}
660	*hdrlen = hlen;
661
662	return (0);
663	bad:
664	m_freem(m);
665	return (error);
666	}
667
668	/*
669	* Attach file to the bpf interface, i.e. make d listen on bp.
670	*/
671	static void
672	bpf_attachd(struct bpf_d d, struct bpf_if bp)
673	{
674	int op_w;
675
676	BPF_LOCK_ASSERT();
677
678	/*
679	* Save sysctl value to protect from sysctl change
680	* between reads
681	*/
682	op_w = V_bpf_optimize_writers \|\| d->bd_writer;
683
684	if (d->bd_bif != NULL)
685	bpf_detachd_locked(d);
686	/*
687	* Point d at bp, and add d to the interface's list.
688	* Since there are many applications using BPF for
689	* sending raw packets only (dhcpd, cdpd are good examples)
690	* we can delay adding d to the list of active listeners until
691	* some filter is configured.
692	*/
693
694	BPFIF_WLOCK(bp);
695	BPFD_LOCK(d);
696
697	d->bd_bif = bp;
698
699	if (op_w != 0) {
700	/* Add to writers-only list */
701	LIST_INSERT_HEAD(&bp->bif_wlist, d, bd_next);
702	/*
703	* We decrement bd_writer on every filter set operation.
704	* First BIOCSETF is done by pcap_open_live() to set up
705	* snap length. After that appliation usually sets its own filter
706	*/
707	d->bd_writer = 2;
708	} else
709	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
710
711	BPFD_UNLOCK(d);
712	BPFIF_WUNLOCK(bp);
713
714	bpf_bpfd_cnt++;
715
716	CTR3(KTR_NET, "%s: bpf_attach called by pid %d, adding to %s list",
717	__func__, d->bd_pid, d->bd_writer ? "writer" : "active");
718
719	if (op_w == 0)
720	EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
721	}
722
723	/*
724	* Check if we need to upgrade our descriptor @d from write-only mode.
725	*/
726	static int
727	bpf_check_upgrade(u_long cmd, struct bpf_d d, struct bpf_insn fcode, int flen)
728	{
729	int is_snap, need_upgrade;
730
731	/*
732	* Check if we've already upgraded or new filter is empty.
733	*/
734	if (d->bd_writer == 0 \|\| fcode == NULL)
735	return (0);
736
737	need_upgrade = 0;
738
739	/*
740	* Check if cmd looks like snaplen setting from
741	* pcap_bpf.c:pcap_open_live().
742	* Note we're not checking .k value here:
743	* while pcap_open_live() definitely sets to non-zero value,
744	* we'd prefer to treat k=0 (deny ALL) case the same way: e.g.
745	* do not consider upgrading immediately
746	*/
747	if (cmd == BIOCSETF && flen == 1 && fcode[0].code == (BPF_RET \| BPF_K))
748	is_snap = 1;
749	else
750	is_snap = 0;
751
752	if (is_snap == 0) {
753	/*
754	* We're setting first filter and it doesn't look like
755	* setting snaplen. We're probably using bpf directly.
756	* Upgrade immediately.
757	*/
758	need_upgrade = 1;
759	} else {
760	/*
761	* Do not require upgrade by first BIOCSETF
762	* (used to set snaplen) by pcap_open_live().
763	*/
764
765	if (--d->bd_writer == 0) {
766	/*
767	* First snaplen filter has already
768	* been set. This is probably catch-all
769	* filter
770	*/
771	need_upgrade = 1;
772	}
773	}
774
775	CTR5(KTR_NET,
776	"%s: filter function set by pid %d, "
777	"bd_writer counter %d, snap %d upgrade %d",
778	__func__, d->bd_pid, d->bd_writer,
779	is_snap, need_upgrade);
780
781	return (need_upgrade);
782	}
783
784	/*
785	* Add d to the list of active bp filters.
786	* Requires bpf_attachd() to be called before.
787	*/
788	static void
789	bpf_upgraded(struct bpf_d *d)
790	{
791	struct bpf_if *bp;
792
793	BPF_LOCK_ASSERT();
794
795	bp = d->bd_bif;
796
797	/*
798	* Filter can be set several times without specifying interface.
799	* Mark d as reader and exit.
800	*/
801	if (bp == NULL) {
802	BPFD_LOCK(d);
803	d->bd_writer = 0;
804	BPFD_UNLOCK(d);
805	return;
806	}
807
808	BPFIF_WLOCK(bp);
809	BPFD_LOCK(d);
810
811	/* Remove from writers-only list */
812	LIST_REMOVE(d, bd_next);
813	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
814	/* Mark d as reader */
815	d->bd_writer = 0;
816
817	BPFD_UNLOCK(d);
818	BPFIF_WUNLOCK(bp);
819
820	CTR2(KTR_NET, "%s: upgrade required by pid %d", __func__, d->bd_pid);
821
822	EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
823	}
824
825	/*
826	* Detach a file from its interface.
827	*/
828	static void
829	bpf_detachd(struct bpf_d *d)
830	{
831	BPF_LOCK();
832	bpf_detachd_locked(d);
833	BPF_UNLOCK();
834	}
835
836	static void
837	bpf_detachd_locked(struct bpf_d *d)
838	{
839	int error;
840	struct bpf_if *bp;
841	struct ifnet *ifp;
842
843	CTR2(KTR_NET, "%s: detach required by pid %d", __func__, d->bd_pid);
844
845	BPF_LOCK_ASSERT();
846
847	/* Check if descriptor is attached */
848	if ((bp = d->bd_bif) == NULL)
849	return;
850
851	BPFIF_WLOCK(bp);
852	BPFD_LOCK(d);
853
854	/* Save bd_writer value */
855	error = d->bd_writer;
856
857	/*
858	* Remove d from the interface's descriptor list.
859	*/
860	LIST_REMOVE(d, bd_next);
861
862	ifp = bp->bif_ifp;
863	d->bd_bif = NULL;
864	BPFD_UNLOCK(d);
865	BPFIF_WUNLOCK(bp);
866
867	bpf_bpfd_cnt--;
868
869	/* Call event handler iff d is attached */
870	if (error == 0)
871	EVENTHANDLER_INVOKE(bpf_track, ifp, bp->bif_dlt, 0);
872
873	/*
874	* Check if this descriptor had requested promiscuous mode.
875	* If so, turn it off.
876	*/
877	if (d->bd_promisc) {
878	d->bd_promisc = 0;
879	CURVNET_SET(ifp->if_vnet);
880	error = ifpromisc(ifp, 0);
881	CURVNET_RESTORE();
882	if (error != 0 && error != ENXIO) {
883	/*
884	* ENXIO can happen if a pccard is unplugged
885	* Something is really wrong if we were able to put
886	* the driver into promiscuous mode, but can't
887	* take it out.
888	*/
889	if_printf(bp->bif_ifp,
890	"bpf_detach: ifpromisc failed (%d)\n", error);
891	}
892	}
893	}
894
895	/*
896	* Close the descriptor by detaching it from its interface,
897	* deallocating its buffers, and marking it free.
898	*/
899	static void
900	bpf_dtor(void *data)
901	{
902	struct bpf_d *d = data;
903
904	BPFD_LOCK(d);
905	if (d->bd_state == BPF_WAITING)
906	callout_stop(&d->bd_callout);
907	d->bd_state = BPF_IDLE;
908	BPFD_UNLOCK(d);
909	funsetown(&d->bd_sigio);
910	bpf_detachd(d);
911	#ifdef MAC
912	mac_bpfdesc_destroy(d);
913	#endif /* MAC */
914	seldrain(&d->bd_sel);
915	knlist_destroy(&d->bd_sel.si_note);
916	callout_drain(&d->bd_callout);
917	bpf_freed(d);
918	free(d, M_BPF);
919	}
920
921	/*
922	* Open ethernet device. Returns ENXIO for illegal minor device number,
923	* EBUSY if file is open by another process.
924	*/
925	/* ARGSUSED */
926	#ifndef __rtems__
927	static int
928	#else /* __rtems__ */
929	static struct bpf_d *
930	#endif /* __rtems__ */
931	bpfopen(struct cdev dev, int flags, int fmt, struct thread td)
932	{
933	struct bpf_d *d;
934	#ifndef __rtems__
935	int error;
936
937	d = malloc(sizeof(*d), M_BPF, M_WAITOK \| M_ZERO);
938	error = devfs_set_cdevpriv(d, bpf_dtor);
939	if (error != 0) {
940	free(d, M_BPF);
941	return (error);
942	}
943	#else /* __rtems__ */
944	d = malloc(sizeof(*d), M_BPF, M_NOWAIT \| M_ZERO);
945	if (d == NULL) {
946	return (d);
947	}
948	#endif /* __rtems__ */
949
950	/* Setup counters */
951	d->bd_rcount = counter_u64_alloc(M_WAITOK);
952	d->bd_dcount = counter_u64_alloc(M_WAITOK);
953	d->bd_fcount = counter_u64_alloc(M_WAITOK);
954	d->bd_wcount = counter_u64_alloc(M_WAITOK);
955	d->bd_wfcount = counter_u64_alloc(M_WAITOK);
956	d->bd_wdcount = counter_u64_alloc(M_WAITOK);
957	d->bd_zcopy = counter_u64_alloc(M_WAITOK);
958
959	/*
960	* For historical reasons, perform a one-time initialization call to
961	* the buffer routines, even though we're not yet committed to a
962	* particular buffer method.
963	*/
964	bpf_buffer_init(d);
965	if ((flags & FREAD) == 0)
966	d->bd_writer = 2;
967	d->bd_hbuf_in_use = 0;
968	d->bd_bufmode = BPF_BUFMODE_BUFFER;
969	d->bd_sig = SIGIO;
970	d->bd_direction = BPF_D_INOUT;
971	BPF_PID_REFRESH(d, td);
972	#ifdef MAC
973	mac_bpfdesc_init(d);
974	mac_bpfdesc_create(td->td_ucred, d);
975	#endif
976	mtx_init(&d->bd_lock, devtoname(dev), "bpf cdev lock", MTX_DEF);
977	callout_init_mtx(&d->bd_callout, &d->bd_lock, 0);
978	knlist_init_mtx(&d->bd_sel.si_note, &d->bd_lock);
979
980	#ifndef __rtems__
981	return (0);
982	#else /* __rtems__ */
983	return (d);
984	#endif /* __rtems__ */
985	}
986
987	/*
988	* bpfread - read next chunk of packets from buffers
989	*/
990	static int
991	#ifndef __rtems__
992	bpfread(struct cdev dev, struct uio uio, int ioflag)
993	#else /* __rtems__ */
994	bpfread(struct bpf_d d, struct uio uio, int ioflag)
995	#endif /* __rtems__ */
996	{
997	#ifndef __rtems__
998	struct bpf_d *d;
999	#endif /* __rtems__ */
1000	int error;
1001	int non_block;
1002	int timed_out;
1003
1004	error = devfs_get_cdevpriv((void **)&d);
1005	if (error != 0)
1006	return (error);
1007
1008	/*
1009	* Restrict application to use a buffer the same size as
1010	* as kernel buffers.
1011	*/
1012	if (uio->uio_resid != d->bd_bufsize)
1013	return (EINVAL);
1014
1015	non_block = ((ioflag & O_NONBLOCK) != 0);
1016
1017	BPFD_LOCK(d);
1018	BPF_PID_REFRESH_CUR(d);
1019	if (d->bd_bufmode != BPF_BUFMODE_BUFFER) {
1020	BPFD_UNLOCK(d);
1021	return (EOPNOTSUPP);
1022	}
1023	if (d->bd_state == BPF_WAITING)
1024	callout_stop(&d->bd_callout);
1025	timed_out = (d->bd_state == BPF_TIMED_OUT);
1026	d->bd_state = BPF_IDLE;
1027	while (d->bd_hbuf_in_use) {
1028	error = mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
1029	PRINET\|PCATCH, "bd_hbuf", 0);
1030	if (error != 0) {
1031	BPFD_UNLOCK(d);
1032	return (error);
1033	}
1034	}
1035	/*
1036	* If the hold buffer is empty, then do a timed sleep, which
1037	* ends when the timeout expires or when enough packets
1038	* have arrived to fill the store buffer.
1039	*/
1040	while (d->bd_hbuf == NULL) {
1041	if (d->bd_slen != 0) {
1042	/*
1043	* A packet(s) either arrived since the previous
1044	* read or arrived while we were asleep.
1045	*/
1046	if (d->bd_immediate \|\| non_block \|\| timed_out) {
1047	/*
1048	* Rotate the buffers and return what's here
1049	* if we are in immediate mode, non-blocking
1050	* flag is set, or this descriptor timed out.
1051	*/
1052	ROTATE_BUFFERS(d);
1053	break;
1054	}
1055	}
1056
1057	/*
1058	* No data is available, check to see if the bpf device
1059	* is still pointed at a real interface. If not, return
1060	* ENXIO so that the userland process knows to rebind
1061	* it before using it again.
1062	*/
1063	if (d->bd_bif == NULL) {
1064	BPFD_UNLOCK(d);
1065	return (ENXIO);
1066	}
1067
1068	if (non_block) {
1069	BPFD_UNLOCK(d);
1070	return (EWOULDBLOCK);
1071	}
1072	error = msleep(d, &d->bd_lock, PRINET\|PCATCH,
1073	"bpf", d->bd_rtout);
1074	if (error == EINTR \|\| error == ERESTART) {
1075	BPFD_UNLOCK(d);
1076	return (error);
1077	}
1078	if (error == EWOULDBLOCK) {
1079	/*
1080	* On a timeout, return what's in the buffer,
1081	* which may be nothing. If there is something
1082	* in the store buffer, we can rotate the buffers.
1083	*/
1084	if (d->bd_hbuf)
1085	/*
1086	* We filled up the buffer in between
1087	* getting the timeout and arriving
1088	* here, so we don't need to rotate.
1089	*/
1090	break;
1091
1092	if (d->bd_slen == 0) {
1093	BPFD_UNLOCK(d);
1094	return (0);
1095	}
1096	ROTATE_BUFFERS(d);
1097	break;
1098	}
1099	}
1100	/*
1101	* At this point, we know we have something in the hold slot.
1102	*/
1103	d->bd_hbuf_in_use = 1;
1104	BPFD_UNLOCK(d);
1105
1106	/*
1107	* Move data from hold buffer into user space.
1108	* We know the entire buffer is transferred since
1109	* we checked above that the read buffer is bpf_bufsize bytes.
1110	*
1111	* We do not have to worry about simultaneous reads because
1112	* we waited for sole access to the hold buffer above.
1113	*/
1114	error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio);
1115
1116	BPFD_LOCK(d);
1117	KASSERT(d->bd_hbuf != NULL, ("bpfread: lost bd_hbuf"));
1118	d->bd_fbuf = d->bd_hbuf;
1119	d->bd_hbuf = NULL;
1120	d->bd_hlen = 0;
1121	bpf_buf_reclaimed(d);
1122	d->bd_hbuf_in_use = 0;
1123	wakeup(&d->bd_hbuf_in_use);
1124	BPFD_UNLOCK(d);
1125
1126	return (error);
1127	}
1128
1129	/*
1130	* If there are processes sleeping on this descriptor, wake them up.
1131	*/
1132	static __inline void
1133	bpf_wakeup(struct bpf_d *d)
1134	{
1135
1136	BPFD_LOCK_ASSERT(d);
1137	if (d->bd_state == BPF_WAITING) {
1138	callout_stop(&d->bd_callout);
1139	d->bd_state = BPF_IDLE;
1140	}
1141	wakeup(d);
1142	#ifndef __rtems__
1143	if (d->bd_async && d->bd_sig && d->bd_sigio)
1144	pgsigio(&d->bd_sigio, d->bd_sig, 0);
1145	#endif /* __rtems__ */
1146
1147	selwakeuppri(&d->bd_sel, PRINET);
1148	KNOTE_LOCKED(&d->bd_sel.si_note, 0);
1149	}
1150
1151	static void
1152	bpf_timed_out(void *arg)
1153	{
1154	struct bpf_d d = (struct bpf_d )arg;
1155
1156	BPFD_LOCK_ASSERT(d);
1157
1158	if (callout_pending(&d->bd_callout) \|\| !callout_active(&d->bd_callout))
1159	return;
1160	if (d->bd_state == BPF_WAITING) {
1161	d->bd_state = BPF_TIMED_OUT;
1162	if (d->bd_slen != 0)
1163	bpf_wakeup(d);
1164	}
1165	}
1166
1167	static int
1168	bpf_ready(struct bpf_d *d)
1169	{
1170
1171	BPFD_LOCK_ASSERT(d);
1172
1173	if (!bpf_canfreebuf(d) && d->bd_hlen != 0)
1174	return (1);
1175	if ((d->bd_immediate \|\| d->bd_state == BPF_TIMED_OUT) &&
1176	d->bd_slen != 0)
1177	return (1);
1178	return (0);
1179	}
1180
1181	static int
1182	#ifndef __rtems__
1183	bpfwrite(struct cdev dev, struct uio uio, int ioflag)
1184	#else /* __rtems__ */
1185	bpfwrite(struct bpf_d d, struct uio uio, int ioflag)
1186	#endif /* __rtems__ */
1187	{
1188	#ifndef __rtems__
1189	struct bpf_d *d;
1190	#endif /* __rtems__ */
1191	struct ifnet *ifp;
1192	struct mbuf m, mc;
1193	struct sockaddr dst;
1194	struct route ro;
1195	int error, hlen;
1196
1197	error = devfs_get_cdevpriv((void **)&d);
1198	if (error != 0)
1199	return (error);
1200
1201	BPF_PID_REFRESH_CUR(d);
1202	counter_u64_add(d->bd_wcount, 1);
1203	/* XXX: locking required */
1204	if (d->bd_bif == NULL) {
1205	counter_u64_add(d->bd_wdcount, 1);
1206	return (ENXIO);
1207	}
1208
1209	ifp = d->bd_bif->bif_ifp;
1210
1211	if ((ifp->if_flags & IFF_UP) == 0) {
1212	counter_u64_add(d->bd_wdcount, 1);
1213	return (ENETDOWN);
1214	}
1215
1216	if (uio->uio_resid == 0) {
1217	counter_u64_add(d->bd_wdcount, 1);
1218	return (0);
1219	}
1220
1221	bzero(&dst, sizeof(dst));
1222	m = NULL;
1223	hlen = 0;
1224	/* XXX: bpf_movein() can sleep */
1225	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
1226	&m, &dst, &hlen, d);
1227	if (error) {
1228	counter_u64_add(d->bd_wdcount, 1);
1229	return (error);
1230	}
1231	counter_u64_add(d->bd_wfcount, 1);
1232	if (d->bd_hdrcmplt)
1233	dst.sa_family = pseudo_AF_HDRCMPLT;
1234
1235	if (d->bd_feedback) {
1236	mc = m_dup(m, M_NOWAIT);
1237	if (mc != NULL)
1238	mc->m_pkthdr.rcvif = ifp;
1239	/* Set M_PROMISC for outgoing packets to be discarded. */
1240	if (d->bd_direction == BPF_D_INOUT)
1241	m->m_flags \|= M_PROMISC;
1242	} else
1243	mc = NULL;
1244
1245	m->m_pkthdr.len -= hlen;
1246	m->m_len -= hlen;
1247	m->m_data += hlen; /* XXX */
1248
1249	CURVNET_SET(ifp->if_vnet);
1250	#ifdef MAC
1251	BPFD_LOCK(d);
1252	mac_bpfdesc_create_mbuf(d, m);
1253	if (mc != NULL)
1254	mac_bpfdesc_create_mbuf(d, mc);
1255	BPFD_UNLOCK(d);
1256	#endif
1257
1258	bzero(&ro, sizeof(ro));
1259	if (hlen != 0) {
1260	ro.ro_prepend = (u_char *)&dst.sa_data;
1261	ro.ro_plen = hlen;
1262	ro.ro_flags = RT_HAS_HEADER;
1263	}
1264
1265	error = (*ifp->if_output)(ifp, m, &dst, &ro);
1266	if (error)
1267	counter_u64_add(d->bd_wdcount, 1);
1268
1269	if (mc != NULL) {
1270	if (error == 0)
1271	(*ifp->if_input)(ifp, mc);
1272	else
1273	m_freem(mc);
1274	}
1275	CURVNET_RESTORE();
1276
1277	return (error);
1278	}
1279
1280	/*
1281	* Reset a descriptor by flushing its packet buffer and clearing the receive
1282	* and drop counts. This is doable for kernel-only buffers, but with
1283	* zero-copy buffers, we can't write to (or rotate) buffers that are
1284	* currently owned by userspace. It would be nice if we could encapsulate
1285	* this logic in the buffer code rather than here.
1286	*/
1287	static void
1288	reset_d(struct bpf_d *d)
1289	{
1290
1291	BPFD_LOCK_ASSERT(d);
1292
1293	while (d->bd_hbuf_in_use)
1294	mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, PRINET,
1295	"bd_hbuf", 0);
1296	if ((d->bd_hbuf != NULL) &&
1297	(d->bd_bufmode != BPF_BUFMODE_ZBUF \|\| bpf_canfreebuf(d))) {
1298	/* Free the hold buffer. */
1299	d->bd_fbuf = d->bd_hbuf;
1300	d->bd_hbuf = NULL;
1301	d->bd_hlen = 0;
1302	bpf_buf_reclaimed(d);
1303	}
1304	if (bpf_canwritebuf(d))
1305	d->bd_slen = 0;
1306	counter_u64_zero(d->bd_rcount);
1307	counter_u64_zero(d->bd_dcount);
1308	counter_u64_zero(d->bd_fcount);
1309	counter_u64_zero(d->bd_wcount);
1310	counter_u64_zero(d->bd_wfcount);
1311	counter_u64_zero(d->bd_wdcount);
1312	counter_u64_zero(d->bd_zcopy);
1313	}
1314
1315	/*
1316	* FIONREAD Check for read packet available.
1317	* BIOCGBLEN Get buffer len [for read()].
1318	* BIOCSETF Set read filter.
1319	* BIOCSETFNR Set read filter without resetting descriptor.
1320	* BIOCSETWF Set write filter.
1321	* BIOCFLUSH Flush read packet buffer.
1322	* BIOCPROMISC Put interface into promiscuous mode.
1323	* BIOCGDLT Get link layer type.
1324	* BIOCGETIF Get interface name.
1325	* BIOCSETIF Set interface.
1326	* BIOCSRTIMEOUT Set read timeout.
1327	* BIOCGRTIMEOUT Get read timeout.
1328	* BIOCGSTATS Get packet stats.
1329	* BIOCIMMEDIATE Set immediate mode.
1330	* BIOCVERSION Get filter language version.
1331	* BIOCGHDRCMPLT Get "header already complete" flag
1332	* BIOCSHDRCMPLT Set "header already complete" flag
1333	* BIOCGDIRECTION Get packet direction flag
1334	* BIOCSDIRECTION Set packet direction flag
1335	* BIOCGTSTAMP Get time stamp format and resolution.
1336	* BIOCSTSTAMP Set time stamp format and resolution.
1337	* BIOCLOCK Set "locked" flag
1338	* BIOCFEEDBACK Set packet feedback mode.
1339	* BIOCSETZBUF Set current zero-copy buffer locations.
1340	* BIOCGETZMAX Get maximum zero-copy buffer size.
1341	* BIOCROTZBUF Force rotation of zero-copy buffer
1342	* BIOCSETBUFMODE Set buffer mode.
1343	* BIOCGETBUFMODE Get current buffer mode.
1344	*/
1345	/* ARGSUSED */
1346	static int
1347	#ifndef __rtems__
1348	bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
1349	struct thread *td)
1350	#else /* __rtems__ */
1351	bpfioctl(struct bpf_d *d, u_long cmd, caddr_t addr, int flags,
1352	struct thread *td)
1353	#endif /* __rtems__ */
1354	{
1355	#ifndef __rtems__
1356	struct bpf_d *d;
1357	#endif /* __rtems__ */
1358	int error;
1359
1360	error = devfs_get_cdevpriv((void **)&d);
1361	if (error != 0)
1362	return (error);
1363
1364	/*
1365	* Refresh PID associated with this descriptor.
1366	*/
1367	BPFD_LOCK(d);
1368	BPF_PID_REFRESH(d, td);
1369	if (d->bd_state == BPF_WAITING)
1370	callout_stop(&d->bd_callout);
1371	d->bd_state = BPF_IDLE;
1372	BPFD_UNLOCK(d);
1373
1374	if (d->bd_locked == 1) {
1375	switch (cmd) {
1376	case BIOCGBLEN:
1377	case BIOCFLUSH:
1378	case BIOCGDLT:
1379	case BIOCGDLTLIST:
1380	#ifdef COMPAT_FREEBSD32
1381	case BIOCGDLTLIST32:
1382	#endif
1383	case BIOCGETIF:
1384	case BIOCGRTIMEOUT:
1385	#if defined(COMPAT_FREEBSD32) && defined(__amd64__)
1386	case BIOCGRTIMEOUT32:
1387	#endif
1388	case BIOCGSTATS:
1389	case BIOCVERSION:
1390	case BIOCGRSIG:
1391	case BIOCGHDRCMPLT:
1392	case BIOCSTSTAMP:
1393	case BIOCFEEDBACK:
1394	case FIONREAD:
1395	case BIOCLOCK:
1396	case BIOCSRTIMEOUT:
1397	#if defined(COMPAT_FREEBSD32) && defined(__amd64__)
1398	case BIOCSRTIMEOUT32:
1399	#endif
1400	case BIOCIMMEDIATE:
1401	case TIOCGPGRP:
1402	case BIOCROTZBUF:
1403	break;
1404	default:
1405	return (EPERM);
1406	}
1407	}
1408	#ifdef COMPAT_FREEBSD32
1409	/*
1410	* If we see a 32-bit compat ioctl, mark the stream as 32-bit so
1411	* that it will get 32-bit packet headers.
1412	*/
1413	switch (cmd) {
1414	case BIOCSETF32:
1415	case BIOCSETFNR32:
1416	case BIOCSETWF32:
1417	case BIOCGDLTLIST32:
1418	case BIOCGRTIMEOUT32:
1419	case BIOCSRTIMEOUT32:
1420	if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
1421	BPFD_LOCK(d);
1422	d->bd_compat32 = 1;
1423	BPFD_UNLOCK(d);
1424	}
1425	}
1426	#endif
1427
1428	CURVNET_SET(TD_TO_VNET(td));
1429	switch (cmd) {
1430
1431	default:
1432	error = EINVAL;
1433	break;
1434
1435	/*
1436	* Check for read packet available.
1437	*/
1438	case FIONREAD:
1439	{
1440	int n;
1441
1442	BPFD_LOCK(d);
1443	n = d->bd_slen;
1444	while (d->bd_hbuf_in_use)
1445	mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
1446	PRINET, "bd_hbuf", 0);
1447	if (d->bd_hbuf)
1448	n += d->bd_hlen;
1449	BPFD_UNLOCK(d);
1450
1451	(int )addr = n;
1452	break;
1453	}
1454
1455	/*
1456	* Get buffer len [for read()].
1457	*/
1458	case BIOCGBLEN:
1459	BPFD_LOCK(d);
1460	(u_int )addr = d->bd_bufsize;
1461	BPFD_UNLOCK(d);
1462	break;
1463
1464	/*
1465	* Set buffer length.
1466	*/
1467	case BIOCSBLEN:
1468	error = bpf_ioctl_sblen(d, (u_int *)addr);
1469	break;
1470
1471	/*
1472	* Set link layer read filter.
1473	*/
1474	case BIOCSETF:
1475	case BIOCSETFNR:
1476	case BIOCSETWF:
1477	#ifdef COMPAT_FREEBSD32
1478	case BIOCSETF32:
1479	case BIOCSETFNR32:
1480	case BIOCSETWF32:
1481	#endif
1482	error = bpf_setf(d, (struct bpf_program *)addr, cmd);
1483	break;
1484
1485	/*
1486	* Flush read packet buffer.
1487	*/
1488	case BIOCFLUSH:
1489	BPFD_LOCK(d);
1490	reset_d(d);
1491	BPFD_UNLOCK(d);
1492	break;
1493
1494	/*
1495	* Put interface into promiscuous mode.
1496	*/
1497	case BIOCPROMISC:
1498	if (d->bd_bif == NULL) {
1499	/*
1500	* No interface attached yet.
1501	*/
1502	error = EINVAL;
1503	break;
1504	}
1505	if (d->bd_promisc == 0) {
1506	error = ifpromisc(d->bd_bif->bif_ifp, 1);
1507	if (error == 0)
1508	d->bd_promisc = 1;
1509	}
1510	break;
1511
1512	/*
1513	* Get current data link type.
1514	*/
1515	case BIOCGDLT:
1516	BPF_LOCK();
1517	if (d->bd_bif == NULL)
1518	error = EINVAL;
1519	else
1520	(u_int )addr = d->bd_bif->bif_dlt;
1521	BPF_UNLOCK();
1522	break;
1523
1524	/*
1525	* Get a list of supported data link types.
1526	*/
1527	#ifdef COMPAT_FREEBSD32
1528	case BIOCGDLTLIST32:
1529	{
1530	struct bpf_dltlist32 *list32;
1531	struct bpf_dltlist dltlist;
1532
1533	list32 = (struct bpf_dltlist32 *)addr;
1534	dltlist.bfl_len = list32->bfl_len;
1535	dltlist.bfl_list = PTRIN(list32->bfl_list);
1536	BPF_LOCK();
1537	if (d->bd_bif == NULL)
1538	error = EINVAL;
1539	else {
1540	error = bpf_getdltlist(d, &dltlist);
1541	if (error == 0)
1542	list32->bfl_len = dltlist.bfl_len;
1543	}
1544	BPF_UNLOCK();
1545	break;
1546	}
1547	#endif
1548
1549	case BIOCGDLTLIST:
1550	BPF_LOCK();
1551	if (d->bd_bif == NULL)
1552	error = EINVAL;
1553	else
1554	error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
1555	BPF_UNLOCK();
1556	break;
1557
1558	/*
1559	* Set data link type.
1560	*/
1561	case BIOCSDLT:
1562	BPF_LOCK();
1563	if (d->bd_bif == NULL)
1564	error = EINVAL;
1565	else
1566	error = bpf_setdlt(d, (u_int )addr);
1567	BPF_UNLOCK();
1568	break;
1569
1570	/*
1571	* Get interface name.
1572	*/
1573	case BIOCGETIF:
1574	BPF_LOCK();
1575	if (d->bd_bif == NULL)
1576	error = EINVAL;
1577	else {
1578	struct ifnet *const ifp = d->bd_bif->bif_ifp;
1579	struct ifreq const ifr = (struct ifreq )addr;
1580
1581	strlcpy(ifr->ifr_name, ifp->if_xname,
1582	sizeof(ifr->ifr_name));
1583	}
1584	BPF_UNLOCK();
1585	break;
1586
1587	/*
1588	* Set interface.
1589	*/
1590	case BIOCSETIF:
1591	{
1592	int alloc_buf, size;
1593
1594	/*
1595	* Behavior here depends on the buffering model. If
1596	* we're using kernel memory buffers, then we can
1597	* allocate them here. If we're using zero-copy,
1598	* then the user process must have registered buffers
1599	* by the time we get here.
1600	*/
1601	alloc_buf = 0;
1602	BPFD_LOCK(d);
1603	if (d->bd_bufmode == BPF_BUFMODE_BUFFER &&
1604	d->bd_sbuf == NULL)
1605	alloc_buf = 1;
1606	BPFD_UNLOCK(d);
1607	if (alloc_buf) {
1608	size = d->bd_bufsize;
1609	error = bpf_buffer_ioctl_sblen(d, &size);
1610	if (error != 0)
1611	break;
1612	}
1613	BPF_LOCK();
1614	error = bpf_setif(d, (struct ifreq *)addr);
1615	BPF_UNLOCK();
1616	break;
1617	}
1618
1619	/*
1620	* Set read timeout.
1621	*/
1622	case BIOCSRTIMEOUT:
1623	#if defined(COMPAT_FREEBSD32) && defined(__amd64__)
1624	case BIOCSRTIMEOUT32:
1625	#endif
1626	{
1627	struct timeval tv = (struct timeval )addr;
1628	#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
1629	struct timeval32 *tv32;
1630	struct timeval tv64;
1631
1632	if (cmd == BIOCSRTIMEOUT32) {
1633	tv32 = (struct timeval32 *)addr;
1634	tv = &tv64;
1635	tv->tv_sec = tv32->tv_sec;
1636	tv->tv_usec = tv32->tv_usec;
1637	} else
1638	#endif
1639	tv = (struct timeval *)addr;
1640
1641	/*
1642	* Subtract 1 tick from tvtohz() since this isn't
1643	* a one-shot timer.
1644	*/
1645	if ((error = itimerfix(tv)) == 0)
1646	d->bd_rtout = tvtohz(tv) - 1;
1647	break;
1648	}
1649
1650	/*
1651	* Get read timeout.
1652	*/
1653	case BIOCGRTIMEOUT:
1654	#if defined(COMPAT_FREEBSD32) && defined(__amd64__)
1655	case BIOCGRTIMEOUT32:
1656	#endif
1657	{
1658	struct timeval *tv;
1659	#if defined(COMPAT_FREEBSD32) && defined(__amd64__)
1660	struct timeval32 *tv32;
1661	struct timeval tv64;
1662
1663	if (cmd == BIOCGRTIMEOUT32)
1664	tv = &tv64;
1665	else
1666	#endif
1667	tv = (struct timeval *)addr;
1668
1669	tv->tv_sec = d->bd_rtout / hz;
1670	tv->tv_usec = (d->bd_rtout % hz) * tick;
1671	#if defined(COMPAT_FREEBSD32) && defined(__amd64__)
1672	if (cmd == BIOCGRTIMEOUT32) {
1673	tv32 = (struct timeval32 *)addr;
1674	tv32->tv_sec = tv->tv_sec;
1675	tv32->tv_usec = tv->tv_usec;
1676	}
1677	#endif
1678
1679	break;
1680	}
1681
1682	/*
1683	* Get packet stats.
1684	*/
1685	case BIOCGSTATS:
1686	{
1687	struct bpf_stat bs = (struct bpf_stat )addr;
1688
1689	/* XXXCSJP overflow */
1690	bs->bs_recv = (u_int)counter_u64_fetch(d->bd_rcount);
1691	bs->bs_drop = (u_int)counter_u64_fetch(d->bd_dcount);
1692	break;
1693	}
1694
1695	/*
1696	* Set immediate mode.
1697	*/
1698	case BIOCIMMEDIATE:
1699	BPFD_LOCK(d);
1700	d->bd_immediate = (u_int )addr;
1701	BPFD_UNLOCK(d);
1702	break;
1703
1704	case BIOCVERSION:
1705	{
1706	struct bpf_version bv = (struct bpf_version )addr;
1707
1708	bv->bv_major = BPF_MAJOR_VERSION;
1709	bv->bv_minor = BPF_MINOR_VERSION;
1710	break;
1711	}
1712
1713	/*
1714	* Get "header already complete" flag
1715	*/
1716	case BIOCGHDRCMPLT:
1717	BPFD_LOCK(d);
1718	(u_int )addr = d->bd_hdrcmplt;
1719	BPFD_UNLOCK(d);
1720	break;
1721
1722	/*
1723	* Set "header already complete" flag
1724	*/
1725	case BIOCSHDRCMPLT:
1726	BPFD_LOCK(d);
1727	d->bd_hdrcmplt = (u_int )addr ? 1 : 0;
1728	BPFD_UNLOCK(d);
1729	break;
1730
1731	/*
1732	* Get packet direction flag
1733	*/
1734	case BIOCGDIRECTION:
1735	BPFD_LOCK(d);
1736	(u_int )addr = d->bd_direction;
1737	BPFD_UNLOCK(d);
1738	break;
1739
1740	/*
1741	* Set packet direction flag
1742	*/
1743	case BIOCSDIRECTION:
1744	{
1745	u_int direction;
1746
1747	direction = (u_int )addr;
1748	switch (direction) {
1749	case BPF_D_IN:
1750	case BPF_D_INOUT:
1751	case BPF_D_OUT:
1752	BPFD_LOCK(d);
1753	d->bd_direction = direction;
1754	BPFD_UNLOCK(d);
1755	break;
1756	default:
1757	error = EINVAL;
1758	}
1759	}
1760	break;
1761
1762	/*
1763	* Get packet timestamp format and resolution.
1764	*/
1765	case BIOCGTSTAMP:
1766	BPFD_LOCK(d);
1767	(u_int )addr = d->bd_tstamp;
1768	BPFD_UNLOCK(d);
1769	break;
1770
1771	/*
1772	* Set packet timestamp format and resolution.
1773	*/
1774	case BIOCSTSTAMP:
1775	{
1776	u_int func;
1777
1778	func = (u_int )addr;
1779	if (BPF_T_VALID(func))
1780	d->bd_tstamp = func;
1781	else
1782	error = EINVAL;
1783	}
1784	break;
1785
1786	case BIOCFEEDBACK:
1787	BPFD_LOCK(d);
1788	d->bd_feedback = (u_int )addr;
1789	BPFD_UNLOCK(d);
1790	break;
1791
1792	case BIOCLOCK:
1793	BPFD_LOCK(d);
1794	d->bd_locked = 1;
1795	BPFD_UNLOCK(d);
1796	break;
1797
1798	case FIONBIO: /* Non-blocking I/O */
1799	break;
1800
1801	#ifndef __rtems__
1802	case FIOASYNC: /* Send signal on receive packets */
1803	BPFD_LOCK(d);
1804	d->bd_async = (int )addr;
1805	BPFD_UNLOCK(d);
1806	break;
1807	#endif /* __rtems__ */
1808
1809	case FIOSETOWN:
1810	/*
1811	* XXX: Add some sort of locking here?
1812	* fsetown() can sleep.
1813	*/
1814	error = fsetown((int )addr, &d->bd_sigio);
1815	break;
1816
1817	case FIOGETOWN:
1818	BPFD_LOCK(d);
1819	(int )addr = fgetown(&d->bd_sigio);
1820	BPFD_UNLOCK(d);
1821	break;
1822
1823	/* This is deprecated, FIOSETOWN should be used instead. */
1824	case TIOCSPGRP:
1825	error = fsetown(-((int )addr), &d->bd_sigio);
1826	break;
1827
1828	/* This is deprecated, FIOGETOWN should be used instead. */
1829	case TIOCGPGRP:
1830	(int )addr = -fgetown(&d->bd_sigio);
1831	break;
1832
1833	case BIOCSRSIG: /* Set receive signal */
1834	{
1835	u_int sig;
1836
1837	sig = (u_int )addr;
1838
1839	if (sig >= NSIG)
1840	error = EINVAL;
1841	else {
1842	BPFD_LOCK(d);
1843	d->bd_sig = sig;
1844	BPFD_UNLOCK(d);
1845	}
1846	break;
1847	}
1848	case BIOCGRSIG:
1849	BPFD_LOCK(d);
1850	(u_int )addr = d->bd_sig;
1851	BPFD_UNLOCK(d);
1852	break;
1853
1854	case BIOCGETBUFMODE:
1855	BPFD_LOCK(d);
1856	(u_int )addr = d->bd_bufmode;
1857	BPFD_UNLOCK(d);
1858	break;
1859
1860	case BIOCSETBUFMODE:
1861	/*
1862	* Allow the buffering mode to be changed as long as we
1863	* haven't yet committed to a particular mode. Our
1864	* definition of commitment, for now, is whether or not a
1865	* buffer has been allocated or an interface attached, since
1866	* that's the point where things get tricky.
1867	*/
1868	switch ((u_int )addr) {
1869	case BPF_BUFMODE_BUFFER:
1870	break;
1871
1872	#ifndef __rtems__
1873	case BPF_BUFMODE_ZBUF:
1874	if (bpf_zerocopy_enable)
1875	break;
1876	/* FALLSTHROUGH */
1877	#endif /* __rtems__ */
1878
1879	default:
1880	CURVNET_RESTORE();
1881	return (EINVAL);
1882	}
1883
1884	BPFD_LOCK(d);
1885	if (d->bd_sbuf != NULL \|\| d->bd_hbuf != NULL \|\|
1886	d->bd_fbuf != NULL \|\| d->bd_bif != NULL) {
1887	BPFD_UNLOCK(d);
1888	CURVNET_RESTORE();
1889	return (EBUSY);
1890	}
1891	d->bd_bufmode = (u_int )addr;
1892	BPFD_UNLOCK(d);
1893	break;
1894
1895	case BIOCGETZMAX:
1896	error = bpf_ioctl_getzmax(td, d, (size_t *)addr);
1897	break;
1898
1899	case BIOCSETZBUF:
1900	error = bpf_ioctl_setzbuf(td, d, (struct bpf_zbuf *)addr);
1901	break;
1902
1903	case BIOCROTZBUF:
1904	error = bpf_ioctl_rotzbuf(td, d, (struct bpf_zbuf *)addr);
1905	break;
1906	}
1907	CURVNET_RESTORE();
1908	return (error);
1909	}
1910
1911	/*
1912	* Set d's packet filter program to fp. If this file already has a filter,
1913	* free it and replace it. Returns EINVAL for bogus requests.
1914	*
1915	* Note we need global lock here to serialize bpf_setf() and bpf_setif() calls
1916	* since reading d->bd_bif can't be protected by d or interface lock due to
1917	* lock order.
1918	*
1919	* Additionally, we have to acquire interface write lock due to bpf_mtap() uses
1920	* interface read lock to read all filers.
1921	*
1922	*/
1923	static int
1924	bpf_setf(struct bpf_d d, struct bpf_program fp, u_long cmd)
1925	{
1926	#ifdef COMPAT_FREEBSD32
1927	struct bpf_program fp_swab;
1928	struct bpf_program32 *fp32;
1929	#endif
1930	struct bpf_insn fcode, old;
1931	#ifdef BPF_JITTER
1932	bpf_jit_filter jfunc, ofunc;
1933	#endif
1934	size_t size;
1935	u_int flen;
1936	int need_upgrade;
1937
1938	#ifdef COMPAT_FREEBSD32
1939	switch (cmd) {
1940	case BIOCSETF32:
1941	case BIOCSETWF32:
1942	case BIOCSETFNR32:
1943	fp32 = (struct bpf_program32 *)fp;
1944	fp_swab.bf_len = fp32->bf_len;
1945	fp_swab.bf_insns = (struct bpf_insn *)(uintptr_t)fp32->bf_insns;
1946	fp = &fp_swab;
1947	switch (cmd) {
1948	case BIOCSETF32:
1949	cmd = BIOCSETF;
1950	break;
1951	case BIOCSETWF32:
1952	cmd = BIOCSETWF;
1953	break;
1954	}
1955	break;
1956	}
1957	#endif
1958
1959	fcode = NULL;
1960	#ifdef BPF_JITTER
1961	jfunc = ofunc = NULL;
1962	#endif
1963	need_upgrade = 0;
1964
1965	/*
1966	* Check new filter validness before acquiring any locks.
1967	* Allocate memory for new filter, if needed.
1968	*/
1969	flen = fp->bf_len;
1970	if (flen > bpf_maxinsns \|\| (fp->bf_insns == NULL && flen != 0))
1971	return (EINVAL);
1972	size = flen * sizeof(*fp->bf_insns);
1973	if (size > 0) {
1974	/* We're setting up new filter. Copy and check actual data. */
1975	fcode = malloc(size, M_BPF, M_WAITOK);
1976	if (copyin(fp->bf_insns, fcode, size) != 0 \|\|
1977	!bpf_validate(fcode, flen)) {
1978	free(fcode, M_BPF);
1979	return (EINVAL);
1980	}
1981	#ifdef BPF_JITTER
1982	if (cmd != BIOCSETWF) {
1983	/*
1984	* Filter is copied inside fcode and is
1985	* perfectly valid.
1986	*/
1987	jfunc = bpf_jitter(fcode, flen);
1988	}
1989	#endif
1990	}
1991
1992	BPF_LOCK();
1993
1994	/*
1995	* Set up new filter.
1996	* Protect filter change by interface lock.
1997	* Additionally, we are protected by global lock here.
1998	*/
1999	if (d->bd_bif != NULL)
2000	BPFIF_WLOCK(d->bd_bif);
2001	BPFD_LOCK(d);
2002	if (cmd == BIOCSETWF) {
2003	old = d->bd_wfilter;
2004	d->bd_wfilter = fcode;
2005	} else {
2006	old = d->bd_rfilter;
2007	d->bd_rfilter = fcode;
2008	#ifdef BPF_JITTER
2009	ofunc = d->bd_bfilter;
2010	d->bd_bfilter = jfunc;
2011	#endif
2012	if (cmd == BIOCSETF)
2013	reset_d(d);
2014
2015	need_upgrade = bpf_check_upgrade(cmd, d, fcode, flen);
2016	}
2017	BPFD_UNLOCK(d);
2018	if (d->bd_bif != NULL)
2019	BPFIF_WUNLOCK(d->bd_bif);
2020	if (old != NULL)
2021	free(old, M_BPF);
2022	#ifdef BPF_JITTER
2023	if (ofunc != NULL)
2024	bpf_destroy_jit_filter(ofunc);
2025	#endif
2026
2027	/* Move d to active readers list. */
2028	if (need_upgrade != 0)
2029	bpf_upgraded(d);
2030
2031	BPF_UNLOCK();
2032	return (0);
2033	}
2034
2035	/*
2036	* Detach a file from its current interface (if attached at all) and attach
2037	* to the interface indicated by the name stored in ifr.
2038	* Return an errno or 0.
2039	*/
2040	static int
2041	bpf_setif(struct bpf_d d, struct ifreq ifr)
2042	{
2043	struct bpf_if *bp;
2044	struct ifnet *theywant;
2045
2046	BPF_LOCK_ASSERT();
2047
2048	theywant = ifunit(ifr->ifr_name);
2049	if (theywant == NULL \|\| theywant->if_bpf == NULL)
2050	return (ENXIO);
2051
2052	bp = theywant->if_bpf;
2053
2054	/* Check if interface is not being detached from BPF */
2055	BPFIF_RLOCK(bp);
2056	if (bp->bif_flags & BPFIF_FLAG_DYING) {
2057	BPFIF_RUNLOCK(bp);
2058	return (ENXIO);
2059	}
2060	BPFIF_RUNLOCK(bp);
2061
2062	/*
2063	* At this point, we expect the buffer is already allocated. If not,
2064	* return an error.
2065	*/
2066	switch (d->bd_bufmode) {
2067	case BPF_BUFMODE_BUFFER:
2068	#ifndef __rtems__
2069	case BPF_BUFMODE_ZBUF:
2070	#endif /* __rtems__ */
2071	if (d->bd_sbuf == NULL)
2072	return (EINVAL);
2073	break;
2074
2075	default:
2076	panic("bpf_setif: bufmode %d", d->bd_bufmode);
2077	}
2078	if (bp != d->bd_bif)
2079	bpf_attachd(d, bp);
2080	BPFD_LOCK(d);
2081	reset_d(d);
2082	BPFD_UNLOCK(d);
2083	return (0);
2084	}
2085
2086	/*
2087	* Support for select() and poll() system calls
2088	*
2089	* Return true iff the specific operation will not block indefinitely.
2090	* Otherwise, return false but make a note that a selwakeup() must be done.
2091	*/
2092	static int
2093	#ifndef __rtems__
2094	bpfpoll(struct cdev dev, int events, struct thread td)
2095	#else /* __rtems__ */
2096	bpfpoll(struct bpf_d d, int events, struct thread td)
2097	#endif /* __rtems__ */
2098	{
2099	#ifndef __rtems__
2100	struct bpf_d *d;
2101	#endif /* __rtems__ */
2102	int revents;
2103
2104	if (devfs_get_cdevpriv((void **)&d) != 0 \|\| d->bd_bif == NULL)
2105	return (events &
2106	(POLLHUP\|POLLIN\|POLLRDNORM\|POLLOUT\|POLLWRNORM));
2107
2108	/*
2109	* Refresh PID associated with this descriptor.
2110	*/
2111	revents = events & (POLLOUT \| POLLWRNORM);
2112	BPFD_LOCK(d);
2113	BPF_PID_REFRESH(d, td);
2114	if (events & (POLLIN \| POLLRDNORM)) {
2115	if (bpf_ready(d))
2116	revents \|= events & (POLLIN \| POLLRDNORM);
2117	else {
2118	selrecord(td, &d->bd_sel);
2119	/* Start the read timeout if necessary. */
2120	if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
2121	callout_reset(&d->bd_callout, d->bd_rtout,
2122	bpf_timed_out, d);
2123	d->bd_state = BPF_WAITING;
2124	}
2125	}
2126	}
2127	BPFD_UNLOCK(d);
2128	return (revents);
2129	}
2130
2131	/*
2132	* Support for kevent() system call. Register EVFILT_READ filters and
2133	* reject all others.
2134	*/
2135	#ifdef __rtems__
2136	static
2137	#endif /* __rtems__ */
2138	int
2139	#ifndef __rtems__
2140	bpfkqfilter(struct cdev dev, struct knote kn)
2141	#else /* __rtems__ */
2142	bpfkqfilter(struct bpf_d d, struct knote kn)
2143	#endif /* __rtems__ */
2144	{
2145	#ifndef __rtems__
2146	struct bpf_d *d;
2147	#endif /* __rtems__ */
2148
2149	if (devfs_get_cdevpriv((void **)&d) != 0 \|\|
2150	kn->kn_filter != EVFILT_READ)
2151	return (1);
2152
2153	/*
2154	* Refresh PID associated with this descriptor.
2155	*/
2156	BPFD_LOCK(d);
2157	BPF_PID_REFRESH_CUR(d);
2158	kn->kn_fop = &bpfread_filtops;
2159	kn->kn_hook = d;
2160	knlist_add(&d->bd_sel.si_note, kn, 1);
2161	BPFD_UNLOCK(d);
2162
2163	return (0);
2164	}
2165
2166	static void
2167	filt_bpfdetach(struct knote *kn)
2168	{
2169	struct bpf_d d = (struct bpf_d )kn->kn_hook;
2170
2171	knlist_remove(&d->bd_sel.si_note, kn, 0);
2172	}
2173
2174	static int
2175	filt_bpfread(struct knote *kn, long hint)
2176	{
2177	struct bpf_d d = (struct bpf_d )kn->kn_hook;
2178	int ready;
2179
2180	BPFD_LOCK_ASSERT(d);
2181	ready = bpf_ready(d);
2182	if (ready) {
2183	kn->kn_data = d->bd_slen;
2184	/*
2185	* Ignore the hold buffer if it is being copied to user space.
2186	*/
2187	if (!d->bd_hbuf_in_use && d->bd_hbuf)
2188	kn->kn_data += d->bd_hlen;
2189	} else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
2190	callout_reset(&d->bd_callout, d->bd_rtout,
2191	bpf_timed_out, d);
2192	d->bd_state = BPF_WAITING;
2193	}
2194
2195	return (ready);
2196	}
2197
/*
 * Timestamp quality levels used by the tap routines below.  The values are
 * ordered: the tap routines compare them with '<' to decide whether a
 * timestamp already taken is good enough for the next descriptor.
 */
#define	BPF_TSTAMP_NONE		0	/* no timestamp taken */
#define	BPF_TSTAMP_FAST		1	/* taken with getbinuptime() */
#define	BPF_TSTAMP_NORMAL	2	/* taken with binuptime() */
#define	BPF_TSTAMP_EXTERN	3	/* copied from an mbuf tag */
2202
2203	static int
2204	bpf_ts_quality(int tstype)
2205	{
2206
2207	if (tstype == BPF_T_NONE)
2208	return (BPF_TSTAMP_NONE);
2209	if ((tstype & BPF_T_FAST) != 0)
2210	return (BPF_TSTAMP_FAST);
2211
2212	return (BPF_TSTAMP_NORMAL);
2213	}
2214
2215	static int
2216	bpf_gettime(struct bintime bt, int tstype, struct mbuf m)
2217	{
2218	struct m_tag *tag;
2219	int quality;
2220
2221	quality = bpf_ts_quality(tstype);
2222	if (quality == BPF_TSTAMP_NONE)
2223	return (quality);
2224
2225	if (m != NULL) {
2226	tag = m_tag_locate(m, MTAG_BPF, MTAG_BPF_TIMESTAMP, NULL);
2227	if (tag != NULL) {
2228	bt = (struct bintime *)(tag + 1);
2229	return (BPF_TSTAMP_EXTERN);
2230	}
2231	}
2232	if (quality == BPF_TSTAMP_NORMAL)
2233	binuptime(bt);
2234	else
2235	getbinuptime(bt);
2236
2237	return (quality);
2238	}
2239
2240	/*
2241	* Incoming linkage from device drivers. Process the packet pkt, of length
2242	* pktlen, which is stored in a contiguous buffer. The packet is parsed
2243	* by each process' filter, and if accepted, stashed into the corresponding
2244	* buffer.
2245	*/
2246	void
2247	bpf_tap(struct bpf_if bp, u_char pkt, u_int pktlen)
2248	{
2249	struct bintime bt;
2250	struct bpf_d *d;
2251	#ifdef BPF_JITTER
2252	bpf_jit_filter *bf;
2253	#endif
2254	u_int slen;
2255	int gottime;
2256
2257	gottime = BPF_TSTAMP_NONE;
2258
2259	BPFIF_RLOCK(bp);
2260
2261	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
2262	/*
2263	* We are not using any locks for d here because:
2264	* 1) any filter change is protected by interface
2265	* write lock
2266	* 2) destroying/detaching d is protected by interface
2267	* write lock, too
2268	*/
2269
2270	counter_u64_add(d->bd_rcount, 1);
2271	/*
2272	* NB: We dont call BPF_CHECK_DIRECTION() here since there is no
2273	* way for the caller to indiciate to us whether this packet
2274	* is inbound or outbound. In the bpf_mtap() routines, we use
2275	* the interface pointers on the mbuf to figure it out.
2276	*/
2277	#ifdef BPF_JITTER
2278	bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
2279	if (bf != NULL)
2280	slen = (*(bf->func))(pkt, pktlen, pktlen);
2281	else
2282	#endif
2283	slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
2284	if (slen != 0) {
2285	/*
2286	* Filter matches. Let's to acquire write lock.
2287	*/
2288	BPFD_LOCK(d);
2289
2290	counter_u64_add(d->bd_fcount, 1);
2291	if (gottime < bpf_ts_quality(d->bd_tstamp))
2292	gottime = bpf_gettime(&bt, d->bd_tstamp, NULL);
2293	#ifdef MAC
2294	if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
2295	#endif
2296	catchpacket(d, pkt, pktlen, slen,
2297	bpf_append_bytes, &bt);
2298	BPFD_UNLOCK(d);
2299	}
2300	}
2301	BPFIF_RUNLOCK(bp);
2302	}
2303
/*
 * True when descriptor d must NOT see a packet whose receive interface is
 * r, tapped on interface i: an "in only" descriptor skips packets that
 * were not received on i, an "out only" descriptor skips packets that were.
 */
#define	BPF_CHECK_DIRECTION(d, r, i)				\
	    (((d)->bd_direction == BPF_D_IN && (r) != (i)) ||	\
	    ((d)->bd_direction == BPF_D_OUT && (r) == (i)))
2307
2308	/*
2309	* Incoming linkage from device drivers, when packet is in an mbuf chain.
2310	* Locking model is explained in bpf_tap().
2311	*/
2312	void
2313	bpf_mtap(struct bpf_if bp, struct mbuf m)
2314	{
2315	struct bintime bt;
2316	struct bpf_d *d;
2317	#ifdef BPF_JITTER
2318	bpf_jit_filter *bf;
2319	#endif
2320	u_int pktlen, slen;
2321	int gottime;
2322
2323	/* Skip outgoing duplicate packets. */
2324	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
2325	m->m_flags &= ~M_PROMISC;
2326	return;
2327	}
2328
2329	pktlen = m_length(m, NULL);
2330	gottime = BPF_TSTAMP_NONE;
2331
2332	BPFIF_RLOCK(bp);
2333
2334	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
2335	if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
2336	continue;
2337	counter_u64_add(d->bd_rcount, 1);
2338	#ifdef BPF_JITTER
2339	bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
2340	/* XXX We cannot handle multiple mbufs. */
2341	if (bf != NULL && m->m_next == NULL)
2342	slen = ((bf->func))(mtod(m, u_char ), pktlen, pktlen);
2343	else
2344	#endif
2345	slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
2346	if (slen != 0) {
2347	BPFD_LOCK(d);
2348
2349	counter_u64_add(d->bd_fcount, 1);
2350	if (gottime < bpf_ts_quality(d->bd_tstamp))
2351	gottime = bpf_gettime(&bt, d->bd_tstamp, m);
2352	#ifdef MAC
2353	if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
2354	#endif
2355	catchpacket(d, (u_char *)m, pktlen, slen,
2356	bpf_append_mbuf, &bt);
2357	BPFD_UNLOCK(d);
2358	}
2359	}
2360	BPFIF_RUNLOCK(bp);
2361	}
2362
2363	/*
2364	* Incoming linkage from device drivers, when packet is in
2365	* an mbuf chain and to be prepended by a contiguous header.
2366	*/
2367	void
2368	bpf_mtap2(struct bpf_if bp, void data, u_int dlen, struct mbuf *m)
2369	{
2370	struct bintime bt;
2371	struct mbuf mb;
2372	struct bpf_d *d;
2373	u_int pktlen, slen;
2374	int gottime;
2375
2376	/* Skip outgoing duplicate packets. */
2377	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
2378	m->m_flags &= ~M_PROMISC;
2379	return;
2380	}
2381
2382	pktlen = m_length(m, NULL);
2383	/*
2384	* Craft on-stack mbuf suitable for passing to bpf_filter.
2385	* Note that we cut corners here; we only setup what's
2386	* absolutely needed--this mbuf should never go anywhere else.
2387	*/
2388	mb.m_next = m;
2389	mb.m_data = data;
2390	mb.m_len = dlen;
2391	pktlen += dlen;
2392
2393	gottime = BPF_TSTAMP_NONE;
2394
2395	BPFIF_RLOCK(bp);
2396
2397	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
2398	if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
2399	continue;
2400	counter_u64_add(d->bd_rcount, 1);
2401	slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
2402	if (slen != 0) {
2403	BPFD_LOCK(d);
2404
2405	counter_u64_add(d->bd_fcount, 1);
2406	if (gottime < bpf_ts_quality(d->bd_tstamp))
2407	gottime = bpf_gettime(&bt, d->bd_tstamp, m);
2408	#ifdef MAC
2409	if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
2410	#endif
2411	catchpacket(d, (u_char *)&mb, pktlen, slen,
2412	bpf_append_mbuf, &bt);
2413	BPFD_UNLOCK(d);
2414	}
2415	}
2416	BPFIF_RUNLOCK(bp);
2417	}
2418
/* The tap-local helper macros are not needed past this point. */
#undef	BPF_CHECK_DIRECTION

#undef	BPF_TSTAMP_NONE
#undef	BPF_TSTAMP_FAST
#undef	BPF_TSTAMP_NORMAL
#undef	BPF_TSTAMP_EXTERN
2425
2426	static int
2427	bpf_hdrlen(struct bpf_d *d)
2428	{
2429	int hdrlen;
2430
2431	hdrlen = d->bd_bif->bif_hdrlen;
2432	#ifndef BURN_BRIDGES
2433	if (d->bd_tstamp == BPF_T_NONE \|\|
2434	BPF_T_FORMAT(d->bd_tstamp) == BPF_T_MICROTIME)
2435	#ifdef COMPAT_FREEBSD32
2436	if (d->bd_compat32)
2437	hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr32);
2438	else
2439	#endif
2440	hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr);
2441	else
2442	#endif
2443	hdrlen += SIZEOF_BPF_HDR(struct bpf_xhdr);
2444	#ifdef COMPAT_FREEBSD32
2445	if (d->bd_compat32)
2446	hdrlen = BPF_WORDALIGN32(hdrlen);
2447	else
2448	#endif
2449	hdrlen = BPF_WORDALIGN(hdrlen);
2450
2451	return (hdrlen - d->bd_bif->bif_hdrlen);
2452	}
2453
2454	static void
2455	bpf_bintime2ts(struct bintime bt, struct bpf_ts ts, int tstype)
2456	{
2457	struct bintime bt2, boottimebin;
2458	struct timeval tsm;
2459	struct timespec tsn;
2460
2461	if ((tstype & BPF_T_MONOTONIC) == 0) {
2462	bt2 = *bt;
2463	getboottimebin(&boottimebin);
2464	bintime_add(&bt2, &boottimebin);
2465	bt = &bt2;
2466	}
2467	switch (BPF_T_FORMAT(tstype)) {
2468	case BPF_T_MICROTIME:
2469	bintime2timeval(bt, &tsm);
2470	ts->bt_sec = tsm.tv_sec;
2471	ts->bt_frac = tsm.tv_usec;
2472	break;
2473	case BPF_T_NANOTIME:
2474	bintime2timespec(bt, &tsn);
2475	ts->bt_sec = tsn.tv_sec;
2476	ts->bt_frac = tsn.tv_nsec;
2477	break;
2478	case BPF_T_BINTIME:
2479	ts->bt_sec = bt->sec;
2480	ts->bt_frac = bt->frac;
2481	break;
2482	}
2483	}
2484
2485	/*
2486	* Move the packet data from interface memory (pkt) into the
2487	* store buffer. "cpfn" is the routine called to do the actual data
2488	* transfer. bcopy is passed in to copy contiguous chunks, while
2489	* bpf_append_mbuf is passed in to copy mbuf chains. In the latter case,
2490	* pkt is really an mbuf.
2491	*/
2492	static void
2493	catchpacket(struct bpf_d d, u_char pkt, u_int pktlen, u_int snaplen,
2494	void (cpfn)(struct bpf_d , caddr_t, u_int, void *, u_int),
2495	struct bintime *bt)
2496	{
2497	struct bpf_xhdr hdr;
2498	#ifndef BURN_BRIDGES
2499	struct bpf_hdr hdr_old;
2500	#ifdef COMPAT_FREEBSD32
2501	struct bpf_hdr32 hdr32_old;
2502	#endif
2503	#endif
2504	int caplen, curlen, hdrlen, totlen;
2505	int do_wakeup = 0;
2506	int do_timestamp;
2507	int tstype;
2508
2509	BPFD_LOCK_ASSERT(d);
2510
2511	/*
2512	* Detect whether user space has released a buffer back to us, and if
2513	* so, move it from being a hold buffer to a free buffer. This may
2514	* not be the best place to do it (for example, we might only want to
2515	* run this check if we need the space), but for now it's a reliable
2516	* spot to do it.
2517	*/
2518	if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) {
2519	d->bd_fbuf = d->bd_hbuf;
2520	d->bd_hbuf = NULL;
2521	d->bd_hlen = 0;
2522	bpf_buf_reclaimed(d);
2523	}
2524
2525	/*
2526	* Figure out how many bytes to move. If the packet is
2527	* greater or equal to the snapshot length, transfer that
2528	* much. Otherwise, transfer the whole packet (unless
2529	* we hit the buffer size limit).
2530	*/
2531	hdrlen = bpf_hdrlen(d);
2532	totlen = hdrlen + min(snaplen, pktlen);
2533	if (totlen > d->bd_bufsize)
2534	totlen = d->bd_bufsize;
2535
2536	/*
2537	* Round up the end of the previous packet to the next longword.
2538	*
2539	* Drop the packet if there's no room and no hope of room
2540	* If the packet would overflow the storage buffer or the storage
2541	* buffer is considered immutable by the buffer model, try to rotate
2542	* the buffer and wakeup pending processes.
2543	*/
2544	#ifdef COMPAT_FREEBSD32
2545	if (d->bd_compat32)
2546	curlen = BPF_WORDALIGN32(d->bd_slen);
2547	else
2548	#endif
2549	curlen = BPF_WORDALIGN(d->bd_slen);
2550	if (curlen + totlen > d->bd_bufsize \|\| !bpf_canwritebuf(d)) {
2551	if (d->bd_fbuf == NULL) {
2552	/*
2553	* There's no room in the store buffer, and no
2554	* prospect of room, so drop the packet. Notify the
2555	* buffer model.
2556	*/
2557	bpf_buffull(d);
2558	counter_u64_add(d->bd_dcount, 1);
2559	return;
2560	}
2561	KASSERT(!d->bd_hbuf_in_use, ("hold buffer is in use"));
2562	ROTATE_BUFFERS(d);
2563	do_wakeup = 1;
2564	curlen = 0;
2565	} else if (d->bd_immediate \|\| d->bd_state == BPF_TIMED_OUT)
2566	/*
2567	* Immediate mode is set, or the read timeout has already
2568	* expired during a select call. A packet arrived, so the
2569	* reader should be woken up.
2570	*/
2571	do_wakeup = 1;
2572	caplen = totlen - hdrlen;
2573	tstype = d->bd_tstamp;
2574	do_timestamp = tstype != BPF_T_NONE;
2575	#ifndef BURN_BRIDGES
2576	if (tstype == BPF_T_NONE \|\| BPF_T_FORMAT(tstype) == BPF_T_MICROTIME) {
2577	struct bpf_ts ts;
2578	if (do_timestamp)
2579	bpf_bintime2ts(bt, &ts, tstype);
2580	#ifdef COMPAT_FREEBSD32
2581	if (d->bd_compat32) {
2582	bzero(&hdr32_old, sizeof(hdr32_old));
2583	if (do_timestamp) {
2584	hdr32_old.bh_tstamp.tv_sec = ts.bt_sec;
2585	hdr32_old.bh_tstamp.tv_usec = ts.bt_frac;
2586	}
2587	hdr32_old.bh_datalen = pktlen;
2588	hdr32_old.bh_hdrlen = hdrlen;
2589	hdr32_old.bh_caplen = caplen;
2590	bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr32_old,
2591	sizeof(hdr32_old));
2592	goto copy;
2593	}
2594	#endif
2595	bzero(&hdr_old, sizeof(hdr_old));
2596	if (do_timestamp) {
2597	hdr_old.bh_tstamp.tv_sec = ts.bt_sec;
2598	hdr_old.bh_tstamp.tv_usec = ts.bt_frac;
2599	}
2600	hdr_old.bh_datalen = pktlen;
2601	hdr_old.bh_hdrlen = hdrlen;
2602	hdr_old.bh_caplen = caplen;
2603	bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr_old,
2604	sizeof(hdr_old));
2605	goto copy;
2606	}
2607	#endif
2608
2609	/*
2610	* Append the bpf header. Note we append the actual header size, but
2611	* move forward the length of the header plus padding.
2612	*/
2613	bzero(&hdr, sizeof(hdr));
2614	if (do_timestamp)
2615	bpf_bintime2ts(bt, &hdr.bh_tstamp, tstype);
2616	hdr.bh_datalen = pktlen;
2617	hdr.bh_hdrlen = hdrlen;
2618	hdr.bh_caplen = caplen;
2619	bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr, sizeof(hdr));
2620
2621	/*
2622	* Copy the packet data into the store buffer and update its length.
2623	*/
2624	#ifndef BURN_BRIDGES
2625	copy:
2626	#endif
2627	(*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, caplen);
2628	d->bd_slen = curlen + totlen;
2629
2630	if (do_wakeup)
2631	bpf_wakeup(d);
2632	}
2633
2634	/*
2635	* Free buffers currently in use by a descriptor.
2636	* Called on close.
2637	*/
2638	static void
2639	bpf_freed(struct bpf_d *d)
2640	{
2641
2642	/*
2643	* We don't need to lock out interrupts since this descriptor has
2644	* been detached from its interface and it yet hasn't been marked
2645	* free.
2646	*/
2647	bpf_free(d);
2648	if (d->bd_rfilter != NULL) {
2649	free((caddr_t)d->bd_rfilter, M_BPF);
2650	#ifdef BPF_JITTER
2651	if (d->bd_bfilter != NULL)
2652	bpf_destroy_jit_filter(d->bd_bfilter);
2653	#endif
2654	}
2655	if (d->bd_wfilter != NULL)
2656	free((caddr_t)d->bd_wfilter, M_BPF);
2657	mtx_destroy(&d->bd_lock);
2658
2659	counter_u64_free(d->bd_rcount);
2660	counter_u64_free(d->bd_dcount);
2661	counter_u64_free(d->bd_fcount);
2662	counter_u64_free(d->bd_wcount);
2663	counter_u64_free(d->bd_wfcount);
2664	counter_u64_free(d->bd_wdcount);
2665	counter_u64_free(d->bd_zcopy);
2666
2667	}
2668
2669	/*
2670	* Attach an interface to bpf. dlt is the link layer type; hdrlen is the
2671	* fixed size of the link header (variable length headers not yet supported).
2672	*/
2673	void
2674	bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
2675	{
2676
2677	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
2678	}
2679
2680	/*
2681	* Attach an interface to bpf. ifp is a pointer to the structure
2682	* defining the interface to be attached, dlt is the link layer type,
2683	* and hdrlen is the fixed size of the link header (variable length
2684	* headers are not yet supporrted).
2685	*/
2686	void
2687	bpfattach2(struct ifnet ifp, u_int dlt, u_int hdrlen, struct bpf_if *driverp)
2688	{
2689	struct bpf_if *bp;
2690
2691	bp = malloc(sizeof(*bp), M_BPF, M_NOWAIT \| M_ZERO);
2692	if (bp == NULL)
2693	panic("bpfattach");
2694
2695	LIST_INIT(&bp->bif_dlist);
2696	LIST_INIT(&bp->bif_wlist);
2697	bp->bif_ifp = ifp;
2698	bp->bif_dlt = dlt;
2699	rw_init(&bp->bif_lock, "bpf interface lock");
2700	KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized"));
2701	bp->bif_bpf = driverp;
2702	*driverp = bp;
2703
2704	BPF_LOCK();
2705	LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
2706	BPF_UNLOCK();
2707
2708	bp->bif_hdrlen = hdrlen;
2709
2710	if (bootverbose && IS_DEFAULT_VNET(curvnet))
2711	if_printf(ifp, "bpf attached\n");
2712	}
2713
2714	#ifdef VIMAGE
2715	/*
2716	* When moving interfaces between vnet instances we need a way to
2717	* query the dlt and hdrlen before detach so we can re-attch the if_bpf
2718	* after the vmove. We unfortunately have no device driver infrastructure
2719	* to query the interface for these values after creation/attach, thus
2720	* add this as a workaround.
2721	*/
2722	int
2723	bpf_get_bp_params(struct bpf_if bp, u_int bif_dlt, u_int *bif_hdrlen)
2724	{
2725
2726	if (bp == NULL)
2727	return (ENXIO);
2728	if (bif_dlt == NULL && bif_hdrlen == NULL)
2729	return (0);
2730
2731	if (bif_dlt != NULL)
2732	*bif_dlt = bp->bif_dlt;
2733	if (bif_hdrlen != NULL)
2734	*bif_hdrlen = bp->bif_hdrlen;
2735
2736	return (0);
2737	}
2738	#endif
2739
2740	/*
2741	* Detach bpf from an interface. This involves detaching each descriptor
2742	* associated with the interface. Notify each descriptor as it's detached
2743	* so that any sleepers wake up and get ENXIO.
2744	*/
2745	void
2746	bpfdetach(struct ifnet *ifp)
2747	{
2748	struct bpf_if bp, bp_temp;
2749	struct bpf_d *d;
2750	int ndetached;
2751
2752	ndetached = 0;
2753
2754	BPF_LOCK();
2755	/* Find all bpf_if struct's which reference ifp and detach them. */
2756	LIST_FOREACH_SAFE(bp, &bpf_iflist, bif_next, bp_temp) {
2757	if (ifp != bp->bif_ifp)
2758	continue;
2759
2760	LIST_REMOVE(bp, bif_next);
2761	/* Add to to-be-freed list */
2762	LIST_INSERT_HEAD(&bpf_freelist, bp, bif_next);
2763
2764	ndetached++;
2765	/*
2766	* Delay freeing bp till interface is detached
2767	* and all routes through this interface are removed.
2768	* Mark bp as detached to restrict new consumers.
2769	*/
2770	BPFIF_WLOCK(bp);
2771	bp->bif_flags \|= BPFIF_FLAG_DYING;
2772	bp->bif_bpf = (struct bpf_if )&dead_bpf_if;
2773	BPFIF_WUNLOCK(bp);
2774
2775	CTR4(KTR_NET, "%s: sheduling free for encap %d (%p) for if %p",
2776	__func__, bp->bif_dlt, bp, ifp);
2777
2778	/* Free common descriptors */
2779	while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
2780	bpf_detachd_locked(d);
2781	BPFD_LOCK(d);
2782	bpf_wakeup(d);
2783	BPFD_UNLOCK(d);
2784	}
2785
2786	/* Free writer-only descriptors */
2787	while ((d = LIST_FIRST(&bp->bif_wlist)) != NULL) {
2788	bpf_detachd_locked(d);
2789	BPFD_LOCK(d);
2790	bpf_wakeup(d);
2791	BPFD_UNLOCK(d);
2792	}
2793	}
2794	BPF_UNLOCK();
2795
2796	#ifdef INVARIANTS
2797	if (ndetached == 0)
2798	printf("bpfdetach: %s was not attached\n", ifp->if_xname);
2799	#endif
2800	}
2801
2802	/*
2803	* Interface departure handler.
2804	* Note departure event does not guarantee interface is going down.
2805	* Interface renaming is currently done via departure/arrival event set.
2806	*
2807	* Departure handled is called after all routes pointing to
2808	* given interface are removed and interface is in down state
2809	* restricting any packets to be sent/received. We assume it is now safe
2810	* to free data allocated by BPF.
2811	*/
2812	static void
2813	bpf_ifdetach(void arg __unused, struct ifnet ifp)
2814	{
2815	struct bpf_if bp, bp_temp;
2816	int nmatched = 0;
2817
2818	/* Ignore ifnet renaming. */
2819	if (ifp->if_flags & IFF_RENAMING)
2820	return;
2821
2822	BPF_LOCK();
2823	/*
2824	* Find matching entries in free list.
2825	* Nothing should be found if bpfdetach() was not called.
2826	*/
2827	LIST_FOREACH_SAFE(bp, &bpf_freelist, bif_next, bp_temp) {
2828	if (ifp != bp->bif_ifp)
2829	continue;
2830
2831	CTR3(KTR_NET, "%s: freeing BPF instance %p for interface %p",
2832	__func__, bp, ifp);
2833
2834	LIST_REMOVE(bp, bif_next);
2835
2836	rw_destroy(&bp->bif_lock);
2837	free(bp, M_BPF);
2838
2839	nmatched++;
2840	}
2841	BPF_UNLOCK();
2842	}
2843
2844	/*
2845	* Get a list of available data link type of the interface.
2846	*/
2847	static int
2848	bpf_getdltlist(struct bpf_d d, struct bpf_dltlist bfl)
2849	{
2850	struct ifnet *ifp;
2851	struct bpf_if *bp;
2852	u_int *lst;
2853	int error, n, n1;
2854
2855	BPF_LOCK_ASSERT();
2856
2857	ifp = d->bd_bif->bif_ifp;
2858	again:
2859	n1 = 0;
2860	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
2861	if (bp->bif_ifp == ifp)
2862	n1++;
2863	}
2864	if (bfl->bfl_list == NULL) {
2865	bfl->bfl_len = n1;
2866	return (0);
2867	}
2868	if (n1 > bfl->bfl_len)
2869	return (ENOMEM);
2870	BPF_UNLOCK();
2871	lst = malloc(n1 * sizeof(u_int), M_TEMP, M_WAITOK);
2872	n = 0;
2873	BPF_LOCK();
2874	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
2875	if (bp->bif_ifp != ifp)
2876	continue;
2877	if (n >= n1) {
2878	free(lst, M_TEMP);
2879	goto again;
2880	}
2881	lst[n] = bp->bif_dlt;
2882	n++;
2883	}
2884	BPF_UNLOCK();
2885	error = copyout(lst, bfl->bfl_list, sizeof(u_int) * n);
2886	free(lst, M_TEMP);
2887	BPF_LOCK();
2888	bfl->bfl_len = n;
2889	return (error);
2890	}
2891
2892	/*
2893	* Set the data link type of a BPF instance.
2894	*/
2895	static int
2896	bpf_setdlt(struct bpf_d *d, u_int dlt)
2897	{
2898	int error, opromisc;
2899	struct ifnet *ifp;
2900	struct bpf_if *bp;
2901
2902	BPF_LOCK_ASSERT();
2903
2904	if (d->bd_bif->bif_dlt == dlt)
2905	return (0);
2906	ifp = d->bd_bif->bif_ifp;
2907
2908	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
2909	if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
2910	break;
2911	}
2912
2913	if (bp != NULL) {
2914	opromisc = d->bd_promisc;
2915	bpf_attachd(d, bp);
2916	BPFD_LOCK(d);
2917	reset_d(d);
2918	BPFD_UNLOCK(d);
2919	if (opromisc) {
2920	error = ifpromisc(bp->bif_ifp, 1);
2921	if (error)
2922	if_printf(bp->bif_ifp,
2923	"bpf_setdlt: ifpromisc failed (%d)\n",
2924	error);
2925	else
2926	d->bd_promisc = 1;
2927	}
2928	}
2929	return (bp == NULL ? EINVAL : 0);
2930	}
2931	#ifdef __rtems__
2932	static struct bpf_d *
2933	bpf_imfs_get_context_by_iop(const rtems_libio_t *iop)
2934	{
2935	return iop->data1;
2936	}
2937
2938	static int
2939	bpf_imfs_open(rtems_libio_t iop, const char path, int oflag, mode_t mode)
2940	{
2941	struct bpf_d *d;
2942
2943	d = bpfopen(NULL, oflag + 1, 0, NULL);
2944	iop->data1 = d;
2945
2946	if (d != NULL) {
2947	return (0);
2948	} else {
2949	rtems_set_errno_and_return_minus_one(ENOMEM);
2950	}
2951	}
2952
2953	static int
2954	bpf_imfs_close(rtems_libio_t *iop)
2955	{
2956	struct bpf_d *d = bpf_imfs_get_context_by_iop(iop);
2957
2958	bpf_dtor(d);
2959
2960	return (0);
2961	}
2962
2963	static ssize_t
2964	bpf_imfs_readv(rtems_libio_t iop, const struct iovec iov, int iovcnt, ssize_t total)
2965	{
2966	struct bpf_d *d = bpf_imfs_get_context_by_iop(iop);
2967	struct thread *td = rtems_bsd_get_curthread_or_null();
2968	struct uio uio = {
2969	.uio_iov = iov,
2970	.uio_iovcnt = iovcnt,
2971	.uio_offset = 0,
2972	.uio_resid = total,
2973	.uio_segflg = UIO_USERSPACE,
2974	.uio_rw = UIO_READ,
2975	.uio_td = td
2976	};
2977	int error;
2978
2979	if (td != NULL) {
2980	error = bpfread(d, &uio,
2981	rtems_libio_to_fcntl_flags(iop->flags));
2982	} else {
2983	error = ENOMEM;
2984	}
2985
2986	if (error == 0) {
2987	return (total - uio.uio_resid);
2988	} else {
2989	rtems_set_errno_and_return_minus_one(error);
2990	}
2991	}
2992
2993	static ssize_t
2994	bpf_imfs_read(rtems_libio_t iop, void buffer, size_t count)
2995	{
2996	struct iovec iov = {
2997	.iov_base = buffer,
2998	.iov_len = count
2999	};
3000
3001	return bpf_imfs_readv(iop, &iov, 1, count);
3002	}
3003
3004	static ssize_t
3005	bpf_imfs_writev(rtems_libio_t iop, const struct iovec iov, int iovcnt, ssize_t total)
3006	{
3007	struct bpf_d *d = bpf_imfs_get_context_by_iop(iop);
3008	struct thread *td = rtems_bsd_get_curthread_or_null();
3009	struct uio uio = {
3010	.uio_iov = iov,
3011	.uio_iovcnt = iovcnt,
3012	.uio_offset = 0,
3013	.uio_resid = total,
3014	.uio_segflg = UIO_USERSPACE,
3015	.uio_rw = UIO_WRITE,
3016	.uio_td = td
3017	};
3018	int error;
3019
3020	if (td != NULL) {
3021	error = bpfwrite(d, &uio,
3022	rtems_libio_to_fcntl_flags(iop->flags));
3023	} else {
3024	error = ENOMEM;
3025	}
3026
3027	if (error == 0) {
3028	return (total - uio.uio_resid);
3029	} else {
3030	rtems_set_errno_and_return_minus_one(error);
3031	}
3032	}
3033
3034	static ssize_t
3035	bpf_imfs_write(rtems_libio_t iop, const void buffer, size_t count)
3036	{
3037	struct iovec iov = {
3038	.iov_base = buffer,
3039	.iov_len = count
3040	};
3041
3042	return bpf_imfs_writev(iop, &iov, 1, count);
3043	}
3044
3045	static int
3046	bpf_imfs_ioctl(rtems_libio_t iop, ioctl_command_t request, void buffer)
3047	{
3048	struct bpf_d *d = bpf_imfs_get_context_by_iop(iop);
3049	struct thread *td = rtems_bsd_get_curthread_or_null();
3050	int error;
3051
3052	if (td != 0) {
3053	error = bpfioctl(d, request, buffer, 0, td);
3054	} else {
3055	error = ENOMEM;
3056	}
3057
3058	return rtems_bsd_error_to_status_and_errno(error);
3059	}
3060
3061	static int
3062	bpf_imfs_poll(rtems_libio_t *iop, int events)
3063	{
3064	struct bpf_d *d = bpf_imfs_get_context_by_iop(iop);
3065
3066	return (bpfpoll(d, events, rtems_bsd_get_curthread_or_wait_forever()));
3067	}
3068
3069	static int
3070	bpf_imfs_kqfilter(rtems_libio_t iop, struct knote kn)
3071	{
3072	struct bpf_d *d = bpf_imfs_get_context_by_iop(iop);
3073
3074	return (bpfkqfilter(d, kn));
3075	}
3076
3077	static const rtems_filesystem_file_handlers_r bpf_imfs_handlers = {
3078	.open_h = bpf_imfs_open,
3079	.close_h = bpf_imfs_close,
3080	.read_h = bpf_imfs_read,
3081	.write_h = bpf_imfs_write,
3082	.ioctl_h = bpf_imfs_ioctl,
3083	.lseek_h = rtems_filesystem_default_lseek_file,
3084	.fstat_h = rtems_filesystem_default_fstat,
3085	.ftruncate_h = rtems_filesystem_default_ftruncate,
3086	.fsync_h = rtems_filesystem_default_fsync_or_fdatasync,
3087	.fdatasync_h = rtems_filesystem_default_fsync_or_fdatasync,
3088	.fcntl_h = rtems_filesystem_default_fcntl,
3089	.poll_h = bpf_imfs_poll,
3090	.kqfilter_h = bpf_imfs_kqfilter,
3091	.readv_h = bpf_imfs_readv,
3092	.writev_h = bpf_imfs_writev
3093	};
3094
3095	static const IMFS_node_control bpf_imfs_control = IMFS_GENERIC_INITIALIZER(
3096	&bpf_imfs_handlers, IMFS_node_initialize_generic,
3097	IMFS_node_destroy_default);
3098	#endif /* __rtems__ */
3099
3100	static void
3101	bpf_drvinit(void *unused)
3102	{
3103	#ifndef __rtems__
3104	struct cdev *dev;
3105	#else /* __rtems__ */
3106	mode_t mode = S_IFCHR \| S_IRWXU \| S_IRWXG \| S_IRWXO;
3107	int rv;
3108	#endif /* __rtems__ */
3109
3110	sx_init(&bpf_sx, "bpf global lock");
3111	LIST_INIT(&bpf_iflist);
3112	LIST_INIT(&bpf_freelist);
3113
3114	#ifndef __rtems__
3115	dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf");
3116	/* For compatibility */
3117	make_dev_alias(dev, "bpf0");
3118	#else /* __rtems__ */
3119	rv = IMFS_make_generic_node("/dev/bpf", mode, &bpf_imfs_control, NULL);
3120	BSD_ASSERT(rv == 0);
3121	rv = symlink("/dev/bpf", "/dev/bpf0");
3122	BSD_ASSERT(rv == 0);
3123	#endif /* __rtems__ */
3124
3125	/* Register interface departure handler */
3126	bpf_ifdetach_cookie = EVENTHANDLER_REGISTER(
3127	ifnet_departure_event, bpf_ifdetach, NULL,
3128	EVENTHANDLER_PRI_ANY);
3129	}
3130
3131	/*
3132	* Zero out the various packet counters associated with all of the bpf
3133	* descriptors. At some point, we will probably want to get a bit more
3134	* granular and allow the user to specify descriptors to be zeroed.
3135	*/
3136	static void
3137	bpf_zero_counters(void)
3138	{
3139	struct bpf_if *bp;
3140	struct bpf_d *bd;
3141
3142	BPF_LOCK();
3143	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
3144	BPFIF_RLOCK(bp);
3145	LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
3146	BPFD_LOCK(bd);
3147	counter_u64_zero(bd->bd_rcount);
3148	counter_u64_zero(bd->bd_dcount);
3149	counter_u64_zero(bd->bd_fcount);
3150	counter_u64_zero(bd->bd_wcount);
3151	counter_u64_zero(bd->bd_wfcount);
3152	counter_u64_zero(bd->bd_zcopy);
3153	BPFD_UNLOCK(bd);
3154	}
3155	BPFIF_RUNLOCK(bp);
3156	}
3157	BPF_UNLOCK();
3158	}
3159
3160	/*
3161	* Fill filter statistics
3162	*/
3163	static void
3164	bpfstats_fill_xbpf(struct xbpf_d d, struct bpf_d bd)
3165	{
3166
3167	bzero(d, sizeof(*d));
3168	BPFD_LOCK_ASSERT(bd);
3169	d->bd_structsize = sizeof(*d);
3170	/* XXX: reading should be protected by global lock */
3171	d->bd_immediate = bd->bd_immediate;
3172	d->bd_promisc = bd->bd_promisc;
3173	d->bd_hdrcmplt = bd->bd_hdrcmplt;
3174	d->bd_direction = bd->bd_direction;
3175	d->bd_feedback = bd->bd_feedback;
3176	#ifndef __rtems__
3177	d->bd_async = bd->bd_async;
3178	#endif /* __rtems__ */
3179	d->bd_rcount = counter_u64_fetch(bd->bd_rcount);
3180	d->bd_dcount = counter_u64_fetch(bd->bd_dcount);
3181	d->bd_fcount = counter_u64_fetch(bd->bd_fcount);
3182	d->bd_sig = bd->bd_sig;
3183	d->bd_slen = bd->bd_slen;
3184	d->bd_hlen = bd->bd_hlen;
3185	d->bd_bufsize = bd->bd_bufsize;
3186	d->bd_pid = bd->bd_pid;
3187	strlcpy(d->bd_ifname,
3188	bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
3189	d->bd_locked = bd->bd_locked;
3190	d->bd_wcount = counter_u64_fetch(bd->bd_wcount);
3191	d->bd_wdcount = counter_u64_fetch(bd->bd_wdcount);
3192	d->bd_wfcount = counter_u64_fetch(bd->bd_wfcount);
3193	d->bd_zcopy = counter_u64_fetch(bd->bd_zcopy);
3194	d->bd_bufmode = bd->bd_bufmode;
3195	}
3196
3197	/*
3198	* Handle `netstat -B' stats request
3199	*/
3200	static int
3201	bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
3202	{
3203	static const struct xbpf_d zerostats;
3204	struct xbpf_d xbdbuf, xbd, tempstats;
3205	int index, error;
3206	struct bpf_if *bp;
3207	struct bpf_d *bd;
3208
3209	/*
3210	* XXX This is not technically correct. It is possible for non
3211	* privileged users to open bpf devices. It would make sense
3212	* if the users who opened the devices were able to retrieve
3213	* the statistics for them, too.
3214	*/
3215	error = priv_check(req->td, PRIV_NET_BPF);
3216	if (error)
3217	return (error);
3218	/*
3219	* Check to see if the user is requesting that the counters be
3220	* zeroed out. Explicitly check that the supplied data is zeroed,
3221	* as we aren't allowing the user to set the counters currently.
3222	*/
3223	if (req->newptr != NULL) {
3224	if (req->newlen != sizeof(tempstats))
3225	return (EINVAL);
3226	memset(&tempstats, 0, sizeof(tempstats));
3227	error = SYSCTL_IN(req, &tempstats, sizeof(tempstats));
3228	if (error)
3229	return (error);
3230	if (bcmp(&tempstats, &zerostats, sizeof(tempstats)) != 0)
3231	return (EINVAL);
3232	bpf_zero_counters();
3233	return (0);
3234	}
3235	if (req->oldptr == NULL)
3236	return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd)));
3237	if (bpf_bpfd_cnt == 0)
3238	return (SYSCTL_OUT(req, 0, 0));
3239	xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK);
3240	BPF_LOCK();
3241	if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) {
3242	BPF_UNLOCK();
3243	free(xbdbuf, M_BPF);
3244	return (ENOMEM);
3245	}
3246	index = 0;
3247	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
3248	BPFIF_RLOCK(bp);
3249	/* Send writers-only first */
3250	LIST_FOREACH(bd, &bp->bif_wlist, bd_next) {
3251	xbd = &xbdbuf[index++];
3252	BPFD_LOCK(bd);
3253	bpfstats_fill_xbpf(xbd, bd);
3254	BPFD_UNLOCK(bd);
3255	}
3256	LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
3257	xbd = &xbdbuf[index++];
3258	BPFD_LOCK(bd);
3259	bpfstats_fill_xbpf(xbd, bd);
3260	BPFD_UNLOCK(bd);
3261	}
3262	BPFIF_RUNLOCK(bp);
3263	}
3264	BPF_UNLOCK();
3265	error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
3266	free(xbdbuf, M_BPF);
3267	return (error);
3268	}
3269
3270	SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE,bpf_drvinit,NULL);
3271
3272	#else /* !DEV_BPF && !NETGRAPH_BPF */
3273
3274	/*
3275	* NOP stubs to allow bpf-using drivers to load and function.
3276	*
3277	* A 'better' implementation would allow the core bpf functionality
3278	* to be loaded at runtime.
3279	*/
3280
3281	void
3282	bpf_tap(struct bpf_if bp, u_char pkt, u_int pktlen)
3283	{
3284	}
3285
/* Stub used when BPF support is not compiled in: does nothing. */
void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
}
3290
3291	void
3292	bpf_mtap2(struct bpf_if bp, void d, u_int l, struct mbuf *m)
3293	{
3294	}
3295
3296	void
3297	bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
3298	{
3299
3300	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
3301	}
3302
3303	void
3304	bpfattach2(struct ifnet ifp, u_int dlt, u_int hdrlen, struct bpf_if *driverp)
3305	{
3306
3307	driverp = (struct bpf_if )&dead_bpf_if;
3308	}
3309
/* Stub used when BPF support is not compiled in: nothing to detach. */
void
bpfdetach(struct ifnet *ifp)
{

	(void)ifp;
}
3314
3315	u_int
3316	bpf_filter(const struct bpf_insn pc, u_char p, u_int wirelen, u_int buflen)
3317	{
3318	return -1; /* "no filter" behaviour */
3319	}
3320
/*
 * Stub used when BPF support is not compiled in: every filter program is
 * reported as invalid.
 */
int
bpf_validate(const struct bpf_insn *f, int len)
{
	const int valid = 0;	/* false */

	return (valid);
}
3326
3327	#endif /* !DEV_BPF && !NETGRAPH_BPF */
3328
3329	#ifdef DDB
3330	static void
3331	bpf_show_bpf_if(struct bpf_if *bpf_if)
3332	{
3333
3334	if (bpf_if == NULL)
3335	return;
3336	db_printf("%p:\n", bpf_if);
3337	#define BPF_DB_PRINTF(f, e) db_printf(" %s = " f "\n", #e, bpf_if->e);
3338	/* bif_ext.bif_next */
3339	/* bif_ext.bif_dlist */
3340	BPF_DB_PRINTF("%#x", bif_dlt);
3341	BPF_DB_PRINTF("%u", bif_hdrlen);
3342	BPF_DB_PRINTF("%p", bif_ifp);
3343	/* bif_lock */
3344	/* bif_wlist */
3345	BPF_DB_PRINTF("%#x", bif_flags);
3346	}
3347
3348	DB_SHOW_COMMAND(bpf_if, db_show_bpf_if)
3349	{
3350
3351	if (!have_addr) {
3352	db_printf("usage: show bpf_if <struct bpf_if *>\n");
3353	return;
3354	}
3355
3356	bpf_show_bpf_if((struct bpf_if *)addr);
3357	}
3358	#endif

Note: See TracBrowser for help on using the repository browser.

Download in other formats: