Context Navigation

source: rtems-libbsd/freebsd/sys/net/bpf.c @ e6f6b5e

55-freebsd-126-freebsd-12

Last change on this file since e6f6b5e was e6f6b5e, checked in by Nicolas Tsiogkas <lou.nick@…>, on 07/04/18 at 05:19:01
BPF(9): Pass flags to bpfopen()
Property mode set to `100644`
File size: 73.9 KB

Line
1	#include <machine/rtems-bsd-kernel-space.h>
2
3	/*-
4	* Copyright (c) 1990, 1991, 1993
5	* The Regents of the University of California. All rights reserved.
6	*
7	* This code is derived from the Stanford/CMU enet packet filter,
8	* (net/enet.c) distributed as part of 4.3BSD, and code contributed
9	* to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
10	* Berkeley Laboratory.
11	*
12	* Redistribution and use in source and binary forms, with or without
13	* modification, are permitted provided that the following conditions
14	* are met:
15	* 1. Redistributions of source code must retain the above copyright
16	* notice, this list of conditions and the following disclaimer.
17	* 2. Redistributions in binary form must reproduce the above copyright
18	* notice, this list of conditions and the following disclaimer in the
19	* documentation and/or other materials provided with the distribution.
20	* 3. Neither the name of the University nor the names of its contributors
21	* may be used to endorse or promote products derived from this software
22	* without specific prior written permission.
23	*
24	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34	* SUCH DAMAGE.
35	*
36	* @(#)bpf.c 8.4 (Berkeley) 1/9/95
37	*/
38
39	#include <sys/cdefs.h>
40	__FBSDID("$FreeBSD$");
41
42	#include <rtems/bsd/local/opt_bpf.h>
43	#include <rtems/bsd/local/opt_compat.h>
44	#include <rtems/bsd/local/opt_ddb.h>
45	#include <rtems/bsd/local/opt_netgraph.h>
46
47	#include <sys/types.h>
48	#include <sys/param.h>
49	#include <sys/lock.h>
50	#include <sys/rwlock.h>
51	#include <sys/systm.h>
52	#include <sys/conf.h>
53	#include <sys/fcntl.h>
54	#include <sys/jail.h>
55	#include <sys/malloc.h>
56	#include <sys/mbuf.h>
57	#include <sys/time.h>
58	#include <sys/priv.h>
59	#include <sys/proc.h>
60	#include <sys/signalvar.h>
61	#include <sys/filio.h>
62	#include <sys/sockio.h>
63	#include <sys/ttycom.h>
64	#include <sys/uio.h>
65
66	#include <sys/event.h>
67	#include <sys/file.h>
68	#include <sys/poll.h>
69	#include <sys/proc.h>
70
71	#include <sys/socket.h>
72
73	#ifdef DDB
74	#include <ddb/ddb.h>
75	#endif
76
77	#include <net/if.h>
78	#include <net/if_var.h>
79	#include <net/if_dl.h>
80	#include <net/bpf.h>
81	#include <net/bpf_buffer.h>
82	#ifdef BPF_JITTER
83	#include <net/bpf_jitter.h>
84	#endif
85	#include <net/bpf_zerocopy.h>
86	#include <net/bpfdesc.h>
87	#include <net/route.h>
88	#include <net/vnet.h>
89
90	#include <netinet/in.h>
91	#include <netinet/if_ether.h>
92	#include <sys/kernel.h>
93	#include <sys/sysctl.h>
94
95	#include <net80211/ieee80211_freebsd.h>
96
97	#include <security/mac/mac_framework.h>
98	#ifdef __rtems__
99	#include <rtems/imfs.h>
100	#define devfs_get_cdevpriv(x) 0
101	#define devtoname(x) "bpf"
102	#endif /* __rtems__ */
103
104	MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
105
106	struct bpf_if {
107	#define bif_next bif_ext.bif_next
108	#define bif_dlist bif_ext.bif_dlist
109	struct bpf_if_ext bif_ext; /* public members */
110	u_int bif_dlt; /* link layer type */
111	u_int bif_hdrlen; /* length of link header */
112	struct ifnet bif_ifp; / corresponding interface */
113	struct rwlock bif_lock; /* interface lock */
114	LIST_HEAD(, bpf_d) bif_wlist; /* writer-only list */
115	int bif_flags; /* Interface flags */
116	};
117
118	CTASSERT(offsetof(struct bpf_if, bif_ext) == 0);
119
120	#if defined(DEV_BPF) \|\| defined(NETGRAPH_BPF)
121
122	#define PRINET 26 /* interruptible */
123
124	#define SIZEOF_BPF_HDR(type) \
125	(offsetof(type, bh_hdrlen) + sizeof(((type *)0)->bh_hdrlen))
126
127	#ifdef COMPAT_FREEBSD32
128	#include <sys/mount.h>
129	#include <compat/freebsd32/freebsd32.h>
130	#define BPF_ALIGNMENT32 sizeof(int32_t)
131	#define BPF_WORDALIGN32(x) roundup2(x, BPF_ALIGNMENT32)
132
133	#ifndef BURN_BRIDGES
134	/*
135	* 32-bit version of structure prepended to each packet. We use this header
136	* instead of the standard one for 32-bit streams. We mark a stream as
137	* 32-bit the first time we see a 32-bit compat ioctl request.
138	*/
139	struct bpf_hdr32 {
140	struct timeval32 bh_tstamp; /* time stamp */
141	uint32_t bh_caplen; /* length of captured portion */
142	uint32_t bh_datalen; /* original length of packet */
143	uint16_t bh_hdrlen; /* length of bpf header (this struct
144	plus alignment padding) */
145	};
146	#endif
147
148	struct bpf_program32 {
149	u_int bf_len;
150	uint32_t bf_insns;
151	};
152
153	struct bpf_dltlist32 {
154	u_int bfl_len;
155	u_int bfl_list;
156	};
157
158	#define BIOCSETF32 _IOW('B', 103, struct bpf_program32)
159	#define BIOCSRTIMEOUT32 _IOW('B', 109, struct timeval32)
160	#define BIOCGRTIMEOUT32 _IOR('B', 110, struct timeval32)
161	#define BIOCGDLTLIST32 _IOWR('B', 121, struct bpf_dltlist32)
162	#define BIOCSETWF32 _IOW('B', 123, struct bpf_program32)
163	#define BIOCSETFNR32 _IOW('B', 130, struct bpf_program32)
164	#endif
165
166	/*
167	* bpf_iflist is a list of BPF interface structures, each corresponding to a
168	* specific DLT. The same network interface might have several BPF interface
169	* structures registered by different layers in the stack (i.e., 802.11
170	* frames, ethernet frames, etc).
171	*/
172	static LIST_HEAD(, bpf_if) bpf_iflist, bpf_freelist;
173	static struct mtx bpf_mtx; /* bpf global lock */
174	static int bpf_bpfd_cnt;
175
176	static void bpf_attachd(struct bpf_d , struct bpf_if );
177	static void bpf_detachd(struct bpf_d *);
178	static void bpf_detachd_locked(struct bpf_d *);
179	static void bpf_freed(struct bpf_d *);
180	static int bpf_movein(struct uio , int, struct ifnet , struct mbuf **,
181	struct sockaddr , int , struct bpf_d *);
182	static int bpf_setif(struct bpf_d , struct ifreq );
183	static void bpf_timed_out(void *);
184	static __inline void
185	bpf_wakeup(struct bpf_d *);
186	static void catchpacket(struct bpf_d , u_char , u_int, u_int,
187	void ()(struct bpf_d , caddr_t, u_int, void *, u_int),
188	struct bintime *);
189	static void reset_d(struct bpf_d *);
190	static int bpf_setf(struct bpf_d , struct bpf_program , u_long cmd);
191	static int bpf_getdltlist(struct bpf_d , struct bpf_dltlist );
192	static int bpf_setdlt(struct bpf_d *, u_int);
193	static void filt_bpfdetach(struct knote *);
194	static int filt_bpfread(struct knote *, long);
195	static void bpf_drvinit(void *);
196	static int bpf_stats_sysctl(SYSCTL_HANDLER_ARGS);
197
198	SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW, 0, "bpf sysctl");
199	int bpf_maxinsns = BPF_MAXINSNS;
200	SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
201	&bpf_maxinsns, 0, "Maximum bpf program instructions");
202	#ifndef __rtems__
203	static int bpf_zerocopy_enable = 0;
204	SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW,
205	&bpf_zerocopy_enable, 0, "Enable new zero-copy BPF buffer sessions");
206	#endif /* __rtems__ */
207	static SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_MPSAFE \| CTLFLAG_RW,
208	bpf_stats_sysctl, "bpf statistics portal");
209
210	static VNET_DEFINE(int, bpf_optimize_writers) = 0;
211	#define V_bpf_optimize_writers VNET(bpf_optimize_writers)
212	SYSCTL_INT(_net_bpf, OID_AUTO, optimize_writers, CTLFLAG_VNET \| CTLFLAG_RW,
213	&VNET_NAME(bpf_optimize_writers), 0,
214	"Do not send packets until BPF program is set");
215
216	#ifndef __rtems__
217	static d_open_t bpfopen;
218	static d_read_t bpfread;
219	static d_write_t bpfwrite;
220	static d_ioctl_t bpfioctl;
221	static d_poll_t bpfpoll;
222	static d_kqfilter_t bpfkqfilter;
223
224	static struct cdevsw bpf_cdevsw = {
225	.d_version = D_VERSION,
226	.d_open = bpfopen,
227	.d_read = bpfread,
228	.d_write = bpfwrite,
229	.d_ioctl = bpfioctl,
230	.d_poll = bpfpoll,
231	.d_name = "bpf",
232	.d_kqfilter = bpfkqfilter,
233	};
234	#endif /* __rtems__ */
235
236	static struct filterops bpfread_filtops = {
237	.f_isfd = 1,
238	.f_detach = filt_bpfdetach,
239	.f_event = filt_bpfread,
240	};
241
242	eventhandler_tag bpf_ifdetach_cookie = NULL;
243
244	/*
245	* LOCKING MODEL USED BY BPF:
246	* Locks:
247	* 1) global lock (BPF_LOCK). Mutex, used to protect interface addition/removal,
248	* some global counters and every bpf_if reference.
249	* 2) Interface lock. Rwlock, used to protect list of BPF descriptors and their filters.
250	* 3) Descriptor lock. Mutex, used to protect BPF buffers and various structure fields
251	* used by bpf_mtap code.
252	*
253	* Lock order:
254	*
255	* Global lock, interface lock, descriptor lock
256	*
257	* We have to acquire interface lock before descriptor main lock due to BPF_MTAP[2]
258	* working model. In many places (like bpf_detachd) we start with BPF descriptor
259	* (and we need to at least rlock it to get reliable interface pointer). This
260	* gives us potential LOR. As a result, we use global lock to protect from bpf_if
261	* change in every such place.
262	*
263	* Changing d->bd_bif is protected by 1) global lock, 2) interface lock and
264	* 3) descriptor main wlock.
265	* Reading bd_bif can be protected by any of these locks, typically global lock.
266	*
267	* Changing read/write BPF filter is protected by the same three locks,
268	* the same applies for reading.
269	*
270	* Sleeping in global lock is not allowed due to bpfdetach() using it.
271	*/
272
273	/*
274	* Wrapper functions for various buffering methods. If the set of buffer
275	* modes expands, we will probably want to introduce a switch data structure
276	* similar to protosw, etc.
277	*/
278	static void
279	bpf_append_bytes(struct bpf_d d, caddr_t buf, u_int offset, void src,
280	u_int len)
281	{
282
283	BPFD_LOCK_ASSERT(d);
284
285	switch (d->bd_bufmode) {
286	case BPF_BUFMODE_BUFFER:
287	return (bpf_buffer_append_bytes(d, buf, offset, src, len));
288
289	#ifndef __rtems__
290	case BPF_BUFMODE_ZBUF:
291	d->bd_zcopy++;
292	return (bpf_zerocopy_append_bytes(d, buf, offset, src, len));
293	#endif /* __rtems__ */
294
295	default:
296	panic("bpf_buf_append_bytes");
297	}
298	}
299
300	static void
301	bpf_append_mbuf(struct bpf_d d, caddr_t buf, u_int offset, void src,
302	u_int len)
303	{
304
305	BPFD_LOCK_ASSERT(d);
306
307	switch (d->bd_bufmode) {
308	case BPF_BUFMODE_BUFFER:
309	return (bpf_buffer_append_mbuf(d, buf, offset, src, len));
310
311	#ifndef __rtems__
312	case BPF_BUFMODE_ZBUF:
313	d->bd_zcopy++;
314	return (bpf_zerocopy_append_mbuf(d, buf, offset, src, len));
315	#endif /* __rtems__ */
316
317	default:
318	panic("bpf_buf_append_mbuf");
319	}
320	}
321
322	/*
323	* This function gets called when the free buffer is re-assigned.
324	*/
325	static void
326	bpf_buf_reclaimed(struct bpf_d *d)
327	{
328
329	BPFD_LOCK_ASSERT(d);
330
331	switch (d->bd_bufmode) {
332	case BPF_BUFMODE_BUFFER:
333	return;
334
335	#ifndef __rtems__
336	case BPF_BUFMODE_ZBUF:
337	bpf_zerocopy_buf_reclaimed(d);
338	return;
339	#endif /* __rtems__ */
340
341	default:
342	panic("bpf_buf_reclaimed");
343	}
344	}
345
346	/*
347	* If the buffer mechanism has a way to decide that a held buffer can be made
348	* free, then it is exposed via the bpf_canfreebuf() interface. (1) is
349	* returned if the buffer can be discarded, (0) is returned if it cannot.
350	*/
351	static int
352	bpf_canfreebuf(struct bpf_d *d)
353	{
354
355	BPFD_LOCK_ASSERT(d);
356
357	#ifndef __rtems__
358	switch (d->bd_bufmode) {
359	case BPF_BUFMODE_ZBUF:
360	return (bpf_zerocopy_canfreebuf(d));
361	}
362	#endif /* __rtems__ */
363	return (0);
364	}
365
366	/*
367	* Allow the buffer model to indicate that the current store buffer is
368	* immutable, regardless of the appearance of space. Return (1) if the
369	* buffer is writable, and (0) if not.
370	*/
371	static int
372	bpf_canwritebuf(struct bpf_d *d)
373	{
374	BPFD_LOCK_ASSERT(d);
375
376	#ifndef __rtems__
377	switch (d->bd_bufmode) {
378	case BPF_BUFMODE_ZBUF:
379	return (bpf_zerocopy_canwritebuf(d));
380	}
381	#endif /* __rtems__ */
382	return (1);
383	}
384
385	/*
386	* Notify buffer model that an attempt to write to the store buffer has
387	* resulted in a dropped packet, in which case the buffer may be considered
388	* full.
389	*/
390	static void
391	bpf_buffull(struct bpf_d *d)
392	{
393
394	BPFD_LOCK_ASSERT(d);
395
396	#ifndef __rtems__
397	switch (d->bd_bufmode) {
398	case BPF_BUFMODE_ZBUF:
399	bpf_zerocopy_buffull(d);
400	break;
401	}
402	#endif /* __rtems__ */
403	}
404
405	/*
406	* Notify the buffer model that a buffer has moved into the hold position.
407	*/
408	void
409	bpf_bufheld(struct bpf_d *d)
410	{
411
412	BPFD_LOCK_ASSERT(d);
413
414	#ifndef __rtems__
415	switch (d->bd_bufmode) {
416	case BPF_BUFMODE_ZBUF:
417	bpf_zerocopy_bufheld(d);
418	break;
419	}
420	#endif /* __rtems__ */
421	}
422
423	static void
424	bpf_free(struct bpf_d *d)
425	{
426
427	switch (d->bd_bufmode) {
428	case BPF_BUFMODE_BUFFER:
429	return (bpf_buffer_free(d));
430
431	#ifndef __rtems__
432	case BPF_BUFMODE_ZBUF:
433	return (bpf_zerocopy_free(d));
434	#endif /* __rtems__ */
435
436	default:
437	panic("bpf_buf_free");
438	}
439	}
440
441	static int
442	bpf_uiomove(struct bpf_d d, caddr_t buf, u_int len, struct uio uio)
443	{
444
445	if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
446	return (EOPNOTSUPP);
447	return (bpf_buffer_uiomove(d, buf, len, uio));
448	}
449
450	static int
451	bpf_ioctl_sblen(struct bpf_d d, u_int i)
452	{
453
454	if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
455	return (EOPNOTSUPP);
456	return (bpf_buffer_ioctl_sblen(d, i));
457	}
458
459	static int
460	bpf_ioctl_getzmax(struct thread td, struct bpf_d d, size_t *i)
461	{
462
463	#ifndef __rtems__
464	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
465	return (EOPNOTSUPP);
466	return (bpf_zerocopy_ioctl_getzmax(td, d, i));
467	#else /* __rtems__ */
468	return (EOPNOTSUPP);
469	#endif /* __rtems__ */
470	}
471
472	static int
473	bpf_ioctl_rotzbuf(struct thread td, struct bpf_d d, struct bpf_zbuf *bz)
474	{
475
476	#ifndef __rtems__
477	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
478	return (EOPNOTSUPP);
479	return (bpf_zerocopy_ioctl_rotzbuf(td, d, bz));
480	#else /* __rtems__ */
481	return (EOPNOTSUPP);
482	#endif /* __rtems__ */
483	}
484
485	static int
486	bpf_ioctl_setzbuf(struct thread td, struct bpf_d d, struct bpf_zbuf *bz)
487	{
488
489	#ifndef __rtems__
490	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
491	return (EOPNOTSUPP);
492	return (bpf_zerocopy_ioctl_setzbuf(td, d, bz));
493	#else /* __rtems__ */
494	return (EOPNOTSUPP);
495	#endif /* __rtems__ */
496	}
497
498	/*
499	* General BPF functions.
500	*/
501	static int
502	bpf_movein(struct uio uio, int linktype, struct ifnet ifp, struct mbuf **mp,
503	struct sockaddr sockp, int hdrlen, struct bpf_d *d)
504	{
505	const struct ieee80211_bpf_params *p;
506	struct ether_header *eh;
507	struct mbuf *m;
508	int error;
509	int len;
510	int hlen;
511	int slen;
512
513	/*
514	* Build a sockaddr based on the data link layer type.
515	* We do this at this level because the ethernet header
516	* is copied directly into the data field of the sockaddr.
517	* In the case of SLIP, there is no header and the packet
518	* is forwarded as is.
519	* Also, we are careful to leave room at the front of the mbuf
520	* for the link level header.
521	*/
522	switch (linktype) {
523
524	case DLT_SLIP:
525	sockp->sa_family = AF_INET;
526	hlen = 0;
527	break;
528
529	case DLT_EN10MB:
530	sockp->sa_family = AF_UNSPEC;
531	/* XXX Would MAXLINKHDR be better? */
532	hlen = ETHER_HDR_LEN;
533	break;
534
535	case DLT_FDDI:
536	sockp->sa_family = AF_IMPLINK;
537	hlen = 0;
538	break;
539
540	case DLT_RAW:
541	sockp->sa_family = AF_UNSPEC;
542	hlen = 0;
543	break;
544
545	case DLT_NULL:
546	/*
547	* null interface types require a 4 byte pseudo header which
548	* corresponds to the address family of the packet.
549	*/
550	sockp->sa_family = AF_UNSPEC;
551	hlen = 4;
552	break;
553
554	case DLT_ATM_RFC1483:
555	/*
556	* en atm driver requires 4-byte atm pseudo header.
557	* though it isn't standard, vpi:vci needs to be
558	* specified anyway.
559	*/
560	sockp->sa_family = AF_UNSPEC;
561	hlen = 12; /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
562	break;
563
564	case DLT_PPP:
565	sockp->sa_family = AF_UNSPEC;
566	hlen = 4; /* This should match PPP_HDRLEN */
567	break;
568
569	case DLT_IEEE802_11: /* IEEE 802.11 wireless */
570	sockp->sa_family = AF_IEEE80211;
571	hlen = 0;
572	break;
573
574	case DLT_IEEE802_11_RADIO: /* IEEE 802.11 wireless w/ phy params */
575	sockp->sa_family = AF_IEEE80211;
576	sockp->sa_len = 12; /* XXX != 0 */
577	hlen = sizeof(struct ieee80211_bpf_params);
578	break;
579
580	default:
581	return (EIO);
582	}
583
584	len = uio->uio_resid;
585	if (len < hlen \|\| len - hlen > ifp->if_mtu)
586	return (EMSGSIZE);
587
588	m = m_get2(len, M_WAITOK, MT_DATA, M_PKTHDR);
589	if (m == NULL)
590	return (EIO);
591	m->m_pkthdr.len = m->m_len = len;
592	*mp = m;
593
594	error = uiomove(mtod(m, u_char *), len, uio);
595	if (error)
596	goto bad;
597
598	slen = bpf_filter(d->bd_wfilter, mtod(m, u_char *), len, len);
599	if (slen == 0) {
600	error = EPERM;
601	goto bad;
602	}
603
604	/* Check for multicast destination */
605	switch (linktype) {
606	case DLT_EN10MB:
607	eh = mtod(m, struct ether_header *);
608	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
609	if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost,
610	ETHER_ADDR_LEN) == 0)
611	m->m_flags \|= M_BCAST;
612	else
613	m->m_flags \|= M_MCAST;
614	}
615	if (d->bd_hdrcmplt == 0) {
616	memcpy(eh->ether_shost, IF_LLADDR(ifp),
617	sizeof(eh->ether_shost));
618	}
619	break;
620	}
621
622	/*
623	* Make room for link header, and copy it to sockaddr
624	*/
625	if (hlen != 0) {
626	if (sockp->sa_family == AF_IEEE80211) {
627	/*
628	* Collect true length from the parameter header
629	* NB: sockp is known to be zero'd so if we do a
630	* short copy unspecified parameters will be
631	* zero.
632	* NB: packet may not be aligned after stripping
633	* bpf params
634	* XXX check ibp_vers
635	*/
636	p = mtod(m, const struct ieee80211_bpf_params *);
637	hlen = p->ibp_len;
638	if (hlen > sizeof(sockp->sa_data)) {
639	error = EINVAL;
640	goto bad;
641	}
642	}
643	bcopy(mtod(m, const void *), sockp->sa_data, hlen);
644	}
645	*hdrlen = hlen;
646
647	return (0);
648	bad:
649	m_freem(m);
650	return (error);
651	}
652
653	/*
654	* Attach file to the bpf interface, i.e. make d listen on bp.
655	*/
656	static void
657	bpf_attachd(struct bpf_d d, struct bpf_if bp)
658	{
659	int op_w;
660
661	BPF_LOCK_ASSERT();
662
663	/*
664	* Save sysctl value to protect from sysctl change
665	* between reads
666	*/
667	op_w = V_bpf_optimize_writers \|\| d->bd_writer;
668
669	if (d->bd_bif != NULL)
670	bpf_detachd_locked(d);
671	/*
672	* Point d at bp, and add d to the interface's list.
673	* Since there are many applications using BPF for
674	* sending raw packets only (dhcpd, cdpd are good examples)
675	* we can delay adding d to the list of active listeners until
676	* some filter is configured.
677	*/
678
679	BPFIF_WLOCK(bp);
680	BPFD_LOCK(d);
681
682	d->bd_bif = bp;
683
684	if (op_w != 0) {
685	/* Add to writers-only list */
686	LIST_INSERT_HEAD(&bp->bif_wlist, d, bd_next);
687	/*
688	* We decrement bd_writer on every filter set operation.
689	* First BIOCSETF is done by pcap_open_live() to set up
690	* snap length. After that appliation usually sets its own filter
691	*/
692	d->bd_writer = 2;
693	} else
694	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
695
696	BPFD_UNLOCK(d);
697	BPFIF_WUNLOCK(bp);
698
699	bpf_bpfd_cnt++;
700
701	CTR3(KTR_NET, "%s: bpf_attach called by pid %d, adding to %s list",
702	__func__, d->bd_pid, d->bd_writer ? "writer" : "active");
703
704	if (op_w == 0)
705	EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
706	}
707
708	/*
709	* Check if we need to upgrade our descriptor @d from write-only mode.
710	*/
711	static int
712	bpf_check_upgrade(u_long cmd, struct bpf_d d, struct bpf_insn fcode, int flen)
713	{
714	int is_snap, need_upgrade;
715
716	/*
717	* Check if we've already upgraded or new filter is empty.
718	*/
719	if (d->bd_writer == 0 \|\| fcode == NULL)
720	return (0);
721
722	need_upgrade = 0;
723
724	/*
725	* Check if cmd looks like snaplen setting from
726	* pcap_bpf.c:pcap_open_live().
727	* Note we're not checking .k value here:
728	* while pcap_open_live() definitely sets to to non-zero value,
729	* we'd prefer to treat k=0 (deny ALL) case the same way: e.g.
730	* do not consider upgrading immediately
731	*/
732	if (cmd == BIOCSETF && flen == 1 && fcode[0].code == (BPF_RET \| BPF_K))
733	is_snap = 1;
734	else
735	is_snap = 0;
736
737	if (is_snap == 0) {
738	/*
739	* We're setting first filter and it doesn't look like
740	* setting snaplen. We're probably using bpf directly.
741	* Upgrade immediately.
742	*/
743	need_upgrade = 1;
744	} else {
745	/*
746	* Do not require upgrade by first BIOCSETF
747	* (used to set snaplen) by pcap_open_live().
748	*/
749
750	if (--d->bd_writer == 0) {
751	/*
752	* First snaplen filter has already
753	* been set. This is probably catch-all
754	* filter
755	*/
756	need_upgrade = 1;
757	}
758	}
759
760	CTR5(KTR_NET,
761	"%s: filter function set by pid %d, "
762	"bd_writer counter %d, snap %d upgrade %d",
763	__func__, d->bd_pid, d->bd_writer,
764	is_snap, need_upgrade);
765
766	return (need_upgrade);
767	}
768
769	/*
770	* Add d to the list of active bp filters.
771	* Requires bpf_attachd() to be called before.
772	*/
773	static void
774	bpf_upgraded(struct bpf_d *d)
775	{
776	struct bpf_if *bp;
777
778	BPF_LOCK_ASSERT();
779
780	bp = d->bd_bif;
781
782	/*
783	* Filter can be set several times without specifying interface.
784	* Mark d as reader and exit.
785	*/
786	if (bp == NULL) {
787	BPFD_LOCK(d);
788	d->bd_writer = 0;
789	BPFD_UNLOCK(d);
790	return;
791	}
792
793	BPFIF_WLOCK(bp);
794	BPFD_LOCK(d);
795
796	/* Remove from writers-only list */
797	LIST_REMOVE(d, bd_next);
798	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
799	/* Mark d as reader */
800	d->bd_writer = 0;
801
802	BPFD_UNLOCK(d);
803	BPFIF_WUNLOCK(bp);
804
805	CTR2(KTR_NET, "%s: upgrade required by pid %d", __func__, d->bd_pid);
806
807	EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
808	}
809
/*
 * Detach a file from its interface.  Convenience wrapper that takes the
 * global BPF lock around bpf_detachd_locked().
 */
static void
bpf_detachd(struct bpf_d *d)
{

	BPF_LOCK();
	bpf_detachd_locked(d);
	BPF_UNLOCK();
}
820
821	static void
822	bpf_detachd_locked(struct bpf_d *d)
823	{
824	int error;
825	struct bpf_if *bp;
826	struct ifnet *ifp;
827
828	CTR2(KTR_NET, "%s: detach required by pid %d", __func__, d->bd_pid);
829
830	BPF_LOCK_ASSERT();
831
832	/* Check if descriptor is attached */
833	if ((bp = d->bd_bif) == NULL)
834	return;
835
836	BPFIF_WLOCK(bp);
837	BPFD_LOCK(d);
838
839	/* Save bd_writer value */
840	error = d->bd_writer;
841
842	/*
843	* Remove d from the interface's descriptor list.
844	*/
845	LIST_REMOVE(d, bd_next);
846
847	ifp = bp->bif_ifp;
848	d->bd_bif = NULL;
849	BPFD_UNLOCK(d);
850	BPFIF_WUNLOCK(bp);
851
852	bpf_bpfd_cnt--;
853
854	/* Call event handler iff d is attached */
855	if (error == 0)
856	EVENTHANDLER_INVOKE(bpf_track, ifp, bp->bif_dlt, 0);
857
858	/*
859	* Check if this descriptor had requested promiscuous mode.
860	* If so, turn it off.
861	*/
862	if (d->bd_promisc) {
863	d->bd_promisc = 0;
864	CURVNET_SET(ifp->if_vnet);
865	error = ifpromisc(ifp, 0);
866	CURVNET_RESTORE();
867	if (error != 0 && error != ENXIO) {
868	/*
869	* ENXIO can happen if a pccard is unplugged
870	* Something is really wrong if we were able to put
871	* the driver into promiscuous mode, but can't
872	* take it out.
873	*/
874	if_printf(bp->bif_ifp,
875	"bpf_detach: ifpromisc failed (%d)\n", error);
876	}
877	}
878	}
879
880	/*
881	* Close the descriptor by detaching it from its interface,
882	* deallocating its buffers, and marking it free.
883	*/
884	static void
885	bpf_dtor(void *data)
886	{
887	struct bpf_d *d = data;
888
889	BPFD_LOCK(d);
890	if (d->bd_state == BPF_WAITING)
891	callout_stop(&d->bd_callout);
892	d->bd_state = BPF_IDLE;
893	BPFD_UNLOCK(d);
894	funsetown(&d->bd_sigio);
895	bpf_detachd(d);
896	#ifdef MAC
897	mac_bpfdesc_destroy(d);
898	#endif /* MAC */
899	seldrain(&d->bd_sel);
900	knlist_destroy(&d->bd_sel.si_note);
901	callout_drain(&d->bd_callout);
902	bpf_freed(d);
903	free(d, M_BPF);
904	}
905
906	/*
907	* Open ethernet device. Returns ENXIO for illegal minor device number,
908	* EBUSY if file is open by another process.
909	*/
910	/* ARGSUSED */
911	#ifndef __rtems__
912	static int
913	#else /* __rtems__ */
914	static struct bpf_d *
915	#endif /* __rtems__ */
916	bpfopen(struct cdev dev, int flags, int fmt, struct thread td)
917	{
918	struct bpf_d *d;
919	#ifndef __rtems__
920	int error;
921
922	d = malloc(sizeof(*d), M_BPF, M_WAITOK \| M_ZERO);
923	error = devfs_set_cdevpriv(d, bpf_dtor);
924	if (error != 0) {
925	free(d, M_BPF);
926	return (error);
927	}
928	#else /* __rtems__ */
929	u_int size;
930
931	d = malloc(sizeof(*d), M_BPF, M_NOWAIT \| M_ZERO);
932	if (d == NULL) {
933	return (d);
934	}
935	#endif /* __rtems__ */
936
937	/*
938	* For historical reasons, perform a one-time initialization call to
939	* the buffer routines, even though we're not yet committed to a
940	* particular buffer method.
941	*/
942	bpf_buffer_init(d);
943	if ((flags & FREAD) == 0)
944	d->bd_writer = 2;
945	d->bd_hbuf_in_use = 0;
946	d->bd_bufmode = BPF_BUFMODE_BUFFER;
947	d->bd_sig = SIGIO;
948	d->bd_direction = BPF_D_INOUT;
949	BPF_PID_REFRESH(d, td);
950	#ifdef MAC
951	mac_bpfdesc_init(d);
952	mac_bpfdesc_create(td->td_ucred, d);
953	#endif
954	mtx_init(&d->bd_lock, devtoname(dev), "bpf cdev lock", MTX_DEF);
955	callout_init_mtx(&d->bd_callout, &d->bd_lock, 0);
956	knlist_init_mtx(&d->bd_sel.si_note, &d->bd_lock);
957
958	#ifndef __rtems__
959	return (0);
960	#else /* __rtems__ */
961	return (d);
962	#endif /* __rtems__ */
963	}
964
965	/*
966	* bpfread - read next chunk of packets from buffers
967	*/
968	static int
969	#ifndef __rtems__
970	bpfread(struct cdev dev, struct uio uio, int ioflag)
971	#else /* __rtems__ */
972	bpfread(struct bpf_d d, struct uio uio, int ioflag)
973	#endif /* __rtems__ */
974	{
975	#ifndef __rtems__
976	struct bpf_d *d;
977	#endif /* __rtems__ */
978	int error;
979	int non_block;
980	int timed_out;
981
982	error = devfs_get_cdevpriv((void **)&d);
983	if (error != 0)
984	return (error);
985
986	/*
987	* Restrict application to use a buffer the same size as
988	* as kernel buffers.
989	*/
990	if (uio->uio_resid != d->bd_bufsize)
991	return (EINVAL);
992
993	non_block = ((ioflag & O_NONBLOCK) != 0);
994
995	BPFD_LOCK(d);
996	BPF_PID_REFRESH_CUR(d);
997	if (d->bd_bufmode != BPF_BUFMODE_BUFFER) {
998	BPFD_UNLOCK(d);
999	return (EOPNOTSUPP);
1000	}
1001	if (d->bd_state == BPF_WAITING)
1002	callout_stop(&d->bd_callout);
1003	timed_out = (d->bd_state == BPF_TIMED_OUT);
1004	d->bd_state = BPF_IDLE;
1005	while (d->bd_hbuf_in_use) {
1006	error = mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
1007	PRINET\|PCATCH, "bd_hbuf", 0);
1008	if (error != 0) {
1009	BPFD_UNLOCK(d);
1010	return (error);
1011	}
1012	}
1013	/*
1014	* If the hold buffer is empty, then do a timed sleep, which
1015	* ends when the timeout expires or when enough packets
1016	* have arrived to fill the store buffer.
1017	*/
1018	while (d->bd_hbuf == NULL) {
1019	if (d->bd_slen != 0) {
1020	/*
1021	* A packet(s) either arrived since the previous
1022	* read or arrived while we were asleep.
1023	*/
1024	if (d->bd_immediate \|\| non_block \|\| timed_out) {
1025	/*
1026	* Rotate the buffers and return what's here
1027	* if we are in immediate mode, non-blocking
1028	* flag is set, or this descriptor timed out.
1029	*/
1030	ROTATE_BUFFERS(d);
1031	break;
1032	}
1033	}
1034
1035	/*
1036	* No data is available, check to see if the bpf device
1037	* is still pointed at a real interface. If not, return
1038	* ENXIO so that the userland process knows to rebind
1039	* it before using it again.
1040	*/
1041	if (d->bd_bif == NULL) {
1042	BPFD_UNLOCK(d);
1043	return (ENXIO);
1044	}
1045
1046	if (non_block) {
1047	BPFD_UNLOCK(d);
1048	return (EWOULDBLOCK);
1049	}
1050	error = msleep(d, &d->bd_lock, PRINET\|PCATCH,
1051	"bpf", d->bd_rtout);
1052	if (error == EINTR \|\| error == ERESTART) {
1053	BPFD_UNLOCK(d);
1054	return (error);
1055	}
1056	if (error == EWOULDBLOCK) {
1057	/*
1058	* On a timeout, return what's in the buffer,
1059	* which may be nothing. If there is something
1060	* in the store buffer, we can rotate the buffers.
1061	*/
1062	if (d->bd_hbuf)
1063	/*
1064	* We filled up the buffer in between
1065	* getting the timeout and arriving
1066	* here, so we don't need to rotate.
1067	*/
1068	break;
1069
1070	if (d->bd_slen == 0) {
1071	BPFD_UNLOCK(d);
1072	return (0);
1073	}
1074	ROTATE_BUFFERS(d);
1075	break;
1076	}
1077	}
1078	/*
1079	* At this point, we know we have something in the hold slot.
1080	*/
1081	d->bd_hbuf_in_use = 1;
1082	BPFD_UNLOCK(d);
1083
1084	/*
1085	* Move data from hold buffer into user space.
1086	* We know the entire buffer is transferred since
1087	* we checked above that the read buffer is bpf_bufsize bytes.
1088	*
1089	* We do not have to worry about simultaneous reads because
1090	* we waited for sole access to the hold buffer above.
1091	*/
1092	error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio);
1093
1094	BPFD_LOCK(d);
1095	KASSERT(d->bd_hbuf != NULL, ("bpfread: lost bd_hbuf"));
1096	d->bd_fbuf = d->bd_hbuf;
1097	d->bd_hbuf = NULL;
1098	d->bd_hlen = 0;
1099	bpf_buf_reclaimed(d);
1100	d->bd_hbuf_in_use = 0;
1101	wakeup(&d->bd_hbuf_in_use);
1102	BPFD_UNLOCK(d);
1103
1104	return (error);
1105	}
1106
1107	/*
1108	* If there are processes sleeping on this descriptor, wake them up.
1109	*/
1110	static __inline void
1111	bpf_wakeup(struct bpf_d *d)
1112	{
1113
1114	BPFD_LOCK_ASSERT(d);
1115	if (d->bd_state == BPF_WAITING) {
1116	callout_stop(&d->bd_callout);
1117	d->bd_state = BPF_IDLE;
1118	}
1119	wakeup(d);
1120	#ifndef __rtems__
1121	if (d->bd_async && d->bd_sig && d->bd_sigio)
1122	pgsigio(&d->bd_sigio, d->bd_sig, 0);
1123	#endif /* __rtems__ */
1124
1125	selwakeuppri(&d->bd_sel, PRINET);
1126	KNOTE_LOCKED(&d->bd_sel.si_note, 0);
1127	}
1128
1129	static void
1130	bpf_timed_out(void *arg)
1131	{
1132	struct bpf_d d = (struct bpf_d )arg;
1133
1134	BPFD_LOCK_ASSERT(d);
1135
1136	if (callout_pending(&d->bd_callout) \|\| !callout_active(&d->bd_callout))
1137	return;
1138	if (d->bd_state == BPF_WAITING) {
1139	d->bd_state = BPF_TIMED_OUT;
1140	if (d->bd_slen != 0)
1141	bpf_wakeup(d);
1142	}
1143	}
1144
1145	static int
1146	bpf_ready(struct bpf_d *d)
1147	{
1148
1149	BPFD_LOCK_ASSERT(d);
1150
1151	if (!bpf_canfreebuf(d) && d->bd_hlen != 0)
1152	return (1);
1153	if ((d->bd_immediate \|\| d->bd_state == BPF_TIMED_OUT) &&
1154	d->bd_slen != 0)
1155	return (1);
1156	return (0);
1157	}
1158
1159	static int
1160	#ifndef __rtems__
1161	bpfwrite(struct cdev dev, struct uio uio, int ioflag)
1162	#else /* __rtems__ */
1163	bpfwrite(struct bpf_d d, struct uio uio, int ioflag)
1164	#endif /* __rtems__ */
1165	{
1166	#ifndef __rtems__
1167	struct bpf_d *d;
1168	#endif /* __rtems__ */
1169	struct ifnet *ifp;
1170	struct mbuf m, mc;
1171	struct sockaddr dst;
1172	struct route ro;
1173	int error, hlen;
1174
1175	error = devfs_get_cdevpriv((void **)&d);
1176	if (error != 0)
1177	return (error);
1178
1179	BPF_PID_REFRESH_CUR(d);
1180	d->bd_wcount++;
1181	/* XXX: locking required */
1182	if (d->bd_bif == NULL) {
1183	d->bd_wdcount++;
1184	return (ENXIO);
1185	}
1186
1187	ifp = d->bd_bif->bif_ifp;
1188
1189	if ((ifp->if_flags & IFF_UP) == 0) {
1190	d->bd_wdcount++;
1191	return (ENETDOWN);
1192	}
1193
1194	if (uio->uio_resid == 0) {
1195	d->bd_wdcount++;
1196	return (0);
1197	}
1198
1199	bzero(&dst, sizeof(dst));
1200	m = NULL;
1201	hlen = 0;
1202	/* XXX: bpf_movein() can sleep */
1203	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
1204	&m, &dst, &hlen, d);
1205	if (error) {
1206	d->bd_wdcount++;
1207	return (error);
1208	}
1209	d->bd_wfcount++;
1210	if (d->bd_hdrcmplt)
1211	dst.sa_family = pseudo_AF_HDRCMPLT;
1212
1213	if (d->bd_feedback) {
1214	mc = m_dup(m, M_NOWAIT);
1215	if (mc != NULL)
1216	mc->m_pkthdr.rcvif = ifp;
1217	/* Set M_PROMISC for outgoing packets to be discarded. */
1218	if (d->bd_direction == BPF_D_INOUT)
1219	m->m_flags \|= M_PROMISC;
1220	} else
1221	mc = NULL;
1222
1223	m->m_pkthdr.len -= hlen;
1224	m->m_len -= hlen;
1225	m->m_data += hlen; /* XXX */
1226
1227	CURVNET_SET(ifp->if_vnet);
1228	#ifdef MAC
1229	BPFD_LOCK(d);
1230	mac_bpfdesc_create_mbuf(d, m);
1231	if (mc != NULL)
1232	mac_bpfdesc_create_mbuf(d, mc);
1233	BPFD_UNLOCK(d);
1234	#endif
1235
1236	bzero(&ro, sizeof(ro));
1237	if (hlen != 0) {
1238	ro.ro_prepend = (u_char *)&dst.sa_data;
1239	ro.ro_plen = hlen;
1240	ro.ro_flags = RT_HAS_HEADER;
1241	}
1242
1243	error = (*ifp->if_output)(ifp, m, &dst, &ro);
1244	if (error)
1245	d->bd_wdcount++;
1246
1247	if (mc != NULL) {
1248	if (error == 0)
1249	(*ifp->if_input)(ifp, mc);
1250	else
1251	m_freem(mc);
1252	}
1253	CURVNET_RESTORE();
1254
1255	return (error);
1256	}
1257
1258	/*
1259	* Reset a descriptor by flushing its packet buffer and clearing the receive
1260	* and drop counts. This is doable for kernel-only buffers, but with
1261	* zero-copy buffers, we can't write to (or rotate) buffers that are
1262	* currently owned by userspace. It would be nice if we could encapsulate
1263	* this logic in the buffer code rather than here.
1264	*/
1265	static void
1266	reset_d(struct bpf_d *d)
1267	{
1268
1269	BPFD_LOCK_ASSERT(d);
1270
1271	while (d->bd_hbuf_in_use)
1272	mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, PRINET,
1273	"bd_hbuf", 0);
1274	if ((d->bd_hbuf != NULL) &&
1275	(d->bd_bufmode != BPF_BUFMODE_ZBUF \|\| bpf_canfreebuf(d))) {
1276	/* Free the hold buffer. */
1277	d->bd_fbuf = d->bd_hbuf;
1278	d->bd_hbuf = NULL;
1279	d->bd_hlen = 0;
1280	bpf_buf_reclaimed(d);
1281	}
1282	if (bpf_canwritebuf(d))
1283	d->bd_slen = 0;
1284	d->bd_rcount = 0;
1285	d->bd_dcount = 0;
1286	d->bd_fcount = 0;
1287	d->bd_wcount = 0;
1288	d->bd_wfcount = 0;
1289	d->bd_wdcount = 0;
1290	d->bd_zcopy = 0;
1291	}
1292
1293	/*
1294	* FIONREAD Check for read packet available.
1295	* BIOCGBLEN Get buffer len [for read()].
1296	* BIOCSETF Set read filter.
1297	* BIOCSETFNR Set read filter without resetting descriptor.
1298	* BIOCSETWF Set write filter.
1299	* BIOCFLUSH Flush read packet buffer.
1300	* BIOCPROMISC Put interface into promiscuous mode.
1301	* BIOCGDLT Get link layer type.
1302	* BIOCGETIF Get interface name.
1303	* BIOCSETIF Set interface.
1304	* BIOCSRTIMEOUT Set read timeout.
1305	* BIOCGRTIMEOUT Get read timeout.
1306	* BIOCGSTATS Get packet stats.
1307	* BIOCIMMEDIATE Set immediate mode.
1308	* BIOCVERSION Get filter language version.
1309	* BIOCGHDRCMPLT Get "header already complete" flag
1310	* BIOCSHDRCMPLT Set "header already complete" flag
1311	* BIOCGDIRECTION Get packet direction flag
1312	* BIOCSDIRECTION Set packet direction flag
1313	* BIOCGTSTAMP Get time stamp format and resolution.
1314	* BIOCSTSTAMP Set time stamp format and resolution.
1315	* BIOCLOCK Set "locked" flag
1316	* BIOCFEEDBACK Set packet feedback mode.
1317	* BIOCSETZBUF Set current zero-copy buffer locations.
1318	* BIOCGETZMAX Get maximum zero-copy buffer size.
1319	* BIOCROTZBUF Force rotation of zero-copy buffer
1320	* BIOCSETBUFMODE Set buffer mode.
1321	* BIOCGETBUFMODE Get current buffer mode.
1322	*/
1323	/* ARGSUSED */
1324	static int
1325	#ifndef __rtems__
1326	bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
1327	struct thread *td)
1328	#else /* __rtems__ */
1329	bpfioctl(struct bpf_d *d, u_long cmd, caddr_t addr, int flags,
1330	struct thread *td)
1331	#endif /* __rtems__ */
1332	{
1333	#ifndef __rtems__
1334	struct bpf_d *d;
1335	#endif /* __rtems__ */
1336	int error;
1337
1338	error = devfs_get_cdevpriv((void **)&d);
1339	if (error != 0)
1340	return (error);
1341
1342	/*
1343	* Refresh PID associated with this descriptor.
1344	*/
1345	BPFD_LOCK(d);
1346	BPF_PID_REFRESH(d, td);
1347	if (d->bd_state == BPF_WAITING)
1348	callout_stop(&d->bd_callout);
1349	d->bd_state = BPF_IDLE;
1350	BPFD_UNLOCK(d);
1351
1352	if (d->bd_locked == 1) {
1353	switch (cmd) {
1354	case BIOCGBLEN:
1355	case BIOCFLUSH:
1356	case BIOCGDLT:
1357	case BIOCGDLTLIST:
1358	#ifdef COMPAT_FREEBSD32
1359	case BIOCGDLTLIST32:
1360	#endif
1361	case BIOCGETIF:
1362	case BIOCGRTIMEOUT:
1363	#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
1364	case BIOCGRTIMEOUT32:
1365	#endif
1366	case BIOCGSTATS:
1367	case BIOCVERSION:
1368	case BIOCGRSIG:
1369	case BIOCGHDRCMPLT:
1370	case BIOCSTSTAMP:
1371	case BIOCFEEDBACK:
1372	case FIONREAD:
1373	case BIOCLOCK:
1374	case BIOCSRTIMEOUT:
1375	#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
1376	case BIOCSRTIMEOUT32:
1377	#endif
1378	case BIOCIMMEDIATE:
1379	case TIOCGPGRP:
1380	case BIOCROTZBUF:
1381	break;
1382	default:
1383	return (EPERM);
1384	}
1385	}
1386	#ifdef COMPAT_FREEBSD32
1387	/*
1388	* If we see a 32-bit compat ioctl, mark the stream as 32-bit so
1389	* that it will get 32-bit packet headers.
1390	*/
1391	switch (cmd) {
1392	case BIOCSETF32:
1393	case BIOCSETFNR32:
1394	case BIOCSETWF32:
1395	case BIOCGDLTLIST32:
1396	case BIOCGRTIMEOUT32:
1397	case BIOCSRTIMEOUT32:
1398	BPFD_LOCK(d);
1399	d->bd_compat32 = 1;
1400	BPFD_UNLOCK(d);
1401	}
1402	#endif
1403
1404	CURVNET_SET(TD_TO_VNET(td));
1405	switch (cmd) {
1406
1407	default:
1408	error = EINVAL;
1409	break;
1410
1411	/*
1412	* Check for read packet available.
1413	*/
1414	case FIONREAD:
1415	{
1416	int n;
1417
1418	BPFD_LOCK(d);
1419	n = d->bd_slen;
1420	while (d->bd_hbuf_in_use)
1421	mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
1422	PRINET, "bd_hbuf", 0);
1423	if (d->bd_hbuf)
1424	n += d->bd_hlen;
1425	BPFD_UNLOCK(d);
1426
1427	(int )addr = n;
1428	break;
1429	}
1430
1431	/*
1432	* Get buffer len [for read()].
1433	*/
1434	case BIOCGBLEN:
1435	BPFD_LOCK(d);
1436	(u_int )addr = d->bd_bufsize;
1437	BPFD_UNLOCK(d);
1438	break;
1439
1440	/*
1441	* Set buffer length.
1442	*/
1443	case BIOCSBLEN:
1444	error = bpf_ioctl_sblen(d, (u_int *)addr);
1445	break;
1446
1447	/*
1448	* Set link layer read filter.
1449	*/
1450	case BIOCSETF:
1451	case BIOCSETFNR:
1452	case BIOCSETWF:
1453	#ifdef COMPAT_FREEBSD32
1454	case BIOCSETF32:
1455	case BIOCSETFNR32:
1456	case BIOCSETWF32:
1457	#endif
1458	error = bpf_setf(d, (struct bpf_program *)addr, cmd);
1459	break;
1460
1461	/*
1462	* Flush read packet buffer.
1463	*/
1464	case BIOCFLUSH:
1465	BPFD_LOCK(d);
1466	reset_d(d);
1467	BPFD_UNLOCK(d);
1468	break;
1469
1470	/*
1471	* Put interface into promiscuous mode.
1472	*/
1473	case BIOCPROMISC:
1474	if (d->bd_bif == NULL) {
1475	/*
1476	* No interface attached yet.
1477	*/
1478	error = EINVAL;
1479	break;
1480	}
1481	if (d->bd_promisc == 0) {
1482	error = ifpromisc(d->bd_bif->bif_ifp, 1);
1483	if (error == 0)
1484	d->bd_promisc = 1;
1485	}
1486	break;
1487
1488	/*
1489	* Get current data link type.
1490	*/
1491	case BIOCGDLT:
1492	BPF_LOCK();
1493	if (d->bd_bif == NULL)
1494	error = EINVAL;
1495	else
1496	(u_int )addr = d->bd_bif->bif_dlt;
1497	BPF_UNLOCK();
1498	break;
1499
1500	/*
1501	* Get a list of supported data link types.
1502	*/
1503	#ifdef COMPAT_FREEBSD32
1504	case BIOCGDLTLIST32:
1505	{
1506	struct bpf_dltlist32 *list32;
1507	struct bpf_dltlist dltlist;
1508
1509	list32 = (struct bpf_dltlist32 *)addr;
1510	dltlist.bfl_len = list32->bfl_len;
1511	dltlist.bfl_list = PTRIN(list32->bfl_list);
1512	BPF_LOCK();
1513	if (d->bd_bif == NULL)
1514	error = EINVAL;
1515	else {
1516	error = bpf_getdltlist(d, &dltlist);
1517	if (error == 0)
1518	list32->bfl_len = dltlist.bfl_len;
1519	}
1520	BPF_UNLOCK();
1521	break;
1522	}
1523	#endif
1524
1525	case BIOCGDLTLIST:
1526	BPF_LOCK();
1527	if (d->bd_bif == NULL)
1528	error = EINVAL;
1529	else
1530	error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
1531	BPF_UNLOCK();
1532	break;
1533
1534	/*
1535	* Set data link type.
1536	*/
1537	case BIOCSDLT:
1538	BPF_LOCK();
1539	if (d->bd_bif == NULL)
1540	error = EINVAL;
1541	else
1542	error = bpf_setdlt(d, (u_int )addr);
1543	BPF_UNLOCK();
1544	break;
1545
1546	/*
1547	* Get interface name.
1548	*/
1549	case BIOCGETIF:
1550	BPF_LOCK();
1551	if (d->bd_bif == NULL)
1552	error = EINVAL;
1553	else {
1554	struct ifnet *const ifp = d->bd_bif->bif_ifp;
1555	struct ifreq const ifr = (struct ifreq )addr;
1556
1557	strlcpy(ifr->ifr_name, ifp->if_xname,
1558	sizeof(ifr->ifr_name));
1559	}
1560	BPF_UNLOCK();
1561	break;
1562
1563	/*
1564	* Set interface.
1565	*/
1566	case BIOCSETIF:
1567	{
1568	int alloc_buf, size;
1569
1570	/*
1571	* Behavior here depends on the buffering model. If
1572	* we're using kernel memory buffers, then we can
1573	* allocate them here. If we're using zero-copy,
1574	* then the user process must have registered buffers
1575	* by the time we get here.
1576	*/
1577	alloc_buf = 0;
1578	BPFD_LOCK(d);
1579	if (d->bd_bufmode == BPF_BUFMODE_BUFFER &&
1580	d->bd_sbuf == NULL)
1581	alloc_buf = 1;
1582	BPFD_UNLOCK(d);
1583	if (alloc_buf) {
1584	size = d->bd_bufsize;
1585	error = bpf_buffer_ioctl_sblen(d, &size);
1586	if (error != 0)
1587	break;
1588	}
1589	BPF_LOCK();
1590	error = bpf_setif(d, (struct ifreq *)addr);
1591	BPF_UNLOCK();
1592	break;
1593	}
1594
1595	/*
1596	* Set read timeout.
1597	*/
1598	case BIOCSRTIMEOUT:
1599	#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
1600	case BIOCSRTIMEOUT32:
1601	#endif
1602	{
1603	struct timeval tv = (struct timeval )addr;
1604	#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
1605	struct timeval32 *tv32;
1606	struct timeval tv64;
1607
1608	if (cmd == BIOCSRTIMEOUT32) {
1609	tv32 = (struct timeval32 *)addr;
1610	tv = &tv64;
1611	tv->tv_sec = tv32->tv_sec;
1612	tv->tv_usec = tv32->tv_usec;
1613	} else
1614	#endif
1615	tv = (struct timeval *)addr;
1616
1617	/*
1618	* Subtract 1 tick from tvtohz() since this isn't
1619	* a one-shot timer.
1620	*/
1621	if ((error = itimerfix(tv)) == 0)
1622	d->bd_rtout = tvtohz(tv) - 1;
1623	break;
1624	}
1625
1626	/*
1627	* Get read timeout.
1628	*/
1629	case BIOCGRTIMEOUT:
1630	#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
1631	case BIOCGRTIMEOUT32:
1632	#endif
1633	{
1634	struct timeval *tv;
1635	#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
1636	struct timeval32 *tv32;
1637	struct timeval tv64;
1638
1639	if (cmd == BIOCGRTIMEOUT32)
1640	tv = &tv64;
1641	else
1642	#endif
1643	tv = (struct timeval *)addr;
1644
1645	tv->tv_sec = d->bd_rtout / hz;
1646	tv->tv_usec = (d->bd_rtout % hz) * tick;
1647	#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
1648	if (cmd == BIOCGRTIMEOUT32) {
1649	tv32 = (struct timeval32 *)addr;
1650	tv32->tv_sec = tv->tv_sec;
1651	tv32->tv_usec = tv->tv_usec;
1652	}
1653	#endif
1654
1655	break;
1656	}
1657
1658	/*
1659	* Get packet stats.
1660	*/
1661	case BIOCGSTATS:
1662	{
1663	struct bpf_stat bs = (struct bpf_stat )addr;
1664
1665	/* XXXCSJP overflow */
1666	bs->bs_recv = d->bd_rcount;
1667	bs->bs_drop = d->bd_dcount;
1668	break;
1669	}
1670
1671	/*
1672	* Set immediate mode.
1673	*/
1674	case BIOCIMMEDIATE:
1675	BPFD_LOCK(d);
1676	d->bd_immediate = (u_int )addr;
1677	BPFD_UNLOCK(d);
1678	break;
1679
1680	case BIOCVERSION:
1681	{
1682	struct bpf_version bv = (struct bpf_version )addr;
1683
1684	bv->bv_major = BPF_MAJOR_VERSION;
1685	bv->bv_minor = BPF_MINOR_VERSION;
1686	break;
1687	}
1688
1689	/*
1690	* Get "header already complete" flag
1691	*/
1692	case BIOCGHDRCMPLT:
1693	BPFD_LOCK(d);
1694	(u_int )addr = d->bd_hdrcmplt;
1695	BPFD_UNLOCK(d);
1696	break;
1697
1698	/*
1699	* Set "header already complete" flag
1700	*/
1701	case BIOCSHDRCMPLT:
1702	BPFD_LOCK(d);
1703	d->bd_hdrcmplt = (u_int )addr ? 1 : 0;
1704	BPFD_UNLOCK(d);
1705	break;
1706
1707	/*
1708	* Get packet direction flag
1709	*/
1710	case BIOCGDIRECTION:
1711	BPFD_LOCK(d);
1712	(u_int )addr = d->bd_direction;
1713	BPFD_UNLOCK(d);
1714	break;
1715
1716	/*
1717	* Set packet direction flag
1718	*/
1719	case BIOCSDIRECTION:
1720	{
1721	u_int direction;
1722
1723	direction = (u_int )addr;
1724	switch (direction) {
1725	case BPF_D_IN:
1726	case BPF_D_INOUT:
1727	case BPF_D_OUT:
1728	BPFD_LOCK(d);
1729	d->bd_direction = direction;
1730	BPFD_UNLOCK(d);
1731	break;
1732	default:
1733	error = EINVAL;
1734	}
1735	}
1736	break;
1737
1738	/*
1739	* Get packet timestamp format and resolution.
1740	*/
1741	case BIOCGTSTAMP:
1742	BPFD_LOCK(d);
1743	(u_int )addr = d->bd_tstamp;
1744	BPFD_UNLOCK(d);
1745	break;
1746
1747	/*
1748	* Set packet timestamp format and resolution.
1749	*/
1750	case BIOCSTSTAMP:
1751	{
1752	u_int func;
1753
1754	func = (u_int )addr;
1755	if (BPF_T_VALID(func))
1756	d->bd_tstamp = func;
1757	else
1758	error = EINVAL;
1759	}
1760	break;
1761
1762	case BIOCFEEDBACK:
1763	BPFD_LOCK(d);
1764	d->bd_feedback = (u_int )addr;
1765	BPFD_UNLOCK(d);
1766	break;
1767
1768	case BIOCLOCK:
1769	BPFD_LOCK(d);
1770	d->bd_locked = 1;
1771	BPFD_UNLOCK(d);
1772	break;
1773
1774	case FIONBIO: /* Non-blocking I/O */
1775	break;
1776
1777	#ifndef __rtems__
1778	case FIOASYNC: /* Send signal on receive packets */
1779	BPFD_LOCK(d);
1780	d->bd_async = (int )addr;
1781	BPFD_UNLOCK(d);
1782	break;
1783	#endif /* __rtems__ */
1784
1785	case FIOSETOWN:
1786	/*
1787	* XXX: Add some sort of locking here?
1788	* fsetown() can sleep.
1789	*/
1790	error = fsetown((int )addr, &d->bd_sigio);
1791	break;
1792
1793	case FIOGETOWN:
1794	BPFD_LOCK(d);
1795	(int )addr = fgetown(&d->bd_sigio);
1796	BPFD_UNLOCK(d);
1797	break;
1798
1799	/* This is deprecated, FIOSETOWN should be used instead. */
1800	case TIOCSPGRP:
1801	error = fsetown(-((int )addr), &d->bd_sigio);
1802	break;
1803
1804	/* This is deprecated, FIOGETOWN should be used instead. */
1805	case TIOCGPGRP:
1806	(int )addr = -fgetown(&d->bd_sigio);
1807	break;
1808
1809	case BIOCSRSIG: /* Set receive signal */
1810	{
1811	u_int sig;
1812
1813	sig = (u_int )addr;
1814
1815	if (sig >= NSIG)
1816	error = EINVAL;
1817	else {
1818	BPFD_LOCK(d);
1819	d->bd_sig = sig;
1820	BPFD_UNLOCK(d);
1821	}
1822	break;
1823	}
1824	case BIOCGRSIG:
1825	BPFD_LOCK(d);
1826	(u_int )addr = d->bd_sig;
1827	BPFD_UNLOCK(d);
1828	break;
1829
1830	case BIOCGETBUFMODE:
1831	BPFD_LOCK(d);
1832	(u_int )addr = d->bd_bufmode;
1833	BPFD_UNLOCK(d);
1834	break;
1835
1836	case BIOCSETBUFMODE:
1837	/*
1838	* Allow the buffering mode to be changed as long as we
1839	* haven't yet committed to a particular mode. Our
1840	* definition of commitment, for now, is whether or not a
1841	* buffer has been allocated or an interface attached, since
1842	* that's the point where things get tricky.
1843	*/
1844	switch ((u_int )addr) {
1845	case BPF_BUFMODE_BUFFER:
1846	break;
1847
1848	#ifndef __rtems__
1849	case BPF_BUFMODE_ZBUF:
1850	if (bpf_zerocopy_enable)
1851	break;
1852	/* FALLSTHROUGH */
1853	#endif /* __rtems__ */
1854
1855	default:
1856	CURVNET_RESTORE();
1857	return (EINVAL);
1858	}
1859
1860	BPFD_LOCK(d);
1861	if (d->bd_sbuf != NULL \|\| d->bd_hbuf != NULL \|\|
1862	d->bd_fbuf != NULL \|\| d->bd_bif != NULL) {
1863	BPFD_UNLOCK(d);
1864	CURVNET_RESTORE();
1865	return (EBUSY);
1866	}
1867	d->bd_bufmode = (u_int )addr;
1868	BPFD_UNLOCK(d);
1869	break;
1870
1871	case BIOCGETZMAX:
1872	error = bpf_ioctl_getzmax(td, d, (size_t *)addr);
1873	break;
1874
1875	case BIOCSETZBUF:
1876	error = bpf_ioctl_setzbuf(td, d, (struct bpf_zbuf *)addr);
1877	break;
1878
1879	case BIOCROTZBUF:
1880	error = bpf_ioctl_rotzbuf(td, d, (struct bpf_zbuf *)addr);
1881	break;
1882	}
1883	CURVNET_RESTORE();
1884	return (error);
1885	}
1886
1887	/*
1888	* Set d's packet filter program to fp. If this file already has a filter,
1889	* free it and replace it. Returns EINVAL for bogus requests.
1890	*
1891	* Note we need global lock here to serialize bpf_setf() and bpf_setif() calls
1892	* since reading d->bd_bif can't be protected by d or interface lock due to
1893	* lock order.
1894	*
1895	* Additionally, we have to acquire interface write lock due to bpf_mtap() uses
1896	* interface read lock to read all filers.
1897	*
1898	*/
1899	static int
1900	bpf_setf(struct bpf_d d, struct bpf_program fp, u_long cmd)
1901	{
1902	#ifdef COMPAT_FREEBSD32
1903	struct bpf_program fp_swab;
1904	struct bpf_program32 *fp32;
1905	#endif
1906	struct bpf_insn fcode, old;
1907	#ifdef BPF_JITTER
1908	bpf_jit_filter jfunc, ofunc;
1909	#endif
1910	size_t size;
1911	u_int flen;
1912	int need_upgrade;
1913
1914	#ifdef COMPAT_FREEBSD32
1915	switch (cmd) {
1916	case BIOCSETF32:
1917	case BIOCSETWF32:
1918	case BIOCSETFNR32:
1919	fp32 = (struct bpf_program32 *)fp;
1920	fp_swab.bf_len = fp32->bf_len;
1921	fp_swab.bf_insns = (struct bpf_insn *)(uintptr_t)fp32->bf_insns;
1922	fp = &fp_swab;
1923	switch (cmd) {
1924	case BIOCSETF32:
1925	cmd = BIOCSETF;
1926	break;
1927	case BIOCSETWF32:
1928	cmd = BIOCSETWF;
1929	break;
1930	}
1931	break;
1932	}
1933	#endif
1934
1935	fcode = NULL;
1936	#ifdef BPF_JITTER
1937	jfunc = ofunc = NULL;
1938	#endif
1939	need_upgrade = 0;
1940
1941	/*
1942	* Check new filter validness before acquiring any locks.
1943	* Allocate memory for new filter, if needed.
1944	*/
1945	flen = fp->bf_len;
1946	if (flen > bpf_maxinsns \|\| (fp->bf_insns == NULL && flen != 0))
1947	return (EINVAL);
1948	size = flen * sizeof(*fp->bf_insns);
1949	if (size > 0) {
1950	/* We're setting up new filter. Copy and check actual data. */
1951	fcode = malloc(size, M_BPF, M_WAITOK);
1952	if (copyin(fp->bf_insns, fcode, size) != 0 \|\|
1953	!bpf_validate(fcode, flen)) {
1954	free(fcode, M_BPF);
1955	return (EINVAL);
1956	}
1957	#ifdef BPF_JITTER
1958	/* Filter is copied inside fcode and is perfectly valid. */
1959	jfunc = bpf_jitter(fcode, flen);
1960	#endif
1961	}
1962
1963	BPF_LOCK();
1964
1965	/*
1966	* Set up new filter.
1967	* Protect filter change by interface lock.
1968	* Additionally, we are protected by global lock here.
1969	*/
1970	if (d->bd_bif != NULL)
1971	BPFIF_WLOCK(d->bd_bif);
1972	BPFD_LOCK(d);
1973	if (cmd == BIOCSETWF) {
1974	old = d->bd_wfilter;
1975	d->bd_wfilter = fcode;
1976	} else {
1977	old = d->bd_rfilter;
1978	d->bd_rfilter = fcode;
1979	#ifdef BPF_JITTER
1980	ofunc = d->bd_bfilter;
1981	d->bd_bfilter = jfunc;
1982	#endif
1983	if (cmd == BIOCSETF)
1984	reset_d(d);
1985
1986	need_upgrade = bpf_check_upgrade(cmd, d, fcode, flen);
1987	}
1988	BPFD_UNLOCK(d);
1989	if (d->bd_bif != NULL)
1990	BPFIF_WUNLOCK(d->bd_bif);
1991	if (old != NULL)
1992	free(old, M_BPF);
1993	#ifdef BPF_JITTER
1994	if (ofunc != NULL)
1995	bpf_destroy_jit_filter(ofunc);
1996	#endif
1997
1998	/* Move d to active readers list. */
1999	if (need_upgrade != 0)
2000	bpf_upgraded(d);
2001
2002	BPF_UNLOCK();
2003	return (0);
2004	}
2005
2006	/*
2007	* Detach a file from its current interface (if attached at all) and attach
2008	* to the interface indicated by the name stored in ifr.
2009	* Return an errno or 0.
2010	*/
2011	static int
2012	bpf_setif(struct bpf_d d, struct ifreq ifr)
2013	{
2014	struct bpf_if *bp;
2015	struct ifnet *theywant;
2016
2017	BPF_LOCK_ASSERT();
2018
2019	theywant = ifunit(ifr->ifr_name);
2020	if (theywant == NULL \|\| theywant->if_bpf == NULL)
2021	return (ENXIO);
2022
2023	bp = theywant->if_bpf;
2024
2025	/* Check if interface is not being detached from BPF */
2026	BPFIF_RLOCK(bp);
2027	if (bp->bif_flags & BPFIF_FLAG_DYING) {
2028	BPFIF_RUNLOCK(bp);
2029	return (ENXIO);
2030	}
2031	BPFIF_RUNLOCK(bp);
2032
2033	/*
2034	* At this point, we expect the buffer is already allocated. If not,
2035	* return an error.
2036	*/
2037	switch (d->bd_bufmode) {
2038	case BPF_BUFMODE_BUFFER:
2039	#ifndef __rtems__
2040	case BPF_BUFMODE_ZBUF:
2041	#endif /* __rtems__ */
2042	if (d->bd_sbuf == NULL)
2043	return (EINVAL);
2044	break;
2045
2046	default:
2047	panic("bpf_setif: bufmode %d", d->bd_bufmode);
2048	}
2049	if (bp != d->bd_bif)
2050	bpf_attachd(d, bp);
2051	BPFD_LOCK(d);
2052	reset_d(d);
2053	BPFD_UNLOCK(d);
2054	return (0);
2055	}
2056
2057	/*
2058	* Support for select() and poll() system calls
2059	*
2060	* Return true iff the specific operation will not block indefinitely.
2061	* Otherwise, return false but make a note that a selwakeup() must be done.
2062	*/
2063	static int
2064	#ifndef __rtems__
2065	bpfpoll(struct cdev dev, int events, struct thread td)
2066	#else /* __rtems__ */
2067	bpfpoll(struct bpf_d d, int events, struct thread td)
2068	#endif /* __rtems__ */
2069	{
2070	#ifndef __rtems__
2071	struct bpf_d *d;
2072	#endif /* __rtems__ */
2073	int revents;
2074
2075	if (devfs_get_cdevpriv((void **)&d) != 0 \|\| d->bd_bif == NULL)
2076	return (events &
2077	(POLLHUP\|POLLIN\|POLLRDNORM\|POLLOUT\|POLLWRNORM));
2078
2079	/*
2080	* Refresh PID associated with this descriptor.
2081	*/
2082	revents = events & (POLLOUT \| POLLWRNORM);
2083	BPFD_LOCK(d);
2084	BPF_PID_REFRESH(d, td);
2085	if (events & (POLLIN \| POLLRDNORM)) {
2086	if (bpf_ready(d))
2087	revents \|= events & (POLLIN \| POLLRDNORM);
2088	else {
2089	selrecord(td, &d->bd_sel);
2090	/* Start the read timeout if necessary. */
2091	if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
2092	callout_reset(&d->bd_callout, d->bd_rtout,
2093	bpf_timed_out, d);
2094	d->bd_state = BPF_WAITING;
2095	}
2096	}
2097	}
2098	BPFD_UNLOCK(d);
2099	return (revents);
2100	}
2101
2102	/*
2103	* Support for kevent() system call. Register EVFILT_READ filters and
2104	* reject all others.
2105	*/
2106	#ifdef __rtems__
2107	static
2108	#endif /* __rtems__ */
2109	int
2110	#ifndef __rtems__
2111	bpfkqfilter(struct cdev dev, struct knote kn)
2112	#else /* __rtems__ */
2113	bpfkqfilter(struct bpf_d d, struct knote kn)
2114	#endif /* __rtems__ */
2115	{
2116	#ifndef __rtems__
2117	struct bpf_d *d;
2118	#endif /* __rtems__ */
2119
2120	if (devfs_get_cdevpriv((void **)&d) != 0 \|\|
2121	kn->kn_filter != EVFILT_READ)
2122	return (1);
2123
2124	/*
2125	* Refresh PID associated with this descriptor.
2126	*/
2127	BPFD_LOCK(d);
2128	BPF_PID_REFRESH_CUR(d);
2129	kn->kn_fop = &bpfread_filtops;
2130	kn->kn_hook = d;
2131	knlist_add(&d->bd_sel.si_note, kn, 1);
2132	BPFD_UNLOCK(d);
2133
2134	return (0);
2135	}
2136
2137	static void
2138	filt_bpfdetach(struct knote *kn)
2139	{
2140	struct bpf_d d = (struct bpf_d )kn->kn_hook;
2141
2142	knlist_remove(&d->bd_sel.si_note, kn, 0);
2143	}
2144
2145	static int
2146	filt_bpfread(struct knote *kn, long hint)
2147	{
2148	struct bpf_d d = (struct bpf_d )kn->kn_hook;
2149	int ready;
2150
2151	BPFD_LOCK_ASSERT(d);
2152	ready = bpf_ready(d);
2153	if (ready) {
2154	kn->kn_data = d->bd_slen;
2155	/*
2156	* Ignore the hold buffer if it is being copied to user space.
2157	*/
2158	if (!d->bd_hbuf_in_use && d->bd_hbuf)
2159	kn->kn_data += d->bd_hlen;
2160	} else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
2161	callout_reset(&d->bd_callout, d->bd_rtout,
2162	bpf_timed_out, d);
2163	d->bd_state = BPF_WAITING;
2164	}
2165
2166	return (ready);
2167	}
2168
/*
 * Timestamp quality levels used by the tap routines.  The numeric order
 * matters: a timestamp is re-taken only when a descriptor asks for a
 * higher level than the one already obtained.
 */
#define	BPF_TSTAMP_NONE		0	/* no timestamp taken */
#define	BPF_TSTAMP_FAST		1	/* taken with getbinuptime() */
#define	BPF_TSTAMP_NORMAL	2	/* taken with binuptime() */
#define	BPF_TSTAMP_EXTERN	3	/* copied from an mbuf tag */
2173
2174	static int
2175	bpf_ts_quality(int tstype)
2176	{
2177
2178	if (tstype == BPF_T_NONE)
2179	return (BPF_TSTAMP_NONE);
2180	if ((tstype & BPF_T_FAST) != 0)
2181	return (BPF_TSTAMP_FAST);
2182
2183	return (BPF_TSTAMP_NORMAL);
2184	}
2185
2186	static int
2187	bpf_gettime(struct bintime bt, int tstype, struct mbuf m)
2188	{
2189	struct m_tag *tag;
2190	int quality;
2191
2192	quality = bpf_ts_quality(tstype);
2193	if (quality == BPF_TSTAMP_NONE)
2194	return (quality);
2195
2196	if (m != NULL) {
2197	tag = m_tag_locate(m, MTAG_BPF, MTAG_BPF_TIMESTAMP, NULL);
2198	if (tag != NULL) {
2199	bt = (struct bintime *)(tag + 1);
2200	return (BPF_TSTAMP_EXTERN);
2201	}
2202	}
2203	if (quality == BPF_TSTAMP_NORMAL)
2204	binuptime(bt);
2205	else
2206	getbinuptime(bt);
2207
2208	return (quality);
2209	}
2210
2211	/*
2212	* Incoming linkage from device drivers. Process the packet pkt, of length
2213	* pktlen, which is stored in a contiguous buffer. The packet is parsed
2214	* by each process' filter, and if accepted, stashed into the corresponding
2215	* buffer.
2216	*/
2217	void
2218	bpf_tap(struct bpf_if bp, u_char pkt, u_int pktlen)
2219	{
2220	struct bintime bt;
2221	struct bpf_d *d;
2222	#ifdef BPF_JITTER
2223	bpf_jit_filter *bf;
2224	#endif
2225	u_int slen;
2226	int gottime;
2227
2228	gottime = BPF_TSTAMP_NONE;
2229
2230	BPFIF_RLOCK(bp);
2231
2232	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
2233	/*
2234	* We are not using any locks for d here because:
2235	* 1) any filter change is protected by interface
2236	* write lock
2237	* 2) destroying/detaching d is protected by interface
2238	* write lock, too
2239	*/
2240
2241	/* XXX: Do not protect counter for the sake of performance. */
2242	++d->bd_rcount;
2243	/*
2244	* NB: We dont call BPF_CHECK_DIRECTION() here since there is no
2245	* way for the caller to indiciate to us whether this packet
2246	* is inbound or outbound. In the bpf_mtap() routines, we use
2247	* the interface pointers on the mbuf to figure it out.
2248	*/
2249	#ifdef BPF_JITTER
2250	bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
2251	if (bf != NULL)
2252	slen = (*(bf->func))(pkt, pktlen, pktlen);
2253	else
2254	#endif
2255	slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
2256	if (slen != 0) {
2257	/*
2258	* Filter matches. Let's to acquire write lock.
2259	*/
2260	BPFD_LOCK(d);
2261
2262	d->bd_fcount++;
2263	if (gottime < bpf_ts_quality(d->bd_tstamp))
2264	gottime = bpf_gettime(&bt, d->bd_tstamp, NULL);
2265	#ifdef MAC
2266	if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
2267	#endif
2268	catchpacket(d, pkt, pktlen, slen,
2269	bpf_append_bytes, &bt);
2270	BPFD_UNLOCK(d);
2271	}
2272	}
2273	BPFIF_RUNLOCK(bp);
2274	}
2275
/*
 * True when descriptor d must skip a packet: d wants inbound packets only
 * and r differs from i, or d wants outbound packets only and r equals i.
 * The tap routines pass the mbuf's rcvif as r and the tapped interface as i.
 */
#define	BPF_CHECK_DIRECTION(d, r, i)				\
	    (((d)->bd_direction == BPF_D_IN && (r) != (i)) ||	\
	    ((d)->bd_direction == BPF_D_OUT && (r) == (i)))
2279
2280	/*
2281	* Incoming linkage from device drivers, when packet is in an mbuf chain.
2282	* Locking model is explained in bpf_tap().
2283	*/
2284	void
2285	bpf_mtap(struct bpf_if bp, struct mbuf m)
2286	{
2287	struct bintime bt;
2288	struct bpf_d *d;
2289	#ifdef BPF_JITTER
2290	bpf_jit_filter *bf;
2291	#endif
2292	u_int pktlen, slen;
2293	int gottime;
2294
2295	/* Skip outgoing duplicate packets. */
2296	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
2297	m->m_flags &= ~M_PROMISC;
2298	return;
2299	}
2300
2301	pktlen = m_length(m, NULL);
2302	gottime = BPF_TSTAMP_NONE;
2303
2304	BPFIF_RLOCK(bp);
2305
2306	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
2307	if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
2308	continue;
2309	++d->bd_rcount;
2310	#ifdef BPF_JITTER
2311	bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
2312	/* XXX We cannot handle multiple mbufs. */
2313	if (bf != NULL && m->m_next == NULL)
2314	slen = ((bf->func))(mtod(m, u_char ), pktlen, pktlen);
2315	else
2316	#endif
2317	slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
2318	if (slen != 0) {
2319	BPFD_LOCK(d);
2320
2321	d->bd_fcount++;
2322	if (gottime < bpf_ts_quality(d->bd_tstamp))
2323	gottime = bpf_gettime(&bt, d->bd_tstamp, m);
2324	#ifdef MAC
2325	if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
2326	#endif
2327	catchpacket(d, (u_char *)m, pktlen, slen,
2328	bpf_append_mbuf, &bt);
2329	BPFD_UNLOCK(d);
2330	}
2331	}
2332	BPFIF_RUNLOCK(bp);
2333	}
2334
2335	/*
2336	* Incoming linkage from device drivers, when packet is in
2337	* an mbuf chain and to be prepended by a contiguous header.
2338	*/
2339	void
2340	bpf_mtap2(struct bpf_if bp, void data, u_int dlen, struct mbuf *m)
2341	{
2342	struct bintime bt;
2343	struct mbuf mb;
2344	struct bpf_d *d;
2345	u_int pktlen, slen;
2346	int gottime;
2347
2348	/* Skip outgoing duplicate packets. */
2349	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
2350	m->m_flags &= ~M_PROMISC;
2351	return;
2352	}
2353
2354	pktlen = m_length(m, NULL);
2355	/*
2356	* Craft on-stack mbuf suitable for passing to bpf_filter.
2357	* Note that we cut corners here; we only setup what's
2358	* absolutely needed--this mbuf should never go anywhere else.
2359	*/
2360	mb.m_next = m;
2361	mb.m_data = data;
2362	mb.m_len = dlen;
2363	pktlen += dlen;
2364
2365	gottime = BPF_TSTAMP_NONE;
2366
2367	BPFIF_RLOCK(bp);
2368
2369	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
2370	if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
2371	continue;
2372	++d->bd_rcount;
2373	slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
2374	if (slen != 0) {
2375	BPFD_LOCK(d);
2376
2377	d->bd_fcount++;
2378	if (gottime < bpf_ts_quality(d->bd_tstamp))
2379	gottime = bpf_gettime(&bt, d->bd_tstamp, m);
2380	#ifdef MAC
2381	if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
2382	#endif
2383	catchpacket(d, (u_char *)&mb, pktlen, slen,
2384	bpf_append_mbuf, &bt);
2385	BPFD_UNLOCK(d);
2386	}
2387	}
2388	BPFIF_RUNLOCK(bp);
2389	}
2390
/* The helper macros above are private to the tap routines; retire them. */
#undef	BPF_CHECK_DIRECTION

#undef	BPF_TSTAMP_NONE
#undef	BPF_TSTAMP_FAST
#undef	BPF_TSTAMP_NORMAL
#undef	BPF_TSTAMP_EXTERN
2397
2398	static int
2399	bpf_hdrlen(struct bpf_d *d)
2400	{
2401	int hdrlen;
2402
2403	hdrlen = d->bd_bif->bif_hdrlen;
2404	#ifndef BURN_BRIDGES
2405	if (d->bd_tstamp == BPF_T_NONE \|\|
2406	BPF_T_FORMAT(d->bd_tstamp) == BPF_T_MICROTIME)
2407	#ifdef COMPAT_FREEBSD32
2408	if (d->bd_compat32)
2409	hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr32);
2410	else
2411	#endif
2412	hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr);
2413	else
2414	#endif
2415	hdrlen += SIZEOF_BPF_HDR(struct bpf_xhdr);
2416	#ifdef COMPAT_FREEBSD32
2417	if (d->bd_compat32)
2418	hdrlen = BPF_WORDALIGN32(hdrlen);
2419	else
2420	#endif
2421	hdrlen = BPF_WORDALIGN(hdrlen);
2422
2423	return (hdrlen - d->bd_bif->bif_hdrlen);
2424	}
2425
2426	static void
2427	bpf_bintime2ts(struct bintime bt, struct bpf_ts ts, int tstype)
2428	{
2429	struct bintime bt2, boottimebin;
2430	struct timeval tsm;
2431	struct timespec tsn;
2432
2433	if ((tstype & BPF_T_MONOTONIC) == 0) {
2434	bt2 = *bt;
2435	bintime_add(&bt2, &boottimebin);
2436	bt = &bt2;
2437	}
2438	switch (BPF_T_FORMAT(tstype)) {
2439	case BPF_T_MICROTIME:
2440	bintime2timeval(bt, &tsm);
2441	ts->bt_sec = tsm.tv_sec;
2442	ts->bt_frac = tsm.tv_usec;
2443	break;
2444	case BPF_T_NANOTIME:
2445	bintime2timespec(bt, &tsn);
2446	ts->bt_sec = tsn.tv_sec;
2447	ts->bt_frac = tsn.tv_nsec;
2448	break;
2449	case BPF_T_BINTIME:
2450	ts->bt_sec = bt->sec;
2451	ts->bt_frac = bt->frac;
2452	break;
2453	}
2454	}
2455
2456	/*
2457	* Move the packet data from interface memory (pkt) into the
2458	* store buffer. "cpfn" is the routine called to do the actual data
2459	* transfer. bcopy is passed in to copy contiguous chunks, while
2460	* bpf_append_mbuf is passed in to copy mbuf chains. In the latter case,
2461	* pkt is really an mbuf.
2462	*/
2463	static void
2464	catchpacket(struct bpf_d d, u_char pkt, u_int pktlen, u_int snaplen,
2465	void (cpfn)(struct bpf_d , caddr_t, u_int, void *, u_int),
2466	struct bintime *bt)
2467	{
2468	struct bpf_xhdr hdr;
2469	#ifndef BURN_BRIDGES
2470	struct bpf_hdr hdr_old;
2471	#ifdef COMPAT_FREEBSD32
2472	struct bpf_hdr32 hdr32_old;
2473	#endif
2474	#endif
2475	int caplen, curlen, hdrlen, totlen;
2476	int do_wakeup = 0;
2477	int do_timestamp;
2478	int tstype;
2479
2480	BPFD_LOCK_ASSERT(d);
2481
2482	/*
2483	* Detect whether user space has released a buffer back to us, and if
2484	* so, move it from being a hold buffer to a free buffer. This may
2485	* not be the best place to do it (for example, we might only want to
2486	* run this check if we need the space), but for now it's a reliable
2487	* spot to do it.
2488	*/
2489	if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) {
2490	d->bd_fbuf = d->bd_hbuf;
2491	d->bd_hbuf = NULL;
2492	d->bd_hlen = 0;
2493	bpf_buf_reclaimed(d);
2494	}
2495
2496	/*
2497	* Figure out how many bytes to move. If the packet is
2498	* greater or equal to the snapshot length, transfer that
2499	* much. Otherwise, transfer the whole packet (unless
2500	* we hit the buffer size limit).
2501	*/
2502	hdrlen = bpf_hdrlen(d);
2503	totlen = hdrlen + min(snaplen, pktlen);
2504	if (totlen > d->bd_bufsize)
2505	totlen = d->bd_bufsize;
2506
2507	/*
2508	* Round up the end of the previous packet to the next longword.
2509	*
2510	* Drop the packet if there's no room and no hope of room
2511	* If the packet would overflow the storage buffer or the storage
2512	* buffer is considered immutable by the buffer model, try to rotate
2513	* the buffer and wakeup pending processes.
2514	*/
2515	#ifdef COMPAT_FREEBSD32
2516	if (d->bd_compat32)
2517	curlen = BPF_WORDALIGN32(d->bd_slen);
2518	else
2519	#endif
2520	curlen = BPF_WORDALIGN(d->bd_slen);
2521	if (curlen + totlen > d->bd_bufsize \|\| !bpf_canwritebuf(d)) {
2522	if (d->bd_fbuf == NULL) {
2523	/*
2524	* There's no room in the store buffer, and no
2525	* prospect of room, so drop the packet. Notify the
2526	* buffer model.
2527	*/
2528	bpf_buffull(d);
2529	++d->bd_dcount;
2530	return;
2531	}
2532	KASSERT(!d->bd_hbuf_in_use, ("hold buffer is in use"));
2533	ROTATE_BUFFERS(d);
2534	do_wakeup = 1;
2535	curlen = 0;
2536	} else if (d->bd_immediate \|\| d->bd_state == BPF_TIMED_OUT)
2537	/*
2538	* Immediate mode is set, or the read timeout has already
2539	* expired during a select call. A packet arrived, so the
2540	* reader should be woken up.
2541	*/
2542	do_wakeup = 1;
2543	caplen = totlen - hdrlen;
2544	tstype = d->bd_tstamp;
2545	do_timestamp = tstype != BPF_T_NONE;
2546	#ifndef BURN_BRIDGES
2547	if (tstype == BPF_T_NONE \|\| BPF_T_FORMAT(tstype) == BPF_T_MICROTIME) {
2548	struct bpf_ts ts;
2549	if (do_timestamp)
2550	bpf_bintime2ts(bt, &ts, tstype);
2551	#ifdef COMPAT_FREEBSD32
2552	if (d->bd_compat32) {
2553	bzero(&hdr32_old, sizeof(hdr32_old));
2554	if (do_timestamp) {
2555	hdr32_old.bh_tstamp.tv_sec = ts.bt_sec;
2556	hdr32_old.bh_tstamp.tv_usec = ts.bt_frac;
2557	}
2558	hdr32_old.bh_datalen = pktlen;
2559	hdr32_old.bh_hdrlen = hdrlen;
2560	hdr32_old.bh_caplen = caplen;
2561	bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr32_old,
2562	sizeof(hdr32_old));
2563	goto copy;
2564	}
2565	#endif
2566	bzero(&hdr_old, sizeof(hdr_old));
2567	if (do_timestamp) {
2568	hdr_old.bh_tstamp.tv_sec = ts.bt_sec;
2569	hdr_old.bh_tstamp.tv_usec = ts.bt_frac;
2570	}
2571	hdr_old.bh_datalen = pktlen;
2572	hdr_old.bh_hdrlen = hdrlen;
2573	hdr_old.bh_caplen = caplen;
2574	bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr_old,
2575	sizeof(hdr_old));
2576	goto copy;
2577	}
2578	#endif
2579
2580	/*
2581	* Append the bpf header. Note we append the actual header size, but
2582	* move forward the length of the header plus padding.
2583	*/
2584	bzero(&hdr, sizeof(hdr));
2585	if (do_timestamp)
2586	bpf_bintime2ts(bt, &hdr.bh_tstamp, tstype);
2587	hdr.bh_datalen = pktlen;
2588	hdr.bh_hdrlen = hdrlen;
2589	hdr.bh_caplen = caplen;
2590	bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr, sizeof(hdr));
2591
2592	/*
2593	* Copy the packet data into the store buffer and update its length.
2594	*/
2595	#ifndef BURN_BRIDGES
2596	copy:
2597	#endif
2598	(*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, caplen);
2599	d->bd_slen = curlen + totlen;
2600
2601	if (do_wakeup)
2602	bpf_wakeup(d);
2603	}
2604
2605	/*
2606	* Free buffers currently in use by a descriptor.
2607	* Called on close.
2608	*/
2609	static void
2610	bpf_freed(struct bpf_d *d)
2611	{
2612
2613	/*
2614	* We don't need to lock out interrupts since this descriptor has
2615	* been detached from its interface and it yet hasn't been marked
2616	* free.
2617	*/
2618	bpf_free(d);
2619	if (d->bd_rfilter != NULL) {
2620	free((caddr_t)d->bd_rfilter, M_BPF);
2621	#ifdef BPF_JITTER
2622	if (d->bd_bfilter != NULL)
2623	bpf_destroy_jit_filter(d->bd_bfilter);
2624	#endif
2625	}
2626	if (d->bd_wfilter != NULL)
2627	free((caddr_t)d->bd_wfilter, M_BPF);
2628	mtx_destroy(&d->bd_lock);
2629	}
2630
2631	/*
2632	* Attach an interface to bpf. dlt is the link layer type; hdrlen is the
2633	* fixed size of the link header (variable length headers not yet supported).
2634	*/
2635	void
2636	bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
2637	{
2638
2639	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
2640	}
2641
2642	/*
2643	* Attach an interface to bpf. ifp is a pointer to the structure
2644	* defining the interface to be attached, dlt is the link layer type,
2645	* and hdrlen is the fixed size of the link header (variable length
2646	* headers are not yet supporrted).
2647	*/
2648	void
2649	bpfattach2(struct ifnet ifp, u_int dlt, u_int hdrlen, struct bpf_if *driverp)
2650	{
2651	struct bpf_if *bp;
2652
2653	bp = malloc(sizeof(*bp), M_BPF, M_NOWAIT \| M_ZERO);
2654	if (bp == NULL)
2655	panic("bpfattach");
2656
2657	LIST_INIT(&bp->bif_dlist);
2658	LIST_INIT(&bp->bif_wlist);
2659	bp->bif_ifp = ifp;
2660	bp->bif_dlt = dlt;
2661	rw_init(&bp->bif_lock, "bpf interface lock");
2662	KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized"));
2663	*driverp = bp;
2664
2665	BPF_LOCK();
2666	LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
2667	BPF_UNLOCK();
2668
2669	bp->bif_hdrlen = hdrlen;
2670
2671	if (bootverbose && IS_DEFAULT_VNET(curvnet))
2672	if_printf(ifp, "bpf attached\n");
2673	}
2674
2675	#ifdef VIMAGE
2676	/*
2677	* When moving interfaces between vnet instances we need a way to
2678	* query the dlt and hdrlen before detach so we can re-attch the if_bpf
2679	* after the vmove. We unfortunately have no device driver infrastructure
2680	* to query the interface for these values after creation/attach, thus
2681	* add this as a workaround.
2682	*/
2683	int
2684	bpf_get_bp_params(struct bpf_if bp, u_int bif_dlt, u_int *bif_hdrlen)
2685	{
2686
2687	if (bp == NULL)
2688	return (ENXIO);
2689	if (bif_dlt == NULL && bif_hdrlen == NULL)
2690	return (0);
2691
2692	if (bif_dlt != NULL)
2693	*bif_dlt = bp->bif_dlt;
2694	if (bif_hdrlen != NULL)
2695	*bif_hdrlen = bp->bif_hdrlen;
2696
2697	return (0);
2698	}
2699	#endif
2700
2701	/*
2702	* Detach bpf from an interface. This involves detaching each descriptor
2703	* associated with the interface. Notify each descriptor as it's detached
2704	* so that any sleepers wake up and get ENXIO.
2705	*/
2706	void
2707	bpfdetach(struct ifnet *ifp)
2708	{
2709	struct bpf_if bp, bp_temp;
2710	struct bpf_d *d;
2711	int ndetached;
2712
2713	ndetached = 0;
2714
2715	BPF_LOCK();
2716	/* Find all bpf_if struct's which reference ifp and detach them. */
2717	LIST_FOREACH_SAFE(bp, &bpf_iflist, bif_next, bp_temp) {
2718	if (ifp != bp->bif_ifp)
2719	continue;
2720
2721	LIST_REMOVE(bp, bif_next);
2722	/* Add to to-be-freed list */
2723	LIST_INSERT_HEAD(&bpf_freelist, bp, bif_next);
2724
2725	ndetached++;
2726	/*
2727	* Delay freeing bp till interface is detached
2728	* and all routes through this interface are removed.
2729	* Mark bp as detached to restrict new consumers.
2730	*/
2731	BPFIF_WLOCK(bp);
2732	bp->bif_flags \|= BPFIF_FLAG_DYING;
2733	BPFIF_WUNLOCK(bp);
2734
2735	CTR4(KTR_NET, "%s: sheduling free for encap %d (%p) for if %p",
2736	__func__, bp->bif_dlt, bp, ifp);
2737
2738	/* Free common descriptors */
2739	while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
2740	bpf_detachd_locked(d);
2741	BPFD_LOCK(d);
2742	bpf_wakeup(d);
2743	BPFD_UNLOCK(d);
2744	}
2745
2746	/* Free writer-only descriptors */
2747	while ((d = LIST_FIRST(&bp->bif_wlist)) != NULL) {
2748	bpf_detachd_locked(d);
2749	BPFD_LOCK(d);
2750	bpf_wakeup(d);
2751	BPFD_UNLOCK(d);
2752	}
2753	}
2754	BPF_UNLOCK();
2755
2756	#ifdef INVARIANTS
2757	if (ndetached == 0)
2758	printf("bpfdetach: %s was not attached\n", ifp->if_xname);
2759	#endif
2760	}
2761
2762	/*
2763	* Interface departure handler.
2764	* Note departure event does not guarantee interface is going down.
2765	* Interface renaming is currently done via departure/arrival event set.
2766	*
2767	* Departure handled is called after all routes pointing to
2768	* given interface are removed and interface is in down state
2769	* restricting any packets to be sent/received. We assume it is now safe
2770	* to free data allocated by BPF.
2771	*/
2772	static void
2773	bpf_ifdetach(void arg __unused, struct ifnet ifp)
2774	{
2775	struct bpf_if bp, bp_temp;
2776	int nmatched = 0;
2777
2778	/* Ignore ifnet renaming. */
2779	if (ifp->if_flags & IFF_RENAMING)
2780	return;
2781
2782	BPF_LOCK();
2783	/*
2784	* Find matching entries in free list.
2785	* Nothing should be found if bpfdetach() was not called.
2786	*/
2787	LIST_FOREACH_SAFE(bp, &bpf_freelist, bif_next, bp_temp) {
2788	if (ifp != bp->bif_ifp)
2789	continue;
2790
2791	CTR3(KTR_NET, "%s: freeing BPF instance %p for interface %p",
2792	__func__, bp, ifp);
2793
2794	LIST_REMOVE(bp, bif_next);
2795
2796	rw_destroy(&bp->bif_lock);
2797	free(bp, M_BPF);
2798
2799	nmatched++;
2800	}
2801	BPF_UNLOCK();
2802
2803	/*
2804	* Note that we cannot zero other pointers to
2805	* custom DLTs possibly used by given interface.
2806	*/
2807	if (nmatched != 0)
2808	ifp->if_bpf = NULL;
2809	}
2810
2811	/*
2812	* Get a list of available data link type of the interface.
2813	*/
2814	static int
2815	bpf_getdltlist(struct bpf_d d, struct bpf_dltlist bfl)
2816	{
2817	struct ifnet *ifp;
2818	struct bpf_if *bp;
2819	u_int *lst;
2820	int error, n, n1;
2821
2822	BPF_LOCK_ASSERT();
2823
2824	ifp = d->bd_bif->bif_ifp;
2825	again:
2826	n1 = 0;
2827	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
2828	if (bp->bif_ifp == ifp)
2829	n1++;
2830	}
2831	if (bfl->bfl_list == NULL) {
2832	bfl->bfl_len = n1;
2833	return (0);
2834	}
2835	if (n1 > bfl->bfl_len)
2836	return (ENOMEM);
2837	BPF_UNLOCK();
2838	lst = malloc(n1 * sizeof(u_int), M_TEMP, M_WAITOK);
2839	n = 0;
2840	BPF_LOCK();
2841	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
2842	if (bp->bif_ifp != ifp)
2843	continue;
2844	if (n >= n1) {
2845	free(lst, M_TEMP);
2846	goto again;
2847	}
2848	lst[n] = bp->bif_dlt;
2849	n++;
2850	}
2851	BPF_UNLOCK();
2852	error = copyout(lst, bfl->bfl_list, sizeof(u_int) * n);
2853	free(lst, M_TEMP);
2854	BPF_LOCK();
2855	bfl->bfl_len = n;
2856	return (error);
2857	}
2858
2859	/*
2860	* Set the data link type of a BPF instance.
2861	*/
2862	static int
2863	bpf_setdlt(struct bpf_d *d, u_int dlt)
2864	{
2865	int error, opromisc;
2866	struct ifnet *ifp;
2867	struct bpf_if *bp;
2868
2869	BPF_LOCK_ASSERT();
2870
2871	if (d->bd_bif->bif_dlt == dlt)
2872	return (0);
2873	ifp = d->bd_bif->bif_ifp;
2874
2875	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
2876	if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
2877	break;
2878	}
2879
2880	if (bp != NULL) {
2881	opromisc = d->bd_promisc;
2882	bpf_attachd(d, bp);
2883	BPFD_LOCK(d);
2884	reset_d(d);
2885	BPFD_UNLOCK(d);
2886	if (opromisc) {
2887	error = ifpromisc(bp->bif_ifp, 1);
2888	if (error)
2889	if_printf(bp->bif_ifp,
2890	"bpf_setdlt: ifpromisc failed (%d)\n",
2891	error);
2892	else
2893	d->bd_promisc = 1;
2894	}
2895	}
2896	return (bp == NULL ? EINVAL : 0);
2897	}
2898	#ifdef __rtems__
2899	static struct bpf_d *
2900	bpf_imfs_get_context_by_iop(const rtems_libio_t *iop)
2901	{
2902	return iop->data1;
2903	}
2904
2905	static int
2906	bpf_imfs_open(rtems_libio_t iop, const char path, int oflag, mode_t mode)
2907	{
2908	struct bpf_d *d;
2909
2910	d = bpfopen(NULL, oflag + 1, 0, NULL);
2911	iop->data1 = d;
2912
2913	if (d != NULL) {
2914	return (0);
2915	} else {
2916	rtems_set_errno_and_return_minus_one(ENOMEM);
2917	}
2918	}
2919
2920	static int
2921	bpf_imfs_close(rtems_libio_t *iop)
2922	{
2923	struct bpf_d *d = bpf_imfs_get_context_by_iop(iop);
2924
2925	bpf_dtor(d);
2926
2927	return (0);
2928	}
2929
2930	static ssize_t
2931	bpf_imfs_readv(rtems_libio_t iop, const struct iovec iov, int iovcnt, ssize_t total)
2932	{
2933	struct bpf_d *d = bpf_imfs_get_context_by_iop(iop);
2934	struct thread *td = rtems_bsd_get_curthread_or_null();
2935	struct uio uio = {
2936	.uio_iov = iov,
2937	.uio_iovcnt = iovcnt,
2938	.uio_offset = 0,
2939	.uio_resid = total,
2940	.uio_segflg = UIO_USERSPACE,
2941	.uio_rw = UIO_READ,
2942	.uio_td = td
2943	};
2944	int error;
2945
2946	if (td != NULL) {
2947	error = bpfread(d, &uio,
2948	rtems_libio_to_fcntl_flags(iop->flags));
2949	} else {
2950	error = ENOMEM;
2951	}
2952
2953	if (error == 0) {
2954	return (total - uio.uio_resid);
2955	} else {
2956	rtems_set_errno_and_return_minus_one(error);
2957	}
2958	}
2959
2960	static ssize_t
2961	bpf_imfs_read(rtems_libio_t iop, void buffer, size_t count)
2962	{
2963	struct iovec iov = {
2964	.iov_base = buffer,
2965	.iov_len = count
2966	};
2967
2968	return bpf_imfs_readv(iop, &iov, 1, count);
2969	}
2970
2971	static ssize_t
2972	bpf_imfs_writev(rtems_libio_t iop, const struct iovec iov, int iovcnt, ssize_t total)
2973	{
2974	struct bpf_d *d = bpf_imfs_get_context_by_iop(iop);
2975	struct thread *td = rtems_bsd_get_curthread_or_null();
2976	struct uio uio = {
2977	.uio_iov = iov,
2978	.uio_iovcnt = iovcnt,
2979	.uio_offset = 0,
2980	.uio_resid = total,
2981	.uio_segflg = UIO_USERSPACE,
2982	.uio_rw = UIO_WRITE,
2983	.uio_td = td
2984	};
2985	int error;
2986
2987	if (td != NULL) {
2988	error = bpfwrite(d, &uio,
2989	rtems_libio_to_fcntl_flags(iop->flags));
2990	} else {
2991	error = ENOMEM;
2992	}
2993
2994	if (error == 0) {
2995	return (total - uio.uio_resid);
2996	} else {
2997	rtems_set_errno_and_return_minus_one(error);
2998	}
2999	}
3000
3001	static ssize_t
3002	bpf_imfs_write(rtems_libio_t iop, const void buffer, size_t count)
3003	{
3004	struct iovec iov = {
3005	.iov_base = buffer,
3006	.iov_len = count
3007	};
3008
3009	return bpf_imfs_writev(iop, &iov, 1, count);
3010	}
3011
3012	static int
3013	bpf_imfs_ioctl(rtems_libio_t iop, ioctl_command_t request, void buffer)
3014	{
3015	struct bpf_d *d = bpf_imfs_get_context_by_iop(iop);
3016	struct thread *td = rtems_bsd_get_curthread_or_null();
3017	int error;
3018
3019	if (td != 0) {
3020	error = bpfioctl(d, request, buffer, 0, td);
3021	} else {
3022	error = ENOMEM;
3023	}
3024
3025	return rtems_bsd_error_to_status_and_errno(error);
3026	}
3027
3028	static int
3029	bpf_imfs_poll(rtems_libio_t *iop, int events)
3030	{
3031	struct bpf_d *d = bpf_imfs_get_context_by_iop(iop);
3032
3033	return (bpfpoll(d, events, rtems_bsd_get_curthread_or_wait_forever()));
3034	}
3035
3036	static int
3037	bpf_imfs_kqfilter(rtems_libio_t iop, struct knote kn)
3038	{
3039	struct bpf_d *d = bpf_imfs_get_context_by_iop(iop);
3040
3041	return (bpfkqfilter(d, kn));
3042	}
3043
3044	static const rtems_filesystem_file_handlers_r bpf_imfs_handlers = {
3045	.open_h = bpf_imfs_open,
3046	.close_h = bpf_imfs_close,
3047	.read_h = bpf_imfs_read,
3048	.write_h = bpf_imfs_write,
3049	.ioctl_h = bpf_imfs_ioctl,
3050	.lseek_h = rtems_filesystem_default_lseek_file,
3051	.fstat_h = rtems_filesystem_default_fstat,
3052	.ftruncate_h = rtems_filesystem_default_ftruncate,
3053	.fsync_h = rtems_filesystem_default_fsync_or_fdatasync,
3054	.fdatasync_h = rtems_filesystem_default_fsync_or_fdatasync,
3055	.fcntl_h = rtems_filesystem_default_fcntl,
3056	.poll_h = bpf_imfs_poll,
3057	.kqfilter_h = bpf_imfs_kqfilter,
3058	.readv_h = bpf_imfs_readv,
3059	.writev_h = bpf_imfs_writev
3060	};
3061
3062	static const IMFS_node_control bpf_imfs_control = IMFS_GENERIC_INITIALIZER(
3063	&bpf_imfs_handlers, IMFS_node_initialize_generic,
3064	IMFS_node_destroy_default);
3065	#endif /* __rtems__ */
3066
3067	static void
3068	bpf_drvinit(void *unused)
3069	{
3070	#ifndef __rtems__
3071	struct cdev *dev;
3072	#else /* __rtems__ */
3073	mode_t mode = S_IFCHR \| S_IRWXU \| S_IRWXG \| S_IRWXO;
3074	int rv;
3075	#endif /* __rtems__ */
3076
3077	mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF);
3078	LIST_INIT(&bpf_iflist);
3079	LIST_INIT(&bpf_freelist);
3080
3081	#ifndef __rtems__
3082	dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf");
3083	/* For compatibility */
3084	make_dev_alias(dev, "bpf0");
3085	#else /* __rtems__ */
3086	rv = IMFS_make_generic_node("/dev/bpf", mode, &bpf_imfs_control, NULL);
3087	BSD_ASSERT(rv == 0);
3088	rv = symlink("/dev/bpf", "/dev/bpf0");
3089	BSD_ASSERT(rv == 0);
3090	#endif /* __rtems__ */
3091
3092	/* Register interface departure handler */
3093	bpf_ifdetach_cookie = EVENTHANDLER_REGISTER(
3094	ifnet_departure_event, bpf_ifdetach, NULL,
3095	EVENTHANDLER_PRI_ANY);
3096	}
3097
3098	/*
3099	* Zero out the various packet counters associated with all of the bpf
3100	* descriptors. At some point, we will probably want to get a bit more
3101	* granular and allow the user to specify descriptors to be zeroed.
3102	*/
3103	static void
3104	bpf_zero_counters(void)
3105	{
3106	struct bpf_if *bp;
3107	struct bpf_d *bd;
3108
3109	BPF_LOCK();
3110	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
3111	BPFIF_RLOCK(bp);
3112	LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
3113	BPFD_LOCK(bd);
3114	bd->bd_rcount = 0;
3115	bd->bd_dcount = 0;
3116	bd->bd_fcount = 0;
3117	bd->bd_wcount = 0;
3118	bd->bd_wfcount = 0;
3119	bd->bd_zcopy = 0;
3120	BPFD_UNLOCK(bd);
3121	}
3122	BPFIF_RUNLOCK(bp);
3123	}
3124	BPF_UNLOCK();
3125	}
3126
3127	/*
3128	* Fill filter statistics
3129	*/
3130	static void
3131	bpfstats_fill_xbpf(struct xbpf_d d, struct bpf_d bd)
3132	{
3133
3134	bzero(d, sizeof(*d));
3135	BPFD_LOCK_ASSERT(bd);
3136	d->bd_structsize = sizeof(*d);
3137	/* XXX: reading should be protected by global lock */
3138	d->bd_immediate = bd->bd_immediate;
3139	d->bd_promisc = bd->bd_promisc;
3140	d->bd_hdrcmplt = bd->bd_hdrcmplt;
3141	d->bd_direction = bd->bd_direction;
3142	d->bd_feedback = bd->bd_feedback;
3143	#ifndef __rtems__
3144	d->bd_async = bd->bd_async;
3145	#endif /* __rtems__ */
3146	d->bd_rcount = bd->bd_rcount;
3147	d->bd_dcount = bd->bd_dcount;
3148	d->bd_fcount = bd->bd_fcount;
3149	d->bd_sig = bd->bd_sig;
3150	d->bd_slen = bd->bd_slen;
3151	d->bd_hlen = bd->bd_hlen;
3152	d->bd_bufsize = bd->bd_bufsize;
3153	d->bd_pid = bd->bd_pid;
3154	strlcpy(d->bd_ifname,
3155	bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
3156	d->bd_locked = bd->bd_locked;
3157	d->bd_wcount = bd->bd_wcount;
3158	d->bd_wdcount = bd->bd_wdcount;
3159	d->bd_wfcount = bd->bd_wfcount;
3160	d->bd_zcopy = bd->bd_zcopy;
3161	d->bd_bufmode = bd->bd_bufmode;
3162	}
3163
3164	/*
3165	* Handle `netstat -B' stats request
3166	*/
3167	static int
3168	bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
3169	{
3170	static const struct xbpf_d zerostats;
3171	struct xbpf_d xbdbuf, xbd, tempstats;
3172	int index, error;
3173	struct bpf_if *bp;
3174	struct bpf_d *bd;
3175
3176	/*
3177	* XXX This is not technically correct. It is possible for non
3178	* privileged users to open bpf devices. It would make sense
3179	* if the users who opened the devices were able to retrieve
3180	* the statistics for them, too.
3181	*/
3182	error = priv_check(req->td, PRIV_NET_BPF);
3183	if (error)
3184	return (error);
3185	/*
3186	* Check to see if the user is requesting that the counters be
3187	* zeroed out. Explicitly check that the supplied data is zeroed,
3188	* as we aren't allowing the user to set the counters currently.
3189	*/
3190	if (req->newptr != NULL) {
3191	if (req->newlen != sizeof(tempstats))
3192	return (EINVAL);
3193	memset(&tempstats, 0, sizeof(tempstats));
3194	error = SYSCTL_IN(req, &tempstats, sizeof(tempstats));
3195	if (error)
3196	return (error);
3197	if (bcmp(&tempstats, &zerostats, sizeof(tempstats)) != 0)
3198	return (EINVAL);
3199	bpf_zero_counters();
3200	return (0);
3201	}
3202	if (req->oldptr == NULL)
3203	return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd)));
3204	if (bpf_bpfd_cnt == 0)
3205	return (SYSCTL_OUT(req, 0, 0));
3206	xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK);
3207	BPF_LOCK();
3208	if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) {
3209	BPF_UNLOCK();
3210	free(xbdbuf, M_BPF);
3211	return (ENOMEM);
3212	}
3213	index = 0;
3214	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
3215	BPFIF_RLOCK(bp);
3216	/* Send writers-only first */
3217	LIST_FOREACH(bd, &bp->bif_wlist, bd_next) {
3218	xbd = &xbdbuf[index++];
3219	BPFD_LOCK(bd);
3220	bpfstats_fill_xbpf(xbd, bd);
3221	BPFD_UNLOCK(bd);
3222	}
3223	LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
3224	xbd = &xbdbuf[index++];
3225	BPFD_LOCK(bd);
3226	bpfstats_fill_xbpf(xbd, bd);
3227	BPFD_UNLOCK(bd);
3228	}
3229	BPFIF_RUNLOCK(bp);
3230	}
3231	BPF_UNLOCK();
3232	error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
3233	free(xbdbuf, M_BPF);
3234	return (error);
3235	}
3236
/* Run bpf_drvinit() at SI_SUB_DRIVERS / SI_ORDER_MIDDLE. */
SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, bpf_drvinit, NULL);
3238
3239	#else /* !DEV_BPF && !NETGRAPH_BPF */
3240	/*
3241	* NOP stubs to allow bpf-using drivers to load and function.
3242	*
3243	* A 'better' implementation would allow the core bpf functionality
3244	* to be loaded at runtime.
3245	*/
3246	static struct bpf_if bp_null;
3247
3248	void
3249	bpf_tap(struct bpf_if bp, u_char pkt, u_int pktlen)
3250	{
3251	}
3252
/* Stub: tapping an mbuf chain does nothing without BPF. */
void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
}
3257
3258	void
3259	bpf_mtap2(struct bpf_if bp, void d, u_int l, struct mbuf *m)
3260	{
3261	}
3262
3263	void
3264	bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
3265	{
3266
3267	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
3268	}
3269
3270	void
3271	bpfattach2(struct ifnet ifp, u_int dlt, u_int hdrlen, struct bpf_if *driverp)
3272	{
3273
3274	*driverp = &bp_null;
3275	}
3276
/* Stub detach: the stub attach allocated nothing, so nothing is released. */
void
bpfdetach(struct ifnet *ifp)
{

	(void)ifp;
}
3281
3282	u_int
3283	bpf_filter(const struct bpf_insn pc, u_char p, u_int wirelen, u_int buflen)
3284	{
3285	return -1; /* "no filter" behaviour */
3286	}
3287
/*
 * Stub validator: no filter program is ever accepted, so this always
 * returns 0 (false).  Both arguments are ignored.
 */
int
bpf_validate(const struct bpf_insn *f, int len)
{

	(void)f;
	(void)len;
	return (0);	/* false */
}
3293
3294	#endif /* !DEV_BPF && !NETGRAPH_BPF */
3295
3296	#ifdef DDB
3297	static void
3298	bpf_show_bpf_if(struct bpf_if *bpf_if)
3299	{
3300
3301	if (bpf_if == NULL)
3302	return;
3303	db_printf("%p:\n", bpf_if);
3304	#define BPF_DB_PRINTF(f, e) db_printf(" %s = " f "\n", #e, bpf_if->e);
3305	/* bif_ext.bif_next */
3306	/* bif_ext.bif_dlist */
3307	BPF_DB_PRINTF("%#x", bif_dlt);
3308	BPF_DB_PRINTF("%u", bif_hdrlen);
3309	BPF_DB_PRINTF("%p", bif_ifp);
3310	/* bif_lock */
3311	/* bif_wlist */
3312	BPF_DB_PRINTF("%#x", bif_flags);
3313	}
3314
3315	DB_SHOW_COMMAND(bpf_if, db_show_bpf_if)
3316	{
3317
3318	if (!have_addr) {
3319	db_printf("usage: show bpf_if <struct bpf_if *>\n");
3320	return;
3321	}
3322
3323	bpf_show_bpf_if((struct bpf_if *)addr);
3324	}
3325	#endif

Note: See TracBrowser for help on using the repository browser.

Download in other formats: