Context Navigation

kern_event.c @ 72d5fa1

55-freebsd-126-freebsd-12

Last change on this file since 72d5fa1 was 72d5fa1, checked in by Sebastian Huber <sebastian.huber@…>, on 11/10/17 at 10:01:39

Fix reference counting for file descriptors

Update #3132.

Property mode set to 100644

File size: 63.6 KB

Line
1	#include <machine/rtems-bsd-kernel-space.h>
2
3	/*-
4	* Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
5	* Copyright 2004 John-Mark Gurney <jmg@FreeBSD.org>
6	* Copyright (c) 2009 Apple, Inc.
7	* All rights reserved.
8	*
9	* Redistribution and use in source and binary forms, with or without
10	* modification, are permitted provided that the following conditions
11	* are met:
12	* 1. Redistributions of source code must retain the above copyright
13	* notice, this list of conditions and the following disclaimer.
14	* 2. Redistributions in binary form must reproduce the above copyright
15	* notice, this list of conditions and the following disclaimer in the
16	* documentation and/or other materials provided with the distribution.
17	*
18	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21	* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28	* SUCH DAMAGE.
29	*/
30
31	#include <sys/cdefs.h>
32	__FBSDID("$FreeBSD$");
33
34	#include <rtems/bsd/local/opt_ktrace.h>
35	#include <rtems/bsd/local/opt_kqueue.h>
36
37	#include <sys/param.h>
38	#include <sys/systm.h>
39	#include <sys/capsicum.h>
40	#include <sys/kernel.h>
41	#include <sys/lock.h>
42	#include <sys/mutex.h>
43	#include <sys/rwlock.h>
44	#include <sys/proc.h>
45	#include <sys/malloc.h>
46	#include <rtems/bsd/sys/unistd.h>
47	#include <sys/file.h>
48	#include <sys/filedesc.h>
49	#include <sys/filio.h>
50	#include <sys/fcntl.h>
51	#include <sys/kthread.h>
52	#include <sys/selinfo.h>
53	#include <sys/queue.h>
54	#include <sys/event.h>
55	#include <sys/eventvar.h>
56	#include <sys/poll.h>
57	#include <sys/protosw.h>
58	#include <sys/resourcevar.h>
59	#include <sys/sigio.h>
60	#include <sys/signalvar.h>
61	#include <sys/socket.h>
62	#include <sys/socketvar.h>
63	#include <sys/stat.h>
64	#include <sys/sysctl.h>
65	#include <sys/sysproto.h>
66	#include <sys/syscallsubr.h>
67	#include <sys/taskqueue.h>
68	#include <sys/uio.h>
69	#include <sys/user.h>
70	#ifdef KTRACE
71	#include <sys/ktrace.h>
72	#endif
73	#include <machine/atomic.h>
74
75	#include <vm/uma.h>
76	#ifdef __rtems__
77	#include <machine/rtems-bsd-syscall-api.h>
78
79	/* Maintain a global kqueue list on RTEMS */
80	static struct kqlist fd_kqlist;
81	#endif /* __rtems__ */
82
83	static MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");
84
85	/*
86	* This lock is used if multiple kq locks are required. This possibly
87	* should be made into a per proc lock.
88	*/
89	static struct mtx kq_global;
90	MTX_SYSINIT(kq_global, &kq_global, "kqueue order", MTX_DEF);
91	#define KQ_GLOBAL_LOCK(lck, haslck) do { \
92	if (!haslck) \
93	mtx_lock(lck); \
94	haslck = 1; \
95	} while (0)
96	#define KQ_GLOBAL_UNLOCK(lck, haslck) do { \
97	if (haslck) \
98	mtx_unlock(lck); \
99	haslck = 0; \
100	} while (0)
101
102	TASKQUEUE_DEFINE_THREAD(kqueue_ctx);
103
104	static int kevent_copyout(void arg, struct kevent kevp, int count);
105	static int kevent_copyin(void arg, struct kevent kevp, int count);
106	static int kqueue_register(struct kqueue kq, struct kevent kev,
107	struct thread *td, int waitok);
108	static int kqueue_acquire(struct file fp, struct kqueue *kqp);
109	static void kqueue_release(struct kqueue *kq, int locked);
110	static void kqueue_destroy(struct kqueue *kq);
111	static void kqueue_drain(struct kqueue kq, struct thread td);
112	static int kqueue_expand(struct kqueue kq, struct filterops fops,
113	uintptr_t ident, int waitok);
114	static void kqueue_task(void *arg, int pending);
115	static int kqueue_scan(struct kqueue *kq, int maxevents,
116	struct kevent_copyops *k_ops,
117	const struct timespec *timeout,
118	struct kevent keva, struct thread td);
119	static void kqueue_wakeup(struct kqueue *kq);
120	static struct filterops *kqueue_fo_find(int filt);
121	static void kqueue_fo_release(int filt);
122
123	#ifndef __rtems__
124	static fo_rdwr_t kqueue_read;
125	static fo_rdwr_t kqueue_write;
126	static fo_truncate_t kqueue_truncate;
127	static fo_ioctl_t kqueue_ioctl;
128	static fo_poll_t kqueue_poll;
129	static fo_kqfilter_t kqueue_kqfilter;
130	static fo_stat_t kqueue_stat;
131	static fo_close_t kqueue_close;
132	static fo_fill_kinfo_t kqueue_fill_kinfo;
133
134	static struct fileops kqueueops = {
135	.fo_read = invfo_rdwr,
136	.fo_write = invfo_rdwr,
137	.fo_truncate = invfo_truncate,
138	.fo_ioctl = kqueue_ioctl,
139	.fo_poll = kqueue_poll,
140	.fo_kqfilter = kqueue_kqfilter,
141	.fo_stat = kqueue_stat,
142	.fo_close = kqueue_close,
143	.fo_chmod = invfo_chmod,
144	.fo_chown = invfo_chown,
145	.fo_sendfile = invfo_sendfile,
146	.fo_fill_kinfo = kqueue_fill_kinfo,
147	};
148	#else /* __rtems__ */
149	static const rtems_filesystem_file_handlers_r kqueueops;
150	#endif /* __rtems__ */
151
152	static int knote_attach(struct knote kn, struct kqueue kq);
153	static void knote_drop(struct knote kn, struct thread td);
154	static void knote_drop_detached(struct knote kn, struct thread td);
155	static void knote_enqueue(struct knote *kn);
156	static void knote_dequeue(struct knote *kn);
157	static void knote_init(void);
158	static struct knote *knote_alloc(int waitok);
159	static void knote_free(struct knote *kn);
160
161	static void filt_kqdetach(struct knote *kn);
162	static int filt_kqueue(struct knote *kn, long hint);
163	#ifndef __rtems__
164	static int filt_procattach(struct knote *kn);
165	static void filt_procdetach(struct knote *kn);
166	static int filt_proc(struct knote *kn, long hint);
167	#endif /* __rtems__ */
168	static int filt_fileattach(struct knote *kn);
169	static void filt_timerexpire(void *knx);
170	static int filt_timerattach(struct knote *kn);
171	static void filt_timerdetach(struct knote *kn);
172	static int filt_timer(struct knote *kn, long hint);
173	static int filt_userattach(struct knote *kn);
174	static void filt_userdetach(struct knote *kn);
175	static int filt_user(struct knote *kn, long hint);
176	static void filt_usertouch(struct knote kn, struct kevent kev,
177	u_long type);
178
179	static struct filterops file_filtops = {
180	.f_isfd = 1,
181	.f_attach = filt_fileattach,
182	};
183	static struct filterops kqread_filtops = {
184	.f_isfd = 1,
185	.f_detach = filt_kqdetach,
186	.f_event = filt_kqueue,
187	};
188	/* XXX - move to kern_proc.c? */
189	#ifndef __rtems__
190	static struct filterops proc_filtops = {
191	.f_isfd = 0,
192	.f_attach = filt_procattach,
193	.f_detach = filt_procdetach,
194	.f_event = filt_proc,
195	};
196	#endif /* __rtems__ */
197	static struct filterops timer_filtops = {
198	.f_isfd = 0,
199	.f_attach = filt_timerattach,
200	.f_detach = filt_timerdetach,
201	.f_event = filt_timer,
202	};
203	static struct filterops user_filtops = {
204	.f_attach = filt_userattach,
205	.f_detach = filt_userdetach,
206	.f_event = filt_user,
207	.f_touch = filt_usertouch,
208	};
209
210	static uma_zone_t knote_zone;
211	static unsigned int kq_ncallouts = 0;
212	static unsigned int kq_calloutmax = 4 * 1024;
213	SYSCTL_UINT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW,
214	&kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue");
215
/*
 * Mark a knote active and queue it unless it is already queued or disabled.
 * With 'islock' non-zero the kq lock must already be held (asserted);
 * otherwise the lock is taken and dropped around the update.
 */
/* XXX - ensure not influx ? */
#define KNOTE_ACTIVATE(kn, islock) do { \
	if ((islock)) \
		mtx_assert(&(kn)->kn_kq->kq_lock, MA_OWNED); \
	else \
		KQ_LOCK((kn)->kn_kq); \
	(kn)->kn_status |= KN_ACTIVE; \
	if (((kn)->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) \
		knote_enqueue((kn)); \
	if (!(islock)) \
		KQ_UNLOCK((kn)->kn_kq); \
} while(0)
#define KQ_LOCK(kq) do { \
	mtx_lock(&(kq)->kq_lock); \
} while (0)
/* Wake threads sleeping on the kq if KQ_FLUXWAIT is set, and clear it. */
#define KQ_FLUX_WAKEUP(kq) do { \
	if (((kq)->kq_state & KQ_FLUXWAIT) == KQ_FLUXWAIT) { \
		(kq)->kq_state &= ~KQ_FLUXWAIT; \
		wakeup((kq)); \
	} \
} while (0)
/* Unlock the kq, first waking any flux waiters. */
#define KQ_UNLOCK_FLUX(kq) do { \
	KQ_FLUX_WAKEUP(kq); \
	mtx_unlock(&(kq)->kq_lock); \
} while (0)
#define KQ_UNLOCK(kq) do { \
	mtx_unlock(&(kq)->kq_lock); \
} while (0)
#define KQ_OWNED(kq) do { \
	mtx_assert(&(kq)->kq_lock, MA_OWNED); \
} while (0)
#define KQ_NOTOWNED(kq) do { \
	mtx_assert(&(kq)->kq_lock, MA_NOTOWNED); \
} while (0)
250
251	static struct knlist *
252	kn_list_lock(struct knote *kn)
253	{
254	struct knlist *knl;
255
256	knl = kn->kn_knlist;
257	if (knl != NULL)
258	knl->kl_lock(knl->kl_lockarg);
259	return (knl);
260	}
261
262	static void
263	kn_list_unlock(struct knlist *knl)
264	{
265	bool do_free;
266
267	if (knl == NULL)
268	return;
269	do_free = knl->kl_autodestroy && knlist_empty(knl);
270	knl->kl_unlock(knl->kl_lockarg);
271	if (do_free) {
272	knlist_destroy(knl);
273	free(knl, M_KQUEUE);
274	}
275	}
276
277	static bool
278	kn_in_flux(struct knote *kn)
279	{
280
281	return (kn->kn_influx > 0);
282	}
283
284	static void
285	kn_enter_flux(struct knote *kn)
286	{
287
288	KQ_OWNED(kn->kn_kq);
289	MPASS(kn->kn_influx < INT_MAX);
290	kn->kn_influx++;
291	}
292
293	static bool
294	kn_leave_flux(struct knote *kn)
295	{
296
297	KQ_OWNED(kn->kn_kq);
298	MPASS(kn->kn_influx > 0);
299	kn->kn_influx--;
300	return (kn->kn_influx == 0);
301	}
302
/*
 * Assert the lock state of a knlist.  'islocked' selects which assertion
 * is made.  Without INVARIANTS both assertions expand to nothing.
 * Macro arguments are parenthesized so expression arguments expand safely.
 */
#define KNL_ASSERT_LOCK(knl, islocked) do { \
	if ((islocked)) \
		KNL_ASSERT_LOCKED(knl); \
	else \
		KNL_ASSERT_UNLOCKED(knl); \
} while (0)
#ifdef INVARIANTS
#define KNL_ASSERT_LOCKED(knl) do { \
	(knl)->kl_assert_locked((knl)->kl_lockarg); \
} while (0)
#define KNL_ASSERT_UNLOCKED(knl) do { \
	(knl)->kl_assert_unlocked((knl)->kl_lockarg); \
} while (0)
#else /* !INVARIANTS */
#define KNL_ASSERT_LOCKED(knl) do {} while(0)
#define KNL_ASSERT_UNLOCKED(knl) do {} while (0)
#endif /* INVARIANTS */
320
#ifndef KN_HASHSIZE
#define KN_HASHSIZE 64 /* XXX should be tunable */
#endif

/*
 * Fold 'val' into a bucket index: XOR it with itself shifted right by 8,
 * then mask.  Both uses of 'val' are parenthesized so that an expression
 * argument (e.g. a | b) is shifted as a whole.
 */
#define KN_HASH(val, mask) (((val) ^ ((val) >> 8)) & (mask))
326
/*
 * Attach routine for filters that are not supported: always fails.
 *
 * Returns ENXIO unconditionally; 'kn' is not examined.
 */
static int
filt_nullattach(struct knote *kn)
{

	return (ENXIO);
}
333
334	struct filterops null_filtops = {
335	.f_isfd = 0,
336	.f_attach = filt_nullattach,
337	};
338
339	/* XXX - make SYSINIT to add these, and move into respective modules. */
340	extern struct filterops sig_filtops;
341	extern struct filterops fs_filtops;
342
343	/*
344	* Table for for all system-defined filters.
345	*/
346	static struct mtx filterops_lock;
347	MTX_SYSINIT(kqueue_filterops, &filterops_lock, "protect sysfilt_ops",
348	MTX_DEF);
349	static struct {
350	struct filterops *for_fop;
351	int for_nolock;
352	int for_refcnt;
353	} sysfilt_ops[EVFILT_SYSCOUNT] = {
354	{ &file_filtops, 1 }, /* EVFILT_READ */
355	{ &file_filtops, 1 }, /* EVFILT_WRITE */
356	{ &null_filtops }, /* EVFILT_AIO */
357	{ &file_filtops, 1 }, /* EVFILT_VNODE */
358	#ifndef __rtems__
359	{ &proc_filtops, 1 }, /* EVFILT_PROC */
360	{ &sig_filtops, 1 }, /* EVFILT_SIGNAL */
361	#else /* __rtems__ */
362	{ &null_filtops }, /* EVFILT_PROC */
363	{ &null_filtops }, /* EVFILT_SIGNAL */
364	#endif /* __rtems__ */
365	{ &timer_filtops, 1 }, /* EVFILT_TIMER */
366	{ &null_filtops }, /* former EVFILT_NETDEV */
367	#ifndef __rtems__
368	{ &fs_filtops, 1 }, /* EVFILT_FS */
369	#else /* __rtems__ */
370	{ &null_filtops }, /* EVFILT_FS */
371	#endif /* __rtems__ */
372	{ &null_filtops }, /* EVFILT_LIO */
373	{ &user_filtops, 1 }, /* EVFILT_USER */
374	{ &null_filtops }, /* EVFILT_SENDFILE */
375	{ &file_filtops, 1 }, /* EVFILT_EMPTY */
376	};
377
378	/*
379	* Simple redirection for all cdevsw style objects to call their fo_kqfilter
380	* method.
381	*/
382	static int
383	filt_fileattach(struct knote *kn)
384	{
385
386	return (fo_kqfilter(kn->kn_fp, kn));
387	}
388
389	/ARGSUSED/
390	static int
391	kqueue_kqfilter(struct file fp, struct knote kn)
392	{
393	struct kqueue *kq = kn->kn_fp->f_data;
394
395	if (kn->kn_filter != EVFILT_READ)
396	return (EINVAL);
397
398	kn->kn_status \|= KN_KQUEUE;
399	kn->kn_fop = &kqread_filtops;
400	knlist_add(&kq->kq_sel.si_note, kn, 0);
401
402	return (0);
403	}
#ifdef __rtems__
/*
 * RTEMS adapter: converts the libio descriptor to its struct file and
 * forwards to kqueue_kqfilter().
 */
static int
rtems_bsd_kqueue_kqfilter(rtems_libio_t *iop, struct knote *kn)
{
	struct file *fp = rtems_bsd_iop_to_fp(iop);

	return kqueue_kqfilter(fp, kn);
}
#endif /* __rtems__ */
413
414	static void
415	filt_kqdetach(struct knote *kn)
416	{
417	struct kqueue *kq = kn->kn_fp->f_data;
418
419	knlist_remove(&kq->kq_sel.si_note, kn, 0);
420	}
421
422	/ARGSUSED/
423	static int
424	filt_kqueue(struct knote *kn, long hint)
425	{
426	struct kqueue *kq = kn->kn_fp->f_data;
427
428	kn->kn_data = kq->kq_count;
429	return (kn->kn_data > 0);
430	}
431
432	#ifndef __rtems__
433	/* XXX - move to kern_proc.c? */
434	static int
435	filt_procattach(struct knote *kn)
436	{
437	struct proc *p;
438	int error;
439	bool exiting, immediate;
440
441	exiting = immediate = false;
442	p = pfind(kn->kn_id);
443	if (p == NULL && (kn->kn_sfflags & NOTE_EXIT)) {
444	p = zpfind(kn->kn_id);
445	exiting = true;
446	} else if (p != NULL && (p->p_flag & P_WEXIT)) {
447	exiting = true;
448	}
449
450	if (p == NULL)
451	return (ESRCH);
452	if ((error = p_cansee(curthread, p))) {
453	PROC_UNLOCK(p);
454	return (error);
455	}
456
457	kn->kn_ptr.p_proc = p;
458	kn->kn_flags \|= EV_CLEAR; /* automatically set */
459
460	/*
461	* Internal flag indicating registration done by kernel for the
462	* purposes of getting a NOTE_CHILD notification.
463	*/
464	if (kn->kn_flags & EV_FLAG2) {
465	kn->kn_flags &= ~EV_FLAG2;
466	kn->kn_data = kn->kn_sdata; /* ppid */
467	kn->kn_fflags = NOTE_CHILD;
468	kn->kn_sfflags &= ~(NOTE_EXIT \| NOTE_EXEC \| NOTE_FORK);
469	immediate = true; /* Force immediate activation of child note. */
470	}
471	/*
472	* Internal flag indicating registration done by kernel (for other than
473	* NOTE_CHILD).
474	*/
475	if (kn->kn_flags & EV_FLAG1) {
476	kn->kn_flags &= ~EV_FLAG1;
477	}
478
479	knlist_add(p->p_klist, kn, 1);
480
481	/*
482	* Immediately activate any child notes or, in the case of a zombie
483	* target process, exit notes. The latter is necessary to handle the
484	* case where the target process, e.g. a child, dies before the kevent
485	* is registered.
486	*/
487	if (immediate \|\| (exiting && filt_proc(kn, NOTE_EXIT)))
488	KNOTE_ACTIVATE(kn, 0);
489
490	PROC_UNLOCK(p);
491
492	return (0);
493	}
494
495	/*
496	* The knote may be attached to a different process, which may exit,
497	* leaving nothing for the knote to be attached to. So when the process
498	* exits, the knote is marked as DETACHED and also flagged as ONESHOT so
499	* it will be deleted when read out. However, as part of the knote deletion,
500	* this routine is called, so a check is needed to avoid actually performing
501	* a detach, because the original process does not exist any more.
502	*/
503	/* XXX - move to kern_proc.c? */
504	static void
505	filt_procdetach(struct knote *kn)
506	{
507
508	knlist_remove(kn->kn_knlist, kn, 0);
509	kn->kn_ptr.p_proc = NULL;
510	}
511
512	/* XXX - move to kern_proc.c? */
513	static int
514	filt_proc(struct knote *kn, long hint)
515	{
516	struct proc *p;
517	u_int event;
518
519	p = kn->kn_ptr.p_proc;
520	if (p == NULL) /* already activated, from attach filter */
521	return (0);
522
523	/* Mask off extra data. */
524	event = (u_int)hint & NOTE_PCTRLMASK;
525
526	/* If the user is interested in this event, record it. */
527	if (kn->kn_sfflags & event)
528	kn->kn_fflags \|= event;
529
530	/* Process is gone, so flag the event as finished. */
531	if (event == NOTE_EXIT) {
532	kn->kn_flags \|= EV_EOF \| EV_ONESHOT;
533	kn->kn_ptr.p_proc = NULL;
534	if (kn->kn_fflags & NOTE_EXIT)
535	kn->kn_data = KW_EXITCODE(p->p_xexit, p->p_xsig);
536	if (kn->kn_fflags == 0)
537	kn->kn_flags \|= EV_DROP;
538	return (1);
539	}
540
541	return (kn->kn_fflags != 0);
542	}
543
544	/*
545	* Called when the process forked. It mostly does the same as the
546	* knote(), activating all knotes registered to be activated when the
547	* process forked. Additionally, for each knote attached to the
548	* parent, check whether user wants to track the new process. If so
549	* attach a new knote to it, and immediately report an event with the
550	* child's pid.
551	*/
552	void
553	knote_fork(struct knlist *list, int pid)
554	{
555	struct kqueue *kq;
556	struct knote *kn;
557	struct kevent kev;
558	int error;
559
560	if (list == NULL)
561	return;
562	list->kl_lock(list->kl_lockarg);
563
564	SLIST_FOREACH(kn, &list->kl_list, kn_selnext) {
565	kq = kn->kn_kq;
566	KQ_LOCK(kq);
567	if (kn_in_flux(kn) && (kn->kn_status & KN_SCAN) == 0) {
568	KQ_UNLOCK(kq);
569	continue;
570	}
571
572	/*
573	* The same as knote(), activate the event.
574	*/
575	if ((kn->kn_sfflags & NOTE_TRACK) == 0) {
576	kn->kn_status \|= KN_HASKQLOCK;
577	if (kn->kn_fop->f_event(kn, NOTE_FORK))
578	KNOTE_ACTIVATE(kn, 1);
579	kn->kn_status &= ~KN_HASKQLOCK;
580	KQ_UNLOCK(kq);
581	continue;
582	}
583
584	/*
585	* The NOTE_TRACK case. In addition to the activation
586	* of the event, we need to register new events to
587	* track the child. Drop the locks in preparation for
588	* the call to kqueue_register().
589	*/
590	kn_enter_flux(kn);
591	KQ_UNLOCK(kq);
592	list->kl_unlock(list->kl_lockarg);
593
594	/*
595	* Activate existing knote and register tracking knotes with
596	* new process.
597	*
598	* First register a knote to get just the child notice. This
599	* must be a separate note from a potential NOTE_EXIT
600	* notification since both NOTE_CHILD and NOTE_EXIT are defined
601	* to use the data field (in conflicting ways).
602	*/
603	kev.ident = pid;
604	kev.filter = kn->kn_filter;
605	kev.flags = kn->kn_flags \| EV_ADD \| EV_ENABLE \| EV_ONESHOT \|
606	EV_FLAG2;
607	kev.fflags = kn->kn_sfflags;
608	kev.data = kn->kn_id; /* parent */
609	kev.udata = kn->kn_kevent.udata;/* preserve udata */
610	error = kqueue_register(kq, &kev, NULL, 0);
611	if (error)
612	kn->kn_fflags \|= NOTE_TRACKERR;
613
614	/*
615	* Then register another knote to track other potential events
616	* from the new process.
617	*/
618	kev.ident = pid;
619	kev.filter = kn->kn_filter;
620	kev.flags = kn->kn_flags \| EV_ADD \| EV_ENABLE \| EV_FLAG1;
621	kev.fflags = kn->kn_sfflags;
622	kev.data = kn->kn_id; /* parent */
623	kev.udata = kn->kn_kevent.udata;/* preserve udata */
624	error = kqueue_register(kq, &kev, NULL, 0);
625	if (error)
626	kn->kn_fflags \|= NOTE_TRACKERR;
627	if (kn->kn_fop->f_event(kn, NOTE_FORK))
628	KNOTE_ACTIVATE(kn, 0);
629	KQ_LOCK(kq);
630	kn_leave_flux(kn);
631	KQ_UNLOCK_FLUX(kq);
632	list->kl_lock(list->kl_lockarg);
633	}
634	list->kl_unlock(list->kl_lockarg);
635	}
636	#endif /* __rtems__ */
637
638	/*
639	* XXX: EVFILT_TIMER should perhaps live in kern_time.c beside the
640	* interval timer support code.
641	*/
642
643	#define NOTE_TIMER_PRECMASK (NOTE_SECONDS\|NOTE_MSECONDS\|NOTE_USECONDS\| \
644	NOTE_NSECONDS)
645
646	static sbintime_t
647	timer2sbintime(intptr_t data, int flags)
648	{
649
650	/*
651	* Macros for converting to the fractional second portion of an
652	* sbintime_t using 64bit multiplication to improve precision.
653	*/
654	#define NS_TO_SBT(ns) (((ns) * (((uint64_t)1 << 63) / 500000000)) >> 32)
655	#define US_TO_SBT(us) (((us) * (((uint64_t)1 << 63) / 500000)) >> 32)
656	#define MS_TO_SBT(ms) (((ms) * (((uint64_t)1 << 63) / 500)) >> 32)
657	switch (flags & NOTE_TIMER_PRECMASK) {
658	case NOTE_SECONDS:
659	#ifdef __LP64__
660	if (data > (SBT_MAX / SBT_1S))
661	return (SBT_MAX);
662	#endif
663	return ((sbintime_t)data << 32);
664	case NOTE_MSECONDS: /* FALLTHROUGH */
665	case 0:
666	if (data >= 1000) {
667	int64_t secs = data / 1000;
668	#ifdef __LP64__
669	if (secs > (SBT_MAX / SBT_1S))
670	return (SBT_MAX);
671	#endif
672	return (secs << 32 \| MS_TO_SBT(data % 1000));
673	}
674	return MS_TO_SBT(data);
675	case NOTE_USECONDS:
676	if (data >= 1000000) {
677	int64_t secs = data / 1000000;
678	#ifdef __LP64__
679	if (secs > (SBT_MAX / SBT_1S))
680	return (SBT_MAX);
681	#endif
682	return (secs << 32 \| US_TO_SBT(data % 1000000));
683	}
684	return US_TO_SBT(data);
685	case NOTE_NSECONDS:
686	if (data >= 1000000000) {
687	int64_t secs = data / 1000000000;
688	#ifdef __LP64__
689	if (secs > (SBT_MAX / SBT_1S))
690	return (SBT_MAX);
691	#endif
692	return (secs << 32 \| US_TO_SBT(data % 1000000000));
693	}
694	return (NS_TO_SBT(data));
695	default:
696	break;
697	}
698	return (-1);
699	}
700
701	struct kq_timer_cb_data {
702	struct callout c;
703	sbintime_t next; /* next timer event fires at */
704	sbintime_t to; /* precalculated timer period */
705	};
706
707	static void
708	filt_timerexpire(void *knx)
709	{
710	struct knote *kn;
711	struct kq_timer_cb_data *kc;
712
713	kn = knx;
714	kn->kn_data++;
715	KNOTE_ACTIVATE(kn, 0); /* XXX - handle locking */
716
717	if ((kn->kn_flags & EV_ONESHOT) != 0)
718	return;
719
720	kc = kn->kn_ptr.p_v;
721	kc->next += kc->to;
722	callout_reset_sbt_on(&kc->c, kc->next, 0, filt_timerexpire, kn,
723	PCPU_GET(cpuid), C_ABSOLUTE);
724	}
725
726	/*
727	* data contains amount of time to sleep
728	*/
729	static int
730	filt_timerattach(struct knote *kn)
731	{
732	struct kq_timer_cb_data *kc;
733	sbintime_t to;
734	unsigned int ncallouts;
735
736	if (kn->kn_sdata < 0)
737	return (EINVAL);
738	if (kn->kn_sdata == 0 && (kn->kn_flags & EV_ONESHOT) == 0)
739	kn->kn_sdata = 1;
740	/* Only precision unit are supported in flags so far */
741	if ((kn->kn_sfflags & ~NOTE_TIMER_PRECMASK) != 0)
742	return (EINVAL);
743
744	to = timer2sbintime(kn->kn_sdata, kn->kn_sfflags);
745	if (to < 0)
746	return (EINVAL);
747
748	do {
749	ncallouts = kq_ncallouts;
750	if (ncallouts >= kq_calloutmax)
751	return (ENOMEM);
752	} while (!atomic_cmpset_int(&kq_ncallouts, ncallouts, ncallouts + 1));
753
754	kn->kn_flags \|= EV_CLEAR; /* automatically set */
755	kn->kn_status &= ~KN_DETACHED; /* knlist_add clears it */
756	kn->kn_ptr.p_v = kc = malloc(sizeof(*kc), M_KQUEUE, M_WAITOK);
757	callout_init(&kc->c, 1);
758	kc->next = to + sbinuptime();
759	kc->to = to;
760	callout_reset_sbt_on(&kc->c, kc->next, 0, filt_timerexpire, kn,
761	PCPU_GET(cpuid), C_ABSOLUTE);
762
763	return (0);
764	}
765
766	static void
767	filt_timerdetach(struct knote *kn)
768	{
769	struct kq_timer_cb_data *kc;
770	unsigned int old;
771
772	kc = kn->kn_ptr.p_v;
773	callout_drain(&kc->c);
774	free(kc, M_KQUEUE);
775	old = atomic_fetchadd_int(&kq_ncallouts, -1);
776	KASSERT(old > 0, ("Number of callouts cannot become negative"));
777	kn->kn_status \|= KN_DETACHED; /* knlist_remove sets it */
778	}
779
780	static int
781	filt_timer(struct knote *kn, long hint)
782	{
783
784	return (kn->kn_data != 0);
785	}
786
787	static int
788	filt_userattach(struct knote *kn)
789	{
790
791	/*
792	* EVFILT_USER knotes are not attached to anything in the kernel.
793	*/
794	kn->kn_hook = NULL;
795	if (kn->kn_fflags & NOTE_TRIGGER)
796	kn->kn_hookid = 1;
797	else
798	kn->kn_hookid = 0;
799	return (0);
800	}
801
802	static void
803	filt_userdetach(__unused struct knote *kn)
804	{
805
806	/*
807	* EVFILT_USER knotes are not attached to anything in the kernel.
808	*/
809	}
810
811	static int
812	filt_user(struct knote *kn, __unused long hint)
813	{
814
815	return (kn->kn_hookid);
816	}
817
818	static void
819	filt_usertouch(struct knote kn, struct kevent kev, u_long type)
820	{
821	u_int ffctrl;
822
823	switch (type) {
824	case EVENT_REGISTER:
825	if (kev->fflags & NOTE_TRIGGER)
826	kn->kn_hookid = 1;
827
828	ffctrl = kev->fflags & NOTE_FFCTRLMASK;
829	kev->fflags &= NOTE_FFLAGSMASK;
830	switch (ffctrl) {
831	case NOTE_FFNOP:
832	break;
833
834	case NOTE_FFAND:
835	kn->kn_sfflags &= kev->fflags;
836	break;
837
838	case NOTE_FFOR:
839	kn->kn_sfflags \|= kev->fflags;
840	break;
841
842	case NOTE_FFCOPY:
843	kn->kn_sfflags = kev->fflags;
844	break;
845
846	default:
847	/* XXX Return error? */
848	break;
849	}
850	kn->kn_sdata = kev->data;
851	if (kev->flags & EV_CLEAR) {
852	kn->kn_hookid = 0;
853	kn->kn_data = 0;
854	kn->kn_fflags = 0;
855	}
856	break;
857
858	case EVENT_PROCESS:
859	*kev = kn->kn_kevent;
860	kev->fflags = kn->kn_sfflags;
861	kev->data = kn->kn_sdata;
862	if (kn->kn_flags & EV_CLEAR) {
863	kn->kn_hookid = 0;
864	kn->kn_data = 0;
865	kn->kn_fflags = 0;
866	}
867	break;
868
869	default:
870	panic("filt_usertouch() - invalid type (%ld)", type);
871	break;
872	}
873	}
874
875	#ifdef __rtems__
876	static int
877	kern_kqueue(struct thread td, int flags, struct filecaps fcaps);
878
879	static
880	#endif /* __rtems__ */
881	int
882	sys_kqueue(struct thread td, struct kqueue_args uap)
883	{
884
885	return (kern_kqueue(td, 0, NULL));
886	}
887
888	static void
889	kqueue_init(struct kqueue *kq)
890	{
891
892	mtx_init(&kq->kq_lock, "kqueue", NULL, MTX_DEF \| MTX_DUPOK);
893	TAILQ_INIT(&kq->kq_head);
894	knlist_init_mtx(&kq->kq_sel.si_note, &kq->kq_lock);
895	TASK_INIT(&kq->kq_task, 0, kqueue_task, kq);
896	}
897
898	int
899	kern_kqueue(struct thread td, int flags, struct filecaps fcaps)
900	{
901	struct filedesc *fdp;
902	struct kqueue *kq;
903	struct file *fp;
904	struct ucred *cred;
905	int fd, error;
906
907	#ifndef __rtems__
908	fdp = td->td_proc->p_fd;
909	cred = td->td_ucred;
910	if (!chgkqcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_KQUEUES)))
911	return (ENOMEM);
912	#else /* __rtems__ */
913	(void)fdp;
914	(void)cred;
915	#endif /* __rtems__ */
916
917	error = falloc_caps(td, &fp, &fd, flags, fcaps);
918	if (error != 0) {
919	chgkqcnt(cred->cr_ruidinfo, -1, 0);
920	return (error);
921	}
922
923	/* An extra reference on `fp' has been held for us by falloc(). */
924	kq = malloc(sizeof *kq, M_KQUEUE, M_WAITOK \| M_ZERO);
925	kqueue_init(kq);
926	#ifndef __rtems__
927	kq->kq_fdp = fdp;
928	kq->kq_cred = crhold(cred);
929	#endif /* __rtems__ */
930
931	#ifndef __rtems__
932	FILEDESC_XLOCK(fdp);
933	TAILQ_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_list);
934	FILEDESC_XUNLOCK(fdp);
935	#else /* __rtems__ */
936	rtems_libio_lock();
937	TAILQ_INSERT_HEAD(&fd_kqlist, kq, kq_list);
938	rtems_libio_unlock();
939	#endif /* __rtems__ */
940
941	finit(fp, FREAD \| FWRITE, DTYPE_KQUEUE, kq, &kqueueops);
942	#ifndef __rtems__
943	fdrop(fp, td);
944	#endif /* __rtems__ */
945
946	td->td_retval[0] = fd;
947	return (0);
948	}
#ifdef __rtems__
/*
 * RTEMS user-level kqueue(): runs sys_kqueue() on the current BSD thread.
 *
 * Returns the new descriptor, or sets errno and returns -1 on failure.
 * ENOMEM is reported when no thread structure is available.
 */
int
kqueue(void)
{
	struct thread *td = rtems_bsd_get_curthread_or_null();
	struct kqueue_args ua;
	int error;

	error = (td != NULL) ? sys_kqueue(td, &ua) : ENOMEM;
	if (error != 0) {
		rtems_set_errno_and_return_minus_one(error);
	}

	return td->td_retval[0];
}
#endif /* __rtems__ */
970
#ifdef KTRACE
/*
 * Byte length of 'n' kevent structures for ktrace I/O, capped at 'kgio'.
 * A negative 'n' also yields the cap.
 */
static size_t
kev_iovlen(int n, u_int kgio)
{

	if (n < 0 || n >= kgio / sizeof(struct kevent))
		return (kgio);
	return (n * sizeof(struct kevent));
}
#endif
981
#ifndef _SYS_SYSPROTO_H_
/* Argument block for sys_kevent(), used when sysproto.h does not supply it. */
struct kevent_args {
	int	fd;				/* kqueue descriptor */
	const struct kevent *changelist;	/* changes to register */
	int	nchanges;			/* entries in changelist */
	struct	kevent *eventlist;		/* buffer for returned events */
	int	nevents;			/* capacity of eventlist */
	const struct timespec *timeout;		/* NULL means no timeout given */
};
#endif
992	#ifdef __rtems__
993	static int kern_kevent(struct thread *td, int fd, int nchanges, int nevents,
994	struct kevent_copyops k_ops, const struct timespec timeout);
995
996	static int kern_kevent_fp(struct thread td, struct file fp, int nchanges,
997	int nevents, struct kevent_copyops k_ops, const struct timespec timeout);
998
999	static
1000	#endif /* __rtems__ */
1001	int
1002	sys_kevent(struct thread td, struct kevent_args uap)
1003	{
1004	struct timespec ts, *tsp;
1005	struct kevent_copyops k_ops = {
1006	.arg = uap,
1007	.k_copyout = kevent_copyout,
1008	.k_copyin = kevent_copyin,
1009	};
1010	int error;
1011	#ifdef KTRACE
1012	struct uio ktruio;
1013	struct iovec ktriov;
1014	struct uio *ktruioin = NULL;
1015	struct uio *ktruioout = NULL;
1016	u_int kgio;
1017	#endif
1018
1019	if (uap->timeout != NULL) {
1020	error = copyin(uap->timeout, &ts, sizeof(ts));
1021	if (error)
1022	return (error);
1023	tsp = &ts;
1024	} else
1025	tsp = NULL;
1026
1027	#ifdef KTRACE
1028	if (KTRPOINT(td, KTR_GENIO)) {
1029	kgio = ktr_geniosize;
1030	ktriov.iov_base = uap->changelist;
1031	ktriov.iov_len = kev_iovlen(uap->nchanges, kgio);
1032	ktruio = (struct uio){ .uio_iov = &ktriov, .uio_iovcnt = 1,
1033	.uio_segflg = UIO_USERSPACE, .uio_rw = UIO_READ,
1034	.uio_td = td };
1035	ktruioin = cloneuio(&ktruio);
1036	ktriov.iov_base = uap->eventlist;
1037	ktriov.iov_len = kev_iovlen(uap->nevents, kgio);
1038	ktriov.iov_len = uap->nevents * sizeof(struct kevent);
1039	ktruioout = cloneuio(&ktruio);
1040	}
1041	#endif
1042
1043	error = kern_kevent(td, uap->fd, uap->nchanges, uap->nevents,
1044	&k_ops, tsp);
1045
1046	#ifdef KTRACE
1047	if (ktruioin != NULL) {
1048	ktruioin->uio_resid = kev_iovlen(uap->nchanges, kgio);
1049	ktrgenio(uap->fd, UIO_WRITE, ktruioin, 0);
1050	ktruioout->uio_resid = kev_iovlen(td->td_retval[0], kgio);
1051	ktrgenio(uap->fd, UIO_READ, ktruioout, error);
1052	}
1053	#endif
1054
1055	return (error);
1056	}
#ifdef __rtems__
__weak_reference(kevent, _kevent);

/*
 * RTEMS user-level kevent(): packs the arguments into a kevent_args block
 * and runs sys_kevent() on the current BSD thread.
 *
 * Returns td_retval[0] on success, or sets errno and returns -1.  ENOMEM is
 * reported when no thread structure is available.
 */
int
kevent(int kq, const struct kevent *changelist, int nchanges,
    struct kevent *eventlist, int nevents,
    const struct timespec *timeout)
{
	struct thread *td = rtems_bsd_get_curthread_or_null();
	struct kevent_args ua = {
		.fd = kq,
		.changelist = changelist,
		.nchanges = nchanges,
		.eventlist = eventlist,
		.nevents = nevents,
		.timeout = timeout
	};
	int error;

	error = (td != NULL) ? sys_kevent(td, &ua) : ENOMEM;
	if (error != 0) {
		rtems_set_errno_and_return_minus_one(error);
	}

	return td->td_retval[0];
}
#endif /* __rtems__ */
1089
1090	/*
1091	* Copy 'count' items into the destination list pointed to by uap->eventlist.
1092	*/
1093	static int
1094	kevent_copyout(void arg, struct kevent kevp, int count)
1095	{
1096	struct kevent_args *uap;
1097	int error;
1098
1099	KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count));
1100	uap = (struct kevent_args *)arg;
1101
1102	error = copyout(kevp, uap->eventlist, count * sizeof *kevp);
1103	if (error == 0)
1104	uap->eventlist += count;
1105	return (error);
1106	}
1107
1108	/*
1109	* Copy 'count' items from the list pointed to by uap->changelist.
1110	*/
1111	static int
1112	kevent_copyin(void arg, struct kevent kevp, int count)
1113	{
1114	struct kevent_args *uap;
1115	int error;
1116
1117	KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count));
1118	uap = (struct kevent_args *)arg;
1119
1120	error = copyin(uap->changelist, kevp, count * sizeof *kevp);
1121	if (error == 0)
1122	uap->changelist += count;
1123	return (error);
1124	}
1125
1126	int
1127	kern_kevent(struct thread *td, int fd, int nchanges, int nevents,
1128	struct kevent_copyops k_ops, const struct timespec timeout)
1129	{
1130	cap_rights_t rights;
1131	struct file *fp;
1132	int error;
1133
1134	cap_rights_init(&rights);
1135	if (nchanges > 0)
1136	cap_rights_set(&rights, CAP_KQUEUE_CHANGE);
1137	if (nevents > 0)
1138	cap_rights_set(&rights, CAP_KQUEUE_EVENT);
1139	error = fget(td, fd, &rights, &fp);
1140	if (error != 0)
1141	return (error);
1142
1143	error = kern_kevent_fp(td, fp, nchanges, nevents, k_ops, timeout);
1144	fdrop(fp, td);
1145
1146	return (error);
1147	}
1148
1149	static int
1150	kqueue_kevent(struct kqueue kq, struct thread td, int nchanges, int nevents,
1151	struct kevent_copyops k_ops, const struct timespec timeout)
1152	{
1153	struct kevent keva[KQ_NEVENTS];
1154	struct kevent kevp, changes;
1155	int i, n, nerrors, error;
1156
1157	nerrors = 0;
1158	while (nchanges > 0) {
1159	n = nchanges > KQ_NEVENTS ? KQ_NEVENTS : nchanges;
1160	error = k_ops->k_copyin(k_ops->arg, keva, n);
1161	if (error)
1162	return (error);
1163	changes = keva;
1164	for (i = 0; i < n; i++) {
1165	kevp = &changes[i];
1166	if (!kevp->filter)
1167	continue;
1168	kevp->flags &= ~EV_SYSFLAGS;
1169	error = kqueue_register(kq, kevp, td, 1);
1170	if (error \|\| (kevp->flags & EV_RECEIPT)) {
1171	if (nevents == 0)
1172	return (error);
1173	kevp->flags = EV_ERROR;
1174	kevp->data = error;
1175	(void)k_ops->k_copyout(k_ops->arg, kevp, 1);
1176	nevents--;
1177	nerrors++;
1178	}
1179	}
1180	nchanges -= n;
1181	}
1182	if (nerrors) {
1183	td->td_retval[0] = nerrors;
1184	return (0);
1185	}
1186
1187	return (kqueue_scan(kq, nevents, k_ops, timeout, keva, td));
1188	}
1189
/*
 * Run a kevent operation on the kqueue behind 'fp'.  A kqueue reference
 * is held for the duration of the call and released afterwards.
 */
int
kern_kevent_fp(struct thread *td, struct file *fp, int nchanges, int nevents,
    struct kevent_copyops *k_ops, const struct timespec *timeout)
{
	struct kqueue *kq;
	int rv;

	rv = kqueue_acquire(fp, &kq);
	if (rv == 0) {
		rv = kqueue_kevent(kq, td, nchanges, nevents, k_ops, timeout);
		kqueue_release(kq, 0);
	}

	return (rv);
}
1204
1205	/*
1206	* Performs a kevent() call on a temporarily created kqueue. This can be
1207	* used to perform one-shot polling, similar to poll() and select().
1208	*/
1209	int
1210	kern_kevent_anonymous(struct thread *td, int nevents,
1211	struct kevent_copyops *k_ops)
1212	{
1213	struct kqueue kq = {};
1214	int error;
1215
1216	kqueue_init(&kq);
1217	kq.kq_refcnt = 1;
1218	error = kqueue_kevent(&kq, td, nevents, nevents, k_ops, NULL);
1219	kqueue_drain(&kq, td);
1220	kqueue_destroy(&kq);
1221	return (error);
1222	}
1223
1224	int
1225	kqueue_add_filteropts(int filt, struct filterops *filtops)
1226	{
1227	int error;
1228
1229	error = 0;
1230	if (filt > 0 \|\| filt + EVFILT_SYSCOUNT < 0) {
1231	printf(
1232	"trying to add a filterop that is out of range: %d is beyond %d\n",
1233	~filt, EVFILT_SYSCOUNT);
1234	return EINVAL;
1235	}
1236	mtx_lock(&filterops_lock);
1237	if (sysfilt_ops[~filt].for_fop != &null_filtops &&
1238	sysfilt_ops[~filt].for_fop != NULL)
1239	error = EEXIST;
1240	else {
1241	sysfilt_ops[~filt].for_fop = filtops;
1242	sysfilt_ops[~filt].for_refcnt = 0;
1243	}
1244	mtx_unlock(&filterops_lock);
1245
1246	return (error);
1247	}
1248
1249	int
1250	kqueue_del_filteropts(int filt)
1251	{
1252	int error;
1253
1254	error = 0;
1255	if (filt > 0 \|\| filt + EVFILT_SYSCOUNT < 0)
1256	return EINVAL;
1257
1258	mtx_lock(&filterops_lock);
1259	if (sysfilt_ops[~filt].for_fop == &null_filtops \|\|
1260	sysfilt_ops[~filt].for_fop == NULL)
1261	error = EINVAL;
1262	else if (sysfilt_ops[~filt].for_refcnt != 0)
1263	error = EBUSY;
1264	else {
1265	sysfilt_ops[~filt].for_fop = &null_filtops;
1266	sysfilt_ops[~filt].for_refcnt = 0;
1267	}
1268	mtx_unlock(&filterops_lock);
1269
1270	return error;
1271	}
1272
1273	static struct filterops *
1274	kqueue_fo_find(int filt)
1275	{
1276
1277	if (filt > 0 \|\| filt + EVFILT_SYSCOUNT < 0)
1278	return NULL;
1279
1280	if (sysfilt_ops[~filt].for_nolock)
1281	return sysfilt_ops[~filt].for_fop;
1282
1283	mtx_lock(&filterops_lock);
1284	sysfilt_ops[~filt].for_refcnt++;
1285	if (sysfilt_ops[~filt].for_fop == NULL)
1286	sysfilt_ops[~filt].for_fop = &null_filtops;
1287	mtx_unlock(&filterops_lock);
1288
1289	return sysfilt_ops[~filt].for_fop;
1290	}
1291
1292	static void
1293	kqueue_fo_release(int filt)
1294	{
1295
1296	if (filt > 0 \|\| filt + EVFILT_SYSCOUNT < 0)
1297	return;
1298
1299	if (sysfilt_ops[~filt].for_nolock)
1300	return;
1301
1302	mtx_lock(&filterops_lock);
1303	KASSERT(sysfilt_ops[~filt].for_refcnt > 0,
1304	("filter object refcount not valid on release"));
1305	sysfilt_ops[~filt].for_refcnt--;
1306	mtx_unlock(&filterops_lock);
1307	}
1308
1309	/*
1310	* A ref to kq (obtained via kqueue_acquire) must be held. waitok will
1311	* influence if memory allocation should wait. Make sure it is 0 if you
1312	* hold any mutexes.
1313	*/
/*
 * Apply the single change described by kev to kq: look up the filter ops
 * for kev->filter, search kq for a knote matching kev->ident/kev->filter,
 * and then add, delete or update that knote according to kev->flags.
 *
 * Returns 0 on success or an errno value.  Errors produced directly in
 * this function are EINVAL (EV_ENABLE and EV_DISABLE both set, unknown
 * filter, or a kqueue registered on itself), EBADF (ident > INT_MAX for
 * an fd-based filter), ENOMEM (no spare knote available for EV_ADD) and
 * ENOENT (no matching knote and EV_ADD not set); errors from fget(),
 * kqueue_expand(), knote_attach() and the filter's f_attach are passed
 * through.
 */
1314	static int
1315	kqueue_register(struct kqueue kq, struct kevent kev, struct thread *td, int waitok)
1316	{
1317	struct filterops *fops;
1318	struct file *fp;
1319	struct knote kn, tkn;
1320	struct knlist *knl;
1321	cap_rights_t rights;
1322	int error, filt, event;
1323	int haskqglobal, filedesc_unlock;
1324
1325	if ((kev->flags & (EV_ENABLE \| EV_DISABLE)) == (EV_ENABLE \| EV_DISABLE))
1326	return (EINVAL);
1327
1328	fp = NULL;
1329	kn = NULL;
1330	knl = NULL;
1331	error = 0;
1332	haskqglobal = 0;
1333	filedesc_unlock = 0;
1334
1335	filt = kev->filter;
1336	fops = kqueue_fo_find(filt);
1337	if (fops == NULL)
1338	return EINVAL;
1339
1340	if (kev->flags & EV_ADD) {
1341	/*
1342	* Prevent waiting with locks. Non-sleepable
1343	* allocation failures are handled in the loop, only
1344	* if the spare knote appears to be actually required.
1345	*/
1346	tkn = knote_alloc(waitok);
1347	} else {
1348	tkn = NULL;
1349	}
1350
/*
 * The lookup restarts from this label after the kq had to be expanded or
 * after sleeping on a knote that was in flux.
 */
1351	findkn:
1352	if (fops->f_isfd) {
1353	KASSERT(td != NULL, ("td is NULL"));
1354	if (kev->ident > INT_MAX)
1355	error = EBADF;
1356	else
1357	error = fget(td, kev->ident,
1358	cap_rights_init(&rights, CAP_EVENT), &fp);
1359	if (error)
1360	goto done;
1361
1362	if ((kev->flags & EV_ADD) == EV_ADD && kqueue_expand(kq, fops,
1363	kev->ident, 0) != 0) {
1364	/* try again */
1365	fdrop(fp, td);
1366	fp = NULL;
1367	error = kqueue_expand(kq, fops, kev->ident, waitok);
1368	if (error)
1369	goto done;
1370	goto findkn;
1371	}
1372
1373	#ifndef __rtems__
1374	if (fp->f_type == DTYPE_KQUEUE) {
1375	#else /* __rtems__ */
1376	if (fp->f_io.pathinfo.handlers == &kqueueops) {
1377	#endif /* __rtems__ */
1378	/*
1379	* If we add some intelligence about what we are doing,
1380	* we should be able to support events on ourselves.
1381	* We need to know when we are doing this to prevent
1382	* getting both the knlist lock and the kq lock since
1383	* they are the same thing.
1384	*/
1385	if (fp->f_data == kq) {
1386	error = EINVAL;
1387	goto done;
1388	}
1389
1390	/*
1391	* Pre-lock the filedesc before the global
1392	* lock mutex, see the comment in
1393	* kqueue_close().
1394	*/
1395	FILEDESC_XLOCK(td->td_proc->p_fd);
1396	filedesc_unlock = 1;
1397	KQ_GLOBAL_LOCK(&kq_global, haskqglobal);
1398	}
1399
1400	KQ_LOCK(kq);
1401	if (kev->ident < kq->kq_knlistsize) {
1402	SLIST_FOREACH(kn, &kq->kq_knlist[kev->ident], kn_link)
1403	if (kev->filter == kn->kn_filter)
1404	break;
1405	}
1406	} else {
/* The result of this expansion is not checked here. */
1407	if ((kev->flags & EV_ADD) == EV_ADD)
1408	kqueue_expand(kq, fops, kev->ident, waitok);
1409
1410	KQ_LOCK(kq);
1411
1412	/*
1413	* If possible, find an existing knote to use for this kevent.
1414	*/
1415	if (kev->filter == EVFILT_PROC &&
1416	(kev->flags & (EV_FLAG1 \| EV_FLAG2)) != 0) {
1417	/* This is an internal creation of a process tracking
1418	* note. Don't attempt to coalesce this with an
1419	* existing note.
1420	*/
1421	;
1422	} else if (kq->kq_knhashmask != 0) {
1423	struct klist *list;
1424
1425	list = &kq->kq_knhash[
1426	KN_HASH((u_long)kev->ident, kq->kq_knhashmask)];
1427	SLIST_FOREACH(kn, list, kn_link)
1428	if (kev->ident == kn->kn_id &&
1429	kev->filter == kn->kn_filter)
1430	break;
1431	}
1432	}
1433
1434	/* knote is in the process of changing, wait for it to stabilize. */
1435	if (kn != NULL && kn_in_flux(kn)) {
1436	KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
1437	if (filedesc_unlock) {
1438	FILEDESC_XUNLOCK(td->td_proc->p_fd);
1439	filedesc_unlock = 0;
1440	}
1441	kq->kq_state \|= KQ_FLUXWAIT;
1442	msleep(kq, &kq->kq_lock, PSOCK \| PDROP, "kqflxwt", 0);
1443	if (fp != NULL) {
1444	fdrop(fp, td);
1445	fp = NULL;
1446	}
1447	goto findkn;
1448	}
1449
1450	/*
1451	* kn now contains the matching knote, or NULL if no match
1452	*/
1453	if (kn == NULL) {
1454	if (kev->flags & EV_ADD) {
/* Consume the spare knote allocated before the lookup. */
1455	kn = tkn;
1456	tkn = NULL;
1457	if (kn == NULL) {
1458	KQ_UNLOCK(kq);
1459	error = ENOMEM;
1460	goto done;
1461	}
1462	kn->kn_fp = fp;
1463	kn->kn_kq = kq;
1464	kn->kn_fop = fops;
1465	/*
1466	* apply reference counts to knote structure, and
1467	* do not release it at the end of this routine.
1468	*/
1469	fops = NULL;
1470	fp = NULL;
1471
1472	kn->kn_sfflags = kev->fflags;
1473	kn->kn_sdata = kev->data;
1474	kev->fflags = 0;
1475	kev->data = 0;
1476	kn->kn_kevent = *kev;
1477	kn->kn_kevent.flags &= ~(EV_ADD \| EV_DELETE \|
1478	EV_ENABLE \| EV_DISABLE \| EV_FORCEONESHOT);
1479	kn->kn_status = KN_DETACHED;
1480	kn_enter_flux(kn);
1481
1482	error = knote_attach(kn, kq);
1483	KQ_UNLOCK(kq);
1484	if (error != 0) {
/* Hand the knote back to tkn so the exit path frees it. */
1485	tkn = kn;
1486	goto done;
1487	}
1488
1489	if ((error = kn->kn_fop->f_attach(kn)) != 0) {
1490	knote_drop_detached(kn, td);
1491	goto done;
1492	}
1493	knl = kn_list_lock(kn);
1494	goto done_ev_add;
1495	} else {
1496	/* No matching knote and the EV_ADD flag is not set. */
1497	KQ_UNLOCK(kq);
1498	error = ENOENT;
1499	goto done;
1500	}
1501	}
1502
1503	if (kev->flags & EV_DELETE) {
1504	kn_enter_flux(kn);
1505	KQ_UNLOCK(kq);
1506	knote_drop(kn, td);
1507	goto done;
1508	}
1509
1510	if (kev->flags & EV_FORCEONESHOT) {
1511	kn->kn_flags \|= EV_ONESHOT;
1512	KNOTE_ACTIVATE(kn, 1);
1513	}
1514
1515	/*
1516	* The user may change some filter values after the initial EV_ADD,
1517	* but doing so will not reset any filter which has already been
1518	* triggered.
1519	*/
1520	kn->kn_status \|= KN_SCAN;
1521	kn_enter_flux(kn);
1522	KQ_UNLOCK(kq);
1523	knl = kn_list_lock(kn);
1524	kn->kn_kevent.udata = kev->udata;
1525	if (!fops->f_isfd && fops->f_touch != NULL) {
1526	fops->f_touch(kn, kev, EVENT_REGISTER);
1527	} else {
1528	kn->kn_sfflags = kev->fflags;
1529	kn->kn_sdata = kev->data;
1530	}
1531
1532	/*
1533	* We can get here with kn->kn_knlist == NULL. This can happen when
1534	* the initial attach event decides that the event is "completed"
1535	* already. i.e. filt_procattach is called on a zombie process. It
1536	* will call filt_proc which will remove it from the list, and NULL
1537	* kn_knlist.
1538	*/
1539	done_ev_add:
1540	if ((kev->flags & EV_ENABLE) != 0)
1541	kn->kn_status &= ~KN_DISABLED;
1542	else if ((kev->flags & EV_DISABLE) != 0)
1543	kn->kn_status \|= KN_DISABLED;
1544
/* Poll the filter once so an already-pending event gets queued. */
1545	if ((kn->kn_status & KN_DISABLED) == 0)
1546	event = kn->kn_fop->f_event(kn, 0);
1547	else
1548	event = 0;
1549
1550	KQ_LOCK(kq);
1551	if (event)
1552	kn->kn_status \|= KN_ACTIVE;
1553	if ((kn->kn_status & (KN_ACTIVE \| KN_DISABLED \| KN_QUEUED)) ==
1554	KN_ACTIVE)
1555	knote_enqueue(kn);
1556	kn->kn_status &= ~KN_SCAN;
1557	kn_leave_flux(kn);
1558	kn_list_unlock(knl);
1559	KQ_UNLOCK_FLUX(kq);
1560
/*
 * Common exit: release whatever locks and references are still held.
 * fops and fp are NULL here when ownership moved to a new knote.
 */
1561	done:
1562	KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
1563	if (filedesc_unlock)
1564	FILEDESC_XUNLOCK(td->td_proc->p_fd);
1565	if (fp != NULL)
1566	fdrop(fp, td);
1567	knote_free(tkn);
1568	if (fops != NULL)
1569	kqueue_fo_release(filt);
1570	return (error);
1571	}
1572
1573	static int
1574	kqueue_acquire(struct file fp, struct kqueue *kqp)
1575	{
1576	int error;
1577	struct kqueue *kq;
1578
1579	error = 0;
1580
1581	kq = fp->f_data;
1582	#ifndef __rtems__
1583	if (fp->f_type != DTYPE_KQUEUE \|\| kq == NULL)
1584	#else /* __rtems__ */
1585	if (fp->f_io.pathinfo.handlers != &kqueueops \|\| kq == NULL)
1586	#endif /* __rtems__ */
1587	return (EBADF);
1588	*kqp = kq;
1589	KQ_LOCK(kq);
1590	if ((kq->kq_state & KQ_CLOSING) == KQ_CLOSING) {
1591	KQ_UNLOCK(kq);
1592	return (EBADF);
1593	}
1594	kq->kq_refcnt++;
1595	KQ_UNLOCK(kq);
1596
1597	return error;
1598	}
1599
1600	static void
1601	kqueue_release(struct kqueue *kq, int locked)
1602	{
1603	if (locked)
1604	KQ_OWNED(kq);
1605	else
1606	KQ_LOCK(kq);
1607	kq->kq_refcnt--;
1608	if (kq->kq_refcnt == 1)
1609	wakeup(&kq->kq_refcnt);
1610	if (!locked)
1611	KQ_UNLOCK(kq);
1612	}
1613
1614	static void
1615	kqueue_schedtask(struct kqueue *kq)
1616	{
1617
1618	KQ_OWNED(kq);
1619	KASSERT(((kq->kq_state & KQ_TASKDRAIN) != KQ_TASKDRAIN),
1620	("scheduling kqueue task while draining"));
1621
1622	if ((kq->kq_state & KQ_TASKSCHED) != KQ_TASKSCHED) {
1623	taskqueue_enqueue(taskqueue_kqueue_ctx, &kq->kq_task);
1624	kq->kq_state \|= KQ_TASKSCHED;
1625	}
1626	}
1627
1628	/*
1629	* Expand the kq to make sure we have storage for fops/ident pair.
1630	*
1631	* Return 0 on success (or no work necessary), return errno on failure.
1632	*
1633	* Not calling hashinit w/ waitok (proper malloc flag) should be safe.
1634	* If kqueue_register is called from a non-fd context, there usually/should
1635	* be no locks held.
1636	*/
1637	static int
1638	kqueue_expand(struct kqueue kq, struct filterops fops, uintptr_t ident,
1639	int waitok)
1640	{
1641	struct klist list, tmp_knhash, *to_free;
1642	u_long tmp_knhashmask;
1643	int size;
1644	int fd;
1645	int mflag = waitok ? M_WAITOK : M_NOWAIT;
1646
1647	KQ_NOTOWNED(kq);
1648
1649	to_free = NULL;
1650	if (fops->f_isfd) {
1651	fd = ident;
1652	if (kq->kq_knlistsize <= fd) {
1653	size = kq->kq_knlistsize;
1654	while (size <= fd)
1655	size += KQEXTENT;
1656	list = malloc(size * sizeof(*list), M_KQUEUE, mflag);
1657	if (list == NULL)
1658	return ENOMEM;
1659	KQ_LOCK(kq);
1660	if (kq->kq_knlistsize > fd) {
1661	to_free = list;
1662	list = NULL;
1663	} else {
1664	if (kq->kq_knlist != NULL) {
1665	bcopy(kq->kq_knlist, list,
1666	kq->kq_knlistsize * sizeof(*list));
1667	to_free = kq->kq_knlist;
1668	kq->kq_knlist = NULL;
1669	}
1670	bzero((caddr_t)list +
1671	kq->kq_knlistsize * sizeof(*list),
1672	(size - kq->kq_knlistsize) * sizeof(*list));
1673	kq->kq_knlistsize = size;
1674	kq->kq_knlist = list;
1675	}
1676	KQ_UNLOCK(kq);
1677	}
1678	} else {
1679	if (kq->kq_knhashmask == 0) {
1680	tmp_knhash = hashinit(KN_HASHSIZE, M_KQUEUE,
1681	&tmp_knhashmask);
1682	if (tmp_knhash == NULL)
1683	return ENOMEM;
1684	KQ_LOCK(kq);
1685	if (kq->kq_knhashmask == 0) {
1686	kq->kq_knhash = tmp_knhash;
1687	kq->kq_knhashmask = tmp_knhashmask;
1688	} else {
1689	to_free = tmp_knhash;
1690	}
1691	KQ_UNLOCK(kq);
1692	}
1693	}
1694	free(to_free, M_KQUEUE);
1695
1696	KQ_NOTOWNED(kq);
1697	return 0;
1698	}
1699
1700	static void
1701	kqueue_task(void *arg, int pending)
1702	{
1703	struct kqueue *kq;
1704	int haskqglobal;
1705
1706	haskqglobal = 0;
1707	kq = arg;
1708
1709	KQ_GLOBAL_LOCK(&kq_global, haskqglobal);
1710	KQ_LOCK(kq);
1711
1712	KNOTE_LOCKED(&kq->kq_sel.si_note, 0);
1713
1714	kq->kq_state &= ~KQ_TASKSCHED;
1715	if ((kq->kq_state & KQ_TASKDRAIN) == KQ_TASKDRAIN) {
1716	wakeup(&kq->kq_state);
1717	}
1718	KQ_UNLOCK(kq);
1719	KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
1720	}
1721
1722	/*
1723	* Scan, update kn_data (if not ONESHOT), and copyout triggered events.
1724	* We treat KN_MARKER knotes as if they are in flux.
1725	*/
/*
 * Collects up to maxevents events from kq into the scratch array keva
 * and hands them to k_ops->k_copyout, flushing whenever KQ_NEVENTS
 * entries have accumulated.  tsp is the optional timeout.  The number of
 * events consumed is stored in td->td_retval[0]; the return value is 0
 * or an errno (EINVAL for a malformed timeout, EINTR after an
 * interrupted sleep, or a copyout error).
 */
1726	static int
1727	kqueue_scan(struct kqueue kq, int maxevents, struct kevent_copyops k_ops,
1728	const struct timespec tsp, struct kevent keva, struct thread *td)
1729	{
1730	struct kevent *kevp;
1731	struct knote kn, marker;
1732	struct knlist *knl;
1733	sbintime_t asbt, rsbt;
1734	int count, error, haskqglobal, influx, nkev, touch;
1735
1736	count = maxevents;
1737	nkev = 0;
1738	error = 0;
1739	haskqglobal = 0;
1740
1741	if (maxevents == 0)
1742	goto done_nl;
1743
/*
 * Timeout setup: asbt == -1 marks a zero timespec and makes the empty-
 * queue path below return without sleeping.  asbt == 0 is used when no
 * timeout was given or tv_sec exceeds INT32_MAX; presumably this means
 * an unbounded sleep, confirm against msleep_sbt().
 */
1744	rsbt = 0;
1745	if (tsp != NULL) {
1746	if (tsp->tv_sec < 0 \|\| tsp->tv_nsec < 0 \|\|
1747	tsp->tv_nsec >= 1000000000) {
1748	error = EINVAL;
1749	goto done_nl;
1750	}
1751	if (timespecisset(tsp)) {
1752	if (tsp->tv_sec <= INT32_MAX) {
1753	rsbt = tstosbt(*tsp);
1754	if (TIMESEL(&asbt, rsbt))
1755	asbt += tc_tick_sbt;
1756	if (asbt <= SBT_MAX - rsbt)
1757	asbt += rsbt;
1758	else
1759	asbt = 0;
1760	rsbt >>= tc_precexp;
1761	} else
1762	asbt = 0;
1763	} else
1764	asbt = -1;
1765	} else
1766	asbt = 0;
/* The marker knote bounds one pass over the queue. */
1767	marker = knote_alloc(1);
1768	marker->kn_status = KN_MARKER;
1769	KQ_LOCK(kq);
1770
1771	retry:
1772	kevp = keva;
1773	if (kq->kq_count == 0) {
1774	if (asbt == -1) {
1775	error = EWOULDBLOCK;
1776	} else {
1777	kq->kq_state \|= KQ_SLEEP;
1778	error = msleep_sbt(kq, &kq->kq_lock, PSOCK \| PCATCH,
1779	"kqread", asbt, rsbt, C_ABSOLUTE);
1780	}
1781	if (error == 0)
1782	goto retry;
1783	/* don't restart after signals... */
1784	if (error == ERESTART)
1785	error = EINTR;
1786	else if (error == EWOULDBLOCK)
1787	error = 0;
1788	goto done;
1789	}
1790
1791	TAILQ_INSERT_TAIL(&kq->kq_head, marker, kn_tqe);
1792	influx = 0;
1793	while (count) {
1794	KQ_OWNED(kq);
1795	kn = TAILQ_FIRST(&kq->kq_head);
1796
/* Another scanner's marker or a knote in flux: sleep and look again. */
1797	if ((kn->kn_status == KN_MARKER && kn != marker) \|\|
1798	kn_in_flux(kn)) {
1799	if (influx) {
1800	influx = 0;
1801	KQ_FLUX_WAKEUP(kq);
1802	}
1803	kq->kq_state \|= KQ_FLUXWAIT;
1804	error = msleep(kq, &kq->kq_lock, PSOCK,
1805	"kqflxwt", 0);
1806	continue;
1807	}
1808
1809	TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
1810	if ((kn->kn_status & KN_DISABLED) == KN_DISABLED) {
1811	kn->kn_status &= ~KN_QUEUED;
1812	kq->kq_count--;
1813	continue;
1814	}
/* Reached our own marker: retry if nothing was collected, else finish. */
1815	if (kn == marker) {
1816	KQ_FLUX_WAKEUP(kq);
1817	if (count == maxevents)
1818	goto retry;
1819	goto done;
1820	}
1821	KASSERT(!kn_in_flux(kn),
1822	("knote %p is unexpectedly in flux", kn));
1823
1824	if ((kn->kn_flags & EV_DROP) == EV_DROP) {
1825	kn->kn_status &= ~KN_QUEUED;
1826	kn_enter_flux(kn);
1827	kq->kq_count--;
1828	KQ_UNLOCK(kq);
1829	/*
1830	* We don't need to lock the list since we've
1831	* marked it as in flux.
1832	*/
1833	knote_drop(kn, td);
1834	KQ_LOCK(kq);
1835	continue;
1836	} else if ((kn->kn_flags & EV_ONESHOT) == EV_ONESHOT) {
1837	kn->kn_status &= ~KN_QUEUED;
1838	kn_enter_flux(kn);
1839	kq->kq_count--;
1840	KQ_UNLOCK(kq);
1841	/*
1842	* We don't need to lock the list since we've
1843	* marked the knote as being in flux.
1844	*/
1845	*kevp = kn->kn_kevent;
1846	knote_drop(kn, td);
1847	KQ_LOCK(kq);
1848	kn = NULL;
1849	} else {
1850	kn->kn_status \|= KN_SCAN;
1851	kn_enter_flux(kn);
1852	KQ_UNLOCK(kq);
1853	if ((kn->kn_status & KN_KQUEUE) == KN_KQUEUE)
1854	KQ_GLOBAL_LOCK(&kq_global, haskqglobal);
1855	knl = kn_list_lock(kn);
/* Re-check the filter; an event that no longer holds is dequeued. */
1856	if (kn->kn_fop->f_event(kn, 0) == 0) {
1857	KQ_LOCK(kq);
1858	KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
1859	kn->kn_status &= ~(KN_QUEUED \| KN_ACTIVE \|
1860	KN_SCAN);
1861	kn_leave_flux(kn);
1862	kq->kq_count--;
1863	kn_list_unlock(knl);
1864	influx = 1;
1865	continue;
1866	}
1867	touch = (!kn->kn_fop->f_isfd &&
1868	kn->kn_fop->f_touch != NULL);
1869	if (touch)
1870	kn->kn_fop->f_touch(kn, kevp, EVENT_PROCESS);
1871	else
1872	*kevp = kn->kn_kevent;
1873	KQ_LOCK(kq);
1874	KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
1875	if (kn->kn_flags & (EV_CLEAR \| EV_DISPATCH)) {
1876	/*
1877	* Manually clear knotes who weren't
1878	* 'touch'ed.
1879	*/
1880	if (touch == 0 && kn->kn_flags & EV_CLEAR) {
1881	kn->kn_data = 0;
1882	kn->kn_fflags = 0;
1883	}
1884	if (kn->kn_flags & EV_DISPATCH)
1885	kn->kn_status \|= KN_DISABLED;
1886	kn->kn_status &= ~(KN_QUEUED \| KN_ACTIVE);
1887	kq->kq_count--;
1888	} else
1889	TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
1890
1891	kn->kn_status &= ~KN_SCAN;
1892	kn_leave_flux(kn);
1893	kn_list_unlock(knl);
1894	influx = 1;
1895	}
1896
1897	/* we are returning a copy to the user */
1898	kevp++;
1899	nkev++;
1900	count--;
1901
/* Scratch array full: flush it to the caller with the kq unlocked. */
1902	if (nkev == KQ_NEVENTS) {
1903	influx = 0;
1904	KQ_UNLOCK_FLUX(kq);
1905	error = k_ops->k_copyout(k_ops->arg, keva, nkev);
1906	nkev = 0;
1907	kevp = keva;
1908	KQ_LOCK(kq);
1909	if (error)
1910	break;
1911	}
1912	}
1913	TAILQ_REMOVE(&kq->kq_head, marker, kn_tqe);
1914	done:
1915	KQ_OWNED(kq);
1916	KQ_UNLOCK_FLUX(kq);
1917	knote_free(marker);
/* Reached without the kq lock; copies out any events still buffered. */
1918	done_nl:
1919	KQ_NOTOWNED(kq);
1920	if (nkev != 0)
1921	error = k_ops->k_copyout(k_ops->arg, keva, nkev);
1922	td->td_retval[0] = maxevents - count;
1923	return (error);
1924	}
1925
1926	#ifndef __rtems__
/*
 * ioctl handler for kqueue files.  The FIOASYNC/FIOSETOWN/FIOGETOWN
 * handling below is compiled out with #if 0, so every command currently
 * yields ENOTTY.
 */
1927	/ARGSUSED/
1928	static int
1929	kqueue_ioctl(struct file fp, u_long cmd, void data,
1930	struct ucred active_cred, struct thread td)
1931	{
1932	/*
1933	* Enabling sigio causes two major problems:
1934	* 1) infinite recursion:
1935	* Synopsys: kevent is being used to track signals and have FIOASYNC
1936	* set. On receipt of a signal this will cause a kqueue to recurse
1937	* into itself over and over. Sending the sigio causes the kqueue
1938	* to become ready, which in turn posts sigio again, forever.
1939	* Solution: this can be solved by setting a flag in the kqueue that
1940	* we have a SIGIO in progress.
1941	* 2) locking problems:
1942	* Synopsys: Kqueue is a leaf subsystem, but adding signalling puts
1943	* us above the proc and pgrp locks.
1944	* Solution: Post a signal using an async mechanism, being sure to
1945	* record a generation count in the delivery so that we do not deliver
1946	* a signal to the wrong process.
1947	*
1948	* Note, these two mechanisms are somewhat mutually exclusive!
1949	*/
1950	#if 0
1951	struct kqueue *kq;
1952
1953	kq = fp->f_data;
1954	switch (cmd) {
1955	case FIOASYNC:
1956	if ((int )data) {
1957	kq->kq_state \|= KQ_ASYNC;
1958	} else {
1959	kq->kq_state &= ~KQ_ASYNC;
1960	}
1961	return (0);
1962
1963	case FIOSETOWN:
1964	return (fsetown((int )data, &kq->kq_sigio));
1965
1966	case FIOGETOWN:
1967	(int )data = fgetown(&kq->kq_sigio);
1968	return (0);
1969	}
1970	#endif
1971
1972	return (ENOTTY);
1973	}
1974	#endif /* __rtems__ */
1975
1976	/ARGSUSED/
1977	static int
1978	kqueue_poll(struct file fp, int events, struct ucred active_cred,
1979	struct thread *td)
1980	{
1981	struct kqueue *kq;
1982	int revents = 0;
1983	int error;
1984
1985	if ((error = kqueue_acquire(fp, &kq)))
1986	return POLLERR;
1987
1988	KQ_LOCK(kq);
1989	if (events & (POLLIN \| POLLRDNORM)) {
1990	if (kq->kq_count) {
1991	revents \|= events & (POLLIN \| POLLRDNORM);
1992	} else {
1993	selrecord(td, &kq->kq_sel);
1994	if (SEL_WAITING(&kq->kq_sel))
1995	kq->kq_state \|= KQ_SEL;
1996	}
1997	}
1998	kqueue_release(kq, 1);
1999	KQ_UNLOCK(kq);
2000	return (revents);
2001	}
2002	#ifdef __rtems__
2003	static int
2004	rtems_bsd_kqueue_poll(rtems_libio_t *iop, int events)
2005	{
2006	struct thread *td = rtems_bsd_get_curthread_or_null();
2007	struct file *fp = rtems_bsd_iop_to_fp(iop);
2008	int error;
2009
2010	if (td != NULL) {
2011	error = kqueue_poll(fp, events, NULL, td);
2012	} else {
2013	error = ENOMEM;
2014	}
2015
2016	return error;
2017	}
2018	#endif /* __rtems__ */
2019
2020	/ARGSUSED/
2021	#ifndef __rtems__
2022	static int
2023	kqueue_stat(struct file fp, struct stat st, struct ucred *active_cred,
2024	struct thread *td)
2025	{
2026
2027	bzero((void )st, sizeof st);
2028	#else /* __rtems__ */
2029	static int
2030	rtems_bsd_kqueue_stat(const rtems_filesystem_location_info_t *loc,
2031	struct stat *st)
2032	{
2033	(void) loc;
2034	#endif /* __rtems__ */
2035	/*
2036	* We no longer return kq_count because the unlocked value is useless.
2037	* If you spent all this time getting the count, why not spend your
2038	* syscall better by calling kevent?
2039	*
2040	* XXX - This is needed for libc_r.
2041	*/
2042	st->st_mode = S_IFIFO;
2043	return (0);
2044	}
2045
2046	static void
2047	kqueue_drain(struct kqueue kq, struct thread td)
2048	{
2049	struct knote *kn;
2050	int i;
2051
2052	KQ_LOCK(kq);
2053
2054	KASSERT((kq->kq_state & KQ_CLOSING) != KQ_CLOSING,
2055	("kqueue already closing"));
2056	kq->kq_state \|= KQ_CLOSING;
2057	if (kq->kq_refcnt > 1)
2058	msleep(&kq->kq_refcnt, &kq->kq_lock, PSOCK, "kqclose", 0);
2059
2060	KASSERT(kq->kq_refcnt == 1, ("other refs are out there!"));
2061
2062	KASSERT(knlist_empty(&kq->kq_sel.si_note),
2063	("kqueue's knlist not empty"));
2064
2065	for (i = 0; i < kq->kq_knlistsize; i++) {
2066	while ((kn = SLIST_FIRST(&kq->kq_knlist[i])) != NULL) {
2067	if (kn_in_flux(kn)) {
2068	kq->kq_state \|= KQ_FLUXWAIT;
2069	msleep(kq, &kq->kq_lock, PSOCK, "kqclo1", 0);
2070	continue;
2071	}
2072	kn_enter_flux(kn);
2073	KQ_UNLOCK(kq);
2074	knote_drop(kn, td);
2075	KQ_LOCK(kq);
2076	}
2077	}
2078	if (kq->kq_knhashmask != 0) {
2079	for (i = 0; i <= kq->kq_knhashmask; i++) {
2080	while ((kn = SLIST_FIRST(&kq->kq_knhash[i])) != NULL) {
2081	if (kn_in_flux(kn)) {
2082	kq->kq_state \|= KQ_FLUXWAIT;
2083	msleep(kq, &kq->kq_lock, PSOCK,
2084	"kqclo2", 0);
2085	continue;
2086	}
2087	kn_enter_flux(kn);
2088	KQ_UNLOCK(kq);
2089	knote_drop(kn, td);
2090	KQ_LOCK(kq);
2091	}
2092	}
2093	}
2094
2095	if ((kq->kq_state & KQ_TASKSCHED) == KQ_TASKSCHED) {
2096	kq->kq_state \|= KQ_TASKDRAIN;
2097	msleep(&kq->kq_state, &kq->kq_lock, PSOCK, "kqtqdr", 0);
2098	}
2099
2100	if ((kq->kq_state & KQ_SEL) == KQ_SEL) {
2101	selwakeuppri(&kq->kq_sel, PSOCK);
2102	if (!SEL_WAITING(&kq->kq_sel))
2103	kq->kq_state &= ~KQ_SEL;
2104	}
2105
2106	KQ_UNLOCK(kq);
2107	}
2108
2109	static void
2110	kqueue_destroy(struct kqueue *kq)
2111	{
2112
2113	#ifndef __rtems__
2114	KASSERT(kq->kq_fdp == NULL,
2115	("kqueue still attached to a file descriptor"));
2116	#endif /* __rtems__ */
2117	seldrain(&kq->kq_sel);
2118	knlist_destroy(&kq->kq_sel.si_note);
2119	mtx_destroy(&kq->kq_lock);
2120
2121	if (kq->kq_knhash != NULL)
2122	free(kq->kq_knhash, M_KQUEUE);
2123	if (kq->kq_knlist != NULL)
2124	free(kq->kq_knlist, M_KQUEUE);
2125
2126	funsetown(&kq->kq_sigio);
2127	}
2128
2129	/ARGSUSED/
2130	static int
2131	kqueue_close(struct file fp, struct thread td)
2132	{
2133	struct kqueue *kq = fp->f_data;
2134	struct filedesc *fdp;
2135	int error;
2136	int filedesc_unlock;
2137
2138	if ((error = kqueue_acquire(fp, &kq)))
2139	return error;
2140	kqueue_drain(kq, td);
2141
2142	#ifndef __rtems__
2143	/*
2144	* We could be called due to the knote_drop() doing fdrop(),
2145	* called from kqueue_register(). In this case the global
2146	* lock is owned, and filedesc sx is locked before, to not
2147	* take the sleepable lock after non-sleepable.
2148	*/
2149	fdp = kq->kq_fdp;
2150	kq->kq_fdp = NULL;
2151	if (!sx_xlocked(FILEDESC_LOCK(fdp))) {
2152	FILEDESC_XLOCK(fdp);
2153	filedesc_unlock = 1;
2154	} else
2155	filedesc_unlock = 0;
2156	TAILQ_REMOVE(&fdp->fd_kqlist, kq, kq_list);
2157	if (filedesc_unlock)
2158	FILEDESC_XUNLOCK(fdp);
2159	#else /* __rtems__ */
2160	(void)filedesc_unlock;
2161	rtems_libio_lock();
2162	TAILQ_REMOVE(&fd_kqlist, kq, kq_list);
2163	rtems_libio_unlock();
2164	#endif /* __rtems__ */
2165
2166	kqueue_destroy(kq);
2167	chgkqcnt(kq->kq_cred->cr_ruidinfo, -1, 0);
2168	crfree(kq->kq_cred);
2169	free(kq, M_KQUEUE);
2170	fp->f_data = NULL;
2171
2172	return (0);
2173	}
2174	#ifdef __rtems__
2175	static int
2176	rtems_bsd_kqueue_close(rtems_libio_t *iop)
2177	{
2178	struct thread *td = rtems_bsd_get_curthread_or_null();
2179	struct file *fp = rtems_bsd_iop_to_fp(iop);
2180	int error;
2181
2182	if (td != NULL) {
2183	error = kqueue_close(fp, td);
2184	} else {
2185	error = ENOMEM;
2186	}
2187
2188	return rtems_bsd_error_to_status_and_errno(error);
2189	}
2190	#endif /* __rtems__ */
2191
2192	#ifndef __rtems__
2193	static int
2194	kqueue_fill_kinfo(struct file fp, struct kinfo_file kif, struct filedesc *fdp)
2195	{
2196
2197	kif->kf_type = KF_TYPE_KQUEUE;
2198	return (0);
2199	}
2200	#endif /* __rtems__ */
2201
2202	static void
2203	kqueue_wakeup(struct kqueue *kq)
2204	{
2205	KQ_OWNED(kq);
2206
2207	if ((kq->kq_state & KQ_SLEEP) == KQ_SLEEP) {
2208	kq->kq_state &= ~KQ_SLEEP;
2209	wakeup(kq);
2210	}
2211	if ((kq->kq_state & KQ_SEL) == KQ_SEL) {
2212	selwakeuppri(&kq->kq_sel, PSOCK);
2213	if (!SEL_WAITING(&kq->kq_sel))
2214	kq->kq_state &= ~KQ_SEL;
2215	}
2216	if (!knlist_empty(&kq->kq_sel.si_note))
2217	kqueue_schedtask(kq);
2218	if ((kq->kq_state & KQ_ASYNC) == KQ_ASYNC) {
2219	#ifndef __rtems__
2220	pgsigio(&kq->kq_sigio, SIGIO, 0);
2221	#else /* __rtems__ */
2222	BSD_ASSERT(0);
2223	#endif /* __rtems__ */
2224	}
2225	}
2226
2227	/*
2228	* Walk down a list of knotes, activating them if their event has triggered.
2229	*
2230	* There is a possibility to optimize in the case of one kq watching another.
2231	* Instead of scheduling a task to wake it up, you could pass enough state
2232	* down the chain to make up the parent kqueue. Make this code functional
2233	* first.
2234	*/
2235	void
2236	knote(struct knlist *list, long hint, int lockflags)
2237	{
2238	struct kqueue *kq;
2239	struct knote kn, tkn;
2240	int error;
2241
2242	if (list == NULL)
2243	return;
2244
2245	KNL_ASSERT_LOCK(list, lockflags & KNF_LISTLOCKED);
2246
2247	if ((lockflags & KNF_LISTLOCKED) == 0)
2248	list->kl_lock(list->kl_lockarg);
2249
2250	/*
2251	* If we unlock the list lock (and enter influx), we can
2252	* eliminate the kqueue scheduling, but this will introduce
2253	* four lock/unlock's for each knote to test. Also, marker
2254	* would be needed to keep iteration position, since filters
2255	* or other threads could remove events.
2256	*/
2257	SLIST_FOREACH_SAFE(kn, &list->kl_list, kn_selnext, tkn) {
2258	kq = kn->kn_kq;
2259	KQ_LOCK(kq);
2260	if (kn_in_flux(kn) && (kn->kn_status & KN_SCAN) == 0) {
2261	/*
2262	* Do not process the influx notes, except for
2263	* the influx coming from the kq unlock in the
2264	* kqueue_scan(). In the later case, we do
2265	* not interfere with the scan, since the code
2266	* fragment in kqueue_scan() locks the knlist,
2267	* and cannot proceed until we finished.
2268	*/
2269	KQ_UNLOCK(kq);
2270	} else if ((lockflags & KNF_NOKQLOCK) != 0) {
2271	kn_enter_flux(kn);
2272	KQ_UNLOCK(kq);
2273	error = kn->kn_fop->f_event(kn, hint);
2274	KQ_LOCK(kq);
2275	kn_leave_flux(kn);
2276	if (error)
2277	KNOTE_ACTIVATE(kn, 1);
2278	KQ_UNLOCK_FLUX(kq);
2279	} else {
2280	kn->kn_status \|= KN_HASKQLOCK;
2281	if (kn->kn_fop->f_event(kn, hint))
2282	KNOTE_ACTIVATE(kn, 1);
2283	kn->kn_status &= ~KN_HASKQLOCK;
2284	KQ_UNLOCK(kq);
2285	}
2286	}
2287	if ((lockflags & KNF_LISTLOCKED) == 0)
2288	list->kl_unlock(list->kl_lockarg);
2289	}
2290
2291	/*
2292	* add a knote to a knlist
2293	*/
2294	void
2295	knlist_add(struct knlist knl, struct knote kn, int islocked)
2296	{
2297
2298	KNL_ASSERT_LOCK(knl, islocked);
2299	KQ_NOTOWNED(kn->kn_kq);
2300	KASSERT(kn_in_flux(kn), ("knote %p not in flux", kn));
2301	KASSERT((kn->kn_status & KN_DETACHED) != 0,
2302	("knote %p was not detached", kn));
2303	if (!islocked)
2304	knl->kl_lock(knl->kl_lockarg);
2305	SLIST_INSERT_HEAD(&knl->kl_list, kn, kn_selnext);
2306	if (!islocked)
2307	knl->kl_unlock(knl->kl_lockarg);
2308	KQ_LOCK(kn->kn_kq);
2309	kn->kn_knlist = knl;
2310	kn->kn_status &= ~KN_DETACHED;
2311	KQ_UNLOCK(kn->kn_kq);
2312	}
2313
2314	static void
2315	knlist_remove_kq(struct knlist knl, struct knote kn, int knlislocked,
2316	int kqislocked)
2317	{
2318
2319	KASSERT(!kqislocked \|\| knlislocked, ("kq locked w/o knl locked"));
2320	KNL_ASSERT_LOCK(knl, knlislocked);
2321	mtx_assert(&kn->kn_kq->kq_lock, kqislocked ? MA_OWNED : MA_NOTOWNED);
2322	KASSERT(kqislocked \|\| kn_in_flux(kn), ("knote %p not in flux", kn));
2323	KASSERT((kn->kn_status & KN_DETACHED) == 0,
2324	("knote %p was already detached", kn));
2325	if (!knlislocked)
2326	knl->kl_lock(knl->kl_lockarg);
2327	SLIST_REMOVE(&knl->kl_list, kn, knote, kn_selnext);
2328	kn->kn_knlist = NULL;
2329	if (!knlislocked)
2330	kn_list_unlock(knl);
2331	if (!kqislocked)
2332	KQ_LOCK(kn->kn_kq);
2333	kn->kn_status \|= KN_DETACHED;
2334	if (!kqislocked)
2335	KQ_UNLOCK(kn->kn_kq);
2336	}
2337
/*
 * remove knote from the specified knlist
 *
 * Wrapper around knlist_remove_kq() for callers that do not hold the kq
 * lock; 'islocked' refers to the list lock.
 */
void
knlist_remove(struct knlist *knl, struct knote *kn, int islocked)
{
	const int kqislocked = 0;

	knlist_remove_kq(knl, kn, islocked, kqislocked);
}
2347
2348	int
2349	knlist_empty(struct knlist *knl)
2350	{
2351
2352	KNL_ASSERT_LOCKED(knl);
2353	return (SLIST_EMPTY(&knl->kl_list));
2354	}
2355
/*
 * Fallback mutex: knlist_init() stores its address as the lock argument
 * when the caller supplies no lock of its own.
 */
2356	static struct mtx knlist_lock;
2357	MTX_SYSINIT(knlist_lock, &knlist_lock, "knlist lock for lockless objects",
2358	MTX_DEF);
/* Default lock/unlock callbacks, defined below. */
2359	static void knlist_mtx_lock(void *arg);
2360	static void knlist_mtx_unlock(void *arg);
2361
/* Default knlist lock callback: 'arg' is the struct mtx to acquire. */
static void
knlist_mtx_lock(void *arg)
{
	struct mtx *m = arg;

	mtx_lock(m);
}
2368
/* Default knlist unlock callback: 'arg' is the struct mtx to release. */
static void
knlist_mtx_unlock(void *arg)
{
	struct mtx *m = arg;

	mtx_unlock(m);
}
2375
2376	static void
2377	knlist_mtx_assert_locked(void *arg)
2378	{
2379
2380	mtx_assert((struct mtx *)arg, MA_OWNED);
2381	}
2382
2383	static void
2384	knlist_mtx_assert_unlocked(void *arg)
2385	{
2386
2387	mtx_assert((struct mtx *)arg, MA_NOTOWNED);
2388	}
2389
2390	#ifndef __rtems__
/* Lock callback: take the struct rwlock passed as arg in read mode. */
static void
knlist_rw_rlock(void *arg)
{
	struct rwlock *rw = arg;

	rw_rlock(rw);
}
2397
/* Unlock callback: drop the read hold on the struct rwlock passed as arg. */
static void
knlist_rw_runlock(void *arg)
{
	struct rwlock *rw = arg;

	rw_runlock(rw);
}
2404
2405	static void
2406	knlist_rw_assert_locked(void *arg)
2407	{
2408
2409	rw_assert((struct rwlock *)arg, RA_LOCKED);
2410	}
2411
2412	static void
2413	knlist_rw_assert_unlocked(void *arg)
2414	{
2415
2416	rw_assert((struct rwlock *)arg, RA_UNLOCKED);
2417	}
2418	#endif /* __rtems__ */
2419
2420	void
2421	knlist_init(struct knlist knl, void lock, void (kl_lock)(void ),
2422	void (kl_unlock)(void ),
2423	void (kl_assert_locked)(void ), void (kl_assert_unlocked)(void ))
2424	{
2425
2426	if (lock == NULL)
2427	knl->kl_lockarg = &knlist_lock;
2428	else
2429	knl->kl_lockarg = lock;
2430
2431	if (kl_lock == NULL)
2432	knl->kl_lock = knlist_mtx_lock;
2433	else
2434	knl->kl_lock = kl_lock;
2435	if (kl_unlock == NULL)
2436	knl->kl_unlock = knlist_mtx_unlock;
2437	else
2438	knl->kl_unlock = kl_unlock;
2439	if (kl_assert_locked == NULL)
2440	knl->kl_assert_locked = knlist_mtx_assert_locked;
2441	else
2442	knl->kl_assert_locked = kl_assert_locked;
2443	if (kl_assert_unlocked == NULL)
2444	knl->kl_assert_unlocked = knlist_mtx_assert_unlocked;
2445	else
2446	knl->kl_assert_unlocked = kl_assert_unlocked;
2447
2448	knl->kl_autodestroy = 0;
2449	SLIST_INIT(&knl->kl_list);
2450	}
2451
2452	void
2453	knlist_init_mtx(struct knlist knl, struct mtx lock)
2454	{
2455
2456	knlist_init(knl, lock, NULL, NULL, NULL, NULL);
2457	}
2458
2459	struct knlist *
2460	knlist_alloc(struct mtx *lock)
2461	{
2462	struct knlist *knl;
2463
2464	knl = malloc(sizeof(struct knlist), M_KQUEUE, M_WAITOK);
2465	knlist_init_mtx(knl, lock);
2466	return (knl);
2467	}
2468
2469	#ifndef __rtems__
2470	void
2471	knlist_init_rw_reader(struct knlist knl, struct rwlock lock)
2472	{
2473
2474	knlist_init(knl, lock, knlist_rw_rlock, knlist_rw_runlock,
2475	knlist_rw_assert_locked, knlist_rw_assert_unlocked);
2476	}
2477	#endif /* __rtems__ */
2478
/*
 * Tear down a knlist.  The only work done here is asserting that no
 * knotes remain on the list; no memory is released.
 */
void
knlist_destroy(struct knlist *knl)
{

	KASSERT(KNLIST_EMPTY(knl),
	    ("destroying knlist %p with knotes on it", knl));
}
2486
2487	void
2488	knlist_detach(struct knlist *knl)
2489	{
2490
2491	KNL_ASSERT_LOCKED(knl);
2492	knl->kl_autodestroy = 1;
2493	if (knlist_empty(knl)) {
2494	knlist_destroy(knl);
2495	free(knl, M_KQUEUE);
2496	}
2497	}
2498
2499	/*
2500	* Even if we are locked, we may need to drop the lock to allow any influx
2501	* knotes time to "settle".
2502	*/
2503	void
2504	knlist_cleardel(struct knlist knl, struct thread td, int islocked, int killkn)
2505	{
2506	struct knote kn, kn2;
2507	struct kqueue *kq;
2508
2509	KASSERT(!knl->kl_autodestroy, ("cleardel for autodestroy %p", knl));
2510	if (islocked)
2511	KNL_ASSERT_LOCKED(knl);
2512	else {
2513	KNL_ASSERT_UNLOCKED(knl);
2514	again: /* need to reacquire lock since we have dropped it */
2515	knl->kl_lock(knl->kl_lockarg);
2516	}
2517
2518	SLIST_FOREACH_SAFE(kn, &knl->kl_list, kn_selnext, kn2) {
2519	kq = kn->kn_kq;
2520	KQ_LOCK(kq);
2521	if (kn_in_flux(kn)) {
2522	KQ_UNLOCK(kq);
2523	continue;
2524	}
2525	knlist_remove_kq(knl, kn, 1, 1);
2526	if (killkn) {
2527	kn_enter_flux(kn);
2528	KQ_UNLOCK(kq);
2529	knote_drop_detached(kn, td);
2530	} else {
2531	/* Make sure cleared knotes disappear soon */
2532	kn->kn_flags \|= EV_EOF \| EV_ONESHOT;
2533	KQ_UNLOCK(kq);
2534	}
2535	kq = NULL;
2536	}
2537
2538	if (!SLIST_EMPTY(&knl->kl_list)) {
2539	/* there are still in flux knotes remaining */
2540	kn = SLIST_FIRST(&knl->kl_list);
2541	kq = kn->kn_kq;
2542	KQ_LOCK(kq);
2543	KASSERT(kn_in_flux(kn), ("knote removed w/o list lock"));
2544	knl->kl_unlock(knl->kl_lockarg);
2545	kq->kq_state \|= KQ_FLUXWAIT;
2546	msleep(kq, &kq->kq_lock, PSOCK \| PDROP, "kqkclr", 0);
2547	kq = NULL;
2548	goto again;
2549	}
2550
2551	if (islocked)
2552	KNL_ASSERT_LOCKED(knl);
2553	else {
2554	knl->kl_unlock(knl->kl_lockarg);
2555	KNL_ASSERT_UNLOCKED(knl);
2556	}
2557	}
2558
2559	/*
2560	* Remove all knotes referencing a specified fd must be called with FILEDESC
2561	* lock. This prevents a race where a new fd comes along and occupies the
2562	* entry and we attach a knote to the fd.
2563	*/
2564	void
2565	knote_fdclose(struct thread *td, int fd)
2566	{
2567	#ifndef __rtems__
2568	struct filedesc *fdp = td->td_proc->p_fd;
2569	#endif /* __rtems__ */
2570	struct kqueue *kq;
2571	struct knote *kn;
2572	int influx;
2573
2574	#ifndef __rtems__
2575	FILEDESC_XLOCK_ASSERT(fdp);
2576	#endif /* __rtems__ */
2577
2578	/*
2579	* We shouldn't have to worry about new kevents appearing on fd
2580	* since filedesc is locked.
2581	*/
2582	#ifndef __rtems__
2583	TAILQ_FOREACH(kq, &fdp->fd_kqlist, kq_list) {
2584	#else /* __rtems__ */
2585	/* FIXME: Use separate lock? */
2586	rtems_libio_lock();
2587	TAILQ_FOREACH(kq, &fd_kqlist, kq_list) {
2588	#endif /* __rtems__ */
2589	KQ_LOCK(kq);
2590
2591	again:
2592	influx = 0;
2593	while (kq->kq_knlistsize > fd &&
2594	(kn = SLIST_FIRST(&kq->kq_knlist[fd])) != NULL) {
2595	if (kn_in_flux(kn)) {
2596	/* someone else might be waiting on our knote */
2597	if (influx)
2598	wakeup(kq);
2599	kq->kq_state \|= KQ_FLUXWAIT;
2600	msleep(kq, &kq->kq_lock, PSOCK, "kqflxwt", 0);
2601	goto again;
2602	}
2603	kn_enter_flux(kn);
2604	KQ_UNLOCK(kq);
2605	influx = 1;
2606	knote_drop(kn, td);
2607	KQ_LOCK(kq);
2608	}
2609	KQ_UNLOCK_FLUX(kq);
2610	}
2611	#ifdef __rtems__
2612	rtems_libio_unlock();
2613	#endif /* __rtems__ */
2614	}
2615
2616	static int
2617	knote_attach(struct knote kn, struct kqueue kq)
2618	{
2619	struct klist *list;
2620
2621	KASSERT(kn_in_flux(kn), ("knote %p not marked influx", kn));
2622	KQ_OWNED(kq);
2623
2624	if (kn->kn_fop->f_isfd) {
2625	if (kn->kn_id >= kq->kq_knlistsize)
2626	return (ENOMEM);
2627	list = &kq->kq_knlist[kn->kn_id];
2628	} else {
2629	if (kq->kq_knhash == NULL)
2630	return (ENOMEM);
2631	list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];
2632	}
2633	SLIST_INSERT_HEAD(list, kn, kn_link);
2634	return (0);
2635	}
2636
2637	static void
2638	knote_drop(struct knote kn, struct thread td)
2639	{
2640
2641	if ((kn->kn_status & KN_DETACHED) == 0)
2642	kn->kn_fop->f_detach(kn);
2643	knote_drop_detached(kn, td);
2644	}
2645
2646	static void
2647	knote_drop_detached(struct knote kn, struct thread td)
2648	{
2649	struct kqueue *kq;
2650	struct klist *list;
2651
2652	kq = kn->kn_kq;
2653
2654	KASSERT((kn->kn_status & KN_DETACHED) != 0,
2655	("knote %p still attached", kn));
2656	KQ_NOTOWNED(kq);
2657
2658	KQ_LOCK(kq);
2659	KASSERT(kn->kn_influx == 1,
2660	("knote_drop called on %p with influx %d", kn, kn->kn_influx));
2661
2662	if (kn->kn_fop->f_isfd)
2663	list = &kq->kq_knlist[kn->kn_id];
2664	else
2665	list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];
2666
2667	if (!SLIST_EMPTY(list))
2668	SLIST_REMOVE(list, kn, knote, kn_link);
2669	if (kn->kn_status & KN_QUEUED)
2670	knote_dequeue(kn);
2671	KQ_UNLOCK_FLUX(kq);
2672
2673	if (kn->kn_fop->f_isfd) {
2674	fdrop(kn->kn_fp, td);
2675	kn->kn_fp = NULL;
2676	}
2677	kqueue_fo_release(kn->kn_kevent.filter);
2678	kn->kn_fop = NULL;
2679	knote_free(kn);
2680	}
2681
2682	static void
2683	knote_enqueue(struct knote *kn)
2684	{
2685	struct kqueue *kq = kn->kn_kq;
2686
2687	KQ_OWNED(kn->kn_kq);
2688	KASSERT((kn->kn_status & KN_QUEUED) == 0, ("knote already queued"));
2689
2690	TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
2691	kn->kn_status \|= KN_QUEUED;
2692	kq->kq_count++;
2693	kqueue_wakeup(kq);
2694	}
2695
2696	static void
2697	knote_dequeue(struct knote *kn)
2698	{
2699	struct kqueue *kq = kn->kn_kq;
2700
2701	KQ_OWNED(kn->kn_kq);
2702	KASSERT(kn->kn_status & KN_QUEUED, ("knote not queued"));
2703
2704	TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
2705	kn->kn_status &= ~KN_QUEUED;
2706	kq->kq_count--;
2707	}
2708
2709	static void
2710	knote_init(void)
2711	{
2712
2713	knote_zone = uma_zcreate("KNOTE", sizeof(struct knote), NULL, NULL,
2714	NULL, NULL, UMA_ALIGN_PTR, 0);
2715	}
2716	SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL);
2717
2718	static struct knote *
2719	knote_alloc(int waitok)
2720	{
2721
2722	return (uma_zalloc(knote_zone, (waitok ? M_WAITOK : M_NOWAIT) \|
2723	M_ZERO));
2724	}
2725
2726	static void
2727	knote_free(struct knote *kn)
2728	{
2729
2730	uma_zfree(knote_zone, kn);
2731	}
2732
2733	/*
2734	* Register the kev w/ the kq specified by fd.
2735	*/
2736	int
2737	kqfd_register(int fd, struct kevent kev, struct thread td, int waitok)
2738	{
2739	struct kqueue *kq;
2740	struct file *fp;
2741	cap_rights_t rights;
2742	int error;
2743
2744	error = fget(td, fd, cap_rights_init(&rights, CAP_KQUEUE_CHANGE), &fp);
2745	if (error != 0)
2746	return (error);
2747	if ((error = kqueue_acquire(fp, &kq)) != 0)
2748	goto noacquire;
2749
2750	error = kqueue_register(kq, kev, td, waitok);
2751	kqueue_release(kq, 0);
2752
2753	noacquire:
2754	fdrop(fp, td);
2755	return (error);
2756	}
2757	#ifdef __rtems__
2758	static const rtems_filesystem_file_handlers_r kqueueops = {
2759	.open_h = rtems_filesystem_default_open,
2760	.close_h = rtems_bsd_kqueue_close,
2761	.read_h = rtems_filesystem_default_read,
2762	.write_h = rtems_filesystem_default_write,
2763	.ioctl_h = rtems_filesystem_default_ioctl,
2764	.lseek_h = rtems_filesystem_default_lseek,
2765	.fstat_h = rtems_bsd_kqueue_stat,
2766	.ftruncate_h = rtems_filesystem_default_ftruncate,
2767	.fsync_h = rtems_filesystem_default_fsync_or_fdatasync,
2768	.fdatasync_h = rtems_filesystem_default_fsync_or_fdatasync,
2769	.fcntl_h = rtems_filesystem_default_fcntl,
2770	.poll_h = rtems_bsd_kqueue_poll,
2771	.kqfilter_h = rtems_bsd_kqueue_kqfilter,
2772	.readv_h = rtems_filesystem_default_readv,
2773	.writev_h = rtems_filesystem_default_writev,
2774	.mmap_h = rtems_filesystem_default_mmap
2775	};
2776	#endif /* __rtems__ */

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: rtems-libbsd/freebsd/sys/kern/kern_event.c @ 72d5fa1

Download in other formats: