Context Navigation

source: rtems-libbsd/freebsd/sys/kern/kern_event.c @ e0b4edbd

55-freebsd-126-freebsd-12

Last change on this file since e0b4edbd was d4bf70e, checked in by Sebastian Huber <sebastian.huber@…>, on 11/15/18 at 09:06:00
Disable or make static kern_* functions
Property mode set to `100644`
File size: 68.0 KB

Line
1	#include <machine/rtems-bsd-kernel-space.h>
2
3	/*-
4	* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
5	*
6	* Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
7	* Copyright 2004 John-Mark Gurney <jmg@FreeBSD.org>
8	* Copyright (c) 2009 Apple, Inc.
9	* All rights reserved.
10	*
11	* Redistribution and use in source and binary forms, with or without
12	* modification, are permitted provided that the following conditions
13	* are met:
14	* 1. Redistributions of source code must retain the above copyright
15	* notice, this list of conditions and the following disclaimer.
16	* 2. Redistributions in binary form must reproduce the above copyright
17	* notice, this list of conditions and the following disclaimer in the
18	* documentation and/or other materials provided with the distribution.
19	*
20	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23	* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30	* SUCH DAMAGE.
31	*/
32
33	#include <sys/cdefs.h>
34	__FBSDID("$FreeBSD$");
35
36	#include <rtems/bsd/local/opt_ktrace.h>
37	#include <rtems/bsd/local/opt_kqueue.h>
38
39	#ifdef COMPAT_FREEBSD11
40	#define _WANT_FREEBSD11_KEVENT
41	#endif
42
43	#include <sys/param.h>
44	#include <sys/systm.h>
45	#include <sys/capsicum.h>
46	#include <sys/kernel.h>
47	#include <sys/lock.h>
48	#include <sys/mutex.h>
49	#include <sys/rwlock.h>
50	#include <sys/proc.h>
51	#include <sys/malloc.h>
52	#include <rtems/bsd/sys/unistd.h>
53	#include <sys/file.h>
54	#include <sys/filedesc.h>
55	#include <sys/filio.h>
56	#include <sys/fcntl.h>
57	#include <sys/kthread.h>
58	#include <sys/selinfo.h>
59	#include <sys/queue.h>
60	#include <sys/event.h>
61	#include <sys/eventvar.h>
62	#include <sys/poll.h>
63	#include <sys/protosw.h>
64	#include <sys/resourcevar.h>
65	#include <sys/sigio.h>
66	#include <sys/signalvar.h>
67	#include <sys/socket.h>
68	#include <sys/socketvar.h>
69	#include <sys/stat.h>
70	#include <sys/sysctl.h>
71	#include <sys/sysproto.h>
72	#include <sys/syscallsubr.h>
73	#include <sys/taskqueue.h>
74	#include <sys/uio.h>
75	#include <sys/user.h>
76	#ifdef KTRACE
77	#include <sys/ktrace.h>
78	#endif
79	#include <machine/atomic.h>
80
81	#include <vm/uma.h>
82	#ifdef __rtems__
83	#include <machine/rtems-bsd-syscall-api.h>
84
85	/* Maintain a global kqueue list on RTEMS */
86	static struct kqlist fd_kqlist;
87	#endif /* __rtems__ */
88
89	static MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");
90
91	/*
92	* This lock is used if multiple kq locks are required. This possibly
93	* should be made into a per proc lock.
94	*/
95	static struct mtx kq_global;
96	MTX_SYSINIT(kq_global, &kq_global, "kqueue order", MTX_DEF);
97	#define KQ_GLOBAL_LOCK(lck, haslck) do { \
98	if (!haslck) \
99	mtx_lock(lck); \
100	haslck = 1; \
101	} while (0)
102	#define KQ_GLOBAL_UNLOCK(lck, haslck) do { \
103	if (haslck) \
104	mtx_unlock(lck); \
105	haslck = 0; \
106	} while (0)
107
108	TASKQUEUE_DEFINE_THREAD(kqueue_ctx);
109
110	static int kevent_copyout(void arg, struct kevent kevp, int count);
111	static int kevent_copyin(void arg, struct kevent kevp, int count);
112	static int kqueue_register(struct kqueue kq, struct kevent kev,
113	struct thread *td, int waitok);
114	static int kqueue_acquire(struct file fp, struct kqueue *kqp);
115	static void kqueue_release(struct kqueue *kq, int locked);
116	static void kqueue_destroy(struct kqueue *kq);
117	static void kqueue_drain(struct kqueue kq, struct thread td);
118	static int kqueue_expand(struct kqueue kq, struct filterops fops,
119	uintptr_t ident, int waitok);
120	static void kqueue_task(void *arg, int pending);
121	static int kqueue_scan(struct kqueue *kq, int maxevents,
122	struct kevent_copyops *k_ops,
123	const struct timespec *timeout,
124	struct kevent keva, struct thread td);
125	static void kqueue_wakeup(struct kqueue *kq);
126	static struct filterops *kqueue_fo_find(int filt);
127	static void kqueue_fo_release(int filt);
128	struct g_kevent_args;
129	static int kern_kevent_generic(struct thread *td,
130	struct g_kevent_args *uap,
131	struct kevent_copyops k_ops, const char struct_name);
132
/*
 * File-operation wiring for kqueue descriptors.  The non-RTEMS build fills in
 * a struct fileops below; the RTEMS build only declares a libio handler table
 * of the same name.
 */
133	#ifndef __rtems__
/* Prototypes for the kqueue file methods. */
134	static fo_rdwr_t kqueue_read;
135	static fo_rdwr_t kqueue_write;
136	static fo_truncate_t kqueue_truncate;
137	static fo_ioctl_t kqueue_ioctl;
138	static fo_poll_t kqueue_poll;
139	static fo_kqfilter_t kqueue_kqfilter;
140	static fo_stat_t kqueue_stat;
141	static fo_close_t kqueue_close;
142	static fo_fill_kinfo_t kqueue_fill_kinfo;
143
/*
 * read, write, truncate, chmod, chown and sendfile are routed to the generic
 * invfo_* handlers rather than to kqueue-specific code, so the
 * kqueue_read/kqueue_write/kqueue_truncate prototypes above are not
 * referenced by this table.
 */
144	static struct fileops kqueueops = {
145	.fo_read = invfo_rdwr,
146	.fo_write = invfo_rdwr,
147	.fo_truncate = invfo_truncate,
148	.fo_ioctl = kqueue_ioctl,
149	.fo_poll = kqueue_poll,
150	.fo_kqfilter = kqueue_kqfilter,
151	.fo_stat = kqueue_stat,
152	.fo_close = kqueue_close,
153	.fo_chmod = invfo_chmod,
154	.fo_chown = invfo_chown,
155	.fo_sendfile = invfo_sendfile,
156	.fo_fill_kinfo = kqueue_fill_kinfo,
157	};
158	#else /* __rtems__ */
/* NOTE(review): declaration only; the initialized table is presumably defined later in this file - confirm. */
159	static const rtems_filesystem_file_handlers_r kqueueops;
160	#endif /* __rtems__ */
161
162	static int knote_attach(struct knote kn, struct kqueue kq);
163	static void knote_drop(struct knote kn, struct thread td);
164	static void knote_drop_detached(struct knote kn, struct thread td);
165	static void knote_enqueue(struct knote *kn);
166	static void knote_dequeue(struct knote *kn);
167	static void knote_init(void);
168	static struct knote *knote_alloc(int waitok);
169	static void knote_free(struct knote *kn);
170
171	static void filt_kqdetach(struct knote *kn);
172	static int filt_kqueue(struct knote *kn, long hint);
173	#ifndef __rtems__
174	static int filt_procattach(struct knote *kn);
175	static void filt_procdetach(struct knote *kn);
176	static int filt_proc(struct knote *kn, long hint);
177	#endif /* __rtems__ */
178	static int filt_fileattach(struct knote *kn);
179	static void filt_timerexpire(void *knx);
180	static int filt_timerattach(struct knote *kn);
181	static void filt_timerdetach(struct knote *kn);
182	static void filt_timerstart(struct knote *kn, sbintime_t to);
183	static void filt_timertouch(struct knote kn, struct kevent kev,
184	u_long type);
185	static int filt_timervalidate(struct knote kn, sbintime_t to);
186	static int filt_timer(struct knote *kn, long hint);
187	static int filt_userattach(struct knote *kn);
188	static void filt_userdetach(struct knote *kn);
189	static int filt_user(struct knote *kn, long hint);
190	static void filt_usertouch(struct knote kn, struct kevent kev,
191	u_long type);
192
/*
 * Method tables for the filters implemented in this file.  f_isfd marks
 * filters whose ident is a file descriptor.
 */
/* Generic descriptor filter: only an attach hook, which redirects via filt_fileattach(). */
193	static struct filterops file_filtops = {
194	.f_isfd = 1,
195	.f_attach = filt_fileattach,
196	};
/* Read filter installed by kqueue_kqfilter() for a kqueue watched by another kqueue. */
197	static struct filterops kqread_filtops = {
198	.f_isfd = 1,
199	.f_detach = filt_kqdetach,
200	.f_event = filt_kqueue,
201	};
202	/* XXX - move to kern_proc.c? */
203	#ifndef __rtems__
/* Process filter; compiled out on RTEMS. */
204	static struct filterops proc_filtops = {
205	.f_isfd = 0,
206	.f_attach = filt_procattach,
207	.f_detach = filt_procdetach,
208	.f_event = filt_proc,
209	};
210	#endif /* __rtems__ */
/* Timer filter; f_touch handles re-registration and event delivery. */
211	static struct filterops timer_filtops = {
212	.f_isfd = 0,
213	.f_attach = filt_timerattach,
214	.f_detach = filt_timerdetach,
215	.f_event = filt_timer,
216	.f_touch = filt_timertouch,
217	};
/* User-triggered filter; f_isfd is left at its zero default. */
218	static struct filterops user_filtops = {
219	.f_attach = filt_userattach,
220	.f_detach = filt_userdetach,
221	.f_event = filt_user,
222	.f_touch = filt_usertouch,
223	};
224
225	static uma_zone_t knote_zone;
226	static unsigned int kq_ncallouts = 0;
227	static unsigned int kq_calloutmax = 4 * 1024;
228	SYSCTL_UINT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW,
229	&kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue");
230
231	/* XXX - ensure not influx ? */
232	#define KNOTE_ACTIVATE(kn, islock) do { \
233	if ((islock)) \
234	mtx_assert(&(kn)->kn_kq->kq_lock, MA_OWNED); \
235	else \
236	KQ_LOCK((kn)->kn_kq); \
237	(kn)->kn_status \|= KN_ACTIVE; \
238	if (((kn)->kn_status & (KN_QUEUED \| KN_DISABLED)) == 0) \
239	knote_enqueue((kn)); \
240	if (!(islock)) \
241	KQ_UNLOCK((kn)->kn_kq); \
242	} while(0)
243	#define KQ_LOCK(kq) do { \
244	mtx_lock(&(kq)->kq_lock); \
245	} while (0)
246	#define KQ_FLUX_WAKEUP(kq) do { \
247	if (((kq)->kq_state & KQ_FLUXWAIT) == KQ_FLUXWAIT) { \
248	(kq)->kq_state &= ~KQ_FLUXWAIT; \
249	wakeup((kq)); \
250	} \
251	} while (0)
252	#define KQ_UNLOCK_FLUX(kq) do { \
253	KQ_FLUX_WAKEUP(kq); \
254	mtx_unlock(&(kq)->kq_lock); \
255	} while (0)
256	#define KQ_UNLOCK(kq) do { \
257	mtx_unlock(&(kq)->kq_lock); \
258	} while (0)
259	#define KQ_OWNED(kq) do { \
260	mtx_assert(&(kq)->kq_lock, MA_OWNED); \
261	} while (0)
262	#define KQ_NOTOWNED(kq) do { \
263	mtx_assert(&(kq)->kq_lock, MA_NOTOWNED); \
264	} while (0)
265
266	static struct knlist *
267	kn_list_lock(struct knote *kn)
268	{
269	struct knlist *knl;
270
271	knl = kn->kn_knlist;
272	if (knl != NULL)
273	knl->kl_lock(knl->kl_lockarg);
274	return (knl);
275	}
276
277	static void
278	kn_list_unlock(struct knlist *knl)
279	{
280	bool do_free;
281
282	if (knl == NULL)
283	return;
284	do_free = knl->kl_autodestroy && knlist_empty(knl);
285	knl->kl_unlock(knl->kl_lockarg);
286	if (do_free) {
287	knlist_destroy(knl);
288	free(knl, M_KQUEUE);
289	}
290	}
291
292	static bool
293	kn_in_flux(struct knote *kn)
294	{
295
296	return (kn->kn_influx > 0);
297	}
298
299	static void
300	kn_enter_flux(struct knote *kn)
301	{
302
303	KQ_OWNED(kn->kn_kq);
304	MPASS(kn->kn_influx < INT_MAX);
305	kn->kn_influx++;
306	}
307
308	static bool
309	kn_leave_flux(struct knote *kn)
310	{
311
312	KQ_OWNED(kn->kn_kq);
313	MPASS(kn->kn_influx > 0);
314	kn->kn_influx--;
315	return (kn->kn_influx == 0);
316	}
317
/*
 * Assert the lock state of a knlist through its own callbacks.  The checks
 * compile to nothing unless INVARIANTS is defined.
 */
#define	KNL_ASSERT_LOCK(knl, islocked) do {				\
	if (islocked)							\
		KNL_ASSERT_LOCKED(knl);					\
	else								\
		KNL_ASSERT_UNLOCKED(knl);				\
} while (0)
#ifdef INVARIANTS
#define	KNL_ASSERT_LOCKED(knl) do {					\
	(knl)->kl_assert_locked((knl)->kl_lockarg);			\
} while (0)
#define	KNL_ASSERT_UNLOCKED(knl) do {					\
	(knl)->kl_assert_unlocked((knl)->kl_lockarg);			\
} while (0)
#else /* !INVARIANTS */
#define	KNL_ASSERT_LOCKED(knl) do {} while(0)
#define	KNL_ASSERT_UNLOCKED(knl) do {} while (0)
#endif /* INVARIANTS */
335
#ifndef KN_HASHSIZE
#define KN_HASHSIZE		64		/* XXX should be tunable */
#endif

/*
 * Fold bits 8 and up of `val' into its low bits and reduce with `mask'.
 * Every use of `val' is parenthesized so compound arguments hash correctly.
 */
#define KN_HASH(val, mask)	(((val) ^ ((val) >> 8)) & (mask))
341
/*
 * Attach hook for filter slots that have no implementation: always fails
 * with ENXIO.
 */
static int
filt_nullattach(struct knote *kn)
{

	return (ENXIO);
}
348
/* Placeholder filter table: its only hook is filt_nullattach(), which returns ENXIO. */
349	struct filterops null_filtops = {
350	.f_isfd = 0,
351	.f_attach = filt_nullattach,
352	};
353
354	/* XXX - make SYSINIT to add these, and move into respective modules. */
355	extern struct filterops sig_filtops;
356	extern struct filterops fs_filtops;
357
358	/*
359	* Table for all system-defined filters.
360	*/
361	static struct mtx filterops_lock;
362	MTX_SYSINIT(kqueue_filterops, &filterops_lock, "protect sysfilt_ops",
363	MTX_DEF);
/*
 * One slot per system filter, in the order given by the per-entry comments.
 * Entries are initialized positionally as { for_fop, for_nolock }; for_refcnt
 * is left zero.  Slots without an implementation point at null_filtops.
 * NOTE(review): for_nolock presumably lets lookups skip filterops_lock and
 * reference counting for built-in filters - confirm against kqueue_fo_find().
 */
364	static struct {
365	struct filterops *for_fop;
366	int for_nolock;
367	int for_refcnt;
368	} sysfilt_ops[EVFILT_SYSCOUNT] = {
369	{ &file_filtops, 1 }, /* EVFILT_READ */
370	{ &file_filtops, 1 }, /* EVFILT_WRITE */
371	{ &null_filtops }, /* EVFILT_AIO */
372	{ &file_filtops, 1 }, /* EVFILT_VNODE */
/* The process and signal filters are replaced by null_filtops on RTEMS. */
373	#ifndef __rtems__
374	{ &proc_filtops, 1 }, /* EVFILT_PROC */
375	{ &sig_filtops, 1 }, /* EVFILT_SIGNAL */
376	#else /* __rtems__ */
377	{ &null_filtops }, /* EVFILT_PROC */
378	{ &null_filtops }, /* EVFILT_SIGNAL */
379	#endif /* __rtems__ */
380	{ &timer_filtops, 1 }, /* EVFILT_TIMER */
381	{ &null_filtops }, /* former EVFILT_NETDEV */
/* The filesystem filter is likewise replaced by null_filtops on RTEMS. */
382	#ifndef __rtems__
383	{ &fs_filtops, 1 }, /* EVFILT_FS */
384	#else /* __rtems__ */
385	{ &null_filtops }, /* EVFILT_FS */
386	#endif /* __rtems__ */
387	{ &null_filtops }, /* EVFILT_LIO */
388	{ &user_filtops, 1 }, /* EVFILT_USER */
389	{ &null_filtops }, /* EVFILT_SENDFILE */
390	{ &file_filtops, 1 }, /* EVFILT_EMPTY */
391	};
392
393	/*
394	* Simple redirection for all cdevsw style objects to call their fo_kqfilter
395	* method.
396	*/
397	static int
398	filt_fileattach(struct knote *kn)
399	{
400
401	return (fo_kqfilter(kn->kn_fp, kn));
402	}
403
404	/ARGSUSED/
405	static int
406	kqueue_kqfilter(struct file fp, struct knote kn)
407	{
408	struct kqueue *kq = kn->kn_fp->f_data;
409
410	if (kn->kn_filter != EVFILT_READ)
411	return (EINVAL);
412
413	kn->kn_status \|= KN_KQUEUE;
414	kn->kn_fop = &kqread_filtops;
415	knlist_add(&kq->kq_sel.si_note, kn, 0);
416
417	return (0);
418	}
419	#ifdef __rtems__
420	static int
421	rtems_bsd_kqueue_kqfilter(rtems_libio_t iop, struct knote kn)
422	{
423	struct file *fp = rtems_bsd_iop_to_fp(iop);
424
425	return kqueue_kqfilter(fp, kn);
426	}
427	#endif /* __rtems__ */
428
429	static void
430	filt_kqdetach(struct knote *kn)
431	{
432	struct kqueue *kq = kn->kn_fp->f_data;
433
434	knlist_remove(&kq->kq_sel.si_note, kn, 0);
435	}
436
437	/ARGSUSED/
438	static int
439	filt_kqueue(struct knote *kn, long hint)
440	{
441	struct kqueue *kq = kn->kn_fp->f_data;
442
443	kn->kn_data = kq->kq_count;
444	return (kn->kn_data > 0);
445	}
446
447	#ifndef __rtems__
448	/* XXX - move to kern_proc.c? */
449	static int
450	filt_procattach(struct knote *kn)
451	{
452	struct proc *p;
453	int error;
454	bool exiting, immediate;
455
456	exiting = immediate = false;
457	if (kn->kn_sfflags & NOTE_EXIT)
458	p = pfind_any(kn->kn_id);
459	else
460	p = pfind(kn->kn_id);
461	if (p == NULL)
462	return (ESRCH);
463	if (p->p_flag & P_WEXIT)
464	exiting = true;
465
466	if ((error = p_cansee(curthread, p))) {
467	PROC_UNLOCK(p);
468	return (error);
469	}
470
471	kn->kn_ptr.p_proc = p;
472	kn->kn_flags \|= EV_CLEAR; /* automatically set */
473
474	/*
475	* Internal flag indicating registration done by kernel for the
476	* purposes of getting a NOTE_CHILD notification.
477	*/
478	if (kn->kn_flags & EV_FLAG2) {
479	kn->kn_flags &= ~EV_FLAG2;
480	kn->kn_data = kn->kn_sdata; /* ppid */
481	kn->kn_fflags = NOTE_CHILD;
482	kn->kn_sfflags &= ~(NOTE_EXIT \| NOTE_EXEC \| NOTE_FORK);
483	immediate = true; /* Force immediate activation of child note. */
484	}
485	/*
486	* Internal flag indicating registration done by kernel (for other than
487	* NOTE_CHILD).
488	*/
489	if (kn->kn_flags & EV_FLAG1) {
490	kn->kn_flags &= ~EV_FLAG1;
491	}
492
493	knlist_add(p->p_klist, kn, 1);
494
495	/*
496	* Immediately activate any child notes or, in the case of a zombie
497	* target process, exit notes. The latter is necessary to handle the
498	* case where the target process, e.g. a child, dies before the kevent
499	* is registered.
500	*/
501	if (immediate \|\| (exiting && filt_proc(kn, NOTE_EXIT)))
502	KNOTE_ACTIVATE(kn, 0);
503
504	PROC_UNLOCK(p);
505
506	return (0);
507	}
508
509	/*
510	* The knote may be attached to a different process, which may exit,
511	* leaving nothing for the knote to be attached to. So when the process
512	* exits, the knote is marked as DETACHED and also flagged as ONESHOT so
513	* it will be deleted when read out. However, as part of the knote deletion,
514	* this routine is called, so a check is needed to avoid actually performing
515	* a detach, because the original process does not exist any more.
516	*/
517	/* XXX - move to kern_proc.c? */
518	static void
519	filt_procdetach(struct knote *kn)
520	{
521
522	knlist_remove(kn->kn_knlist, kn, 0);
523	kn->kn_ptr.p_proc = NULL;
524	}
525
526	/* XXX - move to kern_proc.c? */
527	static int
528	filt_proc(struct knote *kn, long hint)
529	{
530	struct proc *p;
531	u_int event;
532
533	p = kn->kn_ptr.p_proc;
534	if (p == NULL) /* already activated, from attach filter */
535	return (0);
536
537	/* Mask off extra data. */
538	event = (u_int)hint & NOTE_PCTRLMASK;
539
540	/* If the user is interested in this event, record it. */
541	if (kn->kn_sfflags & event)
542	kn->kn_fflags \|= event;
543
544	/* Process is gone, so flag the event as finished. */
545	if (event == NOTE_EXIT) {
546	kn->kn_flags \|= EV_EOF \| EV_ONESHOT;
547	kn->kn_ptr.p_proc = NULL;
548	if (kn->kn_fflags & NOTE_EXIT)
549	kn->kn_data = KW_EXITCODE(p->p_xexit, p->p_xsig);
550	if (kn->kn_fflags == 0)
551	kn->kn_flags \|= EV_DROP;
552	return (1);
553	}
554
555	return (kn->kn_fflags != 0);
556	}
557
558	/*
559	* Called when the process forked. It mostly does the same as the
560	* knote(), activating all knotes registered to be activated when the
561	* process forked. Additionally, for each knote attached to the
562	* parent, check whether user wants to track the new process. If so
563	* attach a new knote to it, and immediately report an event with the
564	* child's pid.
565	*/
566	void
567	knote_fork(struct knlist *list, int pid)
568	{
569	struct kqueue *kq;
570	struct knote *kn;
571	struct kevent kev;
572	int error;
573
574	if (list == NULL)
575	return;
576	list->kl_lock(list->kl_lockarg);
577
578	SLIST_FOREACH(kn, &list->kl_list, kn_selnext) {
579	kq = kn->kn_kq;
580	KQ_LOCK(kq);
581	if (kn_in_flux(kn) && (kn->kn_status & KN_SCAN) == 0) {
582	KQ_UNLOCK(kq);
583	continue;
584	}
585
586	/*
587	* The same as knote(), activate the event.
588	*/
589	if ((kn->kn_sfflags & NOTE_TRACK) == 0) {
590	kn->kn_status \|= KN_HASKQLOCK;
591	if (kn->kn_fop->f_event(kn, NOTE_FORK))
592	KNOTE_ACTIVATE(kn, 1);
593	kn->kn_status &= ~KN_HASKQLOCK;
594	KQ_UNLOCK(kq);
595	continue;
596	}
597
598	/*
599	* The NOTE_TRACK case. In addition to the activation
600	* of the event, we need to register new events to
601	* track the child. Drop the locks in preparation for
602	* the call to kqueue_register().
603	*/
604	kn_enter_flux(kn);
605	KQ_UNLOCK(kq);
606	list->kl_unlock(list->kl_lockarg);
607
608	/*
609	* Activate existing knote and register tracking knotes with
610	* new process.
611	*
612	* First register a knote to get just the child notice. This
613	* must be a separate note from a potential NOTE_EXIT
614	* notification since both NOTE_CHILD and NOTE_EXIT are defined
615	* to use the data field (in conflicting ways).
616	*/
617	kev.ident = pid;
618	kev.filter = kn->kn_filter;
619	kev.flags = kn->kn_flags \| EV_ADD \| EV_ENABLE \| EV_ONESHOT \|
620	EV_FLAG2;
621	kev.fflags = kn->kn_sfflags;
622	kev.data = kn->kn_id; /* parent */
623	kev.udata = kn->kn_kevent.udata;/* preserve udata */
624	error = kqueue_register(kq, &kev, NULL, 0);
625	if (error)
626	kn->kn_fflags \|= NOTE_TRACKERR;
627
628	/*
629	* Then register another knote to track other potential events
630	* from the new process.
631	*/
632	kev.ident = pid;
633	kev.filter = kn->kn_filter;
634	kev.flags = kn->kn_flags \| EV_ADD \| EV_ENABLE \| EV_FLAG1;
635	kev.fflags = kn->kn_sfflags;
636	kev.data = kn->kn_id; /* parent */
637	kev.udata = kn->kn_kevent.udata;/* preserve udata */
638	error = kqueue_register(kq, &kev, NULL, 0);
639	if (error)
640	kn->kn_fflags \|= NOTE_TRACKERR;
641	if (kn->kn_fop->f_event(kn, NOTE_FORK))
642	KNOTE_ACTIVATE(kn, 0);
643	KQ_LOCK(kq);
644	kn_leave_flux(kn);
645	KQ_UNLOCK_FLUX(kq);
646	list->kl_lock(list->kl_lockarg);
647	}
648	list->kl_unlock(list->kl_lockarg);
649	}
650	#endif /* __rtems__ */
651
652	/*
653	* XXX: EVFILT_TIMER should perhaps live in kern_time.c beside the
654	* interval timer support code.
655	*/
656
657	#define NOTE_TIMER_PRECMASK \
658	(NOTE_SECONDS \| NOTE_MSECONDS \| NOTE_USECONDS \| NOTE_NSECONDS)
659
660	static sbintime_t
661	timer2sbintime(intptr_t data, int flags)
662	{
663	int64_t secs;
664
665	/*
666	* Macros for converting to the fractional second portion of an
667	* sbintime_t using 64bit multiplication to improve precision.
668	*/
669	#define NS_TO_SBT(ns) (((ns) * (((uint64_t)1 << 63) / 500000000)) >> 32)
670	#define US_TO_SBT(us) (((us) * (((uint64_t)1 << 63) / 500000)) >> 32)
671	#define MS_TO_SBT(ms) (((ms) * (((uint64_t)1 << 63) / 500)) >> 32)
672	switch (flags & NOTE_TIMER_PRECMASK) {
673	case NOTE_SECONDS:
674	#ifdef __LP64__
675	if (data > (SBT_MAX / SBT_1S))
676	return (SBT_MAX);
677	#endif
678	return ((sbintime_t)data << 32);
679	case NOTE_MSECONDS: /* FALLTHROUGH */
680	case 0:
681	if (data >= 1000) {
682	secs = data / 1000;
683	#ifdef __LP64__
684	if (secs > (SBT_MAX / SBT_1S))
685	return (SBT_MAX);
686	#endif
687	return (secs << 32 \| MS_TO_SBT(data % 1000));
688	}
689	return (MS_TO_SBT(data));
690	case NOTE_USECONDS:
691	if (data >= 1000000) {
692	secs = data / 1000000;
693	#ifdef __LP64__
694	if (secs > (SBT_MAX / SBT_1S))
695	return (SBT_MAX);
696	#endif
697	return (secs << 32 \| US_TO_SBT(data % 1000000));
698	}
699	return (US_TO_SBT(data));
700	case NOTE_NSECONDS:
701	if (data >= 1000000000) {
702	secs = data / 1000000000;
703	#ifdef __LP64__
704	if (secs > (SBT_MAX / SBT_1S))
705	return (SBT_MAX);
706	#endif
707	return (secs << 32 \| US_TO_SBT(data % 1000000000));
708	}
709	return (NS_TO_SBT(data));
710	default:
711	break;
712	}
713	return (-1);
714	}
715
/* Per-timer state allocated by filt_timerattach() and stored in kn_ptr.p_v. */
716	struct kq_timer_cb_data {
717	struct callout c;
718	sbintime_t next; /* next timer event fires at */
719	sbintime_t to; /* precalculated timer period, 0 for abs */
720	};
721
722	static void
723	filt_timerexpire(void *knx)
724	{
725	struct knote *kn;
726	struct kq_timer_cb_data *kc;
727
728	kn = knx;
729	kn->kn_data++;
730	KNOTE_ACTIVATE(kn, 0); /* XXX - handle locking */
731
732	if ((kn->kn_flags & EV_ONESHOT) != 0)
733	return;
734	kc = kn->kn_ptr.p_v;
735	if (kc->to == 0)
736	return;
737	kc->next += kc->to;
738	callout_reset_sbt_on(&kc->c, kc->next, 0, filt_timerexpire, kn,
739	PCPU_GET(cpuid), C_ABSOLUTE);
740	}
741
742	/*
743	* data contains amount of time to sleep
744	*/
745	static int
746	filt_timervalidate(struct knote kn, sbintime_t to)
747	{
748	struct bintime bt;
749	sbintime_t sbt;
750
751	if (kn->kn_sdata < 0)
752	return (EINVAL);
753	if (kn->kn_sdata == 0 && (kn->kn_flags & EV_ONESHOT) == 0)
754	kn->kn_sdata = 1;
755	/*
756	* The only fflags values supported are the timer unit
757	* (precision) and the absolute time indicator.
758	*/
759	if ((kn->kn_sfflags & ~(NOTE_TIMER_PRECMASK \| NOTE_ABSTIME)) != 0)
760	return (EINVAL);
761
762	*to = timer2sbintime(kn->kn_sdata, kn->kn_sfflags);
763	if ((kn->kn_sfflags & NOTE_ABSTIME) != 0) {
764	getboottimebin(&bt);
765	sbt = bttosbt(bt);
766	*to -= sbt;
767	}
768	if (*to < 0)
769	return (EINVAL);
770	return (0);
771	}
772
773	static int
774	filt_timerattach(struct knote *kn)
775	{
776	struct kq_timer_cb_data *kc;
777	sbintime_t to;
778	unsigned int ncallouts;
779	int error;
780
781	error = filt_timervalidate(kn, &to);
782	if (error != 0)
783	return (error);
784
785	do {
786	ncallouts = kq_ncallouts;
787	if (ncallouts >= kq_calloutmax)
788	return (ENOMEM);
789	} while (!atomic_cmpset_int(&kq_ncallouts, ncallouts, ncallouts + 1));
790
791	if ((kn->kn_sfflags & NOTE_ABSTIME) == 0)
792	kn->kn_flags \|= EV_CLEAR; /* automatically set */
793	kn->kn_status &= ~KN_DETACHED; /* knlist_add clears it */
794	kn->kn_ptr.p_v = kc = malloc(sizeof(*kc), M_KQUEUE, M_WAITOK);
795	callout_init(&kc->c, 1);
796	filt_timerstart(kn, to);
797
798	return (0);
799	}
800
801	static void
802	filt_timerstart(struct knote *kn, sbintime_t to)
803	{
804	struct kq_timer_cb_data *kc;
805
806	kc = kn->kn_ptr.p_v;
807	if ((kn->kn_sfflags & NOTE_ABSTIME) != 0) {
808	kc->next = to;
809	kc->to = 0;
810	} else {
811	kc->next = to + sbinuptime();
812	kc->to = to;
813	}
814	callout_reset_sbt_on(&kc->c, kc->next, 0, filt_timerexpire, kn,
815	PCPU_GET(cpuid), C_ABSOLUTE);
816	}
817
818	static void
819	filt_timerdetach(struct knote *kn)
820	{
821	struct kq_timer_cb_data *kc;
822	unsigned int old __unused;
823
824	kc = kn->kn_ptr.p_v;
825	callout_drain(&kc->c);
826	free(kc, M_KQUEUE);
827	old = atomic_fetchadd_int(&kq_ncallouts, -1);
828	KASSERT(old > 0, ("Number of callouts cannot become negative"));
829	kn->kn_status \|= KN_DETACHED; /* knlist_remove sets it */
830	}
831
832	static void
833	filt_timertouch(struct knote kn, struct kevent kev, u_long type)
834	{
835	struct kq_timer_cb_data *kc;
836	struct kqueue *kq;
837	sbintime_t to;
838	int error;
839
840	switch (type) {
841	case EVENT_REGISTER:
842	/* Handle re-added timers that update data/fflags */
843	if (kev->flags & EV_ADD) {
844	kc = kn->kn_ptr.p_v;
845
846	/* Drain any existing callout. */
847	callout_drain(&kc->c);
848
849	/* Throw away any existing undelivered record
850	* of the timer expiration. This is done under
851	* the presumption that if a process is
852	* re-adding this timer with new parameters,
853	* it is no longer interested in what may have
854	* happened under the old parameters. If it is
855	* interested, it can wait for the expiration,
856	* delete the old timer definition, and then
857	* add the new one.
858	*
859	* This has to be done while the kq is locked:
860	* - if enqueued, dequeue
861	* - make it no longer active
862	* - clear the count of expiration events
863	*/
864	kq = kn->kn_kq;
865	KQ_LOCK(kq);
866	if (kn->kn_status & KN_QUEUED)
867	knote_dequeue(kn);
868
869	kn->kn_status &= ~KN_ACTIVE;
870	kn->kn_data = 0;
871	KQ_UNLOCK(kq);
872
873	/* Reschedule timer based on new data/fflags */
874	kn->kn_sfflags = kev->fflags;
875	kn->kn_sdata = kev->data;
876	error = filt_timervalidate(kn, &to);
877	if (error != 0) {
878	kn->kn_flags \|= EV_ERROR;
879	kn->kn_data = error;
880	} else
881	filt_timerstart(kn, to);
882	}
883	break;
884
885	case EVENT_PROCESS:
886	*kev = kn->kn_kevent;
887	if (kn->kn_flags & EV_CLEAR) {
888	kn->kn_data = 0;
889	kn->kn_fflags = 0;
890	}
891	break;
892
893	default:
894	panic("filt_timertouch() - invalid type (%ld)", type);
895	break;
896	}
897	}
898
899	static int
900	filt_timer(struct knote *kn, long hint)
901	{
902
903	return (kn->kn_data != 0);
904	}
905
906	static int
907	filt_userattach(struct knote *kn)
908	{
909
910	/*
911	* EVFILT_USER knotes are not attached to anything in the kernel.
912	*/
913	kn->kn_hook = NULL;
914	if (kn->kn_fflags & NOTE_TRIGGER)
915	kn->kn_hookid = 1;
916	else
917	kn->kn_hookid = 0;
918	return (0);
919	}
920
/* Detach hook of the user filter: intentionally empty, there is nothing to undo. */
921	static void
922	filt_userdetach(__unused struct knote *kn)
923	{
924
925	/*
926	* EVFILT_USER knotes are not attached to anything in the kernel.
927	*/
928	}
929
930	static int
931	filt_user(struct knote *kn, __unused long hint)
932	{
933
934	return (kn->kn_hookid);
935	}
936
937	static void
938	filt_usertouch(struct knote kn, struct kevent kev, u_long type)
939	{
940	u_int ffctrl;
941
942	switch (type) {
943	case EVENT_REGISTER:
944	if (kev->fflags & NOTE_TRIGGER)
945	kn->kn_hookid = 1;
946
947	ffctrl = kev->fflags & NOTE_FFCTRLMASK;
948	kev->fflags &= NOTE_FFLAGSMASK;
949	switch (ffctrl) {
950	case NOTE_FFNOP:
951	break;
952
953	case NOTE_FFAND:
954	kn->kn_sfflags &= kev->fflags;
955	break;
956
957	case NOTE_FFOR:
958	kn->kn_sfflags \|= kev->fflags;
959	break;
960
961	case NOTE_FFCOPY:
962	kn->kn_sfflags = kev->fflags;
963	break;
964
965	default:
966	/* XXX Return error? */
967	break;
968	}
969	kn->kn_sdata = kev->data;
970	if (kev->flags & EV_CLEAR) {
971	kn->kn_hookid = 0;
972	kn->kn_data = 0;
973	kn->kn_fflags = 0;
974	}
975	break;
976
977	case EVENT_PROCESS:
978	*kev = kn->kn_kevent;
979	kev->fflags = kn->kn_sfflags;
980	kev->data = kn->kn_sdata;
981	if (kn->kn_flags & EV_CLEAR) {
982	kn->kn_hookid = 0;
983	kn->kn_data = 0;
984	kn->kn_fflags = 0;
985	}
986	break;
987
988	default:
989	panic("filt_usertouch() - invalid type (%ld)", type);
990	break;
991	}
992	}
993
994	#ifdef __rtems__
995	static int
996	kern_kqueue(struct thread td, int flags, struct filecaps fcaps);
997
998	static
999	#endif /* __rtems__ */
1000	int
1001	sys_kqueue(struct thread td, struct kqueue_args uap)
1002	{
1003
1004	return (kern_kqueue(td, 0, NULL));
1005	}
1006
1007	static void
1008	kqueue_init(struct kqueue *kq)
1009	{
1010
1011	mtx_init(&kq->kq_lock, "kqueue", NULL, MTX_DEF \| MTX_DUPOK);
1012	TAILQ_INIT(&kq->kq_head);
1013	knlist_init_mtx(&kq->kq_sel.si_note, &kq->kq_lock);
1014	TASK_INIT(&kq->kq_task, 0, kqueue_task, kq);
1015	}
1016
1017	int
1018	kern_kqueue(struct thread td, int flags, struct filecaps fcaps)
1019	{
1020	struct filedesc *fdp;
1021	struct kqueue *kq;
1022	struct file *fp;
1023	struct ucred *cred;
1024	int fd, error;
1025
1026	#ifndef __rtems__
1027	fdp = td->td_proc->p_fd;
1028	cred = td->td_ucred;
1029	if (!chgkqcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_KQUEUES)))
1030	return (ENOMEM);
1031	#else /* __rtems__ */
1032	(void)fdp;
1033	(void)cred;
1034	#endif /* __rtems__ */
1035
1036	error = falloc_caps(td, &fp, &fd, flags, fcaps);
1037	if (error != 0) {
1038	chgkqcnt(cred->cr_ruidinfo, -1, 0);
1039	return (error);
1040	}
1041
1042	/* An extra reference on `fp' has been held for us by falloc(). */
1043	kq = malloc(sizeof *kq, M_KQUEUE, M_WAITOK \| M_ZERO);
1044	kqueue_init(kq);
1045	#ifndef __rtems__
1046	kq->kq_fdp = fdp;
1047	kq->kq_cred = crhold(cred);
1048	#endif /* __rtems__ */
1049
1050	#ifndef __rtems__
1051	FILEDESC_XLOCK(fdp);
1052	TAILQ_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_list);
1053	FILEDESC_XUNLOCK(fdp);
1054	#else /* __rtems__ */
1055	rtems_libio_lock();
1056	TAILQ_INSERT_HEAD(&fd_kqlist, kq, kq_list);
1057	rtems_libio_unlock();
1058	#endif /* __rtems__ */
1059
1060	finit(fp, FREAD \| FWRITE, DTYPE_KQUEUE, kq, &kqueueops);
1061	#ifndef __rtems__
1062	fdrop(fp, td);
1063	#endif /* __rtems__ */
1064
1065	td->td_retval[0] = fd;
1066	return (0);
1067	}
1068	#ifdef __rtems__
1069	int
1070	kqueue(void)
1071	{
1072	struct thread *td = rtems_bsd_get_curthread_or_null();
1073	struct kqueue_args ua;
1074	int error;
1075
1076	if (td != NULL) {
1077	error = sys_kqueue(td, &ua);
1078	} else {
1079	error = ENOMEM;
1080	}
1081
1082	if (error == 0) {
1083	return td->td_retval[0];
1084	} else {
1085	rtems_set_errno_and_return_minus_one(error);
1086	}
1087	}
1088	#endif /* __rtems__ */
1089
/*
 * Argument bundle handed to kern_kevent_generic(); the list pointers are
 * untyped (void *).  sys_kevent() fills it field-for-field from its
 * struct kevent_args.
 */
1090	struct g_kevent_args {
1091	int fd;
1092	void *changelist;
1093	int nchanges;
1094	void *eventlist;
1095	int nevents;
1096	const struct timespec *timeout;
1097	};
1098
1099	#ifdef __rtems__
1100	static int kern_kevent(struct thread *td, int fd, int nchanges, int nevents,
1101	struct kevent_copyops k_ops, const struct timespec timeout);
1102
1103	static int kern_kevent_fp(struct thread td, struct file fp, int nchanges,
1104	int nevents, struct kevent_copyops k_ops, const struct timespec timeout);
1105
1106	static
1107	#endif /* __rtems__ */
1108	int
1109	sys_kevent(struct thread td, struct kevent_args uap)
1110	{
1111	struct kevent_copyops k_ops = {
1112	.arg = uap,
1113	.k_copyout = kevent_copyout,
1114	.k_copyin = kevent_copyin,
1115	.kevent_size = sizeof(struct kevent),
1116	};
1117	struct g_kevent_args gk_args = {
1118	.fd = uap->fd,
1119	.changelist = uap->changelist,
1120	.nchanges = uap->nchanges,
1121	.eventlist = uap->eventlist,
1122	.nevents = uap->nevents,
1123	.timeout = uap->timeout,
1124	};
1125
1126	return (kern_kevent_generic(td, &gk_args, &k_ops, "kevent"));
1127	}
1128
1129	static int
1130	kern_kevent_generic(struct thread td, struct g_kevent_args uap,
1131	struct kevent_copyops k_ops, const char struct_name)
1132	{
1133	struct timespec ts, *tsp;
1134	#ifdef KTRACE
1135	struct kevent *eventlist = uap->eventlist;
1136	#endif
1137	int error;
1138
1139	if (uap->timeout != NULL) {
1140	error = copyin(uap->timeout, &ts, sizeof(ts));
1141	if (error)
1142	return (error);
1143	tsp = &ts;
1144	} else
1145	tsp = NULL;
1146
1147	#ifdef KTRACE
1148	if (KTRPOINT(td, KTR_STRUCT_ARRAY))
1149	ktrstructarray(struct_name, UIO_USERSPACE, uap->changelist,
1150	uap->nchanges, k_ops->kevent_size);
1151	#endif
1152
1153	error = kern_kevent(td, uap->fd, uap->nchanges, uap->nevents,
1154	k_ops, tsp);
1155
1156	#ifdef KTRACE
1157	if (error == 0 && KTRPOINT(td, KTR_STRUCT_ARRAY))
1158	ktrstructarray(struct_name, UIO_USERSPACE, eventlist,
1159	td->td_retval[0], k_ops->kevent_size);
1160	#endif
1161
1162	return (error);
1163	}
#ifdef __rtems__
__weak_reference(kevent, _kevent);

/*
 * RTEMS user-level kevent() entry point.
 *
 * Packs the arguments into a struct kevent_args and forwards to
 * sys_kevent().  On success the value left in td->td_retval[0] is returned;
 * otherwise errno is set and -1 is returned.  A missing thread is reported
 * as ENOMEM.
 */
int
kevent(int kq, const struct kevent *changelist, int nchanges,
    struct kevent *eventlist, int nevents,
    const struct timespec *timeout)
{
	struct kevent_args ua = {
		.fd = kq,
		.changelist = changelist,
		.nchanges = nchanges,
		.eventlist = eventlist,
		.nevents = nevents,
		.timeout = timeout
	};
	struct thread *td;
	int error;

	td = rtems_bsd_get_curthread_or_null();
	if (td == NULL) {
		error = ENOMEM;
	} else {
		error = sys_kevent(td, &ua);
	}

	if (error != 0) {
		rtems_set_errno_and_return_minus_one(error);
	}

	return td->td_retval[0];
}
#endif /* __rtems__ */
1196
1197	/*
1198	* Copy 'count' items into the destination list pointed to by uap->eventlist.
1199	*/
1200	static int
1201	kevent_copyout(void arg, struct kevent kevp, int count)
1202	{
1203	struct kevent_args *uap;
1204	int error;
1205
1206	KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count));
1207	uap = (struct kevent_args *)arg;
1208
1209	error = copyout(kevp, uap->eventlist, count * sizeof *kevp);
1210	if (error == 0)
1211	uap->eventlist += count;
1212	return (error);
1213	}
1214
1215	/*
1216	* Copy 'count' items from the list pointed to by uap->changelist.
1217	*/
1218	static int
1219	kevent_copyin(void arg, struct kevent kevp, int count)
1220	{
1221	struct kevent_args *uap;
1222	int error;
1223
1224	KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count));
1225	uap = (struct kevent_args *)arg;
1226
1227	error = copyin(uap->changelist, kevp, count * sizeof *kevp);
1228	if (error == 0)
1229	uap->changelist += count;
1230	return (error);
1231	}
1232
1233	#ifdef COMPAT_FREEBSD11
1234	static int
1235	kevent11_copyout(void arg, struct kevent kevp, int count)
1236	{
1237	struct freebsd11_kevent_args *uap;
1238	struct kevent_freebsd11 kev11;
1239	int error, i;
1240
1241	KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count));
1242	uap = (struct freebsd11_kevent_args *)arg;
1243
1244	for (i = 0; i < count; i++) {
1245	kev11.ident = kevp->ident;
1246	kev11.filter = kevp->filter;
1247	kev11.flags = kevp->flags;
1248	kev11.fflags = kevp->fflags;
1249	kev11.data = kevp->data;
1250	kev11.udata = kevp->udata;
1251	error = copyout(&kev11, uap->eventlist, sizeof(kev11));
1252	if (error != 0)
1253	break;
1254	uap->eventlist++;
1255	kevp++;
1256	}
1257	return (error);
1258	}
1259
1260	/*
1261	* Copy 'count' items from the list pointed to by uap->changelist.
1262	*/
1263	static int
1264	kevent11_copyin(void arg, struct kevent kevp, int count)
1265	{
1266	struct freebsd11_kevent_args *uap;
1267	struct kevent_freebsd11 kev11;
1268	int error, i;
1269
1270	KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count));
1271	uap = (struct freebsd11_kevent_args *)arg;
1272
1273	for (i = 0; i < count; i++) {
1274	error = copyin(uap->changelist, &kev11, sizeof(kev11));
1275	if (error != 0)
1276	break;
1277	kevp->ident = kev11.ident;
1278	kevp->filter = kev11.filter;
1279	kevp->flags = kev11.flags;
1280	kevp->fflags = kev11.fflags;
1281	kevp->data = (uintptr_t)kev11.data;
1282	kevp->udata = kev11.udata;
1283	bzero(&kevp->ext, sizeof(kevp->ext));
1284	uap->changelist++;
1285	kevp++;
1286	}
1287	return (error);
1288	}
1289
1290	int
1291	freebsd11_kevent(struct thread td, struct freebsd11_kevent_args uap)
1292	{
1293	struct kevent_copyops k_ops = {
1294	.arg = uap,
1295	.k_copyout = kevent11_copyout,
1296	.k_copyin = kevent11_copyin,
1297	.kevent_size = sizeof(struct kevent_freebsd11),
1298	};
1299	struct g_kevent_args gk_args = {
1300	.fd = uap->fd,
1301	.changelist = uap->changelist,
1302	.nchanges = uap->nchanges,
1303	.eventlist = uap->eventlist,
1304	.nevents = uap->nevents,
1305	.timeout = uap->timeout,
1306	};
1307
1308	return (kern_kevent_generic(td, &gk_args, &k_ops, "kevent_freebsd11"));
1309	}
1310	#endif
1311
1312	int
1313	kern_kevent(struct thread *td, int fd, int nchanges, int nevents,
1314	struct kevent_copyops k_ops, const struct timespec timeout)
1315	{
1316	cap_rights_t rights;
1317	struct file *fp;
1318	int error;
1319
1320	cap_rights_init(&rights);
1321	if (nchanges > 0)
1322	cap_rights_set(&rights, CAP_KQUEUE_CHANGE);
1323	if (nevents > 0)
1324	cap_rights_set(&rights, CAP_KQUEUE_EVENT);
1325	error = fget(td, fd, &rights, &fp);
1326	if (error != 0)
1327	return (error);
1328
1329	error = kern_kevent_fp(td, fp, nchanges, nevents, k_ops, timeout);
1330	fdrop(fp, td);
1331
1332	return (error);
1333	}
1334
1335	static int
1336	kqueue_kevent(struct kqueue kq, struct thread td, int nchanges, int nevents,
1337	struct kevent_copyops k_ops, const struct timespec timeout)
1338	{
1339	struct kevent keva[KQ_NEVENTS];
1340	struct kevent kevp, changes;
1341	int i, n, nerrors, error;
1342
1343	nerrors = 0;
1344	while (nchanges > 0) {
1345	n = nchanges > KQ_NEVENTS ? KQ_NEVENTS : nchanges;
1346	error = k_ops->k_copyin(k_ops->arg, keva, n);
1347	if (error)
1348	return (error);
1349	changes = keva;
1350	for (i = 0; i < n; i++) {
1351	kevp = &changes[i];
1352	if (!kevp->filter)
1353	continue;
1354	kevp->flags &= ~EV_SYSFLAGS;
1355	error = kqueue_register(kq, kevp, td, 1);
1356	if (error \|\| (kevp->flags & EV_RECEIPT)) {
1357	if (nevents == 0)
1358	return (error);
1359	kevp->flags = EV_ERROR;
1360	kevp->data = error;
1361	(void)k_ops->k_copyout(k_ops->arg, kevp, 1);
1362	nevents--;
1363	nerrors++;
1364	}
1365	}
1366	nchanges -= n;
1367	}
1368	if (nerrors) {
1369	td->td_retval[0] = nerrors;
1370	return (0);
1371	}
1372
1373	return (kqueue_scan(kq, nevents, k_ops, timeout, keva, td));
1374	}
1375
/*
 * Perform a kevent operation on the kqueue behind the file fp.
 *
 * Takes a kqueue reference with kqueue_acquire() for the duration of the
 * call and releases it afterwards.  Returns 0 or an errno value.
 */
int
kern_kevent_fp(struct thread *td, struct file *fp, int nchanges, int nevents,
    struct kevent_copyops *k_ops, const struct timespec *timeout)
{
	struct kqueue *kq;
	int error;

	error = kqueue_acquire(fp, &kq);
	if (error != 0)
		return (error);
	error = kqueue_kevent(kq, td, nchanges, nevents, k_ops, timeout);
	kqueue_release(kq, 0);
	return (error);
}
1390
1391	#ifndef __rtems__
1392	/*
1393	* Performs a kevent() call on a temporarily created kqueue. This can be
1394	* used to perform one-shot polling, similar to poll() and select().
1395	*/
1396	int
1397	kern_kevent_anonymous(struct thread *td, int nevents,
1398	struct kevent_copyops *k_ops)
1399	{
1400	struct kqueue kq = {};
1401	int error;
1402
1403	kqueue_init(&kq);
1404	kq.kq_refcnt = 1;
1405	error = kqueue_kevent(&kq, td, nevents, nevents, k_ops, NULL);
1406	kqueue_drain(&kq, td);
1407	kqueue_destroy(&kq);
1408	return (error);
1409	}
1410	#endif /* __rtems__ */
1411
1412	int
1413	kqueue_add_filteropts(int filt, struct filterops *filtops)
1414	{
1415	int error;
1416
1417	error = 0;
1418	if (filt > 0 \|\| filt + EVFILT_SYSCOUNT < 0) {
1419	printf(
1420	"trying to add a filterop that is out of range: %d is beyond %d\n",
1421	~filt, EVFILT_SYSCOUNT);
1422	return EINVAL;
1423	}
1424	mtx_lock(&filterops_lock);
1425	if (sysfilt_ops[~filt].for_fop != &null_filtops &&
1426	sysfilt_ops[~filt].for_fop != NULL)
1427	error = EEXIST;
1428	else {
1429	sysfilt_ops[~filt].for_fop = filtops;
1430	sysfilt_ops[~filt].for_refcnt = 0;
1431	}
1432	mtx_unlock(&filterops_lock);
1433
1434	return (error);
1435	}
1436
1437	int
1438	kqueue_del_filteropts(int filt)
1439	{
1440	int error;
1441
1442	error = 0;
1443	if (filt > 0 \|\| filt + EVFILT_SYSCOUNT < 0)
1444	return EINVAL;
1445
1446	mtx_lock(&filterops_lock);
1447	if (sysfilt_ops[~filt].for_fop == &null_filtops \|\|
1448	sysfilt_ops[~filt].for_fop == NULL)
1449	error = EINVAL;
1450	else if (sysfilt_ops[~filt].for_refcnt != 0)
1451	error = EBUSY;
1452	else {
1453	sysfilt_ops[~filt].for_fop = &null_filtops;
1454	sysfilt_ops[~filt].for_refcnt = 0;
1455	}
1456	mtx_unlock(&filterops_lock);
1457
1458	return error;
1459	}
1460
1461	static struct filterops *
1462	kqueue_fo_find(int filt)
1463	{
1464
1465	if (filt > 0 \|\| filt + EVFILT_SYSCOUNT < 0)
1466	return NULL;
1467
1468	if (sysfilt_ops[~filt].for_nolock)
1469	return sysfilt_ops[~filt].for_fop;
1470
1471	mtx_lock(&filterops_lock);
1472	sysfilt_ops[~filt].for_refcnt++;
1473	if (sysfilt_ops[~filt].for_fop == NULL)
1474	sysfilt_ops[~filt].for_fop = &null_filtops;
1475	mtx_unlock(&filterops_lock);
1476
1477	return sysfilt_ops[~filt].for_fop;
1478	}
1479
1480	static void
1481	kqueue_fo_release(int filt)
1482	{
1483
1484	if (filt > 0 \|\| filt + EVFILT_SYSCOUNT < 0)
1485	return;
1486
1487	if (sysfilt_ops[~filt].for_nolock)
1488	return;
1489
1490	mtx_lock(&filterops_lock);
1491	KASSERT(sysfilt_ops[~filt].for_refcnt > 0,
1492	("filter object refcount not valid on release"));
1493	sysfilt_ops[~filt].for_refcnt--;
1494	mtx_unlock(&filterops_lock);
1495	}
1496
1497	/*
1498	* A ref to kq (obtained via kqueue_acquire) must be held. waitok will
1499	* influence if memory allocation should wait. Make sure it is 0 if you
1500	* hold any mutexes.
1501	*/
1502	static int
1503	kqueue_register(struct kqueue kq, struct kevent kev, struct thread *td, int waitok)
1504	{
1505	struct filterops *fops;
1506	struct file *fp;
1507	struct knote kn, tkn;
1508	struct knlist *knl;
1509	int error, filt, event;
1510	int haskqglobal, filedesc_unlock;
1511
1512	if ((kev->flags & (EV_ENABLE \| EV_DISABLE)) == (EV_ENABLE \| EV_DISABLE))
1513	return (EINVAL);
1514
1515	fp = NULL;
1516	kn = NULL;
1517	knl = NULL;
1518	error = 0;
1519	haskqglobal = 0;
1520	filedesc_unlock = 0;
1521
1522	filt = kev->filter;
1523	fops = kqueue_fo_find(filt);
1524	if (fops == NULL)
1525	return EINVAL;
1526
1527	if (kev->flags & EV_ADD) {
1528	/*
1529	* Prevent waiting with locks. Non-sleepable
1530	* allocation failures are handled in the loop, only
1531	* if the spare knote appears to be actually required.
1532	*/
1533	tkn = knote_alloc(waitok);
1534	} else {
1535	tkn = NULL;
1536	}
1537
1538	findkn:
1539	if (fops->f_isfd) {
1540	KASSERT(td != NULL, ("td is NULL"));
1541	if (kev->ident > INT_MAX)
1542	error = EBADF;
1543	else
1544	error = fget(td, kev->ident, &cap_event_rights, &fp);
1545	if (error)
1546	goto done;
1547
1548	if ((kev->flags & EV_ADD) == EV_ADD && kqueue_expand(kq, fops,
1549	kev->ident, 0) != 0) {
1550	/* try again */
1551	fdrop(fp, td);
1552	fp = NULL;
1553	error = kqueue_expand(kq, fops, kev->ident, waitok);
1554	if (error)
1555	goto done;
1556	goto findkn;
1557	}
1558
1559	#ifndef __rtems__
1560	if (fp->f_type == DTYPE_KQUEUE) {
1561	#else /* __rtems__ */
1562	if (fp->f_io.pathinfo.handlers == &kqueueops) {
1563	#endif /* __rtems__ */
1564	/*
1565	* If we add some intelligence about what we are doing,
1566	* we should be able to support events on ourselves.
1567	* We need to know when we are doing this to prevent
1568	* getting both the knlist lock and the kq lock since
1569	* they are the same thing.
1570	*/
1571	if (fp->f_data == kq) {
1572	error = EINVAL;
1573	goto done;
1574	}
1575
1576	/*
1577	* Pre-lock the filedesc before the global
1578	* lock mutex, see the comment in
1579	* kqueue_close().
1580	*/
1581	FILEDESC_XLOCK(td->td_proc->p_fd);
1582	filedesc_unlock = 1;
1583	KQ_GLOBAL_LOCK(&kq_global, haskqglobal);
1584	}
1585
1586	KQ_LOCK(kq);
1587	if (kev->ident < kq->kq_knlistsize) {
1588	SLIST_FOREACH(kn, &kq->kq_knlist[kev->ident], kn_link)
1589	if (kev->filter == kn->kn_filter)
1590	break;
1591	}
1592	} else {
1593	if ((kev->flags & EV_ADD) == EV_ADD)
1594	kqueue_expand(kq, fops, kev->ident, waitok);
1595
1596	KQ_LOCK(kq);
1597
1598	/*
1599	* If possible, find an existing knote to use for this kevent.
1600	*/
1601	if (kev->filter == EVFILT_PROC &&
1602	(kev->flags & (EV_FLAG1 \| EV_FLAG2)) != 0) {
1603	/* This is an internal creation of a process tracking
1604	* note. Don't attempt to coalesce this with an
1605	* existing note.
1606	*/
1607	;
1608	} else if (kq->kq_knhashmask != 0) {
1609	struct klist *list;
1610
1611	list = &kq->kq_knhash[
1612	KN_HASH((u_long)kev->ident, kq->kq_knhashmask)];
1613	SLIST_FOREACH(kn, list, kn_link)
1614	if (kev->ident == kn->kn_id &&
1615	kev->filter == kn->kn_filter)
1616	break;
1617	}
1618	}
1619
1620	/* knote is in the process of changing, wait for it to stabilize. */
1621	if (kn != NULL && kn_in_flux(kn)) {
1622	KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
1623	if (filedesc_unlock) {
1624	FILEDESC_XUNLOCK(td->td_proc->p_fd);
1625	filedesc_unlock = 0;
1626	}
1627	kq->kq_state \|= KQ_FLUXWAIT;
1628	msleep(kq, &kq->kq_lock, PSOCK \| PDROP, "kqflxwt", 0);
1629	if (fp != NULL) {
1630	fdrop(fp, td);
1631	fp = NULL;
1632	}
1633	goto findkn;
1634	}
1635
1636	/*
1637	* kn now contains the matching knote, or NULL if no match
1638	*/
1639	if (kn == NULL) {
1640	if (kev->flags & EV_ADD) {
1641	kn = tkn;
1642	tkn = NULL;
1643	if (kn == NULL) {
1644	KQ_UNLOCK(kq);
1645	error = ENOMEM;
1646	goto done;
1647	}
1648	kn->kn_fp = fp;
1649	kn->kn_kq = kq;
1650	kn->kn_fop = fops;
1651	/*
1652	* apply reference counts to knote structure, and
1653	* do not release it at the end of this routine.
1654	*/
1655	fops = NULL;
1656	fp = NULL;
1657
1658	kn->kn_sfflags = kev->fflags;
1659	kn->kn_sdata = kev->data;
1660	kev->fflags = 0;
1661	kev->data = 0;
1662	kn->kn_kevent = *kev;
1663	kn->kn_kevent.flags &= ~(EV_ADD \| EV_DELETE \|
1664	EV_ENABLE \| EV_DISABLE \| EV_FORCEONESHOT);
1665	kn->kn_status = KN_DETACHED;
1666	kn_enter_flux(kn);
1667
1668	error = knote_attach(kn, kq);
1669	KQ_UNLOCK(kq);
1670	if (error != 0) {
1671	tkn = kn;
1672	goto done;
1673	}
1674
1675	if ((error = kn->kn_fop->f_attach(kn)) != 0) {
1676	knote_drop_detached(kn, td);
1677	goto done;
1678	}
1679	knl = kn_list_lock(kn);
1680	goto done_ev_add;
1681	} else {
1682	/* No matching knote and the EV_ADD flag is not set. */
1683	KQ_UNLOCK(kq);
1684	error = ENOENT;
1685	goto done;
1686	}
1687	}
1688
1689	if (kev->flags & EV_DELETE) {
1690	kn_enter_flux(kn);
1691	KQ_UNLOCK(kq);
1692	knote_drop(kn, td);
1693	goto done;
1694	}
1695
1696	if (kev->flags & EV_FORCEONESHOT) {
1697	kn->kn_flags \|= EV_ONESHOT;
1698	KNOTE_ACTIVATE(kn, 1);
1699	}
1700
1701	/*
1702	* The user may change some filter values after the initial EV_ADD,
1703	* but doing so will not reset any filter which has already been
1704	* triggered.
1705	*/
1706	kn->kn_status \|= KN_SCAN;
1707	kn_enter_flux(kn);
1708	KQ_UNLOCK(kq);
1709	knl = kn_list_lock(kn);
1710	kn->kn_kevent.udata = kev->udata;
1711	if (!fops->f_isfd && fops->f_touch != NULL) {
1712	fops->f_touch(kn, kev, EVENT_REGISTER);
1713	} else {
1714	kn->kn_sfflags = kev->fflags;
1715	kn->kn_sdata = kev->data;
1716	}
1717
1718	/*
1719	* We can get here with kn->kn_knlist == NULL. This can happen when
1720	* the initial attach event decides that the event is "completed"
1721	* already. i.e. filt_procattach is called on a zombie process. It
1722	* will call filt_proc which will remove it from the list, and NULL
1723	* kn_knlist.
1724	*/
1725	done_ev_add:
1726	if ((kev->flags & EV_ENABLE) != 0)
1727	kn->kn_status &= ~KN_DISABLED;
1728	else if ((kev->flags & EV_DISABLE) != 0)
1729	kn->kn_status \|= KN_DISABLED;
1730
1731	if ((kn->kn_status & KN_DISABLED) == 0)
1732	event = kn->kn_fop->f_event(kn, 0);
1733	else
1734	event = 0;
1735
1736	KQ_LOCK(kq);
1737	if (event)
1738	kn->kn_status \|= KN_ACTIVE;
1739	if ((kn->kn_status & (KN_ACTIVE \| KN_DISABLED \| KN_QUEUED)) ==
1740	KN_ACTIVE)
1741	knote_enqueue(kn);
1742	kn->kn_status &= ~KN_SCAN;
1743	kn_leave_flux(kn);
1744	kn_list_unlock(knl);
1745	KQ_UNLOCK_FLUX(kq);
1746
1747	done:
1748	KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
1749	if (filedesc_unlock)
1750	FILEDESC_XUNLOCK(td->td_proc->p_fd);
1751	if (fp != NULL)
1752	fdrop(fp, td);
1753	knote_free(tkn);
1754	if (fops != NULL)
1755	kqueue_fo_release(filt);
1756	return (error);
1757	}
1758
1759	static int
1760	kqueue_acquire(struct file fp, struct kqueue *kqp)
1761	{
1762	int error;
1763	struct kqueue *kq;
1764
1765	error = 0;
1766
1767	kq = fp->f_data;
1768	#ifndef __rtems__
1769	if (fp->f_type != DTYPE_KQUEUE \|\| kq == NULL)
1770	#else /* __rtems__ */
1771	if (fp->f_io.pathinfo.handlers != &kqueueops \|\| kq == NULL)
1772	#endif /* __rtems__ */
1773	return (EBADF);
1774	*kqp = kq;
1775	KQ_LOCK(kq);
1776	if ((kq->kq_state & KQ_CLOSING) == KQ_CLOSING) {
1777	KQ_UNLOCK(kq);
1778	return (EBADF);
1779	}
1780	kq->kq_refcnt++;
1781	KQ_UNLOCK(kq);
1782
1783	return error;
1784	}
1785
1786	static void
1787	kqueue_release(struct kqueue *kq, int locked)
1788	{
1789	if (locked)
1790	KQ_OWNED(kq);
1791	else
1792	KQ_LOCK(kq);
1793	kq->kq_refcnt--;
1794	if (kq->kq_refcnt == 1)
1795	wakeup(&kq->kq_refcnt);
1796	if (!locked)
1797	KQ_UNLOCK(kq);
1798	}
1799
1800	static void
1801	kqueue_schedtask(struct kqueue *kq)
1802	{
1803
1804	KQ_OWNED(kq);
1805	KASSERT(((kq->kq_state & KQ_TASKDRAIN) != KQ_TASKDRAIN),
1806	("scheduling kqueue task while draining"));
1807
1808	if ((kq->kq_state & KQ_TASKSCHED) != KQ_TASKSCHED) {
1809	taskqueue_enqueue(taskqueue_kqueue_ctx, &kq->kq_task);
1810	kq->kq_state \|= KQ_TASKSCHED;
1811	}
1812	}
1813
1814	/*
1815	* Expand the kq to make sure we have storage for fops/ident pair.
1816	*
1817	* Return 0 on success (or no work necessary), return errno on failure.
1818	*
1819	* Not calling hashinit w/ waitok (proper malloc flag) should be safe.
1820	* If kqueue_register is called from a non-fd context, there usually/should
1821	* be no locks held.
1822	*/
1823	static int
1824	kqueue_expand(struct kqueue kq, struct filterops fops, uintptr_t ident,
1825	int waitok)
1826	{
1827	struct klist list, tmp_knhash, *to_free;
1828	u_long tmp_knhashmask;
1829	int size;
1830	int fd;
1831	int mflag = waitok ? M_WAITOK : M_NOWAIT;
1832
1833	KQ_NOTOWNED(kq);
1834
1835	to_free = NULL;
1836	if (fops->f_isfd) {
1837	fd = ident;
1838	if (kq->kq_knlistsize <= fd) {
1839	size = kq->kq_knlistsize;
1840	while (size <= fd)
1841	size += KQEXTENT;
1842	list = malloc(size * sizeof(*list), M_KQUEUE, mflag);
1843	if (list == NULL)
1844	return ENOMEM;
1845	KQ_LOCK(kq);
1846	if (kq->kq_knlistsize > fd) {
1847	to_free = list;
1848	list = NULL;
1849	} else {
1850	if (kq->kq_knlist != NULL) {
1851	bcopy(kq->kq_knlist, list,
1852	kq->kq_knlistsize * sizeof(*list));
1853	to_free = kq->kq_knlist;
1854	kq->kq_knlist = NULL;
1855	}
1856	bzero((caddr_t)list +
1857	kq->kq_knlistsize * sizeof(*list),
1858	(size - kq->kq_knlistsize) * sizeof(*list));
1859	kq->kq_knlistsize = size;
1860	kq->kq_knlist = list;
1861	}
1862	KQ_UNLOCK(kq);
1863	}
1864	} else {
1865	if (kq->kq_knhashmask == 0) {
1866	tmp_knhash = hashinit(KN_HASHSIZE, M_KQUEUE,
1867	&tmp_knhashmask);
1868	if (tmp_knhash == NULL)
1869	return ENOMEM;
1870	KQ_LOCK(kq);
1871	if (kq->kq_knhashmask == 0) {
1872	kq->kq_knhash = tmp_knhash;
1873	kq->kq_knhashmask = tmp_knhashmask;
1874	} else {
1875	to_free = tmp_knhash;
1876	}
1877	KQ_UNLOCK(kq);
1878	}
1879	}
1880	free(to_free, M_KQUEUE);
1881
1882	KQ_NOTOWNED(kq);
1883	return 0;
1884	}
1885
1886	static void
1887	kqueue_task(void *arg, int pending)
1888	{
1889	struct kqueue *kq;
1890	int haskqglobal;
1891
1892	haskqglobal = 0;
1893	kq = arg;
1894
1895	KQ_GLOBAL_LOCK(&kq_global, haskqglobal);
1896	KQ_LOCK(kq);
1897
1898	KNOTE_LOCKED(&kq->kq_sel.si_note, 0);
1899
1900	kq->kq_state &= ~KQ_TASKSCHED;
1901	if ((kq->kq_state & KQ_TASKDRAIN) == KQ_TASKDRAIN) {
1902	wakeup(&kq->kq_state);
1903	}
1904	KQ_UNLOCK(kq);
1905	KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
1906	}
1907
1908	/*
1909	* Scan, update kn_data (if not ONESHOT), and copyout triggered events.
1910	* We treat KN_MARKER knotes as if they are in flux.
1911	*/
1912	static int
1913	kqueue_scan(struct kqueue kq, int maxevents, struct kevent_copyops k_ops,
1914	const struct timespec tsp, struct kevent keva, struct thread *td)
1915	{
1916	struct kevent *kevp;
1917	struct knote kn, marker;
1918	struct knlist *knl;
1919	sbintime_t asbt, rsbt;
1920	int count, error, haskqglobal, influx, nkev, touch;
1921
1922	count = maxevents;
1923	nkev = 0;
1924	error = 0;
1925	haskqglobal = 0;
1926
1927	if (maxevents == 0)
1928	goto done_nl;
1929
1930	rsbt = 0;
1931	if (tsp != NULL) {
1932	if (tsp->tv_sec < 0 \|\| tsp->tv_nsec < 0 \|\|
1933	tsp->tv_nsec >= 1000000000) {
1934	error = EINVAL;
1935	goto done_nl;
1936	}
1937	if (timespecisset(tsp)) {
1938	if (tsp->tv_sec <= INT32_MAX) {
1939	rsbt = tstosbt(*tsp);
1940	if (TIMESEL(&asbt, rsbt))
1941	asbt += tc_tick_sbt;
1942	if (asbt <= SBT_MAX - rsbt)
1943	asbt += rsbt;
1944	else
1945	asbt = 0;
1946	rsbt >>= tc_precexp;
1947	} else
1948	asbt = 0;
1949	} else
1950	asbt = -1;
1951	} else
1952	asbt = 0;
1953	marker = knote_alloc(1);
1954	marker->kn_status = KN_MARKER;
1955	KQ_LOCK(kq);
1956
1957	retry:
1958	kevp = keva;
1959	if (kq->kq_count == 0) {
1960	if (asbt == -1) {
1961	error = EWOULDBLOCK;
1962	} else {
1963	kq->kq_state \|= KQ_SLEEP;
1964	error = msleep_sbt(kq, &kq->kq_lock, PSOCK \| PCATCH,
1965	"kqread", asbt, rsbt, C_ABSOLUTE);
1966	}
1967	if (error == 0)
1968	goto retry;
1969	/* don't restart after signals... */
1970	if (error == ERESTART)
1971	error = EINTR;
1972	else if (error == EWOULDBLOCK)
1973	error = 0;
1974	goto done;
1975	}
1976
1977	TAILQ_INSERT_TAIL(&kq->kq_head, marker, kn_tqe);
1978	influx = 0;
1979	while (count) {
1980	KQ_OWNED(kq);
1981	kn = TAILQ_FIRST(&kq->kq_head);
1982
1983	if ((kn->kn_status == KN_MARKER && kn != marker) \|\|
1984	kn_in_flux(kn)) {
1985	if (influx) {
1986	influx = 0;
1987	KQ_FLUX_WAKEUP(kq);
1988	}
1989	kq->kq_state \|= KQ_FLUXWAIT;
1990	error = msleep(kq, &kq->kq_lock, PSOCK,
1991	"kqflxwt", 0);
1992	continue;
1993	}
1994
1995	TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
1996	if ((kn->kn_status & KN_DISABLED) == KN_DISABLED) {
1997	kn->kn_status &= ~KN_QUEUED;
1998	kq->kq_count--;
1999	continue;
2000	}
2001	if (kn == marker) {
2002	KQ_FLUX_WAKEUP(kq);
2003	if (count == maxevents)
2004	goto retry;
2005	goto done;
2006	}
2007	KASSERT(!kn_in_flux(kn),
2008	("knote %p is unexpectedly in flux", kn));
2009
2010	if ((kn->kn_flags & EV_DROP) == EV_DROP) {
2011	kn->kn_status &= ~KN_QUEUED;
2012	kn_enter_flux(kn);
2013	kq->kq_count--;
2014	KQ_UNLOCK(kq);
2015	/*
2016	* We don't need to lock the list since we've
2017	* marked it as in flux.
2018	*/
2019	knote_drop(kn, td);
2020	KQ_LOCK(kq);
2021	continue;
2022	} else if ((kn->kn_flags & EV_ONESHOT) == EV_ONESHOT) {
2023	kn->kn_status &= ~KN_QUEUED;
2024	kn_enter_flux(kn);
2025	kq->kq_count--;
2026	KQ_UNLOCK(kq);
2027	/*
2028	* We don't need to lock the list since we've
2029	* marked the knote as being in flux.
2030	*/
2031	*kevp = kn->kn_kevent;
2032	knote_drop(kn, td);
2033	KQ_LOCK(kq);
2034	kn = NULL;
2035	} else {
2036	kn->kn_status \|= KN_SCAN;
2037	kn_enter_flux(kn);
2038	KQ_UNLOCK(kq);
2039	if ((kn->kn_status & KN_KQUEUE) == KN_KQUEUE)
2040	KQ_GLOBAL_LOCK(&kq_global, haskqglobal);
2041	knl = kn_list_lock(kn);
2042	if (kn->kn_fop->f_event(kn, 0) == 0) {
2043	KQ_LOCK(kq);
2044	KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
2045	kn->kn_status &= ~(KN_QUEUED \| KN_ACTIVE \|
2046	KN_SCAN);
2047	kn_leave_flux(kn);
2048	kq->kq_count--;
2049	kn_list_unlock(knl);
2050	influx = 1;
2051	continue;
2052	}
2053	touch = (!kn->kn_fop->f_isfd &&
2054	kn->kn_fop->f_touch != NULL);
2055	if (touch)
2056	kn->kn_fop->f_touch(kn, kevp, EVENT_PROCESS);
2057	else
2058	*kevp = kn->kn_kevent;
2059	KQ_LOCK(kq);
2060	KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
2061	if (kn->kn_flags & (EV_CLEAR \| EV_DISPATCH)) {
2062	/*
2063	* Manually clear knotes who weren't
2064	* 'touch'ed.
2065	*/
2066	if (touch == 0 && kn->kn_flags & EV_CLEAR) {
2067	kn->kn_data = 0;
2068	kn->kn_fflags = 0;
2069	}
2070	if (kn->kn_flags & EV_DISPATCH)
2071	kn->kn_status \|= KN_DISABLED;
2072	kn->kn_status &= ~(KN_QUEUED \| KN_ACTIVE);
2073	kq->kq_count--;
2074	} else
2075	TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
2076
2077	kn->kn_status &= ~KN_SCAN;
2078	kn_leave_flux(kn);
2079	kn_list_unlock(knl);
2080	influx = 1;
2081	}
2082
2083	/* we are returning a copy to the user */
2084	kevp++;
2085	nkev++;
2086	count--;
2087
2088	if (nkev == KQ_NEVENTS) {
2089	influx = 0;
2090	KQ_UNLOCK_FLUX(kq);
2091	error = k_ops->k_copyout(k_ops->arg, keva, nkev);
2092	nkev = 0;
2093	kevp = keva;
2094	KQ_LOCK(kq);
2095	if (error)
2096	break;
2097	}
2098	}
2099	TAILQ_REMOVE(&kq->kq_head, marker, kn_tqe);
2100	done:
2101	KQ_OWNED(kq);
2102	KQ_UNLOCK_FLUX(kq);
2103	knote_free(marker);
2104	done_nl:
2105	KQ_NOTOWNED(kq);
2106	if (nkev != 0)
2107	error = k_ops->k_copyout(k_ops->arg, keva, nkev);
2108	td->td_retval[0] = maxevents - count;
2109	return (error);
2110	}
2111
2112	#ifndef __rtems__
2113	/ARGSUSED/
2114	static int
2115	kqueue_ioctl(struct file fp, u_long cmd, void data,
2116	struct ucred active_cred, struct thread td)
2117	{
2118	/*
2119	* Enabling sigio causes two major problems:
2120	* 1) infinite recursion:
2121	* Synopsys: kevent is being used to track signals and have FIOASYNC
2122	* set. On receipt of a signal this will cause a kqueue to recurse
2123	* into itself over and over. Sending the sigio causes the kqueue
2124	* to become ready, which in turn posts sigio again, forever.
2125	* Solution: this can be solved by setting a flag in the kqueue that
2126	* we have a SIGIO in progress.
2127	* 2) locking problems:
2128	* Synopsys: Kqueue is a leaf subsystem, but adding signalling puts
2129	* us above the proc and pgrp locks.
2130	* Solution: Post a signal using an async mechanism, being sure to
2131	* record a generation count in the delivery so that we do not deliver
2132	* a signal to the wrong process.
2133	*
2134	* Note, these two mechanisms are somewhat mutually exclusive!
2135	*/
2136	#if 0
2137	struct kqueue *kq;
2138
2139	kq = fp->f_data;
2140	switch (cmd) {
2141	case FIOASYNC:
2142	if ((int )data) {
2143	kq->kq_state \|= KQ_ASYNC;
2144	} else {
2145	kq->kq_state &= ~KQ_ASYNC;
2146	}
2147	return (0);
2148
2149	case FIOSETOWN:
2150	return (fsetown((int )data, &kq->kq_sigio));
2151
2152	case FIOGETOWN:
2153	(int )data = fgetown(&kq->kq_sigio);
2154	return (0);
2155	}
2156	#endif
2157
2158	return (ENOTTY);
2159	}
2160	#endif /* __rtems__ */
2161
2162	/ARGSUSED/
2163	static int
2164	kqueue_poll(struct file fp, int events, struct ucred active_cred,
2165	struct thread *td)
2166	{
2167	struct kqueue *kq;
2168	int revents = 0;
2169	int error;
2170
2171	if ((error = kqueue_acquire(fp, &kq)))
2172	return POLLERR;
2173
2174	KQ_LOCK(kq);
2175	if (events & (POLLIN \| POLLRDNORM)) {
2176	if (kq->kq_count) {
2177	revents \|= events & (POLLIN \| POLLRDNORM);
2178	} else {
2179	selrecord(td, &kq->kq_sel);
2180	if (SEL_WAITING(&kq->kq_sel))
2181	kq->kq_state \|= KQ_SEL;
2182	}
2183	}
2184	kqueue_release(kq, 1);
2185	KQ_UNLOCK(kq);
2186	return (revents);
2187	}
#ifdef __rtems__
/*
 * RTEMS libio poll handler for kqueue descriptors.
 *
 * Translates the iop into the BSD file and forwards to kqueue_poll().  The
 * return value is a poll event mask, not an errno value, so a missing
 * current thread is reported as POLLERR, the same way kqueue_poll()
 * reports its own failure.
 */
static int
rtems_bsd_kqueue_poll(rtems_libio_t *iop, int events)
{
	struct thread *td = rtems_bsd_get_curthread_or_null();
	struct file *fp = rtems_bsd_iop_to_fp(iop);
	int revents;

	if (td != NULL) {
		revents = kqueue_poll(fp, events, NULL, td);
	} else {
		revents = POLLERR;
	}

	return revents;
}
#endif /* __rtems__ */
2205
2206	/ARGSUSED/
2207	#ifndef __rtems__
2208	static int
2209	kqueue_stat(struct file fp, struct stat st, struct ucred *active_cred,
2210	struct thread *td)
2211	{
2212
2213	bzero((void )st, sizeof st);
2214	#else /* __rtems__ */
2215	static int
2216	rtems_bsd_kqueue_stat(const rtems_filesystem_location_info_t *loc,
2217	struct stat *st)
2218	{
2219	(void) loc;
2220	#endif /* __rtems__ */
2221	/*
2222	* We no longer return kq_count because the unlocked value is useless.
2223	* If you spent all this time getting the count, why not spend your
2224	* syscall better by calling kevent?
2225	*
2226	* XXX - This is needed for libc_r.
2227	*/
2228	st->st_mode = S_IFIFO;
2229	return (0);
2230	}
2231
2232	static void
2233	kqueue_drain(struct kqueue kq, struct thread td)
2234	{
2235	struct knote *kn;
2236	int i;
2237
2238	KQ_LOCK(kq);
2239
2240	KASSERT((kq->kq_state & KQ_CLOSING) != KQ_CLOSING,
2241	("kqueue already closing"));
2242	kq->kq_state \|= KQ_CLOSING;
2243	if (kq->kq_refcnt > 1)
2244	msleep(&kq->kq_refcnt, &kq->kq_lock, PSOCK, "kqclose", 0);
2245
2246	KASSERT(kq->kq_refcnt == 1, ("other refs are out there!"));
2247
2248	KASSERT(knlist_empty(&kq->kq_sel.si_note),
2249	("kqueue's knlist not empty"));
2250
2251	for (i = 0; i < kq->kq_knlistsize; i++) {
2252	while ((kn = SLIST_FIRST(&kq->kq_knlist[i])) != NULL) {
2253	if (kn_in_flux(kn)) {
2254	kq->kq_state \|= KQ_FLUXWAIT;
2255	msleep(kq, &kq->kq_lock, PSOCK, "kqclo1", 0);
2256	continue;
2257	}
2258	kn_enter_flux(kn);
2259	KQ_UNLOCK(kq);
2260	knote_drop(kn, td);
2261	KQ_LOCK(kq);
2262	}
2263	}
2264	if (kq->kq_knhashmask != 0) {
2265	for (i = 0; i <= kq->kq_knhashmask; i++) {
2266	while ((kn = SLIST_FIRST(&kq->kq_knhash[i])) != NULL) {
2267	if (kn_in_flux(kn)) {
2268	kq->kq_state \|= KQ_FLUXWAIT;
2269	msleep(kq, &kq->kq_lock, PSOCK,
2270	"kqclo2", 0);
2271	continue;
2272	}
2273	kn_enter_flux(kn);
2274	KQ_UNLOCK(kq);
2275	knote_drop(kn, td);
2276	KQ_LOCK(kq);
2277	}
2278	}
2279	}
2280
2281	if ((kq->kq_state & KQ_TASKSCHED) == KQ_TASKSCHED) {
2282	kq->kq_state \|= KQ_TASKDRAIN;
2283	msleep(&kq->kq_state, &kq->kq_lock, PSOCK, "kqtqdr", 0);
2284	}
2285
2286	if ((kq->kq_state & KQ_SEL) == KQ_SEL) {
2287	selwakeuppri(&kq->kq_sel, PSOCK);
2288	if (!SEL_WAITING(&kq->kq_sel))
2289	kq->kq_state &= ~KQ_SEL;
2290	}
2291
2292	KQ_UNLOCK(kq);
2293	}
2294
2295	static void
2296	kqueue_destroy(struct kqueue *kq)
2297	{
2298
2299	#ifndef __rtems__
2300	KASSERT(kq->kq_fdp == NULL,
2301	("kqueue still attached to a file descriptor"));
2302	#endif /* __rtems__ */
2303	seldrain(&kq->kq_sel);
2304	knlist_destroy(&kq->kq_sel.si_note);
2305	mtx_destroy(&kq->kq_lock);
2306
2307	if (kq->kq_knhash != NULL)
2308	free(kq->kq_knhash, M_KQUEUE);
2309	if (kq->kq_knlist != NULL)
2310	free(kq->kq_knlist, M_KQUEUE);
2311
2312	funsetown(&kq->kq_sigio);
2313	}
2314
2315	/ARGSUSED/
2316	static int
2317	kqueue_close(struct file fp, struct thread td)
2318	{
2319	struct kqueue *kq = fp->f_data;
2320	struct filedesc *fdp;
2321	int error;
2322	int filedesc_unlock;
2323
2324	if ((error = kqueue_acquire(fp, &kq)))
2325	return error;
2326	kqueue_drain(kq, td);
2327
2328	#ifndef __rtems__
2329	/*
2330	* We could be called due to the knote_drop() doing fdrop(),
2331	* called from kqueue_register(). In this case the global
2332	* lock is owned, and filedesc sx is locked before, to not
2333	* take the sleepable lock after non-sleepable.
2334	*/
2335	fdp = kq->kq_fdp;
2336	kq->kq_fdp = NULL;
2337	if (!sx_xlocked(FILEDESC_LOCK(fdp))) {
2338	FILEDESC_XLOCK(fdp);
2339	filedesc_unlock = 1;
2340	} else
2341	filedesc_unlock = 0;
2342	TAILQ_REMOVE(&fdp->fd_kqlist, kq, kq_list);
2343	if (filedesc_unlock)
2344	FILEDESC_XUNLOCK(fdp);
2345	#else /* __rtems__ */
2346	(void)filedesc_unlock;
2347	rtems_libio_lock();
2348	TAILQ_REMOVE(&fd_kqlist, kq, kq_list);
2349	rtems_libio_unlock();
2350	#endif /* __rtems__ */
2351
2352	kqueue_destroy(kq);
2353	chgkqcnt(kq->kq_cred->cr_ruidinfo, -1, 0);
2354	crfree(kq->kq_cred);
2355	free(kq, M_KQUEUE);
2356	fp->f_data = NULL;
2357
2358	return (0);
2359	}
#ifdef __rtems__
/*
 * RTEMS libio close handler for kqueue descriptors.
 *
 * Forwards to kqueue_close() with the current BSD thread, or uses ENOMEM
 * when there is none, and passes the error code through
 * rtems_bsd_error_to_status_and_errno().
 */
static int
rtems_bsd_kqueue_close(rtems_libio_t *iop)
{
	struct thread *td = rtems_bsd_get_curthread_or_null();
	struct file *fp = rtems_bsd_iop_to_fp(iop);
	int error;

	error = (td != NULL) ? kqueue_close(fp, td) : ENOMEM;

	return rtems_bsd_error_to_status_and_errno(error);
}
#endif /* __rtems__ */
2377
2378	#ifndef __rtems__
2379	static int
2380	kqueue_fill_kinfo(struct file fp, struct kinfo_file kif, struct filedesc *fdp)
2381	{
2382
2383	kif->kf_type = KF_TYPE_KQUEUE;
2384	return (0);
2385	}
2386	#endif /* __rtems__ */
2387
2388	static void
2389	kqueue_wakeup(struct kqueue *kq)
2390	{
2391	KQ_OWNED(kq);
2392
2393	if ((kq->kq_state & KQ_SLEEP) == KQ_SLEEP) {
2394	kq->kq_state &= ~KQ_SLEEP;
2395	wakeup(kq);
2396	}
2397	if ((kq->kq_state & KQ_SEL) == KQ_SEL) {
2398	selwakeuppri(&kq->kq_sel, PSOCK);
2399	if (!SEL_WAITING(&kq->kq_sel))
2400	kq->kq_state &= ~KQ_SEL;
2401	}
2402	if (!knlist_empty(&kq->kq_sel.si_note))
2403	kqueue_schedtask(kq);
2404	if ((kq->kq_state & KQ_ASYNC) == KQ_ASYNC) {
2405	#ifndef __rtems__
2406	pgsigio(&kq->kq_sigio, SIGIO, 0);
2407	#else /* __rtems__ */
2408	BSD_ASSERT(0);
2409	#endif /* __rtems__ */
2410	}
2411	}
2412
2413	/*
2414	* Walk down a list of knotes, activating them if their event has triggered.
2415	*
2416	* There is a possibility to optimize in the case of one kq watching another.
2417	* Instead of scheduling a task to wake it up, you could pass enough state
2418	* down the chain to make up the parent kqueue. Make this code functional
2419	* first.
2420	*/
2421	void
2422	knote(struct knlist *list, long hint, int lockflags)
2423	{
2424	struct kqueue *kq;
2425	struct knote kn, tkn;
2426	int error;
2427
2428	if (list == NULL)
2429	return;
2430
2431	KNL_ASSERT_LOCK(list, lockflags & KNF_LISTLOCKED);
2432
2433	if ((lockflags & KNF_LISTLOCKED) == 0)
2434	list->kl_lock(list->kl_lockarg);
2435
2436	/*
2437	* If we unlock the list lock (and enter influx), we can
2438	* eliminate the kqueue scheduling, but this will introduce
2439	* four lock/unlock's for each knote to test. Also, marker
2440	* would be needed to keep iteration position, since filters
2441	* or other threads could remove events.
2442	*/
2443	SLIST_FOREACH_SAFE(kn, &list->kl_list, kn_selnext, tkn) {
2444	kq = kn->kn_kq;
2445	KQ_LOCK(kq);
2446	if (kn_in_flux(kn) && (kn->kn_status & KN_SCAN) == 0) {
2447	/*
2448	* Do not process the influx notes, except for
2449	* the influx coming from the kq unlock in the
2450	* kqueue_scan(). In the later case, we do
2451	* not interfere with the scan, since the code
2452	* fragment in kqueue_scan() locks the knlist,
2453	* and cannot proceed until we finished.
2454	*/
2455	KQ_UNLOCK(kq);
2456	} else if ((lockflags & KNF_NOKQLOCK) != 0) {
2457	kn_enter_flux(kn);
2458	KQ_UNLOCK(kq);
2459	error = kn->kn_fop->f_event(kn, hint);
2460	KQ_LOCK(kq);
2461	kn_leave_flux(kn);
2462	if (error)
2463	KNOTE_ACTIVATE(kn, 1);
2464	KQ_UNLOCK_FLUX(kq);
2465	} else {
2466	kn->kn_status \|= KN_HASKQLOCK;
2467	if (kn->kn_fop->f_event(kn, hint))
2468	KNOTE_ACTIVATE(kn, 1);
2469	kn->kn_status &= ~KN_HASKQLOCK;
2470	KQ_UNLOCK(kq);
2471	}
2472	}
2473	if ((lockflags & KNF_LISTLOCKED) == 0)
2474	list->kl_unlock(list->kl_lockarg);
2475	}
2476
2477	/*
2478	* add a knote to a knlist
2479	*/
2480	void
2481	knlist_add(struct knlist knl, struct knote kn, int islocked)
2482	{
2483
2484	KNL_ASSERT_LOCK(knl, islocked);
2485	KQ_NOTOWNED(kn->kn_kq);
2486	KASSERT(kn_in_flux(kn), ("knote %p not in flux", kn));
2487	KASSERT((kn->kn_status & KN_DETACHED) != 0,
2488	("knote %p was not detached", kn));
2489	if (!islocked)
2490	knl->kl_lock(knl->kl_lockarg);
2491	SLIST_INSERT_HEAD(&knl->kl_list, kn, kn_selnext);
2492	if (!islocked)
2493	knl->kl_unlock(knl->kl_lockarg);
2494	KQ_LOCK(kn->kn_kq);
2495	kn->kn_knlist = knl;
2496	kn->kn_status &= ~KN_DETACHED;
2497	KQ_UNLOCK(kn->kn_kq);
2498	}
2499
2500	static void
2501	knlist_remove_kq(struct knlist knl, struct knote kn, int knlislocked,
2502	int kqislocked)
2503	{
2504
2505	KASSERT(!kqislocked \|\| knlislocked, ("kq locked w/o knl locked"));
2506	KNL_ASSERT_LOCK(knl, knlislocked);
2507	mtx_assert(&kn->kn_kq->kq_lock, kqislocked ? MA_OWNED : MA_NOTOWNED);
2508	KASSERT(kqislocked \|\| kn_in_flux(kn), ("knote %p not in flux", kn));
2509	KASSERT((kn->kn_status & KN_DETACHED) == 0,
2510	("knote %p was already detached", kn));
2511	if (!knlislocked)
2512	knl->kl_lock(knl->kl_lockarg);
2513	SLIST_REMOVE(&knl->kl_list, kn, knote, kn_selnext);
2514	kn->kn_knlist = NULL;
2515	if (!knlislocked)
2516	kn_list_unlock(knl);
2517	if (!kqislocked)
2518	KQ_LOCK(kn->kn_kq);
2519	kn->kn_status \|= KN_DETACHED;
2520	if (!kqislocked)
2521	KQ_UNLOCK(kn->kn_kq);
2522	}
2523
/*
 * remove knote from the specified knlist
 *
 * islocked states whether the caller holds the list lock; the kq lock is
 * always treated as not held.
 */
void
knlist_remove(struct knlist *knl, struct knote *kn, int islocked)
{

	knlist_remove_kq(knl, kn, islocked, 0);
}
2533
2534	int
2535	knlist_empty(struct knlist *knl)
2536	{
2537
2538	KNL_ASSERT_LOCKED(knl);
2539	return (SLIST_EMPTY(&knl->kl_list));
2540	}
2541
2542	static struct mtx knlist_lock;
2543	MTX_SYSINIT(knlist_lock, &knlist_lock, "knlist lock for lockless objects",
2544	MTX_DEF);
2545	static void knlist_mtx_lock(void *arg);
2546	static void knlist_mtx_unlock(void *arg);
2547
/* knlist lock callback: arg is the struct mtx to acquire. */
static void
knlist_mtx_lock(void *arg)
{
	struct mtx *m = (struct mtx *)arg;

	mtx_lock(m);
}
2554
/* knlist unlock callback: arg is the struct mtx to release. */
static void
knlist_mtx_unlock(void *arg)
{
	struct mtx *m = (struct mtx *)arg;

	mtx_unlock(m);
}
2561
/* knlist assertion callback: check that the mutex in arg is owned. */
static void
knlist_mtx_assert_locked(void *arg)
{

	mtx_assert((struct mtx *)arg, MA_OWNED);
}
2568
/* knlist assertion callback: check that the mutex in arg is not owned. */
static void
knlist_mtx_assert_unlocked(void *arg)
{

	mtx_assert((struct mtx *)arg, MA_NOTOWNED);
}
2575
2576	#ifndef __rtems__
/* knlist lock callback for rwlock-protected lists: take a read lock. */
static void
knlist_rw_rlock(void *arg)
{
	struct rwlock *rw = (struct rwlock *)arg;

	rw_rlock(rw);
}
2583
/* knlist unlock callback for rwlock-protected lists: drop the read lock. */
static void
knlist_rw_runlock(void *arg)
{
	struct rwlock *rw = (struct rwlock *)arg;

	rw_runlock(rw);
}
2590
/* knlist assertion callback: check that the rwlock in arg is locked. */
static void
knlist_rw_assert_locked(void *arg)
{

	rw_assert((struct rwlock *)arg, RA_LOCKED);
}
2597
/* knlist assertion callback: check that the rwlock in arg is unlocked. */
static void
knlist_rw_assert_unlocked(void *arg)
{

	rw_assert((struct rwlock *)arg, RA_UNLOCKED);
}
2604	#endif /* __rtems__ */
2605
2606	void
2607	knlist_init(struct knlist knl, void lock, void (kl_lock)(void ),
2608	void (kl_unlock)(void ),
2609	void (kl_assert_locked)(void ), void (kl_assert_unlocked)(void ))
2610	{
2611
2612	if (lock == NULL)
2613	knl->kl_lockarg = &knlist_lock;
2614	else
2615	knl->kl_lockarg = lock;
2616
2617	if (kl_lock == NULL)
2618	knl->kl_lock = knlist_mtx_lock;
2619	else
2620	knl->kl_lock = kl_lock;
2621	if (kl_unlock == NULL)
2622	knl->kl_unlock = knlist_mtx_unlock;
2623	else
2624	knl->kl_unlock = kl_unlock;
2625	if (kl_assert_locked == NULL)
2626	knl->kl_assert_locked = knlist_mtx_assert_locked;
2627	else
2628	knl->kl_assert_locked = kl_assert_locked;
2629	if (kl_assert_unlocked == NULL)
2630	knl->kl_assert_unlocked = knlist_mtx_assert_unlocked;
2631	else
2632	knl->kl_assert_unlocked = kl_assert_unlocked;
2633
2634	knl->kl_autodestroy = 0;
2635	SLIST_INIT(&knl->kl_list);
2636	}
2637
2638	void
2639	knlist_init_mtx(struct knlist knl, struct mtx lock)
2640	{
2641
2642	knlist_init(knl, lock, NULL, NULL, NULL, NULL);
2643	}
2644
2645	struct knlist *
2646	knlist_alloc(struct mtx *lock)
2647	{
2648	struct knlist *knl;
2649
2650	knl = malloc(sizeof(struct knlist), M_KQUEUE, M_WAITOK);
2651	knlist_init_mtx(knl, lock);
2652	return (knl);
2653	}
2654
2655	#ifndef __rtems__
2656	void
2657	knlist_init_rw_reader(struct knlist knl, struct rwlock lock)
2658	{
2659
2660	knlist_init(knl, lock, knlist_rw_rlock, knlist_rw_runlock,
2661	knlist_rw_assert_locked, knlist_rw_assert_unlocked);
2662	}
2663	#endif /* __rtems__ */
2664
/*
 * Tear down a knlist.  Nothing is released here; the function only asserts
 * that no knotes remain on the list.
 */
void
knlist_destroy(struct knlist *knl)
{

	KASSERT(KNLIST_EMPTY(knl),
	    ("destroying knlist %p with knotes on it", knl));
}
2672
2673	void
2674	knlist_detach(struct knlist *knl)
2675	{
2676
2677	KNL_ASSERT_LOCKED(knl);
2678	knl->kl_autodestroy = 1;
2679	if (knlist_empty(knl)) {
2680	knlist_destroy(knl);
2681	free(knl, M_KQUEUE);
2682	}
2683	}
2684
2685	/*
2686	* Even if we are locked, we may need to drop the lock to allow any influx
2687	* knotes time to "settle".
2688	*/
2689	void
2690	knlist_cleardel(struct knlist knl, struct thread td, int islocked, int killkn)
2691	{
2692	struct knote kn, kn2;
2693	struct kqueue *kq;
2694
2695	KASSERT(!knl->kl_autodestroy, ("cleardel for autodestroy %p", knl));
2696	if (islocked)
2697	KNL_ASSERT_LOCKED(knl);
2698	else {
2699	KNL_ASSERT_UNLOCKED(knl);
2700	again: /* need to reacquire lock since we have dropped it */
2701	knl->kl_lock(knl->kl_lockarg);
2702	}
2703
2704	SLIST_FOREACH_SAFE(kn, &knl->kl_list, kn_selnext, kn2) {
2705	kq = kn->kn_kq;
2706	KQ_LOCK(kq);
2707	if (kn_in_flux(kn)) {
2708	KQ_UNLOCK(kq);
2709	continue;
2710	}
2711	knlist_remove_kq(knl, kn, 1, 1);
2712	if (killkn) {
2713	kn_enter_flux(kn);
2714	KQ_UNLOCK(kq);
2715	knote_drop_detached(kn, td);
2716	} else {
2717	/* Make sure cleared knotes disappear soon */
2718	kn->kn_flags \|= EV_EOF \| EV_ONESHOT;
2719	KQ_UNLOCK(kq);
2720	}
2721	kq = NULL;
2722	}
2723
2724	if (!SLIST_EMPTY(&knl->kl_list)) {
2725	/* there are still in flux knotes remaining */
2726	kn = SLIST_FIRST(&knl->kl_list);
2727	kq = kn->kn_kq;
2728	KQ_LOCK(kq);
2729	KASSERT(kn_in_flux(kn), ("knote removed w/o list lock"));
2730	knl->kl_unlock(knl->kl_lockarg);
2731	kq->kq_state \|= KQ_FLUXWAIT;
2732	msleep(kq, &kq->kq_lock, PSOCK \| PDROP, "kqkclr", 0);
2733	kq = NULL;
2734	goto again;
2735	}
2736
2737	if (islocked)
2738	KNL_ASSERT_LOCKED(knl);
2739	else {
2740	knl->kl_unlock(knl->kl_lockarg);
2741	KNL_ASSERT_UNLOCKED(knl);
2742	}
2743	}
2744
2745	/*
2746	* Remove all knotes referencing a specified fd must be called with FILEDESC
2747	* lock. This prevents a race where a new fd comes along and occupies the
2748	* entry and we attach a knote to the fd.
2749	*/
2750	void
2751	knote_fdclose(struct thread *td, int fd)
2752	{
2753	#ifndef __rtems__
2754	struct filedesc *fdp = td->td_proc->p_fd;
2755	#endif /* __rtems__ */
2756	struct kqueue *kq;
2757	struct knote *kn;
2758	int influx;
2759
2760	#ifndef __rtems__
2761	FILEDESC_XLOCK_ASSERT(fdp);
2762	#endif /* __rtems__ */
2763
2764	/*
2765	* We shouldn't have to worry about new kevents appearing on fd
2766	* since filedesc is locked.
2767	*/
2768	#ifndef __rtems__
2769	TAILQ_FOREACH(kq, &fdp->fd_kqlist, kq_list) {
2770	#else /* __rtems__ */
2771	/* FIXME: Use separate lock? */
2772	rtems_libio_lock();
2773	TAILQ_FOREACH(kq, &fd_kqlist, kq_list) {
2774	#endif /* __rtems__ */
2775	KQ_LOCK(kq);
2776
2777	again:
2778	influx = 0;
2779	while (kq->kq_knlistsize > fd &&
2780	(kn = SLIST_FIRST(&kq->kq_knlist[fd])) != NULL) {
2781	if (kn_in_flux(kn)) {
2782	/* someone else might be waiting on our knote */
2783	if (influx)
2784	wakeup(kq);
2785	kq->kq_state \|= KQ_FLUXWAIT;
2786	msleep(kq, &kq->kq_lock, PSOCK, "kqflxwt", 0);
2787	goto again;
2788	}
2789	kn_enter_flux(kn);
2790	KQ_UNLOCK(kq);
2791	influx = 1;
2792	knote_drop(kn, td);
2793	KQ_LOCK(kq);
2794	}
2795	KQ_UNLOCK_FLUX(kq);
2796	}
2797	#ifdef __rtems__
2798	rtems_libio_unlock();
2799	#endif /* __rtems__ */
2800	}
2801
2802	static int
2803	knote_attach(struct knote kn, struct kqueue kq)
2804	{
2805	struct klist *list;
2806
2807	KASSERT(kn_in_flux(kn), ("knote %p not marked influx", kn));
2808	KQ_OWNED(kq);
2809
2810	if (kn->kn_fop->f_isfd) {
2811	if (kn->kn_id >= kq->kq_knlistsize)
2812	return (ENOMEM);
2813	list = &kq->kq_knlist[kn->kn_id];
2814	} else {
2815	if (kq->kq_knhash == NULL)
2816	return (ENOMEM);
2817	list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];
2818	}
2819	SLIST_INSERT_HEAD(list, kn, kn_link);
2820	return (0);
2821	}
2822
2823	static void
2824	knote_drop(struct knote kn, struct thread td)
2825	{
2826
2827	if ((kn->kn_status & KN_DETACHED) == 0)
2828	kn->kn_fop->f_detach(kn);
2829	knote_drop_detached(kn, td);
2830	}
2831
2832	static void
2833	knote_drop_detached(struct knote kn, struct thread td)
2834	{
2835	struct kqueue *kq;
2836	struct klist *list;
2837
2838	kq = kn->kn_kq;
2839
2840	KASSERT((kn->kn_status & KN_DETACHED) != 0,
2841	("knote %p still attached", kn));
2842	KQ_NOTOWNED(kq);
2843
2844	KQ_LOCK(kq);
2845	KASSERT(kn->kn_influx == 1,
2846	("knote_drop called on %p with influx %d", kn, kn->kn_influx));
2847
2848	if (kn->kn_fop->f_isfd)
2849	list = &kq->kq_knlist[kn->kn_id];
2850	else
2851	list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];
2852
2853	if (!SLIST_EMPTY(list))
2854	SLIST_REMOVE(list, kn, knote, kn_link);
2855	if (kn->kn_status & KN_QUEUED)
2856	knote_dequeue(kn);
2857	KQ_UNLOCK_FLUX(kq);
2858
2859	if (kn->kn_fop->f_isfd) {
2860	fdrop(kn->kn_fp, td);
2861	kn->kn_fp = NULL;
2862	}
2863	kqueue_fo_release(kn->kn_kevent.filter);
2864	kn->kn_fop = NULL;
2865	knote_free(kn);
2866	}
2867
2868	static void
2869	knote_enqueue(struct knote *kn)
2870	{
2871	struct kqueue *kq = kn->kn_kq;
2872
2873	KQ_OWNED(kn->kn_kq);
2874	KASSERT((kn->kn_status & KN_QUEUED) == 0, ("knote already queued"));
2875
2876	TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
2877	kn->kn_status \|= KN_QUEUED;
2878	kq->kq_count++;
2879	kqueue_wakeup(kq);
2880	}
2881
2882	static void
2883	knote_dequeue(struct knote *kn)
2884	{
2885	struct kqueue *kq = kn->kn_kq;
2886
2887	KQ_OWNED(kn->kn_kq);
2888	KASSERT(kn->kn_status & KN_QUEUED, ("knote not queued"));
2889
2890	TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
2891	kn->kn_status &= ~KN_QUEUED;
2892	kq->kq_count--;
2893	}
2894
/*
 * Create the UMA zone ("KNOTE") from which knotes are allocated.
 * Registered below as a SYSINIT hook.
 */
static void
knote_init(void)
{

	knote_zone = uma_zcreate("KNOTE", sizeof(struct knote), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, 0);
}
SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL);
2903
2904	static struct knote *
2905	knote_alloc(int waitok)
2906	{
2907
2908	return (uma_zalloc(knote_zone, (waitok ? M_WAITOK : M_NOWAIT) \|
2909	M_ZERO));
2910	}
2911
/* Return a knote to knote_zone. */
static void
knote_free(struct knote *kn)
{

	uma_zfree(knote_zone, kn);
}
2918
2919	/*
2920	* Register the kev w/ the kq specified by fd.
2921	*/
2922	int
2923	kqfd_register(int fd, struct kevent kev, struct thread td, int waitok)
2924	{
2925	struct kqueue *kq;
2926	struct file *fp;
2927	cap_rights_t rights;
2928	int error;
2929
2930	error = fget(td, fd, cap_rights_init(&rights, CAP_KQUEUE_CHANGE), &fp);
2931	if (error != 0)
2932	return (error);
2933	if ((error = kqueue_acquire(fp, &kq)) != 0)
2934	goto noacquire;
2935
2936	error = kqueue_register(kq, kev, td, waitok);
2937	kqueue_release(kq, 0);
2938
2939	noacquire:
2940	fdrop(fp, td);
2941	return (error);
2942	}
2943	#ifdef __rtems__
/*
 * RTEMS libio handler table for kqueue descriptors.  Only close, fstat,
 * poll and kqfilter have kqueue-specific handlers; every other operation
 * uses the rtems_filesystem_default_* handler.
 */
static const rtems_filesystem_file_handlers_r kqueueops = {
	.open_h = rtems_filesystem_default_open,
	.close_h = rtems_bsd_kqueue_close,
	.read_h = rtems_filesystem_default_read,
	.write_h = rtems_filesystem_default_write,
	.ioctl_h = rtems_filesystem_default_ioctl,
	.lseek_h = rtems_filesystem_default_lseek,
	.fstat_h = rtems_bsd_kqueue_stat,
	.ftruncate_h = rtems_filesystem_default_ftruncate,
	.fsync_h = rtems_filesystem_default_fsync_or_fdatasync,
	.fdatasync_h = rtems_filesystem_default_fsync_or_fdatasync,
	.fcntl_h = rtems_filesystem_default_fcntl,
	.poll_h = rtems_bsd_kqueue_poll,
	.kqfilter_h = rtems_bsd_kqueue_kqfilter,
	.readv_h = rtems_filesystem_default_readv,
	.writev_h = rtems_filesystem_default_writev,
	.mmap_h = rtems_filesystem_default_mmap
};
2962	#endif /* __rtems__ */

Note: See TracBrowser for help on using the repository browser.

Download in other formats: