source: rtems-libbsd/freebsd/sys/kern/sys_pipe.c @ 3c967ca

Last change on this file: 3c967ca, checked in by Sebastian Huber <sebastian.huber@…> on 06/08/17 at 11:15:12

Use <sys/lock.h> provided by Newlib

#include <machine/rtems-bsd-kernel-space.h>

/*-
 * Copyright (c) 1996 John S. Dyson
 * Copyright (c) 2012 Giovanni Trematerra
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 */

/*
 * This code has two modes of operation, a small write mode and a large
 * write mode.  The small write mode acts like conventional pipes with
 * a kernel buffer.  If the buffer is less than PIPE_MINDIRECT, then the
 * "normal" pipe buffering is done.  If the buffer is between PIPE_MINDIRECT
 * and PIPE_SIZE in size, the sending process pins the underlying pages in
 * memory, and the receiving process copies directly from these pinned pages
 * in the sending process.
 *
 * If the sending process receives a signal, it is possible that it will
 * go away, and certainly its address space can change, because control
 * is returned back to the user-mode side.  In that case, the pipe code
 * arranges to copy the buffer supplied by the user process, to a pageable
 * kernel buffer, and the receiving process will grab the data from the
 * pageable kernel buffer.  Since signals don't happen all that often,
 * the copy operation is normally eliminated.
 *
 * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
 * happen for small transfers so that the system will not spend all of
 * its time context switching.
 *
 * In order to limit the resource use of pipes, two sysctls exist:
 *
 * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable
 * address space available to us in pipe_map. This value is normally
 * autotuned, but may also be loader tuned.
 *
 * kern.ipc.pipekva - This read-only sysctl tracks the current amount of
 * memory in use by pipes.
 *
 * Based on how large pipekva is relative to maxpipekva, the following
 * will happen:
 *
 * 0% - 50%:
 *     New pipes are given 16K of memory backing, pipes may dynamically
 *     grow to as large as 64K where needed.
 * 50% - 75%:
 *     New pipes are given 4K (or PAGE_SIZE) of memory backing,
 *     existing pipes may NOT grow.
 * 75% - 100%:
 *     New pipes are given 4K (or PAGE_SIZE) of memory backing,
 *     existing pipes will be shrunk down to 4K whenever possible.
 *
 * Resizing may be disabled by setting kern.ipc.piperesizeallowed=0.  If
 * resizing is disabled, the only resize that will occur is the
 * 0 -> SMALL_PIPE_SIZE resize, which MUST occur for reverse-direction
 * pipes when they are first used.
 *
 * Additional information about the current state of pipes may be obtained
 * from kern.ipc.pipes, kern.ipc.pipefragretry, kern.ipc.pipeallocfail,
 * and kern.ipc.piperesizefail.
 *
 * Locking rules:  There are two locks present here:  A mutex, used via
 * PIPE_LOCK, and a flag, used via pipelock().  All locking is done via
 * the flag, as mutexes cannot persist over uiomove.  The mutex
 * exists only to guard access to the flag, and is not in itself a
 * locking mechanism.  Also note that there is only a single mutex for
 * both directions of a pipe.
 *
 * As pipelock() may have to sleep before it can acquire the flag, it
 * is important to reread all data after a call to pipelock(); everything
 * in the structure may have changed.
 */
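
/*
 * Illustrative sketch (added commentary, not part of the original file):
 * the canonical shape of the two-level locking scheme described above,
 * using hypothetical locals cpipe, buf, size and uio.  The mutex only
 * guards the PIPE_LOCKFL flag; the flag serializes I/O so the mutex can
 * be dropped around uiomove(), which may sleep on a page fault.
 */
#if 0
        PIPE_LOCK(cpipe);                       /* short-term mutex */
        error = pipelock(cpipe, 1);             /* long-term I/O flag */
        if (error == 0) {
                PIPE_UNLOCK(cpipe);
                error = uiomove(buf, size, uio);        /* may sleep */
                PIPE_LOCK(cpipe);
                /* Reread all pipe state here; it may have changed. */
                pipeunlock(cpipe);
        }
        PIPE_UNLOCK(cpipe);
#endif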

#include <rtems/bsd/local/opt_compat.h>

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/ttycom.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/selinfo.h>
#include <sys/signalvar.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/pipe.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/uio.h>
#include <sys/user.h>
#include <sys/event.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/uma.h>

/*
 * Use this define if you want to disable *fancy* VM things.  Expect an
 * approx 30% decrease in transfer rate.  This could be useful for
 * NetBSD or OpenBSD.
 */
/* #define PIPE_NODIRECT */

#define PIPE_PEER(pipe) \
        (((pipe)->pipe_state & PIPE_NAMED) ? (pipe) : ((pipe)->pipe_peer))
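
/*
 * Added commentary: for named pipes (FIFOs) both the reading and the
 * writing file point at the same struct pipe, so PIPE_PEER() resolves to
 * the pipe itself; for anonymous pipe pairs it resolves to the other
 * endpoint.  Either way, data is queued in the buffer that the reader
 * drains.
 */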

/*
 * interfaces to the outside world
 */
#ifndef __rtems__
static fo_rdwr_t        pipe_read;
static fo_rdwr_t        pipe_write;
static fo_truncate_t    pipe_truncate;
static fo_ioctl_t       pipe_ioctl;
static fo_poll_t        pipe_poll;
static fo_kqfilter_t    pipe_kqfilter;
static fo_stat_t        pipe_stat;
static fo_close_t       pipe_close;
static fo_chmod_t       pipe_chmod;
static fo_chown_t       pipe_chown;
static fo_fill_kinfo_t  pipe_fill_kinfo;

struct fileops pipeops = {
        .fo_read = pipe_read,
        .fo_write = pipe_write,
        .fo_truncate = pipe_truncate,
        .fo_ioctl = pipe_ioctl,
        .fo_poll = pipe_poll,
        .fo_kqfilter = pipe_kqfilter,
        .fo_stat = pipe_stat,
        .fo_close = pipe_close,
        .fo_chmod = pipe_chmod,
        .fo_chown = pipe_chown,
        .fo_sendfile = invfo_sendfile,
        .fo_fill_kinfo = pipe_fill_kinfo,
        .fo_flags = DFLAG_PASSABLE
};
#else /* __rtems__ */
#define PIPE_NODIRECT
#define PRIBIO                  (0)

static int rtems_bsd_pipe_open(rtems_libio_t *iop, const char *path, int oflag, mode_t mode);
static int rtems_bsd_pipe_close(rtems_libio_t *iop);
static ssize_t rtems_bsd_pipe_read(rtems_libio_t *iop, void *buffer, size_t count);
static ssize_t rtems_bsd_pipe_write(rtems_libio_t *iop, const void *buffer, size_t count);
static int rtems_bsd_pipe_ioctl(rtems_libio_t *iop, ioctl_command_t request, void *buffer);
static int rtems_bsd_pipe_stat(const rtems_filesystem_location_info_t *loc, struct stat *buf);
static int rtems_bsd_pipe_fcntl(rtems_libio_t *iop, int cmd);
static int rtems_bsd_pipe_poll(rtems_libio_t *iop, int events);
int rtems_bsd_pipe_kqfilter(rtems_libio_t *iop, struct knote *kn);

static const rtems_filesystem_file_handlers_r pipeops = {
        .open_h = rtems_bsd_pipe_open,
        .close_h = rtems_bsd_pipe_close,
        .read_h = rtems_bsd_pipe_read,
        .write_h = rtems_bsd_pipe_write,
        .ioctl_h = rtems_bsd_pipe_ioctl,
        .lseek_h = rtems_filesystem_default_lseek,
        .fstat_h = rtems_bsd_pipe_stat,
        .ftruncate_h = rtems_filesystem_default_ftruncate,
        .fsync_h = rtems_filesystem_default_fsync_or_fdatasync,
        .fdatasync_h = rtems_filesystem_default_fsync_or_fdatasync,
        .fcntl_h = rtems_bsd_pipe_fcntl,
        .poll_h = rtems_bsd_pipe_poll,
        .kqfilter_h = rtems_bsd_pipe_kqfilter
};

long    maxpipekva;                     /* Limit on pipe KVA */

#endif /* __rtems__ */

static void     filt_pipedetach(struct knote *kn);
static void     filt_pipedetach_notsup(struct knote *kn);
static int      filt_pipenotsup(struct knote *kn, long hint);
static int      filt_piperead(struct knote *kn, long hint);
static int      filt_pipewrite(struct knote *kn, long hint);

static struct filterops pipe_nfiltops = {
        .f_isfd = 1,
        .f_detach = filt_pipedetach_notsup,
        .f_event = filt_pipenotsup
};
static struct filterops pipe_rfiltops = {
        .f_isfd = 1,
        .f_detach = filt_pipedetach,
        .f_event = filt_piperead
};
static struct filterops pipe_wfiltops = {
        .f_isfd = 1,
        .f_detach = filt_pipedetach,
        .f_event = filt_pipewrite
};

/*
 * Default pipe buffer size(s); this can be kind-of large now because pipe
 * space is pageable.  The pipe code will try to maintain locality of
 * reference for performance reasons, so small amounts of outstanding I/O
 * will not wipe the cache.
 */
#define MINPIPESIZE (PIPE_SIZE/3)
#define MAXPIPESIZE (2*PIPE_SIZE/3)

static long amountpipekva;
static int pipefragretry;
static int pipeallocfail;
static int piperesizefail;
static int piperesizeallowed = 1;

SYSCTL_LONG(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
           &maxpipekva, 0, "Pipe KVA limit");
SYSCTL_LONG(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD,
           &amountpipekva, 0, "Pipe KVA usage");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipefragretry, CTLFLAG_RD,
          &pipefragretry, 0, "Pipe allocation retries due to fragmentation");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipeallocfail, CTLFLAG_RD,
          &pipeallocfail, 0, "Pipe allocation failures");
SYSCTL_INT(_kern_ipc, OID_AUTO, piperesizefail, CTLFLAG_RD,
          &piperesizefail, 0, "Pipe resize failures");
SYSCTL_INT(_kern_ipc, OID_AUTO, piperesizeallowed, CTLFLAG_RW,
          &piperesizeallowed, 0, "Pipe resizing allowed");
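
/*
 * Illustrative sketch (added, not part of the original file): how an
 * application might inspect the accounting exported above.  Assumes the
 * userland sysctl(3) interface is available, as it is on FreeBSD and in
 * rtems-libbsd.
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

static void
print_pipe_kva_usage(void)
{
        long kva, maxkva;
        size_t len;

        len = sizeof(kva);
        if (sysctlbyname("kern.ipc.pipekva", &kva, &len, NULL, 0) != 0)
                return;
        len = sizeof(maxkva);
        if (sysctlbyname("kern.ipc.maxpipekva", &maxkva, &len, NULL, 0) != 0)
                return;
        printf("pipe KVA: %ld of %ld bytes in use\n", kva, maxkva);
}
#endif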

static void pipeinit(void *dummy __unused);
static void pipeclose(struct pipe *cpipe);
static void pipe_free_kmem(struct pipe *cpipe);
static void pipe_create(struct pipe *pipe, int backing);
static void pipe_paircreate(struct thread *td, struct pipepair **p_pp);
static __inline int pipelock(struct pipe *cpipe, int catch);
static __inline void pipeunlock(struct pipe *cpipe);
#ifndef PIPE_NODIRECT
static int pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio);
static void pipe_destroy_write_buffer(struct pipe *wpipe);
static int pipe_direct_write(struct pipe *wpipe, struct uio *uio);
static void pipe_clone_write_buffer(struct pipe *wpipe);
#endif
static int pipespace(struct pipe *cpipe, int size);
static int pipespace_new(struct pipe *cpipe, int size);

static int      pipe_zone_ctor(void *mem, int size, void *arg, int flags);
static int      pipe_zone_init(void *mem, int size, int flags);
static void     pipe_zone_fini(void *mem, int size);

static uma_zone_t pipe_zone;
static struct unrhdr *pipeino_unr;
static dev_t pipedev_ino;

SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL);

static void
pipeinit(void *dummy __unused)
{

        pipe_zone = uma_zcreate("pipe", sizeof(struct pipepair),
            pipe_zone_ctor, NULL, pipe_zone_init, pipe_zone_fini,
            UMA_ALIGN_PTR, 0);
        KASSERT(pipe_zone != NULL, ("pipe_zone not initialized"));
        pipeino_unr = new_unrhdr(1, INT32_MAX, NULL);
        KASSERT(pipeino_unr != NULL, ("pipe fake inodes not initialized"));
        pipedev_ino = devfs_alloc_cdp_inode();
        KASSERT(pipedev_ino > 0, ("pipe dev inode not initialized"));
}

static int
pipe_zone_ctor(void *mem, int size, void *arg, int flags)
{
        struct pipepair *pp;
        struct pipe *rpipe, *wpipe;

        KASSERT(size == sizeof(*pp), ("pipe_zone_ctor: wrong size"));

        pp = (struct pipepair *)mem;

        /*
         * We zero both pipe endpoints to make sure all the kmem pointers
         * are NULL, flag fields are zero'd, etc.  We timestamp both
         * endpoints with the same time.
         */
        rpipe = &pp->pp_rpipe;
        bzero(rpipe, sizeof(*rpipe));
#ifndef __rtems__
        vfs_timestamp(&rpipe->pipe_ctime);
#else /* __rtems__ */
        rpipe->pipe_ctime.tv_sec = time(NULL);
#endif /* __rtems__ */
        rpipe->pipe_atime = rpipe->pipe_mtime = rpipe->pipe_ctime;

        wpipe = &pp->pp_wpipe;
        bzero(wpipe, sizeof(*wpipe));
        wpipe->pipe_ctime = rpipe->pipe_ctime;
        wpipe->pipe_atime = wpipe->pipe_mtime = rpipe->pipe_ctime;

        rpipe->pipe_peer = wpipe;
        rpipe->pipe_pair = pp;
        wpipe->pipe_peer = rpipe;
        wpipe->pipe_pair = pp;

        /*
         * Mark both endpoints as present; they will later get free'd
         * one at a time.  When both are free'd, then the whole pair
         * is released.
         */
        rpipe->pipe_present = PIPE_ACTIVE;
        wpipe->pipe_present = PIPE_ACTIVE;

        /*
         * Eventually, the MAC Framework may initialize the label
         * in ctor or init, but for now we do it elsewhere to avoid
         * blocking in ctor or init.
         */
        pp->pp_label = NULL;

        return (0);
}

static int
pipe_zone_init(void *mem, int size, int flags)
{
        struct pipepair *pp;

        KASSERT(size == sizeof(*pp), ("pipe_zone_init: wrong size"));

        pp = (struct pipepair *)mem;

        mtx_init(&pp->pp_mtx, "pipe mutex", NULL, MTX_DEF | MTX_NEW);
        return (0);
}

static void
pipe_zone_fini(void *mem, int size)
{
        struct pipepair *pp;

        KASSERT(size == sizeof(*pp), ("pipe_zone_fini: wrong size"));

        pp = (struct pipepair *)mem;

        mtx_destroy(&pp->pp_mtx);
}

static void
pipe_paircreate(struct thread *td, struct pipepair **p_pp)
{
        struct pipepair *pp;
        struct pipe *rpipe, *wpipe;

        *p_pp = pp = uma_zalloc(pipe_zone, M_WAITOK);
#ifdef MAC
        /*
         * The MAC label is shared between the connected endpoints.  As a
         * result mac_pipe_init() and mac_pipe_create() are called once
         * for the pair, and not on the endpoints.
         */
        mac_pipe_init(pp);
        mac_pipe_create(td->td_ucred, pp);
#endif
        rpipe = &pp->pp_rpipe;
        wpipe = &pp->pp_wpipe;

        knlist_init_mtx(&rpipe->pipe_sel.si_note, PIPE_MTX(rpipe));
        knlist_init_mtx(&wpipe->pipe_sel.si_note, PIPE_MTX(wpipe));

        /* Only the forward direction pipe is backed by default */
        pipe_create(rpipe, 1);
        pipe_create(wpipe, 0);

        rpipe->pipe_state |= PIPE_DIRECTOK;
        wpipe->pipe_state |= PIPE_DIRECTOK;
}

void
pipe_named_ctor(struct pipe **ppipe, struct thread *td)
{
        struct pipepair *pp;

        pipe_paircreate(td, &pp);
        pp->pp_rpipe.pipe_state |= PIPE_NAMED;
        *ppipe = &pp->pp_rpipe;
}

void
pipe_dtor(struct pipe *dpipe)
{
        struct pipe *peer;
        ino_t ino;

        ino = dpipe->pipe_ino;
        peer = (dpipe->pipe_state & PIPE_NAMED) != 0 ? dpipe->pipe_peer : NULL;
        funsetown(&dpipe->pipe_sigio);
        pipeclose(dpipe);
        if (peer != NULL) {
                funsetown(&peer->pipe_sigio);
                pipeclose(peer);
        }
        if (ino != 0 && ino != (ino_t)-1)
                free_unr(pipeino_unr, ino);
}

/*
 * The pipe system call for the DTYPE_PIPE type of pipes.  If we fail, let
 * the zone pick up the pieces via pipeclose().
 */
int
kern_pipe(struct thread *td, int fildes[2], int flags, struct filecaps *fcaps1,
    struct filecaps *fcaps2)
{
        struct file *rf, *wf;
        struct pipe *rpipe, *wpipe;
        struct pipepair *pp;
        int fd, fflags, error;

        pipe_paircreate(td, &pp);
        rpipe = &pp->pp_rpipe;
        wpipe = &pp->pp_wpipe;
        error = falloc_caps(td, &rf, &fd, flags, fcaps1);
        if (error) {
                pipeclose(rpipe);
                pipeclose(wpipe);
                return (error);
        }
        /* An extra reference on `rf' has been held for us by falloc_caps(). */
        fildes[0] = fd;

        fflags = FREAD | FWRITE;
        if ((flags & O_NONBLOCK) != 0)
                fflags |= FNONBLOCK;

        /*
         * Warning: once we've gotten past allocation of the fd for the
         * read-side, we can only drop the read side via fdrop() in order
         * to avoid races against processes which manage to dup() the read
         * side while we are blocked trying to allocate the write side.
         */
        finit(rf, fflags, DTYPE_PIPE, rpipe, &pipeops);
        error = falloc_caps(td, &wf, &fd, flags, fcaps2);
        if (error) {
                fdclose(td, rf, fildes[0]);
                fdrop(rf, td);
                /* rpipe has been closed by fdrop(). */
                pipeclose(wpipe);
                return (error);
        }
        /* An extra reference on `wf' has been held for us by falloc_caps(). */
        finit(wf, fflags, DTYPE_PIPE, wpipe, &pipeops);
        fdrop(wf, td);
        fildes[1] = fd;
        fdrop(rf, td);

        return (0);
}

#ifdef COMPAT_FREEBSD10
/* ARGSUSED */
int
freebsd10_pipe(struct thread *td, struct freebsd10_pipe_args *uap __unused)
{
        int error;
        int fildes[2];

        error = kern_pipe(td, fildes, 0, NULL, NULL);
        if (error)
                return (error);

        td->td_retval[0] = fildes[0];
        td->td_retval[1] = fildes[1];

        return (0);
}
#endif

#ifndef __rtems__
int
sys_pipe2(struct thread *td, struct pipe2_args *uap)
{
        int error, fildes[2];

        if (uap->flags & ~(O_CLOEXEC | O_NONBLOCK))
                return (EINVAL);
        error = kern_pipe(td, fildes, uap->flags, NULL, NULL);
        if (error)
                return (error);
        error = copyout(fildes, uap->fildes, 2 * sizeof(int));
        if (error) {
                (void)kern_close(td, fildes[0]);
                (void)kern_close(td, fildes[1]);
        }
        return (error);
}
#endif /* __rtems__ */

#ifdef __rtems__
int
pipe(int fildes[2])
{
        struct thread *td = rtems_bsd_get_curthread_or_null();
        int error;

        if (td != NULL) {
                error = kern_pipe(td, fildes, 0, NULL, NULL);
        } else {
                error = ENOMEM;
        }

        if (error == 0) {
                return error;
        } else {
                rtems_set_errno_and_return_minus_one(error);
        }
}
#endif /* __rtems__ */
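
/*
 * Illustrative sketch (added, not part of the original file): typical
 * application-level use of the pipe() wrapper above.  On RTEMS the two
 * descriptors are usually shared between tasks, since there is no fork().
 */
#if 0
#include <unistd.h>

static void
pipe_usage_example(void)
{
        int fildes[2];
        char buf[16];

        if (pipe(fildes) != 0)
                return;
        (void)write(fildes[1], "ping", 4);       /* writer side */
        (void)read(fildes[0], buf, sizeof(buf)); /* reader side */
        (void)close(fildes[0]);
        (void)close(fildes[1]);
}
#endif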

/*
 * Allocate kva for the pipe circular buffer; the space is pageable.
 * This routine will 'realloc' the size of a pipe safely: if it fails,
 * it will retain the old buffer and return ENOMEM.
 */
static int
pipespace_new(cpipe, size)
        struct pipe *cpipe;
        int size;
{
        caddr_t buffer;
        int error, cnt, firstseg;
        static int curfail = 0;
        static struct timeval lastfail;

        KASSERT(!mtx_owned(PIPE_MTX(cpipe)), ("pipespace: pipe mutex locked"));
        KASSERT(!(cpipe->pipe_state & PIPE_DIRECTW),
                ("pipespace: resize of direct writes not allowed"));
retry:
        cnt = cpipe->pipe_buffer.cnt;
        if (cnt > size)
                size = cnt;

        size = round_page(size);
#ifndef __rtems__
        buffer = (caddr_t) vm_map_min(pipe_map);

        error = vm_map_find(pipe_map, NULL, 0,
                (vm_offset_t *) &buffer, size, 0, VMFS_ANY_SPACE,
                VM_PROT_ALL, VM_PROT_ALL, 0);
        if (error != KERN_SUCCESS) {
#else /* __rtems__ */
        (void)error;
        buffer = malloc(size, M_TEMP, M_WAITOK | M_ZERO);
        if (buffer == NULL) {
#endif /* __rtems__ */
                if ((cpipe->pipe_buffer.buffer == NULL) &&
                        (size > SMALL_PIPE_SIZE)) {
                        size = SMALL_PIPE_SIZE;
                        pipefragretry++;
                        goto retry;
                }
                if (cpipe->pipe_buffer.buffer == NULL) {
                        pipeallocfail++;
                        if (ppsratecheck(&lastfail, &curfail, 1))
                                printf("kern.ipc.maxpipekva exceeded; see tuning(7)\n");
                } else {
                        piperesizefail++;
                }
                return (ENOMEM);
        }

        /* copy data, then free old resources if we're resizing */
        if (cnt > 0) {
                if (cpipe->pipe_buffer.in <= cpipe->pipe_buffer.out) {
                        firstseg = cpipe->pipe_buffer.size - cpipe->pipe_buffer.out;
                        bcopy(&cpipe->pipe_buffer.buffer[cpipe->pipe_buffer.out],
                                buffer, firstseg);
                        if ((cnt - firstseg) > 0)
                                bcopy(cpipe->pipe_buffer.buffer, &buffer[firstseg],
                                        cpipe->pipe_buffer.in);
                } else {
                        bcopy(&cpipe->pipe_buffer.buffer[cpipe->pipe_buffer.out],
                                buffer, cnt);
                }
        }
        pipe_free_kmem(cpipe);
        cpipe->pipe_buffer.buffer = buffer;
        cpipe->pipe_buffer.size = size;
        cpipe->pipe_buffer.in = cnt;
        cpipe->pipe_buffer.out = 0;
        cpipe->pipe_buffer.cnt = cnt;
        atomic_add_long(&amountpipekva, cpipe->pipe_buffer.size);
        return (0);
}
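
/*
 * Worked example (added commentary) for the wraparound copy above: with
 * old size 16, out = 12 and in = 4, the cnt = 8 queued bytes wrap.  The
 * first bcopy moves firstseg = 16 - 12 = 4 bytes from offset 12 to the
 * start of the new buffer; the second moves the remaining
 * cnt - firstseg = in = 4 bytes from offset 0 in behind them.  The new
 * buffer therefore starts un-wrapped: out = 0 and in = cnt = 8.
 */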

/*
 * Wrapper for pipespace_new() that performs locking assertions.
 */
static int
pipespace(cpipe, size)
        struct pipe *cpipe;
        int size;
{

        KASSERT(cpipe->pipe_state & PIPE_LOCKFL,
                ("Unlocked pipe passed to pipespace"));
        return (pipespace_new(cpipe, size));
}

/*
 * lock a pipe for I/O, blocking other access
 */
static __inline int
pipelock(cpipe, catch)
        struct pipe *cpipe;
        int catch;
{
        int error;

        PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
        while (cpipe->pipe_state & PIPE_LOCKFL) {
                cpipe->pipe_state |= PIPE_LWANT;
                error = msleep(cpipe, PIPE_MTX(cpipe),
                    catch ? (PRIBIO | PCATCH) : PRIBIO,
                    "pipelk", 0);
                if (error != 0)
                        return (error);
        }
        cpipe->pipe_state |= PIPE_LOCKFL;
        return (0);
}

/*
 * unlock a pipe I/O lock
 */
static __inline void
pipeunlock(cpipe)
        struct pipe *cpipe;
{

        PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
        KASSERT(cpipe->pipe_state & PIPE_LOCKFL,
                ("Unlocked pipe passed to pipeunlock"));
        cpipe->pipe_state &= ~PIPE_LOCKFL;
        if (cpipe->pipe_state & PIPE_LWANT) {
                cpipe->pipe_state &= ~PIPE_LWANT;
                wakeup(cpipe);
        }
}

void
pipeselwakeup(cpipe)
        struct pipe *cpipe;
{

        PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
        if (cpipe->pipe_state & PIPE_SEL) {
                selwakeuppri(&cpipe->pipe_sel, PSOCK);
                if (!SEL_WAITING(&cpipe->pipe_sel))
                        cpipe->pipe_state &= ~PIPE_SEL;
        }
#ifndef __rtems__
        if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio)
                pgsigio(&cpipe->pipe_sigio, SIGIO, 0);
#endif /* __rtems__ */
        KNOTE_LOCKED(&cpipe->pipe_sel.si_note, 0);
}

/*
 * Initialize and allocate VM and memory for pipe.  The structure
 * will start out zero'd from the ctor, so we just manage the kmem.
 */
static void
pipe_create(pipe, backing)
        struct pipe *pipe;
        int backing;
{

        if (backing) {
                /*
                 * Note that these functions can fail if pipe map is exhausted
                 * (as a result of too many pipes created), but we ignore the
                 * error as it is not fatal and could be provoked by
                 * unprivileged users. The only consequence is worse performance
                 * with given pipe.
                 */
                if (amountpipekva > maxpipekva / 2)
                        (void)pipespace_new(pipe, SMALL_PIPE_SIZE);
                else
                        (void)pipespace_new(pipe, PIPE_SIZE);
        }

        pipe->pipe_ino = -1;
}

/* ARGSUSED */
static int
pipe_read(fp, uio, active_cred, flags, td)
        struct file *fp;
        struct uio *uio;
        struct ucred *active_cred;
        struct thread *td;
        int flags;
{
        struct pipe *rpipe;
        int error;
        int nread = 0;
        int size;

        rpipe = fp->f_data;
        PIPE_LOCK(rpipe);
        ++rpipe->pipe_busy;
        error = pipelock(rpipe, 1);
        if (error)
                goto unlocked_error;

#ifdef MAC
        error = mac_pipe_check_read(active_cred, rpipe->pipe_pair);
        if (error)
                goto locked_error;
#endif
        if (amountpipekva > (3 * maxpipekva) / 4) {
                if (!(rpipe->pipe_state & PIPE_DIRECTW) &&
                        (rpipe->pipe_buffer.size > SMALL_PIPE_SIZE) &&
                        (rpipe->pipe_buffer.cnt <= SMALL_PIPE_SIZE) &&
                        (piperesizeallowed == 1)) {
                        PIPE_UNLOCK(rpipe);
                        pipespace(rpipe, SMALL_PIPE_SIZE);
                        PIPE_LOCK(rpipe);
                }
        }

        while (uio->uio_resid) {
                /*
                 * normal pipe buffer receive
                 */
                if (rpipe->pipe_buffer.cnt > 0) {
                        size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
                        if (size > rpipe->pipe_buffer.cnt)
                                size = rpipe->pipe_buffer.cnt;
                        if (size > uio->uio_resid)
                                size = uio->uio_resid;

                        PIPE_UNLOCK(rpipe);
                        error = uiomove(
                            &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
                            size, uio);
                        PIPE_LOCK(rpipe);
                        if (error)
                                break;

                        rpipe->pipe_buffer.out += size;
                        if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
                                rpipe->pipe_buffer.out = 0;

                        rpipe->pipe_buffer.cnt -= size;

                        /*
                         * If there is no more to read in the pipe, reset
                         * its pointers to the beginning.  This improves
                         * cache hit stats.
                         */
                        if (rpipe->pipe_buffer.cnt == 0) {
                                rpipe->pipe_buffer.in = 0;
                                rpipe->pipe_buffer.out = 0;
                        }
                        nread += size;
#ifndef PIPE_NODIRECT
                /*
                 * Direct copy, bypassing a kernel buffer.
                 */
                } else if ((size = rpipe->pipe_map.cnt) &&
                           (rpipe->pipe_state & PIPE_DIRECTW)) {
                        if (size > uio->uio_resid)
                                size = (u_int) uio->uio_resid;

                        PIPE_UNLOCK(rpipe);
                        error = uiomove_fromphys(rpipe->pipe_map.ms,
                            rpipe->pipe_map.pos, size, uio);
                        PIPE_LOCK(rpipe);
                        if (error)
                                break;
                        nread += size;
                        rpipe->pipe_map.pos += size;
                        rpipe->pipe_map.cnt -= size;
                        if (rpipe->pipe_map.cnt == 0) {
                                rpipe->pipe_state &= ~(PIPE_DIRECTW|PIPE_WANTW);
                                wakeup(rpipe);
                        }
#endif
                } else {
                        /*
                         * detect EOF condition
                         * read returns 0 on EOF, no need to set error
                         */
                        if (rpipe->pipe_state & PIPE_EOF)
                                break;

                        /*
                         * If the "write-side" has been blocked, wake it up now.
                         */
                        if (rpipe->pipe_state & PIPE_WANTW) {
                                rpipe->pipe_state &= ~PIPE_WANTW;
                                wakeup(rpipe);
                        }

                        /*
                         * Break if some data was read.
                         */
                        if (nread > 0)
                                break;

                        /*
                         * Unlock the pipe buffer for our remaining processing.
                         * We will either break out with an error or we will
                         * sleep and relock to loop.
                         */
                        pipeunlock(rpipe);

                        /*
                         * Handle non-blocking mode operation or
                         * wait for more data.
                         */
#ifndef __rtems__
                        if (fp->f_flag & FNONBLOCK) {
#else /* __rtems__ */
                        if (rtems_bsd_libio_flags_to_fflag(fp->f_io.flags) & FNONBLOCK) {
#endif /* __rtems__ */
                                error = EAGAIN;
                        } else {
                                rpipe->pipe_state |= PIPE_WANTR;
                                if ((error = msleep(rpipe, PIPE_MTX(rpipe),
                                    PRIBIO | PCATCH,
                                    "piperd", 0)) == 0)
                                        error = pipelock(rpipe, 1);
                        }
                        if (error)
                                goto unlocked_error;
                }
        }
#ifdef MAC
locked_error:
#endif
        pipeunlock(rpipe);

        /* XXX: should probably do this before getting any locks. */
        if (error == 0)
#ifndef __rtems__
                vfs_timestamp(&rpipe->pipe_atime);
#else /* __rtems__ */
                rpipe->pipe_atime.tv_sec = time(NULL);
#endif /* __rtems__ */
unlocked_error:
        --rpipe->pipe_busy;

        /*
         * PIPE_WANT processing only makes sense if pipe_busy is 0.
         */
        if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
                rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
                wakeup(rpipe);
        } else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) {
                /*
                 * Handle write blocking hysteresis.
                 */
                if (rpipe->pipe_state & PIPE_WANTW) {
                        rpipe->pipe_state &= ~PIPE_WANTW;
                        wakeup(rpipe);
                }
        }

        if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF)
                pipeselwakeup(rpipe);

        PIPE_UNLOCK(rpipe);
        return (error);
}
#ifdef __rtems__
static ssize_t
rtems_bsd_pipe_read(rtems_libio_t *iop, void *buffer, size_t count)
{
        struct thread *td = rtems_bsd_get_curthread_or_null();
        struct file *fp = rtems_bsd_iop_to_fp(iop);
        struct iovec iov = {
                .iov_base = buffer,
                .iov_len = count
        };
        struct uio auio = {
                .uio_iov = &iov,
                .uio_iovcnt = 1,
                .uio_offset = 0,
                .uio_resid = count,
                .uio_segflg = UIO_USERSPACE,
                .uio_rw = UIO_READ,
                .uio_td = td
        };
        int error;

        if (td != NULL) {
                error = pipe_read(fp, &auio, NULL, 0, NULL);
        } else {
                error = ENOMEM;
        }

        if (error == 0) {
                return (count - auio.uio_resid);
        } else {
                rtems_set_errno_and_return_minus_one(error);
        }
}
#endif /* __rtems__ */

#ifndef PIPE_NODIRECT
/*
 * Map the sending process's buffer into kernel space and wire it.
 * This is similar to a physical write operation.
 */
static int
pipe_build_write_buffer(wpipe, uio)
        struct pipe *wpipe;
        struct uio *uio;
{
        u_int size;
        int i;

        PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);
        KASSERT(wpipe->pipe_state & PIPE_DIRECTW,
                ("Clone attempt on non-direct write pipe!"));

        if (uio->uio_iov->iov_len > wpipe->pipe_buffer.size)
                size = wpipe->pipe_buffer.size;
        else
                size = uio->uio_iov->iov_len;

        if ((i = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
            (vm_offset_t)uio->uio_iov->iov_base, size, VM_PROT_READ,
            wpipe->pipe_map.ms, PIPENPAGES)) < 0)
                return (EFAULT);

        /*
         * set up the control block
         */
        wpipe->pipe_map.npages = i;
        wpipe->pipe_map.pos =
            ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK;
        wpipe->pipe_map.cnt = size;

        /*
         * and update the uio data
         */
        uio->uio_iov->iov_len -= size;
        uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + size;
        if (uio->uio_iov->iov_len == 0)
                uio->uio_iov++;
        uio->uio_resid -= size;
        uio->uio_offset += size;
        return (0);
}
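
/*
 * Worked example (added commentary) for the setup above: with 4 KiB
 * pages, a 16 KiB direct write starting at user address 0x10234 holds
 * the five pages covering 0x10000-0x14fff (PIPENPAGES permitting),
 * pipe_map.pos becomes 0x234 (the data's offset within the first held
 * page) and pipe_map.cnt becomes 16384.  The reader then copies
 * straight out of the held pages via uiomove_fromphys().
 */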

/*
 * unmap and unwire the process buffer
 */
static void
pipe_destroy_write_buffer(wpipe)
        struct pipe *wpipe;
{

        PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
        vm_page_unhold_pages(wpipe->pipe_map.ms, wpipe->pipe_map.npages);
        wpipe->pipe_map.npages = 0;
}

/*
 * In the case of a signal, the writing process might go away.  This
 * code copies the data into the circular buffer so that the source
 * pages can be freed without loss of data.
 */
static void
pipe_clone_write_buffer(wpipe)
        struct pipe *wpipe;
{
        struct uio uio;
        struct iovec iov;
        int size;
        int pos;

        PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
        size = wpipe->pipe_map.cnt;
        pos = wpipe->pipe_map.pos;

        wpipe->pipe_buffer.in = size;
        wpipe->pipe_buffer.out = 0;
        wpipe->pipe_buffer.cnt = size;
        wpipe->pipe_state &= ~PIPE_DIRECTW;

        PIPE_UNLOCK(wpipe);
        iov.iov_base = wpipe->pipe_buffer.buffer;
        iov.iov_len = size;
        uio.uio_iov = &iov;
        uio.uio_iovcnt = 1;
        uio.uio_offset = 0;
        uio.uio_resid = size;
        uio.uio_segflg = UIO_SYSSPACE;
        uio.uio_rw = UIO_READ;
        uio.uio_td = curthread;
        uiomove_fromphys(wpipe->pipe_map.ms, pos, size, &uio);
        PIPE_LOCK(wpipe);
        pipe_destroy_write_buffer(wpipe);
}

/*
 * This implements the pipe buffer write mechanism.  Note that only
 * a direct write OR a normal pipe write can be pending at any given time.
 * If there are any characters in the pipe buffer, the direct write will
 * be deferred until the receiving process grabs all of the bytes from
 * the pipe buffer.  Then the direct mapping write is set-up.
 */
static int
pipe_direct_write(wpipe, uio)
        struct pipe *wpipe;
        struct uio *uio;
{
        int error;

retry:
        PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
        error = pipelock(wpipe, 1);
        if (error != 0)
                goto error1;
        if ((wpipe->pipe_state & PIPE_EOF) != 0) {
                error = EPIPE;
                pipeunlock(wpipe);
                goto error1;
        }
        while (wpipe->pipe_state & PIPE_DIRECTW) {
                if (wpipe->pipe_state & PIPE_WANTR) {
                        wpipe->pipe_state &= ~PIPE_WANTR;
                        wakeup(wpipe);
                }
                pipeselwakeup(wpipe);
                wpipe->pipe_state |= PIPE_WANTW;
                pipeunlock(wpipe);
                error = msleep(wpipe, PIPE_MTX(wpipe),
                    PRIBIO | PCATCH, "pipdww", 0);
                if (error)
                        goto error1;
                else
                        goto retry;
        }
        wpipe->pipe_map.cnt = 0;        /* transfer not ready yet */
        if (wpipe->pipe_buffer.cnt > 0) {
                if (wpipe->pipe_state & PIPE_WANTR) {
                        wpipe->pipe_state &= ~PIPE_WANTR;
                        wakeup(wpipe);
                }
                pipeselwakeup(wpipe);
                wpipe->pipe_state |= PIPE_WANTW;
                pipeunlock(wpipe);
                error = msleep(wpipe, PIPE_MTX(wpipe),
                    PRIBIO | PCATCH, "pipdwc", 0);
                if (error)
                        goto error1;
                else
                        goto retry;
        }

        wpipe->pipe_state |= PIPE_DIRECTW;

        PIPE_UNLOCK(wpipe);
        error = pipe_build_write_buffer(wpipe, uio);
        PIPE_LOCK(wpipe);
        if (error) {
                wpipe->pipe_state &= ~PIPE_DIRECTW;
                pipeunlock(wpipe);
                goto error1;
        }

        error = 0;
        while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
                if (wpipe->pipe_state & PIPE_EOF) {
                        pipe_destroy_write_buffer(wpipe);
                        pipeselwakeup(wpipe);
                        pipeunlock(wpipe);
                        error = EPIPE;
                        goto error1;
                }
                if (wpipe->pipe_state & PIPE_WANTR) {
                        wpipe->pipe_state &= ~PIPE_WANTR;
                        wakeup(wpipe);
                }
                pipeselwakeup(wpipe);
                wpipe->pipe_state |= PIPE_WANTW;
                pipeunlock(wpipe);
                error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH,
                    "pipdwt", 0);
                pipelock(wpipe, 0);
        }

        if (wpipe->pipe_state & PIPE_EOF)
                error = EPIPE;
        if (wpipe->pipe_state & PIPE_DIRECTW) {
                /*
                 * this bit of trickery substitutes a kernel buffer for
                 * the process that might be going away.
                 */
                pipe_clone_write_buffer(wpipe);
        } else {
                pipe_destroy_write_buffer(wpipe);
        }
        pipeunlock(wpipe);
        return (error);

error1:
        wakeup(wpipe);
        return (error);
}
#endif
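
/*
 * Added commentary: a direct write can thus sleep in up to three places,
 * identifiable by wait-channel name: "pipdww" while a previous direct
 * write drains, "pipdwc" while buffered data drains, and "pipdwt" while
 * the reader consumes the pinned pages.  A signal during the last phase
 * leaves PIPE_DIRECTW set and triggers pipe_clone_write_buffer() above.
 */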

static int
pipe_write(fp, uio, active_cred, flags, td)
        struct file *fp;
        struct uio *uio;
        struct ucred *active_cred;
        struct thread *td;
        int flags;
{
        int error = 0;
        int desiredsize;
        ssize_t orig_resid;
        struct pipe *wpipe, *rpipe;

        rpipe = fp->f_data;
        wpipe = PIPE_PEER(rpipe);
        PIPE_LOCK(rpipe);
        error = pipelock(wpipe, 1);
        if (error) {
                PIPE_UNLOCK(rpipe);
                return (error);
        }
        /*
         * detect loss of pipe read side, issue SIGPIPE if lost.
         */
        if (wpipe->pipe_present != PIPE_ACTIVE ||
            (wpipe->pipe_state & PIPE_EOF)) {
                pipeunlock(wpipe);
                PIPE_UNLOCK(rpipe);
                return (EPIPE);
        }
#ifdef MAC
        error = mac_pipe_check_write(active_cred, wpipe->pipe_pair);
        if (error) {
                pipeunlock(wpipe);
                PIPE_UNLOCK(rpipe);
                return (error);
        }
#endif
        ++wpipe->pipe_busy;

        /* Choose a larger size if it's advantageous */
        desiredsize = max(SMALL_PIPE_SIZE, wpipe->pipe_buffer.size);
        while (desiredsize < wpipe->pipe_buffer.cnt + uio->uio_resid) {
                if (piperesizeallowed != 1)
                        break;
                if (amountpipekva > maxpipekva / 2)
                        break;
                if (desiredsize == BIG_PIPE_SIZE)
                        break;
                desiredsize = desiredsize * 2;
        }

        /* Choose a smaller size if we're in an OOM situation */
        if ((amountpipekva > (3 * maxpipekva) / 4) &&
                (wpipe->pipe_buffer.size > SMALL_PIPE_SIZE) &&
                (wpipe->pipe_buffer.cnt <= SMALL_PIPE_SIZE) &&
                (piperesizeallowed == 1))
                desiredsize = SMALL_PIPE_SIZE;

        /* Resize if the above determined that a new size was necessary */
        if ((desiredsize != wpipe->pipe_buffer.size) &&
                ((wpipe->pipe_state & PIPE_DIRECTW) == 0)) {
                PIPE_UNLOCK(wpipe);
                pipespace(wpipe, desiredsize);
                PIPE_LOCK(wpipe);
        }
        if (wpipe->pipe_buffer.size == 0) {
                /*
                 * This can only happen for reverse direction use of pipes
                 * in a complete OOM situation.
                 */
                error = ENOMEM;
                --wpipe->pipe_busy;
                pipeunlock(wpipe);
                PIPE_UNLOCK(wpipe);
                return (error);
        }

        pipeunlock(wpipe);

        orig_resid = uio->uio_resid;

        while (uio->uio_resid) {
                int space;

                pipelock(wpipe, 0);
                if (wpipe->pipe_state & PIPE_EOF) {
                        pipeunlock(wpipe);
                        error = EPIPE;
                        break;
                }
#ifndef PIPE_NODIRECT
                /*
                 * If the transfer is large, we can gain performance if
                 * we do process-to-process copies directly.
                 * If the write is non-blocking, we don't use the
                 * direct write mechanism.
                 *
                 * The direct write mechanism will detect the reader going
                 * away on us.
                 */
                if (uio->uio_segflg == UIO_USERSPACE &&
                    uio->uio_iov->iov_len >= PIPE_MINDIRECT &&
                    wpipe->pipe_buffer.size >= PIPE_MINDIRECT &&
                    (fp->f_flag & FNONBLOCK) == 0) {
                        pipeunlock(wpipe);
                        error = pipe_direct_write(wpipe, uio);
                        if (error)
                                break;
                        continue;
                }
#endif

                /*
                 * Pipe buffered writes cannot be coincidental with
                 * direct writes.  We wait until the currently executing
                 * direct write is completed before we start filling the
                 * pipe buffer.  We break out if a signal occurs or the
                 * reader goes away.
                 */
                if (wpipe->pipe_state & PIPE_DIRECTW) {
                        if (wpipe->pipe_state & PIPE_WANTR) {
                                wpipe->pipe_state &= ~PIPE_WANTR;
                                wakeup(wpipe);
                        }
                        pipeselwakeup(wpipe);
                        wpipe->pipe_state |= PIPE_WANTW;
                        pipeunlock(wpipe);
                        error = msleep(wpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH,
                            "pipbww", 0);
                        if (error)
                                break;
                        else
                                continue;
                }

                space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

                /* Writes of size <= PIPE_BUF must be atomic. */
                if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF))
                        space = 0;
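                /*
                 * Added commentary: e.g. a 512-byte write (<= PIPE_BUF)
                 * that finds only 256 free bytes must not be split across
                 * reader wakeups; forcing space to 0 makes the writer
                 * sleep until the whole request fits, which preserves the
                 * POSIX atomicity guarantee for small writes.
                 */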
1292
1293                if (space > 0) {
1294                        int size;       /* Transfer size */
1295                        int segsize;    /* first segment to transfer */
1296
1297                        /*
1298                         * Transfer size is minimum of uio transfer
1299                         * and free space in pipe buffer.
1300                         */
1301                        if (space > uio->uio_resid)
1302                                size = uio->uio_resid;
1303                        else
1304                                size = space;
1305                        /*
1306                         * First segment to transfer is minimum of
1307                         * transfer size and contiguous space in
1308                         * pipe buffer.  If first segment to transfer
1309                         * is less than the transfer size, we've got
1310                         * a wraparound in the buffer.
1311                         */
1312                        segsize = wpipe->pipe_buffer.size -
1313                                wpipe->pipe_buffer.in;
1314                        if (segsize > size)
1315                                segsize = size;
1316
1317                        /* Transfer first segment */
1318
1319                        PIPE_UNLOCK(rpipe);
1320                        error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
1321                                        segsize, uio);
1322                        PIPE_LOCK(rpipe);
1323
1324                        if (error == 0 && segsize < size) {
1325                                KASSERT(wpipe->pipe_buffer.in + segsize ==
1326                                        wpipe->pipe_buffer.size,
1327                                        ("Pipe buffer wraparound disappeared"));
1328                                /*
1329                                 * Transfer remaining part now, to
1330                                 * support atomic writes.  Wraparound
1331                                 * happened.
1332                                 */
1333
1334                                PIPE_UNLOCK(rpipe);
1335                                error = uiomove(
1336                                    &wpipe->pipe_buffer.buffer[0],
1337                                    size - segsize, uio);
1338                                PIPE_LOCK(rpipe);
1339                        }
1340                        if (error == 0) {
1341                                wpipe->pipe_buffer.in += size;
1342                                if (wpipe->pipe_buffer.in >=
1343                                    wpipe->pipe_buffer.size) {
1344                                        KASSERT(wpipe->pipe_buffer.in ==
1345                                                size - segsize +
1346                                                wpipe->pipe_buffer.size,
1347                                                ("Expected wraparound bad"));
1348                                        wpipe->pipe_buffer.in = size - segsize;
1349                                }
1350
1351                                wpipe->pipe_buffer.cnt += size;
1352                                KASSERT(wpipe->pipe_buffer.cnt <=
1353                                        wpipe->pipe_buffer.size,
1354                                        ("Pipe buffer overflow"));
1355                        }
1356                        pipeunlock(wpipe);
1357                        if (error != 0)
1358                                break;
1359                } else {
1360                        /*
1361                         * If the "read-side" has been blocked, wake it up now.
1362                         */
1363                        if (wpipe->pipe_state & PIPE_WANTR) {
1364                                wpipe->pipe_state &= ~PIPE_WANTR;
1365                                wakeup(wpipe);
1366                        }
1367
1368                        /*
1369                         * don't block on non-blocking I/O
1370                         */
1371#ifndef __rtems__
1372                        if (fp->f_flag & FNONBLOCK) {
1373#else /* __rtems__ */
1374                        if (rtems_bsd_libio_flags_to_fflag(fp->f_io.flags) & FNONBLOCK) {
1375#endif /* __rtems__ */
1376                                error = EAGAIN;
1377                                pipeunlock(wpipe);
1378                                break;
1379                        }
1380
1381                        /*
1382                         * We have no more space and have something to offer,
1383                         * wake up select/poll.
1384                         */
1385                        pipeselwakeup(wpipe);
1386
1387                        wpipe->pipe_state |= PIPE_WANTW;
1388                        pipeunlock(wpipe);
1389                        error = msleep(wpipe, PIPE_MTX(rpipe),
1390                            PRIBIO | PCATCH, "pipewr", 0);
1391                        if (error != 0)
1392                                break;
1393                }
1394        }
1395
1396        pipelock(wpipe, 0);
1397        --wpipe->pipe_busy;
1398
1399        if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
1400                wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
1401                wakeup(wpipe);
1402        } else if (wpipe->pipe_buffer.cnt > 0) {
1403                /*
1404                 * If we have put any characters in the buffer, we wake up
1405                 * the reader.
1406                 */
1407                if (wpipe->pipe_state & PIPE_WANTR) {
1408                        wpipe->pipe_state &= ~PIPE_WANTR;
1409                        wakeup(wpipe);
1410                }
1411        }
1412
1413        /*
1414         * Don't return EPIPE if any byte was written.
1415         * EINTR and other interrupts are handled by generic I/O layer.
1416         * Do not pretend that I/O succeeded for obvious user error
1417         * like EFAULT.
1418         */
1419        if (uio->uio_resid != orig_resid && error == EPIPE)
1420                error = 0;
1421
1422        if (error == 0)
1423#ifndef __rtems__
1424                vfs_timestamp(&wpipe->pipe_mtime);
1425#else /* __rtems__ */
1426                wpipe->pipe_mtime.tv_sec = time(NULL);
1427#endif /* __rtems__ */
1428
1429        /*
1430         * We have something to offer,
1431         * wake up select/poll.
1432         */
1433        if (wpipe->pipe_buffer.cnt)
1434                pipeselwakeup(wpipe);
1435
1436        pipeunlock(wpipe);
1437        PIPE_UNLOCK(rpipe);
1438        return (error);
1439}
1440#ifdef __rtems__
1441static ssize_t
1442rtems_bsd_pipe_write(rtems_libio_t *iop, const void *buffer, size_t count)
1443{
1444        struct thread *td = rtems_bsd_get_curthread_or_null();
1445        struct file *fp = rtems_bsd_iop_to_fp(iop);
1446        struct iovec iov = {
1447                .iov_base = __DECONST(void *, buffer),
1448                .iov_len = count
1449        };
1450        struct uio auio = {
1451                .uio_iov = &iov,
1452                .uio_iovcnt = 1,
1453                .uio_offset = 0,
1454                .uio_resid = count,
1455                .uio_segflg = UIO_USERSPACE,
1456                .uio_rw = UIO_WRITE,
1457                .uio_td = td
1458        };
1459        int error;
1460
1461        if (td != NULL) {
1462                error = pipe_write(fp, &auio, NULL, 0, NULL);
1463        } else {
1464                error = ENOMEM;
1465        }
1466
1467        if (error == 0) {
1468                return (count - auio.uio_resid);
1469        } else {
1470                rtems_set_errno_and_return_minus_one(error);
1471        }
1472}
1473#endif /* __rtems__ */
1474
1475/* ARGSUSED */
1476#ifndef __rtems__
1477static int
1478pipe_truncate(fp, length, active_cred, td)
1479        struct file *fp;
1480        off_t length;
1481        struct ucred *active_cred;
1482        struct thread *td;
1483{
1484        struct pipe *cpipe;
1485        int error;
1486
1487        cpipe = fp->f_data;
1488        if (cpipe->pipe_state & PIPE_NAMED)
1489                error = vnops.fo_truncate(fp, length, active_cred, td);
1490        else
1491                error = invfo_truncate(fp, length, active_cred, td);
1492        return (error);
1493}
1494#endif /* __rtems__ */
1495
1496/*
1497 * we implement a very minimal set of ioctls for compatibility with sockets.
1498 */
1499static int
1500pipe_ioctl(fp, cmd, data, active_cred, td)
1501        struct file *fp;
1502        u_long cmd;
1503        void *data;
1504        struct ucred *active_cred;
1505        struct thread *td;
1506{
1507        struct pipe *mpipe = fp->f_data;
1508        int error;
1509
1510        PIPE_LOCK(mpipe);
1511
1512#ifdef MAC
1513        error = mac_pipe_check_ioctl(active_cred, mpipe->pipe_pair, cmd, data);
1514        if (error) {
1515                PIPE_UNLOCK(mpipe);
1516                return (error);
1517        }
1518#endif
1519
1520        error = 0;
1521        switch (cmd) {
1522
1523        case FIONBIO:
                break;

        case FIOASYNC:
                if (*(int *)data) {
                        mpipe->pipe_state |= PIPE_ASYNC;
                } else {
                        mpipe->pipe_state &= ~PIPE_ASYNC;
                }
                break;

        case FIONREAD:
#ifndef __rtems__
                if (!(fp->f_flag & FREAD)) {
#else /* __rtems__ */
                if (!(rtems_bsd_libio_flags_to_fflag(fp->f_io.flags) & FREAD)) {
#endif /* __rtems__ */
                        *(int *)data = 0;
                        PIPE_UNLOCK(mpipe);
                        return (0);
                }
                if (mpipe->pipe_state & PIPE_DIRECTW)
                        *(int *)data = mpipe->pipe_map.cnt;
                else
                        *(int *)data = mpipe->pipe_buffer.cnt;
                break;

        case FIOSETOWN:
                PIPE_UNLOCK(mpipe);
                error = fsetown(*(int *)data, &mpipe->pipe_sigio);
                goto out_unlocked;

        case FIOGETOWN:
                *(int *)data = fgetown(&mpipe->pipe_sigio);
                break;

        /* This is deprecated, FIOSETOWN should be used instead. */
        case TIOCSPGRP:
                PIPE_UNLOCK(mpipe);
                error = fsetown(-(*(int *)data), &mpipe->pipe_sigio);
                goto out_unlocked;

        /* This is deprecated, FIOGETOWN should be used instead. */
        case TIOCGPGRP:
                *(int *)data = -fgetown(&mpipe->pipe_sigio);
                break;

        default:
                error = ENOTTY;
                break;
        }
        PIPE_UNLOCK(mpipe);
out_unlocked:
        return (error);
}
#ifdef __rtems__
static int
rtems_bsd_pipe_ioctl(rtems_libio_t *iop, ioctl_command_t request, void *buffer)
{
        struct thread *td = rtems_bsd_get_curthread_or_null();
        struct file *fp = rtems_bsd_iop_to_fp(iop);
        int error;

        if (td != NULL) {
                error = pipe_ioctl(fp, request, buffer, NULL, td);
        } else {
                error = ENOMEM;
        }

        return rtems_bsd_error_to_status_and_errno(error);
}
#endif /* __rtems__ */
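/*
 * Usage sketch (hypothetical application code, not part of this
 * file): FIONREAD returns the byte count computed above, i.e. how
 * much a read(2) could deliver right now without blocking.
 *
 *        #include <stdio.h>
 *        #include <sys/ioctl.h>
 *
 *        static void
 *        report_pending(int readfd)
 *        {
 *                int avail = 0;
 *
 *                if (ioctl(readfd, FIONREAD, &avail) == 0)
 *                        printf("%d byte(s) buffered in the pipe\n", avail);
 *        }
 */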

static int
pipe_poll(struct file *fp, int events, struct ucred *active_cred,
    struct thread *td)
{
        struct pipe *rpipe;
        struct pipe *wpipe;
        int levents, revents;
#ifdef MAC
        int error;
#endif

        revents = 0;
        rpipe = fp->f_data;
        wpipe = PIPE_PEER(rpipe);
        PIPE_LOCK(rpipe);
#ifdef MAC
        error = mac_pipe_check_poll(active_cred, rpipe->pipe_pair);
        if (error)
                goto locked_error;
#endif
#ifndef __rtems__
        if (fp->f_flag & FREAD && events & (POLLIN | POLLRDNORM))
#else /* __rtems__ */
        if (rtems_bsd_libio_flags_to_fflag(fp->f_io.flags) & FREAD &&
            events & (POLLIN | POLLRDNORM))
#endif /* __rtems__ */
                if ((rpipe->pipe_state & PIPE_DIRECTW) ||
                    (rpipe->pipe_buffer.cnt > 0))
                        revents |= events & (POLLIN | POLLRDNORM);

#ifndef __rtems__
        if (fp->f_flag & FWRITE && events & (POLLOUT | POLLWRNORM))
#else /* __rtems__ */
        if (rtems_bsd_libio_flags_to_fflag(fp->f_io.flags) & FWRITE &&
            events & (POLLOUT | POLLWRNORM))
#endif /* __rtems__ */
                if (wpipe->pipe_present != PIPE_ACTIVE ||
                    (wpipe->pipe_state & PIPE_EOF) ||
                    (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
                     ((wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF ||
                         wpipe->pipe_buffer.size == 0)))
                        revents |= events & (POLLOUT | POLLWRNORM);

        levents = events &
            (POLLIN | POLLINIGNEOF | POLLPRI | POLLRDNORM | POLLRDBAND);
#ifndef __rtems__
        if (rpipe->pipe_state & PIPE_NAMED && fp->f_flag & FREAD && levents &&
            fp->f_seqcount == rpipe->pipe_wgen)
#else /* __rtems__ */
        if (rpipe->pipe_state & PIPE_NAMED &&
            rtems_bsd_libio_flags_to_fflag(fp->f_io.flags) & FREAD && levents)
#endif /* __rtems__ */
                events |= POLLINIGNEOF;

        if ((events & POLLINIGNEOF) == 0) {
                if (rpipe->pipe_state & PIPE_EOF) {
                        revents |= (events & (POLLIN | POLLRDNORM));
                        if (wpipe->pipe_present != PIPE_ACTIVE ||
                            (wpipe->pipe_state & PIPE_EOF))
                                revents |= POLLHUP;
                }
        }

        if (revents == 0) {
#ifndef __rtems__
                if (fp->f_flag & FREAD && events & (POLLIN | POLLRDNORM)) {
#else /* __rtems__ */
                if (rtems_bsd_libio_flags_to_fflag(fp->f_io.flags) & FREAD &&
                    events & (POLLIN | POLLRDNORM)) {
#endif /* __rtems__ */
                        selrecord(td, &rpipe->pipe_sel);
                        if (SEL_WAITING(&rpipe->pipe_sel))
                                rpipe->pipe_state |= PIPE_SEL;
                }

#ifndef __rtems__
                if (fp->f_flag & FWRITE && events & (POLLOUT | POLLWRNORM)) {
#else /* __rtems__ */
                if (rtems_bsd_libio_flags_to_fflag(fp->f_io.flags) & FWRITE &&
                    events & (POLLOUT | POLLWRNORM)) {
#endif /* __rtems__ */
                        selrecord(td, &wpipe->pipe_sel);
                        if (SEL_WAITING(&wpipe->pipe_sel))
                                wpipe->pipe_state |= PIPE_SEL;
                }
        }
#ifdef MAC
locked_error:
#endif
        PIPE_UNLOCK(rpipe);

        return (revents);
}
#ifdef __rtems__
static int
rtems_bsd_pipe_poll(rtems_libio_t *iop, int events)
{
        struct thread *td = rtems_bsd_get_curthread_or_null();
        struct file *fp = rtems_bsd_iop_to_fp(iop);
        int error;

        if (td != NULL) {
                error = pipe_poll(fp, events, NULL, td);
        } else {
                error = ENOMEM;
        }

        return error;
}
#endif /* __rtems__ */
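/*
 * Usage sketch (hypothetical, assuming a POSIX environment): per the
 * poll logic above, a reader sees POLLIN once data or EOF is pending
 * and POLLHUP once the write side is completely gone.
 *
 *        #include <poll.h>
 *
 *        static int
 *        wait_readable(int readfd, int timeout_ms)
 *        {
 *                struct pollfd pfd = { .fd = readfd, .events = POLLIN };
 *
 *                if (poll(&pfd, 1, timeout_ms) <= 0)
 *                        return (0);  // timeout or error
 *                return ((pfd.revents & (POLLIN | POLLHUP)) != 0);
 *        }
 */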

/*
 * We shouldn't need locks here as we're doing a read and this should
 * be a natural race.
 */
#ifndef __rtems__
static int
pipe_stat(struct file *fp, struct stat *ub, struct ucred *active_cred,
    struct thread *td)
{
        struct pipe *pipe;
#else /* __rtems__ */
static int
pipe_stat(struct pipe *pipe, struct stat *ub)
{
#endif /* __rtems__ */
        int new_unr;
#ifdef MAC
        int error;
#endif

#ifndef __rtems__
        pipe = fp->f_data;
#endif /* __rtems__ */
        PIPE_LOCK(pipe);
#ifdef MAC
        error = mac_pipe_check_stat(active_cred, pipe->pipe_pair);
        if (error) {
                PIPE_UNLOCK(pipe);
                return (error);
        }
#endif

        /* For named pipes ask the underlying filesystem. */
        if (pipe->pipe_state & PIPE_NAMED) {
                PIPE_UNLOCK(pipe);
#ifndef __rtems__
                return (vnops.fo_stat(fp, ub, active_cred, td));
#else /* __rtems__ */
                return (ENXIO);
#endif /* __rtems__ */
        }

        /*
         * Lazily allocate an inode number for the pipe.  Most pipe
         * users do not call fstat(2) on the pipe, which means that
         * postponing the inode allocation until it must be returned
         * to userland is useful.  If alloc_unr failed, assign st_ino
         * zero instead of returning an error.
         * Special pipe_ino values:
         *  -1 - not yet initialized;
         *  0  - alloc_unr failed, return 0 as st_ino forever.
         */
        if (pipe->pipe_ino == (ino_t)-1) {
                new_unr = alloc_unr(pipeino_unr);
                if (new_unr != -1)
                        pipe->pipe_ino = new_unr;
                else
                        pipe->pipe_ino = 0;
        }
        PIPE_UNLOCK(pipe);

#ifndef __rtems__
        bzero(ub, sizeof(*ub));
#endif /* __rtems__ */
        ub->st_mode = S_IFIFO;
        ub->st_blksize = PAGE_SIZE;
        if (pipe->pipe_state & PIPE_DIRECTW)
                ub->st_size = pipe->pipe_map.cnt;
        else
                ub->st_size = pipe->pipe_buffer.cnt;
        ub->st_blocks = howmany(ub->st_size, ub->st_blksize);
        ub->st_atim = pipe->pipe_atime;
        ub->st_mtim = pipe->pipe_mtime;
        ub->st_ctim = pipe->pipe_ctime;
#ifndef __rtems__
        ub->st_uid = fp->f_cred->cr_uid;
        ub->st_gid = fp->f_cred->cr_gid;
        ub->st_dev = pipedev_ino;
        ub->st_ino = pipe->pipe_ino;
#else /* __rtems__ */
        ub->st_uid = BSD_DEFAULT_UID;
        ub->st_gid = BSD_DEFAULT_GID;
        ub->st_dev = rtems_filesystem_make_dev_t(0xcc494cd6U, 0x1d970b4dU);
        ub->st_ino = pipe->pipe_ino;
#endif /* __rtems__ */
        /*
         * Left as 0: st_nlink, st_rdev, st_flags, st_gen.
         */
        return (0);
}
#ifdef __rtems__
static int
rtems_bsd_pipe_stat(
        const rtems_filesystem_location_info_t *loc,
        struct stat *buf
)
{
        struct pipe *pipe = rtems_bsd_loc_to_f_data(loc);
        int error = pipe_stat(pipe, buf);

        return rtems_bsd_error_to_status_and_errno(error);
}
#endif /* __rtems__ */
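/*
 * Usage sketch (hypothetical): per the logic above, a caller sees an
 * anonymous pipe as a FIFO whose st_size is the number of bytes
 * currently queued (pipe_buffer.cnt or pipe_map.cnt).
 *
 *        #include <stdint.h>
 *        #include <stdio.h>
 *        #include <sys/stat.h>
 *
 *        static void
 *        show_pipe_stat(int fd)
 *        {
 *                struct stat st;
 *
 *                if (fstat(fd, &st) == 0 && S_ISFIFO(st.st_mode))
 *                        printf("pipe holds %ju byte(s)\n",
 *                            (uintmax_t)st.st_size);
 *        }
 */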

/* ARGSUSED */
static int
pipe_close(struct file *fp, struct thread *td)
{

#ifndef __rtems__
        if (fp->f_vnode != NULL)
                return vnops.fo_close(fp, td);
        fp->f_ops = &badfileops;
#else /* __rtems__ */
        fp->f_io.pathinfo.handlers = &rtems_filesystem_handlers_default;
#endif /* __rtems__ */
        pipe_dtor(fp->f_data);
        fp->f_data = NULL;
        return (0);
}

#ifndef __rtems__
static int
pipe_chmod(struct file *fp, mode_t mode, struct ucred *active_cred,
    struct thread *td)
{
        struct pipe *cpipe;
        int error;

        cpipe = fp->f_data;
        if (cpipe->pipe_state & PIPE_NAMED)
                error = vn_chmod(fp, mode, active_cred, td);
        else
                error = invfo_chmod(fp, mode, active_cred, td);
        return (error);
}

static int
pipe_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred,
    struct thread *td)
{
        struct pipe *cpipe;
        int error;

        cpipe = fp->f_data;
        if (cpipe->pipe_state & PIPE_NAMED)
                error = vn_chown(fp, uid, gid, active_cred, td);
        else
                error = invfo_chown(fp, uid, gid, active_cred, td);
        return (error);
}

static int
pipe_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
{
        struct pipe *pi;

        if (fp->f_type == DTYPE_FIFO)
                return (vn_fill_kinfo(fp, kif, fdp));
        kif->kf_type = KF_TYPE_PIPE;
        pi = fp->f_data;
        kif->kf_un.kf_pipe.kf_pipe_addr = (uintptr_t)pi;
        kif->kf_un.kf_pipe.kf_pipe_peer = (uintptr_t)pi->pipe_peer;
        kif->kf_un.kf_pipe.kf_pipe_buffer_cnt = pi->pipe_buffer.cnt;
        return (0);
}
#endif /* __rtems__ */

static void
pipe_free_kmem(struct pipe *cpipe)
{

        KASSERT(!mtx_owned(PIPE_MTX(cpipe)),
            ("pipe_free_kmem: pipe mutex locked"));

        if (cpipe->pipe_buffer.buffer != NULL) {
                atomic_subtract_long(&amountpipekva, cpipe->pipe_buffer.size);
#ifndef __rtems__
                vm_map_remove(pipe_map,
                    (vm_offset_t)cpipe->pipe_buffer.buffer,
                    (vm_offset_t)cpipe->pipe_buffer.buffer + cpipe->pipe_buffer.size);
#else /* __rtems__ */
                free(cpipe->pipe_buffer.buffer, M_TEMP);
#endif /* __rtems__ */
                cpipe->pipe_buffer.buffer = NULL;
        }
#ifndef PIPE_NODIRECT
        cpipe->pipe_map.cnt = 0;
        cpipe->pipe_map.pos = 0;
        cpipe->pipe_map.npages = 0;
#endif
}

/*
 * Shut down the pipe.
 */
static void
pipeclose(struct pipe *cpipe)
{
        struct pipepair *pp;
        struct pipe *ppipe;

        KASSERT(cpipe != NULL, ("pipeclose: cpipe == NULL"));

        PIPE_LOCK(cpipe);
        pipelock(cpipe, 0);
        pp = cpipe->pipe_pair;

        pipeselwakeup(cpipe);

        /*
         * If the other side is blocked, wake it up saying that
         * we want to close it down.
         */
        cpipe->pipe_state |= PIPE_EOF;
        while (cpipe->pipe_busy) {
                wakeup(cpipe);
                cpipe->pipe_state |= PIPE_WANT;
                pipeunlock(cpipe);
                msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0);
                pipelock(cpipe, 0);
        }

        /*
         * Disconnect from peer, if any.
         */
        ppipe = cpipe->pipe_peer;
        if (ppipe->pipe_present == PIPE_ACTIVE) {
                pipeselwakeup(ppipe);

                ppipe->pipe_state |= PIPE_EOF;
                wakeup(ppipe);
                KNOTE_LOCKED(&ppipe->pipe_sel.si_note, 0);
        }

        /*
         * Mark this endpoint as free.  Release kmem resources.  We
         * don't mark this endpoint as unused until we've finished
         * doing that, or the pipe might disappear out from under
         * us.
         */
        PIPE_UNLOCK(cpipe);
        pipe_free_kmem(cpipe);
        PIPE_LOCK(cpipe);
        cpipe->pipe_present = PIPE_CLOSING;
        pipeunlock(cpipe);

        /*
         * knlist_clear() may sleep, dropping the PIPE_MTX.  Set
         * PIPE_FINALIZED, which allows the other end to free the
         * pipe_pair, only after the knotes are completely dismantled.
         */
        knlist_clear(&cpipe->pipe_sel.si_note, 1);
        cpipe->pipe_present = PIPE_FINALIZED;
        seldrain(&cpipe->pipe_sel);
        knlist_destroy(&cpipe->pipe_sel.si_note);

        /*
         * If both endpoints are now closed, release the memory for the
         * pipe pair.  If not, unlock.
         */
        if (ppipe->pipe_present == PIPE_FINALIZED) {
                PIPE_UNLOCK(cpipe);
#ifdef MAC
                mac_pipe_destroy(pp);
#endif
                uma_zfree(pipe_zone, cpipe->pipe_pair);
        } else
                PIPE_UNLOCK(cpipe);
}

/*ARGSUSED*/
static int
pipe_kqfilter(struct file *fp, struct knote *kn)
{
        struct pipe *cpipe;

        /*
         * If a filter is requested that is not supported by this file
         * descriptor, don't return an error, but also don't ever generate an
         * event.
         */
#ifndef __rtems__
        if ((kn->kn_filter == EVFILT_READ) && !(fp->f_flag & FREAD)) {
#else /* __rtems__ */
        if ((kn->kn_filter == EVFILT_READ) &&
            !(rtems_bsd_libio_flags_to_fflag(fp->f_io.flags) & FREAD)) {
#endif /* __rtems__ */
                kn->kn_fop = &pipe_nfiltops;
                return (0);
        }
#ifndef __rtems__
        if ((kn->kn_filter == EVFILT_WRITE) && !(fp->f_flag & FWRITE)) {
#else /* __rtems__ */
        if ((kn->kn_filter == EVFILT_WRITE) &&
            !(rtems_bsd_libio_flags_to_fflag(fp->f_io.flags) & FWRITE)) {
#endif /* __rtems__ */
                kn->kn_fop = &pipe_nfiltops;
                return (0);
        }
        cpipe = fp->f_data;
        PIPE_LOCK(cpipe);
        switch (kn->kn_filter) {
        case EVFILT_READ:
                kn->kn_fop = &pipe_rfiltops;
                break;
        case EVFILT_WRITE:
                kn->kn_fop = &pipe_wfiltops;
                if (cpipe->pipe_peer->pipe_present != PIPE_ACTIVE) {
                        /* other end of pipe has been closed */
                        PIPE_UNLOCK(cpipe);
                        return (EPIPE);
                }
                cpipe = PIPE_PEER(cpipe);
                break;
        default:
                PIPE_UNLOCK(cpipe);
                return (EINVAL);
        }

        kn->kn_hook = cpipe;
        knlist_add(&cpipe->pipe_sel.si_note, kn, 1);
        PIPE_UNLOCK(cpipe);
        return (0);
}
#ifdef __rtems__
int
rtems_bsd_pipe_kqfilter(rtems_libio_t *iop, struct knote *kn)
{
        struct file *fp = rtems_bsd_iop_to_fp(iop);

        return pipe_kqfilter(fp, kn);
}
#endif /* __rtems__ */
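/*
 * Usage sketch (hypothetical, assuming kqueue support is enabled in
 * the build): registering EVFILT_READ on the read end delivers
 * events whose data field is the byte count from filt_piperead()
 * below, with EV_EOF set once the writer has gone away.
 *
 *        #include <sys/event.h>
 *
 *        static int
 *        watch_pipe(int readfd)
 *        {
 *                struct kevent ev;
 *                int kq = kqueue();
 *
 *                if (kq < 0)
 *                        return (-1);
 *                EV_SET(&ev, readfd, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *                return (kevent(kq, &ev, 1, NULL, 0, NULL));
 *        }
 */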

static void
filt_pipedetach(struct knote *kn)
{
        struct pipe *cpipe = kn->kn_hook;

        PIPE_LOCK(cpipe);
        knlist_remove(&cpipe->pipe_sel.si_note, kn, 1);
        PIPE_UNLOCK(cpipe);
}

/*ARGSUSED*/
static int
filt_piperead(struct knote *kn, long hint)
{
        struct pipe *rpipe = kn->kn_hook;
        struct pipe *wpipe = rpipe->pipe_peer;
        int ret;

        PIPE_LOCK_ASSERT(rpipe, MA_OWNED);
        kn->kn_data = rpipe->pipe_buffer.cnt;
        if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW))
                kn->kn_data = rpipe->pipe_map.cnt;

        if ((rpipe->pipe_state & PIPE_EOF) ||
            wpipe->pipe_present != PIPE_ACTIVE ||
            (wpipe->pipe_state & PIPE_EOF)) {
                kn->kn_flags |= EV_EOF;
                return (1);
        }
        ret = kn->kn_data > 0;
        return (ret);
}

/*ARGSUSED*/
static int
filt_pipewrite(struct knote *kn, long hint)
{
        struct pipe *wpipe;

        wpipe = kn->kn_hook;
        PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
        if (wpipe->pipe_present != PIPE_ACTIVE ||
            (wpipe->pipe_state & PIPE_EOF)) {
                kn->kn_data = 0;
                kn->kn_flags |= EV_EOF;
                return (1);
        }
        kn->kn_data = (wpipe->pipe_buffer.size > 0) ?
            (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) : PIPE_BUF;
        if (wpipe->pipe_state & PIPE_DIRECTW)
                kn->kn_data = 0;

        return (kn->kn_data >= PIPE_BUF);
}
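/*
 * Note: the threshold above means EVFILT_WRITE only fires once at
 * least PIPE_BUF bytes fit in the buffer, so a kevent-driven writer
 * that never writes more than PIPE_BUF bytes at a time will not
 * block.  A hypothetical registration mirrors the read side:
 *
 *        EV_SET(&ev, writefd, EVFILT_WRITE, EV_ADD, 0, 0, NULL);
 */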

static void
filt_pipedetach_notsup(struct knote *kn)
{

}

static int
filt_pipenotsup(struct knote *kn, long hint)
{

        return (0);
}
#ifdef __rtems__
static int
rtems_bsd_pipe_open(rtems_libio_t *iop, const char *path, int oflag,
    mode_t mode)
{
        return rtems_bsd_error_to_status_and_errno(ENXIO);
}

static int
rtems_bsd_pipe_close(rtems_libio_t *iop)
{
        struct file *fp = rtems_bsd_iop_to_fp(iop);
        int error = pipe_close(fp, NULL);

        return rtems_bsd_error_to_status_and_errno(error);
}

static int
rtems_bsd_pipe_fcntl(rtems_libio_t *iop, int cmd)
{
        int error = 0;

        if (cmd == F_SETFL) {
                struct file *fp = rtems_bsd_iop_to_fp(iop);
                int nbio = iop->flags & LIBIO_FLAGS_NO_DELAY;

                error = pipe_ioctl(fp, FIONBIO, &nbio, NULL, NULL);
        }

        return rtems_bsd_error_to_status_and_errno(error);
}
#endif /* __rtems__ */
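/*
 * Usage sketch (hypothetical): on RTEMS the F_SETFL path above maps
 * the libio non-blocking flag onto FIONBIO, so the usual portable
 * idiom is all that is needed:
 *
 *        #include <fcntl.h>
 *
 *        static int
 *        set_nonblock(int fd)
 *        {
 *                int flags = fcntl(fd, F_GETFL, 0);
 *
 *                if (flags < 0)
 *                        return (-1);
 *                return (fcntl(fd, F_SETFL, flags | O_NONBLOCK));
 *        }
 */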