source: rtems-libbsd/freebsd/sys/kern/sys_pipe.c @ e0b4edbd

Last change on this file was e0b4edbd, checked in by Sebastian Huber <sebastian.huber@…> on 11/06/18 at 14:42:44

Update to FreeBSD head 2018-11-15

Git mirror commit a18b0830c4be01b39489a891b63d6023ada6358a.

Update #3472.

#include <machine/rtems-bsd-kernel-space.h>

/*-
 * SPDX-License-Identifier: BSD-4-Clause
 *
 * Copyright (c) 1996 John S. Dyson
 * Copyright (c) 2012 Giovanni Trematerra
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 */

/*
 * This code has two modes of operation, a small write mode and a large
 * write mode.  The small write mode acts like conventional pipes with
 * a kernel buffer.  If the buffer is less than PIPE_MINDIRECT, then the
 * "normal" pipe buffering is done.  If the buffer is between PIPE_MINDIRECT
 * and PIPE_SIZE in size, the sending process pins the underlying pages in
 * memory, and the receiving process copies directly from these pinned pages
 * in the sending process.
 *
 * If the sending process receives a signal, it is possible that it will
 * go away, and certainly its address space can change, because control
 * is returned back to the user-mode side.  In that case, the pipe code
 * arranges to copy the buffer supplied by the user process, to a pageable
 * kernel buffer, and the receiving process will grab the data from the
 * pageable kernel buffer.  Since signals don't happen all that often,
 * the copy operation is normally eliminated.
 *
 * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
 * happen for small transfers so that the system will not spend all of
 * its time context switching.
 *
 * In order to limit the resource use of pipes, two sysctls exist:
 *
 * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable
 * address space available to us in pipe_map. This value is normally
 * autotuned, but may also be loader tuned.
 *
 * kern.ipc.pipekva - This read-only sysctl tracks the current amount of
 * memory in use by pipes.
 *
 * Based on how large pipekva is relative to maxpipekva, the following
 * will happen:
 *
 * 0% - 50%:
 *     New pipes are given 16K of memory backing, pipes may dynamically
 *     grow to as large as 64K where needed.
 * 50% - 75%:
 *     New pipes are given 4K (or PAGE_SIZE) of memory backing,
 *     existing pipes may NOT grow.
 * 75% - 100%:
 *     New pipes are given 4K (or PAGE_SIZE) of memory backing,
 *     existing pipes will be shrunk down to 4K whenever possible.
 *
 * Resizing may be disabled by setting kern.ipc.piperesizeallowed=0.  If
 * that is set, the only resize that will occur is the 0 -> SMALL_PIPE_SIZE
 * resize which MUST occur for reverse-direction pipes when they are
 * first used.
 *
 * Additional information about the current state of pipes may be obtained
 * from kern.ipc.pipes, kern.ipc.pipefragretry, kern.ipc.pipeallocfail,
 * and kern.ipc.piperesizefail.
 *
 * Locking rules:  There are two locks present here:  A mutex, used via
 * PIPE_LOCK, and a flag, used via pipelock().  All locking is done via
 * the flag, as mutexes can not persist over uiomove.  The mutex
 * exists only to guard access to the flag, and is not in itself a
 * locking mechanism.  Also note that there is only a single mutex for
 * both directions of a pipe.
 *
 * As pipelock() may have to sleep before it can acquire the flag, it
 * is important to reread all data after a call to pipelock(); everything
 * in the structure may have changed.
 */
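
/*
 * Editor's illustration (not part of the upstream file): the two
 * accounting sysctls described above can be inspected from user space on
 * FreeBSD with sysctlbyname(3).  A minimal sketch, kept out of the kernel
 * build with #if 0:
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
        long maxkva, kva;
        size_t len;

        len = sizeof(maxkva);
        if (sysctlbyname("kern.ipc.maxpipekva", &maxkva, &len, NULL, 0) != 0)
                return (1);
        len = sizeof(kva);
        if (sysctlbyname("kern.ipc.pipekva", &kva, &len, NULL, 0) != 0)
                return (1);
        printf("pipe KVA in use: %ld of %ld bytes\n", kva, maxkva);
        return (0);
}
#endif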

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/ttycom.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/selinfo.h>
#include <sys/signalvar.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/pipe.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/uio.h>
#include <sys/user.h>
#include <sys/event.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/uma.h>

/*
 * Use this define if you want to disable *fancy* VM things.  Expect an
 * approx 30% decrease in transfer rate.  This could be useful for
 * NetBSD or OpenBSD.
 */
/* #define PIPE_NODIRECT */

#define PIPE_PEER(pipe) \
        (((pipe)->pipe_state & PIPE_NAMED) ? (pipe) : ((pipe)->pipe_peer))

/*
 * interfaces to the outside world
 */
#ifndef __rtems__
static fo_rdwr_t        pipe_read;
static fo_rdwr_t        pipe_write;
static fo_truncate_t    pipe_truncate;
static fo_ioctl_t       pipe_ioctl;
static fo_poll_t        pipe_poll;
static fo_kqfilter_t    pipe_kqfilter;
static fo_stat_t        pipe_stat;
static fo_close_t       pipe_close;
static fo_chmod_t       pipe_chmod;
static fo_chown_t       pipe_chown;
static fo_fill_kinfo_t  pipe_fill_kinfo;

struct fileops pipeops = {
        .fo_read = pipe_read,
        .fo_write = pipe_write,
        .fo_truncate = pipe_truncate,
        .fo_ioctl = pipe_ioctl,
        .fo_poll = pipe_poll,
        .fo_kqfilter = pipe_kqfilter,
        .fo_stat = pipe_stat,
        .fo_close = pipe_close,
        .fo_chmod = pipe_chmod,
        .fo_chown = pipe_chown,
        .fo_sendfile = invfo_sendfile,
        .fo_fill_kinfo = pipe_fill_kinfo,
        .fo_flags = DFLAG_PASSABLE
};
#else /* __rtems__ */
#define PIPE_NODIRECT
#define PRIBIO                  (0)

static int rtems_bsd_pipe_open(rtems_libio_t *iop, const char *path,
    int oflag, mode_t mode);
static int rtems_bsd_pipe_close(rtems_libio_t *iop);
static ssize_t rtems_bsd_pipe_read(rtems_libio_t *iop, void *buffer,
    size_t count);
static ssize_t rtems_bsd_pipe_readv(rtems_libio_t *iop,
    const struct iovec *iov, int iovcnt, ssize_t total);
static ssize_t rtems_bsd_pipe_write(rtems_libio_t *iop, const void *buffer,
    size_t count);
static ssize_t rtems_bsd_pipe_writev(rtems_libio_t *iop,
    const struct iovec *iov, int iovcnt, ssize_t total);
static int rtems_bsd_pipe_ioctl(rtems_libio_t *iop, ioctl_command_t request,
    void *buffer);
static int rtems_bsd_pipe_stat(const rtems_filesystem_location_info_t *loc,
    struct stat *buf);
static int rtems_bsd_pipe_fcntl(rtems_libio_t *iop, int cmd);
static int rtems_bsd_pipe_poll(rtems_libio_t *iop, int events);
int rtems_bsd_pipe_kqfilter(rtems_libio_t *iop, struct knote *kn);

static const rtems_filesystem_file_handlers_r pipeops = {
        .open_h = rtems_bsd_pipe_open,
        .close_h = rtems_bsd_pipe_close,
        .read_h = rtems_bsd_pipe_read,
        .write_h = rtems_bsd_pipe_write,
        .ioctl_h = rtems_bsd_pipe_ioctl,
        .lseek_h = rtems_filesystem_default_lseek,
        .fstat_h = rtems_bsd_pipe_stat,
        .ftruncate_h = rtems_filesystem_default_ftruncate,
        .fsync_h = rtems_filesystem_default_fsync_or_fdatasync,
        .fdatasync_h = rtems_filesystem_default_fsync_or_fdatasync,
        .fcntl_h = rtems_bsd_pipe_fcntl,
        .poll_h = rtems_bsd_pipe_poll,
        .kqfilter_h = rtems_bsd_pipe_kqfilter,
        .readv_h = rtems_bsd_pipe_readv,
        .writev_h = rtems_bsd_pipe_writev,
        .mmap_h = rtems_filesystem_default_mmap
};

long    maxpipekva;                     /* Limit on pipe KVA */

static int kern_pipe(struct thread *, int [2], int, struct filecaps *,
    struct filecaps *);
#endif /* __rtems__ */

static void     filt_pipedetach(struct knote *kn);
static void     filt_pipedetach_notsup(struct knote *kn);
static int      filt_pipenotsup(struct knote *kn, long hint);
static int      filt_piperead(struct knote *kn, long hint);
static int      filt_pipewrite(struct knote *kn, long hint);

static struct filterops pipe_nfiltops = {
        .f_isfd = 1,
        .f_detach = filt_pipedetach_notsup,
        .f_event = filt_pipenotsup
};
static struct filterops pipe_rfiltops = {
        .f_isfd = 1,
        .f_detach = filt_pipedetach,
        .f_event = filt_piperead
};
static struct filterops pipe_wfiltops = {
        .f_isfd = 1,
        .f_detach = filt_pipedetach,
        .f_event = filt_pipewrite
};

/*
 * Default pipe buffer size(s), this can be kind-of large now because pipe
 * space is pageable.  The pipe code will try to maintain locality of
 * reference for performance reasons, so small amounts of outstanding I/O
 * will not wipe the cache.
 */
#define MINPIPESIZE (PIPE_SIZE/3)
#define MAXPIPESIZE (2*PIPE_SIZE/3)

static long amountpipekva;
static int pipefragretry;
static int pipeallocfail;
static int piperesizefail;
static int piperesizeallowed = 1;

SYSCTL_LONG(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
           &maxpipekva, 0, "Pipe KVA limit");
SYSCTL_LONG(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD,
           &amountpipekva, 0, "Pipe KVA usage");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipefragretry, CTLFLAG_RD,
          &pipefragretry, 0, "Pipe allocation retries due to fragmentation");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipeallocfail, CTLFLAG_RD,
          &pipeallocfail, 0, "Pipe allocation failures");
SYSCTL_INT(_kern_ipc, OID_AUTO, piperesizefail, CTLFLAG_RD,
          &piperesizefail, 0, "Pipe resize failures");
SYSCTL_INT(_kern_ipc, OID_AUTO, piperesizeallowed, CTLFLAG_RW,
          &piperesizeallowed, 0, "Pipe resizing allowed");

static void pipeinit(void *dummy __unused);
static void pipeclose(struct pipe *cpipe);
static void pipe_free_kmem(struct pipe *cpipe);
static void pipe_create(struct pipe *pipe, int backing);
static void pipe_paircreate(struct thread *td, struct pipepair **p_pp);
static __inline int pipelock(struct pipe *cpipe, int catch);
static __inline void pipeunlock(struct pipe *cpipe);
#ifndef PIPE_NODIRECT
static int pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio);
static void pipe_destroy_write_buffer(struct pipe *wpipe);
static int pipe_direct_write(struct pipe *wpipe, struct uio *uio);
static void pipe_clone_write_buffer(struct pipe *wpipe);
#endif
static int pipespace(struct pipe *cpipe, int size);
static int pipespace_new(struct pipe *cpipe, int size);

static int      pipe_zone_ctor(void *mem, int size, void *arg, int flags);
static int      pipe_zone_init(void *mem, int size, int flags);
static void     pipe_zone_fini(void *mem, int size);

static uma_zone_t pipe_zone;
static struct unrhdr *pipeino_unr;
static dev_t pipedev_ino;

SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL);

static void
pipeinit(void *dummy __unused)
{

        pipe_zone = uma_zcreate("pipe", sizeof(struct pipepair),
            pipe_zone_ctor, NULL, pipe_zone_init, pipe_zone_fini,
            UMA_ALIGN_PTR, 0);
        KASSERT(pipe_zone != NULL, ("pipe_zone not initialized"));
        pipeino_unr = new_unrhdr(1, INT32_MAX, NULL);
        KASSERT(pipeino_unr != NULL, ("pipe fake inodes not initialized"));
        pipedev_ino = devfs_alloc_cdp_inode();
        KASSERT(pipedev_ino > 0, ("pipe dev inode not initialized"));
}

static int
pipe_zone_ctor(void *mem, int size, void *arg, int flags)
{
        struct pipepair *pp;
        struct pipe *rpipe, *wpipe;

        KASSERT(size == sizeof(*pp), ("pipe_zone_ctor: wrong size"));

        pp = (struct pipepair *)mem;

        /*
         * We zero both pipe endpoints to make sure all the kmem pointers
         * are NULL, flag fields are zero'd, etc.  We timestamp both
         * endpoints with the same time.
         */
        rpipe = &pp->pp_rpipe;
        bzero(rpipe, sizeof(*rpipe));
#ifndef __rtems__
        vfs_timestamp(&rpipe->pipe_ctime);
#else /* __rtems__ */
        rpipe->pipe_ctime.tv_sec = time(NULL);
#endif /* __rtems__ */
        rpipe->pipe_atime = rpipe->pipe_mtime = rpipe->pipe_ctime;

        wpipe = &pp->pp_wpipe;
        bzero(wpipe, sizeof(*wpipe));
        wpipe->pipe_ctime = rpipe->pipe_ctime;
        wpipe->pipe_atime = wpipe->pipe_mtime = rpipe->pipe_ctime;

        rpipe->pipe_peer = wpipe;
        rpipe->pipe_pair = pp;
        wpipe->pipe_peer = rpipe;
        wpipe->pipe_pair = pp;

        /*
         * Mark both endpoints as present; they will later get free'd
         * one at a time.  When both are free'd, then the whole pair
         * is released.
         */
        rpipe->pipe_present = PIPE_ACTIVE;
        wpipe->pipe_present = PIPE_ACTIVE;

        /*
         * Eventually, the MAC Framework may initialize the label
         * in ctor or init, but for now we do it elsewhere to avoid
         * blocking in ctor or init.
         */
        pp->pp_label = NULL;

        return (0);
}

static int
pipe_zone_init(void *mem, int size, int flags)
{
        struct pipepair *pp;

        KASSERT(size == sizeof(*pp), ("pipe_zone_init: wrong size"));

        pp = (struct pipepair *)mem;

        mtx_init(&pp->pp_mtx, "pipe mutex", NULL, MTX_DEF | MTX_NEW);
        return (0);
}

static void
pipe_zone_fini(void *mem, int size)
{
        struct pipepair *pp;

        KASSERT(size == sizeof(*pp), ("pipe_zone_fini: wrong size"));

        pp = (struct pipepair *)mem;

        mtx_destroy(&pp->pp_mtx);
}

static void
pipe_paircreate(struct thread *td, struct pipepair **p_pp)
{
        struct pipepair *pp;
        struct pipe *rpipe, *wpipe;

        *p_pp = pp = uma_zalloc(pipe_zone, M_WAITOK);
#ifdef MAC
        /*
         * The MAC label is shared between the connected endpoints.  As a
         * result mac_pipe_init() and mac_pipe_create() are called once
         * for the pair, and not on the endpoints.
         */
        mac_pipe_init(pp);
        mac_pipe_create(td->td_ucred, pp);
#endif
        rpipe = &pp->pp_rpipe;
        wpipe = &pp->pp_wpipe;

        knlist_init_mtx(&rpipe->pipe_sel.si_note, PIPE_MTX(rpipe));
        knlist_init_mtx(&wpipe->pipe_sel.si_note, PIPE_MTX(wpipe));

        /* Only the forward direction pipe is backed by default */
        pipe_create(rpipe, 1);
        pipe_create(wpipe, 0);

        rpipe->pipe_state |= PIPE_DIRECTOK;
        wpipe->pipe_state |= PIPE_DIRECTOK;
}

void
pipe_named_ctor(struct pipe **ppipe, struct thread *td)
{
        struct pipepair *pp;

        pipe_paircreate(td, &pp);
        pp->pp_rpipe.pipe_state |= PIPE_NAMED;
        *ppipe = &pp->pp_rpipe;
}

void
pipe_dtor(struct pipe *dpipe)
{
        struct pipe *peer;
        ino_t ino;

        ino = dpipe->pipe_ino;
        peer = (dpipe->pipe_state & PIPE_NAMED) != 0 ? dpipe->pipe_peer : NULL;
        funsetown(&dpipe->pipe_sigio);
        pipeclose(dpipe);
        if (peer != NULL) {
                funsetown(&peer->pipe_sigio);
                pipeclose(peer);
        }
        if (ino != 0 && ino != (ino_t)-1)
                free_unr(pipeino_unr, ino);
}

/*
 * The pipe system call for the DTYPE_PIPE type of pipes.  If we fail, let
 * the zone pick up the pieces via pipeclose().
 */
int
kern_pipe(struct thread *td, int fildes[2], int flags, struct filecaps *fcaps1,
    struct filecaps *fcaps2)
{
        struct file *rf, *wf;
        struct pipe *rpipe, *wpipe;
        struct pipepair *pp;
        int fd, fflags, error;

        pipe_paircreate(td, &pp);
        rpipe = &pp->pp_rpipe;
        wpipe = &pp->pp_wpipe;
        error = falloc_caps(td, &rf, &fd, flags, fcaps1);
        if (error) {
                pipeclose(rpipe);
                pipeclose(wpipe);
                return (error);
        }
        /* An extra reference on `rf' has been held for us by falloc_caps(). */
        fildes[0] = fd;

        fflags = FREAD | FWRITE;
        if ((flags & O_NONBLOCK) != 0)
                fflags |= FNONBLOCK;

        /*
         * Warning: once we've gotten past allocation of the fd for the
         * read-side, we can only drop the read side via fdrop() in order
         * to avoid races against processes which manage to dup() the read
         * side while we are blocked trying to allocate the write side.
         */
        finit(rf, fflags, DTYPE_PIPE, rpipe, &pipeops);
        error = falloc_caps(td, &wf, &fd, flags, fcaps2);
        if (error) {
                fdclose(td, rf, fildes[0]);
#ifndef __rtems__
                fdrop(rf, td);
#endif /* __rtems__ */
                /* rpipe has been closed by fdrop(). */
                pipeclose(wpipe);
                return (error);
        }
        /* An extra reference on `wf' has been held for us by falloc_caps(). */
        finit(wf, fflags, DTYPE_PIPE, wpipe, &pipeops);
#ifndef __rtems__
        fdrop(wf, td);
#endif /* __rtems__ */
        fildes[1] = fd;
#ifndef __rtems__
        fdrop(rf, td);
#endif /* __rtems__ */

        return (0);
}

#ifdef COMPAT_FREEBSD10
/* ARGSUSED */
int
freebsd10_pipe(struct thread *td, struct freebsd10_pipe_args *uap __unused)
{
        int error;
        int fildes[2];

        error = kern_pipe(td, fildes, 0, NULL, NULL);
        if (error)
                return (error);

        td->td_retval[0] = fildes[0];
        td->td_retval[1] = fildes[1];

        return (0);
}
#endif

#ifndef __rtems__
int
sys_pipe2(struct thread *td, struct pipe2_args *uap)
{
        int error, fildes[2];

        if (uap->flags & ~(O_CLOEXEC | O_NONBLOCK))
                return (EINVAL);
        error = kern_pipe(td, fildes, uap->flags, NULL, NULL);
        if (error)
                return (error);
        error = copyout(fildes, uap->fildes, 2 * sizeof(int));
        if (error) {
                (void)kern_close(td, fildes[0]);
                (void)kern_close(td, fildes[1]);
        }
        return (error);
}
#endif /* __rtems__ */

#ifdef __rtems__
int
pipe(int fildes[2])
{
        struct thread *td = rtems_bsd_get_curthread_or_null();
        int error;

        if (td != NULL) {
                error = kern_pipe(td, fildes, 0, NULL, NULL);
        } else {
                error = ENOMEM;
        }

        if (error == 0) {
                return error;
        } else {
                rtems_set_errno_and_return_minus_one(error);
        }
}
#endif /* __rtems__ */
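
/*
 * Editor's illustration (hypothetical demo, not part of the upstream
 * file): the classic usage pattern served by kern_pipe() above.
 * fildes[0] is the read end and fildes[1] the write end; a 4-byte write
 * lands in the pipe buffer and is drained by the matching read.  Kept
 * out of the build with #if 0.
 */
#if 0
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
        int fds[2];
        char buf[16];
        ssize_t n;

        if (pipe(fds) != 0)
                return (1);
        (void)write(fds[1], "ping", 4);         /* fills the pipe buffer */
        n = read(fds[0], buf, sizeof(buf));     /* drains it again */
        printf("read %zd bytes: %.4s\n", n, buf);
        close(fds[0]);
        close(fds[1]);
        return (0);
}
#endif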

/*
 * Allocate kva for pipe circular buffer, the space is pageable.
 * This routine will 'realloc' the size of a pipe safely, if it fails
 * it will retain the old buffer.
 * If it fails it will return ENOMEM.
 */
static int
pipespace_new(struct pipe *cpipe, int size)
{
        caddr_t buffer;
        int error, cnt, firstseg;
        static int curfail = 0;
        static struct timeval lastfail;

        KASSERT(!mtx_owned(PIPE_MTX(cpipe)), ("pipespace: pipe mutex locked"));
        KASSERT(!(cpipe->pipe_state & PIPE_DIRECTW),
                ("pipespace: resize of direct writes not allowed"));
retry:
        cnt = cpipe->pipe_buffer.cnt;
        if (cnt > size)
                size = cnt;

        size = round_page(size);
#ifndef __rtems__
        buffer = (caddr_t) vm_map_min(pipe_map);

        error = vm_map_find(pipe_map, NULL, 0, (vm_offset_t *)&buffer, size, 0,
            VMFS_ANY_SPACE, VM_PROT_RW, VM_PROT_RW, 0);
        if (error != KERN_SUCCESS) {
#else /* __rtems__ */
        (void)error;
        buffer = malloc(size, M_TEMP, M_WAITOK | M_ZERO);
        if (buffer == NULL) {
#endif /* __rtems__ */
                if ((cpipe->pipe_buffer.buffer == NULL) &&
                        (size > SMALL_PIPE_SIZE)) {
                        size = SMALL_PIPE_SIZE;
                        pipefragretry++;
                        goto retry;
                }
                if (cpipe->pipe_buffer.buffer == NULL) {
                        pipeallocfail++;
                        if (ppsratecheck(&lastfail, &curfail, 1))
                                printf("kern.ipc.maxpipekva exceeded; see tuning(7)\n");
                } else {
                        piperesizefail++;
                }
                return (ENOMEM);
        }

        /* copy data, then free old resources if we're resizing */
        if (cnt > 0) {
                if (cpipe->pipe_buffer.in <= cpipe->pipe_buffer.out) {
                        firstseg = cpipe->pipe_buffer.size - cpipe->pipe_buffer.out;
                        bcopy(&cpipe->pipe_buffer.buffer[cpipe->pipe_buffer.out],
                                buffer, firstseg);
                        if ((cnt - firstseg) > 0)
                                bcopy(cpipe->pipe_buffer.buffer, &buffer[firstseg],
                                        cpipe->pipe_buffer.in);
                } else {
                        bcopy(&cpipe->pipe_buffer.buffer[cpipe->pipe_buffer.out],
                                buffer, cnt);
                }
        }
        pipe_free_kmem(cpipe);
        cpipe->pipe_buffer.buffer = buffer;
        cpipe->pipe_buffer.size = size;
        cpipe->pipe_buffer.in = cnt;
        cpipe->pipe_buffer.out = 0;
        cpipe->pipe_buffer.cnt = cnt;
        atomic_add_long(&amountpipekva, cpipe->pipe_buffer.size);
        return (0);
}
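
/*
 * Editor's sketch (illustration only, names are hypothetical): the
 * ring-buffer bookkeeping behind pipe_buffer.  "out" chases "in" around
 * a buffer of "size" bytes with "cnt" bytes queued; the two bcopy()
 * calls above correspond to the wrapped case, where the queued data is
 * split into a segment at the end of the buffer and a segment at the
 * start.  Kept out of the build with #if 0.
 */
#if 0
static void
ring_drain(const char *buf, int size, int *out, int *cnt, char *dst, int n)
{
        int firstseg;

        /* First segment: from "out" to the end of the buffer. */
        firstseg = size - *out;
        if (firstseg > n)
                firstseg = n;
        memcpy(dst, buf + *out, firstseg);
        /* Second segment: wrap around to the start of the buffer. */
        if (n > firstseg)
                memcpy(dst + firstseg, buf, n - firstseg);
        *out = (*out + n) % size;
        *cnt -= n;
}
#endif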

/*
 * Wrapper for pipespace_new() that performs locking assertions.
 */
static int
pipespace(struct pipe *cpipe, int size)
{

        KASSERT(cpipe->pipe_state & PIPE_LOCKFL,
                ("Unlocked pipe passed to pipespace"));
        return (pipespace_new(cpipe, size));
}

/*
 * lock a pipe for I/O, blocking other access
 */
static __inline int
pipelock(struct pipe *cpipe, int catch)
{
        int error;

        PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
        while (cpipe->pipe_state & PIPE_LOCKFL) {
                cpipe->pipe_state |= PIPE_LWANT;
                error = msleep(cpipe, PIPE_MTX(cpipe),
                    catch ? (PRIBIO | PCATCH) : PRIBIO,
                    "pipelk", 0);
                if (error != 0)
                        return (error);
        }
        cpipe->pipe_state |= PIPE_LOCKFL;
        return (0);
}

/*
 * unlock a pipe I/O lock
 */
static __inline void
pipeunlock(struct pipe *cpipe)
{

        PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
        KASSERT(cpipe->pipe_state & PIPE_LOCKFL,
                ("Unlocked pipe passed to pipeunlock"));
        cpipe->pipe_state &= ~PIPE_LOCKFL;
        if (cpipe->pipe_state & PIPE_LWANT) {
                cpipe->pipe_state &= ~PIPE_LWANT;
                wakeup(cpipe);
        }
}
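
/*
 * Editor's sketch (illustration only, #if 0): the canonical calling
 * pattern implied by the locking rules in the header comment.  Because
 * pipelock() may sleep, any pipe state read before the call must be
 * re-read after it returns.  The function name is hypothetical.
 */
#if 0
static int
pipe_locked_op_sketch(struct pipe *cpipe)
{
        int error;

        PIPE_LOCK(cpipe);
        error = pipelock(cpipe, 1);     /* may sleep; catches signals */
        if (error != 0) {
                PIPE_UNLOCK(cpipe);
                return (error);
        }
        /* Re-read everything: the pipe may have changed while we slept. */
        if (cpipe->pipe_state & PIPE_EOF)
                error = EPIPE;
        /* ... drop the mutex around uiomove(), relying on the flag ... */
        pipeunlock(cpipe);
        PIPE_UNLOCK(cpipe);
        return (error);
}
#endif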
693
694void
695pipeselwakeup(struct pipe *cpipe)
696{
697
698        PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
699        if (cpipe->pipe_state & PIPE_SEL) {
700                selwakeuppri(&cpipe->pipe_sel, PSOCK);
701                if (!SEL_WAITING(&cpipe->pipe_sel))
702                        cpipe->pipe_state &= ~PIPE_SEL;
703        }
704#ifndef __rtems__
705        if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio)
706                pgsigio(&cpipe->pipe_sigio, SIGIO, 0);
707#endif /* __rtems__ */
708        KNOTE_LOCKED(&cpipe->pipe_sel.si_note, 0);
709}
710
711/*
712 * Initialize and allocate VM and memory for pipe.  The structure
713 * will start out zero'd from the ctor, so we just manage the kmem.
714 */
715static void
716pipe_create(struct pipe *pipe, int backing)
717{
718
719        if (backing) {
720                /*
721                 * Note that these functions can fail if pipe map is exhausted
722                 * (as a result of too many pipes created), but we ignore the
723                 * error as it is not fatal and could be provoked by
724                 * unprivileged users. The only consequence is worse performance
725                 * with given pipe.
726                 */
727                if (amountpipekva > maxpipekva / 2)
728                        (void)pipespace_new(pipe, SMALL_PIPE_SIZE);
729                else
730                        (void)pipespace_new(pipe, PIPE_SIZE);
731        }
732
733        pipe->pipe_ino = -1;
734}
735
736/* ARGSUSED */
737static int
738pipe_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
739    int flags, struct thread *td)
740{
741        struct pipe *rpipe;
742        int error;
743        int nread = 0;
744        int size;
745
746        rpipe = fp->f_data;
747        PIPE_LOCK(rpipe);
748        ++rpipe->pipe_busy;
749        error = pipelock(rpipe, 1);
750        if (error)
751                goto unlocked_error;
752
753#ifdef MAC
754        error = mac_pipe_check_read(active_cred, rpipe->pipe_pair);
755        if (error)
756                goto locked_error;
757#endif
758        if (amountpipekva > (3 * maxpipekva) / 4) {
759                if (!(rpipe->pipe_state & PIPE_DIRECTW) &&
760                        (rpipe->pipe_buffer.size > SMALL_PIPE_SIZE) &&
761                        (rpipe->pipe_buffer.cnt <= SMALL_PIPE_SIZE) &&
762                        (piperesizeallowed == 1)) {
763                        PIPE_UNLOCK(rpipe);
764                        pipespace(rpipe, SMALL_PIPE_SIZE);
765                        PIPE_LOCK(rpipe);
766                }
767        }
768
769        while (uio->uio_resid) {
770                /*
771                 * normal pipe buffer receive
772                 */
773                if (rpipe->pipe_buffer.cnt > 0) {
774                        size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
775                        if (size > rpipe->pipe_buffer.cnt)
776                                size = rpipe->pipe_buffer.cnt;
777                        if (size > uio->uio_resid)
778                                size = uio->uio_resid;
779
780                        PIPE_UNLOCK(rpipe);
781                        error = uiomove(
782                            &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
783                            size, uio);
784                        PIPE_LOCK(rpipe);
785                        if (error)
786                                break;
787
788                        rpipe->pipe_buffer.out += size;
789                        if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
790                                rpipe->pipe_buffer.out = 0;
791
792                        rpipe->pipe_buffer.cnt -= size;
793
794                        /*
795                         * If there is no more to read in the pipe, reset
796                         * its pointers to the beginning.  This improves
797                         * cache hit stats.
798                         */
799                        if (rpipe->pipe_buffer.cnt == 0) {
800                                rpipe->pipe_buffer.in = 0;
801                                rpipe->pipe_buffer.out = 0;
802                        }
803                        nread += size;
804#ifndef PIPE_NODIRECT
805                /*
806                 * Direct copy, bypassing a kernel buffer.
807                 */
808                } else if ((size = rpipe->pipe_map.cnt) &&
809                           (rpipe->pipe_state & PIPE_DIRECTW)) {
810                        if (size > uio->uio_resid)
811                                size = (u_int) uio->uio_resid;
812
813                        PIPE_UNLOCK(rpipe);
814                        error = uiomove_fromphys(rpipe->pipe_map.ms,
815                            rpipe->pipe_map.pos, size, uio);
816                        PIPE_LOCK(rpipe);
817                        if (error)
818                                break;
819                        nread += size;
820                        rpipe->pipe_map.pos += size;
821                        rpipe->pipe_map.cnt -= size;
822                        if (rpipe->pipe_map.cnt == 0) {
823                                rpipe->pipe_state &= ~(PIPE_DIRECTW|PIPE_WANTW);
824                                wakeup(rpipe);
825                        }
826#endif
827                } else {
828                        /*
829                         * detect EOF condition
830                         * read returns 0 on EOF, no need to set error
831                         */
832                        if (rpipe->pipe_state & PIPE_EOF)
833                                break;
834
835                        /*
836                         * If the "write-side" has been blocked, wake it up now.
837                         */
838                        if (rpipe->pipe_state & PIPE_WANTW) {
839                                rpipe->pipe_state &= ~PIPE_WANTW;
840                                wakeup(rpipe);
841                        }
842
843                        /*
844                         * Break if some data was read.
845                         */
846                        if (nread > 0)
847                                break;
848
849                        /*
850                         * Unlock the pipe buffer for our remaining processing.
851                         * We will either break out with an error or we will
852                         * sleep and relock to loop.
853                         */
854                        pipeunlock(rpipe);
855
856                        /*
857                         * Handle non-blocking mode operation or
858                         * wait for more data.
859                         */
860#ifndef __rtems__
861                        if (fp->f_flag & FNONBLOCK) {
862#else /* __rtems__ */
863                        if (rtems_bsd_libio_flags_to_fflag(fp->f_io.flags) & FNONBLOCK) {
864#endif /* __rtems__ */
865                                error = EAGAIN;
866                        } else {
867                                rpipe->pipe_state |= PIPE_WANTR;
868                                if ((error = msleep(rpipe, PIPE_MTX(rpipe),
869                                    PRIBIO | PCATCH,
870                                    "piperd", 0)) == 0)
871                                        error = pipelock(rpipe, 1);
872                        }
873                        if (error)
874                                goto unlocked_error;
875                }
876        }
877#ifdef MAC
878locked_error:
879#endif
880        pipeunlock(rpipe);
881
882        /* XXX: should probably do this before getting any locks. */
883        if (error == 0)
884#ifndef __rtems__
885                vfs_timestamp(&rpipe->pipe_atime);
886#else /* __rtems__ */
887                rpipe->pipe_atime.tv_sec = time(NULL);
888#endif /* __rtems__ */
889unlocked_error:
890        --rpipe->pipe_busy;
891
892        /*
893         * PIPE_WANT processing only makes sense if pipe_busy is 0.
894         */
895        if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
896                rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
897                wakeup(rpipe);
898        } else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) {
899                /*
900                 * Handle write blocking hysteresis.
901                 */
902                if (rpipe->pipe_state & PIPE_WANTW) {
903                        rpipe->pipe_state &= ~PIPE_WANTW;
904                        wakeup(rpipe);
905                }
906        }
907
908        if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF)
909                pipeselwakeup(rpipe);
910
911        PIPE_UNLOCK(rpipe);
912        return (error);
913}
914#ifdef __rtems__
915static ssize_t
916rtems_bsd_pipe_read(rtems_libio_t *iop, void *buffer, size_t count)
917{
918        struct thread *td = rtems_bsd_get_curthread_or_null();
919        struct file *fp = rtems_bsd_iop_to_fp(iop);
920        struct iovec iov = {
921                .iov_base = buffer,
922                .iov_len = count
923        };
924        struct uio auio = {
925                .uio_iov = &iov,
926                .uio_iovcnt = 1,
927                .uio_offset = 0,
928                .uio_resid = count,
929                .uio_segflg = UIO_USERSPACE,
930                .uio_rw = UIO_READ,
931                .uio_td = td
932        };
933        int error;
934
935        if (td != NULL) {
936                error = pipe_read(fp, &auio, NULL, 0, NULL);
937        } else {
938                error = ENOMEM;
939        }
940
941        if (error == 0) {
942                return (count - auio.uio_resid);
943        } else {
944                rtems_set_errno_and_return_minus_one(error);
945        }
946}
947
948static ssize_t
949rtems_bsd_pipe_readv(rtems_libio_t *iop, const struct iovec *iov,
950    int iovcnt, ssize_t total)
951{
952        struct thread *td = rtems_bsd_get_curthread_or_null();
953        struct file *fp = rtems_bsd_iop_to_fp(iop);
954        struct uio auio = {
955                .uio_iov = __DECONST(struct iovec *, iov),
956                .uio_iovcnt = iovcnt,
957                .uio_offset = 0,
958                .uio_resid = total,
959                .uio_segflg = UIO_USERSPACE,
960                .uio_rw = UIO_READ,
961                .uio_td = td
962        };
963        int error;
964
965        if (td != NULL) {
966                error = pipe_read(fp, &auio, NULL, 0, NULL);
967        } else {
968                error = ENOMEM;
969        }
970
971        if (error == 0) {
972                return (total - auio.uio_resid);
973        } else {
974                rtems_set_errno_and_return_minus_one(error);
975        }
976}
977#endif /* __rtems__ */
978
979#ifndef PIPE_NODIRECT
980/*
981 * Map the sending processes' buffer into kernel space and wire it.
982 * This is similar to a physical write operation.
983 */
984static int
985pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio)
986{
987        u_int size;
988        int i;
989
990        PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);
991        KASSERT(wpipe->pipe_state & PIPE_DIRECTW,
992                ("Clone attempt on non-direct write pipe!"));
993
994        if (uio->uio_iov->iov_len > wpipe->pipe_buffer.size)
995                size = wpipe->pipe_buffer.size;
996        else
997                size = uio->uio_iov->iov_len;
998
999        if ((i = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
1000            (vm_offset_t)uio->uio_iov->iov_base, size, VM_PROT_READ,
1001            wpipe->pipe_map.ms, PIPENPAGES)) < 0)
1002                return (EFAULT);
1003
1004/*
1005 * set up the control block
1006 */
1007        wpipe->pipe_map.npages = i;
1008        wpipe->pipe_map.pos =
1009            ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK;
1010        wpipe->pipe_map.cnt = size;
1011
1012/*
1013 * and update the uio data
1014 */
1015
1016        uio->uio_iov->iov_len -= size;
1017        uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + size;
1018        if (uio->uio_iov->iov_len == 0)
1019                uio->uio_iov++;
1020        uio->uio_resid -= size;
1021        uio->uio_offset += size;
1022        return (0);
1023}
1024
1025/*
1026 * unmap and unwire the process buffer
1027 */
1028static void
1029pipe_destroy_write_buffer(struct pipe *wpipe)
1030{
1031
1032        PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
1033        vm_page_unhold_pages(wpipe->pipe_map.ms, wpipe->pipe_map.npages);
1034        wpipe->pipe_map.npages = 0;
1035}
1036
1037/*
1038 * In the case of a signal, the writing process might go away.  This
1039 * code copies the data into the circular buffer so that the source
1040 * pages can be freed without loss of data.
1041 */
1042static void
1043pipe_clone_write_buffer(struct pipe *wpipe)
1044{
1045        struct uio uio;
1046        struct iovec iov;
1047        int size;
1048        int pos;
1049
1050        PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
1051        size = wpipe->pipe_map.cnt;
1052        pos = wpipe->pipe_map.pos;
1053
1054        wpipe->pipe_buffer.in = size;
1055        wpipe->pipe_buffer.out = 0;
1056        wpipe->pipe_buffer.cnt = size;
1057        wpipe->pipe_state &= ~PIPE_DIRECTW;
1058
1059        PIPE_UNLOCK(wpipe);
1060        iov.iov_base = wpipe->pipe_buffer.buffer;
1061        iov.iov_len = size;
1062        uio.uio_iov = &iov;
1063        uio.uio_iovcnt = 1;
1064        uio.uio_offset = 0;
1065        uio.uio_resid = size;
1066        uio.uio_segflg = UIO_SYSSPACE;
1067        uio.uio_rw = UIO_READ;
1068        uio.uio_td = curthread;
1069        uiomove_fromphys(wpipe->pipe_map.ms, pos, size, &uio);
1070        PIPE_LOCK(wpipe);
1071        pipe_destroy_write_buffer(wpipe);
1072}
1073
1074/*
1075 * This implements the pipe buffer write mechanism.  Note that only
1076 * a direct write OR a normal pipe write can be pending at any given time.
1077 * If there are any characters in the pipe buffer, the direct write will
1078 * be deferred until the receiving process grabs all of the bytes from
1079 * the pipe buffer.  Then the direct mapping write is set-up.
1080 */
1081static int
1082pipe_direct_write(struct pipe *wpipe, struct uio *uio)
1083{
1084        int error;
1085
1086retry:
1087        PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
1088        error = pipelock(wpipe, 1);
1089        if (error != 0)
1090                goto error1;
1091        if ((wpipe->pipe_state & PIPE_EOF) != 0) {
1092                error = EPIPE;
1093                pipeunlock(wpipe);
1094                goto error1;
1095        }
1096        while (wpipe->pipe_state & PIPE_DIRECTW) {
1097                if (wpipe->pipe_state & PIPE_WANTR) {
1098                        wpipe->pipe_state &= ~PIPE_WANTR;
1099                        wakeup(wpipe);
1100                }
1101                pipeselwakeup(wpipe);
1102                wpipe->pipe_state |= PIPE_WANTW;
1103                pipeunlock(wpipe);
1104                error = msleep(wpipe, PIPE_MTX(wpipe),
1105                    PRIBIO | PCATCH, "pipdww", 0);
1106                if (error)
1107                        goto error1;
1108                else
1109                        goto retry;
1110        }
1111        wpipe->pipe_map.cnt = 0;        /* transfer not ready yet */
1112        if (wpipe->pipe_buffer.cnt > 0) {
1113                if (wpipe->pipe_state & PIPE_WANTR) {
1114                        wpipe->pipe_state &= ~PIPE_WANTR;
1115                        wakeup(wpipe);
1116                }
1117                pipeselwakeup(wpipe);
1118                wpipe->pipe_state |= PIPE_WANTW;
1119                pipeunlock(wpipe);
1120                error = msleep(wpipe, PIPE_MTX(wpipe),
1121                    PRIBIO | PCATCH, "pipdwc", 0);
1122                if (error)
1123                        goto error1;
1124                else
1125                        goto retry;
1126        }
1127
1128        wpipe->pipe_state |= PIPE_DIRECTW;
1129
1130        PIPE_UNLOCK(wpipe);
1131        error = pipe_build_write_buffer(wpipe, uio);
1132        PIPE_LOCK(wpipe);
1133        if (error) {
1134                wpipe->pipe_state &= ~PIPE_DIRECTW;
1135                pipeunlock(wpipe);
1136                goto error1;
1137        }
1138
1139        error = 0;
1140        while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
1141                if (wpipe->pipe_state & PIPE_EOF) {
1142                        pipe_destroy_write_buffer(wpipe);
1143                        pipeselwakeup(wpipe);
1144                        pipeunlock(wpipe);
1145                        error = EPIPE;
1146                        goto error1;
1147                }
1148                if (wpipe->pipe_state & PIPE_WANTR) {
1149                        wpipe->pipe_state &= ~PIPE_WANTR;
1150                        wakeup(wpipe);
1151                }
1152                pipeselwakeup(wpipe);
1153                wpipe->pipe_state |= PIPE_WANTW;
1154                pipeunlock(wpipe);
1155                error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH,
1156                    "pipdwt", 0);
1157                pipelock(wpipe, 0);
1158        }
1159
1160        if (wpipe->pipe_state & PIPE_EOF)
1161                error = EPIPE;
1162        if (wpipe->pipe_state & PIPE_DIRECTW) {
1163                /*
1164                 * this bit of trickery substitutes a kernel buffer for
1165                 * the process that might be going away.
1166                 */
1167                pipe_clone_write_buffer(wpipe);
1168        } else {
1169                pipe_destroy_write_buffer(wpipe);
1170        }
1171        pipeunlock(wpipe);
1172        return (error);
1173
1174error1:
1175        wakeup(wpipe);
1176        return (error);
1177}
1178#endif
1179
1180static int
1181pipe_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
1182    int flags, struct thread *td)
1183{
1184        int error = 0;
1185        int desiredsize;
1186        ssize_t orig_resid;
1187        struct pipe *wpipe, *rpipe;
1188
1189        rpipe = fp->f_data;
1190        wpipe = PIPE_PEER(rpipe);
1191        PIPE_LOCK(rpipe);
1192        error = pipelock(wpipe, 1);
1193        if (error) {
1194                PIPE_UNLOCK(rpipe);
1195                return (error);
1196        }
1197        /*
1198         * detect loss of pipe read side, issue SIGPIPE if lost.
1199         */
1200        if (wpipe->pipe_present != PIPE_ACTIVE ||
1201            (wpipe->pipe_state & PIPE_EOF)) {
1202                pipeunlock(wpipe);
1203                PIPE_UNLOCK(rpipe);
1204                return (EPIPE);
1205        }
1206#ifdef MAC
1207        error = mac_pipe_check_write(active_cred, wpipe->pipe_pair);
1208        if (error) {
1209                pipeunlock(wpipe);
1210                PIPE_UNLOCK(rpipe);
1211                return (error);
1212        }
1213#endif
1214        ++wpipe->pipe_busy;
1215
1216        /* Choose a larger size if it's advantageous */
1217        desiredsize = max(SMALL_PIPE_SIZE, wpipe->pipe_buffer.size);
1218        while (desiredsize < wpipe->pipe_buffer.cnt + uio->uio_resid) {
1219                if (piperesizeallowed != 1)
1220                        break;
1221                if (amountpipekva > maxpipekva / 2)
1222                        break;
1223                if (desiredsize == BIG_PIPE_SIZE)
1224                        break;
1225                desiredsize = desiredsize * 2;
1226        }
1227
1228        /* Choose a smaller size if we're in a OOM situation */
1229        if ((amountpipekva > (3 * maxpipekva) / 4) &&
1230                (wpipe->pipe_buffer.size > SMALL_PIPE_SIZE) &&
1231                (wpipe->pipe_buffer.cnt <= SMALL_PIPE_SIZE) &&
1232                (piperesizeallowed == 1))
1233                desiredsize = SMALL_PIPE_SIZE;
1234
1235        /* Resize if the above determined that a new size was necessary */
1236        if ((desiredsize != wpipe->pipe_buffer.size) &&
1237                ((wpipe->pipe_state & PIPE_DIRECTW) == 0)) {
1238                PIPE_UNLOCK(wpipe);
1239                pipespace(wpipe, desiredsize);
1240                PIPE_LOCK(wpipe);
1241        }
1242        if (wpipe->pipe_buffer.size == 0) {
1243                /*
1244                 * This can only happen for reverse direction use of pipes
1245                 * in a complete OOM situation.
1246                 */
1247                error = ENOMEM;
1248                --wpipe->pipe_busy;
1249                pipeunlock(wpipe);
1250                PIPE_UNLOCK(wpipe);
1251                return (error);
1252        }
1253
1254        pipeunlock(wpipe);
1255
1256        orig_resid = uio->uio_resid;
1257
1258        while (uio->uio_resid) {
1259                int space;
1260
1261                pipelock(wpipe, 0);
1262                if (wpipe->pipe_state & PIPE_EOF) {
1263                        pipeunlock(wpipe);
1264                        error = EPIPE;
1265                        break;
1266                }
1267#ifndef PIPE_NODIRECT
1268                /*
1269                 * If the transfer is large, we can gain performance if
1270                 * we do process-to-process copies directly.
1271                 * If the write is non-blocking, we don't use the
1272                 * direct write mechanism.
1273                 *
1274                 * The direct write mechanism will detect the reader going
1275                 * away on us.
1276                 */
1277                if (uio->uio_segflg == UIO_USERSPACE &&
1278                    uio->uio_iov->iov_len >= PIPE_MINDIRECT &&
1279                    wpipe->pipe_buffer.size >= PIPE_MINDIRECT &&
1280                    (fp->f_flag & FNONBLOCK) == 0) {
1281                        pipeunlock(wpipe);
1282                        error = pipe_direct_write(wpipe, uio);
1283                        if (error)
1284                                break;
1285                        continue;
1286                }
1287#endif
1288
1289                /*
1290                 * Pipe buffered writes cannot be coincidental with
1291                 * direct writes.  We wait until the currently executing
1292                 * direct write is completed before we start filling the
1293                 * pipe buffer.  We break out if a signal occurs or the
1294                 * reader goes away.
1295                 */
1296                if (wpipe->pipe_state & PIPE_DIRECTW) {
1297                        if (wpipe->pipe_state & PIPE_WANTR) {
1298                                wpipe->pipe_state &= ~PIPE_WANTR;
1299                                wakeup(wpipe);
1300                        }
1301                        pipeselwakeup(wpipe);
1302                        wpipe->pipe_state |= PIPE_WANTW;
1303                        pipeunlock(wpipe);
1304                        error = msleep(wpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH,
1305                            "pipbww", 0);
1306                        if (error)
1307                                break;
1308                        else
1309                                continue;
1310                }
1311
1312                space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
1313
1314                /* Writes of size <= PIPE_BUF must be atomic. */
1315                if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF))
1316                        space = 0;
1317
1318                if (space > 0) {
1319                        int size;       /* Transfer size */
1320                        int segsize;    /* first segment to transfer */
1321
1322                        /*
1323                         * Transfer size is minimum of uio transfer
1324                         * and free space in pipe buffer.
1325                         */
1326                        if (space > uio->uio_resid)
1327                                size = uio->uio_resid;
1328                        else
1329                                size = space;
1330                        /*
1331                         * First segment to transfer is minimum of
1332                         * transfer size and contiguous space in
1333                         * pipe buffer.  If first segment to transfer
1334                         * is less than the transfer size, we've got
1335                         * a wraparound in the buffer.
1336                         */
1337                        segsize = wpipe->pipe_buffer.size -
1338                                wpipe->pipe_buffer.in;
1339                        if (segsize > size)
1340                                segsize = size;
1341
1342                        /* Transfer first segment */
1343
1344                        PIPE_UNLOCK(rpipe);
1345                        error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
1346                                        segsize, uio);
1347                        PIPE_LOCK(rpipe);
1348
1349                        if (error == 0 && segsize < size) {
1350                                KASSERT(wpipe->pipe_buffer.in + segsize ==
1351                                        wpipe->pipe_buffer.size,
1352                                        ("Pipe buffer wraparound disappeared"));
1353                                /*
1354                                 * Transfer remaining part now, to
1355                                 * support atomic writes.  Wraparound
1356                                 * happened.
1357                                 */
1358
1359                                PIPE_UNLOCK(rpipe);
1360                                error = uiomove(
1361                                    &wpipe->pipe_buffer.buffer[0],
1362                                    size - segsize, uio);
1363                                PIPE_LOCK(rpipe);
1364                        }
1365                        if (error == 0) {
1366                                wpipe->pipe_buffer.in += size;
1367                                if (wpipe->pipe_buffer.in >=
1368                                    wpipe->pipe_buffer.size) {
1369                                        KASSERT(wpipe->pipe_buffer.in ==
1370                                                size - segsize +
1371                                                wpipe->pipe_buffer.size,
1372                                                ("Expected wraparound bad"));
1373                                        wpipe->pipe_buffer.in = size - segsize;
1374                                }
1375
1376                                wpipe->pipe_buffer.cnt += size;
1377                                KASSERT(wpipe->pipe_buffer.cnt <=
1378                                        wpipe->pipe_buffer.size,
1379                                        ("Pipe buffer overflow"));
1380                        }
1381                        pipeunlock(wpipe);
1382                        if (error != 0)
1383                                break;
1384                } else {
1385                        /*
1386                         * If the "read-side" has been blocked, wake it up now.
1387                         */
1388                        if (wpipe->pipe_state & PIPE_WANTR) {
1389                                wpipe->pipe_state &= ~PIPE_WANTR;
1390                                wakeup(wpipe);
1391                        }
1392
1393                        /*
1394                         * don't block on non-blocking I/O
1395                         */
1396#ifndef __rtems__
1397                        if (fp->f_flag & FNONBLOCK) {
1398#else /* __rtems__ */
1399                        if (rtems_bsd_libio_flags_to_fflag(fp->f_io.flags) & FNONBLOCK) {
1400#endif /* __rtems__ */
1401                                error = EAGAIN;
1402                                pipeunlock(wpipe);
1403                                break;
1404                        }
1405
1406                        /*
			 * We have no more space and have something to offer,
			 * so wake up select/poll.
			 */
			pipeselwakeup(wpipe);

			wpipe->pipe_state |= PIPE_WANTW;
			pipeunlock(wpipe);
			error = msleep(wpipe, PIPE_MTX(rpipe),
			    PRIBIO | PCATCH, "pipewr", 0);
			if (error != 0)
				break;
		}
	}

	pipelock(wpipe, 0);
	--wpipe->pipe_busy;

	if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
		wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
		wakeup(wpipe);
	} else if (wpipe->pipe_buffer.cnt > 0) {
		/*
		 * If we have put any characters in the buffer, we wake up
		 * the reader.
		 */
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
	}

	/*
	 * Don't return EPIPE if any byte was written.
	 * EINTR and other interrupts are handled by the generic I/O layer.
	 * Do not pretend that I/O succeeded for an obvious user error
	 * like EFAULT.
	 */
	if (uio->uio_resid != orig_resid && error == EPIPE)
		error = 0;

	if (error == 0)
#ifndef __rtems__
		vfs_timestamp(&wpipe->pipe_mtime);
#else /* __rtems__ */
		wpipe->pipe_mtime.tv_sec = time(NULL);
#endif /* __rtems__ */

	/*
	 * We have something to offer,
	 * so wake up select/poll.
	 */
	if (wpipe->pipe_buffer.cnt)
		pipeselwakeup(wpipe);

	pipeunlock(wpipe);
	PIPE_UNLOCK(rpipe);
	return (error);
}
#ifdef __rtems__
static ssize_t
rtems_bsd_pipe_write(rtems_libio_t *iop, const void *buffer, size_t count)
{
	struct thread *td = rtems_bsd_get_curthread_or_null();
	struct file *fp = rtems_bsd_iop_to_fp(iop);
	struct iovec iov = {
		.iov_base = __DECONST(void *, buffer),
		.iov_len = count
	};
	struct uio auio = {
		.uio_iov = &iov,
		.uio_iovcnt = 1,
		.uio_offset = 0,
		.uio_resid = count,
		.uio_segflg = UIO_USERSPACE,
		.uio_rw = UIO_WRITE,
		.uio_td = td
	};
	int error;

	if (td != NULL) {
		error = pipe_write(fp, &auio, NULL, 0, NULL);
	} else {
		error = ENOMEM;
	}

	if (error == 0) {
		return (count - auio.uio_resid);
	} else {
		rtems_set_errno_and_return_minus_one(error);
	}
}

static ssize_t
rtems_bsd_pipe_writev(rtems_libio_t *iop, const struct iovec *iov,
    int iovcnt, ssize_t total)
{
	struct thread *td = rtems_bsd_get_curthread_or_null();
	struct file *fp = rtems_bsd_iop_to_fp(iop);
	struct uio auio = {
		.uio_iov = __DECONST(struct iovec *, iov),
		.uio_iovcnt = iovcnt,
		.uio_offset = 0,
		.uio_resid = total,
		.uio_segflg = UIO_USERSPACE,
		.uio_rw = UIO_WRITE,
		.uio_td = td
	};
	int error;

	if (td != NULL) {
		error = pipe_write(fp, &auio, NULL, 0, NULL);
	} else {
		error = ENOMEM;
	}

	if (error == 0) {
		return (total - auio.uio_resid);
	} else {
		rtems_set_errno_and_return_minus_one(error);
	}
}
#endif /* __rtems__ */
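
/*
 * Editor's sketch, not part of the upstream file: the wrappers above
 * funnel both write(2) and writev(2) into pipe_write() through a single
 * struct uio and return the bytes consumed (total - uio_resid).  An
 * application-side view, assuming descriptors obtained from pipe(2);
 * the helper name is hypothetical:
 *
 *	#include <sys/uio.h>
 *	#include <unistd.h>
 *
 *	static ssize_t
 *	gather_write(int fd)
 *	{
 *		struct iovec iov[2] = {
 *			{ .iov_base = "hello, ", .iov_len = 7 },
 *			{ .iov_base = "pipe\n", .iov_len = 5 }
 *		};
 *
 *		// May be a short write on a non-blocking descriptor.
 *		return (writev(fd, iov, 2));
 *	}
 */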

/* ARGSUSED */
#ifndef __rtems__
static int
pipe_truncate(struct file *fp, off_t length, struct ucred *active_cred,
    struct thread *td)
{
	struct pipe *cpipe;
	int error;

	cpipe = fp->f_data;
	if (cpipe->pipe_state & PIPE_NAMED)
		error = vnops.fo_truncate(fp, length, active_cred, td);
	else
		error = invfo_truncate(fp, length, active_cred, td);
	return (error);
}
#endif /* __rtems__ */

/*
 * We implement a very minimal set of ioctls for compatibility with sockets.
 */
static int
pipe_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred,
    struct thread *td)
{
	struct pipe *mpipe = fp->f_data;
	int error;

	PIPE_LOCK(mpipe);

#ifdef MAC
	error = mac_pipe_check_ioctl(active_cred, mpipe->pipe_pair, cmd, data);
	if (error) {
		PIPE_UNLOCK(mpipe);
		return (error);
	}
#endif

	error = 0;
	switch (cmd) {

	case FIONBIO:
		break;

	case FIOASYNC:
		if (*(int *)data) {
			mpipe->pipe_state |= PIPE_ASYNC;
		} else {
			mpipe->pipe_state &= ~PIPE_ASYNC;
		}
		break;

	case FIONREAD:
#ifndef __rtems__
		if (!(fp->f_flag & FREAD)) {
#else /* __rtems__ */
		if (!(rtems_bsd_libio_flags_to_fflag(fp->f_io.flags) & FREAD)) {
#endif /* __rtems__ */
			*(int *)data = 0;
			PIPE_UNLOCK(mpipe);
			return (0);
		}
		if (mpipe->pipe_state & PIPE_DIRECTW)
			*(int *)data = mpipe->pipe_map.cnt;
		else
			*(int *)data = mpipe->pipe_buffer.cnt;
		break;

	case FIOSETOWN:
		PIPE_UNLOCK(mpipe);
		error = fsetown(*(int *)data, &mpipe->pipe_sigio);
		goto out_unlocked;

	case FIOGETOWN:
		*(int *)data = fgetown(&mpipe->pipe_sigio);
		break;

	/* This is deprecated; FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		PIPE_UNLOCK(mpipe);
		error = fsetown(-(*(int *)data), &mpipe->pipe_sigio);
		goto out_unlocked;

	/* This is deprecated; FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)data = -fgetown(&mpipe->pipe_sigio);
		break;

	default:
		error = ENOTTY;
		break;
	}
	PIPE_UNLOCK(mpipe);
out_unlocked:
	return (error);
}
#ifdef __rtems__
static int
rtems_bsd_pipe_ioctl(rtems_libio_t *iop, ioctl_command_t request, void *buffer)
{
	struct thread *td = rtems_bsd_get_curthread_or_null();
	struct file *fp = rtems_bsd_iop_to_fp(iop);
	int error;

	if (td != NULL) {
		error = pipe_ioctl(fp, request, buffer, NULL, td);
	} else {
		error = ENOMEM;
	}

	return rtems_bsd_error_to_status_and_errno(error);
}
#endif /* __rtems__ */
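
/*
 * Editor's sketch, not part of the upstream file: the two ioctls an
 * application is most likely to use on a pipe are FIONBIO (toggle
 * non-blocking mode) and FIONREAD (bytes immediately readable, taken
 * from pipe_buffer.cnt or, during a direct write, pipe_map.cnt).
 * The helper name below is hypothetical:
 *
 *	#include <sys/ioctl.h>
 *
 *	static int
 *	pipe_bytes_readable(int fd, int *avail)
 *	{
 *		// On success, *avail holds the count FIONREAD computed.
 *		return (ioctl(fd, FIONREAD, avail));
 *	}
 */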

static int
pipe_poll(struct file *fp, int events, struct ucred *active_cred,
    struct thread *td)
{
	struct pipe *rpipe;
	struct pipe *wpipe;
	int levents, revents;
#ifdef MAC
	int error;
#endif

	revents = 0;
	rpipe = fp->f_data;
	wpipe = PIPE_PEER(rpipe);
	PIPE_LOCK(rpipe);
#ifdef MAC
	error = mac_pipe_check_poll(active_cred, rpipe->pipe_pair);
	if (error)
		goto locked_error;
#endif
#ifndef __rtems__
	if (fp->f_flag & FREAD && events & (POLLIN | POLLRDNORM))
#else /* __rtems__ */
	if (rtems_bsd_libio_flags_to_fflag(fp->f_io.flags) & FREAD &&
	    events & (POLLIN | POLLRDNORM))
#endif /* __rtems__ */
		if ((rpipe->pipe_state & PIPE_DIRECTW) ||
		    (rpipe->pipe_buffer.cnt > 0))
			revents |= events & (POLLIN | POLLRDNORM);

#ifndef __rtems__
	if (fp->f_flag & FWRITE && events & (POLLOUT | POLLWRNORM))
#else /* __rtems__ */
	if (rtems_bsd_libio_flags_to_fflag(fp->f_io.flags) & FWRITE &&
	    events & (POLLOUT | POLLWRNORM))
#endif /* __rtems__ */
		if (wpipe->pipe_present != PIPE_ACTIVE ||
		    (wpipe->pipe_state & PIPE_EOF) ||
		    (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
		     ((wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF ||
		      wpipe->pipe_buffer.size == 0)))
			revents |= events & (POLLOUT | POLLWRNORM);

	levents = events &
	    (POLLIN | POLLINIGNEOF | POLLPRI | POLLRDNORM | POLLRDBAND);
#ifndef __rtems__
	if (rpipe->pipe_state & PIPE_NAMED && fp->f_flag & FREAD && levents &&
	    fp->f_seqcount == rpipe->pipe_wgen)
#else /* __rtems__ */
	if (rpipe->pipe_state & PIPE_NAMED &&
	    rtems_bsd_libio_flags_to_fflag(fp->f_io.flags) & FREAD && levents)
#endif /* __rtems__ */
		events |= POLLINIGNEOF;

	if ((events & POLLINIGNEOF) == 0) {
		if (rpipe->pipe_state & PIPE_EOF) {
			revents |= (events & (POLLIN | POLLRDNORM));
			if (wpipe->pipe_present != PIPE_ACTIVE ||
			    (wpipe->pipe_state & PIPE_EOF))
				revents |= POLLHUP;
		}
	}

	if (revents == 0) {
#ifndef __rtems__
		if (fp->f_flag & FREAD && events & (POLLIN | POLLRDNORM)) {
#else /* __rtems__ */
		if (rtems_bsd_libio_flags_to_fflag(fp->f_io.flags) & FREAD &&
		    events & (POLLIN | POLLRDNORM)) {
#endif /* __rtems__ */
			selrecord(td, &rpipe->pipe_sel);
			if (SEL_WAITING(&rpipe->pipe_sel))
				rpipe->pipe_state |= PIPE_SEL;
		}

#ifndef __rtems__
		if (fp->f_flag & FWRITE && events & (POLLOUT | POLLWRNORM)) {
#else /* __rtems__ */
		if (rtems_bsd_libio_flags_to_fflag(fp->f_io.flags) & FWRITE &&
		    events & (POLLOUT | POLLWRNORM)) {
#endif /* __rtems__ */
			selrecord(td, &wpipe->pipe_sel);
			if (SEL_WAITING(&wpipe->pipe_sel))
				wpipe->pipe_state |= PIPE_SEL;
		}
	}
#ifdef MAC
locked_error:
#endif
	PIPE_UNLOCK(rpipe);

	return (revents);
}
#ifdef __rtems__
static int
rtems_bsd_pipe_poll(rtems_libio_t *iop, int events)
{
	struct thread *td = rtems_bsd_get_curthread_or_null();
	struct file *fp = rtems_bsd_iop_to_fp(iop);
	int error;

	if (td != NULL) {
		error = pipe_poll(fp, events, NULL, td);
	} else {
		error = ENOMEM;
	}

	return error;
}
#endif /* __rtems__ */
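
/*
 * Editor's sketch, not part of the upstream file: pipe_poll() reports
 * POLLIN once any data is buffered, POLLOUT only when the peer is gone
 * or at least PIPE_BUF bytes are free (or the buffer is not yet
 * allocated), and POLLHUP when the write side has vanished.  A minimal
 * caller, with a hypothetical helper name:
 *
 *	#include <poll.h>
 *
 *	static int
 *	wait_readable(int fd)
 *	{
 *		struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *
 *		// >0 means readable data or hangup; 0 means timeout.
 *		return (poll(&pfd, 1, 1000));
 *	}
 */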

/*
 * We shouldn't need locks here, as we're doing a read and this should
 * be a natural race.
 */
#ifndef __rtems__
static int
pipe_stat(struct file *fp, struct stat *ub, struct ucred *active_cred,
    struct thread *td)
{
	struct pipe *pipe;
#else /* __rtems__ */
static int
pipe_stat(struct pipe *pipe, struct stat *ub)
{
#endif /* __rtems__ */
	int new_unr;
#ifdef MAC
	int error;
#endif

#ifndef __rtems__
	pipe = fp->f_data;
#endif /* __rtems__ */
	PIPE_LOCK(pipe);
#ifdef MAC
	error = mac_pipe_check_stat(active_cred, pipe->pipe_pair);
	if (error) {
		PIPE_UNLOCK(pipe);
		return (error);
	}
#endif

	/* For named pipes ask the underlying filesystem. */
	if (pipe->pipe_state & PIPE_NAMED) {
		PIPE_UNLOCK(pipe);
#ifndef __rtems__
		return (vnops.fo_stat(fp, ub, active_cred, td));
#else /* __rtems__ */
		return (ENXIO);
#endif /* __rtems__ */
	}

	/*
	 * Lazily allocate an inode number for the pipe.  Most pipe
	 * users do not call fstat(2) on the pipe, which means that
	 * postponing the inode allocation until it must be returned
	 * to userland is useful.  If alloc_unr failed, assign st_ino
	 * zero instead of returning an error.
	 * Special pipe_ino values:
	 *  -1 - not yet initialized;
	 *  0  - alloc_unr failed, return 0 as st_ino forever.
	 */
	if (pipe->pipe_ino == (ino_t)-1) {
		new_unr = alloc_unr(pipeino_unr);
		if (new_unr != -1)
			pipe->pipe_ino = new_unr;
		else
			pipe->pipe_ino = 0;
	}
	PIPE_UNLOCK(pipe);

#ifndef __rtems__
	bzero(ub, sizeof(*ub));
#endif /* __rtems__ */
	ub->st_mode = S_IFIFO;
	ub->st_blksize = PAGE_SIZE;
	if (pipe->pipe_state & PIPE_DIRECTW)
		ub->st_size = pipe->pipe_map.cnt;
	else
		ub->st_size = pipe->pipe_buffer.cnt;
	ub->st_blocks = howmany(ub->st_size, ub->st_blksize);
	ub->st_atim = pipe->pipe_atime;
	ub->st_mtim = pipe->pipe_mtime;
	ub->st_ctim = pipe->pipe_ctime;
#ifndef __rtems__
	ub->st_uid = fp->f_cred->cr_uid;
	ub->st_gid = fp->f_cred->cr_gid;
	ub->st_dev = pipedev_ino;
	ub->st_ino = pipe->pipe_ino;
#else /* __rtems__ */
	ub->st_uid = BSD_DEFAULT_UID;
	ub->st_gid = BSD_DEFAULT_GID;
	ub->st_dev = rtems_filesystem_make_dev_t(0xcc494cd6U, 0x1d970b4dU);
	ub->st_ino = pipe->pipe_ino;
#endif /* __rtems__ */
	/*
	 * Left as 0: st_nlink, st_rdev, st_flags, st_gen.
	 */
	return (0);
}
#ifdef __rtems__
static int
rtems_bsd_pipe_stat(
	const rtems_filesystem_location_info_t *loc,
	struct stat *buf
)
{
	struct pipe *pipe = rtems_bsd_loc_to_f_data(loc);
	int error = pipe_stat(pipe, buf);

	return rtems_bsd_error_to_status_and_errno(error);
}
#endif /* __rtems__ */
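
/*
 * Editor's sketch, not part of the upstream file: for an anonymous pipe
 * the fstat(2) result above encodes the backlog in st_size, so a caller
 * can measure buffered data without consuming it.  The helper name is
 * hypothetical:
 *
 *	#include <sys/stat.h>
 *
 *	static off_t
 *	pipe_backlog(int fd)
 *	{
 *		struct stat sb;
 *
 *		// st_size is pipe_buffer.cnt, or pipe_map.cnt while a
 *		// direct write is pending.
 *		return (fstat(fd, &sb) == 0 ? sb.st_size : -1);
 *	}
 */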

/* ARGSUSED */
static int
pipe_close(struct file *fp, struct thread *td)
{

#ifndef __rtems__
	if (fp->f_vnode != NULL)
		return vnops.fo_close(fp, td);
	fp->f_ops = &badfileops;
#else /* __rtems__ */
	fp->f_io.pathinfo.handlers = &rtems_filesystem_handlers_default;
#endif /* __rtems__ */
	pipe_dtor(fp->f_data);
	fp->f_data = NULL;
	return (0);
}

#ifndef __rtems__
static int
pipe_chmod(struct file *fp, mode_t mode, struct ucred *active_cred,
    struct thread *td)
{
	struct pipe *cpipe;
	int error;

	cpipe = fp->f_data;
	if (cpipe->pipe_state & PIPE_NAMED)
		error = vn_chmod(fp, mode, active_cred, td);
	else
		error = invfo_chmod(fp, mode, active_cred, td);
	return (error);
}

static int
pipe_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred,
    struct thread *td)
{
	struct pipe *cpipe;
	int error;

	cpipe = fp->f_data;
	if (cpipe->pipe_state & PIPE_NAMED)
		error = vn_chown(fp, uid, gid, active_cred, td);
	else
		error = invfo_chown(fp, uid, gid, active_cred, td);
	return (error);
}

static int
pipe_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
{
	struct pipe *pi;

	if (fp->f_type == DTYPE_FIFO)
		return (vn_fill_kinfo(fp, kif, fdp));
	kif->kf_type = KF_TYPE_PIPE;
	pi = fp->f_data;
	kif->kf_un.kf_pipe.kf_pipe_addr = (uintptr_t)pi;
	kif->kf_un.kf_pipe.kf_pipe_peer = (uintptr_t)pi->pipe_peer;
	kif->kf_un.kf_pipe.kf_pipe_buffer_cnt = pi->pipe_buffer.cnt;
	return (0);
}
#endif /* __rtems__ */

static void
pipe_free_kmem(struct pipe *cpipe)
{

	KASSERT(!mtx_owned(PIPE_MTX(cpipe)),
	    ("pipe_free_kmem: pipe mutex locked"));

	if (cpipe->pipe_buffer.buffer != NULL) {
		atomic_subtract_long(&amountpipekva, cpipe->pipe_buffer.size);
#ifndef __rtems__
		vm_map_remove(pipe_map,
		    (vm_offset_t)cpipe->pipe_buffer.buffer,
		    (vm_offset_t)cpipe->pipe_buffer.buffer + cpipe->pipe_buffer.size);
#else /* __rtems__ */
		free(cpipe->pipe_buffer.buffer, M_TEMP);
#endif /* __rtems__ */
		cpipe->pipe_buffer.buffer = NULL;
	}
#ifndef PIPE_NODIRECT
	{
		cpipe->pipe_map.cnt = 0;
		cpipe->pipe_map.pos = 0;
		cpipe->pipe_map.npages = 0;
	}
#endif
}

/*
 * Shut down the pipe.
 */
static void
pipeclose(struct pipe *cpipe)
{
	struct pipepair *pp;
	struct pipe *ppipe;

	KASSERT(cpipe != NULL, ("pipeclose: cpipe == NULL"));

	PIPE_LOCK(cpipe);
	pipelock(cpipe, 0);
	pp = cpipe->pipe_pair;

	pipeselwakeup(cpipe);

	/*
	 * If the other side is blocked, wake it up saying that
	 * we want to close it down.
	 */
	cpipe->pipe_state |= PIPE_EOF;
	while (cpipe->pipe_busy) {
		wakeup(cpipe);
		cpipe->pipe_state |= PIPE_WANT;
		pipeunlock(cpipe);
		msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0);
		pipelock(cpipe, 0);
	}

	/*
	 * Disconnect from peer, if any.
	 */
	ppipe = cpipe->pipe_peer;
	if (ppipe->pipe_present == PIPE_ACTIVE) {
		pipeselwakeup(ppipe);

		ppipe->pipe_state |= PIPE_EOF;
		wakeup(ppipe);
		KNOTE_LOCKED(&ppipe->pipe_sel.si_note, 0);
	}

	/*
	 * Mark this endpoint as free.  Release kmem resources.  We
	 * don't mark this endpoint as unused until we've finished
	 * doing that, or the pipe might disappear out from under
	 * us.
	 */
	PIPE_UNLOCK(cpipe);
	pipe_free_kmem(cpipe);
	PIPE_LOCK(cpipe);
	cpipe->pipe_present = PIPE_CLOSING;
	pipeunlock(cpipe);

	/*
	 * knlist_clear() may sleep dropping the PIPE_MTX.  Set
	 * PIPE_FINALIZED, which allows the other end to free the
	 * pipe_pair, only after the knotes are completely dismantled.
	 */
	knlist_clear(&cpipe->pipe_sel.si_note, 1);
	cpipe->pipe_present = PIPE_FINALIZED;
	seldrain(&cpipe->pipe_sel);
	knlist_destroy(&cpipe->pipe_sel.si_note);

	/*
	 * If both endpoints are now closed, release the memory for the
	 * pipe pair.  If not, unlock.
	 */
	if (ppipe->pipe_present == PIPE_FINALIZED) {
		PIPE_UNLOCK(cpipe);
#ifdef MAC
		mac_pipe_destroy(pp);
#endif
		uma_zfree(pipe_zone, cpipe->pipe_pair);
	} else
		PIPE_UNLOCK(cpipe);
}

/*ARGSUSED*/
static int
pipe_kqfilter(struct file *fp, struct knote *kn)
{
	struct pipe *cpipe;

	/*
	 * If a filter is requested that is not supported by this file
	 * descriptor, don't return an error, but also don't ever generate an
	 * event.
	 */
#ifndef __rtems__
	if ((kn->kn_filter == EVFILT_READ) && !(fp->f_flag & FREAD)) {
#else /* __rtems__ */
	if ((kn->kn_filter == EVFILT_READ) &&
	    !(rtems_bsd_libio_flags_to_fflag(fp->f_io.flags) & FREAD)) {
#endif /* __rtems__ */
		kn->kn_fop = &pipe_nfiltops;
		return (0);
	}
#ifndef __rtems__
	if ((kn->kn_filter == EVFILT_WRITE) && !(fp->f_flag & FWRITE)) {
#else /* __rtems__ */
	if ((kn->kn_filter == EVFILT_WRITE) &&
	    !(rtems_bsd_libio_flags_to_fflag(fp->f_io.flags) & FWRITE)) {
#endif /* __rtems__ */
		kn->kn_fop = &pipe_nfiltops;
		return (0);
	}
	cpipe = fp->f_data;
	PIPE_LOCK(cpipe);
	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &pipe_rfiltops;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &pipe_wfiltops;
		if (cpipe->pipe_peer->pipe_present != PIPE_ACTIVE) {
			/* The other end of the pipe has been closed. */
			PIPE_UNLOCK(cpipe);
			return (EPIPE);
		}
		cpipe = PIPE_PEER(cpipe);
		break;
	default:
		PIPE_UNLOCK(cpipe);
		return (EINVAL);
	}

	kn->kn_hook = cpipe;
	knlist_add(&cpipe->pipe_sel.si_note, kn, 1);
	PIPE_UNLOCK(cpipe);
	return (0);
}
#ifdef __rtems__
int
rtems_bsd_pipe_kqfilter(rtems_libio_t *iop, struct knote *kn)
{
	struct file *fp = rtems_bsd_iop_to_fp(iop);

	return pipe_kqfilter(fp, kn);
}
#endif /* __rtems__ */
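
/*
 * Editor's sketch, not part of the upstream file: registering a knote
 * through pipe_kqfilter() from an application.  EVFILT_READ reports the
 * buffered byte count in the event's data field and sets EV_EOF once
 * the writer closes.  The helper name is hypothetical:
 *
 *	#include <sys/event.h>
 *
 *	static int
 *	watch_pipe(int kq, int fd)
 *	{
 *		struct kevent ev;
 *
 *		// One-time registration; events arrive via later
 *		// kevent() waits on kq.
 *		EV_SET(&ev, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *		return (kevent(kq, &ev, 1, NULL, 0, NULL));
 *	}
 */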

static void
filt_pipedetach(struct knote *kn)
{
	struct pipe *cpipe = kn->kn_hook;

	PIPE_LOCK(cpipe);
	knlist_remove(&cpipe->pipe_sel.si_note, kn, 1);
	PIPE_UNLOCK(cpipe);
}

/*ARGSUSED*/
static int
filt_piperead(struct knote *kn, long hint)
{
	struct pipe *rpipe = kn->kn_hook;
	struct pipe *wpipe = rpipe->pipe_peer;
	int ret;

	PIPE_LOCK_ASSERT(rpipe, MA_OWNED);
	kn->kn_data = rpipe->pipe_buffer.cnt;
	if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW))
		kn->kn_data = rpipe->pipe_map.cnt;

	if ((rpipe->pipe_state & PIPE_EOF) ||
	    wpipe->pipe_present != PIPE_ACTIVE ||
	    (wpipe->pipe_state & PIPE_EOF)) {
		kn->kn_flags |= EV_EOF;
		return (1);
	}
	ret = kn->kn_data > 0;
	return ret;
}

/*ARGSUSED*/
static int
filt_pipewrite(struct knote *kn, long hint)
{
	struct pipe *wpipe;

	wpipe = kn->kn_hook;
	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
	if (wpipe->pipe_present != PIPE_ACTIVE ||
	    (wpipe->pipe_state & PIPE_EOF)) {
		kn->kn_data = 0;
		kn->kn_flags |= EV_EOF;
		return (1);
	}
	kn->kn_data = (wpipe->pipe_buffer.size > 0) ?
	    (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) : PIPE_BUF;
	if (wpipe->pipe_state & PIPE_DIRECTW)
		kn->kn_data = 0;

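	/*
	 * Editor's note: requiring PIPE_BUF writable bytes below mirrors
	 * the POSIX rule that writes of up to PIPE_BUF bytes are atomic,
	 * so a triggered write filter means such a write can proceed
	 * without blocking.
	 */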
	return (kn->kn_data >= PIPE_BUF);
}

static void
filt_pipedetach_notsup(struct knote *kn)
{

}

static int
filt_pipenotsup(struct knote *kn, long hint)
{

	return (0);
}
#ifdef __rtems__
static int
rtems_bsd_pipe_open(rtems_libio_t *iop, const char *path, int oflag,
    mode_t mode)
{
	return rtems_bsd_error_to_status_and_errno(ENXIO);
}

static int
rtems_bsd_pipe_close(rtems_libio_t *iop)
{
	struct file *fp = rtems_bsd_iop_to_fp(iop);
	int error = pipe_close(fp, NULL);

	return rtems_bsd_error_to_status_and_errno(error);
}

static int
rtems_bsd_pipe_fcntl(rtems_libio_t *iop, int cmd)
{
	int error = 0;

	if (cmd == F_SETFL) {
		struct file *fp = rtems_bsd_iop_to_fp(iop);
		int nbio = iop->flags & LIBIO_FLAGS_NO_DELAY;

		error = pipe_ioctl(fp, FIONBIO, &nbio, NULL, NULL);
	}

	return rtems_bsd_error_to_status_and_errno(error);
}
#endif /* __rtems__ */
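
/*
 * Editor's sketch, not part of the upstream file: rtems_bsd_pipe_fcntl()
 * above maps the F_SETFL non-blocking bit onto the FIONBIO ioctl, so the
 * usual POSIX idiom works unchanged.  The helper name is hypothetical:
 *
 *	#include <fcntl.h>
 *
 *	static int
 *	set_nonblock(int fd)
 *	{
 *		int flags = fcntl(fd, F_GETFL, 0);
 *
 *		// Preserve the other status flags while adding O_NONBLOCK.
 *		return (flags < 0 ? -1 :
 *		    fcntl(fd, F_SETFL, flags | O_NONBLOCK));
 *	}
 */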