1 | #include <machine/rtems-bsd-kernel-space.h> |
---|
2 | |
---|
3 | /*- |
---|
4 | * SPDX-License-Identifier: BSD-4-Clause |
---|
5 | * |
---|
6 | * Copyright (c) 1996 John S. Dyson |
---|
7 | * Copyright (c) 2012 Giovanni Trematerra |
---|
8 | * All rights reserved. |
---|
9 | * |
---|
10 | * Redistribution and use in source and binary forms, with or without |
---|
11 | * modification, are permitted provided that the following conditions |
---|
12 | * are met: |
---|
13 | * 1. Redistributions of source code must retain the above copyright |
---|
14 | * notice immediately at the beginning of the file, without modification, |
---|
15 | * this list of conditions, and the following disclaimer. |
---|
16 | * 2. Redistributions in binary form must reproduce the above copyright |
---|
17 | * notice, this list of conditions and the following disclaimer in the |
---|
18 | * documentation and/or other materials provided with the distribution. |
---|
19 | * 3. Absolutely no warranty of function or purpose is made by the author |
---|
20 | * John S. Dyson. |
---|
21 | * 4. Modifications may be freely made to this file if the above conditions |
---|
22 | * are met. |
---|
23 | */ |
---|
24 | |
---|
25 | /* |
---|
26 | * This file contains a high-performance replacement for the socket-based |
---|
27 | * pipes scheme originally used in FreeBSD/4.4Lite. It does not support |
---|
28 | * all features of sockets, but does do everything that pipes normally |
---|
29 | * do. |
---|
30 | */ |
---|
31 | |
---|
32 | /* |
---|
33 | * This code has two modes of operation, a small write mode and a large |
---|
34 | * write mode. The small write mode acts like conventional pipes with |
---|
35 | * a kernel buffer. If the buffer is less than PIPE_MINDIRECT, then the |
---|
36 | * "normal" pipe buffering is done. If the buffer is between PIPE_MINDIRECT |
---|
37 | * and PIPE_SIZE in size, the sending process pins the underlying pages in |
---|
38 | * memory, and the receiving process copies directly from these pinned pages |
---|
39 | * in the sending process. |
---|
40 | * |
---|
41 | * If the sending process receives a signal, it is possible that it will |
---|
42 | * go away, and certainly its address space can change, because control |
---|
43 | * is returned back to the user-mode side. In that case, the pipe code |
---|
44 | * arranges to copy the buffer supplied by the user process, to a pageable |
---|
45 | * kernel buffer, and the receiving process will grab the data from the |
---|
46 | * pageable kernel buffer. Since signals don't happen all that often, |
---|
47 | * the copy operation is normally eliminated. |
---|
48 | * |
---|
49 | * The constant PIPE_MINDIRECT is chosen to make sure that buffering will |
---|
50 | * happen for small transfers so that the system will not spend all of |
---|
51 | * its time context switching. |
---|
52 | * |
---|
53 | * In order to limit the resource use of pipes, two sysctls exist: |
---|
54 | * |
---|
55 | * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable |
---|
56 | * address space available to us in pipe_map. This value is normally |
---|
57 | * autotuned, but may also be loader tuned. |
---|
58 | * |
---|
59 | * kern.ipc.pipekva - This read-only sysctl tracks the current amount of |
---|
60 | * memory in use by pipes. |
---|
61 | * |
---|
62 | * Based on how large pipekva is relative to maxpipekva, the following |
---|
63 | * will happen: |
---|
64 | * |
---|
65 | * 0% - 50%: |
---|
66 | * New pipes are given 16K of memory backing, pipes may dynamically |
---|
67 | * grow to as large as 64K where needed. |
---|
68 | * 50% - 75%: |
---|
69 | * New pipes are given 4K (or PAGE_SIZE) of memory backing, |
---|
70 | * existing pipes may NOT grow. |
---|
71 | * 75% - 100%: |
---|
72 | * New pipes are given 4K (or PAGE_SIZE) of memory backing, |
---|
73 | * existing pipes will be shrunk down to 4K whenever possible. |
---|
74 | * |
---|
75 | * Resizing may be disabled by setting kern.ipc.piperesizeallowed=0. If |
---|
76 | * that is set, the only resize that will occur is the 0 -> SMALL_PIPE_SIZE |
---|
77 | * resize which MUST occur for reverse-direction pipes when they are |
---|
78 | * first used. |
---|
79 | * |
---|
80 | * Additional information about the current state of pipes may be obtained |
---|
81 | * from kern.ipc.pipes, kern.ipc.pipefragretry, kern.ipc.pipeallocfail, |
---|
82 | * and kern.ipc.piperesizefail. |
---|
83 | * |
---|
84 | * Locking rules: There are two locks present here: A mutex, used via |
---|
85 | * PIPE_LOCK, and a flag, used via pipelock(). All locking is done via |
---|
86 | * the flag, as mutexes can not persist over uiomove. The mutex |
---|
87 | * exists only to guard access to the flag, and is not in itself a |
---|
88 | * locking mechanism. Also note that there is only a single mutex for |
---|
89 | * both directions of a pipe. |
---|
90 | * |
---|
91 | * As pipelock() may have to sleep before it can acquire the flag, it |
---|
92 | * is important to reread all data after a call to pipelock(); everything |
---|
93 | * in the structure may have changed. |
---|
94 | */ |
---|
95 | |
---|
96 | #include <sys/cdefs.h> |
---|
97 | __FBSDID("$FreeBSD$"); |
---|
98 | |
---|
99 | #include <sys/param.h> |
---|
100 | #include <sys/systm.h> |
---|
101 | #include <sys/conf.h> |
---|
102 | #include <sys/fcntl.h> |
---|
103 | #include <sys/file.h> |
---|
104 | #include <sys/filedesc.h> |
---|
105 | #include <sys/filio.h> |
---|
106 | #include <sys/kernel.h> |
---|
107 | #include <sys/lock.h> |
---|
108 | #include <sys/mutex.h> |
---|
109 | #include <sys/ttycom.h> |
---|
110 | #include <sys/stat.h> |
---|
111 | #include <sys/malloc.h> |
---|
112 | #include <sys/poll.h> |
---|
113 | #include <sys/selinfo.h> |
---|
114 | #include <sys/signalvar.h> |
---|
115 | #include <sys/syscallsubr.h> |
---|
116 | #include <sys/sysctl.h> |
---|
117 | #include <sys/sysproto.h> |
---|
118 | #include <sys/pipe.h> |
---|
119 | #include <sys/proc.h> |
---|
120 | #include <sys/vnode.h> |
---|
121 | #include <sys/uio.h> |
---|
122 | #include <sys/user.h> |
---|
123 | #include <sys/event.h> |
---|
124 | |
---|
125 | #include <security/mac/mac_framework.h> |
---|
126 | |
---|
127 | #include <vm/vm.h> |
---|
128 | #include <vm/vm_param.h> |
---|
129 | #include <vm/vm_object.h> |
---|
130 | #include <vm/vm_kern.h> |
---|
131 | #include <vm/vm_extern.h> |
---|
132 | #include <vm/pmap.h> |
---|
133 | #include <vm/vm_map.h> |
---|
134 | #include <vm/vm_page.h> |
---|
135 | #include <vm/uma.h> |
---|
136 | |
---|
137 | /* |
---|
138 | * Use this define if you want to disable *fancy* VM things. Expect an |
---|
139 | * approx 30% decrease in transfer rate. This could be useful for |
---|
140 | * NetBSD or OpenBSD. |
---|
141 | */ |
---|
142 | /* #define PIPE_NODIRECT */ |
---|
143 | |
---|
/*
 * Evaluates to `pipe' itself when its pipe_state has PIPE_NAMED set and to
 * its pipe_peer otherwise.  The argument is expanded more than once, so it
 * must not have side effects.
 */
#define	PIPE_PEER(pipe)							\
	((((pipe)->pipe_state & PIPE_NAMED) != 0) ?			\
	    (pipe) : (pipe)->pipe_peer)
---|
146 | |
---|
147 | /* |
---|
148 | * interfaces to the outside world |
---|
149 | */ |
---|
150 | static fo_rdwr_t pipe_read; |
---|
151 | static fo_rdwr_t pipe_write; |
---|
152 | static fo_truncate_t pipe_truncate; |
---|
153 | static fo_ioctl_t pipe_ioctl; |
---|
154 | static fo_poll_t pipe_poll; |
---|
155 | static fo_kqfilter_t pipe_kqfilter; |
---|
156 | static fo_stat_t pipe_stat; |
---|
157 | static fo_close_t pipe_close; |
---|
158 | static fo_chmod_t pipe_chmod; |
---|
159 | static fo_chown_t pipe_chown; |
---|
160 | static fo_fill_kinfo_t pipe_fill_kinfo; |
---|
161 | |
---|
162 | struct fileops pipeops = { |
---|
163 | .fo_read = pipe_read, |
---|
164 | .fo_write = pipe_write, |
---|
165 | .fo_truncate = pipe_truncate, |
---|
166 | .fo_ioctl = pipe_ioctl, |
---|
167 | .fo_poll = pipe_poll, |
---|
168 | .fo_kqfilter = pipe_kqfilter, |
---|
169 | .fo_stat = pipe_stat, |
---|
170 | .fo_close = pipe_close, |
---|
171 | .fo_chmod = pipe_chmod, |
---|
172 | .fo_chown = pipe_chown, |
---|
173 | .fo_sendfile = invfo_sendfile, |
---|
174 | .fo_fill_kinfo = pipe_fill_kinfo, |
---|
175 | .fo_flags = DFLAG_PASSABLE |
---|
176 | }; |
---|
177 | |
---|
178 | #ifdef __rtems__ |
---|
179 | long maxpipekva; /* Limit on pipe KVA */ |
---|
180 | #endif /* __rtems__ */ |
---|
181 | |
---|
182 | static void filt_pipedetach(struct knote *kn); |
---|
183 | static void filt_pipedetach_notsup(struct knote *kn); |
---|
184 | static int filt_pipenotsup(struct knote *kn, long hint); |
---|
185 | static int filt_piperead(struct knote *kn, long hint); |
---|
186 | static int filt_pipewrite(struct knote *kn, long hint); |
---|
187 | |
---|
188 | static struct filterops pipe_nfiltops = { |
---|
189 | .f_isfd = 1, |
---|
190 | .f_detach = filt_pipedetach_notsup, |
---|
191 | .f_event = filt_pipenotsup |
---|
192 | }; |
---|
193 | static struct filterops pipe_rfiltops = { |
---|
194 | .f_isfd = 1, |
---|
195 | .f_detach = filt_pipedetach, |
---|
196 | .f_event = filt_piperead |
---|
197 | }; |
---|
198 | static struct filterops pipe_wfiltops = { |
---|
199 | .f_isfd = 1, |
---|
200 | .f_detach = filt_pipedetach, |
---|
201 | .f_event = filt_pipewrite |
---|
202 | }; |
---|
203 | |
---|
204 | /* |
---|
205 | * Default pipe buffer size(s), this can be kind-of large now because pipe |
---|
206 | * space is pageable. The pipe code will try to maintain locality of |
---|
207 | * reference for performance reasons, so small amounts of outstanding I/O |
---|
208 | * will not wipe the cache. |
---|
209 | */ |
---|
/* One third and two thirds of PIPE_SIZE (integer division). */
#define	MINPIPESIZE	(PIPE_SIZE / 3)
#define	MAXPIPESIZE	(2 * PIPE_SIZE / 3)
---|
212 | |
---|
213 | static long amountpipekva; |
---|
214 | static int pipefragretry; |
---|
215 | static int pipeallocfail; |
---|
216 | static int piperesizefail; |
---|
217 | static int piperesizeallowed = 1; |
---|
218 | |
---|
219 | SYSCTL_LONG(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, |
---|
220 | &maxpipekva, 0, "Pipe KVA limit"); |
---|
221 | SYSCTL_LONG(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD, |
---|
222 | &amountpipekva, 0, "Pipe KVA usage"); |
---|
223 | SYSCTL_INT(_kern_ipc, OID_AUTO, pipefragretry, CTLFLAG_RD, |
---|
224 | &pipefragretry, 0, "Pipe allocation retries due to fragmentation"); |
---|
225 | SYSCTL_INT(_kern_ipc, OID_AUTO, pipeallocfail, CTLFLAG_RD, |
---|
226 | &pipeallocfail, 0, "Pipe allocation failures"); |
---|
227 | SYSCTL_INT(_kern_ipc, OID_AUTO, piperesizefail, CTLFLAG_RD, |
---|
228 | &piperesizefail, 0, "Pipe resize failures"); |
---|
229 | SYSCTL_INT(_kern_ipc, OID_AUTO, piperesizeallowed, CTLFLAG_RW, |
---|
230 | &piperesizeallowed, 0, "Pipe resizing allowed"); |
---|
231 | |
---|
232 | static void pipeinit(void *dummy __unused); |
---|
233 | static void pipeclose(struct pipe *cpipe); |
---|
234 | static void pipe_free_kmem(struct pipe *cpipe); |
---|
235 | static void pipe_create(struct pipe *pipe, int backing); |
---|
236 | static void pipe_paircreate(struct thread *td, struct pipepair **p_pp); |
---|
237 | static __inline int pipelock(struct pipe *cpipe, int catch); |
---|
238 | static __inline void pipeunlock(struct pipe *cpipe); |
---|
239 | #ifndef PIPE_NODIRECT |
---|
240 | static int pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio); |
---|
241 | static void pipe_destroy_write_buffer(struct pipe *wpipe); |
---|
242 | static int pipe_direct_write(struct pipe *wpipe, struct uio *uio); |
---|
243 | static void pipe_clone_write_buffer(struct pipe *wpipe); |
---|
244 | #endif |
---|
245 | static int pipespace(struct pipe *cpipe, int size); |
---|
246 | static int pipespace_new(struct pipe *cpipe, int size); |
---|
247 | |
---|
248 | static int pipe_zone_ctor(void *mem, int size, void *arg, int flags); |
---|
249 | static int pipe_zone_init(void *mem, int size, int flags); |
---|
250 | static void pipe_zone_fini(void *mem, int size); |
---|
251 | |
---|
252 | static uma_zone_t pipe_zone; |
---|
253 | static struct unrhdr64 pipeino_unr; |
---|
254 | static dev_t pipedev_ino; |
---|
255 | |
---|
256 | #ifndef __rtems__ |
---|
257 | SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL); |
---|
258 | #else /* __rtems__ */ |
---|
259 | SYSINIT(vfspip, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL); |
---|
260 | #endif /* __rtems__ */ |
---|
261 | |
---|
262 | static void |
---|
263 | pipeinit(void *dummy __unused) |
---|
264 | { |
---|
265 | |
---|
266 | pipe_zone = uma_zcreate("pipe", sizeof(struct pipepair), |
---|
267 | pipe_zone_ctor, NULL, pipe_zone_init, pipe_zone_fini, |
---|
268 | UMA_ALIGN_PTR, 0); |
---|
269 | KASSERT(pipe_zone != NULL, ("pipe_zone not initialized")); |
---|
270 | new_unrhdr64(&pipeino_unr, 1); |
---|
271 | pipedev_ino = devfs_alloc_cdp_inode(); |
---|
272 | KASSERT(pipedev_ino > 0, ("pipe dev inode not initialized")); |
---|
273 | } |
---|
274 | |
---|
275 | static int |
---|
276 | pipe_zone_ctor(void *mem, int size, void *arg, int flags) |
---|
277 | { |
---|
278 | struct pipepair *pp; |
---|
279 | struct pipe *rpipe, *wpipe; |
---|
280 | |
---|
281 | KASSERT(size == sizeof(*pp), ("pipe_zone_ctor: wrong size")); |
---|
282 | |
---|
283 | pp = (struct pipepair *)mem; |
---|
284 | |
---|
285 | /* |
---|
286 | * We zero both pipe endpoints to make sure all the kmem pointers |
---|
287 | * are NULL, flag fields are zero'd, etc. We timestamp both |
---|
288 | * endpoints with the same time. |
---|
289 | */ |
---|
290 | rpipe = &pp->pp_rpipe; |
---|
291 | bzero(rpipe, sizeof(*rpipe)); |
---|
292 | vfs_timestamp(&rpipe->pipe_ctime); |
---|
293 | rpipe->pipe_atime = rpipe->pipe_mtime = rpipe->pipe_ctime; |
---|
294 | |
---|
295 | wpipe = &pp->pp_wpipe; |
---|
296 | bzero(wpipe, sizeof(*wpipe)); |
---|
297 | wpipe->pipe_ctime = rpipe->pipe_ctime; |
---|
298 | wpipe->pipe_atime = wpipe->pipe_mtime = rpipe->pipe_ctime; |
---|
299 | |
---|
300 | rpipe->pipe_peer = wpipe; |
---|
301 | rpipe->pipe_pair = pp; |
---|
302 | wpipe->pipe_peer = rpipe; |
---|
303 | wpipe->pipe_pair = pp; |
---|
304 | |
---|
305 | /* |
---|
306 | * Mark both endpoints as present; they will later get free'd |
---|
307 | * one at a time. When both are free'd, then the whole pair |
---|
308 | * is released. |
---|
309 | */ |
---|
310 | rpipe->pipe_present = PIPE_ACTIVE; |
---|
311 | wpipe->pipe_present = PIPE_ACTIVE; |
---|
312 | |
---|
313 | /* |
---|
314 | * Eventually, the MAC Framework may initialize the label |
---|
315 | * in ctor or init, but for now we do it elswhere to avoid |
---|
316 | * blocking in ctor or init. |
---|
317 | */ |
---|
318 | pp->pp_label = NULL; |
---|
319 | |
---|
320 | return (0); |
---|
321 | } |
---|
322 | |
---|
323 | static int |
---|
324 | pipe_zone_init(void *mem, int size, int flags) |
---|
325 | { |
---|
326 | struct pipepair *pp; |
---|
327 | |
---|
328 | KASSERT(size == sizeof(*pp), ("pipe_zone_init: wrong size")); |
---|
329 | |
---|
330 | pp = (struct pipepair *)mem; |
---|
331 | |
---|
332 | mtx_init(&pp->pp_mtx, "pipe mutex", NULL, MTX_DEF | MTX_NEW); |
---|
333 | return (0); |
---|
334 | } |
---|
335 | |
---|
336 | static void |
---|
337 | pipe_zone_fini(void *mem, int size) |
---|
338 | { |
---|
339 | struct pipepair *pp; |
---|
340 | |
---|
341 | KASSERT(size == sizeof(*pp), ("pipe_zone_fini: wrong size")); |
---|
342 | |
---|
343 | pp = (struct pipepair *)mem; |
---|
344 | |
---|
345 | mtx_destroy(&pp->pp_mtx); |
---|
346 | } |
---|
347 | |
---|
348 | static void |
---|
349 | pipe_paircreate(struct thread *td, struct pipepair **p_pp) |
---|
350 | { |
---|
351 | struct pipepair *pp; |
---|
352 | struct pipe *rpipe, *wpipe; |
---|
353 | |
---|
354 | *p_pp = pp = uma_zalloc(pipe_zone, M_WAITOK); |
---|
355 | #ifdef MAC |
---|
356 | /* |
---|
357 | * The MAC label is shared between the connected endpoints. As a |
---|
358 | * result mac_pipe_init() and mac_pipe_create() are called once |
---|
359 | * for the pair, and not on the endpoints. |
---|
360 | */ |
---|
361 | mac_pipe_init(pp); |
---|
362 | mac_pipe_create(td->td_ucred, pp); |
---|
363 | #endif |
---|
364 | rpipe = &pp->pp_rpipe; |
---|
365 | wpipe = &pp->pp_wpipe; |
---|
366 | |
---|
367 | knlist_init_mtx(&rpipe->pipe_sel.si_note, PIPE_MTX(rpipe)); |
---|
368 | knlist_init_mtx(&wpipe->pipe_sel.si_note, PIPE_MTX(wpipe)); |
---|
369 | |
---|
370 | /* Only the forward direction pipe is backed by default */ |
---|
371 | pipe_create(rpipe, 1); |
---|
372 | pipe_create(wpipe, 0); |
---|
373 | |
---|
374 | rpipe->pipe_state |= PIPE_DIRECTOK; |
---|
375 | wpipe->pipe_state |= PIPE_DIRECTOK; |
---|
376 | } |
---|
377 | |
---|
378 | void |
---|
379 | pipe_named_ctor(struct pipe **ppipe, struct thread *td) |
---|
380 | { |
---|
381 | struct pipepair *pp; |
---|
382 | |
---|
383 | pipe_paircreate(td, &pp); |
---|
384 | pp->pp_rpipe.pipe_state |= PIPE_NAMED; |
---|
385 | *ppipe = &pp->pp_rpipe; |
---|
386 | } |
---|
387 | |
---|
388 | void |
---|
389 | pipe_dtor(struct pipe *dpipe) |
---|
390 | { |
---|
391 | struct pipe *peer; |
---|
392 | ino_t ino; |
---|
393 | |
---|
394 | ino = dpipe->pipe_ino; |
---|
395 | peer = (dpipe->pipe_state & PIPE_NAMED) != 0 ? dpipe->pipe_peer : NULL; |
---|
396 | funsetown(&dpipe->pipe_sigio); |
---|
397 | pipeclose(dpipe); |
---|
398 | if (peer != NULL) { |
---|
399 | funsetown(&peer->pipe_sigio); |
---|
400 | pipeclose(peer); |
---|
401 | } |
---|
402 | } |
---|
403 | |
---|
404 | /* |
---|
405 | * The pipe system call for the DTYPE_PIPE type of pipes. If we fail, let |
---|
406 | * the zone pick up the pieces via pipeclose(). |
---|
407 | */ |
---|
408 | int |
---|
409 | kern_pipe(struct thread *td, int fildes[2], int flags, struct filecaps *fcaps1, |
---|
410 | struct filecaps *fcaps2) |
---|
411 | { |
---|
412 | struct file *rf, *wf; |
---|
413 | struct pipe *rpipe, *wpipe; |
---|
414 | struct pipepair *pp; |
---|
415 | int fd, fflags, error; |
---|
416 | |
---|
417 | pipe_paircreate(td, &pp); |
---|
418 | rpipe = &pp->pp_rpipe; |
---|
419 | wpipe = &pp->pp_wpipe; |
---|
420 | error = falloc_caps(td, &rf, &fd, flags, fcaps1); |
---|
421 | if (error) { |
---|
422 | pipeclose(rpipe); |
---|
423 | pipeclose(wpipe); |
---|
424 | return (error); |
---|
425 | } |
---|
426 | /* An extra reference on `rf' has been held for us by falloc_caps(). */ |
---|
427 | fildes[0] = fd; |
---|
428 | |
---|
429 | fflags = FREAD | FWRITE; |
---|
430 | if ((flags & O_NONBLOCK) != 0) |
---|
431 | fflags |= FNONBLOCK; |
---|
432 | |
---|
433 | /* |
---|
434 | * Warning: once we've gotten past allocation of the fd for the |
---|
435 | * read-side, we can only drop the read side via fdrop() in order |
---|
436 | * to avoid races against processes which manage to dup() the read |
---|
437 | * side while we are blocked trying to allocate the write side. |
---|
438 | */ |
---|
439 | finit(rf, fflags, DTYPE_PIPE, rpipe, &pipeops); |
---|
440 | error = falloc_caps(td, &wf, &fd, flags, fcaps2); |
---|
441 | if (error) { |
---|
442 | fdclose(td, rf, fildes[0]); |
---|
443 | fdrop(rf, td); |
---|
444 | /* rpipe has been closed by fdrop(). */ |
---|
445 | pipeclose(wpipe); |
---|
446 | return (error); |
---|
447 | } |
---|
448 | /* An extra reference on `wf' has been held for us by falloc_caps(). */ |
---|
449 | finit(wf, fflags, DTYPE_PIPE, wpipe, &pipeops); |
---|
450 | fdrop(wf, td); |
---|
451 | fildes[1] = fd; |
---|
452 | fdrop(rf, td); |
---|
453 | |
---|
454 | return (0); |
---|
455 | } |
---|
456 | |
---|
#ifdef COMPAT_FREEBSD10
/*
 * Compatibility entry point: create a pipe with no flags and return the
 * two descriptors in td_retval[0] and td_retval[1].  `uap' is unused.
 */
/* ARGSUSED */
int
freebsd10_pipe(struct thread *td, struct freebsd10_pipe_args *uap __unused)
{
	int fds[2];
	int rv;

	rv = kern_pipe(td, fds, 0, NULL, NULL);
	if (rv == 0) {
		td->td_retval[0] = fds[0];
		td->td_retval[1] = fds[1];
	}

	return (rv);
}
#endif
---|
475 | |
---|
476 | int |
---|
477 | sys_pipe2(struct thread *td, struct pipe2_args *uap) |
---|
478 | { |
---|
479 | int error, fildes[2]; |
---|
480 | |
---|
481 | if (uap->flags & ~(O_CLOEXEC | O_NONBLOCK)) |
---|
482 | return (EINVAL); |
---|
483 | error = kern_pipe(td, fildes, uap->flags, NULL, NULL); |
---|
484 | if (error) |
---|
485 | return (error); |
---|
486 | error = copyout(fildes, uap->fildes, 2 * sizeof(int)); |
---|
487 | if (error) { |
---|
488 | (void)kern_close(td, fildes[0]); |
---|
489 | (void)kern_close(td, fildes[1]); |
---|
490 | } |
---|
491 | return (error); |
---|
492 | } |
---|
493 | |
---|
494 | |
---|
495 | /* |
---|
496 | * Allocate kva for pipe circular buffer, the space is pageable |
---|
497 | * This routine will 'realloc' the size of a pipe safely, if it fails |
---|
498 | * it will retain the old buffer. |
---|
499 | * If it fails it will return ENOMEM. |
---|
500 | */ |
---|
501 | static int |
---|
502 | pipespace_new(struct pipe *cpipe, int size) |
---|
503 | { |
---|
504 | caddr_t buffer; |
---|
505 | int error, cnt, firstseg; |
---|
506 | static int curfail = 0; |
---|
507 | static struct timeval lastfail; |
---|
508 | |
---|
509 | KASSERT(!mtx_owned(PIPE_MTX(cpipe)), ("pipespace: pipe mutex locked")); |
---|
510 | KASSERT(!(cpipe->pipe_state & PIPE_DIRECTW), |
---|
511 | ("pipespace: resize of direct writes not allowed")); |
---|
512 | retry: |
---|
513 | cnt = cpipe->pipe_buffer.cnt; |
---|
514 | if (cnt > size) |
---|
515 | size = cnt; |
---|
516 | |
---|
517 | size = round_page(size); |
---|
518 | #ifndef __rtems__ |
---|
519 | buffer = (caddr_t) vm_map_min(pipe_map); |
---|
520 | |
---|
521 | error = vm_map_find(pipe_map, NULL, 0, (vm_offset_t *)&buffer, size, 0, |
---|
522 | VMFS_ANY_SPACE, VM_PROT_RW, VM_PROT_RW, 0); |
---|
523 | if (error != KERN_SUCCESS) { |
---|
524 | #else /* __rtems__ */ |
---|
525 | (void)error; |
---|
526 | buffer = malloc(size, M_TEMP, M_WAITOK | M_ZERO); |
---|
527 | if (buffer == NULL) { |
---|
528 | #endif /* __rtems__ */ |
---|
529 | if ((cpipe->pipe_buffer.buffer == NULL) && |
---|
530 | (size > SMALL_PIPE_SIZE)) { |
---|
531 | size = SMALL_PIPE_SIZE; |
---|
532 | pipefragretry++; |
---|
533 | goto retry; |
---|
534 | } |
---|
535 | if (cpipe->pipe_buffer.buffer == NULL) { |
---|
536 | pipeallocfail++; |
---|
537 | if (ppsratecheck(&lastfail, &curfail, 1)) |
---|
538 | printf("kern.ipc.maxpipekva exceeded; see tuning(7)\n"); |
---|
539 | } else { |
---|
540 | piperesizefail++; |
---|
541 | } |
---|
542 | return (ENOMEM); |
---|
543 | } |
---|
544 | |
---|
545 | /* copy data, then free old resources if we're resizing */ |
---|
546 | if (cnt > 0) { |
---|
547 | if (cpipe->pipe_buffer.in <= cpipe->pipe_buffer.out) { |
---|
548 | firstseg = cpipe->pipe_buffer.size - cpipe->pipe_buffer.out; |
---|
549 | bcopy(&cpipe->pipe_buffer.buffer[cpipe->pipe_buffer.out], |
---|
550 | buffer, firstseg); |
---|
551 | if ((cnt - firstseg) > 0) |
---|
552 | bcopy(cpipe->pipe_buffer.buffer, &buffer[firstseg], |
---|
553 | cpipe->pipe_buffer.in); |
---|
554 | } else { |
---|
555 | bcopy(&cpipe->pipe_buffer.buffer[cpipe->pipe_buffer.out], |
---|
556 | buffer, cnt); |
---|
557 | } |
---|
558 | } |
---|
559 | pipe_free_kmem(cpipe); |
---|
560 | cpipe->pipe_buffer.buffer = buffer; |
---|
561 | cpipe->pipe_buffer.size = size; |
---|
562 | cpipe->pipe_buffer.in = cnt; |
---|
563 | cpipe->pipe_buffer.out = 0; |
---|
564 | cpipe->pipe_buffer.cnt = cnt; |
---|
565 | atomic_add_long(&amountpipekva, cpipe->pipe_buffer.size); |
---|
566 | return (0); |
---|
567 | } |
---|
568 | |
---|
569 | /* |
---|
570 | * Wrapper for pipespace_new() that performs locking assertions. |
---|
571 | */ |
---|
572 | static int |
---|
573 | pipespace(struct pipe *cpipe, int size) |
---|
574 | { |
---|
575 | |
---|
576 | KASSERT(cpipe->pipe_state & PIPE_LOCKFL, |
---|
577 | ("Unlocked pipe passed to pipespace")); |
---|
578 | return (pipespace_new(cpipe, size)); |
---|
579 | } |
---|
580 | |
---|
581 | /* |
---|
582 | * lock a pipe for I/O, blocking other access |
---|
583 | */ |
---|
584 | static __inline int |
---|
585 | pipelock(struct pipe *cpipe, int catch) |
---|
586 | { |
---|
587 | int error; |
---|
588 | |
---|
589 | PIPE_LOCK_ASSERT(cpipe, MA_OWNED); |
---|
590 | while (cpipe->pipe_state & PIPE_LOCKFL) { |
---|
591 | cpipe->pipe_state |= PIPE_LWANT; |
---|
592 | error = msleep(cpipe, PIPE_MTX(cpipe), |
---|
593 | catch ? (PRIBIO | PCATCH) : PRIBIO, |
---|
594 | "pipelk", 0); |
---|
595 | if (error != 0) |
---|
596 | return (error); |
---|
597 | } |
---|
598 | cpipe->pipe_state |= PIPE_LOCKFL; |
---|
599 | return (0); |
---|
600 | } |
---|
601 | |
---|
602 | /* |
---|
603 | * unlock a pipe I/O lock |
---|
604 | */ |
---|
605 | static __inline void |
---|
606 | pipeunlock(struct pipe *cpipe) |
---|
607 | { |
---|
608 | |
---|
609 | PIPE_LOCK_ASSERT(cpipe, MA_OWNED); |
---|
610 | KASSERT(cpipe->pipe_state & PIPE_LOCKFL, |
---|
611 | ("Unlocked pipe passed to pipeunlock")); |
---|
612 | cpipe->pipe_state &= ~PIPE_LOCKFL; |
---|
613 | if (cpipe->pipe_state & PIPE_LWANT) { |
---|
614 | cpipe->pipe_state &= ~PIPE_LWANT; |
---|
615 | wakeup(cpipe); |
---|
616 | } |
---|
617 | } |
---|
618 | |
---|
619 | void |
---|
620 | pipeselwakeup(struct pipe *cpipe) |
---|
621 | { |
---|
622 | |
---|
623 | PIPE_LOCK_ASSERT(cpipe, MA_OWNED); |
---|
624 | if (cpipe->pipe_state & PIPE_SEL) { |
---|
625 | selwakeuppri(&cpipe->pipe_sel, PSOCK); |
---|
626 | if (!SEL_WAITING(&cpipe->pipe_sel)) |
---|
627 | cpipe->pipe_state &= ~PIPE_SEL; |
---|
628 | } |
---|
629 | #ifndef __rtems__ |
---|
630 | if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio) |
---|
631 | pgsigio(&cpipe->pipe_sigio, SIGIO, 0); |
---|
632 | #endif /* __rtems__ */ |
---|
633 | KNOTE_LOCKED(&cpipe->pipe_sel.si_note, 0); |
---|
634 | } |
---|
635 | |
---|
636 | /* |
---|
637 | * Initialize and allocate VM and memory for pipe. The structure |
---|
638 | * will start out zero'd from the ctor, so we just manage the kmem. |
---|
639 | */ |
---|
640 | static void |
---|
641 | pipe_create(struct pipe *pipe, int backing) |
---|
642 | { |
---|
643 | |
---|
644 | if (backing) { |
---|
645 | /* |
---|
646 | * Note that these functions can fail if pipe map is exhausted |
---|
647 | * (as a result of too many pipes created), but we ignore the |
---|
648 | * error as it is not fatal and could be provoked by |
---|
649 | * unprivileged users. The only consequence is worse performance |
---|
650 | * with given pipe. |
---|
651 | */ |
---|
652 | if (amountpipekva > maxpipekva / 2) |
---|
653 | (void)pipespace_new(pipe, SMALL_PIPE_SIZE); |
---|
654 | else |
---|
655 | (void)pipespace_new(pipe, PIPE_SIZE); |
---|
656 | } |
---|
657 | |
---|
658 | pipe->pipe_ino = alloc_unr64(&pipeino_unr); |
---|
659 | } |
---|
660 | |
---|
661 | /* ARGSUSED */ |
---|
662 | static int |
---|
663 | pipe_read(struct file *fp, struct uio *uio, struct ucred *active_cred, |
---|
664 | int flags, struct thread *td) |
---|
665 | { |
---|
666 | struct pipe *rpipe; |
---|
667 | int error; |
---|
668 | int nread = 0; |
---|
669 | int size; |
---|
670 | |
---|
671 | rpipe = fp->f_data; |
---|
672 | PIPE_LOCK(rpipe); |
---|
673 | ++rpipe->pipe_busy; |
---|
674 | error = pipelock(rpipe, 1); |
---|
675 | if (error) |
---|
676 | goto unlocked_error; |
---|
677 | |
---|
678 | #ifdef MAC |
---|
679 | error = mac_pipe_check_read(active_cred, rpipe->pipe_pair); |
---|
680 | if (error) |
---|
681 | goto locked_error; |
---|
682 | #endif |
---|
683 | if (amountpipekva > (3 * maxpipekva) / 4) { |
---|
684 | if (!(rpipe->pipe_state & PIPE_DIRECTW) && |
---|
685 | (rpipe->pipe_buffer.size > SMALL_PIPE_SIZE) && |
---|
686 | (rpipe->pipe_buffer.cnt <= SMALL_PIPE_SIZE) && |
---|
687 | (piperesizeallowed == 1)) { |
---|
688 | PIPE_UNLOCK(rpipe); |
---|
689 | pipespace(rpipe, SMALL_PIPE_SIZE); |
---|
690 | PIPE_LOCK(rpipe); |
---|
691 | } |
---|
692 | } |
---|
693 | |
---|
694 | while (uio->uio_resid) { |
---|
695 | /* |
---|
696 | * normal pipe buffer receive |
---|
697 | */ |
---|
698 | if (rpipe->pipe_buffer.cnt > 0) { |
---|
699 | size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out; |
---|
700 | if (size > rpipe->pipe_buffer.cnt) |
---|
701 | size = rpipe->pipe_buffer.cnt; |
---|
702 | if (size > uio->uio_resid) |
---|
703 | size = uio->uio_resid; |
---|
704 | |
---|
705 | PIPE_UNLOCK(rpipe); |
---|
706 | error = uiomove( |
---|
707 | &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out], |
---|
708 | size, uio); |
---|
709 | PIPE_LOCK(rpipe); |
---|
710 | if (error) |
---|
711 | break; |
---|
712 | |
---|
713 | rpipe->pipe_buffer.out += size; |
---|
714 | if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size) |
---|
715 | rpipe->pipe_buffer.out = 0; |
---|
716 | |
---|
717 | rpipe->pipe_buffer.cnt -= size; |
---|
718 | |
---|
719 | /* |
---|
720 | * If there is no more to read in the pipe, reset |
---|
721 | * its pointers to the beginning. This improves |
---|
722 | * cache hit stats. |
---|
723 | */ |
---|
724 | if (rpipe->pipe_buffer.cnt == 0) { |
---|
725 | rpipe->pipe_buffer.in = 0; |
---|
726 | rpipe->pipe_buffer.out = 0; |
---|
727 | } |
---|
728 | nread += size; |
---|
729 | #ifndef PIPE_NODIRECT |
---|
730 | /* |
---|
731 | * Direct copy, bypassing a kernel buffer. |
---|
732 | */ |
---|
733 | } else if ((size = rpipe->pipe_map.cnt) != 0) { |
---|
734 | if (size > uio->uio_resid) |
---|
735 | size = (u_int) uio->uio_resid; |
---|
736 | PIPE_UNLOCK(rpipe); |
---|
737 | error = uiomove_fromphys(rpipe->pipe_map.ms, |
---|
738 | rpipe->pipe_map.pos, size, uio); |
---|
739 | PIPE_LOCK(rpipe); |
---|
740 | if (error) |
---|
741 | break; |
---|
742 | nread += size; |
---|
743 | rpipe->pipe_map.pos += size; |
---|
744 | rpipe->pipe_map.cnt -= size; |
---|
745 | if (rpipe->pipe_map.cnt == 0) { |
---|
746 | rpipe->pipe_state &= ~PIPE_WANTW; |
---|
747 | wakeup(rpipe); |
---|
748 | } |
---|
749 | #endif |
---|
750 | } else { |
---|
751 | /* |
---|
752 | * detect EOF condition |
---|
753 | * read returns 0 on EOF, no need to set error |
---|
754 | */ |
---|
755 | if (rpipe->pipe_state & PIPE_EOF) |
---|
756 | break; |
---|
757 | |
---|
758 | /* |
---|
759 | * If the "write-side" has been blocked, wake it up now. |
---|
760 | */ |
---|
761 | if (rpipe->pipe_state & PIPE_WANTW) { |
---|
762 | rpipe->pipe_state &= ~PIPE_WANTW; |
---|
763 | wakeup(rpipe); |
---|
764 | } |
---|
765 | |
---|
766 | /* |
---|
767 | * Break if some data was read. |
---|
768 | */ |
---|
769 | if (nread > 0) |
---|
770 | break; |
---|
771 | |
---|
772 | /* |
---|
773 | * Unlock the pipe buffer for our remaining processing. |
---|
774 | * We will either break out with an error or we will |
---|
775 | * sleep and relock to loop. |
---|
776 | */ |
---|
777 | pipeunlock(rpipe); |
---|
778 | |
---|
779 | /* |
---|
780 | * Handle non-blocking mode operation or |
---|
781 | * wait for more data. |
---|
782 | */ |
---|
783 | if (fp->f_flag & FNONBLOCK) { |
---|
784 | error = EAGAIN; |
---|
785 | } else { |
---|
786 | rpipe->pipe_state |= PIPE_WANTR; |
---|
787 | if ((error = msleep(rpipe, PIPE_MTX(rpipe), |
---|
788 | PRIBIO | PCATCH, |
---|
789 | "piperd", 0)) == 0) |
---|
790 | error = pipelock(rpipe, 1); |
---|
791 | } |
---|
792 | if (error) |
---|
793 | goto unlocked_error; |
---|
794 | } |
---|
795 | } |
---|
796 | #ifdef MAC |
---|
797 | locked_error: |
---|
798 | #endif |
---|
799 | pipeunlock(rpipe); |
---|
800 | |
---|
801 | /* XXX: should probably do this before getting any locks. */ |
---|
802 | if (error == 0) |
---|
803 | vfs_timestamp(&rpipe->pipe_atime); |
---|
804 | unlocked_error: |
---|
805 | --rpipe->pipe_busy; |
---|
806 | |
---|
807 | /* |
---|
808 | * PIPE_WANT processing only makes sense if pipe_busy is 0. |
---|
809 | */ |
---|
810 | if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) { |
---|
811 | rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW); |
---|
812 | wakeup(rpipe); |
---|
813 | } else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) { |
---|
814 | /* |
---|
815 | * Handle write blocking hysteresis. |
---|
816 | */ |
---|
817 | if (rpipe->pipe_state & PIPE_WANTW) { |
---|
818 | rpipe->pipe_state &= ~PIPE_WANTW; |
---|
819 | wakeup(rpipe); |
---|
820 | } |
---|
821 | } |
---|
822 | |
---|
823 | if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF) |
---|
824 | pipeselwakeup(rpipe); |
---|
825 | |
---|
826 | PIPE_UNLOCK(rpipe); |
---|
827 | return (error); |
---|
828 | } |
---|
829 | |
---|
830 | #ifndef PIPE_NODIRECT |
---|
831 | /* |
---|
832 | * Map the sending processes' buffer into kernel space and wire it. |
---|
833 | * This is similar to a physical write operation. |
---|
834 | */ |
---|
835 | static int |
---|
836 | pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio) |
---|
837 | { |
---|
838 | u_int size; |
---|
839 | int i; |
---|
840 | |
---|
841 | PIPE_LOCK_ASSERT(wpipe, MA_OWNED); |
---|
842 | KASSERT((wpipe->pipe_state & PIPE_DIRECTW) == 0, |
---|
843 | ("%s: PIPE_DIRECTW set on %p", __func__, wpipe)); |
---|
844 | KASSERT(wpipe->pipe_map.cnt == 0, |
---|
845 | ("%s: pipe map for %p contains residual data", __func__, wpipe)); |
---|
846 | |
---|
847 | if (uio->uio_iov->iov_len > wpipe->pipe_buffer.size) |
---|
848 | size = wpipe->pipe_buffer.size; |
---|
849 | else |
---|
850 | size = uio->uio_iov->iov_len; |
---|
851 | |
---|
852 | wpipe->pipe_state |= PIPE_DIRECTW; |
---|
853 | PIPE_UNLOCK(wpipe); |
---|
854 | i = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map, |
---|
855 | (vm_offset_t)uio->uio_iov->iov_base, size, VM_PROT_READ, |
---|
856 | wpipe->pipe_map.ms, PIPENPAGES); |
---|
857 | PIPE_LOCK(wpipe); |
---|
858 | if (i < 0) { |
---|
859 | wpipe->pipe_state &= ~PIPE_DIRECTW; |
---|
860 | return (EFAULT); |
---|
861 | } |
---|
862 | |
---|
863 | wpipe->pipe_map.npages = i; |
---|
864 | wpipe->pipe_map.pos = |
---|
865 | ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK; |
---|
866 | wpipe->pipe_map.cnt = size; |
---|
867 | |
---|
868 | uio->uio_iov->iov_len -= size; |
---|
869 | uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + size; |
---|
870 | if (uio->uio_iov->iov_len == 0) |
---|
871 | uio->uio_iov++; |
---|
872 | uio->uio_resid -= size; |
---|
873 | uio->uio_offset += size; |
---|
874 | return (0); |
---|
875 | } |
---|
876 | |
---|
877 | /* |
---|
878 | * Unwire the process buffer. |
---|
879 | */ |
---|
880 | static void |
---|
881 | pipe_destroy_write_buffer(struct pipe *wpipe) |
---|
882 | { |
---|
883 | |
---|
884 | PIPE_LOCK_ASSERT(wpipe, MA_OWNED); |
---|
885 | KASSERT((wpipe->pipe_state & PIPE_DIRECTW) != 0, |
---|
886 | ("%s: PIPE_DIRECTW not set on %p", __func__, wpipe)); |
---|
887 | KASSERT(wpipe->pipe_map.cnt == 0, |
---|
888 | ("%s: pipe map for %p contains residual data", __func__, wpipe)); |
---|
889 | |
---|
890 | wpipe->pipe_state &= ~PIPE_DIRECTW; |
---|
891 | vm_page_unhold_pages(wpipe->pipe_map.ms, wpipe->pipe_map.npages); |
---|
892 | wpipe->pipe_map.npages = 0; |
---|
893 | } |
---|
894 | |
---|
895 | /* |
---|
896 | * In the case of a signal, the writing process might go away. This |
---|
897 | * code copies the data into the circular buffer so that the source |
---|
898 | * pages can be freed without loss of data. |
---|
899 | */ |
---|
900 | static void |
---|
901 | pipe_clone_write_buffer(struct pipe *wpipe) |
---|
902 | { |
---|
903 | struct uio uio; |
---|
904 | struct iovec iov; |
---|
905 | int size; |
---|
906 | int pos; |
---|
907 | |
---|
908 | PIPE_LOCK_ASSERT(wpipe, MA_OWNED); |
---|
909 | KASSERT((wpipe->pipe_state & PIPE_DIRECTW) != 0, |
---|
910 | ("%s: PIPE_DIRECTW not set on %p", __func__, wpipe)); |
---|
911 | |
---|
912 | size = wpipe->pipe_map.cnt; |
---|
913 | pos = wpipe->pipe_map.pos; |
---|
914 | wpipe->pipe_map.cnt = 0; |
---|
915 | |
---|
916 | wpipe->pipe_buffer.in = size; |
---|
917 | wpipe->pipe_buffer.out = 0; |
---|
918 | wpipe->pipe_buffer.cnt = size; |
---|
919 | |
---|
920 | PIPE_UNLOCK(wpipe); |
---|
921 | iov.iov_base = wpipe->pipe_buffer.buffer; |
---|
922 | iov.iov_len = size; |
---|
923 | uio.uio_iov = &iov; |
---|
924 | uio.uio_iovcnt = 1; |
---|
925 | uio.uio_offset = 0; |
---|
926 | uio.uio_resid = size; |
---|
927 | uio.uio_segflg = UIO_SYSSPACE; |
---|
928 | uio.uio_rw = UIO_READ; |
---|
929 | uio.uio_td = curthread; |
---|
930 | uiomove_fromphys(wpipe->pipe_map.ms, pos, size, &uio); |
---|
931 | PIPE_LOCK(wpipe); |
---|
932 | pipe_destroy_write_buffer(wpipe); |
---|
933 | } |
---|
934 | |
---|
935 | /* |
---|
936 | * This implements the pipe buffer write mechanism. Note that only |
---|
937 | * a direct write OR a normal pipe write can be pending at any given time. |
---|
938 | * If there are any characters in the pipe buffer, the direct write will |
---|
939 | * be deferred until the receiving process grabs all of the bytes from |
---|
940 | * the pipe buffer. Then the direct mapping write is set-up. |
---|
941 | */ |
---|
942 | static int |
---|
943 | pipe_direct_write(struct pipe *wpipe, struct uio *uio) |
---|
944 | { |
---|
945 | int error; |
---|
946 | |
---|
947 | retry: |
---|
948 | PIPE_LOCK_ASSERT(wpipe, MA_OWNED); |
---|
949 | error = pipelock(wpipe, 1); |
---|
950 | if (error != 0) |
---|
951 | goto error1; |
---|
952 | if ((wpipe->pipe_state & PIPE_EOF) != 0) { |
---|
953 | error = EPIPE; |
---|
954 | pipeunlock(wpipe); |
---|
955 | goto error1; |
---|
956 | } |
---|
957 | if (wpipe->pipe_state & PIPE_DIRECTW) { |
---|
958 | if (wpipe->pipe_state & PIPE_WANTR) { |
---|
959 | wpipe->pipe_state &= ~PIPE_WANTR; |
---|
960 | wakeup(wpipe); |
---|
961 | } |
---|
962 | pipeselwakeup(wpipe); |
---|
963 | wpipe->pipe_state |= PIPE_WANTW; |
---|
964 | pipeunlock(wpipe); |
---|
965 | error = msleep(wpipe, PIPE_MTX(wpipe), |
---|
966 | PRIBIO | PCATCH, "pipdww", 0); |
---|
967 | if (error) |
---|
968 | goto error1; |
---|
969 | else |
---|
970 | goto retry; |
---|
971 | } |
---|
972 | if (wpipe->pipe_buffer.cnt > 0) { |
---|
973 | if (wpipe->pipe_state & PIPE_WANTR) { |
---|
974 | wpipe->pipe_state &= ~PIPE_WANTR; |
---|
975 | wakeup(wpipe); |
---|
976 | } |
---|
977 | pipeselwakeup(wpipe); |
---|
978 | wpipe->pipe_state |= PIPE_WANTW; |
---|
979 | pipeunlock(wpipe); |
---|
980 | error = msleep(wpipe, PIPE_MTX(wpipe), |
---|
981 | PRIBIO | PCATCH, "pipdwc", 0); |
---|
982 | if (error) |
---|
983 | goto error1; |
---|
984 | else |
---|
985 | goto retry; |
---|
986 | } |
---|
987 | |
---|
988 | error = pipe_build_write_buffer(wpipe, uio); |
---|
989 | if (error) { |
---|
990 | pipeunlock(wpipe); |
---|
991 | goto error1; |
---|
992 | } |
---|
993 | |
---|
994 | while (wpipe->pipe_map.cnt != 0 && |
---|
995 | (wpipe->pipe_state & PIPE_EOF) == 0) { |
---|
996 | if (wpipe->pipe_state & PIPE_WANTR) { |
---|
997 | wpipe->pipe_state &= ~PIPE_WANTR; |
---|
998 | wakeup(wpipe); |
---|
999 | } |
---|
1000 | pipeselwakeup(wpipe); |
---|
1001 | wpipe->pipe_state |= PIPE_WANTW; |
---|
1002 | pipeunlock(wpipe); |
---|
1003 | error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH, |
---|
1004 | "pipdwt", 0); |
---|
1005 | pipelock(wpipe, 0); |
---|
1006 | if (error != 0) |
---|
1007 | break; |
---|
1008 | } |
---|
1009 | |
---|
1010 | if ((wpipe->pipe_state & PIPE_EOF) != 0) { |
---|
1011 | wpipe->pipe_map.cnt = 0; |
---|
1012 | pipe_destroy_write_buffer(wpipe); |
---|
1013 | pipeselwakeup(wpipe); |
---|
1014 | error = EPIPE; |
---|
1015 | } else if (error == EINTR || error == ERESTART) { |
---|
1016 | pipe_clone_write_buffer(wpipe); |
---|
1017 | } else { |
---|
1018 | pipe_destroy_write_buffer(wpipe); |
---|
1019 | } |
---|
1020 | pipeunlock(wpipe); |
---|
1021 | KASSERT((wpipe->pipe_state & PIPE_DIRECTW) == 0, |
---|
1022 | ("pipe %p leaked PIPE_DIRECTW", wpipe)); |
---|
1023 | return (error); |
---|
1024 | |
---|
1025 | error1: |
---|
1026 | wakeup(wpipe); |
---|
1027 | return (error); |
---|
1028 | } |
---|
1029 | #endif |
---|
1030 | |
---|
1031 | static int |
---|
1032 | pipe_write(struct file *fp, struct uio *uio, struct ucred *active_cred, |
---|
1033 | int flags, struct thread *td) |
---|
1034 | { |
---|
1035 | int error = 0; |
---|
1036 | int desiredsize; |
---|
1037 | ssize_t orig_resid; |
---|
1038 | struct pipe *wpipe, *rpipe; |
---|
1039 | |
---|
1040 | rpipe = fp->f_data; |
---|
1041 | wpipe = PIPE_PEER(rpipe); |
---|
1042 | PIPE_LOCK(rpipe); |
---|
1043 | error = pipelock(wpipe, 1); |
---|
1044 | if (error) { |
---|
1045 | PIPE_UNLOCK(rpipe); |
---|
1046 | return (error); |
---|
1047 | } |
---|
1048 | /* |
---|
1049 | * detect loss of pipe read side, issue SIGPIPE if lost. |
---|
1050 | */ |
---|
1051 | if (wpipe->pipe_present != PIPE_ACTIVE || |
---|
1052 | (wpipe->pipe_state & PIPE_EOF)) { |
---|
1053 | pipeunlock(wpipe); |
---|
1054 | PIPE_UNLOCK(rpipe); |
---|
1055 | return (EPIPE); |
---|
1056 | } |
---|
1057 | #ifdef MAC |
---|
1058 | error = mac_pipe_check_write(active_cred, wpipe->pipe_pair); |
---|
1059 | if (error) { |
---|
1060 | pipeunlock(wpipe); |
---|
1061 | PIPE_UNLOCK(rpipe); |
---|
1062 | return (error); |
---|
1063 | } |
---|
1064 | #endif |
---|
1065 | ++wpipe->pipe_busy; |
---|
1066 | |
---|
1067 | /* Choose a larger size if it's advantageous */ |
---|
1068 | desiredsize = max(SMALL_PIPE_SIZE, wpipe->pipe_buffer.size); |
---|
1069 | while (desiredsize < wpipe->pipe_buffer.cnt + uio->uio_resid) { |
---|
1070 | if (piperesizeallowed != 1) |
---|
1071 | break; |
---|
1072 | if (amountpipekva > maxpipekva / 2) |
---|
1073 | break; |
---|
1074 | if (desiredsize == BIG_PIPE_SIZE) |
---|
1075 | break; |
---|
1076 | desiredsize = desiredsize * 2; |
---|
1077 | } |
---|
1078 | |
---|
1079 | /* Choose a smaller size if we're in a OOM situation */ |
---|
1080 | if ((amountpipekva > (3 * maxpipekva) / 4) && |
---|
1081 | (wpipe->pipe_buffer.size > SMALL_PIPE_SIZE) && |
---|
1082 | (wpipe->pipe_buffer.cnt <= SMALL_PIPE_SIZE) && |
---|
1083 | (piperesizeallowed == 1)) |
---|
1084 | desiredsize = SMALL_PIPE_SIZE; |
---|
1085 | |
---|
1086 | /* Resize if the above determined that a new size was necessary */ |
---|
1087 | if ((desiredsize != wpipe->pipe_buffer.size) && |
---|
1088 | ((wpipe->pipe_state & PIPE_DIRECTW) == 0)) { |
---|
1089 | PIPE_UNLOCK(wpipe); |
---|
1090 | pipespace(wpipe, desiredsize); |
---|
1091 | PIPE_LOCK(wpipe); |
---|
1092 | } |
---|
1093 | if (wpipe->pipe_buffer.size == 0) { |
---|
1094 | /* |
---|
1095 | * This can only happen for reverse direction use of pipes |
---|
1096 | * in a complete OOM situation. |
---|
1097 | */ |
---|
1098 | error = ENOMEM; |
---|
1099 | --wpipe->pipe_busy; |
---|
1100 | pipeunlock(wpipe); |
---|
1101 | PIPE_UNLOCK(wpipe); |
---|
1102 | return (error); |
---|
1103 | } |
---|
1104 | |
---|
1105 | pipeunlock(wpipe); |
---|
1106 | |
---|
1107 | orig_resid = uio->uio_resid; |
---|
1108 | |
---|
1109 | while (uio->uio_resid) { |
---|
1110 | int space; |
---|
1111 | |
---|
1112 | pipelock(wpipe, 0); |
---|
1113 | if (wpipe->pipe_state & PIPE_EOF) { |
---|
1114 | pipeunlock(wpipe); |
---|
1115 | error = EPIPE; |
---|
1116 | break; |
---|
1117 | } |
---|
1118 | #ifndef PIPE_NODIRECT |
---|
1119 | /* |
---|
1120 | * If the transfer is large, we can gain performance if |
---|
1121 | * we do process-to-process copies directly. |
---|
1122 | * If the write is non-blocking, we don't use the |
---|
1123 | * direct write mechanism. |
---|
1124 | * |
---|
1125 | * The direct write mechanism will detect the reader going |
---|
1126 | * away on us. |
---|
1127 | */ |
---|
1128 | if (uio->uio_segflg == UIO_USERSPACE && |
---|
1129 | uio->uio_iov->iov_len >= PIPE_MINDIRECT && |
---|
1130 | wpipe->pipe_buffer.size >= PIPE_MINDIRECT && |
---|
1131 | (fp->f_flag & FNONBLOCK) == 0) { |
---|
1132 | pipeunlock(wpipe); |
---|
1133 | error = pipe_direct_write(wpipe, uio); |
---|
1134 | if (error) |
---|
1135 | break; |
---|
1136 | continue; |
---|
1137 | } |
---|
1138 | #endif |
---|
1139 | |
---|
1140 | /* |
---|
1141 | * Pipe buffered writes cannot be coincidental with |
---|
1142 | * direct writes. We wait until the currently executing |
---|
1143 | * direct write is completed before we start filling the |
---|
1144 | * pipe buffer. We break out if a signal occurs or the |
---|
1145 | * reader goes away. |
---|
1146 | */ |
---|
1147 | if (wpipe->pipe_map.cnt != 0) { |
---|
1148 | if (wpipe->pipe_state & PIPE_WANTR) { |
---|
1149 | wpipe->pipe_state &= ~PIPE_WANTR; |
---|
1150 | wakeup(wpipe); |
---|
1151 | } |
---|
1152 | pipeselwakeup(wpipe); |
---|
1153 | wpipe->pipe_state |= PIPE_WANTW; |
---|
1154 | pipeunlock(wpipe); |
---|
1155 | error = msleep(wpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH, |
---|
1156 | "pipbww", 0); |
---|
1157 | if (error) |
---|
1158 | break; |
---|
1159 | else |
---|
1160 | continue; |
---|
1161 | } |
---|
1162 | |
---|
1163 | space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; |
---|
1164 | |
---|
1165 | /* Writes of size <= PIPE_BUF must be atomic. */ |
---|
1166 | if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF)) |
---|
1167 | space = 0; |
---|
1168 | |
---|
1169 | if (space > 0) { |
---|
1170 | int size; /* Transfer size */ |
---|
1171 | int segsize; /* first segment to transfer */ |
---|
1172 | |
---|
1173 | /* |
---|
1174 | * Transfer size is minimum of uio transfer |
---|
1175 | * and free space in pipe buffer. |
---|
1176 | */ |
---|
1177 | if (space > uio->uio_resid) |
---|
1178 | size = uio->uio_resid; |
---|
1179 | else |
---|
1180 | size = space; |
---|
1181 | /* |
---|
1182 | * First segment to transfer is minimum of |
---|
1183 | * transfer size and contiguous space in |
---|
1184 | * pipe buffer. If first segment to transfer |
---|
1185 | * is less than the transfer size, we've got |
---|
1186 | * a wraparound in the buffer. |
---|
1187 | */ |
---|
1188 | segsize = wpipe->pipe_buffer.size - |
---|
1189 | wpipe->pipe_buffer.in; |
---|
1190 | if (segsize > size) |
---|
1191 | segsize = size; |
---|
1192 | |
---|
1193 | /* Transfer first segment */ |
---|
1194 | |
---|
1195 | PIPE_UNLOCK(rpipe); |
---|
1196 | error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in], |
---|
1197 | segsize, uio); |
---|
1198 | PIPE_LOCK(rpipe); |
---|
1199 | |
---|
1200 | if (error == 0 && segsize < size) { |
---|
1201 | KASSERT(wpipe->pipe_buffer.in + segsize == |
---|
1202 | wpipe->pipe_buffer.size, |
---|
1203 | ("Pipe buffer wraparound disappeared")); |
---|
1204 | /* |
---|
1205 | * Transfer remaining part now, to |
---|
1206 | * support atomic writes. Wraparound |
---|
1207 | * happened. |
---|
1208 | */ |
---|
1209 | |
---|
1210 | PIPE_UNLOCK(rpipe); |
---|
1211 | error = uiomove( |
---|
1212 | &wpipe->pipe_buffer.buffer[0], |
---|
1213 | size - segsize, uio); |
---|
1214 | PIPE_LOCK(rpipe); |
---|
1215 | } |
---|
1216 | if (error == 0) { |
---|
1217 | wpipe->pipe_buffer.in += size; |
---|
1218 | if (wpipe->pipe_buffer.in >= |
---|
1219 | wpipe->pipe_buffer.size) { |
---|
1220 | KASSERT(wpipe->pipe_buffer.in == |
---|
1221 | size - segsize + |
---|
1222 | wpipe->pipe_buffer.size, |
---|
1223 | ("Expected wraparound bad")); |
---|
1224 | wpipe->pipe_buffer.in = size - segsize; |
---|
1225 | } |
---|
1226 | |
---|
1227 | wpipe->pipe_buffer.cnt += size; |
---|
1228 | KASSERT(wpipe->pipe_buffer.cnt <= |
---|
1229 | wpipe->pipe_buffer.size, |
---|
1230 | ("Pipe buffer overflow")); |
---|
1231 | } |
---|
1232 | pipeunlock(wpipe); |
---|
1233 | if (error != 0) |
---|
1234 | break; |
---|
1235 | } else { |
---|
1236 | /* |
---|
1237 | * If the "read-side" has been blocked, wake it up now. |
---|
1238 | */ |
---|
1239 | if (wpipe->pipe_state & PIPE_WANTR) { |
---|
1240 | wpipe->pipe_state &= ~PIPE_WANTR; |
---|
1241 | wakeup(wpipe); |
---|
1242 | } |
---|
1243 | |
---|
1244 | /* |
---|
1245 | * don't block on non-blocking I/O |
---|
1246 | */ |
---|
1247 | if (fp->f_flag & FNONBLOCK) { |
---|
1248 | error = EAGAIN; |
---|
1249 | pipeunlock(wpipe); |
---|
1250 | break; |
---|
1251 | } |
---|
1252 | |
---|
1253 | /* |
---|
1254 | * We have no more space and have something to offer, |
---|
1255 | * wake up select/poll. |
---|
1256 | */ |
---|
1257 | pipeselwakeup(wpipe); |
---|
1258 | |
---|
1259 | wpipe->pipe_state |= PIPE_WANTW; |
---|
1260 | pipeunlock(wpipe); |
---|
1261 | error = msleep(wpipe, PIPE_MTX(rpipe), |
---|
1262 | PRIBIO | PCATCH, "pipewr", 0); |
---|
1263 | if (error != 0) |
---|
1264 | break; |
---|
1265 | } |
---|
1266 | } |
---|
1267 | |
---|
1268 | pipelock(wpipe, 0); |
---|
1269 | --wpipe->pipe_busy; |
---|
1270 | |
---|
1271 | if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) { |
---|
1272 | wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR); |
---|
1273 | wakeup(wpipe); |
---|
1274 | } else if (wpipe->pipe_buffer.cnt > 0) { |
---|
1275 | /* |
---|
1276 | * If we have put any characters in the buffer, we wake up |
---|
1277 | * the reader. |
---|
1278 | */ |
---|
1279 | if (wpipe->pipe_state & PIPE_WANTR) { |
---|
1280 | wpipe->pipe_state &= ~PIPE_WANTR; |
---|
1281 | wakeup(wpipe); |
---|
1282 | } |
---|
1283 | } |
---|
1284 | |
---|
1285 | /* |
---|
1286 | * Don't return EPIPE if any byte was written. |
---|
1287 | * EINTR and other interrupts are handled by generic I/O layer. |
---|
1288 | * Do not pretend that I/O succeeded for obvious user error |
---|
1289 | * like EFAULT. |
---|
1290 | */ |
---|
1291 | if (uio->uio_resid != orig_resid && error == EPIPE) |
---|
1292 | error = 0; |
---|
1293 | |
---|
1294 | if (error == 0) |
---|
1295 | vfs_timestamp(&wpipe->pipe_mtime); |
---|
1296 | |
---|
1297 | /* |
---|
1298 | * We have something to offer, |
---|
1299 | * wake up select/poll. |
---|
1300 | */ |
---|
1301 | if (wpipe->pipe_buffer.cnt) |
---|
1302 | pipeselwakeup(wpipe); |
---|
1303 | |
---|
1304 | pipeunlock(wpipe); |
---|
1305 | PIPE_UNLOCK(rpipe); |
---|
1306 | return (error); |
---|
1307 | } |
---|
1308 | |
---|
1309 | /* ARGSUSED */ |
---|
1310 | static int |
---|
1311 | pipe_truncate(struct file *fp, off_t length, struct ucred *active_cred, |
---|
1312 | struct thread *td) |
---|
1313 | { |
---|
1314 | struct pipe *cpipe; |
---|
1315 | int error; |
---|
1316 | |
---|
1317 | cpipe = fp->f_data; |
---|
1318 | if (cpipe->pipe_state & PIPE_NAMED) |
---|
1319 | error = vnops.fo_truncate(fp, length, active_cred, td); |
---|
1320 | else |
---|
1321 | error = invfo_truncate(fp, length, active_cred, td); |
---|
1322 | return (error); |
---|
1323 | } |
---|
1324 | |
---|
1325 | /* |
---|
1326 | * we implement a very minimal set of ioctls for compatibility with sockets. |
---|
1327 | */ |
---|
1328 | static int |
---|
1329 | pipe_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred, |
---|
1330 | struct thread *td) |
---|
1331 | { |
---|
1332 | struct pipe *mpipe = fp->f_data; |
---|
1333 | int error; |
---|
1334 | |
---|
1335 | PIPE_LOCK(mpipe); |
---|
1336 | |
---|
1337 | #ifdef MAC |
---|
1338 | error = mac_pipe_check_ioctl(active_cred, mpipe->pipe_pair, cmd, data); |
---|
1339 | if (error) { |
---|
1340 | PIPE_UNLOCK(mpipe); |
---|
1341 | return (error); |
---|
1342 | } |
---|
1343 | #endif |
---|
1344 | |
---|
1345 | error = 0; |
---|
1346 | switch (cmd) { |
---|
1347 | |
---|
1348 | case FIONBIO: |
---|
1349 | break; |
---|
1350 | |
---|
1351 | case FIOASYNC: |
---|
1352 | if (*(int *)data) { |
---|
1353 | mpipe->pipe_state |= PIPE_ASYNC; |
---|
1354 | } else { |
---|
1355 | mpipe->pipe_state &= ~PIPE_ASYNC; |
---|
1356 | } |
---|
1357 | break; |
---|
1358 | |
---|
1359 | case FIONREAD: |
---|
1360 | if (!(fp->f_flag & FREAD)) { |
---|
1361 | *(int *)data = 0; |
---|
1362 | PIPE_UNLOCK(mpipe); |
---|
1363 | return (0); |
---|
1364 | } |
---|
1365 | if (mpipe->pipe_map.cnt != 0) |
---|
1366 | *(int *)data = mpipe->pipe_map.cnt; |
---|
1367 | else |
---|
1368 | *(int *)data = mpipe->pipe_buffer.cnt; |
---|
1369 | break; |
---|
1370 | |
---|
1371 | case FIOSETOWN: |
---|
1372 | PIPE_UNLOCK(mpipe); |
---|
1373 | error = fsetown(*(int *)data, &mpipe->pipe_sigio); |
---|
1374 | goto out_unlocked; |
---|
1375 | |
---|
1376 | case FIOGETOWN: |
---|
1377 | *(int *)data = fgetown(&mpipe->pipe_sigio); |
---|
1378 | break; |
---|
1379 | |
---|
1380 | /* This is deprecated, FIOSETOWN should be used instead. */ |
---|
1381 | case TIOCSPGRP: |
---|
1382 | PIPE_UNLOCK(mpipe); |
---|
1383 | error = fsetown(-(*(int *)data), &mpipe->pipe_sigio); |
---|
1384 | goto out_unlocked; |
---|
1385 | |
---|
1386 | /* This is deprecated, FIOGETOWN should be used instead. */ |
---|
1387 | case TIOCGPGRP: |
---|
1388 | *(int *)data = -fgetown(&mpipe->pipe_sigio); |
---|
1389 | break; |
---|
1390 | |
---|
1391 | default: |
---|
1392 | error = ENOTTY; |
---|
1393 | break; |
---|
1394 | } |
---|
1395 | PIPE_UNLOCK(mpipe); |
---|
1396 | out_unlocked: |
---|
1397 | return (error); |
---|
1398 | } |
---|
1399 | |
---|
1400 | static int |
---|
1401 | pipe_poll(struct file *fp, int events, struct ucred *active_cred, |
---|
1402 | struct thread *td) |
---|
1403 | { |
---|
1404 | struct pipe *rpipe; |
---|
1405 | struct pipe *wpipe; |
---|
1406 | int levents, revents; |
---|
1407 | #ifdef MAC |
---|
1408 | int error; |
---|
1409 | #endif |
---|
1410 | |
---|
1411 | revents = 0; |
---|
1412 | rpipe = fp->f_data; |
---|
1413 | wpipe = PIPE_PEER(rpipe); |
---|
1414 | PIPE_LOCK(rpipe); |
---|
1415 | #ifdef MAC |
---|
1416 | error = mac_pipe_check_poll(active_cred, rpipe->pipe_pair); |
---|
1417 | if (error) |
---|
1418 | goto locked_error; |
---|
1419 | #endif |
---|
1420 | if (fp->f_flag & FREAD && events & (POLLIN | POLLRDNORM)) |
---|
1421 | if (rpipe->pipe_map.cnt > 0 || rpipe->pipe_buffer.cnt > 0) |
---|
1422 | revents |= events & (POLLIN | POLLRDNORM); |
---|
1423 | |
---|
1424 | if (fp->f_flag & FWRITE && events & (POLLOUT | POLLWRNORM)) |
---|
1425 | if (wpipe->pipe_present != PIPE_ACTIVE || |
---|
1426 | (wpipe->pipe_state & PIPE_EOF) || |
---|
1427 | ((wpipe->pipe_state & PIPE_DIRECTW) == 0 && |
---|
1428 | ((wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF || |
---|
1429 | wpipe->pipe_buffer.size == 0))) |
---|
1430 | revents |= events & (POLLOUT | POLLWRNORM); |
---|
1431 | |
---|
1432 | levents = events & |
---|
1433 | (POLLIN | POLLINIGNEOF | POLLPRI | POLLRDNORM | POLLRDBAND); |
---|
1434 | if (rpipe->pipe_state & PIPE_NAMED && fp->f_flag & FREAD && levents && |
---|
1435 | fp->f_pipegen == rpipe->pipe_wgen) |
---|
1436 | events |= POLLINIGNEOF; |
---|
1437 | |
---|
1438 | if ((events & POLLINIGNEOF) == 0) { |
---|
1439 | if (rpipe->pipe_state & PIPE_EOF) { |
---|
1440 | revents |= (events & (POLLIN | POLLRDNORM)); |
---|
1441 | if (wpipe->pipe_present != PIPE_ACTIVE || |
---|
1442 | (wpipe->pipe_state & PIPE_EOF)) |
---|
1443 | revents |= POLLHUP; |
---|
1444 | } |
---|
1445 | } |
---|
1446 | |
---|
1447 | if (revents == 0) { |
---|
1448 | if (fp->f_flag & FREAD && events & (POLLIN | POLLRDNORM)) { |
---|
1449 | selrecord(td, &rpipe->pipe_sel); |
---|
1450 | if (SEL_WAITING(&rpipe->pipe_sel)) |
---|
1451 | rpipe->pipe_state |= PIPE_SEL; |
---|
1452 | } |
---|
1453 | |
---|
1454 | if (fp->f_flag & FWRITE && events & (POLLOUT | POLLWRNORM)) { |
---|
1455 | selrecord(td, &wpipe->pipe_sel); |
---|
1456 | if (SEL_WAITING(&wpipe->pipe_sel)) |
---|
1457 | wpipe->pipe_state |= PIPE_SEL; |
---|
1458 | } |
---|
1459 | } |
---|
1460 | #ifdef MAC |
---|
1461 | locked_error: |
---|
1462 | #endif |
---|
1463 | PIPE_UNLOCK(rpipe); |
---|
1464 | |
---|
1465 | return (revents); |
---|
1466 | } |
---|
1467 | |
---|
1468 | /* |
---|
1469 | * We shouldn't need locks here as we're doing a read and this should |
---|
1470 | * be a natural race. |
---|
1471 | */ |
---|
1472 | static int |
---|
1473 | pipe_stat(struct file *fp, struct stat *ub, struct ucred *active_cred, |
---|
1474 | struct thread *td) |
---|
1475 | { |
---|
1476 | struct pipe *pipe; |
---|
1477 | #ifdef MAC |
---|
1478 | int error; |
---|
1479 | #endif |
---|
1480 | |
---|
1481 | pipe = fp->f_data; |
---|
1482 | PIPE_LOCK(pipe); |
---|
1483 | #ifdef MAC |
---|
1484 | error = mac_pipe_check_stat(active_cred, pipe->pipe_pair); |
---|
1485 | if (error) { |
---|
1486 | PIPE_UNLOCK(pipe); |
---|
1487 | return (error); |
---|
1488 | } |
---|
1489 | #endif |
---|
1490 | |
---|
1491 | /* For named pipes ask the underlying filesystem. */ |
---|
1492 | if (pipe->pipe_state & PIPE_NAMED) { |
---|
1493 | PIPE_UNLOCK(pipe); |
---|
1494 | return (vnops.fo_stat(fp, ub, active_cred, td)); |
---|
1495 | } |
---|
1496 | |
---|
1497 | PIPE_UNLOCK(pipe); |
---|
1498 | |
---|
1499 | bzero(ub, sizeof(*ub)); |
---|
1500 | ub->st_mode = S_IFIFO; |
---|
1501 | ub->st_blksize = PAGE_SIZE; |
---|
1502 | if (pipe->pipe_map.cnt != 0) |
---|
1503 | ub->st_size = pipe->pipe_map.cnt; |
---|
1504 | else |
---|
1505 | ub->st_size = pipe->pipe_buffer.cnt; |
---|
1506 | ub->st_blocks = howmany(ub->st_size, ub->st_blksize); |
---|
1507 | ub->st_atim = pipe->pipe_atime; |
---|
1508 | ub->st_mtim = pipe->pipe_mtime; |
---|
1509 | ub->st_ctim = pipe->pipe_ctime; |
---|
1510 | #ifndef __rtems__ |
---|
1511 | ub->st_uid = fp->f_cred->cr_uid; |
---|
1512 | ub->st_gid = fp->f_cred->cr_gid; |
---|
1513 | ub->st_dev = pipedev_ino; |
---|
1514 | ub->st_ino = pipe->pipe_ino; |
---|
1515 | #else /* __rtems__ */ |
---|
1516 | ub->st_uid = BSD_DEFAULT_UID; |
---|
1517 | ub->st_gid = BSD_DEFAULT_GID; |
---|
1518 | ub->st_dev = rtems_filesystem_make_dev_t(0xcc494cd6U, 0x1d970b4dU); |
---|
1519 | ub->st_ino = pipe->pipe_ino; |
---|
1520 | #endif /* __rtems__ */ |
---|
1521 | /* |
---|
1522 | * Left as 0: st_nlink, st_rdev, st_flags, st_gen. |
---|
1523 | */ |
---|
1524 | return (0); |
---|
1525 | } |
---|
1526 | |
---|
1527 | /* ARGSUSED */ |
---|
1528 | static int |
---|
1529 | pipe_close(struct file *fp, struct thread *td) |
---|
1530 | { |
---|
1531 | |
---|
1532 | if (fp->f_vnode != NULL) |
---|
1533 | return vnops.fo_close(fp, td); |
---|
1534 | fp->f_ops = &badfileops; |
---|
1535 | pipe_dtor(fp->f_data); |
---|
1536 | fp->f_data = NULL; |
---|
1537 | return (0); |
---|
1538 | } |
---|
1539 | |
---|
1540 | static int |
---|
1541 | pipe_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, struct thread *td) |
---|
1542 | { |
---|
1543 | struct pipe *cpipe; |
---|
1544 | int error; |
---|
1545 | |
---|
1546 | cpipe = fp->f_data; |
---|
1547 | if (cpipe->pipe_state & PIPE_NAMED) |
---|
1548 | error = vn_chmod(fp, mode, active_cred, td); |
---|
1549 | else |
---|
1550 | error = invfo_chmod(fp, mode, active_cred, td); |
---|
1551 | return (error); |
---|
1552 | } |
---|
1553 | |
---|
1554 | static int |
---|
1555 | pipe_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred, |
---|
1556 | struct thread *td) |
---|
1557 | { |
---|
1558 | struct pipe *cpipe; |
---|
1559 | int error; |
---|
1560 | |
---|
1561 | cpipe = fp->f_data; |
---|
1562 | if (cpipe->pipe_state & PIPE_NAMED) |
---|
1563 | error = vn_chown(fp, uid, gid, active_cred, td); |
---|
1564 | else |
---|
1565 | error = invfo_chown(fp, uid, gid, active_cred, td); |
---|
1566 | return (error); |
---|
1567 | } |
---|
1568 | |
---|
1569 | static int |
---|
1570 | pipe_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) |
---|
1571 | { |
---|
1572 | struct pipe *pi; |
---|
1573 | |
---|
1574 | if (fp->f_type == DTYPE_FIFO) |
---|
1575 | return (vn_fill_kinfo(fp, kif, fdp)); |
---|
1576 | kif->kf_type = KF_TYPE_PIPE; |
---|
1577 | pi = fp->f_data; |
---|
1578 | kif->kf_un.kf_pipe.kf_pipe_addr = (uintptr_t)pi; |
---|
1579 | kif->kf_un.kf_pipe.kf_pipe_peer = (uintptr_t)pi->pipe_peer; |
---|
1580 | kif->kf_un.kf_pipe.kf_pipe_buffer_cnt = pi->pipe_buffer.cnt; |
---|
1581 | return (0); |
---|
1582 | } |
---|
1583 | |
---|
1584 | static void |
---|
1585 | pipe_free_kmem(struct pipe *cpipe) |
---|
1586 | { |
---|
1587 | |
---|
1588 | KASSERT(!mtx_owned(PIPE_MTX(cpipe)), |
---|
1589 | ("pipe_free_kmem: pipe mutex locked")); |
---|
1590 | |
---|
1591 | if (cpipe->pipe_buffer.buffer != NULL) { |
---|
1592 | atomic_subtract_long(&amountpipekva, cpipe->pipe_buffer.size); |
---|
1593 | #ifndef __rtems__ |
---|
1594 | vm_map_remove(pipe_map, |
---|
1595 | (vm_offset_t)cpipe->pipe_buffer.buffer, |
---|
1596 | (vm_offset_t)cpipe->pipe_buffer.buffer + cpipe->pipe_buffer.size); |
---|
1597 | #else /* __rtems__ */ |
---|
1598 | free(cpipe->pipe_buffer.buffer, M_TEMP); |
---|
1599 | #endif /* __rtems__ */ |
---|
1600 | cpipe->pipe_buffer.buffer = NULL; |
---|
1601 | } |
---|
1602 | #ifndef PIPE_NODIRECT |
---|
1603 | { |
---|
1604 | cpipe->pipe_map.cnt = 0; |
---|
1605 | cpipe->pipe_map.pos = 0; |
---|
1606 | cpipe->pipe_map.npages = 0; |
---|
1607 | } |
---|
1608 | #endif |
---|
1609 | } |
---|
1610 | |
---|
1611 | /* |
---|
1612 | * shutdown the pipe |
---|
1613 | */ |
---|
1614 | static void |
---|
1615 | pipeclose(struct pipe *cpipe) |
---|
1616 | { |
---|
1617 | struct pipepair *pp; |
---|
1618 | struct pipe *ppipe; |
---|
1619 | |
---|
1620 | KASSERT(cpipe != NULL, ("pipeclose: cpipe == NULL")); |
---|
1621 | |
---|
1622 | PIPE_LOCK(cpipe); |
---|
1623 | pipelock(cpipe, 0); |
---|
1624 | pp = cpipe->pipe_pair; |
---|
1625 | |
---|
1626 | pipeselwakeup(cpipe); |
---|
1627 | |
---|
1628 | /* |
---|
1629 | * If the other side is blocked, wake it up saying that |
---|
1630 | * we want to close it down. |
---|
1631 | */ |
---|
1632 | cpipe->pipe_state |= PIPE_EOF; |
---|
1633 | while (cpipe->pipe_busy) { |
---|
1634 | wakeup(cpipe); |
---|
1635 | cpipe->pipe_state |= PIPE_WANT; |
---|
1636 | pipeunlock(cpipe); |
---|
1637 | msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0); |
---|
1638 | pipelock(cpipe, 0); |
---|
1639 | } |
---|
1640 | |
---|
1641 | |
---|
1642 | /* |
---|
1643 | * Disconnect from peer, if any. |
---|
1644 | */ |
---|
1645 | ppipe = cpipe->pipe_peer; |
---|
1646 | if (ppipe->pipe_present == PIPE_ACTIVE) { |
---|
1647 | pipeselwakeup(ppipe); |
---|
1648 | |
---|
1649 | ppipe->pipe_state |= PIPE_EOF; |
---|
1650 | wakeup(ppipe); |
---|
1651 | KNOTE_LOCKED(&ppipe->pipe_sel.si_note, 0); |
---|
1652 | } |
---|
1653 | |
---|
1654 | /* |
---|
1655 | * Mark this endpoint as free. Release kmem resources. We |
---|
1656 | * don't mark this endpoint as unused until we've finished |
---|
1657 | * doing that, or the pipe might disappear out from under |
---|
1658 | * us. |
---|
1659 | */ |
---|
1660 | PIPE_UNLOCK(cpipe); |
---|
1661 | pipe_free_kmem(cpipe); |
---|
1662 | PIPE_LOCK(cpipe); |
---|
1663 | cpipe->pipe_present = PIPE_CLOSING; |
---|
1664 | pipeunlock(cpipe); |
---|
1665 | |
---|
1666 | /* |
---|
1667 | * knlist_clear() may sleep dropping the PIPE_MTX. Set the |
---|
1668 | * PIPE_FINALIZED, that allows other end to free the |
---|
1669 | * pipe_pair, only after the knotes are completely dismantled. |
---|
1670 | */ |
---|
1671 | knlist_clear(&cpipe->pipe_sel.si_note, 1); |
---|
1672 | cpipe->pipe_present = PIPE_FINALIZED; |
---|
1673 | seldrain(&cpipe->pipe_sel); |
---|
1674 | knlist_destroy(&cpipe->pipe_sel.si_note); |
---|
1675 | |
---|
1676 | /* |
---|
1677 | * If both endpoints are now closed, release the memory for the |
---|
1678 | * pipe pair. If not, unlock. |
---|
1679 | */ |
---|
1680 | if (ppipe->pipe_present == PIPE_FINALIZED) { |
---|
1681 | PIPE_UNLOCK(cpipe); |
---|
1682 | #ifdef MAC |
---|
1683 | mac_pipe_destroy(pp); |
---|
1684 | #endif |
---|
1685 | uma_zfree(pipe_zone, cpipe->pipe_pair); |
---|
1686 | } else |
---|
1687 | PIPE_UNLOCK(cpipe); |
---|
1688 | } |
---|
1689 | |
---|
1690 | /*ARGSUSED*/ |
---|
1691 | static int |
---|
1692 | pipe_kqfilter(struct file *fp, struct knote *kn) |
---|
1693 | { |
---|
1694 | struct pipe *cpipe; |
---|
1695 | |
---|
1696 | /* |
---|
1697 | * If a filter is requested that is not supported by this file |
---|
1698 | * descriptor, don't return an error, but also don't ever generate an |
---|
1699 | * event. |
---|
1700 | */ |
---|
1701 | if ((kn->kn_filter == EVFILT_READ) && !(fp->f_flag & FREAD)) { |
---|
1702 | kn->kn_fop = &pipe_nfiltops; |
---|
1703 | return (0); |
---|
1704 | } |
---|
1705 | if ((kn->kn_filter == EVFILT_WRITE) && !(fp->f_flag & FWRITE)) { |
---|
1706 | kn->kn_fop = &pipe_nfiltops; |
---|
1707 | return (0); |
---|
1708 | } |
---|
1709 | cpipe = fp->f_data; |
---|
1710 | PIPE_LOCK(cpipe); |
---|
1711 | switch (kn->kn_filter) { |
---|
1712 | case EVFILT_READ: |
---|
1713 | kn->kn_fop = &pipe_rfiltops; |
---|
1714 | break; |
---|
1715 | case EVFILT_WRITE: |
---|
1716 | kn->kn_fop = &pipe_wfiltops; |
---|
1717 | if (cpipe->pipe_peer->pipe_present != PIPE_ACTIVE) { |
---|
1718 | /* other end of pipe has been closed */ |
---|
1719 | PIPE_UNLOCK(cpipe); |
---|
1720 | return (EPIPE); |
---|
1721 | } |
---|
1722 | cpipe = PIPE_PEER(cpipe); |
---|
1723 | break; |
---|
1724 | default: |
---|
1725 | PIPE_UNLOCK(cpipe); |
---|
1726 | return (EINVAL); |
---|
1727 | } |
---|
1728 | |
---|
1729 | kn->kn_hook = cpipe; |
---|
1730 | knlist_add(&cpipe->pipe_sel.si_note, kn, 1); |
---|
1731 | PIPE_UNLOCK(cpipe); |
---|
1732 | return (0); |
---|
1733 | } |
---|
1734 | |
---|
1735 | static void |
---|
1736 | filt_pipedetach(struct knote *kn) |
---|
1737 | { |
---|
1738 | struct pipe *cpipe = kn->kn_hook; |
---|
1739 | |
---|
1740 | PIPE_LOCK(cpipe); |
---|
1741 | knlist_remove(&cpipe->pipe_sel.si_note, kn, 1); |
---|
1742 | PIPE_UNLOCK(cpipe); |
---|
1743 | } |
---|
1744 | |
---|
1745 | /*ARGSUSED*/ |
---|
1746 | static int |
---|
1747 | filt_piperead(struct knote *kn, long hint) |
---|
1748 | { |
---|
1749 | struct pipe *rpipe = kn->kn_hook; |
---|
1750 | struct pipe *wpipe = rpipe->pipe_peer; |
---|
1751 | int ret; |
---|
1752 | |
---|
1753 | PIPE_LOCK_ASSERT(rpipe, MA_OWNED); |
---|
1754 | kn->kn_data = rpipe->pipe_buffer.cnt; |
---|
1755 | if (kn->kn_data == 0) |
---|
1756 | kn->kn_data = rpipe->pipe_map.cnt; |
---|
1757 | |
---|
1758 | if ((rpipe->pipe_state & PIPE_EOF) || |
---|
1759 | wpipe->pipe_present != PIPE_ACTIVE || |
---|
1760 | (wpipe->pipe_state & PIPE_EOF)) { |
---|
1761 | kn->kn_flags |= EV_EOF; |
---|
1762 | return (1); |
---|
1763 | } |
---|
1764 | ret = kn->kn_data > 0; |
---|
1765 | return ret; |
---|
1766 | } |
---|
1767 | |
---|
1768 | /*ARGSUSED*/ |
---|
1769 | static int |
---|
1770 | filt_pipewrite(struct knote *kn, long hint) |
---|
1771 | { |
---|
1772 | struct pipe *wpipe; |
---|
1773 | |
---|
1774 | /* |
---|
1775 | * If this end of the pipe is closed, the knote was removed from the |
---|
1776 | * knlist and the list lock (i.e., the pipe lock) is therefore not held. |
---|
1777 | */ |
---|
1778 | wpipe = kn->kn_hook; |
---|
1779 | if (wpipe->pipe_present != PIPE_ACTIVE || |
---|
1780 | (wpipe->pipe_state & PIPE_EOF)) { |
---|
1781 | kn->kn_data = 0; |
---|
1782 | kn->kn_flags |= EV_EOF; |
---|
1783 | return (1); |
---|
1784 | } |
---|
1785 | PIPE_LOCK_ASSERT(wpipe, MA_OWNED); |
---|
1786 | kn->kn_data = (wpipe->pipe_buffer.size > 0) ? |
---|
1787 | (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) : PIPE_BUF; |
---|
1788 | if (wpipe->pipe_state & PIPE_DIRECTW) |
---|
1789 | kn->kn_data = 0; |
---|
1790 | |
---|
1791 | return (kn->kn_data >= PIPE_BUF); |
---|
1792 | } |
---|
1793 | |
---|
/*
 * Detach routine that does nothing.  Presumably installed in
 * pipe_nfiltops (defined outside this view), whose knotes are never
 * added to a pipe's knlist and so have nothing to undo.
 */
static void
filt_pipedetach_notsup(struct knote *kn)
{
	/* Intentionally empty. */
	(void)kn;
}
---|
1799 | |
---|
/*
 * Event routine that never fires: it ignores both arguments and always
 * returns 0.  Presumably the event handler of pipe_nfiltops (defined
 * outside this view).
 */
static int
filt_pipenotsup(struct knote *kn, long hint)
{
	(void)kn;
	(void)hint;

	return (0);
}
---|