Context Navigation

source: rtems-libbsd/freebsd/sys/fs/nfsclient/nfs_clbio.c

6-freebsd-12

Last change on this file was 6e4709b, checked in by Sebastian Huber <sebastian.huber@…>, on 01/05/23 at 16:42:48
vfs/nfs: Revert white space changes
Property mode set to `100644`
File size: 52.3 KB

Line
1	#include <machine/rtems-bsd-kernel-space.h>
2
3	/*-
4	* SPDX-License-Identifier: BSD-3-Clause
5	*
6	* Copyright (c) 1989, 1993
7	* The Regents of the University of California. All rights reserved.
8	*
9	* This code is derived from software contributed to Berkeley by
10	* Rick Macklem at The University of Guelph.
11	*
12	* Redistribution and use in source and binary forms, with or without
13	* modification, are permitted provided that the following conditions
14	* are met:
15	* 1. Redistributions of source code must retain the above copyright
16	* notice, this list of conditions and the following disclaimer.
17	* 2. Redistributions in binary form must reproduce the above copyright
18	* notice, this list of conditions and the following disclaimer in the
19	* documentation and/or other materials provided with the distribution.
20	* 3. Neither the name of the University nor the names of its contributors
21	* may be used to endorse or promote products derived from this software
22	* without specific prior written permission.
23	*
24	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34	* SUCH DAMAGE.
35	*
36	* @(#)nfs_bio.c 8.9 (Berkeley) 3/30/95
37	*/
38
39	#include <sys/cdefs.h>
40	__FBSDID("$FreeBSD$");
41
42	#include <sys/param.h>
43	#include <sys/systm.h>
44	#include <sys/bio.h>
45	#include <sys/buf.h>
46	#include <sys/kernel.h>
47	#include <sys/mount.h>
48	#include <sys/rwlock.h>
49	#include <sys/vmmeter.h>
50	#include <sys/vnode.h>
51
52	#include <vm/vm.h>
53	#include <vm/vm_param.h>
54	#include <vm/vm_extern.h>
55	#include <vm/vm_page.h>
56	#include <vm/vm_object.h>
57	#include <vm/vm_pager.h>
58	#ifndef __rtems__
59	#include <vm/vnode_pager.h>
60	#endif /* __rtems__ */
61
62	#include <fs/nfs/nfsport.h>
63	#include <fs/nfsclient/nfsmount.h>
64	#include <fs/nfsclient/nfs.h>
65	#include <fs/nfsclient/nfsnode.h>
66	#include <fs/nfsclient/nfs_kdtrace.h>
67
68	extern int newnfs_directio_allow_mmap;
69	extern struct nfsstatsv1 nfsstatsv1;
70	extern struct mtx ncl_iod_mutex;
71	extern int ncl_numasync;
72	extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
73	extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
74	extern int newnfs_directio_enable;
75	extern int nfs_keep_dirty_on_error;
76
77	int ncl_pbuf_freecnt = -1; /* start out unlimited */
78
79	static struct buf nfs_getcacheblk(struct vnode vp, daddr_t bn, int size,
80	struct thread *td);
81	static int nfs_directio_write(struct vnode vp, struct uio uiop,
82	struct ucred *cred, int ioflag);
83
84	/*
85	* Vnode op for VM getpages.
86	*/
87	SYSCTL_DECL(_vfs_nfs);
88	static int use_buf_pager = 1;
89	SYSCTL_INT(_vfs_nfs, OID_AUTO, use_buf_pager, CTLFLAG_RWTUN,
90	&use_buf_pager, 0,
91	"Use buffer pager instead of direct readrpc call");
92
93	static daddr_t
94	ncl_gbp_getblkno(struct vnode *vp, vm_ooffset_t off)
95	{
96
97	return (off / vp->v_bufobj.bo_bsize);
98	}
99
100	static int
101	ncl_gbp_getblksz(struct vnode *vp, daddr_t lbn)
102	{
103	struct nfsnode *np;
104	u_quad_t nsize;
105	int biosize, bcount;
106
107	np = VTONFS(vp);
108	NFSLOCKNODE(np);
109	nsize = np->n_size;
110	NFSUNLOCKNODE(np);
111
112	biosize = vp->v_bufobj.bo_bsize;
113	bcount = biosize;
114	if ((off_t)lbn * biosize >= nsize)
115	bcount = 0;
116	else if ((off_t)(lbn + 1) * biosize > nsize)
117	bcount = nsize - (off_t)lbn * biosize;
118	return (bcount);
119	}
120
121	int
122	ncl_getpages(struct vop_getpages_args *ap)
123	{
124	int i, error, nextoff, size, toff, count, npages;
125	struct uio uio;
126	struct iovec iov;
127	vm_offset_t kva;
128	struct buf *bp;
129	struct vnode *vp;
130	struct thread *td;
131	struct ucred *cred;
132	struct nfsmount *nmp;
133	vm_object_t object;
134	vm_page_t *pages;
135	struct nfsnode *np;
136
137	vp = ap->a_vp;
138	np = VTONFS(vp);
139	td = curthread;
140	cred = curthread->td_ucred;
141	nmp = VFSTONFS(vp->v_mount);
142	pages = ap->a_m;
143	npages = ap->a_count;
144
145	if ((object = vp->v_object) == NULL) {
146	printf("ncl_getpages: called with non-merged cache vnode\n");
147	return (VM_PAGER_ERROR);
148	}
149
150	if (newnfs_directio_enable && !newnfs_directio_allow_mmap) {
151	NFSLOCKNODE(np);
152	if ((np->n_flag & NNONCACHE) && (vp->v_type == VREG)) {
153	NFSUNLOCKNODE(np);
154	printf("ncl_getpages: called on non-cacheable vnode\n");
155	return (VM_PAGER_ERROR);
156	} else
157	NFSUNLOCKNODE(np);
158	}
159
160	mtx_lock(&nmp->nm_mtx);
161	if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
162	(nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
163	mtx_unlock(&nmp->nm_mtx);
164	/* We'll never get here for v4, because we always have fsinfo */
165	(void)ncl_fsinfo(nmp, vp, cred, td);
166	} else
167	mtx_unlock(&nmp->nm_mtx);
168
169	if (use_buf_pager)
170	return (vfs_bio_getpages(vp, pages, npages, ap->a_rbehind,
171	ap->a_rahead, ncl_gbp_getblkno, ncl_gbp_getblksz));
172
173	/*
174	* If the requested page is partially valid, just return it and
175	* allow the pager to zero-out the blanks. Partially valid pages
176	* can only occur at the file EOF.
177	*
178	* XXXGL: is that true for NFS, where short read can occur???
179	*/
180	#ifndef __rtems__
181	VM_OBJECT_WLOCK(object);
182	if (pages[npages - 1]->valid != 0 && --npages == 0)
183	goto out;
184	VM_OBJECT_WUNLOCK(object);
185	#endif /* __rtems__*/
186
187	/*
188	* We use only the kva address for the buffer, but this is extremely
189	* convenient and fast.
190	*/
191	bp = getpbuf(&ncl_pbuf_freecnt);
192
193	kva = (vm_offset_t) bp->b_data;
194	#ifndef __rtems__
195	pmap_qenter(kva, pages, npages);
196	#endif /* __rtems__*/
197	VM_CNT_INC(v_vnodein);
198	VM_CNT_ADD(v_vnodepgsin, npages);
199
200	count = npages << PAGE_SHIFT;
201	iov.iov_base = (caddr_t) kva;
202	iov.iov_len = count;
203	uio.uio_iov = &iov;
204	uio.uio_iovcnt = 1;
205	#ifndef __rtems__
206	uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
207	#else /* __rtems__*/
208	uio.uio_offset = 0;
209	#endif /* __rtems__*/
210	uio.uio_resid = count;
211	uio.uio_segflg = UIO_SYSSPACE;
212	uio.uio_rw = UIO_READ;
213	uio.uio_td = td;
214
215	error = ncl_readrpc(vp, &uio, cred);
216	#ifndef __rtems__
217	pmap_qremove(kva, npages);
218	#endif /* __rtems__*/
219
220	relpbuf(bp, &ncl_pbuf_freecnt);
221
222	if (error && (uio.uio_resid == count)) {
223	printf("ncl_getpages: error %d\n", error);
224	return (VM_PAGER_ERROR);
225	}
226
227	/*
228	* Calculate the number of bytes read and validate only that number
229	* of bytes. Note that due to pending writes, size may be 0. This
230	* does not mean that the remaining data is invalid!
231	*/
232
233	size = count - uio.uio_resid;
234	VM_OBJECT_WLOCK(object);
235	for (i = 0, toff = 0; i < npages; i++, toff = nextoff) {
236	vm_page_t m;
237	nextoff = toff + PAGE_SIZE;
238	#ifndef __rtems__
239	m = pages[i];
240
241	if (nextoff <= size) {
242	/*
243	* Read operation filled an entire page
244	*/
245	m->valid = VM_PAGE_BITS_ALL;
246	KASSERT(m->dirty == 0,
247	("nfs_getpages: page %p is dirty", m));
248	} else if (size > toff) {
249	/*
250	* Read operation filled a partial page.
251	*/
252	m->valid = 0;
253	vm_page_set_valid_range(m, 0, size - toff);
254	KASSERT(m->dirty == 0,
255	("nfs_getpages: page %p is dirty", m));
256	} else {
257	/*
258	* Read operation was short. If no error
259	* occurred we may have hit a zero-fill
260	* section. We leave valid set to 0, and page
261	* is freed by vm_page_readahead_finish() if
262	* its index is not equal to requested, or
263	* page is zeroed and set valid by
264	* vm_pager_get_pages() for requested page.
265	*/
266	;
267	}
268	#endif /* __rtems__*/
269	}
270	#ifndef __rtems__
271	out:
272	#endif /* __rtems__ */
273	VM_OBJECT_WUNLOCK(object);
274	if (ap->a_rbehind)
275	*ap->a_rbehind = 0;
276	if (ap->a_rahead)
277	*ap->a_rahead = 0;
278	return (VM_PAGER_OK);
279	}
280
281	/*
282	* Vnode op for VM putpages.
283	*/
284	int
285	ncl_putpages(struct vop_putpages_args *ap)
286	{
287	struct uio uio;
288	struct iovec iov;
289	int i, error, npages, count;
290	off_t offset;
291	int *rtvals;
292	struct vnode *vp;
293	struct thread *td;
294	struct ucred *cred;
295	struct nfsmount *nmp;
296	struct nfsnode *np;
297	vm_page_t *pages;
298
299	vp = ap->a_vp;
300	np = VTONFS(vp);
301	td = curthread; /* XXX */
302	/* Set the cred to n_writecred for the write rpcs. */
303	if (np->n_writecred != NULL)
304	cred = crhold(np->n_writecred);
305	else
306	cred = crhold(curthread->td_ucred); /* XXX */
307	nmp = VFSTONFS(vp->v_mount);
308	pages = ap->a_m;
309	count = ap->a_count;
310	rtvals = ap->a_rtvals;
311	npages = btoc(count);
312	#ifndef __rtems__
313	offset = IDX_TO_OFF(pages[0]->pindex);
314	#else /* __rtems__*/
315	offset = 0;
316	#endif /* __rtems__*/
317
318	mtx_lock(&nmp->nm_mtx);
319	if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
320	(nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
321	mtx_unlock(&nmp->nm_mtx);
322	(void)ncl_fsinfo(nmp, vp, cred, td);
323	} else
324	mtx_unlock(&nmp->nm_mtx);
325
326	NFSLOCKNODE(np);
327	if (newnfs_directio_enable && !newnfs_directio_allow_mmap &&
328	(np->n_flag & NNONCACHE) && (vp->v_type == VREG)) {
329	NFSUNLOCKNODE(np);
330	printf("ncl_putpages: called on noncache-able vnode\n");
331	NFSLOCKNODE(np);
332	}
333	/*
334	* When putting pages, do not extend file past EOF.
335	*/
336	if (offset + count > np->n_size) {
337	count = np->n_size - offset;
338	if (count < 0)
339	count = 0;
340	}
341	NFSUNLOCKNODE(np);
342
343	for (i = 0; i < npages; i++)
344	rtvals[i] = VM_PAGER_ERROR;
345
346	VM_CNT_INC(v_vnodeout);
347	VM_CNT_ADD(v_vnodepgsout, count);
348
349	iov.iov_base = unmapped_buf;
350	iov.iov_len = count;
351	uio.uio_iov = &iov;
352	uio.uio_iovcnt = 1;
353	uio.uio_offset = offset;
354	uio.uio_resid = count;
355	uio.uio_segflg = UIO_NOCOPY;
356	uio.uio_rw = UIO_WRITE;
357	uio.uio_td = td;
358
359	#ifndef __rtems__
360	error = VOP_WRITE(vp, &uio, vnode_pager_putpages_ioflags(ap->a_sync),
361	cred);
362	#else /* __rtems__*/
363	error = VOP_WRITE(vp, &uio, 0, cred);
364	#endif /* __rtems__*/
365	crfree(cred);
366
367	#ifndef __rtems__
368	if (error == 0 \|\| !nfs_keep_dirty_on_error) {
369	vnode_pager_undirty_pages(pages, rtvals, count - uio.uio_resid,
370	np->n_size - offset, npages * PAGE_SIZE);
371	}
372	#endif /* __rtems__*/
373	return (rtvals[0]);
374	}
375
376	/*
377	* For nfs, cache consistency can only be maintained approximately.
378	* Although RFC1094 does not specify the criteria, the following is
379	* believed to be compatible with the reference port.
380	* For nfs:
381	* If the file's modify time on the server has changed since the
382	* last read rpc or you have written to the file,
383	* you may have lost data cache consistency with the
384	* server, so flush all of the file's data out of the cache.
385	* Then force a getattr rpc to ensure that you have up to date
386	* attributes.
387	* NB: This implies that cache data can be read when up to
388	* NFS_ATTRTIMEO seconds out of date. If you find that you need current
389	* attributes this could be forced by setting n_attrstamp to 0 before
390	* the VOP_GETATTR() call.
391	*/
392	static inline int
393	nfs_bioread_check_cons(struct vnode vp, struct thread td, struct ucred *cred)
394	{
395	int error = 0;
396	struct vattr vattr;
397	struct nfsnode *np = VTONFS(vp);
398	bool old_lock;
399
400	/*
401	* Ensure the exclusove access to the node before checking
402	* whether the cache is consistent.
403	*/
404	old_lock = ncl_excl_start(vp);
405	NFSLOCKNODE(np);
406	if (np->n_flag & NMODIFIED) {
407	NFSUNLOCKNODE(np);
408	if (vp->v_type != VREG) {
409	if (vp->v_type != VDIR)
410	panic("nfs: bioread, not dir");
411	ncl_invaldir(vp);
412	error = ncl_vinvalbuf(vp, V_SAVE \| V_ALLOWCLEAN, td, 1);
413	if (error != 0)
414	goto out;
415	}
416	np->n_attrstamp = 0;
417	KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
418	error = VOP_GETATTR(vp, &vattr, cred);
419	if (error)
420	goto out;
421	NFSLOCKNODE(np);
422	np->n_mtime = vattr.va_mtime;
423	NFSUNLOCKNODE(np);
424	} else {
425	NFSUNLOCKNODE(np);
426	error = VOP_GETATTR(vp, &vattr, cred);
427	if (error)
428	goto out;
429	NFSLOCKNODE(np);
430	if ((np->n_flag & NSIZECHANGED)
431	\|\| (NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime))) {
432	NFSUNLOCKNODE(np);
433	if (vp->v_type == VDIR)
434	ncl_invaldir(vp);
435	error = ncl_vinvalbuf(vp, V_SAVE \| V_ALLOWCLEAN, td, 1);
436	if (error != 0)
437	goto out;
438	NFSLOCKNODE(np);
439	np->n_mtime = vattr.va_mtime;
440	np->n_flag &= ~NSIZECHANGED;
441	}
442	NFSUNLOCKNODE(np);
443	}
444	out:
445	ncl_excl_finish(vp, old_lock);
446	return (error);
447	}
448
449	/*
450	* Vnode op for read using bio
451	*/
452	int
453	ncl_bioread(struct vnode vp, struct uio uio, int ioflag, struct ucred *cred)
454	{
455	struct nfsnode *np = VTONFS(vp);
456	int biosize, i;
457	struct buf bp, rabp;
458	struct thread *td;
459	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
460	daddr_t lbn, rabn;
461	int bcount;
462	int seqcount;
463	int nra, error = 0, n = 0, on = 0;
464	off_t tmp_off;
465
466	KASSERT(uio->uio_rw == UIO_READ, ("ncl_read mode"));
467	if (uio->uio_resid == 0)
468	return (0);
469	if (uio->uio_offset < 0) /* XXX VDIR cookies can be negative */
470	return (EINVAL);
471	td = uio->uio_td;
472
473	mtx_lock(&nmp->nm_mtx);
474	if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
475	(nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
476	mtx_unlock(&nmp->nm_mtx);
477	(void)ncl_fsinfo(nmp, vp, cred, td);
478	mtx_lock(&nmp->nm_mtx);
479	}
480	if (nmp->nm_rsize == 0 \|\| nmp->nm_readdirsize == 0)
481	(void) newnfs_iosize(nmp);
482
483	tmp_off = uio->uio_offset + uio->uio_resid;
484	if (vp->v_type != VDIR &&
485	(tmp_off > nmp->nm_maxfilesize \|\| tmp_off < uio->uio_offset)) {
486	mtx_unlock(&nmp->nm_mtx);
487	return (EFBIG);
488	}
489	mtx_unlock(&nmp->nm_mtx);
490
491	if (newnfs_directio_enable && (ioflag & IO_DIRECT) && (vp->v_type == VREG))
492	/* No caching/ no readaheads. Just read data into the user buffer */
493	return ncl_readrpc(vp, uio, cred);
494
495	biosize = vp->v_bufobj.bo_bsize;
496	seqcount = (int)((off_t)(ioflag >> IO_SEQSHIFT) * biosize / BKVASIZE);
497
498	error = nfs_bioread_check_cons(vp, td, cred);
499	if (error)
500	return error;
501
502	do {
503	u_quad_t nsize;
504
505	NFSLOCKNODE(np);
506	nsize = np->n_size;
507	NFSUNLOCKNODE(np);
508
509	switch (vp->v_type) {
510	case VREG:
511	NFSINCRGLOBAL(nfsstatsv1.biocache_reads);
512	lbn = uio->uio_offset / biosize;
513	on = uio->uio_offset - (lbn * biosize);
514
515	/*
516	* Start the read ahead(s), as required.
517	*/
518	if (nmp->nm_readahead > 0) {
519	for (nra = 0; nra < nmp->nm_readahead && nra < seqcount &&
520	(off_t)(lbn + 1 + nra) * biosize < nsize; nra++) {
521	rabn = lbn + 1 + nra;
522	if (incore(&vp->v_bufobj, rabn) == NULL) {
523	rabp = nfs_getcacheblk(vp, rabn, biosize, td);
524	if (!rabp) {
525	error = newnfs_sigintr(nmp, td);
526	return (error ? error : EINTR);
527	}
528	if ((rabp->b_flags & (B_CACHE\|B_DELWRI)) == 0) {
529	rabp->b_flags \|= B_ASYNC;
530	rabp->b_iocmd = BIO_READ;
531	vfs_busy_pages(rabp, 0);
532	if (ncl_asyncio(nmp, rabp, cred, td)) {
533	rabp->b_flags \|= B_INVAL;
534	rabp->b_ioflags \|= BIO_ERROR;
535	vfs_unbusy_pages(rabp);
536	brelse(rabp);
537	break;
538	}
539	} else {
540	brelse(rabp);
541	}
542	}
543	}
544	}
545
546	/* Note that bcount is not DEV_BSIZE aligned. */
547	bcount = biosize;
548	if ((off_t)lbn * biosize >= nsize) {
549	bcount = 0;
550	} else if ((off_t)(lbn + 1) * biosize > nsize) {
551	bcount = nsize - (off_t)lbn * biosize;
552	}
553	bp = nfs_getcacheblk(vp, lbn, bcount, td);
554
555	if (!bp) {
556	error = newnfs_sigintr(nmp, td);
557	return (error ? error : EINTR);
558	}
559
560	/*
561	* If B_CACHE is not set, we must issue the read. If this
562	* fails, we return an error.
563	*/
564
565	if ((bp->b_flags & B_CACHE) == 0) {
566	bp->b_iocmd = BIO_READ;
567	vfs_busy_pages(bp, 0);
568	error = ncl_doio(vp, bp, cred, td, 0);
569	if (error) {
570	brelse(bp);
571	return (error);
572	}
573	}
574
575	/*
576	* on is the offset into the current bp. Figure out how many
577	* bytes we can copy out of the bp. Note that bcount is
578	* NOT DEV_BSIZE aligned.
579	*
580	* Then figure out how many bytes we can copy into the uio.
581	*/
582
583	n = 0;
584	if (on < bcount)
585	n = MIN((unsigned)(bcount - on), uio->uio_resid);
586	break;
587	case VLNK:
588	NFSINCRGLOBAL(nfsstatsv1.biocache_readlinks);
589	bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, td);
590	if (!bp) {
591	error = newnfs_sigintr(nmp, td);
592	return (error ? error : EINTR);
593	}
594	if ((bp->b_flags & B_CACHE) == 0) {
595	bp->b_iocmd = BIO_READ;
596	vfs_busy_pages(bp, 0);
597	error = ncl_doio(vp, bp, cred, td, 0);
598	if (error) {
599	bp->b_ioflags \|= BIO_ERROR;
600	brelse(bp);
601	return (error);
602	}
603	}
604	n = MIN(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
605	on = 0;
606	break;
607	case VDIR:
608	NFSINCRGLOBAL(nfsstatsv1.biocache_readdirs);
609	if (np->n_direofoffset
610	&& uio->uio_offset >= np->n_direofoffset) {
611	return (0);
612	}
613	lbn = (uoff_t)uio->uio_offset / NFS_DIRBLKSIZ;
614	on = uio->uio_offset & (NFS_DIRBLKSIZ - 1);
615	bp = nfs_getcacheblk(vp, lbn, NFS_DIRBLKSIZ, td);
616	if (!bp) {
617	error = newnfs_sigintr(nmp, td);
618	return (error ? error : EINTR);
619	}
620	if ((bp->b_flags & B_CACHE) == 0) {
621	bp->b_iocmd = BIO_READ;
622	vfs_busy_pages(bp, 0);
623	error = ncl_doio(vp, bp, cred, td, 0);
624	if (error) {
625	brelse(bp);
626	}
627	while (error == NFSERR_BAD_COOKIE) {
628	ncl_invaldir(vp);
629	error = ncl_vinvalbuf(vp, 0, td, 1);
630
631	/*
632	* Yuck! The directory has been modified on the
633	* server. The only way to get the block is by
634	* reading from the beginning to get all the
635	* offset cookies.
636	*
637	* Leave the last bp intact unless there is an error.
638	* Loop back up to the while if the error is another
639	* NFSERR_BAD_COOKIE (double yuch!).
640	*/
641	for (i = 0; i <= lbn && !error; i++) {
642	if (np->n_direofoffset
643	&& (i * NFS_DIRBLKSIZ) >= np->n_direofoffset)
644	return (0);
645	bp = nfs_getcacheblk(vp, i, NFS_DIRBLKSIZ, td);
646	if (!bp) {
647	error = newnfs_sigintr(nmp, td);
648	return (error ? error : EINTR);
649	}
650	if ((bp->b_flags & B_CACHE) == 0) {
651	bp->b_iocmd = BIO_READ;
652	vfs_busy_pages(bp, 0);
653	error = ncl_doio(vp, bp, cred, td, 0);
654	/*
655	* no error + B_INVAL == directory EOF,
656	* use the block.
657	*/
658	if (error == 0 && (bp->b_flags & B_INVAL))
659	break;
660	}
661	/*
662	* An error will throw away the block and the
663	* for loop will break out. If no error and this
664	* is not the block we want, we throw away the
665	* block and go for the next one via the for loop.
666	*/
667	if (error \|\| i < lbn)
668	brelse(bp);
669	}
670	}
671	/*
672	* The above while is repeated if we hit another cookie
673	* error. If we hit an error and it wasn't a cookie error,
674	* we give up.
675	*/
676	if (error)
677	return (error);
678	}
679
680	/*
681	* If not eof and read aheads are enabled, start one.
682	* (You need the current block first, so that you have the
683	* directory offset cookie of the next block.)
684	*/
685	if (nmp->nm_readahead > 0 &&
686	(bp->b_flags & B_INVAL) == 0 &&
687	(np->n_direofoffset == 0 \|\|
688	(lbn + 1) * NFS_DIRBLKSIZ < np->n_direofoffset) &&
689	incore(&vp->v_bufobj, lbn + 1) == NULL) {
690	rabp = nfs_getcacheblk(vp, lbn + 1, NFS_DIRBLKSIZ, td);
691	if (rabp) {
692	if ((rabp->b_flags & (B_CACHE\|B_DELWRI)) == 0) {
693	rabp->b_flags \|= B_ASYNC;
694	rabp->b_iocmd = BIO_READ;
695	vfs_busy_pages(rabp, 0);
696	if (ncl_asyncio(nmp, rabp, cred, td)) {
697	rabp->b_flags \|= B_INVAL;
698	rabp->b_ioflags \|= BIO_ERROR;
699	vfs_unbusy_pages(rabp);
700	brelse(rabp);
701	}
702	} else {
703	brelse(rabp);
704	}
705	}
706	}
707	/*
708	* Unlike VREG files, whos buffer size ( bp->b_bcount ) is
709	* chopped for the EOF condition, we cannot tell how large
710	* NFS directories are going to be until we hit EOF. So
711	* an NFS directory buffer is not chopped to its EOF. Now,
712	* it just so happens that b_resid will effectively chop it
713	* to EOF. BUT this information is lost if the buffer goes
714	* away and is reconstituted into a B_CACHE state ( due to
715	* being VMIO ) later. So we keep track of the directory eof
716	* in np->n_direofoffset and chop it off as an extra step
717	* right here.
718	*/
719	n = lmin(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid - on);
720	if (np->n_direofoffset && n > np->n_direofoffset - uio->uio_offset)
721	n = np->n_direofoffset - uio->uio_offset;
722	break;
723	default:
724	printf(" ncl_bioread: type %x unexpected\n", vp->v_type);
725	bp = NULL;
726	break;
727	}
728
729	if (n > 0) {
730	error = vn_io_fault_uiomove(bp->b_data + on, (int)n, uio);
731	}
732	if (vp->v_type == VLNK)
733	n = 0;
734	if (bp != NULL)
735	brelse(bp);
736	} while (error == 0 && uio->uio_resid > 0 && n > 0);
737	return (error);
738	}
739
740	#ifdef __rtems__
741	/*
742	* The NFS write path cannot handle iovecs with len > 1. So we need to
743	* break up iovecs accordingly (restricting them to wsize).
744	* For the SYNC case, we can do this with 1 copy (user buffer -> mbuf).
745	* For the ASYNC case, 2 copies are needed. The first a copy from the
746	* user buffer to a staging buffer and then a second copy from the staging
747	* buffer to mbufs. This can be optimized by copying from the user buffer
748	* directly into mbufs and passing the chain down, but that requires a
749	* fair amount of re-working of the relevant codepaths (and can be done
750	* later).
751	*/
752	static int
753	nfs_directio_write(vp, uiop, cred, ioflag)
754	struct vnode *vp;
755	struct uio *uiop;
756	struct ucred *cred;
757	int ioflag;
758	{
759	int error;
760	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
761	struct thread *td = uiop->uio_td;
762	int size;
763	int wsize;
764
765	mtx_lock(&nmp->nm_mtx);
766	wsize = nmp->nm_wsize;
767	mtx_unlock(&nmp->nm_mtx);
768	if (ioflag & IO_SYNC) {
769	int iomode, must_commit;
770	struct uio uio;
771	struct iovec iov;
772	do_sync:
773	while (uiop->uio_resid > 0) {
774	size = MIN(uiop->uio_resid, wsize);
775	size = MIN(uiop->uio_iov->iov_len, size);
776	iov.iov_base = uiop->uio_iov->iov_base;
777	iov.iov_len = size;
778	uio.uio_iov = &iov;
779	uio.uio_iovcnt = 1;
780	uio.uio_offset = uiop->uio_offset;
781	uio.uio_resid = size;
782	uio.uio_segflg = UIO_USERSPACE;
783	uio.uio_rw = UIO_WRITE;
784	uio.uio_td = td;
785	iomode = NFSWRITE_FILESYNC;
786	error = ncl_writerpc(vp, &uio, cred, &iomode,
787	&must_commit, 0);
788	KASSERT((must_commit == 0),
789	("ncl_directio_write: Did not commit write"));
790	if (error)
791	return (error);
792	uiop->uio_offset += size;
793	uiop->uio_resid -= size;
794	if (uiop->uio_iov->iov_len <= size) {
795	uiop->uio_iovcnt--;
796	uiop->uio_iov++;
797	} else {
798	uiop->uio_iov->iov_base =
799	(char *)uiop->uio_iov->iov_base + size;
800	uiop->uio_iov->iov_len -= size;
801	}
802	}
803	} else {
804	struct uio *t_uio;
805	struct iovec *t_iov;
806	struct buf *bp;
807
808	/*
809	* Break up the write into blocksize chunks and hand these
810	* over to nfsiod's for write back.
811	* Unfortunately, this incurs a copy of the data. Since
812	* the user could modify the buffer before the write is
813	* initiated.
814	*
815	* The obvious optimization here is that one of the 2 copies
816	* in the async write path can be eliminated by copying the
817	* data here directly into mbufs and passing the mbuf chain
818	* down. But that will require a fair amount of re-working
819	* of the code and can be done if there's enough interest
820	* in NFS directio access.
821	*/
822	while (uiop->uio_resid > 0) {
823	size = MIN(uiop->uio_resid, wsize);
824	size = MIN(uiop->uio_iov->iov_len, size);
825	bp = getpbuf(&ncl_pbuf_freecnt);
826	t_uio = malloc(sizeof(struct uio), M_NFSDIRECTIO, M_WAITOK);
827	t_iov = malloc(sizeof(struct iovec), M_NFSDIRECTIO, M_WAITOK);
828	t_iov->iov_base = malloc(size, M_NFSDIRECTIO, M_WAITOK);
829	t_iov->iov_len = size;
830	t_uio->uio_iov = t_iov;
831	t_uio->uio_iovcnt = 1;
832	t_uio->uio_offset = uiop->uio_offset;
833	t_uio->uio_resid = size;
834	t_uio->uio_segflg = UIO_SYSSPACE;
835	t_uio->uio_rw = UIO_WRITE;
836	t_uio->uio_td = td;
837	KASSERT(uiop->uio_segflg == UIO_USERSPACE \|\|
838	uiop->uio_segflg == UIO_SYSSPACE,
839	("nfs_directio_write: Bad uio_segflg"));
840	if (uiop->uio_segflg == UIO_USERSPACE) {
841	error = copyin(uiop->uio_iov->iov_base,
842	t_iov->iov_base, size);
843	if (error != 0)
844	goto err_free;
845	} else
846	/*
847	* UIO_SYSSPACE may never happen, but handle
848	* it just in case it does.
849	*/
850	bcopy(uiop->uio_iov->iov_base, t_iov->iov_base,
851	size);
852	bp->b_flags \|= B_DIRECT;
853	bp->b_iocmd = BIO_WRITE;
854	if (cred != NOCRED) {
855	crhold(cred);
856	bp->b_wcred = cred;
857	} else
858	bp->b_wcred = NOCRED;
859	bp->b_caller1 = (void *)t_uio;
860	bp->b_vp = vp;
861	error = ncl_asyncio(nmp, bp, NOCRED, td);
862	err_free:
863	if (error) {
864	free(t_iov->iov_base, M_NFSDIRECTIO);
865	free(t_iov, M_NFSDIRECTIO);
866	free(t_uio, M_NFSDIRECTIO);
867	bp->b_vp = NULL;
868	relpbuf(bp, &ncl_pbuf_freecnt);
869	if (error == EINTR)
870	return (error);
871	goto do_sync;
872	}
873	uiop->uio_offset += size;
874	uiop->uio_resid -= size;
875	if (uiop->uio_iov->iov_len <= size) {
876	uiop->uio_iovcnt--;
877	uiop->uio_iov++;
878	} else {
879	uiop->uio_iov->iov_base =
880	(char *)uiop->uio_iov->iov_base + size;
881	uiop->uio_iov->iov_len -= size;
882	}
883	}
884	}
885	return (0);
886	}
887	#endif /* __rtems__ */
888
889	/*
890	* Vnode op for write using bio
891	*/
892	int
893	ncl_write(struct vop_write_args *ap)
894	{
895	int biosize;
896	struct uio *uio = ap->a_uio;
897	struct thread *td = uio->uio_td;
898	struct vnode *vp = ap->a_vp;
899	struct nfsnode *np = VTONFS(vp);
900	struct ucred *cred = ap->a_cred;
901	int ioflag = ap->a_ioflag;
902	struct buf *bp;
903	struct vattr vattr;
904	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
905	daddr_t lbn;
906	int bcount, noncontig_write, obcount;
907	int bp_cached, n, on, error = 0, error1, wouldcommit;
908	size_t orig_resid, local_resid;
909	off_t orig_size, tmp_off;
910
911	KASSERT(uio->uio_rw == UIO_WRITE, ("ncl_write mode"));
912	KASSERT(uio->uio_segflg != UIO_USERSPACE \|\| uio->uio_td == curthread,
913	("ncl_write proc"));
914	if (vp->v_type != VREG)
915	return (EIO);
916	NFSLOCKNODE(np);
917	if (np->n_flag & NWRITEERR) {
918	np->n_flag &= ~NWRITEERR;
919	NFSUNLOCKNODE(np);
920	return (np->n_error);
921	} else
922	NFSUNLOCKNODE(np);
923	mtx_lock(&nmp->nm_mtx);
924	if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
925	(nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
926	mtx_unlock(&nmp->nm_mtx);
927	(void)ncl_fsinfo(nmp, vp, cred, td);
928	mtx_lock(&nmp->nm_mtx);
929	}
930	if (nmp->nm_wsize == 0)
931	(void) newnfs_iosize(nmp);
932	mtx_unlock(&nmp->nm_mtx);
933
934	/*
935	* Synchronously flush pending buffers if we are in synchronous
936	* mode or if we are appending.
937	*/
938	if (ioflag & (IO_APPEND \| IO_SYNC)) {
939	NFSLOCKNODE(np);
940	if (np->n_flag & NMODIFIED) {
941	NFSUNLOCKNODE(np);
942	#ifdef notyet /* Needs matching nonblock semantics elsewhere, too. */
943	/*
944	* Require non-blocking, synchronous writes to
945	* dirty files to inform the program it needs
946	* to fsync(2) explicitly.
947	*/
948	if (ioflag & IO_NDELAY)
949	return (EAGAIN);
950	#endif
951	np->n_attrstamp = 0;
952	KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
953	error = ncl_vinvalbuf(vp, V_SAVE \| ((ioflag &
954	IO_VMIO) != 0 ? V_VMIO : 0), td, 1);
955	if (error != 0)
956	return (error);
957	} else
958	NFSUNLOCKNODE(np);
959	}
960
961	orig_resid = uio->uio_resid;
962	NFSLOCKNODE(np);
963	orig_size = np->n_size;
964	NFSUNLOCKNODE(np);
965
966	/*
967	* If IO_APPEND then load uio_offset. We restart here if we cannot
968	* get the append lock.
969	*/
970	if (ioflag & IO_APPEND) {
971	np->n_attrstamp = 0;
972	KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
973	error = VOP_GETATTR(vp, &vattr, cred);
974	if (error)
975	return (error);
976	NFSLOCKNODE(np);
977	uio->uio_offset = np->n_size;
978	NFSUNLOCKNODE(np);
979	}
980
981	if (uio->uio_offset < 0)
982	return (EINVAL);
983	tmp_off = uio->uio_offset + uio->uio_resid;
984	if (tmp_off > nmp->nm_maxfilesize \|\| tmp_off < uio->uio_offset)
985	return (EFBIG);
986	if (uio->uio_resid == 0)
987	return (0);
988
989	if (newnfs_directio_enable && (ioflag & IO_DIRECT) && vp->v_type == VREG)
990	return nfs_directio_write(vp, uio, cred, ioflag);
991
992	/*
993	* Maybe this should be above the vnode op call, but so long as
994	* file servers have no limits, i don't think it matters
995	*/
996	if (vn_rlimit_fsize(vp, uio, td))
997	return (EFBIG);
998
999	biosize = vp->v_bufobj.bo_bsize;
1000	/*
1001	* Find all of this file's B_NEEDCOMMIT buffers. If our writes
1002	* would exceed the local maximum per-file write commit size when
1003	* combined with those, we must decide whether to flush,
1004	* go synchronous, or return error. We don't bother checking
1005	* IO_UNIT -- we just make all writes atomic anyway, as there's
1006	* no point optimizing for something that really won't ever happen.
1007	*/
1008	wouldcommit = 0;
1009	if (!(ioflag & IO_SYNC)) {
1010	int nflag;
1011
1012	NFSLOCKNODE(np);
1013	nflag = np->n_flag;
1014	NFSUNLOCKNODE(np);
1015	if (nflag & NMODIFIED) {
1016	BO_LOCK(&vp->v_bufobj);
1017	if (vp->v_bufobj.bo_dirty.bv_cnt != 0) {
1018	TAILQ_FOREACH(bp, &vp->v_bufobj.bo_dirty.bv_hd,
1019	b_bobufs) {
1020	if (bp->b_flags & B_NEEDCOMMIT)
1021	wouldcommit += bp->b_bcount;
1022	}
1023	}
1024	BO_UNLOCK(&vp->v_bufobj);
1025	}
1026	}
1027
1028	do {
1029	if (!(ioflag & IO_SYNC)) {
1030	wouldcommit += biosize;
1031	if (wouldcommit > nmp->nm_wcommitsize) {
1032	np->n_attrstamp = 0;
1033	KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
1034	error = ncl_vinvalbuf(vp, V_SAVE \| ((ioflag &
1035	IO_VMIO) != 0 ? V_VMIO : 0), td, 1);
1036	if (error != 0)
1037	return (error);
1038	wouldcommit = biosize;
1039	}
1040	}
1041
1042	NFSINCRGLOBAL(nfsstatsv1.biocache_writes);
1043	lbn = uio->uio_offset / biosize;
1044	on = uio->uio_offset - (lbn * biosize);
1045	n = MIN((unsigned)(biosize - on), uio->uio_resid);
1046	again:
1047	/*
1048	* Handle direct append and file extension cases, calculate
1049	* unaligned buffer size.
1050	*/
1051	NFSLOCKNODE(np);
1052	if ((np->n_flag & NHASBEENLOCKED) == 0 &&
1053	(nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0)
1054	noncontig_write = 1;
1055	else
1056	noncontig_write = 0;
1057	if ((uio->uio_offset == np->n_size \|\|
1058	(noncontig_write != 0 &&
1059	lbn == (np->n_size / biosize) &&
1060	uio->uio_offset + n > np->n_size)) && n) {
1061	NFSUNLOCKNODE(np);
1062	/*
1063	* Get the buffer (in its pre-append state to maintain
1064	* B_CACHE if it was previously set). Resize the
1065	* nfsnode after we have locked the buffer to prevent
1066	* readers from reading garbage.
1067	*/
1068	obcount = np->n_size - (lbn * biosize);
1069	bp = nfs_getcacheblk(vp, lbn, obcount, td);
1070
1071	if (bp != NULL) {
1072	long save;
1073
1074	NFSLOCKNODE(np);
1075	np->n_size = uio->uio_offset + n;
1076	np->n_flag \|= NMODIFIED;
1077	#ifndef __rtems__
1078	vnode_pager_setsize(vp, np->n_size);
1079	#endif /* __rems__ */
1080	NFSUNLOCKNODE(np);
1081
1082	save = bp->b_flags & B_CACHE;
1083	bcount = on + n;
1084	allocbuf(bp, bcount);
1085	bp->b_flags \|= save;
1086	if (noncontig_write != 0 && on > obcount)
1087	vfs_bio_bzero_buf(bp, obcount, on -
1088	obcount);
1089	}
1090	} else {
1091	/*
1092	* Obtain the locked cache block first, and then
1093	* adjust the file's size as appropriate.
1094	*/
1095	bcount = on + n;
1096	if ((off_t)lbn * biosize + bcount < np->n_size) {
1097	if ((off_t)(lbn + 1) * biosize < np->n_size)
1098	bcount = biosize;
1099	else
1100	bcount = np->n_size - (off_t)lbn * biosize;
1101	}
1102	NFSUNLOCKNODE(np);
1103	bp = nfs_getcacheblk(vp, lbn, bcount, td);
1104	NFSLOCKNODE(np);
1105	if (uio->uio_offset + n > np->n_size) {
1106	np->n_size = uio->uio_offset + n;
1107	np->n_flag \|= NMODIFIED;
1108	#ifndef __rtems__
1109	vnode_pager_setsize(vp, np->n_size);
1110	#endif /* __rems__ */
1111	}
1112	NFSUNLOCKNODE(np);
1113	}
1114
1115	if (!bp) {
1116	error = newnfs_sigintr(nmp, td);
1117	if (!error)
1118	error = EINTR;
1119	break;
1120	}
1121
1122	/*
1123	* Issue a READ if B_CACHE is not set. In special-append
1124	* mode, B_CACHE is based on the buffer prior to the write
1125	* op and is typically set, avoiding the read. If a read
1126	* is required in special append mode, the server will
1127	* probably send us a short-read since we extended the file
1128	* on our end, resulting in b_resid == 0 and, thusly,
1129	* B_CACHE getting set.
1130	*
1131	* We can also avoid issuing the read if the write covers
1132	* the entire buffer. We have to make sure the buffer state
1133	* is reasonable in this case since we will not be initiating
1134	* I/O. See the comments in kern/vfs_bio.c's getblk() for
1135	* more information.
1136	*
1137	* B_CACHE may also be set due to the buffer being cached
1138	* normally.
1139	*/
1140
1141	bp_cached = 1;
1142	if (on == 0 && n == bcount) {
1143	if ((bp->b_flags & B_CACHE) == 0)
1144	bp_cached = 0;
1145	bp->b_flags \|= B_CACHE;
1146	bp->b_flags &= ~B_INVAL;
1147	bp->b_ioflags &= ~BIO_ERROR;
1148	}
1149
1150	if ((bp->b_flags & B_CACHE) == 0) {
1151	bp->b_iocmd = BIO_READ;
1152	vfs_busy_pages(bp, 0);
1153	error = ncl_doio(vp, bp, cred, td, 0);
1154	if (error) {
1155	brelse(bp);
1156	break;
1157	}
1158	}
1159	if (bp->b_wcred == NOCRED)
1160	bp->b_wcred = crhold(cred);
1161	NFSLOCKNODE(np);
1162	np->n_flag \|= NMODIFIED;
1163	NFSUNLOCKNODE(np);
1164
1165	/*
1166	* If dirtyend exceeds file size, chop it down. This should
1167	* not normally occur but there is an append race where it
1168	* might occur XXX, so we log it.
1169	*
1170	* If the chopping creates a reverse-indexed or degenerate
1171	* situation with dirtyoff/end, we 0 both of them.
1172	*/
1173
1174	if (bp->b_dirtyend > bcount) {
1175	printf("NFS append race @%lx:%d\n",
1176	(long)bp->b_blkno * DEV_BSIZE,
1177	bp->b_dirtyend - bcount);
1178	bp->b_dirtyend = bcount;
1179	}
1180
1181	if (bp->b_dirtyoff >= bp->b_dirtyend)
1182	bp->b_dirtyoff = bp->b_dirtyend = 0;
1183
1184	/*
1185	* If the new write will leave a contiguous dirty
1186	* area, just update the b_dirtyoff and b_dirtyend,
1187	* otherwise force a write rpc of the old dirty area.
1188	*
1189	* If there has been a file lock applied to this file
1190	* or vfs.nfs.old_noncontig_writing is set, do the following:
1191	* While it is possible to merge discontiguous writes due to
1192	* our having a B_CACHE buffer ( and thus valid read data
1193	* for the hole), we don't because it could lead to
1194	* significant cache coherency problems with multiple clients,
1195	* especially if locking is implemented later on.
1196	*
1197	* If vfs.nfs.old_noncontig_writing is not set and there has
1198	* not been file locking done on this file:
1199	* Relax coherency a bit for the sake of performance and
1200	* expand the current dirty region to contain the new
1201	* write even if it means we mark some non-dirty data as
1202	* dirty.
1203	*/
1204
1205	if (noncontig_write == 0 && bp->b_dirtyend > 0 &&
1206	(on > bp->b_dirtyend \|\| (on + n) < bp->b_dirtyoff)) {
1207	if (bwrite(bp) == EINTR) {
1208	error = EINTR;
1209	break;
1210	}
1211	goto again;
1212	}
1213
1214	local_resid = uio->uio_resid;
1215	error = vn_io_fault_uiomove((char *)bp->b_data + on, n, uio);
1216
1217	if (error != 0 && !bp_cached) {
1218	/*
1219	* This block has no other content then what
1220	* possibly was written by the faulty uiomove.
1221	* Release it, forgetting the data pages, to
1222	* prevent the leak of uninitialized data to
1223	* usermode.
1224	*/
1225	bp->b_ioflags \|= BIO_ERROR;
1226	brelse(bp);
1227	uio->uio_offset -= local_resid - uio->uio_resid;
1228	uio->uio_resid = local_resid;
1229	break;
1230	}
1231
1232	/*
1233	* Since this block is being modified, it must be written
1234	* again and not just committed. Since write clustering does
1235	* not work for the stage 1 data write, only the stage 2
1236	* commit rpc, we have to clear B_CLUSTEROK as well.
1237	*/
1238	bp->b_flags &= ~(B_NEEDCOMMIT \| B_CLUSTEROK);
1239
1240	/*
1241	* Get the partial update on the progress made from
1242	* uiomove, if an error occurred.
1243	*/
1244	if (error != 0)
1245	n = local_resid - uio->uio_resid;
1246
1247	/*
1248	* Only update dirtyoff/dirtyend if not a degenerate
1249	* condition.
1250	*/
1251	if (n > 0) {
1252	if (bp->b_dirtyend > 0) {
1253	bp->b_dirtyoff = min(on, bp->b_dirtyoff);
1254	bp->b_dirtyend = max((on + n), bp->b_dirtyend);
1255	} else {
1256	bp->b_dirtyoff = on;
1257	bp->b_dirtyend = on + n;
1258	}
1259	vfs_bio_set_valid(bp, on, n);
1260	}
1261
1262	/*
1263	* If IO_SYNC do bwrite().
1264	*
1265	* IO_INVAL appears to be unused. The idea appears to be
1266	* to turn off caching in this case. Very odd. XXX
1267	*/
1268	if ((ioflag & IO_SYNC)) {
1269	if (ioflag & IO_INVAL)
1270	bp->b_flags \|= B_NOCACHE;
1271	error1 = bwrite(bp);
1272	if (error1 != 0) {
1273	if (error == 0)
1274	error = error1;
1275	break;
1276	}
1277	} else if ((n + on) == biosize \|\| (ioflag & IO_ASYNC) != 0) {
1278	bp->b_flags \|= B_ASYNC;
1279	(void) ncl_writebp(bp, 0, NULL);
1280	} else {
1281	bdwrite(bp);
1282	}
1283
1284	if (error != 0)
1285	break;
1286	} while (uio->uio_resid > 0 && n > 0);
1287
1288	if (error != 0) {
1289	if (ioflag & IO_UNIT) {
1290	VATTR_NULL(&vattr);
1291	vattr.va_size = orig_size;
1292	/* IO_SYNC is handled implicitely */
1293	(void)VOP_SETATTR(vp, &vattr, cred);
1294	uio->uio_offset -= orig_resid - uio->uio_resid;
1295	uio->uio_resid = orig_resid;
1296	}
1297	}
1298
1299	return (error);
1300	}
1301
#ifdef __rtems__
/*
 * Get an nfs cache block.
 *
 * Allocate a new one if the block isn't currently in the cache
 * and return the block marked busy. If the calling process is
 * interrupted by a signal for an interruptible mount point, return
 * NULL.
 *
 * The caller must carefully deal with the possible B_INVAL state of
 * the buffer. ncl_doio() clears B_INVAL (and ncl_asyncio() clears it
 * indirectly), so synchronous reads can be issued without worrying about
 * the B_INVAL state. We have to be a little more careful when dealing
 * with writes (see comments in nfs_write()) when extending a file past
 * its EOF.
 *
 * Parameters:
 *	vp	vnode the block belongs to; its mount supplies the nfsmount.
 *	bn	block number handed to getblk().
 *	size	buffer size handed to getblk().
 *	td	thread used for signal masking and interruption checks.
 *
 * Returns the buffer obtained from getblk(), or NULL when the mount has
 * NFSMNT_INT set and newnfs_sigintr() reports an interruption.
 */
static struct buf *
nfs_getcacheblk(struct vnode *vp, daddr_t bn, int size, struct thread *td)
{
	struct buf *bp;
	struct mount *mp;
	struct nfsmount *nmp;

	mp = vp->v_mount;
	nmp = VFSTONFS(mp);

	if (nmp->nm_flag & NFSMNT_INT) {
		sigset_t oldset;

		/* First attempt may be interrupted by a signal (PCATCH). */
		newnfs_set_sigmask(td, &oldset);
		bp = getblk(vp, bn, size, PCATCH, 0, 0);
		newnfs_restore_sigmask(td, &oldset);

		/*
		 * Retry with a 2 * hz timeout, giving up as soon as
		 * newnfs_sigintr() reports an interruption.
		 */
		while (bp == NULL) {
			if (newnfs_sigintr(nmp, td))
				return (NULL);
			bp = getblk(vp, bn, size, 0, 2 * hz, 0);
		}
	} else {
		bp = getblk(vp, bn, size, 0, 0, 0);
	}

	/* For regular files express the block number in DEV_BSIZE units. */
	if (vp->v_type == VREG)
		bp->b_blkno = bn * (vp->v_bufobj.bo_bsize / DEV_BSIZE);
	return (bp);
}
#endif /* __rtems__ */
1348
1349	/*
1350	* Flush and invalidate all dirty buffers. If another process is already
1351	* doing the flush, just wait for completion.
1352	*/
1353	int
1354	ncl_vinvalbuf(struct vnode vp, int flags, struct thread td, int intrflg)
1355	{
1356	struct nfsnode *np = VTONFS(vp);
1357	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1358	int error = 0, slpflag, slptimeo;
1359	bool old_lock;
1360
1361	ASSERT_VOP_LOCKED(vp, "ncl_vinvalbuf");
1362
1363	if ((nmp->nm_flag & NFSMNT_INT) == 0)
1364	intrflg = 0;
1365	if (NFSCL_FORCEDISM(nmp->nm_mountp))
1366	intrflg = 1;
1367	if (intrflg) {
1368	slpflag = PCATCH;
1369	slptimeo = 2 * hz;
1370	} else {
1371	slpflag = 0;
1372	slptimeo = 0;
1373	}
1374
1375	old_lock = ncl_excl_start(vp);
1376	if (old_lock)
1377	flags \|= V_ALLOWCLEAN;
1378
1379	/*
1380	* Now, flush as required.
1381	*/
1382	if ((flags & (V_SAVE \| V_VMIO)) == V_SAVE &&
1383	vp->v_bufobj.bo_object != NULL) {
1384	#ifndef __rtems__
1385	VM_OBJECT_WLOCK(vp->v_bufobj.bo_object);
1386	vm_object_page_clean(vp->v_bufobj.bo_object, 0, 0, OBJPC_SYNC);
1387	VM_OBJECT_WUNLOCK(vp->v_bufobj.bo_object);
1388	#endif /* __rtems__*/
1389	/*
1390	* If the page clean was interrupted, fail the invalidation.
1391	* Not doing so, we run the risk of losing dirty pages in the
1392	* vinvalbuf() call below.
1393	*/
1394	if (intrflg && (error = newnfs_sigintr(nmp, td)))
1395	goto out;
1396	}
1397
1398	error = vinvalbuf(vp, flags, slpflag, 0);
1399	while (error) {
1400	if (intrflg && (error = newnfs_sigintr(nmp, td)))
1401	goto out;
1402	error = vinvalbuf(vp, flags, 0, slptimeo);
1403	}
1404	if (NFSHASPNFS(nmp)) {
1405	nfscl_layoutcommit(vp, td);
1406	/*
1407	* Invalidate the attribute cache, since writes to a DS
1408	* won't update the size attribute.
1409	*/
1410	NFSLOCKNODE(np);
1411	np->n_attrstamp = 0;
1412	} else
1413	NFSLOCKNODE(np);
1414	if (np->n_directio_asyncwr == 0)
1415	np->n_flag &= ~NMODIFIED;
1416	NFSUNLOCKNODE(np);
1417	out:
1418	ncl_excl_finish(vp, old_lock);
1419	return error;
1420	}
1421
1422	/*
1423	* Initiate asynchronous I/O. Return an error if no nfsiods are available.
1424	* This is mainly to avoid queueing async I/O requests when the nfsiods
1425	* are all hung on a dead server.
1426	*
1427	* Note: ncl_asyncio() does not clear (BIO_ERROR\|B_INVAL) but when the bp
1428	* is eventually dequeued by the async daemon, ncl_doio() will.
1429	*/
1430	int
1431	ncl_asyncio(struct nfsmount nmp, struct buf bp, struct ucred cred, struct thread td)
1432	{
1433	#ifdef __rtems__
1434	return (EOPNOTSUPP);
1435	#else /* __rtems__ */
1436	int iod;
1437	int gotiod;
1438	int slpflag = 0;
1439	int slptimeo = 0;
1440	int error, error2;
1441
1442	/*
1443	* Commits are usually short and sweet so lets save some cpu and
1444	* leave the async daemons for more important rpc's (such as reads
1445	* and writes).
1446	*
1447	* Readdirplus RPCs do vget()s to acquire the vnodes for entries
1448	* in the directory in order to update attributes. This can deadlock
1449	* with another thread that is waiting for async I/O to be done by
1450	* an nfsiod thread while holding a lock on one of these vnodes.
1451	* To avoid this deadlock, don't allow the async nfsiod threads to
1452	* perform Readdirplus RPCs.
1453	*/
1454	NFSLOCKIOD();
1455	if ((bp->b_iocmd == BIO_WRITE && (bp->b_flags & B_NEEDCOMMIT) &&
1456	(nmp->nm_bufqiods > ncl_numasync / 2)) \|\|
1457	(bp->b_vp->v_type == VDIR && (nmp->nm_flag & NFSMNT_RDIRPLUS))) {
1458	NFSUNLOCKIOD();
1459	return(EIO);
1460	}
1461	again:
1462	if (nmp->nm_flag & NFSMNT_INT)
1463	slpflag = PCATCH;
1464	gotiod = FALSE;
1465
1466	/*
1467	* Find a free iod to process this request.
1468	*/
1469	for (iod = 0; iod < ncl_numasync; iod++)
1470	if (ncl_iodwant[iod] == NFSIOD_AVAILABLE) {
1471	gotiod = TRUE;
1472	break;
1473	}
1474
1475	/*
1476	* Try to create one if none are free.
1477	*/
1478	if (!gotiod)
1479	ncl_nfsiodnew();
1480	else {
1481	/*
1482	* Found one, so wake it up and tell it which
1483	* mount to process.
1484	*/
1485	NFS_DPF(ASYNCIO, ("ncl_asyncio: waking iod %d for mount %p\n",
1486	iod, nmp));
1487	ncl_iodwant[iod] = NFSIOD_NOT_AVAILABLE;
1488	ncl_iodmount[iod] = nmp;
1489	nmp->nm_bufqiods++;
1490	wakeup(&ncl_iodwant[iod]);
1491	}
1492
1493	/*
1494	* If none are free, we may already have an iod working on this mount
1495	* point. If so, it will process our request.
1496	*/
1497	if (!gotiod) {
1498	if (nmp->nm_bufqiods > 0) {
1499	NFS_DPF(ASYNCIO,
1500	("ncl_asyncio: %d iods are already processing mount %p\n",
1501	nmp->nm_bufqiods, nmp));
1502	gotiod = TRUE;
1503	}
1504	}
1505
1506	/*
1507	* If we have an iod which can process the request, then queue
1508	* the buffer.
1509	*/
1510	if (gotiod) {
1511	/*
1512	* Ensure that the queue never grows too large. We still want
1513	* to asynchronize so we block rather then return EIO.
1514	*/
1515	while (nmp->nm_bufqlen >= 2*ncl_numasync) {
1516	NFS_DPF(ASYNCIO,
1517	("ncl_asyncio: waiting for mount %p queue to drain\n", nmp));
1518	nmp->nm_bufqwant = TRUE;
1519	error = newnfs_msleep(td, &nmp->nm_bufq,
1520	&ncl_iod_mutex, slpflag \| PRIBIO, "nfsaio",
1521	slptimeo);
1522	if (error) {
1523	error2 = newnfs_sigintr(nmp, td);
1524	if (error2) {
1525	NFSUNLOCKIOD();
1526	return (error2);
1527	}
1528	if (slpflag == PCATCH) {
1529	slpflag = 0;
1530	slptimeo = 2 * hz;
1531	}
1532	}
1533	/*
1534	* We might have lost our iod while sleeping,
1535	* so check and loop if necessary.
1536	*/
1537	goto again;
1538	}
1539
1540	/* We might have lost our nfsiod */
1541	if (nmp->nm_bufqiods == 0) {
1542	NFS_DPF(ASYNCIO,
1543	("ncl_asyncio: no iods after mount %p queue was drained, looping\n", nmp));
1544	goto again;
1545	}
1546
1547	if (bp->b_iocmd == BIO_READ) {
1548	if (bp->b_rcred == NOCRED && cred != NOCRED)
1549	bp->b_rcred = crhold(cred);
1550	} else {
1551	if (bp->b_wcred == NOCRED && cred != NOCRED)
1552	bp->b_wcred = crhold(cred);
1553	}
1554
1555	if (bp->b_flags & B_REMFREE)
1556	bremfreef(bp);
1557	BUF_KERNPROC(bp);
1558	TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist);
1559	nmp->nm_bufqlen++;
1560	if ((bp->b_flags & B_DIRECT) && bp->b_iocmd == BIO_WRITE) {
1561	NFSLOCKNODE(VTONFS(bp->b_vp));
1562	VTONFS(bp->b_vp)->n_flag \|= NMODIFIED;
1563	VTONFS(bp->b_vp)->n_directio_asyncwr++;
1564	NFSUNLOCKNODE(VTONFS(bp->b_vp));
1565	}
1566	NFSUNLOCKIOD();
1567	return (0);
1568	}
1569
1570	NFSUNLOCKIOD();
1571
1572	/*
1573	* All the iods are busy on other mounts, so return EIO to
1574	* force the caller to process the i/o synchronously.
1575	*/
1576	NFS_DPF(ASYNCIO, ("ncl_asyncio: no iods available, i/o is synchronous\n"));
1577	#endif /* __rtems__ */
1578	return (EIO);
1579	}
1580
1581	void
1582	ncl_doio_directwrite(struct buf *bp)
1583	{
1584	#ifdef __rtems__
1585	panic("not supported in RTEMS");
1586	#else /* __rtems__ */
1587	int iomode, must_commit;
1588	struct uio uiop = (struct uio )bp->b_caller1;
1589	char *iov_base = uiop->uio_iov->iov_base;
1590
1591	iomode = NFSWRITE_FILESYNC;
1592	uiop->uio_td = NULL; /* NULL since we're in nfsiod */
1593	ncl_writerpc(bp->b_vp, uiop, bp->b_wcred, &iomode, &must_commit, 0);
1594	KASSERT((must_commit == 0), ("ncl_doio_directwrite: Did not commit write"));
1595	free(iov_base, M_NFSDIRECTIO);
1596	free(uiop->uio_iov, M_NFSDIRECTIO);
1597	free(uiop, M_NFSDIRECTIO);
1598	if ((bp->b_flags & B_DIRECT) && bp->b_iocmd == BIO_WRITE) {
1599	struct nfsnode *np = VTONFS(bp->b_vp);
1600	NFSLOCKNODE(np);
1601	if (NFSHASPNFS(VFSTONFS(vnode_mount(bp->b_vp)))) {
1602	/*
1603	* Invalidate the attribute cache, since writes to a DS
1604	* won't update the size attribute.
1605	*/
1606	np->n_attrstamp = 0;
1607	}
1608	np->n_directio_asyncwr--;
1609	if (np->n_directio_asyncwr == 0) {
1610	np->n_flag &= ~NMODIFIED;
1611	if ((np->n_flag & NFSYNCWAIT)) {
1612	np->n_flag &= ~NFSYNCWAIT;
1613	wakeup((caddr_t)&np->n_directio_asyncwr);
1614	}
1615	}
1616	NFSUNLOCKNODE(np);
1617	}
1618	bp->b_vp = NULL;
1619	relpbuf(bp, &ncl_pbuf_freecnt);
1620	#endif /* __rtems__ */
1621	}
1622
1623	/*
1624	* Do an I/O operation to/from a cache block. This may be called
1625	* synchronously or from an nfsiod.
1626	*/
1627	int
1628	ncl_doio(struct vnode vp, struct buf bp, struct ucred cr, struct thread td,
1629	int called_from_strategy)
1630	{
1631	struct uio *uiop;
1632	struct nfsnode *np;
1633	struct nfsmount *nmp;
1634	int error = 0, iomode, must_commit = 0;
1635	struct uio uio;
1636	struct iovec io;
1637	struct proc *p = td ? td->td_proc : NULL;
1638	uint8_t iocmd;
1639
1640	np = VTONFS(vp);
1641	nmp = VFSTONFS(vp->v_mount);
1642	uiop = &uio;
1643	uiop->uio_iov = &io;
1644	uiop->uio_iovcnt = 1;
1645	uiop->uio_segflg = UIO_SYSSPACE;
1646	uiop->uio_td = td;
1647
1648	/*
1649	* clear BIO_ERROR and B_INVAL state prior to initiating the I/O. We
1650	* do this here so we do not have to do it in all the code that
1651	* calls us.
1652	*/
1653	bp->b_flags &= ~B_INVAL;
1654	bp->b_ioflags &= ~BIO_ERROR;
1655
1656	KASSERT(!(bp->b_flags & B_DONE), ("ncl_doio: bp %p already marked done", bp));
1657	iocmd = bp->b_iocmd;
1658	if (iocmd == BIO_READ) {
1659	io.iov_len = uiop->uio_resid = bp->b_bcount;
1660	io.iov_base = bp->b_data;
1661	uiop->uio_rw = UIO_READ;
1662
1663	switch (vp->v_type) {
1664	case VREG:
1665	uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
1666	NFSINCRGLOBAL(nfsstatsv1.read_bios);
1667	error = ncl_readrpc(vp, uiop, cr);
1668
1669	if (!error) {
1670	if (uiop->uio_resid) {
1671	/*
1672	* If we had a short read with no error, we must have
1673	* hit a file hole. We should zero-fill the remainder.
1674	* This can also occur if the server hits the file EOF.
1675	*
1676	* Holes used to be able to occur due to pending
1677	* writes, but that is not possible any longer.
1678	*/
1679	int nread = bp->b_bcount - uiop->uio_resid;
1680	ssize_t left = uiop->uio_resid;
1681
1682	if (left > 0)
1683	bzero((char *)bp->b_data + nread, left);
1684	uiop->uio_resid = 0;
1685	}
1686	}
1687	/* ASSERT_VOP_LOCKED(vp, "ncl_doio"); */
1688	if (p && vp->v_writecount <= -1) {
1689	NFSLOCKNODE(np);
1690	if (NFS_TIMESPEC_COMPARE(&np->n_mtime, &np->n_vattr.na_mtime)) {
1691	NFSUNLOCKNODE(np);
1692	#ifndef __rtems__
1693	PROC_LOCK(p);
1694	killproc(p, "text file modification");
1695	PROC_UNLOCK(p);
1696	#else /* __rtems__ */
1697	panic("nfsclient: text file modification: want to killproc");
1698	#endif /* __rtems__ */
1699	} else
1700	NFSUNLOCKNODE(np);
1701	}
1702	break;
1703	case VLNK:
1704	uiop->uio_offset = (off_t)0;
1705	NFSINCRGLOBAL(nfsstatsv1.readlink_bios);
1706	error = ncl_readlinkrpc(vp, uiop, cr);
1707	break;
1708	case VDIR:
1709	NFSINCRGLOBAL(nfsstatsv1.readdir_bios);
1710	uiop->uio_offset = ((u_quad_t)bp->b_lblkno) * NFS_DIRBLKSIZ;
1711	if ((nmp->nm_flag & NFSMNT_RDIRPLUS) != 0) {
1712	error = ncl_readdirplusrpc(vp, uiop, cr, td);
1713	if (error == NFSERR_NOTSUPP)
1714	nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
1715	}
1716	if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0)
1717	error = ncl_readdirrpc(vp, uiop, cr, td);
1718	/*
1719	* end-of-directory sets B_INVAL but does not generate an
1720	* error.
1721	*/
1722	if (error == 0 && uiop->uio_resid == bp->b_bcount)
1723	bp->b_flags \|= B_INVAL;
1724	break;
1725	default:
1726	printf("ncl_doio: type %x unexpected\n", vp->v_type);
1727	break;
1728	}
1729	if (error) {
1730	bp->b_ioflags \|= BIO_ERROR;
1731	bp->b_error = error;
1732	}
1733	} else {
1734	/*
1735	* If we only need to commit, try to commit
1736	*/
1737	if (bp->b_flags & B_NEEDCOMMIT) {
1738	int retv;
1739	off_t off;
1740
1741	off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff;
1742	retv = ncl_commit(vp, off, bp->b_dirtyend-bp->b_dirtyoff,
1743	bp->b_wcred, td);
1744	if (retv == 0) {
1745	bp->b_dirtyoff = bp->b_dirtyend = 0;
1746	bp->b_flags &= ~(B_NEEDCOMMIT \| B_CLUSTEROK);
1747	bp->b_resid = 0;
1748	bufdone(bp);
1749	return (0);
1750	}
1751	if (retv == NFSERR_STALEWRITEVERF) {
1752	ncl_clearcommit(vp->v_mount);
1753	}
1754	}
1755
1756	/*
1757	* Setup for actual write
1758	*/
1759	NFSLOCKNODE(np);
1760	if ((off_t)bp->b_blkno * DEV_BSIZE + bp->b_dirtyend > np->n_size)
1761	bp->b_dirtyend = np->n_size - (off_t)bp->b_blkno * DEV_BSIZE;
1762	NFSUNLOCKNODE(np);
1763
1764	if (bp->b_dirtyend > bp->b_dirtyoff) {
1765	io.iov_len = uiop->uio_resid = bp->b_dirtyend
1766	- bp->b_dirtyoff;
1767	uiop->uio_offset = (off_t)bp->b_blkno * DEV_BSIZE
1768	+ bp->b_dirtyoff;
1769	io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
1770	uiop->uio_rw = UIO_WRITE;
1771	NFSINCRGLOBAL(nfsstatsv1.write_bios);
1772
1773	if ((bp->b_flags & (B_ASYNC \| B_NEEDCOMMIT \| B_NOCACHE \| B_CLUSTER)) == B_ASYNC)
1774	iomode = NFSWRITE_UNSTABLE;
1775	else
1776	iomode = NFSWRITE_FILESYNC;
1777
1778	error = ncl_writerpc(vp, uiop, cr, &iomode, &must_commit,
1779	called_from_strategy);
1780
1781	/*
1782	* When setting B_NEEDCOMMIT also set B_CLUSTEROK to try
1783	* to cluster the buffers needing commit. This will allow
1784	* the system to submit a single commit rpc for the whole
1785	* cluster. We can do this even if the buffer is not 100%
1786	* dirty (relative to the NFS blocksize), so we optimize the
1787	* append-to-file-case.
1788	*
1789	* (when clearing B_NEEDCOMMIT, B_CLUSTEROK must also be
1790	* cleared because write clustering only works for commit
1791	* rpc's, not for the data portion of the write).
1792	*/
1793
1794	if (!error && iomode == NFSWRITE_UNSTABLE) {
1795	bp->b_flags \|= B_NEEDCOMMIT;
1796	if (bp->b_dirtyoff == 0
1797	&& bp->b_dirtyend == bp->b_bcount)
1798	bp->b_flags \|= B_CLUSTEROK;
1799	} else {
1800	bp->b_flags &= ~(B_NEEDCOMMIT \| B_CLUSTEROK);
1801	}
1802
1803	/*
1804	* For an interrupted write, the buffer is still valid
1805	* and the write hasn't been pushed to the server yet,
1806	* so we can't set BIO_ERROR and report the interruption
1807	* by setting B_EINTR. For the B_ASYNC case, B_EINTR
1808	* is not relevant, so the rpc attempt is essentially
1809	* a noop. For the case of a V3 write rpc not being
1810	* committed to stable storage, the block is still
1811	* dirty and requires either a commit rpc or another
1812	* write rpc with iomode == NFSV3WRITE_FILESYNC before
1813	* the block is reused. This is indicated by setting
1814	* the B_DELWRI and B_NEEDCOMMIT flags.
1815	*
1816	* EIO is returned by ncl_writerpc() to indicate a recoverable
1817	* write error and is handled as above, except that
1818	* B_EINTR isn't set. One cause of this is a stale stateid
1819	* error for the RPC that indicates recovery is required,
1820	* when called with called_from_strategy != 0.
1821	*
1822	* If the buffer is marked B_PAGING, it does not reside on
1823	* the vp's paging queues so we cannot call bdirty(). The
1824	* bp in this case is not an NFS cache block so we should
1825	* be safe. XXX
1826	*
1827	* The logic below breaks up errors into recoverable and
1828	* unrecoverable. For the former, we clear B_INVAL\|B_NOCACHE
1829	* and keep the buffer around for potential write retries.
1830	* For the latter (eg ESTALE), we toss the buffer away (B_INVAL)
1831	* and save the error in the nfsnode. This is less than ideal
1832	* but necessary. Keeping such buffers around could potentially
1833	* cause buffer exhaustion eventually (they can never be written
1834	* out, so will get constantly be re-dirtied). It also causes
1835	* all sorts of vfs panics. For non-recoverable write errors,
1836	* also invalidate the attrcache, so we'll be forced to go over
1837	* the wire for this object, returning an error to user on next
1838	* call (most of the time).
1839	*/
1840	if (error == EINTR \|\| error == EIO \|\| error == ETIMEDOUT
1841	\|\| (!error && (bp->b_flags & B_NEEDCOMMIT))) {
1842	bp->b_flags &= ~(B_INVAL\|B_NOCACHE);
1843	if ((bp->b_flags & B_PAGING) == 0) {
1844	bdirty(bp);
1845	bp->b_flags &= ~B_DONE;
1846	}
1847	if ((error == EINTR \|\| error == ETIMEDOUT) &&
1848	(bp->b_flags & B_ASYNC) == 0)
1849	bp->b_flags \|= B_EINTR;
1850	} else {
1851	if (error) {
1852	bp->b_ioflags \|= BIO_ERROR;
1853	bp->b_flags \|= B_INVAL;
1854	bp->b_error = np->n_error = error;
1855	NFSLOCKNODE(np);
1856	np->n_flag \|= NWRITEERR;
1857	np->n_attrstamp = 0;
1858	KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
1859	NFSUNLOCKNODE(np);
1860	}
1861	bp->b_dirtyoff = bp->b_dirtyend = 0;
1862	}
1863	} else {
1864	bp->b_resid = 0;
1865	bufdone(bp);
1866	return (0);
1867	}
1868	}
1869	bp->b_resid = uiop->uio_resid;
1870	if (must_commit)
1871	ncl_clearcommit(vp->v_mount);
1872	bufdone(bp);
1873	return (error);
1874	}
1875
1876	/*
1877	* Used to aid in handling ftruncate() operations on the NFS client side.
1878	* Truncation creates a number of special problems for NFS. We have to
1879	* throw away VM pages and buffer cache buffers that are beyond EOF, and
1880	* we have to properly handle VM pages or (potentially dirty) buffers
1881	* that straddle the truncation point.
1882	*/
1883
1884	int
1885	ncl_meta_setsize(struct vnode vp, struct thread td, u_quad_t nsize)
1886	{
1887	struct nfsnode *np = VTONFS(vp);
1888	u_quad_t tsize;
1889	int biosize = vp->v_bufobj.bo_bsize;
1890	int error = 0;
1891
1892	NFSLOCKNODE(np);
1893	tsize = np->n_size;
1894	np->n_size = nsize;
1895	NFSUNLOCKNODE(np);
1896
1897	if (nsize < tsize) {
1898	struct buf *bp;
1899	daddr_t lbn;
1900	int bufsize;
1901
1902	/*
1903	* vtruncbuf() doesn't get the buffer overlapping the
1904	* truncation point. We may have a B_DELWRI and/or B_CACHE
1905	* buffer that now needs to be truncated.
1906	*/
1907	error = vtruncbuf(vp, nsize, biosize);
1908	lbn = nsize / biosize;
1909	bufsize = nsize - (lbn * biosize);
1910	bp = nfs_getcacheblk(vp, lbn, bufsize, td);
1911	if (!bp)
1912	return EINTR;
1913	if (bp->b_dirtyoff > bp->b_bcount)
1914	bp->b_dirtyoff = bp->b_bcount;
1915	if (bp->b_dirtyend > bp->b_bcount)
1916	bp->b_dirtyend = bp->b_bcount;
1917	bp->b_flags \|= B_RELBUF; /* don't leave garbage around */
1918	brelse(bp);
1919	} else {
1920	#ifndef __rtems__
1921	vnode_pager_setsize(vp, nsize);
1922	#endif /* __rtems__ */
1923	}
1924	return(error);
1925	}
1926

Note: See TracBrowser for help on using the repository browser.

Download in other formats: