• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  /*
2   * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3   * All Rights Reserved.
4   *
5   * This program is free software; you can redistribute it and/or
6   * modify it under the terms of the GNU General Public License as
7   * published by the Free Software Foundation.
8   *
9   * This program is distributed in the hope that it would be useful,
10   * but WITHOUT ANY WARRANTY; without even the implied warranty of
11   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   * GNU General Public License for more details.
13   *
14   * You should have received a copy of the GNU General Public License
15   * along with this program; if not, write the Free Software Foundation,
16   * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17   */
18  
19  #include "xfs.h"
20  #include "xfs_fs.h"
21  #include "xfs_types.h"
22  #include "xfs_bit.h"
23  #include "xfs_log.h"
24  #include "xfs_inum.h"
25  #include "xfs_trans.h"
26  #include "xfs_sb.h"
27  #include "xfs_ag.h"
28  #include "xfs_dir2.h"
29  #include "xfs_mount.h"
30  #include "xfs_da_btree.h"
31  #include "xfs_bmap_btree.h"
32  #include "xfs_ialloc_btree.h"
33  #include "xfs_dinode.h"
34  #include "xfs_inode.h"
35  #include "xfs_inode_item.h"
36  #include "xfs_itable.h"
37  #include "xfs_ialloc.h"
38  #include "xfs_alloc.h"
39  #include "xfs_bmap.h"
40  #include "xfs_acl.h"
41  #include "xfs_attr.h"
42  #include "xfs_rw.h"
43  #include "xfs_error.h"
44  #include "xfs_quota.h"
45  #include "xfs_utils.h"
46  #include "xfs_rtalloc.h"
47  #include "xfs_trans_space.h"
48  #include "xfs_log_priv.h"
49  #include "xfs_filestream.h"
50  #include "xfs_vnodeops.h"
51  #include "xfs_trace.h"
52  
53  /*
54   * The maximum pathlen is 1024 bytes. Since the minimum file system
55   * blocksize is 512 bytes, we can get a max of 2 extents back from
56   * bmapi.
57   */
58  #define SYMLINK_MAPS 2
59  
60  STATIC int
xfs_readlink_bmap(xfs_inode_t * ip,char * link)61  xfs_readlink_bmap(
62  	xfs_inode_t	*ip,
63  	char		*link)
64  {
65  	xfs_mount_t	*mp = ip->i_mount;
66  	int		pathlen = ip->i_d.di_size;
67  	int             nmaps = SYMLINK_MAPS;
68  	xfs_bmbt_irec_t mval[SYMLINK_MAPS];
69  	xfs_daddr_t	d;
70  	int		byte_cnt;
71  	int		n;
72  	xfs_buf_t	*bp;
73  	int		error = 0;
74  
75  	error = xfs_bmapi_read(ip, 0, XFS_B_TO_FSB(mp, pathlen), mval, &nmaps,
76  			       0);
77  	if (error)
78  		goto out;
79  
80  	for (n = 0; n < nmaps; n++) {
81  		d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
82  		byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
83  
84  		bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt),
85  				  XBF_LOCK | XBF_MAPPED | XBF_DONT_BLOCK);
86  		if (!bp)
87  			return XFS_ERROR(ENOMEM);
88  		error = bp->b_error;
89  		if (error) {
90  			xfs_buf_ioerror_alert(bp, __func__);
91  			xfs_buf_relse(bp);
92  			goto out;
93  		}
94  		if (pathlen < byte_cnt)
95  			byte_cnt = pathlen;
96  		pathlen -= byte_cnt;
97  
98  		memcpy(link, bp->b_addr, byte_cnt);
99  		xfs_buf_relse(bp);
100  	}
101  
102  	link[ip->i_d.di_size] = '\0';
103  	error = 0;
104  
105   out:
106  	return error;
107  }
108  
109  int
xfs_readlink(xfs_inode_t * ip,char * link)110  xfs_readlink(
111  	xfs_inode_t     *ip,
112  	char		*link)
113  {
114  	xfs_mount_t	*mp = ip->i_mount;
115  	xfs_fsize_t	pathlen;
116  	int		error = 0;
117  
118  	trace_xfs_readlink(ip);
119  
120  	if (XFS_FORCED_SHUTDOWN(mp))
121  		return XFS_ERROR(EIO);
122  
123  	xfs_ilock(ip, XFS_ILOCK_SHARED);
124  
125  	pathlen = ip->i_d.di_size;
126  	if (!pathlen)
127  		goto out;
128  
129  	if (pathlen < 0 || pathlen > MAXPATHLEN) {
130  		xfs_alert(mp, "%s: inode (%llu) bad symlink length (%lld)",
131  			 __func__, (unsigned long long) ip->i_ino,
132  			 (long long) pathlen);
133  		ASSERT(0);
134  		error = XFS_ERROR(EFSCORRUPTED);
135  		goto out;
136  	}
137  
138  
139  	if (ip->i_df.if_flags & XFS_IFINLINE) {
140  		memcpy(link, ip->i_df.if_u1.if_data, pathlen);
141  		link[pathlen] = '\0';
142  	} else {
143  		error = xfs_readlink_bmap(ip, link);
144  	}
145  
146   out:
147  	xfs_iunlock(ip, XFS_ILOCK_SHARED);
148  	return error;
149  }
150  
151  /*
152   * Flags for xfs_free_eofblocks
153   */
154  #define XFS_FREE_EOF_TRYLOCK	(1<<0)
155  
156  /*
157   * This is called by xfs_inactive to free any blocks beyond eof
158   * when the link count isn't zero and by xfs_dm_punch_hole() when
159   * punching a hole to EOF.
160   */
161  STATIC int
xfs_free_eofblocks(xfs_mount_t * mp,xfs_inode_t * ip,int flags)162  xfs_free_eofblocks(
163  	xfs_mount_t	*mp,
164  	xfs_inode_t	*ip,
165  	int		flags)
166  {
167  	xfs_trans_t	*tp;
168  	int		error;
169  	xfs_fileoff_t	end_fsb;
170  	xfs_fileoff_t	last_fsb;
171  	xfs_filblks_t	map_len;
172  	int		nimaps;
173  	xfs_bmbt_irec_t	imap;
174  
175  	/*
176  	 * Figure out if there are any blocks beyond the end
177  	 * of the file.  If not, then there is nothing to do.
178  	 */
179  	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
180  	last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
181  	if (last_fsb <= end_fsb)
182  		return 0;
183  	map_len = last_fsb - end_fsb;
184  
185  	nimaps = 1;
186  	xfs_ilock(ip, XFS_ILOCK_SHARED);
187  	error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0);
188  	xfs_iunlock(ip, XFS_ILOCK_SHARED);
189  
190  	if (!error && (nimaps != 0) &&
191  	    (imap.br_startblock != HOLESTARTBLOCK ||
192  	     ip->i_delayed_blks)) {
193  		/*
194  		 * Attach the dquots to the inode up front.
195  		 */
196  		error = xfs_qm_dqattach(ip, 0);
197  		if (error)
198  			return error;
199  
200  		/*
201  		 * There are blocks after the end of file.
202  		 * Free them up now by truncating the file to
203  		 * its current size.
204  		 */
205  		tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
206  
207  		if (flags & XFS_FREE_EOF_TRYLOCK) {
208  			if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
209  				xfs_trans_cancel(tp, 0);
210  				return 0;
211  			}
212  		} else {
213  			xfs_ilock(ip, XFS_IOLOCK_EXCL);
214  		}
215  
216  		error = xfs_trans_reserve(tp, 0,
217  					  XFS_ITRUNCATE_LOG_RES(mp),
218  					  0, XFS_TRANS_PERM_LOG_RES,
219  					  XFS_ITRUNCATE_LOG_COUNT);
220  		if (error) {
221  			ASSERT(XFS_FORCED_SHUTDOWN(mp));
222  			xfs_trans_cancel(tp, 0);
223  			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
224  			return error;
225  		}
226  
227  		xfs_ilock(ip, XFS_ILOCK_EXCL);
228  		xfs_trans_ijoin(tp, ip, 0);
229  
230  		/*
231  		 * Do not update the on-disk file size.  If we update the
232  		 * on-disk file size and then the system crashes before the
233  		 * contents of the file are flushed to disk then the files
234  		 * may be full of holes (ie NULL files bug).
235  		 */
236  		error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK,
237  					      XFS_ISIZE(ip));
238  		if (error) {
239  			/*
240  			 * If we get an error at this point we simply don't
241  			 * bother truncating the file.
242  			 */
243  			xfs_trans_cancel(tp,
244  					 (XFS_TRANS_RELEASE_LOG_RES |
245  					  XFS_TRANS_ABORT));
246  		} else {
247  			error = xfs_trans_commit(tp,
248  						XFS_TRANS_RELEASE_LOG_RES);
249  		}
250  		xfs_iunlock(ip, XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL);
251  	}
252  	return error;
253  }
254  
255  /*
256   * Free a symlink that has blocks associated with it.
257   */
258  STATIC int
xfs_inactive_symlink_rmt(xfs_inode_t * ip,xfs_trans_t ** tpp)259  xfs_inactive_symlink_rmt(
260  	xfs_inode_t	*ip,
261  	xfs_trans_t	**tpp)
262  {
263  	xfs_buf_t	*bp;
264  	int		committed;
265  	int		done;
266  	int		error;
267  	xfs_fsblock_t	first_block;
268  	xfs_bmap_free_t	free_list;
269  	int		i;
270  	xfs_mount_t	*mp;
271  	xfs_bmbt_irec_t	mval[SYMLINK_MAPS];
272  	int		nmaps;
273  	xfs_trans_t	*ntp;
274  	int		size;
275  	xfs_trans_t	*tp;
276  
277  	tp = *tpp;
278  	mp = ip->i_mount;
279  	ASSERT(ip->i_d.di_size > XFS_IFORK_DSIZE(ip));
280  	/*
281  	 * We're freeing a symlink that has some
282  	 * blocks allocated to it.  Free the
283  	 * blocks here.  We know that we've got
284  	 * either 1 or 2 extents and that we can
285  	 * free them all in one bunmapi call.
286  	 */
287  	ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2);
288  	if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
289  			XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) {
290  		ASSERT(XFS_FORCED_SHUTDOWN(mp));
291  		xfs_trans_cancel(tp, 0);
292  		*tpp = NULL;
293  		return error;
294  	}
295  	/*
296  	 * Lock the inode, fix the size, and join it to the transaction.
297  	 * Hold it so in the normal path, we still have it locked for
298  	 * the second transaction.  In the error paths we need it
299  	 * held so the cancel won't rele it, see below.
300  	 */
301  	xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
302  	size = (int)ip->i_d.di_size;
303  	ip->i_d.di_size = 0;
304  	xfs_trans_ijoin(tp, ip, 0);
305  	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
306  	/*
307  	 * Find the block(s) so we can inval and unmap them.
308  	 */
309  	done = 0;
310  	xfs_bmap_init(&free_list, &first_block);
311  	nmaps = ARRAY_SIZE(mval);
312  	error = xfs_bmapi_read(ip, 0, XFS_B_TO_FSB(mp, size),
313  				mval, &nmaps, 0);
314  	if (error)
315  		goto error0;
316  	/*
317  	 * Invalidate the block(s).
318  	 */
319  	for (i = 0; i < nmaps; i++) {
320  		bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
321  			XFS_FSB_TO_DADDR(mp, mval[i].br_startblock),
322  			XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0);
323  		if (!bp) {
324  			error = ENOMEM;
325  			goto error1;
326  		}
327  		xfs_trans_binval(tp, bp);
328  	}
329  	/*
330  	 * Unmap the dead block(s) to the free_list.
331  	 */
332  	if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps,
333  			&first_block, &free_list, &done)))
334  		goto error1;
335  	ASSERT(done);
336  	/*
337  	 * Commit the first transaction.  This logs the EFI and the inode.
338  	 */
339  	if ((error = xfs_bmap_finish(&tp, &free_list, &committed)))
340  		goto error1;
341  	/*
342  	 * The transaction must have been committed, since there were
343  	 * actually extents freed by xfs_bunmapi.  See xfs_bmap_finish.
344  	 * The new tp has the extent freeing and EFDs.
345  	 */
346  	ASSERT(committed);
347  	/*
348  	 * The first xact was committed, so add the inode to the new one.
349  	 * Mark it dirty so it will be logged and moved forward in the log as
350  	 * part of every commit.
351  	 */
352  	xfs_trans_ijoin(tp, ip, 0);
353  	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
354  	/*
355  	 * Get a new, empty transaction to return to our caller.
356  	 */
357  	ntp = xfs_trans_dup(tp);
358  	/*
359  	 * Commit the transaction containing extent freeing and EFDs.
360  	 * If we get an error on the commit here or on the reserve below,
361  	 * we need to unlock the inode since the new transaction doesn't
362  	 * have the inode attached.
363  	 */
364  	error = xfs_trans_commit(tp, 0);
365  	tp = ntp;
366  	if (error) {
367  		ASSERT(XFS_FORCED_SHUTDOWN(mp));
368  		goto error0;
369  	}
370  	/*
371  	 * transaction commit worked ok so we can drop the extra ticket
372  	 * reference that we gained in xfs_trans_dup()
373  	 */
374  	xfs_log_ticket_put(tp->t_ticket);
375  
376  	/*
377  	 * Remove the memory for extent descriptions (just bookkeeping).
378  	 */
379  	if (ip->i_df.if_bytes)
380  		xfs_idata_realloc(ip, -ip->i_df.if_bytes, XFS_DATA_FORK);
381  	ASSERT(ip->i_df.if_bytes == 0);
382  	/*
383  	 * Put an itruncate log reservation in the new transaction
384  	 * for our caller.
385  	 */
386  	if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
387  			XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) {
388  		ASSERT(XFS_FORCED_SHUTDOWN(mp));
389  		goto error0;
390  	}
391  	/*
392  	 * Return with the inode locked but not joined to the transaction.
393  	 */
394  	*tpp = tp;
395  	return 0;
396  
397   error1:
398  	xfs_bmap_cancel(&free_list);
399   error0:
400  	/*
401  	 * Have to come here with the inode locked and either
402  	 * (held and in the transaction) or (not in the transaction).
403  	 * If the inode isn't held then cancel would iput it, but
404  	 * that's wrong since this is inactive and the vnode ref
405  	 * count is 0 already.
406  	 * Cancel won't do anything to the inode if held, but it still
407  	 * needs to be locked until the cancel is done, if it was
408  	 * joined to the transaction.
409  	 */
410  	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
411  	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
412  	*tpp = NULL;
413  	return error;
414  
415  }
416  
417  STATIC int
xfs_inactive_symlink_local(xfs_inode_t * ip,xfs_trans_t ** tpp)418  xfs_inactive_symlink_local(
419  	xfs_inode_t	*ip,
420  	xfs_trans_t	**tpp)
421  {
422  	int		error;
423  
424  	ASSERT(ip->i_d.di_size <= XFS_IFORK_DSIZE(ip));
425  	/*
426  	 * We're freeing a symlink which fit into
427  	 * the inode.  Just free the memory used
428  	 * to hold the old symlink.
429  	 */
430  	error = xfs_trans_reserve(*tpp, 0,
431  				  XFS_ITRUNCATE_LOG_RES(ip->i_mount),
432  				  0, XFS_TRANS_PERM_LOG_RES,
433  				  XFS_ITRUNCATE_LOG_COUNT);
434  
435  	if (error) {
436  		xfs_trans_cancel(*tpp, 0);
437  		*tpp = NULL;
438  		return error;
439  	}
440  	xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
441  
442  	/*
443  	 * Zero length symlinks _can_ exist.
444  	 */
445  	if (ip->i_df.if_bytes > 0) {
446  		xfs_idata_realloc(ip,
447  				  -(ip->i_df.if_bytes),
448  				  XFS_DATA_FORK);
449  		ASSERT(ip->i_df.if_bytes == 0);
450  	}
451  	return 0;
452  }
453  
454  STATIC int
xfs_inactive_attrs(xfs_inode_t * ip,xfs_trans_t ** tpp)455  xfs_inactive_attrs(
456  	xfs_inode_t	*ip,
457  	xfs_trans_t	**tpp)
458  {
459  	xfs_trans_t	*tp;
460  	int		error;
461  	xfs_mount_t	*mp;
462  
463  	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
464  	tp = *tpp;
465  	mp = ip->i_mount;
466  	ASSERT(ip->i_d.di_forkoff != 0);
467  	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
468  	xfs_iunlock(ip, XFS_ILOCK_EXCL);
469  	if (error)
470  		goto error_unlock;
471  
472  	error = xfs_attr_inactive(ip);
473  	if (error)
474  		goto error_unlock;
475  
476  	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
477  	error = xfs_trans_reserve(tp, 0,
478  				  XFS_IFREE_LOG_RES(mp),
479  				  0, XFS_TRANS_PERM_LOG_RES,
480  				  XFS_INACTIVE_LOG_COUNT);
481  	if (error)
482  		goto error_cancel;
483  
484  	xfs_ilock(ip, XFS_ILOCK_EXCL);
485  	xfs_trans_ijoin(tp, ip, 0);
486  	xfs_idestroy_fork(ip, XFS_ATTR_FORK);
487  
488  	ASSERT(ip->i_d.di_anextents == 0);
489  
490  	*tpp = tp;
491  	return 0;
492  
493  error_cancel:
494  	ASSERT(XFS_FORCED_SHUTDOWN(mp));
495  	xfs_trans_cancel(tp, 0);
496  error_unlock:
497  	*tpp = NULL;
498  	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
499  	return error;
500  }
501  
502  int
xfs_release(xfs_inode_t * ip)503  xfs_release(
504  	xfs_inode_t	*ip)
505  {
506  	xfs_mount_t	*mp = ip->i_mount;
507  	int		error;
508  
509  	if (!S_ISREG(ip->i_d.di_mode) || (ip->i_d.di_mode == 0))
510  		return 0;
511  
512  	/* If this is a read-only mount, don't do this (would generate I/O) */
513  	if (mp->m_flags & XFS_MOUNT_RDONLY)
514  		return 0;
515  
516  	if (!XFS_FORCED_SHUTDOWN(mp)) {
517  		int truncated;
518  
519  		/*
520  		 * If we are using filestreams, and we have an unlinked
521  		 * file that we are processing the last close on, then nothing
522  		 * will be able to reopen and write to this file. Purge this
523  		 * inode from the filestreams cache so that it doesn't delay
524  		 * teardown of the inode.
525  		 */
526  		if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip))
527  			xfs_filestream_deassociate(ip);
528  
529  		/*
530  		 * If we previously truncated this file and removed old data
531  		 * in the process, we want to initiate "early" writeout on
532  		 * the last close.  This is an attempt to combat the notorious
533  		 * NULL files problem which is particularly noticeable from a
534  		 * truncate down, buffered (re-)write (delalloc), followed by
535  		 * a crash.  What we are effectively doing here is
536  		 * significantly reducing the time window where we'd otherwise
537  		 * be exposed to that problem.
538  		 */
539  		truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED);
540  		if (truncated) {
541  			xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE);
542  			if (VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0)
543  				xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE);
544  		}
545  	}
546  
547  	if (ip->i_d.di_nlink == 0)
548  		return 0;
549  
550  	if ((S_ISREG(ip->i_d.di_mode) &&
551  	     (VFS_I(ip)->i_size > 0 ||
552  	      (VN_CACHED(VFS_I(ip)) > 0 || ip->i_delayed_blks > 0)) &&
553  	     (ip->i_df.if_flags & XFS_IFEXTENTS))  &&
554  	    (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
555  
556  		/*
557  		 * If we can't get the iolock just skip truncating the blocks
558  		 * past EOF because we could deadlock with the mmap_sem
559  		 * otherwise.  We'll get another chance to drop them once the
560  		 * last reference to the inode is dropped, so we'll never leak
561  		 * blocks permanently.
562  		 *
563  		 * Further, check if the inode is being opened, written and
564  		 * closed frequently and we have delayed allocation blocks
565  		 * outstanding (e.g. streaming writes from the NFS server),
566  		 * truncating the blocks past EOF will cause fragmentation to
567  		 * occur.
568  		 *
569  		 * In this case don't do the truncation, either, but we have to
570  		 * be careful how we detect this case. Blocks beyond EOF show
571  		 * up as i_delayed_blks even when the inode is clean, so we
572  		 * need to truncate them away first before checking for a dirty
573  		 * release. Hence on the first dirty close we will still remove
574  		 * the speculative allocation, but after that we will leave it
575  		 * in place.
576  		 */
577  		if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
578  			return 0;
579  
580  		error = xfs_free_eofblocks(mp, ip,
581  					   XFS_FREE_EOF_TRYLOCK);
582  		if (error)
583  			return error;
584  
585  		/* delalloc blocks after truncation means it really is dirty */
586  		if (ip->i_delayed_blks)
587  			xfs_iflags_set(ip, XFS_IDIRTY_RELEASE);
588  	}
589  	return 0;
590  }
591  
592  /*
593   * xfs_inactive
594   *
595   * This is called when the vnode reference count for the vnode
596   * goes to zero.  If the file has been unlinked, then it must
597   * now be truncated.  Also, we clear all of the read-ahead state
598   * kept for the inode here since the file is now closed.
599   */
600  int
xfs_inactive(xfs_inode_t * ip)601  xfs_inactive(
602  	xfs_inode_t	*ip)
603  {
604  	xfs_bmap_free_t	free_list;
605  	xfs_fsblock_t	first_block;
606  	int		committed;
607  	xfs_trans_t	*tp;
608  	xfs_mount_t	*mp;
609  	int		error;
610  	int		truncate;
611  
612  	/*
613  	 * If the inode is already free, then there can be nothing
614  	 * to clean up here.
615  	 */
616  	if (ip->i_d.di_mode == 0 || is_bad_inode(VFS_I(ip))) {
617  		ASSERT(ip->i_df.if_real_bytes == 0);
618  		ASSERT(ip->i_df.if_broot_bytes == 0);
619  		return VN_INACTIVE_CACHE;
620  	}
621  
622  	/*
623  	 * Only do a truncate if it's a regular file with
624  	 * some actual space in it.  It's OK to look at the
625  	 * inode's fields without the lock because we're the
626  	 * only one with a reference to the inode.
627  	 */
628  	truncate = ((ip->i_d.di_nlink == 0) &&
629  	    ((ip->i_d.di_size != 0) || XFS_ISIZE(ip) != 0 ||
630  	     (ip->i_d.di_nextents > 0) || (ip->i_delayed_blks > 0)) &&
631  	    S_ISREG(ip->i_d.di_mode));
632  
633  	mp = ip->i_mount;
634  
635  	error = 0;
636  
637  	/* If this is a read-only mount, don't do this (would generate I/O) */
638  	if (mp->m_flags & XFS_MOUNT_RDONLY)
639  		goto out;
640  
641  	if (ip->i_d.di_nlink != 0) {
642  		if ((S_ISREG(ip->i_d.di_mode) &&
643  		    (VFS_I(ip)->i_size > 0 ||
644  		     (VN_CACHED(VFS_I(ip)) > 0 || ip->i_delayed_blks > 0)) &&
645  		    (ip->i_df.if_flags & XFS_IFEXTENTS) &&
646  		    (!(ip->i_d.di_flags &
647  				(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
648  		     ip->i_delayed_blks != 0))) {
649  			error = xfs_free_eofblocks(mp, ip, 0);
650  			if (error)
651  				return VN_INACTIVE_CACHE;
652  		}
653  		goto out;
654  	}
655  
656  	ASSERT(ip->i_d.di_nlink == 0);
657  
658  	error = xfs_qm_dqattach(ip, 0);
659  	if (error)
660  		return VN_INACTIVE_CACHE;
661  
662  	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
663  	if (truncate) {
664  		xfs_ilock(ip, XFS_IOLOCK_EXCL);
665  
666  		error = xfs_trans_reserve(tp, 0,
667  					  XFS_ITRUNCATE_LOG_RES(mp),
668  					  0, XFS_TRANS_PERM_LOG_RES,
669  					  XFS_ITRUNCATE_LOG_COUNT);
670  		if (error) {
671  			/* Don't call itruncate_cleanup */
672  			ASSERT(XFS_FORCED_SHUTDOWN(mp));
673  			xfs_trans_cancel(tp, 0);
674  			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
675  			return VN_INACTIVE_CACHE;
676  		}
677  
678  		xfs_ilock(ip, XFS_ILOCK_EXCL);
679  		xfs_trans_ijoin(tp, ip, 0);
680  
681  		ip->i_d.di_size = 0;
682  		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
683  
684  		error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0);
685  		if (error) {
686  			xfs_trans_cancel(tp,
687  				XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
688  			xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
689  			return VN_INACTIVE_CACHE;
690  		}
691  
692  		ASSERT(ip->i_d.di_nextents == 0);
693  	} else if (S_ISLNK(ip->i_d.di_mode)) {
694  
695  		/*
696  		 * If we get an error while cleaning up a
697  		 * symlink we bail out.
698  		 */
699  		error = (ip->i_d.di_size > XFS_IFORK_DSIZE(ip)) ?
700  			xfs_inactive_symlink_rmt(ip, &tp) :
701  			xfs_inactive_symlink_local(ip, &tp);
702  
703  		if (error) {
704  			ASSERT(tp == NULL);
705  			return VN_INACTIVE_CACHE;
706  		}
707  
708  		xfs_trans_ijoin(tp, ip, 0);
709  	} else {
710  		error = xfs_trans_reserve(tp, 0,
711  					  XFS_IFREE_LOG_RES(mp),
712  					  0, XFS_TRANS_PERM_LOG_RES,
713  					  XFS_INACTIVE_LOG_COUNT);
714  		if (error) {
715  			ASSERT(XFS_FORCED_SHUTDOWN(mp));
716  			xfs_trans_cancel(tp, 0);
717  			return VN_INACTIVE_CACHE;
718  		}
719  
720  		xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
721  		xfs_trans_ijoin(tp, ip, 0);
722  	}
723  
724  	/*
725  	 * If there are attributes associated with the file
726  	 * then blow them away now.  The code calls a routine
727  	 * that recursively deconstructs the attribute fork.
728  	 * We need to just commit the current transaction
729  	 * because we can't use it for xfs_attr_inactive().
730  	 */
731  	if (ip->i_d.di_anextents > 0) {
732  		error = xfs_inactive_attrs(ip, &tp);
733  		/*
734  		 * If we got an error, the transaction is already
735  		 * cancelled, and the inode is unlocked. Just get out.
736  		 */
737  		 if (error)
738  			 return VN_INACTIVE_CACHE;
739  	} else if (ip->i_afp) {
740  		xfs_idestroy_fork(ip, XFS_ATTR_FORK);
741  	}
742  
743  	/*
744  	 * Free the inode.
745  	 */
746  	xfs_bmap_init(&free_list, &first_block);
747  	error = xfs_ifree(tp, ip, &free_list);
748  	if (error) {
749  		/*
750  		 * If we fail to free the inode, shut down.  The cancel
751  		 * might do that, we need to make sure.  Otherwise the
752  		 * inode might be lost for a long time or forever.
753  		 */
754  		if (!XFS_FORCED_SHUTDOWN(mp)) {
755  			xfs_notice(mp, "%s: xfs_ifree returned error %d",
756  				__func__, error);
757  			xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
758  		}
759  		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
760  	} else {
761  		/*
762  		 * Credit the quota account(s). The inode is gone.
763  		 */
764  		xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
765  
766  		/*
767  		 * Just ignore errors at this point.  There is nothing we can
768  		 * do except to try to keep going. Make sure it's not a silent
769  		 * error.
770  		 */
771  		error = xfs_bmap_finish(&tp,  &free_list, &committed);
772  		if (error)
773  			xfs_notice(mp, "%s: xfs_bmap_finish returned error %d",
774  				__func__, error);
775  		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
776  		if (error)
777  			xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
778  				__func__, error);
779  	}
780  
781  	/*
782  	 * Release the dquots held by inode, if any.
783  	 */
784  	xfs_qm_dqdetach(ip);
785  	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
786  
787   out:
788  	return VN_INACTIVE_CACHE;
789  }
790  
791  /*
792   * Lookups up an inode from "name". If ci_name is not NULL, then a CI match
793   * is allowed, otherwise it has to be an exact match. If a CI match is found,
794   * ci_name->name will point to a the actual name (caller must free) or
795   * will be set to NULL if an exact match is found.
796   */
797  int
xfs_lookup(xfs_inode_t * dp,struct xfs_name * name,xfs_inode_t ** ipp,struct xfs_name * ci_name)798  xfs_lookup(
799  	xfs_inode_t		*dp,
800  	struct xfs_name		*name,
801  	xfs_inode_t		**ipp,
802  	struct xfs_name		*ci_name)
803  {
804  	xfs_ino_t		inum;
805  	int			error;
806  	uint			lock_mode;
807  
808  	trace_xfs_lookup(dp, name);
809  
810  	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
811  		return XFS_ERROR(EIO);
812  
813  	lock_mode = xfs_ilock_map_shared(dp);
814  	error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
815  	xfs_iunlock_map_shared(dp, lock_mode);
816  
817  	if (error)
818  		goto out;
819  
820  	error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp);
821  	if (error)
822  		goto out_free_name;
823  
824  	return 0;
825  
826  out_free_name:
827  	if (ci_name)
828  		kmem_free(ci_name->name);
829  out:
830  	*ipp = NULL;
831  	return error;
832  }
833  
834  int
xfs_create(xfs_inode_t * dp,struct xfs_name * name,umode_t mode,xfs_dev_t rdev,xfs_inode_t ** ipp)835  xfs_create(
836  	xfs_inode_t		*dp,
837  	struct xfs_name		*name,
838  	umode_t			mode,
839  	xfs_dev_t		rdev,
840  	xfs_inode_t		**ipp)
841  {
842  	int			is_dir = S_ISDIR(mode);
843  	struct xfs_mount	*mp = dp->i_mount;
844  	struct xfs_inode	*ip = NULL;
845  	struct xfs_trans	*tp = NULL;
846  	int			error;
847  	xfs_bmap_free_t		free_list;
848  	xfs_fsblock_t		first_block;
849  	boolean_t		unlock_dp_on_error = B_FALSE;
850  	uint			cancel_flags;
851  	int			committed;
852  	prid_t			prid;
853  	struct xfs_dquot	*udqp = NULL;
854  	struct xfs_dquot	*gdqp = NULL;
855  	uint			resblks;
856  	uint			log_res;
857  	uint			log_count;
858  
859  	trace_xfs_create(dp, name);
860  
861  	if (XFS_FORCED_SHUTDOWN(mp))
862  		return XFS_ERROR(EIO);
863  
864  	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
865  		prid = xfs_get_projid(dp);
866  	else
867  		prid = XFS_PROJID_DEFAULT;
868  
869  	/*
870  	 * Make sure that we have allocated dquot(s) on disk.
871  	 */
872  	error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
873  			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
874  	if (error)
875  		return error;
876  
877  	if (is_dir) {
878  		rdev = 0;
879  		resblks = XFS_MKDIR_SPACE_RES(mp, name->len);
880  		log_res = XFS_MKDIR_LOG_RES(mp);
881  		log_count = XFS_MKDIR_LOG_COUNT;
882  		tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR);
883  	} else {
884  		resblks = XFS_CREATE_SPACE_RES(mp, name->len);
885  		log_res = XFS_CREATE_LOG_RES(mp);
886  		log_count = XFS_CREATE_LOG_COUNT;
887  		tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
888  	}
889  
890  	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
891  
892  	/*
893  	 * Initially assume that the file does not exist and
894  	 * reserve the resources for that case.  If that is not
895  	 * the case we'll drop the one we have and get a more
896  	 * appropriate transaction later.
897  	 */
898  	error = xfs_trans_reserve(tp, resblks, log_res, 0,
899  			XFS_TRANS_PERM_LOG_RES, log_count);
900  	if (error == ENOSPC) {
901  		/* flush outstanding delalloc blocks and retry */
902  		xfs_flush_inodes(dp);
903  		error = xfs_trans_reserve(tp, resblks, log_res, 0,
904  				XFS_TRANS_PERM_LOG_RES, log_count);
905  	}
906  	if (error == ENOSPC) {
907  		/* No space at all so try a "no-allocation" reservation */
908  		resblks = 0;
909  		error = xfs_trans_reserve(tp, 0, log_res, 0,
910  				XFS_TRANS_PERM_LOG_RES, log_count);
911  	}
912  	if (error) {
913  		cancel_flags = 0;
914  		goto out_trans_cancel;
915  	}
916  
917  	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
918  	unlock_dp_on_error = B_TRUE;
919  
920  	xfs_bmap_init(&free_list, &first_block);
921  
922  	/*
923  	 * Reserve disk quota and the inode.
924  	 */
925  	error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0);
926  	if (error)
927  		goto out_trans_cancel;
928  
929  	error = xfs_dir_canenter(tp, dp, name, resblks);
930  	if (error)
931  		goto out_trans_cancel;
932  
933  	/*
934  	 * A newly created regular or special file just has one directory
935  	 * entry pointing to them, but a directory also the "." entry
936  	 * pointing to itself.
937  	 */
938  	error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev,
939  			       prid, resblks > 0, &ip, &committed);
940  	if (error) {
941  		if (error == ENOSPC)
942  			goto out_trans_cancel;
943  		goto out_trans_abort;
944  	}
945  
946  	/*
947  	 * Now we join the directory inode to the transaction.  We do not do it
948  	 * earlier because xfs_dir_ialloc might commit the previous transaction
949  	 * (and release all the locks).  An error from here on will result in
950  	 * the transaction cancel unlocking dp so don't do it explicitly in the
951  	 * error path.
952  	 */
953  	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
954  	unlock_dp_on_error = B_FALSE;
955  
956  	error = xfs_dir_createname(tp, dp, name, ip->i_ino,
957  					&first_block, &free_list, resblks ?
958  					resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
959  	if (error) {
960  		ASSERT(error != ENOSPC);
961  		goto out_trans_abort;
962  	}
963  	xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
964  	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
965  
966  	if (is_dir) {
967  		error = xfs_dir_init(tp, ip, dp);
968  		if (error)
969  			goto out_bmap_cancel;
970  
971  		error = xfs_bumplink(tp, dp);
972  		if (error)
973  			goto out_bmap_cancel;
974  	}
975  
976  	/*
977  	 * If this is a synchronous mount, make sure that the
978  	 * create transaction goes to disk before returning to
979  	 * the user.
980  	 */
981  	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
982  		xfs_trans_set_sync(tp);
983  
984  	/*
985  	 * Attach the dquot(s) to the inodes and modify them incore.
986  	 * These ids of the inode couldn't have changed since the new
987  	 * inode has been locked ever since it was created.
988  	 */
989  	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp);
990  
991  	error = xfs_bmap_finish(&tp, &free_list, &committed);
992  	if (error)
993  		goto out_bmap_cancel;
994  
995  	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
996  	if (error)
997  		goto out_release_inode;
998  
999  	xfs_qm_dqrele(udqp);
1000  	xfs_qm_dqrele(gdqp);
1001  
1002  	*ipp = ip;
1003  	return 0;
1004  
1005   out_bmap_cancel:
1006  	xfs_bmap_cancel(&free_list);
1007   out_trans_abort:
1008  	cancel_flags |= XFS_TRANS_ABORT;
1009   out_trans_cancel:
1010  	xfs_trans_cancel(tp, cancel_flags);
1011   out_release_inode:
1012  	/*
1013  	 * Wait until after the current transaction is aborted to
1014  	 * release the inode.  This prevents recursive transactions
1015  	 * and deadlocks from xfs_inactive.
1016  	 */
1017  	if (ip)
1018  		IRELE(ip);
1019  
1020  	xfs_qm_dqrele(udqp);
1021  	xfs_qm_dqrele(gdqp);
1022  
1023  	if (unlock_dp_on_error)
1024  		xfs_iunlock(dp, XFS_ILOCK_EXCL);
1025  	return error;
1026  }
1027  
#ifdef DEBUG
/*
 * Debug-only counters.  Nothing in the code visible here updates them.
 * NOTE(review): the names suggest lock/retry statistics for
 * xfs_lock_inodes() - confirm against that function.
 */
int xfs_locked_n;
int xfs_small_retries;
int xfs_middle_retries;
int xfs_lots_retries;
int xfs_lock_delays;
#endif
1035  
1036  /*
1037   * Bump the subclass so xfs_lock_inodes() acquires each lock with
1038   * a different value
1039   */
1040  static inline int
xfs_lock_inumorder(int lock_mode,int subclass)1041  xfs_lock_inumorder(int lock_mode, int subclass)
1042  {
1043  	if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
1044  		lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
1045  	if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
1046  		lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
1047  
1048  	return lock_mode;
1049  }
1050  
1051  /*
1052   * The following routine will lock n inodes in exclusive mode.
1053   * We assume the caller calls us with the inodes in i_ino order.
1054   *
1055   * We need to detect deadlock where an inode that we lock
1056   * is in the AIL and we start waiting for another inode that is locked
1057   * by a thread in a long running transaction (such as truncate). This can
1058   * result in deadlock since the long running trans might need to wait
1059   * for the inode we just locked in order to push the tail and free space
1060   * in the log.
1061   */
1062  void
xfs_lock_inodes(xfs_inode_t ** ips,int inodes,uint lock_mode)1063  xfs_lock_inodes(
1064  	xfs_inode_t	**ips,
1065  	int		inodes,
1066  	uint		lock_mode)
1067  {
1068  	int		attempts = 0, i, j, try_lock;
1069  	xfs_log_item_t	*lp;
1070  
1071  	ASSERT(ips && (inodes >= 2)); /* we need at least two */
1072  
1073  	try_lock = 0;
1074  	i = 0;
1075  
1076  again:
1077  	for (; i < inodes; i++) {
1078  		ASSERT(ips[i]);
1079  
1080  		if (i && (ips[i] == ips[i-1]))	/* Already locked */
1081  			continue;
1082  
1083  		/*
1084  		 * If try_lock is not set yet, make sure all locked inodes
1085  		 * are not in the AIL.
1086  		 * If any are, set try_lock to be used later.
1087  		 */
1088  
1089  		if (!try_lock) {
1090  			for (j = (i - 1); j >= 0 && !try_lock; j--) {
1091  				lp = (xfs_log_item_t *)ips[j]->i_itemp;
1092  				if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
1093  					try_lock++;
1094  				}
1095  			}
1096  		}
1097  
1098  		/*
1099  		 * If any of the previous locks we have locked is in the AIL,
1100  		 * we must TRY to get the second and subsequent locks. If
1101  		 * we can't get any, we must release all we have
1102  		 * and try again.
1103  		 */
1104  
1105  		if (try_lock) {
1106  			/* try_lock must be 0 if i is 0. */
1107  			/*
1108  			 * try_lock means we have an inode locked
1109  			 * that is in the AIL.
1110  			 */
1111  			ASSERT(i != 0);
1112  			if (!xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) {
1113  				attempts++;
1114  
1115  				/*
1116  				 * Unlock all previous guys and try again.
1117  				 * xfs_iunlock will try to push the tail
1118  				 * if the inode is in the AIL.
1119  				 */
1120  
1121  				for(j = i - 1; j >= 0; j--) {
1122  
1123  					/*
1124  					 * Check to see if we've already
1125  					 * unlocked this one.
1126  					 * Not the first one going back,
1127  					 * and the inode ptr is the same.
1128  					 */
1129  					if ((j != (i - 1)) && ips[j] ==
1130  								ips[j+1])
1131  						continue;
1132  
1133  					xfs_iunlock(ips[j], lock_mode);
1134  				}
1135  
1136  				if ((attempts % 5) == 0) {
1137  					delay(1); /* Don't just spin the CPU */
1138  #ifdef DEBUG
1139  					xfs_lock_delays++;
1140  #endif
1141  				}
1142  				i = 0;
1143  				try_lock = 0;
1144  				goto again;
1145  			}
1146  		} else {
1147  			xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i));
1148  		}
1149  	}
1150  
1151  #ifdef DEBUG
1152  	if (attempts) {
1153  		if (attempts < 5) xfs_small_retries++;
1154  		else if (attempts < 100) xfs_middle_retries++;
1155  		else xfs_lots_retries++;
1156  	} else {
1157  		xfs_locked_n++;
1158  	}
1159  #endif
1160  }
1161  
1162  /*
1163   * xfs_lock_two_inodes() can only be used to lock one type of lock
1164   * at a time - the iolock or the ilock, but not both at once. If
1165   * we lock both at once, lockdep will report false positives saying
1166   * we have violated locking orders.
1167   */
1168  void
xfs_lock_two_inodes(xfs_inode_t * ip0,xfs_inode_t * ip1,uint lock_mode)1169  xfs_lock_two_inodes(
1170  	xfs_inode_t		*ip0,
1171  	xfs_inode_t		*ip1,
1172  	uint			lock_mode)
1173  {
1174  	xfs_inode_t		*temp;
1175  	int			attempts = 0;
1176  	xfs_log_item_t		*lp;
1177  
1178  	if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
1179  		ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0);
1180  	ASSERT(ip0->i_ino != ip1->i_ino);
1181  
1182  	if (ip0->i_ino > ip1->i_ino) {
1183  		temp = ip0;
1184  		ip0 = ip1;
1185  		ip1 = temp;
1186  	}
1187  
1188   again:
1189  	xfs_ilock(ip0, xfs_lock_inumorder(lock_mode, 0));
1190  
1191  	/*
1192  	 * If the first lock we have locked is in the AIL, we must TRY to get
1193  	 * the second lock. If we can't get it, we must release the first one
1194  	 * and try again.
1195  	 */
1196  	lp = (xfs_log_item_t *)ip0->i_itemp;
1197  	if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
1198  		if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(lock_mode, 1))) {
1199  			xfs_iunlock(ip0, lock_mode);
1200  			if ((++attempts % 5) == 0)
1201  				delay(1); /* Don't just spin the CPU */
1202  			goto again;
1203  		}
1204  	} else {
1205  		xfs_ilock(ip1, xfs_lock_inumorder(lock_mode, 1));
1206  	}
1207  }
1208  
1209  int
xfs_remove(xfs_inode_t * dp,struct xfs_name * name,xfs_inode_t * ip)1210  xfs_remove(
1211  	xfs_inode_t             *dp,
1212  	struct xfs_name		*name,
1213  	xfs_inode_t		*ip)
1214  {
1215  	xfs_mount_t		*mp = dp->i_mount;
1216  	xfs_trans_t             *tp = NULL;
1217  	int			is_dir = S_ISDIR(ip->i_d.di_mode);
1218  	int                     error = 0;
1219  	xfs_bmap_free_t         free_list;
1220  	xfs_fsblock_t           first_block;
1221  	int			cancel_flags;
1222  	int			committed;
1223  	int			link_zero;
1224  	uint			resblks;
1225  	uint			log_count;
1226  
1227  	trace_xfs_remove(dp, name);
1228  
1229  	if (XFS_FORCED_SHUTDOWN(mp))
1230  		return XFS_ERROR(EIO);
1231  
1232  	error = xfs_qm_dqattach(dp, 0);
1233  	if (error)
1234  		goto std_return;
1235  
1236  	error = xfs_qm_dqattach(ip, 0);
1237  	if (error)
1238  		goto std_return;
1239  
1240  	if (is_dir) {
1241  		tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
1242  		log_count = XFS_DEFAULT_LOG_COUNT;
1243  	} else {
1244  		tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
1245  		log_count = XFS_REMOVE_LOG_COUNT;
1246  	}
1247  	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
1248  
1249  	/*
1250  	 * We try to get the real space reservation first,
1251  	 * allowing for directory btree deletion(s) implying
1252  	 * possible bmap insert(s).  If we can't get the space
1253  	 * reservation then we use 0 instead, and avoid the bmap
1254  	 * btree insert(s) in the directory code by, if the bmap
1255  	 * insert tries to happen, instead trimming the LAST
1256  	 * block from the directory.
1257  	 */
1258  	resblks = XFS_REMOVE_SPACE_RES(mp);
1259  	error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0,
1260  				  XFS_TRANS_PERM_LOG_RES, log_count);
1261  	if (error == ENOSPC) {
1262  		resblks = 0;
1263  		error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0,
1264  					  XFS_TRANS_PERM_LOG_RES, log_count);
1265  	}
1266  	if (error) {
1267  		ASSERT(error != ENOSPC);
1268  		cancel_flags = 0;
1269  		goto out_trans_cancel;
1270  	}
1271  
1272  	xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL);
1273  
1274  	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
1275  	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1276  
1277  	/*
1278  	 * If we're removing a directory perform some additional validation.
1279  	 */
1280  	if (is_dir) {
1281  		ASSERT(ip->i_d.di_nlink >= 2);
1282  		if (ip->i_d.di_nlink != 2) {
1283  			error = XFS_ERROR(ENOTEMPTY);
1284  			goto out_trans_cancel;
1285  		}
1286  		if (!xfs_dir_isempty(ip)) {
1287  			error = XFS_ERROR(ENOTEMPTY);
1288  			goto out_trans_cancel;
1289  		}
1290  	}
1291  
1292  	xfs_bmap_init(&free_list, &first_block);
1293  	error = xfs_dir_removename(tp, dp, name, ip->i_ino,
1294  					&first_block, &free_list, resblks);
1295  	if (error) {
1296  		ASSERT(error != ENOENT);
1297  		goto out_bmap_cancel;
1298  	}
1299  	xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
1300  
1301  	if (is_dir) {
1302  		/*
1303  		 * Drop the link from ip's "..".
1304  		 */
1305  		error = xfs_droplink(tp, dp);
1306  		if (error)
1307  			goto out_bmap_cancel;
1308  
1309  		/*
1310  		 * Drop the "." link from ip to self.
1311  		 */
1312  		error = xfs_droplink(tp, ip);
1313  		if (error)
1314  			goto out_bmap_cancel;
1315  	} else {
1316  		/*
1317  		 * When removing a non-directory we need to log the parent
1318  		 * inode here.  For a directory this is done implicitly
1319  		 * by the xfs_droplink call for the ".." entry.
1320  		 */
1321  		xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
1322  	}
1323  
1324  	/*
1325  	 * Drop the link from dp to ip.
1326  	 */
1327  	error = xfs_droplink(tp, ip);
1328  	if (error)
1329  		goto out_bmap_cancel;
1330  
1331  	/*
1332  	 * Determine if this is the last link while
1333  	 * we are in the transaction.
1334  	 */
1335  	link_zero = (ip->i_d.di_nlink == 0);
1336  
1337  	/*
1338  	 * If this is a synchronous mount, make sure that the
1339  	 * remove transaction goes to disk before returning to
1340  	 * the user.
1341  	 */
1342  	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
1343  		xfs_trans_set_sync(tp);
1344  
1345  	error = xfs_bmap_finish(&tp, &free_list, &committed);
1346  	if (error)
1347  		goto out_bmap_cancel;
1348  
1349  	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1350  	if (error)
1351  		goto std_return;
1352  
1353  	/*
1354  	 * If we are using filestreams, kill the stream association.
1355  	 * If the file is still open it may get a new one but that
1356  	 * will get killed on last close in xfs_close() so we don't
1357  	 * have to worry about that.
1358  	 */
1359  	if (!is_dir && link_zero && xfs_inode_is_filestream(ip))
1360  		xfs_filestream_deassociate(ip);
1361  
1362  	return 0;
1363  
1364   out_bmap_cancel:
1365  	xfs_bmap_cancel(&free_list);
1366  	cancel_flags |= XFS_TRANS_ABORT;
1367   out_trans_cancel:
1368  	xfs_trans_cancel(tp, cancel_flags);
1369   std_return:
1370  	return error;
1371  }
1372  
1373  int
xfs_link(xfs_inode_t * tdp,xfs_inode_t * sip,struct xfs_name * target_name)1374  xfs_link(
1375  	xfs_inode_t		*tdp,
1376  	xfs_inode_t		*sip,
1377  	struct xfs_name		*target_name)
1378  {
1379  	xfs_mount_t		*mp = tdp->i_mount;
1380  	xfs_trans_t		*tp;
1381  	int			error;
1382  	xfs_bmap_free_t         free_list;
1383  	xfs_fsblock_t           first_block;
1384  	int			cancel_flags;
1385  	int			committed;
1386  	int			resblks;
1387  
1388  	trace_xfs_link(tdp, target_name);
1389  
1390  	ASSERT(!S_ISDIR(sip->i_d.di_mode));
1391  
1392  	if (XFS_FORCED_SHUTDOWN(mp))
1393  		return XFS_ERROR(EIO);
1394  
1395  	error = xfs_qm_dqattach(sip, 0);
1396  	if (error)
1397  		goto std_return;
1398  
1399  	error = xfs_qm_dqattach(tdp, 0);
1400  	if (error)
1401  		goto std_return;
1402  
1403  	tp = xfs_trans_alloc(mp, XFS_TRANS_LINK);
1404  	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
1405  	resblks = XFS_LINK_SPACE_RES(mp, target_name->len);
1406  	error = xfs_trans_reserve(tp, resblks, XFS_LINK_LOG_RES(mp), 0,
1407  			XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT);
1408  	if (error == ENOSPC) {
1409  		resblks = 0;
1410  		error = xfs_trans_reserve(tp, 0, XFS_LINK_LOG_RES(mp), 0,
1411  				XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT);
1412  	}
1413  	if (error) {
1414  		cancel_flags = 0;
1415  		goto error_return;
1416  	}
1417  
1418  	xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
1419  
1420  	xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
1421  	xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
1422  
1423  	/*
1424  	 * If we are using project inheritance, we only allow hard link
1425  	 * creation in our tree when the project IDs are the same; else
1426  	 * the tree quota mechanism could be circumvented.
1427  	 */
1428  	if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
1429  		     (xfs_get_projid(tdp) != xfs_get_projid(sip)))) {
1430  		error = XFS_ERROR(EXDEV);
1431  		goto error_return;
1432  	}
1433  
1434  	error = xfs_dir_canenter(tp, tdp, target_name, resblks);
1435  	if (error)
1436  		goto error_return;
1437  
1438  	xfs_bmap_init(&free_list, &first_block);
1439  
1440  	error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
1441  					&first_block, &free_list, resblks);
1442  	if (error)
1443  		goto abort_return;
1444  	xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
1445  	xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);
1446  
1447  	error = xfs_bumplink(tp, sip);
1448  	if (error)
1449  		goto abort_return;
1450  
1451  	/*
1452  	 * If this is a synchronous mount, make sure that the
1453  	 * link transaction goes to disk before returning to
1454  	 * the user.
1455  	 */
1456  	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
1457  		xfs_trans_set_sync(tp);
1458  	}
1459  
1460  	error = xfs_bmap_finish (&tp, &free_list, &committed);
1461  	if (error) {
1462  		xfs_bmap_cancel(&free_list);
1463  		goto abort_return;
1464  	}
1465  
1466  	return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1467  
1468   abort_return:
1469  	cancel_flags |= XFS_TRANS_ABORT;
1470   error_return:
1471  	xfs_trans_cancel(tp, cancel_flags);
1472   std_return:
1473  	return error;
1474  }
1475  
1476  int
xfs_symlink(xfs_inode_t * dp,struct xfs_name * link_name,const char * target_path,umode_t mode,xfs_inode_t ** ipp)1477  xfs_symlink(
1478  	xfs_inode_t		*dp,
1479  	struct xfs_name		*link_name,
1480  	const char		*target_path,
1481  	umode_t			mode,
1482  	xfs_inode_t		**ipp)
1483  {
1484  	xfs_mount_t		*mp = dp->i_mount;
1485  	xfs_trans_t		*tp;
1486  	xfs_inode_t		*ip;
1487  	int			error;
1488  	int			pathlen;
1489  	xfs_bmap_free_t		free_list;
1490  	xfs_fsblock_t		first_block;
1491  	boolean_t		unlock_dp_on_error = B_FALSE;
1492  	uint			cancel_flags;
1493  	int			committed;
1494  	xfs_fileoff_t		first_fsb;
1495  	xfs_filblks_t		fs_blocks;
1496  	int			nmaps;
1497  	xfs_bmbt_irec_t		mval[SYMLINK_MAPS];
1498  	xfs_daddr_t		d;
1499  	const char		*cur_chunk;
1500  	int			byte_cnt;
1501  	int			n;
1502  	xfs_buf_t		*bp;
1503  	prid_t			prid;
1504  	struct xfs_dquot	*udqp, *gdqp;
1505  	uint			resblks;
1506  
1507  	*ipp = NULL;
1508  	error = 0;
1509  	ip = NULL;
1510  	tp = NULL;
1511  
1512  	trace_xfs_symlink(dp, link_name);
1513  
1514  	if (XFS_FORCED_SHUTDOWN(mp))
1515  		return XFS_ERROR(EIO);
1516  
1517  	/*
1518  	 * Check component lengths of the target path name.
1519  	 */
1520  	pathlen = strlen(target_path);
1521  	if (pathlen >= MAXPATHLEN)      /* total string too long */
1522  		return XFS_ERROR(ENAMETOOLONG);
1523  
1524  	udqp = gdqp = NULL;
1525  	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
1526  		prid = xfs_get_projid(dp);
1527  	else
1528  		prid = XFS_PROJID_DEFAULT;
1529  
1530  	/*
1531  	 * Make sure that we have allocated dquot(s) on disk.
1532  	 */
1533  	error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
1534  			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
1535  	if (error)
1536  		goto std_return;
1537  
1538  	tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK);
1539  	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
1540  	/*
1541  	 * The symlink will fit into the inode data fork?
1542  	 * There can't be any attributes so we get the whole variable part.
1543  	 */
1544  	if (pathlen <= XFS_LITINO(mp))
1545  		fs_blocks = 0;
1546  	else
1547  		fs_blocks = XFS_B_TO_FSB(mp, pathlen);
1548  	resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);
1549  	error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0,
1550  			XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
1551  	if (error == ENOSPC && fs_blocks == 0) {
1552  		resblks = 0;
1553  		error = xfs_trans_reserve(tp, 0, XFS_SYMLINK_LOG_RES(mp), 0,
1554  				XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
1555  	}
1556  	if (error) {
1557  		cancel_flags = 0;
1558  		goto error_return;
1559  	}
1560  
1561  	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
1562  	unlock_dp_on_error = B_TRUE;
1563  
1564  	/*
1565  	 * Check whether the directory allows new symlinks or not.
1566  	 */
1567  	if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) {
1568  		error = XFS_ERROR(EPERM);
1569  		goto error_return;
1570  	}
1571  
1572  	/*
1573  	 * Reserve disk quota : blocks and inode.
1574  	 */
1575  	error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0);
1576  	if (error)
1577  		goto error_return;
1578  
1579  	/*
1580  	 * Check for ability to enter directory entry, if no space reserved.
1581  	 */
1582  	error = xfs_dir_canenter(tp, dp, link_name, resblks);
1583  	if (error)
1584  		goto error_return;
1585  	/*
1586  	 * Initialize the bmap freelist prior to calling either
1587  	 * bmapi or the directory create code.
1588  	 */
1589  	xfs_bmap_init(&free_list, &first_block);
1590  
1591  	/*
1592  	 * Allocate an inode for the symlink.
1593  	 */
1594  	error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0,
1595  			       prid, resblks > 0, &ip, NULL);
1596  	if (error) {
1597  		if (error == ENOSPC)
1598  			goto error_return;
1599  		goto error1;
1600  	}
1601  
1602  	/*
1603  	 * An error after we've joined dp to the transaction will result in the
1604  	 * transaction cancel unlocking dp so don't do it explicitly in the
1605  	 * error path.
1606  	 */
1607  	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
1608  	unlock_dp_on_error = B_FALSE;
1609  
1610  	/*
1611  	 * Also attach the dquot(s) to it, if applicable.
1612  	 */
1613  	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp);
1614  
1615  	if (resblks)
1616  		resblks -= XFS_IALLOC_SPACE_RES(mp);
1617  	/*
1618  	 * If the symlink will fit into the inode, write it inline.
1619  	 */
1620  	if (pathlen <= XFS_IFORK_DSIZE(ip)) {
1621  		xfs_idata_realloc(ip, pathlen, XFS_DATA_FORK);
1622  		memcpy(ip->i_df.if_u1.if_data, target_path, pathlen);
1623  		ip->i_d.di_size = pathlen;
1624  
1625  		/*
1626  		 * The inode was initially created in extent format.
1627  		 */
1628  		ip->i_df.if_flags &= ~(XFS_IFEXTENTS | XFS_IFBROOT);
1629  		ip->i_df.if_flags |= XFS_IFINLINE;
1630  
1631  		ip->i_d.di_format = XFS_DINODE_FMT_LOCAL;
1632  		xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);
1633  
1634  	} else {
1635  		first_fsb = 0;
1636  		nmaps = SYMLINK_MAPS;
1637  
1638  		error = xfs_bmapi_write(tp, ip, first_fsb, fs_blocks,
1639  				  XFS_BMAPI_METADATA, &first_block, resblks,
1640  				  mval, &nmaps, &free_list);
1641  		if (error)
1642  			goto error2;
1643  
1644  		if (resblks)
1645  			resblks -= fs_blocks;
1646  		ip->i_d.di_size = pathlen;
1647  		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1648  
1649  		cur_chunk = target_path;
1650  		for (n = 0; n < nmaps; n++) {
1651  			d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
1652  			byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
1653  			bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
1654  					       BTOBB(byte_cnt), 0);
1655  			if (!bp) {
1656  				error = ENOMEM;
1657  				goto error2;
1658  			}
1659  			if (pathlen < byte_cnt) {
1660  				byte_cnt = pathlen;
1661  			}
1662  			pathlen -= byte_cnt;
1663  
1664  			memcpy(bp->b_addr, cur_chunk, byte_cnt);
1665  			cur_chunk += byte_cnt;
1666  
1667  			xfs_trans_log_buf(tp, bp, 0, byte_cnt - 1);
1668  		}
1669  	}
1670  
1671  	/*
1672  	 * Create the directory entry for the symlink.
1673  	 */
1674  	error = xfs_dir_createname(tp, dp, link_name, ip->i_ino,
1675  					&first_block, &free_list, resblks);
1676  	if (error)
1677  		goto error2;
1678  	xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
1679  	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
1680  
1681  	/*
1682  	 * If this is a synchronous mount, make sure that the
1683  	 * symlink transaction goes to disk before returning to
1684  	 * the user.
1685  	 */
1686  	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
1687  		xfs_trans_set_sync(tp);
1688  	}
1689  
1690  	error = xfs_bmap_finish(&tp, &free_list, &committed);
1691  	if (error) {
1692  		goto error2;
1693  	}
1694  	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1695  	xfs_qm_dqrele(udqp);
1696  	xfs_qm_dqrele(gdqp);
1697  
1698  	*ipp = ip;
1699  	return 0;
1700  
1701   error2:
1702  	IRELE(ip);
1703   error1:
1704  	xfs_bmap_cancel(&free_list);
1705  	cancel_flags |= XFS_TRANS_ABORT;
1706   error_return:
1707  	xfs_trans_cancel(tp, cancel_flags);
1708  	xfs_qm_dqrele(udqp);
1709  	xfs_qm_dqrele(gdqp);
1710  
1711  	if (unlock_dp_on_error)
1712  		xfs_iunlock(dp, XFS_ILOCK_EXCL);
1713   std_return:
1714  	return error;
1715  }
1716  
1717  int
xfs_set_dmattrs(xfs_inode_t * ip,u_int evmask,u_int16_t state)1718  xfs_set_dmattrs(
1719  	xfs_inode_t     *ip,
1720  	u_int		evmask,
1721  	u_int16_t	state)
1722  {
1723  	xfs_mount_t	*mp = ip->i_mount;
1724  	xfs_trans_t	*tp;
1725  	int		error;
1726  
1727  	if (!capable(CAP_SYS_ADMIN))
1728  		return XFS_ERROR(EPERM);
1729  
1730  	if (XFS_FORCED_SHUTDOWN(mp))
1731  		return XFS_ERROR(EIO);
1732  
1733  	tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS);
1734  	error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES (mp), 0, 0, 0);
1735  	if (error) {
1736  		xfs_trans_cancel(tp, 0);
1737  		return error;
1738  	}
1739  	xfs_ilock(ip, XFS_ILOCK_EXCL);
1740  	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1741  
1742  	ip->i_d.di_dmevmask = evmask;
1743  	ip->i_d.di_dmstate  = state;
1744  
1745  	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1746  	error = xfs_trans_commit(tp, 0);
1747  
1748  	return error;
1749  }
1750  
1751  /*
1752   * xfs_alloc_file_space()
1753   *      This routine allocates disk space for the given file.
1754   *
1755   *	If alloc_type == 0, this request is for an ALLOCSP type
1756   *	request which will change the file size.  In this case, no
1757   *	DMAPI event will be generated by the call.  A TRUNCATE event
1758   *	will be generated later by xfs_setattr.
1759   *
1760   *	If alloc_type != 0, this request is for a RESVSP type
1761   *	request, and a DMAPI DM_EVENT_WRITE will be generated if the
1762   *	lower block boundary byte address is less than the file's
1763   *	length.
1764   *
1765   * RETURNS:
1766   *       0 on success
1767   *      errno on error
1768   *
1769   */
1770  STATIC int
xfs_alloc_file_space(xfs_inode_t * ip,xfs_off_t offset,xfs_off_t len,int alloc_type,int attr_flags)1771  xfs_alloc_file_space(
1772  	xfs_inode_t		*ip,
1773  	xfs_off_t		offset,
1774  	xfs_off_t		len,
1775  	int			alloc_type,
1776  	int			attr_flags)
1777  {
1778  	xfs_mount_t		*mp = ip->i_mount;
1779  	xfs_off_t		count;
1780  	xfs_filblks_t		allocated_fsb;
1781  	xfs_filblks_t		allocatesize_fsb;
1782  	xfs_extlen_t		extsz, temp;
1783  	xfs_fileoff_t		startoffset_fsb;
1784  	xfs_fsblock_t		firstfsb;
1785  	int			nimaps;
1786  	int			quota_flag;
1787  	int			rt;
1788  	xfs_trans_t		*tp;
1789  	xfs_bmbt_irec_t		imaps[1], *imapp;
1790  	xfs_bmap_free_t		free_list;
1791  	uint			qblocks, resblks, resrtextents;
1792  	int			committed;
1793  	int			error;
1794  
1795  	trace_xfs_alloc_file_space(ip);
1796  
1797  	if (XFS_FORCED_SHUTDOWN(mp))
1798  		return XFS_ERROR(EIO);
1799  
1800  	error = xfs_qm_dqattach(ip, 0);
1801  	if (error)
1802  		return error;
1803  
1804  	if (len <= 0)
1805  		return XFS_ERROR(EINVAL);
1806  
1807  	rt = XFS_IS_REALTIME_INODE(ip);
1808  	extsz = xfs_get_extsz_hint(ip);
1809  
1810  	count = len;
1811  	imapp = &imaps[0];
1812  	nimaps = 1;
1813  	startoffset_fsb	= XFS_B_TO_FSBT(mp, offset);
1814  	allocatesize_fsb = XFS_B_TO_FSB(mp, count);
1815  
1816  	/*
1817  	 * Allocate file space until done or until there is an error
1818  	 */
1819  	while (allocatesize_fsb && !error) {
1820  		xfs_fileoff_t	s, e;
1821  
1822  		/*
1823  		 * Determine space reservations for data/realtime.
1824  		 */
1825  		if (unlikely(extsz)) {
1826  			s = startoffset_fsb;
1827  			do_div(s, extsz);
1828  			s *= extsz;
1829  			e = startoffset_fsb + allocatesize_fsb;
1830  			if ((temp = do_mod(startoffset_fsb, extsz)))
1831  				e += temp;
1832  			if ((temp = do_mod(e, extsz)))
1833  				e += extsz - temp;
1834  		} else {
1835  			s = 0;
1836  			e = allocatesize_fsb;
1837  		}
1838  
1839  		/*
1840  		 * The transaction reservation is limited to a 32-bit block
1841  		 * count, hence we need to limit the number of blocks we are
1842  		 * trying to reserve to avoid an overflow. We can't allocate
1843  		 * more than @nimaps extents, and an extent is limited on disk
1844  		 * to MAXEXTLEN (21 bits), so use that to enforce the limit.
1845  		 */
1846  		resblks = min_t(xfs_fileoff_t, (e - s), (MAXEXTLEN * nimaps));
1847  		if (unlikely(rt)) {
1848  			resrtextents = qblocks = resblks;
1849  			resrtextents /= mp->m_sb.sb_rextsize;
1850  			resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
1851  			quota_flag = XFS_QMOPT_RES_RTBLKS;
1852  		} else {
1853  			resrtextents = 0;
1854  			resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks);
1855  			quota_flag = XFS_QMOPT_RES_REGBLKS;
1856  		}
1857  
1858  		/*
1859  		 * Allocate and setup the transaction.
1860  		 */
1861  		tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
1862  		error = xfs_trans_reserve(tp, resblks,
1863  					  XFS_WRITE_LOG_RES(mp), resrtextents,
1864  					  XFS_TRANS_PERM_LOG_RES,
1865  					  XFS_WRITE_LOG_COUNT);
1866  		/*
1867  		 * Check for running out of space
1868  		 */
1869  		if (error) {
1870  			/*
1871  			 * Free the transaction structure.
1872  			 */
1873  			ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
1874  			xfs_trans_cancel(tp, 0);
1875  			break;
1876  		}
1877  		xfs_ilock(ip, XFS_ILOCK_EXCL);
1878  		error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks,
1879  						      0, quota_flag);
1880  		if (error)
1881  			goto error1;
1882  
1883  		xfs_trans_ijoin(tp, ip, 0);
1884  
1885  		xfs_bmap_init(&free_list, &firstfsb);
1886  		error = xfs_bmapi_write(tp, ip, startoffset_fsb,
1887  					allocatesize_fsb, alloc_type, &firstfsb,
1888  					0, imapp, &nimaps, &free_list);
1889  		if (error) {
1890  			goto error0;
1891  		}
1892  
1893  		/*
1894  		 * Complete the transaction
1895  		 */
1896  		error = xfs_bmap_finish(&tp, &free_list, &committed);
1897  		if (error) {
1898  			goto error0;
1899  		}
1900  
1901  		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1902  		xfs_iunlock(ip, XFS_ILOCK_EXCL);
1903  		if (error) {
1904  			break;
1905  		}
1906  
1907  		allocated_fsb = imapp->br_blockcount;
1908  
1909  		if (nimaps == 0) {
1910  			error = XFS_ERROR(ENOSPC);
1911  			break;
1912  		}
1913  
1914  		startoffset_fsb += allocated_fsb;
1915  		allocatesize_fsb -= allocated_fsb;
1916  	}
1917  
1918  	return error;
1919  
1920  error0:	/* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
1921  	xfs_bmap_cancel(&free_list);
1922  	xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
1923  
1924  error1:	/* Just cancel transaction */
1925  	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
1926  	xfs_iunlock(ip, XFS_ILOCK_EXCL);
1927  	return error;
1928  }
1929  
1930  /*
1931   * Zero file bytes between startoff and endoff inclusive.
1932   * The iolock is held exclusive and no blocks are buffered.
1933   *
1934   * This function is used by xfs_free_file_space() to zero
1935   * partial blocks when the range to free is not block aligned.
1936   * When unreserving space with boundaries that are not block
1937   * aligned we round up the start and round down the end
1938   * boundaries and then use this function to zero the parts of
1939   * the blocks that got dropped during the rounding.
1940   */
1941  STATIC int
xfs_zero_remaining_bytes(xfs_inode_t * ip,xfs_off_t startoff,xfs_off_t endoff)1942  xfs_zero_remaining_bytes(
1943  	xfs_inode_t		*ip,
1944  	xfs_off_t		startoff,
1945  	xfs_off_t		endoff)
1946  {
1947  	xfs_bmbt_irec_t		imap;
1948  	xfs_fileoff_t		offset_fsb;
1949  	xfs_off_t		lastoffset;
1950  	xfs_off_t		offset;
1951  	xfs_buf_t		*bp;
1952  	xfs_mount_t		*mp = ip->i_mount;
1953  	int			nimap;
1954  	int			error = 0;
1955  
1956  	/*
1957  	 * Avoid doing I/O beyond eof - it's not necessary
1958  	 * since nothing can read beyond eof.  The space will
1959  	 * be zeroed when the file is extended anyway.
1960  	 */
1961  	if (startoff >= XFS_ISIZE(ip))
1962  		return 0;
1963  
1964  	if (endoff > XFS_ISIZE(ip))
1965  		endoff = XFS_ISIZE(ip);
1966  
1967  	bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ?
1968  					mp->m_rtdev_targp : mp->m_ddev_targp,
1969  				mp->m_sb.sb_blocksize, XBF_DONT_BLOCK);
1970  	if (!bp)
1971  		return XFS_ERROR(ENOMEM);
1972  
1973  	xfs_buf_unlock(bp);
1974  
1975  	for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
1976  		offset_fsb = XFS_B_TO_FSBT(mp, offset);
1977  		nimap = 1;
1978  		error = xfs_bmapi_read(ip, offset_fsb, 1, &imap, &nimap, 0);
1979  		if (error || nimap < 1)
1980  			break;
1981  		ASSERT(imap.br_blockcount >= 1);
1982  		ASSERT(imap.br_startoff == offset_fsb);
1983  		lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1;
1984  		if (lastoffset > endoff)
1985  			lastoffset = endoff;
1986  		if (imap.br_startblock == HOLESTARTBLOCK)
1987  			continue;
1988  		ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
1989  		if (imap.br_state == XFS_EXT_UNWRITTEN)
1990  			continue;
1991  		XFS_BUF_UNDONE(bp);
1992  		XFS_BUF_UNWRITE(bp);
1993  		XFS_BUF_READ(bp);
1994  		XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock));
1995  		xfsbdstrat(mp, bp);
1996  		error = xfs_buf_iowait(bp);
1997  		if (error) {
1998  			xfs_buf_ioerror_alert(bp,
1999  					"xfs_zero_remaining_bytes(read)");
2000  			break;
2001  		}
2002  		memset(bp->b_addr +
2003  			(offset - XFS_FSB_TO_B(mp, imap.br_startoff)),
2004  		      0, lastoffset - offset + 1);
2005  		XFS_BUF_UNDONE(bp);
2006  		XFS_BUF_UNREAD(bp);
2007  		XFS_BUF_WRITE(bp);
2008  		xfsbdstrat(mp, bp);
2009  		error = xfs_buf_iowait(bp);
2010  		if (error) {
2011  			xfs_buf_ioerror_alert(bp,
2012  					"xfs_zero_remaining_bytes(write)");
2013  			break;
2014  		}
2015  	}
2016  	xfs_buf_free(bp);
2017  	return error;
2018  }
2019  
2020  /*
2021   * xfs_free_file_space()
2022   *      This routine frees disk space for the given file.
2023   *
2024   *	This routine is only called by xfs_change_file_space
2025   *	for an UNRESVSP type call.
2026   *
2027   * RETURNS:
2028   *       0 on success
2029   *      errno on error
2030   *
2031   */
2032  STATIC int
xfs_free_file_space(xfs_inode_t * ip,xfs_off_t offset,xfs_off_t len,int attr_flags)2033  xfs_free_file_space(
2034  	xfs_inode_t		*ip,
2035  	xfs_off_t		offset,
2036  	xfs_off_t		len,
2037  	int			attr_flags)
2038  {
2039  	int			committed;
2040  	int			done;
2041  	xfs_fileoff_t		endoffset_fsb;
2042  	int			error;
2043  	xfs_fsblock_t		firstfsb;
2044  	xfs_bmap_free_t		free_list;
2045  	xfs_bmbt_irec_t		imap;
2046  	xfs_off_t		ioffset;
2047  	xfs_extlen_t		mod=0;
2048  	xfs_mount_t		*mp;
2049  	int			nimap;
2050  	uint			resblks;
2051  	uint			rounding;
2052  	int			rt;
2053  	xfs_fileoff_t		startoffset_fsb;
2054  	xfs_trans_t		*tp;
2055  	int			need_iolock = 1;
2056  
2057  	mp = ip->i_mount;
2058  
2059  	trace_xfs_free_file_space(ip);
2060  
2061  	error = xfs_qm_dqattach(ip, 0);
2062  	if (error)
2063  		return error;
2064  
2065  	error = 0;
2066  	if (len <= 0)	/* if nothing being freed */
2067  		return error;
2068  	rt = XFS_IS_REALTIME_INODE(ip);
2069  	startoffset_fsb	= XFS_B_TO_FSB(mp, offset);
2070  	endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
2071  
2072  	if (attr_flags & XFS_ATTR_NOLOCK)
2073  		need_iolock = 0;
2074  	if (need_iolock) {
2075  		xfs_ilock(ip, XFS_IOLOCK_EXCL);
2076  		/* wait for the completion of any pending DIOs */
2077  		inode_dio_wait(VFS_I(ip));
2078  	}
2079  
2080  	rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
2081  	ioffset = offset & ~(rounding - 1);
2082  
2083  	if (VN_CACHED(VFS_I(ip)) != 0) {
2084  		error = xfs_flushinval_pages(ip, ioffset, -1, FI_REMAPF_LOCKED);
2085  		if (error)
2086  			goto out_unlock_iolock;
2087  	}
2088  
2089  	/*
2090  	 * Need to zero the stuff we're not freeing, on disk.
2091  	 * If it's a realtime file & can't use unwritten extents then we
2092  	 * actually need to zero the extent edges.  Otherwise xfs_bunmapi
2093  	 * will take care of it for us.
2094  	 */
2095  	if (rt && !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
2096  		nimap = 1;
2097  		error = xfs_bmapi_read(ip, startoffset_fsb, 1,
2098  					&imap, &nimap, 0);
2099  		if (error)
2100  			goto out_unlock_iolock;
2101  		ASSERT(nimap == 0 || nimap == 1);
2102  		if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
2103  			xfs_daddr_t	block;
2104  
2105  			ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
2106  			block = imap.br_startblock;
2107  			mod = do_div(block, mp->m_sb.sb_rextsize);
2108  			if (mod)
2109  				startoffset_fsb += mp->m_sb.sb_rextsize - mod;
2110  		}
2111  		nimap = 1;
2112  		error = xfs_bmapi_read(ip, endoffset_fsb - 1, 1,
2113  					&imap, &nimap, 0);
2114  		if (error)
2115  			goto out_unlock_iolock;
2116  		ASSERT(nimap == 0 || nimap == 1);
2117  		if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
2118  			ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
2119  			mod++;
2120  			if (mod && (mod != mp->m_sb.sb_rextsize))
2121  				endoffset_fsb -= mod;
2122  		}
2123  	}
2124  	if ((done = (endoffset_fsb <= startoffset_fsb)))
2125  		/*
2126  		 * One contiguous piece to clear
2127  		 */
2128  		error = xfs_zero_remaining_bytes(ip, offset, offset + len - 1);
2129  	else {
2130  		/*
2131  		 * Some full blocks, possibly two pieces to clear
2132  		 */
2133  		if (offset < XFS_FSB_TO_B(mp, startoffset_fsb))
2134  			error = xfs_zero_remaining_bytes(ip, offset,
2135  				XFS_FSB_TO_B(mp, startoffset_fsb) - 1);
2136  		if (!error &&
2137  		    XFS_FSB_TO_B(mp, endoffset_fsb) < offset + len)
2138  			error = xfs_zero_remaining_bytes(ip,
2139  				XFS_FSB_TO_B(mp, endoffset_fsb),
2140  				offset + len - 1);
2141  	}
2142  
2143  	/*
2144  	 * free file space until done or until there is an error
2145  	 */
2146  	resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
2147  	while (!error && !done) {
2148  
2149  		/*
2150  		 * allocate and setup the transaction. Allow this
2151  		 * transaction to dip into the reserve blocks to ensure
2152  		 * the freeing of the space succeeds at ENOSPC.
2153  		 */
2154  		tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
2155  		tp->t_flags |= XFS_TRANS_RESERVE;
2156  		error = xfs_trans_reserve(tp,
2157  					  resblks,
2158  					  XFS_WRITE_LOG_RES(mp),
2159  					  0,
2160  					  XFS_TRANS_PERM_LOG_RES,
2161  					  XFS_WRITE_LOG_COUNT);
2162  
2163  		/*
2164  		 * check for running out of space
2165  		 */
2166  		if (error) {
2167  			/*
2168  			 * Free the transaction structure.
2169  			 */
2170  			ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
2171  			xfs_trans_cancel(tp, 0);
2172  			break;
2173  		}
2174  		xfs_ilock(ip, XFS_ILOCK_EXCL);
2175  		error = xfs_trans_reserve_quota(tp, mp,
2176  				ip->i_udquot, ip->i_gdquot,
2177  				resblks, 0, XFS_QMOPT_RES_REGBLKS);
2178  		if (error)
2179  			goto error1;
2180  
2181  		xfs_trans_ijoin(tp, ip, 0);
2182  
2183  		/*
2184  		 * issue the bunmapi() call to free the blocks
2185  		 */
2186  		xfs_bmap_init(&free_list, &firstfsb);
2187  		error = xfs_bunmapi(tp, ip, startoffset_fsb,
2188  				  endoffset_fsb - startoffset_fsb,
2189  				  0, 2, &firstfsb, &free_list, &done);
2190  		if (error) {
2191  			goto error0;
2192  		}
2193  
2194  		/*
2195  		 * complete the transaction
2196  		 */
2197  		error = xfs_bmap_finish(&tp, &free_list, &committed);
2198  		if (error) {
2199  			goto error0;
2200  		}
2201  
2202  		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
2203  		xfs_iunlock(ip, XFS_ILOCK_EXCL);
2204  	}
2205  
2206   out_unlock_iolock:
2207  	if (need_iolock)
2208  		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
2209  	return error;
2210  
2211   error0:
2212  	xfs_bmap_cancel(&free_list);
2213   error1:
2214  	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
2215  	xfs_iunlock(ip, need_iolock ? (XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL) :
2216  		    XFS_ILOCK_EXCL);
2217  	return error;
2218  }
2219  
2220  /*
2221   * xfs_change_file_space()
2222   *      This routine allocates or frees disk space for the given file.
2223   *      The user specified parameters are checked for alignment and size
2224   *      limitations.
2225   *
2226   * RETURNS:
2227   *       0 on success
2228   *      errno on error
2229   *
2230   */
2231  int
xfs_change_file_space(xfs_inode_t * ip,int cmd,xfs_flock64_t * bf,xfs_off_t offset,int attr_flags)2232  xfs_change_file_space(
2233  	xfs_inode_t	*ip,
2234  	int		cmd,
2235  	xfs_flock64_t	*bf,
2236  	xfs_off_t	offset,
2237  	int		attr_flags)
2238  {
2239  	xfs_mount_t	*mp = ip->i_mount;
2240  	int		clrprealloc;
2241  	int		error;
2242  	xfs_fsize_t	fsize;
2243  	int		setprealloc;
2244  	xfs_off_t	startoffset;
2245  	xfs_off_t	llen;
2246  	xfs_trans_t	*tp;
2247  	struct iattr	iattr;
2248  	int		prealloc_type;
2249  
2250  	if (!S_ISREG(ip->i_d.di_mode))
2251  		return XFS_ERROR(EINVAL);
2252  
2253  	switch (bf->l_whence) {
2254  	case 0: /*SEEK_SET*/
2255  		break;
2256  	case 1: /*SEEK_CUR*/
2257  		bf->l_start += offset;
2258  		break;
2259  	case 2: /*SEEK_END*/
2260  		bf->l_start += XFS_ISIZE(ip);
2261  		break;
2262  	default:
2263  		return XFS_ERROR(EINVAL);
2264  	}
2265  
2266  	llen = bf->l_len > 0 ? bf->l_len - 1 : bf->l_len;
2267  
2268  	if (   (bf->l_start < 0)
2269  	    || (bf->l_start > XFS_MAXIOFFSET(mp))
2270  	    || (bf->l_start + llen < 0)
2271  	    || (bf->l_start + llen > XFS_MAXIOFFSET(mp)))
2272  		return XFS_ERROR(EINVAL);
2273  
2274  	bf->l_whence = 0;
2275  
2276  	startoffset = bf->l_start;
2277  	fsize = XFS_ISIZE(ip);
2278  
2279  	/*
2280  	 * XFS_IOC_RESVSP and XFS_IOC_UNRESVSP will reserve or unreserve
2281  	 * file space.
2282  	 * These calls do NOT zero the data space allocated to the file,
2283  	 * nor do they change the file size.
2284  	 *
2285  	 * XFS_IOC_ALLOCSP and XFS_IOC_FREESP will allocate and free file
2286  	 * space.
2287  	 * These calls cause the new file data to be zeroed and the file
2288  	 * size to be changed.
2289  	 */
2290  	setprealloc = clrprealloc = 0;
2291  	prealloc_type = XFS_BMAPI_PREALLOC;
2292  
2293  	switch (cmd) {
2294  	case XFS_IOC_ZERO_RANGE:
2295  		prealloc_type |= XFS_BMAPI_CONVERT;
2296  		xfs_tosspages(ip, startoffset, startoffset + bf->l_len, 0);
2297  		/* FALLTHRU */
2298  	case XFS_IOC_RESVSP:
2299  	case XFS_IOC_RESVSP64:
2300  		error = xfs_alloc_file_space(ip, startoffset, bf->l_len,
2301  						prealloc_type, attr_flags);
2302  		if (error)
2303  			return error;
2304  		setprealloc = 1;
2305  		break;
2306  
2307  	case XFS_IOC_UNRESVSP:
2308  	case XFS_IOC_UNRESVSP64:
2309  		if ((error = xfs_free_file_space(ip, startoffset, bf->l_len,
2310  								attr_flags)))
2311  			return error;
2312  		break;
2313  
2314  	case XFS_IOC_ALLOCSP:
2315  	case XFS_IOC_ALLOCSP64:
2316  	case XFS_IOC_FREESP:
2317  	case XFS_IOC_FREESP64:
2318  		if (startoffset > fsize) {
2319  			error = xfs_alloc_file_space(ip, fsize,
2320  					startoffset - fsize, 0, attr_flags);
2321  			if (error)
2322  				break;
2323  		}
2324  
2325  		iattr.ia_valid = ATTR_SIZE;
2326  		iattr.ia_size = startoffset;
2327  
2328  		error = xfs_setattr_size(ip, &iattr, attr_flags);
2329  
2330  		if (error)
2331  			return error;
2332  
2333  		clrprealloc = 1;
2334  		break;
2335  
2336  	default:
2337  		ASSERT(0);
2338  		return XFS_ERROR(EINVAL);
2339  	}
2340  
2341  	/*
2342  	 * update the inode timestamp, mode, and prealloc flag bits
2343  	 */
2344  	tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID);
2345  
2346  	if ((error = xfs_trans_reserve(tp, 0, XFS_WRITEID_LOG_RES(mp),
2347  				      0, 0, 0))) {
2348  		/* ASSERT(0); */
2349  		xfs_trans_cancel(tp, 0);
2350  		return error;
2351  	}
2352  
2353  	xfs_ilock(ip, XFS_ILOCK_EXCL);
2354  	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
2355  
2356  	if ((attr_flags & XFS_ATTR_DMI) == 0) {
2357  		ip->i_d.di_mode &= ~S_ISUID;
2358  
2359  		/*
2360  		 * Note that we don't have to worry about mandatory
2361  		 * file locking being disabled here because we only
2362  		 * clear the S_ISGID bit if the Group execute bit is
2363  		 * on, but if it was on then mandatory locking wouldn't
2364  		 * have been enabled.
2365  		 */
2366  		if (ip->i_d.di_mode & S_IXGRP)
2367  			ip->i_d.di_mode &= ~S_ISGID;
2368  
2369  		xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
2370  	}
2371  	if (setprealloc)
2372  		ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
2373  	else if (clrprealloc)
2374  		ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC;
2375  
2376  	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
2377  	if (attr_flags & XFS_ATTR_SYNC)
2378  		xfs_trans_set_sync(tp);
2379  	return xfs_trans_commit(tp, 0);
2380  }
2381