/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
18 
19 #include "xfs.h"
20 #include "xfs_fs.h"
21 #include "xfs_types.h"
22 #include "xfs_bit.h"
23 #include "xfs_log.h"
24 #include "xfs_inum.h"
25 #include "xfs_trans.h"
26 #include "xfs_sb.h"
27 #include "xfs_ag.h"
28 #include "xfs_dir2.h"
29 #include "xfs_dmapi.h"
30 #include "xfs_mount.h"
31 #include "xfs_da_btree.h"
32 #include "xfs_bmap_btree.h"
33 #include "xfs_alloc_btree.h"
34 #include "xfs_ialloc_btree.h"
35 #include "xfs_dir2_sf.h"
36 #include "xfs_attr_sf.h"
37 #include "xfs_dinode.h"
38 #include "xfs_inode.h"
39 #include "xfs_inode_item.h"
40 #include "xfs_itable.h"
41 #include "xfs_btree.h"
42 #include "xfs_ialloc.h"
43 #include "xfs_alloc.h"
44 #include "xfs_bmap.h"
45 #include "xfs_attr.h"
46 #include "xfs_rw.h"
47 #include "xfs_error.h"
48 #include "xfs_quota.h"
49 #include "xfs_utils.h"
50 #include "xfs_rtalloc.h"
51 #include "xfs_trans_space.h"
52 #include "xfs_log_priv.h"
53 #include "xfs_filestream.h"
54 #include "xfs_vnodeops.h"
55 
56 int
xfs_setattr(struct xfs_inode * ip,struct iattr * iattr,int flags)57 xfs_setattr(
58 	struct xfs_inode	*ip,
59 	struct iattr		*iattr,
60 	int			flags)
61 {
62 	xfs_mount_t		*mp = ip->i_mount;
63 	struct inode		*inode = VFS_I(ip);
64 	int			mask = iattr->ia_valid;
65 	xfs_trans_t		*tp;
66 	int			code;
67 	uint			lock_flags;
68 	uint			commit_flags=0;
69 	uid_t			uid=0, iuid=0;
70 	gid_t			gid=0, igid=0;
71 	int			timeflags = 0;
72 	struct xfs_dquot	*udqp, *gdqp, *olddquot1, *olddquot2;
73 	int			need_iolock = 1;
74 
75 	xfs_itrace_entry(ip);
76 
77 	if (mp->m_flags & XFS_MOUNT_RDONLY)
78 		return XFS_ERROR(EROFS);
79 
80 	if (XFS_FORCED_SHUTDOWN(mp))
81 		return XFS_ERROR(EIO);
82 
83 	code = -inode_change_ok(inode, iattr);
84 	if (code)
85 		return code;
86 
87 	olddquot1 = olddquot2 = NULL;
88 	udqp = gdqp = NULL;
89 
90 	/*
91 	 * If disk quotas is on, we make sure that the dquots do exist on disk,
92 	 * before we start any other transactions. Trying to do this later
93 	 * is messy. We don't care to take a readlock to look at the ids
94 	 * in inode here, because we can't hold it across the trans_reserve.
95 	 * If the IDs do change before we take the ilock, we're covered
96 	 * because the i_*dquot fields will get updated anyway.
97 	 */
98 	if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) {
99 		uint	qflags = 0;
100 
101 		if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
102 			uid = iattr->ia_uid;
103 			qflags |= XFS_QMOPT_UQUOTA;
104 		} else {
105 			uid = ip->i_d.di_uid;
106 		}
107 		if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
108 			gid = iattr->ia_gid;
109 			qflags |= XFS_QMOPT_GQUOTA;
110 		}  else {
111 			gid = ip->i_d.di_gid;
112 		}
113 
114 		/*
115 		 * We take a reference when we initialize udqp and gdqp,
116 		 * so it is important that we never blindly double trip on
117 		 * the same variable. See xfs_create() for an example.
118 		 */
119 		ASSERT(udqp == NULL);
120 		ASSERT(gdqp == NULL);
121 		code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, ip->i_d.di_projid,
122 					 qflags, &udqp, &gdqp);
123 		if (code)
124 			return code;
125 	}
126 
127 	/*
128 	 * For the other attributes, we acquire the inode lock and
129 	 * first do an error checking pass.
130 	 */
131 	tp = NULL;
132 	lock_flags = XFS_ILOCK_EXCL;
133 	if (flags & XFS_ATTR_NOLOCK)
134 		need_iolock = 0;
135 	if (!(mask & ATTR_SIZE)) {
136 		if ((mask != (ATTR_CTIME|ATTR_ATIME|ATTR_MTIME)) ||
137 		    (mp->m_flags & XFS_MOUNT_WSYNC)) {
138 			tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
139 			commit_flags = 0;
140 			if ((code = xfs_trans_reserve(tp, 0,
141 						     XFS_ICHANGE_LOG_RES(mp), 0,
142 						     0, 0))) {
143 				lock_flags = 0;
144 				goto error_return;
145 			}
146 		}
147 	} else {
148 		if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) &&
149 		    !(flags & XFS_ATTR_DMI)) {
150 			int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR;
151 			code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, ip,
152 				iattr->ia_size, 0, dmflags, NULL);
153 			if (code) {
154 				lock_flags = 0;
155 				goto error_return;
156 			}
157 		}
158 		if (need_iolock)
159 			lock_flags |= XFS_IOLOCK_EXCL;
160 	}
161 
162 	xfs_ilock(ip, lock_flags);
163 
164 	/*
165 	 * Change file ownership.  Must be the owner or privileged.
166 	 */
167 	if (mask & (ATTR_UID|ATTR_GID)) {
168 		/*
169 		 * These IDs could have changed since we last looked at them.
170 		 * But, we're assured that if the ownership did change
171 		 * while we didn't have the inode locked, inode's dquot(s)
172 		 * would have changed also.
173 		 */
174 		iuid = ip->i_d.di_uid;
175 		igid = ip->i_d.di_gid;
176 		gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
177 		uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
178 
179 		/*
180 		 * Do a quota reservation only if uid/gid is actually
181 		 * going to change.
182 		 */
183 		if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
184 		    (XFS_IS_GQUOTA_ON(mp) && igid != gid)) {
185 			ASSERT(tp);
186 			code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
187 						capable(CAP_FOWNER) ?
188 						XFS_QMOPT_FORCE_RES : 0);
189 			if (code)	/* out of quota */
190 				goto error_return;
191 		}
192 	}
193 
194 	/*
195 	 * Truncate file.  Must have write permission and not be a directory.
196 	 */
197 	if (mask & ATTR_SIZE) {
198 		/* Short circuit the truncate case for zero length files */
199 		if (iattr->ia_size == 0 &&
200 		    ip->i_size == 0 && ip->i_d.di_nextents == 0) {
201 			xfs_iunlock(ip, XFS_ILOCK_EXCL);
202 			lock_flags &= ~XFS_ILOCK_EXCL;
203 			if (mask & ATTR_CTIME)
204 				xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
205 			code = 0;
206 			goto error_return;
207 		}
208 
209 		if (S_ISDIR(ip->i_d.di_mode)) {
210 			code = XFS_ERROR(EISDIR);
211 			goto error_return;
212 		} else if (!S_ISREG(ip->i_d.di_mode)) {
213 			code = XFS_ERROR(EINVAL);
214 			goto error_return;
215 		}
216 
217 		/*
218 		 * Make sure that the dquots are attached to the inode.
219 		 */
220 		code = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED);
221 		if (code)
222 			goto error_return;
223 
224 		/*
225 		 * Now we can make the changes.  Before we join the inode
226 		 * to the transaction, if ATTR_SIZE is set then take care of
227 		 * the part of the truncation that must be done without the
228 		 * inode lock.  This needs to be done before joining the inode
229 		 * to the transaction, because the inode cannot be unlocked
230 		 * once it is a part of the transaction.
231 		 */
232 		if (iattr->ia_size > ip->i_size) {
233 			/*
234 			 * Do the first part of growing a file: zero any data
235 			 * in the last block that is beyond the old EOF.  We
236 			 * need to do this before the inode is joined to the
237 			 * transaction to modify the i_size.
238 			 */
239 			code = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
240 		}
241 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
242 
243 		/*
244 		 * We are going to log the inode size change in this
245 		 * transaction so any previous writes that are beyond the on
246 		 * disk EOF and the new EOF that have not been written out need
247 		 * to be written here. If we do not write the data out, we
248 		 * expose ourselves to the null files problem.
249 		 *
250 		 * Only flush from the on disk size to the smaller of the in
251 		 * memory file size or the new size as that's the range we
252 		 * really care about here and prevents waiting for other data
253 		 * not within the range we care about here.
254 		 */
255 		if (!code &&
256 		    ip->i_size != ip->i_d.di_size &&
257 		    iattr->ia_size > ip->i_d.di_size) {
258 			code = xfs_flush_pages(ip,
259 					ip->i_d.di_size, iattr->ia_size,
260 					XFS_B_ASYNC, FI_NONE);
261 		}
262 
263 		/* wait for all I/O to complete */
264 		xfs_ioend_wait(ip);
265 
266 		if (!code)
267 			code = xfs_itruncate_data(ip, iattr->ia_size);
268 		if (code) {
269 			ASSERT(tp == NULL);
270 			lock_flags &= ~XFS_ILOCK_EXCL;
271 			ASSERT(lock_flags == XFS_IOLOCK_EXCL);
272 			goto error_return;
273 		}
274 		tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
275 		if ((code = xfs_trans_reserve(tp, 0,
276 					     XFS_ITRUNCATE_LOG_RES(mp), 0,
277 					     XFS_TRANS_PERM_LOG_RES,
278 					     XFS_ITRUNCATE_LOG_COUNT))) {
279 			xfs_trans_cancel(tp, 0);
280 			if (need_iolock)
281 				xfs_iunlock(ip, XFS_IOLOCK_EXCL);
282 			return code;
283 		}
284 		commit_flags = XFS_TRANS_RELEASE_LOG_RES;
285 		xfs_ilock(ip, XFS_ILOCK_EXCL);
286 
287 		xfs_trans_ijoin(tp, ip, lock_flags);
288 		xfs_trans_ihold(tp, ip);
289 
290 		/*
291 		 * Only change the c/mtime if we are changing the size
292 		 * or we are explicitly asked to change it. This handles
293 		 * the semantic difference between truncate() and ftruncate()
294 		 * as implemented in the VFS.
295 		 */
296 		if (iattr->ia_size != ip->i_size || (mask & ATTR_CTIME))
297 			timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
298 
299 		if (iattr->ia_size > ip->i_size) {
300 			ip->i_d.di_size = iattr->ia_size;
301 			ip->i_size = iattr->ia_size;
302 			if (!(flags & XFS_ATTR_DMI))
303 				xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
304 			xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
305 		} else if (iattr->ia_size <= ip->i_size ||
306 			   (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
307 			/*
308 			 * signal a sync transaction unless
309 			 * we're truncating an already unlinked
310 			 * file on a wsync filesystem
311 			 */
312 			code = xfs_itruncate_finish(&tp, ip, iattr->ia_size,
313 					    XFS_DATA_FORK,
314 					    ((ip->i_d.di_nlink != 0 ||
315 					      !(mp->m_flags & XFS_MOUNT_WSYNC))
316 					     ? 1 : 0));
317 			if (code)
318 				goto abort_return;
319 			/*
320 			 * Truncated "down", so we're removing references
321 			 * to old data here - if we now delay flushing for
322 			 * a long time, we expose ourselves unduly to the
323 			 * notorious NULL files problem.  So, we mark this
324 			 * vnode and flush it when the file is closed, and
325 			 * do not wait the usual (long) time for writeout.
326 			 */
327 			xfs_iflags_set(ip, XFS_ITRUNCATED);
328 		}
329 	} else if (tp) {
330 		xfs_trans_ijoin(tp, ip, lock_flags);
331 		xfs_trans_ihold(tp, ip);
332 	}
333 
334 	/*
335 	 * Change file ownership.  Must be the owner or privileged.
336 	 */
337 	if (mask & (ATTR_UID|ATTR_GID)) {
338 		/*
339 		 * CAP_FSETID overrides the following restrictions:
340 		 *
341 		 * The set-user-ID and set-group-ID bits of a file will be
342 		 * cleared upon successful return from chown()
343 		 */
344 		if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
345 		    !capable(CAP_FSETID)) {
346 			ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
347 		}
348 
349 		/*
350 		 * Change the ownerships and register quota modifications
351 		 * in the transaction.
352 		 */
353 		if (iuid != uid) {
354 			if (XFS_IS_UQUOTA_ON(mp)) {
355 				ASSERT(mask & ATTR_UID);
356 				ASSERT(udqp);
357 				olddquot1 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
358 							&ip->i_udquot, udqp);
359 			}
360 			ip->i_d.di_uid = uid;
361 			inode->i_uid = uid;
362 		}
363 		if (igid != gid) {
364 			if (XFS_IS_GQUOTA_ON(mp)) {
365 				ASSERT(!XFS_IS_PQUOTA_ON(mp));
366 				ASSERT(mask & ATTR_GID);
367 				ASSERT(gdqp);
368 				olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
369 							&ip->i_gdquot, gdqp);
370 			}
371 			ip->i_d.di_gid = gid;
372 			inode->i_gid = gid;
373 		}
374 
375 		xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
376 		timeflags |= XFS_ICHGTIME_CHG;
377 	}
378 
379 	/*
380 	 * Change file access modes.
381 	 */
382 	if (mask & ATTR_MODE) {
383 		umode_t mode = iattr->ia_mode;
384 
385 		if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
386 			mode &= ~S_ISGID;
387 
388 		ip->i_d.di_mode &= S_IFMT;
389 		ip->i_d.di_mode |= mode & ~S_IFMT;
390 
391 		inode->i_mode &= S_IFMT;
392 		inode->i_mode |= mode & ~S_IFMT;
393 
394 		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
395 		timeflags |= XFS_ICHGTIME_CHG;
396 	}
397 
398 	/*
399 	 * Change file access or modified times.
400 	 */
401 	if (mask & (ATTR_ATIME|ATTR_MTIME)) {
402 		if (mask & ATTR_ATIME) {
403 			inode->i_atime = iattr->ia_atime;
404 			ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
405 			ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
406 			ip->i_update_core = 1;
407 		}
408 		if (mask & ATTR_MTIME) {
409 			inode->i_mtime = iattr->ia_mtime;
410 			ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
411 			ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
412 			timeflags &= ~XFS_ICHGTIME_MOD;
413 			timeflags |= XFS_ICHGTIME_CHG;
414 		}
415 		if (tp && (mask & (ATTR_MTIME_SET|ATTR_ATIME_SET)))
416 			xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
417 	}
418 
419 	/*
420 	 * Change file inode change time only if ATTR_CTIME set
421 	 * AND we have been called by a DMI function.
422 	 */
423 
424 	if ((flags & XFS_ATTR_DMI) && (mask & ATTR_CTIME)) {
425 		inode->i_ctime = iattr->ia_ctime;
426 		ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
427 		ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
428 		ip->i_update_core = 1;
429 		timeflags &= ~XFS_ICHGTIME_CHG;
430 	}
431 
432 	/*
433 	 * Send out timestamp changes that need to be set to the
434 	 * current time.  Not done when called by a DMI function.
435 	 */
436 	if (timeflags && !(flags & XFS_ATTR_DMI))
437 		xfs_ichgtime(ip, timeflags);
438 
439 	XFS_STATS_INC(xs_ig_attrchg);
440 
441 	/*
442 	 * If this is a synchronous mount, make sure that the
443 	 * transaction goes to disk before returning to the user.
444 	 * This is slightly sub-optimal in that truncates require
445 	 * two sync transactions instead of one for wsync filesystems.
446 	 * One for the truncate and one for the timestamps since we
447 	 * don't want to change the timestamps unless we're sure the
448 	 * truncate worked.  Truncates are less than 1% of the laddis
449 	 * mix so this probably isn't worth the trouble to optimize.
450 	 */
451 	code = 0;
452 	if (tp) {
453 		if (mp->m_flags & XFS_MOUNT_WSYNC)
454 			xfs_trans_set_sync(tp);
455 
456 		code = xfs_trans_commit(tp, commit_flags);
457 	}
458 
459 	xfs_iunlock(ip, lock_flags);
460 
461 	/*
462 	 * Release any dquot(s) the inode had kept before chown.
463 	 */
464 	XFS_QM_DQRELE(mp, olddquot1);
465 	XFS_QM_DQRELE(mp, olddquot2);
466 	XFS_QM_DQRELE(mp, udqp);
467 	XFS_QM_DQRELE(mp, gdqp);
468 
469 	if (code) {
470 		return code;
471 	}
472 
473 	if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) &&
474 	    !(flags & XFS_ATTR_DMI)) {
475 		(void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL,
476 					NULL, DM_RIGHT_NULL, NULL, NULL,
477 					0, 0, AT_DELAY_FLAG(flags));
478 	}
479 	return 0;
480 
481  abort_return:
482 	commit_flags |= XFS_TRANS_ABORT;
483 	/* FALLTHROUGH */
484  error_return:
485 	XFS_QM_DQRELE(mp, udqp);
486 	XFS_QM_DQRELE(mp, gdqp);
487 	if (tp) {
488 		xfs_trans_cancel(tp, commit_flags);
489 	}
490 	if (lock_flags != 0) {
491 		xfs_iunlock(ip, lock_flags);
492 	}
493 	return code;
494 }
495 
496 /*
497  * The maximum pathlen is 1024 bytes. Since the minimum file system
498  * blocksize is 512 bytes, we can get a max of 2 extents back from
499  * bmapi.
500  */
501 #define SYMLINK_MAPS 2
502 
503 STATIC int
xfs_readlink_bmap(xfs_inode_t * ip,char * link)504 xfs_readlink_bmap(
505 	xfs_inode_t	*ip,
506 	char		*link)
507 {
508 	xfs_mount_t	*mp = ip->i_mount;
509 	int		pathlen = ip->i_d.di_size;
510 	int             nmaps = SYMLINK_MAPS;
511 	xfs_bmbt_irec_t mval[SYMLINK_MAPS];
512 	xfs_daddr_t	d;
513 	int		byte_cnt;
514 	int		n;
515 	xfs_buf_t	*bp;
516 	int		error = 0;
517 
518 	error = xfs_bmapi(NULL, ip, 0, XFS_B_TO_FSB(mp, pathlen), 0, NULL, 0,
519 			mval, &nmaps, NULL, NULL);
520 	if (error)
521 		goto out;
522 
523 	for (n = 0; n < nmaps; n++) {
524 		d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
525 		byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
526 
527 		bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0);
528 		error = XFS_BUF_GETERROR(bp);
529 		if (error) {
530 			xfs_ioerror_alert("xfs_readlink",
531 				  ip->i_mount, bp, XFS_BUF_ADDR(bp));
532 			xfs_buf_relse(bp);
533 			goto out;
534 		}
535 		if (pathlen < byte_cnt)
536 			byte_cnt = pathlen;
537 		pathlen -= byte_cnt;
538 
539 		memcpy(link, XFS_BUF_PTR(bp), byte_cnt);
540 		xfs_buf_relse(bp);
541 	}
542 
543 	link[ip->i_d.di_size] = '\0';
544 	error = 0;
545 
546  out:
547 	return error;
548 }
549 
550 int
xfs_readlink(xfs_inode_t * ip,char * link)551 xfs_readlink(
552 	xfs_inode_t     *ip,
553 	char		*link)
554 {
555 	xfs_mount_t	*mp = ip->i_mount;
556 	int		pathlen;
557 	int		error = 0;
558 
559 	xfs_itrace_entry(ip);
560 
561 	if (XFS_FORCED_SHUTDOWN(mp))
562 		return XFS_ERROR(EIO);
563 
564 	xfs_ilock(ip, XFS_ILOCK_SHARED);
565 
566 	ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFLNK);
567 	ASSERT(ip->i_d.di_size <= MAXPATHLEN);
568 
569 	pathlen = ip->i_d.di_size;
570 	if (!pathlen)
571 		goto out;
572 
573 	if (ip->i_df.if_flags & XFS_IFINLINE) {
574 		memcpy(link, ip->i_df.if_u1.if_data, pathlen);
575 		link[pathlen] = '\0';
576 	} else {
577 		error = xfs_readlink_bmap(ip, link);
578 	}
579 
580  out:
581 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
582 	return error;
583 }
584 
585 /*
586  * xfs_fsync
587  *
588  * This is called to sync the inode and its data out to disk.  We need to hold
589  * the I/O lock while flushing the data, and the inode lock while flushing the
590  * inode.  The inode lock CANNOT be held while flushing the data, so acquire
591  * after we're done with that.
592  */
593 int
xfs_fsync(xfs_inode_t * ip)594 xfs_fsync(
595 	xfs_inode_t	*ip)
596 {
597 	xfs_trans_t	*tp;
598 	int		error;
599 	int		log_flushed = 0, changed = 1;
600 
601 	xfs_itrace_entry(ip);
602 
603 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
604 		return XFS_ERROR(EIO);
605 
606 	/* capture size updates in I/O completion before writing the inode. */
607 	error = xfs_wait_on_pages(ip, 0, -1);
608 	if (error)
609 		return XFS_ERROR(error);
610 
611 	/*
612 	 * We always need to make sure that the required inode state is safe on
613 	 * disk.  The vnode might be clean but we still might need to force the
614 	 * log because of committed transactions that haven't hit the disk yet.
615 	 * Likewise, there could be unflushed non-transactional changes to the
616 	 * inode core that have to go to disk and this requires us to issue
617 	 * a synchronous transaction to capture these changes correctly.
618 	 *
619 	 * This code relies on the assumption that if the update_* fields
620 	 * of the inode are clear and the inode is unpinned then it is clean
621 	 * and no action is required.
622 	 */
623 	xfs_ilock(ip, XFS_ILOCK_SHARED);
624 
625 	if (!(ip->i_update_size || ip->i_update_core)) {
626 		/*
627 		 * Timestamps/size haven't changed since last inode flush or
628 		 * inode transaction commit.  That means either nothing got
629 		 * written or a transaction committed which caught the updates.
630 		 * If the latter happened and the transaction hasn't hit the
631 		 * disk yet, the inode will be still be pinned.  If it is,
632 		 * force the log.
633 		 */
634 
635 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
636 
637 		if (xfs_ipincount(ip)) {
638 			error = _xfs_log_force(ip->i_mount, (xfs_lsn_t)0,
639 				      XFS_LOG_FORCE | XFS_LOG_SYNC,
640 				      &log_flushed);
641 		} else {
642 			/*
643 			 * If the inode is not pinned and nothing has changed
644 			 * we don't need to flush the cache.
645 			 */
646 			changed = 0;
647 		}
648 	} else	{
649 		/*
650 		 * Kick off a transaction to log the inode core to get the
651 		 * updates.  The sync transaction will also force the log.
652 		 */
653 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
654 		tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS);
655 		error = xfs_trans_reserve(tp, 0,
656 				XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0);
657 		if (error) {
658 			xfs_trans_cancel(tp, 0);
659 			return error;
660 		}
661 		xfs_ilock(ip, XFS_ILOCK_EXCL);
662 
663 		/*
664 		 * Note - it's possible that we might have pushed ourselves out
665 		 * of the way during trans_reserve which would flush the inode.
666 		 * But there's no guarantee that the inode buffer has actually
667 		 * gone out yet (it's delwri).	Plus the buffer could be pinned
668 		 * anyway if it's part of an inode in another recent
669 		 * transaction.	 So we play it safe and fire off the
670 		 * transaction anyway.
671 		 */
672 		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
673 		xfs_trans_ihold(tp, ip);
674 		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
675 		xfs_trans_set_sync(tp);
676 		error = _xfs_trans_commit(tp, 0, &log_flushed);
677 
678 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
679 	}
680 
681 	if ((ip->i_mount->m_flags & XFS_MOUNT_BARRIER) && changed) {
682 		/*
683 		 * If the log write didn't issue an ordered tag we need
684 		 * to flush the disk cache for the data device now.
685 		 */
686 		if (!log_flushed)
687 			xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp);
688 
689 		/*
690 		 * If this inode is on the RT dev we need to flush that
691 		 * cache as well.
692 		 */
693 		if (XFS_IS_REALTIME_INODE(ip))
694 			xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp);
695 	}
696 
697 	return error;
698 }
699 
700 /*
701  * This is called by xfs_inactive to free any blocks beyond eof
702  * when the link count isn't zero and by xfs_dm_punch_hole() when
703  * punching a hole to EOF.
704  */
705 int
xfs_free_eofblocks(xfs_mount_t * mp,xfs_inode_t * ip,int flags)706 xfs_free_eofblocks(
707 	xfs_mount_t	*mp,
708 	xfs_inode_t	*ip,
709 	int		flags)
710 {
711 	xfs_trans_t	*tp;
712 	int		error;
713 	xfs_fileoff_t	end_fsb;
714 	xfs_fileoff_t	last_fsb;
715 	xfs_filblks_t	map_len;
716 	int		nimaps;
717 	xfs_bmbt_irec_t	imap;
718 	int		use_iolock = (flags & XFS_FREE_EOF_LOCK);
719 
720 	/*
721 	 * Figure out if there are any blocks beyond the end
722 	 * of the file.  If not, then there is nothing to do.
723 	 */
724 	end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_size));
725 	last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
726 	map_len = last_fsb - end_fsb;
727 	if (map_len <= 0)
728 		return 0;
729 
730 	nimaps = 1;
731 	xfs_ilock(ip, XFS_ILOCK_SHARED);
732 	error = xfs_bmapi(NULL, ip, end_fsb, map_len, 0,
733 			  NULL, 0, &imap, &nimaps, NULL, NULL);
734 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
735 
736 	if (!error && (nimaps != 0) &&
737 	    (imap.br_startblock != HOLESTARTBLOCK ||
738 	     ip->i_delayed_blks)) {
739 		/*
740 		 * Attach the dquots to the inode up front.
741 		 */
742 		if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
743 			return error;
744 
745 		/*
746 		 * There are blocks after the end of file.
747 		 * Free them up now by truncating the file to
748 		 * its current size.
749 		 */
750 		tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
751 
752 		/*
753 		 * Do the xfs_itruncate_start() call before
754 		 * reserving any log space because
755 		 * itruncate_start will call into the buffer
756 		 * cache and we can't
757 		 * do that within a transaction.
758 		 */
759 		if (use_iolock)
760 			xfs_ilock(ip, XFS_IOLOCK_EXCL);
761 		error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE,
762 				    ip->i_size);
763 		if (error) {
764 			xfs_trans_cancel(tp, 0);
765 			if (use_iolock)
766 				xfs_iunlock(ip, XFS_IOLOCK_EXCL);
767 			return error;
768 		}
769 
770 		error = xfs_trans_reserve(tp, 0,
771 					  XFS_ITRUNCATE_LOG_RES(mp),
772 					  0, XFS_TRANS_PERM_LOG_RES,
773 					  XFS_ITRUNCATE_LOG_COUNT);
774 		if (error) {
775 			ASSERT(XFS_FORCED_SHUTDOWN(mp));
776 			xfs_trans_cancel(tp, 0);
777 			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
778 			return error;
779 		}
780 
781 		xfs_ilock(ip, XFS_ILOCK_EXCL);
782 		xfs_trans_ijoin(tp, ip,
783 				XFS_IOLOCK_EXCL |
784 				XFS_ILOCK_EXCL);
785 		xfs_trans_ihold(tp, ip);
786 
787 		error = xfs_itruncate_finish(&tp, ip,
788 					     ip->i_size,
789 					     XFS_DATA_FORK,
790 					     0);
791 		/*
792 		 * If we get an error at this point we
793 		 * simply don't bother truncating the file.
794 		 */
795 		if (error) {
796 			xfs_trans_cancel(tp,
797 					 (XFS_TRANS_RELEASE_LOG_RES |
798 					  XFS_TRANS_ABORT));
799 		} else {
800 			error = xfs_trans_commit(tp,
801 						XFS_TRANS_RELEASE_LOG_RES);
802 		}
803 		xfs_iunlock(ip, (use_iolock ? (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)
804 					    : XFS_ILOCK_EXCL));
805 	}
806 	return error;
807 }
808 
809 /*
810  * Free a symlink that has blocks associated with it.
811  */
812 STATIC int
xfs_inactive_symlink_rmt(xfs_inode_t * ip,xfs_trans_t ** tpp)813 xfs_inactive_symlink_rmt(
814 	xfs_inode_t	*ip,
815 	xfs_trans_t	**tpp)
816 {
817 	xfs_buf_t	*bp;
818 	int		committed;
819 	int		done;
820 	int		error;
821 	xfs_fsblock_t	first_block;
822 	xfs_bmap_free_t	free_list;
823 	int		i;
824 	xfs_mount_t	*mp;
825 	xfs_bmbt_irec_t	mval[SYMLINK_MAPS];
826 	int		nmaps;
827 	xfs_trans_t	*ntp;
828 	int		size;
829 	xfs_trans_t	*tp;
830 
831 	tp = *tpp;
832 	mp = ip->i_mount;
833 	ASSERT(ip->i_d.di_size > XFS_IFORK_DSIZE(ip));
834 	/*
835 	 * We're freeing a symlink that has some
836 	 * blocks allocated to it.  Free the
837 	 * blocks here.  We know that we've got
838 	 * either 1 or 2 extents and that we can
839 	 * free them all in one bunmapi call.
840 	 */
841 	ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2);
842 	if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
843 			XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) {
844 		ASSERT(XFS_FORCED_SHUTDOWN(mp));
845 		xfs_trans_cancel(tp, 0);
846 		*tpp = NULL;
847 		return error;
848 	}
849 	/*
850 	 * Lock the inode, fix the size, and join it to the transaction.
851 	 * Hold it so in the normal path, we still have it locked for
852 	 * the second transaction.  In the error paths we need it
853 	 * held so the cancel won't rele it, see below.
854 	 */
855 	xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
856 	size = (int)ip->i_d.di_size;
857 	ip->i_d.di_size = 0;
858 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
859 	xfs_trans_ihold(tp, ip);
860 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
861 	/*
862 	 * Find the block(s) so we can inval and unmap them.
863 	 */
864 	done = 0;
865 	xfs_bmap_init(&free_list, &first_block);
866 	nmaps = ARRAY_SIZE(mval);
867 	if ((error = xfs_bmapi(tp, ip, 0, XFS_B_TO_FSB(mp, size),
868 			XFS_BMAPI_METADATA, &first_block, 0, mval, &nmaps,
869 			&free_list, NULL)))
870 		goto error0;
871 	/*
872 	 * Invalidate the block(s).
873 	 */
874 	for (i = 0; i < nmaps; i++) {
875 		bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
876 			XFS_FSB_TO_DADDR(mp, mval[i].br_startblock),
877 			XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0);
878 		xfs_trans_binval(tp, bp);
879 	}
880 	/*
881 	 * Unmap the dead block(s) to the free_list.
882 	 */
883 	if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps,
884 			&first_block, &free_list, NULL, &done)))
885 		goto error1;
886 	ASSERT(done);
887 	/*
888 	 * Commit the first transaction.  This logs the EFI and the inode.
889 	 */
890 	if ((error = xfs_bmap_finish(&tp, &free_list, &committed)))
891 		goto error1;
892 	/*
893 	 * The transaction must have been committed, since there were
894 	 * actually extents freed by xfs_bunmapi.  See xfs_bmap_finish.
895 	 * The new tp has the extent freeing and EFDs.
896 	 */
897 	ASSERT(committed);
898 	/*
899 	 * The first xact was committed, so add the inode to the new one.
900 	 * Mark it dirty so it will be logged and moved forward in the log as
901 	 * part of every commit.
902 	 */
903 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
904 	xfs_trans_ihold(tp, ip);
905 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
906 	/*
907 	 * Get a new, empty transaction to return to our caller.
908 	 */
909 	ntp = xfs_trans_dup(tp);
910 	/*
911 	 * Commit the transaction containing extent freeing and EFDs.
912 	 * If we get an error on the commit here or on the reserve below,
913 	 * we need to unlock the inode since the new transaction doesn't
914 	 * have the inode attached.
915 	 */
916 	error = xfs_trans_commit(tp, 0);
917 	tp = ntp;
918 	if (error) {
919 		ASSERT(XFS_FORCED_SHUTDOWN(mp));
920 		goto error0;
921 	}
922 	/*
923 	 * transaction commit worked ok so we can drop the extra ticket
924 	 * reference that we gained in xfs_trans_dup()
925 	 */
926 	xfs_log_ticket_put(tp->t_ticket);
927 
928 	/*
929 	 * Remove the memory for extent descriptions (just bookkeeping).
930 	 */
931 	if (ip->i_df.if_bytes)
932 		xfs_idata_realloc(ip, -ip->i_df.if_bytes, XFS_DATA_FORK);
933 	ASSERT(ip->i_df.if_bytes == 0);
934 	/*
935 	 * Put an itruncate log reservation in the new transaction
936 	 * for our caller.
937 	 */
938 	if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
939 			XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) {
940 		ASSERT(XFS_FORCED_SHUTDOWN(mp));
941 		goto error0;
942 	}
943 	/*
944 	 * Return with the inode locked but not joined to the transaction.
945 	 */
946 	*tpp = tp;
947 	return 0;
948 
949  error1:
950 	xfs_bmap_cancel(&free_list);
951  error0:
952 	/*
953 	 * Have to come here with the inode locked and either
954 	 * (held and in the transaction) or (not in the transaction).
955 	 * If the inode isn't held then cancel would iput it, but
956 	 * that's wrong since this is inactive and the vnode ref
957 	 * count is 0 already.
958 	 * Cancel won't do anything to the inode if held, but it still
959 	 * needs to be locked until the cancel is done, if it was
960 	 * joined to the transaction.
961 	 */
962 	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
963 	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
964 	*tpp = NULL;
965 	return error;
966 
967 }
968 
969 STATIC int
xfs_inactive_symlink_local(xfs_inode_t * ip,xfs_trans_t ** tpp)970 xfs_inactive_symlink_local(
971 	xfs_inode_t	*ip,
972 	xfs_trans_t	**tpp)
973 {
974 	int		error;
975 
976 	ASSERT(ip->i_d.di_size <= XFS_IFORK_DSIZE(ip));
977 	/*
978 	 * We're freeing a symlink which fit into
979 	 * the inode.  Just free the memory used
980 	 * to hold the old symlink.
981 	 */
982 	error = xfs_trans_reserve(*tpp, 0,
983 				  XFS_ITRUNCATE_LOG_RES(ip->i_mount),
984 				  0, XFS_TRANS_PERM_LOG_RES,
985 				  XFS_ITRUNCATE_LOG_COUNT);
986 
987 	if (error) {
988 		xfs_trans_cancel(*tpp, 0);
989 		*tpp = NULL;
990 		return error;
991 	}
992 	xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
993 
994 	/*
995 	 * Zero length symlinks _can_ exist.
996 	 */
997 	if (ip->i_df.if_bytes > 0) {
998 		xfs_idata_realloc(ip,
999 				  -(ip->i_df.if_bytes),
1000 				  XFS_DATA_FORK);
1001 		ASSERT(ip->i_df.if_bytes == 0);
1002 	}
1003 	return 0;
1004 }
1005 
1006 STATIC int
xfs_inactive_attrs(xfs_inode_t * ip,xfs_trans_t ** tpp)1007 xfs_inactive_attrs(
1008 	xfs_inode_t	*ip,
1009 	xfs_trans_t	**tpp)
1010 {
1011 	xfs_trans_t	*tp;
1012 	int		error;
1013 	xfs_mount_t	*mp;
1014 
1015 	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
1016 	tp = *tpp;
1017 	mp = ip->i_mount;
1018 	ASSERT(ip->i_d.di_forkoff != 0);
1019 	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1020 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
1021 	if (error)
1022 		goto error_unlock;
1023 
1024 	error = xfs_attr_inactive(ip);
1025 	if (error)
1026 		goto error_unlock;
1027 
1028 	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
1029 	error = xfs_trans_reserve(tp, 0,
1030 				  XFS_IFREE_LOG_RES(mp),
1031 				  0, XFS_TRANS_PERM_LOG_RES,
1032 				  XFS_INACTIVE_LOG_COUNT);
1033 	if (error)
1034 		goto error_cancel;
1035 
1036 	xfs_ilock(ip, XFS_ILOCK_EXCL);
1037 	xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
1038 	xfs_trans_ihold(tp, ip);
1039 	xfs_idestroy_fork(ip, XFS_ATTR_FORK);
1040 
1041 	ASSERT(ip->i_d.di_anextents == 0);
1042 
1043 	*tpp = tp;
1044 	return 0;
1045 
1046 error_cancel:
1047 	ASSERT(XFS_FORCED_SHUTDOWN(mp));
1048 	xfs_trans_cancel(tp, 0);
1049 error_unlock:
1050 	*tpp = NULL;
1051 	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
1052 	return error;
1053 }
1054 
1055 int
xfs_release(xfs_inode_t * ip)1056 xfs_release(
1057 	xfs_inode_t	*ip)
1058 {
1059 	xfs_mount_t	*mp = ip->i_mount;
1060 	int		error;
1061 
1062 	if (!S_ISREG(ip->i_d.di_mode) || (ip->i_d.di_mode == 0))
1063 		return 0;
1064 
1065 	/* If this is a read-only mount, don't do this (would generate I/O) */
1066 	if (mp->m_flags & XFS_MOUNT_RDONLY)
1067 		return 0;
1068 
1069 	if (!XFS_FORCED_SHUTDOWN(mp)) {
1070 		int truncated;
1071 
1072 		/*
1073 		 * If we are using filestreams, and we have an unlinked
1074 		 * file that we are processing the last close on, then nothing
1075 		 * will be able to reopen and write to this file. Purge this
1076 		 * inode from the filestreams cache so that it doesn't delay
1077 		 * teardown of the inode.
1078 		 */
1079 		if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip))
1080 			xfs_filestream_deassociate(ip);
1081 
1082 		/*
1083 		 * If we previously truncated this file and removed old data
1084 		 * in the process, we want to initiate "early" writeout on
1085 		 * the last close.  This is an attempt to combat the notorious
1086 		 * NULL files problem which is particularly noticable from a
1087 		 * truncate down, buffered (re-)write (delalloc), followed by
1088 		 * a crash.  What we are effectively doing here is
1089 		 * significantly reducing the time window where we'd otherwise
1090 		 * be exposed to that problem.
1091 		 */
1092 		truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED);
1093 		if (truncated && VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0)
1094 			xfs_flush_pages(ip, 0, -1, XFS_B_ASYNC, FI_NONE);
1095 	}
1096 
1097 	if (ip->i_d.di_nlink != 0) {
1098 		if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
1099 		     ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
1100 		       ip->i_delayed_blks > 0)) &&
1101 		     (ip->i_df.if_flags & XFS_IFEXTENTS))  &&
1102 		    (!(ip->i_d.di_flags &
1103 				(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
1104 			error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
1105 			if (error)
1106 				return error;
1107 		}
1108 	}
1109 
1110 	return 0;
1111 }
1112 
1113 /*
1114  * xfs_inactive
1115  *
1116  * This is called when the vnode reference count for the vnode
1117  * goes to zero.  If the file has been unlinked, then it must
1118  * now be truncated.  Also, we clear all of the read-ahead state
1119  * kept for the inode here since the file is now closed.
 *
 * Every exit from this function returns VN_INACTIVE_CACHE; failures
 * met along the way are not reported to the caller.
 *
 * Outline of the code below:
 *  - inode already free (di_mode == 0) or VN_BAD: nothing to do.
 *  - read-only mount: nothing to do.
 *  - inode still linked: possibly trim blocks past EOF with
 *    xfs_free_eofblocks(), then done.
 *  - unlinked inode: attach dquots, set up a transaction (truncate a
 *    regular file, tear down a symlink, or simply reserve log space for
 *    the free), remove the attribute fork, free the inode with
 *    xfs_ifree(), commit, detach the dquots and drop both inode locks.
1120  */
1121 int
xfs_inactive(xfs_inode_t * ip)1122 xfs_inactive(
1123 	xfs_inode_t	*ip)
1124 {
1125 	xfs_bmap_free_t	free_list;
1126 	xfs_fsblock_t	first_block;
1127 	int		committed;
1128 	xfs_trans_t	*tp;
1129 	xfs_mount_t	*mp;
1130 	int		error;
1131 	int		truncate;
1132 
1133 	xfs_itrace_entry(ip);
1134 
1135 	/*
1136 	 * If the inode is already free, then there can be nothing
1137 	 * to clean up here.
1138 	 */
1139 	if (ip->i_d.di_mode == 0 || VN_BAD(VFS_I(ip))) {
1140 		ASSERT(ip->i_df.if_real_bytes == 0);
1141 		ASSERT(ip->i_df.if_broot_bytes == 0);
1142 		return VN_INACTIVE_CACHE;
1143 	}
1144 
1145 	/*
1146 	 * Only do a truncate if it's a regular file with
1147 	 * some actual space in it.  It's OK to look at the
1148 	 * inode's fields without the lock because we're the
1149 	 * only one with a reference to the inode.
1150 	 */
1151 	truncate = ((ip->i_d.di_nlink == 0) &&
1152 	    ((ip->i_d.di_size != 0) || (ip->i_size != 0) ||
1153 	     (ip->i_d.di_nextents > 0) || (ip->i_delayed_blks > 0)) &&
1154 	    ((ip->i_d.di_mode & S_IFMT) == S_IFREG));
1155 
1156 	mp = ip->i_mount;
1157 
	/* The DMAPI destroy event is sent even on a read-only mount. */
1158 	if (ip->i_d.di_nlink == 0 && DM_EVENT_ENABLED(ip, DM_EVENT_DESTROY))
1159 		XFS_SEND_DESTROY(mp, ip, DM_RIGHT_NULL);
1160 
1161 	error = 0;
1162 
1163 	/* If this is a read-only mount, don't do this (would generate I/O) */
1164 	if (mp->m_flags & XFS_MOUNT_RDONLY)
1165 		goto out;
1166 
	/*
	 * Still-linked inode: the only work is trimming blocks past EOF.
	 * Unlike the similar test in xfs_release(), a preallocated or
	 * append-only file is still trimmed here when it has delayed
	 * allocation blocks.
	 */
1167 	if (ip->i_d.di_nlink != 0) {
1168 		if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
1169                      ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
1170                        ip->i_delayed_blks > 0)) &&
1171 		      (ip->i_df.if_flags & XFS_IFEXTENTS) &&
1172 		     (!(ip->i_d.di_flags &
1173 				(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
1174 		      (ip->i_delayed_blks != 0)))) {
1175 			error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
			/* The error itself is dropped; both paths return the same value. */
1176 			if (error)
1177 				return VN_INACTIVE_CACHE;
1178 		}
1179 		goto out;
1180 	}
1181 
1182 	ASSERT(ip->i_d.di_nlink == 0);
1183 
1184 	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
1185 		return VN_INACTIVE_CACHE;
1186 
1187 	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
1188 	if (truncate) {
1189 		/*
1190 		 * Do the xfs_itruncate_start() call before
1191 		 * reserving any log space because itruncate_start
1192 		 * will call into the buffer cache and we can't
1193 		 * do that within a transaction.
1194 		 */
1195 		xfs_ilock(ip, XFS_IOLOCK_EXCL);
1196 
1197 		error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 0);
1198 		if (error) {
1199 			xfs_trans_cancel(tp, 0);
1200 			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
1201 			return VN_INACTIVE_CACHE;
1202 		}
1203 
1204 		error = xfs_trans_reserve(tp, 0,
1205 					  XFS_ITRUNCATE_LOG_RES(mp),
1206 					  0, XFS_TRANS_PERM_LOG_RES,
1207 					  XFS_ITRUNCATE_LOG_COUNT);
1208 		if (error) {
1209 			/* Don't call itruncate_cleanup */
1210 			ASSERT(XFS_FORCED_SHUTDOWN(mp));
1211 			xfs_trans_cancel(tp, 0);
1212 			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
1213 			return VN_INACTIVE_CACHE;
1214 		}
1215 
		/* Only now take the ilock; the iolock has been held since above. */
1216 		xfs_ilock(ip, XFS_ILOCK_EXCL);
1217 		xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
1218 		xfs_trans_ihold(tp, ip);
1219 
1220 		/*
1221 		 * normally, we have to run xfs_itruncate_finish sync.
1222 		 * But if filesystem is wsync and we're in the inactive
1223 		 * path, then we know that nlink == 0, and that the
1224 		 * xaction that made nlink == 0 is permanently committed
1225 		 * since xfs_remove runs as a synchronous transaction.
1226 		 */
1227 		error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK,
1228 				(!(mp->m_flags & XFS_MOUNT_WSYNC) ? 1 : 0));
1229 
1230 		if (error) {
1231 			xfs_trans_cancel(tp,
1232 				XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
1233 			xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
1234 			return VN_INACTIVE_CACHE;
1235 		}
1236 	} else if ((ip->i_d.di_mode & S_IFMT) == S_IFLNK) {
1237 
1238 		/*
1239 		 * If we get an error while cleaning up a
1240 		 * symlink we bail out.
		 *
		 * The helper is chosen by whether the target fits in the
		 * inode's data fork.  On failure the helper is expected to
		 * have cancelled the transaction and cleared tp, which the
		 * ASSERT below checks.
1241 		 */
1242 		error = (ip->i_d.di_size > XFS_IFORK_DSIZE(ip)) ?
1243 			xfs_inactive_symlink_rmt(ip, &tp) :
1244 			xfs_inactive_symlink_local(ip, &tp);
1245 
1246 		if (error) {
1247 			ASSERT(tp == NULL);
1248 			return VN_INACTIVE_CACHE;
1249 		}
1250 
		/*
		 * xfs_inactive_symlink_local() returns with both locks held;
		 * presumably the remote variant does too (TODO confirm).
		 */
1251 		xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
1252 		xfs_trans_ihold(tp, ip);
1253 	} else {
		/* Nothing to truncate: just reserve log space for the free. */
1254 		error = xfs_trans_reserve(tp, 0,
1255 					  XFS_IFREE_LOG_RES(mp),
1256 					  0, XFS_TRANS_PERM_LOG_RES,
1257 					  XFS_INACTIVE_LOG_COUNT);
1258 		if (error) {
1259 			ASSERT(XFS_FORCED_SHUTDOWN(mp));
1260 			xfs_trans_cancel(tp, 0);
1261 			return VN_INACTIVE_CACHE;
1262 		}
1263 
1264 		xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
1265 		xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
1266 		xfs_trans_ihold(tp, ip);
1267 	}
1268 
1269 	/*
1270 	 * If there are attributes associated with the file
1271 	 * then blow them away now.  The code calls a routine
1272 	 * that recursively deconstructs the attribute fork.
1273 	 * We need to just commit the current transaction
1274 	 * because we can't use it for xfs_attr_inactive().
1275 	 */
1276 	if (ip->i_d.di_anextents > 0) {
1277 		error = xfs_inactive_attrs(ip, &tp);
1278 		/*
1279 		 * If we got an error, the transaction is already
1280 		 * cancelled, and the inode is unlocked. Just get out.
1281 		 */
1282 		 if (error)
1283 			 return VN_INACTIVE_CACHE;
1284 	} else if (ip->i_afp) {
		/* No attribute extents: only the in-core fork needs freeing. */
1285 		xfs_idestroy_fork(ip, XFS_ATTR_FORK);
1286 	}
1287 
1288 	/*
1289 	 * Free the inode.
1290 	 */
1291 	xfs_bmap_init(&free_list, &first_block);
1292 	error = xfs_ifree(tp, ip, &free_list);
1293 	if (error) {
1294 		/*
1295 		 * If we fail to free the inode, shut down.  The cancel
1296 		 * might do that, we need to make sure.  Otherwise the
1297 		 * inode might be lost for a long time or forever.
1298 		 */
1299 		if (!XFS_FORCED_SHUTDOWN(mp)) {
1300 			cmn_err(CE_NOTE,
1301 		"xfs_inactive:	xfs_ifree() returned an error = %d on %s",
1302 				error, mp->m_fsname);
1303 			xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
1304 		}
1305 		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
1306 	} else {
1307 		/*
1308 		 * Credit the quota account(s). The inode is gone.
1309 		 */
1310 		XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
1311 
1312 		/*
1313 		 * Just ignore errors at this point.  There is nothing we can
1314 		 * do except to try to keep going. Make sure it's not a silent
1315 		 * error.
1316 		 */
1317 		error = xfs_bmap_finish(&tp,  &free_list, &committed);
1318 		if (error)
1319 			xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: "
1320 				"xfs_bmap_finish() returned error %d", error);
1321 		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1322 		if (error)
1323 			xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: "
1324 				"xfs_trans_commit() returned error %d", error);
1325 	}
	/* Both the cancel and the commit branch fall through to here. */
1326 	/*
1327 	 * Release the dquots held by inode, if any.
1328 	 */
1329 	XFS_QM_DQDETACH(mp, ip);
1330 
1331 	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
1332 
1333  out:
1334 	return VN_INACTIVE_CACHE;
1335 }
1336 
1337 /*
1338  * Lookups up an inode from "name". If ci_name is not NULL, then a CI match
1339  * is allowed, otherwise it has to be an exact match. If a CI match is found,
1340  * ci_name->name will point to a the actual name (caller must free) or
1341  * will be set to NULL if an exact match is found.
1342  */
1343 int
xfs_lookup(xfs_inode_t * dp,struct xfs_name * name,xfs_inode_t ** ipp,struct xfs_name * ci_name)1344 xfs_lookup(
1345 	xfs_inode_t		*dp,
1346 	struct xfs_name		*name,
1347 	xfs_inode_t		**ipp,
1348 	struct xfs_name		*ci_name)
1349 {
1350 	xfs_ino_t		inum;
1351 	int			error;
1352 	uint			lock_mode;
1353 
1354 	xfs_itrace_entry(dp);
1355 
1356 	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
1357 		return XFS_ERROR(EIO);
1358 
1359 	lock_mode = xfs_ilock_map_shared(dp);
1360 	error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
1361 	xfs_iunlock_map_shared(dp, lock_mode);
1362 
1363 	if (error)
1364 		goto out;
1365 
1366 	error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp, 0);
1367 	if (error)
1368 		goto out_free_name;
1369 
1370 	xfs_itrace_ref(*ipp);
1371 	return 0;
1372 
1373 out_free_name:
1374 	if (ci_name)
1375 		kmem_free(ci_name->name);
1376 out:
1377 	*ipp = NULL;
1378 	return error;
1379 }
1380 
/*
 * Create a new inode named "name" in directory dp.
 *
 *   dp    - parent directory inode
 *   name  - name of the new directory entry
 *   mode  - mode handed to xfs_dir_ialloc() for the new inode
 *   rdev  - device number handed to xfs_dir_ialloc()
 *   ipp   - out: the new inode, with an extra reference taken via
 *           IHOLD(); must point to NULL on entry (asserted) and is
 *           written only on success
 *   credp - credentials handed to xfs_dir_ialloc()
 *
 * Returns 0 on success or an error code.  When DMAPI create events are
 * enabled on dp, a DM_EVENT_CREATE event is sent first; std_return
 * sends DM_EVENT_POSTCREATE on success, or on failure once the create
 * event has gone out.
 *
 * Error labels:
 *   error_return - cancel tp (if not NULL) with cancel_flags, release
 *                  the dquots, unlock dp if it is locked but not yet
 *                  joined to the transaction
 *   abort_return - as error_return, with XFS_TRANS_ABORT added
 *   abort_rele   - abort tp, then drop the extra reference on ip
 */
1381 int
xfs_create(xfs_inode_t * dp,struct xfs_name * name,mode_t mode,xfs_dev_t rdev,xfs_inode_t ** ipp,cred_t * credp)1382 xfs_create(
1383 	xfs_inode_t		*dp,
1384 	struct xfs_name		*name,
1385 	mode_t			mode,
1386 	xfs_dev_t		rdev,
1387 	xfs_inode_t		**ipp,
1388 	cred_t			*credp)
1389 {
1390 	xfs_mount_t		*mp = dp->i_mount;
1391 	xfs_inode_t		*ip;
1392 	xfs_trans_t		*tp;
1393 	int			error;
1394 	xfs_bmap_free_t		free_list;
1395 	xfs_fsblock_t		first_block;
1396 	boolean_t		unlock_dp_on_error = B_FALSE;
1397 	int			dm_event_sent = 0;
1398 	uint			cancel_flags;
1399 	int			committed;
1400 	xfs_prid_t		prid;
1401 	struct xfs_dquot	*udqp, *gdqp;
1402 	uint			resblks;
1403 
1404 	ASSERT(!*ipp);
1405 	xfs_itrace_entry(dp);
1406 
1407 	if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) {
1408 		error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE,
1409 				dp, DM_RIGHT_NULL, NULL,
1410 				DM_RIGHT_NULL, name->name, NULL,
1411 				mode, 0, 0);
1412 
1413 		if (error)
1414 			return error;
1415 		dm_event_sent = 1;
1416 	}
1417 
1418 	if (XFS_FORCED_SHUTDOWN(mp))
1419 		return XFS_ERROR(EIO);
1420 
1421 	/* Return through std_return after this point. */
1422 
	/* The project id comes from the parent only when it has PROJINHERIT set. */
1423 	udqp = gdqp = NULL;
1424 	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
1425 		prid = dp->i_d.di_projid;
1426 	else
1427 		prid = (xfs_prid_t)dfltprid;
1428 
1429 	/*
1430 	 * Make sure that we have allocated dquot(s) on disk.
1431 	 */
1432 	error = XFS_QM_DQVOPALLOC(mp, dp,
1433 			current_fsuid(), current_fsgid(), prid,
1434 			XFS_QMOPT_QUOTALL|XFS_QMOPT_INHERIT, &udqp, &gdqp);
1435 	if (error)
1436 		goto std_return;
1437 
1438 	ip = NULL;
1439 
1440 	tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
1441 	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
1442 	resblks = XFS_CREATE_SPACE_RES(mp, name->len);
1443 	/*
1444 	 * Initially assume that the file does not exist and
1445 	 * reserve the resources for that case.  If that is not
1446 	 * the case we'll drop the one we have and get a more
1447 	 * appropriate transaction later.
1448 	 */
1449 	error = xfs_trans_reserve(tp, resblks, XFS_CREATE_LOG_RES(mp), 0,
1450 			XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT);
	/*
	 * On ENOSPC retry without a block reservation; resblks == 0 is
	 * then passed on to the quota and directory calls below.
	 */
1451 	if (error == ENOSPC) {
1452 		resblks = 0;
1453 		error = xfs_trans_reserve(tp, 0, XFS_CREATE_LOG_RES(mp), 0,
1454 				XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT);
1455 	}
1456 	if (error) {
		/* The reservation failed: cancel without XFS_TRANS_RELEASE_LOG_RES. */
1457 		cancel_flags = 0;
1458 		goto error_return;
1459 	}
1460 
1461 	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
1462 	unlock_dp_on_error = B_TRUE;
1463 
1464 	xfs_bmap_init(&free_list, &first_block);
1465 
1466 	ASSERT(ip == NULL);
1467 
1468 	/*
1469 	 * Reserve disk quota and the inode.
1470 	 */
1471 	error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
1472 	if (error)
1473 		goto error_return;
1474 
1475 	error = xfs_dir_canenter(tp, dp, name, resblks);
1476 	if (error)
1477 		goto error_return;
1478 	error = xfs_dir_ialloc(&tp, dp, mode, 1,
1479 			rdev, credp, prid, resblks > 0,
1480 			&ip, &committed);
1481 	if (error) {
		/* ENOSPC takes the plain cancel path; any other error aborts. */
1482 		if (error == ENOSPC)
1483 			goto error_return;
1484 		goto abort_return;
1485 	}
1486 	xfs_itrace_ref(ip);
1487 
1488 	/*
1489 	 * At this point, we've gotten a newly allocated inode.
1490 	 * It is locked (and joined to the transaction).
1491 	 */
1492 
1493 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1494 
1495 	/*
1496 	 * Now we join the directory inode to the transaction.  We do not do it
1497 	 * earlier because xfs_dir_ialloc might commit the previous transaction
1498 	 * (and release all the locks).  An error from here on will result in
1499 	 * the transaction cancel unlocking dp so don't do it explicitly in the
1500 	 * error path.
1501 	 */
1502 	IHOLD(dp);
1503 	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
1504 	unlock_dp_on_error = B_FALSE;
1505 
1506 	error = xfs_dir_createname(tp, dp, name, ip->i_ino,
1507 					&first_block, &free_list, resblks ?
1508 					resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
1509 	if (error) {
1510 		ASSERT(error != ENOSPC);
1511 		goto abort_return;
1512 	}
1513 	xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
1514 	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
1515 
1516 	/*
1517 	 * If this is a synchronous mount, make sure that the
1518 	 * create transaction goes to disk before returning to
1519 	 * the user.
1520 	 */
1521 	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
1522 		xfs_trans_set_sync(tp);
1523 	}
1524 
1525 	/*
1526 	 * Attach the dquot(s) to the inodes and modify them incore.
1527 	 * These ids of the inode couldn't have changed since the new
1528 	 * inode has been locked ever since it was created.
1529 	 */
1530 	XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp);
1531 
1532 	/*
1533 	 * xfs_trans_commit normally decrements the vnode ref count
1534 	 * when it unlocks the inode. Since we want to return the
1535 	 * vnode to the caller, we bump the vnode ref count now.
1536 	 */
1537 	IHOLD(ip);
1538 
1539 	error = xfs_bmap_finish(&tp, &free_list, &committed);
1540 	if (error) {
1541 		xfs_bmap_cancel(&free_list);
1542 		goto abort_rele;
1543 	}
1544 
1545 	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1546 	if (error) {
		/*
		 * Drop the reference taken above and clear tp so that
		 * error_return skips xfs_trans_cancel().
		 */
1547 		IRELE(ip);
1548 		tp = NULL;
1549 		goto error_return;
1550 	}
1551 
1552 	XFS_QM_DQRELE(mp, udqp);
1553 	XFS_QM_DQRELE(mp, gdqp);
1554 
1555 	*ipp = ip;
1556 
1557 	/* Fallthrough to std_return with error = 0  */
1558 
1559 std_return:
	/*
	 * ip is read only when *ipp is non-NULL, which happens only on the
	 * success path above; on the early error paths ip may be unset.
	 */
1560 	if ((*ipp || (error != 0 && dm_event_sent != 0)) &&
1561 	    DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) {
1562 		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE,
1563 			dp, DM_RIGHT_NULL,
1564 			*ipp ? ip : NULL,
1565 			DM_RIGHT_NULL, name->name, NULL,
1566 			mode, error, 0);
1567 	}
1568 	return error;
1569 
1570  abort_return:
1571 	cancel_flags |= XFS_TRANS_ABORT;
1572 	/* FALLTHROUGH */
1573 
1574  error_return:
1575 	if (tp != NULL)
1576 		xfs_trans_cancel(tp, cancel_flags);
1577 
1578 	XFS_QM_DQRELE(mp, udqp);
1579 	XFS_QM_DQRELE(mp, gdqp);
1580 
	/* Set only while dp is locked but not yet joined to the transaction. */
1581 	if (unlock_dp_on_error)
1582 		xfs_iunlock(dp, XFS_ILOCK_EXCL);
1583 
1584 	goto std_return;
1585 
1586  abort_rele:
1587 	/*
1588 	 * Wait until after the current transaction is aborted to
1589 	 * release the inode.  This prevents recursive transactions
1590 	 * and deadlocks from xfs_inactive.
1591 	 */
1592 	cancel_flags |= XFS_TRANS_ABORT;
1593 	xfs_trans_cancel(tp, cancel_flags);
1594 	IRELE(ip);
1595 
1596 	XFS_QM_DQRELE(mp, udqp);
1597 	XFS_QM_DQRELE(mp, gdqp);
1598 
1599 	goto std_return;
1600 }
1601 
/*
 * DEBUG-only statistics updated by xfs_lock_inodes().  They are plain
 * ints bumped without any locking.
 */
1602 #ifdef DEBUG
/* calls that took every lock without a single failed trylock */
1603 int xfs_locked_n;
/* calls that finished after fewer than 5 failed attempts */
1604 int xfs_small_retries;
/* calls that finished after 5 to 99 failed attempts */
1605 int xfs_middle_retries;
/* calls that finished after 100 or more failed attempts */
1606 int xfs_lots_retries;
/* number of times a retry slept in delay(1) */
1607 int xfs_lock_delays;
1608 #endif
1609 
1610 /*
1611  * Bump the subclass so xfs_lock_inodes() acquires each lock with
1612  * a different value
1613  */
1614 static inline int
xfs_lock_inumorder(int lock_mode,int subclass)1615 xfs_lock_inumorder(int lock_mode, int subclass)
1616 {
1617 	if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
1618 		lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
1619 	if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
1620 		lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
1621 
1622 	return lock_mode;
1623 }
1624 
1625 /*
1626  * The following routine will lock n inodes in exclusive mode.
1627  * We assume the caller calls us with the inodes in i_ino order.
1628  *
1629  * We need to detect deadlock where an inode that we lock
1630  * is in the AIL and we start waiting for another inode that is locked
1631  * by a thread in a long running transaction (such as truncate). This can
1632  * result in deadlock since the long running trans might need to wait
1633  * for the inode we just locked in order to push the tail and free space
1634  * in the log.
 *
 *   ips       - array of inode pointers; an entry equal to its
 *               predecessor is treated as already locked and skipped
 *   inodes    - number of entries in ips (at least 2, asserted)
 *   lock_mode - lock flags handed to xfs_ilock()/xfs_ilock_nowait()
 *               after xfs_lock_inumorder() adds the array index as
 *               subclass; the code passes it through unchanged, so the
 *               "exclusive mode" above is up to the caller
 *
 * When a trylock fails, every lock taken so far is dropped and the
 * whole sequence restarts from index 0 with blocking locks; every fifth
 * failed attempt sleeps in delay(1) first.  Returns with all inodes
 * locked.
1635  */
1636 void
xfs_lock_inodes(xfs_inode_t ** ips,int inodes,uint lock_mode)1637 xfs_lock_inodes(
1638 	xfs_inode_t	**ips,
1639 	int		inodes,
1640 	uint		lock_mode)
1641 {
1642 	int		attempts = 0, i, j, try_lock;
1643 	xfs_log_item_t	*lp;
1644 
1645 	ASSERT(ips && (inodes >= 2)); /* we need at least two */
1646 
1647 	try_lock = 0;
1648 	i = 0;
1649 
1650 again:
1651 	for (; i < inodes; i++) {
1652 		ASSERT(ips[i]);
1653 
1654 		if (i && (ips[i] == ips[i-1]))	/* Already locked */
1655 			continue;
1656 
1657 		/*
1658 		 * If try_lock is not set yet, make sure all locked inodes
1659 		 * are not in the AIL.
1660 		 * If any are, set try_lock to be used later.
1661 		 */
1662 
1663 		if (!try_lock) {
1664 			for (j = (i - 1); j >= 0 && !try_lock; j--) {
1665 				lp = (xfs_log_item_t *)ips[j]->i_itemp;
1666 				if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
1667 					try_lock++;
1668 				}
1669 			}
1670 		}
1671 
1672 		/*
1673 		 * If any of the previous locks we have locked is in the AIL,
1674 		 * we must TRY to get the second and subsequent locks. If
1675 		 * we can't get any, we must release all we have
1676 		 * and try again.
1677 		 */
1678 
1679 		if (try_lock) {
1680 			/* try_lock must be 0 if i is 0. */
1681 			/*
1682 			 * try_lock means we have an inode locked
1683 			 * that is in the AIL.
1684 			 */
1685 			ASSERT(i != 0);
1686 			if (!xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) {
1687 				attempts++;
1688 
1689 				/*
1690 				 * Unlock all previous guys and try again.
1691 				 * xfs_iunlock will try to push the tail
1692 				 * if the inode is in the AIL.
1693 				 */
1694 
1695 				for(j = i - 1; j >= 0; j--) {
1696 
1697 					/*
1698 					 * Check to see if we've already
1699 					 * unlocked this one.
1700 					 * Not the first one going back,
1701 					 * and the inode ptr is the same.
					 *
					 * j == i - 1 is excluded because
					 * ips[j+1] is then ips[i], the
					 * inode whose trylock just failed.
1702 					 */
1703 					if ((j != (i - 1)) && ips[j] ==
1704 								ips[j+1])
1705 						continue;
1706 
1707 					xfs_iunlock(ips[j], lock_mode);
1708 				}
1709 
1710 				if ((attempts % 5) == 0) {
1711 					delay(1); /* Don't just spin the CPU */
1712 #ifdef DEBUG
1713 					xfs_lock_delays++;
1714 #endif
1715 				}
				/* Restart from the first inode with blocking locks. */
1716 				i = 0;
1717 				try_lock = 0;
1718 				goto again;
1719 			}
1720 		} else {
1721 			xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i));
1722 		}
1723 	}
1724 
1725 #ifdef DEBUG
	/* Bucket this call by how many trylock failures it took. */
1726 	if (attempts) {
1727 		if (attempts < 5) xfs_small_retries++;
1728 		else if (attempts < 100) xfs_middle_retries++;
1729 		else xfs_lots_retries++;
1730 	} else {
1731 		xfs_locked_n++;
1732 	}
1733 #endif
1734 }
1735 
1736 /*
1737  * xfs_lock_two_inodes() can only be used to lock one type of lock
1738  * at a time - the iolock or the ilock, but not both at once. If
1739  * we lock both at once, lockdep will report false positives saying
1740  * we have violated locking orders.
1741  */
1742 void
xfs_lock_two_inodes(xfs_inode_t * ip0,xfs_inode_t * ip1,uint lock_mode)1743 xfs_lock_two_inodes(
1744 	xfs_inode_t		*ip0,
1745 	xfs_inode_t		*ip1,
1746 	uint			lock_mode)
1747 {
1748 	xfs_inode_t		*temp;
1749 	int			attempts = 0;
1750 	xfs_log_item_t		*lp;
1751 
1752 	if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
1753 		ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0);
1754 	ASSERT(ip0->i_ino != ip1->i_ino);
1755 
1756 	if (ip0->i_ino > ip1->i_ino) {
1757 		temp = ip0;
1758 		ip0 = ip1;
1759 		ip1 = temp;
1760 	}
1761 
1762  again:
1763 	xfs_ilock(ip0, xfs_lock_inumorder(lock_mode, 0));
1764 
1765 	/*
1766 	 * If the first lock we have locked is in the AIL, we must TRY to get
1767 	 * the second lock. If we can't get it, we must release the first one
1768 	 * and try again.
1769 	 */
1770 	lp = (xfs_log_item_t *)ip0->i_itemp;
1771 	if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
1772 		if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(lock_mode, 1))) {
1773 			xfs_iunlock(ip0, lock_mode);
1774 			if ((++attempts % 5) == 0)
1775 				delay(1); /* Don't just spin the CPU */
1776 			goto again;
1777 		}
1778 	} else {
1779 		xfs_ilock(ip1, xfs_lock_inumorder(lock_mode, 1));
1780 	}
1781 }
1782 
/*
 * Remove the directory entry "name", which refers to inode ip, from
 * directory dp.  Handles both directories and non-directories.
 *
 *   dp   - parent directory inode
 *   name - name of the entry to remove
 *   ip   - inode the entry refers to
 *
 * Returns 0 on success or an error code.  A directory is only removed
 * when its link count is exactly 2 and xfs_dir_isempty() agrees;
 * otherwise ENOTEMPTY is returned.
 *
 * When DMAPI remove events are enabled on dp, DM_EVENT_REMOVE is sent
 * first and a failure there returns directly.  Every later exit goes
 * through std_return, which sends DM_EVENT_POSTREMOVE (if enabled) with
 * the final error value.
 */
1783 int
xfs_remove(xfs_inode_t * dp,struct xfs_name * name,xfs_inode_t * ip)1784 xfs_remove(
1785 	xfs_inode_t             *dp,
1786 	struct xfs_name		*name,
1787 	xfs_inode_t		*ip)
1788 {
1789 	xfs_mount_t		*mp = dp->i_mount;
1790 	xfs_trans_t             *tp = NULL;
1791 	int			is_dir = S_ISDIR(ip->i_d.di_mode);
1792 	int                     error = 0;
1793 	xfs_bmap_free_t         free_list;
1794 	xfs_fsblock_t           first_block;
1795 	int			cancel_flags;
1796 	int			committed;
1797 	int			link_zero;
1798 	uint			resblks;
1799 	uint			log_count;
1800 
1801 	xfs_itrace_entry(dp);
1802 	xfs_itrace_entry(ip);
1803 
1804 	if (XFS_FORCED_SHUTDOWN(mp))
1805 		return XFS_ERROR(EIO);
1806 
1807 	if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) {
1808 		error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dp, DM_RIGHT_NULL,
1809 					NULL, DM_RIGHT_NULL, name->name, NULL,
1810 					ip->i_d.di_mode, 0, 0);
1811 		if (error)
1812 			return error;
1813 	}
1814 
1815 	error = XFS_QM_DQATTACH(mp, dp, 0);
1816 	if (error)
1817 		goto std_return;
1818 
1819 	error = XFS_QM_DQATTACH(mp, ip, 0);
1820 	if (error)
1821 		goto std_return;
1822 
	/* Directories and files use different transaction types and log counts. */
1823 	if (is_dir) {
1824 		tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
1825 		log_count = XFS_DEFAULT_LOG_COUNT;
1826 	} else {
1827 		tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
1828 		log_count = XFS_REMOVE_LOG_COUNT;
1829 	}
1830 	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
1831 
1832 	/*
1833 	 * We try to get the real space reservation first,
1834 	 * allowing for directory btree deletion(s) implying
1835 	 * possible bmap insert(s).  If we can't get the space
1836 	 * reservation then we use 0 instead, and avoid the bmap
1837 	 * btree insert(s) in the directory code by, if the bmap
1838 	 * insert tries to happen, instead trimming the LAST
1839 	 * block from the directory.
1840 	 */
1841 	resblks = XFS_REMOVE_SPACE_RES(mp);
1842 	error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0,
1843 				  XFS_TRANS_PERM_LOG_RES, log_count);
1844 	if (error == ENOSPC) {
1845 		resblks = 0;
1846 		error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0,
1847 					  XFS_TRANS_PERM_LOG_RES, log_count);
1848 	}
1849 	if (error) {
1850 		ASSERT(error != ENOSPC);
		/* The reservation failed: cancel without XFS_TRANS_RELEASE_LOG_RES. */
1851 		cancel_flags = 0;
1852 		goto out_trans_cancel;
1853 	}
1854 
1855 	xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL);
1856 
1857 	/*
1858 	 * At this point, we've gotten both the directory and the entry
1859 	 * inodes locked.
	 *
	 * The IHOLD and ijoin calls below are paired crosswise (hold ip,
	 * join dp, hold dp, join ip); the net effect is that both inodes
	 * are held and joined.
1860 	 */
1861 	IHOLD(ip);
1862 	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
1863 
1864 	IHOLD(dp);
1865 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1866 
1867 	/*
1868 	 * If we're removing a directory perform some additional validation.
1869 	 */
1870 	if (is_dir) {
1871 		ASSERT(ip->i_d.di_nlink >= 2);
1872 		if (ip->i_d.di_nlink != 2) {
1873 			error = XFS_ERROR(ENOTEMPTY);
1874 			goto out_trans_cancel;
1875 		}
1876 		if (!xfs_dir_isempty(ip)) {
1877 			error = XFS_ERROR(ENOTEMPTY);
1878 			goto out_trans_cancel;
1879 		}
1880 	}
1881 
1882 	xfs_bmap_init(&free_list, &first_block);
1883 	error = xfs_dir_removename(tp, dp, name, ip->i_ino,
1884 					&first_block, &free_list, resblks);
1885 	if (error) {
1886 		ASSERT(error != ENOENT);
1887 		goto out_bmap_cancel;
1888 	}
1889 	xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
1890 
1891 	if (is_dir) {
1892 		/*
1893 		 * Drop the link from ip's "..".
1894 		 */
1895 		error = xfs_droplink(tp, dp);
1896 		if (error)
1897 			goto out_bmap_cancel;
1898 
1899 		/*
1900 		 * Drop the "." link from ip to self.
1901 		 */
1902 		error = xfs_droplink(tp, ip);
1903 		if (error)
1904 			goto out_bmap_cancel;
1905 	} else {
1906 		/*
1907 		 * When removing a non-directory we need to log the parent
1908 		 * inode here.  For a directory this is done implicitly
1909 		 * by the xfs_droplink call for the ".." entry.
1910 		 */
1911 		xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
1912 	}
1913 
1914 	/*
1915 	 * Drop the link from dp to ip.
1916 	 */
1917 	error = xfs_droplink(tp, ip);
1918 	if (error)
1919 		goto out_bmap_cancel;
1920 
1921 	/*
1922 	 * Determine if this is the last link while
1923 	 * we are in the transaction.
1924 	 */
1925 	link_zero = (ip->i_d.di_nlink == 0);
1926 
1927 	/*
1928 	 * If this is a synchronous mount, make sure that the
1929 	 * remove transaction goes to disk before returning to
1930 	 * the user.
1931 	 */
1932 	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
1933 		xfs_trans_set_sync(tp);
1934 
1935 	error = xfs_bmap_finish(&tp, &free_list, &committed);
1936 	if (error)
1937 		goto out_bmap_cancel;
1938 
1939 	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	/* A failed commit is not followed by a cancel; just report it. */
1940 	if (error)
1941 		goto std_return;
1942 
1943 	/*
1944 	 * If we are using filestreams, kill the stream association.
1945 	 * If the file is still open it may get a new one but that
1946 	 * will get killed on last close in xfs_close() so we don't
1947 	 * have to worry about that.
1948 	 */
1949 	if (!is_dir && link_zero && xfs_inode_is_filestream(ip))
1950 		xfs_filestream_deassociate(ip);
1951 
1952 	xfs_itrace_exit(ip);
1953 	xfs_itrace_exit(dp);
1954 
1955  std_return:
1956 	if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) {
1957 		XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, dp, DM_RIGHT_NULL,
1958 				NULL, DM_RIGHT_NULL, name->name, NULL,
1959 				ip->i_d.di_mode, error, 0);
1960 	}
1961 
1962 	return error;
1963 
	/*
	 * out_bmap_cancel: used once the free list has been initialised;
	 * cancels it and aborts the transaction.
	 * out_trans_cancel: used for failures before the free list is set
	 * up; cancels the transaction with whatever cancel_flags holds.
	 */
1964  out_bmap_cancel:
1965 	xfs_bmap_cancel(&free_list);
1966 	cancel_flags |= XFS_TRANS_ABORT;
1967  out_trans_cancel:
1968 	xfs_trans_cancel(tp, cancel_flags);
1969 	goto std_return;
1970 }
1971 
/*
 * Create a hard link named "target_name" in directory tdp that refers
 * to the existing inode sip.
 *
 *   tdp         - directory that receives the new entry
 *   sip         - inode being linked to; must not be a directory
 *                 (asserted)
 *   target_name - name of the new directory entry
 *
 * Returns 0 on success or an error code: EMLINK when sip already has
 * XFS_MAXLINK links, EXDEV when tdp has PROJINHERIT set and the project
 * ids of tdp and sip differ, or whatever the quota, reservation,
 * directory or commit code returns.
 *
 * When DMAPI link events are enabled on tdp, DM_EVENT_LINK is sent
 * first and a failure there returns directly.  Every later exit goes
 * through std_return, which sends DM_EVENT_POSTLINK (if enabled on sip)
 * with the final error value.
 */
1972 int
xfs_link(xfs_inode_t * tdp,xfs_inode_t * sip,struct xfs_name * target_name)1973 xfs_link(
1974 	xfs_inode_t		*tdp,
1975 	xfs_inode_t		*sip,
1976 	struct xfs_name		*target_name)
1977 {
1978 	xfs_mount_t		*mp = tdp->i_mount;
1979 	xfs_trans_t		*tp;
1980 	int			error;
1981 	xfs_bmap_free_t         free_list;
1982 	xfs_fsblock_t           first_block;
1983 	int			cancel_flags;
1984 	int			committed;
1985 	int			resblks;
1986 
1987 	xfs_itrace_entry(tdp);
1988 	xfs_itrace_entry(sip);
1989 
1990 	ASSERT(!S_ISDIR(sip->i_d.di_mode));
1991 
1992 	if (XFS_FORCED_SHUTDOWN(mp))
1993 		return XFS_ERROR(EIO);
1994 
1995 	if (DM_EVENT_ENABLED(tdp, DM_EVENT_LINK)) {
1996 		error = XFS_SEND_NAMESP(mp, DM_EVENT_LINK,
1997 					tdp, DM_RIGHT_NULL,
1998 					sip, DM_RIGHT_NULL,
1999 					target_name->name, NULL, 0, 0, 0);
2000 		if (error)
2001 			return error;
2002 	}
2003 
2004 	/* Return through std_return after this point. */
2005 
2006 	error = XFS_QM_DQATTACH(mp, sip, 0);
2007 	if (!error && sip != tdp)
2008 		error = XFS_QM_DQATTACH(mp, tdp, 0);
2009 	if (error)
2010 		goto std_return;
2011 
2012 	tp = xfs_trans_alloc(mp, XFS_TRANS_LINK);
2013 	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
2014 	resblks = XFS_LINK_SPACE_RES(mp, target_name->len);
2015 	error = xfs_trans_reserve(tp, resblks, XFS_LINK_LOG_RES(mp), 0,
2016 			XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT);
	/*
	 * On ENOSPC retry without a block reservation; resblks == 0 is
	 * then passed on to the directory calls below.
	 */
2017 	if (error == ENOSPC) {
2018 		resblks = 0;
2019 		error = xfs_trans_reserve(tp, 0, XFS_LINK_LOG_RES(mp), 0,
2020 				XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT);
2021 	}
2022 	if (error) {
		/* The reservation failed: cancel without XFS_TRANS_RELEASE_LOG_RES. */
2023 		cancel_flags = 0;
2024 		goto error_return;
2025 	}
2026 
2027 	xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
2028 
2029 	/*
2030 	 * Increment vnode ref counts since xfs_trans_commit &
2031 	 * xfs_trans_cancel will both unlock the inodes and
2032 	 * decrement the associated ref counts.
2033 	 */
2034 	IHOLD(sip);
2035 	IHOLD(tdp);
2036 	xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
2037 	xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
2038 
2039 	/*
2040 	 * If the source has too many links, we can't make any more to it.
2041 	 */
2042 	if (sip->i_d.di_nlink >= XFS_MAXLINK) {
2043 		error = XFS_ERROR(EMLINK);
2044 		goto error_return;
2045 	}
2046 
2047 	/*
2048 	 * If we are using project inheritance, we only allow hard link
2049 	 * creation in our tree when the project IDs are the same; else
2050 	 * the tree quota mechanism could be circumvented.
2051 	 */
2052 	if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
2053 		     (tdp->i_d.di_projid != sip->i_d.di_projid))) {
2054 		error = XFS_ERROR(EXDEV);
2055 		goto error_return;
2056 	}
2057 
2058 	error = xfs_dir_canenter(tp, tdp, target_name, resblks);
2059 	if (error)
2060 		goto error_return;
2061 
2062 	xfs_bmap_init(&free_list, &first_block);
2063 
	/* Failures from here on take abort_return, which adds XFS_TRANS_ABORT. */
2064 	error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
2065 					&first_block, &free_list, resblks);
2066 	if (error)
2067 		goto abort_return;
2068 	xfs_ichgtime(tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
2069 	xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);
2070 
2071 	error = xfs_bumplink(tp, sip);
2072 	if (error)
2073 		goto abort_return;
2074 
2075 	/*
2076 	 * If this is a synchronous mount, make sure that the
2077 	 * link transaction goes to disk before returning to
2078 	 * the user.
2079 	 */
2080 	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
2081 		xfs_trans_set_sync(tp);
2082 	}
2083 
2084 	error = xfs_bmap_finish (&tp, &free_list, &committed);
2085 	if (error) {
2086 		xfs_bmap_cancel(&free_list);
2087 		goto abort_return;
2088 	}
2089 
2090 	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	/* A failed commit is not followed by a cancel; just report it. */
2091 	if (error)
2092 		goto std_return;
2093 
2094 	/* Fall through to std_return with error = 0. */
2095 std_return:
2096 	if (DM_EVENT_ENABLED(sip, DM_EVENT_POSTLINK)) {
2097 		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTLINK,
2098 				tdp, DM_RIGHT_NULL,
2099 				sip, DM_RIGHT_NULL,
2100 				target_name->name, NULL, 0, error, 0);
2101 	}
2102 	return error;
2103 
2104  abort_return:
2105 	cancel_flags |= XFS_TRANS_ABORT;
2106 	/* FALLTHROUGH */
2107 
2108  error_return:
2109 	xfs_trans_cancel(tp, cancel_flags);
2110 	goto std_return;
2111 }
2112 
2113 
/*
 * xfs_mkdir()
 *	Create a directory named dir_name in the parent directory dp.
 *
 *	dp       - parent directory inode
 *	dir_name - name of the new entry
 *	mode     - mode handed to xfs_dir_ialloc() and reported in the
 *		   DMAPI CREATE/POSTCREATE events
 *	ipp      - out: assigned the new directory inode once that inode
 *		   has been linked into dp.  NOTE(review): it is not reset
 *		   when a later step fails and the reference is dropped
 *		   again, so callers presumably must ignore it on error -
 *		   confirm against the callers.
 *	credp    - credentials handed to xfs_dir_ialloc()
 *
 * RETURNS:
 *       0 on success
 *      errno on error (EIO when the filesystem is shut down, EMLINK when
 *	dp already has XFS_MAXLINK links, or whatever a callee returned)
 */
2114 int
xfs_mkdir(xfs_inode_t * dp,struct xfs_name * dir_name,mode_t mode,xfs_inode_t ** ipp,cred_t * credp)2115 xfs_mkdir(
2116 	xfs_inode_t             *dp,
2117 	struct xfs_name		*dir_name,
2118 	mode_t			mode,
2119 	xfs_inode_t		**ipp,
2120 	cred_t			*credp)
2121 {
2122 	xfs_mount_t		*mp = dp->i_mount;
2123 	xfs_inode_t		*cdp;	/* inode of created dir */
2124 	xfs_trans_t		*tp;
2125 	int			cancel_flags;
2126 	int			error;
2127 	int			committed;
2128 	xfs_bmap_free_t         free_list;
2129 	xfs_fsblock_t           first_block;
	/* B_TRUE while dp is locked here but not yet joined to tp */
2130 	boolean_t		unlock_dp_on_error = B_FALSE;
	/* B_TRUE once the new directory has been linked into dp */
2131 	boolean_t		created = B_FALSE;
	/* non-zero once the DM_EVENT_CREATE event has been sent */
2132 	int			dm_event_sent = 0;
2133 	xfs_prid_t		prid;
2134 	struct xfs_dquot	*udqp, *gdqp;
2135 	uint			resblks;
2136 
2137 	if (XFS_FORCED_SHUTDOWN(mp))
2138 		return XFS_ERROR(EIO);
2139 
2140 	tp = NULL;
2141 
2142 	if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) {
2143 		error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE,
2144 					dp, DM_RIGHT_NULL, NULL,
2145 					DM_RIGHT_NULL, dir_name->name, NULL,
2146 					mode, 0, 0);
2147 		if (error)
2148 			return error;
2149 		dm_event_sent = 1;
2150 	}
2151 
2152 	/* Return through std_return after this point. */
2153 
2154 	xfs_itrace_entry(dp);
2155 
	/* Redundant: mp already holds dp->i_mount from its declaration. */
2156 	mp = dp->i_mount;
2157 	udqp = gdqp = NULL;
	/* The new directory takes dp's project id only with PROJINHERIT. */
2158 	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
2159 		prid = dp->i_d.di_projid;
2160 	else
2161 		prid = (xfs_prid_t)dfltprid;
2162 
2163 	/*
2164 	 * Make sure that we have allocated dquot(s) on disk.
2165 	 */
2166 	error = XFS_QM_DQVOPALLOC(mp, dp,
2167 			current_fsuid(), current_fsgid(), prid,
2168 			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
2169 	if (error)
2170 		goto std_return;
2171 
2172 	tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR);
2173 	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
2174 	resblks = XFS_MKDIR_SPACE_RES(mp, dir_name->len);
2175 	error = xfs_trans_reserve(tp, resblks, XFS_MKDIR_LOG_RES(mp), 0,
2176 				  XFS_TRANS_PERM_LOG_RES, XFS_MKDIR_LOG_COUNT);
	/* No room for the block reservation: retry reserving no blocks. */
2177 	if (error == ENOSPC) {
2178 		resblks = 0;
2179 		error = xfs_trans_reserve(tp, 0, XFS_MKDIR_LOG_RES(mp), 0,
2180 					  XFS_TRANS_PERM_LOG_RES,
2181 					  XFS_MKDIR_LOG_COUNT);
2182 	}
2183 	if (error) {
		/* Reservation failed: cancel without XFS_TRANS_RELEASE_LOG_RES. */
2184 		cancel_flags = 0;
2185 		goto error_return;
2186 	}
2187 
2188 	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
2189 	unlock_dp_on_error = B_TRUE;
2190 
2191 	/*
2192 	 * Check for directory link count overflow.
2193 	 */
2194 	if (dp->i_d.di_nlink >= XFS_MAXLINK) {
2195 		error = XFS_ERROR(EMLINK);
2196 		goto error_return;
2197 	}
2198 
2199 	/*
2200 	 * Reserve disk quota and the inode.
2201 	 */
2202 	error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
2203 	if (error)
2204 		goto error_return;
2205 
2206 	error = xfs_dir_canenter(tp, dp, dir_name, resblks);
2207 	if (error)
2208 		goto error_return;
2209 	/*
2210 	 * create the directory inode.
2211 	 */
2212 	error = xfs_dir_ialloc(&tp, dp, mode, 2,
2213 			0, credp, prid, resblks > 0,
2214 		&cdp, NULL);
2215 	if (error) {
		/*
		 * ENOSPC cancels the transaction without XFS_TRANS_ABORT;
		 * every other error goes through abort_return, which adds it.
		 */
2216 		if (error == ENOSPC)
2217 			goto error_return;
2218 		goto abort_return;
2219 	}
2220 	xfs_itrace_ref(cdp);
2221 
2222 	/*
2223 	 * Now we add the directory inode to the transaction.
2224 	 * We waited until now since xfs_dir_ialloc might start
2225 	 * a new transaction.  Had we joined the transaction
2226 	 * earlier, the locks might have gotten released. An error
2227 	 * from here on will result in the transaction cancel
2228 	 * unlocking dp so don't do it explicitly in the error path.
2229 	 */
2230 	IHOLD(dp);
2231 	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
2232 	unlock_dp_on_error = B_FALSE;
2233 
2234 	xfs_bmap_init(&free_list, &first_block);
2235 
	/*
	 * The directory code gets what is left of the block reservation
	 * after subtracting XFS_IALLOC_SPACE_RES(mp), or 0 when no blocks
	 * were reserved.
	 */
2236 	error = xfs_dir_createname(tp, dp, dir_name, cdp->i_ino,
2237 					&first_block, &free_list, resblks ?
2238 					resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
2239 	if (error) {
2240 		ASSERT(error != ENOSPC);
2241 		goto error1;
2242 	}
2243 	xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
2244 
2245 	error = xfs_dir_init(tp, cdp, dp);
2246 	if (error)
2247 		goto error2;
2248 
2249 	error = xfs_bumplink(tp, dp);
2250 	if (error)
2251 		goto error2;
2252 
	/* From here on std_return hands cdp to the POSTCREATE event. */
2253 	created = B_TRUE;
2254 
	/*
	 * Publish the new inode and take a reference on it; that reference
	 * is dropped again with IRELE() below if xfs_bmap_finish() or
	 * xfs_trans_commit() fails.
	 */
2255 	*ipp = cdp;
2256 	IHOLD(cdp);
2257 
2258 	/*
2259 	 * Attach the dquots to the new inode and modify the icount incore.
2260 	 */
2261 	XFS_QM_DQVOPCREATE(mp, tp, cdp, udqp, gdqp);
2262 
2263 	/*
2264 	 * If this is a synchronous mount, make sure that the
2265 	 * mkdir transaction goes to disk before returning to
2266 	 * the user.
2267 	 */
2268 	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
2269 		xfs_trans_set_sync(tp);
2270 	}
2271 
2272 	error = xfs_bmap_finish(&tp, &free_list, &committed);
2273 	if (error) {
2274 		IRELE(cdp);
2275 		goto error2;
2276 	}
2277 
2278 	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
2279 	XFS_QM_DQRELE(mp, udqp);
2280 	XFS_QM_DQRELE(mp, gdqp);
2281 	if (error) {
2282 		IRELE(cdp);
2283 	}
2284 
2285 	/* Fall through to std_return with error = 0 or errno from
2286 	 * xfs_trans_commit. */
2287 
	/*
	 * Common exit.  The POSTCREATE event is sent when the directory was
	 * created, or when a failure follows a CREATE event that was sent;
	 * the new inode is passed along only when created is set.
	 */
2288 std_return:
2289 	if ((created || (error != 0 && dm_event_sent != 0)) &&
2290 	    DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) {
2291 		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE,
2292 					dp, DM_RIGHT_NULL,
2293 					created ? cdp : NULL,
2294 					DM_RIGHT_NULL,
2295 					dir_name->name, NULL,
2296 					mode, error, 0);
2297 	}
2298 	return error;
2299 
	/*
	 * Error unwinding, entered at the label matching how far we got:
	 *   error2/error1 - free_list is initialised: cancel it, then abort
	 *   abort_return  - cancel the transaction with XFS_TRANS_ABORT
	 *   error_return  - cancel the transaction with cancel_flags as is
	 * All of them release the dquots, unlock dp if it is still locked
	 * outside the transaction, and leave through std_return.
	 */
2300  error2:
2301  error1:
2302 	xfs_bmap_cancel(&free_list);
2303  abort_return:
2304 	cancel_flags |= XFS_TRANS_ABORT;
2305  error_return:
2306 	xfs_trans_cancel(tp, cancel_flags);
2307 	XFS_QM_DQRELE(mp, udqp);
2308 	XFS_QM_DQRELE(mp, gdqp);
2309 
2310 	if (unlock_dp_on_error)
2311 		xfs_iunlock(dp, XFS_ILOCK_EXCL);
2312 
2313 	goto std_return;
2314 }
2315 
/*
 * xfs_symlink()
 *	Create a symbolic link named link_name in directory dp whose
 *	contents are target_path.
 *
 *	When the target fits in the new inode's data fork
 *	(pathlen <= XFS_IFORK_DSIZE(ip)) it is stored inline; otherwise
 *	blocks are mapped with xfs_bmapi() and the path is copied into
 *	buffers obtained for those blocks.
 *
 *	dp          - directory that receives the new entry
 *	link_name   - name of the new entry
 *	target_path - NUL-terminated link target; its strlen() must be
 *		      below MAXPATHLEN
 *	mode        - only the bits outside S_IFMT are used; the file type
 *		      is forced to S_IFLNK
 *	ipp         - out: NULL on entry and on error, the new inode on
 *		      success
 *	credp       - credentials handed to xfs_dir_ialloc()
 *
 * RETURNS:
 *       0 on success
 *      errno on error (EIO when the filesystem is shut down, ENAMETOOLONG
 *	for an over-long target, EPERM when dp has XFS_DIFLAG_NOSYMLINKS
 *	set, or whatever a callee returned)
 */
2316 int
xfs_symlink(xfs_inode_t * dp,struct xfs_name * link_name,const char * target_path,mode_t mode,xfs_inode_t ** ipp,cred_t * credp)2317 xfs_symlink(
2318 	xfs_inode_t		*dp,
2319 	struct xfs_name		*link_name,
2320 	const char		*target_path,
2321 	mode_t			mode,
2322 	xfs_inode_t		**ipp,
2323 	cred_t			*credp)
2324 {
2325 	xfs_mount_t		*mp = dp->i_mount;
2326 	xfs_trans_t		*tp;
2327 	xfs_inode_t		*ip;
2328 	int			error;
2329 	int			pathlen;
2330 	xfs_bmap_free_t		free_list;
2331 	xfs_fsblock_t		first_block;
	/* B_TRUE while dp is locked here but not yet joined to tp */
2332 	boolean_t		unlock_dp_on_error = B_FALSE;
2333 	uint			cancel_flags;
2334 	int			committed;
2335 	xfs_fileoff_t		first_fsb;
2336 	xfs_filblks_t		fs_blocks;
2337 	int			nmaps;
2338 	xfs_bmbt_irec_t		mval[SYMLINK_MAPS];
2339 	xfs_daddr_t		d;
2340 	const char		*cur_chunk;
2341 	int			byte_cnt;
2342 	int			n;
2343 	xfs_buf_t		*bp;
2344 	xfs_prid_t		prid;
2345 	struct xfs_dquot	*udqp, *gdqp;
2346 	uint			resblks;
2347 
2348 	*ipp = NULL;
2349 	error = 0;
2350 	ip = NULL;
2351 	tp = NULL;
2352 
2353 	xfs_itrace_entry(dp);
2354 
2355 	if (XFS_FORCED_SHUTDOWN(mp))
2356 		return XFS_ERROR(EIO);
2357 
2358 	/*
2359 	 * Check component lengths of the target path name.
2360 	 */
2361 	pathlen = strlen(target_path);
2362 	if (pathlen >= MAXPATHLEN)      /* total string too long */
2363 		return XFS_ERROR(ENAMETOOLONG);
2364 
2365 	if (DM_EVENT_ENABLED(dp, DM_EVENT_SYMLINK)) {
2366 		error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dp,
2367 					DM_RIGHT_NULL, NULL, DM_RIGHT_NULL,
2368 					link_name->name, target_path, 0, 0, 0);
2369 		if (error)
2370 			return error;
2371 	}
2372 
2373 	/* Return through std_return after this point. */
2374 
2375 	udqp = gdqp = NULL;
	/* The symlink takes dp's project id only with PROJINHERIT. */
2376 	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
2377 		prid = dp->i_d.di_projid;
2378 	else
2379 		prid = (xfs_prid_t)dfltprid;
2380 
2381 	/*
2382 	 * Make sure that we have allocated dquot(s) on disk.
2383 	 */
2384 	error = XFS_QM_DQVOPALLOC(mp, dp,
2385 			current_fsuid(), current_fsgid(), prid,
2386 			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
2387 	if (error)
2388 		goto std_return;
2389 
2390 	tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK);
2391 	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
2392 	/*
2393 	 * The symlink will fit into the inode data fork?
2394 	 * There can't be any attributes so we get the whole variable part.
2395 	 */
2396 	if (pathlen <= XFS_LITINO(mp))
2397 		fs_blocks = 0;
2398 	else
2399 		fs_blocks = XFS_B_TO_FSB(mp, pathlen);
2400 	resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);
2401 	error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0,
2402 			XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
	/*
	 * Retry with no block reservation only when the target needs no
	 * blocks of its own (fs_blocks == 0).
	 */
2403 	if (error == ENOSPC && fs_blocks == 0) {
2404 		resblks = 0;
2405 		error = xfs_trans_reserve(tp, 0, XFS_SYMLINK_LOG_RES(mp), 0,
2406 				XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
2407 	}
2408 	if (error) {
		/* Reservation failed: cancel without XFS_TRANS_RELEASE_LOG_RES. */
2409 		cancel_flags = 0;
2410 		goto error_return;
2411 	}
2412 
2413 	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
2414 	unlock_dp_on_error = B_TRUE;
2415 
2416 	/*
2417 	 * Check whether the directory allows new symlinks or not.
2418 	 */
2419 	if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) {
2420 		error = XFS_ERROR(EPERM);
2421 		goto error_return;
2422 	}
2423 
2424 	/*
2425 	 * Reserve disk quota : blocks and inode.
2426 	 */
2427 	error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
2428 	if (error)
2429 		goto error_return;
2430 
2431 	/*
2432 	 * Check for ability to enter directory entry, if no space reserved.
2433 	 */
2434 	error = xfs_dir_canenter(tp, dp, link_name, resblks);
2435 	if (error)
2436 		goto error_return;
2437 	/*
2438 	 * Initialize the bmap freelist prior to calling either
2439 	 * bmapi or the directory create code.
2440 	 */
2441 	xfs_bmap_init(&free_list, &first_block);
2442 
2443 	/*
2444 	 * Allocate an inode for the symlink.
2445 	 */
2446 	error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT),
2447 			       1, 0, credp, prid, resblks > 0, &ip, NULL);
2448 	if (error) {
		/*
		 * ENOSPC cancels the transaction without XFS_TRANS_ABORT;
		 * every other error goes through error1, which adds it.
		 */
2449 		if (error == ENOSPC)
2450 			goto error_return;
2451 		goto error1;
2452 	}
2453 	xfs_itrace_ref(ip);
2454 
2455 	/*
2456 	 * An error after we've joined dp to the transaction will result in the
2457 	 * transaction cancel unlocking dp so don't do it explicitly in the
2458 	 * error path.
2459 	 */
2460 	IHOLD(dp);
2461 	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
2462 	unlock_dp_on_error = B_FALSE;
2463 
2464 	/*
2465 	 * Also attach the dquot(s) to it, if applicable.
2466 	 */
2467 	XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp);
2468 
	/*
	 * From here resblks tracks what is left of the block reservation;
	 * the remainder is handed to xfs_bmapi()/xfs_dir_createname() below.
	 */
2469 	if (resblks)
2470 		resblks -= XFS_IALLOC_SPACE_RES(mp);
2471 	/*
2472 	 * If the symlink will fit into the inode, write it inline.
2473 	 */
2474 	if (pathlen <= XFS_IFORK_DSIZE(ip)) {
2475 		xfs_idata_realloc(ip, pathlen, XFS_DATA_FORK);
2476 		memcpy(ip->i_df.if_u1.if_data, target_path, pathlen);
2477 		ip->i_d.di_size = pathlen;
2478 
2479 		/*
2480 		 * The inode was initially created in extent format.
2481 		 */
2482 		ip->i_df.if_flags &= ~(XFS_IFEXTENTS | XFS_IFBROOT);
2483 		ip->i_df.if_flags |= XFS_IFINLINE;
2484 
2485 		ip->i_d.di_format = XFS_DINODE_FMT_LOCAL;
2486 		xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);
2487 
2488 	} else {
2489 		first_fsb = 0;
2490 		nmaps = SYMLINK_MAPS;
2491 
2492 		error = xfs_bmapi(tp, ip, first_fsb, fs_blocks,
2493 				  XFS_BMAPI_WRITE | XFS_BMAPI_METADATA,
2494 				  &first_block, resblks, mval, &nmaps,
2495 				  &free_list, NULL);
2496 		if (error) {
2497 			goto error1;
2498 		}
2499 
2500 		if (resblks)
2501 			resblks -= fs_blocks;
2502 		ip->i_d.di_size = pathlen;
2503 		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
2504 
		/*
		 * Copy the path into the mapped extents, one buffer per
		 * mapping.  pathlen is reused as the count of bytes that
		 * still have to be copied; di_size was set from it above.
		 */
2505 		cur_chunk = target_path;
2506 		for (n = 0; n < nmaps; n++) {
2507 			d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
2508 			byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
2509 			bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
2510 					       BTOBB(byte_cnt), 0);
2511 			ASSERT(bp && !XFS_BUF_GETERROR(bp));
			/* Final chunk: copy only what is left of the path. */
2512 			if (pathlen < byte_cnt) {
2513 				byte_cnt = pathlen;
2514 			}
2515 			pathlen -= byte_cnt;
2516 
2517 			memcpy(XFS_BUF_PTR(bp), cur_chunk, byte_cnt);
2518 			cur_chunk += byte_cnt;
2519 
			/* Log just the bytes that were written to the buffer. */
2520 			xfs_trans_log_buf(tp, bp, 0, byte_cnt - 1);
2521 		}
2522 	}
2523 
2524 	/*
2525 	 * Create the directory entry for the symlink.
2526 	 */
2527 	error = xfs_dir_createname(tp, dp, link_name, ip->i_ino,
2528 					&first_block, &free_list, resblks);
2529 	if (error)
2530 		goto error1;
2531 	xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
2532 	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
2533 
2534 	/*
2535 	 * If this is a synchronous mount, make sure that the
2536 	 * symlink transaction goes to disk before returning to
2537 	 * the user.
2538 	 */
2539 	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
2540 		xfs_trans_set_sync(tp);
2541 	}
2542 
2543 	/*
2544 	 * xfs_trans_commit normally decrements the vnode ref count
2545 	 * when it unlocks the inode. Since we want to return the
2546 	 * vnode to the caller, we bump the vnode ref count now.
2547 	 */
2548 	IHOLD(ip);
2549 
2550 	error = xfs_bmap_finish(&tp, &free_list, &committed);
2551 	if (error) {
2552 		goto error2;
2553 	}
2554 	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
2555 	XFS_QM_DQRELE(mp, udqp);
2556 	XFS_QM_DQRELE(mp, gdqp);
2557 
2558 	/* Fall through to std_return with error = 0 or errno from
2559 	 * xfs_trans_commit	*/
	/*
	 * Common exit.  The POSTSYMLINK event carries the new inode only
	 * when error is 0, and *ipp is set only in that case as well.
	 */
2560 std_return:
2561 	if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTSYMLINK)) {
2562 		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTSYMLINK,
2563 					dp, DM_RIGHT_NULL,
2564 					error ? NULL : ip,
2565 					DM_RIGHT_NULL, link_name->name,
2566 					target_path, 0, error, 0);
2567 	}
2568 
2569 	if (!error)
2570 		*ipp = ip;
2571 	return error;
2572 
	/*
	 * Error unwinding, entered at the label matching how far we got:
	 *   error2       - drop the reference taken by IHOLD(ip) above
	 *   error1       - cancel free_list and mark the cancel as an abort
	 *   error_return - cancel the transaction with cancel_flags as is
	 * All of them release the dquots, unlock dp if it is still locked
	 * outside the transaction, and leave through std_return.
	 */
2573  error2:
2574 	IRELE(ip);
2575  error1:
2576 	xfs_bmap_cancel(&free_list);
2577 	cancel_flags |= XFS_TRANS_ABORT;
2578  error_return:
2579 	xfs_trans_cancel(tp, cancel_flags);
2580 	XFS_QM_DQRELE(mp, udqp);
2581 	XFS_QM_DQRELE(mp, gdqp);
2582 
2583 	if (unlock_dp_on_error)
2584 		xfs_iunlock(dp, XFS_ILOCK_EXCL);
2585 
2586 	goto std_return;
2587 }
2588 
2589 int
xfs_inode_flush(xfs_inode_t * ip,int flags)2590 xfs_inode_flush(
2591 	xfs_inode_t	*ip,
2592 	int		flags)
2593 {
2594 	xfs_mount_t	*mp = ip->i_mount;
2595 	int		error = 0;
2596 
2597 	if (XFS_FORCED_SHUTDOWN(mp))
2598 		return XFS_ERROR(EIO);
2599 
2600 	/*
2601 	 * Bypass inodes which have already been cleaned by
2602 	 * the inode flush clustering code inside xfs_iflush
2603 	 */
2604 	if (xfs_inode_clean(ip))
2605 		return 0;
2606 
2607 	/*
2608 	 * We make this non-blocking if the inode is contended,
2609 	 * return EAGAIN to indicate to the caller that they
2610 	 * did not succeed. This prevents the flush path from
2611 	 * blocking on inodes inside another operation right
2612 	 * now, they get caught later by xfs_sync.
2613 	 */
2614 	if (flags & FLUSH_SYNC) {
2615 		xfs_ilock(ip, XFS_ILOCK_SHARED);
2616 		xfs_iflock(ip);
2617 	} else if (xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
2618 		if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) {
2619 			xfs_iunlock(ip, XFS_ILOCK_SHARED);
2620 			return EAGAIN;
2621 		}
2622 	} else {
2623 		return EAGAIN;
2624 	}
2625 
2626 	error = xfs_iflush(ip, (flags & FLUSH_SYNC) ? XFS_IFLUSH_SYNC
2627 						    : XFS_IFLUSH_ASYNC_NOBLOCK);
2628 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
2629 
2630 	return error;
2631 }
2632 
2633 
2634 int
xfs_set_dmattrs(xfs_inode_t * ip,u_int evmask,u_int16_t state)2635 xfs_set_dmattrs(
2636 	xfs_inode_t     *ip,
2637 	u_int		evmask,
2638 	u_int16_t	state)
2639 {
2640 	xfs_mount_t	*mp = ip->i_mount;
2641 	xfs_trans_t	*tp;
2642 	int		error;
2643 
2644 	if (!capable(CAP_SYS_ADMIN))
2645 		return XFS_ERROR(EPERM);
2646 
2647 	if (XFS_FORCED_SHUTDOWN(mp))
2648 		return XFS_ERROR(EIO);
2649 
2650 	tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS);
2651 	error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES (mp), 0, 0, 0);
2652 	if (error) {
2653 		xfs_trans_cancel(tp, 0);
2654 		return error;
2655 	}
2656 	xfs_ilock(ip, XFS_ILOCK_EXCL);
2657 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
2658 
2659 	ip->i_d.di_dmevmask = evmask;
2660 	ip->i_d.di_dmstate  = state;
2661 
2662 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
2663 	IHOLD(ip);
2664 	error = xfs_trans_commit(tp, 0);
2665 
2666 	return error;
2667 }
2668 
2669 int
xfs_reclaim(xfs_inode_t * ip)2670 xfs_reclaim(
2671 	xfs_inode_t	*ip)
2672 {
2673 
2674 	xfs_itrace_entry(ip);
2675 
2676 	ASSERT(!VN_MAPPED(VFS_I(ip)));
2677 
2678 	/* bad inode, get out here ASAP */
2679 	if (VN_BAD(VFS_I(ip))) {
2680 		xfs_ireclaim(ip);
2681 		return 0;
2682 	}
2683 
2684 	xfs_ioend_wait(ip);
2685 
2686 	ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
2687 
2688 	/*
2689 	 * Make sure the atime in the XFS inode is correct before freeing the
2690 	 * Linux inode.
2691 	 */
2692 	xfs_synchronize_atime(ip);
2693 
2694 	/*
2695 	 * If we have nothing to flush with this inode then complete the
2696 	 * teardown now, otherwise break the link between the xfs inode and the
2697 	 * linux inode and clean up the xfs inode later. This avoids flushing
2698 	 * the inode to disk during the delete operation itself.
2699 	 *
2700 	 * When breaking the link, we need to set the XFS_IRECLAIMABLE flag
2701 	 * first to ensure that xfs_iunpin() will never see an xfs inode
2702 	 * that has a linux inode being reclaimed. Synchronisation is provided
2703 	 * by the i_flags_lock.
2704 	 */
2705 	if (!ip->i_update_core && (ip->i_itemp == NULL)) {
2706 		xfs_ilock(ip, XFS_ILOCK_EXCL);
2707 		xfs_iflock(ip);
2708 		xfs_iflags_set(ip, XFS_IRECLAIMABLE);
2709 		return xfs_reclaim_inode(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC);
2710 	}
2711 	xfs_inode_set_reclaim_tag(ip);
2712 	return 0;
2713 }
2714 
2715 /*
2716  * xfs_alloc_file_space()
2717  *      This routine allocates disk space for the given file.
2718  *
2719  *	If alloc_type == 0, this request is for an ALLOCSP type
2720  *	request which will change the file size.  In this case, no
2721  *	DMAPI event will be generated by the call.  A TRUNCATE event
2722  *	will be generated later by xfs_setattr.
2723  *
2724  *	If alloc_type != 0, this request is for a RESVSP type
2725  *	request, and a DMAPI DM_EVENT_WRITE will be generated if the
2726  *	lower block boundary byte address is less than the file's
2727  *	length.
2728  *
2729  * RETURNS:
2730  *       0 on success
2731  *      errno on error
2732  *
2733  */
2734 STATIC int
xfs_alloc_file_space(xfs_inode_t * ip,xfs_off_t offset,xfs_off_t len,int alloc_type,int attr_flags)2735 xfs_alloc_file_space(
2736 	xfs_inode_t		*ip,
2737 	xfs_off_t		offset,
2738 	xfs_off_t		len,
2739 	int			alloc_type,
2740 	int			attr_flags)
2741 {
2742 	xfs_mount_t		*mp = ip->i_mount;
2743 	xfs_off_t		count;
2744 	xfs_filblks_t		allocated_fsb;
2745 	xfs_filblks_t		allocatesize_fsb;
2746 	xfs_extlen_t		extsz, temp;
2747 	xfs_fileoff_t		startoffset_fsb;
2748 	xfs_fsblock_t		firstfsb;
2749 	int			nimaps;
2750 	int			bmapi_flag;
2751 	int			quota_flag;
2752 	int			rt;
2753 	xfs_trans_t		*tp;
2754 	xfs_bmbt_irec_t		imaps[1], *imapp;
2755 	xfs_bmap_free_t		free_list;
2756 	uint			qblocks, resblks, resrtextents;
2757 	int			committed;
2758 	int			error;
2759 
2760 	xfs_itrace_entry(ip);
2761 
2762 	if (XFS_FORCED_SHUTDOWN(mp))
2763 		return XFS_ERROR(EIO);
2764 
2765 	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
2766 		return error;
2767 
2768 	if (len <= 0)
2769 		return XFS_ERROR(EINVAL);
2770 
2771 	rt = XFS_IS_REALTIME_INODE(ip);
2772 	extsz = xfs_get_extsz_hint(ip);
2773 
2774 	count = len;
2775 	imapp = &imaps[0];
2776 	nimaps = 1;
2777 	bmapi_flag = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0);
2778 	startoffset_fsb	= XFS_B_TO_FSBT(mp, offset);
2779 	allocatesize_fsb = XFS_B_TO_FSB(mp, count);
2780 
2781 	/*	Generate a DMAPI event if needed.	*/
2782 	if (alloc_type != 0 && offset < ip->i_size &&
2783 			(attr_flags & XFS_ATTR_DMI) == 0  &&
2784 			DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) {
2785 		xfs_off_t           end_dmi_offset;
2786 
2787 		end_dmi_offset = offset+len;
2788 		if (end_dmi_offset > ip->i_size)
2789 			end_dmi_offset = ip->i_size;
2790 		error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, offset,
2791 				      end_dmi_offset - offset, 0, NULL);
2792 		if (error)
2793 			return error;
2794 	}
2795 
2796 	/*
2797 	 * Allocate file space until done or until there is an error
2798 	 */
2799 retry:
2800 	while (allocatesize_fsb && !error) {
2801 		xfs_fileoff_t	s, e;
2802 
2803 		/*
2804 		 * Determine space reservations for data/realtime.
2805 		 */
2806 		if (unlikely(extsz)) {
2807 			s = startoffset_fsb;
2808 			do_div(s, extsz);
2809 			s *= extsz;
2810 			e = startoffset_fsb + allocatesize_fsb;
2811 			if ((temp = do_mod(startoffset_fsb, extsz)))
2812 				e += temp;
2813 			if ((temp = do_mod(e, extsz)))
2814 				e += extsz - temp;
2815 		} else {
2816 			s = 0;
2817 			e = allocatesize_fsb;
2818 		}
2819 
2820 		if (unlikely(rt)) {
2821 			resrtextents = qblocks = (uint)(e - s);
2822 			resrtextents /= mp->m_sb.sb_rextsize;
2823 			resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
2824 			quota_flag = XFS_QMOPT_RES_RTBLKS;
2825 		} else {
2826 			resrtextents = 0;
2827 			resblks = qblocks = \
2828 				XFS_DIOSTRAT_SPACE_RES(mp, (uint)(e - s));
2829 			quota_flag = XFS_QMOPT_RES_REGBLKS;
2830 		}
2831 
2832 		/*
2833 		 * Allocate and setup the transaction.
2834 		 */
2835 		tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
2836 		error = xfs_trans_reserve(tp, resblks,
2837 					  XFS_WRITE_LOG_RES(mp), resrtextents,
2838 					  XFS_TRANS_PERM_LOG_RES,
2839 					  XFS_WRITE_LOG_COUNT);
2840 		/*
2841 		 * Check for running out of space
2842 		 */
2843 		if (error) {
2844 			/*
2845 			 * Free the transaction structure.
2846 			 */
2847 			ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
2848 			xfs_trans_cancel(tp, 0);
2849 			break;
2850 		}
2851 		xfs_ilock(ip, XFS_ILOCK_EXCL);
2852 		error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip,
2853 						      qblocks, 0, quota_flag);
2854 		if (error)
2855 			goto error1;
2856 
2857 		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
2858 		xfs_trans_ihold(tp, ip);
2859 
2860 		/*
2861 		 * Issue the xfs_bmapi() call to allocate the blocks
2862 		 */
2863 		xfs_bmap_init(&free_list, &firstfsb);
2864 		error = xfs_bmapi(tp, ip, startoffset_fsb,
2865 				  allocatesize_fsb, bmapi_flag,
2866 				  &firstfsb, 0, imapp, &nimaps,
2867 				  &free_list, NULL);
2868 		if (error) {
2869 			goto error0;
2870 		}
2871 
2872 		/*
2873 		 * Complete the transaction
2874 		 */
2875 		error = xfs_bmap_finish(&tp, &free_list, &committed);
2876 		if (error) {
2877 			goto error0;
2878 		}
2879 
2880 		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
2881 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
2882 		if (error) {
2883 			break;
2884 		}
2885 
2886 		allocated_fsb = imapp->br_blockcount;
2887 
2888 		if (nimaps == 0) {
2889 			error = XFS_ERROR(ENOSPC);
2890 			break;
2891 		}
2892 
2893 		startoffset_fsb += allocated_fsb;
2894 		allocatesize_fsb -= allocated_fsb;
2895 	}
2896 dmapi_enospc_check:
2897 	if (error == ENOSPC && (attr_flags & XFS_ATTR_DMI) == 0 &&
2898 	    DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE)) {
2899 		error = XFS_SEND_NAMESP(mp, DM_EVENT_NOSPACE,
2900 				ip, DM_RIGHT_NULL,
2901 				ip, DM_RIGHT_NULL,
2902 				NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */
2903 		if (error == 0)
2904 			goto retry;	/* Maybe DMAPI app. has made space */
2905 		/* else fall through with error from XFS_SEND_DATA */
2906 	}
2907 
2908 	return error;
2909 
2910 error0:	/* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
2911 	xfs_bmap_cancel(&free_list);
2912 	XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag);
2913 
2914 error1:	/* Just cancel transaction */
2915 	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
2916 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
2917 	goto dmapi_enospc_check;
2918 }
2919 
2920 /*
2921  * Zero file bytes between startoff and endoff inclusive.
2922  * The iolock is held exclusive and no blocks are buffered.
2923  *
2924  * This function is used by xfs_free_file_space() to zero
2925  * partial blocks when the range to free is not block aligned.
2926  * When unreserving space with boundaries that are not block
2927  * aligned we round up the start and round down the end
2928  * boundaries and then use this function to zero the parts of
2929  * the blocks that got dropped during the rounding.
2930  */
2931 STATIC int
xfs_zero_remaining_bytes(xfs_inode_t * ip,xfs_off_t startoff,xfs_off_t endoff)2932 xfs_zero_remaining_bytes(
2933 	xfs_inode_t		*ip,
2934 	xfs_off_t		startoff,
2935 	xfs_off_t		endoff)
2936 {
2937 	xfs_bmbt_irec_t		imap;
2938 	xfs_fileoff_t		offset_fsb;
2939 	xfs_off_t		lastoffset;
2940 	xfs_off_t		offset;
2941 	xfs_buf_t		*bp;
2942 	xfs_mount_t		*mp = ip->i_mount;
2943 	int			nimap;
2944 	int			error = 0;
2945 
2946 	/*
2947 	 * Avoid doing I/O beyond eof - it's not necessary
2948 	 * since nothing can read beyond eof.  The space will
2949 	 * be zeroed when the file is extended anyway.
2950 	 */
2951 	if (startoff >= ip->i_size)
2952 		return 0;
2953 
2954 	if (endoff > ip->i_size)
2955 		endoff = ip->i_size;
2956 
2957 	bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize,
2958 				XFS_IS_REALTIME_INODE(ip) ?
2959 				mp->m_rtdev_targp : mp->m_ddev_targp);
2960 	if (!bp)
2961 		return XFS_ERROR(ENOMEM);
2962 
2963 	for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
2964 		offset_fsb = XFS_B_TO_FSBT(mp, offset);
2965 		nimap = 1;
2966 		error = xfs_bmapi(NULL, ip, offset_fsb, 1, 0,
2967 			NULL, 0, &imap, &nimap, NULL, NULL);
2968 		if (error || nimap < 1)
2969 			break;
2970 		ASSERT(imap.br_blockcount >= 1);
2971 		ASSERT(imap.br_startoff == offset_fsb);
2972 		lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1;
2973 		if (lastoffset > endoff)
2974 			lastoffset = endoff;
2975 		if (imap.br_startblock == HOLESTARTBLOCK)
2976 			continue;
2977 		ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
2978 		if (imap.br_state == XFS_EXT_UNWRITTEN)
2979 			continue;
2980 		XFS_BUF_UNDONE(bp);
2981 		XFS_BUF_UNWRITE(bp);
2982 		XFS_BUF_READ(bp);
2983 		XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock));
2984 		xfsbdstrat(mp, bp);
2985 		error = xfs_iowait(bp);
2986 		if (error) {
2987 			xfs_ioerror_alert("xfs_zero_remaining_bytes(read)",
2988 					  mp, bp, XFS_BUF_ADDR(bp));
2989 			break;
2990 		}
2991 		memset(XFS_BUF_PTR(bp) +
2992 			(offset - XFS_FSB_TO_B(mp, imap.br_startoff)),
2993 		      0, lastoffset - offset + 1);
2994 		XFS_BUF_UNDONE(bp);
2995 		XFS_BUF_UNREAD(bp);
2996 		XFS_BUF_WRITE(bp);
2997 		xfsbdstrat(mp, bp);
2998 		error = xfs_iowait(bp);
2999 		if (error) {
3000 			xfs_ioerror_alert("xfs_zero_remaining_bytes(write)",
3001 					  mp, bp, XFS_BUF_ADDR(bp));
3002 			break;
3003 		}
3004 	}
3005 	xfs_buf_free(bp);
3006 	return error;
3007 }
3008 
3009 /*
3010  * xfs_free_file_space()
3011  *      This routine frees disk space for the given file.
3012  *
3013  *	This routine is only called by xfs_change_file_space
3014  *	for an UNRESVSP type call.
3015  *
3016  * RETURNS:
3017  *       0 on success
3018  *      errno on error
3019  *
3020  */
3021 STATIC int
xfs_free_file_space(xfs_inode_t * ip,xfs_off_t offset,xfs_off_t len,int attr_flags)3022 xfs_free_file_space(
3023 	xfs_inode_t		*ip,
3024 	xfs_off_t		offset,
3025 	xfs_off_t		len,
3026 	int			attr_flags)
3027 {
3028 	int			committed;
3029 	int			done;
3030 	xfs_off_t		end_dmi_offset;
3031 	xfs_fileoff_t		endoffset_fsb;
3032 	int			error;
3033 	xfs_fsblock_t		firstfsb;
3034 	xfs_bmap_free_t		free_list;
3035 	xfs_bmbt_irec_t		imap;
3036 	xfs_off_t		ioffset;
3037 	xfs_extlen_t		mod=0;
3038 	xfs_mount_t		*mp;
3039 	int			nimap;
3040 	uint			resblks;
3041 	uint			rounding;
3042 	int			rt;
3043 	xfs_fileoff_t		startoffset_fsb;
3044 	xfs_trans_t		*tp;
3045 	int			need_iolock = 1;
3046 
3047 	mp = ip->i_mount;
3048 
3049 	xfs_itrace_entry(ip);
3050 
3051 	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
3052 		return error;
3053 
3054 	error = 0;
3055 	if (len <= 0)	/* if nothing being freed */
3056 		return error;
3057 	rt = XFS_IS_REALTIME_INODE(ip);
3058 	startoffset_fsb	= XFS_B_TO_FSB(mp, offset);
3059 	end_dmi_offset = offset + len;
3060 	endoffset_fsb = XFS_B_TO_FSBT(mp, end_dmi_offset);
3061 
3062 	if (offset < ip->i_size && (attr_flags & XFS_ATTR_DMI) == 0 &&
3063 	    DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) {
3064 		if (end_dmi_offset > ip->i_size)
3065 			end_dmi_offset = ip->i_size;
3066 		error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip,
3067 				offset, end_dmi_offset - offset,
3068 				AT_DELAY_FLAG(attr_flags), NULL);
3069 		if (error)
3070 			return error;
3071 	}
3072 
3073 	if (attr_flags & XFS_ATTR_NOLOCK)
3074 		need_iolock = 0;
3075 	if (need_iolock) {
3076 		xfs_ilock(ip, XFS_IOLOCK_EXCL);
3077 		/* wait for the completion of any pending DIOs */
3078 		xfs_ioend_wait(ip);
3079 	}
3080 
3081 	rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
3082 	ioffset = offset & ~(rounding - 1);
3083 
3084 	if (VN_CACHED(VFS_I(ip)) != 0) {
3085 		xfs_inval_cached_trace(ip, ioffset, -1, ioffset, -1);
3086 		error = xfs_flushinval_pages(ip, ioffset, -1, FI_REMAPF_LOCKED);
3087 		if (error)
3088 			goto out_unlock_iolock;
3089 	}
3090 
3091 	/*
3092 	 * Need to zero the stuff we're not freeing, on disk.
3093 	 * If its a realtime file & can't use unwritten extents then we
3094 	 * actually need to zero the extent edges.  Otherwise xfs_bunmapi
3095 	 * will take care of it for us.
3096 	 */
3097 	if (rt && !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
3098 		nimap = 1;
3099 		error = xfs_bmapi(NULL, ip, startoffset_fsb,
3100 			1, 0, NULL, 0, &imap, &nimap, NULL, NULL);
3101 		if (error)
3102 			goto out_unlock_iolock;
3103 		ASSERT(nimap == 0 || nimap == 1);
3104 		if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
3105 			xfs_daddr_t	block;
3106 
3107 			ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
3108 			block = imap.br_startblock;
3109 			mod = do_div(block, mp->m_sb.sb_rextsize);
3110 			if (mod)
3111 				startoffset_fsb += mp->m_sb.sb_rextsize - mod;
3112 		}
3113 		nimap = 1;
3114 		error = xfs_bmapi(NULL, ip, endoffset_fsb - 1,
3115 			1, 0, NULL, 0, &imap, &nimap, NULL, NULL);
3116 		if (error)
3117 			goto out_unlock_iolock;
3118 		ASSERT(nimap == 0 || nimap == 1);
3119 		if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
3120 			ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
3121 			mod++;
3122 			if (mod && (mod != mp->m_sb.sb_rextsize))
3123 				endoffset_fsb -= mod;
3124 		}
3125 	}
3126 	if ((done = (endoffset_fsb <= startoffset_fsb)))
3127 		/*
3128 		 * One contiguous piece to clear
3129 		 */
3130 		error = xfs_zero_remaining_bytes(ip, offset, offset + len - 1);
3131 	else {
3132 		/*
3133 		 * Some full blocks, possibly two pieces to clear
3134 		 */
3135 		if (offset < XFS_FSB_TO_B(mp, startoffset_fsb))
3136 			error = xfs_zero_remaining_bytes(ip, offset,
3137 				XFS_FSB_TO_B(mp, startoffset_fsb) - 1);
3138 		if (!error &&
3139 		    XFS_FSB_TO_B(mp, endoffset_fsb) < offset + len)
3140 			error = xfs_zero_remaining_bytes(ip,
3141 				XFS_FSB_TO_B(mp, endoffset_fsb),
3142 				offset + len - 1);
3143 	}
3144 
3145 	/*
3146 	 * free file space until done or until there is an error
3147 	 */
3148 	resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
3149 	while (!error && !done) {
3150 
3151 		/*
3152 		 * allocate and setup the transaction. Allow this
3153 		 * transaction to dip into the reserve blocks to ensure
3154 		 * the freeing of the space succeeds at ENOSPC.
3155 		 */
3156 		tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
3157 		tp->t_flags |= XFS_TRANS_RESERVE;
3158 		error = xfs_trans_reserve(tp,
3159 					  resblks,
3160 					  XFS_WRITE_LOG_RES(mp),
3161 					  0,
3162 					  XFS_TRANS_PERM_LOG_RES,
3163 					  XFS_WRITE_LOG_COUNT);
3164 
3165 		/*
3166 		 * check for running out of space
3167 		 */
3168 		if (error) {
3169 			/*
3170 			 * Free the transaction structure.
3171 			 */
3172 			ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
3173 			xfs_trans_cancel(tp, 0);
3174 			break;
3175 		}
3176 		xfs_ilock(ip, XFS_ILOCK_EXCL);
3177 		error = XFS_TRANS_RESERVE_QUOTA(mp, tp,
3178 				ip->i_udquot, ip->i_gdquot, resblks, 0,
3179 				XFS_QMOPT_RES_REGBLKS);
3180 		if (error)
3181 			goto error1;
3182 
3183 		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
3184 		xfs_trans_ihold(tp, ip);
3185 
3186 		/*
3187 		 * issue the bunmapi() call to free the blocks
3188 		 */
3189 		xfs_bmap_init(&free_list, &firstfsb);
3190 		error = xfs_bunmapi(tp, ip, startoffset_fsb,
3191 				  endoffset_fsb - startoffset_fsb,
3192 				  0, 2, &firstfsb, &free_list, NULL, &done);
3193 		if (error) {
3194 			goto error0;
3195 		}
3196 
3197 		/*
3198 		 * complete the transaction
3199 		 */
3200 		error = xfs_bmap_finish(&tp, &free_list, &committed);
3201 		if (error) {
3202 			goto error0;
3203 		}
3204 
3205 		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
3206 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
3207 	}
3208 
3209  out_unlock_iolock:
3210 	if (need_iolock)
3211 		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
3212 	return error;
3213 
3214  error0:
3215 	xfs_bmap_cancel(&free_list);
3216  error1:
3217 	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
3218 	xfs_iunlock(ip, need_iolock ? (XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL) :
3219 		    XFS_ILOCK_EXCL);
3220 	return error;
3221 }
3222 
3223 /*
3224  * xfs_change_file_space()
3225  *      This routine allocates or frees disk space for the given file.
3226  *      The user specified parameters are checked for alignment and size
3227  *      limitations.
3228  *
3229  * RETURNS:
3230  *       0 on success
3231  *      errno on error
3232  *
3233  */
3234 int
xfs_change_file_space(xfs_inode_t * ip,int cmd,xfs_flock64_t * bf,xfs_off_t offset,int attr_flags)3235 xfs_change_file_space(
3236 	xfs_inode_t	*ip,
3237 	int		cmd,
3238 	xfs_flock64_t	*bf,
3239 	xfs_off_t	offset,
3240 	int		attr_flags)
3241 {
3242 	xfs_mount_t	*mp = ip->i_mount;
3243 	int		clrprealloc;
3244 	int		error;
3245 	xfs_fsize_t	fsize;
3246 	int		setprealloc;
3247 	xfs_off_t	startoffset;
3248 	xfs_off_t	llen;
3249 	xfs_trans_t	*tp;
3250 	struct iattr	iattr;
3251 
3252 	xfs_itrace_entry(ip);
3253 
3254 	if (!S_ISREG(ip->i_d.di_mode))
3255 		return XFS_ERROR(EINVAL);
3256 
3257 	switch (bf->l_whence) {
3258 	case 0: /*SEEK_SET*/
3259 		break;
3260 	case 1: /*SEEK_CUR*/
3261 		bf->l_start += offset;
3262 		break;
3263 	case 2: /*SEEK_END*/
3264 		bf->l_start += ip->i_size;
3265 		break;
3266 	default:
3267 		return XFS_ERROR(EINVAL);
3268 	}
3269 
3270 	llen = bf->l_len > 0 ? bf->l_len - 1 : bf->l_len;
3271 
3272 	if (   (bf->l_start < 0)
3273 	    || (bf->l_start > XFS_MAXIOFFSET(mp))
3274 	    || (bf->l_start + llen < 0)
3275 	    || (bf->l_start + llen > XFS_MAXIOFFSET(mp)))
3276 		return XFS_ERROR(EINVAL);
3277 
3278 	bf->l_whence = 0;
3279 
3280 	startoffset = bf->l_start;
3281 	fsize = ip->i_size;
3282 
3283 	/*
3284 	 * XFS_IOC_RESVSP and XFS_IOC_UNRESVSP will reserve or unreserve
3285 	 * file space.
3286 	 * These calls do NOT zero the data space allocated to the file,
3287 	 * nor do they change the file size.
3288 	 *
3289 	 * XFS_IOC_ALLOCSP and XFS_IOC_FREESP will allocate and free file
3290 	 * space.
3291 	 * These calls cause the new file data to be zeroed and the file
3292 	 * size to be changed.
3293 	 */
3294 	setprealloc = clrprealloc = 0;
3295 
3296 	switch (cmd) {
3297 	case XFS_IOC_RESVSP:
3298 	case XFS_IOC_RESVSP64:
3299 		error = xfs_alloc_file_space(ip, startoffset, bf->l_len,
3300 								1, attr_flags);
3301 		if (error)
3302 			return error;
3303 		setprealloc = 1;
3304 		break;
3305 
3306 	case XFS_IOC_UNRESVSP:
3307 	case XFS_IOC_UNRESVSP64:
3308 		if ((error = xfs_free_file_space(ip, startoffset, bf->l_len,
3309 								attr_flags)))
3310 			return error;
3311 		break;
3312 
3313 	case XFS_IOC_ALLOCSP:
3314 	case XFS_IOC_ALLOCSP64:
3315 	case XFS_IOC_FREESP:
3316 	case XFS_IOC_FREESP64:
3317 		if (startoffset > fsize) {
3318 			error = xfs_alloc_file_space(ip, fsize,
3319 					startoffset - fsize, 0, attr_flags);
3320 			if (error)
3321 				break;
3322 		}
3323 
3324 		iattr.ia_valid = ATTR_SIZE;
3325 		iattr.ia_size = startoffset;
3326 
3327 		error = xfs_setattr(ip, &iattr, attr_flags);
3328 
3329 		if (error)
3330 			return error;
3331 
3332 		clrprealloc = 1;
3333 		break;
3334 
3335 	default:
3336 		ASSERT(0);
3337 		return XFS_ERROR(EINVAL);
3338 	}
3339 
3340 	/*
3341 	 * update the inode timestamp, mode, and prealloc flag bits
3342 	 */
3343 	tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID);
3344 
3345 	if ((error = xfs_trans_reserve(tp, 0, XFS_WRITEID_LOG_RES(mp),
3346 				      0, 0, 0))) {
3347 		/* ASSERT(0); */
3348 		xfs_trans_cancel(tp, 0);
3349 		return error;
3350 	}
3351 
3352 	xfs_ilock(ip, XFS_ILOCK_EXCL);
3353 
3354 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
3355 	xfs_trans_ihold(tp, ip);
3356 
3357 	if ((attr_flags & XFS_ATTR_DMI) == 0) {
3358 		ip->i_d.di_mode &= ~S_ISUID;
3359 
3360 		/*
3361 		 * Note that we don't have to worry about mandatory
3362 		 * file locking being disabled here because we only
3363 		 * clear the S_ISGID bit if the Group execute bit is
3364 		 * on, but if it was on then mandatory locking wouldn't
3365 		 * have been enabled.
3366 		 */
3367 		if (ip->i_d.di_mode & S_IXGRP)
3368 			ip->i_d.di_mode &= ~S_ISGID;
3369 
3370 		xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
3371 	}
3372 	if (setprealloc)
3373 		ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
3374 	else if (clrprealloc)
3375 		ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC;
3376 
3377 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
3378 	xfs_trans_set_sync(tp);
3379 
3380 	error = xfs_trans_commit(tp, 0);
3381 
3382 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
3383 
3384 	return error;
3385 }
3386