• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_format.h"
21 #include "xfs_log_format.h"
22 #include "xfs_trans_resv.h"
23 #include "xfs_mount.h"
24 #include "xfs_inode.h"
25 #include "xfs_trans.h"
26 #include "xfs_inode_item.h"
27 #include "xfs_error.h"
28 #include "xfs_trace.h"
29 #include "xfs_trans_priv.h"
30 #include "xfs_buf_item.h"
31 #include "xfs_log.h"
32 
33 
34 kmem_zone_t	*xfs_ili_zone;		/* inode log item zone */
35 
INODE_ITEM(struct xfs_log_item * lip)36 static inline struct xfs_inode_log_item *INODE_ITEM(struct xfs_log_item *lip)
37 {
38 	return container_of(lip, struct xfs_inode_log_item, ili_item);
39 }
40 
41 STATIC void
xfs_inode_item_data_fork_size(struct xfs_inode_log_item * iip,int * nvecs,int * nbytes)42 xfs_inode_item_data_fork_size(
43 	struct xfs_inode_log_item *iip,
44 	int			*nvecs,
45 	int			*nbytes)
46 {
47 	struct xfs_inode	*ip = iip->ili_inode;
48 
49 	switch (ip->i_d.di_format) {
50 	case XFS_DINODE_FMT_EXTENTS:
51 		if ((iip->ili_fields & XFS_ILOG_DEXT) &&
52 		    ip->i_d.di_nextents > 0 &&
53 		    ip->i_df.if_bytes > 0) {
54 			/* worst case, doesn't subtract delalloc extents */
55 			*nbytes += XFS_IFORK_DSIZE(ip);
56 			*nvecs += 1;
57 		}
58 		break;
59 	case XFS_DINODE_FMT_BTREE:
60 		if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
61 		    ip->i_df.if_broot_bytes > 0) {
62 			*nbytes += ip->i_df.if_broot_bytes;
63 			*nvecs += 1;
64 		}
65 		break;
66 	case XFS_DINODE_FMT_LOCAL:
67 		if ((iip->ili_fields & XFS_ILOG_DDATA) &&
68 		    ip->i_df.if_bytes > 0) {
69 			*nbytes += roundup(ip->i_df.if_bytes, 4);
70 			*nvecs += 1;
71 		}
72 		break;
73 
74 	case XFS_DINODE_FMT_DEV:
75 	case XFS_DINODE_FMT_UUID:
76 		break;
77 	default:
78 		ASSERT(0);
79 		break;
80 	}
81 }
82 
83 STATIC void
xfs_inode_item_attr_fork_size(struct xfs_inode_log_item * iip,int * nvecs,int * nbytes)84 xfs_inode_item_attr_fork_size(
85 	struct xfs_inode_log_item *iip,
86 	int			*nvecs,
87 	int			*nbytes)
88 {
89 	struct xfs_inode	*ip = iip->ili_inode;
90 
91 	switch (ip->i_d.di_aformat) {
92 	case XFS_DINODE_FMT_EXTENTS:
93 		if ((iip->ili_fields & XFS_ILOG_AEXT) &&
94 		    ip->i_d.di_anextents > 0 &&
95 		    ip->i_afp->if_bytes > 0) {
96 			/* worst case, doesn't subtract unused space */
97 			*nbytes += XFS_IFORK_ASIZE(ip);
98 			*nvecs += 1;
99 		}
100 		break;
101 	case XFS_DINODE_FMT_BTREE:
102 		if ((iip->ili_fields & XFS_ILOG_ABROOT) &&
103 		    ip->i_afp->if_broot_bytes > 0) {
104 			*nbytes += ip->i_afp->if_broot_bytes;
105 			*nvecs += 1;
106 		}
107 		break;
108 	case XFS_DINODE_FMT_LOCAL:
109 		if ((iip->ili_fields & XFS_ILOG_ADATA) &&
110 		    ip->i_afp->if_bytes > 0) {
111 			*nbytes += roundup(ip->i_afp->if_bytes, 4);
112 			*nvecs += 1;
113 		}
114 		break;
115 	default:
116 		ASSERT(0);
117 		break;
118 	}
119 }
120 
121 /*
122  * This returns the number of iovecs needed to log the given inode item.
123  *
124  * We need one iovec for the inode log format structure, one for the
125  * inode core, and possibly one for the inode data/extents/b-tree root
126  * and one for the inode attribute data/extents/b-tree root.
127  */
128 STATIC void
xfs_inode_item_size(struct xfs_log_item * lip,int * nvecs,int * nbytes)129 xfs_inode_item_size(
130 	struct xfs_log_item	*lip,
131 	int			*nvecs,
132 	int			*nbytes)
133 {
134 	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
135 	struct xfs_inode	*ip = iip->ili_inode;
136 
137 	*nvecs += 2;
138 	*nbytes += sizeof(struct xfs_inode_log_format) +
139 		   xfs_log_dinode_size(ip->i_d.di_version);
140 
141 	xfs_inode_item_data_fork_size(iip, nvecs, nbytes);
142 	if (XFS_IFORK_Q(ip))
143 		xfs_inode_item_attr_fork_size(iip, nvecs, nbytes);
144 }
145 
146 STATIC void
xfs_inode_item_format_data_fork(struct xfs_inode_log_item * iip,struct xfs_inode_log_format * ilf,struct xfs_log_vec * lv,struct xfs_log_iovec ** vecp)147 xfs_inode_item_format_data_fork(
148 	struct xfs_inode_log_item *iip,
149 	struct xfs_inode_log_format *ilf,
150 	struct xfs_log_vec	*lv,
151 	struct xfs_log_iovec	**vecp)
152 {
153 	struct xfs_inode	*ip = iip->ili_inode;
154 	size_t			data_bytes;
155 
156 	switch (ip->i_d.di_format) {
157 	case XFS_DINODE_FMT_EXTENTS:
158 		iip->ili_fields &=
159 			~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
160 			  XFS_ILOG_DEV | XFS_ILOG_UUID);
161 
162 		if ((iip->ili_fields & XFS_ILOG_DEXT) &&
163 		    ip->i_d.di_nextents > 0 &&
164 		    ip->i_df.if_bytes > 0) {
165 			struct xfs_bmbt_rec *p;
166 
167 			ASSERT(ip->i_df.if_u1.if_extents != NULL);
168 			ASSERT(xfs_iext_count(&ip->i_df) > 0);
169 
170 			p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IEXT);
171 			data_bytes = xfs_iextents_copy(ip, p, XFS_DATA_FORK);
172 			xlog_finish_iovec(lv, *vecp, data_bytes);
173 
174 			ASSERT(data_bytes <= ip->i_df.if_bytes);
175 
176 			ilf->ilf_dsize = data_bytes;
177 			ilf->ilf_size++;
178 		} else {
179 			iip->ili_fields &= ~XFS_ILOG_DEXT;
180 		}
181 		break;
182 	case XFS_DINODE_FMT_BTREE:
183 		iip->ili_fields &=
184 			~(XFS_ILOG_DDATA | XFS_ILOG_DEXT |
185 			  XFS_ILOG_DEV | XFS_ILOG_UUID);
186 
187 		if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
188 		    ip->i_df.if_broot_bytes > 0) {
189 			ASSERT(ip->i_df.if_broot != NULL);
190 			xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IBROOT,
191 					ip->i_df.if_broot,
192 					ip->i_df.if_broot_bytes);
193 			ilf->ilf_dsize = ip->i_df.if_broot_bytes;
194 			ilf->ilf_size++;
195 		} else {
196 			ASSERT(!(iip->ili_fields &
197 				 XFS_ILOG_DBROOT));
198 			iip->ili_fields &= ~XFS_ILOG_DBROOT;
199 		}
200 		break;
201 	case XFS_DINODE_FMT_LOCAL:
202 		iip->ili_fields &=
203 			~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT |
204 			  XFS_ILOG_DEV | XFS_ILOG_UUID);
205 		if ((iip->ili_fields & XFS_ILOG_DDATA) &&
206 		    ip->i_df.if_bytes > 0) {
207 			/*
208 			 * Round i_bytes up to a word boundary.
209 			 * The underlying memory is guaranteed to
210 			 * to be there by xfs_idata_realloc().
211 			 */
212 			data_bytes = roundup(ip->i_df.if_bytes, 4);
213 			ASSERT(ip->i_df.if_real_bytes == 0 ||
214 			       ip->i_df.if_real_bytes >= data_bytes);
215 			ASSERT(ip->i_df.if_u1.if_data != NULL);
216 			ASSERT(ip->i_d.di_size > 0);
217 			xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_ILOCAL,
218 					ip->i_df.if_u1.if_data, data_bytes);
219 			ilf->ilf_dsize = (unsigned)data_bytes;
220 			ilf->ilf_size++;
221 		} else {
222 			iip->ili_fields &= ~XFS_ILOG_DDATA;
223 		}
224 		break;
225 	case XFS_DINODE_FMT_DEV:
226 		iip->ili_fields &=
227 			~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
228 			  XFS_ILOG_DEXT | XFS_ILOG_UUID);
229 		if (iip->ili_fields & XFS_ILOG_DEV)
230 			ilf->ilf_u.ilfu_rdev = ip->i_df.if_u2.if_rdev;
231 		break;
232 	case XFS_DINODE_FMT_UUID:
233 		iip->ili_fields &=
234 			~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
235 			  XFS_ILOG_DEXT | XFS_ILOG_DEV);
236 		if (iip->ili_fields & XFS_ILOG_UUID)
237 			ilf->ilf_u.ilfu_uuid = ip->i_df.if_u2.if_uuid;
238 		break;
239 	default:
240 		ASSERT(0);
241 		break;
242 	}
243 }
244 
245 STATIC void
xfs_inode_item_format_attr_fork(struct xfs_inode_log_item * iip,struct xfs_inode_log_format * ilf,struct xfs_log_vec * lv,struct xfs_log_iovec ** vecp)246 xfs_inode_item_format_attr_fork(
247 	struct xfs_inode_log_item *iip,
248 	struct xfs_inode_log_format *ilf,
249 	struct xfs_log_vec	*lv,
250 	struct xfs_log_iovec	**vecp)
251 {
252 	struct xfs_inode	*ip = iip->ili_inode;
253 	size_t			data_bytes;
254 
255 	switch (ip->i_d.di_aformat) {
256 	case XFS_DINODE_FMT_EXTENTS:
257 		iip->ili_fields &=
258 			~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT);
259 
260 		if ((iip->ili_fields & XFS_ILOG_AEXT) &&
261 		    ip->i_d.di_anextents > 0 &&
262 		    ip->i_afp->if_bytes > 0) {
263 			struct xfs_bmbt_rec *p;
264 
265 			ASSERT(xfs_iext_count(ip->i_afp) ==
266 				ip->i_d.di_anextents);
267 			ASSERT(ip->i_afp->if_u1.if_extents != NULL);
268 
269 			p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_EXT);
270 			data_bytes = xfs_iextents_copy(ip, p, XFS_ATTR_FORK);
271 			xlog_finish_iovec(lv, *vecp, data_bytes);
272 
273 			ilf->ilf_asize = data_bytes;
274 			ilf->ilf_size++;
275 		} else {
276 			iip->ili_fields &= ~XFS_ILOG_AEXT;
277 		}
278 		break;
279 	case XFS_DINODE_FMT_BTREE:
280 		iip->ili_fields &=
281 			~(XFS_ILOG_ADATA | XFS_ILOG_AEXT);
282 
283 		if ((iip->ili_fields & XFS_ILOG_ABROOT) &&
284 		    ip->i_afp->if_broot_bytes > 0) {
285 			ASSERT(ip->i_afp->if_broot != NULL);
286 
287 			xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_BROOT,
288 					ip->i_afp->if_broot,
289 					ip->i_afp->if_broot_bytes);
290 			ilf->ilf_asize = ip->i_afp->if_broot_bytes;
291 			ilf->ilf_size++;
292 		} else {
293 			iip->ili_fields &= ~XFS_ILOG_ABROOT;
294 		}
295 		break;
296 	case XFS_DINODE_FMT_LOCAL:
297 		iip->ili_fields &=
298 			~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT);
299 
300 		if ((iip->ili_fields & XFS_ILOG_ADATA) &&
301 		    ip->i_afp->if_bytes > 0) {
302 			/*
303 			 * Round i_bytes up to a word boundary.
304 			 * The underlying memory is guaranteed to
305 			 * to be there by xfs_idata_realloc().
306 			 */
307 			data_bytes = roundup(ip->i_afp->if_bytes, 4);
308 			ASSERT(ip->i_afp->if_real_bytes == 0 ||
309 			       ip->i_afp->if_real_bytes >= data_bytes);
310 			ASSERT(ip->i_afp->if_u1.if_data != NULL);
311 			xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_LOCAL,
312 					ip->i_afp->if_u1.if_data,
313 					data_bytes);
314 			ilf->ilf_asize = (unsigned)data_bytes;
315 			ilf->ilf_size++;
316 		} else {
317 			iip->ili_fields &= ~XFS_ILOG_ADATA;
318 		}
319 		break;
320 	default:
321 		ASSERT(0);
322 		break;
323 	}
324 }
325 
326 static void
xfs_inode_to_log_dinode(struct xfs_inode * ip,struct xfs_log_dinode * to,xfs_lsn_t lsn)327 xfs_inode_to_log_dinode(
328 	struct xfs_inode	*ip,
329 	struct xfs_log_dinode	*to,
330 	xfs_lsn_t		lsn)
331 {
332 	struct xfs_icdinode	*from = &ip->i_d;
333 	struct inode		*inode = VFS_I(ip);
334 
335 	to->di_magic = XFS_DINODE_MAGIC;
336 
337 	to->di_version = from->di_version;
338 	to->di_format = from->di_format;
339 	to->di_uid = from->di_uid;
340 	to->di_gid = from->di_gid;
341 	to->di_projid_lo = from->di_projid_lo;
342 	to->di_projid_hi = from->di_projid_hi;
343 
344 	memset(to->di_pad, 0, sizeof(to->di_pad));
345 	memset(to->di_pad3, 0, sizeof(to->di_pad3));
346 	to->di_atime.t_sec = inode->i_atime.tv_sec;
347 	to->di_atime.t_nsec = inode->i_atime.tv_nsec;
348 	to->di_mtime.t_sec = inode->i_mtime.tv_sec;
349 	to->di_mtime.t_nsec = inode->i_mtime.tv_nsec;
350 	to->di_ctime.t_sec = inode->i_ctime.tv_sec;
351 	to->di_ctime.t_nsec = inode->i_ctime.tv_nsec;
352 	to->di_nlink = inode->i_nlink;
353 	to->di_gen = inode->i_generation;
354 	to->di_mode = inode->i_mode;
355 
356 	to->di_size = from->di_size;
357 	to->di_nblocks = from->di_nblocks;
358 	to->di_extsize = from->di_extsize;
359 	to->di_nextents = from->di_nextents;
360 	to->di_anextents = from->di_anextents;
361 	to->di_forkoff = from->di_forkoff;
362 	to->di_aformat = from->di_aformat;
363 	to->di_dmevmask = from->di_dmevmask;
364 	to->di_dmstate = from->di_dmstate;
365 	to->di_flags = from->di_flags;
366 
367 	/* log a dummy value to ensure log structure is fully initialised */
368 	to->di_next_unlinked = NULLAGINO;
369 
370 	if (from->di_version == 3) {
371 		to->di_changecount = inode->i_version;
372 		to->di_crtime.t_sec = from->di_crtime.t_sec;
373 		to->di_crtime.t_nsec = from->di_crtime.t_nsec;
374 		to->di_flags2 = from->di_flags2;
375 		to->di_cowextsize = from->di_cowextsize;
376 		to->di_ino = ip->i_ino;
377 		to->di_lsn = lsn;
378 		memset(to->di_pad2, 0, sizeof(to->di_pad2));
379 		uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
380 		to->di_flushiter = 0;
381 	} else {
382 		to->di_flushiter = from->di_flushiter;
383 	}
384 }
385 
386 /*
387  * Format the inode core. Current timestamp data is only in the VFS inode
388  * fields, so we need to grab them from there. Hence rather than just copying
389  * the XFS inode core structure, format the fields directly into the iovec.
390  */
391 static void
xfs_inode_item_format_core(struct xfs_inode * ip,struct xfs_log_vec * lv,struct xfs_log_iovec ** vecp)392 xfs_inode_item_format_core(
393 	struct xfs_inode	*ip,
394 	struct xfs_log_vec	*lv,
395 	struct xfs_log_iovec	**vecp)
396 {
397 	struct xfs_log_dinode	*dic;
398 
399 	dic = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_ICORE);
400 	xfs_inode_to_log_dinode(ip, dic, ip->i_itemp->ili_item.li_lsn);
401 	xlog_finish_iovec(lv, *vecp, xfs_log_dinode_size(ip->i_d.di_version));
402 }
403 
404 /*
405  * This is called to fill in the vector of log iovecs for the given inode
406  * log item.  It fills the first item with an inode log format structure,
407  * the second with the on-disk inode structure, and a possible third and/or
408  * fourth with the inode data/extents/b-tree root and inode attributes
409  * data/extents/b-tree root.
410  *
411  * Note: Always use the 64 bit inode log format structure so we don't
412  * leave an uninitialised hole in the format item on 64 bit systems. Log
413  * recovery on 32 bit systems handles this just fine, so there's no reason
414  * for not using an initialising the properly padded structure all the time.
415  */
416 STATIC void
xfs_inode_item_format(struct xfs_log_item * lip,struct xfs_log_vec * lv)417 xfs_inode_item_format(
418 	struct xfs_log_item	*lip,
419 	struct xfs_log_vec	*lv)
420 {
421 	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
422 	struct xfs_inode	*ip = iip->ili_inode;
423 	struct xfs_log_iovec	*vecp = NULL;
424 	struct xfs_inode_log_format *ilf;
425 
426 	ASSERT(ip->i_d.di_version > 1);
427 
428 	ilf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_IFORMAT);
429 	ilf->ilf_type = XFS_LI_INODE;
430 	ilf->ilf_ino = ip->i_ino;
431 	ilf->ilf_blkno = ip->i_imap.im_blkno;
432 	ilf->ilf_len = ip->i_imap.im_len;
433 	ilf->ilf_boffset = ip->i_imap.im_boffset;
434 	ilf->ilf_fields = XFS_ILOG_CORE;
435 	ilf->ilf_size = 2; /* format + core */
436 
437 	/*
438 	 * make sure we don't leak uninitialised data into the log in the case
439 	 * when we don't log every field in the inode.
440 	 */
441 	ilf->ilf_dsize = 0;
442 	ilf->ilf_asize = 0;
443 	ilf->ilf_pad = 0;
444 	memset(&ilf->ilf_u.ilfu_uuid, 0, sizeof(ilf->ilf_u.ilfu_uuid));
445 
446 	xlog_finish_iovec(lv, vecp, sizeof(*ilf));
447 
448 	xfs_inode_item_format_core(ip, lv, &vecp);
449 	xfs_inode_item_format_data_fork(iip, ilf, lv, &vecp);
450 	if (XFS_IFORK_Q(ip)) {
451 		xfs_inode_item_format_attr_fork(iip, ilf, lv, &vecp);
452 	} else {
453 		iip->ili_fields &=
454 			~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT);
455 	}
456 
457 	/* update the format with the exact fields we actually logged */
458 	ilf->ilf_fields |= (iip->ili_fields & ~XFS_ILOG_TIMESTAMP);
459 }
460 
461 /*
462  * This is called to pin the inode associated with the inode log
463  * item in memory so it cannot be written out.
464  */
465 STATIC void
xfs_inode_item_pin(struct xfs_log_item * lip)466 xfs_inode_item_pin(
467 	struct xfs_log_item	*lip)
468 {
469 	struct xfs_inode	*ip = INODE_ITEM(lip)->ili_inode;
470 
471 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
472 
473 	trace_xfs_inode_pin(ip, _RET_IP_);
474 	atomic_inc(&ip->i_pincount);
475 }
476 
477 
478 /*
479  * This is called to unpin the inode associated with the inode log
480  * item which was previously pinned with a call to xfs_inode_item_pin().
481  *
482  * Also wake up anyone in xfs_iunpin_wait() if the count goes to 0.
483  */
484 STATIC void
xfs_inode_item_unpin(struct xfs_log_item * lip,int remove)485 xfs_inode_item_unpin(
486 	struct xfs_log_item	*lip,
487 	int			remove)
488 {
489 	struct xfs_inode	*ip = INODE_ITEM(lip)->ili_inode;
490 
491 	trace_xfs_inode_unpin(ip, _RET_IP_);
492 	ASSERT(atomic_read(&ip->i_pincount) > 0);
493 	if (atomic_dec_and_test(&ip->i_pincount))
494 		wake_up_bit(&ip->i_flags, __XFS_IPINNED_BIT);
495 }
496 
497 /*
498  * Callback used to mark a buffer with XFS_LI_FAILED when items in the buffer
499  * have been failed during writeback
500  *
501  * This informs the AIL that the inode is already flush locked on the next push,
502  * and acquires a hold on the buffer to ensure that it isn't reclaimed before
503  * dirty data makes it to disk.
504  */
505 STATIC void
xfs_inode_item_error(struct xfs_log_item * lip,struct xfs_buf * bp)506 xfs_inode_item_error(
507 	struct xfs_log_item	*lip,
508 	struct xfs_buf		*bp)
509 {
510 	ASSERT(xfs_isiflocked(INODE_ITEM(lip)->ili_inode));
511 	xfs_set_li_failed(lip, bp);
512 }
513 
514 STATIC uint
xfs_inode_item_push(struct xfs_log_item * lip,struct list_head * buffer_list)515 xfs_inode_item_push(
516 	struct xfs_log_item	*lip,
517 	struct list_head	*buffer_list)
518 		__releases(&lip->li_ailp->xa_lock)
519 		__acquires(&lip->li_ailp->xa_lock)
520 {
521 	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
522 	struct xfs_inode	*ip = iip->ili_inode;
523 	struct xfs_buf		*bp = lip->li_buf;
524 	uint			rval = XFS_ITEM_SUCCESS;
525 	int			error;
526 
527 	if (xfs_ipincount(ip) > 0)
528 		return XFS_ITEM_PINNED;
529 
530 	/*
531 	 * The buffer containing this item failed to be written back
532 	 * previously. Resubmit the buffer for IO.
533 	 */
534 	if (lip->li_flags & XFS_LI_FAILED) {
535 		if (!xfs_buf_trylock(bp))
536 			return XFS_ITEM_LOCKED;
537 
538 		if (!xfs_buf_resubmit_failed_buffers(bp, lip, buffer_list))
539 			rval = XFS_ITEM_FLUSHING;
540 
541 		xfs_buf_unlock(bp);
542 		return rval;
543 	}
544 
545 	if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
546 		return XFS_ITEM_LOCKED;
547 
548 	/*
549 	 * Re-check the pincount now that we stabilized the value by
550 	 * taking the ilock.
551 	 */
552 	if (xfs_ipincount(ip) > 0) {
553 		rval = XFS_ITEM_PINNED;
554 		goto out_unlock;
555 	}
556 
557 	/*
558 	 * Stale inode items should force out the iclog.
559 	 */
560 	if (ip->i_flags & XFS_ISTALE) {
561 		rval = XFS_ITEM_PINNED;
562 		goto out_unlock;
563 	}
564 
565 	/*
566 	 * Someone else is already flushing the inode.  Nothing we can do
567 	 * here but wait for the flush to finish and remove the item from
568 	 * the AIL.
569 	 */
570 	if (!xfs_iflock_nowait(ip)) {
571 		rval = XFS_ITEM_FLUSHING;
572 		goto out_unlock;
573 	}
574 
575 	ASSERT(iip->ili_fields != 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));
576 	ASSERT(iip->ili_logged == 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));
577 
578 	spin_unlock(&lip->li_ailp->xa_lock);
579 
580 	error = xfs_iflush(ip, &bp);
581 	if (!error) {
582 		if (!xfs_buf_delwri_queue(bp, buffer_list))
583 			rval = XFS_ITEM_FLUSHING;
584 		xfs_buf_relse(bp);
585 	}
586 
587 	spin_lock(&lip->li_ailp->xa_lock);
588 out_unlock:
589 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
590 	return rval;
591 }
592 
593 /*
594  * Unlock the inode associated with the inode log item.
595  * Clear the fields of the inode and inode log item that
596  * are specific to the current transaction.  If the
597  * hold flags is set, do not unlock the inode.
598  */
599 STATIC void
xfs_inode_item_unlock(struct xfs_log_item * lip)600 xfs_inode_item_unlock(
601 	struct xfs_log_item	*lip)
602 {
603 	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
604 	struct xfs_inode	*ip = iip->ili_inode;
605 	unsigned short		lock_flags;
606 
607 	ASSERT(ip->i_itemp != NULL);
608 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
609 
610 	lock_flags = iip->ili_lock_flags;
611 	iip->ili_lock_flags = 0;
612 	if (lock_flags)
613 		xfs_iunlock(ip, lock_flags);
614 }
615 
616 /*
617  * This is called to find out where the oldest active copy of the inode log
618  * item in the on disk log resides now that the last log write of it completed
619  * at the given lsn.  Since we always re-log all dirty data in an inode, the
620  * latest copy in the on disk log is the only one that matters.  Therefore,
621  * simply return the given lsn.
622  *
623  * If the inode has been marked stale because the cluster is being freed, we
624  * don't want to (re-)insert this inode into the AIL. There is a race condition
625  * where the cluster buffer may be unpinned before the inode is inserted into
626  * the AIL during transaction committed processing. If the buffer is unpinned
627  * before the inode item has been committed and inserted, then it is possible
628  * for the buffer to be written and IO completes before the inode is inserted
629  * into the AIL. In that case, we'd be inserting a clean, stale inode into the
630  * AIL which will never get removed. It will, however, get reclaimed which
631  * triggers an assert in xfs_inode_free() complaining about freein an inode
632  * still in the AIL.
633  *
634  * To avoid this, just unpin the inode directly and return a LSN of -1 so the
635  * transaction committed code knows that it does not need to do any further
636  * processing on the item.
637  */
638 STATIC xfs_lsn_t
xfs_inode_item_committed(struct xfs_log_item * lip,xfs_lsn_t lsn)639 xfs_inode_item_committed(
640 	struct xfs_log_item	*lip,
641 	xfs_lsn_t		lsn)
642 {
643 	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
644 	struct xfs_inode	*ip = iip->ili_inode;
645 
646 	if (xfs_iflags_test(ip, XFS_ISTALE)) {
647 		xfs_inode_item_unpin(lip, 0);
648 		return -1;
649 	}
650 	return lsn;
651 }
652 
653 /*
654  * XXX rcc - this one really has to do something.  Probably needs
655  * to stamp in a new field in the incore inode.
656  */
657 STATIC void
xfs_inode_item_committing(struct xfs_log_item * lip,xfs_lsn_t lsn)658 xfs_inode_item_committing(
659 	struct xfs_log_item	*lip,
660 	xfs_lsn_t		lsn)
661 {
662 	INODE_ITEM(lip)->ili_last_lsn = lsn;
663 }
664 
665 /*
666  * This is the ops vector shared by all buf log items.
667  */
668 static const struct xfs_item_ops xfs_inode_item_ops = {
669 	.iop_size	= xfs_inode_item_size,
670 	.iop_format	= xfs_inode_item_format,
671 	.iop_pin	= xfs_inode_item_pin,
672 	.iop_unpin	= xfs_inode_item_unpin,
673 	.iop_unlock	= xfs_inode_item_unlock,
674 	.iop_committed	= xfs_inode_item_committed,
675 	.iop_push	= xfs_inode_item_push,
676 	.iop_committing = xfs_inode_item_committing,
677 	.iop_error	= xfs_inode_item_error
678 };
679 
680 
681 /*
682  * Initialize the inode log item for a newly allocated (in-core) inode.
683  */
684 void
xfs_inode_item_init(struct xfs_inode * ip,struct xfs_mount * mp)685 xfs_inode_item_init(
686 	struct xfs_inode	*ip,
687 	struct xfs_mount	*mp)
688 {
689 	struct xfs_inode_log_item *iip;
690 
691 	ASSERT(ip->i_itemp == NULL);
692 	iip = ip->i_itemp = kmem_zone_zalloc(xfs_ili_zone, KM_SLEEP);
693 
694 	iip->ili_inode = ip;
695 	xfs_log_item_init(mp, &iip->ili_item, XFS_LI_INODE,
696 						&xfs_inode_item_ops);
697 }
698 
699 /*
700  * Free the inode log item and any memory hanging off of it.
701  */
702 void
xfs_inode_item_destroy(xfs_inode_t * ip)703 xfs_inode_item_destroy(
704 	xfs_inode_t	*ip)
705 {
706 	kmem_free(ip->i_itemp->ili_item.li_lv_shadow);
707 	kmem_zone_free(xfs_ili_zone, ip->i_itemp);
708 }
709 
710 
711 /*
712  * This is the inode flushing I/O completion routine.  It is called
713  * from interrupt level when the buffer containing the inode is
714  * flushed to disk.  It is responsible for removing the inode item
715  * from the AIL if it has not been re-logged, and unlocking the inode's
716  * flush lock.
717  *
718  * To reduce AIL lock traffic as much as possible, we scan the buffer log item
719  * list for other inodes that will run this function. We remove them from the
720  * buffer list so we can process all the inode IO completions in one AIL lock
721  * traversal.
722  */
723 void
xfs_iflush_done(struct xfs_buf * bp,struct xfs_log_item * lip)724 xfs_iflush_done(
725 	struct xfs_buf		*bp,
726 	struct xfs_log_item	*lip)
727 {
728 	struct xfs_inode_log_item *iip;
729 	struct xfs_log_item	*blip;
730 	struct xfs_log_item	*next;
731 	struct xfs_log_item	*prev;
732 	struct xfs_ail		*ailp = lip->li_ailp;
733 	int			need_ail = 0;
734 
735 	/*
736 	 * Scan the buffer IO completions for other inodes being completed and
737 	 * attach them to the current inode log item.
738 	 */
739 	blip = bp->b_fspriv;
740 	prev = NULL;
741 	while (blip != NULL) {
742 		if (blip->li_cb != xfs_iflush_done) {
743 			prev = blip;
744 			blip = blip->li_bio_list;
745 			continue;
746 		}
747 
748 		/* remove from list */
749 		next = blip->li_bio_list;
750 		if (!prev) {
751 			bp->b_fspriv = next;
752 		} else {
753 			prev->li_bio_list = next;
754 		}
755 
756 		/* add to current list */
757 		blip->li_bio_list = lip->li_bio_list;
758 		lip->li_bio_list = blip;
759 
760 		/*
761 		 * while we have the item, do the unlocked check for needing
762 		 * the AIL lock.
763 		 */
764 		iip = INODE_ITEM(blip);
765 		if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) ||
766 		    (blip->li_flags & XFS_LI_FAILED))
767 			need_ail++;
768 
769 		blip = next;
770 	}
771 
772 	/* make sure we capture the state of the initial inode. */
773 	iip = INODE_ITEM(lip);
774 	if ((iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn) ||
775 	    lip->li_flags & XFS_LI_FAILED)
776 		need_ail++;
777 
778 	/*
779 	 * We only want to pull the item from the AIL if it is
780 	 * actually there and its location in the log has not
781 	 * changed since we started the flush.  Thus, we only bother
782 	 * if the ili_logged flag is set and the inode's lsn has not
783 	 * changed.  First we check the lsn outside
784 	 * the lock since it's cheaper, and then we recheck while
785 	 * holding the lock before removing the inode from the AIL.
786 	 */
787 	if (need_ail) {
788 		bool			mlip_changed = false;
789 
790 		/* this is an opencoded batch version of xfs_trans_ail_delete */
791 		spin_lock(&ailp->xa_lock);
792 		for (blip = lip; blip; blip = blip->li_bio_list) {
793 			if (INODE_ITEM(blip)->ili_logged &&
794 			    blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn)
795 				mlip_changed |= xfs_ail_delete_one(ailp, blip);
796 			else {
797 				xfs_clear_li_failed(blip);
798 			}
799 		}
800 
801 		if (mlip_changed) {
802 			if (!XFS_FORCED_SHUTDOWN(ailp->xa_mount))
803 				xlog_assign_tail_lsn_locked(ailp->xa_mount);
804 			if (list_empty(&ailp->xa_ail))
805 				wake_up_all(&ailp->xa_empty);
806 		}
807 		spin_unlock(&ailp->xa_lock);
808 
809 		if (mlip_changed)
810 			xfs_log_space_wake(ailp->xa_mount);
811 	}
812 
813 	/*
814 	 * clean up and unlock the flush lock now we are done. We can clear the
815 	 * ili_last_fields bits now that we know that the data corresponding to
816 	 * them is safely on disk.
817 	 */
818 	for (blip = lip; blip; blip = next) {
819 		next = blip->li_bio_list;
820 		blip->li_bio_list = NULL;
821 
822 		iip = INODE_ITEM(blip);
823 		iip->ili_logged = 0;
824 		iip->ili_last_fields = 0;
825 		xfs_ifunlock(iip->ili_inode);
826 	}
827 }
828 
829 /*
830  * This is the inode flushing abort routine.  It is called from xfs_iflush when
831  * the filesystem is shutting down to clean up the inode state.  It is
832  * responsible for removing the inode item from the AIL if it has not been
833  * re-logged, and unlocking the inode's flush lock.
834  */
835 void
xfs_iflush_abort(xfs_inode_t * ip,bool stale)836 xfs_iflush_abort(
837 	xfs_inode_t		*ip,
838 	bool			stale)
839 {
840 	xfs_inode_log_item_t	*iip = ip->i_itemp;
841 
842 	if (iip) {
843 		if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
844 			xfs_trans_ail_remove(&iip->ili_item,
845 					     stale ? SHUTDOWN_LOG_IO_ERROR :
846 						     SHUTDOWN_CORRUPT_INCORE);
847 		}
848 		iip->ili_logged = 0;
849 		/*
850 		 * Clear the ili_last_fields bits now that we know that the
851 		 * data corresponding to them is safely on disk.
852 		 */
853 		iip->ili_last_fields = 0;
854 		/*
855 		 * Clear the inode logging fields so no more flushes are
856 		 * attempted.
857 		 */
858 		iip->ili_fields = 0;
859 		iip->ili_fsync_fields = 0;
860 	}
861 	/*
862 	 * Release the inode's flush lock since we're done with it.
863 	 */
864 	xfs_ifunlock(ip);
865 }
866 
867 void
xfs_istale_done(struct xfs_buf * bp,struct xfs_log_item * lip)868 xfs_istale_done(
869 	struct xfs_buf		*bp,
870 	struct xfs_log_item	*lip)
871 {
872 	xfs_iflush_abort(INODE_ITEM(lip)->ili_inode, true);
873 }
874 
875 /*
876  * convert an xfs_inode_log_format struct from the old 32 bit version
877  * (which can have different field alignments) to the native 64 bit version
878  */
879 int
xfs_inode_item_format_convert(struct xfs_log_iovec * buf,struct xfs_inode_log_format * in_f)880 xfs_inode_item_format_convert(
881 	struct xfs_log_iovec		*buf,
882 	struct xfs_inode_log_format	*in_f)
883 {
884 	struct xfs_inode_log_format_32	*in_f32 = buf->i_addr;
885 
886 	if (buf->i_len != sizeof(*in_f32))
887 		return -EFSCORRUPTED;
888 
889 	in_f->ilf_type = in_f32->ilf_type;
890 	in_f->ilf_size = in_f32->ilf_size;
891 	in_f->ilf_fields = in_f32->ilf_fields;
892 	in_f->ilf_asize = in_f32->ilf_asize;
893 	in_f->ilf_dsize = in_f32->ilf_dsize;
894 	in_f->ilf_ino = in_f32->ilf_ino;
895 	/* copy biggest field of ilf_u */
896 	memcpy(in_f->ilf_u.ilfu_uuid.__u_bits,
897 	       in_f32->ilf_u.ilfu_uuid.__u_bits, sizeof(uuid_t));
898 	in_f->ilf_blkno = in_f32->ilf_blkno;
899 	in_f->ilf_len = in_f32->ilf_len;
900 	in_f->ilf_boffset = in_f32->ilf_boffset;
901 	return 0;
902 }
903