1 /*
2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18 #include <linux/log2.h>
19
20 #include "xfs.h"
21 #include "xfs_fs.h"
22 #include "xfs_format.h"
23 #include "xfs_log_format.h"
24 #include "xfs_trans_resv.h"
25 #include "xfs_inum.h"
26 #include "xfs_sb.h"
27 #include "xfs_ag.h"
28 #include "xfs_mount.h"
29 #include "xfs_inode.h"
30 #include "xfs_trans.h"
31 #include "xfs_inode_item.h"
32 #include "xfs_bmap_btree.h"
33 #include "xfs_bmap.h"
34 #include "xfs_error.h"
35 #include "xfs_trace.h"
36 #include "xfs_attr_sf.h"
37 #include "xfs_dinode.h"
38
39 kmem_zone_t *xfs_ifork_zone;
40
41 STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int);
42 STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int);
43 STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int);
44
#ifdef DEBUG
/*
 * Debug-only sanity check of the first nrecs in-core extent records of
 * a fork.  Each record is copied out, unpacked, and, when the extent
 * format is XFS_EXTFMT_NOSTATE, asserted to be in the XFS_EXT_NORM state.
 *
 * When DEBUG is not defined this expands to nothing.
 */
void
xfs_validate_extents(
	xfs_ifork_t		*ifp,
	int			nrecs,
	xfs_exntfmt_t		fmt)
{
	int			idx;

	for (idx = 0; idx < nrecs; idx++) {
		xfs_bmbt_rec_host_t	*src = xfs_iext_get_ext(ifp, idx);
		xfs_bmbt_rec_host_t	copy;
		xfs_bmbt_irec_t		irec;

		/* Work on a local copy fetched with unaligned-safe loads. */
		copy.l0 = get_unaligned(&src->l0);
		copy.l1 = get_unaligned(&src->l1);
		xfs_bmbt_get_all(&copy, &irec);
		if (fmt == XFS_EXTFMT_NOSTATE)
			ASSERT(irec.br_state == XFS_EXT_NORM);
	}
}
#else /* DEBUG */
#define xfs_validate_extents(ifp, nrecs, fmt)
#endif /* DEBUG */
72
73
74 /*
75 * Move inode type and inode format specific information from the
76 * on-disk inode to the in-core inode. For fifos, devs, and sockets
77 * this means set if_rdev to the proper value. For files, directories,
78 * and symlinks this means to bring in the in-line data or extent
79 * pointers. For a file in B-tree format, only the root is immediately
80 * brought in-core. The rest will be in-lined in if_extents when it
81 * is first referenced (see xfs_iread_extents()).
82 */
83 int
xfs_iformat_fork(xfs_inode_t * ip,xfs_dinode_t * dip)84 xfs_iformat_fork(
85 xfs_inode_t *ip,
86 xfs_dinode_t *dip)
87 {
88 xfs_attr_shortform_t *atp;
89 int size;
90 int error = 0;
91 xfs_fsize_t di_size;
92
93 if (unlikely(be32_to_cpu(dip->di_nextents) +
94 be16_to_cpu(dip->di_anextents) >
95 be64_to_cpu(dip->di_nblocks))) {
96 xfs_warn(ip->i_mount,
97 "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
98 (unsigned long long)ip->i_ino,
99 (int)(be32_to_cpu(dip->di_nextents) +
100 be16_to_cpu(dip->di_anextents)),
101 (unsigned long long)
102 be64_to_cpu(dip->di_nblocks));
103 XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW,
104 ip->i_mount, dip);
105 return -EFSCORRUPTED;
106 }
107
108 if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) {
109 xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.",
110 (unsigned long long)ip->i_ino,
111 dip->di_forkoff);
112 XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
113 ip->i_mount, dip);
114 return -EFSCORRUPTED;
115 }
116
117 if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
118 !ip->i_mount->m_rtdev_targp)) {
119 xfs_warn(ip->i_mount,
120 "corrupt dinode %Lu, has realtime flag set.",
121 ip->i_ino);
122 XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
123 XFS_ERRLEVEL_LOW, ip->i_mount, dip);
124 return -EFSCORRUPTED;
125 }
126
127 switch (ip->i_d.di_mode & S_IFMT) {
128 case S_IFIFO:
129 case S_IFCHR:
130 case S_IFBLK:
131 case S_IFSOCK:
132 if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) {
133 XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW,
134 ip->i_mount, dip);
135 return -EFSCORRUPTED;
136 }
137 ip->i_d.di_size = 0;
138 ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip);
139 break;
140
141 case S_IFREG:
142 case S_IFLNK:
143 case S_IFDIR:
144 switch (dip->di_format) {
145 case XFS_DINODE_FMT_LOCAL:
146 /*
147 * no local regular files yet
148 */
149 if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) {
150 xfs_warn(ip->i_mount,
151 "corrupt inode %Lu (local format for regular file).",
152 (unsigned long long) ip->i_ino);
153 XFS_CORRUPTION_ERROR("xfs_iformat(4)",
154 XFS_ERRLEVEL_LOW,
155 ip->i_mount, dip);
156 return -EFSCORRUPTED;
157 }
158
159 di_size = be64_to_cpu(dip->di_size);
160 if (unlikely(di_size < 0 ||
161 di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
162 xfs_warn(ip->i_mount,
163 "corrupt inode %Lu (bad size %Ld for local inode).",
164 (unsigned long long) ip->i_ino,
165 (long long) di_size);
166 XFS_CORRUPTION_ERROR("xfs_iformat(5)",
167 XFS_ERRLEVEL_LOW,
168 ip->i_mount, dip);
169 return -EFSCORRUPTED;
170 }
171
172 size = (int)di_size;
173 error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size);
174 break;
175 case XFS_DINODE_FMT_EXTENTS:
176 error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK);
177 break;
178 case XFS_DINODE_FMT_BTREE:
179 error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK);
180 break;
181 default:
182 XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW,
183 ip->i_mount);
184 return -EFSCORRUPTED;
185 }
186 break;
187
188 default:
189 XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount);
190 return -EFSCORRUPTED;
191 }
192 if (error) {
193 return error;
194 }
195 if (!XFS_DFORK_Q(dip))
196 return 0;
197
198 ASSERT(ip->i_afp == NULL);
199 ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS);
200
201 switch (dip->di_aformat) {
202 case XFS_DINODE_FMT_LOCAL:
203 atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip);
204 size = be16_to_cpu(atp->hdr.totsize);
205
206 if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) {
207 xfs_warn(ip->i_mount,
208 "corrupt inode %Lu (bad attr fork size %Ld).",
209 (unsigned long long) ip->i_ino,
210 (long long) size);
211 XFS_CORRUPTION_ERROR("xfs_iformat(8)",
212 XFS_ERRLEVEL_LOW,
213 ip->i_mount, dip);
214 return -EFSCORRUPTED;
215 }
216
217 error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size);
218 break;
219 case XFS_DINODE_FMT_EXTENTS:
220 error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK);
221 break;
222 case XFS_DINODE_FMT_BTREE:
223 error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK);
224 break;
225 default:
226 error = -EFSCORRUPTED;
227 break;
228 }
229 if (error) {
230 kmem_zone_free(xfs_ifork_zone, ip->i_afp);
231 ip->i_afp = NULL;
232 xfs_idestroy_fork(ip, XFS_DATA_FORK);
233 }
234 return error;
235 }
236
237 /*
238 * The file is in-lined in the on-disk inode.
239 * If it fits into if_inline_data, then copy
240 * it there, otherwise allocate a buffer for it
241 * and copy the data there. Either way, set
242 * if_data to point at the data.
243 * If we allocate a buffer for the data, make
244 * sure that its size is a multiple of 4 and
245 * record the real size in i_real_bytes.
246 */
247 STATIC int
xfs_iformat_local(xfs_inode_t * ip,xfs_dinode_t * dip,int whichfork,int size)248 xfs_iformat_local(
249 xfs_inode_t *ip,
250 xfs_dinode_t *dip,
251 int whichfork,
252 int size)
253 {
254 xfs_ifork_t *ifp;
255 int real_size;
256
257 /*
258 * If the size is unreasonable, then something
259 * is wrong and we just bail out rather than crash in
260 * kmem_alloc() or memcpy() below.
261 */
262 if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
263 xfs_warn(ip->i_mount,
264 "corrupt inode %Lu (bad size %d for local fork, size = %d).",
265 (unsigned long long) ip->i_ino, size,
266 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
267 XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
268 ip->i_mount, dip);
269 return -EFSCORRUPTED;
270 }
271 ifp = XFS_IFORK_PTR(ip, whichfork);
272 real_size = 0;
273 if (size == 0)
274 ifp->if_u1.if_data = NULL;
275 else if (size <= sizeof(ifp->if_u2.if_inline_data))
276 ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
277 else {
278 real_size = roundup(size, 4);
279 ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS);
280 }
281 ifp->if_bytes = size;
282 ifp->if_real_bytes = real_size;
283 if (size)
284 memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size);
285 ifp->if_flags &= ~XFS_IFEXTENTS;
286 ifp->if_flags |= XFS_IFINLINE;
287 return 0;
288 }
289
290 /*
291 * The file consists of a set of extents all
292 * of which fit into the on-disk inode.
293 * If there are few enough extents to fit into
294 * the if_inline_ext, then copy them there.
295 * Otherwise allocate a buffer for them and copy
296 * them into it. Either way, set if_extents
297 * to point at the extents.
298 */
299 STATIC int
xfs_iformat_extents(xfs_inode_t * ip,xfs_dinode_t * dip,int whichfork)300 xfs_iformat_extents(
301 xfs_inode_t *ip,
302 xfs_dinode_t *dip,
303 int whichfork)
304 {
305 xfs_bmbt_rec_t *dp;
306 xfs_ifork_t *ifp;
307 int nex;
308 int size;
309 int i;
310
311 ifp = XFS_IFORK_PTR(ip, whichfork);
312 nex = XFS_DFORK_NEXTENTS(dip, whichfork);
313 size = nex * (uint)sizeof(xfs_bmbt_rec_t);
314
315 /*
316 * If the number of extents is unreasonable, then something
317 * is wrong and we just bail out rather than crash in
318 * kmem_alloc() or memcpy() below.
319 */
320 if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
321 xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).",
322 (unsigned long long) ip->i_ino, nex);
323 XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
324 ip->i_mount, dip);
325 return -EFSCORRUPTED;
326 }
327
328 ifp->if_real_bytes = 0;
329 if (nex == 0)
330 ifp->if_u1.if_extents = NULL;
331 else if (nex <= XFS_INLINE_EXTS)
332 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
333 else
334 xfs_iext_add(ifp, 0, nex);
335
336 ifp->if_bytes = size;
337 if (size) {
338 dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork);
339 xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip));
340 for (i = 0; i < nex; i++, dp++) {
341 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
342 ep->l0 = get_unaligned_be64(&dp->l0);
343 ep->l1 = get_unaligned_be64(&dp->l1);
344 }
345 XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork);
346 if (whichfork != XFS_DATA_FORK ||
347 XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE)
348 if (unlikely(xfs_check_nostate_extents(
349 ifp, 0, nex))) {
350 XFS_ERROR_REPORT("xfs_iformat_extents(2)",
351 XFS_ERRLEVEL_LOW,
352 ip->i_mount);
353 return -EFSCORRUPTED;
354 }
355 }
356 ifp->if_flags |= XFS_IFEXTENTS;
357 return 0;
358 }
359
360 /*
361 * The file has too many extents to fit into
362 * the inode, so they are in B-tree format.
363 * Allocate a buffer for the root of the B-tree
364 * and copy the root into it. The i_extents
365 * field will remain NULL until all of the
366 * extents are read in (when they are needed).
367 */
368 STATIC int
xfs_iformat_btree(xfs_inode_t * ip,xfs_dinode_t * dip,int whichfork)369 xfs_iformat_btree(
370 xfs_inode_t *ip,
371 xfs_dinode_t *dip,
372 int whichfork)
373 {
374 struct xfs_mount *mp = ip->i_mount;
375 xfs_bmdr_block_t *dfp;
376 xfs_ifork_t *ifp;
377 /* REFERENCED */
378 int nrecs;
379 int size;
380
381 ifp = XFS_IFORK_PTR(ip, whichfork);
382 dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
383 size = XFS_BMAP_BROOT_SPACE(mp, dfp);
384 nrecs = be16_to_cpu(dfp->bb_numrecs);
385
386 /*
387 * blow out if -- fork has less extents than can fit in
388 * fork (fork shouldn't be a btree format), root btree
389 * block has more records than can fit into the fork,
390 * or the number of extents is greater than the number of
391 * blocks.
392 */
393 if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <=
394 XFS_IFORK_MAXEXT(ip, whichfork) ||
395 XFS_BMDR_SPACE_CALC(nrecs) >
396 XFS_DFORK_SIZE(dip, mp, whichfork) ||
397 XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
398 xfs_warn(mp, "corrupt inode %Lu (btree).",
399 (unsigned long long) ip->i_ino);
400 XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
401 mp, dip);
402 return -EFSCORRUPTED;
403 }
404
405 ifp->if_broot_bytes = size;
406 ifp->if_broot = kmem_alloc(size, KM_SLEEP | KM_NOFS);
407 ASSERT(ifp->if_broot != NULL);
408 /*
409 * Copy and convert from the on-disk structure
410 * to the in-memory structure.
411 */
412 xfs_bmdr_to_bmbt(ip, dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
413 ifp->if_broot, size);
414 ifp->if_flags &= ~XFS_IFEXTENTS;
415 ifp->if_flags |= XFS_IFBROOT;
416
417 return 0;
418 }
419
420 /*
421 * Read in extents from a btree-format inode.
422 * Allocate and fill in if_extents. Real work is done in xfs_bmap.c.
423 */
424 int
xfs_iread_extents(xfs_trans_t * tp,xfs_inode_t * ip,int whichfork)425 xfs_iread_extents(
426 xfs_trans_t *tp,
427 xfs_inode_t *ip,
428 int whichfork)
429 {
430 int error;
431 xfs_ifork_t *ifp;
432 xfs_extnum_t nextents;
433
434 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
435
436 if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
437 XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW,
438 ip->i_mount);
439 return -EFSCORRUPTED;
440 }
441 nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
442 ifp = XFS_IFORK_PTR(ip, whichfork);
443
444 /*
445 * We know that the size is valid (it's checked in iformat_btree)
446 */
447 ifp->if_bytes = ifp->if_real_bytes = 0;
448 ifp->if_flags |= XFS_IFEXTENTS;
449 xfs_iext_add(ifp, 0, nextents);
450 error = xfs_bmap_read_extents(tp, ip, whichfork);
451 if (error) {
452 xfs_iext_destroy(ifp);
453 ifp->if_flags &= ~XFS_IFEXTENTS;
454 return error;
455 }
456 xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip));
457 return 0;
458 }
459 /*
460 * Reallocate the space for if_broot based on the number of records
461 * being added or deleted as indicated in rec_diff. Move the records
462 * and pointers in if_broot to fit the new size. When shrinking this
463 * will eliminate holes between the records and pointers created by
464 * the caller. When growing this will create holes to be filled in
465 * by the caller.
466 *
467 * The caller must not request to add more records than would fit in
468 * the on-disk inode root. If the if_broot is currently NULL, then
469 * if we are adding records, one will be allocated. The caller must also
470 * not request that the number of records go below zero, although
471 * it can go to zero.
472 *
473 * ip -- the inode whose if_broot area is changing
474 * ext_diff -- the change in the number of records, positive or negative,
475 * requested for the if_broot array.
476 */
477 void
xfs_iroot_realloc(xfs_inode_t * ip,int rec_diff,int whichfork)478 xfs_iroot_realloc(
479 xfs_inode_t *ip,
480 int rec_diff,
481 int whichfork)
482 {
483 struct xfs_mount *mp = ip->i_mount;
484 int cur_max;
485 xfs_ifork_t *ifp;
486 struct xfs_btree_block *new_broot;
487 int new_max;
488 size_t new_size;
489 char *np;
490 char *op;
491
492 /*
493 * Handle the degenerate case quietly.
494 */
495 if (rec_diff == 0) {
496 return;
497 }
498
499 ifp = XFS_IFORK_PTR(ip, whichfork);
500 if (rec_diff > 0) {
501 /*
502 * If there wasn't any memory allocated before, just
503 * allocate it now and get out.
504 */
505 if (ifp->if_broot_bytes == 0) {
506 new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff);
507 ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
508 ifp->if_broot_bytes = (int)new_size;
509 return;
510 }
511
512 /*
513 * If there is already an existing if_broot, then we need
514 * to realloc() it and shift the pointers to their new
515 * location. The records don't change location because
516 * they are kept butted up against the btree block header.
517 */
518 cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
519 new_max = cur_max + rec_diff;
520 new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
521 ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
522 XFS_BMAP_BROOT_SPACE_CALC(mp, cur_max),
523 KM_SLEEP | KM_NOFS);
524 op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
525 ifp->if_broot_bytes);
526 np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
527 (int)new_size);
528 ifp->if_broot_bytes = (int)new_size;
529 ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
530 XFS_IFORK_SIZE(ip, whichfork));
531 memmove(np, op, cur_max * (uint)sizeof(xfs_fsblock_t));
532 return;
533 }
534
535 /*
536 * rec_diff is less than 0. In this case, we are shrinking the
537 * if_broot buffer. It must already exist. If we go to zero
538 * records, just get rid of the root and clear the status bit.
539 */
540 ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0));
541 cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
542 new_max = cur_max + rec_diff;
543 ASSERT(new_max >= 0);
544 if (new_max > 0)
545 new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
546 else
547 new_size = 0;
548 if (new_size > 0) {
549 new_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
550 /*
551 * First copy over the btree block header.
552 */
553 memcpy(new_broot, ifp->if_broot,
554 XFS_BMBT_BLOCK_LEN(ip->i_mount));
555 } else {
556 new_broot = NULL;
557 ifp->if_flags &= ~XFS_IFBROOT;
558 }
559
560 /*
561 * Only copy the records and pointers if there are any.
562 */
563 if (new_max > 0) {
564 /*
565 * First copy the records.
566 */
567 op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1);
568 np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1);
569 memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t));
570
571 /*
572 * Then copy the pointers.
573 */
574 op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
575 ifp->if_broot_bytes);
576 np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1,
577 (int)new_size);
578 memcpy(np, op, new_max * (uint)sizeof(xfs_fsblock_t));
579 }
580 kmem_free(ifp->if_broot);
581 ifp->if_broot = new_broot;
582 ifp->if_broot_bytes = (int)new_size;
583 if (ifp->if_broot)
584 ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
585 XFS_IFORK_SIZE(ip, whichfork));
586 return;
587 }
588
589
590 /*
591 * This is called when the amount of space needed for if_data
592 * is increased or decreased. The change in size is indicated by
593 * the number of bytes that need to be added or deleted in the
594 * byte_diff parameter.
595 *
596 * If the amount of space needed has decreased below the size of the
597 * inline buffer, then switch to using the inline buffer. Otherwise,
598 * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer
599 * to what is needed.
600 *
601 * ip -- the inode whose if_data area is changing
602 * byte_diff -- the change in the number of bytes, positive or negative,
603 * requested for the if_data array.
604 */
605 void
xfs_idata_realloc(xfs_inode_t * ip,int byte_diff,int whichfork)606 xfs_idata_realloc(
607 xfs_inode_t *ip,
608 int byte_diff,
609 int whichfork)
610 {
611 xfs_ifork_t *ifp;
612 int new_size;
613 int real_size;
614
615 if (byte_diff == 0) {
616 return;
617 }
618
619 ifp = XFS_IFORK_PTR(ip, whichfork);
620 new_size = (int)ifp->if_bytes + byte_diff;
621 ASSERT(new_size >= 0);
622
623 if (new_size == 0) {
624 if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
625 kmem_free(ifp->if_u1.if_data);
626 }
627 ifp->if_u1.if_data = NULL;
628 real_size = 0;
629 } else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) {
630 /*
631 * If the valid extents/data can fit in if_inline_ext/data,
632 * copy them from the malloc'd vector and free it.
633 */
634 if (ifp->if_u1.if_data == NULL) {
635 ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
636 } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
637 ASSERT(ifp->if_real_bytes != 0);
638 memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
639 new_size);
640 kmem_free(ifp->if_u1.if_data);
641 ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
642 }
643 real_size = 0;
644 } else {
645 /*
646 * Stuck with malloc/realloc.
647 * For inline data, the underlying buffer must be
648 * a multiple of 4 bytes in size so that it can be
649 * logged and stay on word boundaries. We enforce
650 * that here.
651 */
652 real_size = roundup(new_size, 4);
653 if (ifp->if_u1.if_data == NULL) {
654 ASSERT(ifp->if_real_bytes == 0);
655 ifp->if_u1.if_data = kmem_alloc(real_size,
656 KM_SLEEP | KM_NOFS);
657 } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
658 /*
659 * Only do the realloc if the underlying size
660 * is really changing.
661 */
662 if (ifp->if_real_bytes != real_size) {
663 ifp->if_u1.if_data =
664 kmem_realloc(ifp->if_u1.if_data,
665 real_size,
666 ifp->if_real_bytes,
667 KM_SLEEP | KM_NOFS);
668 }
669 } else {
670 ASSERT(ifp->if_real_bytes == 0);
671 ifp->if_u1.if_data = kmem_alloc(real_size,
672 KM_SLEEP | KM_NOFS);
673 memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,
674 ifp->if_bytes);
675 }
676 }
677 ifp->if_real_bytes = real_size;
678 ifp->if_bytes = new_size;
679 ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
680 }
681
682 void
xfs_idestroy_fork(xfs_inode_t * ip,int whichfork)683 xfs_idestroy_fork(
684 xfs_inode_t *ip,
685 int whichfork)
686 {
687 xfs_ifork_t *ifp;
688
689 ifp = XFS_IFORK_PTR(ip, whichfork);
690 if (ifp->if_broot != NULL) {
691 kmem_free(ifp->if_broot);
692 ifp->if_broot = NULL;
693 }
694
695 /*
696 * If the format is local, then we can't have an extents
697 * array so just look for an inline data array. If we're
698 * not local then we may or may not have an extents list,
699 * so check and free it up if we do.
700 */
701 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
702 if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) &&
703 (ifp->if_u1.if_data != NULL)) {
704 ASSERT(ifp->if_real_bytes != 0);
705 kmem_free(ifp->if_u1.if_data);
706 ifp->if_u1.if_data = NULL;
707 ifp->if_real_bytes = 0;
708 }
709 } else if ((ifp->if_flags & XFS_IFEXTENTS) &&
710 ((ifp->if_flags & XFS_IFEXTIREC) ||
711 ((ifp->if_u1.if_extents != NULL) &&
712 (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) {
713 ASSERT(ifp->if_real_bytes != 0);
714 xfs_iext_destroy(ifp);
715 }
716 ASSERT(ifp->if_u1.if_extents == NULL ||
717 ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext);
718 ASSERT(ifp->if_real_bytes == 0);
719 if (whichfork == XFS_ATTR_FORK) {
720 kmem_zone_free(xfs_ifork_zone, ip->i_afp);
721 ip->i_afp = NULL;
722 }
723 }
724
725 /*
726 * Convert in-core extents to on-disk form
727 *
728 * For either the data or attr fork in extent format, we need to endian convert
729 * the in-core extent as we place them into the on-disk inode.
730 *
731 * In the case of the data fork, the in-core and on-disk fork sizes can be
732 * different due to delayed allocation extents. We only copy on-disk extents
733 * here, so callers must always use the physical fork size to determine the
734 * size of the buffer passed to this routine. We will return the size actually
735 * used.
736 */
737 int
xfs_iextents_copy(xfs_inode_t * ip,xfs_bmbt_rec_t * dp,int whichfork)738 xfs_iextents_copy(
739 xfs_inode_t *ip,
740 xfs_bmbt_rec_t *dp,
741 int whichfork)
742 {
743 int copied;
744 int i;
745 xfs_ifork_t *ifp;
746 int nrecs;
747 xfs_fsblock_t start_block;
748
749 ifp = XFS_IFORK_PTR(ip, whichfork);
750 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
751 ASSERT(ifp->if_bytes > 0);
752
753 nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
754 XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork);
755 ASSERT(nrecs > 0);
756
757 /*
758 * There are some delayed allocation extents in the
759 * inode, so copy the extents one at a time and skip
760 * the delayed ones. There must be at least one
761 * non-delayed extent.
762 */
763 copied = 0;
764 for (i = 0; i < nrecs; i++) {
765 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
766 start_block = xfs_bmbt_get_startblock(ep);
767 if (isnullstartblock(start_block)) {
768 /*
769 * It's a delayed allocation extent, so skip it.
770 */
771 continue;
772 }
773
774 /* Translate to on disk format */
775 put_unaligned_be64(ep->l0, &dp->l0);
776 put_unaligned_be64(ep->l1, &dp->l1);
777 dp++;
778 copied++;
779 }
780 ASSERT(copied != 0);
781 xfs_validate_extents(ifp, copied, XFS_EXTFMT_INODE(ip));
782
783 return (copied * (uint)sizeof(xfs_bmbt_rec_t));
784 }
785
786 /*
787 * Each of the following cases stores data into the same region
788 * of the on-disk inode, so only one of them can be valid at
789 * any given time. While it is possible to have conflicting formats
790 * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is
791 * in EXTENTS format, this can only happen when the fork has
792 * changed formats after being modified but before being flushed.
793 * In these cases, the format always takes precedence, because the
794 * format indicates the current state of the fork.
795 */
796 void
xfs_iflush_fork(xfs_inode_t * ip,xfs_dinode_t * dip,xfs_inode_log_item_t * iip,int whichfork)797 xfs_iflush_fork(
798 xfs_inode_t *ip,
799 xfs_dinode_t *dip,
800 xfs_inode_log_item_t *iip,
801 int whichfork)
802 {
803 char *cp;
804 xfs_ifork_t *ifp;
805 xfs_mount_t *mp;
806 static const short brootflag[2] =
807 { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
808 static const short dataflag[2] =
809 { XFS_ILOG_DDATA, XFS_ILOG_ADATA };
810 static const short extflag[2] =
811 { XFS_ILOG_DEXT, XFS_ILOG_AEXT };
812
813 if (!iip)
814 return;
815 ifp = XFS_IFORK_PTR(ip, whichfork);
816 /*
817 * This can happen if we gave up in iformat in an error path,
818 * for the attribute fork.
819 */
820 if (!ifp) {
821 ASSERT(whichfork == XFS_ATTR_FORK);
822 return;
823 }
824 cp = XFS_DFORK_PTR(dip, whichfork);
825 mp = ip->i_mount;
826 switch (XFS_IFORK_FORMAT(ip, whichfork)) {
827 case XFS_DINODE_FMT_LOCAL:
828 if ((iip->ili_fields & dataflag[whichfork]) &&
829 (ifp->if_bytes > 0)) {
830 ASSERT(ifp->if_u1.if_data != NULL);
831 ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
832 memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes);
833 }
834 break;
835
836 case XFS_DINODE_FMT_EXTENTS:
837 ASSERT((ifp->if_flags & XFS_IFEXTENTS) ||
838 !(iip->ili_fields & extflag[whichfork]));
839 if ((iip->ili_fields & extflag[whichfork]) &&
840 (ifp->if_bytes > 0)) {
841 ASSERT(xfs_iext_get_ext(ifp, 0));
842 ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
843 (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp,
844 whichfork);
845 }
846 break;
847
848 case XFS_DINODE_FMT_BTREE:
849 if ((iip->ili_fields & brootflag[whichfork]) &&
850 (ifp->if_broot_bytes > 0)) {
851 ASSERT(ifp->if_broot != NULL);
852 ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
853 XFS_IFORK_SIZE(ip, whichfork));
854 xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes,
855 (xfs_bmdr_block_t *)cp,
856 XFS_DFORK_SIZE(dip, mp, whichfork));
857 }
858 break;
859
860 case XFS_DINODE_FMT_DEV:
861 if (iip->ili_fields & XFS_ILOG_DEV) {
862 ASSERT(whichfork == XFS_DATA_FORK);
863 xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev);
864 }
865 break;
866
867 case XFS_DINODE_FMT_UUID:
868 if (iip->ili_fields & XFS_ILOG_UUID) {
869 ASSERT(whichfork == XFS_DATA_FORK);
870 memcpy(XFS_DFORK_DPTR(dip),
871 &ip->i_df.if_u2.if_uuid,
872 sizeof(uuid_t));
873 }
874 break;
875
876 default:
877 ASSERT(0);
878 break;
879 }
880 }
881
882 /*
883 * Return a pointer to the extent record at file index idx.
884 */
885 xfs_bmbt_rec_host_t *
xfs_iext_get_ext(xfs_ifork_t * ifp,xfs_extnum_t idx)886 xfs_iext_get_ext(
887 xfs_ifork_t *ifp, /* inode fork pointer */
888 xfs_extnum_t idx) /* index of target extent */
889 {
890 ASSERT(idx >= 0);
891 ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
892
893 if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) {
894 return ifp->if_u1.if_ext_irec->er_extbuf;
895 } else if (ifp->if_flags & XFS_IFEXTIREC) {
896 xfs_ext_irec_t *erp; /* irec pointer */
897 int erp_idx = 0; /* irec index */
898 xfs_extnum_t page_idx = idx; /* ext index in target list */
899
900 erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
901 return &erp->er_extbuf[page_idx];
902 } else if (ifp->if_bytes) {
903 return &ifp->if_u1.if_extents[idx];
904 } else {
905 return NULL;
906 }
907 }
908
909 /*
910 * Insert new item(s) into the extent records for incore inode
911 * fork 'ifp'. 'count' new items are inserted at index 'idx'.
912 */
913 void
xfs_iext_insert(xfs_inode_t * ip,xfs_extnum_t idx,xfs_extnum_t count,xfs_bmbt_irec_t * new,int state)914 xfs_iext_insert(
915 xfs_inode_t *ip, /* incore inode pointer */
916 xfs_extnum_t idx, /* starting index of new items */
917 xfs_extnum_t count, /* number of inserted items */
918 xfs_bmbt_irec_t *new, /* items to insert */
919 int state) /* type of extent conversion */
920 {
921 xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
922 xfs_extnum_t i; /* extent record index */
923
924 trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_);
925
926 ASSERT(ifp->if_flags & XFS_IFEXTENTS);
927 xfs_iext_add(ifp, idx, count);
928 for (i = idx; i < idx + count; i++, new++)
929 xfs_bmbt_set_all(xfs_iext_get_ext(ifp, i), new);
930 }
931
932 /*
933 * This is called when the amount of space required for incore file
934 * extents needs to be increased. The ext_diff parameter stores the
935 * number of new extents being added and the idx parameter contains
936 * the extent index where the new extents will be added. If the new
937 * extents are being appended, then we just need to (re)allocate and
938 * initialize the space. Otherwise, if the new extents are being
939 * inserted into the middle of the existing entries, a bit more work
940 * is required to make room for the new extents to be inserted. The
941 * caller is responsible for filling in the new extent entries upon
942 * return.
943 */
944 void
xfs_iext_add(xfs_ifork_t * ifp,xfs_extnum_t idx,int ext_diff)945 xfs_iext_add(
946 xfs_ifork_t *ifp, /* inode fork pointer */
947 xfs_extnum_t idx, /* index to begin adding exts */
948 int ext_diff) /* number of extents to add */
949 {
950 int byte_diff; /* new bytes being added */
951 int new_size; /* size of extents after adding */
952 xfs_extnum_t nextents; /* number of extents in file */
953
954 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
955 ASSERT((idx >= 0) && (idx <= nextents));
956 byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t);
957 new_size = ifp->if_bytes + byte_diff;
958 /*
959 * If the new number of extents (nextents + ext_diff)
960 * fits inside the inode, then continue to use the inline
961 * extent buffer.
962 */
963 if (nextents + ext_diff <= XFS_INLINE_EXTS) {
964 if (idx < nextents) {
965 memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff],
966 &ifp->if_u2.if_inline_ext[idx],
967 (nextents - idx) * sizeof(xfs_bmbt_rec_t));
968 memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff);
969 }
970 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
971 ifp->if_real_bytes = 0;
972 }
973 /*
974 * Otherwise use a linear (direct) extent list.
975 * If the extents are currently inside the inode,
976 * xfs_iext_realloc_direct will switch us from
977 * inline to direct extent allocation mode.
978 */
979 else if (nextents + ext_diff <= XFS_LINEAR_EXTS) {
980 xfs_iext_realloc_direct(ifp, new_size);
981 if (idx < nextents) {
982 memmove(&ifp->if_u1.if_extents[idx + ext_diff],
983 &ifp->if_u1.if_extents[idx],
984 (nextents - idx) * sizeof(xfs_bmbt_rec_t));
985 memset(&ifp->if_u1.if_extents[idx], 0, byte_diff);
986 }
987 }
988 /* Indirection array */
989 else {
990 xfs_ext_irec_t *erp;
991 int erp_idx = 0;
992 int page_idx = idx;
993
994 ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS);
995 if (ifp->if_flags & XFS_IFEXTIREC) {
996 erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1);
997 } else {
998 xfs_iext_irec_init(ifp);
999 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1000 erp = ifp->if_u1.if_ext_irec;
1001 }
1002 /* Extents fit in target extent page */
1003 if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) {
1004 if (page_idx < erp->er_extcount) {
1005 memmove(&erp->er_extbuf[page_idx + ext_diff],
1006 &erp->er_extbuf[page_idx],
1007 (erp->er_extcount - page_idx) *
1008 sizeof(xfs_bmbt_rec_t));
1009 memset(&erp->er_extbuf[page_idx], 0, byte_diff);
1010 }
1011 erp->er_extcount += ext_diff;
1012 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
1013 }
1014 /* Insert a new extent page */
1015 else if (erp) {
1016 xfs_iext_add_indirect_multi(ifp,
1017 erp_idx, page_idx, ext_diff);
1018 }
1019 /*
1020 * If extent(s) are being appended to the last page in
1021 * the indirection array and the new extent(s) don't fit
1022 * in the page, then erp is NULL and erp_idx is set to
1023 * the next index needed in the indirection array.
1024 */
1025 else {
1026 uint count = ext_diff;
1027
1028 while (count) {
1029 erp = xfs_iext_irec_new(ifp, erp_idx);
1030 erp->er_extcount = min(count, XFS_LINEAR_EXTS);
1031 count -= erp->er_extcount;
1032 if (count)
1033 erp_idx++;
1034 }
1035 }
1036 }
1037 ifp->if_bytes = new_size;
1038 }
1039
/*
 * This is called when incore extents are being added to the indirection
 * array and the new extents do not fit in the target extent list. The
 * erp_idx parameter contains the irec index for the target extent list
 * in the indirection array, and the idx parameter contains the extent
 * index within the list. The number of extents being added is stored
 * in the count parameter.
 *
 *    |-------|   |-------|
 *    |       |   |       |    idx - number of extents before idx
 *    |  idx  |   | count |
 *    |       |   |       |    count - number of extents being inserted at idx
 *    |-------|   |-------|
 *    | count |   | nex2  |    nex2 - number of extents after idx + count
 *    |-------|   |-------|
 *
 * The records at and after idx in the target list (nex2 of them) are
 * copied to a temporary buffer, room for the new extents is accounted
 * for in the target list and in as many newly inserted lists as needed,
 * and the saved records are then placed behind the new slots.
 *
 * This function only adjusts er_extcount/er_extoff and moves the saved
 * records; it does not fill in the new records and it does not modify
 * ifp->if_bytes.
 */
void
xfs_iext_add_indirect_multi(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	int		erp_idx,	/* target extent irec index */
	xfs_extnum_t	idx,		/* index within target list */
	int		count)		/* new extents being added */
{
	int		byte_diff;	/* size in bytes of the nex2 records */
	xfs_ext_irec_t	*erp;		/* pointer to irec entry */
	xfs_extnum_t	ext_diff;	/* number of extents to add */
	xfs_extnum_t	ext_cnt;	/* new extents still needed */
	xfs_extnum_t	nex2;		/* extents after idx + count */
	xfs_bmbt_rec_t	*nex2_ep = NULL; /* temp list for nex2 extents */
	int		nlists;		/* number of irec's (lists) */

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
	erp = &ifp->if_u1.if_ext_irec[erp_idx];
	nex2 = erp->er_extcount - idx;
	/*
	 * Note: nlists is sampled here, before any list is inserted by
	 * the loop below, and is not refreshed afterwards.
	 */
	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;

	/*
	 * Save second part of target extent list
	 * (all extents at and past idx), then drop those records from
	 * the target list and zero the slots they occupied.
	 */
	if (nex2) {
		byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
		nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS);
		memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff);
		erp->er_extcount -= nex2;
		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2);
		memset(&erp->er_extbuf[idx], 0, byte_diff);
	}

	/*
	 * Add the new extents to the end of the target
	 * list, then allocate new irec record(s) and
	 * extent buffer(s) as needed to store the rest
	 * of the new extents.
	 */
	ext_cnt = count;
	ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount);
	if (ext_diff) {
		erp->er_extcount += ext_diff;
		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
		ext_cnt -= ext_diff;
	}
	while (ext_cnt) {
		/* erp is re-pointed at each newly inserted list */
		erp_idx++;
		erp = xfs_iext_irec_new(ifp, erp_idx);
		ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS);
		erp->er_extcount = ext_diff;
		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
		ext_cnt -= ext_diff;
	}

	/* Add nex2 extents back to indirection array */
	if (nex2) {
		xfs_extnum_t	ext_avail;
		int		i;

		byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
		ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
		/* i is the slot in the chosen page where nex2_ep is copied */
		i = 0;
		/*
		 * If nex2 extents fit in the current page, append
		 * nex2_ep after the new extents.
		 */
		if (nex2 <= ext_avail) {
			i = erp->er_extcount;
		}
		/*
		 * Otherwise, check if space is available in the
		 * next page.
		 */
		else if ((erp_idx < nlists - 1) &&
			 (nex2 <= (ext_avail = XFS_LINEAR_EXTS -
			  ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) {
			erp_idx++;
			erp++;
			/* Create a hole for nex2 extents */
			memmove(&erp->er_extbuf[nex2], erp->er_extbuf,
				erp->er_extcount * sizeof(xfs_bmbt_rec_t));
		}
		/*
		 * Final choice, create a new extent page for
		 * nex2 extents.
		 */
		else {
			erp_idx++;
			erp = xfs_iext_irec_new(ifp, erp_idx);
		}
		memmove(&erp->er_extbuf[i], nex2_ep, byte_diff);
		kmem_free(nex2_ep);
		erp->er_extcount += nex2;
		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2);
	}
}
1152
1153 /*
1154 * This is called when the amount of space required for incore file
1155 * extents needs to be decreased. The ext_diff parameter stores the
1156 * number of extents to be removed and the idx parameter contains
1157 * the extent index where the extents will be removed from.
1158 *
1159 * If the amount of space needed has decreased below the linear
1160 * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous
1161 * extent array. Otherwise, use kmem_realloc() to adjust the
1162 * size to what is needed.
1163 */
1164 void
xfs_iext_remove(xfs_inode_t * ip,xfs_extnum_t idx,int ext_diff,int state)1165 xfs_iext_remove(
1166 xfs_inode_t *ip, /* incore inode pointer */
1167 xfs_extnum_t idx, /* index to begin removing exts */
1168 int ext_diff, /* number of extents to remove */
1169 int state) /* type of extent conversion */
1170 {
1171 xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
1172 xfs_extnum_t nextents; /* number of extents in file */
1173 int new_size; /* size of extents after removal */
1174
1175 trace_xfs_iext_remove(ip, idx, state, _RET_IP_);
1176
1177 ASSERT(ext_diff > 0);
1178 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1179 new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t);
1180
1181 if (new_size == 0) {
1182 xfs_iext_destroy(ifp);
1183 } else if (ifp->if_flags & XFS_IFEXTIREC) {
1184 xfs_iext_remove_indirect(ifp, idx, ext_diff);
1185 } else if (ifp->if_real_bytes) {
1186 xfs_iext_remove_direct(ifp, idx, ext_diff);
1187 } else {
1188 xfs_iext_remove_inline(ifp, idx, ext_diff);
1189 }
1190 ifp->if_bytes = new_size;
1191 }
1192
1193 /*
1194 * This removes ext_diff extents from the inline buffer, beginning
1195 * at extent index idx.
1196 */
1197 void
xfs_iext_remove_inline(xfs_ifork_t * ifp,xfs_extnum_t idx,int ext_diff)1198 xfs_iext_remove_inline(
1199 xfs_ifork_t *ifp, /* inode fork pointer */
1200 xfs_extnum_t idx, /* index to begin removing exts */
1201 int ext_diff) /* number of extents to remove */
1202 {
1203 int nextents; /* number of extents in file */
1204
1205 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
1206 ASSERT(idx < XFS_INLINE_EXTS);
1207 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1208 ASSERT(((nextents - ext_diff) > 0) &&
1209 (nextents - ext_diff) < XFS_INLINE_EXTS);
1210
1211 if (idx + ext_diff < nextents) {
1212 memmove(&ifp->if_u2.if_inline_ext[idx],
1213 &ifp->if_u2.if_inline_ext[idx + ext_diff],
1214 (nextents - (idx + ext_diff)) *
1215 sizeof(xfs_bmbt_rec_t));
1216 memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff],
1217 0, ext_diff * sizeof(xfs_bmbt_rec_t));
1218 } else {
1219 memset(&ifp->if_u2.if_inline_ext[idx], 0,
1220 ext_diff * sizeof(xfs_bmbt_rec_t));
1221 }
1222 }
1223
1224 /*
1225 * This removes ext_diff extents from a linear (direct) extent list,
1226 * beginning at extent index idx. If the extents are being removed
1227 * from the end of the list (ie. truncate) then we just need to re-
1228 * allocate the list to remove the extra space. Otherwise, if the
1229 * extents are being removed from the middle of the existing extent
1230 * entries, then we first need to move the extent records beginning
1231 * at idx + ext_diff up in the list to overwrite the records being
1232 * removed, then remove the extra space via kmem_realloc.
1233 */
1234 void
xfs_iext_remove_direct(xfs_ifork_t * ifp,xfs_extnum_t idx,int ext_diff)1235 xfs_iext_remove_direct(
1236 xfs_ifork_t *ifp, /* inode fork pointer */
1237 xfs_extnum_t idx, /* index to begin removing exts */
1238 int ext_diff) /* number of extents to remove */
1239 {
1240 xfs_extnum_t nextents; /* number of extents in file */
1241 int new_size; /* size of extents after removal */
1242
1243 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
1244 new_size = ifp->if_bytes -
1245 (ext_diff * sizeof(xfs_bmbt_rec_t));
1246 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1247
1248 if (new_size == 0) {
1249 xfs_iext_destroy(ifp);
1250 return;
1251 }
1252 /* Move extents up in the list (if needed) */
1253 if (idx + ext_diff < nextents) {
1254 memmove(&ifp->if_u1.if_extents[idx],
1255 &ifp->if_u1.if_extents[idx + ext_diff],
1256 (nextents - (idx + ext_diff)) *
1257 sizeof(xfs_bmbt_rec_t));
1258 }
1259 memset(&ifp->if_u1.if_extents[nextents - ext_diff],
1260 0, ext_diff * sizeof(xfs_bmbt_rec_t));
1261 /*
1262 * Reallocate the direct extent list. If the extents
1263 * will fit inside the inode then xfs_iext_realloc_direct
1264 * will switch from direct to inline extent allocation
1265 * mode for us.
1266 */
1267 xfs_iext_realloc_direct(ifp, new_size);
1268 ifp->if_bytes = new_size;
1269 }
1270
/*
 * This is called when incore extents are being removed from the
 * indirection array and the extents being removed span multiple extent
 * buffers. The idx parameter contains the file extent index where we
 * want to begin removing extents, and the count parameter contains
 * how many extents need to be removed.
 *
 *    |-------|   |-------|
 *    | nex1  |   |       |    nex1 - number of extents before idx
 *    |-------|   | count |
 *    |       |   |       |    count - number of extents being removed at idx
 *    | count |   |-------|
 *    |       |   | nex2  |    nex2 - number of extents after idx + count
 *    |-------|   |-------|
 *
 * xfs_iext_remove() calls this for every removal performed while
 * XFS_IFEXTIREC is set.  The extent lists are walked starting at the
 * one containing idx: a list that loses all of its records is removed
 * from the indirection array, a list that loses only some has the
 * survivors moved up and the rest of its page zeroed.  Afterwards
 * if_bytes is reduced and xfs_iext_irec_compact() is run.
 */
void
xfs_iext_remove_indirect(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	xfs_extnum_t	idx,		/* index to begin removing extents */
	int		count)		/* number of extents to remove */
{
	xfs_ext_irec_t	*erp;		/* indirection array pointer */
	int		erp_idx = 0;	/* indirection array index */
	xfs_extnum_t	ext_cnt;	/* extents left to remove */
	xfs_extnum_t	ext_diff;	/* extents to remove in current list */
	xfs_extnum_t	nex1;		/* number of extents before idx */
	xfs_extnum_t	nex2;		/* extents after idx + count */
	int		page_idx = idx;	/* index in target extent list */

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
	/* Translate the file extent index into (list, index in list) */
	erp = xfs_iext_idx_to_irec(ifp,  &page_idx, &erp_idx, 0);
	ASSERT(erp != NULL);
	nex1 = page_idx;
	ext_cnt = count;
	while (ext_cnt) {
		/* Records of this list that survive behind the range */
		nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0);
		/* Records to drop from this list */
		ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1));
		/*
		 * Check for deletion of entire list;
		 * xfs_iext_irec_remove() updates extent offsets.
		 */
		if (ext_diff == erp->er_extcount) {
			xfs_iext_irec_remove(ifp, erp_idx);
			ext_cnt -= ext_diff;
			nex1 = 0;
			if (ext_cnt) {
				/*
				 * The following entries were shifted down by
				 * the removal, so erp_idx now names the next
				 * list.  Reload erp from the array because
				 * xfs_iext_irec_remove() resizes it.
				 */
				ASSERT(erp_idx < ifp->if_real_bytes /
					XFS_IEXT_BUFSZ);
				erp = &ifp->if_u1.if_ext_irec[erp_idx];
				nex1 = 0;
				continue;
			} else {
				break;
			}
		}
		/* Move extents up (if needed) */
		if (nex2) {
			memmove(&erp->er_extbuf[nex1],
				&erp->er_extbuf[nex1 + ext_diff],
				nex2 * sizeof(xfs_bmbt_rec_t));
		}
		/* Zero out rest of page */
		memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ -
			((nex1 + nex2) * sizeof(xfs_bmbt_rec_t))));
		/* Update remaining counters */
		erp->er_extcount -= ext_diff;
		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff);
		ext_cnt -= ext_diff;
		/* Any further removal starts at the top of the next list */
		nex1 = 0;
		erp_idx++;
		erp++;
	}
	/* xfs_iext_irec_compact() reads if_bytes, so update it first */
	ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t);
	xfs_iext_irec_compact(ifp);
}
1346
1347 /*
1348 * Create, destroy, or resize a linear (direct) block of extents.
1349 */
1350 void
xfs_iext_realloc_direct(xfs_ifork_t * ifp,int new_size)1351 xfs_iext_realloc_direct(
1352 xfs_ifork_t *ifp, /* inode fork pointer */
1353 int new_size) /* new size of extents after adding */
1354 {
1355 int rnew_size; /* real new size of extents */
1356
1357 rnew_size = new_size;
1358
1359 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) ||
1360 ((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) &&
1361 (new_size != ifp->if_real_bytes)));
1362
1363 /* Free extent records */
1364 if (new_size == 0) {
1365 xfs_iext_destroy(ifp);
1366 }
1367 /* Resize direct extent list and zero any new bytes */
1368 else if (ifp->if_real_bytes) {
1369 /* Check if extents will fit inside the inode */
1370 if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) {
1371 xfs_iext_direct_to_inline(ifp, new_size /
1372 (uint)sizeof(xfs_bmbt_rec_t));
1373 ifp->if_bytes = new_size;
1374 return;
1375 }
1376 if (!is_power_of_2(new_size)){
1377 rnew_size = roundup_pow_of_two(new_size);
1378 }
1379 if (rnew_size != ifp->if_real_bytes) {
1380 ifp->if_u1.if_extents =
1381 kmem_realloc(ifp->if_u1.if_extents,
1382 rnew_size,
1383 ifp->if_real_bytes, KM_NOFS);
1384 }
1385 if (rnew_size > ifp->if_real_bytes) {
1386 memset(&ifp->if_u1.if_extents[ifp->if_bytes /
1387 (uint)sizeof(xfs_bmbt_rec_t)], 0,
1388 rnew_size - ifp->if_real_bytes);
1389 }
1390 }
1391 /* Switch from the inline extent buffer to a direct extent list */
1392 else {
1393 if (!is_power_of_2(new_size)) {
1394 rnew_size = roundup_pow_of_two(new_size);
1395 }
1396 xfs_iext_inline_to_direct(ifp, rnew_size);
1397 }
1398 ifp->if_real_bytes = rnew_size;
1399 ifp->if_bytes = new_size;
1400 }
1401
1402 /*
1403 * Switch from linear (direct) extent records to inline buffer.
1404 */
1405 void
xfs_iext_direct_to_inline(xfs_ifork_t * ifp,xfs_extnum_t nextents)1406 xfs_iext_direct_to_inline(
1407 xfs_ifork_t *ifp, /* inode fork pointer */
1408 xfs_extnum_t nextents) /* number of extents in file */
1409 {
1410 ASSERT(ifp->if_flags & XFS_IFEXTENTS);
1411 ASSERT(nextents <= XFS_INLINE_EXTS);
1412 /*
1413 * The inline buffer was zeroed when we switched
1414 * from inline to direct extent allocation mode,
1415 * so we don't need to clear it here.
1416 */
1417 memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
1418 nextents * sizeof(xfs_bmbt_rec_t));
1419 kmem_free(ifp->if_u1.if_extents);
1420 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
1421 ifp->if_real_bytes = 0;
1422 }
1423
1424 /*
1425 * Switch from inline buffer to linear (direct) extent records.
1426 * new_size should already be rounded up to the next power of 2
1427 * by the caller (when appropriate), so use new_size as it is.
1428 * However, since new_size may be rounded up, we can't update
1429 * if_bytes here. It is the caller's responsibility to update
1430 * if_bytes upon return.
1431 */
1432 void
xfs_iext_inline_to_direct(xfs_ifork_t * ifp,int new_size)1433 xfs_iext_inline_to_direct(
1434 xfs_ifork_t *ifp, /* inode fork pointer */
1435 int new_size) /* number of extents in file */
1436 {
1437 ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS);
1438 memset(ifp->if_u1.if_extents, 0, new_size);
1439 if (ifp->if_bytes) {
1440 memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext,
1441 ifp->if_bytes);
1442 memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
1443 sizeof(xfs_bmbt_rec_t));
1444 }
1445 ifp->if_real_bytes = new_size;
1446 }
1447
1448 /*
1449 * Resize an extent indirection array to new_size bytes.
1450 */
1451 STATIC void
xfs_iext_realloc_indirect(xfs_ifork_t * ifp,int new_size)1452 xfs_iext_realloc_indirect(
1453 xfs_ifork_t *ifp, /* inode fork pointer */
1454 int new_size) /* new indirection array size */
1455 {
1456 int nlists; /* number of irec's (ex lists) */
1457 int size; /* current indirection array size */
1458
1459 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1460 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1461 size = nlists * sizeof(xfs_ext_irec_t);
1462 ASSERT(ifp->if_real_bytes);
1463 ASSERT((new_size >= 0) && (new_size != size));
1464 if (new_size == 0) {
1465 xfs_iext_destroy(ifp);
1466 } else {
1467 ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *)
1468 kmem_realloc(ifp->if_u1.if_ext_irec,
1469 new_size, size, KM_NOFS);
1470 }
1471 }
1472
1473 /*
1474 * Switch from indirection array to linear (direct) extent allocations.
1475 */
1476 STATIC void
xfs_iext_indirect_to_direct(xfs_ifork_t * ifp)1477 xfs_iext_indirect_to_direct(
1478 xfs_ifork_t *ifp) /* inode fork pointer */
1479 {
1480 xfs_bmbt_rec_host_t *ep; /* extent record pointer */
1481 xfs_extnum_t nextents; /* number of extents in file */
1482 int size; /* size of file extents */
1483
1484 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1485 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1486 ASSERT(nextents <= XFS_LINEAR_EXTS);
1487 size = nextents * sizeof(xfs_bmbt_rec_t);
1488
1489 xfs_iext_irec_compact_pages(ifp);
1490 ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);
1491
1492 ep = ifp->if_u1.if_ext_irec->er_extbuf;
1493 kmem_free(ifp->if_u1.if_ext_irec);
1494 ifp->if_flags &= ~XFS_IFEXTIREC;
1495 ifp->if_u1.if_extents = ep;
1496 ifp->if_bytes = size;
1497 if (nextents < XFS_LINEAR_EXTS) {
1498 xfs_iext_realloc_direct(ifp, size);
1499 }
1500 }
1501
1502 /*
1503 * Free incore file extents.
1504 */
1505 void
xfs_iext_destroy(xfs_ifork_t * ifp)1506 xfs_iext_destroy(
1507 xfs_ifork_t *ifp) /* inode fork pointer */
1508 {
1509 if (ifp->if_flags & XFS_IFEXTIREC) {
1510 int erp_idx;
1511 int nlists;
1512
1513 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1514 for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) {
1515 xfs_iext_irec_remove(ifp, erp_idx);
1516 }
1517 ifp->if_flags &= ~XFS_IFEXTIREC;
1518 } else if (ifp->if_real_bytes) {
1519 kmem_free(ifp->if_u1.if_extents);
1520 } else if (ifp->if_bytes) {
1521 memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
1522 sizeof(xfs_bmbt_rec_t));
1523 }
1524 ifp->if_u1.if_extents = NULL;
1525 ifp->if_real_bytes = 0;
1526 ifp->if_bytes = 0;
1527 }
1528
1529 /*
1530 * Return a pointer to the extent record for file system block bno.
1531 */
1532 xfs_bmbt_rec_host_t * /* pointer to found extent record */
xfs_iext_bno_to_ext(xfs_ifork_t * ifp,xfs_fileoff_t bno,xfs_extnum_t * idxp)1533 xfs_iext_bno_to_ext(
1534 xfs_ifork_t *ifp, /* inode fork pointer */
1535 xfs_fileoff_t bno, /* block number to search for */
1536 xfs_extnum_t *idxp) /* index of target extent */
1537 {
1538 xfs_bmbt_rec_host_t *base; /* pointer to first extent */
1539 xfs_filblks_t blockcount = 0; /* number of blocks in extent */
1540 xfs_bmbt_rec_host_t *ep = NULL; /* pointer to target extent */
1541 xfs_ext_irec_t *erp = NULL; /* indirection array pointer */
1542 int high; /* upper boundary in search */
1543 xfs_extnum_t idx = 0; /* index of target extent */
1544 int low; /* lower boundary in search */
1545 xfs_extnum_t nextents; /* number of file extents */
1546 xfs_fileoff_t startoff = 0; /* start offset of extent */
1547
1548 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1549 if (nextents == 0) {
1550 *idxp = 0;
1551 return NULL;
1552 }
1553 low = 0;
1554 if (ifp->if_flags & XFS_IFEXTIREC) {
1555 /* Find target extent list */
1556 int erp_idx = 0;
1557 erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx);
1558 base = erp->er_extbuf;
1559 high = erp->er_extcount - 1;
1560 } else {
1561 base = ifp->if_u1.if_extents;
1562 high = nextents - 1;
1563 }
1564 /* Binary search extent records */
1565 while (low <= high) {
1566 idx = (low + high) >> 1;
1567 ep = base + idx;
1568 startoff = xfs_bmbt_get_startoff(ep);
1569 blockcount = xfs_bmbt_get_blockcount(ep);
1570 if (bno < startoff) {
1571 high = idx - 1;
1572 } else if (bno >= startoff + blockcount) {
1573 low = idx + 1;
1574 } else {
1575 /* Convert back to file-based extent index */
1576 if (ifp->if_flags & XFS_IFEXTIREC) {
1577 idx += erp->er_extoff;
1578 }
1579 *idxp = idx;
1580 return ep;
1581 }
1582 }
1583 /* Convert back to file-based extent index */
1584 if (ifp->if_flags & XFS_IFEXTIREC) {
1585 idx += erp->er_extoff;
1586 }
1587 if (bno >= startoff + blockcount) {
1588 if (++idx == nextents) {
1589 ep = NULL;
1590 } else {
1591 ep = xfs_iext_get_ext(ifp, idx);
1592 }
1593 }
1594 *idxp = idx;
1595 return ep;
1596 }
1597
1598 /*
1599 * Return a pointer to the indirection array entry containing the
1600 * extent record for filesystem block bno. Store the index of the
1601 * target irec in *erp_idxp.
1602 */
1603 xfs_ext_irec_t * /* pointer to found extent record */
xfs_iext_bno_to_irec(xfs_ifork_t * ifp,xfs_fileoff_t bno,int * erp_idxp)1604 xfs_iext_bno_to_irec(
1605 xfs_ifork_t *ifp, /* inode fork pointer */
1606 xfs_fileoff_t bno, /* block number to search for */
1607 int *erp_idxp) /* irec index of target ext list */
1608 {
1609 xfs_ext_irec_t *erp = NULL; /* indirection array pointer */
1610 xfs_ext_irec_t *erp_next; /* next indirection array entry */
1611 int erp_idx; /* indirection array index */
1612 int nlists; /* number of extent irec's (lists) */
1613 int high; /* binary search upper limit */
1614 int low; /* binary search lower limit */
1615
1616 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1617 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1618 erp_idx = 0;
1619 low = 0;
1620 high = nlists - 1;
1621 while (low <= high) {
1622 erp_idx = (low + high) >> 1;
1623 erp = &ifp->if_u1.if_ext_irec[erp_idx];
1624 erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL;
1625 if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) {
1626 high = erp_idx - 1;
1627 } else if (erp_next && bno >=
1628 xfs_bmbt_get_startoff(erp_next->er_extbuf)) {
1629 low = erp_idx + 1;
1630 } else {
1631 break;
1632 }
1633 }
1634 *erp_idxp = erp_idx;
1635 return erp;
1636 }
1637
/*
 * Return a pointer to the indirection array entry containing the
 * extent record at file extent index *idxp.  Store the index of the
 * target irec in *erp_idxp and store the page index of the target
 * extent record in *idxp.
 *
 * When realloc is non-zero, *idxp may equal the total number of extents
 * (see the assertions below), and an index that falls on the boundary
 * between two lists is resolved as follows: it maps to the end of the
 * earlier list if that list is not full, otherwise to slot 0 of the
 * following list.  If the earlier list is full and is the last one,
 * NULL is returned with *erp_idxp set to one past the last irec and
 * *idxp set to 0.
 */
xfs_ext_irec_t *
xfs_iext_idx_to_irec(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	xfs_extnum_t	*idxp,		/* extent index (file -> page) */
	int		*erp_idxp,	/* out: index of target irec */
	int		realloc)	/* new bytes were just added */
{
	xfs_ext_irec_t	*prev;		/* pointer to previous irec */
	xfs_ext_irec_t	*erp = NULL;	/* pointer to current irec */
	int		erp_idx;	/* indirection array index */
	int		nlists;		/* number of irec's (ex lists) */
	int		high;		/* binary search upper limit */
	int		low;		/* binary search lower limit */
	xfs_extnum_t	page_idx = *idxp; /* extent index in target list */

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
	ASSERT(page_idx >= 0);
	ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
	ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc);

	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
	erp_idx = 0;
	low = 0;
	high = nlists - 1;

	/* Binary search extent irec's */
	while (low <= high) {
		erp_idx = (low + high) >> 1;
		erp = &ifp->if_u1.if_ext_irec[erp_idx];
		prev = erp_idx > 0 ? erp - 1 : NULL;
		/*
		 * Go lower if the index is in front of this list, or if
		 * (realloc only) it is the first slot of this list and
		 * the previous list still has room.
		 */
		if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff &&
		     realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) {
			high = erp_idx - 1;
		/*
		 * Go higher if the index is behind this list; an index
		 * exactly one past its last record only counts as inside
		 * this list when realloc is set.
		 */
		} else if (page_idx > erp->er_extoff + erp->er_extcount ||
			   (page_idx == erp->er_extoff + erp->er_extcount &&
			    !realloc)) {
			low = erp_idx + 1;
		/*
		 * One past the end of a full list: the target is slot 0
		 * of the following list, or NULL if this was the last one.
		 */
		} else if (page_idx == erp->er_extoff + erp->er_extcount &&
			   erp->er_extcount == XFS_LINEAR_EXTS) {
			ASSERT(realloc);
			page_idx = 0;
			erp_idx++;
			erp = erp_idx < nlists ? erp + 1 : NULL;
			break;
		/* Inside this list: make the index relative to its page */
		} else {
			page_idx -= erp->er_extoff;
			break;
		}
	}
	*idxp = page_idx;
	*erp_idxp = erp_idx;
	return erp;
}
1697
1698 /*
1699 * Allocate and initialize an indirection array once the space needed
1700 * for incore extents increases above XFS_IEXT_BUFSZ.
1701 */
1702 void
xfs_iext_irec_init(xfs_ifork_t * ifp)1703 xfs_iext_irec_init(
1704 xfs_ifork_t *ifp) /* inode fork pointer */
1705 {
1706 xfs_ext_irec_t *erp; /* indirection array pointer */
1707 xfs_extnum_t nextents; /* number of extents in file */
1708
1709 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
1710 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1711 ASSERT(nextents <= XFS_LINEAR_EXTS);
1712
1713 erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS);
1714
1715 if (nextents == 0) {
1716 ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
1717 } else if (!ifp->if_real_bytes) {
1718 xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ);
1719 } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) {
1720 xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ);
1721 }
1722 erp->er_extbuf = ifp->if_u1.if_extents;
1723 erp->er_extcount = nextents;
1724 erp->er_extoff = 0;
1725
1726 ifp->if_flags |= XFS_IFEXTIREC;
1727 ifp->if_real_bytes = XFS_IEXT_BUFSZ;
1728 ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t);
1729 ifp->if_u1.if_ext_irec = erp;
1730
1731 return;
1732 }
1733
1734 /*
1735 * Allocate and initialize a new entry in the indirection array.
1736 */
1737 xfs_ext_irec_t *
xfs_iext_irec_new(xfs_ifork_t * ifp,int erp_idx)1738 xfs_iext_irec_new(
1739 xfs_ifork_t *ifp, /* inode fork pointer */
1740 int erp_idx) /* index for new irec */
1741 {
1742 xfs_ext_irec_t *erp; /* indirection array pointer */
1743 int i; /* loop counter */
1744 int nlists; /* number of irec's (ex lists) */
1745
1746 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1747 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1748
1749 /* Resize indirection array */
1750 xfs_iext_realloc_indirect(ifp, ++nlists *
1751 sizeof(xfs_ext_irec_t));
1752 /*
1753 * Move records down in the array so the
1754 * new page can use erp_idx.
1755 */
1756 erp = ifp->if_u1.if_ext_irec;
1757 for (i = nlists - 1; i > erp_idx; i--) {
1758 memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t));
1759 }
1760 ASSERT(i == erp_idx);
1761
1762 /* Initialize new extent record */
1763 erp = ifp->if_u1.if_ext_irec;
1764 erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
1765 ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
1766 memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ);
1767 erp[erp_idx].er_extcount = 0;
1768 erp[erp_idx].er_extoff = erp_idx > 0 ?
1769 erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0;
1770 return (&erp[erp_idx]);
1771 }
1772
1773 /*
1774 * Remove a record from the indirection array.
1775 */
1776 void
xfs_iext_irec_remove(xfs_ifork_t * ifp,int erp_idx)1777 xfs_iext_irec_remove(
1778 xfs_ifork_t *ifp, /* inode fork pointer */
1779 int erp_idx) /* irec index to remove */
1780 {
1781 xfs_ext_irec_t *erp; /* indirection array pointer */
1782 int i; /* loop counter */
1783 int nlists; /* number of irec's (ex lists) */
1784
1785 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1786 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1787 erp = &ifp->if_u1.if_ext_irec[erp_idx];
1788 if (erp->er_extbuf) {
1789 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1,
1790 -erp->er_extcount);
1791 kmem_free(erp->er_extbuf);
1792 }
1793 /* Compact extent records */
1794 erp = ifp->if_u1.if_ext_irec;
1795 for (i = erp_idx; i < nlists - 1; i++) {
1796 memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t));
1797 }
1798 /*
1799 * Manually free the last extent record from the indirection
1800 * array. A call to xfs_iext_realloc_indirect() with a size
1801 * of zero would result in a call to xfs_iext_destroy() which
1802 * would in turn call this function again, creating a nasty
1803 * infinite loop.
1804 */
1805 if (--nlists) {
1806 xfs_iext_realloc_indirect(ifp,
1807 nlists * sizeof(xfs_ext_irec_t));
1808 } else {
1809 kmem_free(ifp->if_u1.if_ext_irec);
1810 }
1811 ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
1812 }
1813
1814 /*
1815 * This is called to clean up large amounts of unused memory allocated
1816 * by the indirection array. Before compacting anything though, verify
1817 * that the indirection array is still needed and switch back to the
1818 * linear extent list (or even the inline buffer) if possible. The
1819 * compaction policy is as follows:
1820 *
1821 * Full Compaction: Extents fit into a single page (or inline buffer)
1822 * Partial Compaction: Extents occupy less than 50% of allocated space
1823 * No Compaction: Extents occupy at least 50% of allocated space
1824 */
1825 void
xfs_iext_irec_compact(xfs_ifork_t * ifp)1826 xfs_iext_irec_compact(
1827 xfs_ifork_t *ifp) /* inode fork pointer */
1828 {
1829 xfs_extnum_t nextents; /* number of extents in file */
1830 int nlists; /* number of irec's (ex lists) */
1831
1832 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1833 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1834 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1835
1836 if (nextents == 0) {
1837 xfs_iext_destroy(ifp);
1838 } else if (nextents <= XFS_INLINE_EXTS) {
1839 xfs_iext_indirect_to_direct(ifp);
1840 xfs_iext_direct_to_inline(ifp, nextents);
1841 } else if (nextents <= XFS_LINEAR_EXTS) {
1842 xfs_iext_indirect_to_direct(ifp);
1843 } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) {
1844 xfs_iext_irec_compact_pages(ifp);
1845 }
1846 }
1847
1848 /*
1849 * Combine extents from neighboring extent pages.
1850 */
1851 void
xfs_iext_irec_compact_pages(xfs_ifork_t * ifp)1852 xfs_iext_irec_compact_pages(
1853 xfs_ifork_t *ifp) /* inode fork pointer */
1854 {
1855 xfs_ext_irec_t *erp, *erp_next;/* pointers to irec entries */
1856 int erp_idx = 0; /* indirection array index */
1857 int nlists; /* number of irec's (ex lists) */
1858
1859 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1860 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1861 while (erp_idx < nlists - 1) {
1862 erp = &ifp->if_u1.if_ext_irec[erp_idx];
1863 erp_next = erp + 1;
1864 if (erp_next->er_extcount <=
1865 (XFS_LINEAR_EXTS - erp->er_extcount)) {
1866 memcpy(&erp->er_extbuf[erp->er_extcount],
1867 erp_next->er_extbuf, erp_next->er_extcount *
1868 sizeof(xfs_bmbt_rec_t));
1869 erp->er_extcount += erp_next->er_extcount;
1870 /*
1871 * Free page before removing extent record
1872 * so er_extoffs don't get modified in
1873 * xfs_iext_irec_remove.
1874 */
1875 kmem_free(erp_next->er_extbuf);
1876 erp_next->er_extbuf = NULL;
1877 xfs_iext_irec_remove(ifp, erp_idx + 1);
1878 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1879 } else {
1880 erp_idx++;
1881 }
1882 }
1883 }
1884
1885 /*
1886 * This is called to update the er_extoff field in the indirection
1887 * array when extents have been added or removed from one of the
1888 * extent lists. erp_idx contains the irec index to begin updating
1889 * at and ext_diff contains the number of extents that were added
1890 * or removed.
1891 */
1892 void
xfs_iext_irec_update_extoffs(xfs_ifork_t * ifp,int erp_idx,int ext_diff)1893 xfs_iext_irec_update_extoffs(
1894 xfs_ifork_t *ifp, /* inode fork pointer */
1895 int erp_idx, /* irec index to update */
1896 int ext_diff) /* number of new extents */
1897 {
1898 int i; /* loop counter */
1899 int nlists; /* number of irec's (ex lists */
1900
1901 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1902 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1903 for (i = erp_idx; i < nlists; i++) {
1904 ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff;
1905 }
1906 }
1907