• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * extents.c --- rebuild extent tree
3  *
4  * Copyright (C) 2014 Oracle.
5  *
6  * %Begin-Header%
7  * This file may be redistributed under the terms of the GNU Public
8  * License, version 2.
9  * %End-Header%
10  */
11 
12 #include "config.h"
13 #include <string.h>
14 #include <ctype.h>
15 #include <errno.h>
16 #include "e2fsck.h"
17 #include "problem.h"
18 
19 #undef DEBUG
20 #undef DEBUG_SUMMARY
21 #undef DEBUG_FREE
22 
23 #define NUM_EXTENTS	341	/* about one ETB' worth of extents */
24 
25 static errcode_t e2fsck_rebuild_extents(e2fsck_t ctx, ext2_ino_t ino);
26 
27 /* Schedule an inode to have its extent tree rebuilt during pass 1E. */
e2fsck_rebuild_extents_later(e2fsck_t ctx,ext2_ino_t ino)28 errcode_t e2fsck_rebuild_extents_later(e2fsck_t ctx, ext2_ino_t ino)
29 {
30 	errcode_t retval = 0;
31 
32 	if (!ext2fs_has_feature_extents(ctx->fs->super) ||
33 	    (ctx->options & E2F_OPT_NO) ||
34 	    (ino != EXT2_ROOT_INO && ino < ctx->fs->super->s_first_ino))
35 		return 0;
36 
37 	if (ctx->flags & E2F_FLAG_ALLOC_OK)
38 		return e2fsck_rebuild_extents(ctx, ino);
39 
40 	if (!ctx->inodes_to_rebuild)
41 		retval = e2fsck_allocate_inode_bitmap(ctx->fs,
42 					     _("extent rebuild inode map"),
43 					     EXT2FS_BMAP64_RBTREE,
44 					     "inodes_to_rebuild",
45 					     &ctx->inodes_to_rebuild);
46 	if (retval)
47 		return retval;
48 
49 	ext2fs_mark_inode_bitmap2(ctx->inodes_to_rebuild, ino);
50 	return 0;
51 }
52 
53 /* Ask if an inode will have its extents rebuilt during pass 1E. */
e2fsck_ino_will_be_rebuilt(e2fsck_t ctx,ext2_ino_t ino)54 int e2fsck_ino_will_be_rebuilt(e2fsck_t ctx, ext2_ino_t ino)
55 {
56 	if (!ctx->inodes_to_rebuild)
57 		return 0;
58 	return ext2fs_test_inode_bitmap2(ctx->inodes_to_rebuild, ino);
59 }
60 
61 struct extent_list {
62 	blk64_t blocks_freed;
63 	struct ext2fs_extent *extents;
64 	unsigned int count;
65 	unsigned int size;
66 	unsigned int ext_read;
67 	errcode_t retval;
68 	ext2_ino_t ino;
69 };
70 
load_extents(e2fsck_t ctx,struct extent_list * list)71 static errcode_t load_extents(e2fsck_t ctx, struct extent_list *list)
72 {
73 	ext2_filsys		fs = ctx->fs;
74 	ext2_extent_handle_t	handle;
75 	struct ext2fs_extent	extent;
76 	errcode_t		retval;
77 
78 	retval = ext2fs_extent_open(fs, list->ino, &handle);
79 	if (retval)
80 		return retval;
81 
82 	retval = ext2fs_extent_get(handle, EXT2_EXTENT_ROOT, &extent);
83 	if (retval)
84 		goto out;
85 
86 	do {
87 		if (extent.e_flags & EXT2_EXTENT_FLAGS_SECOND_VISIT)
88 			goto next;
89 
90 		/* Internal node; free it and we'll re-allocate it later */
91 		if (!(extent.e_flags & EXT2_EXTENT_FLAGS_LEAF)) {
92 #if defined(DEBUG) || defined(DEBUG_FREE)
93 			printf("ino=%d free=%llu bf=%llu\n", list->ino,
94 					extent.e_pblk, list->blocks_freed + 1);
95 #endif
96 			list->blocks_freed++;
97 			ext2fs_block_alloc_stats2(fs, extent.e_pblk, -1);
98 			goto next;
99 		}
100 
101 		list->ext_read++;
102 		/* Can we attach it to the previous extent? */
103 		if (list->count) {
104 			struct ext2fs_extent *last = list->extents +
105 						     list->count - 1;
106 			blk64_t end = last->e_len + extent.e_len;
107 
108 			if (last->e_pblk + last->e_len == extent.e_pblk &&
109 			    last->e_lblk + last->e_len == extent.e_lblk &&
110 			    (last->e_flags & EXT2_EXTENT_FLAGS_UNINIT) ==
111 			    (extent.e_flags & EXT2_EXTENT_FLAGS_UNINIT) &&
112 			    end < (1ULL << 32)) {
113 				last->e_len += extent.e_len;
114 #ifdef DEBUG
115 				printf("R: ino=%d len=%u\n", list->ino,
116 						last->e_len);
117 #endif
118 				goto next;
119 			}
120 		}
121 
122 		/* Do we need to expand? */
123 		if (list->count == list->size) {
124 			unsigned int new_size = (list->size + NUM_EXTENTS) *
125 						sizeof(struct ext2fs_extent);
126 			retval = ext2fs_resize_mem(0, new_size, &list->extents);
127 			if (retval)
128 				goto out;
129 			list->size += NUM_EXTENTS;
130 		}
131 
132 		/* Add a new extent */
133 		memcpy(list->extents + list->count, &extent, sizeof(extent));
134 #ifdef DEBUG
135 		printf("R: ino=%d pblk=%llu lblk=%llu len=%u\n", list->ino,
136 				extent.e_pblk, extent.e_lblk, extent.e_len);
137 #endif
138 		list->count++;
139 next:
140 		retval = ext2fs_extent_get(handle, EXT2_EXTENT_NEXT, &extent);
141 	} while (retval == 0);
142 
143 out:
144 	/* Ok if we run off the end */
145 	if (retval == EXT2_ET_EXTENT_NO_NEXT)
146 		retval = 0;
147 	ext2fs_extent_free(handle);
148 	return retval;
149 }
150 
find_blocks(ext2_filsys fs,blk64_t * blocknr,e2_blkcnt_t blockcnt,blk64_t ref_blk EXT2FS_ATTR ((unused)),int ref_offset EXT2FS_ATTR ((unused)),void * priv_data)151 static int find_blocks(ext2_filsys fs, blk64_t *blocknr, e2_blkcnt_t blockcnt,
152 		       blk64_t ref_blk EXT2FS_ATTR((unused)),
153 		       int ref_offset EXT2FS_ATTR((unused)), void *priv_data)
154 {
155 	struct extent_list *list = priv_data;
156 
157 	/* Internal node? */
158 	if (blockcnt < 0) {
159 #if defined(DEBUG) || defined(DEBUG_FREE)
160 		printf("ino=%d free=%llu bf=%llu\n", list->ino, *blocknr,
161 				list->blocks_freed + 1);
162 #endif
163 		list->blocks_freed++;
164 		ext2fs_block_alloc_stats2(fs, *blocknr, -1);
165 		return 0;
166 	}
167 
168 	/* Can we attach it to the previous extent? */
169 	if (list->count) {
170 		struct ext2fs_extent *last = list->extents +
171 					     list->count - 1;
172 		blk64_t end = last->e_len + 1;
173 
174 		if (last->e_lblk + last->e_len == (__u64) blockcnt &&
175 		    last->e_pblk + last->e_len == *blocknr &&
176 		    end < (1ULL << 32)) {
177 			last->e_len++;
178 #ifdef DEBUG
179 			printf("R: ino=%d len=%u\n", list->ino, last->e_len);
180 #endif
181 			return 0;
182 		}
183 	}
184 
185 	/* Do we need to expand? */
186 	if (list->count == list->size) {
187 		unsigned int new_size = (list->size + NUM_EXTENTS) *
188 					sizeof(struct ext2fs_extent);
189 		list->retval = ext2fs_resize_mem(0, new_size, &list->extents);
190 		if (list->retval)
191 			return BLOCK_ABORT;
192 		list->size += NUM_EXTENTS;
193 	}
194 
195 	/* Add a new extent */
196 	list->extents[list->count].e_pblk = *blocknr;
197 	list->extents[list->count].e_lblk = blockcnt;
198 	list->extents[list->count].e_len = 1;
199 	list->extents[list->count].e_flags = 0;
200 #ifdef DEBUG
201 	printf("R: ino=%d pblk=%llu lblk=%llu len=%u\n", list->ino, *blocknr,
202 			blockcnt, 1);
203 #endif
204 	list->count++;
205 
206 	return 0;
207 }
208 
rebuild_extent_tree(e2fsck_t ctx,struct extent_list * list,ext2_ino_t ino)209 static errcode_t rebuild_extent_tree(e2fsck_t ctx, struct extent_list *list,
210 				     ext2_ino_t ino)
211 {
212 	struct ext2_inode_large	inode;
213 	errcode_t		retval;
214 	ext2_extent_handle_t	handle;
215 	unsigned int		i, ext_written;
216 	struct ext2fs_extent	*ex, extent;
217 	blk64_t			start_val, delta;
218 
219 	list->count = 0;
220 	list->blocks_freed = 0;
221 	list->ino = ino;
222 	list->ext_read = 0;
223 	e2fsck_read_inode_full(ctx, ino, EXT2_INODE(&inode), sizeof(inode),
224 			       "rebuild_extents");
225 
226 	/* Skip deleted inodes and inline data files */
227 	if (inode.i_links_count == 0 ||
228 	    inode.i_flags & EXT4_INLINE_DATA_FL)
229 		return 0;
230 
231 	/* Collect lblk->pblk mappings */
232 	if (inode.i_flags & EXT4_EXTENTS_FL) {
233 		retval = load_extents(ctx, list);
234 		if (retval)
235 			goto err;
236 		goto extents_loaded;
237 	}
238 
239 	retval = ext2fs_block_iterate3(ctx->fs, ino, BLOCK_FLAG_READ_ONLY, 0,
240 				       find_blocks, list);
241 	if (retval)
242 		goto err;
243 	if (list->retval) {
244 		retval = list->retval;
245 		goto err;
246 	}
247 
248 extents_loaded:
249 	/* Reset extent tree */
250 	inode.i_flags &= ~EXT4_EXTENTS_FL;
251 	memset(inode.i_block, 0, sizeof(inode.i_block));
252 
253 	/* Make a note of freed blocks */
254 	quota_data_sub(ctx->qctx, &inode, ino,
255 		       list->blocks_freed * ctx->fs->blocksize);
256 	retval = ext2fs_iblk_sub_blocks(ctx->fs, EXT2_INODE(&inode),
257 					list->blocks_freed);
258 	if (retval)
259 		goto err;
260 
261 	/* Now stuff extents into the file */
262 	retval = ext2fs_extent_open2(ctx->fs, ino, EXT2_INODE(&inode), &handle);
263 	if (retval)
264 		goto err;
265 
266 	ext_written = 0;
267 	start_val = ext2fs_get_stat_i_blocks(ctx->fs, EXT2_INODE(&inode));
268 	for (i = 0, ex = list->extents; i < list->count; i++, ex++) {
269 		memcpy(&extent, ex, sizeof(struct ext2fs_extent));
270 		extent.e_flags &= EXT2_EXTENT_FLAGS_UNINIT;
271 		if (extent.e_flags & EXT2_EXTENT_FLAGS_UNINIT) {
272 			if (extent.e_len > EXT_UNINIT_MAX_LEN) {
273 				extent.e_len = EXT_UNINIT_MAX_LEN;
274 				ex->e_pblk += EXT_UNINIT_MAX_LEN;
275 				ex->e_lblk += EXT_UNINIT_MAX_LEN;
276 				ex->e_len -= EXT_UNINIT_MAX_LEN;
277 				ex--;
278 				i--;
279 			}
280 		} else {
281 			if (extent.e_len > EXT_INIT_MAX_LEN) {
282 				extent.e_len = EXT_INIT_MAX_LEN;
283 				ex->e_pblk += EXT_INIT_MAX_LEN;
284 				ex->e_lblk += EXT_INIT_MAX_LEN;
285 				ex->e_len -= EXT_INIT_MAX_LEN;
286 				ex--;
287 				i--;
288 			}
289 		}
290 
291 #ifdef DEBUG
292 		printf("W: ino=%d pblk=%llu lblk=%llu len=%u\n", ino,
293 				extent.e_pblk, extent.e_lblk, extent.e_len);
294 #endif
295 		retval = ext2fs_extent_insert(handle, EXT2_EXTENT_INSERT_AFTER,
296 					      &extent);
297 		if (retval)
298 			goto err2;
299 		retval = ext2fs_extent_fix_parents(handle);
300 		if (retval)
301 			goto err2;
302 		ext_written++;
303 	}
304 
305 	delta = ext2fs_get_stat_i_blocks(ctx->fs, EXT2_INODE(&inode)) -
306 		start_val;
307 	if (delta)
308 		quota_data_add(ctx->qctx, &inode, ino, delta << 9);
309 
310 #if defined(DEBUG) || defined(DEBUG_SUMMARY)
311 	printf("rebuild: ino=%d extents=%d->%d\n", ino, list->ext_read,
312 	       ext_written);
313 #endif
314 	e2fsck_write_inode(ctx, ino, EXT2_INODE(&inode), "rebuild_extents");
315 
316 err2:
317 	ext2fs_extent_free(handle);
318 err:
319 	return retval;
320 }
321 
322 /* Rebuild the extents immediately */
e2fsck_rebuild_extents(e2fsck_t ctx,ext2_ino_t ino)323 static errcode_t e2fsck_rebuild_extents(e2fsck_t ctx, ext2_ino_t ino)
324 {
325 	struct extent_list list = { 0 };
326 	errcode_t err;
327 
328 	if (!ext2fs_has_feature_extents(ctx->fs->super) ||
329 	    (ctx->options & E2F_OPT_NO) ||
330 	    (ino != EXT2_ROOT_INO && ino < ctx->fs->super->s_first_ino))
331 		return 0;
332 
333 	e2fsck_read_bitmaps(ctx);
334 	err = ext2fs_get_array(NUM_EXTENTS, sizeof(struct ext2fs_extent),
335 			       &list.extents);
336 	if (err)
337 		return err;
338 	list.size = NUM_EXTENTS;
339 	err = rebuild_extent_tree(ctx, &list, ino);
340 	ext2fs_free_mem(&list.extents);
341 
342 	return err;
343 }
344 
rebuild_extents(e2fsck_t ctx,const char * pass_name,int pr_header)345 static void rebuild_extents(e2fsck_t ctx, const char *pass_name, int pr_header)
346 {
347 	struct problem_context	pctx;
348 #ifdef RESOURCE_TRACK
349 	struct resource_track	rtrack;
350 #endif
351 	struct extent_list	list = { 0 };
352 	int			first = 1;
353 	ext2_ino_t		ino = 0;
354 	errcode_t		retval;
355 
356 	if (!ext2fs_has_feature_extents(ctx->fs->super) ||
357 	    !ext2fs_test_valid(ctx->fs) ||
358 	    ctx->invalid_bitmaps) {
359 		if (ctx->inodes_to_rebuild)
360 			ext2fs_free_inode_bitmap(ctx->inodes_to_rebuild);
361 		ctx->inodes_to_rebuild = NULL;
362 	}
363 
364 	if (ctx->inodes_to_rebuild == NULL)
365 		return;
366 
367 	init_resource_track(&rtrack, ctx->fs->io);
368 	clear_problem_context(&pctx);
369 	e2fsck_read_bitmaps(ctx);
370 
371 	list.size = NUM_EXTENTS;
372 	retval = ext2fs_get_array(sizeof(struct ext2fs_extent),
373 				  list.size, &list.extents);
374 	if (retval)
375 		return;
376 	while (1) {
377 		retval = ext2fs_find_first_set_inode_bitmap2(
378 				ctx->inodes_to_rebuild, ino + 1,
379 				ctx->fs->super->s_inodes_count, &ino);
380 		if (retval)
381 			break;
382 		pctx.ino = ino;
383 		if (first) {
384 			fix_problem(ctx, pr_header, &pctx);
385 			first = 0;
386 		}
387 		pctx.errcode = rebuild_extent_tree(ctx, &list, ino);
388 		if (pctx.errcode) {
389 			end_problem_latch(ctx, PR_LATCH_OPTIMIZE_EXT);
390 			fix_problem(ctx, PR_1E_OPTIMIZE_EXT_ERR, &pctx);
391 		}
392 		if (ctx->progress && !ctx->progress_fd)
393 			e2fsck_simple_progress(ctx, "Rebuilding extents",
394 					100.0 * (float) ino /
395 					(float) ctx->fs->super->s_inodes_count,
396 					ino);
397 	}
398 	end_problem_latch(ctx, PR_LATCH_OPTIMIZE_EXT);
399 
400 	ext2fs_free_inode_bitmap(ctx->inodes_to_rebuild);
401 	ctx->inodes_to_rebuild = NULL;
402 	ext2fs_free_mem(&list.extents);
403 
404 	print_resource_track(ctx, pass_name, &rtrack, ctx->fs->io);
405 }
406 
407 /* Scan a file to see if we should rebuild its extent tree */
e2fsck_check_rebuild_extents(e2fsck_t ctx,ext2_ino_t ino,struct ext2_inode * inode,struct problem_context * pctx)408 errcode_t e2fsck_check_rebuild_extents(e2fsck_t ctx, ext2_ino_t ino,
409 				  struct ext2_inode *inode,
410 				  struct problem_context *pctx)
411 {
412 	struct extent_tree_info	eti;
413 	struct ext2_extent_info	info, top_info;
414 	struct ext2fs_extent	extent;
415 	ext2_extent_handle_t	ehandle;
416 	ext2_filsys		fs = ctx->fs;
417 	errcode_t		retval;
418 
419 	/* block map file and we want extent conversion */
420 	if (!(inode->i_flags & EXT4_EXTENTS_FL) &&
421 	    !(inode->i_flags & EXT4_INLINE_DATA_FL) &&
422 	    (ctx->options & E2F_OPT_CONVERT_BMAP)) {
423 		return e2fsck_rebuild_extents_later(ctx, ino);
424 	}
425 
426 	if (!(inode->i_flags & EXT4_EXTENTS_FL))
427 		return 0;
428 	memset(&eti, 0, sizeof(eti));
429 	eti.ino = ino;
430 
431 	/* Otherwise, go scan the extent tree... */
432 	retval = ext2fs_extent_open2(fs, ino, inode, &ehandle);
433 	if (retval)
434 		return 0;
435 
436 	retval = ext2fs_extent_get_info(ehandle, &top_info);
437 	if (retval)
438 		goto out;
439 
440 	/* Check maximum extent depth */
441 	pctx->ino = ino;
442 	pctx->blk = top_info.max_depth;
443 	pctx->blk2 = ext2fs_max_extent_depth(ehandle);
444 	if (pctx->blk2 < pctx->blk &&
445 	    fix_problem(ctx, PR_1_EXTENT_BAD_MAX_DEPTH, pctx))
446 		eti.force_rebuild = 1;
447 
448 	/* Can we collect extent tree level stats? */
449 	pctx->blk = MAX_EXTENT_DEPTH_COUNT;
450 	if (pctx->blk2 > pctx->blk)
451 		fix_problem(ctx, PR_1E_MAX_EXTENT_TREE_DEPTH, pctx);
452 
453 	/* We need to fix tree depth problems, but the scan isn't a fix. */
454 	if (ctx->options & E2F_OPT_FIXES_ONLY)
455 		goto out;
456 
457 	retval = ext2fs_extent_get(ehandle, EXT2_EXTENT_ROOT, &extent);
458 	if (retval)
459 		goto out;
460 
461 	do {
462 		retval = ext2fs_extent_get_info(ehandle, &info);
463 		if (retval)
464 			break;
465 
466 		/*
467 		 * If this is the first extent in an extent block that we
468 		 * haven't visited, collect stats on the block.
469 		 */
470 		if (info.curr_entry == 1 &&
471 		    !(extent.e_flags & EXT2_EXTENT_FLAGS_SECOND_VISIT) &&
472 		    !eti.force_rebuild) {
473 			struct extent_tree_level *etl;
474 
475 			etl = eti.ext_info + info.curr_level;
476 			etl->num_extents += info.num_entries;
477 			etl->max_extents += info.max_entries;
478 			/*
479 			 * Implementation wart: Splitting extent blocks when
480 			 * appending will leave the old block with one free
481 			 * entry.  Therefore unless the node is totally full,
482 			 * pretend that a non-root extent block can hold one
483 			 * fewer entry than it actually does, so that we don't
484 			 * repeatedly rebuild the extent tree.
485 			 */
486 			if (info.curr_level &&
487 			    info.num_entries < info.max_entries)
488 				etl->max_extents--;
489 		}
490 
491 		/* Skip to the end of a block of leaf nodes */
492 		if (extent.e_flags & EXT2_EXTENT_FLAGS_LEAF) {
493 			retval = ext2fs_extent_get(ehandle,
494 						    EXT2_EXTENT_LAST_SIB,
495 						    &extent);
496 			if (retval)
497 				break;
498 		}
499 
500 		retval = ext2fs_extent_get(ehandle, EXT2_EXTENT_NEXT, &extent);
501 	} while (retval == 0);
502 out:
503 	ext2fs_extent_free(ehandle);
504 	return e2fsck_should_rebuild_extents(ctx, pctx, &eti, &top_info);
505 }
506 
507 /* Having scanned a file's extent tree, decide if we should rebuild it */
e2fsck_should_rebuild_extents(e2fsck_t ctx,struct problem_context * pctx,struct extent_tree_info * eti,struct ext2_extent_info * info)508 errcode_t e2fsck_should_rebuild_extents(e2fsck_t ctx,
509 				   struct problem_context *pctx,
510 				   struct extent_tree_info *eti,
511 				   struct ext2_extent_info *info)
512 {
513 	struct extent_tree_level *ei;
514 	int i, j, op;
515 	unsigned int extents_per_block;
516 
517 	if (eti->force_rebuild)
518 		goto rebuild;
519 
520 	if (ctx->options & E2F_OPT_NOOPT_EXTENTS)
521 		return 0;
522 
523 	extents_per_block = (ctx->fs->blocksize -
524 			     sizeof(struct ext3_extent_header)) /
525 			    sizeof(struct ext3_extent);
526 	/*
527 	 * If we can consolidate a level or shorten the tree, schedule the
528 	 * extent tree to be rebuilt.
529 	 */
530 	for (i = 0, ei = eti->ext_info; i < info->max_depth + 1; i++, ei++) {
531 		if (ei->max_extents - ei->num_extents > extents_per_block) {
532 			pctx->blk = i;
533 			op = PR_1E_CAN_NARROW_EXTENT_TREE;
534 			goto rebuild;
535 		}
536 		for (j = 0; j < i; j++) {
537 			if (ei->num_extents < eti->ext_info[j].max_extents) {
538 				pctx->blk = i;
539 				op = PR_1E_CAN_COLLAPSE_EXTENT_TREE;
540 				goto rebuild;
541 			}
542 		}
543 	}
544 	return 0;
545 
546 rebuild:
547 	if (eti->force_rebuild || fix_problem(ctx, op, pctx))
548 		return e2fsck_rebuild_extents_later(ctx, eti->ino);
549 	return 0;
550 }
551 
e2fsck_pass1e(e2fsck_t ctx)552 void e2fsck_pass1e(e2fsck_t ctx)
553 {
554 	rebuild_extents(ctx, "Pass 1E", PR_1E_PASS_HEADER);
555 }
556