#include <limits.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "basefs_allocator.h"
#include "block_range.h"
#include "hashmap.h"
#include "base_fs.h"

struct base_fs_allocator {
	struct ext2fs_hashmap *entries;
	struct basefs_entry *cur_entry;
	/* The next expected logical block to allocate for cur_entry. */
	blk64_t next_lblk;
	/* Blocks which are definitely owned by a single inode in BaseFS. */
	ext2fs_block_bitmap exclusive_block_map;
	/* Blocks which are available to the first inode that requests them. */
	ext2fs_block_bitmap dedup_block_map;
};

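/*
 * Lifecycle sketch (as wired up by the callers): base_fs_alloc_load()
 * parses the BaseFS map and reserves its blocks; base_fs_alloc_set_target()
 * selects the entry for the file about to be written, and
 * basefs_block_allocator() then serves blocks from that entry as data is
 * copied in; base_fs_alloc_cleanup() releases anything left unused.
 */
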
static errcode_t basefs_block_allocator(ext2_filsys, blk64_t, blk64_t *,
					struct blk_alloc_ctx *ctx);

/*
 * Free any reserved, but unconsumed block ranges in the allocator. This both
 * frees the block_range_list data structure and unreserves exclusive blocks
 * from the block map.
 */
static void fs_free_blocks_range(ext2_filsys fs,
				 struct base_fs_allocator *allocator,
				 struct block_range_list *list)
{
	ext2fs_block_bitmap exclusive_map = allocator->exclusive_block_map;

	blk64_t block;
	while (list->head) {
		block = consume_next_block(list);
		if (ext2fs_test_block_bitmap2(exclusive_map, block)) {
			ext2fs_unmark_block_bitmap2(fs->block_map, block);
			ext2fs_unmark_block_bitmap2(exclusive_map, block);
		}
	}
}

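/*
 * Note the asymmetry above: only exclusive blocks are unreserved. Blocks in
 * the dedup pool stay marked in fs->block_map and dedup_block_map, since
 * another file may still claim them; any that are never claimed are swept
 * up later by fs_free_blocks_bitmap().
 */
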
/*
 * Free any blocks in the bitmap that were reserved but never used. This is
 * needed to free dedup_block_map and ensure the free block bitmap is
 * internally consistent.
 */
static void fs_free_blocks_bitmap(ext2_filsys fs, ext2fs_block_bitmap bitmap)
{
	blk64_t block = 0;
	blk64_t start = fs->super->s_first_data_block;
	blk64_t end = ext2fs_blocks_count(fs->super) - 1;
	errcode_t retval;

	for (;;) {
		retval = ext2fs_find_first_set_block_bitmap2(bitmap, start, end,
			&block);
		if (retval)
			break;
		ext2fs_unmark_block_bitmap2(fs->block_map, block);
		start = block + 1;
	}
}

static void basefs_allocator_free(ext2_filsys fs,
				  struct base_fs_allocator *allocator)
{
	struct basefs_entry *e;
	struct ext2fs_hashmap_entry *it = NULL;
	struct ext2fs_hashmap *entries = allocator->entries;

	if (entries) {
		while ((e = ext2fs_hashmap_iter_in_order(entries, &it))) {
			fs_free_blocks_range(fs, allocator, &e->blocks);
			delete_block_ranges(&e->blocks);
		}
		ext2fs_hashmap_free(entries);
	}
	fs_free_blocks_bitmap(fs, allocator->dedup_block_map);
	ext2fs_free_block_bitmap(allocator->exclusive_block_map);
	ext2fs_free_block_bitmap(allocator->dedup_block_map);
	free(allocator);
}

/*
 * Build a bitmap of which blocks are definitely owned by exactly one file in
 * Base FS. Blocks which are not valid or are de-duplicated are skipped. This
 * is called during allocator initialization, to ensure that libext2fs does
 * not allocate blocks which we want to re-use.
 *
 * If a block was allocated in the initial filesystem, it can never be re-used,
 * so it will appear in neither the exclusive nor the dedup set. If a block is
 * used by multiple files, it will be removed from the owned set and instead
 * added to the dedup set.
 *
 * The dedup set is not removed from fs->block_map. This allows us to re-use
 * dedup blocks separately and not have them be allocated outside of file data.
 */
static void fs_reserve_block(ext2_filsys fs,
			     struct base_fs_allocator *allocator,
			     blk64_t block)
{
	ext2fs_block_bitmap exclusive_map = allocator->exclusive_block_map;
	ext2fs_block_bitmap dedup_map = allocator->dedup_block_map;

	if (block >= ext2fs_blocks_count(fs->super))
		return;

	if (ext2fs_test_block_bitmap2(fs->block_map, block)) {
		/* Seen before: demote from exclusive to dedup, unless it
		 * was in use in the initial filesystem (never exclusive). */
		if (!ext2fs_test_block_bitmap2(exclusive_map, block))
			return;
		ext2fs_unmark_block_bitmap2(exclusive_map, block);
		ext2fs_mark_block_bitmap2(dedup_map, block);
	} else {
		ext2fs_mark_block_bitmap2(fs->block_map, block);
		ext2fs_mark_block_bitmap2(exclusive_map, block);
	}
}

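/*
 * Worked example: suppose files A and B both list block 100 in BaseFS, and
 * the initial filesystem does not use it. Reserving A's blocks marks 100 in
 * both fs->block_map and the exclusive map. Reserving B's blocks then finds
 * 100 already marked but still exclusive, so it is moved from the exclusive
 * map to the dedup map, where the first file to request it may claim it.
 */
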
static void fs_reserve_blocks_range(ext2_filsys fs,
				    struct base_fs_allocator *allocator,
				    struct block_range_list *list)
{
	blk64_t block;
	struct block_range *blocks = list->head;

	while (blocks) {
		for (block = blocks->start; block <= blocks->end; block++)
			fs_reserve_block(fs, allocator, block);
		blocks = blocks->next;
	}
}

/*
 * For each file in the base FS map, ensure that its blocks are reserved in
 * the actual block map. This prevents libext2fs from allocating them for
 * general purpose use, and ensures that if the file needs data blocks, they
 * can be re-acquired exclusively for that file.
 *
 * If a file in the base map is missing, or not a regular file in the new
 * filesystem, then it's skipped to ensure that its blocks are reusable.
 */
static errcode_t fs_reserve_blocks(ext2_filsys fs,
				   struct base_fs_allocator *allocator,
				   const char *src_dir)
{
	int nbytes;
	char full_path[PATH_MAX];
	const char *sep = "/";
	struct stat st;
	struct basefs_entry *e;
	struct ext2fs_hashmap_entry *it = NULL;
	struct ext2fs_hashmap *entries = allocator->entries;

	if (strlen(src_dir) && src_dir[strlen(src_dir) - 1] == '/')
		sep = "";

	while ((e = ext2fs_hashmap_iter_in_order(entries, &it))) {
		nbytes = snprintf(full_path, sizeof(full_path), "%s%s%s",
				  src_dir, sep, e->path);
		if (nbytes < 0 || (size_t) nbytes >= sizeof(full_path))
			return ENAMETOOLONG;
		if (lstat(full_path, &st) || !S_ISREG(st.st_mode))
			continue;
		fs_reserve_blocks_range(fs, allocator, &e->blocks);
	}
	return 0;
}

errcode_t base_fs_alloc_load(ext2_filsys fs, const char *file,
			     const char *mountpoint, const char *src_dir)
{
	errcode_t retval = 0;
	struct base_fs_allocator *allocator;

	allocator = calloc(1, sizeof(*allocator));
	if (!allocator) {
		retval = ENOMEM;
		goto out;
	}

	retval = ext2fs_read_bitmaps(fs);
	if (retval)
		goto err_load;

	allocator->cur_entry = NULL;
	allocator->entries = basefs_parse(file, mountpoint);
	if (!allocator->entries) {
		retval = EIO;
		goto err_load;
	}
	retval = ext2fs_allocate_block_bitmap(fs, "exclusive map",
		&allocator->exclusive_block_map);
	if (retval)
		goto err_load;
	retval = ext2fs_allocate_block_bitmap(fs, "dedup map",
		&allocator->dedup_block_map);
	if (retval)
		goto err_load;

	retval = fs_reserve_blocks(fs, allocator, src_dir);
	if (retval)
		goto err_load;

	/* Override the default allocator */
	fs->get_alloc_block2 = basefs_block_allocator;
	fs->priv_data = allocator;

	goto out;

err_load:
	basefs_allocator_free(fs, allocator);
out:
	return retval;
}

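/*
 * A sketch of the expected call sequence (the exact wiring lives in the
 * caller, e.g. e2fsdroid):
 *
 *	base_fs_alloc_load(fs, basefs_file, mountpoint, src_dir);
 *	for each regular file being copied in:
 *		base_fs_alloc_set_target(fs, path, ..., S_IFREG);
 *		... write file data; fs->get_alloc_block2 fires per block ...
 *		base_fs_alloc_unset_target(fs, path, ..., S_IFREG);
 *	base_fs_alloc_cleanup(fs);
 */
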
/* Try to acquire the next usable block from the Base FS map. */
static errcode_t get_next_block(ext2_filsys fs,
				struct base_fs_allocator *allocator,
				struct block_range_list *list, blk64_t *ret)
{
	blk64_t block;
	ext2fs_block_bitmap exclusive_map = allocator->exclusive_block_map;
	ext2fs_block_bitmap dedup_map = allocator->dedup_block_map;

	if (!list->head)
		return EXT2_ET_BLOCK_ALLOC_FAIL;

	block = consume_next_block(list);
	if (block >= ext2fs_blocks_count(fs->super))
		return EXT2_ET_BLOCK_ALLOC_FAIL;
	if (ext2fs_test_block_bitmap2(exclusive_map, block)) {
		ext2fs_unmark_block_bitmap2(exclusive_map, block);
		*ret = block;
		return 0;
	}
	if (ext2fs_test_block_bitmap2(dedup_map, block)) {
		ext2fs_unmark_block_bitmap2(dedup_map, block);
		*ret = block;
		return 0;
	}
	/* In neither map: the block was either in use in the initial
	 * filesystem or already claimed from the dedup pool. */
	return EXT2_ET_BLOCK_ALLOC_FAIL;
}

/*
 * BaseFS lists blocks in logical block order. However, the allocator hook is
 * only called if a block needs to be allocated. In the case of a deduplicated
 * block, or a hole, the hook is not invoked. This means the next block
 * allocation request will be out of sequence. For example, consider if BaseFS
 * specifies the following (0 being a hole):
 *	1 2 3 0 4 5
 *
 * If the new file has a hole at logical block 0, we could accidentally
 * shift the entire expected block list as follows:
 *	0 1 2 0 3 4
 *
 * To account for this, we track the next expected logical block in the
 * allocator. If the current request is for a later logical block, we skip and
 * free the intermediate physical blocks that would have been allocated. This
 * ensures the original block assignment is respected.
 */
static void skip_blocks(ext2_filsys fs, struct base_fs_allocator *allocator,
			struct blk_alloc_ctx *ctx)
{
	blk64_t block;
	struct block_range_list *list = &allocator->cur_entry->blocks;
	ext2fs_block_bitmap exclusive_map = allocator->exclusive_block_map;

	while (list->head && allocator->next_lblk < ctx->lblk) {
		block = consume_next_block(list);
		if (block >= ext2fs_blocks_count(fs->super))
			continue;
		if (ext2fs_test_block_bitmap2(exclusive_map, block)) {
			ext2fs_unmark_block_bitmap2(exclusive_map, block);
			ext2fs_unmark_block_bitmap2(fs->block_map, block);
		}
		allocator->next_lblk++;
	}
}

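/*
 * Concretely, for the example above: when the first data allocation arrives
 * for logical block 1 (logical block 0 being a hole), next_lblk is still 0,
 * so skip_blocks() consumes one listed block (physical block 1), returns it
 * to the free pool if it was exclusively reserved, and advances next_lblk.
 * Logical block 1 is then served physical block 2, as BaseFS intended.
 */
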
static errcode_t basefs_block_allocator(ext2_filsys fs, blk64_t goal,
					blk64_t *ret, struct blk_alloc_ctx *ctx)
{
	errcode_t retval;
	struct base_fs_allocator *allocator = fs->priv_data;
	struct basefs_entry *e = allocator->cur_entry;
	ext2fs_block_bitmap dedup_map = allocator->dedup_block_map;

	if (e && ctx && (ctx->flags & BLOCK_ALLOC_DATA)) {
		if (allocator->next_lblk < ctx->lblk)
			skip_blocks(fs, allocator, ctx);
		allocator->next_lblk = ctx->lblk + 1;

		if (!get_next_block(fs, allocator, &e->blocks, ret))
			return 0;
	}

	retval = ext2fs_new_block2(fs, goal, fs->block_map, ret);
	if (!retval) {
		ext2fs_mark_block_bitmap2(fs->block_map, *ret);
		return 0;
	}
	if (retval != EXT2_ET_BLOCK_ALLOC_FAIL)
		return retval;

	/* Try to steal a block from the dedup pool. */
	retval = ext2fs_find_first_set_block_bitmap2(dedup_map,
		fs->super->s_first_data_block,
		ext2fs_blocks_count(fs->super) - 1, ret);
	if (!retval) {
		ext2fs_unmark_block_bitmap2(dedup_map, *ret);
		return 0;
	}

	/*
	 * As a last resort, take any block from our file's list. This
	 * risks bloating the diff, but means we are more likely to
	 * successfully build an image.
	 */
	while (e && e->blocks.head) {
		if (!get_next_block(fs, allocator, &e->blocks, ret))
			return 0;
	}
	return EXT2_ET_BLOCK_ALLOC_FAIL;
}

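/*
 * To summarize the fallback order: (1) the block BaseFS assigned to this
 * logical block, (2) a fresh block near the goal from the free pool,
 * (3) a block stolen from the dedup pool, (4) any block remaining in this
 * file's BaseFS list. Only when all four fail does the allocation error out.
 */
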
void base_fs_alloc_cleanup(ext2_filsys fs)
{
	basefs_allocator_free(fs, fs->priv_data);
	fs->priv_data = NULL;
	fs->get_alloc_block2 = NULL;
}

errcode_t base_fs_alloc_set_target(ext2_filsys fs, const char *target_path,
				   const char *name EXT2FS_ATTR((unused)),
				   ext2_ino_t parent_ino EXT2FS_ATTR((unused)),
				   ext2_ino_t root EXT2FS_ATTR((unused)),
				   mode_t mode)
{
	struct base_fs_allocator *allocator = fs->priv_data;

	if (mode != S_IFREG)
		return 0;

	if (allocator) {
		allocator->cur_entry = ext2fs_hashmap_lookup(allocator->entries,
							     target_path,
							     strlen(target_path));
		allocator->next_lblk = 0;
	}
	return 0;
}

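/*
 * If the target path has no BaseFS entry, cur_entry is left NULL above and
 * basefs_block_allocator() falls through to ordinary allocation.
 */
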
errcode_t base_fs_alloc_unset_target(ext2_filsys fs,
				     const char *target_path EXT2FS_ATTR((unused)),
				     const char *name EXT2FS_ATTR((unused)),
				     ext2_ino_t parent_ino EXT2FS_ATTR((unused)),
				     ext2_ino_t root EXT2FS_ATTR((unused)),
				     mode_t mode)
{
	struct base_fs_allocator *allocator = fs->priv_data;

	if (!allocator || !allocator->cur_entry || mode != S_IFREG)
		return 0;

	fs_free_blocks_range(fs, allocator, &allocator->cur_entry->blocks);
	delete_block_ranges(&allocator->cur_entry->blocks);
	return 0;
}