1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright 2023 Red Hat
4 */
5
6 #include "index-layout.h"
7
8 #include <linux/random.h>
9
10 #include "logger.h"
11 #include "memory-alloc.h"
12 #include "murmurhash3.h"
13 #include "numeric.h"
14 #include "time-utils.h"
15
16 #include "config.h"
17 #include "open-chapter.h"
18 #include "volume-index.h"
19
20 /*
21 * The UDS layout on storage media is divided into a number of fixed-size regions, the sizes of
22 * which are computed when the index is created. Every header and region begins on 4K block
23 * boundary. Save regions are further sub-divided into regions of their own.
24 *
25 * Each region has a kind and an instance number. Some kinds only have one instance and therefore
26 * use RL_SOLE_INSTANCE (-1) as the instance number. The RL_KIND_INDEX used to use instances to
27 * represent sub-indices; now, however, there is only ever one sub-index and therefore one instance.
28 * The RL_KIND_VOLUME_INDEX uses instances to record which zone is being saved.
29 *
30 * Every region header has a type and version.
31 *
32 * +-+-+---------+--------+--------+-+
33 * | | | I N D E X 0 101, 0 | |
34 * |H|C+---------+--------+--------+S|
35 * |D|f| Volume | Save | Save |e|
36 * |R|g| Region | Region | Region |a|
37 * | | | 201, -1 | 202, 0 | 202, 1 |l|
38 * +-+-+--------+---------+--------+-+
39 *
40 * The header contains the encoded region layout table as well as some index configuration data.
41 * The sub-index region and its subdivisions are maintained in the same table.
42 *
43 * There are two save regions to preserve the old state in case saving the new state is incomplete.
44 * They are used in alternation. Each save region is further divided into sub-regions.
45 *
46 * +-+-----+------+------+-----+-----+
47 * |H| IPM | MI | MI | | OC |
48 * |D| | zone | zone | ... | |
49 * |R| 301 | 302 | 302 | | 303 |
50 * | | -1 | 0 | 1 | | -1 |
51 * +-+-----+------+------+-----+-----+
52 *
53 * The header contains the encoded region layout table as well as index state data for that save.
54 * Each save also has a unique nonce.
55 */
56
/* Size in bytes of the nonce_info field of struct super_block_data. */
#define NONCE_INFO_SIZE 32
/* Number of save regions; compute_sizes() uses this as the save count. */
#define MAX_SAVES 2
59
/*
 * The kind stored in each layout_region. The hundreds digit groups the kinds by nesting level:
 * 1xx for top-level regions, 2xx for parts of the index region, 3xx for parts of a save region.
 */
enum region_kind {
	/* Unused (free) space */
	RL_KIND_EMPTY = 0,
	/* A header block */
	RL_KIND_HEADER = 1,
	/* The configuration block */
	RL_KIND_CONFIG = 100,
	/* The (sub-)index region */
	RL_KIND_INDEX = 101,
	/* The seal block at the end of the layout */
	RL_KIND_SEAL = 102,
	/* The volume region */
	RL_KIND_VOLUME = 201,
	/* One save region */
	RL_KIND_SAVE = 202,
	/* The saved index page map */
	RL_KIND_INDEX_PAGE_MAP = 301,
	/* One saved volume index zone */
	RL_KIND_VOLUME_INDEX = 302,
	/* The saved open chapter */
	RL_KIND_OPEN_CHAPTER = 303,
};
72
/*
 * The type recorded in a region_header. Some region types are historical and are no longer
 * used.
 */
enum region_type {
	/* unused */
	RH_TYPE_FREE = 0,
	/* The region table in the layout header (super block) */
	RH_TYPE_SUPER = 1,
	/* A save region holding saved zones */
	RH_TYPE_SAVE = 2,
	/* unused */
	RH_TYPE_CHECKPOINT = 3,
	/* A save region with no saved zones */
	RH_TYPE_UNSAVED = 4,
};
81
/* Instance number for kinds with a single instance; equals -1 when stored in a u16 field. */
#define RL_SOLE_INSTANCE 65535
83
84 /*
85 * Super block version 2 is the first released version.
86 *
87 * Super block version 3 is the normal version used from RHEL 8.2 onwards.
88 *
89 * Super block versions 4 through 6 were incremental development versions and
90 * are not supported.
91 *
92 * Super block version 7 is used for volumes which have been reduced in size by one chapter in
93 * order to make room to prepend LVM metadata to a volume originally created without LVM. This
94 * allows the index to retain most of its deduplication records.
95 */
/* Oldest super block version named as supported here */
#define SUPER_VERSION_MINIMUM 3
/* Version written into newly generated super blocks */
#define SUPER_VERSION_CURRENT 3
/* Newest super block version named here (the converted-volume format) */
#define SUPER_VERSION_MAXIMUM 7
99
100 static const u8 LAYOUT_MAGIC[] = "*ALBIREO*SINGLE*FILE*LAYOUT*001*";
101 static const u64 REGION_MAGIC = 0x416c6252676e3031; /* 'AlbRgn01' */
102
103 #define MAGIC_SIZE (sizeof(LAYOUT_MAGIC) - 1)
104
105 struct region_header {
106 u64 magic;
107 u64 region_blocks;
108 u16 type;
109 /* Currently always version 1 */
110 u16 version;
111 u16 region_count;
112 u16 payload;
113 };
114
115 struct layout_region {
116 u64 start_block;
117 u64 block_count;
118 u32 __unused;
119 u16 kind;
120 u16 instance;
121 };
122
123 struct region_table {
124 size_t encoded_size;
125 struct region_header header;
126 struct layout_region regions[];
127 };
128
129 struct index_save_data {
130 u64 timestamp;
131 u64 nonce;
132 /* Currently always version 1 */
133 u32 version;
134 u32 unused__;
135 };
136
137 struct index_state_version {
138 s32 signature;
139 s32 version_id;
140 };
141
142 static const struct index_state_version INDEX_STATE_VERSION_301 = {
143 .signature = -1,
144 .version_id = 301,
145 };
146
147 struct index_state_data301 {
148 struct index_state_version version;
149 u64 newest_chapter;
150 u64 oldest_chapter;
151 u64 last_save;
152 u32 unused;
153 u32 padding;
154 };
155
156 struct index_save_layout {
157 unsigned int zone_count;
158 struct layout_region index_save;
159 struct layout_region header;
160 struct layout_region index_page_map;
161 struct layout_region free_space;
162 struct layout_region volume_index_zones[MAX_ZONES];
163 struct layout_region open_chapter;
164 struct index_save_data save_data;
165 struct index_state_data301 state_data;
166 };
167
168 struct sub_index_layout {
169 u64 nonce;
170 struct layout_region sub_index;
171 struct layout_region volume;
172 struct index_save_layout *saves;
173 };
174
175 struct super_block_data {
176 u8 magic_label[MAGIC_SIZE];
177 u8 nonce_info[NONCE_INFO_SIZE];
178 u64 nonce;
179 u32 version;
180 u32 block_size;
181 u16 index_count;
182 u16 max_saves;
183 /* Padding reflects a blank field on permanent storage */
184 u8 padding[4];
185 u64 open_chapter_blocks;
186 u64 page_map_blocks;
187 u64 volume_offset;
188 u64 start_offset;
189 };
190
191 struct index_layout {
192 struct io_factory *factory;
193 size_t factory_size;
194 off_t offset;
195 struct super_block_data super;
196 struct layout_region header;
197 struct layout_region config;
198 struct sub_index_layout index;
199 struct layout_region seal;
200 u64 total_blocks;
201 };
202
203 struct save_layout_sizes {
204 unsigned int save_count;
205 size_t block_size;
206 u64 volume_blocks;
207 u64 volume_index_blocks;
208 u64 page_map_blocks;
209 u64 open_chapter_blocks;
210 u64 save_blocks;
211 u64 sub_index_blocks;
212 u64 total_blocks;
213 size_t total_size;
214 };
215
is_converted_super_block(struct super_block_data * super)216 static inline bool is_converted_super_block(struct super_block_data *super)
217 {
218 return super->version == 7;
219 }
220
compute_sizes(const struct uds_configuration * config,struct save_layout_sizes * sls)221 static int __must_check compute_sizes(const struct uds_configuration *config,
222 struct save_layout_sizes *sls)
223 {
224 int result;
225 struct index_geometry *geometry = config->geometry;
226
227 memset(sls, 0, sizeof(*sls));
228 sls->save_count = MAX_SAVES;
229 sls->block_size = UDS_BLOCK_SIZE;
230 sls->volume_blocks = geometry->bytes_per_volume / sls->block_size;
231
232 result = uds_compute_volume_index_save_blocks(config, sls->block_size,
233 &sls->volume_index_blocks);
234 if (result != UDS_SUCCESS)
235 return vdo_log_error_strerror(result, "cannot compute index save size");
236
237 sls->page_map_blocks =
238 DIV_ROUND_UP(uds_compute_index_page_map_save_size(geometry),
239 sls->block_size);
240 sls->open_chapter_blocks =
241 DIV_ROUND_UP(uds_compute_saved_open_chapter_size(geometry),
242 sls->block_size);
243 sls->save_blocks =
244 1 + (sls->volume_index_blocks + sls->page_map_blocks + sls->open_chapter_blocks);
245 sls->sub_index_blocks = sls->volume_blocks + (sls->save_count * sls->save_blocks);
246 sls->total_blocks = 3 + sls->sub_index_blocks;
247 sls->total_size = sls->total_blocks * sls->block_size;
248
249 return UDS_SUCCESS;
250 }
251
/*
 * Compute the number of bytes of storage an index with the given parameters requires.
 *
 * On success the size is stored in @index_size and UDS_SUCCESS is returned. Returns -EINVAL
 * if @index_size is NULL; other failures are converted with uds_status_to_errno().
 */
int uds_compute_index_size(const struct uds_parameters *parameters, u64 *index_size)
{
	struct save_layout_sizes sizes;
	struct uds_configuration *config;
	int status;

	if (!index_size) {
		vdo_log_error("Missing output size pointer");
		return -EINVAL;
	}

	status = uds_make_configuration(parameters, &config);
	if (status != UDS_SUCCESS) {
		vdo_log_error_strerror(status, "cannot compute index size");
		return uds_status_to_errno(status);
	}

	status = compute_sizes(config, &sizes);
	/* The configuration is only needed to derive the sizes. */
	uds_free_configuration(config);
	if (status == UDS_SUCCESS) {
		*index_size = sizes.total_size;
		return UDS_SUCCESS;
	}

	return uds_status_to_errno(status);
}
277
278 /* Create unique data using the current time and a pseudorandom number. */
create_unique_nonce_data(u8 * buffer)279 static void create_unique_nonce_data(u8 *buffer)
280 {
281 ktime_t now = current_time_ns(CLOCK_REALTIME);
282 u32 rand;
283 size_t offset = 0;
284
285 get_random_bytes(&rand, sizeof(u32));
286 memcpy(buffer + offset, &now, sizeof(now));
287 offset += sizeof(now);
288 memcpy(buffer + offset, &rand, sizeof(rand));
289 offset += sizeof(rand);
290 while (offset < NONCE_INFO_SIZE) {
291 size_t len = min(NONCE_INFO_SIZE - offset, offset);
292
293 memcpy(buffer + offset, buffer, len);
294 offset += len;
295 }
296 }
297
/*
 * Hash @len bytes of @data with murmurhash3_128(), seeded from @start, and return the
 * little-endian 64-bit value found at byte offset 4 of the 16-byte result.
 */
static u64 hash_stuff(u64 start, const void *data, size_t len)
{
	u8 digest[16];
	u32 seed;

	/* Mix start with itself shifted right by 27 bits, keeping the low 32 bits. */
	seed = start ^ (start >> 27);
	murmurhash3_128(data, len, seed, digest);

	return get_unaligned_le64(&digest[4]);
}
306
307 /* Generate a primary nonce from the provided data. */
generate_primary_nonce(const void * data,size_t len)308 static u64 generate_primary_nonce(const void *data, size_t len)
309 {
310 return hash_stuff(0xa1b1e0fc, data, len);
311 }
312
313 /*
314 * Deterministically generate a secondary nonce from an existing nonce and some arbitrary data by
315 * hashing the original nonce and the data to produce a new nonce.
316 */
generate_secondary_nonce(u64 nonce,const void * data,size_t len)317 static u64 generate_secondary_nonce(u64 nonce, const void *data, size_t len)
318 {
319 return hash_stuff(nonce + 1, data, len);
320 }
321
open_layout_reader(struct index_layout * layout,struct layout_region * lr,off_t offset,struct buffered_reader ** reader_ptr)322 static int __must_check open_layout_reader(struct index_layout *layout,
323 struct layout_region *lr, off_t offset,
324 struct buffered_reader **reader_ptr)
325 {
326 return uds_make_buffered_reader(layout->factory, lr->start_block + offset,
327 lr->block_count, reader_ptr);
328 }
329
open_region_reader(struct index_layout * layout,struct layout_region * region,struct buffered_reader ** reader_ptr)330 static int open_region_reader(struct index_layout *layout, struct layout_region *region,
331 struct buffered_reader **reader_ptr)
332 {
333 return open_layout_reader(layout, region, -layout->super.start_offset,
334 reader_ptr);
335 }
336
open_layout_writer(struct index_layout * layout,struct layout_region * lr,off_t offset,struct buffered_writer ** writer_ptr)337 static int __must_check open_layout_writer(struct index_layout *layout,
338 struct layout_region *lr, off_t offset,
339 struct buffered_writer **writer_ptr)
340 {
341 return uds_make_buffered_writer(layout->factory, lr->start_block + offset,
342 lr->block_count, writer_ptr);
343 }
344
open_region_writer(struct index_layout * layout,struct layout_region * region,struct buffered_writer ** writer_ptr)345 static int open_region_writer(struct index_layout *layout, struct layout_region *region,
346 struct buffered_writer **writer_ptr)
347 {
348 return open_layout_writer(layout, region, -layout->super.start_offset,
349 writer_ptr);
350 }
351
generate_super_block_data(struct save_layout_sizes * sls,struct super_block_data * super)352 static void generate_super_block_data(struct save_layout_sizes *sls,
353 struct super_block_data *super)
354 {
355 memset(super, 0, sizeof(*super));
356 memcpy(super->magic_label, LAYOUT_MAGIC, MAGIC_SIZE);
357 create_unique_nonce_data(super->nonce_info);
358
359 super->nonce = generate_primary_nonce(super->nonce_info,
360 sizeof(super->nonce_info));
361 super->version = SUPER_VERSION_CURRENT;
362 super->block_size = sls->block_size;
363 super->index_count = 1;
364 super->max_saves = sls->save_count;
365 super->open_chapter_blocks = sls->open_chapter_blocks;
366 super->page_map_blocks = sls->page_map_blocks;
367 super->volume_offset = 0;
368 super->start_offset = 0;
369 }
370
define_sub_index_nonce(struct index_layout * layout)371 static void define_sub_index_nonce(struct index_layout *layout)
372 {
373 struct sub_index_nonce_data {
374 u64 offset;
375 u16 index_id;
376 };
377 struct sub_index_layout *sil = &layout->index;
378 u64 primary_nonce = layout->super.nonce;
379 u8 buffer[sizeof(struct sub_index_nonce_data)] = { 0 };
380 size_t offset = 0;
381
382 encode_u64_le(buffer, &offset, sil->sub_index.start_block);
383 encode_u16_le(buffer, &offset, 0);
384 sil->nonce = generate_secondary_nonce(primary_nonce, buffer, sizeof(buffer));
385 if (sil->nonce == 0) {
386 sil->nonce = generate_secondary_nonce(~primary_nonce + 1, buffer,
387 sizeof(buffer));
388 }
389 }
390
/*
 * Lay out the sub-index starting at @start_block: the volume comes first, followed by
 * save_count save regions of save_blocks each. The sub-index nonce is defined last.
 */
static void setup_sub_index(struct index_layout *layout, u64 start_block,
			    struct save_layout_sizes *sls)
{
	struct sub_index_layout *sil = &layout->index;
	u64 save_start = start_block + sls->volume_blocks;
	unsigned int slot;

	sil->sub_index = (struct layout_region) {
		.start_block = start_block,
		.block_count = sls->sub_index_blocks,
		.kind = RL_KIND_INDEX,
		.instance = 0,
	};

	/* The volume begins on the same block as the sub-index itself. */
	sil->volume = (struct layout_region) {
		.start_block = start_block,
		.block_count = sls->volume_blocks,
		.kind = RL_KIND_VOLUME,
		.instance = RL_SOLE_INSTANCE,
	};

	for (slot = 0; slot < sls->save_count; slot++, save_start += sls->save_blocks) {
		sil->saves[slot].index_save = (struct layout_region) {
			.start_block = save_start,
			.block_count = sls->save_blocks,
			.kind = RL_KIND_SAVE,
			.instance = slot,
		};
	}

	define_sub_index_nonce(layout);
}
427
initialize_layout(struct index_layout * layout,struct save_layout_sizes * sls)428 static void initialize_layout(struct index_layout *layout, struct save_layout_sizes *sls)
429 {
430 u64 next_block = layout->offset / sls->block_size;
431
432 layout->total_blocks = sls->total_blocks;
433 generate_super_block_data(sls, &layout->super);
434 layout->header = (struct layout_region) {
435 .start_block = next_block++,
436 .block_count = 1,
437 .kind = RL_KIND_HEADER,
438 .instance = RL_SOLE_INSTANCE,
439 };
440
441 layout->config = (struct layout_region) {
442 .start_block = next_block++,
443 .block_count = 1,
444 .kind = RL_KIND_CONFIG,
445 .instance = RL_SOLE_INSTANCE,
446 };
447
448 setup_sub_index(layout, next_block, sls);
449 next_block += sls->sub_index_blocks;
450
451 layout->seal = (struct layout_region) {
452 .start_block = next_block,
453 .block_count = 1,
454 .kind = RL_KIND_SEAL,
455 .instance = RL_SOLE_INSTANCE,
456 };
457 }
458
make_index_save_region_table(struct index_save_layout * isl,struct region_table ** table_ptr)459 static int __must_check make_index_save_region_table(struct index_save_layout *isl,
460 struct region_table **table_ptr)
461 {
462 int result;
463 unsigned int z;
464 struct region_table *table;
465 struct layout_region *lr;
466 u16 region_count;
467 size_t payload;
468 size_t type;
469
470 if (isl->zone_count > 0) {
471 /*
472 * Normal save regions: header, page map, volume index zones,
473 * open chapter, and possibly free space.
474 */
475 region_count = 3 + isl->zone_count;
476 if (isl->free_space.block_count > 0)
477 region_count++;
478
479 payload = sizeof(isl->save_data) + sizeof(isl->state_data);
480 type = RH_TYPE_SAVE;
481 } else {
482 /* Empty save regions: header, page map, free space. */
483 region_count = 3;
484 payload = sizeof(isl->save_data);
485 type = RH_TYPE_UNSAVED;
486 }
487
488 result = vdo_allocate_extended(struct region_table, region_count,
489 struct layout_region,
490 "layout region table for ISL", &table);
491 if (result != VDO_SUCCESS)
492 return result;
493
494 lr = &table->regions[0];
495 *lr++ = isl->header;
496 *lr++ = isl->index_page_map;
497 for (z = 0; z < isl->zone_count; z++)
498 *lr++ = isl->volume_index_zones[z];
499
500 if (isl->zone_count > 0)
501 *lr++ = isl->open_chapter;
502
503 if (isl->free_space.block_count > 0)
504 *lr++ = isl->free_space;
505
506 table->header = (struct region_header) {
507 .magic = REGION_MAGIC,
508 .region_blocks = isl->index_save.block_count,
509 .type = type,
510 .version = 1,
511 .region_count = region_count,
512 .payload = payload,
513 };
514
515 table->encoded_size = (sizeof(struct region_header) + payload +
516 region_count * sizeof(struct layout_region));
517 *table_ptr = table;
518 return UDS_SUCCESS;
519 }
520
/*
 * Encode the region table into @buffer in little-endian form, advancing *@offset past the
 * header and every region. The magic written is always REGION_MAGIC.
 */
static void encode_region_table(u8 *buffer, size_t *offset, struct region_table *table)
{
	const struct region_header *hdr = &table->header;
	const struct layout_region *region = table->regions;
	const struct layout_region *end = region + hdr->region_count;

	encode_u64_le(buffer, offset, REGION_MAGIC);
	encode_u64_le(buffer, offset, hdr->region_blocks);
	encode_u16_le(buffer, offset, hdr->type);
	encode_u16_le(buffer, offset, hdr->version);
	encode_u16_le(buffer, offset, hdr->region_count);
	encode_u16_le(buffer, offset, hdr->payload);

	for (; region < end; region++) {
		encode_u64_le(buffer, offset, region->start_block);
		encode_u64_le(buffer, offset, region->block_count);
		/* The unused field is always written as zero. */
		encode_u32_le(buffer, offset, 0);
		encode_u16_le(buffer, offset, region->kind);
		encode_u16_le(buffer, offset, region->instance);
	}
}
540
write_index_save_header(struct index_save_layout * isl,struct region_table * table,struct buffered_writer * writer)541 static int __must_check write_index_save_header(struct index_save_layout *isl,
542 struct region_table *table,
543 struct buffered_writer *writer)
544 {
545 int result;
546 u8 *buffer;
547 size_t offset = 0;
548
549 result = vdo_allocate(table->encoded_size, u8, "index save data", &buffer);
550 if (result != VDO_SUCCESS)
551 return result;
552
553 encode_region_table(buffer, &offset, table);
554 encode_u64_le(buffer, &offset, isl->save_data.timestamp);
555 encode_u64_le(buffer, &offset, isl->save_data.nonce);
556 encode_u32_le(buffer, &offset, isl->save_data.version);
557 encode_u32_le(buffer, &offset, 0);
558 if (isl->zone_count > 0) {
559 encode_u32_le(buffer, &offset, INDEX_STATE_VERSION_301.signature);
560 encode_u32_le(buffer, &offset, INDEX_STATE_VERSION_301.version_id);
561 encode_u64_le(buffer, &offset, isl->state_data.newest_chapter);
562 encode_u64_le(buffer, &offset, isl->state_data.oldest_chapter);
563 encode_u64_le(buffer, &offset, isl->state_data.last_save);
564 encode_u64_le(buffer, &offset, 0);
565 }
566
567 result = uds_write_to_buffered_writer(writer, buffer, offset);
568 vdo_free(buffer);
569 if (result != UDS_SUCCESS)
570 return result;
571
572 return uds_flush_buffered_writer(writer);
573 }
574
write_index_save_layout(struct index_layout * layout,struct index_save_layout * isl)575 static int write_index_save_layout(struct index_layout *layout,
576 struct index_save_layout *isl)
577 {
578 int result;
579 struct region_table *table;
580 struct buffered_writer *writer;
581
582 result = make_index_save_region_table(isl, &table);
583 if (result != UDS_SUCCESS)
584 return result;
585
586 result = open_region_writer(layout, &isl->header, &writer);
587 if (result != UDS_SUCCESS) {
588 vdo_free(table);
589 return result;
590 }
591
592 result = write_index_save_header(isl, table, writer);
593 vdo_free(table);
594 uds_free_buffered_writer(writer);
595
596 return result;
597 }
598
/*
 * Return a save layout to the empty state: no zones, cleared save data, and a region
 * split of one header block, @page_map_blocks of page map, and the rest as free space.
 */
static void reset_index_save_layout(struct index_save_layout *isl, u64 page_map_blocks)
{
	const u64 header_block = isl->index_save.start_block;
	const u64 page_map_block = header_block + 1;
	const u64 free_block = page_map_block + page_map_blocks;
	/* Everything after the header block and the page map is free. */
	const u64 free_count = isl->index_save.block_count - page_map_blocks - 1;

	isl->zone_count = 0;
	memset(&isl->save_data, 0, sizeof(isl->save_data));

	isl->header = (struct layout_region) {
		.start_block = header_block,
		.block_count = 1,
		.kind = RL_KIND_HEADER,
		.instance = RL_SOLE_INSTANCE,
	};

	isl->index_page_map = (struct layout_region) {
		.start_block = page_map_block,
		.block_count = page_map_blocks,
		.kind = RL_KIND_INDEX_PAGE_MAP,
		.instance = RL_SOLE_INSTANCE,
	};

	isl->free_space = (struct layout_region) {
		.start_block = free_block,
		.block_count = free_count,
		.kind = RL_KIND_EMPTY,
		.instance = RL_SOLE_INSTANCE,
	};
}
631
invalidate_old_save(struct index_layout * layout,struct index_save_layout * isl)632 static int __must_check invalidate_old_save(struct index_layout *layout,
633 struct index_save_layout *isl)
634 {
635 reset_index_save_layout(isl, layout->super.page_map_blocks);
636 return write_index_save_layout(layout, isl);
637 }
638
discard_index_state_data(struct index_layout * layout)639 static int discard_index_state_data(struct index_layout *layout)
640 {
641 int result;
642 int saved_result = UDS_SUCCESS;
643 unsigned int i;
644
645 for (i = 0; i < layout->super.max_saves; i++) {
646 result = invalidate_old_save(layout, &layout->index.saves[i]);
647 if (result != UDS_SUCCESS)
648 saved_result = result;
649 }
650
651 if (saved_result != UDS_SUCCESS) {
652 return vdo_log_error_strerror(result,
653 "%s: cannot destroy all index saves",
654 __func__);
655 }
656
657 return UDS_SUCCESS;
658 }
659
make_layout_region_table(struct index_layout * layout,struct region_table ** table_ptr)660 static int __must_check make_layout_region_table(struct index_layout *layout,
661 struct region_table **table_ptr)
662 {
663 int result;
664 unsigned int i;
665 /* Regions: header, config, index, volume, saves, seal */
666 u16 region_count = 5 + layout->super.max_saves;
667 u16 payload;
668 struct region_table *table;
669 struct layout_region *lr;
670
671 result = vdo_allocate_extended(struct region_table, region_count,
672 struct layout_region, "layout region table",
673 &table);
674 if (result != VDO_SUCCESS)
675 return result;
676
677 lr = &table->regions[0];
678 *lr++ = layout->header;
679 *lr++ = layout->config;
680 *lr++ = layout->index.sub_index;
681 *lr++ = layout->index.volume;
682
683 for (i = 0; i < layout->super.max_saves; i++)
684 *lr++ = layout->index.saves[i].index_save;
685
686 *lr++ = layout->seal;
687
688 if (is_converted_super_block(&layout->super)) {
689 payload = sizeof(struct super_block_data);
690 } else {
691 payload = (sizeof(struct super_block_data) -
692 sizeof(layout->super.volume_offset) -
693 sizeof(layout->super.start_offset));
694 }
695
696 table->header = (struct region_header) {
697 .magic = REGION_MAGIC,
698 .region_blocks = layout->total_blocks,
699 .type = RH_TYPE_SUPER,
700 .version = 1,
701 .region_count = region_count,
702 .payload = payload,
703 };
704
705 table->encoded_size = (sizeof(struct region_header) + payload +
706 region_count * sizeof(struct layout_region));
707 *table_ptr = table;
708 return UDS_SUCCESS;
709 }
710
write_layout_header(struct index_layout * layout,struct region_table * table,struct buffered_writer * writer)711 static int __must_check write_layout_header(struct index_layout *layout,
712 struct region_table *table,
713 struct buffered_writer *writer)
714 {
715 int result;
716 u8 *buffer;
717 size_t offset = 0;
718
719 result = vdo_allocate(table->encoded_size, u8, "layout data", &buffer);
720 if (result != VDO_SUCCESS)
721 return result;
722
723 encode_region_table(buffer, &offset, table);
724 memcpy(buffer + offset, &layout->super.magic_label, MAGIC_SIZE);
725 offset += MAGIC_SIZE;
726 memcpy(buffer + offset, &layout->super.nonce_info, NONCE_INFO_SIZE);
727 offset += NONCE_INFO_SIZE;
728 encode_u64_le(buffer, &offset, layout->super.nonce);
729 encode_u32_le(buffer, &offset, layout->super.version);
730 encode_u32_le(buffer, &offset, layout->super.block_size);
731 encode_u16_le(buffer, &offset, layout->super.index_count);
732 encode_u16_le(buffer, &offset, layout->super.max_saves);
733 encode_u32_le(buffer, &offset, 0);
734 encode_u64_le(buffer, &offset, layout->super.open_chapter_blocks);
735 encode_u64_le(buffer, &offset, layout->super.page_map_blocks);
736
737 if (is_converted_super_block(&layout->super)) {
738 encode_u64_le(buffer, &offset, layout->super.volume_offset);
739 encode_u64_le(buffer, &offset, layout->super.start_offset);
740 }
741
742 result = uds_write_to_buffered_writer(writer, buffer, offset);
743 vdo_free(buffer);
744 if (result != UDS_SUCCESS)
745 return result;
746
747 return uds_flush_buffered_writer(writer);
748 }
749
write_uds_index_config(struct index_layout * layout,struct uds_configuration * config,off_t offset)750 static int __must_check write_uds_index_config(struct index_layout *layout,
751 struct uds_configuration *config,
752 off_t offset)
753 {
754 int result;
755 struct buffered_writer *writer = NULL;
756
757 result = open_layout_writer(layout, &layout->config, offset, &writer);
758 if (result != UDS_SUCCESS)
759 return vdo_log_error_strerror(result, "failed to open config region");
760
761 result = uds_write_config_contents(writer, config, layout->super.version);
762 if (result != UDS_SUCCESS) {
763 uds_free_buffered_writer(writer);
764 return vdo_log_error_strerror(result, "failed to write config region");
765 }
766
767 result = uds_flush_buffered_writer(writer);
768 if (result != UDS_SUCCESS) {
769 uds_free_buffered_writer(writer);
770 return vdo_log_error_strerror(result, "cannot flush config writer");
771 }
772
773 uds_free_buffered_writer(writer);
774 return UDS_SUCCESS;
775 }
776
save_layout(struct index_layout * layout,off_t offset)777 static int __must_check save_layout(struct index_layout *layout, off_t offset)
778 {
779 int result;
780 struct buffered_writer *writer = NULL;
781 struct region_table *table;
782
783 result = make_layout_region_table(layout, &table);
784 if (result != UDS_SUCCESS)
785 return result;
786
787 result = open_layout_writer(layout, &layout->header, offset, &writer);
788 if (result != UDS_SUCCESS) {
789 vdo_free(table);
790 return result;
791 }
792
793 result = write_layout_header(layout, table, writer);
794 vdo_free(table);
795 uds_free_buffered_writer(writer);
796
797 return result;
798 }
799
create_index_layout(struct index_layout * layout,struct uds_configuration * config)800 static int create_index_layout(struct index_layout *layout, struct uds_configuration *config)
801 {
802 int result;
803 struct save_layout_sizes sizes;
804
805 result = compute_sizes(config, &sizes);
806 if (result != UDS_SUCCESS)
807 return result;
808
809 result = vdo_allocate(sizes.save_count, struct index_save_layout, __func__,
810 &layout->index.saves);
811 if (result != VDO_SUCCESS)
812 return result;
813
814 initialize_layout(layout, &sizes);
815
816 result = discard_index_state_data(layout);
817 if (result != UDS_SUCCESS)
818 return result;
819
820 result = write_uds_index_config(layout, config, 0);
821 if (result != UDS_SUCCESS)
822 return result;
823
824 return save_layout(layout, 0);
825 }
826
/*
 * Compute the nonce for a save by hashing its timestamp, its version, and the starting
 * block of its save region with the volume nonce. The nonce and unused fields of the save
 * data are hashed as zero.
 */
static u64 generate_index_save_nonce(u64 volume_nonce, struct index_save_layout *isl)
{
	/* Only used to size the hashed buffer. */
	struct save_nonce_data {
		struct index_save_data data;
		u64 offset;
	} nonce_data;
	u8 encoded[sizeof(nonce_data)];
	size_t used = 0;
	const struct index_save_data *save = &isl->save_data;

	encode_u64_le(encoded, &used, save->timestamp);
	encode_u64_le(encoded, &used, 0);
	encode_u32_le(encoded, &used, save->version);
	encode_u32_le(encoded, &used, 0U);
	encode_u64_le(encoded, &used, isl->index_save.start_block);
	VDO_ASSERT_LOG_ONLY(used == sizeof(nonce_data),
			    "%zu bytes encoded of %zu expected",
			    used, sizeof(nonce_data));

	return generate_secondary_nonce(volume_nonce, encoded, sizeof(encoded));
}
846
/*
 * Check whether a save slot holds a usable save. Returns the save's timestamp if it has
 * zones, a non-zero timestamp, and a nonce matching the one recomputed from the volume
 * nonce; returns 0 otherwise.
 */
static u64 validate_index_save_layout(struct index_save_layout *isl, u64 volume_nonce)
{
	const struct index_save_data *save = &isl->save_data;

	if (isl->zone_count == 0)
		return 0;

	if (save->timestamp == 0)
		return 0;

	if (save->nonce != generate_index_save_nonce(volume_nonce, isl))
		return 0;

	return save->timestamp;
}
857
find_latest_uds_index_save_slot(struct index_layout * layout,struct index_save_layout ** isl_ptr)858 static int find_latest_uds_index_save_slot(struct index_layout *layout,
859 struct index_save_layout **isl_ptr)
860 {
861 struct index_save_layout *latest = NULL;
862 struct index_save_layout *isl;
863 unsigned int i;
864 u64 save_time = 0;
865 u64 latest_time = 0;
866
867 for (i = 0; i < layout->super.max_saves; i++) {
868 isl = &layout->index.saves[i];
869 save_time = validate_index_save_layout(isl, layout->index.nonce);
870 if (save_time > latest_time) {
871 latest = isl;
872 latest_time = save_time;
873 }
874 }
875
876 if (latest == NULL) {
877 vdo_log_error("No valid index save found");
878 return UDS_INDEX_NOT_SAVED_CLEANLY;
879 }
880
881 *isl_ptr = latest;
882 return UDS_SUCCESS;
883 }
884
uds_discard_open_chapter(struct index_layout * layout)885 int uds_discard_open_chapter(struct index_layout *layout)
886 {
887 int result;
888 struct index_save_layout *isl;
889 struct buffered_writer *writer;
890
891 result = find_latest_uds_index_save_slot(layout, &isl);
892 if (result != UDS_SUCCESS)
893 return result;
894
895 result = open_region_writer(layout, &isl->open_chapter, &writer);
896 if (result != UDS_SUCCESS)
897 return result;
898
899 result = uds_write_to_buffered_writer(writer, NULL, UDS_BLOCK_SIZE);
900 if (result != UDS_SUCCESS) {
901 uds_free_buffered_writer(writer);
902 return result;
903 }
904
905 result = uds_flush_buffered_writer(writer);
906 uds_free_buffered_writer(writer);
907 return result;
908 }
909
uds_load_index_state(struct index_layout * layout,struct uds_index * index)910 int uds_load_index_state(struct index_layout *layout, struct uds_index *index)
911 {
912 int result;
913 unsigned int zone;
914 struct index_save_layout *isl;
915 struct buffered_reader *readers[MAX_ZONES];
916
917 result = find_latest_uds_index_save_slot(layout, &isl);
918 if (result != UDS_SUCCESS)
919 return result;
920
921 index->newest_virtual_chapter = isl->state_data.newest_chapter;
922 index->oldest_virtual_chapter = isl->state_data.oldest_chapter;
923 index->last_save = isl->state_data.last_save;
924
925 result = open_region_reader(layout, &isl->open_chapter, &readers[0]);
926 if (result != UDS_SUCCESS)
927 return result;
928
929 result = uds_load_open_chapter(index, readers[0]);
930 uds_free_buffered_reader(readers[0]);
931 if (result != UDS_SUCCESS)
932 return result;
933
934 for (zone = 0; zone < isl->zone_count; zone++) {
935 result = open_region_reader(layout, &isl->volume_index_zones[zone],
936 &readers[zone]);
937 if (result != UDS_SUCCESS) {
938 for (; zone > 0; zone--)
939 uds_free_buffered_reader(readers[zone - 1]);
940
941 return result;
942 }
943 }
944
945 result = uds_load_volume_index(index->volume_index, readers, isl->zone_count);
946 for (zone = 0; zone < isl->zone_count; zone++)
947 uds_free_buffered_reader(readers[zone]);
948 if (result != UDS_SUCCESS)
949 return result;
950
951 result = open_region_reader(layout, &isl->index_page_map, &readers[0]);
952 if (result != UDS_SUCCESS)
953 return result;
954
955 result = uds_read_index_page_map(index->volume->index_page_map, readers[0]);
956 uds_free_buffered_reader(readers[0]);
957
958 return result;
959 }
960
select_oldest_index_save_layout(struct index_layout * layout)961 static struct index_save_layout *select_oldest_index_save_layout(struct index_layout *layout)
962 {
963 struct index_save_layout *oldest = NULL;
964 struct index_save_layout *isl;
965 unsigned int i;
966 u64 save_time = 0;
967 u64 oldest_time = 0;
968
969 for (i = 0; i < layout->super.max_saves; i++) {
970 isl = &layout->index.saves[i];
971 save_time = validate_index_save_layout(isl, layout->index.nonce);
972 if (oldest == NULL || save_time < oldest_time) {
973 oldest = isl;
974 oldest_time = save_time;
975 }
976 }
977
978 return oldest;
979 }
980
/*
 * Prepare a save slot for a new save. The save data is cleared and given a fresh timestamp,
 * version, and nonce, and the slot (isl->index_save) is divided, in order, into a one-block
 * header, the index page map, one equally sized volume index region per zone, the open chapter,
 * and a free-space region holding whatever blocks are left over.
 */
static void instantiate_index_save_layout(struct index_save_layout *isl,
					  struct super_block_data *super,
					  u64 volume_nonce, unsigned int zone_count)
{
	unsigned int zone;
	u64 cursor = isl->index_save.start_block;
	u64 spare_blocks;
	u64 blocks_per_zone;

	isl->zone_count = zone_count;
	memset(&isl->save_data, 0, sizeof(isl->save_data));
	isl->save_data.timestamp = ktime_to_ms(current_time_ns(CLOCK_REALTIME));
	isl->save_data.version = 1;
	/* The nonce is generated last, once the other save data fields are set. */
	isl->save_data.nonce = generate_index_save_nonce(volume_nonce, isl);

	/* The header takes the first block of the slot. */
	isl->header = (struct layout_region) {
		.start_block = cursor,
		.block_count = 1,
		.kind = RL_KIND_HEADER,
		.instance = RL_SOLE_INSTANCE,
	};
	cursor++;

	isl->index_page_map = (struct layout_region) {
		.start_block = cursor,
		.block_count = super->page_map_blocks,
		.kind = RL_KIND_INDEX_PAGE_MAP,
		.instance = RL_SOLE_INSTANCE,
	};
	cursor += super->page_map_blocks;

	/* Blocks not claimed by the header, page map, or open chapter are shared by the zones. */
	spare_blocks = (isl->index_save.block_count - 1 -
			super->page_map_blocks -
			super->open_chapter_blocks);
	blocks_per_zone = spare_blocks / isl->zone_count;
	for (zone = 0; zone < isl->zone_count; zone++) {
		isl->volume_index_zones[zone] = (struct layout_region) {
			.start_block = cursor,
			.block_count = blocks_per_zone,
			.kind = RL_KIND_VOLUME_INDEX,
			.instance = zone,
		};

		cursor += blocks_per_zone;
		spare_blocks -= blocks_per_zone;
	}

	isl->open_chapter = (struct layout_region) {
		.start_block = cursor,
		.block_count = super->open_chapter_blocks,
		.kind = RL_KIND_OPEN_CHAPTER,
		.instance = RL_SOLE_INSTANCE,
	};
	cursor += super->open_chapter_blocks;

	/* Only the remainder of the per-zone division is left at this point. */
	isl->free_space = (struct layout_region) {
		.start_block = cursor,
		.block_count = spare_blocks,
		.kind = RL_KIND_EMPTY,
		.instance = RL_SOLE_INSTANCE,
	};
}
1044
setup_uds_index_save_slot(struct index_layout * layout,unsigned int zone_count,struct index_save_layout ** isl_ptr)1045 static int setup_uds_index_save_slot(struct index_layout *layout,
1046 unsigned int zone_count,
1047 struct index_save_layout **isl_ptr)
1048 {
1049 int result;
1050 struct index_save_layout *isl;
1051
1052 isl = select_oldest_index_save_layout(layout);
1053 result = invalidate_old_save(layout, isl);
1054 if (result != UDS_SUCCESS)
1055 return result;
1056
1057 instantiate_index_save_layout(isl, &layout->super, layout->index.nonce,
1058 zone_count);
1059
1060 *isl_ptr = isl;
1061 return UDS_SUCCESS;
1062 }
1063
cancel_uds_index_save(struct index_save_layout * isl)1064 static void cancel_uds_index_save(struct index_save_layout *isl)
1065 {
1066 memset(&isl->save_data, 0, sizeof(isl->save_data));
1067 memset(&isl->state_data, 0, sizeof(isl->state_data));
1068 isl->zone_count = 0;
1069 }
1070
uds_save_index_state(struct index_layout * layout,struct uds_index * index)1071 int uds_save_index_state(struct index_layout *layout, struct uds_index *index)
1072 {
1073 int result;
1074 unsigned int zone;
1075 struct index_save_layout *isl;
1076 struct buffered_writer *writers[MAX_ZONES];
1077
1078 result = setup_uds_index_save_slot(layout, index->zone_count, &isl);
1079 if (result != UDS_SUCCESS)
1080 return result;
1081
1082 isl->state_data = (struct index_state_data301) {
1083 .newest_chapter = index->newest_virtual_chapter,
1084 .oldest_chapter = index->oldest_virtual_chapter,
1085 .last_save = index->last_save,
1086 };
1087
1088 result = open_region_writer(layout, &isl->open_chapter, &writers[0]);
1089 if (result != UDS_SUCCESS) {
1090 cancel_uds_index_save(isl);
1091 return result;
1092 }
1093
1094 result = uds_save_open_chapter(index, writers[0]);
1095 uds_free_buffered_writer(writers[0]);
1096 if (result != UDS_SUCCESS) {
1097 cancel_uds_index_save(isl);
1098 return result;
1099 }
1100
1101 for (zone = 0; zone < index->zone_count; zone++) {
1102 result = open_region_writer(layout, &isl->volume_index_zones[zone],
1103 &writers[zone]);
1104 if (result != UDS_SUCCESS) {
1105 for (; zone > 0; zone--)
1106 uds_free_buffered_writer(writers[zone - 1]);
1107
1108 cancel_uds_index_save(isl);
1109 return result;
1110 }
1111 }
1112
1113 result = uds_save_volume_index(index->volume_index, writers, index->zone_count);
1114 for (zone = 0; zone < index->zone_count; zone++)
1115 uds_free_buffered_writer(writers[zone]);
1116 if (result != UDS_SUCCESS) {
1117 cancel_uds_index_save(isl);
1118 return result;
1119 }
1120
1121 result = open_region_writer(layout, &isl->index_page_map, &writers[0]);
1122 if (result != UDS_SUCCESS) {
1123 cancel_uds_index_save(isl);
1124 return result;
1125 }
1126
1127 result = uds_write_index_page_map(index->volume->index_page_map, writers[0]);
1128 uds_free_buffered_writer(writers[0]);
1129 if (result != UDS_SUCCESS) {
1130 cancel_uds_index_save(isl);
1131 return result;
1132 }
1133
1134 return write_index_save_layout(layout, isl);
1135 }
1136
load_region_table(struct buffered_reader * reader,struct region_table ** table_ptr)1137 static int __must_check load_region_table(struct buffered_reader *reader,
1138 struct region_table **table_ptr)
1139 {
1140 int result;
1141 unsigned int i;
1142 struct region_header header;
1143 struct region_table *table;
1144 u8 buffer[sizeof(struct region_header)];
1145 size_t offset = 0;
1146
1147 result = uds_read_from_buffered_reader(reader, buffer, sizeof(buffer));
1148 if (result != UDS_SUCCESS)
1149 return vdo_log_error_strerror(result, "cannot read region table header");
1150
1151 decode_u64_le(buffer, &offset, &header.magic);
1152 decode_u64_le(buffer, &offset, &header.region_blocks);
1153 decode_u16_le(buffer, &offset, &header.type);
1154 decode_u16_le(buffer, &offset, &header.version);
1155 decode_u16_le(buffer, &offset, &header.region_count);
1156 decode_u16_le(buffer, &offset, &header.payload);
1157
1158 if (header.magic != REGION_MAGIC)
1159 return UDS_NO_INDEX;
1160
1161 if (header.version != 1) {
1162 return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1163 "unknown region table version %hu",
1164 header.version);
1165 }
1166
1167 result = vdo_allocate_extended(struct region_table, header.region_count,
1168 struct layout_region,
1169 "single file layout region table", &table);
1170 if (result != VDO_SUCCESS)
1171 return result;
1172
1173 table->header = header;
1174 for (i = 0; i < header.region_count; i++) {
1175 u8 region_buffer[sizeof(struct layout_region)];
1176
1177 offset = 0;
1178 result = uds_read_from_buffered_reader(reader, region_buffer,
1179 sizeof(region_buffer));
1180 if (result != UDS_SUCCESS) {
1181 vdo_free(table);
1182 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1183 "cannot read region table layouts");
1184 }
1185
1186 decode_u64_le(region_buffer, &offset, &table->regions[i].start_block);
1187 decode_u64_le(region_buffer, &offset, &table->regions[i].block_count);
1188 offset += sizeof(u32);
1189 decode_u16_le(region_buffer, &offset, &table->regions[i].kind);
1190 decode_u16_le(region_buffer, &offset, &table->regions[i].instance);
1191 }
1192
1193 *table_ptr = table;
1194 return UDS_SUCCESS;
1195 }
1196
read_super_block_data(struct buffered_reader * reader,struct index_layout * layout,size_t saved_size)1197 static int __must_check read_super_block_data(struct buffered_reader *reader,
1198 struct index_layout *layout,
1199 size_t saved_size)
1200 {
1201 int result;
1202 struct super_block_data *super = &layout->super;
1203 u8 *buffer;
1204 size_t offset = 0;
1205
1206 result = vdo_allocate(saved_size, u8, "super block data", &buffer);
1207 if (result != VDO_SUCCESS)
1208 return result;
1209
1210 result = uds_read_from_buffered_reader(reader, buffer, saved_size);
1211 if (result != UDS_SUCCESS) {
1212 vdo_free(buffer);
1213 return vdo_log_error_strerror(result, "cannot read region table header");
1214 }
1215
1216 memcpy(&super->magic_label, buffer, MAGIC_SIZE);
1217 offset += MAGIC_SIZE;
1218 memcpy(&super->nonce_info, buffer + offset, NONCE_INFO_SIZE);
1219 offset += NONCE_INFO_SIZE;
1220 decode_u64_le(buffer, &offset, &super->nonce);
1221 decode_u32_le(buffer, &offset, &super->version);
1222 decode_u32_le(buffer, &offset, &super->block_size);
1223 decode_u16_le(buffer, &offset, &super->index_count);
1224 decode_u16_le(buffer, &offset, &super->max_saves);
1225 offset += sizeof(u32);
1226 decode_u64_le(buffer, &offset, &super->open_chapter_blocks);
1227 decode_u64_le(buffer, &offset, &super->page_map_blocks);
1228
1229 if (is_converted_super_block(super)) {
1230 decode_u64_le(buffer, &offset, &super->volume_offset);
1231 decode_u64_le(buffer, &offset, &super->start_offset);
1232 } else {
1233 super->volume_offset = 0;
1234 super->start_offset = 0;
1235 }
1236
1237 vdo_free(buffer);
1238
1239 if (memcmp(super->magic_label, LAYOUT_MAGIC, MAGIC_SIZE) != 0)
1240 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1241 "unknown superblock magic label");
1242
1243 if ((super->version < SUPER_VERSION_MINIMUM) ||
1244 (super->version == 4) || (super->version == 5) || (super->version == 6) ||
1245 (super->version > SUPER_VERSION_MAXIMUM)) {
1246 return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1247 "unknown superblock version number %u",
1248 super->version);
1249 }
1250
1251 if (super->volume_offset < super->start_offset) {
1252 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1253 "inconsistent offsets (start %llu, volume %llu)",
1254 (unsigned long long) super->start_offset,
1255 (unsigned long long) super->volume_offset);
1256 }
1257
1258 /* Sub-indexes are no longer used but the layout retains this field. */
1259 if (super->index_count != 1) {
1260 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1261 "invalid subindex count %u",
1262 super->index_count);
1263 }
1264
1265 if (generate_primary_nonce(super->nonce_info, sizeof(super->nonce_info)) != super->nonce) {
1266 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1267 "inconsistent superblock nonce");
1268 }
1269
1270 return UDS_SUCCESS;
1271 }
1272
verify_region(struct layout_region * lr,u64 start_block,enum region_kind kind,unsigned int instance)1273 static int __must_check verify_region(struct layout_region *lr, u64 start_block,
1274 enum region_kind kind, unsigned int instance)
1275 {
1276 if (lr->start_block != start_block)
1277 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1278 "incorrect layout region offset");
1279
1280 if (lr->kind != kind)
1281 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1282 "incorrect layout region kind");
1283
1284 if (lr->instance != instance) {
1285 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1286 "incorrect layout region instance");
1287 }
1288
1289 return UDS_SUCCESS;
1290 }
1291
verify_sub_index(struct index_layout * layout,u64 start_block,struct region_table * table)1292 static int __must_check verify_sub_index(struct index_layout *layout, u64 start_block,
1293 struct region_table *table)
1294 {
1295 int result;
1296 unsigned int i;
1297 struct sub_index_layout *sil = &layout->index;
1298 u64 next_block = start_block;
1299
1300 sil->sub_index = table->regions[2];
1301 result = verify_region(&sil->sub_index, next_block, RL_KIND_INDEX, 0);
1302 if (result != UDS_SUCCESS)
1303 return result;
1304
1305 define_sub_index_nonce(layout);
1306
1307 sil->volume = table->regions[3];
1308 result = verify_region(&sil->volume, next_block, RL_KIND_VOLUME,
1309 RL_SOLE_INSTANCE);
1310 if (result != UDS_SUCCESS)
1311 return result;
1312
1313 next_block += sil->volume.block_count + layout->super.volume_offset;
1314
1315 for (i = 0; i < layout->super.max_saves; i++) {
1316 sil->saves[i].index_save = table->regions[i + 4];
1317 result = verify_region(&sil->saves[i].index_save, next_block,
1318 RL_KIND_SAVE, i);
1319 if (result != UDS_SUCCESS)
1320 return result;
1321
1322 next_block += sil->saves[i].index_save.block_count;
1323 }
1324
1325 next_block -= layout->super.volume_offset;
1326 if (next_block != start_block + sil->sub_index.block_count) {
1327 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1328 "sub index region does not span all saves");
1329 }
1330
1331 return UDS_SUCCESS;
1332 }
1333
reconstitute_layout(struct index_layout * layout,struct region_table * table,u64 first_block)1334 static int __must_check reconstitute_layout(struct index_layout *layout,
1335 struct region_table *table, u64 first_block)
1336 {
1337 int result;
1338 u64 next_block = first_block;
1339
1340 result = vdo_allocate(layout->super.max_saves, struct index_save_layout,
1341 __func__, &layout->index.saves);
1342 if (result != VDO_SUCCESS)
1343 return result;
1344
1345 layout->total_blocks = table->header.region_blocks;
1346
1347 layout->header = table->regions[0];
1348 result = verify_region(&layout->header, next_block++, RL_KIND_HEADER,
1349 RL_SOLE_INSTANCE);
1350 if (result != UDS_SUCCESS)
1351 return result;
1352
1353 layout->config = table->regions[1];
1354 result = verify_region(&layout->config, next_block++, RL_KIND_CONFIG,
1355 RL_SOLE_INSTANCE);
1356 if (result != UDS_SUCCESS)
1357 return result;
1358
1359 result = verify_sub_index(layout, next_block, table);
1360 if (result != UDS_SUCCESS)
1361 return result;
1362
1363 next_block += layout->index.sub_index.block_count;
1364
1365 layout->seal = table->regions[table->header.region_count - 1];
1366 result = verify_region(&layout->seal, next_block + layout->super.volume_offset,
1367 RL_KIND_SEAL, RL_SOLE_INSTANCE);
1368 if (result != UDS_SUCCESS)
1369 return result;
1370
1371 if (++next_block != (first_block + layout->total_blocks)) {
1372 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1373 "layout table does not span total blocks");
1374 }
1375
1376 return UDS_SUCCESS;
1377 }
1378
load_super_block(struct index_layout * layout,size_t block_size,u64 first_block,struct buffered_reader * reader)1379 static int __must_check load_super_block(struct index_layout *layout, size_t block_size,
1380 u64 first_block, struct buffered_reader *reader)
1381 {
1382 int result;
1383 struct region_table *table = NULL;
1384 struct super_block_data *super = &layout->super;
1385
1386 result = load_region_table(reader, &table);
1387 if (result != UDS_SUCCESS)
1388 return result;
1389
1390 if (table->header.type != RH_TYPE_SUPER) {
1391 vdo_free(table);
1392 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1393 "not a superblock region table");
1394 }
1395
1396 result = read_super_block_data(reader, layout, table->header.payload);
1397 if (result != UDS_SUCCESS) {
1398 vdo_free(table);
1399 return vdo_log_error_strerror(result, "unknown superblock format");
1400 }
1401
1402 if (super->block_size != block_size) {
1403 vdo_free(table);
1404 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1405 "superblock saved block_size %u differs from supplied block_size %zu",
1406 super->block_size, block_size);
1407 }
1408
1409 first_block -= (super->volume_offset - super->start_offset);
1410 result = reconstitute_layout(layout, table, first_block);
1411 vdo_free(table);
1412 return result;
1413 }
1414
read_index_save_data(struct buffered_reader * reader,struct index_save_layout * isl,size_t saved_size)1415 static int __must_check read_index_save_data(struct buffered_reader *reader,
1416 struct index_save_layout *isl,
1417 size_t saved_size)
1418 {
1419 int result;
1420 struct index_state_version file_version;
1421 u8 buffer[sizeof(struct index_save_data) + sizeof(struct index_state_data301)];
1422 size_t offset = 0;
1423
1424 if (saved_size != sizeof(buffer)) {
1425 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1426 "unexpected index save data size %zu",
1427 saved_size);
1428 }
1429
1430 result = uds_read_from_buffered_reader(reader, buffer, sizeof(buffer));
1431 if (result != UDS_SUCCESS)
1432 return vdo_log_error_strerror(result, "cannot read index save data");
1433
1434 decode_u64_le(buffer, &offset, &isl->save_data.timestamp);
1435 decode_u64_le(buffer, &offset, &isl->save_data.nonce);
1436 decode_u32_le(buffer, &offset, &isl->save_data.version);
1437 offset += sizeof(u32);
1438
1439 if (isl->save_data.version > 1) {
1440 return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1441 "unknown index save version number %u",
1442 isl->save_data.version);
1443 }
1444
1445 decode_s32_le(buffer, &offset, &file_version.signature);
1446 decode_s32_le(buffer, &offset, &file_version.version_id);
1447
1448 if ((file_version.signature != INDEX_STATE_VERSION_301.signature) ||
1449 (file_version.version_id != INDEX_STATE_VERSION_301.version_id)) {
1450 return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1451 "index state version %d,%d is unsupported",
1452 file_version.signature,
1453 file_version.version_id);
1454 }
1455
1456 decode_u64_le(buffer, &offset, &isl->state_data.newest_chapter);
1457 decode_u64_le(buffer, &offset, &isl->state_data.oldest_chapter);
1458 decode_u64_le(buffer, &offset, &isl->state_data.last_save);
1459 /* Skip past some historical fields that are now unused */
1460 offset += sizeof(u32) + sizeof(u32);
1461 return UDS_SUCCESS;
1462 }
1463
reconstruct_index_save(struct index_save_layout * isl,struct region_table * table)1464 static int __must_check reconstruct_index_save(struct index_save_layout *isl,
1465 struct region_table *table)
1466 {
1467 int result;
1468 unsigned int z;
1469 struct layout_region *last_region;
1470 u64 next_block = isl->index_save.start_block;
1471 u64 last_block = next_block + isl->index_save.block_count;
1472
1473 isl->zone_count = table->header.region_count - 3;
1474
1475 last_region = &table->regions[table->header.region_count - 1];
1476 if (last_region->kind == RL_KIND_EMPTY) {
1477 isl->free_space = *last_region;
1478 isl->zone_count--;
1479 } else {
1480 isl->free_space = (struct layout_region) {
1481 .start_block = last_block,
1482 .block_count = 0,
1483 .kind = RL_KIND_EMPTY,
1484 .instance = RL_SOLE_INSTANCE,
1485 };
1486 }
1487
1488 isl->header = table->regions[0];
1489 result = verify_region(&isl->header, next_block++, RL_KIND_HEADER,
1490 RL_SOLE_INSTANCE);
1491 if (result != UDS_SUCCESS)
1492 return result;
1493
1494 isl->index_page_map = table->regions[1];
1495 result = verify_region(&isl->index_page_map, next_block, RL_KIND_INDEX_PAGE_MAP,
1496 RL_SOLE_INSTANCE);
1497 if (result != UDS_SUCCESS)
1498 return result;
1499
1500 next_block += isl->index_page_map.block_count;
1501
1502 for (z = 0; z < isl->zone_count; z++) {
1503 isl->volume_index_zones[z] = table->regions[z + 2];
1504 result = verify_region(&isl->volume_index_zones[z], next_block,
1505 RL_KIND_VOLUME_INDEX, z);
1506 if (result != UDS_SUCCESS)
1507 return result;
1508
1509 next_block += isl->volume_index_zones[z].block_count;
1510 }
1511
1512 isl->open_chapter = table->regions[isl->zone_count + 2];
1513 result = verify_region(&isl->open_chapter, next_block, RL_KIND_OPEN_CHAPTER,
1514 RL_SOLE_INSTANCE);
1515 if (result != UDS_SUCCESS)
1516 return result;
1517
1518 next_block += isl->open_chapter.block_count;
1519
1520 result = verify_region(&isl->free_space, next_block, RL_KIND_EMPTY,
1521 RL_SOLE_INSTANCE);
1522 if (result != UDS_SUCCESS)
1523 return result;
1524
1525 next_block += isl->free_space.block_count;
1526 if (next_block != last_block) {
1527 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1528 "index save layout table incomplete");
1529 }
1530
1531 return UDS_SUCCESS;
1532 }
1533
load_index_save(struct index_save_layout * isl,struct buffered_reader * reader,unsigned int instance)1534 static int __must_check load_index_save(struct index_save_layout *isl,
1535 struct buffered_reader *reader,
1536 unsigned int instance)
1537 {
1538 int result;
1539 struct region_table *table = NULL;
1540
1541 result = load_region_table(reader, &table);
1542 if (result != UDS_SUCCESS) {
1543 return vdo_log_error_strerror(result, "cannot read index save %u header",
1544 instance);
1545 }
1546
1547 if (table->header.region_blocks != isl->index_save.block_count) {
1548 u64 region_blocks = table->header.region_blocks;
1549
1550 vdo_free(table);
1551 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1552 "unexpected index save %u region block count %llu",
1553 instance,
1554 (unsigned long long) region_blocks);
1555 }
1556
1557 if (table->header.type == RH_TYPE_UNSAVED) {
1558 vdo_free(table);
1559 reset_index_save_layout(isl, 0);
1560 return UDS_SUCCESS;
1561 }
1562
1563
1564 if (table->header.type != RH_TYPE_SAVE) {
1565 vdo_log_error_strerror(UDS_CORRUPT_DATA,
1566 "unexpected index save %u header type %u",
1567 instance, table->header.type);
1568 vdo_free(table);
1569 return UDS_CORRUPT_DATA;
1570 }
1571
1572 result = read_index_save_data(reader, isl, table->header.payload);
1573 if (result != UDS_SUCCESS) {
1574 vdo_free(table);
1575 return vdo_log_error_strerror(result,
1576 "unknown index save %u data format",
1577 instance);
1578 }
1579
1580 result = reconstruct_index_save(isl, table);
1581 vdo_free(table);
1582 if (result != UDS_SUCCESS) {
1583 return vdo_log_error_strerror(result, "cannot reconstruct index save %u",
1584 instance);
1585 }
1586
1587 return UDS_SUCCESS;
1588 }
1589
load_sub_index_regions(struct index_layout * layout)1590 static int __must_check load_sub_index_regions(struct index_layout *layout)
1591 {
1592 int result;
1593 unsigned int j;
1594 struct index_save_layout *isl;
1595 struct buffered_reader *reader;
1596
1597 for (j = 0; j < layout->super.max_saves; j++) {
1598 isl = &layout->index.saves[j];
1599 result = open_region_reader(layout, &isl->index_save, &reader);
1600
1601 if (result != UDS_SUCCESS) {
1602 vdo_log_error_strerror(result,
1603 "cannot get reader for index 0 save %u",
1604 j);
1605 return result;
1606 }
1607
1608 result = load_index_save(isl, reader, j);
1609 uds_free_buffered_reader(reader);
1610 if (result != UDS_SUCCESS) {
1611 /* Another save slot might be valid. */
1612 reset_index_save_layout(isl, 0);
1613 continue;
1614 }
1615 }
1616
1617 return UDS_SUCCESS;
1618 }
1619
verify_uds_index_config(struct index_layout * layout,struct uds_configuration * config)1620 static int __must_check verify_uds_index_config(struct index_layout *layout,
1621 struct uds_configuration *config)
1622 {
1623 int result;
1624 struct buffered_reader *reader = NULL;
1625 u64 offset;
1626
1627 offset = layout->super.volume_offset - layout->super.start_offset;
1628 result = open_layout_reader(layout, &layout->config, offset, &reader);
1629 if (result != UDS_SUCCESS)
1630 return vdo_log_error_strerror(result, "failed to open config reader");
1631
1632 result = uds_validate_config_contents(reader, config);
1633 if (result != UDS_SUCCESS) {
1634 uds_free_buffered_reader(reader);
1635 return vdo_log_error_strerror(result, "failed to read config region");
1636 }
1637
1638 uds_free_buffered_reader(reader);
1639 return UDS_SUCCESS;
1640 }
1641
load_index_layout(struct index_layout * layout,struct uds_configuration * config)1642 static int load_index_layout(struct index_layout *layout, struct uds_configuration *config)
1643 {
1644 int result;
1645 struct buffered_reader *reader;
1646
1647 result = uds_make_buffered_reader(layout->factory,
1648 layout->offset / UDS_BLOCK_SIZE, 1, &reader);
1649 if (result != UDS_SUCCESS)
1650 return vdo_log_error_strerror(result, "unable to read superblock");
1651
1652 result = load_super_block(layout, UDS_BLOCK_SIZE,
1653 layout->offset / UDS_BLOCK_SIZE, reader);
1654 uds_free_buffered_reader(reader);
1655 if (result != UDS_SUCCESS)
1656 return result;
1657
1658 result = verify_uds_index_config(layout, config);
1659 if (result != UDS_SUCCESS)
1660 return result;
1661
1662 return load_sub_index_regions(layout);
1663 }
1664
create_layout_factory(struct index_layout * layout,const struct uds_configuration * config)1665 static int create_layout_factory(struct index_layout *layout,
1666 const struct uds_configuration *config)
1667 {
1668 int result;
1669 size_t writable_size;
1670 struct io_factory *factory = NULL;
1671
1672 result = uds_make_io_factory(config->bdev, &factory);
1673 if (result != UDS_SUCCESS)
1674 return result;
1675
1676 writable_size = uds_get_writable_size(factory) & -UDS_BLOCK_SIZE;
1677 if (writable_size < config->size + config->offset) {
1678 uds_put_io_factory(factory);
1679 vdo_log_error("index storage (%zu) is smaller than the requested size %zu",
1680 writable_size, config->size + config->offset);
1681 return -ENOSPC;
1682 }
1683
1684 layout->factory = factory;
1685 layout->factory_size = (config->size > 0) ? config->size : writable_size;
1686 layout->offset = config->offset;
1687 return UDS_SUCCESS;
1688 }
1689
/*
 * Allocate an index layout and either create it fresh (new_layout is true) or load it from
 * storage. Returns -ENOSPC if the storage is smaller than the size computed for the
 * configuration. On success the layout is returned through layout_ptr; on any failure after
 * allocation the partially built layout is freed.
 */
int uds_make_index_layout(struct uds_configuration *config, bool new_layout,
			  struct index_layout **layout_ptr)
{
	int result;
	struct index_layout *layout = NULL;
	struct save_layout_sizes sizes;

	result = compute_sizes(config, &sizes);
	if (result != UDS_SUCCESS)
		return result;

	result = vdo_allocate(1, struct index_layout, __func__, &layout);
	if (result != VDO_SUCCESS)
		return result;

	result = create_layout_factory(layout, config);
	if (result != UDS_SUCCESS)
		goto free_layout;

	if (layout->factory_size < sizes.total_size) {
		vdo_log_error("index storage (%zu) is smaller than the required size %llu",
			      layout->factory_size,
			      (unsigned long long) sizes.total_size);
		result = -ENOSPC;
		goto free_layout;
	}

	result = (new_layout ? create_index_layout(layout, config) :
		  load_index_layout(layout, config));
	if (result != UDS_SUCCESS)
		goto free_layout;

	*layout_ptr = layout;
	return UDS_SUCCESS;

free_layout:
	uds_free_index_layout(layout);
	return result;
}
1731
uds_free_index_layout(struct index_layout * layout)1732 void uds_free_index_layout(struct index_layout *layout)
1733 {
1734 if (layout == NULL)
1735 return;
1736
1737 vdo_free(layout->index.saves);
1738 if (layout->factory != NULL)
1739 uds_put_io_factory(layout->factory);
1740
1741 vdo_free(layout);
1742 }
1743
uds_replace_index_layout_storage(struct index_layout * layout,struct block_device * bdev)1744 int uds_replace_index_layout_storage(struct index_layout *layout,
1745 struct block_device *bdev)
1746 {
1747 return uds_replace_storage(layout->factory, bdev);
1748 }
1749
1750 /* Obtain a dm_bufio_client for the volume region. */
uds_open_volume_bufio(struct index_layout * layout,size_t block_size,unsigned int reserved_buffers,struct dm_bufio_client ** client_ptr)1751 int uds_open_volume_bufio(struct index_layout *layout, size_t block_size,
1752 unsigned int reserved_buffers,
1753 struct dm_bufio_client **client_ptr)
1754 {
1755 off_t offset = (layout->index.volume.start_block +
1756 layout->super.volume_offset -
1757 layout->super.start_offset);
1758
1759 return uds_make_bufio(layout->factory, offset, block_size, reserved_buffers,
1760 client_ptr);
1761 }
1762
uds_get_volume_nonce(struct index_layout * layout)1763 u64 uds_get_volume_nonce(struct index_layout *layout)
1764 {
1765 return layout->index.nonce;
1766 }
1767