1 /*
2 * Block driver for the QCOW version 2 format
3 *
4 * Copyright (c) 2004-2006 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 #include "qemu-common.h"
26 #include "block_int.h"
27 #include "block/qcow2.h"
28
29 static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size);
30 static int update_refcount(BlockDriverState *bs,
31 int64_t offset, int64_t length,
32 int addend);
33
34 /*********************************************************/
35 /* refcount handling */
36
/*
 * Allocate the refcount caches and read the refcount table from disk.
 *
 * Returns 0 on success, -EIO if reading the refcount table fails.
 * (qemu_malloc aborts on OOM in this code base, so allocation failure is
 * not reported through the return value.)
 */
int qcow2_refcount_init(BlockDriverState *bs)
{
    BDRVQcowState *s = bs->opaque;
    int ret, refcount_table_size2, i;

    s->refcount_block_cache = qemu_malloc(s->cluster_size);
    refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t);
    s->refcount_table = qemu_malloc(refcount_table_size2);
    if (s->refcount_table_size > 0) {
        ret = bdrv_pread(s->hd, s->refcount_table_offset,
                         s->refcount_table, refcount_table_size2);
        if (ret != refcount_table_size2)
            goto fail;
        /* table entries are stored big-endian on disk */
        for(i = 0; i < s->refcount_table_size; i++)
            be64_to_cpus(&s->refcount_table[i]);
    }
    return 0;
 fail:
    /* fix: the only failure reaching here is a read error, not OOM */
    return -EIO;
}
57
/* Release the caches allocated by qcow2_refcount_init(). */
void qcow2_refcount_close(BlockDriverState *bs)
{
    BDRVQcowState *s = bs->opaque;

    qemu_free(s->refcount_table);
    qemu_free(s->refcount_block_cache);
}
64
65
/*
 * Fill the single-cluster refcount block cache from the refcount block
 * stored at refcount_block_offset.
 *
 * Returns 0 on success, -EIO if the read fails.
 */
static int load_refcount_block(BlockDriverState *bs,
                               int64_t refcount_block_offset)
{
    BDRVQcowState *s = bs->opaque;

    if (bdrv_pread(s->hd, refcount_block_offset, s->refcount_block_cache,
                   s->cluster_size) != s->cluster_size) {
        return -EIO;
    }

    s->refcount_block_cache_offset = refcount_block_offset;
    return 0;
}
78
/*
 * Return the refcount of the given cluster.
 *
 * Clusters beyond the refcount table, or covered by an unallocated
 * refcount block, have refcount 0. If the refcount block cannot be read,
 * the cluster is conservatively reported as allocated (refcount 1).
 */
static int get_refcount(BlockDriverState *bs, int64_t cluster_index)
{
    BDRVQcowState *s = bs->opaque;
    int64_t block_offset;
    int table_index, index_in_block;

    table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
    if (table_index >= s->refcount_table_size) {
        return 0;
    }

    block_offset = s->refcount_table[table_index];
    if (block_offset == 0) {
        return 0;
    }

    if (block_offset != s->refcount_block_cache_offset &&
        load_refcount_block(bs, block_offset) < 0) {
        /* better than nothing: return allocated if read error */
        return 1;
    }

    index_in_block = cluster_index &
        ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
    return be16_to_cpu(s->refcount_block_cache[index_in_block]);
}
100
/*
 * Grow the refcount table so it has at least min_size entries.
 *
 * The new, larger table is written to freshly allocated clusters first,
 * then the image header is pointed at it, and only afterwards are the
 * in-memory fields switched over and the old table's clusters freed —
 * so a crash at any intermediate point leaves a consistent image.
 *
 * Returns 0 on success (or if the table is already large enough),
 * -EIO on any write failure.
 */
static int grow_refcount_table(BlockDriverState *bs, int min_size)
{
    BDRVQcowState *s = bs->opaque;
    int new_table_size, new_table_size2, refcount_table_clusters, i, ret;
    uint64_t *new_table;
    int64_t table_offset;
    uint8_t data[12];
    int old_table_size;
    int64_t old_table_offset;

    if (min_size <= s->refcount_table_size)
        return 0;
    /* compute new table size: grow by roughly 1.5x until min_size fits;
     * (cluster_bits - 3) because each entry is 8 bytes */
    refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3);
    for(;;) {
        if (refcount_table_clusters == 0) {
            refcount_table_clusters = 1;
        } else {
            refcount_table_clusters = (refcount_table_clusters * 3 + 1) / 2;
        }
        new_table_size = refcount_table_clusters << (s->cluster_bits - 3);
        if (min_size <= new_table_size)
            break;
    }
#ifdef DEBUG_ALLOC2
    printf("grow_refcount_table from %d to %d\n",
           s->refcount_table_size,
           new_table_size);
#endif
    new_table_size2 = new_table_size * sizeof(uint64_t);
    new_table = qemu_mallocz(new_table_size2);
    memcpy(new_table, s->refcount_table,
           s->refcount_table_size * sizeof(uint64_t));
    /* convert to big-endian for the on-disk copy */
    for(i = 0; i < s->refcount_table_size; i++)
        cpu_to_be64s(&new_table[i]);
    /* Note: we cannot update the refcount now to avoid recursion */
    table_offset = alloc_clusters_noref(bs, new_table_size2);
    ret = bdrv_pwrite(s->hd, table_offset, new_table, new_table_size2);
    if (ret != new_table_size2)
        goto fail;
    /* convert back: new_table becomes the live in-memory table below */
    for(i = 0; i < s->refcount_table_size; i++)
        be64_to_cpus(&new_table[i]);

    /* update the header fields refcount_table_offset (8 bytes) and
     * refcount_table_clusters (4 bytes) in a single 12-byte write */
    cpu_to_be64w((uint64_t*)data, table_offset);
    cpu_to_be32w((uint32_t*)(data + 8), refcount_table_clusters);
    if (bdrv_pwrite(s->hd, offsetof(QCowHeader, refcount_table_offset),
                    data, sizeof(data)) != sizeof(data))
        goto fail;
    qemu_free(s->refcount_table);
    old_table_offset = s->refcount_table_offset;
    old_table_size = s->refcount_table_size;
    s->refcount_table = new_table;
    s->refcount_table_size = new_table_size;
    s->refcount_table_offset = table_offset;

    /* the new table is live now: account for its own clusters (this may
     * recurse into alloc_refcount_block safely) and release the old ones */
    update_refcount(bs, table_offset, new_table_size2, 1);
    qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t));
    return 0;
 fail:
    qcow2_free_clusters(bs, table_offset, new_table_size2);
    qemu_free(new_table);
    return -EIO;
}
164
165
/*
 * Return the offset of the refcount block covering cluster_index, growing
 * the refcount table and/or allocating a fresh (zero-filled) refcount
 * block as needed. On success the block is resident in
 * s->refcount_block_cache.
 *
 * Returns the block offset on success, a negative errno on failure.
 */
static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
{
    BDRVQcowState *s = bs->opaque;
    int64_t offset, refcount_block_offset;
    int ret, refcount_table_index;
    uint64_t data64;

    /* Find L1 index and grow refcount table if needed */
    refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
    if (refcount_table_index >= s->refcount_table_size) {
        ret = grow_refcount_table(bs, refcount_table_index + 1);
        if (ret < 0)
            return ret;
    }

    /* Load or allocate the refcount block */
    refcount_block_offset = s->refcount_table[refcount_table_index];
    if (!refcount_block_offset) {
        /* create a new refcount block */
        /* Note: we cannot update the refcount now to avoid recursion */
        offset = alloc_clusters_noref(bs, s->cluster_size);
        /* write the block zero-filled before publishing it in the table */
        memset(s->refcount_block_cache, 0, s->cluster_size);
        ret = bdrv_pwrite(s->hd, offset, s->refcount_block_cache, s->cluster_size);
        if (ret != s->cluster_size)
            return -EINVAL;
        s->refcount_table[refcount_table_index] = offset;
        /* persist the table entry in big-endian on-disk format */
        data64 = cpu_to_be64(offset);
        ret = bdrv_pwrite(s->hd, s->refcount_table_offset +
                          refcount_table_index * sizeof(uint64_t),
                          &data64, sizeof(data64));
        if (ret != sizeof(data64))
            return -EINVAL;

        refcount_block_offset = offset;
        s->refcount_block_cache_offset = offset;
        /* now that the block exists, recursion is safe: account for the
         * cluster the block itself occupies */
        update_refcount(bs, offset, s->cluster_size, 1);
    } else {
        if (refcount_block_offset != s->refcount_block_cache_offset) {
            if (load_refcount_block(bs, refcount_block_offset) < 0)
                return -EIO;
        }
    }

    return refcount_block_offset;
}
211
212 #define REFCOUNTS_PER_SECTOR (512 >> REFCOUNT_SHIFT)
/*
 * Write back the cached refcount entries in [first_index, last_index] to
 * the refcount block at refcount_block_offset, rounding the range out to
 * whole 512-byte sectors.
 *
 * Returns 0 on success, -EIO if the write fails.
 */
static int write_refcount_block_entries(BDRVQcowState *s,
    int64_t refcount_block_offset, int first_index, int last_index)
{
    size_t size;
    int ret;

    /* round the dirty range out to full sectors */
    first_index &= ~(REFCOUNTS_PER_SECTOR - 1);
    last_index = (last_index + REFCOUNTS_PER_SECTOR)
        & ~(REFCOUNTS_PER_SECTOR - 1);

    size = (last_index - first_index) << REFCOUNT_SHIFT;
    ret = bdrv_pwrite(s->hd,
        refcount_block_offset + (first_index << REFCOUNT_SHIFT),
        &s->refcount_block_cache[first_index], size);
    if (ret != size) {
        return -EIO;
    }

    return 0;
}
232
/* XXX: cache several refcount block clusters ? */
/*
 * Add "addend" to the refcount of every cluster overlapped by the byte
 * range [offset, offset + length). Dirty entries are accumulated per
 * refcount block and written back once per block.
 *
 * Returns 0 on success; -EINVAL for a non-positive length or a refcount
 * that would leave [0, 0xffff]; -EIO on write failure; or a negative
 * errno propagated from refcount block allocation.
 */
static int update_refcount(BlockDriverState *bs,
    int64_t offset, int64_t length,
    int addend)
{
    BDRVQcowState *s = bs->opaque;
    int64_t start, last, cluster_offset;
    int64_t refcount_block_offset = 0;
    int64_t table_index = -1, old_table_index;
    int first_index = -1, last_index = -1;

#ifdef DEBUG_ALLOC2
    printf("update_refcount: offset=%lld size=%lld addend=%d\n",
           offset, length, addend);
#endif
    if (length <= 0)
        return -EINVAL;
    /* cluster-align the range endpoints */
    start = offset & ~(s->cluster_size - 1);
    last = (offset + length - 1) & ~(s->cluster_size - 1);
    for(cluster_offset = start; cluster_offset <= last;
        cluster_offset += s->cluster_size)
    {
        int block_index, refcount;
        int64_t cluster_index = cluster_offset >> s->cluster_bits;

        /* Only write refcount block to disk when we are done with it */
        old_table_index = table_index;
        table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
        if ((old_table_index >= 0) && (table_index != old_table_index)) {

            if (write_refcount_block_entries(s, refcount_block_offset,
                first_index, last_index) < 0)
            {
                return -EIO;
            }

            /* reset the dirty range for the next refcount block */
            first_index = -1;
            last_index = -1;
        }

        /* Load the refcount block and allocate it if needed */
        refcount_block_offset = alloc_refcount_block(bs, cluster_index);
        if (refcount_block_offset < 0) {
            return refcount_block_offset;
        }

        /* we can update the count and save it */
        block_index = cluster_index &
            ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
        /* widen the dirty range to cover this entry */
        if (first_index == -1 || block_index < first_index) {
            first_index = block_index;
        }
        if (block_index > last_index) {
            last_index = block_index;
        }

        refcount = be16_to_cpu(s->refcount_block_cache[block_index]);
        refcount += addend;
        if (refcount < 0 || refcount > 0xffff)
            return -EINVAL;
        /* remember the lowest free cluster to speed up later allocations */
        if (refcount == 0 && cluster_index < s->free_cluster_index) {
            s->free_cluster_index = cluster_index;
        }
        s->refcount_block_cache[block_index] = cpu_to_be16(refcount);
    }

    /* Write last changed block to disk */
    if (refcount_block_offset != 0) {
        if (write_refcount_block_entries(s, refcount_block_offset,
            first_index, last_index) < 0)
        {
            return -EIO;
        }
    }

    return 0;
}
310
311 /* addend must be 1 or -1 */
update_cluster_refcount(BlockDriverState * bs,int64_t cluster_index,int addend)312 static int update_cluster_refcount(BlockDriverState *bs,
313 int64_t cluster_index,
314 int addend)
315 {
316 BDRVQcowState *s = bs->opaque;
317 int ret;
318
319 ret = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend);
320 if (ret < 0) {
321 return ret;
322 }
323
324 return get_refcount(bs, cluster_index);
325 }
326
327
328
329 /*********************************************************/
330 /* cluster allocation functions */
331
332
333
334 /* return < 0 if error */
alloc_clusters_noref(BlockDriverState * bs,int64_t size)335 static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size)
336 {
337 BDRVQcowState *s = bs->opaque;
338 int i, nb_clusters;
339
340 nb_clusters = size_to_clusters(s, size);
341 retry:
342 for(i = 0; i < nb_clusters; i++) {
343 int64_t i = s->free_cluster_index++;
344 if (get_refcount(bs, i) != 0)
345 goto retry;
346 }
347 #ifdef DEBUG_ALLOC2
348 printf("alloc_clusters: size=%lld -> %lld\n",
349 size,
350 (s->free_cluster_index - nb_clusters) << s->cluster_bits);
351 #endif
352 return (s->free_cluster_index - nb_clusters) << s->cluster_bits;
353 }
354
/*
 * Allocate enough clusters for "size" bytes, set their refcounts to 1 and
 * return the byte offset of the first cluster.
 */
int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size)
{
    int64_t offset = alloc_clusters_noref(bs, size);

    update_refcount(bs, offset, size, 1);
    return offset;
}
363
/* only used to allocate compressed sectors. We try to allocate
   contiguous sectors. size must be <= cluster_size */
/*
 * Sub-cluster allocator for compressed data: consecutive allocations are
 * packed into the partially-used cluster tracked by s->free_byte_offset,
 * and the cluster's refcount counts how many allocations share it.
 * Returns the byte offset of the allocated space.
 */
int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
{
    BDRVQcowState *s = bs->opaque;
    int64_t offset, cluster_offset;
    int free_in_cluster;

    assert(size > 0 && size <= s->cluster_size);
    if (s->free_byte_offset == 0) {
        /* no partially-used cluster yet: start a fresh one */
        s->free_byte_offset = qcow2_alloc_clusters(bs, s->cluster_size);
    }
 redo:
    free_in_cluster = s->cluster_size -
        (s->free_byte_offset & (s->cluster_size - 1));
    if (size <= free_in_cluster) {
        /* enough space in current cluster */
        offset = s->free_byte_offset;
        s->free_byte_offset += size;
        free_in_cluster -= size;
        if (free_in_cluster == 0)
            s->free_byte_offset = 0;
        /* a second (or later) allocation within the same cluster takes an
         * extra reference; the first one already holds the initial ref */
        if ((offset & (s->cluster_size - 1)) != 0)
            update_cluster_refcount(bs, offset >> s->cluster_bits, 1);
    } else {
        offset = qcow2_alloc_clusters(bs, s->cluster_size);
        cluster_offset = s->free_byte_offset & ~(s->cluster_size - 1);
        if ((cluster_offset + s->cluster_size) == offset) {
            /* we are lucky: contiguous data */
            offset = s->free_byte_offset;
            update_cluster_refcount(bs, offset >> s->cluster_bits, 1);
            s->free_byte_offset += size;
        } else {
            /* not contiguous: abandon the tail of the old cluster and
             * retry from the start of the newly allocated one */
            s->free_byte_offset = offset;
            goto redo;
        }
    }
    return offset;
}
403
/*
 * Decrement the refcount of every cluster in [offset, offset + size).
 * NOTE(review): any error returned by update_refcount() is silently
 * dropped here, since this function returns void.
 */
void qcow2_free_clusters(BlockDriverState *bs,
                          int64_t offset, int64_t size)
{
    update_refcount(bs, offset, size, -1);
}
409
410 /*
411 * free_any_clusters
412 *
413 * free clusters according to its type: compressed or not
414 *
415 */
416
qcow2_free_any_clusters(BlockDriverState * bs,uint64_t cluster_offset,int nb_clusters)417 void qcow2_free_any_clusters(BlockDriverState *bs,
418 uint64_t cluster_offset, int nb_clusters)
419 {
420 BDRVQcowState *s = bs->opaque;
421
422 /* free the cluster */
423
424 if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
425 int nb_csectors;
426 nb_csectors = ((cluster_offset >> s->csize_shift) &
427 s->csize_mask) + 1;
428 qcow2_free_clusters(bs,
429 (cluster_offset & s->cluster_offset_mask) & ~511,
430 nb_csectors * 512);
431 return;
432 }
433
434 qcow2_free_clusters(bs, cluster_offset, nb_clusters << s->cluster_bits);
435
436 return;
437 }
438
439
440
441 /*********************************************************/
442 /* snapshots and image creation */
443
444
445
/*
 * During image creation, bump the in-memory (big-endian) refcount block
 * entry of every cluster touched by the byte range [offset, offset + size).
 */
void qcow2_create_refcount_update(QCowCreateState *s, int64_t offset,
                                  int64_t size)
{
    int64_t first = offset & ~(s->cluster_size - 1);
    int64_t last = (offset + size - 1) & ~(s->cluster_size - 1);
    int64_t cluster_offset;

    for (cluster_offset = first; cluster_offset <= last;
         cluster_offset += s->cluster_size) {
        uint16_t *entry = &s->refcount_block[cluster_offset >> s->cluster_bits];
        *entry = cpu_to_be16(be16_to_cpu(*entry) + 1);
    }
}
463
/* update the refcounts of snapshots and the copied flag */
/*
 * Walk the L1/L2 tables rooted at l1_table_offset (which may belong to a
 * snapshot rather than the active image) and apply "addend" to the
 * refcount of every referenced cluster and L2 table; addend == 0 only
 * recomputes the QCOW_OFLAG_COPIED flags. An L1/L2 entry gets
 * QCOW_OFLAG_COPIED set exactly when its cluster refcount is 1, and
 * modified tables are written back to disk.
 *
 * Returns 0 on success, -EIO on any read/write error.
 */
int qcow2_update_snapshot_refcount(BlockDriverState *bs,
    int64_t l1_table_offset, int l1_size, int addend)
{
    BDRVQcowState *s = bs->opaque;
    /* NOTE(review): l1_allocated is really a bool flag, declared uint64_t
     * only because it shares this declaration list */
    uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, l1_allocated;
    int64_t old_offset, old_l2_offset;
    int l2_size, i, j, l1_modified, l2_modified, nb_csectors, refcount;

    /* the L2 cache may reference clusters whose mappings change below */
    qcow2_l2_cache_reset(bs);

    l2_table = NULL;
    l1_table = NULL;
    l1_size2 = l1_size * sizeof(uint64_t);
    l1_allocated = 0;
    if (l1_table_offset != s->l1_table_offset) {
        /* snapshot L1 table: read it from disk into a temporary buffer */
        l1_table = qemu_malloc(l1_size2);
        l1_allocated = 1;
        if (bdrv_pread(s->hd, l1_table_offset,
                       l1_table, l1_size2) != l1_size2)
            goto fail;
        for(i = 0;i < l1_size; i++)
            be64_to_cpus(&l1_table[i]);
    } else {
        /* active L1 table: use the in-memory copy directly */
        assert(l1_size == s->l1_size);
        l1_table = s->l1_table;
        l1_allocated = 0;
    }

    l2_size = s->l2_size * sizeof(uint64_t);
    l2_table = qemu_malloc(l2_size);
    l1_modified = 0;
    for(i = 0; i < l1_size; i++) {
        l2_offset = l1_table[i];
        if (l2_offset) {
            old_l2_offset = l2_offset;
            l2_offset &= ~QCOW_OFLAG_COPIED;
            l2_modified = 0;
            if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size)
                goto fail;
            for(j = 0; j < s->l2_size; j++) {
                offset = be64_to_cpu(l2_table[j]);
                if (offset != 0) {
                    old_offset = offset;
                    offset &= ~QCOW_OFLAG_COPIED;
                    if (offset & QCOW_OFLAG_COMPRESSED) {
                        nb_csectors = ((offset >> s->csize_shift) &
                                       s->csize_mask) + 1;
                        if (addend != 0)
                            update_refcount(bs, (offset & s->cluster_offset_mask) & ~511,
                                            nb_csectors * 512, addend);
                        /* compressed clusters are never modified */
                        refcount = 2;
                    } else {
                        if (addend != 0) {
                            refcount = update_cluster_refcount(bs, offset >> s->cluster_bits, addend);
                        } else {
                            refcount = get_refcount(bs, offset >> s->cluster_bits);
                        }
                    }

                    /* COPIED is set exactly when nothing else shares the cluster */
                    if (refcount == 1) {
                        offset |= QCOW_OFLAG_COPIED;
                    }
                    if (offset != old_offset) {
                        l2_table[j] = cpu_to_be64(offset);
                        l2_modified = 1;
                    }
                }
            }
            if (l2_modified) {
                if (bdrv_pwrite(s->hd,
                                l2_offset, l2_table, l2_size) != l2_size)
                    goto fail;
            }

            /* same treatment for the L2 table's own cluster */
            if (addend != 0) {
                refcount = update_cluster_refcount(bs, l2_offset >> s->cluster_bits, addend);
            } else {
                refcount = get_refcount(bs, l2_offset >> s->cluster_bits);
            }
            if (refcount == 1) {
                l2_offset |= QCOW_OFLAG_COPIED;
            }
            if (l2_offset != old_l2_offset) {
                l1_table[i] = l2_offset;
                l1_modified = 1;
            }
        }
    }
    if (l1_modified) {
        /* write back big-endian, then restore CPU order for the caller */
        for(i = 0; i < l1_size; i++)
            cpu_to_be64s(&l1_table[i]);
        if (bdrv_pwrite(s->hd, l1_table_offset, l1_table,
                        l1_size2) != l1_size2)
            goto fail;
        for(i = 0; i < l1_size; i++)
            be64_to_cpus(&l1_table[i]);
    }
    if (l1_allocated)
        qemu_free(l1_table);
    qemu_free(l2_table);
    return 0;
 fail:
    if (l1_allocated)
        qemu_free(l1_table);
    qemu_free(l2_table);
    return -EIO;
}
573
574
575
576
577 /*********************************************************/
578 /* refcount checking functions */
579
580
581
582 /*
583 * Increases the refcount for a range of clusters in a given refcount table.
584 * This is used to construct a temporary refcount table out of L1 and L2 tables
585 * which can be compared the the refcount table saved in the image.
586 *
587 * Returns the number of errors in the image that were found
588 */
inc_refcounts(BlockDriverState * bs,uint16_t * refcount_table,int refcount_table_size,int64_t offset,int64_t size)589 static int inc_refcounts(BlockDriverState *bs,
590 uint16_t *refcount_table,
591 int refcount_table_size,
592 int64_t offset, int64_t size)
593 {
594 BDRVQcowState *s = bs->opaque;
595 int64_t start, last, cluster_offset;
596 int k;
597 int errors = 0;
598
599 if (size <= 0)
600 return 0;
601
602 start = offset & ~(s->cluster_size - 1);
603 last = (offset + size - 1) & ~(s->cluster_size - 1);
604 for(cluster_offset = start; cluster_offset <= last;
605 cluster_offset += s->cluster_size) {
606 k = cluster_offset >> s->cluster_bits;
607 if (k < 0 || k >= refcount_table_size) {
608 fprintf(stderr, "ERROR: invalid cluster offset=0x%" PRIx64 "\n",
609 cluster_offset);
610 errors++;
611 } else {
612 if (++refcount_table[k] == 0) {
613 fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64
614 "\n", cluster_offset);
615 errors++;
616 }
617 }
618 }
619
620 return errors;
621 }
622
623 /*
624 * Increases the refcount in the given refcount table for the all clusters
625 * referenced in the L2 table. While doing so, performs some checks on L2
626 * entries.
627 *
628 * Returns the number of errors found by the checks or -errno if an internal
629 * error occurred.
630 */
check_refcounts_l2(BlockDriverState * bs,uint16_t * refcount_table,int refcount_table_size,int64_t l2_offset,int check_copied)631 static int check_refcounts_l2(BlockDriverState *bs,
632 uint16_t *refcount_table, int refcount_table_size, int64_t l2_offset,
633 int check_copied)
634 {
635 BDRVQcowState *s = bs->opaque;
636 uint64_t *l2_table, offset;
637 int i, l2_size, nb_csectors, refcount;
638 int errors = 0;
639
640 /* Read L2 table from disk */
641 l2_size = s->l2_size * sizeof(uint64_t);
642 l2_table = qemu_malloc(l2_size);
643
644 if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size)
645 goto fail;
646
647 /* Do the actual checks */
648 for(i = 0; i < s->l2_size; i++) {
649 offset = be64_to_cpu(l2_table[i]);
650 if (offset != 0) {
651 if (offset & QCOW_OFLAG_COMPRESSED) {
652 /* Compressed clusters don't have QCOW_OFLAG_COPIED */
653 if (offset & QCOW_OFLAG_COPIED) {
654 fprintf(stderr, "ERROR: cluster %" PRId64 ": "
655 "copied flag must never be set for compressed "
656 "clusters\n", offset >> s->cluster_bits);
657 offset &= ~QCOW_OFLAG_COPIED;
658 errors++;
659 }
660
661 /* Mark cluster as used */
662 nb_csectors = ((offset >> s->csize_shift) &
663 s->csize_mask) + 1;
664 offset &= s->cluster_offset_mask;
665 errors += inc_refcounts(bs, refcount_table,
666 refcount_table_size,
667 offset & ~511, nb_csectors * 512);
668 } else {
669 /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
670 if (check_copied) {
671 uint64_t entry = offset;
672 offset &= ~QCOW_OFLAG_COPIED;
673 refcount = get_refcount(bs, offset >> s->cluster_bits);
674 if ((refcount == 1) != ((entry & QCOW_OFLAG_COPIED) != 0)) {
675 fprintf(stderr, "ERROR OFLAG_COPIED: offset=%"
676 PRIx64 " refcount=%d\n", entry, refcount);
677 errors++;
678 }
679 }
680
681 /* Mark cluster as used */
682 offset &= ~QCOW_OFLAG_COPIED;
683 errors += inc_refcounts(bs, refcount_table,
684 refcount_table_size,
685 offset, s->cluster_size);
686
687 /* Correct offsets are cluster aligned */
688 if (offset & (s->cluster_size - 1)) {
689 fprintf(stderr, "ERROR offset=%" PRIx64 ": Cluster is not "
690 "properly aligned; L2 entry corrupted.\n", offset);
691 errors++;
692 }
693 }
694 }
695 }
696
697 qemu_free(l2_table);
698 return errors;
699
700 fail:
701 fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
702 qemu_free(l2_table);
703 return -EIO;
704 }
705
706 /*
707 * Increases the refcount for the L1 table, its L2 tables and all referenced
708 * clusters in the given refcount table. While doing so, performs some checks
709 * on L1 and L2 entries.
710 *
711 * Returns the number of errors found by the checks or -errno if an internal
712 * error occurred.
713 */
static int check_refcounts_l1(BlockDriverState *bs,
                              uint16_t *refcount_table,
                              int refcount_table_size,
                              int64_t l1_table_offset, int l1_size,
                              int check_copied)
{
    BDRVQcowState *s = bs->opaque;
    uint64_t *l1_table, l2_offset, l1_size2;
    int i, refcount, ret;
    int errors = 0;

    l1_size2 = l1_size * sizeof(uint64_t);

    /* Mark L1 table as used */
    errors += inc_refcounts(bs, refcount_table, refcount_table_size,
                            l1_table_offset, l1_size2);

    /* Read L1 table entries from disk */
    l1_table = qemu_malloc(l1_size2);
    if (bdrv_pread(s->hd, l1_table_offset,
                   l1_table, l1_size2) != l1_size2)
        goto fail;
    /* entries are stored big-endian on disk */
    for(i = 0;i < l1_size; i++)
        be64_to_cpus(&l1_table[i]);

    /* Do the actual checks */
    for(i = 0; i < l1_size; i++) {
        l2_offset = l1_table[i];
        if (l2_offset) {
            /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
            if (check_copied) {
                refcount = get_refcount(bs, (l2_offset & ~QCOW_OFLAG_COPIED)
                    >> s->cluster_bits);
                if ((refcount == 1) != ((l2_offset & QCOW_OFLAG_COPIED) != 0)) {
                    fprintf(stderr, "ERROR OFLAG_COPIED: l2_offset=%" PRIx64
                            " refcount=%d\n", l2_offset, refcount);
                    errors++;
                }
            }

            /* Mark L2 table as used */
            l2_offset &= ~QCOW_OFLAG_COPIED;
            errors += inc_refcounts(bs, refcount_table,
                                    refcount_table_size,
                                    l2_offset,
                                    s->cluster_size);

            /* L2 tables are cluster aligned */
            if (l2_offset & (s->cluster_size - 1)) {
                fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not "
                        "cluster aligned; L1 entry corrupted\n", l2_offset);
                errors++;
            }

            /* Process and check L2 entries */
            ret = check_refcounts_l2(bs, refcount_table, refcount_table_size,
                                     l2_offset, check_copied);
            if (ret < 0) {
                goto fail;
            }
            errors += ret;
        }
    }
    qemu_free(l1_table);
    return errors;

fail:
    fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
    qemu_free(l1_table);
    return -EIO;
}
785
786 /*
787 * Checks an image for refcount consistency.
788 *
789 * Returns 0 if no errors are found, the number of errors in case the image is
790 * detected as corrupted, and -errno when an internal error occured.
791 */
qcow2_check_refcounts(BlockDriverState * bs)792 int qcow2_check_refcounts(BlockDriverState *bs)
793 {
794 BDRVQcowState *s = bs->opaque;
795 int64_t size;
796 int nb_clusters, refcount1, refcount2, i;
797 QCowSnapshot *sn;
798 uint16_t *refcount_table;
799 int ret, errors = 0;
800
801 size = bdrv_getlength(s->hd);
802 nb_clusters = size_to_clusters(s, size);
803 refcount_table = qemu_mallocz(nb_clusters * sizeof(uint16_t));
804
805 /* header */
806 errors += inc_refcounts(bs, refcount_table, nb_clusters,
807 0, s->cluster_size);
808
809 /* current L1 table */
810 ret = check_refcounts_l1(bs, refcount_table, nb_clusters,
811 s->l1_table_offset, s->l1_size, 1);
812 if (ret < 0) {
813 return ret;
814 }
815 errors += ret;
816
817 /* snapshots */
818 for(i = 0; i < s->nb_snapshots; i++) {
819 sn = s->snapshots + i;
820 check_refcounts_l1(bs, refcount_table, nb_clusters,
821 sn->l1_table_offset, sn->l1_size, 0);
822 }
823 errors += inc_refcounts(bs, refcount_table, nb_clusters,
824 s->snapshots_offset, s->snapshots_size);
825
826 /* refcount data */
827 errors += inc_refcounts(bs, refcount_table, nb_clusters,
828 s->refcount_table_offset,
829 s->refcount_table_size * sizeof(uint64_t));
830 for(i = 0; i < s->refcount_table_size; i++) {
831 int64_t offset;
832 offset = s->refcount_table[i];
833 if (offset != 0) {
834 errors += inc_refcounts(bs, refcount_table, nb_clusters,
835 offset, s->cluster_size);
836 }
837 }
838
839 /* compare ref counts */
840 for(i = 0; i < nb_clusters; i++) {
841 refcount1 = get_refcount(bs, i);
842 refcount2 = refcount_table[i];
843 if (refcount1 != refcount2) {
844 fprintf(stderr, "ERROR cluster %d refcount=%d reference=%d\n",
845 i, refcount1, refcount2);
846 errors++;
847 }
848 }
849
850 qemu_free(refcount_table);
851
852 return errors;
853 }
854
855