/*
 * Block driver for the QCOW version 2 format
 *
 * Copyright (c) 2004-2006 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu-common.h"
#include "block_int.h"
#include "block/qcow2.h"

static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size);
static int update_refcount(BlockDriverState *bs,
                            int64_t offset, int64_t length,
                            int addend);

/*********************************************************/
/* refcount handling */

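/*
 * Allocates the refcount block cache and loads the refcount table from the
 * image into memory, converting its entries to host byte order.
 */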
int qcow2_refcount_init(BlockDriverState *bs)
{
    BDRVQcowState *s = bs->opaque;
    int ret, refcount_table_size2, i;

    s->refcount_block_cache = qemu_malloc(s->cluster_size);
    refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t);
    s->refcount_table = qemu_malloc(refcount_table_size2);
    if (s->refcount_table_size > 0) {
        ret = bdrv_pread(s->hd, s->refcount_table_offset,
                         s->refcount_table, refcount_table_size2);
        if (ret != refcount_table_size2)
            goto fail;
        for(i = 0; i < s->refcount_table_size; i++)
            be64_to_cpus(&s->refcount_table[i]);
    }
    return 0;
 fail:
    return -ENOMEM;
}

void qcow2_refcount_close(BlockDriverState *bs)
{
    BDRVQcowState *s = bs->opaque;
    qemu_free(s->refcount_block_cache);
    qemu_free(s->refcount_table);
}


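/*
 * Reads the given refcount block from the image into the refcount block
 * cache and remembers its offset.
 */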
static int load_refcount_block(BlockDriverState *bs,
                               int64_t refcount_block_offset)
{
    BDRVQcowState *s = bs->opaque;
    int ret;
    ret = bdrv_pread(s->hd, refcount_block_offset, s->refcount_block_cache,
                     s->cluster_size);
    if (ret != s->cluster_size)
        return -EIO;
    s->refcount_block_cache_offset = refcount_block_offset;
    return 0;
}

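/*
 * Returns the refcount of the cluster with the given index, or 0 if the
 * cluster is not covered by the refcount table or has no refcount block.
 * If the refcount block cannot be read, 1 is returned so the cluster is
 * treated as allocated.
 */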
static int get_refcount(BlockDriverState *bs, int64_t cluster_index)
{
    BDRVQcowState *s = bs->opaque;
    int refcount_table_index, block_index;
    int64_t refcount_block_offset;

    refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
    if (refcount_table_index >= s->refcount_table_size)
        return 0;
    refcount_block_offset = s->refcount_table[refcount_table_index];
    if (!refcount_block_offset)
        return 0;
    if (refcount_block_offset != s->refcount_block_cache_offset) {
        /* better than nothing: return allocated if read error */
        if (load_refcount_block(bs, refcount_block_offset) < 0)
            return 1;
    }
    block_index = cluster_index &
        ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
    return be16_to_cpu(s->refcount_block_cache[block_index]);
}

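/*
 * Grows the refcount table until it has at least min_size entries. The new
 * table is written to newly allocated clusters and the image header is
 * updated to point at it; only then are the old table's clusters freed.
 */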
static int grow_refcount_table(BlockDriverState *bs, int min_size)
{
    BDRVQcowState *s = bs->opaque;
    int new_table_size, new_table_size2, refcount_table_clusters, i, ret;
    uint64_t *new_table;
    int64_t table_offset;
    uint8_t data[12];
    int old_table_size;
    int64_t old_table_offset;

    if (min_size <= s->refcount_table_size)
        return 0;
    /* compute new table size */
    refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3);
    for(;;) {
        if (refcount_table_clusters == 0) {
            refcount_table_clusters = 1;
        } else {
            refcount_table_clusters = (refcount_table_clusters * 3 + 1) / 2;
        }
        new_table_size = refcount_table_clusters << (s->cluster_bits - 3);
        if (min_size <= new_table_size)
            break;
    }
#ifdef DEBUG_ALLOC2
    printf("grow_refcount_table from %d to %d\n",
           s->refcount_table_size,
           new_table_size);
#endif
    new_table_size2 = new_table_size * sizeof(uint64_t);
    new_table = qemu_mallocz(new_table_size2);
    memcpy(new_table, s->refcount_table,
           s->refcount_table_size * sizeof(uint64_t));
    for(i = 0; i < s->refcount_table_size; i++)
        cpu_to_be64s(&new_table[i]);
    /* Note: we cannot update the refcount now to avoid recursion */
    table_offset = alloc_clusters_noref(bs, new_table_size2);
    ret = bdrv_pwrite(s->hd, table_offset, new_table, new_table_size2);
    if (ret != new_table_size2)
        goto fail;
    for(i = 0; i < s->refcount_table_size; i++)
        be64_to_cpus(&new_table[i]);

    cpu_to_be64w((uint64_t*)data, table_offset);
    cpu_to_be32w((uint32_t*)(data + 8), refcount_table_clusters);
    if (bdrv_pwrite(s->hd, offsetof(QCowHeader, refcount_table_offset),
                    data, sizeof(data)) != sizeof(data))
        goto fail;
    qemu_free(s->refcount_table);
    old_table_offset = s->refcount_table_offset;
    old_table_size = s->refcount_table_size;
    s->refcount_table = new_table;
    s->refcount_table_size = new_table_size;
    s->refcount_table_offset = table_offset;

    update_refcount(bs, table_offset, new_table_size2, 1);
    qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t));
    return 0;
 fail:
    qcow2_free_clusters(bs, table_offset, new_table_size2);
    qemu_free(new_table);
    return -EIO;
}


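/*
 * Returns the offset of the refcount block covering the given cluster index,
 * growing the refcount table and allocating a new (zeroed) refcount block if
 * necessary. The block ends up in the refcount block cache as a side effect.
 */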
static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
{
    BDRVQcowState *s = bs->opaque;
    int64_t offset, refcount_block_offset;
    int ret, refcount_table_index;
    uint64_t data64;

    /* Find L1 index and grow refcount table if needed */
    refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
    if (refcount_table_index >= s->refcount_table_size) {
        ret = grow_refcount_table(bs, refcount_table_index + 1);
        if (ret < 0)
            return ret;
    }

    /* Load or allocate the refcount block */
    refcount_block_offset = s->refcount_table[refcount_table_index];
    if (!refcount_block_offset) {
        /* create a new refcount block */
        /* Note: we cannot update the refcount now to avoid recursion */
        offset = alloc_clusters_noref(bs, s->cluster_size);
        memset(s->refcount_block_cache, 0, s->cluster_size);
        ret = bdrv_pwrite(s->hd, offset, s->refcount_block_cache, s->cluster_size);
        if (ret != s->cluster_size)
            return -EINVAL;
        s->refcount_table[refcount_table_index] = offset;
        data64 = cpu_to_be64(offset);
        ret = bdrv_pwrite(s->hd, s->refcount_table_offset +
                          refcount_table_index * sizeof(uint64_t),
                          &data64, sizeof(data64));
        if (ret != sizeof(data64))
            return -EINVAL;

        refcount_block_offset = offset;
        s->refcount_block_cache_offset = offset;
        update_refcount(bs, offset, s->cluster_size, 1);
    } else {
        if (refcount_block_offset != s->refcount_block_cache_offset) {
            if (load_refcount_block(bs, refcount_block_offset) < 0)
                return -EIO;
        }
    }

    return refcount_block_offset;
}

#define REFCOUNTS_PER_SECTOR (512 >> REFCOUNT_SHIFT)
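/*
 * Writes the cached refcount entries in [first_index, last_index] back to the
 * refcount block on disk, rounding the range out to whole 512-byte sectors.
 */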
static int write_refcount_block_entries(BDRVQcowState *s,
    int64_t refcount_block_offset, int first_index, int last_index)
{
    size_t size;

    first_index &= ~(REFCOUNTS_PER_SECTOR - 1);
    last_index = (last_index + REFCOUNTS_PER_SECTOR)
        & ~(REFCOUNTS_PER_SECTOR - 1);

    size = (last_index - first_index) << REFCOUNT_SHIFT;
    if (bdrv_pwrite(s->hd,
        refcount_block_offset + (first_index << REFCOUNT_SHIFT),
        &s->refcount_block_cache[first_index], size) != size)
    {
        return -EIO;
    }

    return 0;
}

/* XXX: cache several refcount block clusters? */
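/*
 * Adds addend to the refcount of every cluster that intersects the given
 * byte range, allocating refcount blocks as needed and writing each modified
 * block back to the image.
 */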
static int update_refcount(BlockDriverState *bs,
                            int64_t offset, int64_t length,
                            int addend)
{
    BDRVQcowState *s = bs->opaque;
    int64_t start, last, cluster_offset;
    int64_t refcount_block_offset = 0;
    int64_t table_index = -1, old_table_index;
    int first_index = -1, last_index = -1;

#ifdef DEBUG_ALLOC2
    printf("update_refcount: offset=%" PRId64 " size=%" PRId64 " addend=%d\n",
           offset, length, addend);
#endif
    if (length <= 0)
        return -EINVAL;
    start = offset & ~(s->cluster_size - 1);
    last = (offset + length - 1) & ~(s->cluster_size - 1);
    for(cluster_offset = start; cluster_offset <= last;
        cluster_offset += s->cluster_size)
    {
        int block_index, refcount;
        int64_t cluster_index = cluster_offset >> s->cluster_bits;

        /* Only write refcount block to disk when we are done with it */
        old_table_index = table_index;
        table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
        if ((old_table_index >= 0) && (table_index != old_table_index)) {

            if (write_refcount_block_entries(s, refcount_block_offset,
                first_index, last_index) < 0)
            {
                return -EIO;
            }

            first_index = -1;
            last_index = -1;
        }

        /* Load the refcount block and allocate it if needed */
        refcount_block_offset = alloc_refcount_block(bs, cluster_index);
        if (refcount_block_offset < 0) {
            return refcount_block_offset;
        }

        /* we can update the count and save it */
        block_index = cluster_index &
            ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
        if (first_index == -1 || block_index < first_index) {
            first_index = block_index;
        }
        if (block_index > last_index) {
            last_index = block_index;
        }

        refcount = be16_to_cpu(s->refcount_block_cache[block_index]);
        refcount += addend;
        if (refcount < 0 || refcount > 0xffff)
            return -EINVAL;
        if (refcount == 0 && cluster_index < s->free_cluster_index) {
            s->free_cluster_index = cluster_index;
        }
        s->refcount_block_cache[block_index] = cpu_to_be16(refcount);
    }

    /* Write last changed block to disk */
    if (refcount_block_offset != 0) {
        if (write_refcount_block_entries(s, refcount_block_offset,
            first_index, last_index) < 0)
        {
            return -EIO;
        }
    }

    return 0;
}

/* addend must be 1 or -1 */
static int update_cluster_refcount(BlockDriverState *bs,
                                   int64_t cluster_index,
                                   int addend)
{
    BDRVQcowState *s = bs->opaque;
    int ret;

    ret = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend);
    if (ret < 0) {
        return ret;
    }

    return get_refcount(bs, cluster_index);
}



/*********************************************************/
/* cluster allocation functions */



/*
 * Finds a run of free clusters large enough for the given size and returns
 * its offset, or < 0 if error. The refcounts of the returned clusters are
 * NOT updated by this function.
 */
static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size)
{
    BDRVQcowState *s = bs->opaque;
    int i, nb_clusters;

    nb_clusters = size_to_clusters(s, size);
retry:
    for(i = 0; i < nb_clusters; i++) {
        int64_t i = s->free_cluster_index++;
        if (get_refcount(bs, i) != 0)
            goto retry;
    }
#ifdef DEBUG_ALLOC2
    printf("alloc_clusters: size=%" PRId64 " -> %" PRId64 "\n",
            size,
            (s->free_cluster_index - nb_clusters) << s->cluster_bits);
#endif
    return (s->free_cluster_index - nb_clusters) << s->cluster_bits;
}

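/* Allocates clusters for the given number of bytes and sets their refcount to 1. */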
int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size)
{
    int64_t offset;

    offset = alloc_clusters_noref(bs, size);
    update_refcount(bs, offset, size, 1);
    return offset;
}

/* only used to allocate compressed sectors. We try to allocate
   contiguous sectors. size must be <= cluster_size */
int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
{
    BDRVQcowState *s = bs->opaque;
    int64_t offset, cluster_offset;
    int free_in_cluster;

    assert(size > 0 && size <= s->cluster_size);
    if (s->free_byte_offset == 0) {
        s->free_byte_offset = qcow2_alloc_clusters(bs, s->cluster_size);
    }
 redo:
    free_in_cluster = s->cluster_size -
        (s->free_byte_offset & (s->cluster_size - 1));
    if (size <= free_in_cluster) {
        /* enough space in current cluster */
        offset = s->free_byte_offset;
        s->free_byte_offset += size;
        free_in_cluster -= size;
        if (free_in_cluster == 0)
            s->free_byte_offset = 0;
        if ((offset & (s->cluster_size - 1)) != 0)
            update_cluster_refcount(bs, offset >> s->cluster_bits, 1);
    } else {
        offset = qcow2_alloc_clusters(bs, s->cluster_size);
        cluster_offset = s->free_byte_offset & ~(s->cluster_size - 1);
        if ((cluster_offset + s->cluster_size) == offset) {
            /* we are lucky: contiguous data */
            offset = s->free_byte_offset;
            update_cluster_refcount(bs, offset >> s->cluster_bits, 1);
            s->free_byte_offset += size;
        } else {
            s->free_byte_offset = offset;
            goto redo;
        }
    }
    return offset;
}

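/* Decrements the refcount of every cluster in the given byte range by one. */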
void qcow2_free_clusters(BlockDriverState *bs,
                          int64_t offset, int64_t size)
{
    update_refcount(bs, offset, size, -1);
}

/*
 * free_any_clusters
 *
 * Frees clusters according to their type: compressed or not.
 *
 */

void qcow2_free_any_clusters(BlockDriverState *bs,
    uint64_t cluster_offset, int nb_clusters)
{
    BDRVQcowState *s = bs->opaque;

    /* free the cluster */

    if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
        int nb_csectors;
        nb_csectors = ((cluster_offset >> s->csize_shift) &
                       s->csize_mask) + 1;
        qcow2_free_clusters(bs,
            (cluster_offset & s->cluster_offset_mask) & ~511,
            nb_csectors * 512);
        return;
    }

    qcow2_free_clusters(bs, cluster_offset, nb_clusters << s->cluster_bits);

    return;
}


/*********************************************************/
/* snapshots and image creation */



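/*
 * Increments the in-memory refcount block entries covering the given byte
 * range; used during image creation to build the initial refcount block in
 * memory.
 */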
void qcow2_create_refcount_update(QCowCreateState *s, int64_t offset,
    int64_t size)
{
    int refcount;
    int64_t start, last, cluster_offset;
    uint16_t *p;

    start = offset & ~(s->cluster_size - 1);
    last = (offset + size - 1) & ~(s->cluster_size - 1);
    for(cluster_offset = start; cluster_offset <= last;
        cluster_offset += s->cluster_size) {
        p = &s->refcount_block[cluster_offset >> s->cluster_bits];
        refcount = be16_to_cpu(*p);
        refcount++;
        *p = cpu_to_be16(refcount);
    }
}

/* update the refcounts of snapshots and the copied flag */
int qcow2_update_snapshot_refcount(BlockDriverState *bs,
    int64_t l1_table_offset, int l1_size, int addend)
{
    BDRVQcowState *s = bs->opaque;
    uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, l1_allocated;
    int64_t old_offset, old_l2_offset;
    int l2_size, i, j, l1_modified, l2_modified, nb_csectors, refcount;

    qcow2_l2_cache_reset(bs);

    l2_table = NULL;
    l1_table = NULL;
    l1_size2 = l1_size * sizeof(uint64_t);
    l1_allocated = 0;
    if (l1_table_offset != s->l1_table_offset) {
        l1_table = qemu_malloc(l1_size2);
        l1_allocated = 1;
        if (bdrv_pread(s->hd, l1_table_offset,
                       l1_table, l1_size2) != l1_size2)
            goto fail;
        for(i = 0; i < l1_size; i++)
            be64_to_cpus(&l1_table[i]);
    } else {
        assert(l1_size == s->l1_size);
        l1_table = s->l1_table;
        l1_allocated = 0;
    }

    l2_size = s->l2_size * sizeof(uint64_t);
    l2_table = qemu_malloc(l2_size);
    l1_modified = 0;
    for(i = 0; i < l1_size; i++) {
        l2_offset = l1_table[i];
        if (l2_offset) {
            old_l2_offset = l2_offset;
            l2_offset &= ~QCOW_OFLAG_COPIED;
            l2_modified = 0;
            if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size)
                goto fail;
            for(j = 0; j < s->l2_size; j++) {
                offset = be64_to_cpu(l2_table[j]);
                if (offset != 0) {
                    old_offset = offset;
                    offset &= ~QCOW_OFLAG_COPIED;
                    if (offset & QCOW_OFLAG_COMPRESSED) {
                        nb_csectors = ((offset >> s->csize_shift) &
                                       s->csize_mask) + 1;
                        if (addend != 0)
                            update_refcount(bs, (offset & s->cluster_offset_mask) & ~511,
                                            nb_csectors * 512, addend);
                        /* compressed clusters are never modified */
                        refcount = 2;
                    } else {
                        if (addend != 0) {
                            refcount = update_cluster_refcount(bs, offset >> s->cluster_bits, addend);
                        } else {
                            refcount = get_refcount(bs, offset >> s->cluster_bits);
                        }
                    }

                    if (refcount == 1) {
                        offset |= QCOW_OFLAG_COPIED;
                    }
                    if (offset != old_offset) {
                        l2_table[j] = cpu_to_be64(offset);
                        l2_modified = 1;
                    }
                }
            }
            if (l2_modified) {
                if (bdrv_pwrite(s->hd,
                                l2_offset, l2_table, l2_size) != l2_size)
                    goto fail;
            }

            if (addend != 0) {
                refcount = update_cluster_refcount(bs, l2_offset >> s->cluster_bits, addend);
            } else {
                refcount = get_refcount(bs, l2_offset >> s->cluster_bits);
            }
            if (refcount == 1) {
                l2_offset |= QCOW_OFLAG_COPIED;
            }
            if (l2_offset != old_l2_offset) {
                l1_table[i] = l2_offset;
                l1_modified = 1;
            }
        }
    }
    if (l1_modified) {
        for(i = 0; i < l1_size; i++)
            cpu_to_be64s(&l1_table[i]);
        if (bdrv_pwrite(s->hd, l1_table_offset, l1_table,
                        l1_size2) != l1_size2)
            goto fail;
        for(i = 0; i < l1_size; i++)
            be64_to_cpus(&l1_table[i]);
    }
    if (l1_allocated)
        qemu_free(l1_table);
    qemu_free(l2_table);
    return 0;
 fail:
    if (l1_allocated)
        qemu_free(l1_table);
    qemu_free(l2_table);
    return -EIO;
}




/*********************************************************/
/* refcount checking functions */



/*
 * Increases the refcount for a range of clusters in a given refcount table.
 * This is used to construct a temporary refcount table out of L1 and L2 tables
 * which can be compared with the refcount table saved in the image.
 *
 * Returns the number of errors in the image that were found
 */
static int inc_refcounts(BlockDriverState *bs,
                          uint16_t *refcount_table,
                          int refcount_table_size,
                          int64_t offset, int64_t size)
{
    BDRVQcowState *s = bs->opaque;
    int64_t start, last, cluster_offset;
    int k;
    int errors = 0;

    if (size <= 0)
        return 0;

    start = offset & ~(s->cluster_size - 1);
    last = (offset + size - 1) & ~(s->cluster_size - 1);
    for(cluster_offset = start; cluster_offset <= last;
        cluster_offset += s->cluster_size) {
        k = cluster_offset >> s->cluster_bits;
        if (k < 0 || k >= refcount_table_size) {
            fprintf(stderr, "ERROR: invalid cluster offset=0x%" PRIx64 "\n",
                cluster_offset);
            errors++;
        } else {
            if (++refcount_table[k] == 0) {
                fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64
                    "\n", cluster_offset);
                errors++;
            }
        }
    }

    return errors;
}

/*
 * Increases the refcount in the given refcount table for all clusters
 * referenced in the L2 table. While doing so, performs some checks on L2
 * entries.
 *
 * Returns the number of errors found by the checks or -errno if an internal
 * error occurred.
 */
static int check_refcounts_l2(BlockDriverState *bs,
    uint16_t *refcount_table, int refcount_table_size, int64_t l2_offset,
    int check_copied)
{
    BDRVQcowState *s = bs->opaque;
    uint64_t *l2_table, offset;
    int i, l2_size, nb_csectors, refcount;
    int errors = 0;

    /* Read L2 table from disk */
    l2_size = s->l2_size * sizeof(uint64_t);
    l2_table = qemu_malloc(l2_size);

    if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size)
        goto fail;

    /* Do the actual checks */
    for(i = 0; i < s->l2_size; i++) {
        offset = be64_to_cpu(l2_table[i]);
        if (offset != 0) {
            if (offset & QCOW_OFLAG_COMPRESSED) {
                /* Compressed clusters don't have QCOW_OFLAG_COPIED */
                if (offset & QCOW_OFLAG_COPIED) {
                    fprintf(stderr, "ERROR: cluster %" PRId64 ": "
                        "copied flag must never be set for compressed "
                        "clusters\n", offset >> s->cluster_bits);
                    offset &= ~QCOW_OFLAG_COPIED;
                    errors++;
                }

                /* Mark cluster as used */
                nb_csectors = ((offset >> s->csize_shift) &
                               s->csize_mask) + 1;
                offset &= s->cluster_offset_mask;
                errors += inc_refcounts(bs, refcount_table,
                              refcount_table_size,
                              offset & ~511, nb_csectors * 512);
            } else {
                /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
                if (check_copied) {
                    uint64_t entry = offset;
                    offset &= ~QCOW_OFLAG_COPIED;
                    refcount = get_refcount(bs, offset >> s->cluster_bits);
                    if ((refcount == 1) != ((entry & QCOW_OFLAG_COPIED) != 0)) {
                        fprintf(stderr, "ERROR OFLAG_COPIED: offset=%"
                            PRIx64 " refcount=%d\n", entry, refcount);
                        errors++;
                    }
                }

                /* Mark cluster as used */
                offset &= ~QCOW_OFLAG_COPIED;
                errors += inc_refcounts(bs, refcount_table,
                              refcount_table_size,
                              offset, s->cluster_size);

                /* Correct offsets are cluster aligned */
                if (offset & (s->cluster_size - 1)) {
                    fprintf(stderr, "ERROR offset=%" PRIx64 ": Cluster is not "
                        "properly aligned; L2 entry corrupted.\n", offset);
                    errors++;
                }
            }
        }
    }

    qemu_free(l2_table);
    return errors;

fail:
    fprintf(stderr, "ERROR: I/O error in check_refcounts_l2\n");
    qemu_free(l2_table);
    return -EIO;
}

/*
 * Increases the refcount for the L1 table, its L2 tables and all referenced
 * clusters in the given refcount table. While doing so, performs some checks
 * on L1 and L2 entries.
 *
 * Returns the number of errors found by the checks or -errno if an internal
 * error occurred.
 */
static int check_refcounts_l1(BlockDriverState *bs,
                              uint16_t *refcount_table,
                              int refcount_table_size,
                              int64_t l1_table_offset, int l1_size,
                              int check_copied)
{
    BDRVQcowState *s = bs->opaque;
    uint64_t *l1_table, l2_offset, l1_size2;
    int i, refcount, ret;
    int errors = 0;

    l1_size2 = l1_size * sizeof(uint64_t);

    /* Mark L1 table as used */
    errors += inc_refcounts(bs, refcount_table, refcount_table_size,
                  l1_table_offset, l1_size2);

    /* Read L1 table entries from disk */
    l1_table = qemu_malloc(l1_size2);
    if (bdrv_pread(s->hd, l1_table_offset,
                   l1_table, l1_size2) != l1_size2)
        goto fail;
    for(i = 0; i < l1_size; i++)
        be64_to_cpus(&l1_table[i]);

    /* Do the actual checks */
    for(i = 0; i < l1_size; i++) {
        l2_offset = l1_table[i];
        if (l2_offset) {
            /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
            if (check_copied) {
                refcount = get_refcount(bs, (l2_offset & ~QCOW_OFLAG_COPIED)
                    >> s->cluster_bits);
                if ((refcount == 1) != ((l2_offset & QCOW_OFLAG_COPIED) != 0)) {
                    fprintf(stderr, "ERROR OFLAG_COPIED: l2_offset=%" PRIx64
                        " refcount=%d\n", l2_offset, refcount);
                    errors++;
                }
            }

            /* Mark L2 table as used */
            l2_offset &= ~QCOW_OFLAG_COPIED;
            errors += inc_refcounts(bs, refcount_table,
                          refcount_table_size,
                          l2_offset,
                          s->cluster_size);

            /* L2 tables are cluster aligned */
            if (l2_offset & (s->cluster_size - 1)) {
                fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not "
                    "cluster aligned; L1 entry corrupted\n", l2_offset);
                errors++;
            }

            /* Process and check L2 entries */
            ret = check_refcounts_l2(bs, refcount_table, refcount_table_size,
                l2_offset, check_copied);
            if (ret < 0) {
                goto fail;
            }
            errors += ret;
        }
    }
    qemu_free(l1_table);
    return errors;

fail:
    fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
    qemu_free(l1_table);
    return -EIO;
}

/*
 * Checks an image for refcount consistency.
 *
 * Returns 0 if no errors are found, the number of errors in case the image is
 * detected as corrupted, and -errno when an internal error occurred.
 */
int qcow2_check_refcounts(BlockDriverState *bs)
{
    BDRVQcowState *s = bs->opaque;
    int64_t size;
    int nb_clusters, refcount1, refcount2, i;
    QCowSnapshot *sn;
    uint16_t *refcount_table;
    int ret, errors = 0;

    size = bdrv_getlength(s->hd);
    nb_clusters = size_to_clusters(s, size);
    refcount_table = qemu_mallocz(nb_clusters * sizeof(uint16_t));

    /* header */
    errors += inc_refcounts(bs, refcount_table, nb_clusters,
                  0, s->cluster_size);

    /* current L1 table */
    ret = check_refcounts_l1(bs, refcount_table, nb_clusters,
                       s->l1_table_offset, s->l1_size, 1);
    if (ret < 0) {
        qemu_free(refcount_table);
        return ret;
    }
    errors += ret;

    /* snapshots */
    for(i = 0; i < s->nb_snapshots; i++) {
        sn = s->snapshots + i;
        check_refcounts_l1(bs, refcount_table, nb_clusters,
                           sn->l1_table_offset, sn->l1_size, 0);
    }
    errors += inc_refcounts(bs, refcount_table, nb_clusters,
                  s->snapshots_offset, s->snapshots_size);

    /* refcount data */
    errors += inc_refcounts(bs, refcount_table, nb_clusters,
                  s->refcount_table_offset,
                  s->refcount_table_size * sizeof(uint64_t));
    for(i = 0; i < s->refcount_table_size; i++) {
        int64_t offset;
        offset = s->refcount_table[i];
        if (offset != 0) {
            errors += inc_refcounts(bs, refcount_table, nb_clusters,
                          offset, s->cluster_size);
        }
    }

    /* compare ref counts */
    for(i = 0; i < nb_clusters; i++) {
        refcount1 = get_refcount(bs, i);
        refcount2 = refcount_table[i];
        if (refcount1 != refcount2) {
            fprintf(stderr, "ERROR cluster %d refcount=%d reference=%d\n",
                   i, refcount1, refcount2);
            errors++;
        }
    }

    qemu_free(refcount_table);

    return errors;
}