:p
atchew
Login
Fix incorrect data end calculation in parallels_open(). Split image leak handling to separate check and fix helpers. Add checking and repairing duplicate offsets in BAT Replace fprintf() by qemu_log(). Image repairing in parallels_open(). v2: 2: Moved outsude parallels_check_leak() 2 helpers: parallels_get_leak_size() and parallels_fix_leak(). 3: Used highest_offset() helper in parallels_check_leak(). Fixed a typo. Added comments. Replaced g_malloc() call by qemu_memalign(). Replaced bdrv_pread() call by bdrv_co_pread(). Got rid of keeping bytes and sectors in the same variable. Added setting the bitmap of the used clusters for a new allocated cluster if it isn't out of the bitmap. Moved the leak fix to the end of all the checks. Removed a dependence on image format for the duplicate check. 4 (old): Merged this patch to the previous. 4 (former 5): Fixed formatting. 5 (former 6): Fixed comments. Added O_INACTIVE check in the condition. Replaced inuse detection by header_unclean checking. Replaced playing with corutines by bdrv_check() usage. Alexander Ivanov (5): parallels: Incorrect data end calculation in parallels_open() parallels: Split image leak handling to separate check and fix helpers parallels: Add checking and repairing duplicate offsets in BAT parallels: Replace fprintf by qemu_log in check parallels: Image repairing in parallels_open() block/parallels.c | 321 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 247 insertions(+), 74 deletions(-) -- 2.34.1
The BDRVParallelsState structure contains data_end field that is measured in sectors. In parallels_open() initially this field is set by data_off field from parallels image header. According to the parallels format documentation, data_off field contains an offset, in sectors, from the start of the file to the start of the data area. For "WithoutFreeSpace" images: if data_off is zero, the offset is calculated as the end of the BAT table plus some padding to ensure sector size alignment. The parallels_open() function has code for handling zero value in data_off, but in the result data_end contains the offset in bytes. Replace the alignment to sector size by division by sector size and fix the comparision with s->header_size. Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> Reviewed-by: Denis V. Lunev <den@openvz.org> --- block/parallels.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, } s->data_end = le32_to_cpu(ph.data_off); if (s->data_end == 0) { - s->data_end = ROUND_UP(bat_entry_off(s->bat_size), BDRV_SECTOR_SIZE); + s->data_end = DIV_ROUND_UP(size, BDRV_SECTOR_SIZE); } - if (s->data_end < s->header_size) { + if (s->data_end < (s->header_size >> BDRV_SECTOR_BITS)) { /* there is not enough unused space to fit to block align between BAT and actual data. We can't avoid read-modify-write... */ s->header_size = size; -- 2.34.1
We need to fix leak after deduplication in the next patch. Move leak fixing to a separate helper parallels_fix_leak() and add parallels_get_leak_size() helper wich used in parallels_fix_leak() and parallels_check_leak(). Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- block/parallels.c | 88 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 61 insertions(+), 27 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static int parallels_check_outside_image(BlockDriverState *bs, return 0; } +static int64_t parallels_get_leak_size(BlockDriverState *bs, + BdrvCheckResult *res) +{ + int64_t size; + size = bdrv_getlength(bs->file->bs); + /* + * Before any usage of this function out-of-image corruption has been + * fixed. If the function returns a negative value, it means an error. + */ + return (size < 0) ? size : (size - res->image_end_offset); +} + +static int parallels_fix_leak(BlockDriverState *bs, + BdrvCheckResult *res) +{ + Error *local_err = NULL; + int64_t size; + int ret; + + size = parallels_get_leak_size(bs, res); + if (size <= 0) { + return size; + } + + /* + * In order to really repair the image, we must shrink it. + * That means we have to pass exact=true. + */ + ret = bdrv_co_truncate(bs->file, res->image_end_offset, true, + PREALLOC_MODE_OFF, 0, &local_err); + if (ret < 0) { + error_report_err(local_err); + return ret; + } + + return 0; +} + static int parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) { BDRVParallelsState *s = bs->opaque; - int64_t size, off, high_off, count; + int64_t off, high_off, count, leak_size; uint32_t i; int ret; - size = bdrv_getlength(bs->file->bs); - if (size < 0) { - res->check_errors++; - return size; - } - high_off = 0; for (i = 0; i < s->bat_size; i++) { off = bat2sect(s, i) << BDRV_SECTOR_BITS; @@ -XXX,XX +XXX,XX @@ static int parallels_check_leak(BlockDriverState *bs, } res->image_end_offset = high_off + s->cluster_size; - if (size > res->image_end_offset) { - count = DIV_ROUND_UP(size - res->image_end_offset, s->cluster_size); - fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n", - fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR", - size - res->image_end_offset); - res->leaks += count; - if (fix & BDRV_FIX_LEAKS) { - Error *local_err = NULL; - /* - * In order to really repair the image, we must shrink it. - * That means we have to pass exact=true. - */ - ret = bdrv_co_truncate(bs->file, res->image_end_offset, true, - PREALLOC_MODE_OFF, 0, &local_err); - if (ret < 0) { - error_report_err(local_err); - res->check_errors++; - return ret; - } - res->leaks_fixed += count; + leak_size = parallels_get_leak_size(bs, res); + if (leak_size < 0) { + res->check_errors++; + return leak_size; + } + if (leak_size == 0) { + return 0; + } + + if (fix & BDRV_FIX_LEAKS) { + ret = parallels_fix_leak(bs, res); + if (ret < 0) { + return ret; } } + count = DIV_ROUND_UP(leak_size, s->cluster_size); + fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n", + fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR", leak_size); + + res->leaks += count; + if (fix & BDRV_FIX_LEAKS) { + res->leaks_fixed += count; + } + return 0; } -- 2.34.1
Cluster offsets must be unique among all the BAT entries. Find duplicate offsets in the BAT and fix it by copying the content of the relevant cluster to a newly allocated cluster and set the new cluster offset to the duplicated entry. Add host_cluster_index() and highest_offset() helpers to deduplicate the code. Move parallels_fix_leak() call to parallels_co_check() to fix both types of leak: real corruption and a leak produced by allocate_clusters() during deduplication. Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- block/parallels.c | 168 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 151 insertions(+), 17 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num, return MIN(nb_sectors, ret); } +static uint32_t host_cluster_index(BDRVParallelsState *s, int64_t off) +{ + off -= s->header->data_off << BDRV_SECTOR_BITS; + return off / s->cluster_size; +} + +static int64_t highest_offset(BDRVParallelsState *s) +{ + int64_t off, high_off = 0; + int i; + + for (i = 0; i < s->bat_size; i++) { + off = bat2sect(s, i) << BDRV_SECTOR_BITS; + if (off > high_off) { + high_off = off; + } + } + return high_off; +} + static int64_t block_status(BDRVParallelsState *s, int64_t sector_num, int nb_sectors, int *pnum) { @@ -XXX,XX +XXX,XX @@ static int parallels_check_leak(BlockDriverState *bs, BdrvCheckMode fix) { BDRVParallelsState *s = bs->opaque; - int64_t off, high_off, count, leak_size; - uint32_t i; - int ret; + int64_t high_off, count, leak_size; - high_off = 0; - for (i = 0; i < s->bat_size; i++) { - off = bat2sect(s, i) << BDRV_SECTOR_BITS; - if (off > high_off) { - high_off = off; - } - } + high_off = highest_offset(s); res->image_end_offset = high_off + s->cluster_size; @@ -XXX,XX +XXX,XX @@ static int parallels_check_leak(BlockDriverState *bs, return 0; } - if (fix & BDRV_FIX_LEAKS) { - ret = parallels_fix_leak(bs, res); - if (ret < 0) { - return ret; - } - } - count = DIV_ROUND_UP(leak_size, s->cluster_size); fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n", fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR", leak_size); @@ -XXX,XX +XXX,XX @@ static int parallels_check_leak(BlockDriverState *bs, return 0; } +static int parallels_check_duplicate(BlockDriverState *bs, + BdrvCheckResult *res, + BdrvCheckMode *fix) +{ + BDRVParallelsState *s = bs->opaque; + QEMUIOVector qiov; + int64_t off, high_off, sector; + unsigned long *bitmap; + uint32_t i, bitmap_size, cluster_index; + int n, ret = 0; + uint64_t *buf = NULL; + + high_off = highest_offset(s); + if (high_off == 0) { + return 0; + } + + /* + * Create a bitmap of used clusters. + * If a bit is set, there is a BAT entry pointing to this cluster. + * Loop through the BAT entries, check bits relevant to an entry offset. + * If bit is set, this entry is duplicated. Otherwise set the bit. + * + * We shouldn't worry about newly allocated clusters outside the image + * because they are created higher then any existing cluster pointed by + * a BAT entry. + */ + bitmap_size = host_cluster_index(s, high_off) + 1; + bitmap = bitmap_new(bitmap_size); + + buf = qemu_memalign(4096, s->cluster_size); + qemu_iovec_init(&qiov, 0); + qemu_iovec_add(&qiov, buf, s->cluster_size); + + for (i = 0; i < s->bat_size; i++) { + off = bat2sect(s, i) << BDRV_SECTOR_BITS; + if (off == 0) { + continue; + } + + cluster_index = host_cluster_index(s, off); + if (test_bit(cluster_index, bitmap)) { + /* this cluster duplicates another one */ + fprintf(stderr, + "%s duplicate offset in BAT entry %u\n", + *fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i); + + res->corruptions++; + + if (*fix & BDRV_FIX_ERRORS) { + /* + * Reset the entry and allocate a new cluster + * for the relevant guest offset. In this way we let + * the lower layer to place the new cluster properly. + * Copy the original cluster to the allocated one. + */ + parallels_set_bat_entry(s, i, 0); + + ret = bdrv_co_pread(bs->file, off, s->cluster_size, buf, 0); + if (ret < 0) { + res->check_errors++; + goto out; + } + + sector = (i * s->cluster_size) >> BDRV_SECTOR_BITS; + sector = allocate_clusters(bs, sector, s->tracks, &n); + if (sector < 0) { + res->check_errors++; + ret = sector; + goto out; + } + off = sector << BDRV_SECTOR_BITS; + if (off > high_off) { + high_off = off; + } + + ret = bdrv_co_pwritev(bs->file, off, s->cluster_size, &qiov, 0); + if (ret < 0) { + res->check_errors++; + goto out; + } + + /* + * In the future allocate_cluster() will reuse holed offsets + * inside the image. Keep the used clusters bitmap content + * consistent for the new allocated clusters too. + * + * Note, clusters allocated outside the current image are not + * considered, and the bitmap size doesn't change. + */ + cluster_index = host_cluster_index(s, off); + if (cluster_index < bitmap_size) { + bitmap_set(bitmap, cluster_index, 1); + } + + /* + * When new clusters are allocated, file size increases by + * 128 Mb blocks. We need to truncate the file to the right + * size. Let the leak fix code make its job. + */ + *fix |= BDRV_FIX_LEAKS; + res->corruptions_fixed++; + } + res->image_end_offset = high_off + s->cluster_size; + } else { + bitmap_set(bitmap, cluster_index, 1); + } + } + +out: + qemu_iovec_destroy(&qiov); + g_free(buf); + g_free(bitmap); + return ret; +} + static void parallels_collect_statistics(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) @@ -XXX,XX +XXX,XX @@ static int coroutine_fn parallels_co_check(BlockDriverState *bs, return ret; } + ret = parallels_check_duplicate(bs, res, &fix); + if (ret < 0) { + return ret; + } + parallels_collect_statistics(bs, res, fix); + + if (fix & BDRV_FIX_LEAKS && + (res->corruptions_fixed || res->leaks_fixed)) { + ret = parallels_fix_leak(bs, res); + if (ret < 0) { + return ret; + } + } } ret = bdrv_co_flush(bs); -- 2.34.1
If the check is called during normal work, tracking of the check must be present in VM logs to have some clues if something going wrong with user's data. Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- block/parallels.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ #include "qemu/bswap.h" #include "qemu/bitmap.h" #include "qemu/memalign.h" +#include "qemu/log-for-trace.h" #include "migration/blocker.h" #include "parallels.h" @@ -XXX,XX +XXX,XX @@ static void parallels_check_unclean(BlockDriverState *bs, return; } - fprintf(stderr, "%s image was not closed correctly\n", - fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR"); + qemu_log("%s image was not closed correctly\n", + fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR"); res->corruptions++; if (fix & BDRV_FIX_ERRORS) { /* parallels_close will do the job right */ @@ -XXX,XX +XXX,XX @@ static int parallels_check_outside_image(BlockDriverState *bs, for (i = 0; i < s->bat_size; i++) { off = bat2sect(s, i) << BDRV_SECTOR_BITS; if (off >= size) { - fprintf(stderr, "%s cluster %u is outside image\n", - fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i); + qemu_log("%s cluster %u is outside image\n", + fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i); res->corruptions++; if (fix & BDRV_FIX_ERRORS) { parallels_set_bat_entry(s, i, 0); @@ -XXX,XX +XXX,XX @@ static int parallels_check_leak(BlockDriverState *bs, } count = DIV_ROUND_UP(leak_size, s->cluster_size); - fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n", - fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR", leak_size); + qemu_log("%s space leaked at the end of the image %" PRId64 "\n", + fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR", leak_size); res->leaks += count; if (fix & BDRV_FIX_LEAKS) { @@ -XXX,XX +XXX,XX @@ static int parallels_check_duplicate(BlockDriverState *bs, cluster_index = host_cluster_index(s, off); if (test_bit(cluster_index, bitmap)) { /* this cluster duplicates another one */ - fprintf(stderr, - "%s duplicate offset in BAT entry %u\n", - *fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i); + qemu_log("%s duplicate offset in BAT entry %u\n", + *fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i); res->corruptions++; -- 2.34.1
Repair an image at opening if the image is unclean or out-of-image corruption was detected. Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- block/parallels.c | 67 +++++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 31 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static int coroutine_fn parallels_co_check(BlockDriverState *bs, return ret; } - static int coroutine_fn parallels_co_create(BlockdevCreateOptions* opts, Error **errp) { @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, { BDRVParallelsState *s = bs->opaque; ParallelsHeader ph; - int ret, size, i; - int64_t file_size; + int ret, size; + int64_t file_size, high_off; QemuOpts *opts = NULL; Error *local_err = NULL; char *buf; @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, } s->bat_bitmap = (uint32_t *)(s->header + 1); - for (i = 0; i < s->bat_size; i++) { - int64_t off = bat2sect(s, i); - if (off >= file_size) { - if (flags & BDRV_O_CHECK) { - continue; - } - error_setg(errp, "parallels: Offset %" PRIi64 " in BAT[%d] entry " - "is larger than file size (%" PRIi64 ")", - off, i, file_size); - ret = -EINVAL; - goto fail; - } - if (off >= s->data_end) { - s->data_end = off + s->tracks; - } - } - - if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) { - /* Image was not closed correctly. The check is mandatory */ - s->header_unclean = true; - if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) { - error_setg(errp, "parallels: Image was not closed correctly; " - "cannot be opened read/write"); - ret = -EACCES; - goto fail; - } - } - opts = qemu_opts_create(¶llels_runtime_opts, NULL, 0, errp); if (!opts) { goto fail_options; @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, error_free(s->migration_blocker); goto fail; } + qemu_co_mutex_init(&s->lock); + + if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) { + s->header_unclean = true; + } + + high_off = highest_offset(s) >> BDRV_SECTOR_BITS; + if (high_off >= s->data_end) { + s->data_end = high_off + s->tracks; + } + + /* + * We don't repair the image here if it is opened for checks and + * shouldn't change the image if BDRV_O_RDWR or BDRV_O_INACTIVE + * flag is present. + */ + if ((flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) || !(flags & BDRV_O_RDWR)) { + return 0; + } + + /* + * Repair the image if it's dirty or + * out-of-image corruption was detected. + */ + if (s->data_end > file_size || s->header_unclean) { + BdrvCheckResult res; + ret = bdrv_check(bs, &res, BDRV_FIX_ERRORS | BDRV_FIX_LEAKS); + if (ret < 0) { + error_setg_errno(errp, -ret, + "Could not repair corrupted image"); + goto fail; + } + } + return 0; fail_format: -- 2.34.1
This patchset should be applied on the top of [PATCH v10 00/12] parallels: Refactor the code of images checks and fix a bug. Fix incorrect data end calculation in parallels_open(). Split image leak handling to separate check and fix helpers. Add checking and repairing duplicate offsets in BAT Replace fprintf() by qemu_log(). Image repairing in parallels_open(). v4: 2,5: Rebased. v3: 2: Added (size >= res->image_end_offset) assert and changed the comment in parallels_get_leak_size(). Changed error printing and leaks fixing order. 3: Removed highest_offset() helper, instead image_end_offset field is used. 5: Moved highest_offset() code to parallels_open() - now it is used only in this function. Fixed data_end update condition. Fixed a leak of s->migration_blocker. v2: 2: Moved outsude parallels_check_leak() 2 helpers: parallels_get_leak_size() and parallels_fix_leak(). 3: Used highest_offset() helper in parallels_check_leak(). Fixed a typo. Added comments. Replaced g_malloc() call by qemu_memalign(). Replaced bdrv_pread() call by bdrv_co_pread(). Got rid of keeping bytes and sectors in the same variable. Added setting the bitmap of the used clusters for a new allocated cluster if it isn't out of the bitmap. Moved the leak fix to the end of all the checks. Removed a dependence on image format for the duplicate check. 4 (old): Merged this patch to the previous. 4 (former 5): Fixed formatting. 5 (former 6): Fixed comments. Added O_INACTIVE check in the condition. Replaced inuse detection by header_unclean checking. Replaced playing with corutines by bdrv_check() usage. Alexander Ivanov (5): parallels: Incorrect data end calculation in parallels_open() parallels: Split image leak handling to separate check and fix helpers parallels: Add checking and repairing duplicate offsets in BAT parallels: Replace fprintf by qemu_log in check parallels: Image repairing in parallels_open() block/parallels.c | 291 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 228 insertions(+), 63 deletions(-) -- 2.34.1
The BDRVParallelsState structure contains data_end field that is measured in sectors. In parallels_open() initially this field is set by data_off field from parallels image header. According to the parallels format documentation, data_off field contains an offset, in sectors, from the start of the file to the start of the data area. For "WithoutFreeSpace" images: if data_off is zero, the offset is calculated as the end of the BAT table plus some padding to ensure sector size alignment. The parallels_open() function has code for handling zero value in data_off, but in the result data_end contains the offset in bytes. Replace the alignment to sector size by division by sector size and fix the comparision with s->header_size. Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> Reviewed-by: Denis V. Lunev <den@openvz.org> --- block/parallels.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, } s->data_end = le32_to_cpu(ph.data_off); if (s->data_end == 0) { - s->data_end = ROUND_UP(bat_entry_off(s->bat_size), BDRV_SECTOR_SIZE); + s->data_end = DIV_ROUND_UP(size, BDRV_SECTOR_SIZE); } - if (s->data_end < s->header_size) { + if (s->data_end < (s->header_size >> BDRV_SECTOR_BITS)) { /* there is not enough unused space to fit to block align between BAT and actual data. We can't avoid read-modify-write... */ s->header_size = size; -- 2.34.1
We need to fix leak after deduplication in the next patch. Move leak fixing to a separate helper parallels_fix_leak() and add parallels_get_leak_size() helper wich used in parallels_fix_leak() and parallels_check_leak(). Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- block/parallels.c | 86 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 61 insertions(+), 25 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ parallels_check_outside_image(BlockDriverState *bs, BdrvCheckResult *res, return 0; } +static int64_t parallels_get_leak_size(BlockDriverState *bs, + BdrvCheckResult *res) +{ + int64_t size; + + size = bdrv_getlength(bs->file->bs); + if (size < 0) { + return size; + } + + /* + * Before any usage of this function, image_end_offset has to be set to the + * the highest offset in the BAT, excluding out-of-image offsets. + */ + assert(size >= res->image_end_offset); + + return size - res->image_end_offset; +} + +static int parallels_fix_leak(BlockDriverState *bs, + BdrvCheckResult *res) +{ + Error *local_err = NULL; + int64_t size; + int ret; + + size = parallels_get_leak_size(bs, res); + if (size <= 0) { + return size; + } + + /* + * In order to really repair the image, we must shrink it. + * That means we have to pass exact=true. + */ + ret = bdrv_co_truncate(bs->file, res->image_end_offset, true, + PREALLOC_MODE_OFF, 0, &local_err); + if (ret < 0) { + error_report_err(local_err); + return ret; + } + + return 0; +} + static int coroutine_fn GRAPH_RDLOCK parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) { BDRVParallelsState *s = bs->opaque; - int64_t size; + int64_t count, leak_size; int ret; - size = bdrv_getlength(bs->file->bs); - if (size < 0) { + leak_size = parallels_get_leak_size(bs, res); + if (leak_size < 0) { res->check_errors++; - return size; + return leak_size; + } + if (leak_size == 0) { + return 0; } - if (size > res->image_end_offset) { - int64_t count; - count = DIV_ROUND_UP(size - res->image_end_offset, s->cluster_size); - fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n", - fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR", - size - res->image_end_offset); - res->leaks += count; - if (fix & BDRV_FIX_LEAKS) { - Error *local_err = NULL; + count = DIV_ROUND_UP(leak_size, s->cluster_size); + res->leaks += count; + fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n", + fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR", leak_size); - /* - * In order to really repair the image, we must shrink it. - * That means we have to pass exact=true. - */ - ret = bdrv_co_truncate(bs->file, res->image_end_offset, true, - PREALLOC_MODE_OFF, 0, &local_err); - if (ret < 0) { - error_report_err(local_err); - res->check_errors++; - return ret; - } - res->leaks_fixed += count; + if (fix & BDRV_FIX_LEAKS) { + ret = parallels_fix_leak(bs, res); + if (ret < 0) { + return ret; } + res->leaks_fixed += count; } return 0; -- 2.34.1
Cluster offsets must be unique among all the BAT entries. Find duplicate offsets in the BAT and fix it by copying the content of the relevant cluster to a newly allocated cluster and set the new cluster offset to the duplicated entry. Add host_cluster_index() helper to deduplicate the code. Move parallels_fix_leak() call to parallels_co_check() to fix both types of leak: real corruption and a leak produced by allocate_clusters() during deduplication. Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- block/parallels.c | 134 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 129 insertions(+), 5 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num, return MIN(nb_sectors, ret); } +static uint32_t host_cluster_index(BDRVParallelsState *s, int64_t off) +{ + off -= s->header->data_off << BDRV_SECTOR_BITS; + return off / s->cluster_size; +} + static int64_t block_status(BDRVParallelsState *s, int64_t sector_num, int nb_sectors, int *pnum) { @@ -XXX,XX +XXX,XX @@ parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res, { BDRVParallelsState *s = bs->opaque; int64_t count, leak_size; - int ret; leak_size = parallels_get_leak_size(bs, res); if (leak_size < 0) { @@ -XXX,XX +XXX,XX @@ parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res, fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR", leak_size); if (fix & BDRV_FIX_LEAKS) { - ret = parallels_fix_leak(bs, res); - if (ret < 0) { - return ret; - } res->leaks_fixed += count; } return 0; } +static int parallels_check_duplicate(BlockDriverState *bs, + BdrvCheckResult *res, + BdrvCheckMode *fix) +{ + BDRVParallelsState *s = bs->opaque; + QEMUIOVector qiov; + int64_t off, sector; + unsigned long *bitmap; + uint32_t i, bitmap_size, cluster_index; + int n, ret = 0; + uint64_t *buf = NULL; + + /* + * Create a bitmap of used clusters. + * If a bit is set, there is a BAT entry pointing to this cluster. + * Loop through the BAT entries, check bits relevant to an entry offset. + * If bit is set, this entry is duplicated. Otherwise set the bit. + * + * We shouldn't worry about newly allocated clusters outside the image + * because they are created higher then any existing cluster pointed by + * a BAT entry. + */ + bitmap_size = host_cluster_index(s, res->image_end_offset); + bitmap = bitmap_new(bitmap_size); + + buf = qemu_memalign(4096, s->cluster_size); + qemu_iovec_init(&qiov, 0); + qemu_iovec_add(&qiov, buf, s->cluster_size); + + for (i = 0; i < s->bat_size; i++) { + off = bat2sect(s, i) << BDRV_SECTOR_BITS; + if (off == 0) { + continue; + } + + cluster_index = host_cluster_index(s, off); + if (test_bit(cluster_index, bitmap)) { + /* this cluster duplicates another one */ + fprintf(stderr, + "%s duplicate offset in BAT entry %u\n", + *fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i); + + res->corruptions++; + + if (*fix & BDRV_FIX_ERRORS) { + /* + * Reset the entry and allocate a new cluster + * for the relevant guest offset. In this way we let + * the lower layer to place the new cluster properly. + * Copy the original cluster to the allocated one. + */ + parallels_set_bat_entry(s, i, 0); + + ret = bdrv_co_pread(bs->file, off, s->cluster_size, buf, 0); + if (ret < 0) { + res->check_errors++; + goto out; + } + + sector = (i * s->cluster_size) >> BDRV_SECTOR_BITS; + sector = allocate_clusters(bs, sector, s->tracks, &n); + if (sector < 0) { + res->check_errors++; + ret = sector; + goto out; + } + off = sector << BDRV_SECTOR_BITS; + + ret = bdrv_co_pwritev(bs->file, off, s->cluster_size, &qiov, 0); + if (ret < 0) { + res->check_errors++; + goto out; + } + + if (off + s->cluster_size > res->image_end_offset) { + res->image_end_offset = off + s->cluster_size; + } + + /* + * In the future allocate_cluster() will reuse holed offsets + * inside the image. Keep the used clusters bitmap content + * consistent for the new allocated clusters too. + * + * Note, clusters allocated outside the current image are not + * considered, and the bitmap size doesn't change. + */ + cluster_index = host_cluster_index(s, off); + if (cluster_index < bitmap_size) { + bitmap_set(bitmap, cluster_index, 1); + } + + /* + * When new clusters are allocated, file size increases by + * 128 Mb blocks. We need to truncate the file to the right + * size. Let the leak fix code make its job. + */ + *fix |= BDRV_FIX_LEAKS; + res->corruptions_fixed++; + } + } else { + bitmap_set(bitmap, cluster_index, 1); + } + } + +out: + qemu_iovec_destroy(&qiov); + g_free(buf); + g_free(bitmap); + return ret; +} + static void parallels_collect_statistics(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) @@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res, return ret; } + ret = parallels_check_duplicate(bs, res, &fix); + if (ret < 0) { + return ret; + } + parallels_collect_statistics(bs, res, fix); + + if (fix & BDRV_FIX_LEAKS) { + ret = parallels_fix_leak(bs, res); + if (ret < 0) { + return ret; + } + } } ret = bdrv_co_flush(bs); -- 2.34.1
If the check is called during normal work, tracking of the check must be present in VM logs to have some clues if something going wrong with user's data. Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> Reviewed-by: Denis V. Lunev <den@openvz.org> --- block/parallels.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ #include "qemu/bswap.h" #include "qemu/bitmap.h" #include "qemu/memalign.h" +#include "qemu/log-for-trace.h" #include "migration/blocker.h" #include "parallels.h" @@ -XXX,XX +XXX,XX @@ static void parallels_check_unclean(BlockDriverState *bs, return; } - fprintf(stderr, "%s image was not closed correctly\n", - fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR"); + qemu_log("%s image was not closed correctly\n", + fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR"); res->corruptions++; if (fix & BDRV_FIX_ERRORS) { /* parallels_close will do the job right */ @@ -XXX,XX +XXX,XX @@ parallels_check_outside_image(BlockDriverState *bs, BdrvCheckResult *res, for (i = 0; i < s->bat_size; i++) { off = bat2sect(s, i) << BDRV_SECTOR_BITS; if (off + s->cluster_size > size) { - fprintf(stderr, "%s cluster %u is outside image\n", - fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i); + qemu_log("%s cluster %u is outside image\n", + fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i); res->corruptions++; if (fix & BDRV_FIX_ERRORS) { parallels_set_bat_entry(s, i, 0); @@ -XXX,XX +XXX,XX @@ parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res, count = DIV_ROUND_UP(leak_size, s->cluster_size); res->leaks += count; - fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n", - fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR", leak_size); + qemu_log("%s space leaked at the end of the image %" PRId64 "\n", + fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR", leak_size); if (fix & BDRV_FIX_LEAKS) { res->leaks_fixed += count; @@ -XXX,XX +XXX,XX @@ static int parallels_check_duplicate(BlockDriverState *bs, cluster_index = host_cluster_index(s, off); if (test_bit(cluster_index, bitmap)) { /* this cluster duplicates another one */ - fprintf(stderr, - "%s duplicate offset in BAT entry %u\n", - *fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i); + qemu_log("%s duplicate offset in BAT entry %u\n", + *fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i); res->corruptions++; -- 2.34.1
Repair an image at opening if the image is unclean or out-of-image corruption was detected. Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- block/parallels.c | 65 +++++++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 30 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, BDRVParallelsState *s = bs->opaque; ParallelsHeader ph; int ret, size, i; - int64_t file_nb_sectors; + int64_t file_nb_sectors, sector; QemuOpts *opts = NULL; Error *local_err = NULL; char *buf; @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, } s->bat_bitmap = (uint32_t *)(s->header + 1); - for (i = 0; i < s->bat_size; i++) { - int64_t off = bat2sect(s, i); - if (off >= file_nb_sectors) { - if (flags & BDRV_O_CHECK) { - continue; - } - error_setg(errp, "parallels: Offset %" PRIi64 " in BAT[%d] entry " - "is larger than file size (%" PRIi64 ")", - off << BDRV_SECTOR_BITS, i, - file_nb_sectors << BDRV_SECTOR_BITS); - ret = -EINVAL; - goto fail; - } - if (off >= s->data_end) { - s->data_end = off + s->tracks; - } - } - - if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) { - /* Image was not closed correctly. The check is mandatory */ - s->header_unclean = true; - if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) { - error_setg(errp, "parallels: Image was not closed correctly; " - "cannot be opened read/write"); - ret = -EACCES; - goto fail; - } - } - opts = qemu_opts_create(¶llels_runtime_opts, NULL, 0, errp); if (!opts) { goto fail_options; @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } qemu_co_mutex_init(&s->lock); + + if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) { + s->header_unclean = true; + } + + for (i = 0; i < s->bat_size; i++) { + sector = bat2sect(s, i); + if (sector + s->tracks > s->data_end) { + s->data_end = sector + s->tracks; + } + } + + /* + * We don't repair the image here if it's opened for checks. Also we don't + * want to change inactive images and can't change readonly images. + */ + if ((flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) || !(flags & BDRV_O_RDWR)) { + return 0; + } + + /* + * Repair the image if it's dirty or + * out-of-image corruption was detected. + */ + if (s->data_end > file_nb_sectors || s->header_unclean) { + BdrvCheckResult res; + ret = bdrv_check(bs, &res, BDRV_FIX_ERRORS | BDRV_FIX_LEAKS); + if (ret < 0) { + error_free(s->migration_blocker); + error_setg_errno(errp, -ret, "Could not repair corrupted image"); + goto fail; + } + } + return 0; fail_format: -- 2.34.1