:p
atchew
Login
Fix incorrect data end calculation in parallels_open(). Split image leak handling to separate check and fix helpers. Add checking and repairing duplicate offsets in BAT Replace fprintf() by qemu_log(). Image repairing in parallels_open(). v2: 2: Moved outsude parallels_check_leak() 2 helpers: parallels_get_leak_size() and parallels_fix_leak(). 3: Used highest_offset() helper in parallels_check_leak(). Fixed a typo. Added comments. Replaced g_malloc() call by qemu_memalign(). Replaced bdrv_pread() call by bdrv_co_pread(). Got rid of keeping bytes and sectors in the same variable. Added setting the bitmap of the used clusters for a new allocated cluster if it isn't out of the bitmap. Moved the leak fix to the end of all the checks. Removed a dependence on image format for the duplicate check. 4 (old): Merged this patch to the previous. 4 (former 5): Fixed formatting. 5 (former 6): Fixed comments. Added O_INACTIVE check in the condition. Replaced inuse detection by header_unclean checking. Replaced playing with corutines by bdrv_check() usage. Alexander Ivanov (5): parallels: Incorrect data end calculation in parallels_open() parallels: Split image leak handling to separate check and fix helpers parallels: Add checking and repairing duplicate offsets in BAT parallels: Replace fprintf by qemu_log in check parallels: Image repairing in parallels_open() block/parallels.c | 321 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 247 insertions(+), 74 deletions(-) -- 2.34.1
The BDRVParallelsState structure contains data_end field that is measured in sectors. In parallels_open() initially this field is set by data_off field from parallels image header. According to the parallels format documentation, data_off field contains an offset, in sectors, from the start of the file to the start of the data area. For "WithoutFreeSpace" images: if data_off is zero, the offset is calculated as the end of the BAT table plus some padding to ensure sector size alignment. The parallels_open() function has code for handling zero value in data_off, but in the result data_end contains the offset in bytes. Replace the alignment to sector size by division by sector size and fix the comparision with s->header_size. Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> Reviewed-by: Denis V. Lunev <den@openvz.org> --- block/parallels.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, } s->data_end = le32_to_cpu(ph.data_off); if (s->data_end == 0) { - s->data_end = ROUND_UP(bat_entry_off(s->bat_size), BDRV_SECTOR_SIZE); + s->data_end = DIV_ROUND_UP(size, BDRV_SECTOR_SIZE); } - if (s->data_end < s->header_size) { + if (s->data_end < (s->header_size >> BDRV_SECTOR_BITS)) { /* there is not enough unused space to fit to block align between BAT and actual data. We can't avoid read-modify-write... */ s->header_size = size; -- 2.34.1
We need to fix leak after deduplication in the next patch. Move leak fixing to a separate helper parallels_fix_leak() and add parallels_get_leak_size() helper wich used in parallels_fix_leak() and parallels_check_leak(). Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- block/parallels.c | 88 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 61 insertions(+), 27 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static int parallels_check_outside_image(BlockDriverState *bs, return 0; } +static int64_t parallels_get_leak_size(BlockDriverState *bs, + BdrvCheckResult *res) +{ + int64_t size; + size = bdrv_getlength(bs->file->bs); + /* + * Before any usage of this function out-of-image corruption has been + * fixed. If the function returns a negative value, it means an error. + */ + return (size < 0) ? size : (size - res->image_end_offset); +} + +static int parallels_fix_leak(BlockDriverState *bs, + BdrvCheckResult *res) +{ + Error *local_err = NULL; + int64_t size; + int ret; + + size = parallels_get_leak_size(bs, res); + if (size <= 0) { + return size; + } + + /* + * In order to really repair the image, we must shrink it. + * That means we have to pass exact=true. + */ + ret = bdrv_co_truncate(bs->file, res->image_end_offset, true, + PREALLOC_MODE_OFF, 0, &local_err); + if (ret < 0) { + error_report_err(local_err); + return ret; + } + + return 0; +} + static int parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) { BDRVParallelsState *s = bs->opaque; - int64_t size, off, high_off, count; + int64_t off, high_off, count, leak_size; uint32_t i; int ret; - size = bdrv_getlength(bs->file->bs); - if (size < 0) { - res->check_errors++; - return size; - } - high_off = 0; for (i = 0; i < s->bat_size; i++) { off = bat2sect(s, i) << BDRV_SECTOR_BITS; @@ -XXX,XX +XXX,XX @@ static int parallels_check_leak(BlockDriverState *bs, } res->image_end_offset = high_off + s->cluster_size; - if (size > res->image_end_offset) { - count = DIV_ROUND_UP(size - res->image_end_offset, s->cluster_size); - fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n", - fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR", - size - res->image_end_offset); - res->leaks += count; - if (fix & BDRV_FIX_LEAKS) { - Error *local_err = NULL; - /* - * In order to really repair the image, we must shrink it. - * That means we have to pass exact=true. - */ - ret = bdrv_co_truncate(bs->file, res->image_end_offset, true, - PREALLOC_MODE_OFF, 0, &local_err); - if (ret < 0) { - error_report_err(local_err); - res->check_errors++; - return ret; - } - res->leaks_fixed += count; + leak_size = parallels_get_leak_size(bs, res); + if (leak_size < 0) { + res->check_errors++; + return leak_size; + } + if (leak_size == 0) { + return 0; + } + + if (fix & BDRV_FIX_LEAKS) { + ret = parallels_fix_leak(bs, res); + if (ret < 0) { + return ret; } } + count = DIV_ROUND_UP(leak_size, s->cluster_size); + fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n", + fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR", leak_size); + + res->leaks += count; + if (fix & BDRV_FIX_LEAKS) { + res->leaks_fixed += count; + } + return 0; } -- 2.34.1
Cluster offsets must be unique among all the BAT entries. Find duplicate offsets in the BAT and fix it by copying the content of the relevant cluster to a newly allocated cluster and set the new cluster offset to the duplicated entry. Add host_cluster_index() and highest_offset() helpers to deduplicate the code. Move parallels_fix_leak() call to parallels_co_check() to fix both types of leak: real corruption and a leak produced by allocate_clusters() during deduplication. Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- block/parallels.c | 168 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 151 insertions(+), 17 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num, return MIN(nb_sectors, ret); } +static uint32_t host_cluster_index(BDRVParallelsState *s, int64_t off) +{ + off -= s->header->data_off << BDRV_SECTOR_BITS; + return off / s->cluster_size; +} + +static int64_t highest_offset(BDRVParallelsState *s) +{ + int64_t off, high_off = 0; + int i; + + for (i = 0; i < s->bat_size; i++) { + off = bat2sect(s, i) << BDRV_SECTOR_BITS; + if (off > high_off) { + high_off = off; + } + } + return high_off; +} + static int64_t block_status(BDRVParallelsState *s, int64_t sector_num, int nb_sectors, int *pnum) { @@ -XXX,XX +XXX,XX @@ static int parallels_check_leak(BlockDriverState *bs, BdrvCheckMode fix) { BDRVParallelsState *s = bs->opaque; - int64_t off, high_off, count, leak_size; - uint32_t i; - int ret; + int64_t high_off, count, leak_size; - high_off = 0; - for (i = 0; i < s->bat_size; i++) { - off = bat2sect(s, i) << BDRV_SECTOR_BITS; - if (off > high_off) { - high_off = off; - } - } + high_off = highest_offset(s); res->image_end_offset = high_off + s->cluster_size; @@ -XXX,XX +XXX,XX @@ static int parallels_check_leak(BlockDriverState *bs, return 0; } - if (fix & BDRV_FIX_LEAKS) { - ret = parallels_fix_leak(bs, res); - if (ret < 0) { - return ret; - } - } - count = DIV_ROUND_UP(leak_size, s->cluster_size); fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n", fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR", leak_size); @@ -XXX,XX +XXX,XX @@ static int parallels_check_leak(BlockDriverState *bs, return 0; } +static int parallels_check_duplicate(BlockDriverState *bs, + BdrvCheckResult *res, + BdrvCheckMode *fix) +{ + BDRVParallelsState *s = bs->opaque; + QEMUIOVector qiov; + int64_t off, high_off, sector; + unsigned long *bitmap; + uint32_t i, bitmap_size, cluster_index; + int n, ret = 0; + uint64_t *buf = NULL; + + high_off = highest_offset(s); + if (high_off == 0) { + return 0; + } + + /* + * Create a bitmap of used clusters. + * If a bit is set, there is a BAT entry pointing to this cluster. + * Loop through the BAT entries, check bits relevant to an entry offset. + * If bit is set, this entry is duplicated. Otherwise set the bit. + * + * We shouldn't worry about newly allocated clusters outside the image + * because they are created higher then any existing cluster pointed by + * a BAT entry. + */ + bitmap_size = host_cluster_index(s, high_off) + 1; + bitmap = bitmap_new(bitmap_size); + + buf = qemu_memalign(4096, s->cluster_size); + qemu_iovec_init(&qiov, 0); + qemu_iovec_add(&qiov, buf, s->cluster_size); + + for (i = 0; i < s->bat_size; i++) { + off = bat2sect(s, i) << BDRV_SECTOR_BITS; + if (off == 0) { + continue; + } + + cluster_index = host_cluster_index(s, off); + if (test_bit(cluster_index, bitmap)) { + /* this cluster duplicates another one */ + fprintf(stderr, + "%s duplicate offset in BAT entry %u\n", + *fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i); + + res->corruptions++; + + if (*fix & BDRV_FIX_ERRORS) { + /* + * Reset the entry and allocate a new cluster + * for the relevant guest offset. In this way we let + * the lower layer to place the new cluster properly. + * Copy the original cluster to the allocated one. + */ + parallels_set_bat_entry(s, i, 0); + + ret = bdrv_co_pread(bs->file, off, s->cluster_size, buf, 0); + if (ret < 0) { + res->check_errors++; + goto out; + } + + sector = (i * s->cluster_size) >> BDRV_SECTOR_BITS; + sector = allocate_clusters(bs, sector, s->tracks, &n); + if (sector < 0) { + res->check_errors++; + ret = sector; + goto out; + } + off = sector << BDRV_SECTOR_BITS; + if (off > high_off) { + high_off = off; + } + + ret = bdrv_co_pwritev(bs->file, off, s->cluster_size, &qiov, 0); + if (ret < 0) { + res->check_errors++; + goto out; + } + + /* + * In the future allocate_cluster() will reuse holed offsets + * inside the image. Keep the used clusters bitmap content + * consistent for the new allocated clusters too. + * + * Note, clusters allocated outside the current image are not + * considered, and the bitmap size doesn't change. + */ + cluster_index = host_cluster_index(s, off); + if (cluster_index < bitmap_size) { + bitmap_set(bitmap, cluster_index, 1); + } + + /* + * When new clusters are allocated, file size increases by + * 128 Mb blocks. We need to truncate the file to the right + * size. Let the leak fix code make its job. + */ + *fix |= BDRV_FIX_LEAKS; + res->corruptions_fixed++; + } + res->image_end_offset = high_off + s->cluster_size; + } else { + bitmap_set(bitmap, cluster_index, 1); + } + } + +out: + qemu_iovec_destroy(&qiov); + g_free(buf); + g_free(bitmap); + return ret; +} + static void parallels_collect_statistics(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) @@ -XXX,XX +XXX,XX @@ static int coroutine_fn parallels_co_check(BlockDriverState *bs, return ret; } + ret = parallels_check_duplicate(bs, res, &fix); + if (ret < 0) { + return ret; + } + parallels_collect_statistics(bs, res, fix); + + if (fix & BDRV_FIX_LEAKS && + (res->corruptions_fixed || res->leaks_fixed)) { + ret = parallels_fix_leak(bs, res); + if (ret < 0) { + return ret; + } + } } ret = bdrv_co_flush(bs); -- 2.34.1
If the check is called during normal work, tracking of the check must be present in VM logs to have some clues if something going wrong with user's data. Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- block/parallels.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ #include "qemu/bswap.h" #include "qemu/bitmap.h" #include "qemu/memalign.h" +#include "qemu/log-for-trace.h" #include "migration/blocker.h" #include "parallels.h" @@ -XXX,XX +XXX,XX @@ static void parallels_check_unclean(BlockDriverState *bs, return; } - fprintf(stderr, "%s image was not closed correctly\n", - fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR"); + qemu_log("%s image was not closed correctly\n", + fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR"); res->corruptions++; if (fix & BDRV_FIX_ERRORS) { /* parallels_close will do the job right */ @@ -XXX,XX +XXX,XX @@ static int parallels_check_outside_image(BlockDriverState *bs, for (i = 0; i < s->bat_size; i++) { off = bat2sect(s, i) << BDRV_SECTOR_BITS; if (off >= size) { - fprintf(stderr, "%s cluster %u is outside image\n", - fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i); + qemu_log("%s cluster %u is outside image\n", + fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i); res->corruptions++; if (fix & BDRV_FIX_ERRORS) { parallels_set_bat_entry(s, i, 0); @@ -XXX,XX +XXX,XX @@ static int parallels_check_leak(BlockDriverState *bs, } count = DIV_ROUND_UP(leak_size, s->cluster_size); - fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n", - fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR", leak_size); + qemu_log("%s space leaked at the end of the image %" PRId64 "\n", + fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR", leak_size); res->leaks += count; if (fix & BDRV_FIX_LEAKS) { @@ -XXX,XX +XXX,XX @@ static int parallels_check_duplicate(BlockDriverState *bs, cluster_index = host_cluster_index(s, off); if (test_bit(cluster_index, bitmap)) { /* this cluster duplicates another one */ - fprintf(stderr, - "%s duplicate offset in BAT entry %u\n", - *fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i); + qemu_log("%s duplicate offset in BAT entry %u\n", + *fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i); res->corruptions++; -- 2.34.1
Repair an image at opening if the image is unclean or out-of-image corruption was detected. Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- block/parallels.c | 67 +++++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 31 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static int coroutine_fn parallels_co_check(BlockDriverState *bs, return ret; } - static int coroutine_fn parallels_co_create(BlockdevCreateOptions* opts, Error **errp) { @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, { BDRVParallelsState *s = bs->opaque; ParallelsHeader ph; - int ret, size, i; - int64_t file_size; + int ret, size; + int64_t file_size, high_off; QemuOpts *opts = NULL; Error *local_err = NULL; char *buf; @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, } s->bat_bitmap = (uint32_t *)(s->header + 1); - for (i = 0; i < s->bat_size; i++) { - int64_t off = bat2sect(s, i); - if (off >= file_size) { - if (flags & BDRV_O_CHECK) { - continue; - } - error_setg(errp, "parallels: Offset %" PRIi64 " in BAT[%d] entry " - "is larger than file size (%" PRIi64 ")", - off, i, file_size); - ret = -EINVAL; - goto fail; - } - if (off >= s->data_end) { - s->data_end = off + s->tracks; - } - } - - if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) { - /* Image was not closed correctly. The check is mandatory */ - s->header_unclean = true; - if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) { - error_setg(errp, "parallels: Image was not closed correctly; " - "cannot be opened read/write"); - ret = -EACCES; - goto fail; - } - } - opts = qemu_opts_create(¶llels_runtime_opts, NULL, 0, errp); if (!opts) { goto fail_options; @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, error_free(s->migration_blocker); goto fail; } + qemu_co_mutex_init(&s->lock); + + if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) { + s->header_unclean = true; + } + + high_off = highest_offset(s) >> BDRV_SECTOR_BITS; + if (high_off >= s->data_end) { + s->data_end = high_off + s->tracks; + } + + /* + * We don't repair the image here if it is opened for checks and + * shouldn't change the image if BDRV_O_RDWR or BDRV_O_INACTIVE + * flag is present. + */ + if ((flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) || !(flags & BDRV_O_RDWR)) { + return 0; + } + + /* + * Repair the image if it's dirty or + * out-of-image corruption was detected. + */ + if (s->data_end > file_size || s->header_unclean) { + BdrvCheckResult res; + ret = bdrv_check(bs, &res, BDRV_FIX_ERRORS | BDRV_FIX_LEAKS); + if (ret < 0) { + error_setg_errno(errp, -ret, + "Could not repair corrupted image"); + goto fail; + } + } + return 0; fail_format: -- 2.34.1
Fix incorrect data end calculation in parallels_open(). Check if data_end greater than the file size. Add change_info argument to parallels_check_leak(). Add checking and repairing duplicate offsets in BAT Image repairing in parallels_open(). v7: 3: Renamed "change_info" argument to "explicit", move info printing under explicit condition. 4: Different patch. Add data_start field to BDRVParallelsState for future host_cluster_index() function. 5: Prevously was 4th. Used s->data_start instead of s->header->data_off. Add bitmap size increasing if file length is not cluster aligned. Revert a couple conditions for better readability. Changed sectors calculation for better code transparency. Replaced sector variable by host_sector and guest_sector. Renamed off to host_off. Moved out_repare_bat: below return to get jumps on error path only. 6: Prevously was 5th. 7: New patch. Replace bdrv_getlength() by bdrv_co_getlength() in parallels_check_outside_image() because it is used in coroutine context. 8: New patch. Add data_off check. v6: 2: Different patch. Refused to split image leak handling. Instead there is a patch with a data_end check. 3: Different patch. There is a patch with change_info argument. 4: Removed changing fprintf by qemu_log from this patchset. Previously 3rd patch became 4th. Replaced qemu_memalign() by qemu_blockalign(). Got rid of iovecs, replaced bdrv_co_pwritev() by bdrv_co_pwrite(). Added assert(cluster_index < bitmap_size). Now BAT changes are reverted if there was an error in the cluster copying process. Simplified a sector calculation. 5: Moved header magic check to the appropriate place. Added a migrate_del_blocker() call and s->bat_dirty_bmap freeing on error. v5: 3: Fixed a byteorder bug, fixed zero-length image handling and fixed uint32 truncation. v4: 2,5: Rebased. v3: 2: Added (size >= res->image_end_offset) assert and changed the comment in parallels_get_leak_size(). Changed error printing and leaks fixing order. 3: Removed highest_offset() helper, instead image_end_offset field is used. 5: Moved highest_offset() code to parallels_open() - now it is used only in this function. Fixed data_end update condition. Fixed a leak of s->migration_blocker. v2: 2: Moved outsude parallels_check_leak() 2 helpers: parallels_get_leak_size() and parallels_fix_leak(). 3: Used highest_offset() helper in parallels_check_leak(). Fixed a typo. Added comments. Replaced g_malloc() call by qemu_memalign(). Replaced bdrv_pread() call by bdrv_co_pread(). Got rid of keeping bytes and sectors in the same variable. Added setting the bitmap of the used clusters for a new allocated cluster if it isn't out of the bitmap. Moved the leak fix to the end of all the checks. Removed a dependence on image format for the duplicate check. 4 (old): Merged this patch to the previous. 4 (former 5): Fixed formatting. 5 (former 6): Fixed comments. Added O_INACTIVE check in the condition. Replaced inuse detection by header_unclean checking. Replaced playing with corutines by bdrv_check() usage. Alexander Ivanov (8): parallels: Incorrect data end calculation in parallels_open() parallels: Check if data_end greater than the file size parallels: Add "explicit" argument to parallels_check_leak() parallels: Add data_start field to BDRVParallelsState parallels: Add checking and repairing duplicate offsets in BAT parallels: Image repairing in parallels_open() parallels: Use bdrv_co_getlength() in parallels_check_outside_image() parallels: Add data_off check block/parallels.c | 317 +++++++++++++++++++++++++++++++++++++++------- block/parallels.h | 1 + 2 files changed, 274 insertions(+), 44 deletions(-) -- 2.34.1
The BDRVParallelsState structure contains data_end field that is measured in sectors. In parallels_open() initially this field is set by data_off field from parallels image header. According to the parallels format documentation, data_off field contains an offset, in sectors, from the start of the file to the start of the data area. For "WithoutFreeSpace" images: if data_off is zero, the offset is calculated as the end of the BAT table plus some padding to ensure sector size alignment. The parallels_open() function has code for handling zero value in data_off, but in the result data_end contains the offset in bytes. Replace the alignment to sector size by division by sector size and fix the comparision with s->header_size. Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> Reviewed-by: Denis V. Lunev <den@openvz.org> Reviewed-by: Hanna Czenczek <hreitz@redhat.com> --- block/parallels.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, } s->data_end = le32_to_cpu(ph.data_off); if (s->data_end == 0) { - s->data_end = ROUND_UP(bat_entry_off(s->bat_size), BDRV_SECTOR_SIZE); + s->data_end = DIV_ROUND_UP(size, BDRV_SECTOR_SIZE); } - if (s->data_end < s->header_size) { + if (s->data_end < (s->header_size >> BDRV_SECTOR_BITS)) { /* there is not enough unused space to fit to block align between BAT and actual data. We can't avoid read-modify-write... */ s->header_size = size; -- 2.34.1
Initially data_end is set to the data_off image header field and must not be greater than the file size. Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- block/parallels.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, and actual data. We can't avoid read-modify-write... */ s->header_size = size; } + if (s->data_end > file_nb_sectors) { + error_setg(errp, "Invalid image: incorrect data_off field"); + ret = -EINVAL; + goto fail; + } ret = bdrv_pread(bs->file, 0, s->header_size, s->header, 0); if (ret < 0) { -- 2.34.1
In the on of the next patches we need to repair leaks without changing leaks and leaks_fixed info in res. Also we don't want to print any warning about leaks. Add "explicit" argument to skip info changing if the argument is false. Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- block/parallels.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ parallels_check_outside_image(BlockDriverState *bs, BdrvCheckResult *res, static int coroutine_fn GRAPH_RDLOCK parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res, - BdrvCheckMode fix) + BdrvCheckMode fix, bool explicit) { BDRVParallelsState *s = bs->opaque; int64_t size; @@ -XXX,XX +XXX,XX @@ parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res, if (size > res->image_end_offset) { int64_t count; count = DIV_ROUND_UP(size - res->image_end_offset, s->cluster_size); - fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n", - fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR", - size - res->image_end_offset); - res->leaks += count; + if (explicit) { + fprintf(stderr, + "%s space leaked at the end of the image %" PRId64 "\n", + fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR", + size - res->image_end_offset); + res->leaks += count; + } if (fix & BDRV_FIX_LEAKS) { Error *local_err = NULL; @@ -XXX,XX +XXX,XX @@ parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res, res->check_errors++; return ret; } - res->leaks_fixed += count; + if (explicit) { + res->leaks_fixed += count; + } } } @@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res, return ret; } - ret = parallels_check_leak(bs, res, fix); + ret = parallels_check_leak(bs, res, fix, true); if (ret < 0) { return ret; } -- 2.34.1
In the next patch we will need the offset of the data area for host cluster index calculation. Add this field and setting up code. Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- block/parallels.c | 7 ++++--- block/parallels.h | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, ret = -ENOMEM; goto fail; } - s->data_end = le32_to_cpu(ph.data_off); - if (s->data_end == 0) { - s->data_end = DIV_ROUND_UP(size, BDRV_SECTOR_SIZE); + s->data_start = le32_to_cpu(ph.data_off); + if (s->data_start == 0) { + s->data_start = DIV_ROUND_UP(size, BDRV_SECTOR_SIZE); } + s->data_end = s->data_start; if (s->data_end < (s->header_size >> BDRV_SECTOR_BITS)) { /* there is not enough unused space to fit to block align between BAT and actual data. We can't avoid read-modify-write... */ diff --git a/block/parallels.h b/block/parallels.h index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.h +++ b/block/parallels.h @@ -XXX,XX +XXX,XX @@ typedef struct BDRVParallelsState { uint32_t *bat_bitmap; unsigned int bat_size; + int64_t data_start; int64_t data_end; uint64_t prealloc_size; ParallelsPreallocMode prealloc_mode; -- 2.34.1
Cluster offsets must be unique among all the BAT entries. Find duplicate offsets in the BAT and fix it by copying the content of the relevant cluster to a newly allocated cluster and set the new cluster offset to the duplicated entry. Add host_cluster_index() helper to deduplicate the code. When new clusters are allocated, the file size increases by 128 Mb. Call parallels_check_leak() to fix this leak. Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- block/parallels.c | 144 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num, return MIN(nb_sectors, ret); } +static uint32_t host_cluster_index(BDRVParallelsState *s, int64_t off) +{ + off -= s->data_start << BDRV_SECTOR_BITS; + return off / s->cluster_size; +} + static int64_t block_status(BDRVParallelsState *s, int64_t sector_num, int nb_sectors, int *pnum) { @@ -XXX,XX +XXX,XX @@ parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res, return 0; } +static int coroutine_fn GRAPH_RDLOCK +parallels_check_duplicate(BlockDriverState *bs, BdrvCheckResult *res, + BdrvCheckMode fix) +{ + BDRVParallelsState *s = bs->opaque; + int64_t host_off, host_sector, guest_sector; + unsigned long *bitmap; + uint32_t i, bitmap_size, cluster_index, bat_entry; + int n, ret = 0; + uint64_t *buf = NULL; + bool fixed = false; + + /* + * Create a bitmap of used clusters. + * If a bit is set, there is a BAT entry pointing to this cluster. + * Loop through the BAT entries, check bits relevant to an entry offset. + * If bit is set, this entry is duplicated. Otherwise set the bit. + * + * We shouldn't worry about newly allocated clusters outside the image + * because they are created higher then any existing cluster pointed by + * a BAT entry. + */ + bitmap_size = host_cluster_index(s, res->image_end_offset); + if (bitmap_size == 0) { + return 0; + } + if (res->image_end_offset % s->cluster_size) { + /* A not aligned image end leads to a bitmap shorter by 1 */ + bitmap_size++; + } + + bitmap = bitmap_new(bitmap_size); + + buf = qemu_blockalign(bs, s->cluster_size); + + for (i = 0; i < s->bat_size; i++) { + host_off = bat2sect(s, i) << BDRV_SECTOR_BITS; + if (host_off == 0) { + continue; + } + + cluster_index = host_cluster_index(s, host_off); + assert(cluster_index < bitmap_size); + if (!test_bit(cluster_index, bitmap)) { + bitmap_set(bitmap, cluster_index, 1); + continue; + } + + /* this cluster duplicates another one */ + fprintf(stderr, "%s duplicate offset in BAT entry %u\n", + fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i); + + res->corruptions++; + + if (!(fix & BDRV_FIX_ERRORS)) { + continue; + } + + /* + * Reset the entry and allocate a new cluster + * for the relevant guest offset. In this way we let + * the lower layer to place the new cluster properly. + * Copy the original cluster to the allocated one. + * But before save the old offset value for repairing + * if we have an error. + */ + bat_entry = s->bat_bitmap[i]; + parallels_set_bat_entry(s, i, 0); + + ret = bdrv_co_pread(bs->file, host_off, s->cluster_size, buf, 0); + if (ret < 0) { + res->check_errors++; + goto out_repare_bat; + } + + guest_sector = (i * (int64_t)s->cluster_size) >> BDRV_SECTOR_BITS; + host_sector = allocate_clusters(bs, guest_sector, s->tracks, &n); + if (host_sector < 0) { + res->check_errors++; + goto out_repare_bat; + } + host_off = host_sector << BDRV_SECTOR_BITS; + + ret = bdrv_co_pwrite(bs->file, host_off, s->cluster_size, buf, 0); + if (ret < 0) { + res->check_errors++; + goto out_repare_bat; + } + + if (host_off + s->cluster_size > res->image_end_offset) { + res->image_end_offset = host_off + s->cluster_size; + } + + /* + * In the future allocate_cluster() will reuse holed offsets + * inside the image. Keep the used clusters bitmap content + * consistent for the new allocated clusters too. + * + * Note, clusters allocated outside the current image are not + * considered, and the bitmap size doesn't change. + */ + cluster_index = host_cluster_index(s, host_off); + if (cluster_index < bitmap_size) { + bitmap_set(bitmap, cluster_index, 1); + } + + fixed = true; + res->corruptions_fixed++; + + } + + if (fixed) { + /* + * When new clusters are allocated, the file size increases by + * 128 Mb. We need to truncate the file to the right size. Let + * the leak fix code make its job without res changing. + */ + ret = parallels_check_leak(bs, res, fix, false); + } + +out_free: + g_free(buf); + g_free(bitmap); + return ret; +/* + * We can get here only from places where index and old_offset have + * meaningful values. + */ +out_repare_bat: + s->bat_bitmap[i] = bat_entry; + goto out_free; +} + static void parallels_collect_statistics(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) @@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res, return ret; } + ret = parallels_check_duplicate(bs, res, fix); + if (ret < 0) { + return ret; + } + parallels_collect_statistics(bs, res, fix); } -- 2.34.1
Repair an image at opening if the image is unclean or out-of-image corruption was detected. Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- block/parallels.c | 70 +++++++++++++++++++++++++---------------------- 1 file changed, 38 insertions(+), 32 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, BDRVParallelsState *s = bs->opaque; ParallelsHeader ph; int ret, size, i; - int64_t file_nb_sectors; + int64_t file_nb_sectors, sector; QemuOpts *opts = NULL; Error *local_err = NULL; char *buf; @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, and actual data. We can't avoid read-modify-write... */ s->header_size = size; } - if (s->data_end > file_nb_sectors) { - error_setg(errp, "Invalid image: incorrect data_off field"); - ret = -EINVAL; - goto fail; - } ret = bdrv_pread(bs->file, 0, s->header_size, s->header, 0); if (ret < 0) { @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, } s->bat_bitmap = (uint32_t *)(s->header + 1); - for (i = 0; i < s->bat_size; i++) { - int64_t off = bat2sect(s, i); - if (off >= file_nb_sectors) { - if (flags & BDRV_O_CHECK) { - continue; - } - error_setg(errp, "parallels: Offset %" PRIi64 " in BAT[%d] entry " - "is larger than file size (%" PRIi64 ")", - off << BDRV_SECTOR_BITS, i, - file_nb_sectors << BDRV_SECTOR_BITS); - ret = -EINVAL; - goto fail; - } - if (off >= s->data_end) { - s->data_end = off + s->tracks; - } - } - if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) { - /* Image was not closed correctly. The check is mandatory */ s->header_unclean = true; - if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) { - error_setg(errp, "parallels: Image was not closed correctly; " - "cannot be opened read/write"); - ret = -EACCES; - goto fail; - } } opts = qemu_opts_create(¶llels_runtime_opts, NULL, 0, errp); @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, bdrv_get_device_or_node_name(bs)); ret = migrate_add_blocker(s->migration_blocker, errp); if (ret < 0) { - error_free(s->migration_blocker); + error_setg(errp, "Migration blocker error"); goto fail; } qemu_co_mutex_init(&s->lock); + + for (i = 0; i < s->bat_size; i++) { + sector = bat2sect(s, i); + if (sector + s->tracks > s->data_end) { + s->data_end = sector + s->tracks; + } + } + + /* + * We don't repair the image here if it's opened for checks. Also we don't + * want to change inactive images and can't change readonly images. + */ + if ((flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) || !(flags & BDRV_O_RDWR)) { + return 0; + } + + /* + * Repair the image if it's dirty or + * out-of-image corruption was detected. + */ + if (s->data_end > file_nb_sectors || s->header_unclean) { + BdrvCheckResult res; + ret = bdrv_check(bs, &res, BDRV_FIX_ERRORS | BDRV_FIX_LEAKS); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not repair corrupted image"); + migrate_del_blocker(s->migration_blocker); + goto fail; + } + } + return 0; fail_format: @@ -XXX,XX +XXX,XX @@ fail_format: fail_options: ret = -EINVAL; fail: + /* + * "s" object was allocated by g_malloc0 so we can safely + * try to free its fields even they were not allocated. + */ + error_free(s->migration_blocker); + g_free(s->bat_dirty_bmap); qemu_vfree(s->header); return ret; } -- 2.34.1
bdrv_co_getlength() should be used in coroutine context. Replace bdrv_getlength() by bdrv_co_getlength() in parallels_check_outside_image(). Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- block/parallels.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res, int64_t size; int ret; - size = bdrv_getlength(bs->file->bs); + size = bdrv_co_getlength(bs->file->bs); if (size < 0) { res->check_errors++; return size; -- 2.34.1
data_off field of the parallels image header can be corrupted. Check if this field greater than the header + BAT size and less than file size. Change checking code in parallels_open() accordingly. Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- block/parallels.c | 98 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 83 insertions(+), 15 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static void parallels_check_unclean(BlockDriverState *bs, } } +/* + * Returns 0 if data_off is correct or returns correct offset. + */ +static uint32_t parallels_test_data_off(BDRVParallelsState *s, + int64_t file_nb_sectors) +{ + uint32_t data_off, min_off; + bool old_magic; + + old_magic = !memcmp(s->header->magic, HEADER_MAGIC, 16); + + data_off = le32_to_cpu(s->header->data_off); + if (data_off == 0 && old_magic) { + return 0; + } + + min_off = DIV_ROUND_UP(bat_entry_off(s->bat_size), BDRV_SECTOR_SIZE); + if (!old_magic) { + min_off = ROUND_UP(min_off, s->cluster_size / BDRV_SECTOR_SIZE); + } + + if (data_off >= min_off && data_off <= file_nb_sectors) { + return 0; + } + + return min_off; +} + +static int coroutine_fn GRAPH_RDLOCK +parallels_check_data_off(BlockDriverState *bs, BdrvCheckResult *res, + BdrvCheckMode fix) +{ + BDRVParallelsState *s = bs->opaque; + int64_t file_size; + uint32_t data_off; + + file_size = bdrv_co_nb_sectors(bs->file->bs); + if (file_size < 0) { + res->check_errors++; + return file_size; + } + + data_off = parallels_test_data_off(s, file_size); + if (data_off == 0) { + return 0; + } + + res->corruptions++; + if (fix & BDRV_FIX_ERRORS) { + s->header->data_off = cpu_to_le32(data_off); + res->corruptions_fixed++; + } + + fprintf(stderr, "%s data_off field has incorrect value\n", + fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR"); + + return 0; +} + static int coroutine_fn GRAPH_RDLOCK parallels_check_outside_image(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) @@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res, WITH_QEMU_LOCK_GUARD(&s->lock) { parallels_check_unclean(bs, res, fix); + ret = parallels_check_data_off(bs, res, fix); + if (ret < 0) { + return ret; + } + ret = parallels_check_outside_image(bs, res, fix); if (ret < 0) { return ret; @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, BDRVParallelsState *s = bs->opaque; ParallelsHeader ph; int ret, size, i; - int64_t file_nb_sectors, sector; + int64_t file_nb_sectors, sector, new_data_start; QemuOpts *opts = NULL; Error *local_err = NULL; char *buf; @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, ret = -ENOMEM; goto fail; } - s->data_start = le32_to_cpu(ph.data_off); - if (s->data_start == 0) { - s->data_start = DIV_ROUND_UP(size, BDRV_SECTOR_SIZE); + + ret = bdrv_pread(bs->file, 0, s->header_size, s->header, 0); + if (ret < 0) { + goto fail; + } + s->bat_bitmap = (uint32_t *)(s->header + 1); + + if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) { + s->header_unclean = true; + } + + new_data_start = parallels_test_data_off(s, file_nb_sectors); + if (new_data_start == 0) { + s->data_start = le32_to_cpu(ph.data_off); + } else { + s->data_start = new_data_start; } s->data_end = s->data_start; if (s->data_end < (s->header_size >> BDRV_SECTOR_BITS)) { @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, s->header_size = size; } - ret = bdrv_pread(bs->file, 0, s->header_size, s->header, 0); - if (ret < 0) { - goto fail; - } - s->bat_bitmap = (uint32_t *)(s->header + 1); - - if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) { - s->header_unclean = true; - } - opts = qemu_opts_create(¶llels_runtime_opts, NULL, 0, errp); if (!opts) { goto fail_options; @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, * Repair the image if it's dirty or * out-of-image corruption was detected. */ - if (s->data_end > file_nb_sectors || s->header_unclean) { + if (s->data_end > file_nb_sectors || s->header_unclean + || new_data_start != 0) { BdrvCheckResult res; ret = bdrv_check(bs, &res, BDRV_FIX_ERRORS | BDRV_FIX_LEAKS); if (ret < 0) { -- 2.34.1