:p
atchew
Login
The following changes since commit 6d8e75d41c58892ccc5d4ad61c4da476684c1c83: Merge remote-tracking branch 'remotes/rth/tags/pull-axp-20190519' into staging (2019-05-20 11:38:36 +0100) are available in the Git repository at: git://repo.or.cz/qemu/kevin.git tags/for-upstream for you to fetch changes up to c423a6af592cf36b4f149c54e2966dd0016b7e96: iotests: Make 245 faster and more reliable (2019-05-20 17:08:57 +0200) ---------------------------------------------------------------- Block layer patches: - block: AioContext management, part 1 - qmp: forbid qmp_cont in RUN_STATE_FINISH_MIGRATE - nvme: fix copy direction in DMA reads going to CMB - file-posix: Fix block status for unaligned raw images with O_DIRECT - file-posix: Fix xfs_write_zeroes() after EOF - Documentation and iotests improvements ---------------------------------------------------------------- Alberto Garcia (2): qcow2: Define and use QCOW2_COMPRESSED_SECTOR_SIZE block: Use BDRV_REQUEST_MAX_BYTES instead of BDRV_REQUEST_MAX_SECTORS Kevin Wolf (10): block: Add bdrv_try_set_aio_context() block: Make bdrv_attach/detach_aio_context() static block: Move recursion to bdrv_set_aio_context() block: Propagate AioContext change to parents test-block-iothread: Test AioContext propagation through the tree block: Implement .(can_)set_aio_ctx for BlockBackend block: Add blk_set_allow_aio_context_change() blockjob: Propagate AioContext change to all job nodes blockjob: Remove AioContext notifiers test-block-iothread: Test AioContext propagation for block jobs Klaus Birkelund Jensen (1): nvme: fix copy direction in DMA reads going to CMB Max Reitz (9): block/file-posix: Truncate in xfs_write_zeroes() block/file-posix: Unaligned O_DIRECT block-status iotests: Test unaligned raw images with O_DIRECT qemu-img.texi: Be specific about JSON object types qemu-img.texi: Describe human-readable info output block: Improve "Block node is read-only" message iotests.py: Let assert_qmp() accept an array iotests.py: Fix VM.run_job iotests: Make 245 faster and more reliable Vladimir Sementsov-Ogievskiy (2): qmp: forbid qmp_cont in RUN_STATE_FINISH_MIGRATE iotest: fix 169: do not run qmp_cont in RUN_STATE_FINISH_MIGRATE block/qcow2.h | 4 + include/block/block.h | 10 ++ include/block/block_int.h | 25 +---- include/sysemu/block-backend.h | 1 + block.c | 174 +++++++++++++++++++++++++++++++---- block/backup.c | 8 -- block/block-backend.c | 55 ++++++++++- block/file-posix.c | 29 ++++++ block/io.c | 6 +- block/mirror.c | 10 +- block/qcow2-cluster.c | 5 +- block/qcow2-refcount.c | 25 ++--- block/qcow2.c | 3 +- blockjob.c | 77 ++++++++-------- hw/block/nvme.c | 2 +- qemu-io-cmds.c | 7 +- qmp.c | 3 + tests/test-block-iothread.c | 202 +++++++++++++++++++++++++++++++++++++++++ qemu-img.texi | 52 ++++++++++- tests/qemu-iotests/169 | 7 +- tests/qemu-iotests/221 | 4 + tests/qemu-iotests/245 | 22 +++-- tests/qemu-iotests/245.out | 12 +++ tests/qemu-iotests/253 | 84 +++++++++++++++++ tests/qemu-iotests/253.out | 14 +++ tests/qemu-iotests/group | 1 + tests/qemu-iotests/iotests.py | 20 +++- 27 files changed, 728 insertions(+), 134 deletions(-) create mode 100755 tests/qemu-iotests/253 create mode 100644 tests/qemu-iotests/253.out
From: Max Reitz <mreitz@redhat.com> XFS_IOC_ZERO_RANGE does not increase the file length: $ touch foo $ xfs_io -c 'zero 0 65536' foo $ stat -c "size=%s, blocks=%b" foo size=0, blocks=128 We do want writes beyond the EOF to automatically increase the file length, however. This is evidenced by the fact that iotest 061 is broken on XFS since qcow2's check implementation checks for blocks beyond the EOF. Reported-by: Kevin Wolf <kwolf@redhat.com> Signed-off-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- block/file-posix.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/block/file-posix.c b/block/file-posix.c index XXXXXXX..XXXXXXX 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -XXX,XX +XXX,XX @@ out: #ifdef CONFIG_XFS static int xfs_write_zeroes(BDRVRawState *s, int64_t offset, uint64_t bytes) { + int64_t len; struct xfs_flock64 fl; int err; + len = lseek(s->fd, 0, SEEK_END); + if (len < 0) { + return -errno; + } + + if (offset + bytes > len) { + /* XFS_IOC_ZERO_RANGE does not increase the file length */ + if (ftruncate(s->fd, offset + bytes) < 0) { + return -errno; + } + } + memset(&fl, 0, sizeof(fl)); fl.l_whence = SEEK_SET; fl.l_start = offset; -- 2.20.1
From: Alberto Garcia <berto@igalia.com> When an L2 table entry points to a compressed cluster the space used by the data is specified in 512-byte sectors. This size is independent from BDRV_SECTOR_SIZE and is specific to the qcow2 file format. The QCOW2_COMPRESSED_SECTOR_SIZE constant defined in this patch makes this explicit. Signed-off-by: Alberto Garcia <berto@igalia.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- block/qcow2.h | 4 ++++ block/qcow2-cluster.c | 5 +++-- block/qcow2-refcount.c | 25 ++++++++++++++----------- block/qcow2.c | 3 ++- 4 files changed, 23 insertions(+), 14 deletions(-) diff --git a/block/qcow2.h b/block/qcow2.h index XXXXXXX..XXXXXXX 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -XXX,XX +XXX,XX @@ #define MIN_CLUSTER_BITS 9 #define MAX_CLUSTER_BITS 21 +/* Defined in the qcow2 spec (compressed cluster descriptor) */ +#define QCOW2_COMPRESSED_SECTOR_SIZE 512U +#define QCOW2_COMPRESSED_SECTOR_MASK (~(QCOW2_COMPRESSED_SECTOR_SIZE - 1)) + /* Must be at least 2 to cover COW */ #define MIN_L2_CACHE_SIZE 2 /* cache entries */ diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index XXXXXXX..XXXXXXX 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -XXX,XX +XXX,XX @@ int qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, return cluster_offset; } - nb_csectors = ((cluster_offset + compressed_size - 1) >> 9) - - (cluster_offset >> 9); + nb_csectors = + (cluster_offset + compressed_size - 1) / QCOW2_COMPRESSED_SECTOR_SIZE - + (cluster_offset / QCOW2_COMPRESSED_SECTOR_SIZE); cluster_offset |= QCOW_OFLAG_COMPRESSED | ((uint64_t)nb_csectors << s->csize_shift); diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index XXXXXXX..XXXXXXX 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -XXX,XX +XXX,XX @@ void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry, switch (ctype) { case QCOW2_CLUSTER_COMPRESSED: { - int nb_csectors; - nb_csectors = ((l2_entry >> s->csize_shift) & - s->csize_mask) + 1; - qcow2_free_clusters(bs, - (l2_entry & s->cluster_offset_mask) & ~511, - nb_csectors * 512, type); + int64_t offset = (l2_entry & s->cluster_offset_mask) + & QCOW2_COMPRESSED_SECTOR_MASK; + int size = QCOW2_COMPRESSED_SECTOR_SIZE * + (((l2_entry >> s->csize_shift) & s->csize_mask) + 1); + qcow2_free_clusters(bs, offset, size, type); } break; case QCOW2_CLUSTER_NORMAL: @@ -XXX,XX +XXX,XX @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, nb_csectors = ((entry >> s->csize_shift) & s->csize_mask) + 1; if (addend != 0) { + uint64_t coffset = (entry & s->cluster_offset_mask) + & QCOW2_COMPRESSED_SECTOR_MASK; ret = update_refcount( - bs, (entry & s->cluster_offset_mask) & ~511, - nb_csectors * 512, abs(addend), addend < 0, + bs, coffset, + nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE, + abs(addend), addend < 0, QCOW2_DISCARD_SNAPSHOT); if (ret < 0) { goto fail; @@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, nb_csectors = ((l2_entry >> s->csize_shift) & s->csize_mask) + 1; l2_entry &= s->cluster_offset_mask; - ret = qcow2_inc_refcounts_imrt(bs, res, - refcount_table, refcount_table_size, - l2_entry & ~511, nb_csectors * 512); + ret = qcow2_inc_refcounts_imrt( + bs, res, refcount_table, refcount_table_size, + l2_entry & QCOW2_COMPRESSED_SECTOR_MASK, + nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE); if (ret < 0) { goto fail; } diff --git a/block/qcow2.c b/block/qcow2.c index XXXXXXX..XXXXXXX 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -XXX,XX +XXX,XX @@ qcow2_co_preadv_compressed(BlockDriverState *bs, coffset = file_cluster_offset & s->cluster_offset_mask; nb_csectors = ((file_cluster_offset >> s->csize_shift) & s->csize_mask) + 1; - csize = nb_csectors * 512 - (coffset & 511); + csize = nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE - + (coffset & ~QCOW2_COMPRESSED_SECTOR_MASK); buf = g_try_malloc(csize); if (!buf) { -- 2.20.1
From: Alberto Garcia <berto@igalia.com> There are a few places in which we turn a number of bytes into sectors in order to compare the result against BDRV_REQUEST_MAX_SECTORS instead of using BDRV_REQUEST_MAX_BYTES directly. Signed-off-by: Alberto Garcia <berto@igalia.com> Reviewed-by: Stefano Garzarella <sgarzare@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- block/io.c | 6 +++--- qemu-io-cmds.c | 7 +++---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/block/io.c b/block/io.c index XXXXXXX..XXXXXXX 100644 --- a/block/io.c +++ b/block/io.c @@ -XXX,XX +XXX,XX @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self) static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset, size_t size) { - if (size > BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) { + if (size > BDRV_REQUEST_MAX_BYTES) { return -EIO; } @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs, assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); - assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS); + assert(bytes <= BDRV_REQUEST_MAX_BYTES); assert(drv->bdrv_co_readv); return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov); @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs, assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); - assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS); + assert(bytes <= BDRV_REQUEST_MAX_BYTES); assert(drv->bdrv_co_writev); ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov, diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index XXXXXXX..XXXXXXX 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -XXX,XX +XXX,XX @@ static int do_write_compressed(BlockBackend *blk, char *buf, int64_t offset, { int ret; - if (bytes >> 9 > BDRV_REQUEST_MAX_SECTORS) { + if (bytes > BDRV_REQUEST_MAX_BYTES) { return -ERANGE; } @@ -XXX,XX +XXX,XX @@ static int discard_f(BlockBackend *blk, int argc, char **argv) if (bytes < 0) { print_cvtnum_err(bytes, argv[optind]); return bytes; - } else if (bytes >> BDRV_SECTOR_BITS > BDRV_REQUEST_MAX_SECTORS) { + } else if (bytes > BDRV_REQUEST_MAX_BYTES) { printf("length cannot exceed %"PRIu64", given %s\n", - (uint64_t)BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS, - argv[optind]); + (uint64_t)BDRV_REQUEST_MAX_BYTES, argv[optind]); return -EINVAL; } -- 2.20.1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> qmp_cont in RUN_STATE_FINISH_MIGRATE may lead to moving vm to RUN_STATE_RUNNING, before actual migration finish. So, when migration thread will try to go to RUN_STATE_POSTMIGRATE, assuming transition RUN_STATE_FINISH_MIGRATE->RUN_STATE_POSTMIGRATE, it will crash, as current state is RUN_STATE_RUNNING, and transition RUN_STATE_RUNNING->RUN_STATE_POSTMIGRATE is forbidden. Reported-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- qmp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/qmp.c b/qmp.c index XXXXXXX..XXXXXXX 100644 --- a/qmp.c +++ b/qmp.c @@ -XXX,XX +XXX,XX @@ void qmp_cont(Error **errp) return; } else if (runstate_check(RUN_STATE_SUSPENDED)) { return; + } else if (runstate_check(RUN_STATE_FINISH_MIGRATE)) { + error_setg(errp, "Migration is not finalized yet"); + return; } for (blk = blk_next(NULL); blk; blk = blk_next(blk)) { -- 2.20.1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> qmp_cont fails if vm in RUN_STATE_FINISH_MIGRATE, so let's wait for final RUN_STATE_POSTMIGRATE. Also, while being here, check qmp_cont result. Reported-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> Tested-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- tests/qemu-iotests/169 | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/qemu-iotests/169 b/tests/qemu-iotests/169 index XXXXXXX..XXXXXXX 100755 --- a/tests/qemu-iotests/169 +++ b/tests/qemu-iotests/169 @@ -XXX,XX +XXX,XX @@ class TestDirtyBitmapMigration(iotests.QMPTestCase): event = self.vm_a.event_wait('MIGRATION') if event['data']['status'] == 'completed': break + while True: + result = self.vm_a.qmp('query-status') + if (result['return']['status'] == 'postmigrate'): + break # test that bitmap is still here removed = (not migrate_bitmaps) and persistent self.check_bitmap(self.vm_a, False if removed else sha256) - self.vm_a.qmp('cont') + result = self.vm_a.qmp('cont') + self.assert_qmp(result, 'return', {}) # test that bitmap is still here after invalidation self.check_bitmap(self.vm_a, sha256) -- 2.20.1
From: Klaus Birkelund Jensen <klaus@birkelund.eu> `nvme_dma_read_prp` erronously used `qemu_iovec_*to*_buf` instead of `qemu_iovec_*from*_buf` when the request involved the controller memory buffer. Signed-off-by: Klaus Birkelund Jensen <klaus.jensen@cnexlabs.com> Reviewed-by: Kenneth Heitke <kenneth.heitke@intel.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- hw/block/nvme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index XXXXXXX..XXXXXXX 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len, } qemu_sglist_destroy(&qsg); } else { - if (unlikely(qemu_iovec_to_buf(&iov, 0, ptr, len) != len)) { + if (unlikely(qemu_iovec_from_buf(&iov, 0, ptr, len) != len)) { trace_nvme_err_invalid_dma(); status = NVME_INVALID_FIELD | NVME_DNR; } -- 2.20.1
Eventually, we want to make sure that all parents and all children of a node are in the same AioContext as the node itself. This means that changing the AioContext may fail because one of the other involved parties (e.g. a guest device that was configured with an iothread) cannot allow switching to a different AioContext. Introduce a set of functions that allow to first check whether all involved nodes can switch to a new context and only then do the actual switch. The check recursively covers children and parents. Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- include/block/block.h | 8 ++++ include/block/block_int.h | 3 ++ block.c | 92 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 103 insertions(+) diff --git a/include/block/block.h b/include/block/block.h index XXXXXXX..XXXXXXX 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -XXX,XX +XXX,XX @@ void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co); * This function must be called with iothread lock held. */ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context); +int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, + Error **errp); +int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, + BdrvChild *ignore_child, Error **errp); +bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx, + GSList **ignore, Error **errp); +bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx, + GSList **ignore, Error **errp); int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz); int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo); diff --git a/include/block/block_int.h b/include/block/block_int.h index XXXXXXX..XXXXXXX 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -XXX,XX +XXX,XX @@ struct BdrvChildRole { * can update its reference. */ int (*update_filename)(BdrvChild *child, BlockDriverState *new_base, const char *filename, Error **errp); + + bool (*can_set_aio_ctx)(BdrvChild *child, AioContext *ctx, + GSList **ignore, Error **errp); }; extern const BdrvChildRole child_file; diff --git a/block.c b/block.c index XXXXXXX..XXXXXXX 100644 --- a/block.c +++ b/block.c @@ -XXX,XX +XXX,XX @@ static int bdrv_child_cb_inactivate(BdrvChild *child) return 0; } +static bool bdrv_child_cb_can_set_aio_ctx(BdrvChild *child, AioContext *ctx, + GSList **ignore, Error **errp) +{ + BlockDriverState *bs = child->opaque; + return bdrv_can_set_aio_context(bs, ctx, ignore, errp); +} + /* * Returns the options and flags that a temporary snapshot should get, based on * the originally requested flags (the originally requested image will have @@ -XXX,XX +XXX,XX @@ const BdrvChildRole child_file = { .attach = bdrv_child_cb_attach, .detach = bdrv_child_cb_detach, .inactivate = bdrv_child_cb_inactivate, + .can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx, }; /* @@ -XXX,XX +XXX,XX @@ const BdrvChildRole child_format = { .attach = bdrv_child_cb_attach, .detach = bdrv_child_cb_detach, .inactivate = bdrv_child_cb_inactivate, + .can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx, }; static void bdrv_backing_attach(BdrvChild *c) @@ -XXX,XX +XXX,XX @@ const BdrvChildRole child_backing = { .drained_end = bdrv_child_cb_drained_end, .inactivate = bdrv_child_cb_inactivate, .update_filename = bdrv_backing_update_filename, + .can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx, }; static int bdrv_open_flags(BlockDriverState *bs, int flags) @@ -XXX,XX +XXX,XX @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) aio_context_release(new_context); } +static bool bdrv_parent_can_set_aio_context(BdrvChild *c, AioContext *ctx, + GSList **ignore, Error **errp) +{ + if (g_slist_find(*ignore, c)) { + return true; + } + *ignore = g_slist_prepend(*ignore, c); + + /* A BdrvChildRole that doesn't handle AioContext changes cannot + * tolerate any AioContext changes */ + if (!c->role->can_set_aio_ctx) { + char *user = bdrv_child_user_desc(c); + error_setg(errp, "Changing iothreads is not supported by %s", user); + g_free(user); + return false; + } + if (!c->role->can_set_aio_ctx(c, ctx, ignore, errp)) { + assert(!errp || *errp); + return false; + } + return true; +} + +bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx, + GSList **ignore, Error **errp) +{ + if (g_slist_find(*ignore, c)) { + return true; + } + *ignore = g_slist_prepend(*ignore, c); + return bdrv_can_set_aio_context(c->bs, ctx, ignore, errp); +} + +/* @ignore will accumulate all visited BdrvChild object. The caller is + * responsible for freeing the list afterwards. */ +bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx, + GSList **ignore, Error **errp) +{ + BdrvChild *c; + + if (bdrv_get_aio_context(bs) == ctx) { + return true; + } + + QLIST_FOREACH(c, &bs->parents, next_parent) { + if (!bdrv_parent_can_set_aio_context(c, ctx, ignore, errp)) { + return false; + } + } + QLIST_FOREACH(c, &bs->children, next) { + if (!bdrv_child_can_set_aio_context(c, ctx, ignore, errp)) { + return false; + } + } + + return true; +} + +int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, + BdrvChild *ignore_child, Error **errp) +{ + GSList *ignore; + bool ret; + + ignore = ignore_child ? g_slist_prepend(NULL, ignore_child) : NULL; + ret = bdrv_can_set_aio_context(bs, ctx, &ignore, errp); + g_slist_free(ignore); + + if (!ret) { + return -EPERM; + } + + bdrv_set_aio_context(bs, ctx); + return 0; +} + +int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, + Error **errp) +{ + return bdrv_child_try_set_aio_context(bs, ctx, NULL, errp); +} + void bdrv_add_aio_context_notifier(BlockDriverState *bs, void (*attached_aio_context)(AioContext *new_context, void *opaque), void (*detach_aio_context)(void *opaque), void *opaque) -- 2.20.1
Since commit b97511c7bc8, there is no reason for block drivers any more to call these functions (see the function comment in block_int.h). They are now just internal helper functions for bdrv_set_aio_context() and can be made static. Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- include/block/block_int.h | 21 --------------------- block.c | 6 +++--- 2 files changed, 3 insertions(+), 24 deletions(-) diff --git a/include/block/block_int.h b/include/block/block_int.h index XXXXXXX..XXXXXXX 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -XXX,XX +XXX,XX @@ void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix, void bdrv_add_before_write_notifier(BlockDriverState *bs, NotifierWithReturn *notifier); -/** - * bdrv_detach_aio_context: - * - * May be called from .bdrv_detach_aio_context() to detach children from the - * current #AioContext. This is only needed by block drivers that manage their - * own children. Both ->file and ->backing are automatically handled and - * block drivers should not call this function on them explicitly. - */ -void bdrv_detach_aio_context(BlockDriverState *bs); - -/** - * bdrv_attach_aio_context: - * - * May be called from .bdrv_attach_aio_context() to attach children to the new - * #AioContext. This is only needed by block drivers that manage their own - * children. Both ->file and ->backing are automatically handled and block - * drivers should not call this function on them explicitly. - */ -void bdrv_attach_aio_context(BlockDriverState *bs, - AioContext *new_context); - /** * bdrv_add_aio_context_notifier: * diff --git a/block.c b/block.c index XXXXXXX..XXXXXXX 100644 --- a/block.c +++ b/block.c @@ -XXX,XX +XXX,XX @@ static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban) g_free(ban); } -void bdrv_detach_aio_context(BlockDriverState *bs) +static void bdrv_detach_aio_context(BlockDriverState *bs) { BdrvAioNotifier *baf, *baf_tmp; BdrvChild *child; @@ -XXX,XX +XXX,XX @@ void bdrv_detach_aio_context(BlockDriverState *bs) bs->aio_context = NULL; } -void bdrv_attach_aio_context(BlockDriverState *bs, - AioContext *new_context) +static void bdrv_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) { BdrvAioNotifier *ban, *ban_tmp; BdrvChild *child; -- 2.20.1
Instead of having two recursions, in bdrv_attach_aio_context() and in bdrv_detach_aio_context(), just having one recursion is enough. Said functions are only about a single node now. It is important that the recursion doesn't happen between detaching and attaching a context to the current node because the nested call will drain the node, and draining with a NULL context would segfault. Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- block.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/block.c b/block.c index XXXXXXX..XXXXXXX 100644 --- a/block.c +++ b/block.c @@ -XXX,XX +XXX,XX @@ static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban) static void bdrv_detach_aio_context(BlockDriverState *bs) { BdrvAioNotifier *baf, *baf_tmp; - BdrvChild *child; assert(!bs->walking_aio_notifiers); bs->walking_aio_notifiers = true; @@ -XXX,XX +XXX,XX @@ static void bdrv_detach_aio_context(BlockDriverState *bs) if (bs->drv && bs->drv->bdrv_detach_aio_context) { bs->drv->bdrv_detach_aio_context(bs); } - QLIST_FOREACH(child, &bs->children, next) { - bdrv_detach_aio_context(child->bs); - } if (bs->quiesce_counter) { aio_enable_external(bs->aio_context); @@ -XXX,XX +XXX,XX @@ static void bdrv_attach_aio_context(BlockDriverState *bs, AioContext *new_context) { BdrvAioNotifier *ban, *ban_tmp; - BdrvChild *child; if (bs->quiesce_counter) { aio_disable_external(new_context); @@ -XXX,XX +XXX,XX @@ static void bdrv_attach_aio_context(BlockDriverState *bs, bs->aio_context = new_context; - QLIST_FOREACH(child, &bs->children, next) { - bdrv_attach_aio_context(child->bs, new_context); - } if (bs->drv && bs->drv->bdrv_attach_aio_context) { bs->drv->bdrv_attach_aio_context(bs, new_context); } @@ -XXX,XX +XXX,XX @@ static void bdrv_attach_aio_context(BlockDriverState *bs, * the same as the current context of bs). */ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) { + BdrvChild *child; + if (bdrv_get_aio_context(bs) == new_context) { return; } bdrv_drained_begin(bs); + + QLIST_FOREACH(child, &bs->children, next) { + bdrv_set_aio_context(child->bs, new_context); + } + bdrv_detach_aio_context(bs); /* This function executes in the old AioContext so acquire the new one in -- 2.20.1
All block nodes and users in any connected component of the block graph must be in the same AioContext, so changing the AioContext of one node must not only change all of its children, but all of its parents (and in turn their children etc.) as well. Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- include/block/block.h | 2 ++ include/block/block_int.h | 1 + block.c | 48 ++++++++++++++++++++++++++++++++++----- 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/include/block/block.h b/include/block/block.h index XXXXXXX..XXXXXXX 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -XXX,XX +XXX,XX @@ void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co); * This function must be called with iothread lock held. */ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context); +void bdrv_set_aio_context_ignore(BlockDriverState *bs, + AioContext *new_context, GSList **ignore); int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, Error **errp); int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, diff --git a/include/block/block_int.h b/include/block/block_int.h index XXXXXXX..XXXXXXX 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -XXX,XX +XXX,XX @@ struct BdrvChildRole { bool (*can_set_aio_ctx)(BdrvChild *child, AioContext *ctx, GSList **ignore, Error **errp); + void (*set_aio_ctx)(BdrvChild *child, AioContext *ctx, GSList **ignore); }; extern const BdrvChildRole child_file; diff --git a/block.c b/block.c index XXXXXXX..XXXXXXX 100644 --- a/block.c +++ b/block.c @@ -XXX,XX +XXX,XX @@ static bool bdrv_child_cb_can_set_aio_ctx(BdrvChild *child, AioContext *ctx, return bdrv_can_set_aio_context(bs, ctx, ignore, errp); } +static void bdrv_child_cb_set_aio_ctx(BdrvChild *child, AioContext *ctx, + GSList **ignore) +{ + BlockDriverState *bs = child->opaque; + return bdrv_set_aio_context_ignore(bs, ctx, ignore); +} + /* * Returns the options and flags that a temporary snapshot should get, based on * the originally requested flags (the originally requested image will have @@ -XXX,XX +XXX,XX @@ const BdrvChildRole child_file = { .detach = bdrv_child_cb_detach, .inactivate = bdrv_child_cb_inactivate, .can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx, + .set_aio_ctx = bdrv_child_cb_set_aio_ctx, }; /* @@ -XXX,XX +XXX,XX @@ const BdrvChildRole child_format = { .detach = bdrv_child_cb_detach, .inactivate = bdrv_child_cb_inactivate, .can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx, + .set_aio_ctx = bdrv_child_cb_set_aio_ctx, }; static void bdrv_backing_attach(BdrvChild *c) @@ -XXX,XX +XXX,XX @@ const BdrvChildRole child_backing = { .inactivate = bdrv_child_cb_inactivate, .update_filename = bdrv_backing_update_filename, .can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx, + .set_aio_ctx = bdrv_child_cb_set_aio_ctx, }; static int bdrv_open_flags(BlockDriverState *bs, int flags) @@ -XXX,XX +XXX,XX @@ static void bdrv_attach_aio_context(BlockDriverState *bs, bs->walking_aio_notifiers = false; } -/* The caller must own the AioContext lock for the old AioContext of bs, but it - * must not own the AioContext lock for new_context (unless new_context is - * the same as the current context of bs). */ -void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) +/* @ignore will accumulate all visited BdrvChild object. The caller is + * responsible for freeing the list afterwards. */ +void bdrv_set_aio_context_ignore(BlockDriverState *bs, + AioContext *new_context, GSList **ignore) { BdrvChild *child; @@ -XXX,XX +XXX,XX @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) bdrv_drained_begin(bs); QLIST_FOREACH(child, &bs->children, next) { - bdrv_set_aio_context(child->bs, new_context); + if (g_slist_find(*ignore, child)) { + continue; + } + *ignore = g_slist_prepend(*ignore, child); + bdrv_set_aio_context_ignore(child->bs, new_context, ignore); + } + QLIST_FOREACH(child, &bs->parents, next_parent) { + if (g_slist_find(*ignore, child)) { + continue; + } + if (child->role->set_aio_ctx) { + *ignore = g_slist_prepend(*ignore, child); + child->role->set_aio_ctx(child, new_context, ignore); + } } bdrv_detach_aio_context(bs); @@ -XXX,XX +XXX,XX @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) aio_context_release(new_context); } +/* The caller must own the AioContext lock for the old AioContext of bs, but it + * must not own the AioContext lock for new_context (unless new_context is + * the same as the current context of bs). */ +void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) +{ + GSList *ignore_list = NULL; + bdrv_set_aio_context_ignore(bs, new_context, &ignore_list); + g_slist_free(ignore_list); +} + static bool bdrv_parent_can_set_aio_context(BdrvChild *c, AioContext *ctx, GSList **ignore, Error **errp) { @@ -XXX,XX +XXX,XX @@ int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, return -EPERM; } - bdrv_set_aio_context(bs, ctx); + ignore = ignore_child ? g_slist_prepend(NULL, ignore_child) : NULL; + bdrv_set_aio_context_ignore(bs, ctx, &ignore); + g_slist_free(ignore); + return 0; } -- 2.20.1
Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- tests/test-block-iothread.c | 131 ++++++++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c index XXXXXXX..XXXXXXX 100644 --- a/tests/test-block-iothread.c +++ b/tests/test-block-iothread.c @@ -XXX,XX +XXX,XX @@ #include "block/blockjob_int.h" #include "sysemu/block-backend.h" #include "qapi/error.h" +#include "qapi/qmp/qdict.h" #include "iothread.h" static int coroutine_fn bdrv_test_co_prwv(BlockDriverState *bs, @@ -XXX,XX +XXX,XX @@ static void test_attach_blockjob(void) blk_unref(blk); } +/* + * Test that changing the AioContext for one node in a tree (here through blk) + * changes all other nodes as well: + * + * blk + * | + * | bs_verify [blkverify] + * | / \ + * | / \ + * bs_a [bdrv_test] bs_b [bdrv_test] + * + */ +static void test_propagate_basic(void) +{ + IOThread *iothread = iothread_new(); + AioContext *ctx = iothread_get_aio_context(iothread); + BlockBackend *blk; + BlockDriverState *bs_a, *bs_b, *bs_verify; + QDict *options; + + /* Create bs_a and its BlockBackend */ + blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + bs_a = bdrv_new_open_driver(&bdrv_test, "bs_a", BDRV_O_RDWR, &error_abort); + blk_insert_bs(blk, bs_a, &error_abort); + + /* Create bs_b */ + bs_b = bdrv_new_open_driver(&bdrv_test, "bs_b", BDRV_O_RDWR, &error_abort); + + /* Create blkverify filter that references both bs_a and bs_b */ + options = qdict_new(); + qdict_put_str(options, "driver", "blkverify"); + qdict_put_str(options, "test", "bs_a"); + qdict_put_str(options, "raw", "bs_b"); + + bs_verify = bdrv_open(NULL, NULL, options, BDRV_O_RDWR, &error_abort); + + /* Switch the AioContext */ + blk_set_aio_context(blk, ctx); + g_assert(blk_get_aio_context(blk) == ctx); + g_assert(bdrv_get_aio_context(bs_a) == ctx); + g_assert(bdrv_get_aio_context(bs_verify) == ctx); + g_assert(bdrv_get_aio_context(bs_b) == ctx); + + /* Switch the AioContext back */ + ctx = qemu_get_aio_context(); + blk_set_aio_context(blk, ctx); + g_assert(blk_get_aio_context(blk) == ctx); + g_assert(bdrv_get_aio_context(bs_a) == ctx); + g_assert(bdrv_get_aio_context(bs_verify) == ctx); + g_assert(bdrv_get_aio_context(bs_b) == ctx); + + bdrv_unref(bs_verify); + bdrv_unref(bs_b); + bdrv_unref(bs_a); + blk_unref(blk); +} + +/* + * Test that diamonds in the graph don't lead to endless recursion: + * + * blk + * | + * bs_verify [blkverify] + * / \ + * / \ + * bs_b [raw] bs_c[raw] + * \ / + * \ / + * bs_a [bdrv_test] + */ +static void test_propagate_diamond(void) +{ + IOThread *iothread = iothread_new(); + AioContext *ctx = iothread_get_aio_context(iothread); + BlockBackend *blk; + BlockDriverState *bs_a, *bs_b, *bs_c, *bs_verify; + QDict *options; + + /* Create bs_a */ + bs_a = bdrv_new_open_driver(&bdrv_test, "bs_a", BDRV_O_RDWR, &error_abort); + + /* Create bs_b and bc_c */ + options = qdict_new(); + qdict_put_str(options, "driver", "raw"); + qdict_put_str(options, "file", "bs_a"); + qdict_put_str(options, "node-name", "bs_b"); + bs_b = bdrv_open(NULL, NULL, options, BDRV_O_RDWR, &error_abort); + + options = qdict_new(); + qdict_put_str(options, "driver", "raw"); + qdict_put_str(options, "file", "bs_a"); + qdict_put_str(options, "node-name", "bs_c"); + bs_c = bdrv_open(NULL, NULL, options, BDRV_O_RDWR, &error_abort); + + /* Create blkverify filter that references both bs_b and bs_c */ + options = qdict_new(); + qdict_put_str(options, "driver", "blkverify"); + qdict_put_str(options, "test", "bs_b"); + qdict_put_str(options, "raw", "bs_c"); + + bs_verify = bdrv_open(NULL, NULL, options, BDRV_O_RDWR, &error_abort); + blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk_insert_bs(blk, bs_verify, &error_abort); + + /* Switch the AioContext */ + blk_set_aio_context(blk, ctx); + g_assert(blk_get_aio_context(blk) == ctx); + g_assert(bdrv_get_aio_context(bs_verify) == ctx); + g_assert(bdrv_get_aio_context(bs_a) == ctx); + g_assert(bdrv_get_aio_context(bs_b) == ctx); + g_assert(bdrv_get_aio_context(bs_c) == ctx); + + /* Switch the AioContext back */ + ctx = qemu_get_aio_context(); + blk_set_aio_context(blk, ctx); + g_assert(blk_get_aio_context(blk) == ctx); + g_assert(bdrv_get_aio_context(bs_verify) == ctx); + g_assert(bdrv_get_aio_context(bs_a) == ctx); + g_assert(bdrv_get_aio_context(bs_b) == ctx); + g_assert(bdrv_get_aio_context(bs_c) == ctx); + + blk_unref(blk); + bdrv_unref(bs_verify); + bdrv_unref(bs_c); + bdrv_unref(bs_b); + bdrv_unref(bs_a); +} + int main(int argc, char **argv) { int i; @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) } g_test_add_func("/attach/blockjob", test_attach_blockjob); + g_test_add_func("/propagate/basic", test_propagate_basic); + g_test_add_func("/propagate/diamond", test_propagate_diamond); return g_test_run(); } -- 2.20.1
bdrv_try_set_aio_context() currently fails if a BlockBackend is attached to a node because it doesn't implement the BdrvChildRole callbacks for AioContext management. We can allow changing the AioContext of monitor-owned BlockBackends as long as no device is attached to them. When setting the AioContext of the root node of a BlockBackend, we now need to pass blk->root as an ignored child because we don't want the root node to recursively call back into BlockBackend and execute blk_do_set_aio_context() a second time. Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- block/block-backend.c | 45 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c index XXXXXXX..XXXXXXX 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -XXX,XX +XXX,XX @@ static void blk_root_drained_end(BdrvChild *child); static void blk_root_change_media(BdrvChild *child, bool load); static void blk_root_resize(BdrvChild *child); +static bool blk_root_can_set_aio_ctx(BdrvChild *child, AioContext *ctx, + GSList **ignore, Error **errp); +static void blk_root_set_aio_ctx(BdrvChild *child, AioContext *ctx, + GSList **ignore); + static char *blk_root_get_parent_desc(BdrvChild *child) { BlockBackend *blk = child->opaque; @@ -XXX,XX +XXX,XX @@ static const BdrvChildRole child_root = { .attach = blk_root_attach, .detach = blk_root_detach, + + .can_set_aio_ctx = blk_root_can_set_aio_ctx, + .set_aio_ctx = blk_root_set_aio_ctx, }; /* @@ -XXX,XX +XXX,XX @@ static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb) return blk_get_aio_context(blk_acb->blk); } -void blk_set_aio_context(BlockBackend *blk, AioContext *new_context) +static void blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context, + bool update_root_node) { BlockDriverState *bs = blk_bs(blk); ThrottleGroupMember *tgm = &blk->public.throttle_group_member; @@ -XXX,XX +XXX,XX @@ void blk_set_aio_context(BlockBackend *blk, AioContext *new_context) throttle_group_attach_aio_context(tgm, new_context); bdrv_drained_end(bs); } - bdrv_set_aio_context(bs, new_context); + if (update_root_node) { + GSList *ignore = g_slist_prepend(NULL, blk->root); + bdrv_set_aio_context_ignore(bs, new_context, &ignore); + g_slist_free(ignore); + } } } +void blk_set_aio_context(BlockBackend *blk, AioContext *new_context) +{ + blk_do_set_aio_context(blk, new_context, true); +} + +static bool blk_root_can_set_aio_ctx(BdrvChild *child, AioContext *ctx, + GSList **ignore, Error **errp) +{ + BlockBackend *blk = child->opaque; + + /* Only manually created BlockBackends that are not attached to anything + * can change their AioContext without updating their user. */ + if (!blk->name || blk->dev) { + /* TODO Add BB name/QOM path */ + error_setg(errp, "Cannot change iothread of active block backend"); + return false; + } + + return true; +} + +static void blk_root_set_aio_ctx(BdrvChild *child, AioContext *ctx, + GSList **ignore) +{ + BlockBackend *blk = child->opaque; + blk_do_set_aio_context(blk, ctx, false); +} + void blk_add_aio_context_notifier(BlockBackend *blk, void (*attached_aio_context)(AioContext *new_context, void *opaque), void (*detach_aio_context)(void *opaque), void *opaque) -- 2.20.1
Some users (like block jobs) can tolerate an AioContext change for their BlockBackend. Add a function that tells the BlockBackend that it can allow changes. Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- include/sysemu/block-backend.h | 1 + block/block-backend.c | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index XXXXXXX..XXXXXXX 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -XXX,XX +XXX,XX @@ int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm); void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow); +void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow); void blk_iostatus_enable(BlockBackend *blk); bool blk_iostatus_is_enabled(const BlockBackend *blk); BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk); diff --git a/block/block-backend.c b/block/block-backend.c index XXXXXXX..XXXXXXX 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -XXX,XX +XXX,XX @@ struct BlockBackend { uint64_t shared_perm; bool disable_perm; + bool allow_aio_context_change; bool allow_write_beyond_eof; NotifierList remove_bs_notifiers, insert_bs_notifiers; @@ -XXX,XX +XXX,XX @@ void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow) blk->allow_write_beyond_eof = allow; } +void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow) +{ + blk->allow_aio_context_change = allow; +} + static int blk_check_byte_request(BlockBackend *blk, int64_t offset, size_t size) { @@ -XXX,XX +XXX,XX @@ static bool blk_root_can_set_aio_ctx(BdrvChild *child, AioContext *ctx, { BlockBackend *blk = child->opaque; + if (blk->allow_aio_context_change) { + return true; + } + /* Only manually created BlockBackends that are not attached to anything * can change their AioContext without updating their user. */ if (!blk->name || blk->dev) { -- 2.20.1
Block jobs require that all of the nodes the job is using are in the same AioContext. Therefore all BdrvChild objects of the job propagate .(can_)set_aio_context to all other job nodes, so that the switch is checked and performed consistently even if both nodes are in different subtrees. Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- block/backup.c | 8 -------- block/mirror.c | 10 +--------- blockjob.c | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 35 insertions(+), 17 deletions(-) diff --git a/block/backup.c b/block/backup.c index XXXXXXX..XXXXXXX 100644 --- a/block/backup.c +++ b/block/backup.c @@ -XXX,XX +XXX,XX @@ static void backup_clean(Job *job) s->target = NULL; } -static void backup_attached_aio_context(BlockJob *job, AioContext *aio_context) -{ - BackupBlockJob *s = container_of(job, BackupBlockJob, common); - - blk_set_aio_context(s->target, aio_context); -} - void backup_do_checkpoint(BlockJob *job, Error **errp) { BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common); @@ -XXX,XX +XXX,XX @@ static const BlockJobDriver backup_job_driver = { .abort = backup_abort, .clean = backup_clean, }, - .attached_aio_context = backup_attached_aio_context, .drain = backup_drain, }; diff --git a/block/mirror.c b/block/mirror.c index XXXXXXX..XXXXXXX 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -XXX,XX +XXX,XX @@ static bool mirror_drained_poll(BlockJob *job) return !!s->in_flight; } -static void mirror_attached_aio_context(BlockJob *job, AioContext *new_context) -{ - MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); - - blk_set_aio_context(s->target, new_context); -} - static void mirror_drain(BlockJob *job) { MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); @@ -XXX,XX +XXX,XX @@ static const BlockJobDriver mirror_job_driver = { .complete = mirror_complete, }, .drained_poll = mirror_drained_poll, - .attached_aio_context = mirror_attached_aio_context, .drain = mirror_drain, }; @@ -XXX,XX +XXX,XX @@ static const BlockJobDriver commit_active_job_driver = { .complete = mirror_complete, }, .drained_poll = mirror_drained_poll, - .attached_aio_context = mirror_attached_aio_context, .drain = mirror_drain, }; @@ -XXX,XX +XXX,XX @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, * ensure that. */ blk_set_force_allow_inactivate(s->target); } + blk_set_allow_aio_context_change(s->target, true); s->replaces = g_strdup(replaces); s->on_source_error = on_source_error; diff --git a/blockjob.c b/blockjob.c index XXXXXXX..XXXXXXX 100644 --- a/blockjob.c +++ b/blockjob.c @@ -XXX,XX +XXX,XX @@ static void child_job_drained_end(BdrvChild *c) job_resume(&job->job); } +static bool child_job_can_set_aio_ctx(BdrvChild *c, AioContext *ctx, + GSList **ignore, Error **errp) +{ + BlockJob *job = c->opaque; + GSList *l; + + for (l = job->nodes; l; l = l->next) { + BdrvChild *sibling = l->data; + if (!bdrv_child_can_set_aio_context(sibling, ctx, ignore, errp)) { + return false; + } + } + return true; +} + +static void child_job_set_aio_ctx(BdrvChild *c, AioContext *ctx, + GSList **ignore) +{ + BlockJob *job = c->opaque; + GSList *l; + + for (l = job->nodes; l; l = l->next) { + BdrvChild *sibling = l->data; + if (g_slist_find(*ignore, sibling)) { + continue; + } + *ignore = g_slist_prepend(*ignore, sibling); + bdrv_set_aio_context_ignore(sibling->bs, ctx, ignore); + } +} + static const BdrvChildRole child_job = { .get_parent_desc = child_job_get_parent_desc, .drained_begin = child_job_drained_begin, .drained_poll = child_job_drained_poll, .drained_end = child_job_drained_end, + .can_set_aio_ctx = child_job_can_set_aio_ctx, + .set_aio_ctx = child_job_set_aio_ctx, .stay_at_node = true, }; @@ -XXX,XX +XXX,XX @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, blk_add_aio_context_notifier(blk, block_job_attached_aio_context, block_job_detach_aio_context, job); + blk_set_allow_aio_context_change(blk, true); /* Only set speed when necessary to avoid NotSupported error */ if (speed != 0) { -- 2.20.1
The notifiers made sure that the job is quiesced and that the job->aio_context field is updated. The first part is unnecessary today since bdrv_set_aio_context_ignore() drains the block node, and this means drainig the block job, too. The second part can be done in the .set_aio_ctx callback of the block job BdrvChildRole. The notifiers were problematic because they poll the AioContext while the graph is in an inconsistent state with some nodes already in the new context, but others still in the old context. So removing the notifiers not only simplifies the code, but actually makes the code safer. Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- blockjob.c | 43 ++----------------------------------------- 1 file changed, 2 insertions(+), 41 deletions(-) diff --git a/blockjob.c b/blockjob.c index XXXXXXX..XXXXXXX 100644 --- a/blockjob.c +++ b/blockjob.c @@ -XXX,XX +XXX,XX @@ BlockJob *block_job_get(const char *id) } } -static void block_job_attached_aio_context(AioContext *new_context, - void *opaque); -static void block_job_detach_aio_context(void *opaque); - void block_job_free(Job *job) { BlockJob *bjob = container_of(job, BlockJob, job); @@ -XXX,XX +XXX,XX @@ void block_job_free(Job *job) bs->job = NULL; block_job_remove_all_bdrv(bjob); - blk_remove_aio_context_notifier(bjob->blk, - block_job_attached_aio_context, - block_job_detach_aio_context, bjob); blk_unref(bjob->blk); error_free(bjob->blocker); } -static void block_job_attached_aio_context(AioContext *new_context, - void *opaque) -{ - BlockJob *job = opaque; - const JobDriver *drv = job->job.driver; - BlockJobDriver *bjdrv = container_of(drv, BlockJobDriver, job_driver); - - job->job.aio_context = new_context; - if (bjdrv->attached_aio_context) { - bjdrv->attached_aio_context(job, new_context); - } - - job_resume(&job->job); -} - void block_job_drain(Job *job) { BlockJob *bjob = container_of(job, BlockJob, job); @@ -XXX,XX +XXX,XX @@ void block_job_drain(Job *job) } } -static void block_job_detach_aio_context(void *opaque) -{ - BlockJob *job = opaque; - - /* In case the job terminates during aio_poll()... */ - job_ref(&job->job); - - job_pause(&job->job); - - while (!job->job.paused && !job_is_completed(&job->job)) { - job_drain(&job->job); - } - - job->job.aio_context = NULL; - job_unref(&job->job); -} - static char *child_job_get_parent_desc(BdrvChild *c) { BlockJob *job = c->opaque; @@ -XXX,XX +XXX,XX @@ static void child_job_set_aio_ctx(BdrvChild *c, AioContext *ctx, *ignore = g_slist_prepend(*ignore, sibling); bdrv_set_aio_context_ignore(sibling->bs, ctx, ignore); } + + job->job.aio_context = ctx; } static const BdrvChildRole child_job = { @@ -XXX,XX +XXX,XX @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker); - blk_add_aio_context_notifier(blk, block_job_attached_aio_context, - block_job_detach_aio_context, job); blk_set_allow_aio_context_change(blk, true); /* Only set speed when necessary to avoid NotSupported error */ -- 2.20.1
Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- tests/test-block-iothread.c | 71 +++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c index XXXXXXX..XXXXXXX 100644 --- a/tests/test-block-iothread.c +++ b/tests/test-block-iothread.c @@ -XXX,XX +XXX,XX @@ static void test_propagate_diamond(void) bdrv_unref(bs_a); } +static void test_propagate_mirror(void) +{ + IOThread *iothread = iothread_new(); + AioContext *ctx = iothread_get_aio_context(iothread); + AioContext *main_ctx = qemu_get_aio_context(); + BlockDriverState *src, *target; + BlockBackend *blk; + Job *job; + Error *local_err = NULL; + + /* Create src and target*/ + src = bdrv_new_open_driver(&bdrv_test, "src", BDRV_O_RDWR, &error_abort); + target = bdrv_new_open_driver(&bdrv_test, "target", BDRV_O_RDWR, + &error_abort); + + /* Start a mirror job */ + mirror_start("job0", src, target, NULL, JOB_DEFAULT, 0, 0, 0, + MIRROR_SYNC_MODE_NONE, MIRROR_OPEN_BACKING_CHAIN, + BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT, + false, "filter_node", MIRROR_COPY_MODE_BACKGROUND, + &error_abort); + job = job_get("job0"); + + /* Change the AioContext of src */ + bdrv_try_set_aio_context(src, ctx, &error_abort); + g_assert(bdrv_get_aio_context(src) == ctx); + g_assert(bdrv_get_aio_context(target) == ctx); + g_assert(job->aio_context == ctx); + + /* Change the AioContext of target */ + aio_context_acquire(ctx); + bdrv_try_set_aio_context(target, main_ctx, &error_abort); + aio_context_release(ctx); + g_assert(bdrv_get_aio_context(src) == main_ctx); + g_assert(bdrv_get_aio_context(target) == main_ctx); + + /* With a BlockBackend on src, changing target must fail */ + blk = blk_new(0, BLK_PERM_ALL); + blk_insert_bs(blk, src, &error_abort); + + bdrv_try_set_aio_context(target, ctx, &local_err); + g_assert(local_err); + error_free(local_err); + + g_assert(blk_get_aio_context(blk) == main_ctx); + g_assert(bdrv_get_aio_context(src) == main_ctx); + g_assert(bdrv_get_aio_context(target) == main_ctx); + + /* ...unless we explicitly allow it */ + aio_context_acquire(ctx); + blk_set_allow_aio_context_change(blk, true); + bdrv_try_set_aio_context(target, ctx, &error_abort); + aio_context_release(ctx); + + g_assert(blk_get_aio_context(blk) == ctx); + g_assert(bdrv_get_aio_context(src) == ctx); + g_assert(bdrv_get_aio_context(target) == ctx); + + job_cancel_sync_all(); + + aio_context_acquire(ctx); + blk_set_aio_context(blk, main_ctx); + bdrv_try_set_aio_context(target, main_ctx, &error_abort); + aio_context_release(ctx); + + blk_unref(blk); + bdrv_unref(src); + bdrv_unref(target); +} + int main(int argc, char **argv) { int i; @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) g_test_add_func("/attach/blockjob", test_attach_blockjob); g_test_add_func("/propagate/basic", test_propagate_basic); g_test_add_func("/propagate/diamond", test_propagate_diamond); + g_test_add_func("/propagate/mirror", test_propagate_mirror); return g_test_run(); } -- 2.20.1
From: Max Reitz <mreitz@redhat.com> Currently, qemu crashes whenever someone queries the block status of an unaligned image tail of an O_DIRECT image: $ echo > foo $ qemu-img map --image-opts driver=file,filename=foo,cache.direct=on Offset Length Mapped to File qemu-img: block/io.c:2093: bdrv_co_block_status: Assertion `*pnum && QEMU_IS_ALIGNED(*pnum, align) && align > offset - aligned_offset' failed. This is because bdrv_co_block_status() checks that the result returned by the driver's implementation is aligned to the request_alignment, but file-posix can fail to do so, which is actually mentioned in a comment there: "[...] possibly including a partial sector at EOF". Fix this by rounding up those partial sectors. There are two possible alternative fixes: (1) We could refuse to open unaligned image files with O_DIRECT altogether. That sounds reasonable until you realize that qcow2 does necessarily not fill up its metadata clusters, and that nobody runs qemu-img create with O_DIRECT. Therefore, unpreallocated qcow2 files usually have an unaligned image tail. (2) bdrv_co_block_status() could ignore unaligned tails. It actually throws away everything past the EOF already, so that sounds reasonable. Unfortunately, the block layer knows file lengths only with a granularity of BDRV_SECTOR_SIZE, so bdrv_co_block_status() usually would have to guess whether its file length information is inexact or whether the driver is broken. Fixing what raw_co_block_status() returns is the safest thing to do. There seems to be no other block driver that sets request_alignment and does not make sure that it always returns aligned values. Cc: qemu-stable@nongnu.org Signed-off-by: Max Reitz <mreitz@redhat.com> Reviewed-by: Eric Blake <eblake@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- block/file-posix.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/block/file-posix.c b/block/file-posix.c index XXXXXXX..XXXXXXX 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs, off_t data = 0, hole = 0; int ret; + assert(QEMU_IS_ALIGNED(offset | bytes, bs->bl.request_alignment)); + ret = fd_open(bs); if (ret < 0) { return ret; @@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs, /* On a data extent, compute bytes to the end of the extent, * possibly including a partial sector at EOF. */ *pnum = MIN(bytes, hole - offset); + + /* + * We are not allowed to return partial sectors, though, so + * round up if necessary. + */ + if (!QEMU_IS_ALIGNED(*pnum, bs->bl.request_alignment)) { + int64_t file_length = raw_getlength(bs); + if (file_length > 0) { + /* Ignore errors, this is just a safeguard */ + assert(hole == file_length); + } + *pnum = ROUND_UP(*pnum, bs->bl.request_alignment); + } + ret = BDRV_BLOCK_DATA; } else { /* On a hole, compute bytes to the beginning of the next extent. */ -- 2.20.1
From: Max Reitz <mreitz@redhat.com> We already have 221 for accesses through the page cache, but it is better to create a new file for O_DIRECT instead of integrating those test cases into 221. This way, we can make use of _supported_cache_modes (and _default_cache_mode) so the test is automatically skipped on filesystems that do not support O_DIRECT. As part of the split, add _supported_cache_modes to 221. With that, it no longer fails when run with -c none or -c directsync. Signed-off-by: Max Reitz <mreitz@redhat.com> Reviewed-by: Eric Blake <eblake@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- tests/qemu-iotests/221 | 4 ++ tests/qemu-iotests/253 | 84 ++++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/253.out | 14 +++++++ tests/qemu-iotests/group | 1 + 4 files changed, 103 insertions(+) create mode 100755 tests/qemu-iotests/253 create mode 100644 tests/qemu-iotests/253.out diff --git a/tests/qemu-iotests/221 b/tests/qemu-iotests/221 index XXXXXXX..XXXXXXX 100755 --- a/tests/qemu-iotests/221 +++ b/tests/qemu-iotests/221 @@ -XXX,XX +XXX,XX @@ #!/usr/bin/env bash # # Test qemu-img vs. unaligned images +# (See also 253, which is the O_DIRECT version) # # Copyright (C) 2018-2019 Red Hat, Inc. # @@ -XXX,XX +XXX,XX @@ _supported_fmt raw _supported_proto file _supported_os Linux +_default_cache_mode writeback +_supported_cache_modes writeback writethrough unsafe + echo echo "=== Check mapping of unaligned raw image ===" echo diff --git a/tests/qemu-iotests/253 b/tests/qemu-iotests/253 new file mode 100755 index XXXXXXX..XXXXXXX --- /dev/null +++ b/tests/qemu-iotests/253 @@ -XXX,XX +XXX,XX @@ +#!/usr/bin/env bash +# +# Test qemu-img vs. unaligned images; O_DIRECT version +# (Originates from 221) +# +# Copyright (C) 2019 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +seq="$(basename $0)" +echo "QA output created by $seq" + +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +_supported_fmt raw +_supported_proto file +_supported_os Linux + +_default_cache_mode none +_supported_cache_modes none directsync + +echo +echo "=== Check mapping of unaligned raw image ===" +echo + +# We do not know how large a physical sector is, but it is certainly +# going to be a factor of 1 MB +size=$((1 * 1024 * 1024 - 1)) + +# qemu-img create rounds size up to BDRV_SECTOR_SIZE +_make_test_img $size +$QEMU_IMG map --output=json --image-opts \ + "driver=$IMGFMT,file.driver=file,file.filename=$TEST_IMG,cache.direct=on" \ + | _filter_qemu_img_map + +# so we resize it and check again +truncate --size=$size "$TEST_IMG" +$QEMU_IMG map --output=json --image-opts \ + "driver=$IMGFMT,file.driver=file,file.filename=$TEST_IMG,cache.direct=on" \ + | _filter_qemu_img_map + +# qemu-io with O_DIRECT always writes whole physical sectors. Again, +# we do not know how large a physical sector is, so we just start +# writing from a 64 kB boundary, which should always be aligned. +offset=$((1 * 1024 * 1024 - 64 * 1024)) +$QEMU_IO -c "w $offset $((size - offset))" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --output=json --image-opts \ + "driver=$IMGFMT,file.driver=file,file.filename=$TEST_IMG,cache.direct=on" \ + | _filter_qemu_img_map + +# Resize it and check again -- contrary to 221, we may not get partial +# sectors here, so there should be only two areas (one zero, one +# data). +truncate --size=$size "$TEST_IMG" +$QEMU_IMG map --output=json --image-opts \ + "driver=$IMGFMT,file.driver=file,file.filename=$TEST_IMG,cache.direct=on" \ + | _filter_qemu_img_map + +# success, all done +echo '*** done' +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/253.out b/tests/qemu-iotests/253.out new file mode 100644 index XXXXXXX..XXXXXXX --- /dev/null +++ b/tests/qemu-iotests/253.out @@ -XXX,XX +XXX,XX @@ +QA output created by 253 + +=== Check mapping of unaligned raw image === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048575 +[{ "start": 0, "length": 1048576, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] +[{ "start": 0, "length": 1048576, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] +wrote 65535/65535 bytes at offset 983040 +63.999 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +[{ "start": 0, "length": 983040, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, +{ "start": 983040, "length": 65536, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] +[{ "start": 0, "length": 983040, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, +{ "start": 983040, "length": 65536, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] +*** done diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index XXXXXXX..XXXXXXX 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -XXX,XX +XXX,XX @@ 248 rw auto quick 249 rw auto quick 252 rw auto backing quick +253 rw auto quick -- 2.20.1
From: Max Reitz <mreitz@redhat.com> Just writing that --output=json outputs JSON information does not really help; we should also make a note of what QAPI type the result object has. (The map subcommand does not emit a QAPI-typed object, but its section already describes the object structure well enough.) Signed-off-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- qemu-img.texi | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/qemu-img.texi b/qemu-img.texi index XXXXXXX..XXXXXXX 100644 --- a/qemu-img.texi +++ b/qemu-img.texi @@ -XXX,XX +XXX,XX @@ overridden with a pattern byte specified by @var{pattern}. Perform a consistency check on the disk image @var{filename}. The command can output in the format @var{ofmt} which is either @code{human} or @code{json}. +The JSON output is an object of QAPI type @code{ImageCheck}. If @code{-r} is specified, qemu-img tries to repair any inconsistencies found during the check. @code{-r leaks} repairs only cluster leaks, whereas @@ -XXX,XX +XXX,XX @@ The size syntax is similar to dd(1)'s size syntax. Give information about the disk image @var{filename}. Use it in particular to know the size reserved on disk which can be different from the displayed size. If VM snapshots are stored in the disk image, -they are displayed too. The command can output in the format @var{ofmt} -which is either @code{human} or @code{json}. +they are displayed too. If a disk image has a backing file chain, information about each disk image in the chain can be recursively enumerated by using the option @code{--backing-chain}. @@ -XXX,XX +XXX,XX @@ To enumerate information about each disk image in the above chain, starting from qemu-img info --backing-chain snap2.qcow2 @end example +The command can output in the format @var{ofmt} which is either @code{human} or +@code{json}. The JSON output is an object of QAPI type @code{ImageInfo}; with +@code{--backing-chain}, it is an array of @code{ImageInfo} objects. + @item map [--object @var{objectdef}] [--image-opts] [-f @var{fmt}] [--output=@var{ofmt}] [-U] @var{filename} Dump the metadata of image @var{filename} and its backing file chain. @@ -XXX,XX +XXX,XX @@ Calculate the file size required for a new image. This information can be used to size logical volumes or SAN LUNs appropriately for the image that will be placed in them. The values reported are guaranteed to be large enough to fit the image. The command can output in the format @var{ofmt} which is either -@code{human} or @code{json}. +@code{human} or @code{json}. The JSON output is an object of QAPI type +@code{BlockMeasureInfo}. If the size @var{N} is given then act as if creating a new empty image file using @command{qemu-img create}. If @var{filename} is given then act as if -- 2.20.1
From: Max Reitz <mreitz@redhat.com> Ideally, it should be self-explanatory. However, keys like "disk size" arguably really are not self-explanatory. In any case, there is no harm in going into a some more detail here. Signed-off-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- qemu-img.texi | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/qemu-img.texi b/qemu-img.texi index XXXXXXX..XXXXXXX 100644 --- a/qemu-img.texi +++ b/qemu-img.texi @@ -XXX,XX +XXX,XX @@ The command can output in the format @var{ofmt} which is either @code{human} or @code{json}. The JSON output is an object of QAPI type @code{ImageInfo}; with @code{--backing-chain}, it is an array of @code{ImageInfo} objects. +@code{--output=human} reports the following information (for every image in the +chain): +@table @var +@item image +The image file name + +@item file format +The image format + +@item virtual size +The size of the guest disk + +@item disk size +How much space the image file occupies on the host file system (may be shown as +0 if this information is unavailable, e.g. because there is no file system) + +@item cluster_size +Cluster size of the image format, if applicable + +@item encrypted +Whether the image is encrypted (only present if so) + +@item cleanly shut down +This is shown as @code{no} if the image is dirty and will have to be +auto-repaired the next time it is opened in qemu. + +@item backing file +The backing file name, if present + +@item backing file format +The format of the backing file, if the image enforces it + +@item Snapshot list +A list of all internal snapshots + +@item Format specific information +Further information whose structure depends on the image format. This section +is a textual representation of the respective @code{ImageInfoSpecific*} QAPI +object (e.g. @code{ImageInfoSpecificQCow2} for qcow2 images). +@end table + @item map [--object @var{objectdef}] [--image-opts] [-f @var{fmt}] [--output=@var{ofmt}] [-U] @var{filename} Dump the metadata of image @var{filename} and its backing file chain. -- 2.20.1
From: Max Reitz <mreitz@redhat.com> This message does not make any sense when it appears as the response to making an R/W node read-only. We should detect that case and emit a different message, then. Signed-off-by: Max Reitz <mreitz@redhat.com> Reviewed-by: Alberto Garcia <berto@igalia.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- block.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/block.c b/block.c index XXXXXXX..XXXXXXX 100644 --- a/block.c +++ b/block.c @@ -XXX,XX +XXX,XX @@ static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q, GSList *ignore_children, Error **errp); static void bdrv_child_abort_perm_update(BdrvChild *c); static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared); +static void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, + uint64_t *shared_perm); typedef struct BlockReopenQueueEntry { bool prepared; @@ -XXX,XX +XXX,XX @@ static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q, if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) && !bdrv_is_writable_after_reopen(bs, q)) { - error_setg(errp, "Block node is read-only"); + if (!bdrv_is_writable_after_reopen(bs, NULL)) { + error_setg(errp, "Block node is read-only"); + } else { + uint64_t current_perms, current_shared; + bdrv_get_cumulative_perm(bs, ¤t_perms, ¤t_shared); + if (current_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) { + error_setg(errp, "Cannot make block node read-only, there is " + "a writer on it"); + } else { + error_setg(errp, "Cannot make block node read-only and create " + "a writer on it"); + } + } + return -EPERM; } -- 2.20.1
From: Max Reitz <mreitz@redhat.com> Sometimes we cannot tell which error message qemu will emit, and we do not care. With this change, we can then just pass an array of all possible messages to assert_qmp() and it will choose the right one. Signed-off-by: Max Reitz <mreitz@redhat.com> Reviewed-by: Alberto Garcia <berto@igalia.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- tests/qemu-iotests/iotests.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index XXXXXXX..XXXXXXX 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -XXX,XX +XXX,XX @@ class QMPTestCase(unittest.TestCase): self.fail('path "%s" has value "%s"' % (path, str(result))) def assert_qmp(self, d, path, value): - '''Assert that the value for a specific path in a QMP dict matches''' + '''Assert that the value for a specific path in a QMP dict + matches. When given a list of values, assert that any of + them matches.''' + result = self.dictpath(d, path) - self.assertEqual(result, value, 'values not equal "%s" and "%s"' % (str(result), str(value))) + + # [] makes no sense as a list of valid values, so treat it as + # an actual single value. + if isinstance(value, list) and value != []: + for v in value: + if result == v: + return + self.fail('no match for "%s" in %s' % (str(result), str(value))) + else: + self.assertEqual(result, value, + 'values not equal "%s" and "%s"' + % (str(result), str(value))) def assert_no_active_block_jobs(self): result = self.vm.qmp('query-block-jobs') -- 2.20.1
From: Max Reitz <mreitz@redhat.com> log() is in the current module, there is no need to prefix it. In fact, doing so may make VM.run_job() unusable in tests that never use iotests.log() themselves. Signed-off-by: Max Reitz <mreitz@redhat.com> Reviewed-by: Alberto Garcia <berto@igalia.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- tests/qemu-iotests/iotests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index XXXXXXX..XXXXXXX 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -XXX,XX +XXX,XX @@ class VM(qtest.QEMUQtestMachine): elif status == 'null': return error else: - iotests.log(ev) + log(ev) def node_info(self, node_name): nodes = self.qmp('query-named-block-nodes') -- 2.20.1
From: Max Reitz <mreitz@redhat.com> Sometimes, 245 fails for me because some stream job has already finished while the test expects it to still be active. (With -c none, it fails basically every time.) The most reliable way to fix this is to simply set auto_finalize=false so the job will remain in the block graph as long as we need it. This allows us to drop the rate limiting, too, which makes the test faster. The only problem with this is that there is a single place that yields a different error message depending on whether the stream job is still copying data (so COR is enabled) or not (COR has been disabled, but the job still has the WRITE_UNCHANGED permission on the target node). We can easily address that by expecting either error message. Note that we do not need auto_finalize=false (or rate limiting) for the active commit job, because It never completes without an explicit block-job-complete anyway. Signed-off-by: Max Reitz <mreitz@redhat.com> Reviewed-by: Alberto Garcia <berto@igalia.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- tests/qemu-iotests/245 | 22 ++++++++++++++-------- tests/qemu-iotests/245.out | 12 ++++++++++++ 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/tests/qemu-iotests/245 b/tests/qemu-iotests/245 index XXXXXXX..XXXXXXX 100644 --- a/tests/qemu-iotests/245 +++ b/tests/qemu-iotests/245 @@ -XXX,XX +XXX,XX @@ class TestBlockdevReopen(iotests.QMPTestCase): # hd2 <- hd0 result = self.vm.qmp('block-stream', conv_keys = True, job_id = 'stream0', - device = 'hd0', base_node = 'hd2', speed = 512 * 1024) + device = 'hd0', base_node = 'hd2', + auto_finalize = False) self.assert_qmp(result, 'return', {}) # We can't remove hd2 while the stream job is ongoing @@ -XXX,XX +XXX,XX @@ class TestBlockdevReopen(iotests.QMPTestCase): opts['backing'] = None self.reopen(opts, {}, "Cannot change 'backing' link from 'hd0' to 'hd1'") - self.wait_until_completed(drive = 'stream0') + self.vm.run_job('stream0', auto_finalize = False, auto_dismiss = True) # Reopen the chain during a block-stream job (from hd2 to hd1) def test_block_stream_4(self): @@ -XXX,XX +XXX,XX @@ class TestBlockdevReopen(iotests.QMPTestCase): # hd1 <- hd0 result = self.vm.qmp('block-stream', conv_keys = True, job_id = 'stream0', - device = 'hd1', speed = 512 * 1024) + device = 'hd1', auto_finalize = False) self.assert_qmp(result, 'return', {}) # We can't reopen with the original options because that would # make hd1 read-only and block-stream requires it to be read-write - self.reopen(opts, {}, "Can't set node 'hd1' to r/o with copy-on-read enabled") + # (Which error message appears depends on whether the stream job is + # already done with copying at this point.) + self.reopen(opts, {}, + ["Can't set node 'hd1' to r/o with copy-on-read enabled", + "Cannot make block node read-only, there is a writer on it"]) # We can't remove hd2 while the stream job is ongoing opts['backing']['backing'] = None @@ -XXX,XX +XXX,XX @@ class TestBlockdevReopen(iotests.QMPTestCase): opts['backing'] = None self.reopen(opts) - self.wait_until_completed(drive = 'stream0') + self.vm.run_job('stream0', auto_finalize = False, auto_dismiss = True) # Reopen the chain during a block-commit job (from hd0 to hd2) def test_block_commit_1(self): @@ -XXX,XX +XXX,XX @@ class TestBlockdevReopen(iotests.QMPTestCase): self.assert_qmp(result, 'return', {}) result = self.vm.qmp('block-commit', conv_keys = True, job_id = 'commit0', - device = 'hd0', speed = 1024 * 1024) + device = 'hd0') self.assert_qmp(result, 'return', {}) # We can't remove hd2 while the commit job is ongoing @@ -XXX,XX +XXX,XX @@ class TestBlockdevReopen(iotests.QMPTestCase): self.assert_qmp(result, 'return', {}) result = self.vm.qmp('block-commit', conv_keys = True, job_id = 'commit0', - device = 'hd0', top_node = 'hd1', speed = 1024 * 1024) + device = 'hd0', top_node = 'hd1', + auto_finalize = False) self.assert_qmp(result, 'return', {}) # We can't remove hd2 while the commit job is ongoing @@ -XXX,XX +XXX,XX @@ class TestBlockdevReopen(iotests.QMPTestCase): self.reopen(opts, {}, "Cannot change backing link if 'hd0' has an implicit backing file") # hd2 <- hd0 - self.wait_until_completed(drive = 'commit0') + self.vm.run_job('commit0', auto_finalize = False, auto_dismiss = True) self.assert_qmp(self.get_node('hd0'), 'ro', False) self.assertEqual(self.get_node('hd1'), None) diff --git a/tests/qemu-iotests/245.out b/tests/qemu-iotests/245.out index XXXXXXX..XXXXXXX 100644 --- a/tests/qemu-iotests/245.out +++ b/tests/qemu-iotests/245.out @@ -XXX,XX +XXX,XX @@ Ran 18 tests OK +{"execute": "job-finalize", "arguments": {"id": "commit0"}} +{"return": {}} +{"data": {"id": "commit0", "type": "commit"}, "event": "BLOCK_JOB_PENDING", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} +{"data": {"device": "commit0", "len": 3145728, "offset": 3145728, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} +{"execute": "job-finalize", "arguments": {"id": "stream0"}} +{"return": {}} +{"data": {"id": "stream0", "type": "stream"}, "event": "BLOCK_JOB_PENDING", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} +{"data": {"device": "stream0", "len": 3145728, "offset": 3145728, "speed": 0, "type": "stream"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} +{"execute": "job-finalize", "arguments": {"id": "stream0"}} +{"return": {}} +{"data": {"id": "stream0", "type": "stream"}, "event": "BLOCK_JOB_PENDING", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} +{"data": {"device": "stream0", "len": 3145728, "offset": 3145728, "speed": 0, "type": "stream"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -- 2.20.1
The following changes since commit 13356edb87506c148b163b8c7eb0695647d00c2a: Merge tag 'block-pull-request' of https://gitlab.com/stefanha/qemu into staging (2023-01-24 09:45:33 +0000) are available in the Git repository at: https://repo.or.cz/qemu/kevin.git tags/for-upstream for you to fetch changes up to d570177b50c389f379f93183155a27d44856ab46: qemu-img: Change info key names for protocol nodes (2023-02-01 16:52:33 +0100) v4: - Fixed the 'qemu-img-close-errors' test case to run only on Linux and only with the file protocol, use qemu-io instead of truncate v3: - Make the compiler happier on BSD and CentOS Stream 8 v2: - Rebased to resolve merge conflicts in coroutine.h ---------------------------------------------------------------- Block layer patches - qemu-img info: Show protocol-level information - Move more functions to coroutines - Make coroutine annotations ready for static analysis - qemu-img: Fix exit code for errors closing the image - qcow2 bitmaps: Fix theoretical corruption in error path - pflash: Only load non-zero parts of backend image to save memory - Code cleanup and test case improvements ---------------------------------------------------------------- Alberto Faria (2): coroutine: annotate coroutine_fn for libclang block: Add no_coroutine_fn and coroutine_mixed_fn marker Emanuele Giuseppe Esposito (14): block-coroutine-wrapper: support void functions block: Convert bdrv_io_plug() to co_wrapper block: Convert bdrv_io_unplug() to co_wrapper block: Convert bdrv_is_inserted() to co_wrapper block: Rename refresh_total_sectors to bdrv_refresh_total_sectors block: Convert bdrv_refresh_total_sectors() to co_wrapper_mixed block-backend: use bdrv_getlength instead of blk_getlength block: use bdrv_co_refresh_total_sectors when possible block: Convert bdrv_get_allocated_file_size() to co_wrapper block: Convert bdrv_get_info() to co_wrapper_mixed block: Convert bdrv_eject() to co_wrapper block: Convert bdrv_lock_medium() to co_wrapper block: Convert bdrv_debug_event() to co_wrapper_mixed block: Rename bdrv_load/save_vmstate() to bdrv_co_load/save_vmstate() Hanna Reitz (12): block: Improve empty format-specific info dump block/file: Add file-specific image info block/vmdk: Change extent info type block: Split BlockNodeInfo off of ImageInfo qemu-img: Use BlockNodeInfo block/qapi: Let bdrv_query_image_info() recurse block/qapi: Introduce BlockGraphInfo block/qapi: Add indentation to bdrv_node_info_dump() iotests: Filter child node information iotests/106, 214, 308: Read only one size line qemu-img: Let info print block graph qemu-img: Change info key names for protocol nodes Kevin Wolf (4): qcow2: Fix theoretical corruption in store_bitmap() error path qemu-img commit: Report errors while closing the image qemu-img bitmap: Report errors while closing the image qemu-iotests: Test qemu-img bitmap/commit exit code on error Paolo Bonzini (2): qemu-io: do not reinvent the blk_pwrite_zeroes wheel block: remove bdrv_coroutine_enter Philippe Mathieu-Daudé (1): block/nbd: Add missing <qemu/bswap.h> include Thomas Huth (2): tests/qemu-iotests/312: Mark "quorum" as required driver tests/qemu-iotests/262: Check for availability of "blkverify" first Xiang Zheng (1): pflash: Only read non-zero parts of backend image qapi/block-core.json | 123 +++++++- include/block/block-common.h | 11 +- include/block/block-io.h | 41 ++- include/block/block_int-common.h | 26 +- include/block/block_int-io.h | 5 +- include/block/nbd.h | 1 + include/block/qapi.h | 14 +- include/qemu/osdep.h | 44 +++ include/sysemu/block-backend-io.h | 31 +- block.c | 88 +++--- block/blkdebug.c | 11 +- block/blkio.c | 15 +- block/blklogwrites.c | 6 +- block/blkreplay.c | 6 +- block/blkverify.c | 6 +- block/block-backend.c | 38 +-- block/commit.c | 4 +- block/copy-on-read.c | 18 +- block/crypto.c | 14 +- block/curl.c | 10 +- block/file-posix.c | 137 +++++---- block/file-win32.c | 18 +- block/filter-compress.c | 20 +- block/gluster.c | 23 +- block/io.c | 76 ++--- block/iscsi.c | 17 +- block/mirror.c | 6 +- block/monitor/block-hmp-cmds.c | 2 +- block/nbd.c | 8 +- block/nfs.c | 4 +- block/null.c | 13 +- block/nvme.c | 14 +- block/preallocate.c | 16 +- block/qapi.c | 317 ++++++++++++++++----- block/qcow.c | 5 +- block/qcow2-bitmap.c | 5 +- block/qcow2-refcount.c | 2 +- block/qcow2.c | 17 +- block/qed.c | 11 +- block/quorum.c | 8 +- block/raw-format.c | 25 +- block/rbd.c | 9 +- block/replication.c | 6 +- block/ssh.c | 4 +- block/throttle.c | 6 +- block/vdi.c | 7 +- block/vhdx.c | 5 +- block/vmdk.c | 22 +- block/vpc.c | 5 +- blockdev.c | 8 +- hw/block/block.c | 36 ++- hw/scsi/scsi-disk.c | 5 + qemu-img.c | 100 +++++-- qemu-io-cmds.c | 62 +--- tests/unit/test-block-iothread.c | 3 + scripts/block-coroutine-wrapper.py | 20 +- tests/qemu-iotests/iotests.py | 18 +- block/meson.build | 1 + tests/qemu-iotests/065 | 2 +- tests/qemu-iotests/106 | 4 +- tests/qemu-iotests/214 | 6 +- tests/qemu-iotests/262 | 3 +- tests/qemu-iotests/302.out | 5 + tests/qemu-iotests/308 | 4 +- tests/qemu-iotests/312 | 1 + tests/qemu-iotests/common.filter | 22 +- tests/qemu-iotests/common.rc | 22 +- tests/qemu-iotests/tests/qemu-img-close-errors | 96 +++++++ tests/qemu-iotests/tests/qemu-img-close-errors.out | 23 ++ 69 files changed, 1209 insertions(+), 552 deletions(-) create mode 100755 tests/qemu-iotests/tests/qemu-img-close-errors create mode 100644 tests/qemu-iotests/tests/qemu-img-close-errors.out