:p
atchew
Login
The following changes since commit 143c2e0432859826c9e8d5b2baa307355f1a5332: Merge remote-tracking branch 'remotes/maxreitz/tags/pull-block-2021-07-19' into staging (2021-07-19 19:06:05 +0100) are available in the Git repository at: git://repo.or.cz/qemu/kevin.git tags/for-upstream for you to fetch changes up to d21471696b07f30cb00453709d055a25c1afde85: iotests/307: Test iothread conflict for exports (2021-07-20 16:49:50 +0200) ---------------------------------------------------------------- Block layer patches - mirror: Fix active mirror deadlock - replication: Fix crashes due to operations on wrong BdrvChild - configure: Add option to use driver whitelist even in tools - vvfat: Fix crash when opening image read-write - export: Fix crash in error path with fixed-iothread=false ---------------------------------------------------------------- Kevin Wolf (1): block: Add option to use driver whitelist even in tools Lukas Straub (4): replication: Remove s->active_disk replication: Reduce usage of s->hidden_disk and s->secondary_disk replication: Properly attach children replication: Remove workaround Max Reitz (2): block/export: Conditionally ignore set-context error iotests/307: Test iothread conflict for exports Vladimir Sementsov-Ogievskiy (4): block/mirror: set .co for active-write MirrorOp objects iotest 151: add test-case that shows active mirror dead-lock block/mirror: fix active mirror dead-lock in mirror_wait_on_conflicts block/vvfat: fix: drop backing configure | 14 +++++- block.c | 3 ++ block/export/export.c | 5 +- block/mirror.c | 13 ++++++ block/replication.c | 111 +++++++++++++++++++++++++++------------------ block/vvfat.c | 43 ++---------------- meson.build | 1 + tests/qemu-iotests/151 | 54 +++++++++++++++++++++- tests/qemu-iotests/151.out | 4 +- tests/qemu-iotests/307 | 15 ++++++ tests/qemu-iotests/307.out | 8 ++++ 11 files changed, 182 insertions(+), 89 deletions(-)
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> This field is unused, but it very helpful for debugging. Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> Message-Id: <20210702211636.228981-2-vsementsov@virtuozzo.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- block/mirror.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/mirror.c b/block/mirror.c index XXXXXXX..XXXXXXX 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -XXX,XX +XXX,XX @@ static MirrorOp *coroutine_fn active_write_prepare(MirrorBlockJob *s, .bytes = bytes, .is_active_write = true, .is_in_flight = true, + .co = qemu_coroutine_self(), }; qemu_co_queue_init(&op->waiting_requests); QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next); -- 2.31.1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> There is a dead-lock in active mirror: when we have parallel intersecting requests (note that non intersecting requests may be considered intersecting after aligning to mirror granularity), it may happen that request A waits request B in mirror_wait_on_conflicts() and request B waits for A. Look at the test for details. Test now dead-locks, that's why it's disabled. Next commit will fix mirror and enable the test. Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> Message-Id: <20210702211636.228981-3-vsementsov@virtuozzo.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- tests/qemu-iotests/151 | 62 ++++++++++++++++++++++++++++++++++++-- tests/qemu-iotests/151.out | 4 +-- 2 files changed, 62 insertions(+), 4 deletions(-) diff --git a/tests/qemu-iotests/151 b/tests/qemu-iotests/151 index XXXXXXX..XXXXXXX 100755 --- a/tests/qemu-iotests/151 +++ b/tests/qemu-iotests/151 @@ -XXX,XX +XXX,XX @@ class TestActiveMirror(iotests.QMPTestCase): 'if': 'none', 'node-name': 'source-node', 'driver': iotests.imgfmt, - 'file': {'driver': 'file', - 'filename': source_img}} + 'file': {'driver': 'blkdebug', + 'image': {'driver': 'file', + 'filename': source_img}}} blk_target = {'node-name': 'target-node', 'driver': iotests.imgfmt, @@ -XXX,XX +XXX,XX @@ class TestActiveMirror(iotests.QMPTestCase): self.potential_writes_in_flight = False + def testIntersectingActiveIO(self): + # FIXME: test-case is dead-locking. To reproduce dead-lock just drop + # this return statement + return + + # Fill the source image + result = self.vm.hmp_qemu_io('source', 'write -P 1 0 2M') + + # Start the block job (very slowly) + result = self.vm.qmp('blockdev-mirror', + job_id='mirror', + filter_node_name='mirror-node', + device='source-node', + target='target-node', + sync='full', + copy_mode='write-blocking', + speed=1) + + self.vm.hmp_qemu_io('source', 'break write_aio A') + self.vm.hmp_qemu_io('source', 'aio_write 0 1M') # 1 + self.vm.hmp_qemu_io('source', 'wait_break A') + self.vm.hmp_qemu_io('source', 'aio_write 0 2M') # 2 + self.vm.hmp_qemu_io('source', 'aio_write 0 2M') # 3 + + # Now 2 and 3 are in mirror_wait_on_conflicts, waiting for 1 + + self.vm.hmp_qemu_io('source', 'break write_aio B') + self.vm.hmp_qemu_io('source', 'aio_write 1M 2M') # 4 + self.vm.hmp_qemu_io('source', 'wait_break B') + + # 4 doesn't wait for 2 and 3, because they didn't yet set + # in_flight_bitmap. So, nothing prevents 4 to go except for our + # break-point B. + + self.vm.hmp_qemu_io('source', 'resume A') + + # Now we resumed 1, so 2 and 3 goes to the next iteration of while loop + # in mirror_wait_on_conflicts(). They don't exit, as bitmap is dirty + # due to request 4. And they start to wait: 2 wait for 3, 3 wait for 2 + # - DEAD LOCK. + # Note that it's important that we add request 4 at last: requests are + # appended to the list, so we are sure that 4 is last in the list, so 2 + # and 3 now waits for each other, not for 4. + + self.vm.hmp_qemu_io('source', 'resume B') + + # Resuming 4 doesn't help, 2 and 3 already dead-locked + # To check the dead-lock run: + # gdb -p $(pidof qemu-system-x86_64) -ex 'set $job=(MirrorBlockJob *)jobs.lh_first' -ex 'p *$job->ops_in_flight.tqh_first' -ex 'p *$job->ops_in_flight.tqh_first->next.tqe_next' + # You'll see two MirrorOp objects waiting on each other + + result = self.vm.qmp('block-job-set-speed', device='mirror', speed=0) + self.assert_qmp(result, 'return', {}) + self.complete_and_wait(drive='mirror') + + self.potential_writes_in_flight = False + if __name__ == '__main__': iotests.main(supported_fmts=['qcow2', 'raw'], diff --git a/tests/qemu-iotests/151.out b/tests/qemu-iotests/151.out index XXXXXXX..XXXXXXX 100644 --- a/tests/qemu-iotests/151.out +++ b/tests/qemu-iotests/151.out @@ -XXX,XX +XXX,XX @@ -... +.... ---------------------------------------------------------------------- -Ran 3 tests +Ran 4 tests OK -- 2.31.1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> It's possible that requests start to wait each other in mirror_wait_on_conflicts(). To avoid it let's use same technique as in block/io.c in bdrv_wait_serialising_requests_locked() / bdrv_find_conflicting_request(): don't wait on intersecting request if it is already waiting for some other request. For details of the dead-lock look at testIntersectingActiveIO() test-case which we actually fixing now. Fixes: d06107ade0ce74dc39739bac80de84b51ec18546 Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> Message-Id: <20210702211636.228981-4-vsementsov@virtuozzo.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- block/mirror.c | 12 ++++++++++++ tests/qemu-iotests/151 | 18 +++++------------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/block/mirror.c b/block/mirror.c index XXXXXXX..XXXXXXX 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -XXX,XX +XXX,XX @@ struct MirrorOp { bool is_in_flight; CoQueue waiting_requests; Coroutine *co; + MirrorOp *waiting_for_op; QTAILQ_ENTRY(MirrorOp) next; }; @@ -XXX,XX +XXX,XX @@ static void coroutine_fn mirror_wait_on_conflicts(MirrorOp *self, if (ranges_overlap(self_start_chunk, self_nb_chunks, op_start_chunk, op_nb_chunks)) { + /* + * If the operation is already (indirectly) waiting for us, or + * will wait for us as soon as it wakes up, then just go on + * (instead of producing a deadlock in the former case). + */ + if (op->waiting_for_op) { + continue; + } + + self->waiting_for_op = op; qemu_co_queue_wait(&op->waiting_requests, NULL); + self->waiting_for_op = NULL; break; } } diff --git a/tests/qemu-iotests/151 b/tests/qemu-iotests/151 index XXXXXXX..XXXXXXX 100755 --- a/tests/qemu-iotests/151 +++ b/tests/qemu-iotests/151 @@ -XXX,XX +XXX,XX @@ class TestActiveMirror(iotests.QMPTestCase): self.potential_writes_in_flight = False def testIntersectingActiveIO(self): - # FIXME: test-case is dead-locking. To reproduce dead-lock just drop - # this return statement - return - # Fill the source image result = self.vm.hmp_qemu_io('source', 'write -P 1 0 2M') @@ -XXX,XX +XXX,XX @@ class TestActiveMirror(iotests.QMPTestCase): # Now we resumed 1, so 2 and 3 goes to the next iteration of while loop # in mirror_wait_on_conflicts(). They don't exit, as bitmap is dirty - # due to request 4. And they start to wait: 2 wait for 3, 3 wait for 2 - # - DEAD LOCK. - # Note that it's important that we add request 4 at last: requests are - # appended to the list, so we are sure that 4 is last in the list, so 2 - # and 3 now waits for each other, not for 4. + # due to request 4. + # In the past at that point 2 and 3 would wait for each other producing + # a dead-lock. Now this is fixed and they will wait for request 4. self.vm.hmp_qemu_io('source', 'resume B') - # Resuming 4 doesn't help, 2 and 3 already dead-locked - # To check the dead-lock run: - # gdb -p $(pidof qemu-system-x86_64) -ex 'set $job=(MirrorBlockJob *)jobs.lh_first' -ex 'p *$job->ops_in_flight.tqh_first' -ex 'p *$job->ops_in_flight.tqh_first->next.tqe_next' - # You'll see two MirrorOp objects waiting on each other + # After resuming 4, one of 2 and 3 goes first and set in_flight_bitmap, + # so the other will wait for it. result = self.vm.qmp('block-job-set-speed', device='mirror', speed=0) self.assert_qmp(result, 'return', {}) -- 2.31.1
Currently, the block driver whitelists are only applied for the system emulator. All other binaries still give unrestricted access to all block drivers. There are use cases where this made sense because the main concern was avoiding customers running VMs on less optimised block drivers and getting bad performance. Allowing the same image format e.g. as a target for 'qemu-img convert' is not a problem then. However, if the concern is the supportability of the driver in general, either in full or when used read-write, not applying the list driver whitelist in tools doesn't help - especially since qemu-nbd and qemu-storage-daemon now give access to more or less the same operations in block drivers as running a system emulator. In order to address this, introduce a new configure option that enforces the driver whitelist in all binaries. Signed-off-by: Kevin Wolf <kwolf@redhat.com> Message-Id: <20210709164141.254097-1-kwolf@redhat.com> Reviewed-by: Eric Blake <eblake@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- configure | 14 ++++++++++++-- block.c | 3 +++ meson.build | 1 + 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/configure b/configure index XXXXXXX..XXXXXXX 100755 --- a/configure +++ b/configure @@ -XXX,XX +XXX,XX @@ cross_prefix="" audio_drv_list="" block_drv_rw_whitelist="" block_drv_ro_whitelist="" +block_drv_whitelist_tools="no" host_cc="cc" audio_win_int="" libs_qga="" @@ -XXX,XX +XXX,XX @@ for opt do ;; --block-drv-ro-whitelist=*) block_drv_ro_whitelist=$(echo "$optarg" | sed -e 's/,/ /g') ;; + --enable-block-drv-whitelist-in-tools) block_drv_whitelist_tools="yes" + ;; + --disable-block-drv-whitelist-in-tools) block_drv_whitelist_tools="no" + ;; --enable-debug-tcg) debug_tcg="yes" ;; --disable-debug-tcg) debug_tcg="no" @@ -XXX,XX +XXX,XX @@ Advanced options (experts only): --block-drv-whitelist=L Same as --block-drv-rw-whitelist=L --block-drv-rw-whitelist=L set block driver read-write whitelist - (affects only QEMU, not qemu-img) + (by default affects only QEMU, not tools like qemu-img) --block-drv-ro-whitelist=L set block driver read-only whitelist - (affects only QEMU, not qemu-img) + (by default affects only QEMU, not tools like qemu-img) + --enable-block-drv-whitelist-in-tools + use block whitelist also in tools instead of only QEMU --enable-trace-backends=B Set trace backend Available backends: $trace_backend_list --with-trace-file=NAME Full PATH,NAME of file to store traces @@ -XXX,XX +XXX,XX @@ if test "$audio_win_int" = "yes" ; then fi echo "CONFIG_BDRV_RW_WHITELIST=$block_drv_rw_whitelist" >> $config_host_mak echo "CONFIG_BDRV_RO_WHITELIST=$block_drv_ro_whitelist" >> $config_host_mak +if test "$block_drv_whitelist_tools" = "yes" ; then + echo "CONFIG_BDRV_WHITELIST_TOOLS=y" >> $config_host_mak +fi if test "$xfs" = "yes" ; then echo "CONFIG_XFS=y" >> $config_host_mak fi diff --git a/block.c b/block.c index XXXXXXX..XXXXXXX 100644 --- a/block.c +++ b/block.c @@ -XXX,XX +XXX,XX @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, void bdrv_init(void) { +#ifdef CONFIG_BDRV_WHITELIST_TOOLS + use_bdrv_whitelist = 1; +#endif module_call_init(MODULE_INIT_BLOCK); } diff --git a/meson.build b/meson.build index XXXXXXX..XXXXXXX 100644 --- a/meson.build +++ b/meson.build @@ -XXX,XX +XXX,XX @@ summary_info += {'coroutine pool': config_host['CONFIG_COROUTINE_POOL'] == '1 if have_block summary_info += {'Block whitelist (rw)': config_host['CONFIG_BDRV_RW_WHITELIST']} summary_info += {'Block whitelist (ro)': config_host['CONFIG_BDRV_RO_WHITELIST']} + summary_info += {'Use block whitelist in tools': config_host.has_key('CONFIG_BDRV_WHITELIST_TOOLS')} summary_info += {'VirtFS support': have_virtfs} summary_info += {'build virtiofs daemon': have_virtiofsd} summary_info += {'Live block migration': config_host.has_key('CONFIG_LIVE_BLOCK_MIGRATION')} -- 2.31.1
From: Lukas Straub <lukasstraub2@web.de> s->active_disk is bs->file. Remove it and use local variables instead. Signed-off-by: Lukas Straub <lukasstraub2@web.de> Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> Message-Id: <2534f867ea9be5b666dfce19744b7d4e2b96c976.1626619393.git.lukasstraub2@web.de> Reviewed-by: Zhang Chen <chen.zhang@intel.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- block/replication.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/block/replication.c b/block/replication.c index XXXXXXX..XXXXXXX 100644 --- a/block/replication.c +++ b/block/replication.c @@ -XXX,XX +XXX,XX @@ typedef enum { typedef struct BDRVReplicationState { ReplicationMode mode; ReplicationStage stage; - BdrvChild *active_disk; BlockJob *commit_job; BdrvChild *hidden_disk; BdrvChild *secondary_disk; @@ -XXX,XX +XXX,XX @@ out: return ret; } -static void secondary_do_checkpoint(BDRVReplicationState *s, Error **errp) +static void secondary_do_checkpoint(BlockDriverState *bs, Error **errp) { + BDRVReplicationState *s = bs->opaque; + BdrvChild *active_disk = bs->file; Error *local_err = NULL; int ret; @@ -XXX,XX +XXX,XX @@ static void secondary_do_checkpoint(BDRVReplicationState *s, Error **errp) return; } - if (!s->active_disk->bs->drv) { + if (!active_disk->bs->drv) { error_setg(errp, "Active disk %s is ejected", - s->active_disk->bs->node_name); + active_disk->bs->node_name); return; } - ret = bdrv_make_empty(s->active_disk, errp); + ret = bdrv_make_empty(active_disk, errp); if (ret < 0) { return; } @@ -XXX,XX +XXX,XX @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, BlockDriverState *bs = rs->opaque; BDRVReplicationState *s; BlockDriverState *top_bs; + BdrvChild *active_disk; int64_t active_length, hidden_length, disk_length; AioContext *aio_context; Error *local_err = NULL; @@ -XXX,XX +XXX,XX @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, case REPLICATION_MODE_PRIMARY: break; case REPLICATION_MODE_SECONDARY: - s->active_disk = bs->file; - if (!s->active_disk || !s->active_disk->bs || - !s->active_disk->bs->backing) { + active_disk = bs->file; + if (!active_disk || !active_disk->bs || !active_disk->bs->backing) { error_setg(errp, "Active disk doesn't have backing file"); aio_context_release(aio_context); return; } - s->hidden_disk = s->active_disk->bs->backing; + s->hidden_disk = active_disk->bs->backing; if (!s->hidden_disk->bs || !s->hidden_disk->bs->backing) { error_setg(errp, "Hidden disk doesn't have backing file"); aio_context_release(aio_context); @@ -XXX,XX +XXX,XX @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, } /* verify the length */ - active_length = bdrv_getlength(s->active_disk->bs); + active_length = bdrv_getlength(active_disk->bs); hidden_length = bdrv_getlength(s->hidden_disk->bs); disk_length = bdrv_getlength(s->secondary_disk->bs); if (active_length < 0 || hidden_length < 0 || disk_length < 0 || @@ -XXX,XX +XXX,XX @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, } /* Must be true, or the bdrv_getlength() calls would have failed */ - assert(s->active_disk->bs->drv && s->hidden_disk->bs->drv); + assert(active_disk->bs->drv && s->hidden_disk->bs->drv); - if (!s->active_disk->bs->drv->bdrv_make_empty || + if (!active_disk->bs->drv->bdrv_make_empty || !s->hidden_disk->bs->drv->bdrv_make_empty) { error_setg(errp, "Active disk or hidden disk doesn't support make_empty"); @@ -XXX,XX +XXX,XX @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, s->stage = BLOCK_REPLICATION_RUNNING; if (s->mode == REPLICATION_MODE_SECONDARY) { - secondary_do_checkpoint(s, errp); + secondary_do_checkpoint(bs, errp); } s->error = 0; @@ -XXX,XX +XXX,XX @@ static void replication_do_checkpoint(ReplicationState *rs, Error **errp) } if (s->mode == REPLICATION_MODE_SECONDARY) { - secondary_do_checkpoint(s, errp); + secondary_do_checkpoint(bs, errp); } aio_context_release(aio_context); } @@ -XXX,XX +XXX,XX @@ static void replication_done(void *opaque, int ret) if (ret == 0) { s->stage = BLOCK_REPLICATION_DONE; - s->active_disk = NULL; s->secondary_disk = NULL; s->hidden_disk = NULL; s->error = 0; @@ -XXX,XX +XXX,XX @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp) } if (!failover) { - secondary_do_checkpoint(s, errp); + secondary_do_checkpoint(bs, errp); s->stage = BLOCK_REPLICATION_DONE; aio_context_release(aio_context); return; @@ -XXX,XX +XXX,XX @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp) s->stage = BLOCK_REPLICATION_FAILOVER; s->commit_job = commit_active_start( - NULL, s->active_disk->bs, s->secondary_disk->bs, + NULL, bs->file->bs, s->secondary_disk->bs, JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT, NULL, replication_done, bs, true, errp); break; -- 2.31.1
From: Lukas Straub <lukasstraub2@web.de> In preparation for the next patch, initialize s->hidden_disk and s->secondary_disk later and replace access to them with local variables in the places where they aren't initialized yet. Signed-off-by: Lukas Straub <lukasstraub2@web.de> Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> Message-Id: <1eb9dc179267207d9c7eccaeb30761758e32e9ab.1626619393.git.lukasstraub2@web.de> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- block/replication.c | 45 ++++++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/block/replication.c b/block/replication.c index XXXXXXX..XXXXXXX 100644 --- a/block/replication.c +++ b/block/replication.c @@ -XXX,XX +XXX,XX @@ static void reopen_backing_file(BlockDriverState *bs, bool writable, Error **errp) { BDRVReplicationState *s = bs->opaque; + BdrvChild *hidden_disk, *secondary_disk; BlockReopenQueue *reopen_queue = NULL; + /* + * s->hidden_disk and s->secondary_disk may not be set yet, as they will + * only be set after the children are writable. + */ + hidden_disk = bs->file->bs->backing; + secondary_disk = hidden_disk->bs->backing; + if (writable) { - s->orig_hidden_read_only = bdrv_is_read_only(s->hidden_disk->bs); - s->orig_secondary_read_only = bdrv_is_read_only(s->secondary_disk->bs); + s->orig_hidden_read_only = bdrv_is_read_only(hidden_disk->bs); + s->orig_secondary_read_only = bdrv_is_read_only(secondary_disk->bs); } - bdrv_subtree_drained_begin(s->hidden_disk->bs); - bdrv_subtree_drained_begin(s->secondary_disk->bs); + bdrv_subtree_drained_begin(hidden_disk->bs); + bdrv_subtree_drained_begin(secondary_disk->bs); if (s->orig_hidden_read_only) { QDict *opts = qdict_new(); qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable); - reopen_queue = bdrv_reopen_queue(reopen_queue, s->hidden_disk->bs, + reopen_queue = bdrv_reopen_queue(reopen_queue, hidden_disk->bs, opts, true); } if (s->orig_secondary_read_only) { QDict *opts = qdict_new(); qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable); - reopen_queue = bdrv_reopen_queue(reopen_queue, s->secondary_disk->bs, + reopen_queue = bdrv_reopen_queue(reopen_queue, secondary_disk->bs, opts, true); } @@ -XXX,XX +XXX,XX @@ static void reopen_backing_file(BlockDriverState *bs, bool writable, } } - bdrv_subtree_drained_end(s->hidden_disk->bs); - bdrv_subtree_drained_end(s->secondary_disk->bs); + bdrv_subtree_drained_end(hidden_disk->bs); + bdrv_subtree_drained_end(secondary_disk->bs); } static void backup_job_cleanup(BlockDriverState *bs) @@ -XXX,XX +XXX,XX @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, BlockDriverState *bs = rs->opaque; BDRVReplicationState *s; BlockDriverState *top_bs; - BdrvChild *active_disk; + BdrvChild *active_disk, *hidden_disk, *secondary_disk; int64_t active_length, hidden_length, disk_length; AioContext *aio_context; Error *local_err = NULL; @@ -XXX,XX +XXX,XX @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, return; } - s->hidden_disk = active_disk->bs->backing; - if (!s->hidden_disk->bs || !s->hidden_disk->bs->backing) { + hidden_disk = active_disk->bs->backing; + if (!hidden_disk->bs || !hidden_disk->bs->backing) { error_setg(errp, "Hidden disk doesn't have backing file"); aio_context_release(aio_context); return; } - s->secondary_disk = s->hidden_disk->bs->backing; - if (!s->secondary_disk->bs || !bdrv_has_blk(s->secondary_disk->bs)) { + secondary_disk = hidden_disk->bs->backing; + if (!secondary_disk->bs || !bdrv_has_blk(secondary_disk->bs)) { error_setg(errp, "The secondary disk doesn't have block backend"); aio_context_release(aio_context); return; @@ -XXX,XX +XXX,XX @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, /* verify the length */ active_length = bdrv_getlength(active_disk->bs); - hidden_length = bdrv_getlength(s->hidden_disk->bs); - disk_length = bdrv_getlength(s->secondary_disk->bs); + hidden_length = bdrv_getlength(hidden_disk->bs); + disk_length = bdrv_getlength(secondary_disk->bs); if (active_length < 0 || hidden_length < 0 || disk_length < 0 || active_length != hidden_length || hidden_length != disk_length) { error_setg(errp, "Active disk, hidden disk, secondary disk's length" @@ -XXX,XX +XXX,XX @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, } /* Must be true, or the bdrv_getlength() calls would have failed */ - assert(active_disk->bs->drv && s->hidden_disk->bs->drv); + assert(active_disk->bs->drv && hidden_disk->bs->drv); if (!active_disk->bs->drv->bdrv_make_empty || - !s->hidden_disk->bs->drv->bdrv_make_empty) { + !hidden_disk->bs->drv->bdrv_make_empty) { error_setg(errp, "Active disk or hidden disk doesn't support make_empty"); aio_context_release(aio_context); @@ -XXX,XX +XXX,XX @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, return; } + s->hidden_disk = hidden_disk; + s->secondary_disk = secondary_disk; + /* start backup job now */ error_setg(&s->blocker, "Block device is in use by internal backup job"); -- 2.31.1
From: Lukas Straub <lukasstraub2@web.de> The replication driver needs access to the children block-nodes of it's child so it can issue bdrv_make_empty() and bdrv_co_pwritev() to manage the replication. However, it does this by directly copying the BdrvChilds, which is wrong. Fix this by properly attaching the block-nodes with bdrv_attach_child() and requesting the required permissions. This ultimatively fixes a potential crash in replication_co_writev(), because it may write to s->secondary_disk if it is in state BLOCK_REPLICATION_FAILOVER_FAILED, without requesting write permissions first. And now the workaround in secondary_do_checkpoint() can be removed. Signed-off-by: Lukas Straub <lukasstraub2@web.de> Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> Message-Id: <5d0539d729afb8072d0d7cde977c5066285591b4.1626619393.git.lukasstraub2@web.de> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- block/replication.c | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/block/replication.c b/block/replication.c index XXXXXXX..XXXXXXX 100644 --- a/block/replication.c +++ b/block/replication.c @@ -XXX,XX +XXX,XX @@ static void replication_child_perm(BlockDriverState *bs, BdrvChild *c, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared) { - *nperm = BLK_PERM_CONSISTENT_READ; + if (role & BDRV_CHILD_PRIMARY) { + *nperm = BLK_PERM_CONSISTENT_READ; + } else { + *nperm = 0; + } + if ((bs->open_flags & (BDRV_O_INACTIVE | BDRV_O_RDWR)) == BDRV_O_RDWR) { *nperm |= BLK_PERM_WRITE; } @@ -XXX,XX +XXX,XX @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, return; } - s->hidden_disk = hidden_disk; - s->secondary_disk = secondary_disk; + bdrv_ref(hidden_disk->bs); + s->hidden_disk = bdrv_attach_child(bs, hidden_disk->bs, "hidden disk", + &child_of_bds, BDRV_CHILD_DATA, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + aio_context_release(aio_context); + return; + } + + bdrv_ref(secondary_disk->bs); + s->secondary_disk = bdrv_attach_child(bs, secondary_disk->bs, + "secondary disk", &child_of_bds, + BDRV_CHILD_DATA, &local_err); + if (local_err) { + error_propagate(errp, local_err); + aio_context_release(aio_context); + return; + } /* start backup job now */ error_setg(&s->blocker, @@ -XXX,XX +XXX,XX @@ static void replication_done(void *opaque, int ret) if (ret == 0) { s->stage = BLOCK_REPLICATION_DONE; + bdrv_unref_child(bs, s->secondary_disk); s->secondary_disk = NULL; + bdrv_unref_child(bs, s->hidden_disk); s->hidden_disk = NULL; s->error = 0; } else { -- 2.31.1
From: Lukas Straub <lukasstraub2@web.de> Remove the workaround introduced in commit 6ecbc6c52672db5c13805735ca02784879ce8285 "replication: Avoid blk_make_empty() on read-only child". It is not needed anymore since s->hidden_disk is guaranteed to be writable when secondary_do_checkpoint() runs. Because replication_start(), _do_checkpoint() and _stop() are only called by COLO migration code and COLO-migration activates all disks via bdrv_invalidate_cache_all() before it calls these functions. Signed-off-by: Lukas Straub <lukasstraub2@web.de> Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> Message-Id: <d3acfad43879e9f376bffa7dd797ae74d0a7c81a.1626619393.git.lukasstraub2@web.de> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- block/replication.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/block/replication.c b/block/replication.c index XXXXXXX..XXXXXXX 100644 --- a/block/replication.c +++ b/block/replication.c @@ -XXX,XX +XXX,XX @@ static void secondary_do_checkpoint(BlockDriverState *bs, Error **errp) return; } - BlockBackend *blk = blk_new(qemu_get_current_aio_context(), - BLK_PERM_WRITE, BLK_PERM_ALL); - blk_insert_bs(blk, s->hidden_disk->bs, &local_err); - if (local_err) { - error_propagate(errp, local_err); - blk_unref(blk); - return; - } - - ret = blk_make_empty(blk, errp); - blk_unref(blk); + ret = bdrv_make_empty(s->hidden_disk, errp); if (ret < 0) { return; } -- 2.31.1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> Most probably this fake backing child doesn't work anyway (see notes about it in a8a4d15c1c34d). Still, since 25f78d9e2de528473d52 drivers are required to set .supports_backing if they want to call bdrv_set_backing_hd, so now vvfat just doesn't work because of this check. Let's finally drop this fake backing file. Fixes: 25f78d9e2de528473d52acfcf7acdfb64e3453d4 Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> Message-Id: <20210715124853.13335-1-vsementsov@virtuozzo.com> Tested-by: John Arbuckle <programmingkidx@gmail.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- block/vvfat.c | 43 ++++--------------------------------------- 1 file changed, 4 insertions(+), 39 deletions(-) diff --git a/block/vvfat.c b/block/vvfat.c index XXXXXXX..XXXXXXX 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -XXX,XX +XXX,XX @@ static int coroutine_fn vvfat_co_block_status(BlockDriverState *bs, return BDRV_BLOCK_DATA; } -static int coroutine_fn -write_target_commit(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) -{ - int ret; - - BDRVVVFATState* s = *((BDRVVVFATState**) bs->opaque); - qemu_co_mutex_lock(&s->lock); - ret = try_commit(s); - qemu_co_mutex_unlock(&s->lock); - - return ret; -} - -static BlockDriver vvfat_write_target = { - .format_name = "vvfat_write_target", - .instance_size = sizeof(void*), - .bdrv_co_pwritev = write_target_commit, -}; - static void vvfat_qcow_options(BdrvChildRole role, bool parent_is_format, int *child_flags, QDict *child_options, int parent_flags, QDict *parent_options) @@ -XXX,XX +XXX,XX @@ static int enable_write_target(BlockDriverState *bs, Error **errp) { BDRVVVFATState *s = bs->opaque; BlockDriver *bdrv_qcow = NULL; - BlockDriverState *backing; QemuOpts *opts = NULL; int ret; int size = sector2cluster(s, s->sector_count); @@ -XXX,XX +XXX,XX @@ static int enable_write_target(BlockDriverState *bs, Error **errp) unlink(s->qcow_filename); #endif - backing = bdrv_new_open_driver(&vvfat_write_target, NULL, BDRV_O_ALLOW_RDWR, - &error_abort); - *(void**) backing->opaque = s; - - bdrv_set_backing_hd(s->bs, backing, &error_abort); - bdrv_unref(backing); - return 0; err: @@ -XXX,XX +XXX,XX @@ static void vvfat_child_perm(BlockDriverState *bs, BdrvChild *c, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared) { - if (role & BDRV_CHILD_DATA) { - /* This is a private node, nobody should try to attach to it */ - *nperm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE; - *nshared = BLK_PERM_WRITE_UNCHANGED; - } else { - assert(role & BDRV_CHILD_COW); - /* The backing file is there so 'commit' can use it. vvfat doesn't - * access it in any way. */ - *nperm = 0; - *nshared = BLK_PERM_ALL; - } + assert(role & BDRV_CHILD_DATA); + /* This is a private node, nobody should try to attach to it */ + *nperm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE; + *nshared = BLK_PERM_WRITE_UNCHANGED; } static void vvfat_close(BlockDriverState *bs) -- 2.31.1
From: Max Reitz <mreitz@redhat.com> When invoking block-export-add with some iothread and fixed-iothread=false, and changing the node's iothread fails, the error is supposed to be ignored. However, it is still stored in *errp, which is wrong. If a second error occurs, the "*errp must be NULL" assertion in error_setv() fails: qemu-system-x86_64: ../util/error.c:59: error_setv: Assertion `*errp == NULL' failed. So if fixed-iothread=false, we should ignore the error by passing NULL to bdrv_try_set_aio_context(). Fixes: f51d23c80af73c95e0ce703ad06a300f1b3d63ef ("block/export: add iothread and fixed-iothread options") Signed-off-by: Max Reitz <mreitz@redhat.com> Message-Id: <20210624083825.29224-2-mreitz@redhat.com> Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- block/export/export.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/block/export/export.c b/block/export/export.c index XXXXXXX..XXXXXXX 100644 --- a/block/export/export.c +++ b/block/export/export.c @@ -XXX,XX +XXX,XX @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp) if (export->has_iothread) { IOThread *iothread; AioContext *new_ctx; + Error **set_context_errp; iothread = iothread_by_id(export->iothread); if (!iothread) { @@ -XXX,XX +XXX,XX @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp) new_ctx = iothread_get_aio_context(iothread); - ret = bdrv_try_set_aio_context(bs, new_ctx, errp); + /* Ignore errors with fixed-iothread=false */ + set_context_errp = fixed_iothread ? errp : NULL; + ret = bdrv_try_set_aio_context(bs, new_ctx, set_context_errp); if (ret == 0) { aio_context_release(ctx); aio_context_acquire(new_ctx); -- 2.31.1
From: Max Reitz <mreitz@redhat.com> Passing fixed-iothread=true should make iothread conflicts fatal, whereas fixed-iothread=false should not. Combine the second case with an error condition that is checked after the iothread is handled, to verify that qemu does not crash if there is such an error after changing the iothread failed. Signed-off-by: Max Reitz <mreitz@redhat.com> Message-Id: <20210624083825.29224-3-mreitz@redhat.com> Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> Tested-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- tests/qemu-iotests/307 | 15 +++++++++++++++ tests/qemu-iotests/307.out | 8 ++++++++ 2 files changed, 23 insertions(+) diff --git a/tests/qemu-iotests/307 b/tests/qemu-iotests/307 index XXXXXXX..XXXXXXX 100755 --- a/tests/qemu-iotests/307 +++ b/tests/qemu-iotests/307 @@ -XXX,XX +XXX,XX @@ with iotests.FilePath('image') as img, \ iotests.log('=== Launch VM ===') vm.add_object('iothread,id=iothread0') + vm.add_object('iothread,id=iothread1') vm.add_blockdev(f'file,filename={img},node-name=file') vm.add_blockdev(f'{iotests.imgfmt},file=file,node-name=fmt') vm.add_blockdev('raw,file=file,node-name=ro,read-only=on') + vm.add_blockdev('null-co,node-name=null') vm.add_device(f'id=scsi0,driver=virtio-scsi,iothread=iothread0') vm.launch() @@ -XXX,XX +XXX,XX @@ with iotests.FilePath('image') as img, \ vm.qmp_log('query-block-exports') iotests.qemu_nbd_list_log('-k', socket) + iotests.log('\n=== Add export with conflicting iothread ===') + + vm.qmp_log('device_add', id='sdb', driver='scsi-hd', drive='null') + + # Should fail because of fixed-iothread + vm.qmp_log('block-export-add', id='export1', type='nbd', node_name='null', + iothread='iothread1', fixed_iothread=True, writable=True) + + # Should ignore the iothread conflict, but then fail because of the + # permission conflict (and not crash) + vm.qmp_log('block-export-add', id='export1', type='nbd', node_name='null', + iothread='iothread1', fixed_iothread=False, writable=True) + iotests.log('\n=== Add a writable export ===') # This fails because share-rw=off diff --git a/tests/qemu-iotests/307.out b/tests/qemu-iotests/307.out index XXXXXXX..XXXXXXX 100644 --- a/tests/qemu-iotests/307.out +++ b/tests/qemu-iotests/307.out @@ -XXX,XX +XXX,XX @@ exports available: 1 base:allocation +=== Add export with conflicting iothread === +{"execute": "device_add", "arguments": {"drive": "null", "driver": "scsi-hd", "id": "sdb"}} +{"return": {}} +{"execute": "block-export-add", "arguments": {"fixed-iothread": true, "id": "export1", "iothread": "iothread1", "node-name": "null", "type": "nbd", "writable": true}} +{"error": {"class": "GenericError", "desc": "Cannot change iothread of active block backend"}} +{"execute": "block-export-add", "arguments": {"fixed-iothread": false, "id": "export1", "iothread": "iothread1", "node-name": "null", "type": "nbd", "writable": true}} +{"error": {"class": "GenericError", "desc": "Permission conflict on node 'null': permissions 'write' are both required by an unnamed block device (uses node 'null' as 'root' child) and unshared by block device 'sdb' (uses node 'null' as 'root' child)."}} + === Add a writable export === {"execute": "block-export-add", "arguments": {"description": "This is the writable second export", "id": "export1", "name": "export1", "node-name": "fmt", "type": "nbd", "writable": true, "writethrough": true}} {"error": {"class": "GenericError", "desc": "Permission conflict on node 'fmt': permissions 'write' are both required by an unnamed block device (uses node 'fmt' as 'root' child) and unshared by block device 'sda' (uses node 'fmt' as 'root' child)."}} -- 2.31.1
The following changes since commit 13356edb87506c148b163b8c7eb0695647d00c2a: Merge tag 'block-pull-request' of https://gitlab.com/stefanha/qemu into staging (2023-01-24 09:45:33 +0000) are available in the Git repository at: https://repo.or.cz/qemu/kevin.git tags/for-upstream for you to fetch changes up to d570177b50c389f379f93183155a27d44856ab46: qemu-img: Change info key names for protocol nodes (2023-02-01 16:52:33 +0100) v4: - Fixed the 'qemu-img-close-errors' test case to run only on Linux and only with the file protocol, use qemu-io instead of truncate v3: - Make the compiler happier on BSD and CentOS Stream 8 v2: - Rebased to resolve merge conflicts in coroutine.h ---------------------------------------------------------------- Block layer patches - qemu-img info: Show protocol-level information - Move more functions to coroutines - Make coroutine annotations ready for static analysis - qemu-img: Fix exit code for errors closing the image - qcow2 bitmaps: Fix theoretical corruption in error path - pflash: Only load non-zero parts of backend image to save memory - Code cleanup and test case improvements ---------------------------------------------------------------- Alberto Faria (2): coroutine: annotate coroutine_fn for libclang block: Add no_coroutine_fn and coroutine_mixed_fn marker Emanuele Giuseppe Esposito (14): block-coroutine-wrapper: support void functions block: Convert bdrv_io_plug() to co_wrapper block: Convert bdrv_io_unplug() to co_wrapper block: Convert bdrv_is_inserted() to co_wrapper block: Rename refresh_total_sectors to bdrv_refresh_total_sectors block: Convert bdrv_refresh_total_sectors() to co_wrapper_mixed block-backend: use bdrv_getlength instead of blk_getlength block: use bdrv_co_refresh_total_sectors when possible block: Convert bdrv_get_allocated_file_size() to co_wrapper block: Convert bdrv_get_info() to co_wrapper_mixed block: Convert bdrv_eject() to co_wrapper block: Convert bdrv_lock_medium() to co_wrapper block: Convert bdrv_debug_event() to co_wrapper_mixed block: Rename bdrv_load/save_vmstate() to bdrv_co_load/save_vmstate() Hanna Reitz (12): block: Improve empty format-specific info dump block/file: Add file-specific image info block/vmdk: Change extent info type block: Split BlockNodeInfo off of ImageInfo qemu-img: Use BlockNodeInfo block/qapi: Let bdrv_query_image_info() recurse block/qapi: Introduce BlockGraphInfo block/qapi: Add indentation to bdrv_node_info_dump() iotests: Filter child node information iotests/106, 214, 308: Read only one size line qemu-img: Let info print block graph qemu-img: Change info key names for protocol nodes Kevin Wolf (4): qcow2: Fix theoretical corruption in store_bitmap() error path qemu-img commit: Report errors while closing the image qemu-img bitmap: Report errors while closing the image qemu-iotests: Test qemu-img bitmap/commit exit code on error Paolo Bonzini (2): qemu-io: do not reinvent the blk_pwrite_zeroes wheel block: remove bdrv_coroutine_enter Philippe Mathieu-Daudé (1): block/nbd: Add missing <qemu/bswap.h> include Thomas Huth (2): tests/qemu-iotests/312: Mark "quorum" as required driver tests/qemu-iotests/262: Check for availability of "blkverify" first Xiang Zheng (1): pflash: Only read non-zero parts of backend image qapi/block-core.json | 123 +++++++- include/block/block-common.h | 11 +- include/block/block-io.h | 41 ++- include/block/block_int-common.h | 26 +- include/block/block_int-io.h | 5 +- include/block/nbd.h | 1 + include/block/qapi.h | 14 +- include/qemu/osdep.h | 44 +++ include/sysemu/block-backend-io.h | 31 +- block.c | 88 +++--- block/blkdebug.c | 11 +- block/blkio.c | 15 +- block/blklogwrites.c | 6 +- block/blkreplay.c | 6 +- block/blkverify.c | 6 +- block/block-backend.c | 38 +-- block/commit.c | 4 +- block/copy-on-read.c | 18 +- block/crypto.c | 14 +- block/curl.c | 10 +- block/file-posix.c | 137 +++++---- block/file-win32.c | 18 +- block/filter-compress.c | 20 +- block/gluster.c | 23 +- block/io.c | 76 ++--- block/iscsi.c | 17 +- block/mirror.c | 6 +- block/monitor/block-hmp-cmds.c | 2 +- block/nbd.c | 8 +- block/nfs.c | 4 +- block/null.c | 13 +- block/nvme.c | 14 +- block/preallocate.c | 16 +- block/qapi.c | 317 ++++++++++++++++----- block/qcow.c | 5 +- block/qcow2-bitmap.c | 5 +- block/qcow2-refcount.c | 2 +- block/qcow2.c | 17 +- block/qed.c | 11 +- block/quorum.c | 8 +- block/raw-format.c | 25 +- block/rbd.c | 9 +- block/replication.c | 6 +- block/ssh.c | 4 +- block/throttle.c | 6 +- block/vdi.c | 7 +- block/vhdx.c | 5 +- block/vmdk.c | 22 +- block/vpc.c | 5 +- blockdev.c | 8 +- hw/block/block.c | 36 ++- hw/scsi/scsi-disk.c | 5 + qemu-img.c | 100 +++++-- qemu-io-cmds.c | 62 +--- tests/unit/test-block-iothread.c | 3 + scripts/block-coroutine-wrapper.py | 20 +- tests/qemu-iotests/iotests.py | 18 +- block/meson.build | 1 + tests/qemu-iotests/065 | 2 +- tests/qemu-iotests/106 | 4 +- tests/qemu-iotests/214 | 6 +- tests/qemu-iotests/262 | 3 +- tests/qemu-iotests/302.out | 5 + tests/qemu-iotests/308 | 4 +- tests/qemu-iotests/312 | 1 + tests/qemu-iotests/common.filter | 22 +- tests/qemu-iotests/common.rc | 22 +- tests/qemu-iotests/tests/qemu-img-close-errors | 96 +++++++ tests/qemu-iotests/tests/qemu-img-close-errors.out | 23 ++ 69 files changed, 1209 insertions(+), 552 deletions(-) create mode 100755 tests/qemu-iotests/tests/qemu-img-close-errors create mode 100644 tests/qemu-iotests/tests/qemu-img-close-errors.out