:p
atchew
Login
The following changes since commit 39a6e4f87e7b75a45b08d6dc8b8b7c2954c87440: Merge tag 'pull-qapi-2024-02-03' of https://repo.or.cz/qemu/armbru into staging (2024-02-03 13:31:58 +0000) are available in the Git repository at: https://repo.or.cz/qemu/kevin.git tags/for-upstream for you to fetch changes up to 7ccd0415f2d67e6739da756241f60d98d5c80bf8: virtio-blk: avoid using ioeventfd state in irqfd conditional (2024-02-07 21:59:07 +0100) ---------------------------------------------------------------- Block layer patches - Allow concurrent BB context changes - virtio: Re-enable notifications after drain - virtio-blk: Fix missing use of irqfd - scsi: Don't ignore most usb-storage properties - blkio: Respect memory-alignment for bounce buffer allocations - iotests tmpdir fixes - virtio-blk: Code cleanups ---------------------------------------------------------------- Daniel P. Berrangé (2): iotests: fix leak of tmpdir in dry-run mode iotests: give tempdir an identifying name Hanna Czenczek (5): block-backend: Allow concurrent context changes scsi: Await request purging virtio-scsi: Attach event vq notifier with no_poll virtio: Re-enable notifications after drain virtio-blk: Use ioeventfd_attach in start_ioeventfd Kevin Wolf (2): scsi: Don't ignore most usb-storage properties blkio: Respect memory-alignment for bounce buffer allocations Stefan Hajnoczi (7): virtio-blk: enforce iothread-vq-mapping validation virtio-blk: clarify that there is at least 1 virtqueue virtio-blk: add vq_rq[] bounds check in virtio_blk_dma_restart_cb() virtio-blk: declare VirtIOBlock::rq with a type monitor: use aio_co_reschedule_self() virtio-blk: do not use C99 mixed declarations virtio-blk: avoid using ioeventfd state in irqfd conditional include/block/aio.h | 7 +- include/hw/scsi/scsi.h | 5 +- include/hw/virtio/virtio-blk.h | 2 +- block/blkio.c | 3 + block/block-backend.c | 22 ++-- hw/block/virtio-blk.c | 226 +++++++++++++++++++++++------------------ hw/scsi/scsi-bus.c | 63 ++++++------ hw/scsi/virtio-scsi.c | 7 +- hw/usb/dev-storage-classic.c | 5 +- hw/virtio/virtio.c | 42 ++++++++ qapi/qmp-dispatch.c | 7 +- tests/qemu-iotests/testenv.py | 2 +- tests/qemu-iotests/check | 3 +- 13 files changed, 236 insertions(+), 158 deletions(-)
From: Stefan Hajnoczi <stefanha@redhat.com> Hanna Czenczek <hreitz@redhat.com> noticed that the safety of `vq_aio_context[vq->value] = ctx;` with user-defined vq->value inputs is not obvious. The code is structured in validate() + apply() steps so input validation is there, but it happens way earlier and there is nothing that guarantees apply() can only be called with validated inputs. This patch moves the validate() call inside the apply() function so validation is guaranteed. I also added the bounds checking assertion that Hanna suggested. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Manos Pitsidianakis <manos.pitsidianakis@linaro.org> Reviewed-by: Hanna Czenczek <hreitz@redhat.com> Message-ID: <20240206190610.107963-2-stefanha@redhat.com> Reviewed-by: Kevin Wolf <kwolf@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- hw/block/virtio-blk.c | 183 +++++++++++++++++++++++------------------- 1 file changed, 102 insertions(+), 81 deletions(-) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index XXXXXXX..XXXXXXX 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -XXX,XX +XXX,XX @@ static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f, return 0; } -static bool -validate_iothread_vq_mapping_list(IOThreadVirtQueueMappingList *list, - uint16_t num_queues, Error **errp) -{ - g_autofree unsigned long *vqs = bitmap_new(num_queues); - g_autoptr(GHashTable) iothreads = - g_hash_table_new(g_str_hash, g_str_equal); - - for (IOThreadVirtQueueMappingList *node = list; node; node = node->next) { - const char *name = node->value->iothread; - uint16List *vq; - - if (!iothread_by_id(name)) { - error_setg(errp, "IOThread \"%s\" object does not exist", name); - return false; - } - - if (!g_hash_table_add(iothreads, (gpointer)name)) { - error_setg(errp, - "duplicate IOThread name \"%s\" in iothread-vq-mapping", - name); - return false; - } - - if (node != list) { - if (!!node->value->vqs != !!list->value->vqs) { - error_setg(errp, "either all items in iothread-vq-mapping " - "must have vqs or none of them must have it"); - return false; - } - } - - for (vq = node->value->vqs; vq; vq = vq->next) { - if (vq->value >= num_queues) { - error_setg(errp, "vq index %u for IOThread \"%s\" must be " - "less than num_queues %u in iothread-vq-mapping", - vq->value, name, num_queues); - return false; - } - - if (test_and_set_bit(vq->value, vqs)) { - error_setg(errp, "cannot assign vq %u to IOThread \"%s\" " - "because it is already assigned", vq->value, name); - return false; - } - } - } - - if (list->value->vqs) { - for (uint16_t i = 0; i < num_queues; i++) { - if (!test_bit(i, vqs)) { - error_setg(errp, - "missing vq %u IOThread assignment in iothread-vq-mapping", - i); - return false; - } - } - } - - return true; -} - static void virtio_resize_cb(void *opaque) { VirtIODevice *vdev = opaque; @@ -XXX,XX +XXX,XX @@ static const BlockDevOps virtio_block_ops = { .drained_end = virtio_blk_drained_end, }; -/* Generate vq:AioContext mappings from a validated iothread-vq-mapping list */ -static void -apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list, - AioContext **vq_aio_context, uint16_t num_queues) +static bool +validate_iothread_vq_mapping_list(IOThreadVirtQueueMappingList *list, + uint16_t num_queues, Error **errp) +{ + g_autofree unsigned long *vqs = bitmap_new(num_queues); + g_autoptr(GHashTable) iothreads = + g_hash_table_new(g_str_hash, g_str_equal); + + for (IOThreadVirtQueueMappingList *node = list; node; node = node->next) { + const char *name = node->value->iothread; + uint16List *vq; + + if (!iothread_by_id(name)) { + error_setg(errp, "IOThread \"%s\" object does not exist", name); + return false; + } + + if (!g_hash_table_add(iothreads, (gpointer)name)) { + error_setg(errp, + "duplicate IOThread name \"%s\" in iothread-vq-mapping", + name); + return false; + } + + if (node != list) { + if (!!node->value->vqs != !!list->value->vqs) { + error_setg(errp, "either all items in iothread-vq-mapping " + "must have vqs or none of them must have it"); + return false; + } + } + + for (vq = node->value->vqs; vq; vq = vq->next) { + if (vq->value >= num_queues) { + error_setg(errp, "vq index %u for IOThread \"%s\" must be " + "less than num_queues %u in iothread-vq-mapping", + vq->value, name, num_queues); + return false; + } + + if (test_and_set_bit(vq->value, vqs)) { + error_setg(errp, "cannot assign vq %u to IOThread \"%s\" " + "because it is already assigned", vq->value, name); + return false; + } + } + } + + if (list->value->vqs) { + for (uint16_t i = 0; i < num_queues; i++) { + if (!test_bit(i, vqs)) { + error_setg(errp, + "missing vq %u IOThread assignment in iothread-vq-mapping", + i); + return false; + } + } + } + + return true; +} + +/** + * apply_iothread_vq_mapping: + * @iothread_vq_mapping_list: The mapping of virtqueues to IOThreads. + * @vq_aio_context: The array of AioContext pointers to fill in. + * @num_queues: The length of @vq_aio_context. + * @errp: If an error occurs, a pointer to the area to store the error. + * + * Fill in the AioContext for each virtqueue in the @vq_aio_context array given + * the iothread-vq-mapping parameter in @iothread_vq_mapping_list. + * + * Returns: %true on success, %false on failure. + **/ +static bool apply_iothread_vq_mapping( + IOThreadVirtQueueMappingList *iothread_vq_mapping_list, + AioContext **vq_aio_context, + uint16_t num_queues, + Error **errp) { IOThreadVirtQueueMappingList *node; size_t num_iothreads = 0; size_t cur_iothread = 0; + if (!validate_iothread_vq_mapping_list(iothread_vq_mapping_list, + num_queues, errp)) { + return false; + } + for (node = iothread_vq_mapping_list; node; node = node->next) { num_iothreads++; } @@ -XXX,XX +XXX,XX @@ apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list, /* Explicit vq:IOThread assignment */ for (vq = node->value->vqs; vq; vq = vq->next) { + assert(vq->value < num_queues); vq_aio_context[vq->value] = ctx; } } else { @@ -XXX,XX +XXX,XX @@ apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list, cur_iothread++; } + + return true; } /* Context: BQL held */ @@ -XXX,XX +XXX,XX @@ static bool virtio_blk_vq_aio_context_init(VirtIOBlock *s, Error **errp) BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + if (conf->iothread && conf->iothread_vq_mapping_list) { + error_setg(errp, + "iothread and iothread-vq-mapping properties cannot be set " + "at the same time"); + return false; + } + if (conf->iothread || conf->iothread_vq_mapping_list) { if (!k->set_guest_notifiers || !k->ioeventfd_assign) { error_setg(errp, @@ -XXX,XX +XXX,XX @@ static bool virtio_blk_vq_aio_context_init(VirtIOBlock *s, Error **errp) s->vq_aio_context = g_new(AioContext *, conf->num_queues); if (conf->iothread_vq_mapping_list) { - apply_vq_mapping(conf->iothread_vq_mapping_list, s->vq_aio_context, - conf->num_queues); + if (!apply_iothread_vq_mapping(conf->iothread_vq_mapping_list, + s->vq_aio_context, + conf->num_queues, + errp)) { + g_free(s->vq_aio_context); + s->vq_aio_context = NULL; + return false; + } } else if (conf->iothread) { AioContext *ctx = iothread_get_aio_context(conf->iothread); for (unsigned i = 0; i < conf->num_queues; i++) { @@ -XXX,XX +XXX,XX @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) return; } - if (conf->iothread_vq_mapping_list) { - if (conf->iothread) { - error_setg(errp, "iothread and iothread-vq-mapping properties " - "cannot be set at the same time"); - return; - } - - if (!validate_iothread_vq_mapping_list(conf->iothread_vq_mapping_list, - conf->num_queues, errp)) { - return; - } - } - s->config_size = virtio_get_config_size(&virtio_blk_cfg_size_params, s->host_features); virtio_init(vdev, VIRTIO_ID_BLOCK, s->config_size); -- 2.43.0
From: Stefan Hajnoczi <stefanha@redhat.com> It is not possible to instantiate a virtio-blk device with 0 virtqueues. The following check is located in ->realize(): if (!conf->num_queues) { error_setg(errp, "num-queues property must be larger than 0"); return; } Later on we access s->vq_aio_context[0] under the assumption that there is as least one virtqueue. Hanna Czenczek <hreitz@redhat.com> noted that it would help to show that the array index is already valid. Add an assertion to document that s->vq_aio_context[0] is always safe...and catch future code changes that break this assumption. Suggested-by: Hanna Czenczek <hreitz@redhat.com> Reviewed-by: Manos Pitsidianakis <manos.pitsidianakis@linaro.org> Reviewed-by: Hanna Czenczek <hreitz@redhat.com> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Message-ID: <20240206190610.107963-3-stefanha@redhat.com> Reviewed-by: Kevin Wolf <kwolf@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- hw/block/virtio-blk.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index XXXXXXX..XXXXXXX 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -XXX,XX +XXX,XX @@ static int virtio_blk_start_ioeventfd(VirtIODevice *vdev) * Try to change the AioContext so that block jobs and other operations can * co-locate their activity in the same AioContext. If it fails, nevermind. */ + assert(nvqs > 0); /* enforced during ->realize() */ r = blk_set_aio_context(s->conf.conf.blk, s->vq_aio_context[0], &local_err); if (r < 0) { -- 2.43.0
From: Stefan Hajnoczi <stefanha@redhat.com> Hanna Czenczek <hreitz@redhat.com> noted that the array index in virtio_blk_dma_restart_cb() is not bounds-checked: g_autofree VirtIOBlockReq **vq_rq = g_new0(VirtIOBlockReq *, num_queues); ... while (rq) { VirtIOBlockReq *next = rq->next; uint16_t idx = virtio_get_queue_index(rq->vq); rq->next = vq_rq[idx]; ^^^^^^^^^^ The code is correct because both rq->vq and vq_rq[] depend on num_queues, but this is indirect and not 100% obvious. Add an assertion. Suggested-by: Hanna Czenczek <hreitz@redhat.com> Reviewed-by: Manos Pitsidianakis <manos.pitsidianakis@linaro.org> Reviewed-by: Hanna Czenczek <hreitz@redhat.com> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Message-ID: <20240206190610.107963-4-stefanha@redhat.com> Reviewed-by: Kevin Wolf <kwolf@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- hw/block/virtio-blk.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index XXXXXXX..XXXXXXX 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -XXX,XX +XXX,XX @@ static void virtio_blk_dma_restart_cb(void *opaque, bool running, VirtIOBlockReq *next = rq->next; uint16_t idx = virtio_get_queue_index(rq->vq); + /* Only num_queues vqs were created so vq_rq[idx] is within bounds */ + assert(idx < num_queues); rq->next = vq_rq[idx]; vq_rq[idx] = rq; rq = next; -- 2.43.0
From: Stefan Hajnoczi <stefanha@redhat.com> The VirtIOBlock::rq field has had the type void * since its introduction in commit 869a5c6df19a ("Stop VM on error in virtio-blk. (Gleb Natapov)"). Perhaps this was done to avoid the forward declaration of VirtIOBlockReq. Hanna Czenczek <hreitz@redhat.com> pointed out the missing type. Specify the actual type because there is no need to use void * here. Suggested-by: Hanna Czenczek <hreitz@redhat.com> Reviewed-by: Manos Pitsidianakis <manos.pitsidianakis@linaro.org> Reviewed-by: Hanna Czenczek <hreitz@redhat.com> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Message-ID: <20240206190610.107963-5-stefanha@redhat.com> Reviewed-by: Kevin Wolf <kwolf@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- include/hw/virtio/virtio-blk.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h index XXXXXXX..XXXXXXX 100644 --- a/include/hw/virtio/virtio-blk.h +++ b/include/hw/virtio/virtio-blk.h @@ -XXX,XX +XXX,XX @@ struct VirtIOBlock { VirtIODevice parent_obj; BlockBackend *blk; QemuMutex rq_lock; - void *rq; /* protected by rq_lock */ + struct VirtIOBlockReq *rq; /* protected by rq_lock */ VirtIOBlkConf conf; unsigned short sector_mask; bool original_wce; -- 2.43.0
From: Stefan Hajnoczi <stefanha@redhat.com> The aio_co_reschedule_self() API is designed to avoid the race condition between scheduling the coroutine in another AioContext and yielding. The QMP dispatch code uses the open-coded version that appears susceptible to the race condition at first glance: aio_co_schedule(qemu_get_aio_context(), qemu_coroutine_self()); qemu_coroutine_yield(); The code is actually safe because the iohandler and qemu_aio_context AioContext run under the Big QEMU Lock. Nevertheless, set a good example and use aio_co_reschedule_self() so it's obvious that there is no race. Suggested-by: Hanna Reitz <hreitz@redhat.com> Reviewed-by: Manos Pitsidianakis <manos.pitsidianakis@linaro.org> Reviewed-by: Hanna Czenczek <hreitz@redhat.com> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Message-ID: <20240206190610.107963-6-stefanha@redhat.com> Reviewed-by: Kevin Wolf <kwolf@redhat.com> Acked-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- qapi/qmp-dispatch.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c index XXXXXXX..XXXXXXX 100644 --- a/qapi/qmp-dispatch.c +++ b/qapi/qmp-dispatch.c @@ -XXX,XX +XXX,XX @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ * executing the command handler so that it can make progress if it * involves an AIO_WAIT_WHILE(). */ - aio_co_schedule(qemu_get_aio_context(), qemu_coroutine_self()); - qemu_coroutine_yield(); + aio_co_reschedule_self(qemu_get_aio_context()); } monitor_set_cur(qemu_coroutine_self(), cur_mon); @@ -XXX,XX +XXX,XX @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ * Move back to iohandler_ctx so that nested event loops for * qemu_aio_context don't start new monitor commands. */ - aio_co_schedule(iohandler_get_aio_context(), - qemu_coroutine_self()); - qemu_coroutine_yield(); + aio_co_reschedule_self(iohandler_get_aio_context()); } } else { /* -- 2.43.0
From: Hanna Czenczek <hreitz@redhat.com> Since AioContext locks have been removed, a BlockBackend's AioContext may really change at any time (only exception is that it is often confined to a drained section, as noted in this patch). Therefore, blk_get_aio_context() cannot rely on its root node's context always matching that of the BlockBackend. In practice, whether they match does not matter anymore anyway: Requests can be sent to BDSs from any context, so anyone who requests the BB's context should have no reason to require the root node to have the same context. Therefore, we can and should remove the assertion to that effect. In addition, because the context can be set and queried from different threads concurrently, it has to be accessed with atomic operations. Buglink: https://issues.redhat.com/browse/RHEL-19381 Suggested-by: Kevin Wolf <kwolf@redhat.com> Signed-off-by: Hanna Czenczek <hreitz@redhat.com> Message-ID: <20240202144755.671354-2-hreitz@redhat.com> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Kevin Wolf <kwolf@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- block/block-backend.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c index XXXXXXX..XXXXXXX 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -XXX,XX +XXX,XX @@ struct BlockBackend { char *name; int refcnt; BdrvChild *root; - AioContext *ctx; + AioContext *ctx; /* access with atomic operations only */ DriveInfo *legacy_dinfo; /* null unless created by drive_new() */ QTAILQ_ENTRY(BlockBackend) link; /* for block_backends */ QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */ @@ -XXX,XX +XXX,XX @@ void blk_op_unblock_all(BlockBackend *blk, Error *reason) } } +/** + * Return BB's current AioContext. Note that this context may change + * concurrently at any time, with one exception: If the BB has a root node + * attached, its context will only change through bdrv_try_change_aio_context(), + * which creates a drained section. Therefore, incrementing such a BB's + * in-flight counter will prevent its context from changing. + */ AioContext *blk_get_aio_context(BlockBackend *blk) { - BlockDriverState *bs; IO_CODE(); if (!blk) { return qemu_get_aio_context(); } - bs = blk_bs(blk); - if (bs) { - AioContext *ctx = bdrv_get_aio_context(blk_bs(blk)); - assert(ctx == blk->ctx); - } - - return blk->ctx; + return qatomic_read(&blk->ctx); } int blk_set_aio_context(BlockBackend *blk, AioContext *new_context, @@ -XXX,XX +XXX,XX @@ int blk_set_aio_context(BlockBackend *blk, AioContext *new_context, GLOBAL_STATE_CODE(); if (!bs) { - blk->ctx = new_context; + qatomic_set(&blk->ctx, new_context); return 0; } @@ -XXX,XX +XXX,XX @@ static void blk_root_set_aio_ctx_commit(void *opaque) AioContext *new_context = s->new_ctx; ThrottleGroupMember *tgm = &blk->public.throttle_group_member; - blk->ctx = new_context; + qatomic_set(&blk->ctx, new_context); if (tgm->throttle_state) { throttle_group_detach_aio_context(tgm); throttle_group_attach_aio_context(tgm, new_context); -- 2.43.0
From: Hanna Czenczek <hreitz@redhat.com> scsi_device_for_each_req_async() currently does not provide any way to be awaited. One of its callers is scsi_device_purge_requests(), which therefore currently does not guarantee that all requests are fully settled when it returns. We want all requests to be settled, because scsi_device_purge_requests() is called through the unrealize path, including the one invoked by virtio_scsi_hotunplug() through qdev_simple_device_unplug_cb(), which most likely assumes that all SCSI requests are done then. In fact, scsi_device_purge_requests() already contains a blk_drain(), but this will not fully await scsi_device_for_each_req_async(), only the I/O requests it potentially cancels (not the non-I/O requests). However, we can have scsi_device_for_each_req_async() increment the BB in-flight counter, and have scsi_device_for_each_req_async_bh() decrement it when it is done. This way, the blk_drain() will fully await all SCSI requests to be purged. This also removes the need for scsi_device_for_each_req_async_bh() to double-check the current context and potentially re-schedule itself, should it now differ from the BB's context: Changing a BB's AioContext with a root node is done through bdrv_try_change_aio_context(), which creates a drained section. With this patch, we keep the BB in-flight counter elevated throughout, so we know the BB's context cannot change. Signed-off-by: Hanna Czenczek <hreitz@redhat.com> Message-ID: <20240202144755.671354-3-hreitz@redhat.com> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Kevin Wolf <kwolf@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- hw/scsi/scsi-bus.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c index XXXXXXX..XXXXXXX 100644 --- a/hw/scsi/scsi-bus.c +++ b/hw/scsi/scsi-bus.c @@ -XXX,XX +XXX,XX @@ static void scsi_device_for_each_req_async_bh(void *opaque) SCSIRequest *next; /* - * If the AioContext changed before this BH was called then reschedule into - * the new AioContext before accessing ->requests. This can happen when - * scsi_device_for_each_req_async() is called and then the AioContext is - * changed before BHs are run. + * The BB cannot have changed contexts between this BH being scheduled and + * now: BBs' AioContexts, when they have a node attached, can only be + * changed via bdrv_try_change_aio_context(), in a drained section. While + * we have the in-flight counter incremented, that drain must block. */ ctx = blk_get_aio_context(s->conf.blk); - if (ctx != qemu_get_current_aio_context()) { - aio_bh_schedule_oneshot(ctx, scsi_device_for_each_req_async_bh, - g_steal_pointer(&data)); - return; - } + assert(ctx == qemu_get_current_aio_context()); QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) { data->fn(req, data->fn_opaque); @@ -XXX,XX +XXX,XX @@ static void scsi_device_for_each_req_async_bh(void *opaque) /* Drop the reference taken by scsi_device_for_each_req_async() */ object_unref(OBJECT(s)); + + /* Paired with blk_inc_in_flight() in scsi_device_for_each_req_async() */ + blk_dec_in_flight(s->conf.blk); } /* * Schedule @fn() to be invoked for each enqueued request in device @s. @fn() * runs in the AioContext that is executing the request. + * Keeps the BlockBackend's in-flight counter incremented until everything is + * done, so draining it will settle all scheduled @fn() calls. */ static void scsi_device_for_each_req_async(SCSIDevice *s, void (*fn)(SCSIRequest *, void *), @@ -XXX,XX +XXX,XX @@ static void scsi_device_for_each_req_async(SCSIDevice *s, */ object_ref(OBJECT(s)); + /* Paired with blk_dec_in_flight() in scsi_device_for_each_req_async_bh() */ + blk_inc_in_flight(s->conf.blk); aio_bh_schedule_oneshot(blk_get_aio_context(s->conf.blk), scsi_device_for_each_req_async_bh, data); @@ -XXX,XX +XXX,XX @@ static void scsi_device_purge_one_req(SCSIRequest *req, void *opaque) scsi_req_cancel_async(req, NULL); } +/** + * Cancel all requests, and block until they are deleted. + */ void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense) { scsi_device_for_each_req_async(sdev, scsi_device_purge_one_req, NULL); + /* + * Await all the scsi_device_purge_one_req() calls scheduled by + * scsi_device_for_each_req_async(), and all I/O requests that were + * cancelled this way, but may still take a bit of time to settle. + */ blk_drain(sdev->conf.blk); + scsi_device_set_ua(sdev, sense); } -- 2.43.0
From: Daniel P. Berrangé <berrange@redhat.com> Creating an instance of the 'TestEnv' class will create a temporary directory. This dir is only deleted, however, in the __exit__ handler invoked by a context manager. In dry-run mode, we don't use the TestEnv via a context manager, so were leaking the temporary directory. Since meson invokes 'check' 5 times on each configure run, developers /tmp was filling up with empty temporary directories. Signed-off-by: Daniel P. Berrangé <berrange@redhat.com> Message-ID: <20240205154019.1841037-1-berrange@redhat.com> Reviewed-by: Michael Tokarev <mjt@tls.msk.ru> Reviewed-by: Kevin Wolf <kwolf@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- tests/qemu-iotests/check | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check index XXXXXXX..XXXXXXX 100755 --- a/tests/qemu-iotests/check +++ b/tests/qemu-iotests/check @@ -XXX,XX +XXX,XX @@ if __name__ == '__main__': sys.exit(str(e)) if args.dry_run: - print('\n'.join([os.path.basename(t) for t in tests])) + with env: + print('\n'.join([os.path.basename(t) for t in tests])) else: with TestRunner(env, tap=args.tap, color=args.color) as tr: -- 2.43.0
From: Daniel P. Berrangé <berrange@redhat.com> If something goes wrong causing the iotests not to cleanup their temporary directory, it is useful if the dir had an identifying name to show what is to blame. Signed-off-by: Daniel P. Berrangé <berrange@redhat.com> Message-ID: <20240205155158.1843304-1-berrange@redhat.com> Revieved-by: Michael Tokarev <mjt@tls.msk.ru> Reviewed-by: Kevin Wolf <kwolf@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- tests/qemu-iotests/testenv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/qemu-iotests/testenv.py b/tests/qemu-iotests/testenv.py index XXXXXXX..XXXXXXX 100644 --- a/tests/qemu-iotests/testenv.py +++ b/tests/qemu-iotests/testenv.py @@ -XXX,XX +XXX,XX @@ def init_directories(self) -> None: self.tmp_sock_dir = False Path(self.sock_dir).mkdir(parents=True, exist_ok=True) except KeyError: - self.sock_dir = tempfile.mkdtemp() + self.sock_dir = tempfile.mkdtemp(prefix="qemu-iotests-") self.tmp_sock_dir = True self.sample_img_dir = os.getenv('SAMPLE_IMG_DIR', -- 2.43.0
From: Stefan Hajnoczi <stefanha@redhat.com> QEMU's coding style generally forbids C99 mixed declarations. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Message-ID: <20240206140410.65650-1-stefanha@redhat.com> Reviewed-by: Hanna Czenczek <hreitz@redhat.com> Reviewed-by: Kevin Wolf <kwolf@redhat.com> Acked-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- hw/block/virtio-blk.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index XXXXXXX..XXXXXXX 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -XXX,XX +XXX,XX @@ static void virtio_blk_zone_report_complete(void *opaque, int ret) int64_t zrp_size, n, j = 0; int64_t nz = data->zone_report_data.nr_zones; int8_t err_status = VIRTIO_BLK_S_OK; + struct virtio_blk_zone_report zrp_hdr = (struct virtio_blk_zone_report) { + .nr_zones = cpu_to_le64(nz), + }; trace_virtio_blk_zone_report_complete(vdev, req, nz, ret); if (ret) { @@ -XXX,XX +XXX,XX @@ static void virtio_blk_zone_report_complete(void *opaque, int ret) goto out; } - struct virtio_blk_zone_report zrp_hdr = (struct virtio_blk_zone_report) { - .nr_zones = cpu_to_le64(nz), - }; zrp_size = sizeof(struct virtio_blk_zone_report) + sizeof(struct virtio_blk_zone_descriptor) * nz; n = iov_from_buf(in_iov, in_num, 0, &zrp_hdr, sizeof(zrp_hdr)); @@ -XXX,XX +XXX,XX @@ static int virtio_blk_handle_zone_append(VirtIOBlockReq *req, int64_t offset = virtio_ldq_p(vdev, &req->out.sector) << BDRV_SECTOR_BITS; int64_t len = iov_size(out_iov, out_num); + ZoneCmdData *data; trace_virtio_blk_handle_zone_append(vdev, req, offset >> BDRV_SECTOR_BITS); if (!check_zoned_request(s, offset, len, true, &err_status)) { goto out; } - ZoneCmdData *data = g_malloc(sizeof(ZoneCmdData)); + data = g_malloc(sizeof(ZoneCmdData)); data->req = req; data->in_iov = in_iov; data->in_num = in_num; @@ -XXX,XX +XXX,XX @@ static void virtio_blk_dma_restart_cb(void *opaque, bool running, { VirtIOBlock *s = opaque; uint16_t num_queues = s->conf.num_queues; + g_autofree VirtIOBlockReq **vq_rq = NULL; + VirtIOBlockReq *rq; if (!running) { return; } /* Split the device-wide s->rq request list into per-vq request lists */ - g_autofree VirtIOBlockReq **vq_rq = g_new0(VirtIOBlockReq *, num_queues); - VirtIOBlockReq *rq; + vq_rq = g_new0(VirtIOBlockReq *, num_queues); WITH_QEMU_LOCK_GUARD(&s->rq_lock) { rq = s->rq; @@ -XXX,XX +XXX,XX @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtIOBlock *s = VIRTIO_BLK(dev); VirtIOBlkConf *conf = &s->conf; + BlockDriverState *bs; Error *err = NULL; unsigned i; @@ -XXX,XX +XXX,XX @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) return; } - BlockDriverState *bs = blk_bs(conf->conf.blk); + bs = blk_bs(conf->conf.blk); if (bs->bl.zoned != BLK_Z_NONE) { virtio_add_feature(&s->host_features, VIRTIO_BLK_F_ZONED); if (bs->bl.zoned == BLK_Z_HM) { -- 2.43.0
usb-storage is for the most part just a wrapper around an internally created scsi-disk device. It uses DEFINE_BLOCK_PROPERTIES() to offer all of the usual block device properties to the user, but then only forwards a few select properties to the internal device while the rest is silently ignored. This changes scsi_bus_legacy_add_drive() to accept a whole BlockConf instead of some individual values inside of it so that usb-storage can now pass the whole configuration to the internal scsi-disk. This enables the remaining block device properties, e.g. logical/physical_block_size or discard_granularity. Buglink: https://issues.redhat.com/browse/RHEL-22375 Signed-off-by: Kevin Wolf <kwolf@redhat.com> Message-ID: <20240131130607.24117-1-kwolf@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- include/hw/scsi/scsi.h | 5 +---- hw/scsi/scsi-bus.c | 33 +++++++++++++-------------------- hw/usb/dev-storage-classic.c | 5 +---- 3 files changed, 15 insertions(+), 28 deletions(-) diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h index XXXXXXX..XXXXXXX 100644 --- a/include/hw/scsi/scsi.h +++ b/include/hw/scsi/scsi.h @@ -XXX,XX +XXX,XX @@ static inline SCSIBus *scsi_bus_from_device(SCSIDevice *d) } SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk, - int unit, bool removable, int bootindex, - bool share_rw, - BlockdevOnError rerror, - BlockdevOnError werror, + int unit, bool removable, BlockConf *conf, const char *serial, Error **errp); void scsi_bus_set_ua(SCSIBus *bus, SCSISense sense); void scsi_bus_legacy_handle_cmdline(SCSIBus *bus); diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c index XXXXXXX..XXXXXXX 100644 --- a/hw/scsi/scsi-bus.c +++ b/hw/scsi/scsi-bus.c @@ -XXX,XX +XXX,XX @@ static void scsi_qdev_unrealize(DeviceState *qdev) /* handle legacy '-drive if=scsi,...' cmd line args */ SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk, - int unit, bool removable, int bootindex, - bool share_rw, - BlockdevOnError rerror, - BlockdevOnError werror, + int unit, bool removable, BlockConf *conf, const char *serial, Error **errp) { const char *driver; char *name; DeviceState *dev; + SCSIDevice *s; DriveInfo *dinfo; if (blk_is_sg(blk)) { @@ -XXX,XX +XXX,XX @@ SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk, object_property_add_child(OBJECT(bus), name, OBJECT(dev)); g_free(name); + s = SCSI_DEVICE(dev); + s->conf = *conf; + qdev_prop_set_uint32(dev, "scsi-id", unit); - if (bootindex >= 0) { - object_property_set_int(OBJECT(dev), "bootindex", bootindex, - &error_abort); - } if (object_property_find(OBJECT(dev), "removable")) { qdev_prop_set_bit(dev, "removable", removable); } @@ -XXX,XX +XXX,XX @@ SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk, object_unparent(OBJECT(dev)); return NULL; } - if (!object_property_set_bool(OBJECT(dev), "share-rw", share_rw, errp)) { - object_unparent(OBJECT(dev)); - return NULL; - } - - qdev_prop_set_enum(dev, "rerror", rerror); - qdev_prop_set_enum(dev, "werror", werror); if (!qdev_realize_and_unref(dev, &bus->qbus, errp)) { object_unparent(OBJECT(dev)); return NULL; } - return SCSI_DEVICE(dev); + return s; } void scsi_bus_legacy_handle_cmdline(SCSIBus *bus) @@ -XXX,XX +XXX,XX @@ void scsi_bus_legacy_handle_cmdline(SCSIBus *bus) Location loc; DriveInfo *dinfo; int unit; + BlockConf conf = { + .bootindex = -1, + .share_rw = false, + .rerror = BLOCKDEV_ON_ERROR_AUTO, + .werror = BLOCKDEV_ON_ERROR_AUTO, + }; loc_push_none(&loc); for (unit = 0; unit <= bus->info->max_target; unit++) { @@ -XXX,XX +XXX,XX @@ void scsi_bus_legacy_handle_cmdline(SCSIBus *bus) } qemu_opts_loc_restore(dinfo->opts); scsi_bus_legacy_add_drive(bus, blk_by_legacy_dinfo(dinfo), - unit, false, -1, false, - BLOCKDEV_ON_ERROR_AUTO, - BLOCKDEV_ON_ERROR_AUTO, - NULL, &error_fatal); + unit, false, &conf, NULL, &error_fatal); } loc_pop(&loc); } diff --git a/hw/usb/dev-storage-classic.c b/hw/usb/dev-storage-classic.c index XXXXXXX..XXXXXXX 100644 --- a/hw/usb/dev-storage-classic.c +++ b/hw/usb/dev-storage-classic.c @@ -XXX,XX +XXX,XX @@ static void usb_msd_storage_realize(USBDevice *dev, Error **errp) scsi_bus_init(&s->bus, sizeof(s->bus), DEVICE(dev), &usb_msd_scsi_info_storage); scsi_dev = scsi_bus_legacy_add_drive(&s->bus, blk, 0, !!s->removable, - s->conf.bootindex, s->conf.share_rw, - s->conf.rerror, s->conf.werror, - dev->serial, - errp); + &s->conf, dev->serial, errp); blk_unref(blk); if (!scsi_dev) { return; -- 2.43.0
blkio_alloc_mem_region() requires that the requested buffer size is a multiple of the memory-alignment property. If it isn't, the allocation fails with a return value of -EINVAL. Fix the call in blkio_resize_bounce_pool() to make sure the requested size is properly aligned. I observed this problem with vhost-vdpa, which requires page aligned memory. As the virtio-blk device behind it still had 512 byte blocks, we got bs->bl.request_alignment = 512, but actually any request that needed a bounce buffer and was not aligned to 4k would fail without this fix. Suggested-by: Stefano Garzarella <sgarzare@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> Message-ID: <20240131173140.42398-1-kwolf@redhat.com> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Stefano Garzarella <sgarzare@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- block/blkio.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/block/blkio.c b/block/blkio.c index XXXXXXX..XXXXXXX 100644 --- a/block/blkio.c +++ b/block/blkio.c @@ -XXX,XX +XXX,XX @@ static int blkio_resize_bounce_pool(BDRVBlkioState *s, int64_t bytes) /* Pad size to reduce frequency of resize calls */ bytes += 128 * 1024; + /* Align the pool size to avoid blkio_alloc_mem_region() failure */ + bytes = QEMU_ALIGN_UP(bytes, s->mem_region_alignment); + WITH_QEMU_LOCK_GUARD(&s->blkio_lock) { int ret; -- 2.43.0
From: Hanna Czenczek <hreitz@redhat.com> As of commit 38738f7dbbda90fbc161757b7f4be35b52205552 ("virtio-scsi: don't waste CPU polling the event virtqueue"), we only attach an io_read notifier for the virtio-scsi event virtqueue instead, and no polling notifiers. During operation, the event virtqueue is typically non-empty, but none of the buffers are intended to be used immediately. Instead, they only get used when certain events occur. Therefore, it makes no sense to continuously poll it when non-empty, because it is supposed to be and stay non-empty. We do this by using virtio_queue_aio_attach_host_notifier_no_poll() instead of virtio_queue_aio_attach_host_notifier() for the event virtqueue. Commit 766aa2de0f29b657148e04599320d771c36fd126 ("virtio-scsi: implement BlockDevOps->drained_begin()") however has virtio_scsi_drained_end() use virtio_queue_aio_attach_host_notifier() for all virtqueues, including the event virtqueue. This can lead to it being polled again, undoing the benefit of commit 38738f7dbbda90fbc161757b7f4be35b52205552. Fix it by using virtio_queue_aio_attach_host_notifier_no_poll() for the event virtqueue. Reported-by: Fiona Ebner <f.ebner@proxmox.com> Fixes: 766aa2de0f29b657148e04599320d771c36fd126 ("virtio-scsi: implement BlockDevOps->drained_begin()") Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> Tested-by: Fiona Ebner <f.ebner@proxmox.com> Reviewed-by: Fiona Ebner <f.ebner@proxmox.com> Signed-off-by: Hanna Czenczek <hreitz@redhat.com> Message-ID: <20240202153158.788922-2-hreitz@redhat.com> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Kevin Wolf <kwolf@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- hw/scsi/virtio-scsi.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index XXXXXXX..XXXXXXX 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -XXX,XX +XXX,XX @@ static void virtio_scsi_drained_begin(SCSIBus *bus) static void virtio_scsi_drained_end(SCSIBus *bus) { VirtIOSCSI *s = container_of(bus, VirtIOSCSI, bus); + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); VirtIODevice *vdev = VIRTIO_DEVICE(s); uint32_t total_queues = VIRTIO_SCSI_VQ_NUM_FIXED + s->parent_obj.conf.num_queues; @@ -XXX,XX +XXX,XX @@ static void virtio_scsi_drained_end(SCSIBus *bus) for (uint32_t i = 0; i < total_queues; i++) { VirtQueue *vq = virtio_get_queue(vdev, i); - virtio_queue_aio_attach_host_notifier(vq, s->ctx); + if (vq == vs->event_vq) { + virtio_queue_aio_attach_host_notifier_no_poll(vq, s->ctx); + } else { + virtio_queue_aio_attach_host_notifier(vq, s->ctx); + } } } -- 2.43.0
From: Hanna Czenczek <hreitz@redhat.com> During drain, we do not care about virtqueue notifications, which is why we remove the handlers on it. When removing those handlers, whether vq notifications are enabled or not depends on whether we were in polling mode or not; if not, they are enabled (by default); if so, they have been disabled by the io_poll_start callback. Because we do not care about those notifications after removing the handlers, this is fine. However, we have to explicitly ensure they are enabled when re-attaching the handlers, so we will resume receiving notifications. We do this in virtio_queue_aio_attach_host_notifier*(). If such a function is called while we are in a polling section, attaching the notifiers will then invoke the io_poll_start callback, re-disabling notifications. Because we will always miss virtqueue updates in the drained section, we also need to poll the virtqueue once after attaching the notifiers. Buglink: https://issues.redhat.com/browse/RHEL-3934 Signed-off-by: Hanna Czenczek <hreitz@redhat.com> Message-ID: <20240202153158.788922-3-hreitz@redhat.com> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Kevin Wolf <kwolf@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- include/block/aio.h | 7 ++++++- hw/virtio/virtio.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/include/block/aio.h b/include/block/aio.h index XXXXXXX..XXXXXXX 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -XXX,XX +XXX,XX @@ void aio_set_event_notifier(AioContext *ctx, AioPollFn *io_poll, EventNotifierHandler *io_poll_ready); -/* Set polling begin/end callbacks for an event notifier that has already been +/* + * Set polling begin/end callbacks for an event notifier that has already been * registered with aio_set_event_notifier. Do nothing if the event notifier is * not registered. + * + * Note that if the io_poll_end() callback (or the entire notifier) is removed + * during polling, it will not be called, so an io_poll_begin() is not + * necessarily always followed by an io_poll_end(). */ void aio_set_event_notifier_poll(AioContext *ctx, EventNotifier *notifier, diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index XXXXXXX..XXXXXXX 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -XXX,XX +XXX,XX @@ static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n) void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx) { + /* + * virtio_queue_aio_detach_host_notifier() can leave notifications disabled. + * Re-enable them. (And if detach has not been used before, notifications + * being enabled is still the default state while a notifier is attached; + * see virtio_queue_host_notifier_aio_poll_end(), which will always leave + * notifications enabled once the polling section is left.) + */ + if (!virtio_queue_get_notification(vq)) { + virtio_queue_set_notification(vq, 1); + } + aio_set_event_notifier(ctx, &vq->host_notifier, virtio_queue_host_notifier_read, virtio_queue_host_notifier_aio_poll, @@ -XXX,XX +XXX,XX @@ void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx) aio_set_event_notifier_poll(ctx, &vq->host_notifier, virtio_queue_host_notifier_aio_poll_begin, virtio_queue_host_notifier_aio_poll_end); + + /* + * We will have ignored notifications about new requests from the guest + * while no notifiers were attached, so "kick" the virt queue to process + * those requests now. + */ + event_notifier_set(&vq->host_notifier); } /* @@ -XXX,XX +XXX,XX @@ void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx) */ void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx) { + /* See virtio_queue_aio_attach_host_notifier() */ + if (!virtio_queue_get_notification(vq)) { + virtio_queue_set_notification(vq, 1); + } + aio_set_event_notifier(ctx, &vq->host_notifier, virtio_queue_host_notifier_read, NULL, NULL); + + /* + * See virtio_queue_aio_attach_host_notifier(). + * Note that this may be unnecessary for the type of virtqueues this + * function is used for. Still, it will not hurt to have a quick look into + * whether we can/should process any of the virtqueue elements. + */ + event_notifier_set(&vq->host_notifier); } void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx) { aio_set_event_notifier(ctx, &vq->host_notifier, NULL, NULL, NULL); + + /* + * aio_set_event_notifier_poll() does not guarantee whether io_poll_end() + * will run after io_poll_begin(), so by removing the notifier, we do not + * know whether virtio_queue_host_notifier_aio_poll_end() has run after a + * previous virtio_queue_host_notifier_aio_poll_begin(), i.e. whether + * notifications are enabled or disabled. It does not really matter anyway; + * we just removed the notifier, so we do not care about notifications until + * we potentially re-attach it. The attach_host_notifier functions will + * ensure that notifications are enabled again when they are needed. + */ } void virtio_queue_host_notifier_read(EventNotifier *n) -- 2.43.0
From: Hanna Czenczek <hreitz@redhat.com> Commit d3f6f294aeadd5f88caf0155e4360808c95b3146 ("virtio-blk: always set ioeventfd during startup") has made virtio_blk_start_ioeventfd() always kick the virtqueue (set the ioeventfd), regardless of whether the BB is drained. That is no longer necessary, because attaching the host notifier will now set the ioeventfd, too; this happens either immediately right here in virtio_blk_start_ioeventfd(), or later when the drain ends, in virtio_blk_ioeventfd_attach(). With event_notifier_set() removed, the code becomes the same as the one in virtio_blk_ioeventfd_attach(), so we can reuse that function. Signed-off-by: Hanna Czenczek <hreitz@redhat.com> Message-ID: <20240202153158.788922-4-hreitz@redhat.com> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Kevin Wolf <kwolf@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- hw/block/virtio-blk.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index XXXXXXX..XXXXXXX 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -XXX,XX +XXX,XX @@ #include "hw/virtio/virtio-blk-common.h" #include "qemu/coroutine.h" +static void virtio_blk_ioeventfd_attach(VirtIOBlock *s); + static void virtio_blk_init_request(VirtIOBlock *s, VirtQueue *vq, VirtIOBlockReq *req) { @@ -XXX,XX +XXX,XX @@ static int virtio_blk_start_ioeventfd(VirtIODevice *vdev) s->ioeventfd_started = true; smp_wmb(); /* paired with aio_notify_accept() on the read side */ - /* Get this show started by hooking up our callbacks */ - for (i = 0; i < nvqs; i++) { - VirtQueue *vq = virtio_get_queue(vdev, i); - AioContext *ctx = s->vq_aio_context[i]; - - /* Kick right away to begin processing requests already in vring */ - event_notifier_set(virtio_queue_get_host_notifier(vq)); - - if (!blk_in_drain(s->conf.conf.blk)) { - virtio_queue_aio_attach_host_notifier(vq, ctx); - } + /* + * Get this show started by hooking up our callbacks. If drained now, + * virtio_blk_drained_end() will do this later. + * Attaching the notifier also kicks the virtqueues, processing any requests + * they may already have. + */ + if (!blk_in_drain(s->conf.conf.blk)) { + virtio_blk_ioeventfd_attach(s); } return 0; -- 2.43.0
From: Stefan Hajnoczi <stefanha@redhat.com> Requests that complete in an IOThread use irqfd to notify the guest while requests that complete in the main loop thread use the traditional qdev irq code path. The reason for this conditional is that the irq code path requires the BQL: if (s->ioeventfd_started && !s->ioeventfd_disabled) { virtio_notify_irqfd(vdev, req->vq); } else { virtio_notify(vdev, req->vq); } There is a corner case where the conditional invokes the irq code path instead of the irqfd code path: static void virtio_blk_stop_ioeventfd(VirtIODevice *vdev) { ... /* * Set ->ioeventfd_started to false before draining so that host notifiers * are not detached/attached anymore. */ s->ioeventfd_started = false; /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */ blk_drain(s->conf.conf.blk); During blk_drain() the conditional produces the wrong result because ioeventfd_started is false. Use qemu_in_iothread() instead of checking the ioeventfd state. Buglink: https://issues.redhat.com/browse/RHEL-15394 Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Message-ID: <20240122172625.415386-1-stefanha@redhat.com> Reviewed-by: Kevin Wolf <kwolf@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- hw/block/virtio-blk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index XXXXXXX..XXXXXXX 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -XXX,XX +XXX,XX @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status) iov_discard_undo(&req->inhdr_undo); iov_discard_undo(&req->outhdr_undo); virtqueue_push(req->vq, &req->elem, req->in_len); - if (s->ioeventfd_started && !s->ioeventfd_disabled) { + if (qemu_in_iothread()) { virtio_notify_irqfd(vdev, req->vq); } else { virtio_notify(vdev, req->vq); -- 2.43.0
The following changes since commit 13356edb87506c148b163b8c7eb0695647d00c2a: Merge tag 'block-pull-request' of https://gitlab.com/stefanha/qemu into staging (2023-01-24 09:45:33 +0000) are available in the Git repository at: https://repo.or.cz/qemu/kevin.git tags/for-upstream for you to fetch changes up to d570177b50c389f379f93183155a27d44856ab46: qemu-img: Change info key names for protocol nodes (2023-02-01 16:52:33 +0100) v4: - Fixed the 'qemu-img-close-errors' test case to run only on Linux and only with the file protocol, use qemu-io instead of truncate v3: - Make the compiler happier on BSD and CentOS Stream 8 v2: - Rebased to resolve merge conflicts in coroutine.h ---------------------------------------------------------------- Block layer patches - qemu-img info: Show protocol-level information - Move more functions to coroutines - Make coroutine annotations ready for static analysis - qemu-img: Fix exit code for errors closing the image - qcow2 bitmaps: Fix theoretical corruption in error path - pflash: Only load non-zero parts of backend image to save memory - Code cleanup and test case improvements ---------------------------------------------------------------- Alberto Faria (2): coroutine: annotate coroutine_fn for libclang block: Add no_coroutine_fn and coroutine_mixed_fn marker Emanuele Giuseppe Esposito (14): block-coroutine-wrapper: support void functions block: Convert bdrv_io_plug() to co_wrapper block: Convert bdrv_io_unplug() to co_wrapper block: Convert bdrv_is_inserted() to co_wrapper block: Rename refresh_total_sectors to bdrv_refresh_total_sectors block: Convert bdrv_refresh_total_sectors() to co_wrapper_mixed block-backend: use bdrv_getlength instead of blk_getlength block: use bdrv_co_refresh_total_sectors when possible block: Convert bdrv_get_allocated_file_size() to co_wrapper block: Convert bdrv_get_info() to co_wrapper_mixed block: Convert bdrv_eject() to co_wrapper block: Convert bdrv_lock_medium() to co_wrapper block: Convert bdrv_debug_event() to co_wrapper_mixed block: Rename bdrv_load/save_vmstate() to bdrv_co_load/save_vmstate() Hanna Reitz (12): block: Improve empty format-specific info dump block/file: Add file-specific image info block/vmdk: Change extent info type block: Split BlockNodeInfo off of ImageInfo qemu-img: Use BlockNodeInfo block/qapi: Let bdrv_query_image_info() recurse block/qapi: Introduce BlockGraphInfo block/qapi: Add indentation to bdrv_node_info_dump() iotests: Filter child node information iotests/106, 214, 308: Read only one size line qemu-img: Let info print block graph qemu-img: Change info key names for protocol nodes Kevin Wolf (4): qcow2: Fix theoretical corruption in store_bitmap() error path qemu-img commit: Report errors while closing the image qemu-img bitmap: Report errors while closing the image qemu-iotests: Test qemu-img bitmap/commit exit code on error Paolo Bonzini (2): qemu-io: do not reinvent the blk_pwrite_zeroes wheel block: remove bdrv_coroutine_enter Philippe Mathieu-Daudé (1): block/nbd: Add missing <qemu/bswap.h> include Thomas Huth (2): tests/qemu-iotests/312: Mark "quorum" as required driver tests/qemu-iotests/262: Check for availability of "blkverify" first Xiang Zheng (1): pflash: Only read non-zero parts of backend image qapi/block-core.json | 123 +++++++- include/block/block-common.h | 11 +- include/block/block-io.h | 41 ++- include/block/block_int-common.h | 26 +- include/block/block_int-io.h | 5 +- include/block/nbd.h | 1 + include/block/qapi.h | 14 +- include/qemu/osdep.h | 44 +++ include/sysemu/block-backend-io.h | 31 +- block.c | 88 +++--- block/blkdebug.c | 11 +- block/blkio.c | 15 +- block/blklogwrites.c | 6 +- block/blkreplay.c | 6 +- block/blkverify.c | 6 +- block/block-backend.c | 38 +-- block/commit.c | 4 +- block/copy-on-read.c | 18 +- block/crypto.c | 14 +- block/curl.c | 10 +- block/file-posix.c | 137 +++++---- block/file-win32.c | 18 +- block/filter-compress.c | 20 +- block/gluster.c | 23 +- block/io.c | 76 ++--- block/iscsi.c | 17 +- block/mirror.c | 6 +- block/monitor/block-hmp-cmds.c | 2 +- block/nbd.c | 8 +- block/nfs.c | 4 +- block/null.c | 13 +- block/nvme.c | 14 +- block/preallocate.c | 16 +- block/qapi.c | 317 ++++++++++++++++----- block/qcow.c | 5 +- block/qcow2-bitmap.c | 5 +- block/qcow2-refcount.c | 2 +- block/qcow2.c | 17 +- block/qed.c | 11 +- block/quorum.c | 8 +- block/raw-format.c | 25 +- block/rbd.c | 9 +- block/replication.c | 6 +- block/ssh.c | 4 +- block/throttle.c | 6 +- block/vdi.c | 7 +- block/vhdx.c | 5 +- block/vmdk.c | 22 +- block/vpc.c | 5 +- blockdev.c | 8 +- hw/block/block.c | 36 ++- hw/scsi/scsi-disk.c | 5 + qemu-img.c | 100 +++++-- qemu-io-cmds.c | 62 +--- tests/unit/test-block-iothread.c | 3 + scripts/block-coroutine-wrapper.py | 20 +- tests/qemu-iotests/iotests.py | 18 +- block/meson.build | 1 + tests/qemu-iotests/065 | 2 +- tests/qemu-iotests/106 | 4 +- tests/qemu-iotests/214 | 6 +- tests/qemu-iotests/262 | 3 +- tests/qemu-iotests/302.out | 5 + tests/qemu-iotests/308 | 4 +- tests/qemu-iotests/312 | 1 + tests/qemu-iotests/common.filter | 22 +- tests/qemu-iotests/common.rc | 22 +- tests/qemu-iotests/tests/qemu-img-close-errors | 96 +++++++ tests/qemu-iotests/tests/qemu-img-close-errors.out | 23 ++ 69 files changed, 1209 insertions(+), 552 deletions(-) create mode 100755 tests/qemu-iotests/tests/qemu-img-close-errors create mode 100644 tests/qemu-iotests/tests/qemu-img-close-errors.out