From nobody Fri Oct 18 06:20:23 2024 Delivered-To: importer2@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer2=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=movementarian.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1716999962916885.7591599342202; Wed, 29 May 2024 09:26:02 -0700 (PDT) Received: from localhost ([::1] helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1sCM7S-0001cy-L0; Wed, 29 May 2024 12:25:42 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1sCM6y-0001A9-1r for qemu-devel@nongnu.org; Wed, 29 May 2024 12:25:12 -0400 Received: from ssh.movementarian.org ([139.162.205.133] helo=movementarian.org) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1sCM6v-0006Kx-Dj for qemu-devel@nongnu.org; Wed, 29 May 2024 12:25:11 -0400 Received: from movement by movementarian.org with local (Exim 4.95) (envelope-from ) id 1sCM6j-006CPq-JC; Wed, 29 May 2024 17:24:57 +0100 From: John Levon To: qemu-devel@nongnu.org Cc: alex.williamson@redhat.com, clg@redhat.com, jag.raman@oracle.com, thanos.makatos@nutanix.com, John Johnson , Elena Ufimtseva , John Levon Subject: [PATCH 16/26] vfio-user: region read/write Date: Wed, 29 May 2024 17:23:09 +0100 Message-Id: <20240529162319.1476680-17-levon@movementarian.org> X-Mailer: git-send-email 2.34.1 In-Reply-To: <20240529162319.1476680-1-levon@movementarian.org> References: <20240529162319.1476680-1-levon@movementarian.org> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer2=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=139.162.205.133; envelope-from=movement@movementarian.org; helo=movementarian.org X-Spam_score_int: -18 X-Spam_score: -1.9 X-Spam_bar: - X-Spam_report: (-1.9 / 5.0 requ) BAYES_00=-1.9, SPF_HELO_PASS=-0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer2=patchew.org@nongnu.org Sender: qemu-devel-bounces+importer2=patchew.org@nongnu.org X-ZM-MESSAGEID: 1716999964107100002 Content-Type: text/plain; charset="utf-8" From: Jagannathan Raman Add support for posted writes on remote devices Originally-by: John Johnson Signed-off-by: Elena Ufimtseva Signed-off-by: Jagannathan Raman Signed-off-by: John Levon --- hw/vfio/helpers.c | 20 ++++-- hw/vfio/pci.c | 5 +- hw/vfio/trace-events | 1 + hw/vfio/user-pci.c | 5 ++ hw/vfio/user-protocol.h | 12 ++++ hw/vfio/user.c | 120 ++++++++++++++++++++++++++++++++++ hw/vfio/user.h | 1 + include/hw/vfio/vfio-common.h | 3 +- 8 files changed, 158 insertions(+), 9 deletions(-) diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c index d0f1db30da..b2a5d7d1ba 100644 --- a/hw/vfio/helpers.c +++ b/hw/vfio/helpers.c @@ -182,12 +182,15 @@ void vfio_region_write(void *opaque, hwaddr addr, break; } =20 - ret =3D vbasedev->io->region_write(vbasedev, region->nr, addr, size, &= buf); + ret =3D vbasedev->io->region_write(vbasedev, region->nr, addr, size, &= buf, + region->post_wr); if (ret !=3D size) { + const char *errmsg =3D ret < 0 ? strerror(-ret) : "short write"; + error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64 - ",%d) failed: %m", + ",%d) failed: %s", __func__, vbasedev->name, region->nr, - addr, data, size); + addr, data, size, errmsg); } =20 trace_vfio_region_write(vbasedev->name, region->nr, addr, data, size); @@ -219,9 +222,11 @@ uint64_t vfio_region_read(void *opaque, =20 ret =3D vbasedev->io->region_read(vbasedev, region->nr, addr, size, &b= uf); if (ret !=3D size) { - error_report("%s(%s:region%d+0x%"HWADDR_PRIx", %d) failed: %m", + const char *errmsg =3D ret < 0 ? strerror(-ret) : "short read"; + + error_report("%s(%s:region%d+0x%"HWADDR_PRIx", %d) failed: %s", __func__, vbasedev->name, region->nr, - addr, size); + addr, size, errmsg); return (uint64_t)-1; } switch (size) { @@ -363,13 +368,14 @@ int vfio_region_setup(Object *obj, VFIODevice *vbased= ev, VFIORegion *region, region->size =3D info->size; region->fd_offset =3D info->offset; region->nr =3D index; + region->post_wr =3D false; + if (vbasedev->regfds !=3D NULL) { region->fd =3D vbasedev->regfds[index]; } else { region->fd =3D vbasedev->fd; } =20 - if (region->size) { region->mem =3D g_new0(MemoryRegion, 1); memory_region_init_io(region->mem, obj, &vfio_region_ops, @@ -757,7 +763,7 @@ static int vfio_io_region_read(VFIODevice *vbasedev, ui= nt8_t index, off_t off, } =20 static int vfio_io_region_write(VFIODevice *vbasedev, uint8_t index, off_t= off, - uint32_t size, void *data) + uint32_t size, void *data, bool post) { struct vfio_region_info *info =3D vbasedev->regions[index]; int ret; diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 50a22b6986..c69716f2df 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -51,7 +51,7 @@ (off), (size), (data))) #define VDEV_CONFIG_WRITE(vbasedev, off, size, data) \ ((vbasedev)->io->region_write((vbasedev), VFIO_PCI_CONFIG_REGION_INDEX= , \ - (off), (size), (data))) + (off), (size), (data), false)) =20 #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug" =20 @@ -1780,6 +1780,9 @@ static void vfio_bar_prepare(VFIOPCIDevice *vdev, int= nr) bar->type =3D pci_bar & (bar->ioport ? ~PCI_BASE_ADDRESS_IO_MASK : ~PCI_BASE_ADDRESS_MEM_MASK); bar->size =3D bar->region.size; + + /* IO regions are sync, memory can be async */ + bar->region.post_wr =3D (bar->ioport =3D=3D 0); } =20 static void vfio_bars_prepare(VFIOPCIDevice *vdev) diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index 3bfb625ee3..a65d530557 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -185,3 +185,4 @@ vfio_user_send_write(uint16_t id, int wrote) " id 0x%x = wrote 0x%x" vfio_user_version(uint16_t major, uint16_t minor, const char *caps) " majo= r %d minor %d caps: %s" vfio_user_get_info(uint32_t nregions, uint32_t nirqs) " #regions %d #irqs = %d" vfio_user_get_region_info(uint32_t index, uint32_t flags, uint64_t size) "= index %d flags 0x%x size 0x%"PRIx64 +vfio_user_region_rw(uint32_t region, uint64_t off, uint32_t count) " regio= n %d offset 0x%"PRIx64" count %d" diff --git a/hw/vfio/user-pci.c b/hw/vfio/user-pci.c index 3c2d145812..54b19b68d0 100644 --- a/hw/vfio/user-pci.c +++ b/hw/vfio/user-pci.c @@ -41,6 +41,7 @@ struct VFIOUserPCIDevice { VFIOPCIDevice device; char *sock_name; bool send_queued; /* all sends are queued */ + bool no_post; /* all regions write are sync */ }; =20 /* @@ -103,6 +104,9 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Erro= r **errp) if (udev->send_queued) { proxy->flags |=3D VFIO_PROXY_FORCE_QUEUED; } + if (udev->no_post) { + proxy->flags |=3D VFIO_PROXY_NO_POST; + } =20 if (!vfio_user_validate_version(proxy, errp)) { goto error; @@ -169,6 +173,7 @@ static void vfio_user_instance_finalize(Object *obj) static Property vfio_user_pci_dev_properties[] =3D { DEFINE_PROP_STRING("socket", VFIOUserPCIDevice, sock_name), DEFINE_PROP_BOOL("x-send-queued", VFIOUserPCIDevice, send_queued, fals= e), + DEFINE_PROP_BOOL("x-no-posted-writes", VFIOUserPCIDevice, no_post, fal= se), DEFINE_PROP_END_OF_LIST(), }; =20 diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h index 6f70a48905..6987435e96 100644 --- a/hw/vfio/user-protocol.h +++ b/hw/vfio/user-protocol.h @@ -139,4 +139,16 @@ typedef struct { uint64_t offset; } VFIOUserRegionInfo; =20 +/* + * VFIO_USER_REGION_READ + * VFIO_USER_REGION_WRITE + */ +typedef struct { + VFIOUserHdr hdr; + uint64_t offset; + uint32_t region; + uint32_t count; + char data[]; +} VFIOUserRegionRW; + #endif /* VFIO_USER_PROTOCOL_H */ diff --git a/hw/vfio/user.c b/hw/vfio/user.c index e9a89df82a..9bb1ee8880 100644 --- a/hw/vfio/user.c +++ b/hw/vfio/user.c @@ -57,6 +57,8 @@ static void vfio_user_cb(void *opaque); =20 static void vfio_user_request(void *opaque); static int vfio_user_send_queued(VFIOUserProxy *proxy, VFIOUserMsg *msg); +static void vfio_user_send_async(VFIOUserProxy *proxy, VFIOUserHdr *hdr, + VFIOUserFDs *fds); static void vfio_user_send_wait(VFIOUserProxy *proxy, VFIOUserHdr *hdr, VFIOUserFDs *fds, int rsize); static void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd, @@ -628,6 +630,33 @@ static int vfio_user_send_queued(VFIOUserProxy *proxy,= VFIOUserMsg *msg) return 0; } =20 +/* + * async send - msg can be queued, but will be freed when sent + */ +static void vfio_user_send_async(VFIOUserProxy *proxy, VFIOUserHdr *hdr, + VFIOUserFDs *fds) +{ + VFIOUserMsg *msg; + int ret; + + if (!(hdr->flags & (VFIO_USER_NO_REPLY | VFIO_USER_REPLY))) { + error_printf("vfio_user_send_async on sync message\n"); + return; + } + + QEMU_LOCK_GUARD(&proxy->lock); + + msg =3D vfio_user_getmsg(proxy, hdr, fds); + msg->id =3D hdr->id; + msg->rsize =3D 0; + msg->type =3D VFIO_MSG_ASYNC; + + ret =3D vfio_user_send_queued(proxy, msg); + if (ret < 0) { + vfio_user_recycle(proxy, msg); + } +} + static void vfio_user_send_wait(VFIOUserProxy *proxy, VFIOUserHdr *hdr, VFIOUserFDs *fds, int rsize) { @@ -1155,9 +1184,84 @@ static int vfio_user_get_region_info(VFIOUserProxy *= proxy, trace_vfio_user_get_region_info(msgp->index, msgp->flags, msgp->size); =20 memcpy(info, &msgp->argsz, info->argsz); + + /* read-after-write hazard if guest can directly access region */ + if (info->flags & VFIO_REGION_INFO_FLAG_MMAP) { + WITH_QEMU_LOCK_GUARD(&proxy->lock) { + proxy->flags |=3D VFIO_PROXY_NO_POST; + } + } + return 0; } =20 +static int vfio_user_region_read(VFIOUserProxy *proxy, uint8_t index, + off_t offset, uint32_t count, void *data) +{ + g_autofree VFIOUserRegionRW *msgp =3D NULL; + int size =3D sizeof(*msgp) + count; + + if (count > proxy->max_xfer_size) { + return -EINVAL; + } + + msgp =3D g_malloc0(size); + vfio_user_request_msg(&msgp->hdr, VFIO_USER_REGION_READ, sizeof(*msgp)= , 0); + msgp->offset =3D offset; + msgp->region =3D index; + msgp->count =3D count; + trace_vfio_user_region_rw(msgp->region, msgp->offset, msgp->count); + + vfio_user_send_wait(proxy, &msgp->hdr, NULL, size); + if (msgp->hdr.flags & VFIO_USER_ERROR) { + return -msgp->hdr.error_reply; + } else if (msgp->count > count) { + return -E2BIG; + } else { + memcpy(data, &msgp->data, msgp->count); + } + + return msgp->count; +} + +static int vfio_user_region_write(VFIOUserProxy *proxy, uint8_t index, + off_t offset, uint32_t count, void *data, + bool post) +{ + VFIOUserRegionRW *msgp =3D NULL; + int flags =3D post ? VFIO_USER_NO_REPLY : 0; + int size =3D sizeof(*msgp) + count; + int ret; + + if (count > proxy->max_xfer_size) { + return -EINVAL; + } + + msgp =3D g_malloc0(size); + vfio_user_request_msg(&msgp->hdr, VFIO_USER_REGION_WRITE, size, flags); + msgp->offset =3D offset; + msgp->region =3D index; + msgp->count =3D count; + memcpy(&msgp->data, data, count); + trace_vfio_user_region_rw(msgp->region, msgp->offset, msgp->count); + + /* async send will free msg after it's sent */ + if (post && !(proxy->flags & VFIO_PROXY_NO_POST)) { + vfio_user_send_async(proxy, &msgp->hdr, NULL); + return count; + } + + vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0); + if (msgp->hdr.flags & VFIO_USER_ERROR) { + ret =3D -msgp->hdr.error_reply; + } else { + ret =3D count; + } + + g_free(msgp); + return ret; +} + =20 /* * Socket-based io_ops @@ -1187,6 +1291,22 @@ static int vfio_user_io_get_region_info(VFIODevice *= vbasedev, return 0; } =20 +static int vfio_user_io_region_read(VFIODevice *vbasedev, uint8_t index, + off_t off, uint32_t size, void *data) +{ + return vfio_user_region_read(vbasedev->proxy, index, off, size, data); +} + +static int vfio_user_io_region_write(VFIODevice *vbasedev, uint8_t index, + off_t off, unsigned size, void *data, + bool post) +{ + return vfio_user_region_write(vbasedev->proxy, index, off, size, data, + post); +} + VFIODeviceIO vfio_dev_io_sock =3D { .get_region_info =3D vfio_user_io_get_region_info, + .region_read =3D vfio_user_io_region_read, + .region_write =3D vfio_user_io_region_write, }; diff --git a/hw/vfio/user.h b/hw/vfio/user.h index 18a5a40073..1f99a976d6 100644 --- a/hw/vfio/user.h +++ b/hw/vfio/user.h @@ -84,6 +84,7 @@ typedef struct VFIOUserProxy { /* VFIOProxy flags */ #define VFIO_PROXY_CLIENT 0x1 #define VFIO_PROXY_FORCE_QUEUED 0x4 +#define VFIO_PROXY_NO_POST 0x8 =20 typedef struct VFIODevice VFIODevice; =20 diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 85c3fec3b9..617891c4fe 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -58,6 +58,7 @@ typedef struct VFIORegion { VFIOMmap *mmaps; uint8_t nr; /* cache the region number for debug */ int fd; /* fd to mmap() region */ + bool post_wr; /* writes can be posted */ } VFIORegion; =20 typedef struct VFIOMigration { @@ -193,7 +194,7 @@ struct VFIODeviceIO { int (*region_read)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t s= ize, void *data); int (*region_write)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t = size, - void *data); + void *data, bool post); }; =20 extern VFIODeviceIO vfio_dev_io_ioctl; --=20 2.34.1