From nobody Fri Oct 18 06:27:18 2024 Delivered-To: importer2@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer2=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=movementarian.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1717000166481707.3767500772409; Wed, 29 May 2024 09:29:26 -0700 (PDT) Received: from localhost ([::1] helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1sCM6u-00018J-Kb; Wed, 29 May 2024 12:25:08 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1sCM6p-00015r-Nf for qemu-devel@nongnu.org; Wed, 29 May 2024 12:25:05 -0400 Received: from ssh.movementarian.org ([139.162.205.133] helo=movementarian.org) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1sCM6l-0006Kc-3g for qemu-devel@nongnu.org; Wed, 29 May 2024 12:25:03 -0400 Received: from movement by movementarian.org with local (Exim 4.95) (envelope-from ) id 1sCM6j-006COt-2R; Wed, 29 May 2024 17:24:57 +0100 From: John Levon To: qemu-devel@nongnu.org Cc: alex.williamson@redhat.com, clg@redhat.com, jag.raman@oracle.com, thanos.makatos@nutanix.com, John Johnson , Elena Ufimtseva , John Levon Subject: [PATCH 08/26] vfio: add device IO ops vector Date: Wed, 29 May 2024 17:23:01 +0100 Message-Id: <20240529162319.1476680-9-levon@movementarian.org> X-Mailer: git-send-email 2.34.1 In-Reply-To: <20240529162319.1476680-1-levon@movementarian.org> References: <20240529162319.1476680-1-levon@movementarian.org> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer2=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=139.162.205.133; envelope-from=movement@movementarian.org; helo=movementarian.org X-Spam_score_int: -18 X-Spam_score: -1.9 X-Spam_bar: - X-Spam_report: (-1.9 / 5.0 requ) BAYES_00=-1.9, SPF_HELO_PASS=-0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer2=patchew.org@nongnu.org Sender: qemu-devel-bounces+importer2=patchew.org@nongnu.org X-ZM-MESSAGEID: 1717000167004100002 Content-Type: text/plain; charset="utf-8" From: Jagannathan Raman Used for communication with VFIO driver (prep work for vfio-user, which will communicate over a socket) Originally-by: John Johnson Signed-off-by: Elena Ufimtseva Signed-off-by: Jagannathan Raman Signed-off-by: John Levon --- hw/vfio/ap.c | 2 +- hw/vfio/ccw.c | 2 +- hw/vfio/common.c | 7 +- hw/vfio/helpers.c | 100 +++++++++++++++++++++++-- hw/vfio/pci.c | 137 +++++++++++++++++++++------------- hw/vfio/platform.c | 2 +- include/hw/vfio/vfio-common.h | 27 ++++++- 7 files changed, 211 insertions(+), 66 deletions(-) diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c index c12531a788..23d700e67a 100644 --- a/hw/vfio/ap.c +++ b/hw/vfio/ap.c @@ -229,7 +229,7 @@ static void vfio_ap_instance_init(Object *obj) * handle ram_block_discard_disable(). */ vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_AP, &vfio_ap_ops, - DEVICE(vapdev), true); + &vfio_dev_io_ioctl, DEVICE(vapdev), true); } =20 #ifdef CONFIG_IOMMUFD diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c index 315449c1b1..b4139c8aef 100644 --- a/hw/vfio/ccw.c +++ b/hw/vfio/ccw.c @@ -681,7 +681,7 @@ static void vfio_ccw_instance_init(Object *obj) * ram_block_discard_disable(). */ vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_CCW, &vfio_ccw_ops, - DEVICE(vcdev), true); + &vfio_dev_io_ioctl, DEVICE(vcdev), true); } =20 #ifdef CONFIG_IOMMUFD diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 1a71c3be05..03c5424938 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -964,7 +964,7 @@ static void vfio_devices_dma_logging_stop(VFIOContainer= Base *bcontainer) continue; } =20 - if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) { + if (vbasedev->io->device_feature(vbasedev, feature)) { warn_report("%s: Failed to stop DMA logging, err %d (%s)", vbasedev->name, -errno, strerror(errno)); } @@ -1067,9 +1067,8 @@ static int vfio_devices_dma_logging_start(VFIOContain= erBase *bcontainer, continue; } =20 - ret =3D ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature); + ret =3D vbasedev->io->device_feature(vbasedev, feature); if (ret) { - ret =3D -errno; error_setg_errno(errp, errno, "%s: Failed to start DMA logging= ", vbasedev->name); goto out; @@ -1148,7 +1147,7 @@ static int vfio_device_dma_logging_report(VFIODevice = *vbasedev, hwaddr iova, feature->flags =3D VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT; =20 - if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) { + if (vbasedev->io->device_feature(vbasedev, feature)) { return -errno; } =20 diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c index cdc7eb5bd5..2cd8fbe70c 100644 --- a/hw/vfio/helpers.c +++ b/hw/vfio/helpers.c @@ -42,7 +42,7 @@ void vfio_disable_irqindex(VFIODevice *vbasedev, int inde= x) .count =3D 0, }; =20 - ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); + vbasedev->io->set_irqs(vbasedev, &irq_set); } =20 void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index) @@ -55,7 +55,7 @@ void vfio_unmask_single_irqindex(VFIODevice *vbasedev, in= t index) .count =3D 1, }; =20 - ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); + vbasedev->io->set_irqs(vbasedev, &irq_set); } =20 void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index) @@ -68,7 +68,7 @@ void vfio_mask_single_irqindex(VFIODevice *vbasedev, int = index) .count =3D 1, }; =20 - ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); + vbasedev->io->set_irqs(vbasedev, &irq_set); } =20 static inline const char *action_to_str(int action) @@ -115,6 +115,7 @@ bool vfio_set_irq_signaling(VFIODevice *vbasedev, int i= ndex, int subindex, int argsz; const char *name; int32_t *pfd; + int ret; =20 argsz =3D sizeof(*irq_set) + sizeof(*pfd); =20 @@ -127,7 +128,9 @@ bool vfio_set_irq_signaling(VFIODevice *vbasedev, int i= ndex, int subindex, pfd =3D (int32_t *)&irq_set->data; *pfd =3D fd; =20 - if (!ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) { + ret =3D vbasedev->io->set_irqs(vbasedev, irq_set); + + if (!ret) { return true; } =20 @@ -159,6 +162,7 @@ void vfio_region_write(void *opaque, hwaddr addr, uint32_t dword; uint64_t qword; } buf; + int ret; =20 switch (size) { case 1: @@ -178,7 +182,8 @@ void vfio_region_write(void *opaque, hwaddr addr, break; } =20 - if (pwrite(vbasedev->fd, &buf, size, region->fd_offset + addr) !=3D si= ze) { + ret =3D vbasedev->io->region_write(vbasedev, region->nr, addr, size, &= buf); + if (ret !=3D size) { error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64 ",%d) failed: %m", __func__, vbasedev->name, region->nr, @@ -210,8 +215,10 @@ uint64_t vfio_region_read(void *opaque, uint64_t qword; } buf; uint64_t data =3D 0; + int ret; =20 - if (pread(vbasedev->fd, &buf, size, region->fd_offset + addr) !=3D siz= e) { + ret =3D vbasedev->io->region_read(vbasedev, region->nr, addr, size, &b= uf); + if (ret !=3D size) { error_report("%s(%s:region%d+0x%"HWADDR_PRIx", %d) failed: %m", __func__, vbasedev->name, region->nr, addr, size); @@ -532,6 +539,7 @@ int vfio_get_region_info(VFIODevice *vbasedev, int inde= x, struct vfio_region_info **info) { size_t argsz =3D sizeof(struct vfio_region_info); + int ret; =20 /* create region cache */ if (vbasedev->regions =3D=3D NULL) { @@ -550,7 +558,8 @@ int vfio_get_region_info(VFIODevice *vbasedev, int inde= x, retry: (*info)->argsz =3D argsz; =20 - if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info)) { + ret =3D vbasedev->io->get_region_info(vbasedev, *info); + if (ret !=3D 0) { g_free(*info); *info =3D NULL; return -errno; @@ -660,12 +669,87 @@ void vfio_device_set_fd(VFIODevice *vbasedev, const c= har *str, Error **errp) } =20 void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops, - DeviceState *dev, bool ram_discard) + VFIODeviceIO *io, DeviceState *dev, bool ram_discard) { vbasedev->type =3D type; vbasedev->ops =3D ops; vbasedev->dev =3D dev; + vbasedev->io =3D io; vbasedev->fd =3D -1; =20 vbasedev->ram_block_discard_allowed =3D ram_discard; } + +/* + * Traditional ioctl() based io + */ + +static int vfio_io_device_feature(VFIODevice *vbasedev, + struct vfio_device_feature *feature) +{ + int ret; + + ret =3D ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature); + + return ret < 0 ? -errno : ret; +} + +static int vfio_io_get_region_info(VFIODevice *vbasedev, + struct vfio_region_info *info) +{ + int ret; + + ret =3D ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, info); + + return ret < 0 ? -errno : ret; +} + +static int vfio_io_get_irq_info(VFIODevice *vbasedev, + struct vfio_irq_info *info) +{ + int ret; + + ret =3D ioctl(vbasedev->fd, VFIO_DEVICE_GET_IRQ_INFO, info); + + return ret < 0 ? -errno : ret; +} + +static int vfio_io_set_irqs(VFIODevice *vbasedev, struct vfio_irq_set *irq= s) +{ + int ret; + + ret =3D ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irqs); + + return ret < 0 ? -errno : ret; +} + +static int vfio_io_region_read(VFIODevice *vbasedev, uint8_t index, off_t = off, + uint32_t size, void *data) +{ + struct vfio_region_info *info =3D vbasedev->regions[index]; + int ret; + + ret =3D pread(vbasedev->fd, data, size, info->offset + off); + + return ret < 0 ? -errno : ret; +} + +static int vfio_io_region_write(VFIODevice *vbasedev, uint8_t index, off_t= off, + uint32_t size, void *data) +{ + struct vfio_region_info *info =3D vbasedev->regions[index]; + int ret; + + ret =3D pwrite(vbasedev->fd, data, size, info->offset + off); + + return ret < 0 ? -errno : ret; +} + +VFIODeviceIO vfio_dev_io_ioctl =3D { + .device_feature =3D vfio_io_device_feature, + .get_region_info =3D vfio_io_get_region_info, + .get_irq_info =3D vfio_io_get_irq_info, + .set_irqs =3D vfio_io_set_irqs, + .region_read =3D vfio_io_region_read, + .region_write =3D vfio_io_region_write, +}; diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 99783d3cd4..2ad0dbe342 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -45,6 +45,14 @@ #include "migration/qemu-file.h" #include "sysemu/iommufd.h" =20 +/* convenience macros for PCI config space */ +#define VDEV_CONFIG_READ(vbasedev, off, size, data) \ + ((vbasedev)->io->region_read((vbasedev), VFIO_PCI_CONFIG_REGION_INDEX,= \ + (off), (size), (data))) +#define VDEV_CONFIG_WRITE(vbasedev, off, size, data) \ + ((vbasedev)->io->region_write((vbasedev), VFIO_PCI_CONFIG_REGION_INDEX= , \ + (off), (size), (data))) + #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug" =20 /* Protected by BQL */ @@ -379,6 +387,7 @@ static void vfio_msi_interrupt(void *opaque) static int vfio_enable_msix_no_vec(VFIOPCIDevice *vdev) { g_autofree struct vfio_irq_set *irq_set =3D NULL; + VFIODevice *vbasedev =3D &vdev->vbasedev; int ret =3D 0, argsz; int32_t *fd; =20 @@ -394,7 +403,7 @@ static int vfio_enable_msix_no_vec(VFIOPCIDevice *vdev) fd =3D (int32_t *)&irq_set->data; *fd =3D -1; =20 - ret =3D ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set); + ret =3D vbasedev->io->set_irqs(vbasedev, irq_set); =20 return ret; } @@ -453,7 +462,7 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, boo= l msix) fds[i] =3D fd; } =20 - ret =3D ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set); + ret =3D vdev->vbasedev.io->set_irqs(&vdev->vbasedev, irq_set); =20 g_free(irq_set); =20 @@ -879,13 +888,14 @@ static void vfio_update_msi(VFIOPCIDevice *vdev) =20 static void vfio_pci_load_rom(VFIOPCIDevice *vdev) { + VFIODevice *vbasedev =3D &vdev->vbasedev; struct vfio_region_info *reg_info =3D NULL; uint64_t size; off_t off =3D 0; ssize_t bytes; =20 - if (vfio_get_region_info(&vdev->vbasedev, - VFIO_PCI_ROM_REGION_INDEX, ®_info)) { + if (!vfio_get_region_info(vbasedev, + VFIO_PCI_ROM_REGION_INDEX, ®_info)) { error_report("vfio: Error getting ROM info: %m"); return; } @@ -911,18 +921,19 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev) memset(vdev->rom, 0xff, size); =20 while (size) { - bytes =3D pread(vdev->vbasedev.fd, vdev->rom + off, - size, vdev->rom_offset + off); + bytes =3D vbasedev->io->region_read(vbasedev, VFIO_PCI_ROM_REGION_= INDEX, + off, size, vdev->rom + off); if (bytes =3D=3D 0) { break; } else if (bytes > 0) { off +=3D bytes; size -=3D bytes; } else { - if (errno =3D=3D EINTR || errno =3D=3D EAGAIN) { + if (bytes =3D=3D -EINTR || bytes =3D=3D -EAGAIN) { continue; } - error_report("vfio: Error reading device ROM: %m"); + error_report("vfio: Error reading device ROM: %s", + strerror(-bytes)); break; } } @@ -1010,11 +1021,10 @@ static const MemoryRegionOps vfio_rom_ops =3D { =20 static void vfio_pci_size_rom(VFIOPCIDevice *vdev) { + VFIODevice *vbasedev =3D &vdev->vbasedev; uint32_t orig, size =3D cpu_to_le32((uint32_t)PCI_ROM_ADDRESS_MASK); - off_t offset =3D vdev->config_offset + PCI_ROM_ADDRESS; DeviceState *dev =3D DEVICE(vdev); char *name; - int fd =3D vdev->vbasedev.fd; =20 if (vdev->pdev.romfile || !vdev->pdev.rom_bar) { /* Since pci handles romfile, just print a message and return */ @@ -1031,11 +1041,12 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev) * Use the same size ROM BAR as the physical device. The contents * will get filled in later when the guest tries to read it. */ - if (pread(fd, &orig, 4, offset) !=3D 4 || - pwrite(fd, &size, 4, offset) !=3D 4 || - pread(fd, &size, 4, offset) !=3D 4 || - pwrite(fd, &orig, 4, offset) !=3D 4) { - error_report("%s(%s) failed: %m", __func__, vdev->vbasedev.name); + if (VDEV_CONFIG_READ(vbasedev, PCI_ROM_ADDRESS, 4, &orig) !=3D 4 || + VDEV_CONFIG_WRITE(vbasedev, PCI_ROM_ADDRESS, 4, &size) !=3D 4 || + VDEV_CONFIG_READ(vbasedev, PCI_ROM_ADDRESS, 4, &size) !=3D 4 || + VDEV_CONFIG_WRITE(vbasedev, PCI_ROM_ADDRESS, 4, &orig) !=3D 4) { + + error_report("%s(%s) ROM access failed", __func__, vbasedev->name); return; } =20 @@ -1215,6 +1226,7 @@ static void vfio_sub_page_bar_update_mapping(PCIDevic= e *pdev, int bar) uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len) { VFIOPCIDevice *vdev =3D VFIO_PCI_BASE(pdev); + VFIODevice *vbasedev =3D &vdev->vbasedev; uint32_t emu_bits =3D 0, emu_val =3D 0, phys_val =3D 0, val; =20 memcpy(&emu_bits, vdev->emulated_config_bits + addr, len); @@ -1227,12 +1239,13 @@ uint32_t vfio_pci_read_config(PCIDevice *pdev, uint= 32_t addr, int len) if (~emu_bits & (0xffffffffU >> (32 - len * 8))) { ssize_t ret; =20 - ret =3D pread(vdev->vbasedev.fd, &phys_val, len, - vdev->config_offset + addr); + ret =3D VDEV_CONFIG_READ(vbasedev, addr, len, &phys_val); if (ret !=3D len) { - error_report("%s(%s, 0x%x, 0x%x) failed: %m", - __func__, vdev->vbasedev.name, addr, len); - return -errno; + const char *err =3D ret < 0 ? strerror(-ret) : "short read"; + + error_report("%s(%s, 0x%x, 0x%x) failed: %s", + __func__, vbasedev->name, addr, len, err); + return -1; } phys_val =3D le32_to_cpu(phys_val); } @@ -1248,15 +1261,19 @@ void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr, uint32_t val, int len) { VFIOPCIDevice *vdev =3D VFIO_PCI_BASE(pdev); + VFIODevice *vbasedev =3D &vdev->vbasedev; uint32_t val_le =3D cpu_to_le32(val); + int ret; =20 trace_vfio_pci_write_config(vdev->vbasedev.name, addr, val, len); =20 /* Write everything to VFIO, let it filter out what we can't write */ - if (pwrite(vdev->vbasedev.fd, &val_le, len, vdev->config_offset + addr) - !=3D len) { - error_report("%s(%s, 0x%x, 0x%x, 0x%x) failed: %m", - __func__, vdev->vbasedev.name, addr, val, len); + ret =3D VDEV_CONFIG_WRITE(vbasedev, addr, len, &val_le); + if (ret !=3D len) { + const char *err =3D ret < 0 ? strerror(-ret) : "short write"; + + error_report("%s(%s, 0x%x, 0x%x, 0x%x) failed: %s", + __func__, vbasedev->name, addr, val, len, err); } =20 /* MSI/MSI-X Enabling/Disabling */ @@ -1344,9 +1361,12 @@ static bool vfio_msi_setup(VFIOPCIDevice *vdev, int = pos, Error **errp) int ret, entries; Error *err =3D NULL; =20 - if (pread(vdev->vbasedev.fd, &ctrl, sizeof(ctrl), - vdev->config_offset + pos + PCI_CAP_FLAGS) !=3D sizeof(ctrl)= ) { - error_setg_errno(errp, errno, "failed reading MSI PCI_CAP_FLAGS"); + ret =3D VDEV_CONFIG_READ(&vdev->vbasedev, pos + PCI_CAP_FLAGS, + sizeof(ctrl), &ctrl); + if (ret !=3D sizeof(ctrl)) { + const char *errmsg =3D ret < 0 ? strerror(-ret) : "short read"; + + error_setg(errp, "failed reading MSI PCI_CAP_FLAGS %s", errmsg); return false; } ctrl =3D le16_to_cpu(ctrl); @@ -1550,34 +1570,43 @@ static bool vfio_pci_relocate_msix(VFIOPCIDevice *v= dev, Error **errp) */ static bool vfio_msix_early_setup(VFIOPCIDevice *vdev, Error **errp) { + VFIODevice *vbasedev =3D &vdev->vbasedev; uint8_t pos; uint16_t ctrl; uint32_t table, pba; - int ret, fd =3D vdev->vbasedev.fd; struct vfio_irq_info irq_info =3D { .argsz =3D sizeof(irq_info), .index =3D VFIO_PCI_MSIX_IRQ_INDEX }; VFIOMSIXInfo *msix; + int ret; =20 pos =3D pci_find_capability(&vdev->pdev, PCI_CAP_ID_MSIX); if (!pos) { return true; } =20 - if (pread(fd, &ctrl, sizeof(ctrl), - vdev->config_offset + pos + PCI_MSIX_FLAGS) !=3D sizeof(ctrl= )) { - error_setg_errno(errp, errno, "failed to read PCI MSIX FLAGS"); + ret =3D VDEV_CONFIG_READ(vbasedev, pos + PCI_MSIX_FLAGS, + sizeof(ctrl), &ctrl); + if (ret !=3D sizeof(ctrl)) { + const char *err =3D ret < 0 ? strerror(-ret) : "short read"; + + error_setg(errp, "failed to read PCI MSIX FLAGS %s", err); return false; } =20 - if (pread(fd, &table, sizeof(table), - vdev->config_offset + pos + PCI_MSIX_TABLE) !=3D sizeof(tabl= e)) { - error_setg_errno(errp, errno, "failed to read PCI MSIX TABLE"); + ret =3D VDEV_CONFIG_READ(vbasedev, pos + PCI_MSIX_TABLE, + sizeof(table), &table); + if (ret !=3D sizeof(table)) { + const char *err =3D ret < 0 ? strerror(-ret) : "short read"; + + error_setg(errp, "failed to read PCI MSIX TABLE %s", err); return false; } =20 - if (pread(fd, &pba, sizeof(pba), - vdev->config_offset + pos + PCI_MSIX_PBA) !=3D sizeof(pba)) { - error_setg_errno(errp, errno, "failed to read PCI MSIX PBA"); + ret =3D VDEV_CONFIG_READ(vbasedev, pos + PCI_MSIX_PBA, sizeof(pba), &p= ba); + if (ret !=3D sizeof(pba)) { + const char *err =3D ret < 0 ? strerror(-ret) : "short read"; + + error_setg(errp, "failed to read PCI MSIX PBA %s", err); return false; } =20 @@ -1592,7 +1621,7 @@ static bool vfio_msix_early_setup(VFIOPCIDevice *vdev= , Error **errp) msix->pba_offset =3D pba & ~PCI_MSIX_FLAGS_BIRMASK; msix->entries =3D (ctrl & PCI_MSIX_FLAGS_QSIZE) + 1; =20 - ret =3D ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info); + ret =3D vdev->vbasedev.io->get_irq_info(&vdev->vbasedev, &irq_info); if (ret < 0) { error_setg_errno(errp, -ret, "failed to get MSI-X irq info"); g_free(msix); @@ -1736,10 +1765,12 @@ static void vfio_bar_prepare(VFIOPCIDevice *vdev, i= nt nr) } =20 /* Determine what type of BAR this is for registration */ - ret =3D pread(vdev->vbasedev.fd, &pci_bar, sizeof(pci_bar), - vdev->config_offset + PCI_BASE_ADDRESS_0 + (4 * nr)); + ret =3D VDEV_CONFIG_READ(&vdev->vbasedev, PCI_BASE_ADDRESS_0 + (4 * nr= ), + sizeof(pci_bar), &pci_bar); if (ret !=3D sizeof(pci_bar)) { - error_report("vfio: Failed to read BAR %d (%m)", nr); + const char *err =3D ret < 0 ? strerror(-ret) : "short read"; + + error_report("vfio: Failed to read BAR %d (%s)", nr, err); return; } =20 @@ -2439,21 +2470,25 @@ void vfio_pci_pre_reset(VFIOPCIDevice *vdev) =20 void vfio_pci_post_reset(VFIOPCIDevice *vdev) { + VFIODevice *vbasedev =3D &vdev->vbasedev; Error *err =3D NULL; - int nr; + int ret, nr; =20 if (!vfio_intx_enable(vdev, &err)) { error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); } =20 for (nr =3D 0; nr < PCI_NUM_REGIONS - 1; ++nr) { - off_t addr =3D vdev->config_offset + PCI_BASE_ADDRESS_0 + (4 * nr); + off_t addr =3D PCI_BASE_ADDRESS_0 + (4 * nr); uint32_t val =3D 0; uint32_t len =3D sizeof(val); =20 - if (pwrite(vdev->vbasedev.fd, &val, len, addr) !=3D len) { - error_report("%s(%s) reset bar %d failed: %m", __func__, - vdev->vbasedev.name, nr); + ret =3D VDEV_CONFIG_WRITE(vbasedev, addr, len, &val); + if (ret !=3D len) { + const char *errmsg =3D ret < 0 ? strerror(-ret) : "short write= "; + + error_report("%s(%s) reset bar %d failed: %s", __func__, + vbasedev->name, nr, errmsg); } } =20 @@ -2795,7 +2830,7 @@ static bool vfio_populate_device(VFIOPCIDevice *vdev,= Error **errp) =20 irq_info.index =3D VFIO_PCI_ERR_IRQ_INDEX; =20 - ret =3D ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info); + ret =3D vbasedev->io->get_irq_info(vbasedev, &irq_info); if (ret) { /* This can fail for an old kernel or legacy PCI dev */ trace_vfio_populate_device_get_irq_info_failure(strerror(errno)); @@ -2916,8 +2951,10 @@ static void vfio_register_req_notifier(VFIOPCIDevice= *vdev) return; } =20 - if (ioctl(vdev->vbasedev.fd, - VFIO_DEVICE_GET_IRQ_INFO, &irq_info) < 0 || irq_info.count <= 1) { + if (vdev->vbasedev.io->get_irq_info(&vdev->vbasedev, &irq_info) < 0) { + return; + } + if (irq_info.count < 1) { return; } =20 @@ -3337,7 +3374,7 @@ static void vfio_instance_init(Object *obj) vdev->host.function =3D ~0U; =20 vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PCI, &vfio_pci_ops, - DEVICE(vdev), false); + &vfio_dev_io_ioctl, DEVICE(vdev), false); =20 vdev->nv_gpudirect_clique =3D 0xFF; =20 diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c index a85c199c76..86ecd97fde 100644 --- a/hw/vfio/platform.c +++ b/hw/vfio/platform.c @@ -649,7 +649,7 @@ static void vfio_platform_instance_init(Object *obj) VFIODevice *vbasedev =3D &vdev->vbasedev; =20 vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PLATFORM, &vfio_platform_o= ps, - DEVICE(vdev), false); + &vfio_dev_io_ioctl, DEVICE(vdev), false); } =20 #ifdef CONFIG_IOMMUFD diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 2428c7d80c..689cafe28b 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -99,6 +99,7 @@ typedef struct VFIOIOMMUFDContainer { } VFIOIOMMUFDContainer; =20 typedef struct VFIODeviceOps VFIODeviceOps; +typedef struct VFIODeviceIO VFIODeviceIO; =20 typedef struct VFIODevice { QLIST_ENTRY(VFIODevice) next; @@ -118,6 +119,7 @@ typedef struct VFIODevice { OnOffAuto enable_migration; bool migration_events; VFIODeviceOps *ops; + VFIODeviceIO *io; unsigned int num_irqs; unsigned int num_regions; unsigned int flags; @@ -163,6 +165,29 @@ struct VFIODeviceOps { int (*vfio_load_config)(VFIODevice *vdev, QEMUFile *f); }; =20 +#ifdef CONFIG_LINUX + +/* + * How devices communicate with the server. The default option is through + * ioctl() to the kernel VFIO driver, but vfio-user can use a socket to a = remote + * process. + */ +struct VFIODeviceIO { + int (*device_feature)(VFIODevice *vdev, struct vfio_device_feature *); + int (*get_region_info)(VFIODevice *vdev, + struct vfio_region_info *info); + int (*get_irq_info)(VFIODevice *vdev, struct vfio_irq_info *irq); + int (*set_irqs)(VFIODevice *vdev, struct vfio_irq_set *irqs); + int (*region_read)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t s= ize, + void *data); + int (*region_write)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t = size, + void *data); +}; + +extern VFIODeviceIO vfio_dev_io_ioctl; + +#endif /* CONFIG_LINUX */ + typedef struct VFIOGroup { int fd; int groupid; @@ -289,5 +314,5 @@ int vfio_get_dirty_bitmap(const VFIOContainerBase *bcon= tainer, uint64_t iova, bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp); void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **err= p); void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops, - DeviceState *dev, bool ram_discard); + VFIODeviceIO *io, DeviceState *dev, bool ram_discard= ); #endif /* HW_VFIO_VFIO_COMMON_H */ --=20 2.34.1