[PATCH RFCv1 06/10] hw/arm/virt: Assign vfio-pci devices to nested SMMUs

Nicolin Chen posted 10 patches 2 months, 3 weeks ago
[PATCH RFCv1 06/10] hw/arm/virt: Assign vfio-pci devices to nested SMMUs
Posted by Nicolin Chen 2 months, 3 weeks ago
With iommu=nested-smmuv3, there could be multiple nested SMMU instances in
the vms. A passthrough device must to look up for its iommu handler in its
sysfs node, and then link to the nested SMMU instance created for the same
iommu handler. This isn't easy to do.

Add an auto-assign piece after all vSMMU backed pxb buses are created. It
loops the existing input devices, and sets/replaces their pci bus numbers
with a newly created pcie-root-port to the pxb bus.

Note that this is not an ideal solution to handle hot plug device.

Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
 hw/arm/virt.c         | 110 ++++++++++++++++++++++++++++++++++++++++++
 include/hw/arm/virt.h |  13 +++++
 2 files changed, 123 insertions(+)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index a54332fca8..3610f53304 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -38,6 +38,7 @@
 #include "hw/arm/primecell.h"
 #include "hw/arm/virt.h"
 #include "hw/block/flash.h"
+#include "hw/vfio/pci.h"
 #include "hw/vfio/vfio-calxeda-xgmac.h"
 #include "hw/vfio/vfio-amd-xgbe.h"
 #include "hw/display/ramfb.h"
@@ -1491,6 +1492,112 @@ static void create_virtio_iommu_dt_bindings(VirtMachineState *vms)
                            bdf + 1, vms->iommu_phandle, bdf + 1, 0xffff - bdf);
 }
 
+static char *create_new_pcie_port(VirtNestedSmmu *nested_smmu, Error **errp)
+{
+    uint32_t port_nr = nested_smmu->pci_bus->qbus.num_children;
+    uint32_t chassis_nr = UINT8_MAX - nested_smmu->index;
+    uint32_t bus_nr = pci_bus_num(nested_smmu->pci_bus);
+    DeviceState *dev;
+    char *name_port;
+
+    /* Create a root port */
+    dev = qdev_new("pcie-root-port");
+    name_port = g_strdup_printf("smmu_bus0x%x_port%d", bus_nr, port_nr);
+
+    if (!qdev_set_id(dev, name_port, &error_fatal)) {
+        /* FIXME retry with a different port num? */
+        error_setg(errp, "Could not set pcie-root-port ID %s", name_port);
+        g_free(name_port);
+        g_free(dev);
+        return NULL;
+    }
+    qdev_prop_set_uint32(dev, "chassis", chassis_nr);
+    qdev_prop_set_uint32(dev, "slot", port_nr);
+    qdev_prop_set_uint64(dev, "io-reserve", 0);
+    qdev_realize_and_unref(dev, BUS(nested_smmu->pci_bus), &error_fatal);
+    return name_port;
+}
+
+static int assign_nested_smmu(void *opaque, QemuOpts *opts, Error **errp)
+{
+    VirtMachineState *vms = (VirtMachineState *)opaque;
+    const char *sysfsdev = qemu_opt_get(opts, "sysfsdev");
+    const char *iommufd = qemu_opt_get(opts, "iommufd");
+    const char *driver = qemu_opt_get(opts, "driver");
+    const char *host = qemu_opt_get(opts, "host");
+    const char *bus = qemu_opt_get(opts, "bus");
+    VirtNestedSmmu *nested_smmu;
+    char *link_iommu;
+    char *dir_iommu;
+    char *smmu_node;
+    char *name_port;
+    int ret = 0;
+
+    if (!iommufd || !driver) {
+        return 0;
+    }
+    if (!sysfsdev && !host) {
+        return 0;
+    }
+    if (strncmp(driver, TYPE_VFIO_PCI, strlen(TYPE_VFIO_PCI))) {
+        return 0;
+    }
+    /* If the device wants to attach to the default bus, do not reassign it */
+    if (bus && !strncmp(bus, "pcie.0", strlen(bus))) {
+        return 0;
+    }
+
+    if (sysfsdev) {
+        link_iommu = g_strdup_printf("%s/iommu", sysfsdev);
+    } else {
+        link_iommu = g_strdup_printf("/sys/bus/pci/devices/%s/iommu", host);
+    }
+
+    dir_iommu = realpath(link_iommu, NULL);
+    if (!dir_iommu) {
+        error_setg(errp, "Could not get the real path for iommu link: %s",
+                   link_iommu);
+        ret = -EINVAL;
+        goto free_link;
+    }
+
+    smmu_node = g_path_get_basename(dir_iommu);
+    if (!smmu_node) {
+        error_setg(errp, "Could not get SMMU node name for iommu at: %s",
+                   dir_iommu);
+        ret = -EINVAL;
+        goto free_dir;
+    }
+
+    nested_smmu = find_nested_smmu_by_sysfs(vms, smmu_node);
+    if (!nested_smmu) {
+        error_setg(errp, "Could not find any detected SMMU matching node: %s",
+                   smmu_node);
+        ret = -EINVAL;
+        goto free_node;
+    }
+
+    name_port = create_new_pcie_port(nested_smmu, errp);
+    if (!name_port) {
+        ret = -EBUSY;
+        goto free_node;
+    }
+
+    qemu_opt_set(opts, "bus", name_port, &error_fatal);
+    if (bus) {
+        error_report("overriding PCI bus %s to %s for device %s [%s]",
+                     bus, name_port, host, sysfsdev);
+    }
+
+free_node:
+    free(smmu_node);
+free_dir:
+    free(dir_iommu);
+free_link:
+    free(link_iommu);
+    return ret;
+}
+
 /*
  * FIXME this is used to reverse for hotplug devices, yet it could result in a
  * big waste of PCI bus numbners.
@@ -1669,6 +1776,9 @@ static void create_pcie(VirtMachineState *vms)
             qemu_fdt_setprop_cells(ms->fdt, nodename, "iommu-map", 0x0,
                                    vms->nested_smmu_phandle[i], 0x0, 0x10000);
         }
+
+        qemu_opts_foreach(qemu_find_opts("device"),
+                          assign_nested_smmu, vms, &error_fatal);
     } else if (vms->iommu) {
         vms->iommu_phandle = qemu_fdt_alloc_phandle(ms->fdt);
 
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 0a3f1ab8b5..dfbc4bba3c 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -246,4 +246,17 @@ find_nested_smmu_by_index(VirtMachineState *vms, int index)
     return NULL;
 }
 
+static inline VirtNestedSmmu *
+find_nested_smmu_by_sysfs(VirtMachineState *vms, char *node)
+{
+    VirtNestedSmmu *nested_smmu;
+
+    QLIST_FOREACH(nested_smmu, &vms->nested_smmu_list, next) {
+        if (!strncmp(nested_smmu->smmu_node, node, strlen(node))) {
+            return nested_smmu;
+        }
+    }
+    return NULL;
+}
+
 #endif /* QEMU_ARM_VIRT_H */
-- 
2.43.0
Re: [PATCH RFCv1 06/10] hw/arm/virt: Assign vfio-pci devices to nested SMMUs
Posted by Eric Auger 2 months, 1 week ago

On 6/26/24 02:28, Nicolin Chen wrote:
> With iommu=nested-smmuv3, there could be multiple nested SMMU instances in
> the vms. A passthrough device must to look up for its iommu handler in its
> sysfs node, and then link to the nested SMMU instance created for the same
> iommu handler. This isn't easy to do.
>
> Add an auto-assign piece after all vSMMU backed pxb buses are created. It
> loops the existing input devices, and sets/replaces their pci bus numbers
> with a newly created pcie-root-port to the pxb bus.
Here again I don't think it is acceptable to create such topology under
the hood. Libvirt shall master the whole PCIe topology.

Eric
>
> Note that this is not an ideal solution to handle hot plug device.
>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> ---
>  hw/arm/virt.c         | 110 ++++++++++++++++++++++++++++++++++++++++++
>  include/hw/arm/virt.h |  13 +++++
>  2 files changed, 123 insertions(+)
>
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index a54332fca8..3610f53304 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -38,6 +38,7 @@
>  #include "hw/arm/primecell.h"
>  #include "hw/arm/virt.h"
>  #include "hw/block/flash.h"
> +#include "hw/vfio/pci.h"
>  #include "hw/vfio/vfio-calxeda-xgmac.h"
>  #include "hw/vfio/vfio-amd-xgbe.h"
>  #include "hw/display/ramfb.h"
> @@ -1491,6 +1492,112 @@ static void create_virtio_iommu_dt_bindings(VirtMachineState *vms)
>                             bdf + 1, vms->iommu_phandle, bdf + 1, 0xffff - bdf);
>  }
>  
> +static char *create_new_pcie_port(VirtNestedSmmu *nested_smmu, Error **errp)
> +{
> +    uint32_t port_nr = nested_smmu->pci_bus->qbus.num_children;
> +    uint32_t chassis_nr = UINT8_MAX - nested_smmu->index;
> +    uint32_t bus_nr = pci_bus_num(nested_smmu->pci_bus);
> +    DeviceState *dev;
> +    char *name_port;
> +
> +    /* Create a root port */
> +    dev = qdev_new("pcie-root-port");
> +    name_port = g_strdup_printf("smmu_bus0x%x_port%d", bus_nr, port_nr);
> +
> +    if (!qdev_set_id(dev, name_port, &error_fatal)) {
> +        /* FIXME retry with a different port num? */
> +        error_setg(errp, "Could not set pcie-root-port ID %s", name_port);
> +        g_free(name_port);
> +        g_free(dev);
> +        return NULL;
> +    }
> +    qdev_prop_set_uint32(dev, "chassis", chassis_nr);
> +    qdev_prop_set_uint32(dev, "slot", port_nr);
> +    qdev_prop_set_uint64(dev, "io-reserve", 0);
> +    qdev_realize_and_unref(dev, BUS(nested_smmu->pci_bus), &error_fatal);
> +    return name_port;
> +}
> +
> +static int assign_nested_smmu(void *opaque, QemuOpts *opts, Error **errp)
> +{
> +    VirtMachineState *vms = (VirtMachineState *)opaque;
> +    const char *sysfsdev = qemu_opt_get(opts, "sysfsdev");
> +    const char *iommufd = qemu_opt_get(opts, "iommufd");
> +    const char *driver = qemu_opt_get(opts, "driver");
> +    const char *host = qemu_opt_get(opts, "host");
> +    const char *bus = qemu_opt_get(opts, "bus");
> +    VirtNestedSmmu *nested_smmu;
> +    char *link_iommu;
> +    char *dir_iommu;
> +    char *smmu_node;
> +    char *name_port;
> +    int ret = 0;
> +
> +    if (!iommufd || !driver) {
> +        return 0;
> +    }
> +    if (!sysfsdev && !host) {
> +        return 0;
> +    }
> +    if (strncmp(driver, TYPE_VFIO_PCI, strlen(TYPE_VFIO_PCI))) {
> +        return 0;
> +    }
> +    /* If the device wants to attach to the default bus, do not reassign it */
> +    if (bus && !strncmp(bus, "pcie.0", strlen(bus))) {
> +        return 0;
> +    }
> +
> +    if (sysfsdev) {
> +        link_iommu = g_strdup_printf("%s/iommu", sysfsdev);
> +    } else {
> +        link_iommu = g_strdup_printf("/sys/bus/pci/devices/%s/iommu", host);
> +    }
> +
> +    dir_iommu = realpath(link_iommu, NULL);
> +    if (!dir_iommu) {
> +        error_setg(errp, "Could not get the real path for iommu link: %s",
> +                   link_iommu);
> +        ret = -EINVAL;
> +        goto free_link;
> +    }
> +
> +    smmu_node = g_path_get_basename(dir_iommu);
> +    if (!smmu_node) {
> +        error_setg(errp, "Could not get SMMU node name for iommu at: %s",
> +                   dir_iommu);
> +        ret = -EINVAL;
> +        goto free_dir;
> +    }
> +
> +    nested_smmu = find_nested_smmu_by_sysfs(vms, smmu_node);
> +    if (!nested_smmu) {
> +        error_setg(errp, "Could not find any detected SMMU matching node: %s",
> +                   smmu_node);
> +        ret = -EINVAL;
> +        goto free_node;
> +    }
> +
> +    name_port = create_new_pcie_port(nested_smmu, errp);
> +    if (!name_port) {
> +        ret = -EBUSY;
> +        goto free_node;
> +    }
> +
> +    qemu_opt_set(opts, "bus", name_port, &error_fatal);
> +    if (bus) {
> +        error_report("overriding PCI bus %s to %s for device %s [%s]",
> +                     bus, name_port, host, sysfsdev);
> +    }
> +
> +free_node:
> +    free(smmu_node);
> +free_dir:
> +    free(dir_iommu);
> +free_link:
> +    free(link_iommu);
> +    return ret;
> +}
> +
>  /*
>   * FIXME this is used to reverse for hotplug devices, yet it could result in a
>   * big waste of PCI bus numbners.
> @@ -1669,6 +1776,9 @@ static void create_pcie(VirtMachineState *vms)
>              qemu_fdt_setprop_cells(ms->fdt, nodename, "iommu-map", 0x0,
>                                     vms->nested_smmu_phandle[i], 0x0, 0x10000);
>          }
> +
> +        qemu_opts_foreach(qemu_find_opts("device"),
> +                          assign_nested_smmu, vms, &error_fatal);
>      } else if (vms->iommu) {
>          vms->iommu_phandle = qemu_fdt_alloc_phandle(ms->fdt);
>  
> diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
> index 0a3f1ab8b5..dfbc4bba3c 100644
> --- a/include/hw/arm/virt.h
> +++ b/include/hw/arm/virt.h
> @@ -246,4 +246,17 @@ find_nested_smmu_by_index(VirtMachineState *vms, int index)
>      return NULL;
>  }
>  
> +static inline VirtNestedSmmu *
> +find_nested_smmu_by_sysfs(VirtMachineState *vms, char *node)
> +{
> +    VirtNestedSmmu *nested_smmu;
> +
> +    QLIST_FOREACH(nested_smmu, &vms->nested_smmu_list, next) {
> +        if (!strncmp(nested_smmu->smmu_node, node, strlen(node))) {
> +            return nested_smmu;
> +        }
> +    }
> +    return NULL;
> +}
> +
>  #endif /* QEMU_ARM_VIRT_H */