From: Yi Liu <yi.l.liu@intel.com>
Compared with the legacy VFIO container backend, one of the benefits of
iommufd is that it reduces redundant page pinning on the kernel side through
the use of IOAS_COPY_DMA. For iommufd containers within the same address
space, IOVA mappings can be copied from a source container to a destination
container.
To achieve this, make the vfio_memory_listener per address space rather
than per container. The memory listener callbacks loop over all the
containers within the address space. For iommufd containers, QEMU uses
IOAS_MAP_DMA on the first one, and then uses IOAS_COPY_DMA to copy the IOVA
mappings from the first iommufd container to the other iommufd containers
within the address space. For legacy containers, IOVA mapping is done with
VFIO_IOMMU_MAP_DMA.
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
---
include/hw/vfio/vfio-common.h | 4 +
include/hw/vfio/vfio-container-base.h | 8 +-
hw/vfio/as.c | 118 ++++++++++++++++++++++----
hw/vfio/container-base.c | 13 ++-
hw/vfio/container.c | 19 ++---
hw/vfio/iommufd.c | 48 +++++++++--
6 files changed, 169 insertions(+), 41 deletions(-)
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index c096778476..9c2e52be0d 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -178,6 +178,10 @@ void vfio_host_win_add(VFIOContainer *bcontainer,
int vfio_host_win_del(VFIOContainer *bcontainer, hwaddr min_iova,
hwaddr max_iova);
VFIOAddressSpace *vfio_get_address_space(AddressSpace *as);
+void vfio_as_add_container(VFIOAddressSpace *space,
+ VFIOContainer *bcontainer);
+void vfio_as_del_container(VFIOAddressSpace *space,
+ VFIOContainer *container);
void vfio_put_address_space(VFIOAddressSpace *space);
void vfio_put_base_device(VFIODevice *vbasedev);
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
index 9907d05531..eae9b1de6f 100644
--- a/include/hw/vfio/vfio-container-base.h
+++ b/include/hw/vfio/vfio-container-base.h
@@ -31,12 +31,15 @@
typedef enum VFIOContainerFeature {
VFIO_FEAT_LIVE_MIGRATION,
+ VFIO_FEAT_DMA_COPY,
} VFIOContainerFeature;
typedef struct VFIOContainer VFIOContainer;
typedef struct VFIOAddressSpace {
AddressSpace *as;
+ MemoryListener listener;
+ bool listener_initialized;
QLIST_HEAD(, VFIOContainer) containers;
QLIST_ENTRY(VFIOAddressSpace) list;
} VFIOAddressSpace;
@@ -75,7 +78,6 @@ typedef struct VFIOIOMMUBackendOpsClass VFIOIOMMUBackendOpsClass;
struct VFIOContainer {
VFIOIOMMUBackendOpsClass *ops;
VFIOAddressSpace *space;
- MemoryListener listener;
Error *error;
bool initialized;
bool dirty_pages_supported;
@@ -94,6 +96,8 @@ bool vfio_container_check_extension(VFIOContainer *container,
int vfio_container_dma_map(VFIOContainer *container,
hwaddr iova, ram_addr_t size,
void *vaddr, bool readonly);
+int vfio_container_dma_copy(VFIOContainer *src, VFIOContainer *dst,
+ hwaddr iova, ram_addr_t size, bool readonly);
int vfio_container_dma_unmap(VFIOContainer *container,
hwaddr iova, ram_addr_t size,
IOMMUTLBEntry *iotlb);
@@ -132,6 +136,8 @@ struct VFIOIOMMUBackendOpsClass {
int (*dma_map)(VFIOContainer *container,
hwaddr iova, ram_addr_t size,
void *vaddr, bool readonly);
+ int (*dma_copy)(VFIOContainer *src, VFIOContainer *dst,
+ hwaddr iova, ram_addr_t size, bool readonly);
int (*dma_unmap)(VFIOContainer *container,
hwaddr iova, ram_addr_t size,
IOMMUTLBEntry *iotlb);
diff --git a/hw/vfio/as.c b/hw/vfio/as.c
index ee126a5f03..04cd5a1d30 100644
--- a/hw/vfio/as.c
+++ b/hw/vfio/as.c
@@ -348,16 +348,16 @@ static bool vfio_known_safe_misalignment(MemoryRegionSection *section)
return true;
}
-static void vfio_listener_region_add(MemoryListener *listener,
- MemoryRegionSection *section)
+static void vfio_container_region_add(VFIOContainer *container,
+ VFIOContainer **src_container,
+ MemoryRegionSection *section)
{
- VFIOContainer *container = container_of(listener, VFIOContainer, listener);
hwaddr iova, end;
Int128 llend, llsize;
void *vaddr;
int ret;
VFIOHostDMAWindow *hostwin;
- bool hostwin_found;
+ bool hostwin_found, copy_dma_supported = false;
Error *err = NULL;
if (vfio_listener_skipped_section(section)) {
@@ -501,12 +501,26 @@ static void vfio_listener_region_add(MemoryListener *listener,
}
}
+ copy_dma_supported = vfio_container_check_extension(container,
+ VFIO_FEAT_DMA_COPY);
+
+ if (copy_dma_supported && *src_container) {
+ if (!vfio_container_dma_copy(*src_container, container,
+ iova, int128_get64(llsize),
+ section->readonly)) {
+ return;
+ } else {
+ info_report("IOAS copy failed try map for container: %p",
+ container);
+ }
+ }
+
ret = vfio_container_dma_map(container, iova, int128_get64(llsize),
vaddr, section->readonly);
if (ret) {
- error_setg(&err, "vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
- "0x%"HWADDR_PRIx", %p) = %d (%m)",
- container, iova, int128_get64(llsize), vaddr, ret);
+ error_setg(&err, "vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", "
+ "0x%"HWADDR_PRIx", %p) = %d (%m)", container, iova,
+ int128_get64(llsize), vaddr, ret);
if (memory_region_is_ram_device(section->mr)) {
/* Allow unexpected mappings not to be fatal for RAM devices */
error_report_err(err);
@@ -515,6 +529,9 @@ static void vfio_listener_region_add(MemoryListener *listener,
goto fail;
}
+ if (copy_dma_supported) {
+ *src_container = container;
+ }
return;
fail:
@@ -541,10 +558,22 @@ fail:
}
}
-static void vfio_listener_region_del(MemoryListener *listener,
+static void vfio_listener_region_add(MemoryListener *listener,
MemoryRegionSection *section)
{
- VFIOContainer *container = container_of(listener, VFIOContainer, listener);
+ VFIOAddressSpace *space = container_of(listener,
+ VFIOAddressSpace, listener);
+ VFIOContainer *container, *src_container;
+
+ src_container = NULL;
+ QLIST_FOREACH(container, &space->containers, next) {
+ vfio_container_region_add(container, &src_container, section);
+ }
+}
+
+static void vfio_container_region_del(VFIOContainer *container,
+ MemoryRegionSection *section)
+{
hwaddr iova, end;
Int128 llend, llsize;
int ret;
@@ -658,18 +687,38 @@ static void vfio_listener_region_del(MemoryListener *listener,
vfio_container_del_section_window(container, section);
}
+static void vfio_listener_region_del(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ VFIOAddressSpace *space = container_of(listener,
+ VFIOAddressSpace, listener);
+ VFIOContainer *container;
+ /* The listener is per address space: forward to each of its containers. */
+ QLIST_FOREACH(container, &space->containers, next) {
+ vfio_container_region_del(container, section);
+ }
+}
+
static void vfio_listener_log_global_start(MemoryListener *listener)
{
- VFIOContainer *container = container_of(listener, VFIOContainer, listener);
+ VFIOAddressSpace *space = container_of(listener,
+ VFIOAddressSpace, listener);
+ VFIOContainer *container;
- vfio_container_set_dirty_page_tracking(container, true);
+ QLIST_FOREACH(container, &space->containers, next) {
+ vfio_container_set_dirty_page_tracking(container, true);
+ }
}
static void vfio_listener_log_global_stop(MemoryListener *listener)
{
- VFIOContainer *container = container_of(listener, VFIOContainer, listener);
+ VFIOAddressSpace *space = container_of(listener,
+ VFIOAddressSpace, listener);
+ VFIOContainer *container;
- vfio_container_set_dirty_page_tracking(container, false);
+ QLIST_FOREACH(container, &space->containers, next) {
+ vfio_container_set_dirty_page_tracking(container, false);
+ }
}
typedef struct {
@@ -799,11 +848,9 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container,
int128_get64(section->size), ram_addr);
}
-static void vfio_listener_log_sync(MemoryListener *listener,
- MemoryRegionSection *section)
+static void vfio_container_log_sync(VFIOContainer *container,
+ MemoryRegionSection *section)
{
- VFIOContainer *container = container_of(listener, VFIOContainer, listener);
-
if (vfio_listener_skipped_section(section) ||
!container->dirty_pages_supported) {
return;
@@ -814,6 +861,18 @@ static void vfio_listener_log_sync(MemoryListener *listener,
}
}
+static void vfio_listener_log_sync(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ VFIOAddressSpace *space = container_of(listener,
+ VFIOAddressSpace, listener);
+ VFIOContainer *container;
+ /* Run the per-container log sync on every container in this space. */
+ QLIST_FOREACH(container, &space->containers, next) {
+ vfio_container_log_sync(container, section);
+ }
+}
+
const MemoryListener vfio_memory_listener = {
.name = "vfio",
.region_add = vfio_listener_region_add,
@@ -858,6 +917,31 @@ VFIOAddressSpace *vfio_get_address_space(AddressSpace *as)
return space;
}
+void vfio_as_add_container(VFIOAddressSpace *space,
+ VFIOContainer *container)
+{
+ if (space->listener_initialized) {
+ memory_listener_unregister(&space->listener);
+ }
+
+ QLIST_INSERT_HEAD(&space->containers, container, next);
+
+ /* (Re)register; last unregistration is in vfio_as_del_container() */
+ space->listener = vfio_memory_listener;
+ memory_listener_register(&space->listener, space->as);
+ space->listener_initialized = true;
+}
+
+void vfio_as_del_container(VFIOAddressSpace *space, VFIOContainer *container)
+{
+    QLIST_SAFE_REMOVE(container, next);
+    /* Last container gone: drop the listener and mark it unregistered. */
+    if (QLIST_EMPTY(&space->containers)) {
+        memory_listener_unregister(&space->listener);
+        /* Keep the flag in sync so a later add does not unregister again. */
+        space->listener_initialized = false;
+    }
+}
+
void vfio_put_address_space(VFIOAddressSpace *space)
{
if (QLIST_EMPTY(&space->containers)) {
diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
index 3ae939c6c9..88eab9b197 100644
--- a/hw/vfio/container-base.c
+++ b/hw/vfio/container-base.c
@@ -47,6 +47,17 @@ int vfio_container_dma_map(VFIOContainer *container,
return container->ops->dma_map(container, iova, size, vaddr, readonly);
}
+int vfio_container_dma_copy(VFIOContainer *src, VFIOContainer *dst,
+ hwaddr iova, ram_addr_t size, bool readonly)
+{
+ if (!src->ops->dma_copy || src->ops->dma_copy != dst->ops->dma_copy) {
+ error_report("Incompatible container: unable to copy dma");
+ return -EINVAL;
+ }
+ /* src and dst share one dma_copy implementation; delegate to it. */
+ return src->ops->dma_copy(src, dst, iova, size, readonly);
+}
+
int vfio_container_dma_unmap(VFIOContainer *container,
hwaddr iova, ram_addr_t size,
IOMMUTLBEntry *iotlb)
@@ -137,8 +148,6 @@ void vfio_container_destroy(VFIOContainer *container)
VFIOGuestIOMMU *giommu, *tmp;
VFIOHostDMAWindow *hostwin, *next;
- QLIST_SAFE_REMOVE(container, next);
-
QLIST_FOREACH_SAFE(vrdl, &container->vrdl_list, next, vrdl_tmp) {
RamDiscardManager *rdm;
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 61caf388c2..07579c9a38 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -386,9 +386,6 @@ err_out:
static void vfio_listener_release(VFIOLegacyContainer *container)
{
- VFIOContainer *bcontainer = &container->bcontainer;
-
- memory_listener_unregister(&bcontainer->listener);
if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
memory_listener_unregister(&container->prereg_listener);
}
@@ -929,14 +926,11 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
vfio_kvm_device_add_group(group);
QLIST_INIT(&container->group_list);
- QLIST_INSERT_HEAD(&space->containers, bcontainer, next);
group->container = container;
QLIST_INSERT_HEAD(&container->group_list, group, container_next);
- bcontainer->listener = vfio_memory_listener;
-
- memory_listener_register(&bcontainer->listener, bcontainer->space->as);
+ vfio_as_add_container(space, bcontainer);
if (bcontainer->error) {
ret = -1;
@@ -949,8 +943,8 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
return 0;
listener_release_exit:
+ vfio_as_del_container(space, bcontainer);
QLIST_REMOVE(group, container_next);
- QLIST_REMOVE(bcontainer, next);
vfio_kvm_device_del_group(group);
vfio_listener_release(container);
@@ -973,6 +967,7 @@ static void vfio_disconnect_container(VFIOGroup *group)
{
VFIOLegacyContainer *container = group->container;
VFIOContainer *bcontainer = &container->bcontainer;
+ VFIOAddressSpace *space = bcontainer->space;
QLIST_REMOVE(group, container_next);
group->container = NULL;
@@ -980,10 +975,12 @@ static void vfio_disconnect_container(VFIOGroup *group)
/*
* Explicitly release the listener first before unset container,
* since unset may destroy the backend container if it's the last
- * group.
+ * group. Removing the container from the list also disconnects it
+ * from the address space memory listener.
*/
if (QLIST_EMPTY(&container->group_list)) {
vfio_listener_release(container);
+ vfio_as_del_container(space, bcontainer);
}
if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) {
@@ -992,10 +989,8 @@ static void vfio_disconnect_container(VFIOGroup *group)
}
if (QLIST_EMPTY(&container->group_list)) {
- VFIOAddressSpace *space = bcontainer->space;
-
- vfio_container_destroy(bcontainer);
trace_vfio_disconnect_container(container->fd);
+ vfio_container_destroy(bcontainer);
close(container->fd);
g_free(container);
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 18f755bcc0..9c1a1b1779 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -40,6 +40,8 @@ static bool iommufd_check_extension(VFIOContainer *bcontainer,
VFIOContainerFeature feat)
{
switch (feat) {
+ case VFIO_FEAT_DMA_COPY:
+ return true;
default:
return false;
};
@@ -56,6 +58,21 @@ static int iommufd_map(VFIOContainer *bcontainer, hwaddr iova,
iova, size, vaddr, readonly);
}
+static int iommufd_copy(VFIOContainer *src, VFIOContainer *dst,
+ hwaddr iova, ram_addr_t size, bool readonly)
+{
+ VFIOIOMMUFDContainer *container_src = container_of(src,
+ VFIOIOMMUFDContainer, bcontainer);
+ VFIOIOMMUFDContainer *container_dst = container_of(dst,
+ VFIOIOMMUFDContainer, bcontainer);
+ /* Both containers must be backed by the same iommufd fd. */
+ assert(container_src->be->fd == container_dst->be->fd);
+
+ return iommufd_backend_copy_dma(container_src->be, container_src->ioas_id,
+ container_dst->ioas_id, iova,
+ size, readonly);
+}
+
static int iommufd_unmap(VFIOContainer *bcontainer,
hwaddr iova, ram_addr_t size,
IOMMUTLBEntry *iotlb)
@@ -414,12 +431,14 @@ static int iommufd_attach_device(VFIODevice *vbasedev, AddressSpace *as,
* between iommufd and kvm.
*/
- QLIST_INSERT_HEAD(&space->containers, bcontainer, next);
-
- bcontainer->listener = vfio_memory_listener;
-
- memory_listener_register(&bcontainer->listener, bcontainer->space->as);
+ vfio_as_add_container(space, bcontainer);
+ if (bcontainer->error) {
+ ret = -1;
+ error_propagate_prepend(errp, bcontainer->error,
+ "memory listener initialization failed: ");
+ goto error;
+ }
bcontainer->initialized = true;
out:
@@ -436,8 +455,7 @@ out:
ret = ioctl(devfd, VFIO_DEVICE_GET_INFO, &dev_info);
if (ret) {
error_setg_errno(errp, errno, "error getting device info");
- memory_listener_unregister(&bcontainer->listener);
- QLIST_SAFE_REMOVE(bcontainer, next);
+ vfio_as_del_container(space, bcontainer);
goto error;
}
@@ -466,6 +484,7 @@ static void iommufd_detach_device(VFIODevice *vbasedev)
VFIOIOMMUFDContainer *container;
VFIODevice *vbasedev_iter;
VFIOIOASHwpt *hwpt;
+ VFIOAddressSpace *space;
Error *err = NULL;
if (!bcontainer) {
@@ -491,15 +510,25 @@ found:
vfio_container_put_hwpt(hwpt);
}
+ space = bcontainer->space;
+ /*
+ * The bcontainer must be removed from the space->containers list before
+ * the container is detached, because detaching may destroy the container
+ * if this is the last device. Removing bcontainer from the list also
+ * disconnects it from the address space memory listener.
+ */
+ if (QLIST_EMPTY(&container->hwpt_list)) {
+ vfio_as_del_container(space, bcontainer);
+ }
__vfio_device_detach_container(vbasedev, container, &err);
if (err) {
error_report_err(err);
}
if (QLIST_EMPTY(&container->hwpt_list)) {
- VFIOAddressSpace *space = bcontainer->space;
+ uint32_t ioas_id = container->ioas_id;
- iommufd_backend_put_ioas(container->be, container->ioas_id);
vfio_iommufd_container_destroy(container);
+ iommufd_backend_put_ioas(vbasedev->iommufd, ioas_id);
vfio_put_address_space(space);
}
vbasedev->container = NULL;
@@ -514,6 +543,7 @@ static void vfio_iommu_backend_iommufd_ops_class_init(ObjectClass *oc,
ops->check_extension = iommufd_check_extension;
ops->dma_map = iommufd_map;
+ ops->dma_copy = iommufd_copy;
ops->dma_unmap = iommufd_unmap;
ops->attach_device = iommufd_attach_device;
ops->detach_device = iommufd_detach_device;
--
2.37.3
© 2016 - 2024 Red Hat, Inc.