[RFC PATCH v2 4/5] vhost_user: Add MEM_READ/WRITE backend requests

Posted by Albert Esteve 6 months ago
With SHMEM_MAP messages, descriptors shared between
devices may reference mappings that the receiving
device does not have, so that device fails to access
those memory regions.

To solve this, introduce MEM_READ/WRITE requests
that will get triggered as a fallback when
vhost-user memory translation fails.

Signed-off-by: Albert Esteve <aesteve@redhat.com>
---
 hw/virtio/vhost-user.c                    | 31 +++++++++
 subprojects/libvhost-user/libvhost-user.c | 84 +++++++++++++++++++++++
 subprojects/libvhost-user/libvhost-user.h | 38 ++++++++++
 3 files changed, 153 insertions(+)

diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 57406dc8b4..18cacb2d68 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -118,6 +118,8 @@ typedef enum VhostUserBackendRequest {
     VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP = 8,
     VHOST_USER_BACKEND_SHMEM_MAP = 9,
     VHOST_USER_BACKEND_SHMEM_UNMAP = 10,
+    VHOST_USER_BACKEND_MEM_READ = 11,
+    VHOST_USER_BACKEND_MEM_WRITE = 12,
     VHOST_USER_BACKEND_MAX
 }  VhostUserBackendRequest;
 
@@ -145,6 +147,12 @@ typedef struct VhostUserShMemConfig {
     uint64_t memory_sizes[VHOST_MEMORY_BASELINE_NREGIONS];
 } VhostUserShMemConfig;
 
+typedef struct VhostUserMemRWMsg {
+    uint64_t guest_address;
+    uint32_t size;
+    uint8_t data[];
+} VhostUserMemRWMsg;
+
 typedef struct VhostUserLog {
     uint64_t mmap_size;
     uint64_t mmap_offset;
@@ -253,6 +261,7 @@ typedef union {
         VhostUserTransferDeviceState transfer_state;
         VhostUserMMap mmap;
         VhostUserShMemConfig shmem;
+        VhostUserMemRWMsg mem_rw;
 } VhostUserPayload;
 
 typedef struct VhostUserMsg {
@@ -1871,6 +1880,22 @@ vhost_user_backend_handle_shmem_unmap(struct vhost_dev *dev,
     return 0;
 }
 
+static int
+vhost_user_backend_handle_mem_read(struct vhost_dev *dev,
+                                   VhostUserMemRWMsg *mem_rw)
+{
+    /* TODO */
+    return -EPERM;
+}
+
+static int
+vhost_user_backend_handle_mem_write(struct vhost_dev *dev,
+                                   VhostUserMemRWMsg *mem_rw)
+{
+    /* TODO */
+    return -EPERM;
+}
+
 static void close_backend_channel(struct vhost_user *u)
 {
     g_source_destroy(u->backend_src);
@@ -1946,6 +1971,12 @@ static gboolean backend_read(QIOChannel *ioc, GIOCondition condition,
     case VHOST_USER_BACKEND_SHMEM_UNMAP:
         ret = vhost_user_backend_handle_shmem_unmap(dev, &payload.mmap);
         break;
+    case VHOST_USER_BACKEND_MEM_READ:
+        ret = vhost_user_backend_handle_mem_read(dev, &payload.mem_rw);
+        break;
+    case VHOST_USER_BACKEND_MEM_WRITE:
+        ret = vhost_user_backend_handle_mem_write(dev, &payload.mem_rw);
+        break;
     default:
         error_report("Received unexpected msg type: %d.", hdr.request);
         ret = -EINVAL;
diff --git a/subprojects/libvhost-user/libvhost-user.c b/subprojects/libvhost-user/libvhost-user.c
index 28556d183a..b5184064b5 100644
--- a/subprojects/libvhost-user/libvhost-user.c
+++ b/subprojects/libvhost-user/libvhost-user.c
@@ -1651,6 +1651,90 @@ vu_shmem_unmap(VuDev *dev, uint8_t shmid, uint64_t fd_offset,
     return vu_process_message_reply(dev, &vmsg);
 }
 
+bool
+vu_send_mem_read(VuDev *dev, uint64_t guest_addr, uint32_t size,
+                 uint8_t *data)
+{
+    VhostUserMsg msg_reply;
+    VhostUserMsg msg = {
+        .request = VHOST_USER_BACKEND_MEM_READ,
+        .size = sizeof(msg.payload.mem_rw),
+        .flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
+        .payload = {
+            .mem_rw = {
+                .guest_address = guest_addr,
+                .size = size,
+            }
+        }
+    };
+
+    pthread_mutex_lock(&dev->backend_mutex);
+    if (!vu_message_write(dev, dev->backend_fd, &msg)) {
+        goto out_err;
+    }
+
+    if (!vu_message_read_default(dev, dev->backend_fd, &msg_reply)) {
+        goto out_err;
+    }
+
+    if (msg_reply.request != msg.request) {
+        DPRINT("Received unexpected msg type. Expected %d, received %d",
+               msg.request, msg_reply.request);
+        goto out_err;
+    }
+
+    if (msg_reply.payload.mem_rw.size != size) {
+        DPRINT("Received unexpected number of bytes in the response. "
+               "Expected %d, received %d",
+               size, msg_reply.payload.mem_rw.size);
+        goto out_err;
+    }
+
+    data = malloc(msg_reply.payload.mem_rw.size);
+    if (!data) {
+        DPRINT("Failed to malloc read memory data");
+        goto out_err;
+    }
+
+    memcpy(data, msg_reply.payload.mem_rw.data, size);
+    pthread_mutex_unlock(&dev->backend_mutex);
+    return true;
+
+out_err:
+    pthread_mutex_unlock(&dev->backend_mutex);
+    return false;
+}
+
+bool
+vu_send_mem_write(VuDev *dev, uint64_t guest_addr, uint32_t size,
+                  uint8_t *data)
+{
+    VhostUserMsg msg = {
+        .request = VHOST_USER_BACKEND_MEM_WRITE,
+        .size = sizeof(msg.payload.mem_rw),
+        .flags = VHOST_USER_VERSION,
+        .payload = {
+            .mem_rw = {
+                .guest_address = guest_addr,
+                .size = size,
+            }
+        }
+    };
+    memcpy(msg.payload.mem_rw.data, data, size);
+
+    if (vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_REPLY_ACK)) {
+        msg.flags |= VHOST_USER_NEED_REPLY_MASK;
+    }
+
+    if (!vu_message_write(dev, dev->backend_fd, &msg)) {
+        pthread_mutex_unlock(&dev->backend_mutex);
+        return false;
+    }
+
+    /* Also unlocks the backend_mutex */
+    return vu_process_message_reply(dev, &msg);
+}
+
 static bool
 vu_set_vring_call_exec(VuDev *dev, VhostUserMsg *vmsg)
 {
diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h
index 7f6c22cc1a..8ef794870d 100644
--- a/subprojects/libvhost-user/libvhost-user.h
+++ b/subprojects/libvhost-user/libvhost-user.h
@@ -129,6 +129,8 @@ typedef enum VhostUserBackendRequest {
     VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP = 8,
     VHOST_USER_BACKEND_SHMEM_MAP = 9,
     VHOST_USER_BACKEND_SHMEM_UNMAP = 10,
+    VHOST_USER_BACKEND_MEM_READ = 11,
+    VHOST_USER_BACKEND_MEM_WRITE = 12,
     VHOST_USER_BACKEND_MAX
 }  VhostUserBackendRequest;
 
@@ -152,6 +154,12 @@ typedef struct VhostUserMemRegMsg {
     VhostUserMemoryRegion region;
 } VhostUserMemRegMsg;
 
+typedef struct VhostUserMemRWMsg {
+    uint64_t guest_address;
+    uint32_t size;
+    uint8_t data[];
+} VhostUserMemRWMsg;
+
 typedef struct VhostUserLog {
     uint64_t mmap_size;
     uint64_t mmap_offset;
@@ -235,6 +243,7 @@ typedef struct VhostUserMsg {
         VhostUserInflight inflight;
         VhostUserShared object;
         VhostUserMMap mmap;
+        VhostUserMemRWMsg mem_rw;
     } payload;
 
     int fds[VHOST_MEMORY_BASELINE_NREGIONS];
@@ -650,6 +659,35 @@ bool vu_shmem_map(VuDev *dev, uint8_t shmid, uint64_t fd_offset,
 bool vu_shmem_unmap(VuDev *dev, uint8_t shmid, uint64_t fd_offset,
                   uint64_t shm_offset, uint64_t len);
 
+/**
+ * vu_send_mem_read:
+ * @dev: a VuDev context
+ * @guest_addr: guest physical address to read
+ * @size: number of bytes to read
+ * @data: head of an uninitialized byte array
+ *
+ * Reads `size` bytes at `guest_addr` in the frontend and stores
+ * them in `data`.
+ *
+ * Returns: TRUE on success, FALSE on failure.
+ */
+bool vu_send_mem_read(VuDev *dev, uint64_t guest_addr, uint32_t size,
+                      uint8_t *data);
+
+/**
+ * vu_send_mem_write:
+ * @dev: a VuDev context
+ * @guest_addr: guest physical address to write
+ * @size: number of bytes to write
+ * @data: head of an array with `size` bytes to write
+ *
+ * Writes `size` bytes from `data` into `guest_addr` in the frontend.
+ *
+ * Returns: TRUE on success, FALSE on failure.
+ */
+bool vu_send_mem_write(VuDev *dev, uint64_t guest_addr, uint32_t size,
+                      uint8_t *data);
+
 /**
  * vu_queue_set_notification:
  * @dev: a VuDev context
-- 
2.45.2
Re: [RFC PATCH v2 4/5] vhost_user: Add MEM_READ/WRITE backend requests
Posted by Stefan Hajnoczi 5 months, 2 weeks ago
On Fri, Jun 28, 2024 at 04:57:09PM +0200, Albert Esteve wrote:
> With SHMEM_MAP messages, descriptors shared between
> devices may reference mappings that the receiving
> device does not have, so that device fails to access
> those memory regions.
> 
> To solve this, introduce MEM_READ/WRITE requests
> that will get triggered as a fallback when
> vhost-user memory translation fails.
> 
> Signed-off-by: Albert Esteve <aesteve@redhat.com>
> ---
>  hw/virtio/vhost-user.c                    | 31 +++++++++
>  subprojects/libvhost-user/libvhost-user.c | 84 +++++++++++++++++++++++
>  subprojects/libvhost-user/libvhost-user.h | 38 ++++++++++
>  3 files changed, 153 insertions(+)
> 
> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> index 57406dc8b4..18cacb2d68 100644
> --- a/hw/virtio/vhost-user.c
> +++ b/hw/virtio/vhost-user.c
> @@ -118,6 +118,8 @@ typedef enum VhostUserBackendRequest {
>      VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP = 8,
>      VHOST_USER_BACKEND_SHMEM_MAP = 9,
>      VHOST_USER_BACKEND_SHMEM_UNMAP = 10,
> +    VHOST_USER_BACKEND_MEM_READ = 11,
> +    VHOST_USER_BACKEND_MEM_WRITE = 12,
>      VHOST_USER_BACKEND_MAX
>  }  VhostUserBackendRequest;
>  
> @@ -145,6 +147,12 @@ typedef struct VhostUserShMemConfig {
>      uint64_t memory_sizes[VHOST_MEMORY_BASELINE_NREGIONS];
>  } VhostUserShMemConfig;
>  
> +typedef struct VhostUserMemRWMsg {
> +    uint64_t guest_address;
> +    uint32_t size;
> +    uint8_t data[];

I don't think flexible array members work in the VhostUserMsg payload
structs in their current form. It would be necessary to move the
VhostUserMsg.payload field to the end of the VhostUserMsg and then
heap-allocate VhostUserMsg with the additional size required for
VhostUserMemRWMsg.data[].

Right now this patch is calling memcpy() on memory beyond
VhostUserMsg.payload because the VhostUserMsg struct does not have `size`
bytes of extra space and the payload field is in the middle of the
struct where flexible array members cannot be used.
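
For example, a rough sketch of that approach on the libvhost-user side
(untested; it assumes the payload field really has been moved to the end
of the struct, and that the caller frees the message):

    /* Sketch only: over-allocate so the flexible array member
     * mem_rw.data[] has `size` bytes of real storage behind it. */
    VhostUserMsg *msg = calloc(1, sizeof(*msg) + size);
    if (!msg) {
        return false;
    }
    msg->request = VHOST_USER_BACKEND_MEM_WRITE;
    msg->flags = VHOST_USER_VERSION;
    msg->size = sizeof(msg->payload.mem_rw) + size;
    msg->payload.mem_rw.guest_address = guest_addr;
    msg->payload.mem_rw.size = size;
    memcpy(msg->payload.mem_rw.data, data, size); /* now in bounds */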

> +} VhostUserMemRWMsg;
> +
>  typedef struct VhostUserLog {
>      uint64_t mmap_size;
>      uint64_t mmap_offset;
> @@ -253,6 +261,7 @@ typedef union {
>          VhostUserTransferDeviceState transfer_state;
>          VhostUserMMap mmap;
>          VhostUserShMemConfig shmem;
> +        VhostUserMemRWMsg mem_rw;
>  } VhostUserPayload;
>  
>  typedef struct VhostUserMsg {
> @@ -1871,6 +1880,22 @@ vhost_user_backend_handle_shmem_unmap(struct vhost_dev *dev,
>      return 0;
>  }
>  
> +static int
> +vhost_user_backend_handle_mem_read(struct vhost_dev *dev,
> +                                   VhostUserMemRWMsg *mem_rw)
> +{
> +    /* TODO */
> +    return -EPERM;
> +}
> +
> +static int
> +vhost_user_backend_handle_mem_write(struct vhost_dev *dev,
> +                                   VhostUserMemRWMsg *mem_rw)
> +{
> +    /* TODO */
> +    return -EPERM;
> +}

Reading/writing guest memory can be done via
address_space_read/write(vdev->dma_as, ...).
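
For reference, a minimal sketch of the read side (untested; the write
handler would use address_space_write() the same way):

    static int
    vhost_user_backend_handle_mem_read(struct vhost_dev *dev,
                                       VhostUserMemRWMsg *mem_rw)
    {
        /* Sketch only: go through the device's DMA address space so
         * IOMMUs are honored, and turn failures into an error reply
         * instead of asserting. */
        MemTxResult r = address_space_read(dev->vdev->dma_as,
                                           mem_rw->guest_address,
                                           MEMTXATTRS_UNSPECIFIED,
                                           mem_rw->data, mem_rw->size);
        return r == MEMTX_OK ? 0 : -EFAULT;
    }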

> +
>  static void close_backend_channel(struct vhost_user *u)
>  {
>      g_source_destroy(u->backend_src);
> @@ -1946,6 +1971,12 @@ static gboolean backend_read(QIOChannel *ioc, GIOCondition condition,
>      case VHOST_USER_BACKEND_SHMEM_UNMAP:
>          ret = vhost_user_backend_handle_shmem_unmap(dev, &payload.mmap);
>          break;
> +    case VHOST_USER_BACKEND_MEM_READ:
> +        ret = vhost_user_backend_handle_mem_read(dev, &payload.mem_rw);
> +        break;
> +    case VHOST_USER_BACKEND_MEM_WRITE:
> +        ret = vhost_user_backend_handle_mem_write(dev, &payload.mem_rw);
> +        break;
>      default:
>          error_report("Received unexpected msg type: %d.", hdr.request);
>          ret = -EINVAL;
> diff --git a/subprojects/libvhost-user/libvhost-user.c b/subprojects/libvhost-user/libvhost-user.c
> index 28556d183a..b5184064b5 100644
> --- a/subprojects/libvhost-user/libvhost-user.c
> +++ b/subprojects/libvhost-user/libvhost-user.c
> @@ -1651,6 +1651,90 @@ vu_shmem_unmap(VuDev *dev, uint8_t shmid, uint64_t fd_offset,
>      return vu_process_message_reply(dev, &vmsg);
>  }
>  
> +bool
> +vu_send_mem_read(VuDev *dev, uint64_t guest_addr, uint32_t size,
> +                 uint8_t *data)
> +{
> +    VhostUserMsg msg_reply;
> +    VhostUserMsg msg = {
> +        .request = VHOST_USER_BACKEND_MEM_READ,
> +        .size = sizeof(msg.payload.mem_rw),
> +        .flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
> +        .payload = {
> +            .mem_rw = {
> +                .guest_address = guest_addr,
> +                .size = size,
> +            }
> +        }
> +    };
> +
> +    pthread_mutex_lock(&dev->backend_mutex);
> +    if (!vu_message_write(dev, dev->backend_fd, &msg)) {
> +        goto out_err;
> +    }
> +
> +    if (!vu_message_read_default(dev, dev->backend_fd, &msg_reply)) {
> +        goto out_err;
> +    }
> +
> +    if (msg_reply.request != msg.request) {
> +        DPRINT("Received unexpected msg type. Expected %d, received %d",
> +               msg.request, msg_reply.request);
> +        goto out_err;
> +    }
> +
> +    if (msg_reply.payload.mem_rw.size != size) {
> +        DPRINT("Received unexpected number of bytes in the response. "
> +               "Expected %d, received %d",
> +               size, msg_reply.payload.mem_rw.size);
> +        goto out_err;
> +    }
> +
> +    data = malloc(msg_reply.payload.mem_rw.size);

The caller passed in `size` and `data`, so the caller has already
provided the buffer. malloc() is not necessary here.

> +    if (!data) {
> +        DPRINT("Failed to malloc read memory data");
> +        goto out_err;
> +    }
> +
> +    memcpy(data, msg_reply.payload.mem_rw.data, size);

It should be possible to avoid memcpy() here by receiving directly into
the caller's buffer. If you don't want to look into this, please leave a
TODO comment.
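
That is, the tail of the function could simply be (sketch):

    /* Copy into the caller-provided buffer; no allocation needed.
     * TODO: receive from the socket directly into `data` to avoid
     * this memcpy() as well. */
    memcpy(data, msg_reply.payload.mem_rw.data, size);
    pthread_mutex_unlock(&dev->backend_mutex);
    return true;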

> +    pthread_mutex_unlock(&dev->backend_mutex);
> +    return true;
> +
> +out_err:
> +    pthread_mutex_unlock(&dev->backend_mutex);
> +    return false;
> +}
> +
> +bool
> +vu_send_mem_write(VuDev *dev, uint64_t guest_addr, uint32_t size,
> +                  uint8_t *data)
> +{
> +    VhostUserMsg msg = {
> +        .request = VHOST_USER_BACKEND_MEM_WRITE,
> +        .size = sizeof(msg.payload.mem_rw),
> +        .flags = VHOST_USER_VERSION,
> +        .payload = {
> +            .mem_rw = {
> +                .guest_address = guest_addr,
> +                .size = size,
> +            }
> +        }
> +    };
> +    memcpy(msg.payload.mem_rw.data, data, size);

This memcpy() can be eliminated too. It's worth a code comment in case
someone looks at optimizing this in the future.
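
One possible shape for that optimization (sketch only, untested; it
bypasses vu_message_write() with a plain writev(), msg.size would also
have to count the trailing bytes, and short writes and errors would
still need handling):

    /* Send the fixed-size header/payload and the caller's buffer as
     * separate iovecs so `data` is never staged through the message. */
    struct iovec iov[2] = {
        { .iov_base = &msg,
          .iov_len  = VHOST_USER_HDR_SIZE + sizeof(VhostUserMemRWMsg) },
        { .iov_base = data, .iov_len = size },
    };
    ssize_t n = writev(dev->backend_fd, iov, 2);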

> +
> +    if (vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_REPLY_ACK)) {
> +        msg.flags |= VHOST_USER_NEED_REPLY_MASK;
> +    }
> +
> +    if (!vu_message_write(dev, dev->backend_fd, &msg)) {
> +        pthread_mutex_unlock(&dev->backend_mutex);
> +        return false;
> +    }
> +
> +    /* Also unlocks the backend_mutex */
> +    return vu_process_message_reply(dev, &msg);
> +}
> +
>  static bool
>  vu_set_vring_call_exec(VuDev *dev, VhostUserMsg *vmsg)
>  {
> diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h
> index 7f6c22cc1a..8ef794870d 100644
> --- a/subprojects/libvhost-user/libvhost-user.h
> +++ b/subprojects/libvhost-user/libvhost-user.h
> @@ -129,6 +129,8 @@ typedef enum VhostUserBackendRequest {
>      VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP = 8,
>      VHOST_USER_BACKEND_SHMEM_MAP = 9,
>      VHOST_USER_BACKEND_SHMEM_UNMAP = 10,
> +    VHOST_USER_BACKEND_MEM_READ = 11,
> +    VHOST_USER_BACKEND_MEM_WRITE = 12,
>      VHOST_USER_BACKEND_MAX
>  }  VhostUserBackendRequest;
>  
> @@ -152,6 +154,12 @@ typedef struct VhostUserMemRegMsg {
>      VhostUserMemoryRegion region;
>  } VhostUserMemRegMsg;
>  
> +typedef struct VhostUserMemRWMsg {
> +    uint64_t guest_address;
> +    uint32_t size;
> +    uint8_t data[];
> +} VhostUserMemRWMsg;
> +
>  typedef struct VhostUserLog {
>      uint64_t mmap_size;
>      uint64_t mmap_offset;
> @@ -235,6 +243,7 @@ typedef struct VhostUserMsg {
>          VhostUserInflight inflight;
>          VhostUserShared object;
>          VhostUserMMap mmap;
> +        VhostUserMemRWMsg mem_rw;
>      } payload;
>  
>      int fds[VHOST_MEMORY_BASELINE_NREGIONS];
> @@ -650,6 +659,35 @@ bool vu_shmem_map(VuDev *dev, uint8_t shmid, uint64_t fd_offset,
>  bool vu_shmem_unmap(VuDev *dev, uint8_t shmid, uint64_t fd_offset,
>                    uint64_t shm_offset, uint64_t len);
>  
> +/**
> + * vu_send_mem_read:
> + * @dev: a VuDev context
> + * @guest_addr: guest physical address to read
> + * @size: number of bytes to read
> + * @data: head of an uninitialized byte array
> + *
> + * Reads `size` bytes at `guest_addr` in the frontend and stores
> + * them in `data`.
> + *
> + * Returns: TRUE on success, FALSE on failure.
> + */
> +bool vu_send_mem_read(VuDev *dev, uint64_t guest_addr, uint32_t size,
> +                      uint8_t *data);
> +
> +/**
> + * vu_send_mem_write:
> + * @dev: a VuDev context
> + * @guest_addr: guest physical address to write
> + * @size: number of bytes to write
> + * @data: head of an array with `size` bytes to write
> + *
> + * Writes `size` bytes from `data` into `guest_addr` in the frontend.
> + *
> + * Returns: TRUE on success, FALSE on failure.
> + */
> +bool vu_send_mem_write(VuDev *dev, uint64_t guest_addr, uint32_t size,
> +                      uint8_t *data);
> +
>  /**
>   * vu_queue_set_notification:
>   * @dev: a VuDev context
> -- 
> 2.45.2
>