[PATCH v5] linux-aio: add IO_CMD_FDSYNC command support

Prasad Pandit posted 1 patch 1 week, 2 days ago
block/file-posix.c      |  9 +++++++++
block/linux-aio.c       | 21 ++++++++++++++++++++-
include/block/raw-aio.h |  1 +
3 files changed, 30 insertions(+), 1 deletion(-)
[PATCH v5] linux-aio: add IO_CMD_FDSYNC command support
Posted by Prasad Pandit 1 week, 2 days ago
From: Prasad Pandit <pjp@fedoraproject.org>

Libaio defines the IO_CMD_FDSYNC command to sync all outstanding
asynchronous I/O operations by flushing file data out to disk
storage. Enable linux-aio to submit such AIO requests.

When aio=native is used without fdsync() support, QEMU creates
pthreads to handle flush requests, and destroying these pthreads
results in TLB flushes. In a real-time guest environment, TLB
flushes cause latency spikes. This patch helps to avoid such spikes.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Prasad Pandit <pjp@fedoraproject.org>
---
 block/file-posix.c      |  9 +++++++++
 block/linux-aio.c       | 21 ++++++++++++++++++++-
 include/block/raw-aio.h |  1 +
 3 files changed, 30 insertions(+), 1 deletion(-)

v5: update commit message, conditionally call laio_has_fdsync
  -> https://lists.nongnu.org/archive/html/qemu-devel/2024-04/msg03420.html
  -> https://lists.nongnu.org/archive/html/qemu-devel/2024-03/msg03867.html

diff --git a/block/file-posix.c b/block/file-posix.c
index 35684f7e21..9831b08fb6 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -159,6 +159,7 @@ typedef struct BDRVRawState {
     bool has_discard:1;
     bool has_write_zeroes:1;
     bool use_linux_aio:1;
+    bool has_laio_fdsync:1;
     bool use_linux_io_uring:1;
     int page_cache_inconsistent; /* errno from fdatasync failure */
     bool has_fallocate;
@@ -718,6 +719,9 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
         ret = -EINVAL;
         goto fail;
     }
+    if (s->use_linux_aio) {
+        s->has_laio_fdsync = laio_has_fdsync(s->fd);
+    }
 #else
     if (s->use_linux_aio) {
         error_setg(errp, "aio=native was specified, but is not supported "
@@ -2599,6 +2603,11 @@ static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs)
     if (raw_check_linux_io_uring(s)) {
         return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH);
     }
+#endif
+#ifdef CONFIG_LINUX_AIO
+    if (s->has_laio_fdsync && raw_check_linux_aio(s)) {
+        return laio_co_submit(s->fd, 0, NULL, QEMU_AIO_FLUSH, 0);
+    }
 #endif
     return raw_thread_pool_submit(handle_aiocb_flush, &acb);
 }
diff --git a/block/linux-aio.c b/block/linux-aio.c
index ec05d946f3..e3b5ec9aba 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -384,6 +384,9 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
     case QEMU_AIO_READ:
         io_prep_preadv(iocbs, fd, qiov->iov, qiov->niov, offset);
         break;
+    case QEMU_AIO_FLUSH:
+        io_prep_fdsync(iocbs, fd);
+        break;
     /* Currently Linux kernel does not support other operations */
     default:
         fprintf(stderr, "%s: invalid AIO request type 0x%x.\n",
@@ -412,7 +415,7 @@ int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
     AioContext *ctx = qemu_get_current_aio_context();
     struct qemu_laiocb laiocb = {
         .co         = qemu_coroutine_self(),
-        .nbytes     = qiov->size,
+        .nbytes     = qiov ? qiov->size : 0,
         .ctx        = aio_get_linux_aio(ctx),
         .ret        = -EINPROGRESS,
         .is_read    = (type == QEMU_AIO_READ),
@@ -486,3 +489,19 @@ void laio_cleanup(LinuxAioState *s)
     }
     g_free(s);
 }
+
+bool laio_has_fdsync(int fd)
+{
+    struct iocb cb;
+    struct iocb *cbs[] = {&cb, NULL};
+    io_context_t ctx = 0;
+    int ret;
+
+    if (io_setup(1, &ctx) < 0) {
+        return false; /* cannot probe without an AIO context */
+    }
+    io_prep_fdsync(&cb, fd); /* check if host kernel supports IO_CMD_FDSYNC */
+    ret = io_submit(ctx, 1, cbs);
+    io_destroy(ctx);
+    return ret != -EINVAL; /* only -EINVAL means "unsupported" */
+}
diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h
index 20e000b8ef..626706827f 100644
--- a/include/block/raw-aio.h
+++ b/include/block/raw-aio.h
@@ -60,6 +60,7 @@ void laio_cleanup(LinuxAioState *s);
 int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
                                 int type, uint64_t dev_max_batch);
 
+bool laio_has_fdsync(int fd); /* probe host IO_CMD_FDSYNC support on fd */
 void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context);
 void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context);
 #endif
-- 
2.44.0