[PATCH] hw/nvme: be more careful when deasserting IRQs

Jakub Jermář posted 1 patch 2 days, 11 hours ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/next-importer-push tags/patchew/20210610114624.304681-1-jakub.jermar@kernkonzept.com
Maintainers: Klaus Jensen <its@irrelevant.dk>, Keith Busch <kbusch@kernel.org>
hw/nvme/ctrl.c | 21 +++++++++++++++++++--
1 file changed, 19 insertions(+), 2 deletions(-)

[PATCH] hw/nvme: be more careful when deasserting IRQs

Posted by Jakub Jermář 2 days, 11 hours ago
An IRQ vector used by a completion queue cannot be deasserted without
first checking if the same vector does not need to stay asserted for
some other completion queue.

Signed-off-by: Jakub Jermar <jakub.jermar@kernkonzept.com>
---
 hw/nvme/ctrl.c | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 0bcaf7192f..c0980929eb 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -473,6 +473,21 @@ static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq)
     }
 }
 
+/*
+ * Check if the vector used by the cq can be deasserted, i.e. it needn't be
+ * asserted for some other cq.
+ */
+static bool nvme_irq_can_deassert(NvmeCtrl *n, NvmeCQueue *cq)
+{
+    for (unsigned qid = 0; qid < n->params.max_ioqpairs + 1; qid++) {
+        NvmeCQueue *q = n->cq[qid];
+
+        if (q && q->vector == cq->vector && q->head != q->tail)
+            return false;  /* some queue needs this to stay asserted */
+    }
+    return true;
+}
+
 static void nvme_req_clear(NvmeRequest *req)
 {
     req->ns = NULL;
@@ -4089,7 +4104,9 @@ static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeRequest *req)
         trace_pci_nvme_err_invalid_del_cq_notempty(qid);
         return NVME_INVALID_QUEUE_DEL;
     }
-    nvme_irq_deassert(n, cq);
+    if (nvme_irq_can_deassert(n, cq)) {
+        nvme_irq_deassert(n, cq);
+    }
     trace_pci_nvme_del_cq(qid);
     nvme_free_cq(cq, n);
     return NVME_SUCCESS;
@@ -5757,7 +5774,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
             timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
         }
 
-        if (cq->tail == cq->head) {
+        if (nvme_irq_can_deassert(n, cq)) {
             nvme_irq_deassert(n, cq);
         }
     } else {
-- 
2.31.1


Re: [PATCH] hw/nvme: be more careful when deasserting IRQs

Posted by Klaus Jensen 2 days, 5 hours ago
On Jun 10 13:46, Jakub Jermář wrote:
>An IRQ vector used by a completion queue cannot be deasserted without
>first checking if the same vector does not need to stay asserted for
>some other completion queue.
>
>Signed-off-by: Jakub Jermar <jakub.jermar@kernkonzept.com>
>---
> hw/nvme/ctrl.c | 21 +++++++++++++++++++--
> 1 file changed, 19 insertions(+), 2 deletions(-)
>
>diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
>index 0bcaf7192f..c0980929eb 100644
>--- a/hw/nvme/ctrl.c
>+++ b/hw/nvme/ctrl.c
>@@ -473,6 +473,21 @@ static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq)
>     }
> }
>
>+/*
>+ * Check if the vector used by the cq can be deasserted, i.e. it needn't be
>+ * asserted for some other cq.
>+ */
>+static bool nvme_irq_can_deassert(NvmeCtrl *n, NvmeCQueue *cq)
>+{
>+    for (unsigned qid = 0; qid < n->params.max_ioqpairs + 1; qid++) {
>+        NvmeCQueue *q = n->cq[qid];
>+
>+        if (q && q->vector == cq->vector && q->head != q->tail)
>+            return false;  /* some queue needs this to stay asserted */
>+    }
>+    return true;
>+}
>+
> static void nvme_req_clear(NvmeRequest *req)
> {
>     req->ns = NULL;
>@@ -4089,7 +4104,9 @@ static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeRequest *req)
>         trace_pci_nvme_err_invalid_del_cq_notempty(qid);
>         return NVME_INVALID_QUEUE_DEL;
>     }
>-    nvme_irq_deassert(n, cq);
>+    if (nvme_irq_can_deassert(n, cq)) {
>+        nvme_irq_deassert(n, cq);
>+    }
>     trace_pci_nvme_del_cq(qid);
>     nvme_free_cq(cq, n);
>     return NVME_SUCCESS;
>@@ -5757,7 +5774,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
>             timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
>         }
>
>-        if (cq->tail == cq->head) {
>+        if (nvme_irq_can_deassert(n, cq)) {
>             nvme_irq_deassert(n, cq);
>         }
>     } else {
>-- 
>2.31.1
>

This is actually an artifact of commit ca247d35098d3 ("hw/block/nvme: 
fix pin-based interrupt behavior") that I did a year ago. Prior to that 
fix, the completion queue id was used to index the internal IS register 
(irq_status), which, while wrong spec-wise, had the effect of... 
actually working.

Anyway, I agree that the logic is flawed right now, since we should only 
deassert when all outstanding CQEs have been acknowledged by the host.

nvme_irq_can_deassert() should be guarded with a check on msix_enabled(), 
but in any case I am not happy about looping over all completion queues 
on each cq doorbell write. Could this be reference counted instead? I.e., 
decrement when cq->tail == cq->head on the cq doorbell write, and 
increment only when going from empty to non-empty in nvme_post_cqes().