pcie_sriov_pf_disable_vfs() is called when resetting the PF, but it only
disables VFs and does not reset SR-IOV extended capability, leaking the
state and making the VF Enable register inconsistent with the actual
state.
Replace pcie_sriov_pf_disable_vfs() with pcie_sriov_pf_reset(), which
does not only disable VFs but also resets the capability.
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
---
include/hw/pci/pcie_sriov.h | 4 ++--
hw/net/igb.c | 2 +-
hw/nvme/ctrl.c | 2 +-
hw/pci/pcie_sriov.c | 26 ++++++++++++++++++--------
4 files changed, 22 insertions(+), 12 deletions(-)
diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h
index 095fb0c9edf9..b77eb7bf58ac 100644
--- a/include/hw/pci/pcie_sriov.h
+++ b/include/hw/pci/pcie_sriov.h
@@ -58,8 +58,8 @@ void pcie_sriov_pf_add_sup_pgsize(PCIDevice *dev, uint16_t opt_sup_pgsize);
void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
uint32_t val, int len);
-/* Reset SR/IOV VF Enable bit to unregister all VFs */
-void pcie_sriov_pf_disable_vfs(PCIDevice *dev);
+/* Reset SR/IOV */
+void pcie_sriov_pf_reset(PCIDevice *dev);
/* Get logical VF number of a VF - only valid for VFs */
uint16_t pcie_sriov_vf_number(PCIDevice *dev);
diff --git a/hw/net/igb.c b/hw/net/igb.c
index 0b5c31a58bba..9345506f81ec 100644
--- a/hw/net/igb.c
+++ b/hw/net/igb.c
@@ -493,7 +493,7 @@ static void igb_qdev_reset_hold(Object *obj)
trace_e1000e_cb_qdev_reset_hold();
- pcie_sriov_pf_disable_vfs(d);
+ pcie_sriov_pf_reset(d);
igb_core_reset(&s->core);
}
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 7a56e7b79b4d..7c0d3f108724 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -7116,7 +7116,7 @@ static void nvme_ctrl_reset(NvmeCtrl *n, NvmeResetType rst)
}
if (rst != NVME_RESET_CONTROLLER) {
- pcie_sriov_pf_disable_vfs(pci_dev);
+ pcie_sriov_pf_reset(pci_dev);
}
}
diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c
index da209b7f47fd..51b66d1bb342 100644
--- a/hw/pci/pcie_sriov.c
+++ b/hw/pci/pcie_sriov.c
@@ -249,16 +249,26 @@ void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
}
-/* Reset SR/IOV VF Enable bit to trigger an unregister of all VFs */
-void pcie_sriov_pf_disable_vfs(PCIDevice *dev)
+/* Reset SR/IOV */
+void pcie_sriov_pf_reset(PCIDevice *dev)
{
uint16_t sriov_cap = dev->exp.sriov_cap;
- if (sriov_cap) {
- uint32_t val = pci_get_byte(dev->config + sriov_cap + PCI_SRIOV_CTRL);
- if (val & PCI_SRIOV_CTRL_VFE) {
- val &= ~PCI_SRIOV_CTRL_VFE;
- pcie_sriov_config_write(dev, sriov_cap + PCI_SRIOV_CTRL, val, 1);
- }
+ if (!sriov_cap) {
+ return;
+ }
+
+ pci_set_word(dev->config + sriov_cap + PCI_SRIOV_CTRL, 0);
+ unregister_vfs(dev);
+
+ /*
+ * Default is to use 4K pages, software can modify it
+ * to any of the supported bits
+ */
+ pci_set_word(dev->config + sriov_cap + PCI_SRIOV_SYS_PGSIZE, 0x1);
+
+ for (uint16_t i = 0; i < PCI_NUM_REGIONS; i++) {
+ pci_set_quad(dev->config + sriov_cap + PCI_SRIOV_BAR + i * 4,
+ dev->exp.sriov_pf.vf_bar_type[i]);
}
}
--
2.43.2
> -----Original Message-----
> From: Akihiko Odaki <akihiko.odaki@daynix.com>
> Sent: Wednesday, 28 February 2024 12:33
> To: Philippe Mathieu-Daudé <philmd@linaro.org>; Michael S. Tsirkin
> <mst@redhat.com>; Marcel Apfelbaum <marcel.apfelbaum@gmail.com>;
> Alex Williamson <alex.williamson@redhat.com>; Cédric Le Goater
> <clg@redhat.com>; Paolo Bonzini <pbonzini@redhat.com>; Daniel P.
> Berrangé <berrange@redhat.com>; Eduardo Habkost
> <eduardo@habkost.net>; Sriram Yagnaraman
> <sriram.yagnaraman@ericsson.com>; Jason Wang <jasowang@redhat.com>;
> Keith Busch <kbusch@kernel.org>; Klaus Jensen <its@irrelevant.dk>; Markus
> Armbruster <armbru@redhat.com>
> Cc: qemu-devel@nongnu.org; qemu-block@nongnu.org; Akihiko Odaki
> <akihiko.odaki@daynix.com>
> Subject: [PATCH v8 03/15] pcie_sriov: Reset SR-IOV extended capability
>
> pcie_sriov_pf_disable_vfs() is called when resetting the PF, but it only disables
> VFs and does not reset SR-IOV extended capability, leaking the state and
> making the VF Enable register inconsistent with the actual state.
>
> Replace pcie_sriov_pf_disable_vfs() with pcie_sriov_pf_reset(), which does
> not only disable VFs but also resets the capability.
>
> Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
> ---
> include/hw/pci/pcie_sriov.h | 4 ++--
> hw/net/igb.c | 2 +-
> hw/nvme/ctrl.c | 2 +-
> hw/pci/pcie_sriov.c | 26 ++++++++++++++++++--------
> 4 files changed, 22 insertions(+), 12 deletions(-)
>
> diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h index
> 095fb0c9edf9..b77eb7bf58ac 100644
> --- a/include/hw/pci/pcie_sriov.h
> +++ b/include/hw/pci/pcie_sriov.h
> @@ -58,8 +58,8 @@ void pcie_sriov_pf_add_sup_pgsize(PCIDevice *dev,
> uint16_t opt_sup_pgsize); void pcie_sriov_config_write(PCIDevice *dev,
> uint32_t address,
> uint32_t val, int len);
>
> -/* Reset SR/IOV VF Enable bit to unregister all VFs */ -void
> pcie_sriov_pf_disable_vfs(PCIDevice *dev);
> +/* Reset SR/IOV */
> +void pcie_sriov_pf_reset(PCIDevice *dev);
>
> /* Get logical VF number of a VF - only valid for VFs */ uint16_t
> pcie_sriov_vf_number(PCIDevice *dev); diff --git a/hw/net/igb.c
> b/hw/net/igb.c index 0b5c31a58bba..9345506f81ec 100644
> --- a/hw/net/igb.c
> +++ b/hw/net/igb.c
> @@ -493,7 +493,7 @@ static void igb_qdev_reset_hold(Object *obj)
>
> trace_e1000e_cb_qdev_reset_hold();
>
> - pcie_sriov_pf_disable_vfs(d);
> + pcie_sriov_pf_reset(d);
> igb_core_reset(&s->core);
> }
>
> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index
> 7a56e7b79b4d..7c0d3f108724 100644
> --- a/hw/nvme/ctrl.c
> +++ b/hw/nvme/ctrl.c
> @@ -7116,7 +7116,7 @@ static void nvme_ctrl_reset(NvmeCtrl *n,
> NvmeResetType rst)
> }
>
> if (rst != NVME_RESET_CONTROLLER) {
> - pcie_sriov_pf_disable_vfs(pci_dev);
> + pcie_sriov_pf_reset(pci_dev);
> }
> }
>
> diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c index
> da209b7f47fd..51b66d1bb342 100644
> --- a/hw/pci/pcie_sriov.c
> +++ b/hw/pci/pcie_sriov.c
> @@ -249,16 +249,26 @@ void pcie_sriov_config_write(PCIDevice *dev,
> uint32_t address, }
>
>
> -/* Reset SR/IOV VF Enable bit to trigger an unregister of all VFs */ -void
> pcie_sriov_pf_disable_vfs(PCIDevice *dev)
> +/* Reset SR/IOV */
> +void pcie_sriov_pf_reset(PCIDevice *dev)
> {
> uint16_t sriov_cap = dev->exp.sriov_cap;
> - if (sriov_cap) {
> - uint32_t val = pci_get_byte(dev->config + sriov_cap + PCI_SRIOV_CTRL);
> - if (val & PCI_SRIOV_CTRL_VFE) {
> - val &= ~PCI_SRIOV_CTRL_VFE;
> - pcie_sriov_config_write(dev, sriov_cap + PCI_SRIOV_CTRL, val, 1);
> - }
> + if (!sriov_cap) {
> + return;
> + }
> +
> + pci_set_word(dev->config + sriov_cap + PCI_SRIOV_CTRL, 0);
> + unregister_vfs(dev);
> +
> + /*
> + * Default is to use 4K pages, software can modify it
> + * to any of the supported bits
> + */
> + pci_set_word(dev->config + sriov_cap + PCI_SRIOV_SYS_PGSIZE, 0x1);
> +
Just curious, do we need this?
On Linux, I thought the PCI subsystem restores the page size after reset.
Otherwise,
Assuming change of my mail address from sriram.yagnaraman@est.tech to @ericsson.com is accepted,
Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@ericsson.com>
> + for (uint16_t i = 0; i < PCI_NUM_REGIONS; i++) {
> + pci_set_quad(dev->config + sriov_cap + PCI_SRIOV_BAR + i * 4,
> + dev->exp.sriov_pf.vf_bar_type[i]);
> }
> }
>
>
> --
> 2.43.2
On 2024/02/29 1:23, Sriram Yagnaraman wrote:
>
>
>> -----Original Message-----
>> From: Akihiko Odaki <akihiko.odaki@daynix.com>
>> Sent: Wednesday, 28 February 2024 12:33
>> To: Philippe Mathieu-Daudé <philmd@linaro.org>; Michael S. Tsirkin
>> <mst@redhat.com>; Marcel Apfelbaum <marcel.apfelbaum@gmail.com>;
>> Alex Williamson <alex.williamson@redhat.com>; Cédric Le Goater
>> <clg@redhat.com>; Paolo Bonzini <pbonzini@redhat.com>; Daniel P.
>> Berrangé <berrange@redhat.com>; Eduardo Habkost
>> <eduardo@habkost.net>; Sriram Yagnaraman
>> <sriram.yagnaraman@ericsson.com>; Jason Wang <jasowang@redhat.com>;
>> Keith Busch <kbusch@kernel.org>; Klaus Jensen <its@irrelevant.dk>; Markus
>> Armbruster <armbru@redhat.com>
>> Cc: qemu-devel@nongnu.org; qemu-block@nongnu.org; Akihiko Odaki
>> <akihiko.odaki@daynix.com>
>> Subject: [PATCH v8 03/15] pcie_sriov: Reset SR-IOV extended capability
>>
>> pcie_sriov_pf_disable_vfs() is called when resetting the PF, but it only disables
>> VFs and does not reset SR-IOV extended capability, leaking the state and
>> making the VF Enable register inconsistent with the actual state.
>>
>> Replace pcie_sriov_pf_disable_vfs() with pcie_sriov_pf_reset(), which does
>> not only disable VFs but also resets the capability.
>>
>> Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
>> ---
>> include/hw/pci/pcie_sriov.h | 4 ++--
>> hw/net/igb.c | 2 +-
>> hw/nvme/ctrl.c | 2 +-
>> hw/pci/pcie_sriov.c | 26 ++++++++++++++++++--------
>> 4 files changed, 22 insertions(+), 12 deletions(-)
>>
>> diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h index
>> 095fb0c9edf9..b77eb7bf58ac 100644
>> --- a/include/hw/pci/pcie_sriov.h
>> +++ b/include/hw/pci/pcie_sriov.h
>> @@ -58,8 +58,8 @@ void pcie_sriov_pf_add_sup_pgsize(PCIDevice *dev,
>> uint16_t opt_sup_pgsize); void pcie_sriov_config_write(PCIDevice *dev,
>> uint32_t address,
>> uint32_t val, int len);
>>
>> -/* Reset SR/IOV VF Enable bit to unregister all VFs */ -void
>> pcie_sriov_pf_disable_vfs(PCIDevice *dev);
>> +/* Reset SR/IOV */
>> +void pcie_sriov_pf_reset(PCIDevice *dev);
>>
>> /* Get logical VF number of a VF - only valid for VFs */ uint16_t
>> pcie_sriov_vf_number(PCIDevice *dev); diff --git a/hw/net/igb.c
>> b/hw/net/igb.c index 0b5c31a58bba..9345506f81ec 100644
>> --- a/hw/net/igb.c
>> +++ b/hw/net/igb.c
>> @@ -493,7 +493,7 @@ static void igb_qdev_reset_hold(Object *obj)
>>
>> trace_e1000e_cb_qdev_reset_hold();
>>
>> - pcie_sriov_pf_disable_vfs(d);
>> + pcie_sriov_pf_reset(d);
>> igb_core_reset(&s->core);
>> }
>>
>> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index
>> 7a56e7b79b4d..7c0d3f108724 100644
>> --- a/hw/nvme/ctrl.c
>> +++ b/hw/nvme/ctrl.c
>> @@ -7116,7 +7116,7 @@ static void nvme_ctrl_reset(NvmeCtrl *n,
>> NvmeResetType rst)
>> }
>>
>> if (rst != NVME_RESET_CONTROLLER) {
>> - pcie_sriov_pf_disable_vfs(pci_dev);
>> + pcie_sriov_pf_reset(pci_dev);
>> }
>> }
>>
>> diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c index
>> da209b7f47fd..51b66d1bb342 100644
>> --- a/hw/pci/pcie_sriov.c
>> +++ b/hw/pci/pcie_sriov.c
>> @@ -249,16 +249,26 @@ void pcie_sriov_config_write(PCIDevice *dev,
>> uint32_t address, }
>>
>>
>> -/* Reset SR/IOV VF Enable bit to trigger an unregister of all VFs */ -void
>> pcie_sriov_pf_disable_vfs(PCIDevice *dev)
>> +/* Reset SR/IOV */
>> +void pcie_sriov_pf_reset(PCIDevice *dev)
>> {
>> uint16_t sriov_cap = dev->exp.sriov_cap;
>> - if (sriov_cap) {
>> - uint32_t val = pci_get_byte(dev->config + sriov_cap + PCI_SRIOV_CTRL);
>> - if (val & PCI_SRIOV_CTRL_VFE) {
>> - val &= ~PCI_SRIOV_CTRL_VFE;
>> - pcie_sriov_config_write(dev, sriov_cap + PCI_SRIOV_CTRL, val, 1);
>> - }
>> + if (!sriov_cap) {
>> + return;
>> + }
>> +
>> + pci_set_word(dev->config + sriov_cap + PCI_SRIOV_CTRL, 0);
>> + unregister_vfs(dev);
>> +
>> + /*
>> + * Default is to use 4K pages, software can modify it
>> + * to any of the supported bits
>> + */
>> + pci_set_word(dev->config + sriov_cap + PCI_SRIOV_SYS_PGSIZE, 0x1);
>> +
>
> Just curious, do we need this?
> On Linux, I thought the PCI subsystem restores the page size after reset.
Perhaps Linux doesn't need it, but the specification requires to reset
the whole capability.
>
> Otherwise,
> Assuming change of my mail address from sriram.yagnaraman@est.tech to @ericsson.com is accepted,
> Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@ericsson.com>
>
>> + for (uint16_t i = 0; i < PCI_NUM_REGIONS; i++) {
>> + pci_set_quad(dev->config + sriov_cap + PCI_SRIOV_BAR + i * 4,
>> + dev->exp.sriov_pf.vf_bar_type[i]);
>> }
>> }
>>
>>
>> --
>> 2.43.2
© 2016 - 2026 Red Hat, Inc.