[PATCH for 8.0] igb: Fix DMA requester specification for Tx packet

Akihiko Odaki posted 1 patch 1 year ago
hw/net/e1000e_core.c |  6 +++---
hw/net/igb_core.c    | 13 ++++++++-----
hw/net/net_tx_pkt.c  |  3 ++-
hw/net/net_tx_pkt.h  |  3 ++-
hw/net/vmxnet3.c     |  4 ++--
5 files changed, 17 insertions(+), 12 deletions(-)
[PATCH for 8.0] igb: Fix DMA requester specification for Tx packet
Posted by Akihiko Odaki 1 year ago
igb used to specify the PF as DMA requester when reading Tx packets.
This caused Tx requests from VFs to be performed in the address space of
the PF, defeating the purpose of SR-IOV. Add some logic to change the
requester depending on the queue, which can be assigned to a VF.

Fixes: 3a977deebe ("Intrdocue igb device emulation")
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
---
 hw/net/e1000e_core.c |  6 +++---
 hw/net/igb_core.c    | 13 ++++++++-----
 hw/net/net_tx_pkt.c  |  3 ++-
 hw/net/net_tx_pkt.h  |  3 ++-
 hw/net/vmxnet3.c     |  4 ++--
 5 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
index 4d9679ca0b..c0c09b6965 100644
--- a/hw/net/e1000e_core.c
+++ b/hw/net/e1000e_core.c
@@ -765,7 +765,7 @@ e1000e_process_tx_desc(E1000ECore *core,
         }
 
         tx->skip_cp = false;
-        net_tx_pkt_reset(tx->tx_pkt);
+        net_tx_pkt_reset(tx->tx_pkt, core->owner);
 
         tx->sum_needed = 0;
         tx->cptse = 0;
@@ -3447,7 +3447,7 @@ e1000e_core_pci_uninit(E1000ECore *core)
     qemu_del_vm_change_state_handler(core->vmstate);
 
     for (i = 0; i < E1000E_NUM_QUEUES; i++) {
-        net_tx_pkt_reset(core->tx[i].tx_pkt);
+        net_tx_pkt_reset(core->tx[i].tx_pkt, core->owner);
         net_tx_pkt_uninit(core->tx[i].tx_pkt);
     }
 
@@ -3572,7 +3572,7 @@ static void e1000e_reset(E1000ECore *core, bool sw)
     e1000x_reset_mac_addr(core->owner_nic, core->mac, core->permanent_mac);
 
     for (i = 0; i < ARRAY_SIZE(core->tx); i++) {
-        net_tx_pkt_reset(core->tx[i].tx_pkt);
+        net_tx_pkt_reset(core->tx[i].tx_pkt, core->owner);
         memset(&core->tx[i].props, 0, sizeof(core->tx[i].props));
         core->tx[i].skip_cp = false;
     }
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index a7c7bfdc75..41d1abae03 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -521,6 +521,7 @@ igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt)
 
 static void
 igb_process_tx_desc(IGBCore *core,
+                    PCIDevice *dev,
                     struct igb_tx *tx,
                     union e1000_adv_tx_desc *tx_desc,
                     int queue_index)
@@ -585,7 +586,7 @@ igb_process_tx_desc(IGBCore *core,
 
         tx->first = true;
         tx->skip_cp = false;
-        net_tx_pkt_reset(tx->tx_pkt);
+        net_tx_pkt_reset(tx->tx_pkt, dev);
     }
 }
 
@@ -800,6 +801,8 @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
         d = core->owner;
     }
 
+    net_tx_pkt_reset(txr->tx->tx_pkt, d);
+
     while (!igb_ring_empty(core, txi)) {
         base = igb_ring_head_descr(core, txi);
 
@@ -808,7 +811,7 @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
         trace_e1000e_tx_descr((void *)(intptr_t)desc.read.buffer_addr,
                               desc.read.cmd_type_len, desc.wb.status);
 
-        igb_process_tx_desc(core, txr->tx, &desc, txi->idx);
+        igb_process_tx_desc(core, d, txr->tx, &desc, txi->idx);
         igb_ring_advance(core, txi, 1);
         eic |= igb_txdesc_writeback(core, base, &desc, txi);
     }
@@ -3825,7 +3828,7 @@ igb_core_pci_realize(IGBCore        *core,
     core->vmstate = qemu_add_vm_change_state_handler(igb_vm_state_change, core);
 
     for (i = 0; i < IGB_NUM_QUEUES; i++) {
-        net_tx_pkt_init(&core->tx[i].tx_pkt, core->owner, E1000E_MAX_TX_FRAGS);
+        net_tx_pkt_init(&core->tx[i].tx_pkt, NULL, E1000E_MAX_TX_FRAGS);
     }
 
     net_rx_pkt_init(&core->rx_pkt);
@@ -3850,7 +3853,7 @@ igb_core_pci_uninit(IGBCore *core)
     qemu_del_vm_change_state_handler(core->vmstate);
 
     for (i = 0; i < IGB_NUM_QUEUES; i++) {
-        net_tx_pkt_reset(core->tx[i].tx_pkt);
+        net_tx_pkt_reset(core->tx[i].tx_pkt, NULL);
         net_tx_pkt_uninit(core->tx[i].tx_pkt);
     }
 
@@ -4023,7 +4026,7 @@ static void igb_reset(IGBCore *core, bool sw)
 
     for (i = 0; i < ARRAY_SIZE(core->tx); i++) {
         tx = &core->tx[i];
-        net_tx_pkt_reset(tx->tx_pkt);
+        net_tx_pkt_reset(tx->tx_pkt, NULL);
         tx->vlan = 0;
         tx->mss = 0;
         tx->tse = false;
diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c
index 986a3adfe9..cb606cc84b 100644
--- a/hw/net/net_tx_pkt.c
+++ b/hw/net/net_tx_pkt.c
@@ -443,7 +443,7 @@ void net_tx_pkt_dump(struct NetTxPkt *pkt)
 #endif
 }
 
-void net_tx_pkt_reset(struct NetTxPkt *pkt)
+void net_tx_pkt_reset(struct NetTxPkt *pkt, PCIDevice *pci_dev)
 {
     int i;
 
@@ -467,6 +467,7 @@ void net_tx_pkt_reset(struct NetTxPkt *pkt)
                           pkt->raw[i].iov_len, DMA_DIRECTION_TO_DEVICE, 0);
         }
     }
+    pkt->pci_dev = pci_dev;
     pkt->raw_frags = 0;
 
     pkt->hdr_len = 0;
diff --git a/hw/net/net_tx_pkt.h b/hw/net/net_tx_pkt.h
index f57b4e034b..e5ce6f20bc 100644
--- a/hw/net/net_tx_pkt.h
+++ b/hw/net/net_tx_pkt.h
@@ -148,9 +148,10 @@ void net_tx_pkt_dump(struct NetTxPkt *pkt);
  * reset tx packet private context (needed to be called between packets)
  *
  * @pkt:            packet
+ * @dev:            PCI device processing the next packet
  *
  */
-void net_tx_pkt_reset(struct NetTxPkt *pkt);
+void net_tx_pkt_reset(struct NetTxPkt *pkt, PCIDevice *dev);
 
 /**
  * Send packet to qemu. handles sw offloads if vhdr is not supported.
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 1068b80868..f7b874c139 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -678,7 +678,7 @@ static void vmxnet3_process_tx_queue(VMXNET3State *s, int qidx)
             vmxnet3_complete_packet(s, qidx, txd_idx);
             s->tx_sop = true;
             s->skip_current_tx_pkt = false;
-            net_tx_pkt_reset(s->tx_pkt);
+            net_tx_pkt_reset(s->tx_pkt, PCI_DEVICE(s));
         }
     }
 }
@@ -1159,7 +1159,7 @@ static void vmxnet3_deactivate_device(VMXNET3State *s)
 {
     if (s->device_active) {
         VMW_CBPRN("Deactivating vmxnet3...");
-        net_tx_pkt_reset(s->tx_pkt);
+        net_tx_pkt_reset(s->tx_pkt, PCI_DEVICE(s));
         net_tx_pkt_uninit(s->tx_pkt);
         net_rx_pkt_uninit(s->rx_pkt);
         s->device_active = false;
-- 
2.39.2
Re: [PATCH for 8.0] igb: Fix DMA requester specification for Tx packet
Posted by Jason Wang 1 year ago
On Thu, Mar 16, 2023 at 8:29 PM Akihiko Odaki <akihiko.odaki@daynix.com> wrote:
>
> igb used to specify the PF as DMA requester when reading Tx packets.
> This made Tx requests from VFs to be performed on the address space of
> the PF, defeating the purpose of SR-IOV. Add some logic to change the
> requester depending on the queue, which can be assigned to a VF.
>
> Fixes: 3a977deebe ("Intrdocue igb device emulation")
> Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
> ---
>  hw/net/e1000e_core.c |  6 +++---
>  hw/net/igb_core.c    | 13 ++++++++-----
>  hw/net/net_tx_pkt.c  |  3 ++-
>  hw/net/net_tx_pkt.h  |  3 ++-
>  hw/net/vmxnet3.c     |  4 ++--
>  5 files changed, 17 insertions(+), 12 deletions(-)
>
> diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
> index 4d9679ca0b..c0c09b6965 100644
> --- a/hw/net/e1000e_core.c
> +++ b/hw/net/e1000e_core.c
> @@ -765,7 +765,7 @@ e1000e_process_tx_desc(E1000ECore *core,
>          }
>
>          tx->skip_cp = false;
> -        net_tx_pkt_reset(tx->tx_pkt);
> +        net_tx_pkt_reset(tx->tx_pkt, core->owner);
>
>          tx->sum_needed = 0;
>          tx->cptse = 0;
> @@ -3447,7 +3447,7 @@ e1000e_core_pci_uninit(E1000ECore *core)
>      qemu_del_vm_change_state_handler(core->vmstate);
>
>      for (i = 0; i < E1000E_NUM_QUEUES; i++) {
> -        net_tx_pkt_reset(core->tx[i].tx_pkt);
> +        net_tx_pkt_reset(core->tx[i].tx_pkt, core->owner);
>          net_tx_pkt_uninit(core->tx[i].tx_pkt);
>      }
>
> @@ -3572,7 +3572,7 @@ static void e1000e_reset(E1000ECore *core, bool sw)
>      e1000x_reset_mac_addr(core->owner_nic, core->mac, core->permanent_mac);
>
>      for (i = 0; i < ARRAY_SIZE(core->tx); i++) {
> -        net_tx_pkt_reset(core->tx[i].tx_pkt);
> +        net_tx_pkt_reset(core->tx[i].tx_pkt, core->owner);
>          memset(&core->tx[i].props, 0, sizeof(core->tx[i].props));
>          core->tx[i].skip_cp = false;
>      }
> diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
> index a7c7bfdc75..41d1abae03 100644
> --- a/hw/net/igb_core.c
> +++ b/hw/net/igb_core.c
> @@ -521,6 +521,7 @@ igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt)
>
>  static void
>  igb_process_tx_desc(IGBCore *core,
> +                    PCIDevice *dev,
>                      struct igb_tx *tx,
>                      union e1000_adv_tx_desc *tx_desc,
>                      int queue_index)
> @@ -585,7 +586,7 @@ igb_process_tx_desc(IGBCore *core,
>
>          tx->first = true;
>          tx->skip_cp = false;
> -        net_tx_pkt_reset(tx->tx_pkt);
> +        net_tx_pkt_reset(tx->tx_pkt, dev);
>      }
>  }
>
> @@ -800,6 +801,8 @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
>          d = core->owner;
>      }
>
> +    net_tx_pkt_reset(txr->tx->tx_pkt, d);
> +
>      while (!igb_ring_empty(core, txi)) {
>          base = igb_ring_head_descr(core, txi);
>
> @@ -808,7 +811,7 @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
>          trace_e1000e_tx_descr((void *)(intptr_t)desc.read.buffer_addr,
>                                desc.read.cmd_type_len, desc.wb.status);
>
> -        igb_process_tx_desc(core, txr->tx, &desc, txi->idx);
> +        igb_process_tx_desc(core, d, txr->tx, &desc, txi->idx);
>          igb_ring_advance(core, txi, 1);
>          eic |= igb_txdesc_writeback(core, base, &desc, txi);
>      }
> @@ -3825,7 +3828,7 @@ igb_core_pci_realize(IGBCore        *core,
>      core->vmstate = qemu_add_vm_change_state_handler(igb_vm_state_change, core);
>
>      for (i = 0; i < IGB_NUM_QUEUES; i++) {
> -        net_tx_pkt_init(&core->tx[i].tx_pkt, core->owner, E1000E_MAX_TX_FRAGS);
> +        net_tx_pkt_init(&core->tx[i].tx_pkt, NULL, E1000E_MAX_TX_FRAGS);
>      }
>
>      net_rx_pkt_init(&core->rx_pkt);
> @@ -3850,7 +3853,7 @@ igb_core_pci_uninit(IGBCore *core)
>      qemu_del_vm_change_state_handler(core->vmstate);
>
>      for (i = 0; i < IGB_NUM_QUEUES; i++) {
> -        net_tx_pkt_reset(core->tx[i].tx_pkt);
> +        net_tx_pkt_reset(core->tx[i].tx_pkt, NULL);
>          net_tx_pkt_uninit(core->tx[i].tx_pkt);
>      }
>
> @@ -4023,7 +4026,7 @@ static void igb_reset(IGBCore *core, bool sw)
>
>      for (i = 0; i < ARRAY_SIZE(core->tx); i++) {
>          tx = &core->tx[i];
> -        net_tx_pkt_reset(tx->tx_pkt);
> +        net_tx_pkt_reset(tx->tx_pkt, NULL);
>          tx->vlan = 0;
>          tx->mss = 0;
>          tx->tse = false;
> diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c
> index 986a3adfe9..cb606cc84b 100644
> --- a/hw/net/net_tx_pkt.c
> +++ b/hw/net/net_tx_pkt.c
> @@ -443,7 +443,7 @@ void net_tx_pkt_dump(struct NetTxPkt *pkt)
>  #endif
>  }
>
> -void net_tx_pkt_reset(struct NetTxPkt *pkt)
> +void net_tx_pkt_reset(struct NetTxPkt *pkt, PCIDevice *pci_dev)
>  {
>      int i;
>
> @@ -467,6 +467,7 @@ void net_tx_pkt_reset(struct NetTxPkt *pkt)
>                            pkt->raw[i].iov_len, DMA_DIRECTION_TO_DEVICE, 0);
>          }
>      }
> +    pkt->pci_dev = pci_dev;
>      pkt->raw_frags = 0;
>
>      pkt->hdr_len = 0;
> diff --git a/hw/net/net_tx_pkt.h b/hw/net/net_tx_pkt.h
> index f57b4e034b..e5ce6f20bc 100644
> --- a/hw/net/net_tx_pkt.h
> +++ b/hw/net/net_tx_pkt.h
> @@ -148,9 +148,10 @@ void net_tx_pkt_dump(struct NetTxPkt *pkt);
>   * reset tx packet private context (needed to be called between packets)
>   *
>   * @pkt:            packet
> + * @dev:            PCI device processing the next packet

I've queued this patch, but please post a patch for post 8.0 to
replace the PCIDevice * with void *. We don't want to tightly couple
PCI devices with net_tx_pkt. But the user can store a context (e.g
PCIDevice) instead.

Thanks

>   *
>   */
> -void net_tx_pkt_reset(struct NetTxPkt *pkt);
> +void net_tx_pkt_reset(struct NetTxPkt *pkt, PCIDevice *dev);
>
>  /**
>   * Send packet to qemu. handles sw offloads if vhdr is not supported.
> diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
> index 1068b80868..f7b874c139 100644
> --- a/hw/net/vmxnet3.c
> +++ b/hw/net/vmxnet3.c
> @@ -678,7 +678,7 @@ static void vmxnet3_process_tx_queue(VMXNET3State *s, int qidx)
>              vmxnet3_complete_packet(s, qidx, txd_idx);
>              s->tx_sop = true;
>              s->skip_current_tx_pkt = false;
> -            net_tx_pkt_reset(s->tx_pkt);
> +            net_tx_pkt_reset(s->tx_pkt, PCI_DEVICE(s));
>          }
>      }
>  }
> @@ -1159,7 +1159,7 @@ static void vmxnet3_deactivate_device(VMXNET3State *s)
>  {
>      if (s->device_active) {
>          VMW_CBPRN("Deactivating vmxnet3...");
> -        net_tx_pkt_reset(s->tx_pkt);
> +        net_tx_pkt_reset(s->tx_pkt, PCI_DEVICE(s));
>          net_tx_pkt_uninit(s->tx_pkt);
>          net_rx_pkt_uninit(s->rx_pkt);
>          s->device_active = false;
> --
> 2.39.2
>