:p
atchew
Login
Based-on: <20230130140809.78262-1-akihiko.odaki@daynix.com> ([PATCH v5 0/9] Introduce igb) Rebased on latest changes from Akihiko, and merged changes from my original patchset: https://lists.gnu.org/archive/html/qemu-devel/2022-12/msg04670.html Changes since v2: - Fixed more comments from Akihiko - Reordered the patches to make changes easier to understand Changes since v1: - Fix review comments from Akihiko Sriram Yagnaraman (9): MAINTAINERS: Add Sriram Yagnaraman as a igb reviewer igb: handle PF/VF reset properly igb: add ICR_RXDW igb: implement VFRE and VFTE registers igb: check oversized packets for VMDq igb: respect E1000_VMOLR_RSSE igb: implement VF Tx and Rx stats igb: respect VT_CTL ignore MAC field igb: respect VMVIR and VMOLR for VLAN MAINTAINERS | 1 + hw/net/e1000x_regs.h | 4 + hw/net/igb_core.c | 244 ++++++++++++++++++++++++++++++++----------- hw/net/igb_core.h | 1 + hw/net/igb_regs.h | 6 ++ hw/net/trace-events | 4 + 6 files changed, 197 insertions(+), 63 deletions(-) -- 2.34.1
I would like to review and be informed on changes to igb device Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index XXXXXXX..XXXXXXX 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -XXX,XX +XXX,XX @@ F: tests/qtest/libqos/e1000e.* igb M: Akihiko Odaki <akihiko.odaki@daynix.com> +R: Sriram Yagnaraman <sriram.yagnaraman@est.tech> S: Maintained F: docs/system/devices/igb.rst F: hw/net/igb* -- 2.34.1
Use PFRSTD to reset RSTI bit for VFs, and raise VFLRE interrupt when VF is reset. Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> --- hw/net/igb_core.c | 33 +++++++++++++++++++++------------ hw/net/igb_regs.h | 3 +++ hw/net/trace-events | 2 ++ 3 files changed, 26 insertions(+), 12 deletions(-) diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/igb_core.c +++ b/hw/net/igb_core.c @@ -XXX,XX +XXX,XX @@ static void igb_set_eims(IGBCore *core, int index, uint32_t val) igb_update_interrupt_state(core); } -static void igb_vf_reset(IGBCore *core, uint16_t vfn) -{ - /* TODO: Reset of the queue enable and the interrupt registers of the VF. */ - - core->mac[V2PMAILBOX0 + vfn] &= ~E1000_V2PMAILBOX_RSTI; - core->mac[V2PMAILBOX0 + vfn] = E1000_V2PMAILBOX_RSTD; -} - static void mailbox_interrupt_to_vf(IGBCore *core, uint16_t vfn) { uint32_t ent = core->mac[VTIVAR_MISC + vfn]; @@ -XXX,XX +XXX,XX @@ static void igb_set_vfmailbox(IGBCore *core, int index, uint32_t val) } } +static void igb_vf_reset(IGBCore *core, uint16_t vfn) +{ + /* disable Rx and Tx for the VF*/ + core->mac[VFTE] &= ~BIT(vfn); + core->mac[VFRE] &= ~BIT(vfn); + /* indicate VF reset to PF */ + core->mac[VFLRE] |= BIT(vfn); + /* VFLRE and mailbox use the same interrupt cause */ + mailbox_interrupt_to_pf(core); +} + static void igb_w1c(IGBCore *core, int index, uint32_t val) { core->mac[index] &= ~val; @@ -XXX,XX +XXX,XX @@ igb_set_status(IGBCore *core, int index, uint32_t val) static void igb_set_ctrlext(IGBCore *core, int index, uint32_t val) { - trace_e1000e_link_set_ext_params(!!(val & E1000_CTRL_EXT_ASDCHK), - !!(val & E1000_CTRL_EXT_SPD_BYPS)); - - /* TODO: PFRSTD */ + trace_igb_link_set_ext_params(!!(val & E1000_CTRL_EXT_ASDCHK), + !!(val & E1000_CTRL_EXT_SPD_BYPS), + !!(val & E1000_CTRL_EXT_PFRSTD)); /* Zero self-clearing bits */ val &= ~(E1000_CTRL_EXT_ASDCHK | E1000_CTRL_EXT_EE_RST); core->mac[CTRL_EXT] = val; + + if (core->mac[CTRL_EXT] & 
E1000_CTRL_EXT_PFRSTD) { + for (int vfn = 0; vfn < IGB_MAX_VF_FUNCTIONS; vfn++) { + core->mac[V2PMAILBOX0 + vfn] &= ~E1000_V2PMAILBOX_RSTI; + core->mac[V2PMAILBOX0 + vfn] |= E1000_V2PMAILBOX_RSTD; + } + } } static void diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h index XXXXXXX..XXXXXXX 100644 --- a/hw/net/igb_regs.h +++ b/hw/net/igb_regs.h @@ -XXX,XX +XXX,XX @@ union e1000_adv_rx_desc { /* from igb/e1000_defines.h */ +/* Physical Func Reset Done Indication */ +#define E1000_CTRL_EXT_PFRSTD 0x00004000 + #define E1000_IVAR_VALID 0x80 #define E1000_GPIE_NSICR 0x00000001 #define E1000_GPIE_MSIX_MODE 0x00000010 diff --git a/hw/net/trace-events b/hw/net/trace-events index XXXXXXX..XXXXXXX 100644 --- a/hw/net/trace-events +++ b/hw/net/trace-events @@ -XXX,XX +XXX,XX @@ igb_core_mdic_read_unhandled(uint32_t addr) "MDIC READ: PHY[%u] UNHANDLED" igb_core_mdic_write(uint32_t addr, uint32_t data) "MDIC WRITE: PHY[%u] = 0x%x" igb_core_mdic_write_unhandled(uint32_t addr) "MDIC WRITE: PHY[%u] UNHANDLED" +igb_link_set_ext_params(bool asd_check, bool speed_select_bypass, bool pfrstd) "Set extended link params: ASD check: %d, Speed select bypass: %d, PF reset done: %d" + igb_rx_desc_buff_size(uint32_t b) "buffer size: %u" igb_rx_desc_buff_write(uint64_t addr, uint16_t offset, const void* source, uint32_t len) "addr: 0x%"PRIx64", offset: %u, from: %p, length: %u" -- 2.34.1
IGB uses RXDW ICR bit to indicate that rx descriptor has been written back. This is the same as RXT0 bit in older HW. Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> --- hw/net/e1000x_regs.h | 4 ++++ hw/net/igb_core.c | 46 +++++++++++++++++--------------------------- 2 files changed, 22 insertions(+), 28 deletions(-) diff --git a/hw/net/e1000x_regs.h b/hw/net/e1000x_regs.h index XXXXXXX..XXXXXXX 100644 --- a/hw/net/e1000x_regs.h +++ b/hw/net/e1000x_regs.h @@ -XXX,XX +XXX,XX @@ #define E1000_ICR_RXDMT0 0x00000010 /* rx desc min. threshold (0) */ #define E1000_ICR_RXO 0x00000040 /* rx overrun */ #define E1000_ICR_RXT0 0x00000080 /* rx timer intr (ring 0) */ +#define E1000_ICR_RXDW 0x00000080 /* rx desc written back */ #define E1000_ICR_MDAC 0x00000200 /* MDIO access complete */ #define E1000_ICR_RXCFG 0x00000400 /* RX /c/ ordered set */ #define E1000_ICR_GPI_EN0 0x00000800 /* GP Int 0 */ @@ -XXX,XX +XXX,XX @@ #define E1000_ICS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */ #define E1000_ICS_RXO E1000_ICR_RXO /* rx overrun */ #define E1000_ICS_RXT0 E1000_ICR_RXT0 /* rx timer intr */ +#define E1000_ICS_RXDW E1000_ICR_RXDW /* rx desc written back */ #define E1000_ICS_MDAC E1000_ICR_MDAC /* MDIO access complete */ #define E1000_ICS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */ #define E1000_ICS_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */ @@ -XXX,XX +XXX,XX @@ #define E1000_IMS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */ #define E1000_IMS_RXO E1000_ICR_RXO /* rx overrun */ #define E1000_IMS_RXT0 E1000_ICR_RXT0 /* rx timer intr */ +#define E1000_IMS_RXDW E1000_ICR_RXDW /* rx desc written back */ #define E1000_IMS_MDAC E1000_ICR_MDAC /* MDIO access complete */ #define E1000_IMS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */ #define E1000_IMS_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */ @@ -XXX,XX +XXX,XX @@ #define E1000_IMC_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. 
threshold */ #define E1000_IMC_RXO E1000_ICR_RXO /* rx overrun */ #define E1000_IMC_RXT0 E1000_ICR_RXT0 /* rx timer intr */ +#define E1000_IMC_RXDW E1000_ICR_RXDW /* rx desc written back */ #define E1000_IMC_MDAC E1000_ICR_MDAC /* MDIO access complete */ #define E1000_IMC_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */ #define E1000_IMC_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */ diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/igb_core.c +++ b/hw/net/igb_core.c @@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt, static const int maximum_ethernet_hdr_len = (ETH_HLEN + 4); uint16_t queues = 0; - uint32_t n; + uint32_t icr_bits = 0; uint8_t min_buf[ETH_ZLEN]; struct iovec min_iov; struct eth_header *ehdr; @@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt, e1000x_fcs_len(core->mac); retval = orig_size; + igb_rx_fix_l4_csum(core, core->rx_pkt); for (i = 0; i < IGB_NUM_QUEUES; i++) { if (!(queues & BIT(i))) { @@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt, igb_rx_ring_init(core, &rxr, i); - trace_e1000e_rx_rss_dispatched_to_queue(rxr.i->idx); - if (!igb_has_rxbufs(core, rxr.i, total_size)) { - retval = 0; + icr_bits |= E1000_ICS_RXO; + continue; } - } - if (retval) { - n = E1000_ICR_RXT0; - - igb_rx_fix_l4_csum(core, core->rx_pkt); - - for (i = 0; i < IGB_NUM_QUEUES; i++) { - if (!(queues & BIT(i))) { - continue; - } - - igb_rx_ring_init(core, &rxr, i); + trace_e1000e_rx_rss_dispatched_to_queue(rxr.i->idx); + igb_write_packet_to_guest(core, core->rx_pkt, &rxr, &rss_info); - igb_write_packet_to_guest(core, core->rx_pkt, &rxr, &rss_info); + /* Check if receive descriptor minimum threshold hit */ + if (igb_rx_descr_threshold_hit(core, rxr.i)) { + icr_bits |= E1000_ICS_RXDMT0; + } - /* Check if receive descriptor minimum threshold hit */ - if (igb_rx_descr_threshold_hit(core, rxr.i)) { - n |= 
E1000_ICS_RXDMT0; - } + core->mac[EICR] |= igb_rx_wb_eic(core, rxr.i->idx); - core->mac[EICR] |= igb_rx_wb_eic(core, rxr.i->idx); - } + icr_bits |= E1000_ICR_RXDW; + } - trace_e1000e_rx_written_to_guest(n); + if (icr_bits & E1000_ICR_RXDW) { + trace_e1000e_rx_written_to_guest(icr_bits); } else { - n = E1000_ICS_RXO; - trace_e1000e_rx_not_written_to_guest(n); + trace_e1000e_rx_not_written_to_guest(icr_bits); } - trace_e1000e_rx_interrupt_set(n); - igb_set_interrupt_cause(core, n); + trace_e1000e_rx_interrupt_set(icr_bits); + igb_set_interrupt_cause(core, icr_bits); return retval; } -- 2.34.1
Also add checks for RXDCTL/TXDCTL queue enable bits Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> --- hw/net/igb_core.c | 30 +++++++++++++++++++++++++----- hw/net/igb_core.h | 1 + hw/net/igb_regs.h | 3 +++ 3 files changed, 29 insertions(+), 5 deletions(-) diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/igb_core.c +++ b/hw/net/igb_core.c @@ -XXX,XX +XXX,XX @@ igb_txdesc_writeback(IGBCore *core, dma_addr_t base, return igb_tx_wb_eic(core, txi->idx); } +static inline bool +igb_tx_enabled(IGBCore *core, const E1000E_RingInfo *txi) +{ + bool vmdq = core->mac[MRQC] & 1; + uint16_t qn = txi->idx; + uint16_t pool = qn % IGB_NUM_VM_POOLS; + + return (core->mac[TCTL] & E1000_TCTL_EN) && + (!vmdq || core->mac[VFTE] & BIT(pool)) && + (core->mac[TXDCTL0 + (qn * 16)] & E1000_TXDCTL_QUEUE_ENABLE); +} + static void igb_start_xmit(IGBCore *core, const IGB_TxRing *txr) { @@ -XXX,XX +XXX,XX @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr) const E1000E_RingInfo *txi = txr->i; uint32_t eic = 0; - /* TODO: check if the queue itself is enabled too. 
*/ - if (!(core->mac[TCTL] & E1000_TCTL_EN)) { + if (!igb_tx_enabled(core, txi)) { trace_e1000e_tx_disabled(); return; } @@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr, queues = BIT(def_pl >> E1000_VT_CTL_DEFAULT_POOL_SHIFT); } + queues &= core->mac[VFRE]; igb_rss_parse_packet(core, core->rx_pkt, external_tx != NULL, rss_info); if (rss_info->queue & 1) { queues <<= 8; @@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt, igb_rx_fix_l4_csum(core, core->rx_pkt); for (i = 0; i < IGB_NUM_QUEUES; i++) { - if (!(queues & BIT(i))) { + if (!(queues & BIT(i)) || + !(core->mac[RXDCTL0 + (i * 16)] & E1000_RXDCTL_QUEUE_ENABLE)) { continue; } igb_rx_ring_init(core, &rxr, i); - if (!igb_has_rxbufs(core, rxr.i, total_size)) { icr_bits |= E1000_ICS_RXO; continue; @@ -XXX,XX +XXX,XX @@ static void igb_set_vfmailbox(IGBCore *core, int index, uint32_t val) static void igb_vf_reset(IGBCore *core, uint16_t vfn) { + uint16_t qn0 = vfn; + uint16_t qn1 = vfn + IGB_NUM_VM_POOLS; + /* disable Rx and Tx for the VF*/ - core->mac[VFTE] &= ~BIT(vfn); + core->mac[RXDCTL0 + (qn0 * 16)] &= ~E1000_RXDCTL_QUEUE_ENABLE; + core->mac[RXDCTL0 + (qn1 * 16)] &= ~E1000_RXDCTL_QUEUE_ENABLE; + core->mac[TXDCTL0 + (qn0 * 16)] &= ~E1000_TXDCTL_QUEUE_ENABLE; + core->mac[TXDCTL0 + (qn1 * 16)] &= ~E1000_TXDCTL_QUEUE_ENABLE; core->mac[VFRE] &= ~BIT(vfn); + core->mac[VFTE] &= ~BIT(vfn); /* indicate VF reset to PF */ core->mac[VFLRE] |= BIT(vfn); /* VFLRE and mailbox use the same interrupt cause */ @@ -XXX,XX +XXX,XX @@ igb_phy_reg_init[] = { static const uint32_t igb_mac_reg_init[] = { [LEDCTL] = 2 | (3 << 8) | BIT(15) | (6 << 16) | (7 << 24), [EEMNGCTL] = BIT(31), + [TXDCTL0] = E1000_TXDCTL_QUEUE_ENABLE, [RXDCTL0] = E1000_RXDCTL_QUEUE_ENABLE | (1 << 16), [RXDCTL1] = 1 << 16, [RXDCTL2] = 1 << 16, diff --git a/hw/net/igb_core.h b/hw/net/igb_core.h index XXXXXXX..XXXXXXX 100644 --- a/hw/net/igb_core.h +++ 
b/hw/net/igb_core.h @@ -XXX,XX +XXX,XX @@ #define IGB_MSIX_VEC_NUM (10) #define IGBVF_MSIX_VEC_NUM (3) #define IGB_NUM_QUEUES (16) +#define IGB_NUM_VM_POOLS (8) typedef struct IGBCore IGBCore; diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h index XXXXXXX..XXXXXXX 100644 --- a/hw/net/igb_regs.h +++ b/hw/net/igb_regs.h @@ -XXX,XX +XXX,XX @@ union e1000_adv_rx_desc { #define E1000_MRQC_RSS_FIELD_IPV6_UDP 0x00800000 #define E1000_MRQC_RSS_FIELD_IPV6_UDP_EX 0x01000000 +/* Additional Transmit Descriptor Control definitions */ +#define E1000_TXDCTL_QUEUE_ENABLE 0x02000000 /* Enable specific Tx Queue */ + /* Additional Receive Descriptor Control definitions */ #define E1000_RXDCTL_QUEUE_ENABLE 0x02000000 /* Enable specific Rx Queue */ -- 2.34.1
Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> --- hw/net/igb_core.c | 48 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/igb_core.c +++ b/hw/net/igb_core.c @@ -XXX,XX +XXX,XX @@ igb_rx_l4_cso_enabled(IGBCore *core) return !!(core->mac[RXCSUM] & E1000_RXCSUM_TUOFLD); } +static bool +igb_rx_is_oversized(IGBCore *core, uint16_t qn, size_t size) +{ + uint16_t pool = qn % IGB_NUM_VM_POOLS; + bool lpe = !!(core->mac[VMOLR0 + pool] & E1000_VMOLR_LPE); + int maximum_ethernet_lpe_size = + core->mac[VMOLR0 + pool] & E1000_VMOLR_RLPML_MASK; + int maximum_ethernet_vlan_size = 1522; + + return lpe ? size > maximum_ethernet_lpe_size : + size > maximum_ethernet_vlan_size; +} + static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr, - E1000E_RSSInfo *rss_info, bool *external_tx) + size_t size, E1000E_RSSInfo *rss_info, + bool *external_tx) { static const int ta_shift[] = { 4, 3, 2, 0 }; uint32_t f, ra[2], *macp, rctl = core->mac[RCTL]; uint16_t queues = 0; + uint16_t oversized = 0; uint16_t vid = lduw_be_p(&PKT_GET_VLAN_HDR(ehdr)->h_tci) & VLAN_VID_MASK; bool accepted = false; int i; @@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr, if (core->mac[MRQC] & 1) { if (is_broadcast_ether_addr(ehdr->h_dest)) { - for (i = 0; i < 8; i++) { + for (i = 0; i < IGB_NUM_VM_POOLS; i++) { if (core->mac[VMOLR0 + i] & E1000_VMOLR_BAM) { queues |= BIT(i); } @@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr, f = ta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3]; f = (((ehdr->h_dest[5] << 8) | ehdr->h_dest[4]) >> f) & 0xfff; if (macp[f >> 5] & (1 << (f & 0x1f))) { - for (i = 0; i < 8; i++) { + for (i = 0; i < IGB_NUM_VM_POOLS; i++) { if (core->mac[VMOLR0 + i] & E1000_VMOLR_ROMPE) { queues |= BIT(i); } @@ -XXX,XX +XXX,XX @@ 
static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr, } } } else { - for (i = 0; i < 8; i++) { + for (i = 0; i < IGB_NUM_VM_POOLS; i++) { if (core->mac[VMOLR0 + i] & E1000_VMOLR_AUPE) { mask |= BIT(i); } @@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr, } queues &= core->mac[VFRE]; - igb_rss_parse_packet(core, core->rx_pkt, external_tx != NULL, rss_info); - if (rss_info->queue & 1) { - queues <<= 8; + if (queues) { + for (i = 0; i < IGB_NUM_VM_POOLS; i++) { + if ((queues & BIT(i)) && igb_rx_is_oversized(core, i, size)) { + oversized |= BIT(i); + } + } + /* 8.19.37 increment ROC if packet is oversized for all queues */ + if (oversized == queues) { + trace_e1000x_rx_oversized(size); + e1000x_inc_reg_if_not_full(core->mac, ROC); + } + queues &= ~oversized; + } + + if (queues) { + igb_rss_parse_packet(core, core->rx_pkt, + external_tx != NULL, rss_info); + if (rss_info->queue & 1) { + queues <<= 8; + } } } else { switch (net_rx_pkt_get_packet_type(core->rx_pkt)) { @@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt, e1000x_vlan_enabled(core->mac), core->mac[VET] & 0xffff); - queues = igb_receive_assign(core, ehdr, &rss_info, external_tx); + queues = igb_receive_assign(core, ehdr, size, &rss_info, external_tx); if (!queues) { trace_e1000e_rx_flt_dropped(); return orig_size; -- 2.34.1
RSS for VFs is only enabled if VMOLR[n].RSSE is set. Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> --- hw/net/igb_core.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/igb_core.c +++ b/hw/net/igb_core.c @@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr, if (queues) { igb_rss_parse_packet(core, core->rx_pkt, external_tx != NULL, rss_info); + /* Sec 8.26.1: PQn = VFn + VQn*8 */ if (rss_info->queue & 1) { - queues <<= 8; + for (i = 0; i < IGB_NUM_VM_POOLS; i++) { + if ((queues & BIT(i)) && + (core->mac[VMOLR0 + i] & E1000_VMOLR_RSSE)) { + queues |= BIT(i + IGB_NUM_VM_POOLS); + queues &= ~BIT(i); + } + } } } } else { -- 2.34.1
Please note that loopback counters for VM to VM traffic is not implemented yet: VFGOTLBC, VFGPTLBC, VFGORLBC and VFGPRLBC. Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> --- hw/net/igb_core.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/igb_core.c +++ b/hw/net/igb_core.c @@ -XXX,XX +XXX,XX @@ igb_tx_pkt_send(IGBCore *core, struct igb_tx *tx, int queue_index) } static void -igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt) +igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt, int qn) { static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511, PTC1023, PTC1522 }; @@ -XXX,XX +XXX,XX @@ igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt) core->mac[GPTC] = core->mac[TPT]; core->mac[GOTCL] = core->mac[TOTL]; core->mac[GOTCH] = core->mac[TOTH]; + + if (core->mac[MRQC] & 1) { + uint16_t pool = qn % IGB_NUM_VM_POOLS; + + core->mac[PVFGOTC0 + (pool * 64)] += tot_len; + core->mac[PVFGPTC0 + (pool * 64)]++; + } } static void @@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core, core->mac[VET] & 0xffff); } if (igb_tx_pkt_send(core, tx, queue_index)) { - igb_on_tx_done_update_stats(core, tx->tx_pkt); + igb_on_tx_done_update_stats(core, tx->tx_pkt, queue_index); } } @@ -XXX,XX +XXX,XX @@ igb_write_to_rx_buffers(IGBCore *core, } static void -igb_update_rx_stats(IGBCore *core, size_t data_size, size_t data_fcs_size) +igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi, + size_t data_size, size_t data_fcs_size) { e1000x_update_rx_total_stats(core->mac, data_size, data_fcs_size); @@ -XXX,XX +XXX,XX @@ igb_update_rx_stats(IGBCore *core, size_t data_size, size_t data_fcs_size) default: break; } + + if (core->mac[MRQC] & 1) { + uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS; + + core->mac[PVFGORC0 + (pool * 64)] += data_size + 4; + core->mac[PVFGPRC0 + (pool * 64)]++; + if 
(net_rx_pkt_get_packet_type(core->rx_pkt) == ETH_PKT_MCAST) { + core->mac[PVFMPRC0 + (pool * 64)]++; + } + } } static inline bool @@ -XXX,XX +XXX,XX @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, } while (desc_offset < total_size); - igb_update_rx_stats(core, size, total_size); + igb_update_rx_stats(core, rxi, size, total_size); } static inline void -- 2.34.1
Also trace out a warning if replication mode is disabled, since we only support replication mode enabled. Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> --- hw/net/igb_core.c | 9 +++++++++ hw/net/trace-events | 2 ++ 2 files changed, 11 insertions(+) diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/igb_core.c +++ b/hw/net/igb_core.c @@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr, } if (core->mac[MRQC] & 1) { + if (!(core->mac[VT_CTL] & E1000_VT_CTL_VM_REPL_EN)) { + trace_igb_rx_vmdq_replication_mode_disabled(); + } + if (is_broadcast_ether_addr(ehdr->h_dest)) { for (i = 0; i < IGB_NUM_VM_POOLS; i++) { if (core->mac[VMOLR0 + i] & E1000_VMOLR_BAM) { @@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr, } } + /* assume a full pool list if IGMAC is set */ + if (core->mac[VT_CTL] & E1000_VT_CTL_IGNORE_MAC) { + queues = BIT(IGB_MAX_VF_FUNCTIONS) - 1; + } + if (e1000x_vlan_rx_filter_enabled(core->mac)) { uint16_t mask = 0; diff --git a/hw/net/trace-events b/hw/net/trace-events index XXXXXXX..XXXXXXX 100644 --- a/hw/net/trace-events +++ b/hw/net/trace-events @@ -XXX,XX +XXX,XX @@ igb_rx_desc_buff_write(uint64_t addr, uint16_t offset, const void* source, uint3 igb_rx_metadata_rss(uint32_t rss) "RSS data: 0x%X" +igb_rx_vmdq_replication_mode_disabled(void) "WARN: Only replication mode enabled is supported" + igb_irq_icr_clear_gpie_nsicr(void) "Clearing ICR on read due to GPIE.NSICR enabled" igb_irq_icr_write(uint32_t bits, uint32_t old_icr, uint32_t new_icr) "Clearing ICR bits 0x%x: 0x%x --> 0x%x" igb_irq_set_iam(uint32_t icr) "Update IAM: 0x%x" -- 2.34.1
Add support for stripping/inserting VLAN for VFs. Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> --- hw/net/igb_core.c | 51 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 9 deletions(-) diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/igb_core.c +++ b/hw/net/igb_core.c @@ -XXX,XX +XXX,XX @@ igb_rss_parse_packet(IGBCore *core, struct NetRxPkt *pkt, bool tx, info->queue = E1000_RSS_QUEUE(&core->mac[RETA], info->hash); } +static inline bool +igb_tx_insert_vlan(IGBCore *core, uint16_t qn, + struct igb_tx *tx, bool desc_vle) +{ + if (core->mac[MRQC] & 1) { + uint16_t pool = qn % IGB_NUM_VM_POOLS; + + if (core->mac[VMVIR0 + pool] & E1000_VMVIR_VLANA_DEFAULT) { + /* always insert default VLAN */ + desc_vle = true; + tx->vlan = core->mac[VMVIR0 + pool] & 0xffff; + } else if (core->mac[VMVIR0 + pool] & E1000_VMVIR_VLANA_NEVER) { + return false; + } + } + + return desc_vle && e1000x_vlan_enabled(core->mac); +} + static bool igb_setup_tx_offloads(IGBCore *core, struct igb_tx *tx) { @@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core, if (cmd_type_len & E1000_TXD_CMD_EOP) { if (!tx->skip_cp && net_tx_pkt_parse(tx->tx_pkt)) { - if (cmd_type_len & E1000_TXD_CMD_VLE) { + if (igb_tx_insert_vlan(core, queue_index, tx, + !!(cmd_type_len & E1000_TXD_CMD_VLE))) { net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, tx->vlan, core->mac[VET] & 0xffff); } @@ -XXX,XX +XXX,XX @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, igb_update_rx_stats(core, rxi, size, total_size); } +static bool +igb_rx_strip_vlan(IGBCore *core, const E1000E_RingInfo *rxi) +{ + if (core->mac[MRQC] & 1) { + uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS; + /* Sec 7.10.3.8: CTRL.VME is ignored, only VMOLR/RPLOLR is used */ + return (net_rx_pkt_get_packet_type(core->rx_pkt) == ETH_PKT_MCAST) ? 
+ core->mac[RPLOLR] & E1000_RPLOLR_STRVLAN : + core->mac[VMOLR0 + pool] & E1000_VMOLR_STRVLAN; + } + + return e1000x_vlan_enabled(core->mac); +} + static inline void igb_rx_fix_l4_csum(IGBCore *core, struct NetRxPkt *pkt) { @@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt, ehdr = PKT_GET_ETH_HDR(filter_buf); net_rx_pkt_set_packet_type(core->rx_pkt, get_eth_packet_type(ehdr)); - - net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs, - e1000x_vlan_enabled(core->mac), - core->mac[VET] & 0xffff); + net_rx_pkt_set_protocols(core->rx_pkt, filter_buf, size); queues = igb_receive_assign(core, ehdr, size, &rss_info, external_tx); if (!queues) { @@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt, return orig_size; } - total_size = net_rx_pkt_get_total_len(core->rx_pkt) + - e1000x_fcs_len(core->mac); - retval = orig_size; - igb_rx_fix_l4_csum(core, core->rx_pkt); + total_size = size + e1000x_fcs_len(core->mac); for (i = 0; i < IGB_NUM_QUEUES; i++) { if (!(queues & BIT(i)) || @@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt, } igb_rx_ring_init(core, &rxr, i); + net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs, + igb_rx_strip_vlan(core, rxr.i), + core->mac[VET] & 0xffff); + igb_rx_fix_l4_csum(core, core->rx_pkt); + if (!igb_has_rxbufs(core, rxr.i, total_size)) { icr_bits |= E1000_ICS_RXO; continue; -- 2.34.1
Based-on: <20230201042615.34706-1-akihiko.odaki@daynix.com> ([PATCH v7 0/9] Introduce igb) Rebased on latest changes from Akihiko, and merged changes from my original patchset: https://lists.gnu.org/archive/html/qemu-devel/2022-12/msg04670.html Changes since v5: - Added back an unnecessarily removed empty line Changes since v4: - Removed the change implementing VTCTL.IGMAC, it needs more thought and implementation of DTXSWC.LLE and VLVF.LVLAN first Changes since v3: - Fix comments - Rebased on latest patchset from Akihiko - Remove Rx loop improvements that Akihiko has pulled into his patchset Changes since v2: - Fixed more comments from Akihiko - Reordered the patches to make changes easier to understand Changes since v1: - Fix review comments from Akihiko Sriram Yagnaraman (8): MAINTAINERS: Add Sriram Yagnaraman as a igb reviewer igb: handle PF/VF reset properly igb: add ICR_RXDW igb: implement VFRE and VFTE registers igb: check oversized packets for VMDq igb: respect E1000_VMOLR_RSSE igb: implement VF Tx and Rx stats igb: respect VMVIR and VMOLR for VLAN MAINTAINERS | 1 + hw/net/e1000x_regs.h | 4 + hw/net/igb_core.c | 199 ++++++++++++++++++++++++++++++++++--------- hw/net/igb_core.h | 1 + hw/net/igb_regs.h | 6 ++ hw/net/trace-events | 2 + 6 files changed, 175 insertions(+), 38 deletions(-) -- 2.34.1
I would like to review and be informed on changes to igb device Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index XXXXXXX..XXXXXXX 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -XXX,XX +XXX,XX @@ F: tests/qtest/libqos/e1000e.* igb M: Akihiko Odaki <akihiko.odaki@daynix.com> +R: Sriram Yagnaraman <sriram.yagnaraman@est.tech> S: Maintained F: docs/system/devices/igb.rst F: hw/net/igb* -- 2.34.1
Use PFRSTD to reset RSTI bit for VFs, and raise VFLRE interrupt when VF is reset. Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> --- hw/net/igb_core.c | 33 +++++++++++++++++++++------------ hw/net/igb_regs.h | 3 +++ hw/net/trace-events | 2 ++ 3 files changed, 26 insertions(+), 12 deletions(-) diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/igb_core.c +++ b/hw/net/igb_core.c @@ -XXX,XX +XXX,XX @@ static void igb_set_eims(IGBCore *core, int index, uint32_t val) igb_update_interrupt_state(core); } -static void igb_vf_reset(IGBCore *core, uint16_t vfn) -{ - /* TODO: Reset of the queue enable and the interrupt registers of the VF. */ - - core->mac[V2PMAILBOX0 + vfn] &= ~E1000_V2PMAILBOX_RSTI; - core->mac[V2PMAILBOX0 + vfn] = E1000_V2PMAILBOX_RSTD; -} - static void mailbox_interrupt_to_vf(IGBCore *core, uint16_t vfn) { uint32_t ent = core->mac[VTIVAR_MISC + vfn]; @@ -XXX,XX +XXX,XX @@ static void igb_set_vfmailbox(IGBCore *core, int index, uint32_t val) } } +static void igb_vf_reset(IGBCore *core, uint16_t vfn) +{ + /* disable Rx and Tx for the VF*/ + core->mac[VFTE] &= ~BIT(vfn); + core->mac[VFRE] &= ~BIT(vfn); + /* indicate VF reset to PF */ + core->mac[VFLRE] |= BIT(vfn); + /* VFLRE and mailbox use the same interrupt cause */ + mailbox_interrupt_to_pf(core); +} + static void igb_w1c(IGBCore *core, int index, uint32_t val) { core->mac[index] &= ~val; @@ -XXX,XX +XXX,XX @@ igb_set_status(IGBCore *core, int index, uint32_t val) static void igb_set_ctrlext(IGBCore *core, int index, uint32_t val) { - trace_e1000e_link_set_ext_params(!!(val & E1000_CTRL_EXT_ASDCHK), - !!(val & E1000_CTRL_EXT_SPD_BYPS)); - - /* TODO: PFRSTD */ + trace_igb_link_set_ext_params(!!(val & E1000_CTRL_EXT_ASDCHK), + !!(val & E1000_CTRL_EXT_SPD_BYPS), + !!(val & E1000_CTRL_EXT_PFRSTD)); /* Zero self-clearing bits */ val &= ~(E1000_CTRL_EXT_ASDCHK | E1000_CTRL_EXT_EE_RST); core->mac[CTRL_EXT] = val; + + if (core->mac[CTRL_EXT] & 
E1000_CTRL_EXT_PFRSTD) { + for (int vfn = 0; vfn < IGB_MAX_VF_FUNCTIONS; vfn++) { + core->mac[V2PMAILBOX0 + vfn] &= ~E1000_V2PMAILBOX_RSTI; + core->mac[V2PMAILBOX0 + vfn] |= E1000_V2PMAILBOX_RSTD; + } + } } static void diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h index XXXXXXX..XXXXXXX 100644 --- a/hw/net/igb_regs.h +++ b/hw/net/igb_regs.h @@ -XXX,XX +XXX,XX @@ union e1000_adv_rx_desc { /* from igb/e1000_defines.h */ +/* Physical Func Reset Done Indication */ +#define E1000_CTRL_EXT_PFRSTD 0x00004000 + #define E1000_IVAR_VALID 0x80 #define E1000_GPIE_NSICR 0x00000001 #define E1000_GPIE_MSIX_MODE 0x00000010 diff --git a/hw/net/trace-events b/hw/net/trace-events index XXXXXXX..XXXXXXX 100644 --- a/hw/net/trace-events +++ b/hw/net/trace-events @@ -XXX,XX +XXX,XX @@ igb_core_mdic_read_unhandled(uint32_t addr) "MDIC READ: PHY[%u] UNHANDLED" igb_core_mdic_write(uint32_t addr, uint32_t data) "MDIC WRITE: PHY[%u] = 0x%x" igb_core_mdic_write_unhandled(uint32_t addr) "MDIC WRITE: PHY[%u] UNHANDLED" +igb_link_set_ext_params(bool asd_check, bool speed_select_bypass, bool pfrstd) "Set extended link params: ASD check: %d, Speed select bypass: %d, PF reset done: %d" + igb_rx_desc_buff_size(uint32_t b) "buffer size: %u" igb_rx_desc_buff_write(uint64_t addr, uint16_t offset, const void* source, uint32_t len) "addr: 0x%"PRIx64", offset: %u, from: %p, length: %u" -- 2.34.1
IGB uses RXDW ICR bit to indicate that rx descriptor has been written back. This is the same as RXT0 bit in older HW. Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> --- hw/net/e1000x_regs.h | 4 ++++ hw/net/igb_core.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/hw/net/e1000x_regs.h b/hw/net/e1000x_regs.h index XXXXXXX..XXXXXXX 100644 --- a/hw/net/e1000x_regs.h +++ b/hw/net/e1000x_regs.h @@ -XXX,XX +XXX,XX @@ #define E1000_ICR_RXDMT0 0x00000010 /* rx desc min. threshold (0) */ #define E1000_ICR_RXO 0x00000040 /* rx overrun */ #define E1000_ICR_RXT0 0x00000080 /* rx timer intr (ring 0) */ +#define E1000_ICR_RXDW 0x00000080 /* rx desc written back */ #define E1000_ICR_MDAC 0x00000200 /* MDIO access complete */ #define E1000_ICR_RXCFG 0x00000400 /* RX /c/ ordered set */ #define E1000_ICR_GPI_EN0 0x00000800 /* GP Int 0 */ @@ -XXX,XX +XXX,XX @@ #define E1000_ICS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */ #define E1000_ICS_RXO E1000_ICR_RXO /* rx overrun */ #define E1000_ICS_RXT0 E1000_ICR_RXT0 /* rx timer intr */ +#define E1000_ICS_RXDW E1000_ICR_RXDW /* rx desc written back */ #define E1000_ICS_MDAC E1000_ICR_MDAC /* MDIO access complete */ #define E1000_ICS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */ #define E1000_ICS_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */ @@ -XXX,XX +XXX,XX @@ #define E1000_IMS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */ #define E1000_IMS_RXO E1000_ICR_RXO /* rx overrun */ #define E1000_IMS_RXT0 E1000_ICR_RXT0 /* rx timer intr */ +#define E1000_IMS_RXDW E1000_ICR_RXDW /* rx desc written back */ #define E1000_IMS_MDAC E1000_ICR_MDAC /* MDIO access complete */ #define E1000_IMS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */ #define E1000_IMS_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */ @@ -XXX,XX +XXX,XX @@ #define E1000_IMC_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. 
threshold */ #define E1000_IMC_RXO E1000_ICR_RXO /* rx overrun */ #define E1000_IMC_RXT0 E1000_ICR_RXT0 /* rx timer intr */ +#define E1000_IMC_RXDW E1000_ICR_RXDW /* rx desc written back */ #define E1000_IMC_MDAC E1000_ICR_MDAC /* MDIO access complete */ #define E1000_IMC_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */ #define E1000_IMC_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */ diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/igb_core.c +++ b/hw/net/igb_core.c @@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt, n |= E1000_ICS_RXDMT0; } - n |= E1000_ICR_RXT0; + n |= E1000_ICR_RXDW; trace_e1000e_rx_written_to_guest(rxr.i->idx); } -- 2.34.1
Also introduce: - Checks for RXDCTL/TXDCTL queue enable bits - IGB_NUM_VM_POOLS macro (Sec 1.5: Table 1-7) Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> --- hw/net/igb_core.c | 38 +++++++++++++++++++++++++++++++------- hw/net/igb_core.h | 1 + hw/net/igb_regs.h | 3 +++ 3 files changed, 35 insertions(+), 7 deletions(-) diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/igb_core.c +++ b/hw/net/igb_core.c @@ -XXX,XX +XXX,XX @@ igb_txdesc_writeback(IGBCore *core, dma_addr_t base, return igb_tx_wb_eic(core, txi->idx); } +static inline bool +igb_tx_enabled(IGBCore *core, const E1000E_RingInfo *txi) +{ + bool vmdq = core->mac[MRQC] & 1; + uint16_t qn = txi->idx; + uint16_t pool = qn % IGB_NUM_VM_POOLS; + + return (core->mac[TCTL] & E1000_TCTL_EN) && + (!vmdq || core->mac[VFTE] & BIT(pool)) && + (core->mac[TXDCTL0 + (qn * 16)] & E1000_TXDCTL_QUEUE_ENABLE); +} + static void igb_start_xmit(IGBCore *core, const IGB_TxRing *txr) { @@ -XXX,XX +XXX,XX @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr) const E1000E_RingInfo *txi = txr->i; uint32_t eic = 0; - /* TODO: check if the queue itself is enabled too. 
*/ - if (!(core->mac[TCTL] & E1000_TCTL_EN)) { + if (!igb_tx_enabled(core, txi)) { trace_e1000e_tx_disabled(); return; } @@ -XXX,XX +XXX,XX @@ igb_can_receive(IGBCore *core) for (i = 0; i < IGB_NUM_QUEUES; i++) { E1000E_RxRing rxr; + if (!(core->mac[RXDCTL0 + (i * 16)] & E1000_RXDCTL_QUEUE_ENABLE)) { + continue; + } igb_rx_ring_init(core, &rxr, i); if (igb_ring_enabled(core, rxr.i) && igb_has_rxbufs(core, rxr.i, 1)) { @@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr, if (core->mac[MRQC] & 1) { if (is_broadcast_ether_addr(ehdr->h_dest)) { - for (i = 0; i < 8; i++) { + for (i = 0; i < IGB_NUM_VM_POOLS; i++) { if (core->mac[VMOLR0 + i] & E1000_VMOLR_BAM) { queues |= BIT(i); } @@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr, f = ta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3]; f = (((ehdr->h_dest[5] << 8) | ehdr->h_dest[4]) >> f) & 0xfff; if (macp[f >> 5] & (1 << (f & 0x1f))) { - for (i = 0; i < 8; i++) { + for (i = 0; i < IGB_NUM_VM_POOLS; i++) { if (core->mac[VMOLR0 + i] & E1000_VMOLR_ROMPE) { queues |= BIT(i); } @@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr, } } } else { - for (i = 0; i < 8; i++) { + for (i = 0; i < IGB_NUM_VM_POOLS; i++) { if (core->mac[VMOLR0 + i] & E1000_VMOLR_AUPE) { mask |= BIT(i); } @@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr, queues = BIT(def_pl >> E1000_VT_CTL_DEFAULT_POOL_SHIFT); } + queues &= core->mac[VFRE]; igb_rss_parse_packet(core, core->rx_pkt, external_tx != NULL, rss_info); if (rss_info->queue & 1) { queues <<= 8; @@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt, igb_rx_fix_l4_csum(core, core->rx_pkt); for (i = 0; i < IGB_NUM_QUEUES; i++) { - if (!(queues & BIT(i))) { + if (!(queues & BIT(i)) || + !(core->mac[RXDCTL0 + (i * 16)] & E1000_RXDCTL_QUEUE_ENABLE)) { continue; } @@ 
-XXX,XX +XXX,XX @@ static void igb_set_vfmailbox(IGBCore *core, int index, uint32_t val) static void igb_vf_reset(IGBCore *core, uint16_t vfn) { + uint16_t qn0 = vfn; + uint16_t qn1 = vfn + IGB_NUM_VM_POOLS; + /* disable Rx and Tx for the VF*/ - core->mac[VFTE] &= ~BIT(vfn); + core->mac[RXDCTL0 + (qn0 * 16)] &= ~E1000_RXDCTL_QUEUE_ENABLE; + core->mac[RXDCTL0 + (qn1 * 16)] &= ~E1000_RXDCTL_QUEUE_ENABLE; + core->mac[TXDCTL0 + (qn0 * 16)] &= ~E1000_TXDCTL_QUEUE_ENABLE; + core->mac[TXDCTL0 + (qn1 * 16)] &= ~E1000_TXDCTL_QUEUE_ENABLE; core->mac[VFRE] &= ~BIT(vfn); + core->mac[VFTE] &= ~BIT(vfn); /* indicate VF reset to PF */ core->mac[VFLRE] |= BIT(vfn); /* VFLRE and mailbox use the same interrupt cause */ @@ -XXX,XX +XXX,XX @@ igb_phy_reg_init[] = { static const uint32_t igb_mac_reg_init[] = { [LEDCTL] = 2 | (3 << 8) | BIT(15) | (6 << 16) | (7 << 24), [EEMNGCTL] = BIT(31), + [TXDCTL0] = E1000_TXDCTL_QUEUE_ENABLE, [RXDCTL0] = E1000_RXDCTL_QUEUE_ENABLE | (1 << 16), [RXDCTL1] = 1 << 16, [RXDCTL2] = 1 << 16, diff --git a/hw/net/igb_core.h b/hw/net/igb_core.h index XXXXXXX..XXXXXXX 100644 --- a/hw/net/igb_core.h +++ b/hw/net/igb_core.h @@ -XXX,XX +XXX,XX @@ #define IGB_MSIX_VEC_NUM (10) #define IGBVF_MSIX_VEC_NUM (3) #define IGB_NUM_QUEUES (16) +#define IGB_NUM_VM_POOLS (8) typedef struct IGBCore IGBCore; diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h index XXXXXXX..XXXXXXX 100644 --- a/hw/net/igb_regs.h +++ b/hw/net/igb_regs.h @@ -XXX,XX +XXX,XX @@ union e1000_adv_rx_desc { #define E1000_MRQC_RSS_FIELD_IPV6_UDP 0x00800000 #define E1000_MRQC_RSS_FIELD_IPV6_UDP_EX 0x01000000 +/* Additional Transmit Descriptor Control definitions */ +#define E1000_TXDCTL_QUEUE_ENABLE 0x02000000 /* Enable specific Tx Queue */ + /* Additional Receive Descriptor Control definitions */ #define E1000_RXDCTL_QUEUE_ENABLE 0x02000000 /* Enable specific Rx Queue */ -- 2.34.1
Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> --- hw/net/igb_core.c | 41 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/igb_core.c +++ b/hw/net/igb_core.c @@ -XXX,XX +XXX,XX @@ igb_rx_l4_cso_enabled(IGBCore *core) return !!(core->mac[RXCSUM] & E1000_RXCSUM_TUOFLD); } +static bool +igb_rx_is_oversized(IGBCore *core, uint16_t qn, size_t size) +{ + uint16_t pool = qn % IGB_NUM_VM_POOLS; + bool lpe = !!(core->mac[VMOLR0 + pool] & E1000_VMOLR_LPE); + int max_ethernet_lpe_size = + core->mac[VMOLR0 + pool] & E1000_VMOLR_RLPML_MASK; + int max_ethernet_vlan_size = 1522; + + return size > (lpe ? max_ethernet_lpe_size : max_ethernet_vlan_size); +} + static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr, - E1000E_RSSInfo *rss_info, bool *external_tx) + size_t size, E1000E_RSSInfo *rss_info, + bool *external_tx) { static const int ta_shift[] = { 4, 3, 2, 0 }; uint32_t f, ra[2], *macp, rctl = core->mac[RCTL]; uint16_t queues = 0; + uint16_t oversized = 0; uint16_t vid = lduw_be_p(&PKT_GET_VLAN_HDR(ehdr)->h_tci) & VLAN_VID_MASK; bool accepted = false; int i; @@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr, } queues &= core->mac[VFRE]; - igb_rss_parse_packet(core, core->rx_pkt, external_tx != NULL, rss_info); - if (rss_info->queue & 1) { - queues <<= 8; + if (queues) { + for (i = 0; i < IGB_NUM_VM_POOLS; i++) { + if ((queues & BIT(i)) && igb_rx_is_oversized(core, i, size)) { + oversized |= BIT(i); + } + } + /* 8.19.37 increment ROC if packet is oversized for all queues */ + if (oversized == queues) { + trace_e1000x_rx_oversized(size); + e1000x_inc_reg_if_not_full(core->mac, ROC); + } + queues &= ~oversized; + } + + if (queues) { + igb_rss_parse_packet(core, core->rx_pkt, + external_tx != NULL, rss_info); + if (rss_info->queue & 1) { + queues <<= 8; + 
} } } else { switch (net_rx_pkt_get_packet_type(core->rx_pkt)) { @@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt, e1000x_vlan_enabled(core->mac), core->mac[VET] & 0xffff); - queues = igb_receive_assign(core, ehdr, &rss_info, external_tx); + queues = igb_receive_assign(core, ehdr, size, &rss_info, external_tx); if (!queues) { trace_e1000e_rx_flt_dropped(); return orig_size; -- 2.34.1
RSS for VFs is only enabled if VMOLR[n].RSSE is set. Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> --- hw/net/igb_core.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/igb_core.c +++ b/hw/net/igb_core.c @@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr, if (queues) { igb_rss_parse_packet(core, core->rx_pkt, external_tx != NULL, rss_info); + /* Sec 8.26.1: PQn = VFn + VQn*8 */ if (rss_info->queue & 1) { - queues <<= 8; + for (i = 0; i < IGB_NUM_VM_POOLS; i++) { + if ((queues & BIT(i)) && + (core->mac[VMOLR0 + i] & E1000_VMOLR_RSSE)) { + queues |= BIT(i + IGB_NUM_VM_POOLS); + queues &= ~BIT(i); + } + } } } } else { -- 2.34.1
Please note that loopback counters for VM-to-VM traffic are not implemented yet: VFGOTLBC, VFGPTLBC, VFGORLBC and VFGPRLBC. Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> --- hw/net/igb_core.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/igb_core.c +++ b/hw/net/igb_core.c @@ -XXX,XX +XXX,XX @@ igb_tx_pkt_send(IGBCore *core, struct igb_tx *tx, int queue_index) } static void -igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt) +igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt, int qn) { static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511, PTC1023, PTC1522 }; @@ -XXX,XX +XXX,XX @@ igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt) core->mac[GPTC] = core->mac[TPT]; core->mac[GOTCL] = core->mac[TOTL]; core->mac[GOTCH] = core->mac[TOTH]; + + if (core->mac[MRQC] & 1) { + uint16_t pool = qn % IGB_NUM_VM_POOLS; + + core->mac[PVFGOTC0 + (pool * 64)] += tot_len; + core->mac[PVFGPTC0 + (pool * 64)]++; + } } static void @@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core, core->mac[VET] & 0xffff); } if (igb_tx_pkt_send(core, tx, queue_index)) { - igb_on_tx_done_update_stats(core, tx->tx_pkt); + igb_on_tx_done_update_stats(core, tx->tx_pkt, queue_index); } } @@ -XXX,XX +XXX,XX @@ igb_write_to_rx_buffers(IGBCore *core, } static void -igb_update_rx_stats(IGBCore *core, size_t data_size, size_t data_fcs_size) +igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi, + size_t data_size, size_t data_fcs_size) { e1000x_update_rx_total_stats(core->mac, data_size, data_fcs_size); @@ -XXX,XX +XXX,XX @@ igb_update_rx_stats(IGBCore *core, size_t data_size, size_t data_fcs_size) default: break; } + + if (core->mac[MRQC] & 1) { + uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS; + + core->mac[PVFGORC0 + (pool * 64)] += data_size + 4; + core->mac[PVFGPRC0 + (pool * 64)]++; + if 
(net_rx_pkt_get_packet_type(core->rx_pkt) == ETH_PKT_MCAST) { + core->mac[PVFMPRC0 + (pool * 64)]++; + } + } } static inline bool @@ -XXX,XX +XXX,XX @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, } while (desc_offset < total_size); - igb_update_rx_stats(core, size, total_size); + igb_update_rx_stats(core, rxi, size, total_size); } static inline void -- 2.34.1
Add support for stripping/inserting VLAN for VFs. Had to move CSUM calculation back into the for loop, since packet data is pulled inside the loop based on strip VLAN decision for every VF. net_rx_pkt_fix_l4_csum should be extended to accept a buffer instead for igb. Work for a future patch. Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> --- hw/net/igb_core.c | 54 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 44 insertions(+), 10 deletions(-) diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/igb_core.c +++ b/hw/net/igb_core.c @@ -XXX,XX +XXX,XX @@ igb_rss_parse_packet(IGBCore *core, struct NetRxPkt *pkt, bool tx, info->queue = E1000_RSS_QUEUE(&core->mac[RETA], info->hash); } +static inline bool +igb_tx_insert_vlan(IGBCore *core, uint16_t qn, + struct igb_tx *tx, bool desc_vle) +{ + if (core->mac[MRQC] & 1) { + uint16_t pool = qn % IGB_NUM_VM_POOLS; + + if (core->mac[VMVIR0 + pool] & E1000_VMVIR_VLANA_DEFAULT) { + /* always insert default VLAN */ + desc_vle = true; + tx->vlan = core->mac[VMVIR0 + pool] & 0xffff; + } else if (core->mac[VMVIR0 + pool] & E1000_VMVIR_VLANA_NEVER) { + return false; + } + } + + return desc_vle && e1000x_vlan_enabled(core->mac); +} + static bool igb_setup_tx_offloads(IGBCore *core, struct igb_tx *tx) { @@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core, if (cmd_type_len & E1000_TXD_CMD_EOP) { if (!tx->skip_cp && net_tx_pkt_parse(tx->tx_pkt)) { - if (cmd_type_len & E1000_TXD_CMD_VLE) { + if (igb_tx_insert_vlan(core, queue_index, tx, + !!(cmd_type_len & E1000_TXD_CMD_VLE))) { net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, tx->vlan, core->mac[VET] & 0xffff); } @@ -XXX,XX +XXX,XX @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, igb_update_rx_stats(core, rxi, size, total_size); } +static bool +igb_rx_strip_vlan(IGBCore *core, const E1000E_RingInfo *rxi) +{ + if (core->mac[MRQC] & 1) { + uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS; + /* Sec 7.10.3.8: 
CTRL.VME is ignored, only VMOLR/RPLOLR is used */ + return (net_rx_pkt_get_packet_type(core->rx_pkt) == ETH_PKT_MCAST) ? + core->mac[RPLOLR] & E1000_RPLOLR_STRVLAN : + core->mac[VMOLR0 + pool] & E1000_VMOLR_STRVLAN; + } + + return e1000x_vlan_enabled(core->mac); +} + static inline void igb_rx_fix_l4_csum(IGBCore *core, struct NetRxPkt *pkt) { @@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt, ehdr = PKT_GET_ETH_HDR(filter_buf); net_rx_pkt_set_packet_type(core->rx_pkt, get_eth_packet_type(ehdr)); - - net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs, - e1000x_vlan_enabled(core->mac), - core->mac[VET] & 0xffff); + net_rx_pkt_set_protocols(core->rx_pkt, filter_buf, size); queues = igb_receive_assign(core, ehdr, size, &rss_info, external_tx); if (!queues) { @@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt, return orig_size; } - total_size = net_rx_pkt_get_total_len(core->rx_pkt) + - e1000x_fcs_len(core->mac); - - igb_rx_fix_l4_csum(core, core->rx_pkt); - for (i = 0; i < IGB_NUM_QUEUES; i++) { if (!(queues & BIT(i)) || !(core->mac[RXDCTL0 + (i * 16)] & E1000_RXDCTL_QUEUE_ENABLE)) { @@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt, igb_rx_ring_init(core, &rxr, i); + net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs, + igb_rx_strip_vlan(core, rxr.i), + core->mac[VET] & 0xffff); + + total_size = net_rx_pkt_get_total_len(core->rx_pkt) + + e1000x_fcs_len(core->mac); + if (!igb_has_rxbufs(core, rxr.i, total_size)) { n |= E1000_ICS_RXO; trace_e1000e_rx_not_written_to_guest(rxr.i->idx); continue; } + igb_rx_fix_l4_csum(core, core->rx_pkt); igb_write_packet_to_guest(core, core->rx_pkt, &rxr, &rss_info); core->mac[EICR] |= igb_rx_wb_eic(core, rxr.i->idx); -- 2.34.1