From 7fe05e125d5f730bd2d0fc53985bee77b6c762f0 Mon Sep 17 00:00:00 2001 From: Benjamin Mikailenko Date: Fri, 15 Jul 2022 18:27:07 -0400 Subject: ice: Fix VSI rebuild WARN_ON check for VF In commit b03d519d3460 ("ice: store VF pointer instead of VF ID") WARN_ON checks were added to validate the vsi->vf pointer and catch programming errors. However, one check to vsi->vf was missed. This caused a call trace when resetting VFs. Fix ice_vsi_rebuild by encompassing VF pointer in WARN_ON check. Fixes: b03d519d3460 ("ice: store VF pointer instead of VF ID") Signed-off-by: Benjamin Mikailenko Tested-by: Marek Szlosek Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet/intel/ice/ice_lib.c') diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index a830f7f9aed0..0d4dbca88964 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -3181,7 +3181,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) pf = vsi->back; vtype = vsi->type; - if (WARN_ON(vtype == ICE_VSI_VF) && !vsi->vf) + if (WARN_ON(vtype == ICE_VSI_VF && !vsi->vf)) return -EINVAL; ice_vsi_init_vlan_ops(vsi); -- cgit v1.2.3-59-g8ed1b From cf6b82fd3fbc8f747333ef5005798a90b5345bd4 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 27 Jul 2022 16:15:58 -0700 Subject: ice: initialize cached_phctime when creating Rx rings When we create new Rx rings, the cached_phctime field is zero initialized. This could result in incorrect timestamp reporting due to the cached value not yet being updated. Although a background task will periodically update the cached value, ensure it matches the existing cached value in the PF structure at ring initialization. Signed-off-by: Jacob Keller Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ethtool.c | 1 + drivers/net/ethernet/intel/ice/ice_lib.c | 1 + 2 files changed, 2 insertions(+) (limited to 'drivers/net/ethernet/intel/ice/ice_lib.c') diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index bbf6a300078e..3a18762cc38f 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -2885,6 +2885,7 @@ process_rx: /* clone ring and setup updated count */ rx_rings[i] = *vsi->rx_rings[i]; rx_rings[i].count = new_rx_cnt; + rx_rings[i].cached_phctime = pf->ptp.cached_phc_time; rx_rings[i].desc = NULL; rx_rings[i].rx_buf = NULL; /* this is to allow wr32 to have something to write to diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index a830f7f9aed0..679529040edd 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1522,6 +1522,7 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi) ring->netdev = vsi->netdev; ring->dev = dev; ring->count = vsi->num_rx_desc; + ring->cached_phctime = pf->ptp.cached_phc_time; WRITE_ONCE(vsi->rx_rings[i], ring); } -- cgit v1.2.3-59-g8ed1b From abddafd4585cc825d454da3cf308ad1226f6c554 Mon Sep 17 00:00:00 2001 From: Grzegorz Siwik Date: Fri, 12 Aug 2022 15:25:49 +0200 Subject: ice: Fix clearing of promisc mode with bridge over bond When at least two interfaces are bonded and a bridge is enabled on the bond, an error can occur when the bridge is removed and re-added. The reason for the error is because promiscuous mode was not fully cleared from the VLAN VSI in the hardware. With this change, promiscuous mode is properly removed when the bridge disconnects from bonding. [ 1033.676359] bond1: link status definitely down for interface enp95s0f0, disabling it [ 1033.676366] bond1: making interface enp175s0f0 the new active one [ 1033.676369] device enp95s0f0 left promiscuous mode [ 1033.676522] device enp175s0f0 entered promiscuous mode [ 1033.676901] ice 0000:af:00.0 enp175s0f0: Error setting Multicast promiscuous mode on VSI 6 [ 1041.795662] ice 0000:af:00.0 enp175s0f0: Error setting Multicast promiscuous mode on VSI 6 [ 1041.944826] bond1: link status definitely down for interface enp175s0f0, disabling it [ 1041.944874] device enp175s0f0 left promiscuous mode [ 1041.944918] bond1: now running without any active interface! Fixes: c31af68a1b94 ("ice: Add outer_vlan_ops and VSI specific VLAN ops implementations") Co-developed-by: Jesse Brandeburg Signed-off-by: Jesse Brandeburg Signed-off-by: Grzegorz Siwik Link: https://lore.kernel.org/all/CAK8fFZ7m-KR57M_rYX6xZN39K89O=LGooYkKsu6HKt0Bs+x6xQ@mail.gmail.com/ Tested-by: Jaroslav Pulchart Tested-by: Igor Raits Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lib.c | 6 +++++- drivers/net/ethernet/intel/ice/ice_main.c | 12 +++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet/intel/ice/ice_lib.c') diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 0d4dbca88964..733c455f6574 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -4062,7 +4062,11 @@ int ice_vsi_del_vlan_zero(struct ice_vsi *vsi) if (err && err != -EEXIST) return err; - return 0; + /* when deleting the last VLAN filter, make sure to disable the VLAN + * promisc mode so the filter isn't left by accident + */ + return ice_clear_vsi_promisc(&vsi->back->hw, vsi->idx, + ICE_MCAST_VLAN_PROMISC_BITS, 0); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index eb40526ee179..4ecaf40cf946 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -267,8 +267,10 @@ static int ice_set_promisc(struct ice_vsi *vsi, u8 promisc_m) status = ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx, promisc_m, 0); } + if (status && status != -EEXIST) + return status; - return status; + return 0; } /** @@ -3573,6 +3575,14 @@ ice_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) while (test_and_set_bit(ICE_CFG_BUSY, vsi->state)) usleep_range(1000, 2000); + ret = ice_clear_vsi_promisc(&vsi->back->hw, vsi->idx, + ICE_MCAST_VLAN_PROMISC_BITS, vid); + if (ret) { + netdev_err(netdev, "Error clearing multicast promiscuous mode on VSI %i\n", + vsi->vsi_num); + vsi->current_netdev_flags |= IFF_ALLMULTI; + } + vlan_ops = ice_get_compat_vsi_vlan_ops(vsi); /* Make sure VLAN delete is successful before updating VLAN -- cgit v1.2.3-59-g8ed1b From dddd406d9dbe4a94919b377a84b696e97f709379 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Wed, 27 Jul 2022 09:24:05 +0200 Subject: ice: Implement control of FCS/CRC stripping The driver can allow the user to configure whether the CRC aka the FCS (Frame Check Sequence) is DMA'd to the host as part of the receive buffer. The driver usually wants this feature disabled so that the hardware checks the FCS and strips it in order to save PCI bandwidth. Control the reception of FCS to the host using the command: ethtool -K eth0 rx-fcs The default shown in ethtool -k eth0 | grep fcs; should be "off", as the hardware will drop any frame with a bad checksum, and DMA of the checksum is useless overhead especially for small packets. Testing Hints: test the FCS/CRC arrives with received packets using tcpdump -nnpi eth0 -xxxx and it should show crc data as the last 4 bytes of the packet. Can also use wireshark to turn on CRC checking and check the data is correct. Signed-off-by: Jesse Brandeburg Co-developed-by: Grzegorz Nitka Signed-off-by: Grzegorz Nitka Co-developed-by: Benjamin Mikailenko Signed-off-by: Benjamin Mikailenko Co-developed-by: Anatolii Gerasymenko Signed-off-by: Anatolii Gerasymenko Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 1 + drivers/net/ethernet/intel/ice/ice_base.c | 2 +- drivers/net/ethernet/intel/ice/ice_ethtool.c | 5 +--- drivers/net/ethernet/intel/ice/ice_lib.c | 22 +++++++++++++++ drivers/net/ethernet/intel/ice/ice_lib.h | 2 ++ drivers/net/ethernet/intel/ice/ice_main.c | 40 ++++++++++++++++++++++++++++ drivers/net/ethernet/intel/ice/ice_txrx.h | 3 ++- 7 files changed, 69 insertions(+), 6 deletions(-) (limited to 'drivers/net/ethernet/intel/ice/ice_lib.c') diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index cc5b85afd437..267942418847 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -854,6 +854,7 @@ ice_fetch_u64_stats_per_ring(struct u64_stats_sync *syncp, struct ice_q_stats stats, u64 *pkts, u64 *bytes); int ice_up(struct ice_vsi *vsi); int ice_down(struct ice_vsi *vsi); +int ice_down_up(struct ice_vsi *vsi); int ice_vsi_cfg(struct ice_vsi *vsi); struct ice_vsi *ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi); int ice_vsi_determine_xdp_res(struct ice_vsi *vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 136d7911adb4..6f092e06054e 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -417,7 +417,7 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring) /* Strip the Ethernet CRC bytes before the packet is posted to host * memory. */ - rlan_ctx.crcstrip = 1; + rlan_ctx.crcstrip = !(ring->flags & ICE_RX_FLAGS_CRC_STRIP_DIS); /* L2TSEL flag defines the reported L2 Tags in the receive descriptor * and it needs to remain 1 for non-DVM capable configurations to not diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 0f0faa8dc7fb..ad6cffb2d3e0 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -1289,10 +1289,7 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) } if (test_bit(ICE_FLAG_LEGACY_RX, change_flags)) { /* down and up VSI so that changes of Rx cfg are reflected. */ - if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state)) { - ice_down(vsi); - ice_up(vsi); - } + ice_down_up(vsi); } /* don't allow modification of this flag when a single VF is in * promiscuous mode because it's not supported diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 679529040edd..afecf2494ad6 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1562,6 +1562,22 @@ void ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena) kfree(lut); } +/** + * ice_vsi_cfg_crc_strip - Configure CRC stripping for a VSI + * @vsi: VSI to be configured + * @disable: set to true to have FCS / CRC in the frame data + */ +void ice_vsi_cfg_crc_strip(struct ice_vsi *vsi, bool disable) +{ + int i; + + ice_for_each_rxq(vsi, i) + if (disable) + vsi->rx_rings[i]->flags |= ICE_RX_FLAGS_CRC_STRIP_DIS; + else + vsi->rx_rings[i]->flags &= ~ICE_RX_FLAGS_CRC_STRIP_DIS; +} + /** * ice_vsi_cfg_rss_lut_key - Configure RSS params for a VSI * @vsi: VSI to be configured @@ -3277,6 +3293,12 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) */ if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) ice_vsi_cfg_rss_lut_key(vsi); + + /* disable or enable CRC stripping */ + if (vsi->netdev) + ice_vsi_cfg_crc_strip(vsi, !!(vsi->netdev->features & + NETIF_F_RXFCS)); + break; case ICE_VSI_VF: ret = ice_vsi_alloc_q_vectors(vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index 8712b1d2ceec..ec4bf0c89857 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -89,6 +89,8 @@ void ice_vsi_free_tx_rings(struct ice_vsi *vsi); void ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena); +void ice_vsi_cfg_crc_strip(struct ice_vsi *vsi, bool disable); + void ice_update_tx_ring_stats(struct ice_tx_ring *ring, u64 pkts, u64 bytes); void ice_update_rx_ring_stats(struct ice_rx_ring *ring, u64 pkts, u64 bytes); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index eb40526ee179..a827045198cc 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3374,6 +3374,11 @@ static void ice_set_netdev_features(struct net_device *netdev) if (is_dvm_ena) netdev->hw_features |= NETIF_F_HW_VLAN_STAG_RX | NETIF_F_HW_VLAN_STAG_TX; + + /* Leave CRC / FCS stripping enabled by default, but allow the value to + * be changed at runtime + */ + netdev->hw_features |= NETIF_F_RXFCS; } /** @@ -5977,6 +5982,16 @@ ice_set_features(struct net_device *netdev, netdev_features_t features) if (ret) return ret; + /* Turn on receive of FCS aka CRC, and after setting this + * flag the packet data will have the 4 byte CRC appended + */ + if (changed & NETIF_F_RXFCS) { + ice_vsi_cfg_crc_strip(vsi, !!(features & NETIF_F_RXFCS)); + ret = ice_down_up(vsi); + if (ret) + return ret; + } + if (changed & NETIF_F_NTUPLE) { bool ena = !!(features & NETIF_F_NTUPLE); @@ -6680,6 +6695,31 @@ int ice_down(struct ice_vsi *vsi) return 0; } +/** + * ice_down_up - shutdown the VSI connection and bring it up + * @vsi: the VSI to be reconnected + */ +int ice_down_up(struct ice_vsi *vsi) +{ + int ret; + + /* if DOWN already set, nothing to do */ + if (test_and_set_bit(ICE_VSI_DOWN, vsi->state)) + return 0; + + ret = ice_down(vsi); + if (ret) + return ret; + + ret = ice_up(vsi); + if (ret) { + netdev_err(vsi->netdev, "reallocating resources failed during netdev features change, may need to reload driver\n"); + return ret; + } + + return 0; +} + /** * ice_vsi_setup_tx_rings - Allocate VSI Tx queue resources * @vsi: VSI having resources allocated diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index ca902af54bb4..932b5661ec4d 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -295,10 +295,11 @@ struct ice_rx_ring { struct xsk_buff_pool *xsk_pool; struct sk_buff *skb; dma_addr_t dma; /* physical address of ring */ -#define ICE_RX_FLAGS_RING_BUILD_SKB BIT(1) u64 cached_phctime; u8 dcb_tc; /* Traffic class of ring */ u8 ptp_rx; +#define ICE_RX_FLAGS_RING_BUILD_SKB BIT(1) +#define ICE_RX_FLAGS_CRC_STRIP_DIS BIT(2) u8 flags; } ____cacheline_internodealigned_in_smp; -- cgit v1.2.3-59-g8ed1b From 9ead7e74bfd6dd54db12ef133b8604add72511de Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 11 Aug 2022 20:21:49 +0200 Subject: ice: xsk: use Rx ring's XDP ring when picking NAPI context Ice driver allocates per cpu XDP queues so that redirect path can safely use smp_processor_id() as an index to the array. At the same time though, XDP rings are used to pick NAPI context to call napi_schedule() or set NAPIF_STATE_MISSED. When user reduces queue count, say to 8, and num_possible_cpus() of underlying platform is 44, then this means queue vectors with correlated NAPI contexts will carry several XDP queues. This in turn can result in a broken behavior where NAPI context of interest will never be scheduled and AF_XDP socket will not process any traffic. To fix this, let us change the way how XDP rings are assigned to Rx rings and use this information later on when setting ice_tx_ring::xsk_pool pointer. For each Rx ring, grab the associated queue vector and walk through Tx ring's linked list. Once we stumble upon XDP ring in it, assign this ring to ice_rx_ring::xdp_ring. Previous [0] approach of fixing this issue was for txonly scenario because of the described grouping of XDP rings across queue vectors. So, relying on Rx ring meant that NAPI context could be scheduled with a queue vector without XDP ring with associated XSK pool. [0]: https://lore.kernel.org/netdev/20220707161128.54215-1-maciej.fijalkowski@intel.com/ Fixes: 2d4238f55697 ("ice: Add support for AF_XDP") Fixes: 22bf877e528f ("ice: introduce XDP_TX fallback path") Signed-off-by: Maciej Fijalkowski Tested-by: George Kuruvinakunnel Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 36 ++++++++++++++++++++----------- drivers/net/ethernet/intel/ice/ice_lib.c | 4 ++-- drivers/net/ethernet/intel/ice/ice_main.c | 25 ++++++++++++++------- drivers/net/ethernet/intel/ice/ice_xsk.c | 12 +++++------ 4 files changed, 48 insertions(+), 29 deletions(-) (limited to 'drivers/net/ethernet/intel/ice/ice_lib.c') diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index cc5b85afd437..841fa149c407 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -684,8 +684,8 @@ static inline void ice_set_ring_xdp(struct ice_tx_ring *ring) * ice_xsk_pool - get XSK buffer pool bound to a ring * @ring: Rx ring to use * - * Returns a pointer to xdp_umem structure if there is a buffer pool present, - * NULL otherwise. + * Returns a pointer to xsk_buff_pool structure if there is a buffer pool + * present, NULL otherwise. */ static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_rx_ring *ring) { @@ -699,23 +699,33 @@ static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_rx_ring *ring) } /** - * ice_tx_xsk_pool - get XSK buffer pool bound to a ring - * @ring: Tx ring to use + * ice_tx_xsk_pool - assign XSK buff pool to XDP ring + * @vsi: pointer to VSI + * @qid: index of a queue to look at XSK buff pool presence * - * Returns a pointer to xdp_umem structure if there is a buffer pool present, - * NULL otherwise. Tx equivalent of ice_xsk_pool. + * Sets XSK buff pool pointer on XDP ring. + * + * XDP ring is picked from Rx ring, whereas Rx ring is picked based on provided + * queue id. Reason for doing so is that queue vectors might have assigned more + * than one XDP ring, e.g. when user reduced the queue count on netdev; Rx ring + * carries a pointer to one of these XDP rings for its own purposes, such as + * handling XDP_TX action, therefore we can piggyback here on the + * rx_ring->xdp_ring assignment that was done during XDP rings initialization. */ -static inline struct xsk_buff_pool *ice_tx_xsk_pool(struct ice_tx_ring *ring) +static inline void ice_tx_xsk_pool(struct ice_vsi *vsi, u16 qid) { - struct ice_vsi *vsi = ring->vsi; - u16 qid; + struct ice_tx_ring *ring; - qid = ring->q_index - vsi->alloc_txq; + ring = vsi->rx_rings[qid]->xdp_ring; + if (!ring) + return; - if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps)) - return NULL; + if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps)) { + ring->xsk_pool = NULL; + return; + } - return xsk_get_pool_from_qid(vsi->netdev, qid); + ring->xsk_pool = xsk_get_pool_from_qid(vsi->netdev, qid); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 733c455f6574..0c4ec9264071 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1986,8 +1986,8 @@ int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi) if (ret) return ret; - ice_for_each_xdp_txq(vsi, i) - vsi->xdp_rings[i]->xsk_pool = ice_tx_xsk_pool(vsi->xdp_rings[i]); + ice_for_each_rxq(vsi, i) + ice_tx_xsk_pool(vsi, i); return ret; } diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 4ecaf40cf946..173fe6c31341 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2581,7 +2581,6 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) if (ice_setup_tx_ring(xdp_ring)) goto free_xdp_rings; ice_set_ring_xdp(xdp_ring); - xdp_ring->xsk_pool = ice_tx_xsk_pool(xdp_ring); spin_lock_init(&xdp_ring->tx_lock); for (j = 0; j < xdp_ring->count; j++) { tx_desc = ICE_TX_DESC(xdp_ring, j); @@ -2589,13 +2588,6 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) } } - ice_for_each_rxq(vsi, i) { - if (static_key_enabled(&ice_xdp_locking_key)) - vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i % vsi->num_xdp_txq]; - else - vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i]; - } - return 0; free_xdp_rings: @@ -2685,6 +2677,23 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog) xdp_rings_rem -= xdp_rings_per_v; } + ice_for_each_rxq(vsi, i) { + if (static_key_enabled(&ice_xdp_locking_key)) { + vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i % vsi->num_xdp_txq]; + } else { + struct ice_q_vector *q_vector = vsi->rx_rings[i]->q_vector; + struct ice_tx_ring *ring; + + ice_for_each_tx_ring(ring, q_vector->tx) { + if (ice_ring_is_xdp(ring)) { + vsi->rx_rings[i]->xdp_ring = ring; + break; + } + } + } + ice_tx_xsk_pool(vsi, i); + } + /* omit the scheduler update if in reset path; XDP queues will be * taken into account at the end of ice_vsi_rebuild, where * ice_cfg_vsi_lan is being called diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 45f88e6ec25e..e48e29258450 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -243,7 +243,7 @@ static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) if (err) goto free_buf; ice_set_ring_xdp(xdp_ring); - xdp_ring->xsk_pool = ice_tx_xsk_pool(xdp_ring); + ice_tx_xsk_pool(vsi, q_idx); } err = ice_vsi_cfg_rxq(rx_ring); @@ -359,7 +359,7 @@ xsk_pool_if_up: if (if_running) { ret = ice_qp_ena(vsi, qid); if (!ret && pool_present) - napi_schedule(&vsi->xdp_rings[qid]->q_vector->napi); + napi_schedule(&vsi->rx_rings[qid]->xdp_ring->q_vector->napi); else if (ret) netdev_err(vsi->netdev, "ice_qp_ena error = %d\n", ret); } @@ -950,13 +950,13 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, if (!ice_is_xdp_ena_vsi(vsi)) return -EINVAL; - if (queue_id >= vsi->num_txq) + if (queue_id >= vsi->num_txq || queue_id >= vsi->num_rxq) return -EINVAL; - if (!vsi->xdp_rings[queue_id]->xsk_pool) - return -EINVAL; + ring = vsi->rx_rings[queue_id]->xdp_ring; - ring = vsi->xdp_rings[queue_id]; + if (!ring->xsk_pool) + return -EINVAL; /* The idea here is that if NAPI is running, mark a miss, so * it will run again. If not, trigger an interrupt and -- cgit v1.2.3-59-g8ed1b From a509702cac95a8b450228a037c8542f57e538e5b Mon Sep 17 00:00:00 2001 From: Ding Hui Date: Wed, 17 Aug 2022 18:53:18 +0800 Subject: ice: Fix crash by keep old cfg when update TCs more than queues There are problems if allocated queues less than Traffic Classes. Commit a632b2a4c920 ("ice: ethtool: Prohibit improper channel config for DCB") already disallow setting less queues than TCs. Another case is if we first set less queues, and later update more TCs config due to LLDP, ice_vsi_cfg_tc() will failed but left dirty num_txq/rxq and tc_cfg in vsi, that will cause invalid pointer access. [ 95.968089] ice 0000:3b:00.1: More TCs defined than queues/rings allocated. [ 95.968092] ice 0000:3b:00.1: Trying to use more Rx queues (8), than were allocated (1)! [ 95.968093] ice 0000:3b:00.1: Failed to config TC for VSI index: 0 [ 95.969621] general protection fault: 0000 [#1] SMP NOPTI [ 95.969705] CPU: 1 PID: 58405 Comm: lldpad Kdump: loaded Tainted: G U W O --------- -t - 4.18.0 #1 [ 95.969867] Hardware name: O.E.M/BC11SPSCB10, BIOS 8.23 12/30/2021 [ 95.969992] RIP: 0010:devm_kmalloc+0xa/0x60 [ 95.970052] Code: 5c ff ff ff 31 c0 5b 5d 41 5c c3 b8 f4 ff ff ff eb f4 0f 1f 40 00 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 89 f8 89 d1 <8b> 97 60 02 00 00 48 8d 7e 18 48 39 f7 72 3f 55 89 ce 53 48 8b 4c [ 95.970344] RSP: 0018:ffffc9003f553888 EFLAGS: 00010206 [ 95.970425] RAX: dead000000000200 RBX: ffffea003c425b00 RCX: 00000000006080c0 [ 95.970536] RDX: 00000000006080c0 RSI: 0000000000000200 RDI: dead000000000200 [ 95.970648] RBP: dead000000000200 R08: 00000000000463c0 R09: ffff888ffa900000 [ 95.970760] R10: 0000000000000000 R11: 0000000000000002 R12: ffff888ff6b40100 [ 95.970870] R13: ffff888ff6a55018 R14: 0000000000000000 R15: ffff888ff6a55460 [ 95.970981] FS: 00007f51b7d24700(0000) GS:ffff88903ee80000(0000) knlGS:0000000000000000 [ 95.971108] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 95.971197] CR2: 00007fac5410d710 CR3: 0000000f2c1de002 CR4: 00000000007606e0 [ 95.971309] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 95.971419] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 95.971530] PKRU: 55555554 [ 95.971573] Call Trace: [ 95.971622] ice_setup_rx_ring+0x39/0x110 [ice] [ 95.971695] ice_vsi_setup_rx_rings+0x54/0x90 [ice] [ 95.971774] ice_vsi_open+0x25/0x120 [ice] [ 95.971843] ice_open_internal+0xb8/0x1f0 [ice] [ 95.971919] ice_ena_vsi+0x4f/0xd0 [ice] [ 95.971987] ice_dcb_ena_dis_vsi.constprop.5+0x29/0x90 [ice] [ 95.972082] ice_pf_dcb_cfg+0x29a/0x380 [ice] [ 95.972154] ice_dcbnl_setets+0x174/0x1b0 [ice] [ 95.972220] dcbnl_ieee_set+0x89/0x230 [ 95.972279] ? dcbnl_ieee_del+0x150/0x150 [ 95.972341] dcb_doit+0x124/0x1b0 [ 95.972392] rtnetlink_rcv_msg+0x243/0x2f0 [ 95.972457] ? dcb_doit+0x14d/0x1b0 [ 95.972510] ? __kmalloc_node_track_caller+0x1d3/0x280 [ 95.972591] ? rtnl_calcit.isra.31+0x100/0x100 [ 95.972661] netlink_rcv_skb+0xcf/0xf0 [ 95.972720] netlink_unicast+0x16d/0x220 [ 95.972781] netlink_sendmsg+0x2ba/0x3a0 [ 95.975891] sock_sendmsg+0x4c/0x50 [ 95.979032] ___sys_sendmsg+0x2e4/0x300 [ 95.982147] ? kmem_cache_alloc+0x13e/0x190 [ 95.985242] ? __wake_up_common_lock+0x79/0x90 [ 95.988338] ? __check_object_size+0xac/0x1b0 [ 95.991440] ? _copy_to_user+0x22/0x30 [ 95.994539] ? move_addr_to_user+0xbb/0xd0 [ 95.997619] ? __sys_sendmsg+0x53/0x80 [ 96.000664] __sys_sendmsg+0x53/0x80 [ 96.003747] do_syscall_64+0x5b/0x1d0 [ 96.006862] entry_SYSCALL_64_after_hwframe+0x65/0xca Only update num_txq/rxq when passed check, and restore tc_cfg if setup queue map failed. Fixes: a632b2a4c920 ("ice: ethtool: Prohibit improper channel config for DCB") Signed-off-by: Ding Hui Reviewed-by: Anatolii Gerasymenko Tested-by: Arpana Arland (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lib.c | 42 ++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 16 deletions(-) (limited to 'drivers/net/ethernet/intel/ice/ice_lib.c') diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 0c4ec9264071..58d483e2f539 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -914,7 +914,7 @@ static void ice_set_dflt_vsi_ctx(struct ice_hw *hw, struct ice_vsi_ctx *ctxt) */ static int ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) { - u16 offset = 0, qmap = 0, tx_count = 0, pow = 0; + u16 offset = 0, qmap = 0, tx_count = 0, rx_count = 0, pow = 0; u16 num_txq_per_tc, num_rxq_per_tc; u16 qcount_tx = vsi->alloc_txq; u16 qcount_rx = vsi->alloc_rxq; @@ -981,23 +981,25 @@ static int ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) * at least 1) */ if (offset) - vsi->num_rxq = offset; + rx_count = offset; else - vsi->num_rxq = num_rxq_per_tc; + rx_count = num_rxq_per_tc; - if (vsi->num_rxq > vsi->alloc_rxq) { + if (rx_count > vsi->alloc_rxq) { dev_err(ice_pf_to_dev(vsi->back), "Trying to use more Rx queues (%u), than were allocated (%u)!\n", - vsi->num_rxq, vsi->alloc_rxq); + rx_count, vsi->alloc_rxq); return -EINVAL; } - vsi->num_txq = tx_count; - if (vsi->num_txq > vsi->alloc_txq) { + if (tx_count > vsi->alloc_txq) { dev_err(ice_pf_to_dev(vsi->back), "Trying to use more Tx queues (%u), than were allocated (%u)!\n", - vsi->num_txq, vsi->alloc_txq); + tx_count, vsi->alloc_txq); return -EINVAL; } + vsi->num_txq = tx_count; + vsi->num_rxq = rx_count; + if (vsi->type == ICE_VSI_VF && vsi->num_txq != vsi->num_rxq) { dev_dbg(ice_pf_to_dev(vsi->back), "VF VSI should have same number of Tx and Rx queues. Hence making them equal\n"); /* since there is a chance that num_rxq could have been changed @@ -3490,6 +3492,7 @@ ice_vsi_setup_q_map_mqprio(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt, u16 pow, offset = 0, qcount_tx = 0, qcount_rx = 0, qmap; u16 tc0_offset = vsi->mqprio_qopt.qopt.offset[0]; int tc0_qcount = vsi->mqprio_qopt.qopt.count[0]; + u16 new_txq, new_rxq; u8 netdev_tc = 0; int i; @@ -3530,21 +3533,24 @@ ice_vsi_setup_q_map_mqprio(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt, } } - /* Set actual Tx/Rx queue pairs */ - vsi->num_txq = offset + qcount_tx; - if (vsi->num_txq > vsi->alloc_txq) { + new_txq = offset + qcount_tx; + if (new_txq > vsi->alloc_txq) { dev_err(ice_pf_to_dev(vsi->back), "Trying to use more Tx queues (%u), than were allocated (%u)!\n", - vsi->num_txq, vsi->alloc_txq); + new_txq, vsi->alloc_txq); return -EINVAL; } - vsi->num_rxq = offset + qcount_rx; - if (vsi->num_rxq > vsi->alloc_rxq) { + new_rxq = offset + qcount_rx; + if (new_rxq > vsi->alloc_rxq) { dev_err(ice_pf_to_dev(vsi->back), "Trying to use more Rx queues (%u), than were allocated (%u)!\n", - vsi->num_rxq, vsi->alloc_rxq); + new_rxq, vsi->alloc_rxq); return -EINVAL; } + /* Set actual Tx/Rx queue pairs */ + vsi->num_txq = new_txq; + vsi->num_rxq = new_rxq; + /* Setup queue TC[0].qmap for given VSI context */ ctxt->info.tc_mapping[0] = cpu_to_le16(qmap); ctxt->info.q_mapping[0] = cpu_to_le16(vsi->rxq_map[0]); @@ -3576,6 +3582,7 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc) { u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; struct ice_pf *pf = vsi->back; + struct ice_tc_cfg old_tc_cfg; struct ice_vsi_ctx *ctx; struct device *dev; int i, ret = 0; @@ -3600,6 +3607,7 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc) max_txqs[i] = vsi->num_txq; } + memcpy(&old_tc_cfg, &vsi->tc_cfg, sizeof(old_tc_cfg)); vsi->tc_cfg.ena_tc = ena_tc; vsi->tc_cfg.numtc = num_tc; @@ -3616,8 +3624,10 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc) else ret = ice_vsi_setup_q_map(vsi, ctx); - if (ret) + if (ret) { + memcpy(&vsi->tc_cfg, &old_tc_cfg, sizeof(vsi->tc_cfg)); goto out; + } /* must to indicate which section of VSI context are being modified */ ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID); -- cgit v1.2.3-59-g8ed1b From a286ba73871411b8615eb52e9de1dd8c5078ed3d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 26 Sep 2022 13:09:37 +0200 Subject: ice: reorder PF/representor devlink port register/unregister flows Make sure that netdevice is registered/unregistered while devlink port is registered. Signed-off-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_lib.c | 6 +++--- drivers/net/ethernet/intel/ice/ice_main.c | 12 ++++++------ drivers/net/ethernet/intel/ice/ice_repr.c | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers/net/ethernet/intel/ice/ice_lib.c') diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 8a80da8e910e..938ba8c215cb 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -2988,9 +2988,6 @@ int ice_vsi_release(struct ice_vsi *vsi) clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state); } - if (vsi->type == ICE_VSI_PF) - ice_devlink_destroy_pf_port(pf); - if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) ice_rss_clean(vsi); @@ -3048,6 +3045,9 @@ int ice_vsi_release(struct ice_vsi *vsi) } } + if (vsi->type == ICE_VSI_PF) + ice_devlink_destroy_pf_port(pf); + if (vsi->type == ICE_VSI_VF && vsi->agg_node && vsi->agg_node->valid) vsi->agg_node->num_vsis--; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 0ccc8a750374..747f27c4e761 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4599,6 +4599,10 @@ static int ice_register_netdev(struct ice_pf *pf) if (!vsi || !vsi->netdev) return -EIO; + err = ice_devlink_create_pf_port(pf); + if (err) + goto err_devlink_create; + err = register_netdev(vsi->netdev); if (err) goto err_register_netdev; @@ -4606,17 +4610,13 @@ static int ice_register_netdev(struct ice_pf *pf) set_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state); netif_carrier_off(vsi->netdev); netif_tx_stop_all_queues(vsi->netdev); - err = ice_devlink_create_pf_port(pf); - if (err) - goto err_devlink_create; devlink_port_type_eth_set(&pf->devlink_port, vsi->netdev); return 0; -err_devlink_create: - unregister_netdev(vsi->netdev); - clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state); err_register_netdev: + ice_devlink_destroy_pf_port(pf); +err_devlink_create: free_netdev(vsi->netdev); vsi->netdev = NULL; clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state); diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c index 0dac67cd9c77..bd31748aae1b 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.c +++ b/drivers/net/ethernet/intel/ice/ice_repr.c @@ -377,10 +377,10 @@ static void ice_repr_rem(struct ice_vf *vf) if (!vf->repr) return; - ice_devlink_destroy_vf_port(vf); kfree(vf->repr->q_vector); vf->repr->q_vector = NULL; unregister_netdev(vf->repr->netdev); + ice_devlink_destroy_vf_port(vf); free_netdev(vf->repr->netdev); vf->repr->netdev = NULL; #ifdef CONFIG_ICE_SWITCHDEV -- cgit v1.2.3-59-g8ed1b