From 81b1d548d00bcd028303c4f3150fa753b9b8aa71 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Thu, 11 Nov 2021 22:14:02 +0800 Subject: hamradio: remove needs_free_netdev to avoid UAF The former patch "defer 6pack kfree after unregister_netdev" reorders the kfree of two buffer after the unregister_netdev to prevent the race condition. It also adds free_netdev() function in sixpack_close(), which is a direct copy from the similar code in mkiss_close(). However, in sixpack driver, the flag needs_free_netdev is set to true in sp_setup(), hence the unregister_netdev() will free the netdev automatically. Therefore, as the sp is netdev_priv, use-after-free occurs. This patch removes the needs_free_netdev = true and just let the free_netdev to finish this deallocation task. Fixes: 0b9111922b1f ("hamradio: defer 6pack kfree after unregister_netdev") Signed-off-by: Lin Ma Link: https://lore.kernel.org/r/20211111141402.7551-1-linma@zju.edu.cn Signed-off-by: Jakub Kicinski --- drivers/net/hamradio/6pack.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index bfdf89e54752..8a19a06b505d 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -306,7 +306,6 @@ static void sp_setup(struct net_device *dev) { /* Finish setting up the DEVICE info. */ dev->netdev_ops = &sp_netdev_ops; - dev->needs_free_netdev = true; dev->mtu = SIXP_MTU; dev->hard_header_len = AX25_MAX_HEADER_LEN; dev->header_ops = &ax25_header_ops; -- cgit v1.2.3-59-g8ed1b From 27df68d579c67ef6c39a5047559b6a7c08c96219 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Thu, 11 Nov 2021 19:37:24 +0100 Subject: net/ipa: ipa_resource: Fix wrong for loop range The source group count was mistakenly assigned to both dst and src loops. Fix it to make IPA probe and work again. Fixes: 4fd704b3608a ("net: ipa: record number of groups in data") Acked-by: AngeloGioacchino Del Regno Reviewed-by: Marijn Suijten Signed-off-by: Konrad Dybcio Reviewed-by: Alex Elder Link: https://lore.kernel.org/r/20211111183724.593478-1-konrad.dybcio@somainline.org Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ipa/ipa_resource.c b/drivers/net/ipa/ipa_resource.c index e3da95d69409..06cec7199382 100644 --- a/drivers/net/ipa/ipa_resource.c +++ b/drivers/net/ipa/ipa_resource.c @@ -52,7 +52,7 @@ static bool ipa_resource_limits_valid(struct ipa *ipa, return false; } - group_count = data->rsrc_group_src_count; + group_count = data->rsrc_group_dst_count; if (!group_count || group_count > IPA_RESOURCE_GROUP_MAX) return false; -- cgit v1.2.3-59-g8ed1b From aae458725412332825f31121a5feb8fd887cac5a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 11 Nov 2021 13:08:24 -0800 Subject: ethernet: sis900: fix indentation A space has snuck in. Reported-by: kernel test robot Fixes: 74fad215ee3d ("ethernet: sis900: use eth_hw_addr_set()") Link: https://lore.kernel.org/r/20211111210824.676201-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sis/sis900.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index cc2d907c4c4b..23a336c5096e 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -392,7 +392,7 @@ static int sis96x_get_mac_addr(struct pci_dev *pci_dev, /* get MAC address from EEPROM */ for (i = 0; i < 3; i++) addr[i] = read_eeprom(ioaddr, i + EEPROMMACAddr); - eth_hw_addr_set(net_dev, (u8 *)addr); + eth_hw_addr_set(net_dev, (u8 *)addr); rc = 1; break; -- cgit v1.2.3-59-g8ed1b From a31d27fbed5d518734cb60956303eb15089a7634 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 12 Nov 2021 08:56:03 +0100 Subject: tun: fix bonding active backup with arp monitoring As stated in the bonding doc, trans_start must be set manually for drivers using NETIF_F_LLTX: Drivers that use NETIF_F_LLTX flag must also update netdev_queue->trans_start. If they do not, then the ARP monitor will immediately fail any slaves using that driver, and those slaves will stay down. Link: https://www.kernel.org/doc/html/v5.15/networking/bonding.html#arp-monitor-operation Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- drivers/net/tun.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index fecc9a1d293a..1572878c3403 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1010,6 +1010,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); int txq = skb->queue_mapping; + struct netdev_queue *queue; struct tun_file *tfile; int len = skb->len; @@ -1054,6 +1055,10 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) if (ptr_ring_produce(&tfile->tx_ring, skb)) goto drop; + /* NETIF_F_LLTX requires to do our own update of trans_start */ + queue = netdev_get_tx_queue(dev, txq); + queue->trans_start = jiffies; + /* Notify and wake up reader process */ if (tfile->flags & TUN_FASYNC) kill_fasync(&tfile->fasync, SIGIO, POLL_IN); -- cgit v1.2.3-59-g8ed1b From 6e228d8cbb1cc6ba78022d406340e901e08d26e0 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 12 Nov 2021 16:22:09 -0600 Subject: net: ipa: HOLB register sometimes must be written twice Starting with IPA v4.5, the HOL_BLOCK_EN register must be written twice when enabling head-of-line blocking avoidance. Fixes: 84f9bd12d46db ("soc: qcom: ipa: IPA endpoints") Signed-off-by: Alex Elder Signed-off-by: David S. Miller --- drivers/net/ipa/ipa_endpoint.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index 5528d97110d5..006da4642a0b 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -868,6 +868,9 @@ ipa_endpoint_init_hol_block_enable(struct ipa_endpoint *endpoint, bool enable) val = enable ? HOL_BLOCK_EN_FMASK : 0; offset = IPA_REG_ENDP_INIT_HOL_BLOCK_EN_N_OFFSET(endpoint_id); iowrite32(val, endpoint->ipa->reg_virt + offset); + /* When enabling, the register must be written twice for IPA v4.5+ */ + if (enable && endpoint->ipa->version >= IPA_VERSION_4_5) + iowrite32(val, endpoint->ipa->reg_virt + offset); } void ipa_endpoint_modem_hol_block_clear_all(struct ipa *ipa) -- cgit v1.2.3-59-g8ed1b From 816316cacad2b5abd5b41423cf04e4845239abd4 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 12 Nov 2021 16:22:10 -0600 Subject: net: ipa: disable HOLB drop when updating timer The head-of-line blocking timer should only be modified when head-of-line drop is disabled. One of the steps in recovering from a modem crash is to enable dropping of packets with timeout of 0 (immediate). We don't know how the modem configured its endpoints, so before we program the timer, we need to ensure HOL_BLOCK is disabled. Fixes: 84f9bd12d46db ("soc: qcom: ipa: IPA endpoints") Signed-off-by: Alex Elder Signed-off-by: David S. Miller --- drivers/net/ipa/ipa_endpoint.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index 006da4642a0b..ef790fd0ab56 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -853,6 +853,7 @@ static void ipa_endpoint_init_hol_block_timer(struct ipa_endpoint *endpoint, u32 offset; u32 val; + /* This should only be changed when HOL_BLOCK_EN is disabled */ offset = IPA_REG_ENDP_INIT_HOL_BLOCK_TIMER_N_OFFSET(endpoint_id); val = hol_block_timer_val(ipa, microseconds); iowrite32(val, ipa->reg_virt + offset); @@ -883,6 +884,7 @@ void ipa_endpoint_modem_hol_block_clear_all(struct ipa *ipa) if (endpoint->toward_ipa || endpoint->ee_id != GSI_EE_MODEM) continue; + ipa_endpoint_init_hol_block_enable(endpoint, false); ipa_endpoint_init_hol_block_timer(endpoint, 0); ipa_endpoint_init_hol_block_enable(endpoint, true); } -- cgit v1.2.3-59-g8ed1b From f8885ac89ce310570e5391fe0bf0ec9c7c9b4fdc Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Sun, 14 Nov 2021 01:36:36 +0300 Subject: net: bnx2x: fix variable dereferenced before check Smatch says: bnx2x_init_ops.h:640 bnx2x_ilt_client_mem_op() warn: variable dereferenced before check 'ilt' (see line 638) Move ilt_cli variable initialization _after_ ilt validation, because it's unsafe to deref the pointer before validation check. Fixes: 523224a3b3cd ("bnx2x, cnic, bnx2i: use new FW/HSI") Signed-off-by: Pavel Skripkin Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_init_ops.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init_ops.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init_ops.h index 1835d2e451c0..fc7fce642666 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init_ops.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init_ops.h @@ -635,11 +635,13 @@ static int bnx2x_ilt_client_mem_op(struct bnx2x *bp, int cli_num, { int i, rc; struct bnx2x_ilt *ilt = BP_ILT(bp); - struct ilt_client_info *ilt_cli = &ilt->clients[cli_num]; + struct ilt_client_info *ilt_cli; if (!ilt || !ilt->lines) return -1; + ilt_cli = &ilt->clients[cli_num]; + if (ilt_cli->flags & (ILT_CLIENT_SKIP_INIT | ILT_CLIENT_SKIP_MEM)) return 0; -- cgit v1.2.3-59-g8ed1b From b922f622592af76b57cbc566eaeccda0b31a3496 Mon Sep 17 00:00:00 2001 From: Zekun Shen Date: Sat, 13 Nov 2021 22:24:40 -0500 Subject: atlantic: Fix OOB read and write in hw_atl_utils_fw_rpc_wait This bug report shows up when running our research tools. The reports is SOOB read, but it seems SOOB write is also possible a few lines below. In details, fw.len and sw.len are inputs coming from io. A len over the size of self->rpc triggers SOOB. The patch fixes the bugs by adding sanity checks. The bugs are triggerable with compromised/malfunctioning devices. They are potentially exploitable given they first leak up to 0xffff bytes and able to overwrite the region later. The patch is tested with QEMU emulater. This is NOT tested with a real device. Attached is the log we found by fuzzing. BUG: KASAN: slab-out-of-bounds in hw_atl_utils_fw_upload_dwords+0x393/0x3c0 [atlantic] Read of size 4 at addr ffff888016260b08 by task modprobe/213 CPU: 0 PID: 213 Comm: modprobe Not tainted 5.6.0 #1 Call Trace: dump_stack+0x76/0xa0 print_address_description.constprop.0+0x16/0x200 ? hw_atl_utils_fw_upload_dwords+0x393/0x3c0 [atlantic] ? hw_atl_utils_fw_upload_dwords+0x393/0x3c0 [atlantic] __kasan_report.cold+0x37/0x7c ? aq_hw_read_reg_bit+0x60/0x70 [atlantic] ? hw_atl_utils_fw_upload_dwords+0x393/0x3c0 [atlantic] kasan_report+0xe/0x20 hw_atl_utils_fw_upload_dwords+0x393/0x3c0 [atlantic] hw_atl_utils_fw_rpc_call+0x95/0x130 [atlantic] hw_atl_utils_fw_rpc_wait+0x176/0x210 [atlantic] hw_atl_utils_mpi_create+0x229/0x2e0 [atlantic] ? hw_atl_utils_fw_rpc_wait+0x210/0x210 [atlantic] ? hw_atl_utils_initfw+0x9f/0x1c8 [atlantic] hw_atl_utils_initfw+0x12a/0x1c8 [atlantic] aq_nic_ndev_register+0x88/0x650 [atlantic] ? aq_nic_ndev_init+0x235/0x3c0 [atlantic] aq_pci_probe+0x731/0x9b0 [atlantic] ? aq_pci_func_init+0xc0/0xc0 [atlantic] local_pci_probe+0xd3/0x160 pci_device_probe+0x23f/0x3e0 Reported-by: Brendan Dolan-Gavitt Signed-off-by: Zekun Shen Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c index fc0e66006644..3f1704cbe1cb 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c @@ -559,6 +559,11 @@ int hw_atl_utils_fw_rpc_wait(struct aq_hw_s *self, goto err_exit; if (fw.len == 0xFFFFU) { + if (sw.len > sizeof(self->rpc)) { + printk(KERN_INFO "Invalid sw len: %x\n", sw.len); + err = -EINVAL; + goto err_exit; + } err = hw_atl_utils_fw_rpc_call(self, sw.len); if (err < 0) goto err_exit; @@ -567,6 +572,11 @@ int hw_atl_utils_fw_rpc_wait(struct aq_hw_s *self, if (rpc) { if (fw.len) { + if (fw.len > sizeof(self->rpc)) { + printk(KERN_INFO "Invalid fw len: %x\n", fw.len); + err = -EINVAL; + goto err_exit; + } err = hw_atl_utils_fw_downld_dwords(self, self->rpc_addr, -- cgit v1.2.3-59-g8ed1b From e97b21e94652f5f0d1c196452c111151f6d15883 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 14 Nov 2021 17:02:29 -0800 Subject: net: ethernet: lantiq_etop: fix build errors/warnings Fix build error and warnings reported by kernel test robot: drivers/net/ethernet/lantiq_etop.c: In function 'ltq_etop_probe': drivers/net/ethernet/lantiq_etop.c:673:15: error: implicit declaration of function 'device_property_read_u32' [-Werror=implicit-function-declaration] 673 | err = device_property_read_u32(&pdev->dev, "lantiq,tx-burst-length", &priv->tx_burst_len); drivers/net/ethernet/lantiq_etop.c: At top level: drivers/net/ethernet/lantiq_etop.c:730:1: warning: no previous prototype for 'init_ltq_etop' [-Wmissing-prototypes] 730 | init_ltq_etop(void) drivers/net/ethernet/lantiq_etop.c: In function 'ltq_etop_hw_init': drivers/net/ethernet/lantiq_etop.c:276:25: warning: ignoring return value of 'request_irq' declared with attribute 'warn_unused_result' [-Wunused-result] 276 | request_irq(irq, ltq_etop_dma_irq, 0, "etop_tx", priv); drivers/net/ethernet/lantiq_etop.c:284:25: warning: ignoring return value of 'request_irq' declared with attribute 'warn_unused_result' [-Wunused-result] 284 | request_irq(irq, ltq_etop_dma_irq, 0, "etop_rx", priv); Fixes: 14d4e308e0aa ("net: lantiq: configure the burst length in ethernet drivers") Fixes: dddb29e42770 ("net: lantiq_etop: remove deprecated IRQF_DISABLED") Fixes: 504d4721ee8e ("MIPS: Lantiq: Add ethernet driver") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Link: lore.kernel.org/r/202111090621.yjr9xuVj-lkp@intel.com To: netdev@vger.kernel.org Cc: Aleksander Jan Bajkowski Cc: Hauke Mehrtens Cc: "David S. Miller" Cc: Jakub Kicinski Cc: John Crispin Cc: linux-mips@vger.kernel.org Cc: Ralf Baechle Cc: Michael Opdenacker Signed-off-by: David S. Miller --- drivers/net/ethernet/lantiq_etop.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c index 6433c909c6b2..072391c494ce 100644 --- a/drivers/net/ethernet/lantiq_etop.c +++ b/drivers/net/ethernet/lantiq_etop.c @@ -25,6 +25,7 @@ #include #include #include +#include #include @@ -239,6 +240,7 @@ ltq_etop_hw_init(struct net_device *dev) { struct ltq_etop_priv *priv = netdev_priv(dev); int i; + int err; ltq_pmu_enable(PMU_PPE); @@ -273,7 +275,13 @@ ltq_etop_hw_init(struct net_device *dev) if (IS_TX(i)) { ltq_dma_alloc_tx(&ch->dma); - request_irq(irq, ltq_etop_dma_irq, 0, "etop_tx", priv); + err = request_irq(irq, ltq_etop_dma_irq, 0, "etop_tx", priv); + if (err) { + netdev_err(dev, + "Unable to get Tx DMA IRQ %d\n", + irq); + return err; + } } else if (IS_RX(i)) { ltq_dma_alloc_rx(&ch->dma); for (ch->dma.desc = 0; ch->dma.desc < LTQ_DESC_NUM; @@ -281,7 +289,13 @@ ltq_etop_hw_init(struct net_device *dev) if (ltq_etop_alloc_skb(ch)) return -ENOMEM; ch->dma.desc = 0; - request_irq(irq, ltq_etop_dma_irq, 0, "etop_rx", priv); + err = request_irq(irq, ltq_etop_dma_irq, 0, "etop_rx", priv); + if (err) { + netdev_err(dev, + "Unable to get Rx DMA IRQ %d\n", + irq); + return err; + } } ch->dma.irq = irq; } @@ -726,7 +740,7 @@ static struct platform_driver ltq_mii_driver = { }, }; -int __init +static int __init init_ltq_etop(void) { int ret = platform_driver_probe(<q_mii_driver, ltq_etop_probe); -- cgit v1.2.3-59-g8ed1b From 46d08f55d24e69e921456b5a40717da09199267b Mon Sep 17 00:00:00 2001 From: Edwin Peer Date: Mon, 15 Nov 2021 02:37:59 -0500 Subject: bnxt_en: extend RTNL to VF check in devlink driver_reinit The fixes the race condition between configuring SR-IOV and devlink reload. The SR-IOV configure logic already takes the RTNL lock, setting sriov_cfg under the lock while changes are underway. Extend the lock scope in devlink driver_reinit to cover the VF check and don't run concurrently with SR-IOV configure. Reported-by: Leon Romanovsky Fixes: 228ea8c187d8 ("bnxt_en: implement devlink dev reload driver_reinit") Cc: Leon Romanovsky Reviewed-by: Somnath Kotur Reviewed-by: Pavan Chebbi Reviewed-by: Andy Gospodarek Signed-off-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 5c464ea73576..a0a9af402642 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -441,12 +441,13 @@ static int bnxt_dl_reload_down(struct devlink *dl, bool netns_change, switch (action) { case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: { - if (BNXT_PF(bp) && bp->pf.active_vfs) { + rtnl_lock(); + if (BNXT_PF(bp) && (bp->pf.active_vfs || bp->sriov_cfg)) { NL_SET_ERR_MSG_MOD(extack, - "reload is unsupported when VFs are allocated"); + "reload is unsupported while VFs are allocated or being configured"); + rtnl_unlock(); return -EOPNOTSUPP; } - rtnl_lock(); if (bp->dev->reg_state == NETREG_UNREGISTERED) { rtnl_unlock(); return -ENODEV; -- cgit v1.2.3-59-g8ed1b From b68a1a933fe4a52a8316d214e3421f2a89bc113e Mon Sep 17 00:00:00 2001 From: Edwin Peer Date: Mon, 15 Nov 2021 02:38:00 -0500 Subject: bnxt_en: fix format specifier in live patch error message This fixes type mismatch warning. Reported-by: kernel test robot Fixes: 3c4153394e2c ("bnxt_en: implement firmware live patching") Signed-off-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index a0a9af402642..6fe9e9b59f83 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -360,7 +360,7 @@ bnxt_dl_livepatch_report_err(struct bnxt *bp, struct netlink_ext_ack *extack, NL_SET_ERR_MSG_MOD(extack, "Live patch already applied"); break; default: - netdev_err(bp->dev, "Unexpected live patch error: %hhd\n", err); + netdev_err(bp->dev, "Unexpected live patch error: %d\n", err); NL_SET_ERR_MSG_MOD(extack, "Failed to activate live patch"); break; } -- cgit v1.2.3-59-g8ed1b From b0757491a118ae5727cf9f1c3a11544397d46596 Mon Sep 17 00:00:00 2001 From: Sriharsha Basavapatna Date: Mon, 15 Nov 2021 02:38:01 -0500 Subject: bnxt_en: reject indirect blk offload when hw-tc-offload is off The driver does not check if hw-tc-offload is enabled for the device before offloading a flow in the context of indirect block callback. Fix this by checking NETIF_F_HW_TC in the features flag and rejecting the offload request. This will avoid unnecessary dmesg error logs when hw-tc-offload is disabled, such as these: bnxt_en 0000:19:00.1 eno2np1: dev(ifindex=294) not on same switch bnxt_en 0000:19:00.1 eno2np1: Error: bnxt_tc_add_flow: cookie=0xffff8dace1c88000 error=-22 bnxt_en 0000:19:00.0 eno1np0: dev(ifindex=294) not on same switch bnxt_en 0000:19:00.0 eno1np0: Error: bnxt_tc_add_flow: cookie=0xffff8dace1c88000 error=-22 Reported-by: Marcelo Ricardo Leitner Fixes: 627c89d00fb9 ("bnxt_en: flow_offload: offload tunnel decap rules via indirect callbacks") Signed-off-by: Sriharsha Basavapatna Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index e6a4a768b10b..1471b6130a2b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -1868,7 +1868,7 @@ static int bnxt_tc_setup_indr_block_cb(enum tc_setup_type type, struct flow_cls_offload *flower = type_data; struct bnxt *bp = priv->bp; - if (flower->common.chain_index) + if (!tc_cls_can_offload_and_chain0(bp->dev, type_data)) return -EOPNOTSUPP; switch (type) { -- cgit v1.2.3-59-g8ed1b From 9119570039481d56350af1c636f040fb300b8cf3 Mon Sep 17 00:00:00 2001 From: Meng Li Date: Mon, 15 Nov 2021 15:04:23 +0800 Subject: net: stmmac: socfpga: add runtime suspend/resume callback for stratix10 platform According to upstream commit 5ec55823438e("net: stmmac: add clocks management for gmac driver"), it improve clocks management for stmmac driver. So, it is necessary to implement the runtime callback in dwmac-socfpga driver because it doesn't use the common stmmac_pltfr_pm_ops instance. Otherwise, clocks are not disabled when system enters suspend status. Fixes: 5ec55823438e ("net: stmmac: add clocks management for gmac driver") Cc: stable@vger.kernel.org Signed-off-by: Meng Li Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/dwmac-socfpga.c | 24 ++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 85208128f135..b7c2579c963b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -485,8 +485,28 @@ static int socfpga_dwmac_resume(struct device *dev) } #endif /* CONFIG_PM_SLEEP */ -static SIMPLE_DEV_PM_OPS(socfpga_dwmac_pm_ops, stmmac_suspend, - socfpga_dwmac_resume); +static int __maybe_unused socfpga_dwmac_runtime_suspend(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + struct stmmac_priv *priv = netdev_priv(ndev); + + stmmac_bus_clks_config(priv, false); + + return 0; +} + +static int __maybe_unused socfpga_dwmac_runtime_resume(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + struct stmmac_priv *priv = netdev_priv(ndev); + + return stmmac_bus_clks_config(priv, true); +} + +static const struct dev_pm_ops socfpga_dwmac_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(stmmac_suspend, socfpga_dwmac_resume) + SET_RUNTIME_PM_OPS(socfpga_dwmac_runtime_suspend, socfpga_dwmac_runtime_resume, NULL) +}; static const struct socfpga_dwmac_ops socfpga_gen5_ops = { .set_phy_mode = socfpga_gen5_set_phy_mode, -- cgit v1.2.3-59-g8ed1b From 4e5e6b5d9d1334d3490326b6922a2daaf56a867f Mon Sep 17 00:00:00 2001 From: Mateusz Palczewski Date: Tue, 9 Feb 2021 11:59:38 +0000 Subject: iavf: Fix return of set the new channel count Fixed return correct code from set the new channel count. Implemented by check if reset is done in appropriate time. This solution give a extra time to pf for reset vf in case when user want set new channel count for all vfs. Without this patch it is possible to return misleading output code to user and vf reset not to be correctly performed by pf. Fixes: 5520deb15326 ("iavf: Enable support for up to 16 queues") Signed-off-by: Grzegorz Szczurek Signed-off-by: Mateusz Palczewski Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_ethtool.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index 5a359a0a20ec..136c801f5584 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -1776,6 +1776,7 @@ static int iavf_set_channels(struct net_device *netdev, { struct iavf_adapter *adapter = netdev_priv(netdev); u32 num_req = ch->combined_count; + int i; if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) && adapter->num_tc) { @@ -1798,6 +1799,20 @@ static int iavf_set_channels(struct net_device *netdev, adapter->num_req_queues = num_req; adapter->flags |= IAVF_FLAG_REINIT_ITR_NEEDED; iavf_schedule_reset(adapter); + + /* wait for the reset is done */ + for (i = 0; i < IAVF_RESET_WAIT_COMPLETE_COUNT; i++) { + msleep(IAVF_RESET_WAIT_MS); + if (adapter->flags & IAVF_FLAG_RESET_PENDING) + continue; + break; + } + if (i == IAVF_RESET_WAIT_COMPLETE_COUNT) { + adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; + adapter->num_active_queues = num_req; + return -EOPNOTSUPP; + } + return 0; } -- cgit v1.2.3-59-g8ed1b From 8a4a126f4be88eb8b5f00a165ab58c35edf4ef76 Mon Sep 17 00:00:00 2001 From: Nicholas Nunley Date: Fri, 4 Jun 2021 09:48:53 -0700 Subject: iavf: check for null in iavf_fix_features If the driver has lost contact with the PF then it enters a disabled state and frees adapter->vf_res. However, ndo_fix_features can still be called on the interface, so we need to check for this condition first. Since we have no information on the features at this time simply leave them unmodified and return. Fixes: c4445aedfe09 ("i40evf: Fix VLAN features") Signed-off-by: Nicholas Nunley Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 847d67e32a54..561171507cda 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -3503,7 +3503,8 @@ static netdev_features_t iavf_fix_features(struct net_device *netdev, { struct iavf_adapter *adapter = netdev_priv(netdev); - if (!(adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)) + if (adapter->vf_res && + !(adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)) features &= ~(NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER); -- cgit v1.2.3-59-g8ed1b From 89f22f129696ab53cfbc608e0a2184d0fea46ac1 Mon Sep 17 00:00:00 2001 From: Nicholas Nunley Date: Fri, 4 Jun 2021 09:48:54 -0700 Subject: iavf: free q_vectors before queues in iavf_disable_vf iavf_free_queues() clears adapter->num_active_queues, which iavf_free_q_vectors() relies on, so swap the order of these two function calls in iavf_disable_vf(). This resolves a panic encountered when the interface is disabled and then later brought up again after PF communication is restored. Fixes: 65c7006f234c ("i40evf: assign num_active_queues inside i40evf_alloc_queues") Signed-off-by: Nicholas Nunley Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 561171507cda..c23fff5a4bd9 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -2123,8 +2123,8 @@ static void iavf_disable_vf(struct iavf_adapter *adapter) iavf_free_misc_irq(adapter); iavf_reset_interrupt_capability(adapter); - iavf_free_queues(adapter); iavf_free_q_vectors(adapter); + iavf_free_queues(adapter); memset(adapter->vf_res, 0, IAVF_VIRTCHNL_VF_RESOURCE_SIZE); iavf_shutdown_adminq(&adapter->hw); adapter->netdev->flags &= ~IFF_UP; -- cgit v1.2.3-59-g8ed1b From 2135a8d5c8186bc92901dc00f179ffd50e54c2ac Mon Sep 17 00:00:00 2001 From: Nicholas Nunley Date: Fri, 4 Jun 2021 09:48:55 -0700 Subject: iavf: don't clear a lock we don't hold In iavf_configure_clsflower() the function will bail out if it is unable to obtain the crit_section lock in a reasonable time. However, it will clear the lock when exiting, so fix this. Fixes: 640a8af5841f ("i40evf: Reorder configure_clsflower to avoid deadlock on error") Signed-off-by: Nicholas Nunley Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index c23fff5a4bd9..28661e4425f1 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -3095,8 +3095,10 @@ static int iavf_configure_clsflower(struct iavf_adapter *adapter, return -ENOMEM; while (!mutex_trylock(&adapter->crit_lock)) { - if (--count == 0) - goto err; + if (--count == 0) { + kfree(filter); + return err; + } udelay(1); } -- cgit v1.2.3-59-g8ed1b From 8905072a192fffe9389255489db250c73ecab008 Mon Sep 17 00:00:00 2001 From: Piotr Marczak Date: Fri, 4 Jun 2021 09:48:56 -0700 Subject: iavf: Fix failure to exit out from last all-multicast mode The driver could only quit allmulti when allmulti and promisc modes are turn on at the same time. If promisc had been off there was no way to turn off allmulti mode. The patch corrects this behavior. Switching allmulti does not depends on promisc state mode anymore Fixes: f42a5c74da99 ("i40e: Add allmulti support for the VF") Signed-off-by: Piotr Marczak Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 28661e4425f1..76c4ca0f055e 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1639,8 +1639,7 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter) iavf_set_promiscuous(adapter, FLAG_VF_MULTICAST_PROMISC); return 0; } - - if ((adapter->aq_required & IAVF_FLAG_AQ_RELEASE_PROMISC) && + if ((adapter->aq_required & IAVF_FLAG_AQ_RELEASE_PROMISC) || (adapter->aq_required & IAVF_FLAG_AQ_RELEASE_ALLMULTI)) { iavf_set_promiscuous(adapter, 0); return 0; -- cgit v1.2.3-59-g8ed1b From 4f0400803818f2642f066d3eacaf013f23554cc7 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 4 Jun 2021 09:48:57 -0700 Subject: iavf: prevent accidental free of filter structure In iavf_config_clsflower, the filter structure could be accidentally released at the end, if iavf_parse_cls_flower or iavf_handle_tclass ever return a non-zero but positive value. In this case, the function continues through to the end, and will call kfree() on the filter structure even though it has been added to the linked list. This can actually happen because iavf_parse_cls_flower will return a positive IAVF_ERR_CONFIG value instead of the traditional negative error codes. Fix this by ensuring that the kfree() check and error checks are similar. Use the more idiomatic "if (err)" to catch all non-zero error codes. Fixes: 0075fa0fadd0 ("i40evf: Add support to apply cloud filters") Signed-off-by: Jacob Keller Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 76c4ca0f055e..9c68c8628512 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -3108,11 +3108,11 @@ static int iavf_configure_clsflower(struct iavf_adapter *adapter, /* start out with flow type and eth type IPv4 to begin with */ filter->f.flow_type = VIRTCHNL_TCP_V4_FLOW; err = iavf_parse_cls_flower(adapter, cls_flower, filter); - if (err < 0) + if (err) goto err; err = iavf_handle_tclass(adapter, tc, filter); - if (err < 0) + if (err) goto err; /* add filter to the list */ -- cgit v1.2.3-59-g8ed1b From 131b0edc4028bb88bb472456b1ddba526cfb7036 Mon Sep 17 00:00:00 2001 From: Mitch Williams Date: Fri, 4 Jun 2021 09:48:58 -0700 Subject: iavf: validate pointers In some cases, the ethtool get_rxfh handler may be called with a null key or indir parameter. So check these pointers, or you will have a very bad day. Fixes: 43a3d9ba34c9 ("i40evf: Allow PF driver to configure RSS") Signed-off-by: Mitch Williams Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_ethtool.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index 136c801f5584..25ee0606e625 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -1859,14 +1859,13 @@ static int iavf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, if (hfunc) *hfunc = ETH_RSS_HASH_TOP; - if (!indir) - return 0; - - memcpy(key, adapter->rss_key, adapter->rss_key_size); + if (key) + memcpy(key, adapter->rss_key, adapter->rss_key_size); - /* Each 32 bits pointed by 'indir' is stored with a lut entry */ - for (i = 0; i < adapter->rss_lut_size; i++) - indir[i] = (u32)adapter->rss_lut[i]; + if (indir) + /* Each 32 bits pointed by 'indir' is stored with a lut entry */ + for (i = 0; i < adapter->rss_lut_size; i++) + indir[i] = (u32)adapter->rss_lut[i]; return 0; } -- cgit v1.2.3-59-g8ed1b From 321421b57a12e933f92b228e0e6d0b2c6541f41d Mon Sep 17 00:00:00 2001 From: Surabhi Boob Date: Fri, 4 Jun 2021 09:48:59 -0700 Subject: iavf: Fix for the false positive ASQ/ARQ errors while issuing VF reset While issuing VF Reset from the guest OS, the VF driver prints logs about critical / Overflow error detection. This is not an actual error since the VF_MBX_ARQLEN register is set to all FF's for a short period of time and the VF would catch the bits set if it was reading the register during that spike of time. This patch introduces an additional check to ignore this condition since the VF is in reset. Fixes: 19b73d8efaa4 ("i40evf: Add additional check for reset") Signed-off-by: Surabhi Boob Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 9c68c8628512..9ca9208aa896 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -2409,7 +2409,7 @@ static void iavf_adminq_task(struct work_struct *work) /* check for error indications */ val = rd32(hw, hw->aq.arq.len); - if (val == 0xdeadbeef) /* indicates device in reset */ + if (val == 0xdeadbeef || val == 0xffffffff) /* device in reset */ goto freedom; oldval = val; if (val & IAVF_VF_ARQLEN1_ARQVFE_MASK) { -- cgit v1.2.3-59-g8ed1b From 9a6e9e483a9684a34573fd9f9e30ecfb047cb8cb Mon Sep 17 00:00:00 2001 From: Grzegorz Szczurek Date: Fri, 4 Jun 2021 09:49:00 -0700 Subject: iavf: Fix for setting queues to 0 Now setting combine to 0 will be rejected with the appropriate error code. This has been implemented by adding a condition that checks the value of combine equal to zero. Without this patch, when the user requested it, no error was returned and combine was set to the default value for VF. Fixes: 5520deb15326 ("iavf: Enable support for up to 16 queues") Signed-off-by: Grzegorz Szczurek Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index 25ee0606e625..144a77679359 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -1787,7 +1787,7 @@ static int iavf_set_channels(struct net_device *netdev, /* All of these should have already been checked by ethtool before this * even gets to us, but just to be sure. */ - if (num_req > adapter->vsi_res->num_queue_pairs) + if (num_req == 0 || num_req > adapter->vsi_res->num_queue_pairs) return -EINVAL; if (num_req == adapter->num_active_queues) -- cgit v1.2.3-59-g8ed1b From 4293014230b887d94b68aa460ff00153454a3709 Mon Sep 17 00:00:00 2001 From: Akeem G Abodunrin Date: Fri, 4 Jun 2021 09:53:27 -0700 Subject: iavf: Restore VLAN filters after link down Restore VLAN filters after the link is brought down, and up - since all filters are deleted from HW during the netdev link down routine. Fixes: ed1f5b58ea01 ("i40evf: remove VLAN filters on close") Signed-off-by: Akeem G Abodunrin Tested-by: George Kuruvinakunnel Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf.h | 1 + drivers/net/ethernet/intel/iavf/iavf_main.c | 35 ++++++++++++++++++++++++----- 2 files changed, 31 insertions(+), 5 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index e6e7c1da47fb..75635bd57cf6 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -39,6 +39,7 @@ #include "iavf_txrx.h" #include "iavf_fdir.h" #include "iavf_adv_rss.h" +#include #define DEFAULT_DEBUG_LEVEL_SHIFT 3 #define PFX "iavf: " diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 9ca9208aa896..336e6bf95e48 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -696,6 +696,23 @@ static void iavf_del_vlan(struct iavf_adapter *adapter, u16 vlan) spin_unlock_bh(&adapter->mac_vlan_list_lock); } +/** + * iavf_restore_filters + * @adapter: board private structure + * + * Restore existing non MAC filters when VF netdev comes back up + **/ +static void iavf_restore_filters(struct iavf_adapter *adapter) +{ + /* re-add all VLAN filters */ + if (VLAN_ALLOWED(adapter)) { + u16 vid; + + for_each_set_bit(vid, adapter->vsi.active_vlans, VLAN_N_VID) + iavf_add_vlan(adapter, vid); + } +} + /** * iavf_vlan_rx_add_vid - Add a VLAN filter to a device * @netdev: network device struct @@ -709,8 +726,11 @@ static int iavf_vlan_rx_add_vid(struct net_device *netdev, if (!VLAN_ALLOWED(adapter)) return -EIO; + if (iavf_add_vlan(adapter, vid) == NULL) return -ENOMEM; + + set_bit(vid, adapter->vsi.active_vlans); return 0; } @@ -725,11 +745,13 @@ static int iavf_vlan_rx_kill_vid(struct net_device *netdev, { struct iavf_adapter *adapter = netdev_priv(netdev); - if (VLAN_ALLOWED(adapter)) { - iavf_del_vlan(adapter, vid); - return 0; - } - return -EIO; + if (!VLAN_ALLOWED(adapter)) + return -EIO; + + iavf_del_vlan(adapter, vid); + clear_bit(vid, adapter->vsi.active_vlans); + + return 0; } /** @@ -3309,6 +3331,9 @@ static int iavf_open(struct net_device *netdev) spin_unlock_bh(&adapter->mac_vlan_list_lock); + /* Restore VLAN filters that were removed with IFF_DOWN */ + iavf_restore_filters(adapter); + iavf_configure(adapter); iavf_up_complete(adapter); -- cgit v1.2.3-59-g8ed1b From cc4a9cc03faa6d8db1a6954bb536f2c1e63bdff6 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 15 Sep 2021 13:25:31 +0300 Subject: net/mlx5e: kTLS, Fix crash in RX resync flow For the TLS RX resync flow, we maintain a list of TLS contexts that require some attention, to communicate their resync information to the HW. Here we fix list corruptions, by protecting the entries against movements coming from resync_handle_seq_match(), until their resync handling in napi is fully completed. Fixes: e9ce991bce5b ("net/mlx5e: kTLS, Add resiliency to RX resync failures") Signed-off-by: Tariq Toukan Reviewed-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c | 23 ++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index 62abce008c7b..a2a9f68579dd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -55,6 +55,7 @@ struct mlx5e_ktls_offload_context_rx { DECLARE_BITMAP(flags, MLX5E_NUM_PRIV_RX_FLAGS); /* resync */ + spinlock_t lock; /* protects resync fields */ struct mlx5e_ktls_rx_resync_ctx resync; struct list_head list; }; @@ -386,14 +387,18 @@ static void resync_handle_seq_match(struct mlx5e_ktls_offload_context_rx *priv_r struct mlx5e_icosq *sq; bool trigger_poll; - memcpy(info->rec_seq, &priv_rx->resync.sw_rcd_sn_be, sizeof(info->rec_seq)); - sq = &c->async_icosq; ktls_resync = sq->ktls_resync; + trigger_poll = false; spin_lock_bh(&ktls_resync->lock); - list_add_tail(&priv_rx->list, &ktls_resync->list); - trigger_poll = !test_and_set_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &sq->state); + spin_lock_bh(&priv_rx->lock); + memcpy(info->rec_seq, &priv_rx->resync.sw_rcd_sn_be, sizeof(info->rec_seq)); + if (list_empty(&priv_rx->list)) { + list_add_tail(&priv_rx->list, &ktls_resync->list); + trigger_poll = !test_and_set_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &sq->state); + } + spin_unlock_bh(&priv_rx->lock); spin_unlock_bh(&ktls_resync->lock); if (!trigger_poll) @@ -617,6 +622,8 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk, if (err) goto err_create_key; + INIT_LIST_HEAD(&priv_rx->list); + spin_lock_init(&priv_rx->lock); priv_rx->crypto_info = *(struct tls12_crypto_info_aes_gcm_128 *)crypto_info; @@ -730,10 +737,14 @@ bool mlx5e_ktls_rx_handle_resync_list(struct mlx5e_channel *c, int budget) priv_rx = list_first_entry(&local_list, struct mlx5e_ktls_offload_context_rx, list); + spin_lock(&priv_rx->lock); cseg = post_static_params(sq, priv_rx); - if (IS_ERR(cseg)) + if (IS_ERR(cseg)) { + spin_unlock(&priv_rx->lock); break; - list_del(&priv_rx->list); + } + list_del_init(&priv_rx->list); + spin_unlock(&priv_rx->lock); db_cseg = cseg; } if (db_cseg) -- cgit v1.2.3-59-g8ed1b From 362980eada85b5ea691e5e0d9257a991aa7ade47 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Thu, 21 Oct 2021 18:15:10 +0300 Subject: net/mlx5e: Wait for concurrent flow deletion during neigh/fib events Function mlx5e_take_tmp_flow() skips flows with zero reference count. This can cause syndrome 0x179e84 when the called from neigh or route update code and the skipped flow is not removed from the hardware by the time underlying encap/decap resource is deleted. Add new completion 'del_hw_done' that is completed when flow is unoffloaded. This is safe to do because flow with reference count zero needs to be detached from encap/decap entry before its memory is deallocated, which requires taking the encap_tbl_lock mutex that is held by the event handlers code. Fixes: 8914add2c9e5 ("net/mlx5e: Handle FIB events to update tunnel endpoint device") Signed-off-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c | 8 +++++++- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 ++ 3 files changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h index 8f64f2c8895a..b689701ac7d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h @@ -102,6 +102,7 @@ struct mlx5e_tc_flow { refcount_t refcnt; struct rcu_head rcu_head; struct completion init_done; + struct completion del_hw_done; int tunnel_id; /* the mapped tunnel id of this flow */ struct mlx5_flow_attr *attr; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 660cca73c36c..042b1abe1437 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -245,8 +245,14 @@ static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow, struct list_head *flow_list, int index) { - if (IS_ERR(mlx5e_flow_get(flow))) + if (IS_ERR(mlx5e_flow_get(flow))) { + /* Flow is being deleted concurrently. Wait for it to be + * unoffloaded from hardware, otherwise deleting encap will + * fail. + */ + wait_for_completion(&flow->del_hw_done); return; + } wait_for_completion(&flow->init_done); flow->tmp_entry_index = index; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 835caa1c7b74..cb76c41fe163 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1600,6 +1600,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, else mlx5e_tc_unoffload_fdb_rules(esw, flow, attr); } + complete_all(&flow->del_hw_done); if (mlx5_flow_has_geneve_opt(flow)) mlx5_geneve_tlv_option_del(priv->mdev->geneve); @@ -4465,6 +4466,7 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, INIT_LIST_HEAD(&flow->l3_to_l2_reformat); refcount_set(&flow->refcnt, 1); init_completion(&flow->init_done); + init_completion(&flow->del_hw_done); *__flow = flow; *__parse_attr = parse_attr; -- cgit v1.2.3-59-g8ed1b From d7751d6476185ff754b9dad2cba0c0a6e43ecadc Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 20 May 2021 17:09:58 +0300 Subject: net/mlx5: E-Switch, Fix resetting of encap mode when entering switchdev E-Switch encap mode is relevant only when in switchdev mode. The RDMA driver can query the encap configuration via mlx5_eswitch_get_encap_mode(). Make sure it returns the currently used mode and not the set one. This reverts the cited commit which reset the encap mode on entering switchdev and fixes the original issue properly. Fixes: 9a64144d683a ("net/mlx5: E-Switch, Fix default encap mode") Signed-off-by: Paul Blakey Reviewed-by: Mark Bloch Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 9 +++++++-- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 7 ------- include/linux/mlx5/eswitch.h | 4 ++-- 3 files changed, 9 insertions(+), 11 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index ec136b499204..5872cc8bf953 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1572,6 +1572,11 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) esw->enabled_vports = 0; esw->mode = MLX5_ESWITCH_NONE; esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE; + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) && + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)) + esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC; + else + esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; dev->priv.eswitch = esw; BLOCKING_INIT_NOTIFIER_HEAD(&esw->n_head); @@ -1934,7 +1939,7 @@ free_out: return err; } -u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev) +u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev) { struct mlx5_eswitch *esw = dev->priv.eswitch; @@ -1948,7 +1953,7 @@ mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev) struct mlx5_eswitch *esw; esw = dev->priv.eswitch; - return mlx5_esw_allowed(esw) ? esw->offloads.encap : + return (mlx5_eswitch_mode(dev) == MLX5_ESWITCH_OFFLOADS) ? esw->offloads.encap : DEVLINK_ESWITCH_ENCAP_MODE_NONE; } EXPORT_SYMBOL(mlx5_eswitch_get_encap_mode); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index f4eaa5893886..80fa76f60e1e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -3183,12 +3183,6 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) u64 mapping_id; int err; - if (MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat) && - MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, decap)) - esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC; - else - esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; - mutex_init(&esw->offloads.termtbl_mutex); mlx5_rdma_enable_roce(esw->dev); @@ -3286,7 +3280,6 @@ void esw_offloads_disable(struct mlx5_eswitch *esw) esw_offloads_metadata_uninit(esw); mlx5_rdma_disable_roce(esw->dev); mutex_destroy(&esw->offloads.termtbl_mutex); - esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; } static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode) diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index 97afcea39a7b..8b18fe9771f9 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -145,13 +145,13 @@ u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw, GENMASK(31 - ESW_TUN_ID_BITS - ESW_RESERVED_BITS, \ ESW_TUN_OPTS_OFFSET + 1) -u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev); +u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev); u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev); struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw); #else /* CONFIG_MLX5_ESWITCH */ -static inline u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev) +static inline u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev) { return MLX5_ESWITCH_NONE; } -- cgit v1.2.3-59-g8ed1b From 76ded29d3fcda4928da8849ffc446ea46871c1c2 Mon Sep 17 00:00:00 2001 From: Valentine Fatiev Date: Tue, 26 Oct 2021 11:42:41 +0300 Subject: net/mlx5e: nullify cq->dbg pointer in mlx5_debug_cq_remove() Prior to this patch in case mlx5_core_destroy_cq() failed it proceeds to rest of destroy operations. mlx5_core_destroy_cq() could be called again by user and cause additional call of mlx5_debug_cq_remove(). cq->dbg was not nullify in previous call and cause the crash. Fix it by nullify cq->dbg pointer after removal. Also proceed to destroy operations only if FW return 0 for MLX5_CMD_OP_DESTROY_CQ command. general protection fault, probably for non-canonical address 0x2000300004058: 0000 [#1] SMP PTI CPU: 5 PID: 1228 Comm: python Not tainted 5.15.0-rc5_for_upstream_min_debug_2021_10_14_11_06 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:lockref_get+0x1/0x60 Code: 5d e9 53 ff ff ff 48 8d 7f 70 e8 0a 2e 48 00 c7 85 d0 00 00 00 02 00 00 00 c6 45 70 00 fb 5d c3 c3 cc cc cc cc cc cc cc cc 53 <48> 8b 17 48 89 fb 85 d2 75 3d 48 89 d0 bf 64 00 00 00 48 89 c1 48 RSP: 0018:ffff888137dd7a38 EFLAGS: 00010206 RAX: 0000000000000000 RBX: ffff888107d5f458 RCX: 00000000fffffffe RDX: 000000000002c2b0 RSI: ffffffff8155e2e0 RDI: 0002000300004058 RBP: ffff888137dd7a88 R08: 0002000300004058 R09: ffff8881144a9f88 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8881141d4000 R13: ffff888137dd7c68 R14: ffff888137dd7d58 R15: ffff888137dd7cc0 FS: 00007f4644f2a4c0(0000) GS:ffff8887a2d40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055b4500f4380 CR3: 0000000114f7a003 CR4: 0000000000170ea0 Call Trace: simple_recursive_removal+0x33/0x2e0 ? debugfs_remove+0x60/0x60 debugfs_remove+0x40/0x60 mlx5_debug_cq_remove+0x32/0x70 [mlx5_core] mlx5_core_destroy_cq+0x41/0x1d0 [mlx5_core] devx_obj_cleanup+0x151/0x330 [mlx5_ib] ? __pollwait+0xd0/0xd0 ? xas_load+0x5/0x70 ? xa_load+0x62/0xa0 destroy_hw_idr_uobject+0x20/0x80 [ib_uverbs] uverbs_destroy_uobject+0x3b/0x360 [ib_uverbs] uobj_destroy+0x54/0xa0 [ib_uverbs] ib_uverbs_cmd_verbs+0xaf2/0x1160 [ib_uverbs] ? uverbs_finalize_object+0xd0/0xd0 [ib_uverbs] ib_uverbs_ioctl+0xc4/0x1b0 [ib_uverbs] __x64_sys_ioctl+0x3e4/0x8e0 Fixes: 94b960b9deff ("net/mlx5e: Fix memory leak in mlx5_core_destroy_cq() error path") Signed-off-by: Valentine Fatiev Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cq.c | 5 +++-- drivers/net/ethernet/mellanox/mlx5/core/debugfs.c | 4 +++- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index 02e77ffe5c3e..5371ad0a12eb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -164,13 +164,14 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) MLX5_SET(destroy_cq_in, in, cqn, cq->cqn); MLX5_SET(destroy_cq_in, in, uid, cq->uid); err = mlx5_cmd_exec_in(dev, destroy_cq, in); + if (err) + return err; synchronize_irq(cq->irqn); - mlx5_cq_put(cq); wait_for_completion(&cq->free); - return err; + return 0; } EXPORT_SYMBOL(mlx5_core_destroy_cq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c index 07c8d9811bc8..10d195042ab5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -507,6 +507,8 @@ void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) if (!mlx5_debugfs_root) return; - if (cq->dbg) + if (cq->dbg) { rem_res_tree(cq->dbg); + cq->dbg = NULL; + } } -- cgit v1.2.3-59-g8ed1b From 9091b821aaa4c2d107ca8f97c32baefcb1e7e40d Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Wed, 3 Nov 2021 01:09:04 +0200 Subject: net/mlx5: DR, Handle eswitch manager and uplink vports separately When querying eswitch manager vport capabilities as "other = 1", we encounter a FW compatibility issue with older FW versions. To maintain backward compatibility, eswitch manager vport should be queried as "other = 0" vport both for ECPF and non-ECPF cases. This patch fixes these queries and improves the code readability by handling eswitch manager and uplink vports separately, avoiding the excessive 'if' conditions. Also, uplink caps are stored similar to esw manager and not as part of xarray. Fixes: dd4acb2a0954 ("net/mlx5: DR, Add missing query for vport 0") Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/steering/dr_domain.c | 56 +++++++++------------- .../mellanox/mlx5/core/steering/dr_types.h | 1 + 2 files changed, 24 insertions(+), 33 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c index 49089cbe897c..8cbd36c82b3b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c @@ -135,25 +135,14 @@ static void dr_domain_fill_uplink_caps(struct mlx5dr_domain *dmn, static int dr_domain_query_vport(struct mlx5dr_domain *dmn, u16 vport_number, + bool other_vport, struct mlx5dr_cmd_vport_cap *vport_caps) { - u16 cmd_vport = vport_number; - bool other_vport = true; int ret; - if (vport_number == MLX5_VPORT_UPLINK) { - dr_domain_fill_uplink_caps(dmn, vport_caps); - return 0; - } - - if (dmn->info.caps.is_ecpf && vport_number == MLX5_VPORT_ECPF) { - other_vport = false; - cmd_vport = 0; - } - ret = mlx5dr_cmd_query_esw_vport_context(dmn->mdev, other_vport, - cmd_vport, + vport_number, &vport_caps->icm_address_rx, &vport_caps->icm_address_tx); if (ret) @@ -161,7 +150,7 @@ static int dr_domain_query_vport(struct mlx5dr_domain *dmn, ret = mlx5dr_cmd_query_gvmi(dmn->mdev, other_vport, - cmd_vport, + vport_number, &vport_caps->vport_gvmi); if (ret) return ret; @@ -176,9 +165,15 @@ static int dr_domain_query_esw_mngr(struct mlx5dr_domain *dmn) { return dr_domain_query_vport(dmn, dmn->info.caps.is_ecpf ? MLX5_VPORT_ECPF : 0, + false, &dmn->info.caps.vports.esw_manager_caps); } +static void dr_domain_query_uplink(struct mlx5dr_domain *dmn) +{ + dr_domain_fill_uplink_caps(dmn, &dmn->info.caps.vports.uplink_caps); +} + static struct mlx5dr_cmd_vport_cap * dr_domain_add_vport_cap(struct mlx5dr_domain *dmn, u16 vport) { @@ -190,7 +185,7 @@ dr_domain_add_vport_cap(struct mlx5dr_domain *dmn, u16 vport) if (!vport_caps) return NULL; - ret = dr_domain_query_vport(dmn, vport, vport_caps); + ret = dr_domain_query_vport(dmn, vport, true, vport_caps); if (ret) { kvfree(vport_caps); return NULL; @@ -207,16 +202,26 @@ dr_domain_add_vport_cap(struct mlx5dr_domain *dmn, u16 vport) return vport_caps; } +static bool dr_domain_is_esw_mgr_vport(struct mlx5dr_domain *dmn, u16 vport) +{ + struct mlx5dr_cmd_caps *caps = &dmn->info.caps; + + return (caps->is_ecpf && vport == MLX5_VPORT_ECPF) || + (!caps->is_ecpf && vport == 0); +} + struct mlx5dr_cmd_vport_cap * mlx5dr_domain_get_vport_cap(struct mlx5dr_domain *dmn, u16 vport) { struct mlx5dr_cmd_caps *caps = &dmn->info.caps; struct mlx5dr_cmd_vport_cap *vport_caps; - if ((caps->is_ecpf && vport == MLX5_VPORT_ECPF) || - (!caps->is_ecpf && vport == 0)) + if (dr_domain_is_esw_mgr_vport(dmn, vport)) return &caps->vports.esw_manager_caps; + if (vport == MLX5_VPORT_UPLINK) + return &caps->vports.uplink_caps; + vport_load: vport_caps = xa_load(&caps->vports.vports_caps_xa, vport); if (vport_caps) @@ -241,17 +246,6 @@ static void dr_domain_clear_vports(struct mlx5dr_domain *dmn) } } -static int dr_domain_query_uplink(struct mlx5dr_domain *dmn) -{ - struct mlx5dr_cmd_vport_cap *vport_caps; - - vport_caps = mlx5dr_domain_get_vport_cap(dmn, MLX5_VPORT_UPLINK); - if (!vport_caps) - return -EINVAL; - - return 0; -} - static int dr_domain_query_fdb_caps(struct mlx5_core_dev *mdev, struct mlx5dr_domain *dmn) { @@ -281,11 +275,7 @@ static int dr_domain_query_fdb_caps(struct mlx5_core_dev *mdev, goto free_vports_caps_xa; } - ret = dr_domain_query_uplink(dmn); - if (ret) { - mlx5dr_err(dmn, "Failed to query uplink vport caps (err: %d)", ret); - goto free_vports_caps_xa; - } + dr_domain_query_uplink(dmn); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h index 3028b776da00..2333c2439c28 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -764,6 +764,7 @@ struct mlx5dr_roce_cap { struct mlx5dr_vports { struct mlx5dr_cmd_vport_cap esw_manager_caps; + struct mlx5dr_cmd_vport_cap uplink_caps; struct xarray vports_caps_xa; }; -- cgit v1.2.3-59-g8ed1b From 455832d49666e1765acf812be79710b9f84a8cbf Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Wed, 3 Nov 2021 17:51:03 +0200 Subject: net/mlx5: DR, Fix check for unsupported fields in match param The existing loop doesn't cast the buffer while scanning it, which results in out-of-bounds read and failure to create the matcher. Fixes: 941f19798a11 ("net/mlx5: DR, Add check for unsupported fields in match param") Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c index 75c775bee351..793365242e85 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c @@ -924,11 +924,12 @@ static int dr_matcher_init(struct mlx5dr_matcher *matcher, /* Check that all mask data was consumed */ for (i = 0; i < consumed_mask.match_sz; i++) { - if (consumed_mask.match_buf[i]) { - mlx5dr_dbg(dmn, "Match param mask contains unsupported parameters\n"); - ret = -EOPNOTSUPP; - goto free_consumed_mask; - } + if (!((u8 *)consumed_mask.match_buf)[i]) + continue; + + mlx5dr_dbg(dmn, "Match param mask contains unsupported parameters\n"); + ret = -EOPNOTSUPP; + goto free_consumed_mask; } ret = 0; -- cgit v1.2.3-59-g8ed1b From ba50cd9451f6c49cf0841c0a4a146ff6a2822699 Mon Sep 17 00:00:00 2001 From: Neta Ostrovsky Date: Wed, 27 Oct 2021 15:16:14 +0300 Subject: net/mlx5: Update error handler for UCTX and UMEM In the fast unload flow, the device state is set to internal error, which indicates that the driver started the destroy process. In this case, when a destroy command is being executed, it should return MLX5_CMD_STAT_OK. Fix MLX5_CMD_OP_DESTROY_UCTX and MLX5_CMD_OP_DESTROY_UMEM to return OK instead of EIO. This fixes a call trace in the umem release process - [ 2633.536695] Call Trace: [ 2633.537518] ib_uverbs_remove_one+0xc3/0x140 [ib_uverbs] [ 2633.538596] remove_client_context+0x8b/0xd0 [ib_core] [ 2633.539641] disable_device+0x8c/0x130 [ib_core] [ 2633.540615] __ib_unregister_device+0x35/0xa0 [ib_core] [ 2633.541640] ib_unregister_device+0x21/0x30 [ib_core] [ 2633.542663] __mlx5_ib_remove+0x38/0x90 [mlx5_ib] [ 2633.543640] auxiliary_bus_remove+0x1e/0x30 [auxiliary] [ 2633.544661] device_release_driver_internal+0x103/0x1f0 [ 2633.545679] bus_remove_device+0xf7/0x170 [ 2633.546640] device_del+0x181/0x410 [ 2633.547606] mlx5_rescan_drivers_locked.part.10+0x63/0x160 [mlx5_core] [ 2633.548777] mlx5_unregister_device+0x27/0x40 [mlx5_core] [ 2633.549841] mlx5_uninit_one+0x21/0xc0 [mlx5_core] [ 2633.550864] remove_one+0x69/0xe0 [mlx5_core] [ 2633.551819] pci_device_remove+0x3b/0xc0 [ 2633.552731] device_release_driver_internal+0x103/0x1f0 [ 2633.553746] unbind_store+0xf6/0x130 [ 2633.554657] kernfs_fop_write+0x116/0x190 [ 2633.555567] vfs_write+0xa5/0x1a0 [ 2633.556407] ksys_write+0x4f/0xb0 [ 2633.557233] do_syscall_64+0x5b/0x1a0 [ 2633.558071] entry_SYSCALL_64_after_hwframe+0x65/0xca [ 2633.559018] RIP: 0033:0x7f9977132648 [ 2633.559821] Code: 89 02 48 c7 c0 ff ff ff ff eb b3 0f 1f 80 00 00 00 00 f3 0f 1e fa 48 8d 05 55 6f 2d 00 8b 00 85 c0 75 17 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 58 c3 0f 1f 80 00 00 00 00 41 54 49 89 d4 55 [ 2633.562332] RSP: 002b:00007fffb1a83888 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 2633.563472] RAX: ffffffffffffffda RBX: 000000000000000c RCX: 00007f9977132648 [ 2633.564541] RDX: 000000000000000c RSI: 000055b90546e230 RDI: 0000000000000001 [ 2633.565596] RBP: 000055b90546e230 R08: 00007f9977406860 R09: 00007f9977a54740 [ 2633.566653] R10: 0000000000000000 R11: 0000000000000246 R12: 00007f99774056e0 [ 2633.567692] R13: 000000000000000c R14: 00007f9977400880 R15: 000000000000000c [ 2633.568725] ---[ end trace 10b4fe52945e544d ]--- Fixes: 6a6fabbfa3e8 ("net/mlx5: Update pci error handler entries and command translation") Signed-off-by: Neta Ostrovsky Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index f71ec4d9d68e..8eaa24d865c5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -339,6 +339,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_PAGE_FAULT_RESUME: case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS: case MLX5_CMD_OP_DEALLOC_SF: + case MLX5_CMD_OP_DESTROY_UCTX: + case MLX5_CMD_OP_DESTROY_UMEM: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -464,9 +466,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT: case MLX5_CMD_OP_QUERY_GENERAL_OBJECT: case MLX5_CMD_OP_CREATE_UCTX: - case MLX5_CMD_OP_DESTROY_UCTX: case MLX5_CMD_OP_CREATE_UMEM: - case MLX5_CMD_OP_DESTROY_UMEM: case MLX5_CMD_OP_ALLOC_MEMIC: case MLX5_CMD_OP_MODIFY_XRQ: case MLX5_CMD_OP_RELEASE_XRQ_ERROR: -- cgit v1.2.3-59-g8ed1b From 2eb0cb31bc4ce2ede5460cf3ef433b40cf5f040d Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Wed, 10 Nov 2021 15:19:12 +0000 Subject: net/mlx5: E-Switch, rebuild lag only when needed A user can enable VFs without changing E-Switch mode, this can happen when a user moves straight to switchdev mode and only once in switchdev VFs are enabled via the sysfs interface. The cited commit assumed this isn't possible and exposed a single API function where the E-switch calls into the lag code, breaks the lag and prevents any other lag operations to take place until the E-switch update has ended. Breaking the hardware lag when it isn't needed can make it such that hardware lag can't be enabled again. In the sysfs call path check if the current E-Switch mode is NONE, in the context of the function it can only mean the E-Switch is moving out of NONE mode and the hardware lag should be disabled and enabled once the mode change has ended. If the mode isn't NONE it means VFs are about to be enabled and such operation doesn't require toggling the hardware lag. Fixes: cac1eb2cf2e3 ("net/mlx5: Lag, properly lock eswitch if needed") Signed-off-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 5872cc8bf953..51a8cecc4a7c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1305,12 +1305,17 @@ abort: */ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { + bool toggle_lag; int ret; if (!mlx5_esw_allowed(esw)) return 0; - mlx5_lag_disable_change(esw->dev); + toggle_lag = esw->mode == MLX5_ESWITCH_NONE; + + if (toggle_lag) + mlx5_lag_disable_change(esw->dev); + down_write(&esw->mode_lock); if (esw->mode == MLX5_ESWITCH_NONE) { ret = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, num_vfs); @@ -1324,7 +1329,10 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) esw->esw_funcs.num_vfs = num_vfs; } up_write(&esw->mode_lock); - mlx5_lag_enable_change(esw->dev); + + if (toggle_lag) + mlx5_lag_enable_change(esw->dev); + return ret; } -- cgit v1.2.3-59-g8ed1b From 38a54cae6f76c3e6a1e6c1e52c2e43a069fa78cb Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Wed, 3 Nov 2021 13:04:23 +0200 Subject: net/mlx5: Fix flow counters SF bulk query len When doing a flow counters bulk query, the number of counters to query must be aligned to 4. Current SF bulk query len is not aligned to 4, which leads to an error when trying to query more than 4 counters. Fix it by aligning SF bulk query len to 4. Fixes: 2fdeb4f4c2ae ("net/mlx5: Reduce flow counters bulk query buffer size for SFs") Signed-off-by: Avihai Horon Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 31c99d53faf7..7e0e04cf26f8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -40,7 +40,7 @@ #define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000) /* Max number of counters to query in bulk read is 32K */ #define MLX5_SW_MAX_COUNTERS_BULK BIT(15) -#define MLX5_SF_NUM_COUNTERS_BULK 6 +#define MLX5_SF_NUM_COUNTERS_BULK 8 #define MLX5_FC_POOL_MAX_THRESHOLD BIT(18) #define MLX5_FC_POOL_USED_BUFF_RATIO 10 -- cgit v1.2.3-59-g8ed1b From 806401c20a0f9c51b6c8fd7035671e6ca841f6c2 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Mon, 8 Nov 2021 16:41:05 +0200 Subject: net/mlx5e: CT, Fix multiple allocations and memleak of mod acts CT clear action offload adds additional mod hdr actions to the flow's original mod actions in order to clear the registers which hold ct_state. When such flow also includes encap action, a neigh update event can cause the driver to unoffload the flow and then reoffload it. Each time this happens, the ct clear handling adds that same set of mod hdr actions to reset ct_state until the max of mod hdr actions is reached. Also the driver never releases the allocated mod hdr actions and causing a memleak. Fix above two issues by moving CT clear mod acts allocation into the parsing actions phase and only use it when offloading the rule. The release of mod acts will be done in the normal flow_put(). backtrace: [<000000007316e2f3>] krealloc+0x83/0xd0 [<00000000ef157de1>] mlx5e_mod_hdr_alloc+0x147/0x300 [mlx5_core] [<00000000970ce4ae>] mlx5e_tc_match_to_reg_set_and_get_id+0xd7/0x240 [mlx5_core] [<0000000067c5fa17>] mlx5e_tc_match_to_reg_set+0xa/0x20 [mlx5_core] [<00000000d032eb98>] mlx5_tc_ct_entry_set_registers.isra.0+0x36/0xc0 [mlx5_core] [<00000000fd23b869>] mlx5_tc_ct_flow_offload+0x272/0x1f10 [mlx5_core] [<000000004fc24acc>] mlx5e_tc_offload_fdb_rules.part.0+0x150/0x620 [mlx5_core] [<00000000dc741c17>] mlx5e_tc_encap_flows_add+0x489/0x690 [mlx5_core] [<00000000e92e49d7>] mlx5e_rep_update_flows+0x6e4/0x9b0 [mlx5_core] [<00000000f60f5602>] mlx5e_rep_neigh_update+0x39a/0x5d0 [mlx5_core] Fixes: 1ef3018f5af3 ("net/mlx5e: CT: Support clear action") Signed-off-by: Roi Dayan Reviewed-by: Paul Blakey Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 26 ++++++++++++++-------- drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 8 +++++-- 3 files changed, 25 insertions(+), 11 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index c1c6e74c79c4..2445e2ae3324 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -1356,9 +1356,13 @@ mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv, int mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *attr, + struct mlx5e_tc_mod_hdr_acts *mod_acts, const struct flow_action_entry *act, struct netlink_ext_ack *extack) { + bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR; + int err; + if (!priv) { NL_SET_ERR_MSG_MOD(extack, "offload of ct action isn't available"); @@ -1369,6 +1373,17 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, attr->ct_attr.ct_action = act->ct.action; attr->ct_attr.nf_ft = act->ct.flow_table; + if (!clear_action) + goto out; + + err = mlx5_tc_ct_entry_set_registers(priv, mod_acts, 0, 0, 0, 0); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to set registers for ct clear"); + return err; + } + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + +out: return 0; } @@ -1898,23 +1913,16 @@ __mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv, memcpy(pre_ct_attr, attr, attr_sz); - err = mlx5_tc_ct_entry_set_registers(ct_priv, mod_acts, 0, 0, 0, 0); - if (err) { - ct_dbg("Failed to set register for ct clear"); - goto err_set_registers; - } - mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type, mod_acts->num_actions, mod_acts->actions); if (IS_ERR(mod_hdr)) { err = PTR_ERR(mod_hdr); ct_dbg("Failed to add create ct clear mod hdr"); - goto err_set_registers; + goto err_mod_hdr; } pre_ct_attr->modify_hdr = mod_hdr; - pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; rule = mlx5_tc_rule_insert(priv, orig_spec, pre_ct_attr); if (IS_ERR(rule)) { @@ -1930,7 +1938,7 @@ __mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv, err_insert: mlx5_modify_header_dealloc(priv->mdev, mod_hdr); -err_set_registers: +err_mod_hdr: netdev_warn(priv->netdev, "Failed to offload ct clear flow, err %d\n", err); kfree(pre_ct_attr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h index 363329f4aac6..99662af1e41a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h @@ -110,6 +110,7 @@ int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec); int mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *attr, + struct mlx5e_tc_mod_hdr_acts *mod_acts, const struct flow_action_entry *act, struct netlink_ext_ack *extack); @@ -172,6 +173,7 @@ mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec) static inline int mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *attr, + struct mlx5e_tc_mod_hdr_acts *mod_acts, const struct flow_action_entry *act, struct netlink_ext_ack *extack) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index cb76c41fe163..3d45f4ae80c0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3608,7 +3608,9 @@ parse_tc_nic_actions(struct mlx5e_priv *priv, attr->dest_chain = act->chain_index; break; case FLOW_ACTION_CT: - err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr, act, extack); + err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr, + &parse_attr->mod_hdr_acts, + act, extack); if (err) return err; @@ -4277,7 +4279,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, NL_SET_ERR_MSG_MOD(extack, "Sample action with connection tracking is not supported"); return -EOPNOTSUPP; } - err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr, act, extack); + err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr, + &parse_attr->mod_hdr_acts, + act, extack); if (err) return err; -- cgit v1.2.3-59-g8ed1b From ae396d85c01c7bdc9eeceecde1f493d03f793465 Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Fri, 5 Nov 2021 11:19:48 +0200 Subject: net/mlx5: Lag, update tracker when state change event received Currently, In NETDEV_CHANGELOWERSTATE/NETDEV_CHANGEUPPERSTATE events handling, tracking is not fully completed if the LAG device is not ready at the time the events occur. But, we must keep track of the upper and lower states after receiving the events because RoCE needs this info in mlx5_lag_get_roce_netdev() - in order to return the corresponding port that its running on. Returning the wrong (not most recent) port will lead to gids table being incorrect. For example: If during the attachment of a slave to the bond, the other non-attached port performs pci_reload, then the LAG device is not ready, but that should not result in dismissing attached slave tracker update automatically (which is performed in mlx5_handle_changelowerstate()), Since these events might not come later, which can lead to both bond ports having tx_enabled=0 - which is not a valid state of LAG bond. Fixes: 9b412cc35f00 ("net/mlx5e: Add LAG warning if bond slave is not lag master") Signed-off-by: Maher Sanalla Reviewed-by: Mark Bloch Reviewed-by: Jianbo Liu Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 28 +++++++++++------------ 1 file changed, 13 insertions(+), 15 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 48d2ea690d7a..4ddf6b330a44 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -615,6 +615,7 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, bool is_bonded, is_in_lag, mode_supported; int bond_status = 0; int num_slaves = 0; + int changed = 0; int idx; if (!netif_is_lag_master(upper)) @@ -653,27 +654,27 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, */ is_in_lag = num_slaves == MLX5_MAX_PORTS && bond_status == 0x3; - if (!mlx5_lag_is_ready(ldev) && is_in_lag) { - NL_SET_ERR_MSG_MOD(info->info.extack, - "Can't activate LAG offload, PF is configured with more than 64 VFs"); - return 0; - } - /* Lag mode must be activebackup or hash. */ mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP || tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH; - if (is_in_lag && !mode_supported) - NL_SET_ERR_MSG_MOD(info->info.extack, - "Can't activate LAG offload, TX type isn't supported"); - is_bonded = is_in_lag && mode_supported; if (tracker->is_bonded != is_bonded) { tracker->is_bonded = is_bonded; - return 1; + changed = 1; } - return 0; + if (!is_in_lag) + return changed; + + if (!mlx5_lag_is_ready(ldev)) + NL_SET_ERR_MSG_MOD(info->info.extack, + "Can't activate LAG offload, PF is configured with more than 64 VFs"); + else if (!mode_supported) + NL_SET_ERR_MSG_MOD(info->info.extack, + "Can't activate LAG offload, TX type isn't supported"); + + return changed; } static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev, @@ -716,9 +717,6 @@ static int mlx5_lag_netdev_event(struct notifier_block *this, ldev = container_of(this, struct mlx5_lag, nb); - if (!mlx5_lag_is_ready(ldev) && event == NETDEV_CHANGELOWERSTATE) - return NOTIFY_DONE; - tracker = ldev->tracker; switch (event) { -- cgit v1.2.3-59-g8ed1b From c4c3176739dfa6efcc5b1d1de4b3fd2b51b048c7 Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Mon, 1 Nov 2021 16:18:53 +0200 Subject: net/mlx5: E-Switch, return error if encap isn't supported On regular ConnectX HCAs getting encap mode isn't supported when the E-Switch is in NONE mode. Current code would return no error code when trying to get encap mode in such case which is wrong. Fix by returning error value to indicate failure to caller in such case. Fixes: 8e0aa4bc959c ("net/mlx5: E-switch, Protect eswitch mode changes") Signed-off-by: Raed Salem Reviewed-by: Mark Bloch Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 80fa76f60e1e..a46455694f7a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -3623,7 +3623,7 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, *encap = esw->offloads.encap; unlock: up_write(&esw->mode_lock); - return 0; + return err; } static bool -- cgit v1.2.3-59-g8ed1b From 2460386bef0b9b98b71728d3c173e15558b78d82 Mon Sep 17 00:00:00 2001 From: Marcin Wojtas Date: Mon, 15 Nov 2021 16:30:24 +0100 Subject: net: mvmdio: fix compilation warning The kernel test robot reported a following issue: >> drivers/net/ethernet/marvell/mvmdio.c:426:36: warning: unused variable 'orion_mdio_acpi_match' [-Wunused-const-variable] static const struct acpi_device_id orion_mdio_acpi_match[] = { ^ 1 warning generated. Fix that by surrounding the variable by appropriate ifdef. Fixes: c54da4c1acb1 ("net: mvmdio: add ACPI support") Reported-by: kernel test robot Signed-off-by: Marcin Wojtas Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20211115153024.209083-1-mw@semihalf.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvmdio.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c index 62a97c46fba0..ef878973b859 100644 --- a/drivers/net/ethernet/marvell/mvmdio.c +++ b/drivers/net/ethernet/marvell/mvmdio.c @@ -429,12 +429,14 @@ static const struct of_device_id orion_mdio_match[] = { }; MODULE_DEVICE_TABLE(of, orion_mdio_match); +#ifdef CONFIG_ACPI static const struct acpi_device_id orion_mdio_acpi_match[] = { { "MRVL0100", BUS_TYPE_SMI }, { "MRVL0101", BUS_TYPE_XSMI }, { }, }; MODULE_DEVICE_TABLE(acpi, orion_mdio_acpi_match); +#endif static struct platform_driver orion_mdio_driver = { .probe = orion_mdio_probe, -- cgit v1.2.3-59-g8ed1b From 9f5363916a5099e618e6e40606e91b8ce0833754 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 16 Nov 2021 14:26:10 -0500 Subject: bnxt_en: Fix compile error regression when CONFIG_BNXT_SRIOV is not set bp->sriov_cfg is not defined when CONFIG_BNXT_SRIOV is not set. Fix it by adding a helper function bnxt_sriov_cfg() to handle the logic with or without the config option. Fixes: 46d08f55d24e ("bnxt_en: extend RTNL to VF check in devlink driver_reinit") Reported-by: kernel test robot Reviewed-by: Edwin Peer Reviewed-by: Andy Gospodarek Signed-off-by: Michael Chan Link: https://lore.kernel.org/r/1637090770-22835-1-git-send-email-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 10 ++++++++++ drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index d0d5da9b78f8..4c9507d82fd0 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2258,6 +2258,16 @@ static inline void bnxt_db_write(struct bnxt *bp, struct bnxt_db_info *db, } } +/* Must hold rtnl_lock */ +static inline bool bnxt_sriov_cfg(struct bnxt *bp) +{ +#if defined(CONFIG_BNXT_SRIOV) + return BNXT_PF(bp) && (bp->pf.active_vfs || bp->sriov_cfg); +#else + return false; +#endif +} + extern const u16 bnxt_lhint_arr[]; int bnxt_alloc_rx_data(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 6fe9e9b59f83..951c4c569a9b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -442,7 +442,7 @@ static int bnxt_dl_reload_down(struct devlink *dl, bool netns_change, switch (action) { case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: { rtnl_lock(); - if (BNXT_PF(bp) && (bp->pf.active_vfs || bp->sriov_cfg)) { + if (bnxt_sriov_cfg(bp)) { NL_SET_ERR_MSG_MOD(extack, "reload is unsupported while VFs are allocated or being configured"); rtnl_unlock(); -- cgit v1.2.3-59-g8ed1b From b0024a04e48837b6556a080ff37ecd8351632596 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 16 Nov 2021 16:09:23 +0000 Subject: amt: cancel delayed_work synchronously in amt_fini() When the amt module is being removed, it calls cancel_delayed_work() to cancel pending delayed_work. But this function doesn't wait for canceling delayed_work. So, workers can be still doing after module delete. In order to avoid this, cancel_delayed_work_sync() should be used instead. Suggested-by: Jakub Kicinski Fixes: bc54e49c140b ("amt: add multicast(IGMP) report message handler") Signed-off-by: Taehee Yoo Link: https://lore.kernel.org/r/20211116160923.25258-1-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/amt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/amt.c b/drivers/net/amt.c index 47a04c330885..b732ee9a50ef 100644 --- a/drivers/net/amt.c +++ b/drivers/net/amt.c @@ -3286,7 +3286,7 @@ static void __exit amt_fini(void) { rtnl_link_unregister(&amt_link_ops); unregister_netdevice_notifier(&amt_notifier_block); - cancel_delayed_work(&source_gc_wq); + cancel_delayed_work_sync(&source_gc_wq); __amt_source_gc_work(); destroy_workqueue(amt_wq); } -- cgit v1.2.3-59-g8ed1b From 3751c3d34cd5a750c86d1c8eaf217d8faf7f9325 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 15 Nov 2021 16:21:23 +0100 Subject: net: stmmac: Fix signed/unsigned wreckage The recent addition of timestamp correction to compensate the CDC error introduced a subtle signed/unsigned bug in stmmac_get_tx_hwtstamp() while it managed for some obscure reason to avoid that in stmmac_get_rx_hwtstamp(). The issue is: s64 adjust = 0; u64 ns; adjust += -(2 * (NSEC_PER_SEC / priv->plat->clk_ptp_rate)); ns += adjust; works by chance on 64bit, but falls apart on 32bit because the compiler knows that adjust fits into 32bit and then treats the addition as a u64 + u32 resulting in an off by ~2 seconds failure. The RX variant uses an u64 for adjust and does the adjustment via ns -= adjust; because consistency is obviously overrated. Get rid of the pointless zero initialized adjust variable and do: ns -= (2 * NSEC_PER_SEC) / priv->plat->clk_ptp_rate; which is obviously correct and spares the adjust obfuscation. Aside of that it yields a more accurate result because the multiplication takes place before the integer divide truncation and not afterwards. Stick the calculation into an inline so it can't be accidentally disimproved. Return an u32 from that inline as the result is guaranteed to fit which lets the compiler optimize the substraction. Cc: stable@vger.kernel.org Fixes: 3600be5f58c1 ("net: stmmac: add timestamp correction to rid CDC sync error") Reported-by: Benedikt Spranger Signed-off-by: Thomas Gleixner Tested-by: Benedikt Spranger Tested-by: Kurt Kanzenbach # Intel EHL Link: https://lore.kernel.org/r/87mtm578cs.ffs@tglx Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index d3f350c25b9b..2eb284576336 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -511,6 +511,14 @@ bool stmmac_eee_init(struct stmmac_priv *priv) return true; } +static inline u32 stmmac_cdc_adjust(struct stmmac_priv *priv) +{ + /* Correct the clk domain crossing(CDC) error */ + if (priv->plat->has_gmac4 && priv->plat->clk_ptp_rate) + return (2 * NSEC_PER_SEC) / priv->plat->clk_ptp_rate; + return 0; +} + /* stmmac_get_tx_hwtstamp - get HW TX timestamps * @priv: driver private structure * @p : descriptor pointer @@ -524,7 +532,6 @@ static void stmmac_get_tx_hwtstamp(struct stmmac_priv *priv, { struct skb_shared_hwtstamps shhwtstamp; bool found = false; - s64 adjust = 0; u64 ns = 0; if (!priv->hwts_tx_en) @@ -543,12 +550,7 @@ static void stmmac_get_tx_hwtstamp(struct stmmac_priv *priv, } if (found) { - /* Correct the clk domain crossing(CDC) error */ - if (priv->plat->has_gmac4 && priv->plat->clk_ptp_rate) { - adjust += -(2 * (NSEC_PER_SEC / - priv->plat->clk_ptp_rate)); - ns += adjust; - } + ns -= stmmac_cdc_adjust(priv); memset(&shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps)); shhwtstamp.hwtstamp = ns_to_ktime(ns); @@ -573,7 +575,6 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p, { struct skb_shared_hwtstamps *shhwtstamp = NULL; struct dma_desc *desc = p; - u64 adjust = 0; u64 ns = 0; if (!priv->hwts_rx_en) @@ -586,11 +587,7 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p, if (stmmac_get_rx_timestamp_status(priv, p, np, priv->adv_ts)) { stmmac_get_timestamp(priv, desc, priv->adv_ts, &ns); - /* Correct the clk domain crossing(CDC) error */ - if (priv->plat->has_gmac4 && priv->plat->clk_ptp_rate) { - adjust += 2 * (NSEC_PER_SEC / priv->plat->clk_ptp_rate); - ns -= adjust; - } + ns -= stmmac_cdc_adjust(priv); netdev_dbg(priv->dev, "get valid RX hw timestamp %llu\n", ns); shhwtstamp = skb_hwtstamps(skb); -- cgit v1.2.3-59-g8ed1b From f77b83b5bbab53d2be339184838b19ed2c62c0a5 Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Tue, 16 Nov 2021 22:19:17 +0800 Subject: net: usb: r8152: Add MAC passthrough support for more Lenovo Docks Like ThinkaPad Thunderbolt 4 Dock, more Lenovo docks start to use the original Realtek USB ethernet chip ID 0bda:8153. Lenovo Docks always use their own IDs for usb hub, even for older Docks. If parent hub is from Lenovo, then r8152 should try MAC passthrough. Verified on Lenovo TBT3 dock too. Signed-off-by: Aaron Ma Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 4a02f33f0643..f9877a3e83ac 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -9603,12 +9603,9 @@ static int rtl8152_probe(struct usb_interface *intf, netdev->hw_features &= ~NETIF_F_RXCSUM; } - if (le16_to_cpu(udev->descriptor.idVendor) == VENDOR_ID_LENOVO) { - switch (le16_to_cpu(udev->descriptor.idProduct)) { - case DEVICE_ID_THINKPAD_THUNDERBOLT3_DOCK_GEN2: - case DEVICE_ID_THINKPAD_USB_C_DOCK_GEN2: - tp->lenovo_macpassthru = 1; - } + if (udev->parent && + le16_to_cpu(udev->parent->descriptor.idVendor) == VENDOR_ID_LENOVO) { + tp->lenovo_macpassthru = 1; } if (le16_to_cpu(udev->descriptor.bcdDevice) == 0x3011 && udev->serial && -- cgit v1.2.3-59-g8ed1b From 9b5a333272a48c2f8b30add7a874e46e8b26129c Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Tue, 16 Nov 2021 18:17:12 +0300 Subject: net: dpaa2-eth: fix use-after-free in dpaa2_eth_remove Access to netdev after free_netdev() will cause use-after-free bug. Move debug log before free_netdev() call to avoid it. Fixes: 7472dd9f6499 ("staging: fsl-dpaa2/eth: Move print message") Signed-off-by: Pavel Skripkin Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 714e961e7a77..6451c8383639 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -4550,10 +4550,10 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev) fsl_mc_portal_free(priv->mc_io); - free_netdev(net_dev); - dev_dbg(net_dev->dev.parent, "Removed interface %s\n", net_dev->name); + free_netdev(net_dev); + return 0; } -- cgit v1.2.3-59-g8ed1b From c366ce28750e9633f8d4b07829a9cde0e59034eb Mon Sep 17 00:00:00 2001 From: Łukasz Stelmach Date: Tue, 16 Nov 2021 22:29:15 +0100 Subject: net: ax88796c: use bit numbers insetad of bit masks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change the values of EVENT_* constants from bit masks to bit numbers as accepted by {clear,set,test}_bit() functions. Reported-by: Dan Carpenter Signed-off-by: Łukasz Stelmach Signed-off-by: David S. Miller --- drivers/net/ethernet/asix/ax88796c_main.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/asix/ax88796c_main.h b/drivers/net/ethernet/asix/ax88796c_main.h index 80263c3cef75..4a83c991dcbe 100644 --- a/drivers/net/ethernet/asix/ax88796c_main.h +++ b/drivers/net/ethernet/asix/ax88796c_main.h @@ -127,9 +127,9 @@ struct ax88796c_device { #define AX_PRIV_FLAGS_MASK (AX_CAP_COMP) unsigned long flags; - #define EVENT_INTR BIT(0) - #define EVENT_TX BIT(1) - #define EVENT_SET_MULTI BIT(2) + #define EVENT_INTR 0 + #define EVENT_TX 1 + #define EVENT_SET_MULTI 2 }; -- cgit v1.2.3-59-g8ed1b From 6afbd7b3c53cb7417189f476e99d431daccb85b0 Mon Sep 17 00:00:00 2001 From: Eryk Rybak Date: Thu, 21 Jan 2021 16:17:22 +0000 Subject: i40e: Fix correct max_pkt_size on VF RX queue Setting VLAN port increasing RX queue max_pkt_size by 4 bytes to take VLAN tag into account. Trigger the VF reset when setting port VLAN for VF to renegotiate its capabilities and reinitialize. Fixes: ba4e003d29c1 ("i40e: don't hold spinlock while resetting VF") Signed-off-by: Sylwester Dziedziuch Signed-off-by: Aleksandr Loktionov Signed-off-by: Eryk Rybak Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 53 ++++------------------ 1 file changed, 9 insertions(+), 44 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 472f56b360b8..815661632e7a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -674,14 +674,13 @@ static int i40e_config_vsi_rx_queue(struct i40e_vf *vf, u16 vsi_id, u16 vsi_queue_id, struct virtchnl_rxq_info *info) { + u16 pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id); struct i40e_pf *pf = vf->pf; + struct i40e_vsi *vsi = pf->vsi[vf->lan_vsi_idx]; struct i40e_hw *hw = &pf->hw; struct i40e_hmc_obj_rxq rx_ctx; - u16 pf_queue_id; int ret = 0; - pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id); - /* clear the context structure first */ memset(&rx_ctx, 0, sizeof(struct i40e_hmc_obj_rxq)); @@ -719,6 +718,10 @@ static int i40e_config_vsi_rx_queue(struct i40e_vf *vf, u16 vsi_id, } rx_ctx.rxmax = info->max_pkt_size; + /* if port VLAN is configured increase the max packet size */ + if (vsi->info.pvid) + rx_ctx.rxmax += VLAN_HLEN; + /* enable 32bytes desc always */ rx_ctx.dsize = 1; @@ -4169,34 +4172,6 @@ error_param: return ret; } -/** - * i40e_vsi_has_vlans - True if VSI has configured VLANs - * @vsi: pointer to the vsi - * - * Check if a VSI has configured any VLANs. False if we have a port VLAN or if - * we have no configured VLANs. Do not call while holding the - * mac_filter_hash_lock. - */ -static bool i40e_vsi_has_vlans(struct i40e_vsi *vsi) -{ - bool have_vlans; - - /* If we have a port VLAN, then the VSI cannot have any VLANs - * configured, as all MAC/VLAN filters will be assigned to the PVID. - */ - if (vsi->info.pvid) - return false; - - /* Since we don't have a PVID, we know that if the device is in VLAN - * mode it must be because of a VLAN filter configured on this VSI. - */ - spin_lock_bh(&vsi->mac_filter_hash_lock); - have_vlans = i40e_is_vsi_in_vlan(vsi); - spin_unlock_bh(&vsi->mac_filter_hash_lock); - - return have_vlans; -} - /** * i40e_ndo_set_vf_port_vlan * @netdev: network interface device structure @@ -4253,19 +4228,9 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, /* duplicate request, so just return success */ goto error_pvid; - if (i40e_vsi_has_vlans(vsi)) { - dev_err(&pf->pdev->dev, - "VF %d has already configured VLAN filters and the administrator is requesting a port VLAN override.\nPlease unload and reload the VF driver for this change to take effect.\n", - vf_id); - /* Administrator Error - knock the VF offline until he does - * the right thing by reconfiguring his network correctly - * and then reloading the VF driver. - */ - i40e_vc_disable_vf(vf); - /* During reset the VF got a new VSI, so refresh the pointer. */ - vsi = pf->vsi[vf->lan_vsi_idx]; - } - + i40e_vc_disable_vf(vf); + /* During reset the VF got a new VSI, so refresh a pointer. */ + vsi = pf->vsi[vf->lan_vsi_idx]; /* Locked once because multiple functions below iterate list */ spin_lock_bh(&vsi->mac_filter_hash_lock); -- cgit v1.2.3-59-g8ed1b From 37d9e304acd903a445df8208b8a13d707902dea6 Mon Sep 17 00:00:00 2001 From: Michal Maloszewski Date: Wed, 24 Feb 2021 12:07:48 +0000 Subject: i40e: Fix NULL ptr dereference on VSI filter sync Remove the reason of null pointer dereference in sync VSI filters. Added new I40E_VSI_RELEASING flag to signalize deleting and releasing of VSI resources to sync this thread with sync filters subtask. Without this patch it is possible to start update the VSI filter list after VSI is removed, that's causing a kernel oops. Fixes: 41c445ff0f48 ("i40e: main driver core") Signed-off-by: Grzegorz Szczurek Signed-off-by: Michal Maloszewski Reviewed-by: Przemyslaw Patynowski Reviewed-by: Witold Fijalkowski Reviewed-by: Jaroslaw Gawin Reviewed-by: Aleksandr Loktionov Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e.h | 1 + drivers/net/ethernet/intel/i40e/i40e_main.c | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 3d528fba754b..35a83a161b6f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -161,6 +161,7 @@ enum i40e_vsi_state_t { __I40E_VSI_OVERFLOW_PROMISC, __I40E_VSI_REINIT_REQUESTED, __I40E_VSI_DOWN_REQUESTED, + __I40E_VSI_RELEASING, /* This must be last as it determines the size of the BITMAP */ __I40E_VSI_STATE_SIZE__, }; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index ba862131b9bd..6e309d6ce37d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -2623,7 +2623,8 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf) for (v = 0; v < pf->num_alloc_vsi; v++) { if (pf->vsi[v] && - (pf->vsi[v]->flags & I40E_VSI_FLAG_FILTER_CHANGED)) { + (pf->vsi[v]->flags & I40E_VSI_FLAG_FILTER_CHANGED) && + !test_bit(__I40E_VSI_RELEASING, pf->vsi[v]->state)) { int ret = i40e_sync_vsi_filters(pf->vsi[v]); if (ret) { @@ -13771,7 +13772,7 @@ int i40e_vsi_release(struct i40e_vsi *vsi) dev_info(&pf->pdev->dev, "Can't remove PF VSI\n"); return -ENODEV; } - + set_bit(__I40E_VSI_RELEASING, vsi->state); uplink_seid = vsi->uplink_seid; if (vsi->type != I40E_VSI_SRIOV) { if (vsi->netdev_registered) { -- cgit v1.2.3-59-g8ed1b From d2a69fefd75683004ffe87166de5635b3267ee07 Mon Sep 17 00:00:00 2001 From: Eryk Rybak Date: Fri, 23 Apr 2021 13:43:25 +0200 Subject: i40e: Fix changing previously set num_queue_pairs for PFs Currently, the i40e_vsi_setup_queue_map is basing the count of queues in TCs on a VSI's alloc_queue_pairs member which is not changed throughout any user's action (for example via ethtool's set_channels callback). This implies that vsi->tc_config.tc_info[n].qcount value that is given to the kernel via netdev_set_tc_queue() that notifies about the count of queues per particular traffic class is constant even if user has changed the total count of queues. This in turn caused the kernel warning after setting the queue count to the lower value than the initial one: $ ethtool -l ens801f0 Channel parameters for ens801f0: Pre-set maximums: RX: 0 TX: 0 Other: 1 Combined: 64 Current hardware settings: RX: 0 TX: 0 Other: 1 Combined: 64 $ ethtool -L ens801f0 combined 40 [dmesg] Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled! Reason was that vsi->alloc_queue_pairs stayed at 64 value which was used to set the qcount on TC0 (by default only TC0 exists so all of the existing queues are assigned to TC0). we update the offset/qcount via netdev_set_tc_queue() back to the old value but then the netif_set_real_num_tx_queues() is using the vsi->num_queue_pairs as a value which got set to 40. Fix it by using vsi->req_queue_pairs as a queue count that will be distributed across TCs. Do it only for non-zero values, which implies that user actually requested the new count of queues. For VSIs other than main, stay with the vsi->alloc_queue_pairs as we only allow manipulating the queue count on main VSI. Fixes: bc6d33c8d93f ("i40e: Fix the number of queues available to be mapped for use") Co-developed-by: Maciej Fijalkowski Signed-off-by: Maciej Fijalkowski Co-developed-by: Przemyslaw Patynowski Signed-off-by: Przemyslaw Patynowski Signed-off-by: Eryk Rybak Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 35 +++++++++++++++++++---------- 1 file changed, 23 insertions(+), 12 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 6e309d6ce37d..8437cc14bfc6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1790,6 +1790,7 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, bool is_add) { struct i40e_pf *pf = vsi->back; + u16 num_tc_qps = 0; u16 sections = 0; u8 netdev_tc = 0; u16 numtc = 1; @@ -1797,13 +1798,29 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, u8 offset; u16 qmap; int i; - u16 num_tc_qps = 0; sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID; offset = 0; + if (vsi->type == I40E_VSI_MAIN) { + /* This code helps add more queue to the VSI if we have + * more cores than RSS can support, the higher cores will + * be served by ATR or other filters. Furthermore, the + * non-zero req_queue_pairs says that user requested a new + * queue count via ethtool's set_channels, so use this + * value for queues distribution across traffic classes + */ + if (vsi->req_queue_pairs > 0) + vsi->num_queue_pairs = vsi->req_queue_pairs; + else if (pf->flags & I40E_FLAG_MSIX_ENABLED) + vsi->num_queue_pairs = pf->num_lan_msix; + } + /* Number of queues per enabled TC */ - num_tc_qps = vsi->alloc_queue_pairs; + if (vsi->type == I40E_VSI_MAIN) + num_tc_qps = vsi->num_queue_pairs; + else + num_tc_qps = vsi->alloc_queue_pairs; if (enabled_tc && (vsi->back->flags & I40E_FLAG_DCB_ENABLED)) { /* Find numtc from enabled TC bitmap */ for (i = 0, numtc = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { @@ -1881,16 +1898,10 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, } ctxt->info.tc_mapping[i] = cpu_to_le16(qmap); } - - /* Set actual Tx/Rx queue pairs */ - vsi->num_queue_pairs = offset; - if ((vsi->type == I40E_VSI_MAIN) && (numtc == 1)) { - if (vsi->req_queue_pairs > 0) - vsi->num_queue_pairs = vsi->req_queue_pairs; - else if (pf->flags & I40E_FLAG_MSIX_ENABLED) - vsi->num_queue_pairs = pf->num_lan_msix; - } - + /* Do not change previously set num_queue_pairs for PFs */ + if ((vsi->type == I40E_VSI_MAIN && numtc != 1) || + vsi->type != I40E_VSI_MAIN) + vsi->num_queue_pairs = offset; /* Scheduler section valid can only be set for ADD VSI */ if (is_add) { sections |= I40E_AQ_VSI_PROP_SCHED_VALID; -- cgit v1.2.3-59-g8ed1b From 9e0a603cb7dce2a19d98116d42de84b6db26d716 Mon Sep 17 00:00:00 2001 From: Eryk Rybak Date: Fri, 23 Apr 2021 13:43:26 +0200 Subject: i40e: Fix ping is lost after configuring ADq on VF Properly reconfigure VF VSIs after VF request ADQ. Created new function to update queue mapping and queue pairs per TC with AQ update VSI. This sets proper RSS size on NIC. VFs num_queue_pairs should not be changed during setup of queue maps. Previously, VF main VSI in ADQ had configured too many queues and had wrong RSS size, which lead to packets not being consumed and drops in connectivity. Fixes: bc6d33c8d93f ("i40e: Fix the number of queues available to be mapped for use") Co-developed-by: Przemyslaw Patynowski Signed-off-by: Przemyslaw Patynowski Signed-off-by: Eryk Rybak Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e.h | 1 + drivers/net/ethernet/intel/i40e/i40e_main.c | 64 +++++++++++++++++++++- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 17 ++++-- 3 files changed, 74 insertions(+), 8 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 35a83a161b6f..4d939af0a626 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -1248,6 +1248,7 @@ void i40e_ptp_restore_hw_time(struct i40e_pf *pf); void i40e_ptp_init(struct i40e_pf *pf); void i40e_ptp_stop(struct i40e_pf *pf); int i40e_ptp_alloc_pins(struct i40e_pf *pf); +int i40e_update_adq_vsi_queues(struct i40e_vsi *vsi, int vsi_offset); int i40e_is_vsi_uplink_mode_veb(struct i40e_vsi *vsi); i40e_status i40e_get_partition_bw_setting(struct i40e_pf *pf); i40e_status i40e_set_partition_bw_setting(struct i40e_pf *pf); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 8437cc14bfc6..37386a270db5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1801,6 +1801,8 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID; offset = 0; + /* zero out queue mapping, it will get updated on the end of the function */ + memset(ctxt->info.queue_mapping, 0, sizeof(ctxt->info.queue_mapping)); if (vsi->type == I40E_VSI_MAIN) { /* This code helps add more queue to the VSI if we have @@ -1817,10 +1819,12 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, } /* Number of queues per enabled TC */ - if (vsi->type == I40E_VSI_MAIN) + if (vsi->type == I40E_VSI_MAIN || + (vsi->type == I40E_VSI_SRIOV && vsi->num_queue_pairs != 0)) num_tc_qps = vsi->num_queue_pairs; else num_tc_qps = vsi->alloc_queue_pairs; + if (enabled_tc && (vsi->back->flags & I40E_FLAG_DCB_ENABLED)) { /* Find numtc from enabled TC bitmap */ for (i = 0, numtc = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { @@ -1898,10 +1902,12 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, } ctxt->info.tc_mapping[i] = cpu_to_le16(qmap); } - /* Do not change previously set num_queue_pairs for PFs */ + /* Do not change previously set num_queue_pairs for PFs and VFs*/ if ((vsi->type == I40E_VSI_MAIN && numtc != 1) || - vsi->type != I40E_VSI_MAIN) + (vsi->type == I40E_VSI_SRIOV && vsi->num_queue_pairs == 0) || + (vsi->type != I40E_VSI_MAIN && vsi->type != I40E_VSI_SRIOV)) vsi->num_queue_pairs = offset; + /* Scheduler section valid can only be set for ADD VSI */ if (is_add) { sections |= I40E_AQ_VSI_PROP_SCHED_VALID; @@ -5438,6 +5444,58 @@ static void i40e_vsi_update_queue_map(struct i40e_vsi *vsi, sizeof(vsi->info.tc_mapping)); } +/** + * i40e_update_adq_vsi_queues - update queue mapping for ADq VSI + * @vsi: the VSI being reconfigured + * @vsi_offset: offset from main VF VSI + */ +int i40e_update_adq_vsi_queues(struct i40e_vsi *vsi, int vsi_offset) +{ + struct i40e_vsi_context ctxt = {}; + struct i40e_pf *pf; + struct i40e_hw *hw; + int ret; + + if (!vsi) + return I40E_ERR_PARAM; + pf = vsi->back; + hw = &pf->hw; + + ctxt.seid = vsi->seid; + ctxt.pf_num = hw->pf_id; + ctxt.vf_num = vsi->vf_id + hw->func_caps.vf_base_id + vsi_offset; + ctxt.uplink_seid = vsi->uplink_seid; + ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL; + ctxt.flags = I40E_AQ_VSI_TYPE_VF; + ctxt.info = vsi->info; + + i40e_vsi_setup_queue_map(vsi, &ctxt, vsi->tc_config.enabled_tc, + false); + if (vsi->reconfig_rss) { + vsi->rss_size = min_t(int, pf->alloc_rss_size, + vsi->num_queue_pairs); + ret = i40e_vsi_config_rss(vsi); + if (ret) { + dev_info(&pf->pdev->dev, "Failed to reconfig rss for num_queues\n"); + return ret; + } + vsi->reconfig_rss = false; + } + + ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); + if (ret) { + dev_info(&pf->pdev->dev, "Update vsi config failed, err %s aq_err %s\n", + i40e_stat_str(hw, ret), + i40e_aq_str(hw, hw->aq.asq_last_status)); + return ret; + } + /* update the local VSI info with updated queue map */ + i40e_vsi_update_queue_map(vsi, &ctxt); + vsi->info.valid_sections = 0; + + return ret; +} + /** * i40e_vsi_config_tc - Configure VSI Tx Scheduler for given TC map * @vsi: VSI to be configured diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 815661632e7a..2102db11972a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -2220,11 +2220,12 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg) struct virtchnl_vsi_queue_config_info *qci = (struct virtchnl_vsi_queue_config_info *)msg; struct virtchnl_queue_pair_info *qpi; - struct i40e_pf *pf = vf->pf; u16 vsi_id, vsi_queue_id = 0; - u16 num_qps_all = 0; + struct i40e_pf *pf = vf->pf; i40e_status aq_ret = 0; int i, j = 0, idx = 0; + struct i40e_vsi *vsi; + u16 num_qps_all = 0; if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { aq_ret = I40E_ERR_PARAM; @@ -2313,9 +2314,15 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg) pf->vsi[vf->lan_vsi_idx]->num_queue_pairs = qci->num_queue_pairs; } else { - for (i = 0; i < vf->num_tc; i++) - pf->vsi[vf->ch[i].vsi_idx]->num_queue_pairs = - vf->ch[i].num_qps; + for (i = 0; i < vf->num_tc; i++) { + vsi = pf->vsi[vf->ch[i].vsi_idx]; + vsi->num_queue_pairs = vf->ch[i].num_qps; + + if (i40e_update_adq_vsi_queues(vsi, i)) { + aq_ret = I40E_ERR_CONFIG; + goto error_param; + } + } } error_param: -- cgit v1.2.3-59-g8ed1b From 3a3b311e3881172fc8e019b6508f04bc40c92d9d Mon Sep 17 00:00:00 2001 From: Karen Sornek Date: Wed, 28 Apr 2021 10:19:41 +0200 Subject: i40e: Fix warning message and call stack during rmmod i40e driver Restore part of reset functionality used when reset is called from the VF to reset itself. Without this fix warning message is displayed when VF is being removed via sysfs. Fix the crash of the VF during reset by ensuring that the PF receives the reset message successfully. Refactor code to use one function instead of two. Fixes: 5c3c48ac6bf5 ("i40e: implement virtual device interface") Signed-off-by: Grzegorz Szczurek Signed-off-by: Karen Sornek Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 53 +++++++++------------- 1 file changed, 21 insertions(+), 32 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 2102db11972a..80ae264c99ba 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -183,17 +183,18 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf) /***********************misc routines*****************************/ /** - * i40e_vc_disable_vf + * i40e_vc_reset_vf * @vf: pointer to the VF info - * - * Disable the VF through a SW reset. + * @notify_vf: notify vf about reset or not + * Reset VF handler. **/ -static inline void i40e_vc_disable_vf(struct i40e_vf *vf) +static void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf) { struct i40e_pf *pf = vf->pf; int i; - i40e_vc_notify_vf_reset(vf); + if (notify_vf) + i40e_vc_notify_vf_reset(vf); /* We want to ensure that an actual reset occurs initiated after this * function was called. However, we do not want to wait forever, so @@ -211,9 +212,14 @@ static inline void i40e_vc_disable_vf(struct i40e_vf *vf) usleep_range(10000, 20000); } - dev_warn(&vf->pf->pdev->dev, - "Failed to initiate reset for VF %d after 200 milliseconds\n", - vf->vf_id); + if (notify_vf) + dev_warn(&vf->pf->pdev->dev, + "Failed to initiate reset for VF %d after 200 milliseconds\n", + vf->vf_id); + else + dev_dbg(&vf->pf->pdev->dev, + "Failed to initiate reset for VF %d after 200 milliseconds\n", + vf->vf_id); } /** @@ -2108,20 +2114,6 @@ err: return ret; } -/** - * i40e_vc_reset_vf_msg - * @vf: pointer to the VF info - * - * called from the VF to reset itself, - * unlike other virtchnl messages, PF driver - * doesn't send the response back to the VF - **/ -static void i40e_vc_reset_vf_msg(struct i40e_vf *vf) -{ - if (test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) - i40e_reset_vf(vf, false); -} - /** * i40e_vc_config_promiscuous_mode_msg * @vf: pointer to the VF info @@ -2617,8 +2609,7 @@ static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg) } else { /* successful request */ vf->num_req_queues = req_pairs; - i40e_vc_notify_vf_reset(vf); - i40e_reset_vf(vf, false); + i40e_vc_reset_vf(vf, true); return 0; } @@ -3813,8 +3804,7 @@ static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg) vf->num_req_queues = 0; /* reset the VF in order to allocate resources */ - i40e_vc_notify_vf_reset(vf); - i40e_reset_vf(vf, false); + i40e_vc_reset_vf(vf, true); return I40E_SUCCESS; @@ -3854,8 +3844,7 @@ static int i40e_vc_del_qch_msg(struct i40e_vf *vf, u8 *msg) } /* reset the VF in order to allocate resources */ - i40e_vc_notify_vf_reset(vf); - i40e_reset_vf(vf, false); + i40e_vc_reset_vf(vf, true); return I40E_SUCCESS; @@ -3917,7 +3906,7 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode, i40e_vc_notify_vf_link_state(vf); break; case VIRTCHNL_OP_RESET_VF: - i40e_vc_reset_vf_msg(vf); + i40e_vc_reset_vf(vf, false); ret = 0; break; case VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE: @@ -4171,7 +4160,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) /* Force the VF interface down so it has to bring up with new MAC * address */ - i40e_vc_disable_vf(vf); + i40e_vc_reset_vf(vf, true); dev_info(&pf->pdev->dev, "Bring down and up the VF interface to make this change effective.\n"); error_param: @@ -4235,7 +4224,7 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, /* duplicate request, so just return success */ goto error_pvid; - i40e_vc_disable_vf(vf); + i40e_vc_reset_vf(vf, true); /* During reset the VF got a new VSI, so refresh a pointer. */ vsi = pf->vsi[vf->lan_vsi_idx]; /* Locked once because multiple functions below iterate list */ @@ -4613,7 +4602,7 @@ int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting) goto out; vf->trusted = setting; - i40e_vc_disable_vf(vf); + i40e_vc_reset_vf(vf, true); dev_info(&pf->pdev->dev, "VF %u is now %strusted\n", vf_id, setting ? "" : "un"); -- cgit v1.2.3-59-g8ed1b From 2e6d218c1ec6fb9cd70693b78134cbc35ae0b5a9 Mon Sep 17 00:00:00 2001 From: Jedrzej Jagielski Date: Mon, 21 Jun 2021 08:37:31 +0000 Subject: i40e: Fix creation of first queue by omitting it if is not power of two Reject TCs creation with proper message if the first queue assignment is not equal to the power of two. The first queue number was checked too late in the second queue iteration, if second queue was configured at all. Now if first queue value is not a power of two, then trying to create qdisc will be rejected. Fixes: 8f88b3034db3 ("i40e: Add infrastructure for queue channel support") Signed-off-by: Grzegorz Szczurek Signed-off-by: Jedrzej Jagielski Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 59 ++++++++++------------------- 1 file changed, 19 insertions(+), 40 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 37386a270db5..0a98fab6d019 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5786,24 +5786,6 @@ static void i40e_remove_queue_channels(struct i40e_vsi *vsi) INIT_LIST_HEAD(&vsi->ch_list); } -/** - * i40e_is_any_channel - channel exist or not - * @vsi: ptr to VSI to which channels are associated with - * - * Returns true or false if channel(s) exist for associated VSI or not - **/ -static bool i40e_is_any_channel(struct i40e_vsi *vsi) -{ - struct i40e_channel *ch, *ch_tmp; - - list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) { - if (ch->initialized) - return true; - } - - return false; -} - /** * i40e_get_max_queues_for_channel * @vsi: ptr to VSI to which channels are associated with @@ -6310,26 +6292,15 @@ int i40e_create_queue_channel(struct i40e_vsi *vsi, /* By default we are in VEPA mode, if this is the first VF/VMDq * VSI to be added switch to VEB mode. */ - if ((!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) || - (!i40e_is_any_channel(vsi))) { - if (!is_power_of_2(vsi->tc_config.tc_info[0].qcount)) { - dev_dbg(&pf->pdev->dev, - "Failed to create channel. Override queues (%u) not power of 2\n", - vsi->tc_config.tc_info[0].qcount); - return -EINVAL; - } - if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) { - pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; + if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) { + pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; - if (vsi->type == I40E_VSI_MAIN) { - if (pf->flags & I40E_FLAG_TC_MQPRIO) - i40e_do_reset(pf, I40E_PF_RESET_FLAG, - true); - else - i40e_do_reset_safe(pf, - I40E_PF_RESET_FLAG); - } + if (vsi->type == I40E_VSI_MAIN) { + if (pf->flags & I40E_FLAG_TC_MQPRIO) + i40e_do_reset(pf, I40E_PF_RESET_FLAG, true); + else + i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG); } /* now onwards for main VSI, number of queues will be value * of TC0's queue count @@ -7982,12 +7953,20 @@ config_tc: vsi->seid); need_reset = true; goto exit; - } else { - dev_info(&vsi->back->pdev->dev, - "Setup channel (id:%u) utilizing num_queues %d\n", - vsi->seid, vsi->tc_config.tc_info[0].qcount); + } else if (enabled_tc && + (!is_power_of_2(vsi->tc_config.tc_info[0].qcount))) { + netdev_info(netdev, + "Failed to create channel. Override queues (%u) not power of 2\n", + vsi->tc_config.tc_info[0].qcount); + ret = -EINVAL; + need_reset = true; + goto exit; } + dev_info(&vsi->back->pdev->dev, + "Setup channel (id:%u) utilizing num_queues %d\n", + vsi->seid, vsi->tc_config.tc_info[0].qcount); + if (pf->flags & I40E_FLAG_TC_MQPRIO) { if (vsi->mqprio_qopt.max_rate[0]) { u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0]; -- cgit v1.2.3-59-g8ed1b From 5aff430d4e33a0b48a6b3d5beb06f79da23f9916 Mon Sep 17 00:00:00 2001 From: Grzegorz Szczurek Date: Fri, 29 Oct 2021 11:26:01 +0200 Subject: i40e: Fix display error code in dmesg Fix misleading display error in dmesg if tc filter return fail. Only i40e status error code should be converted to string, not linux error code. Otherwise, we return false information about the error. Fixes: 2f4b411a3d67 ("i40e: Enable cloud filters via tc-flower") Signed-off-by: Grzegorz Szczurek Signed-off-by: Mateusz Palczewski Tested-by: Dave Switzer Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 0a98fab6d019..e118cf9265c7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -8531,9 +8531,8 @@ static int i40e_configure_clsflower(struct i40e_vsi *vsi, err = i40e_add_del_cloud_filter(vsi, filter, true); if (err) { - dev_err(&pf->pdev->dev, - "Failed to add cloud filter, err %s\n", - i40e_stat_str(&pf->hw, err)); + dev_err(&pf->pdev->dev, "Failed to add cloud filter, err %d\n", + err); goto err; } -- cgit v1.2.3-59-g8ed1b From a280ef90af01dc133d0a52387e563015686d6294 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 17 Nov 2021 10:34:54 +0300 Subject: octeontx2-af: debugfs: don't corrupt user memory The user supplies the "count" value to say how big its read buffer is. The rvu_dbg_lmtst_map_table_display() function does not take the "count" into account but instead just copies the whole table, potentially corrupting the user's data. Introduce the "ret" variable to store how many bytes we can copy. Also I changed the type of "off" to size_t to make using min() simpler. Fixes: 0daa55d033b0 ("octeontx2-af: cn10k: debugfs for dumping LMTST map table") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20211117073454.GD5237@kili Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index c7fd466a0efd..a09a507369ac 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -236,10 +236,11 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp, u64 lmt_addr, val, tbl_base; int pf, vf, num_vfs, hw_vfs; void __iomem *lmt_map_base; - int index = 0, off = 0; - int bytes_not_copied; int buf_size = 10240; + size_t off = 0; + int index = 0; char *buf; + int ret; /* don't allow partial reads */ if (*ppos != 0) @@ -303,15 +304,17 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp, } off += scnprintf(&buf[off], buf_size - 1 - off, "\n"); - bytes_not_copied = copy_to_user(buffer, buf, off); + ret = min(off, count); + if (copy_to_user(buffer, buf, ret)) + ret = -EFAULT; kfree(buf); iounmap(lmt_map_base); - if (bytes_not_copied) - return -EFAULT; + if (ret < 0) + return ret; - *ppos = off; - return off; + *ppos = ret; + return ret; } RVU_DEBUG_FOPS(lmtst_map_table, lmtst_map_table_display, NULL); -- cgit v1.2.3-59-g8ed1b From a66998e0fbf213d47d02813b9679426129d0d114 Mon Sep 17 00:00:00 2001 From: Teng Qi Date: Wed, 17 Nov 2021 11:44:53 +0800 Subject: ethernet: hisilicon: hns: hns_dsaf_misc: fix a possible array overflow in hns_dsaf_ge_srst_by_port() The if statement: if (port >= DSAF_GE_NUM) return; limits the value of port less than DSAF_GE_NUM (i.e., 8). However, if the value of port is 6 or 7, an array overflow could occur: port_rst_off = dsaf_dev->mac_cb[port]->port_rst_off; because the length of dsaf_dev->mac_cb is DSAF_MAX_PORT_NUM (i.e., 6). To fix this possible array overflow, we first check port and if it is greater than or equal to DSAF_MAX_PORT_NUM, the function returns. Reported-by: TOTE Robot Signed-off-by: Teng Qi Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c index 23d9cbf262c3..740850b64aff 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c @@ -400,6 +400,10 @@ static void hns_dsaf_ge_srst_by_port(struct dsaf_device *dsaf_dev, u32 port, return; if (!HNS_DSAF_IS_DEBUG(dsaf_dev)) { + /* DSAF_MAX_PORT_NUM is 6, but DSAF_GE_NUM is 8. + We need check to prevent array overflow */ + if (port >= DSAF_MAX_PORT_NUM) + return; reg_val_1 = 0x1 << port; port_rst_off = dsaf_dev->mac_cb[port]->port_rst_off; /* there is difference between V1 and V2 in register.*/ -- cgit v1.2.3-59-g8ed1b From 5d2ca2e12dfb2aff3388ca57b06f570fa6206ced Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Wed, 17 Nov 2021 12:59:52 -0800 Subject: e100: fix device suspend/resume As reported in [1], e100 was no longer working for suspend/resume cycles. The previous commit mentioned in the fixes appears to have broken things and this attempts to practice best known methods for device power management and keep wake-up working while allowing suspend/resume to work. To do this, I reorder a little bit of code and fix the resume path to make sure the device is enabled. [1] https://bugzilla.kernel.org/show_bug.cgi?id=214933 Fixes: 69a74aef8a18 ("e100: use generic power management") Cc: Vaibhav Gupta Reported-by: Alexey Kuznetsov Signed-off-by: Jesse Brandeburg Tested-by: Alexey Kuznetsov Signed-off-by: Tony Nguyen Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/e100.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index 5039a2536951..0bf3d47bb90d 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -3003,9 +3003,10 @@ static void __e100_shutdown(struct pci_dev *pdev, bool *enable_wake) struct net_device *netdev = pci_get_drvdata(pdev); struct nic *nic = netdev_priv(netdev); + netif_device_detach(netdev); + if (netif_running(netdev)) e100_down(nic); - netif_device_detach(netdev); if ((nic->flags & wol_magic) | e100_asf(nic)) { /* enable reverse auto-negotiation */ @@ -3022,7 +3023,7 @@ static void __e100_shutdown(struct pci_dev *pdev, bool *enable_wake) *enable_wake = false; } - pci_clear_master(pdev); + pci_disable_device(pdev); } static int __e100_power_off(struct pci_dev *pdev, bool wake) @@ -3042,8 +3043,6 @@ static int __maybe_unused e100_suspend(struct device *dev_d) __e100_shutdown(to_pci_dev(dev_d), &wake); - device_wakeup_disable(dev_d); - return 0; } @@ -3051,6 +3050,14 @@ static int __maybe_unused e100_resume(struct device *dev_d) { struct net_device *netdev = dev_get_drvdata(dev_d); struct nic *nic = netdev_priv(netdev); + int err; + + err = pci_enable_device(to_pci_dev(dev_d)); + if (err) { + netdev_err(netdev, "Resume cannot enable PCI device, aborting\n"); + return err; + } + pci_set_master(to_pci_dev(dev_d)); /* disable reverse auto-negotiation */ if (nic->phy == phy_82552_v) { @@ -3062,10 +3069,11 @@ static int __maybe_unused e100_resume(struct device *dev_d) smartspeed & ~(E100_82552_REV_ANEG)); } - netif_device_attach(netdev); if (netif_running(netdev)) e100_up(nic); + netif_device_attach(netdev); + return 0; } -- cgit v1.2.3-59-g8ed1b From 61217be886b5f7402843677e4be7e7e83de9cb41 Mon Sep 17 00:00:00 2001 From: zhangyue Date: Thu, 18 Nov 2021 13:46:32 +0800 Subject: net: tulip: de4x5: fix the problem that the array 'lp->phy[8]' may be out of bound In line 5001, if all id in the array 'lp->phy[8]' is not 0, when the 'for' end, the 'k' is 8. At this time, the array 'lp->phy[8]' may be out of bound. Signed-off-by: zhangyue Signed-off-by: David S. Miller --- drivers/net/ethernet/dec/tulip/de4x5.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c index 13121c4dcfe6..bb334042f8e1 100644 --- a/drivers/net/ethernet/dec/tulip/de4x5.c +++ b/drivers/net/ethernet/dec/tulip/de4x5.c @@ -5000,19 +5000,23 @@ mii_get_phy(struct net_device *dev) } if ((j == limit) && (i < DE4X5_MAX_MII)) { for (k=0; k < DE4X5_MAX_PHY && lp->phy[k].id; k++); - lp->phy[k].addr = i; - lp->phy[k].id = id; - lp->phy[k].spd.reg = GENERIC_REG; /* ANLPA register */ - lp->phy[k].spd.mask = GENERIC_MASK; /* 100Mb/s technologies */ - lp->phy[k].spd.value = GENERIC_VALUE; /* TX & T4, H/F Duplex */ - lp->mii_cnt++; - lp->active++; - printk("%s: Using generic MII device control. If the board doesn't operate,\nplease mail the following dump to the author:\n", dev->name); - j = de4x5_debug; - de4x5_debug |= DEBUG_MII; - de4x5_dbg_mii(dev, k); - de4x5_debug = j; - printk("\n"); + if (k < DE4X5_MAX_PHY) { + lp->phy[k].addr = i; + lp->phy[k].id = id; + lp->phy[k].spd.reg = GENERIC_REG; /* ANLPA register */ + lp->phy[k].spd.mask = GENERIC_MASK; /* 100Mb/s technologies */ + lp->phy[k].spd.value = GENERIC_VALUE; /* TX & T4, H/F Duplex */ + lp->mii_cnt++; + lp->active++; + printk("%s: Using generic MII device control. If the board doesn't operate,\nplease mail the following dump to the author:\n", dev->name); + j = de4x5_debug; + de4x5_debug |= DEBUG_MII; + de4x5_dbg_mii(dev, k); + de4x5_debug = j; + printk("\n"); + } else { + goto purgatory; + } } } purgatory: -- cgit v1.2.3-59-g8ed1b From 0fa68da72c3be09e06dd833258ee89c33374195f Mon Sep 17 00:00:00 2001 From: Teng Qi Date: Thu, 18 Nov 2021 15:01:18 +0800 Subject: net: ethernet: dec: tulip: de4x5: fix possible array overflows in type3_infoblock() The definition of macro MOTO_SROM_BUG is: #define MOTO_SROM_BUG (lp->active == 8 && (get_unaligned_le32( dev->dev_addr) & 0x00ffffff) == 0x3e0008) and the if statement if (MOTO_SROM_BUG) lp->active = 0; using this macro indicates lp->active could be 8. If lp->active is 8 and the second comparison of this macro is false. lp->active will remain 8 in: lp->phy[lp->active].gep = (*p ? p : NULL); p += (2 * (*p) + 1); lp->phy[lp->active].rst = (*p ? p : NULL); p += (2 * (*p) + 1); lp->phy[lp->active].mc = get_unaligned_le16(p); p += 2; lp->phy[lp->active].ana = get_unaligned_le16(p); p += 2; lp->phy[lp->active].fdx = get_unaligned_le16(p); p += 2; lp->phy[lp->active].ttm = get_unaligned_le16(p); p += 2; lp->phy[lp->active].mci = *p; However, the length of array lp->phy is 8, so array overflows can occur. To fix these possible array overflows, we first check lp->active and then return -EINVAL if it is greater or equal to ARRAY_SIZE(lp->phy) (i.e. 8). Reported-by: TOTE Robot Signed-off-by: Teng Qi Reviewed-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/dec/tulip/de4x5.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c index bb334042f8e1..71730ef4cd57 100644 --- a/drivers/net/ethernet/dec/tulip/de4x5.c +++ b/drivers/net/ethernet/dec/tulip/de4x5.c @@ -4709,6 +4709,10 @@ type3_infoblock(struct net_device *dev, u_char count, u_char *p) lp->ibn = 3; lp->active = *p++; if (MOTO_SROM_BUG) lp->active = 0; + /* if (MOTO_SROM_BUG) statement indicates lp->active could + * be 8 (i.e. the size of array lp->phy) */ + if (WARN_ON(lp->active >= ARRAY_SIZE(lp->phy))) + return -EINVAL; lp->phy[lp->active].gep = (*p ? p : NULL); p += (2 * (*p) + 1); lp->phy[lp->active].rst = (*p ? p : NULL); p += (2 * (*p) + 1); lp->phy[lp->active].mc = get_unaligned_le16(p); p += 2; -- cgit v1.2.3-59-g8ed1b From 253e9b4d11e577bb8cbc77ef68a9ff46438065ca Mon Sep 17 00:00:00 2001 From: Volodymyr Mytnyk Date: Thu, 18 Nov 2021 21:48:03 +0200 Subject: net: marvell: prestera: fix brige port operation Return NOTIFY_DONE (dont't care) for switchdev notifications that prestera driver don't know how to handle them. With introduction of SWITCHDEV_BRPORT_[UN]OFFLOADED switchdev events, the driver rejects adding swport to bridge operation which is handled by prestera_bridge_port_join() func. The root cause of this is that prestera driver returns error (EOPNOTSUPP) in prestera_switchdev_blk_event() handler for unknown swdev events. This causes switchdev_bridge_port_offload() to fail when adding port to bridge in prestera_bridge_port_join(). Fixes: 957e2235e526 ("net: make switchdev_bridge_port_{,unoffload} loosely coupled with the bridge") Signed-off-by: Volodymyr Mytnyk Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/prestera/prestera_switchdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c index 3ce6ccd0f539..79f2fca0d412 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c @@ -1124,7 +1124,7 @@ static int prestera_switchdev_blk_event(struct notifier_block *unused, prestera_port_obj_attr_set); break; default: - err = -EOPNOTSUPP; + return NOTIFY_DONE; } return notifier_from_errno(err); -- cgit v1.2.3-59-g8ed1b From e8d032507cb7912baf1d3e0af54516f823befefd Mon Sep 17 00:00:00 2001 From: Volodymyr Mytnyk Date: Thu, 18 Nov 2021 21:51:40 +0200 Subject: net: marvell: prestera: fix double free issue on err path fix error path handling in prestera_bridge_port_join() that cases prestera driver to crash (see below). Trace: Internal error: Oops: 96000044 [#1] SMP Modules linked in: prestera_pci prestera uio_pdrv_genirq CPU: 1 PID: 881 Comm: ip Not tainted 5.15.0 #1 pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : prestera_bridge_destroy+0x2c/0xb0 [prestera] lr : prestera_bridge_port_join+0x2cc/0x350 [prestera] sp : ffff800011a1b0f0 ... x2 : ffff000109ca6c80 x1 : dead000000000100 x0 : dead000000000122 Call trace: prestera_bridge_destroy+0x2c/0xb0 [prestera] prestera_bridge_port_join+0x2cc/0x350 [prestera] prestera_netdev_port_event.constprop.0+0x3c4/0x450 [prestera] prestera_netdev_event_handler+0xf4/0x110 [prestera] raw_notifier_call_chain+0x54/0x80 call_netdevice_notifiers_info+0x54/0xa0 __netdev_upper_dev_link+0x19c/0x380 Fixes: e1189d9a5fbe ("net: marvell: prestera: Add Switchdev driver implementation") Signed-off-by: Volodymyr Mytnyk Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/prestera/prestera_switchdev.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c index 79f2fca0d412..b4599fe4ca8d 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c @@ -497,8 +497,8 @@ int prestera_bridge_port_join(struct net_device *br_dev, br_port = prestera_bridge_port_add(bridge, port->dev); if (IS_ERR(br_port)) { - err = PTR_ERR(br_port); - goto err_brport_create; + prestera_bridge_put(bridge); + return PTR_ERR(br_port); } err = switchdev_bridge_port_offload(br_port->dev, port->dev, NULL, @@ -519,8 +519,6 @@ err_port_join: switchdev_bridge_port_unoffload(br_port->dev, NULL, NULL, NULL); err_switchdev_offload: prestera_bridge_port_put(br_port); -err_brport_create: - prestera_bridge_put(bridge); return err; } -- cgit v1.2.3-59-g8ed1b From 6a405f6c372d14707b87d3097b361b69899a26c8 Mon Sep 17 00:00:00 2001 From: Zekun Shen Date: Thu, 18 Nov 2021 16:08:02 -0500 Subject: atlantic: fix double-free in aq_ring_tx_clean We found this bug while fuzzing the device driver. Using and freeing the dangling pointer buff->skb would cause use-after-free and double-free. This bug is triggerable with compromised/malfunctioning devices. We found the bug with QEMU emulation and tested the patch by emulation. We did NOT test on a real device. Attached is the bug report. BUG: KASAN: double-free or invalid-free in consume_skb+0x6c/0x1c0 Call Trace: dump_stack+0x76/0xa0 print_address_description.constprop.0+0x16/0x200 ? consume_skb+0x6c/0x1c0 kasan_report_invalid_free+0x61/0xa0 ? consume_skb+0x6c/0x1c0 __kasan_slab_free+0x15e/0x170 ? consume_skb+0x6c/0x1c0 kfree+0x8c/0x230 consume_skb+0x6c/0x1c0 aq_ring_tx_clean+0x5c2/0xa80 [atlantic] aq_vec_poll+0x309/0x5d0 [atlantic] ? _sub_I_65535_1+0x20/0x20 [atlantic] ? __next_timer_interrupt+0xba/0xf0 net_rx_action+0x363/0xbd0 ? call_timer_fn+0x240/0x240 ? __switch_to_asm+0x34/0x70 ? napi_busy_loop+0x520/0x520 ? net_tx_action+0x379/0x720 __do_softirq+0x18c/0x634 ? takeover_tasklets+0x5f0/0x5f0 run_ksoftirqd+0x15/0x20 smpboot_thread_fn+0x2f1/0x6b0 ? smpboot_unregister_percpu_thread+0x160/0x160 ? __kthread_parkme+0x80/0x100 ? smpboot_unregister_percpu_thread+0x160/0x160 kthread+0x2b5/0x3b0 ? kthread_create_on_node+0xd0/0xd0 ret_from_fork+0x22/0x40 Reported-by: Brendan Dolan-Gavitt Signed-off-by: Zekun Shen Reviewed-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 24122ccda614..81b3756417ec 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -298,13 +298,14 @@ bool aq_ring_tx_clean(struct aq_ring_s *self) } } - if (unlikely(buff->is_eop)) { + if (unlikely(buff->is_eop && buff->skb)) { u64_stats_update_begin(&self->stats.tx.syncp); ++self->stats.tx.packets; self->stats.tx.bytes += buff->skb->len; u64_stats_update_end(&self->stats.tx.syncp); dev_kfree_skb_any(buff->skb); + buff->skb = NULL; } buff->pa = 0U; buff->eop_index = 0xffffU; -- cgit v1.2.3-59-g8ed1b From 0f296e782f21dc1c55475a3c107ac68ab09cc1cf Mon Sep 17 00:00:00 2001 From: Zekun Shen Date: Thu, 18 Nov 2021 16:42:47 -0500 Subject: stmmac_pci: Fix underflow size in stmmac_rx This bug report came up when we were testing the device driver by fuzzing. It shows that buf1_len can get underflowed and be 0xfffffffc (4294967292). This bug is triggerable with a compromised/malfunctioning device. We found the bug through QEMU emulation tested the patch with emulation. We did NOT test it on real hardware. Attached is the bug report by fuzzing. BUG: KASAN: use-after-free in stmmac_napi_poll_rx+0x1c08/0x36e0 [stmmac] Read of size 4294967292 at addr ffff888016358000 by task ksoftirqd/0/9 CPU: 0 PID: 9 Comm: ksoftirqd/0 Tainted: G W 5.6.0 #1 Call Trace: dump_stack+0x76/0xa0 print_address_description.constprop.0+0x16/0x200 ? stmmac_napi_poll_rx+0x1c08/0x36e0 [stmmac] ? stmmac_napi_poll_rx+0x1c08/0x36e0 [stmmac] __kasan_report.cold+0x37/0x7c ? stmmac_napi_poll_rx+0x1c08/0x36e0 [stmmac] kasan_report+0xe/0x20 check_memory_region+0x15a/0x1d0 memcpy+0x20/0x50 stmmac_napi_poll_rx+0x1c08/0x36e0 [stmmac] ? stmmac_suspend+0x850/0x850 [stmmac] ? __next_timer_interrupt+0xba/0xf0 net_rx_action+0x363/0xbd0 ? call_timer_fn+0x240/0x240 ? __switch_to_asm+0x40/0x70 ? napi_busy_loop+0x520/0x520 ? __schedule+0x839/0x15a0 __do_softirq+0x18c/0x634 ? takeover_tasklets+0x5f0/0x5f0 run_ksoftirqd+0x15/0x20 smpboot_thread_fn+0x2f1/0x6b0 ? smpboot_unregister_percpu_thread+0x160/0x160 ? __kthread_parkme+0x80/0x100 ? smpboot_unregister_percpu_thread+0x160/0x160 kthread+0x2b5/0x3b0 ? kthread_create_on_node+0xd0/0xd0 ret_from_fork+0x22/0x40 Reported-by: Brendan Dolan-Gavitt Signed-off-by: Zekun Shen Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 2eb284576336..19bc86f2092f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -5161,12 +5161,13 @@ read_again: if (likely(!(status & rx_not_ls)) && (likely(priv->synopsys_id >= DWMAC_CORE_4_00) || unlikely(status != llc_snap))) { - if (buf2_len) + if (buf2_len) { buf2_len -= ETH_FCS_LEN; - else + len -= ETH_FCS_LEN; + } else if (buf1_len) { buf1_len -= ETH_FCS_LEN; - - len -= ETH_FCS_LEN; + len -= ETH_FCS_LEN; + } } if (!skb) { -- cgit v1.2.3-59-g8ed1b From e792779e6b639c182df91b46ac1e5803460b0b15 Mon Sep 17 00:00:00 2001 From: Nitesh B Venkatesh Date: Fri, 4 Jun 2021 09:53:31 -0700 Subject: iavf: Prevent changing static ITR values if adaptive moderation is on Resolve being able to change static values on VF when adaptive interrupt moderation is enabled. This problem is fixed by checking the interrupt settings is not a combination of change of static value while adaptive interrupt moderation is turned on. Without this fix, the user would be able to change static values on VF with adaptive moderation enabled. Fixes: 65e87c0398f5 ("i40evf: support queue-specific settings for interrupt moderation") Signed-off-by: Nitesh B Venkatesh Tested-by: George Kuruvinakunnel Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_ethtool.c | 30 ++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index 144a77679359..71b23922089f 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -723,12 +723,31 @@ static int iavf_get_per_queue_coalesce(struct net_device *netdev, u32 queue, * * Change the ITR settings for a specific queue. **/ -static void iavf_set_itr_per_queue(struct iavf_adapter *adapter, - struct ethtool_coalesce *ec, int queue) +static int iavf_set_itr_per_queue(struct iavf_adapter *adapter, + struct ethtool_coalesce *ec, int queue) { struct iavf_ring *rx_ring = &adapter->rx_rings[queue]; struct iavf_ring *tx_ring = &adapter->tx_rings[queue]; struct iavf_q_vector *q_vector; + u16 itr_setting; + + itr_setting = rx_ring->itr_setting & ~IAVF_ITR_DYNAMIC; + + if (ec->rx_coalesce_usecs != itr_setting && + ec->use_adaptive_rx_coalesce) { + netif_info(adapter, drv, adapter->netdev, + "Rx interrupt throttling cannot be changed if adaptive-rx is enabled\n"); + return -EINVAL; + } + + itr_setting = tx_ring->itr_setting & ~IAVF_ITR_DYNAMIC; + + if (ec->tx_coalesce_usecs != itr_setting && + ec->use_adaptive_tx_coalesce) { + netif_info(adapter, drv, adapter->netdev, + "Tx interrupt throttling cannot be changed if adaptive-tx is enabled\n"); + return -EINVAL; + } rx_ring->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs); tx_ring->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs); @@ -751,6 +770,7 @@ static void iavf_set_itr_per_queue(struct iavf_adapter *adapter, * the Tx and Rx ITR values based on the values we have entered * into the q_vector, no need to write the values now. */ + return 0; } /** @@ -792,9 +812,11 @@ static int __iavf_set_coalesce(struct net_device *netdev, */ if (queue < 0) { for (i = 0; i < adapter->num_active_queues; i++) - iavf_set_itr_per_queue(adapter, ec, i); + if (iavf_set_itr_per_queue(adapter, ec, i)) + return -EINVAL; } else if (queue < adapter->num_active_queues) { - iavf_set_itr_per_queue(adapter, ec, queue); + if (iavf_set_itr_per_queue(adapter, ec, queue)) + return -EINVAL; } else { netif_info(adapter, drv, netdev, "Invalid queue value, queue range is 0 - %d\n", adapter->num_active_queues - 1); -- cgit v1.2.3-59-g8ed1b From 0cc318d2e8408bc0ffb4662a0c3e5e57005ac6ff Mon Sep 17 00:00:00 2001 From: Jedrzej Jagielski Date: Tue, 7 Sep 2021 09:25:40 +0000 Subject: iavf: Fix deadlock occurrence during resetting VF interface System hangs if close the interface is called from the kernel during the interface is in resetting state. During resetting operation the link is closing but kernel didn't know it and it tried to close this interface again what sometimes led to deadlock. Inform kernel about current state of interface and turn off the flag IFF_UP when interface is closing until reset is finished. Previously it was most likely to hang the system when kernel (network manager) tried to close the interface in the same time when interface was in resetting state because of deadlock. Fixes: 3c8e0b989aa1 ("i40vf: don't stop me now") Signed-off-by: Jaroslaw Gawin Signed-off-by: Jedrzej Jagielski Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 336e6bf95e48..84680777ac12 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -2254,6 +2254,7 @@ continue_reset: (adapter->state == __IAVF_RESETTING)); if (running) { + netdev->flags &= ~IFF_UP; netif_carrier_off(netdev); netif_tx_stop_all_queues(netdev); adapter->link_up = false; @@ -2365,7 +2366,7 @@ continue_reset: * to __IAVF_RUNNING */ iavf_up_complete(adapter); - + netdev->flags |= IFF_UP; iavf_irq_enable(adapter, true); } else { iavf_change_state(adapter, __IAVF_DOWN); @@ -2378,8 +2379,10 @@ continue_reset: reset_err: mutex_unlock(&adapter->client_lock); mutex_unlock(&adapter->crit_lock); - if (running) + if (running) { iavf_change_state(adapter, __IAVF_RUNNING); + netdev->flags |= IFF_UP; + } dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n"); iavf_close(netdev); } -- cgit v1.2.3-59-g8ed1b From 3b5bdd18eb76e7570d9bacbcab6828a9b26ae121 Mon Sep 17 00:00:00 2001 From: Jedrzej Jagielski Date: Wed, 15 Sep 2021 09:01:00 +0000 Subject: iavf: Fix refreshing iavf adapter stats on ethtool request Currently iavf adapter statistics are refreshed only in a watchdog task, triggered approximately every two seconds, which causes some ethtool requests to return outdated values. Add explicit statistics refresh when requested by ethtool -S. Fixes: b476b0030e61 ("iavf: Move commands processing to the separate function") Signed-off-by: Jan Sokolowski Signed-off-by: Jedrzej Jagielski Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf.h | 2 ++ drivers/net/ethernet/intel/iavf/iavf_ethtool.c | 3 +++ drivers/net/ethernet/intel/iavf/iavf_main.c | 18 ++++++++++++++++++ drivers/net/ethernet/intel/iavf/iavf_virtchnl.c | 2 ++ 4 files changed, 25 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 75635bd57cf6..bb9cc227d1e1 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -305,6 +305,7 @@ struct iavf_adapter { #define IAVF_FLAG_AQ_DEL_FDIR_FILTER BIT(26) #define IAVF_FLAG_AQ_ADD_ADV_RSS_CFG BIT(27) #define IAVF_FLAG_AQ_DEL_ADV_RSS_CFG BIT(28) +#define IAVF_FLAG_AQ_REQUEST_STATS BIT(29) /* OS defined structs */ struct net_device *netdev; @@ -444,6 +445,7 @@ int iavf_up(struct iavf_adapter *adapter); void iavf_down(struct iavf_adapter *adapter); int iavf_process_config(struct iavf_adapter *adapter); void iavf_schedule_reset(struct iavf_adapter *adapter); +void iavf_schedule_request_stats(struct iavf_adapter *adapter); void iavf_reset(struct iavf_adapter *adapter); void iavf_set_ethtool_ops(struct net_device *netdev); void iavf_update_stats(struct iavf_adapter *adapter); diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index 71b23922089f..0cecaff38d04 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -354,6 +354,9 @@ static void iavf_get_ethtool_stats(struct net_device *netdev, struct iavf_adapter *adapter = netdev_priv(netdev); unsigned int i; + /* Explicitly request stats refresh */ + iavf_schedule_request_stats(adapter); + iavf_add_ethtool_stats(&data, adapter, iavf_gstrings_stats); rcu_read_lock(); diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 84680777ac12..8e96ae746c3d 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -174,6 +174,19 @@ void iavf_schedule_reset(struct iavf_adapter *adapter) } } +/** + * iavf_schedule_request_stats - Set the flags and schedule statistics request + * @adapter: board private structure + * + * Sets IAVF_FLAG_AQ_REQUEST_STATS flag so iavf_watchdog_task() will explicitly + * request and refresh ethtool stats + **/ +void iavf_schedule_request_stats(struct iavf_adapter *adapter) +{ + adapter->aq_required |= IAVF_FLAG_AQ_REQUEST_STATS; + mod_delayed_work(iavf_wq, &adapter->watchdog_task, 0); +} + /** * iavf_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure @@ -1709,6 +1722,11 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter) iavf_del_adv_rss_cfg(adapter); return 0; } + if (adapter->aq_required & IAVF_FLAG_AQ_REQUEST_STATS) { + iavf_request_stats(adapter); + return 0; + } + return -EAGAIN; } diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 8c3f0f77cb57..8421cbe6a197 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -784,6 +784,8 @@ void iavf_request_stats(struct iavf_adapter *adapter) /* no error message, this isn't crucial */ return; } + + adapter->aq_required &= ~IAVF_FLAG_AQ_REQUEST_STATS; adapter->current_op = VIRTCHNL_OP_GET_STATS; vqs.vsi_id = adapter->vsi_res->vsi_id; /* queue maps are ignored for this message - only the vsi is used */ -- cgit v1.2.3-59-g8ed1b From 5951a2b9812d8227d33f20d1899fae60e4f72c04 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Fri, 5 Nov 2021 09:20:25 -0700 Subject: iavf: Fix VLAN feature flags after VFR When a VF goes through a reset, it's possible for the VF's feature set to change. For example it may lose the VIRTCHNL_VF_OFFLOAD_VLAN capability after VF reset. Unfortunately, the driver doesn't correctly deal with this situation and errors are seen from downing/upping the interface and/or moving the interface in/out of a network namespace. When setting the interface down/up we see the following errors after the VIRTCHNL_VF_OFFLOAD_VLAN capability was taken away from the VF: ice 0000:51:00.1: VF 1 failed opcode 12, retval: -64 iavf 0000:51:09.1: Failed to add VLAN filter, error IAVF_NOT_SUPPORTED ice 0000:51:00.1: VF 1 failed opcode 13, retval: -64 iavf 0000:51:09.1: Failed to delete VLAN filter, error IAVF_NOT_SUPPORTED These add/delete errors are happening because the VLAN filters are tracked internally to the driver and regardless of the VLAN_ALLOWED() setting the driver tries to delete/re-add them over virtchnl. Fix the delete failure by making sure to delete any VLAN filter tracking in the driver when a removal request is made, while preventing the virtchnl request. This makes it so the driver's VLAN list is up to date and the errors are Fix the add failure by making sure the check for VLAN_ALLOWED() during reset is done after the VF receives its capability list from the PF via VIRTCHNL_OP_GET_VF_RESOURCES. If VLAN functionality is not allowed, then prevent requesting re-adding the filters over virtchnl. When moving the interface into a network namespace we see the following errors after the VIRTCHNL_VF_OFFLOAD_VLAN capability was taken away from the VF: iavf 0000:51:09.1 enp81s0f1v1: NIC Link is Up Speed is 25 Gbps Full Duplex iavf 0000:51:09.1 temp_27: renamed from enp81s0f1v1 iavf 0000:51:09.1 mgmt: renamed from temp_27 iavf 0000:51:09.1 dev27: set_features() failed (-22); wanted 0x020190001fd54833, left 0x020190001fd54bb3 These errors are happening because we aren't correctly updating the netdev capabilities and dealing with ndo_fix_features() and ndo_set_features() correctly. Fix this by only reporting errors in the driver's ndo_set_features() callback when VIRTCHNL_VF_OFFLOAD_VLAN is not allowed and any attempt to enable the VLAN features is made. Also, make sure to disable VLAN insertion, filtering, and stripping since the VIRTCHNL_VF_OFFLOAD_VLAN flag applies to all of them and not just VLAN stripping. Also, after we process the capabilities in the VF reset path, make sure to call netdev_update_features() in case the capabilities have changed in order to update the netdev's feature set to match the VF's actual capabilities. Lastly, make sure to always report success on VLAN filter delete when VIRTCHNL_VF_OFFLOAD_VLAN is not supported. The changed flow in iavf_del_vlans() allows the stack to delete previosly existing VLAN filters even if VLAN filtering is not allowed. This makes it so the VLAN filter list is up to date. Fixes: 8774370d268f ("i40e/i40evf: support for VF VLAN tag stripping control") Signed-off-by: Brett Creeley Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf.h | 1 + drivers/net/ethernet/intel/iavf/iavf_main.c | 33 +++++++----------- drivers/net/ethernet/intel/iavf/iavf_virtchnl.c | 45 +++++++++++++++++++++++-- 3 files changed, 56 insertions(+), 23 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index bb9cc227d1e1..3789269ce741 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -503,4 +503,5 @@ void iavf_add_adv_rss_cfg(struct iavf_adapter *adapter); void iavf_del_adv_rss_cfg(struct iavf_adapter *adapter); struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter, const u8 *macaddr); +int iavf_lock_timeout(struct mutex *lock, unsigned int msecs); #endif /* _IAVF_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 8e96ae746c3d..14934a7a13ef 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -147,7 +147,7 @@ enum iavf_status iavf_free_virt_mem_d(struct iavf_hw *hw, * * Returns 0 on success, negative on failure **/ -static int iavf_lock_timeout(struct mutex *lock, unsigned int msecs) +int iavf_lock_timeout(struct mutex *lock, unsigned int msecs) { unsigned int wait, delay = 10; @@ -717,13 +717,11 @@ static void iavf_del_vlan(struct iavf_adapter *adapter, u16 vlan) **/ static void iavf_restore_filters(struct iavf_adapter *adapter) { - /* re-add all VLAN filters */ - if (VLAN_ALLOWED(adapter)) { - u16 vid; + u16 vid; - for_each_set_bit(vid, adapter->vsi.active_vlans, VLAN_N_VID) - iavf_add_vlan(adapter, vid); - } + /* re-add all VLAN filters */ + for_each_set_bit(vid, adapter->vsi.active_vlans, VLAN_N_VID) + iavf_add_vlan(adapter, vid); } /** @@ -758,9 +756,6 @@ static int iavf_vlan_rx_kill_vid(struct net_device *netdev, { struct iavf_adapter *adapter = netdev_priv(netdev); - if (!VLAN_ALLOWED(adapter)) - return -EIO; - iavf_del_vlan(adapter, vid); clear_bit(vid, adapter->vsi.active_vlans); @@ -2191,7 +2186,6 @@ static void iavf_reset_task(struct work_struct *work) struct net_device *netdev = adapter->netdev; struct iavf_hw *hw = &adapter->hw; struct iavf_mac_filter *f, *ftmp; - struct iavf_vlan_filter *vlf; struct iavf_cloud_filter *cf; u32 reg_val; int i = 0, err; @@ -2332,11 +2326,6 @@ continue_reset: list_for_each_entry(f, &adapter->mac_filter_list, list) { f->add = true; } - /* re-add all VLAN filters */ - list_for_each_entry(vlf, &adapter->vlan_filter_list, list) { - vlf->add = true; - } - spin_unlock_bh(&adapter->mac_vlan_list_lock); /* check if TCs are running and re-add all cloud filters */ @@ -2350,7 +2339,6 @@ continue_reset: spin_unlock_bh(&adapter->cloud_filter_list_lock); adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER; - adapter->aq_required |= IAVF_FLAG_AQ_ADD_VLAN_FILTER; adapter->aq_required |= IAVF_FLAG_AQ_ADD_CLOUD_FILTER; iavf_misc_irq_enable(adapter); @@ -3462,11 +3450,16 @@ static int iavf_set_features(struct net_device *netdev, { struct iavf_adapter *adapter = netdev_priv(netdev); - /* Don't allow changing VLAN_RX flag when adapter is not capable - * of VLAN offload + /* Don't allow enabling VLAN features when adapter is not capable + * of VLAN offload/filtering */ if (!VLAN_ALLOWED(adapter)) { - if ((netdev->features ^ features) & NETIF_F_HW_VLAN_CTAG_RX) + netdev->hw_features &= ~(NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_FILTER); + if (features & (NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_FILTER)) return -EINVAL; } else if ((netdev->features ^ features) & NETIF_F_HW_VLAN_CTAG_RX) { if (features & NETIF_F_HW_VLAN_CTAG_RX) diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 8421cbe6a197..d60bf7c21200 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -607,7 +607,7 @@ void iavf_add_vlans(struct iavf_adapter *adapter) if (f->add) count++; } - if (!count) { + if (!count || !VLAN_ALLOWED(adapter)) { adapter->aq_required &= ~IAVF_FLAG_AQ_ADD_VLAN_FILTER; spin_unlock_bh(&adapter->mac_vlan_list_lock); return; @@ -673,9 +673,19 @@ void iavf_del_vlans(struct iavf_adapter *adapter) spin_lock_bh(&adapter->mac_vlan_list_lock); - list_for_each_entry(f, &adapter->vlan_filter_list, list) { - if (f->remove) + list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) { + /* since VLAN capabilities are not allowed, we dont want to send + * a VLAN delete request because it will most likely fail and + * create unnecessary errors/noise, so just free the VLAN + * filters marked for removal to enable bailing out before + * sending a virtchnl message + */ + if (f->remove && !VLAN_ALLOWED(adapter)) { + list_del(&f->list); + kfree(f); + } else if (f->remove) { count++; + } } if (!count) { adapter->aq_required &= ~IAVF_FLAG_AQ_DEL_VLAN_FILTER; @@ -1724,8 +1734,37 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, } spin_lock_bh(&adapter->mac_vlan_list_lock); iavf_add_filter(adapter, adapter->hw.mac.addr); + + if (VLAN_ALLOWED(adapter)) { + if (!list_empty(&adapter->vlan_filter_list)) { + struct iavf_vlan_filter *vlf; + + /* re-add all VLAN filters over virtchnl */ + list_for_each_entry(vlf, + &adapter->vlan_filter_list, + list) + vlf->add = true; + + adapter->aq_required |= + IAVF_FLAG_AQ_ADD_VLAN_FILTER; + } + } + spin_unlock_bh(&adapter->mac_vlan_list_lock); iavf_process_config(adapter); + + /* unlock crit_lock before acquiring rtnl_lock as other + * processes holding rtnl_lock could be waiting for the same + * crit_lock + */ + mutex_unlock(&adapter->crit_lock); + rtnl_lock(); + netdev_update_features(adapter->netdev); + rtnl_unlock(); + if (iavf_lock_timeout(&adapter->crit_lock, 10000)) + dev_warn(&adapter->pdev->dev, "failed to acquire crit_lock in %s\n", + __FUNCTION__); + } break; case VIRTCHNL_OP_ENABLE_QUEUES: -- cgit v1.2.3-59-g8ed1b From 3b00a07c2443745d62babfe08dbb2ad8e649526e Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Fri, 19 Nov 2021 03:03:49 +0100 Subject: net: dsa: qca8k: fix internal delay applied to the wrong PAD config With SGMII phy the internal delay is always applied to the PAD0 config. This is caused by the falling edge configuration that hardcode the reg to PAD0 (as the falling edge bits are present only in PAD0 reg) Move the delay configuration before the reg overwrite to correctly apply the delay. Fixes: cef08115846e ("net: dsa: qca8k: set internal delay also for sgmii") Signed-off-by: Ansuel Smith Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/qca8k.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index a429c9750add..d7bcecbc1c53 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -1433,6 +1433,12 @@ qca8k_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode, qca8k_write(priv, QCA8K_REG_SGMII_CTRL, val); + /* From original code is reported port instability as SGMII also + * require delay set. Apply advised values here or take them from DT. + */ + if (state->interface == PHY_INTERFACE_MODE_SGMII) + qca8k_mac_config_setup_internal_delay(priv, cpu_port_index, reg); + /* For qca8327/qca8328/qca8334/qca8338 sgmii is unique and * falling edge is set writing in the PORT0 PAD reg */ @@ -1455,12 +1461,6 @@ qca8k_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode, QCA8K_PORT0_PAD_SGMII_TXCLK_FALLING_EDGE, val); - /* From original code is reported port instability as SGMII also - * require delay set. Apply advised values here or take them from DT. - */ - if (state->interface == PHY_INTERFACE_MODE_SGMII) - qca8k_mac_config_setup_internal_delay(priv, cpu_port_index, reg); - break; default: dev_err(ds->dev, "xMII mode %s not supported for port %d\n", -- cgit v1.2.3-59-g8ed1b From 65258b9d8cde45689bdc86ca39b50f01f983733b Mon Sep 17 00:00:00 2001 From: Robert Marko Date: Fri, 19 Nov 2021 03:03:50 +0100 Subject: net: dsa: qca8k: fix MTU calculation qca8k has a global MTU, so its tracking the MTU per port to make sure that the largest MTU gets applied. Since it uses the frame size instead of MTU the driver MTU change function will then add the size of Ethernet header and checksum on top of MTU. The driver currently populates the per port MTU size as Ethernet frame length + checksum which equals 1518. The issue is that then MTU change function will go through all of the ports, find the largest MTU and apply the Ethernet header + checksum on top of it again, so for a desired MTU of 1500 you will end up with 1536. This is obviously incorrect, so to correct it populate the per port struct MTU with just the MTU and not include the Ethernet header + checksum size as those will be added by the MTU change function. Fixes: f58d2598cf70 ("net: dsa: qca8k: implement the port MTU callbacks") Signed-off-by: Robert Marko Signed-off-by: Ansuel Smith Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/qca8k.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index d7bcecbc1c53..147ca39531a3 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -1256,8 +1256,12 @@ qca8k_setup(struct dsa_switch *ds) /* Set initial MTU for every port. * We have only have a general MTU setting. So track * every port and set the max across all port. + * Set per port MTU to 1500 as the MTU change function + * will add the overhead and if its set to 1518 then it + * will apply the overhead again and we will end up with + * MTU of 1536 instead of 1518 */ - priv->port_mtu[i] = ETH_FRAME_LEN + ETH_FCS_LEN; + priv->port_mtu[i] = ETH_DATA_LEN; } /* Special GLOBAL_FC_THRESH value are needed for ar8327 switch */ -- cgit v1.2.3-59-g8ed1b From 3bd6b2a838ba6a3b86d41b077f570b1b61174def Mon Sep 17 00:00:00 2001 From: Diana Wang Date: Fri, 19 Nov 2021 14:38:03 +0100 Subject: nfp: checking parameter process for rx-usecs/tx-usecs is invalid Use nn->tlv_caps.me_freq_mhz instead of nn->me_freq_mhz to check whether rx-usecs/tx-usecs is valid. This is because nn->tlv_caps.me_freq_mhz represents the clock_freq (MHz) of the flow processing cores (FPC) on the NIC. While nn->me_freq_mhz is not be set. Fixes: ce991ab6662a ("nfp: read ME frequency from vNIC ctrl memory") Signed-off-by: Diana Wang Signed-off-by: Simon Horman Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net.h | 3 --- drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index df203738511b..0b1865e9f0b5 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -565,7 +565,6 @@ struct nfp_net_dp { * @exn_name: Name for Exception interrupt * @shared_handler: Handler for shared interrupts * @shared_name: Name for shared interrupt - * @me_freq_mhz: ME clock_freq (MHz) * @reconfig_lock: Protects @reconfig_posted, @reconfig_timer_active, * @reconfig_sync_present and HW reconfiguration request * regs/machinery from async requests (sync must take @@ -650,8 +649,6 @@ struct nfp_net { irq_handler_t shared_handler; char shared_name[IFNAMSIZ + 8]; - u32 me_freq_mhz; - bool link_up; spinlock_t link_status_lock; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 1de076f55740..cf7882933993 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -1344,7 +1344,7 @@ static int nfp_net_set_coalesce(struct net_device *netdev, * ME timestamp ticks. There are 16 ME clock cycles for each timestamp * count. */ - factor = nn->me_freq_mhz / 16; + factor = nn->tlv_caps.me_freq_mhz / 16; /* Each pair of (usecs, max_frames) fields specifies that interrupts * should be coalesced until -- cgit v1.2.3-59-g8ed1b From a6da2bbb0005e6b4909472962c9d0af29e75dd06 Mon Sep 17 00:00:00 2001 From: Holger Assmann Date: Sun, 21 Nov 2021 19:57:04 +0200 Subject: net: stmmac: retain PTP clock time during SIOCSHWTSTAMP ioctls Currently, when user space emits SIOCSHWTSTAMP ioctl calls such as enabling/disabling timestamping or changing filter settings, the driver reads the current CLOCK_REALTIME value and programming this into the NIC's hardware clock. This might be necessary during system initialization, but at runtime, when the PTP clock has already been synchronized to a grandmaster, a reset of the timestamp settings might result in a clock jump. Furthermore, if the clock is also controlled by phc2sys in automatic mode (where the UTC offset is queried from ptp4l), that UTC-to-TAI offset (currently 37 seconds in 2021) would be temporarily reset to 0, and it would take a long time for phc2sys to readjust so that CLOCK_REALTIME and the PHC are apart by 37 seconds again. To address the issue, we introduce a new function called stmmac_init_tstamp_counter(), which gets called during ndo_open(). It contains the code snippet moved from stmmac_hwtstamp_set() that manages the time synchronization. Besides, the sub second increment configuration is also moved here since the related values are hardware dependent and runtime invariant. Furthermore, the hardware clock must be kept running even when no time stamping mode is selected in order to retain the synchronized time base. That way, timestamping can be enabled again at any time only with the need to compensate the clock's natural drifting. As a side effect, this patch fixes the issue that ptp_clock_info::enable can be called before SIOCSHWTSTAMP and the driver (which looks at priv->systime_flags) was not prepared to handle that ordering. Fixes: 92ba6888510c ("stmmac: add the support for PTP hw clock driver") Reported-by: Michael Olbrich Signed-off-by: Ahmad Fatoum Signed-off-by: Holger Assmann Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 1 + drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 125 +++++++++++++-------- .../net/ethernet/stmicro/stmmac/stmmac_platform.c | 2 +- 3 files changed, 81 insertions(+), 47 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 43eead726886..5f129733aabd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -314,6 +314,7 @@ int stmmac_mdio_reset(struct mii_bus *mii); int stmmac_xpcs_setup(struct mii_bus *mii); void stmmac_set_ethtool_ops(struct net_device *netdev); +int stmmac_init_tstamp_counter(struct stmmac_priv *priv, u32 systime_flags); void stmmac_ptp_register(struct stmmac_priv *priv); void stmmac_ptp_unregister(struct stmmac_priv *priv); int stmmac_open(struct net_device *dev); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 19bc86f2092f..f12097c8a485 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -50,6 +50,13 @@ #include "dwxgmac2.h" #include "hwif.h" +/* As long as the interface is active, we keep the timestamping counter enabled + * with fine resolution and binary rollover. This avoid non-monotonic behavior + * (clock jumps) when changing timestamping settings at runtime. + */ +#define STMMAC_HWTS_ACTIVE (PTP_TCR_TSENA | PTP_TCR_TSCFUPDT | \ + PTP_TCR_TSCTRLSSR) + #define STMMAC_ALIGN(x) ALIGN(ALIGN(x, SMP_CACHE_BYTES), 16) #define TSO_MAX_BUFF_SIZE (SZ_16K - 1) @@ -613,8 +620,6 @@ static int stmmac_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) { struct stmmac_priv *priv = netdev_priv(dev); struct hwtstamp_config config; - struct timespec64 now; - u64 temp = 0; u32 ptp_v2 = 0; u32 tstamp_all = 0; u32 ptp_over_ipv4_udp = 0; @@ -623,11 +628,6 @@ static int stmmac_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) u32 snap_type_sel = 0; u32 ts_master_en = 0; u32 ts_event_en = 0; - u32 sec_inc = 0; - u32 value = 0; - bool xmac; - - xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac; if (!(priv->dma_cap.time_stamp || priv->adv_ts)) { netdev_alert(priv->dev, "No support for HW time stamping\n"); @@ -789,42 +789,17 @@ static int stmmac_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) priv->hwts_rx_en = ((config.rx_filter == HWTSTAMP_FILTER_NONE) ? 0 : 1); priv->hwts_tx_en = config.tx_type == HWTSTAMP_TX_ON; - if (!priv->hwts_tx_en && !priv->hwts_rx_en) - stmmac_config_hw_tstamping(priv, priv->ptpaddr, 0); - else { - value = (PTP_TCR_TSENA | PTP_TCR_TSCFUPDT | PTP_TCR_TSCTRLSSR | - tstamp_all | ptp_v2 | ptp_over_ethernet | - ptp_over_ipv6_udp | ptp_over_ipv4_udp | ts_event_en | - ts_master_en | snap_type_sel); - stmmac_config_hw_tstamping(priv, priv->ptpaddr, value); - - /* program Sub Second Increment reg */ - stmmac_config_sub_second_increment(priv, - priv->ptpaddr, priv->plat->clk_ptp_rate, - xmac, &sec_inc); - temp = div_u64(1000000000ULL, sec_inc); - - /* Store sub second increment and flags for later use */ - priv->sub_second_inc = sec_inc; - priv->systime_flags = value; - - /* calculate default added value: - * formula is : - * addend = (2^32)/freq_div_ratio; - * where, freq_div_ratio = 1e9ns/sec_inc - */ - temp = (u64)(temp << 32); - priv->default_addend = div_u64(temp, priv->plat->clk_ptp_rate); - stmmac_config_addend(priv, priv->ptpaddr, priv->default_addend); - - /* initialize system time */ - ktime_get_real_ts64(&now); + priv->systime_flags = STMMAC_HWTS_ACTIVE; - /* lower 32 bits of tv_sec are safe until y2106 */ - stmmac_init_systime(priv, priv->ptpaddr, - (u32)now.tv_sec, now.tv_nsec); + if (priv->hwts_tx_en || priv->hwts_rx_en) { + priv->systime_flags |= tstamp_all | ptp_v2 | + ptp_over_ethernet | ptp_over_ipv6_udp | + ptp_over_ipv4_udp | ts_event_en | + ts_master_en | snap_type_sel; } + stmmac_config_hw_tstamping(priv, priv->ptpaddr, priv->systime_flags); + memcpy(&priv->tstamp_config, &config, sizeof(config)); return copy_to_user(ifr->ifr_data, &config, @@ -852,6 +827,66 @@ static int stmmac_hwtstamp_get(struct net_device *dev, struct ifreq *ifr) sizeof(*config)) ? -EFAULT : 0; } +/** + * stmmac_init_tstamp_counter - init hardware timestamping counter + * @priv: driver private structure + * @systime_flags: timestamping flags + * Description: + * Initialize hardware counter for packet timestamping. + * This is valid as long as the interface is open and not suspended. + * Will be rerun after resuming from suspend, case in which the timestamping + * flags updated by stmmac_hwtstamp_set() also need to be restored. + */ +int stmmac_init_tstamp_counter(struct stmmac_priv *priv, u32 systime_flags) +{ + bool xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac; + struct timespec64 now; + u32 sec_inc = 0; + u64 temp = 0; + int ret; + + if (!(priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp)) + return -EOPNOTSUPP; + + ret = clk_prepare_enable(priv->plat->clk_ptp_ref); + if (ret < 0) { + netdev_warn(priv->dev, + "failed to enable PTP reference clock: %pe\n", + ERR_PTR(ret)); + return ret; + } + + stmmac_config_hw_tstamping(priv, priv->ptpaddr, systime_flags); + priv->systime_flags = systime_flags; + + /* program Sub Second Increment reg */ + stmmac_config_sub_second_increment(priv, priv->ptpaddr, + priv->plat->clk_ptp_rate, + xmac, &sec_inc); + temp = div_u64(1000000000ULL, sec_inc); + + /* Store sub second increment for later use */ + priv->sub_second_inc = sec_inc; + + /* calculate default added value: + * formula is : + * addend = (2^32)/freq_div_ratio; + * where, freq_div_ratio = 1e9ns/sec_inc + */ + temp = (u64)(temp << 32); + priv->default_addend = div_u64(temp, priv->plat->clk_ptp_rate); + stmmac_config_addend(priv, priv->ptpaddr, priv->default_addend); + + /* initialize system time */ + ktime_get_real_ts64(&now); + + /* lower 32 bits of tv_sec are safe until y2106 */ + stmmac_init_systime(priv, priv->ptpaddr, (u32)now.tv_sec, now.tv_nsec); + + return 0; +} +EXPORT_SYMBOL_GPL(stmmac_init_tstamp_counter); + /** * stmmac_init_ptp - init PTP * @priv: driver private structure @@ -862,9 +897,11 @@ static int stmmac_hwtstamp_get(struct net_device *dev, struct ifreq *ifr) static int stmmac_init_ptp(struct stmmac_priv *priv) { bool xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac; + int ret; - if (!(priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp)) - return -EOPNOTSUPP; + ret = stmmac_init_tstamp_counter(priv, STMMAC_HWTS_ACTIVE); + if (ret) + return ret; priv->adv_ts = 0; /* Check if adv_ts can be enabled for dwmac 4.x / xgmac core */ @@ -3272,10 +3309,6 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) stmmac_mmc_setup(priv); if (init_ptp) { - ret = clk_prepare_enable(priv->plat->clk_ptp_ref); - if (ret < 0) - netdev_warn(priv->dev, "failed to enable PTP reference clock: %d\n", ret); - ret = stmmac_init_ptp(priv); if (ret == -EOPNOTSUPP) netdev_warn(priv->dev, "PTP not supported by HW\n"); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 232ac98943cd..5d29f336315b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -816,7 +816,7 @@ static int __maybe_unused stmmac_pltfr_noirq_resume(struct device *dev) if (ret) return ret; - clk_prepare_enable(priv->plat->clk_ptp_ref); + stmmac_init_tstamp_counter(priv, priv->systime_flags); } return 0; -- cgit v1.2.3-59-g8ed1b From f93fd0ca5e7de743ce687951266950fb37877e34 Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Sun, 21 Nov 2021 21:06:42 +0100 Subject: net: ax88796c: do not receive data in pointer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Function axspi_read_status calls: ret = spi_write_then_read(ax_spi->spi, ax_spi->cmd_buf, 1, (u8 *)&status, 3); status is a pointer to a struct spi_status, which is 3-byte wide: struct spi_status { u16 isr; u8 status; }; But &status is the pointer to this pointer, and spi_write_then_read does not dereference this parameter: int spi_write_then_read(struct spi_device *spi, const void *txbuf, unsigned n_tx, void *rxbuf, unsigned n_rx) Therefore axspi_read_status currently receive a SPI response in the pointer status, which overwrites 24 bits of the pointer. Thankfully, on Little-Endian systems, the pointer is only used in le16_to_cpus(&status->isr); ... which is a no-operation. So there, the overwritten pointer is not dereferenced. Nevertheless on Big-Endian systems, this can lead to dereferencing pointers after their 24 most significant bits were overwritten. And in all systems this leads to possible use of uninitialized value in functions calling spi_write_then_read which expect status to be initialized when the function returns. Moreover function axspi_read_status (and macro AX_READ_STATUS) do not seem to be used anywhere. So currently this seems to be dead code. Fix the issue anyway so that future code works properly when using function axspi_read_status. Fixes: a97c69ba4f30 ("net: ax88796c: ASIX AX88796C SPI Ethernet Adapter Driver") Signed-off-by: Nicolas Iooss Acked-by: Łukasz Stelmach Signed-off-by: David S. Miller --- drivers/net/ethernet/asix/ax88796c_spi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/asix/ax88796c_spi.c b/drivers/net/ethernet/asix/ax88796c_spi.c index 94df4f96d2be..0710e716d682 100644 --- a/drivers/net/ethernet/asix/ax88796c_spi.c +++ b/drivers/net/ethernet/asix/ax88796c_spi.c @@ -34,7 +34,7 @@ int axspi_read_status(struct axspi_data *ax_spi, struct spi_status *status) /* OP */ ax_spi->cmd_buf[0] = AX_SPICMD_READ_STATUS; - ret = spi_write_then_read(ax_spi->spi, ax_spi->cmd_buf, 1, (u8 *)&status, 3); + ret = spi_write_then_read(ax_spi->spi, ax_spi->cmd_buf, 1, (u8 *)status, 3); if (ret) dev_err(&ax_spi->spi->dev, "%s() failed: ret = %d\n", __func__, ret); else -- cgit v1.2.3-59-g8ed1b From a68229ca634066975fff6d4780155bd2eb14a82a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 22 Nov 2021 16:02:49 +0100 Subject: nixge: fix mac address error handling again The change to eth_hw_addr_set() caused gcc to correctly spot a bug that was introduced in an earlier incorrect fix: In file included from include/linux/etherdevice.h:21, from drivers/net/ethernet/ni/nixge.c:7: In function '__dev_addr_set', inlined from 'eth_hw_addr_set' at include/linux/etherdevice.h:319:2, inlined from 'nixge_probe' at drivers/net/ethernet/ni/nixge.c:1286:3: include/linux/netdevice.h:4648:9: error: 'memcpy' reading 6 bytes from a region of size 0 [-Werror=stringop-overread] 4648 | memcpy(dev->dev_addr, addr, len); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ As nixge_get_nvmem_address() can return either NULL or an error pointer, the NULL check is wrong, and we can end up reading from ERR_PTR(-EOPNOTSUPP), which gcc knows to contain zero readable bytes. Make the function always return an error pointer again but fix the check to match that. Fixes: f3956ebb3bf0 ("ethernet: use eth_hw_addr_set() instead of ether_addr_copy()") Fixes: abcd3d6fc640 ("net: nixge: Fix error path for obtaining mac address") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/ni/nixge.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c index cfeb7620ae20..07a00dd9cfe0 100644 --- a/drivers/net/ethernet/ni/nixge.c +++ b/drivers/net/ethernet/ni/nixge.c @@ -1209,7 +1209,7 @@ static void *nixge_get_nvmem_address(struct device *dev) cell = nvmem_cell_get(dev, "address"); if (IS_ERR(cell)) - return NULL; + return cell; mac = nvmem_cell_read(cell, &cell_size); nvmem_cell_put(cell); @@ -1282,7 +1282,7 @@ static int nixge_probe(struct platform_device *pdev) ndev->max_mtu = NIXGE_JUMBO_MTU; mac_addr = nixge_get_nvmem_address(&pdev->dev); - if (mac_addr && is_valid_ether_addr(mac_addr)) { + if (!IS_ERR(mac_addr) && is_valid_ether_addr(mac_addr)) { eth_hw_addr_set(ndev, mac_addr); kfree(mac_addr); } else { -- cgit v1.2.3-59-g8ed1b From 792b2086584f25d84081a526beee80d103c2a913 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 26 Oct 2021 18:47:18 +0200 Subject: ice: fix vsi->txq_map sizing The approach of having XDP queue per CPU regardless of user's setting exposed a hidden bug that could occur in case when Rx queue count differ from Tx queue count. Currently vsi->txq_map's size is equal to the doubled vsi->alloc_txq, which is not correct due to the fact that XDP rings were previously based on the Rx queue count. Below splat can be seen when ethtool -L is used and XDP rings are configured: [ 682.875339] BUG: kernel NULL pointer dereference, address: 000000000000000f [ 682.883403] #PF: supervisor read access in kernel mode [ 682.889345] #PF: error_code(0x0000) - not-present page [ 682.895289] PGD 0 P4D 0 [ 682.898218] Oops: 0000 [#1] PREEMPT SMP PTI [ 682.903055] CPU: 42 PID: 2878 Comm: ethtool Tainted: G OE 5.15.0-rc5+ #1 [ 682.912214] Hardware name: Intel Corp. GRANTLEY/GRANTLEY, BIOS GRRFCRB1.86B.0276.D07.1605190235 05/19/2016 [ 682.923380] RIP: 0010:devres_remove+0x44/0x130 [ 682.928527] Code: 49 89 f4 55 48 89 fd 4c 89 ff 53 48 83 ec 10 e8 92 b9 49 00 48 8b 9d a8 02 00 00 48 8d 8d a0 02 00 00 49 89 c2 48 39 cb 74 0f <4c> 3b 63 10 74 25 48 8b 5b 08 48 39 cb 75 f1 4c 89 ff 4c 89 d6 e8 [ 682.950237] RSP: 0018:ffffc90006a679f0 EFLAGS: 00010002 [ 682.956285] RAX: 0000000000000286 RBX: ffffffffffffffff RCX: ffff88908343a370 [ 682.964538] RDX: 0000000000000001 RSI: ffffffff81690d60 RDI: 0000000000000000 [ 682.972789] RBP: ffff88908343a0d0 R08: 0000000000000000 R09: 0000000000000000 [ 682.981040] R10: 0000000000000286 R11: 3fffffffffffffff R12: ffffffff81690d60 [ 682.989282] R13: ffffffff81690a00 R14: ffff8890819807a8 R15: ffff88908343a36c [ 682.997535] FS: 00007f08c7bfa740(0000) GS:ffff88a03fd00000(0000) knlGS:0000000000000000 [ 683.006910] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 683.013557] CR2: 000000000000000f CR3: 0000001080a66003 CR4: 00000000003706e0 [ 683.021819] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 683.030075] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 683.038336] Call Trace: [ 683.041167] devm_kfree+0x33/0x50 [ 683.045004] ice_vsi_free_arrays+0x5e/0xc0 [ice] [ 683.050380] ice_vsi_rebuild+0x4c8/0x750 [ice] [ 683.055543] ice_vsi_recfg_qs+0x9a/0x110 [ice] [ 683.060697] ice_set_channels+0x14f/0x290 [ice] [ 683.065962] ethnl_set_channels+0x333/0x3f0 [ 683.070807] genl_family_rcv_msg_doit+0xea/0x150 [ 683.076152] genl_rcv_msg+0xde/0x1d0 [ 683.080289] ? channels_prepare_data+0x60/0x60 [ 683.085432] ? genl_get_cmd+0xd0/0xd0 [ 683.089667] netlink_rcv_skb+0x50/0xf0 [ 683.094006] genl_rcv+0x24/0x40 [ 683.097638] netlink_unicast+0x239/0x340 [ 683.102177] netlink_sendmsg+0x22e/0x470 [ 683.106717] sock_sendmsg+0x5e/0x60 [ 683.110756] __sys_sendto+0xee/0x150 [ 683.114894] ? handle_mm_fault+0xd0/0x2a0 [ 683.119535] ? do_user_addr_fault+0x1f3/0x690 [ 683.134173] __x64_sys_sendto+0x25/0x30 [ 683.148231] do_syscall_64+0x3b/0xc0 [ 683.161992] entry_SYSCALL_64_after_hwframe+0x44/0xae Fix this by taking into account the value that num_possible_cpus() yields in addition to vsi->alloc_txq instead of doubling the latter. Fixes: efc2214b6047 ("ice: Add support for XDP") Fixes: 22bf877e528f ("ice: introduce XDP_TX fallback path") Reviewed-by: Alexander Lobakin Signed-off-by: Maciej Fijalkowski Tested-by: Kiran Bhandare Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lib.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 40562600a8cf..09a3297cd63c 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -89,8 +89,13 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi) if (!vsi->rx_rings) goto err_rings; - /* XDP will have vsi->alloc_txq Tx queues as well, so double the size */ - vsi->txq_map = devm_kcalloc(dev, (2 * vsi->alloc_txq), + /* txq_map needs to have enough space to track both Tx (stack) rings + * and XDP rings; at this point vsi->num_xdp_txq might not be set, + * so use num_possible_cpus() as we want to always provide XDP ring + * per CPU, regardless of queue count settings from user that might + * have come from ethtool's set_channels() callback; + */ + vsi->txq_map = devm_kcalloc(dev, (vsi->alloc_txq + num_possible_cpus()), sizeof(*vsi->txq_map), GFP_KERNEL); if (!vsi->txq_map) -- cgit v1.2.3-59-g8ed1b From f65ee535df775a13a1046c0a0b2d72db342f8a5b Mon Sep 17 00:00:00 2001 From: Marta Plantykow Date: Tue, 26 Oct 2021 18:47:19 +0200 Subject: ice: avoid bpf_prog refcount underflow Ice driver has the routines for managing XDP resources that are shared between ndo_bpf op and VSI rebuild flow. The latter takes place for example when user changes queue count on an interface via ethtool's set_channels(). There is an issue around the bpf_prog refcounting when VSI is being rebuilt - since ice_prepare_xdp_rings() is called with vsi->xdp_prog as an argument that is used later on by ice_vsi_assign_bpf_prog(), same bpf_prog pointers are swapped with each other. Then it is also interpreted as an 'old_prog' which in turn causes us to call bpf_prog_put on it that will decrement its refcount. Below splat can be interpreted in a way that due to zero refcount of a bpf_prog it is wiped out from the system while kernel still tries to refer to it: [ 481.069429] BUG: unable to handle page fault for address: ffffc9000640f038 [ 481.077390] #PF: supervisor read access in kernel mode [ 481.083335] #PF: error_code(0x0000) - not-present page [ 481.089276] PGD 100000067 P4D 100000067 PUD 1001cb067 PMD 106d2b067 PTE 0 [ 481.097141] Oops: 0000 [#1] PREEMPT SMP PTI [ 481.101980] CPU: 12 PID: 3339 Comm: sudo Tainted: G OE 5.15.0-rc5+ #1 [ 481.110840] Hardware name: Intel Corp. GRANTLEY/GRANTLEY, BIOS GRRFCRB1.86B.0276.D07.1605190235 05/19/2016 [ 481.122021] RIP: 0010:dev_xdp_prog_id+0x25/0x40 [ 481.127265] Code: 80 00 00 00 00 0f 1f 44 00 00 89 f6 48 c1 e6 04 48 01 fe 48 8b 86 98 08 00 00 48 85 c0 74 13 48 8b 50 18 31 c0 48 85 d2 74 07 <48> 8b 42 38 8b 40 20 c3 48 8b 96 90 08 00 00 eb e8 66 2e 0f 1f 84 [ 481.148991] RSP: 0018:ffffc90007b63868 EFLAGS: 00010286 [ 481.155034] RAX: 0000000000000000 RBX: ffff889080824000 RCX: 0000000000000000 [ 481.163278] RDX: ffffc9000640f000 RSI: ffff889080824010 RDI: ffff889080824000 [ 481.171527] RBP: ffff888107af7d00 R08: 0000000000000000 R09: ffff88810db5f6e0 [ 481.179776] R10: 0000000000000000 R11: ffff8890885b9988 R12: ffff88810db5f4bc [ 481.188026] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 [ 481.196276] FS: 00007f5466d5bec0(0000) GS:ffff88903fb00000(0000) knlGS:0000000000000000 [ 481.205633] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 481.212279] CR2: ffffc9000640f038 CR3: 000000014429c006 CR4: 00000000003706e0 [ 481.220530] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 481.228771] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 481.237029] Call Trace: [ 481.239856] rtnl_fill_ifinfo+0x768/0x12e0 [ 481.244602] rtnl_dump_ifinfo+0x525/0x650 [ 481.249246] ? __alloc_skb+0xa5/0x280 [ 481.253484] netlink_dump+0x168/0x3c0 [ 481.257725] netlink_recvmsg+0x21e/0x3e0 [ 481.262263] ____sys_recvmsg+0x87/0x170 [ 481.266707] ? __might_fault+0x20/0x30 [ 481.271046] ? _copy_from_user+0x66/0xa0 [ 481.275591] ? iovec_from_user+0xf6/0x1c0 [ 481.280226] ___sys_recvmsg+0x82/0x100 [ 481.284566] ? sock_sendmsg+0x5e/0x60 [ 481.288791] ? __sys_sendto+0xee/0x150 [ 481.293129] __sys_recvmsg+0x56/0xa0 [ 481.297267] do_syscall_64+0x3b/0xc0 [ 481.301395] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 481.307238] RIP: 0033:0x7f5466f39617 [ 481.311373] Code: 0c 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb bd 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2f 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10 [ 481.342944] RSP: 002b:00007ffedc7f4308 EFLAGS: 00000246 ORIG_RAX: 000000000000002f [ 481.361783] RAX: ffffffffffffffda RBX: 00007ffedc7f5460 RCX: 00007f5466f39617 [ 481.380278] RDX: 0000000000000000 RSI: 00007ffedc7f5360 RDI: 0000000000000003 [ 481.398500] RBP: 00007ffedc7f53f0 R08: 0000000000000000 R09: 000055d556f04d50 [ 481.416463] R10: 0000000000000077 R11: 0000000000000246 R12: 00007ffedc7f5360 [ 481.434131] R13: 00007ffedc7f5350 R14: 00007ffedc7f5344 R15: 0000000000000e98 [ 481.451520] Modules linked in: ice(OE) af_packet binfmt_misc nls_iso8859_1 ipmi_ssif intel_rapl_msr intel_rapl_common x86_pkg_temp_thermal intel_powerclamp mxm_wmi mei_me coretemp mei ipmi_si ipmi_msghandler wmi acpi_pad acpi_power_meter ip_tables x_tables autofs4 crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel ahci crypto_simd cryptd libahci lpc_ich [last unloaded: ice] [ 481.528558] CR2: ffffc9000640f038 [ 481.542041] ---[ end trace d1f24c9ecf5b61c1 ]--- Fix this by only calling ice_vsi_assign_bpf_prog() inside ice_prepare_xdp_rings() when current vsi->xdp_prog pointer is NULL. This way set_channels() flow will not attempt to swap the vsi->xdp_prog pointers with itself. Also, sprinkle around some comments that provide a reasoning about correlation between driver and kernel in terms of bpf_prog refcount. Fixes: efc2214b6047 ("ice: Add support for XDP") Reviewed-by: Alexander Lobakin Signed-off-by: Marta Plantykow Co-developed-by: Maciej Fijalkowski Signed-off-by: Maciej Fijalkowski Tested-by: Kiran Bhandare Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index f099797f35e3..4d1fc48c9744 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2609,7 +2609,18 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog) ice_stat_str(status)); goto clear_xdp_rings; } - ice_vsi_assign_bpf_prog(vsi, prog); + + /* assign the prog only when it's not already present on VSI; + * this flow is a subject of both ethtool -L and ndo_bpf flows; + * VSI rebuild that happens under ethtool -L can expose us to + * the bpf_prog refcount issues as we would be swapping same + * bpf_prog pointers from vsi->xdp_prog and calling bpf_prog_put + * on it as it would be treated as an 'old_prog'; for ndo_bpf + * this is not harmful as dev_xdp_install bumps the refcount + * before calling the op exposed by the driver; + */ + if (!ice_is_xdp_ena_vsi(vsi)) + ice_vsi_assign_bpf_prog(vsi, prog); return 0; clear_xdp_rings: @@ -2785,6 +2796,11 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, if (xdp_ring_err) NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Tx resources failed"); } else { + /* safe to call even when prog == vsi->xdp_prog as + * dev_xdp_install in net/core/dev.c incremented prog's + * refcount so corresponding bpf_prog_put won't cause + * underflow + */ ice_vsi_assign_bpf_prog(vsi, prog); } -- cgit v1.2.3-59-g8ed1b From fe785f56ad5886c08d1cadd9e8b4e1ff6a1866f6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 18 Nov 2021 15:21:02 +0100 Subject: iwlwifi: pcie: fix constant-conversion warning Both gcc-11 and clang point out a potential issue with integer overflow when the iwl_dev_info_table[] array is empty. This is what clang warns: drivers/net/wireless/intel/iwlwifi/pcie/drv.c:1344:42: error: implicit conversion from 'unsigned long' to 'int' changes value from 18446744073709551615 to -1 [-Werror,-Wconstant-conversion] for (i = ARRAY_SIZE(iwl_dev_info_table) - 1; i >= 0; i--) { ~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~ This is still harmless, as the loop correctly terminates, but adding an extra range check makes that obvious to both readers and to the compiler. Fixes: 3f7320428fa4 ("iwlwifi: pcie: simplify iwl_pci_find_dev_info()") Reported-by: kernel test robot Cc: Nick Desaulniers Signed-off-by: Arnd Bergmann Reviewed-by: Nathan Chancellor Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20211118142124.526901-1-arnd@kernel.org --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index c574f041f096..395e328c6a07 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1339,9 +1339,13 @@ iwl_pci_find_dev_info(u16 device, u16 subsystem_device, u16 mac_type, u8 mac_step, u16 rf_type, u8 cdb, u8 rf_id, u8 no_160, u8 cores) { + int num_devices = ARRAY_SIZE(iwl_dev_info_table); int i; - for (i = ARRAY_SIZE(iwl_dev_info_table) - 1; i >= 0; i--) { + if (!num_devices) + return NULL; + + for (i = num_devices - 1; i >= 0; i--) { const struct iwl_dev_info *dev_info = &iwl_dev_info_table[i]; if (dev_info->device != (u16)IWL_CFG_ANY && -- cgit v1.2.3-59-g8ed1b From 1b54403c9cc444b6e0ade1f441efdf1270877ace Mon Sep 17 00:00:00 2001 From: chongjiapeng Date: Tue, 2 Nov 2021 15:38:47 +0800 Subject: iwlwifi: Fix missing error code in iwl_pci_probe() The error code is missing in this code scenario, add the error code '-EINVAL' to the return value 'ret'. Eliminate the follow smatch warning: drivers/net/wireless/intel/iwlwifi/pcie/drv.c:1376 iwl_pci_probe() warn: missing error code 'ret'. Reported-by: Abaci Robot Fixes: 1f171f4f1437 ("iwlwifi: Add support for getting rf id with blank otp") Signed-off-by: chongjiapeng Acked-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/1635838727-128735-1-git-send-email-jiapeng.chong@linux.alibaba.com --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 395e328c6a07..5ce07f28e7c3 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1446,8 +1446,10 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) */ if (iwl_trans->trans_cfg->rf_id && iwl_trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_9000 && - !CSR_HW_RFID_TYPE(iwl_trans->hw_rf_id) && get_crf_id(iwl_trans)) + !CSR_HW_RFID_TYPE(iwl_trans->hw_rf_id) && get_crf_id(iwl_trans)) { + ret = -EINVAL; goto out_free_trans; + } dev_info = iwl_pci_find_dev_info(pdev->device, pdev->subsystem_device, CSR_HW_REV_TYPE(iwl_trans->hw_rev), -- cgit v1.2.3-59-g8ed1b From 5283dd677e52af9db6fe6ad11b2f12220d519d0c Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Wed, 10 Nov 2021 15:01:59 +0200 Subject: iwlwifi: mvm: retry init flow if failed In some very rare cases the init flow may fail. In many cases, this is recoverable, so we can retry. Implement a loop to retry two more times after the first attempt failed. This can happen in two different situations, namely during probe and during mac80211 start. For the first case, a simple loop is enough. For the second case, we need to add a flag to prevent mac80211 from trying to restart it as well, leaving full control with the driver. Cc: Signed-off-by: Mordechay Goodstein Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20211110150132.57514296ecab.I52a0411774b700bdc7dedb124d8b59bf99456eb2@changeid --- drivers/net/wireless/intel/iwlwifi/iwl-drv.c | 22 ++++++++++++++------- drivers/net/wireless/intel/iwlwifi/iwl-drv.h | 3 +++ drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 24 ++++++++++++++++++++++- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 3 +++ drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 3 +++ 5 files changed, 47 insertions(+), 8 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c index 36196e07b1a0..5cec467b995b 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c @@ -1313,23 +1313,31 @@ _iwl_op_mode_start(struct iwl_drv *drv, struct iwlwifi_opmode_table *op) const struct iwl_op_mode_ops *ops = op->ops; struct dentry *dbgfs_dir = NULL; struct iwl_op_mode *op_mode = NULL; + int retry, max_retry = !!iwlwifi_mod_params.fw_restart * IWL_MAX_INIT_RETRY; + + for (retry = 0; retry <= max_retry; retry++) { #ifdef CONFIG_IWLWIFI_DEBUGFS - drv->dbgfs_op_mode = debugfs_create_dir(op->name, - drv->dbgfs_drv); - dbgfs_dir = drv->dbgfs_op_mode; + drv->dbgfs_op_mode = debugfs_create_dir(op->name, + drv->dbgfs_drv); + dbgfs_dir = drv->dbgfs_op_mode; #endif - op_mode = ops->start(drv->trans, drv->trans->cfg, &drv->fw, dbgfs_dir); + op_mode = ops->start(drv->trans, drv->trans->cfg, + &drv->fw, dbgfs_dir); + + if (op_mode) + return op_mode; + + IWL_ERR(drv, "retry init count %d\n", retry); #ifdef CONFIG_IWLWIFI_DEBUGFS - if (!op_mode) { debugfs_remove_recursive(drv->dbgfs_op_mode); drv->dbgfs_op_mode = NULL; - } #endif + } - return op_mode; + return NULL; } static void _iwl_op_mode_stop(struct iwl_drv *drv) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.h b/drivers/net/wireless/intel/iwlwifi/iwl-drv.h index 2e2d60a58692..0fd009e6d685 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.h @@ -89,4 +89,7 @@ void iwl_drv_stop(struct iwl_drv *drv); #define IWL_EXPORT_SYMBOL(sym) #endif +/* max retry for init flow */ +#define IWL_MAX_INIT_RETRY 2 + #endif /* __iwl_drv_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 9fb9c7dad314..897e3b91ddb2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -16,6 +16,7 @@ #include #include +#include "iwl-drv.h" #include "iwl-op-mode.h" #include "iwl-io.h" #include "mvm.h" @@ -1117,9 +1118,30 @@ static int iwl_mvm_mac_start(struct ieee80211_hw *hw) { struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); int ret; + int retry, max_retry = 0; mutex_lock(&mvm->mutex); - ret = __iwl_mvm_mac_start(mvm); + + /* we are starting the mac not in error flow, and restart is enabled */ + if (!test_bit(IWL_MVM_STATUS_HW_RESTART_REQUESTED, &mvm->status) && + iwlwifi_mod_params.fw_restart) { + max_retry = IWL_MAX_INIT_RETRY; + /* + * This will prevent mac80211 recovery flows to trigger during + * init failures + */ + set_bit(IWL_MVM_STATUS_STARTING, &mvm->status); + } + + for (retry = 0; retry <= max_retry; retry++) { + ret = __iwl_mvm_mac_start(mvm); + if (!ret) + break; + + IWL_ERR(mvm, "mac start retry %d\n", retry); + } + clear_bit(IWL_MVM_STATUS_STARTING, &mvm->status); + mutex_unlock(&mvm->mutex); return ret; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 2b1dcd60e00f..a72d85086fe3 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -1123,6 +1123,8 @@ struct iwl_mvm { * @IWL_MVM_STATUS_FIRMWARE_RUNNING: firmware is running * @IWL_MVM_STATUS_NEED_FLUSH_P2P: need to flush P2P bcast STA * @IWL_MVM_STATUS_IN_D3: in D3 (or at least about to go into it) + * @IWL_MVM_STATUS_STARTING: starting mac, + * used to disable restart flow while in STARTING state */ enum iwl_mvm_status { IWL_MVM_STATUS_HW_RFKILL, @@ -1134,6 +1136,7 @@ enum iwl_mvm_status { IWL_MVM_STATUS_FIRMWARE_RUNNING, IWL_MVM_STATUS_NEED_FLUSH_P2P, IWL_MVM_STATUS_IN_D3, + IWL_MVM_STATUS_STARTING, }; /* Keep track of completed init configuration */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 232ad531d612..ce7160670aa7 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -1600,6 +1600,9 @@ void iwl_mvm_nic_restart(struct iwl_mvm *mvm, bool fw_error) */ if (!mvm->fw_restart && fw_error) { iwl_fw_error_collect(&mvm->fwrt, false); + } else if (test_bit(IWL_MVM_STATUS_STARTING, + &mvm->status)) { + IWL_ERR(mvm, "Starting mac, retry will be triggered anyway\n"); } else if (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) { struct iwl_mvm_reprobe *reprobe; -- cgit v1.2.3-59-g8ed1b From f5cecf1d4c5ff76172928bc32e99ca56a5ca2f56 Mon Sep 17 00:00:00 2001 From: Łukasz Bartosik Date: Wed, 10 Nov 2021 22:57:44 +0100 Subject: iwlwifi: fix warnings produced by kernel debug options MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix warnings produced by: - lockdep_assert_wiphy() in function reg_process_self_managed_hint(), - wiphy_dereference() in function iwl_mvm_init_fw_regd(). Both function are expected to be called in critical section. The warnings were discovered when running v5.15 kernel with debug options enabled: 1) Hardware name: Google Delbin/Delbin RIP: 0010:reg_process_self_managed_hint+0x254/0x347 [cfg80211] ... Call Trace: regulatory_set_wiphy_regd_sync+0x3d/0xb0 iwl_mvm_init_mcc+0x49d/0x5a2 iwl_op_mode_mvm_start+0x1b58/0x2507 ? iwl_mvm_reprobe_wk+0x94/0x94 _iwl_op_mode_start+0x146/0x1a3 iwl_opmode_register+0xda/0x13d init_module+0x28/0x1000 2) drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c:263 suspicious rcu_dereference_protected() usage! ... Hardware name: Google Delbin/Delbin, BIOS Google_Delbin Call Trace: dump_stack_lvl+0xb1/0xe6 iwl_mvm_init_fw_regd+0x2e7/0x379 iwl_mvm_init_mcc+0x2c6/0x5a2 iwl_op_mode_mvm_start+0x1b58/0x2507 ? iwl_mvm_reprobe_wk+0x94/0x94 _iwl_op_mode_start+0x146/0x1a3 iwl_opmode_register+0xda/0x13d init_module+0x28/0x100 Fixes: a05829a7222e ("cfg80211: avoid holding the RTNL when calling the driver") Signed-off-by: Łukasz Bartosik Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20211110215744.5487-1-lukasz.bartosik@semihalf.com --- drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index ce7160670aa7..cd08e289cd9a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -686,6 +686,7 @@ static int iwl_mvm_start_get_nvm(struct iwl_mvm *mvm) int ret; rtnl_lock(); + wiphy_lock(mvm->hw->wiphy); mutex_lock(&mvm->mutex); ret = iwl_run_init_mvm_ucode(mvm); @@ -701,6 +702,7 @@ static int iwl_mvm_start_get_nvm(struct iwl_mvm *mvm) iwl_mvm_stop_device(mvm); mutex_unlock(&mvm->mutex); + wiphy_unlock(mvm->hw->wiphy); rtnl_unlock(); if (ret < 0) -- cgit v1.2.3-59-g8ed1b From a571bc28326d9f3e13f5f2d9cda2883e0631b0ce Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 11 Nov 2021 08:23:11 +0100 Subject: iwlwifi: Fix memory leaks in error handling path Should an error occur (invalid TLV len or memory allocation failure), the memory already allocated in 'reduce_power_data' should be freed before returning, otherwise it is leaking. Fixes: 9dad325f9d57 ("iwlwifi: support loading the reduced power table from UEFI") Signed-off-by: Christophe JAILLET Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/1504cd7d842d13ddb8244e18004523128d5c9523.1636615284.git.christophe.jaillet@wanadoo.fr --- drivers/net/wireless/intel/iwlwifi/fw/uefi.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/wireless/intel/iwlwifi/fw/uefi.c b/drivers/net/wireless/intel/iwlwifi/fw/uefi.c index c875bf35533c..009dd4be597b 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/uefi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/uefi.c @@ -86,6 +86,7 @@ static void *iwl_uefi_reduce_power_section(struct iwl_trans *trans, if (len < tlv_len) { IWL_ERR(trans, "invalid TLV len: %zd/%u\n", len, tlv_len); + kfree(reduce_power_data); reduce_power_data = ERR_PTR(-EINVAL); goto out; } @@ -105,6 +106,7 @@ static void *iwl_uefi_reduce_power_section(struct iwl_trans *trans, IWL_DEBUG_FW(trans, "Couldn't allocate (more) reduce_power_data\n"); + kfree(reduce_power_data); reduce_power_data = ERR_PTR(-ENOMEM); goto out; } @@ -134,6 +136,10 @@ static void *iwl_uefi_reduce_power_section(struct iwl_trans *trans, done: if (!size) { IWL_DEBUG_FW(trans, "Empty REDUCE_POWER, skipping.\n"); + /* Better safe than sorry, but 'reduce_power_data' should + * always be NULL if !size. + */ + kfree(reduce_power_data); reduce_power_data = ERR_PTR(-ENOENT); goto out; } -- cgit v1.2.3-59-g8ed1b From 5737b4515deea0829c138ab5201160345ec67d49 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Fri, 19 Nov 2021 13:45:10 +0800 Subject: rtw89: update partition size of firmware header on skb->data The partition size is used to tell hardware the size of piece we are going to send a firmware. The old code updates the size in constant buffer of firmware, and leads system crash. To fix this, update the size on skb->data after we copy the firmware data into skb. Buglink: https://bugzilla.opensuse.org/show_bug.cgi?id=1188303 Fixes: e3ec7017f6a2 ("rtw89: add Realtek 802.11ax driver") Reported-by: Takashi Iwai Signed-off-by: Ping-Ke Shih Tested-by: Takashi Iwai Tested-by: Larry Finger Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20211119054512.10620-2-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/fw.c | 2 +- drivers/net/wireless/realtek/rtw89/fw.h | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/wireless/realtek/rtw89/fw.c b/drivers/net/wireless/realtek/rtw89/fw.c index 212aaf577d3c..65ef3dc9d061 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.c +++ b/drivers/net/wireless/realtek/rtw89/fw.c @@ -91,7 +91,6 @@ static int rtw89_fw_hdr_parser(struct rtw89_dev *rtwdev, const u8 *fw, u32 len, info->section_num = GET_FW_HDR_SEC_NUM(fw); info->hdr_len = RTW89_FW_HDR_SIZE + info->section_num * RTW89_FW_SECTION_HDR_SIZE; - SET_FW_HDR_PART_SIZE(fw, FWDL_SECTION_PER_PKT_LEN); bin = fw + info->hdr_len; @@ -275,6 +274,7 @@ static int __rtw89_fw_download_hdr(struct rtw89_dev *rtwdev, const u8 *fw, u32 l } skb_put_data(skb, fw, len); + SET_FW_HDR_PART_SIZE(skb->data, FWDL_SECTION_PER_PKT_LEN); rtw89_h2c_pkt_set_hdr_fwdl(rtwdev, skb, FWCMD_TYPE_H2C, H2C_CAT_MAC, H2C_CL_MAC_FWDL, H2C_FUNC_MAC_FWHDR_DL, len); diff --git a/drivers/net/wireless/realtek/rtw89/fw.h b/drivers/net/wireless/realtek/rtw89/fw.h index 7ee0d9323310..36e8d0da6c1e 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.h +++ b/drivers/net/wireless/realtek/rtw89/fw.h @@ -282,8 +282,10 @@ struct rtw89_h2creg_sch_tx_en { le32_get_bits(*((__le32 *)(fwhdr) + 6), GENMASK(15, 8)) #define GET_FW_HDR_CMD_VERSERION(fwhdr) \ le32_get_bits(*((__le32 *)(fwhdr) + 7), GENMASK(31, 24)) -#define SET_FW_HDR_PART_SIZE(fwhdr, val) \ - le32p_replace_bits((__le32 *)(fwhdr) + 7, val, GENMASK(15, 0)) +static inline void SET_FW_HDR_PART_SIZE(void *fwhdr, u32 val) +{ + le32p_replace_bits((__le32 *)fwhdr + 7, val, GENMASK(15, 0)); +} #define SET_CTRL_INFO_MACID(table, val) \ le32p_replace_bits((__le32 *)(table) + 0, val, GENMASK(6, 0)) -- cgit v1.2.3-59-g8ed1b From 6e53d6d26920d5221d3f4d4f5ffdd629ea69aa5c Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 18 Nov 2021 13:47:48 +0100 Subject: mt76: mt7915: fix NULL pointer dereference in mt7915_get_phy_mode Fix the following NULL pointer dereference in mt7915_get_phy_mode routine adding an ibss interface to the mt7915 driver. [ 101.137097] wlan0: Trigger new scan to find an IBSS to join [ 102.827039] wlan0: Creating new IBSS network, BSSID 26:a4:50:1a:6e:69 [ 103.064756] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 [ 103.073670] Mem abort info: [ 103.076520] ESR = 0x96000005 [ 103.079614] EC = 0x25: DABT (current EL), IL = 32 bits [ 103.084934] SET = 0, FnV = 0 [ 103.088042] EA = 0, S1PTW = 0 [ 103.091215] Data abort info: [ 103.094104] ISV = 0, ISS = 0x00000005 [ 103.098041] CM = 0, WnR = 0 [ 103.101044] user pgtable: 4k pages, 39-bit VAs, pgdp=00000000460b1000 [ 103.107565] [0000000000000000] pgd=0000000000000000, p4d=0000000000000000, pud=0000000000000000 [ 103.116590] Internal error: Oops: 96000005 [#1] SMP [ 103.189066] CPU: 1 PID: 333 Comm: kworker/u4:3 Not tainted 5.10.75 #0 [ 103.195498] Hardware name: MediaTek MT7622 RFB1 board (DT) [ 103.201124] Workqueue: phy0 ieee80211_iface_work [mac80211] [ 103.206695] pstate: 20000005 (nzCv daif -PAN -UAO -TCO BTYPE=--) [ 103.212705] pc : mt7915_get_phy_mode+0x68/0x120 [mt7915e] [ 103.218103] lr : mt7915_mcu_add_bss_info+0x11c/0x760 [mt7915e] [ 103.223927] sp : ffffffc011cdb9e0 [ 103.227235] x29: ffffffc011cdb9e0 x28: ffffff8006563098 [ 103.232545] x27: ffffff8005f4da22 x26: ffffff800685ac40 [ 103.237855] x25: 0000000000000001 x24: 000000000000011f [ 103.243165] x23: ffffff8005f4e260 x22: ffffff8006567918 [ 103.248475] x21: ffffff8005f4df80 x20: ffffff800685ac58 [ 103.253785] x19: ffffff8006744400 x18: 0000000000000000 [ 103.259094] x17: 0000000000000000 x16: 0000000000000001 [ 103.264403] x15: 000899c3a2d9d2e4 x14: 000899bdc3c3a1c8 [ 103.269713] x13: 0000000000000000 x12: 0000000000000000 [ 103.275024] x11: ffffffc010e30c20 x10: 0000000000000000 [ 103.280333] x9 : 0000000000000050 x8 : ffffff8006567d88 [ 103.285642] x7 : ffffff8006563b5c x6 : ffffff8006563b44 [ 103.290952] x5 : 0000000000000002 x4 : 0000000000000001 [ 103.296262] x3 : 0000000000000001 x2 : 0000000000000001 [ 103.301572] x1 : 0000000000000000 x0 : 0000000000000011 [ 103.306882] Call trace: [ 103.309328] mt7915_get_phy_mode+0x68/0x120 [mt7915e] [ 103.314378] mt7915_bss_info_changed+0x198/0x200 [mt7915e] [ 103.319941] ieee80211_bss_info_change_notify+0x128/0x290 [mac80211] [ 103.326360] __ieee80211_sta_join_ibss+0x308/0x6c4 [mac80211] [ 103.332171] ieee80211_sta_create_ibss+0x8c/0x10c [mac80211] [ 103.337895] ieee80211_ibss_work+0x3dc/0x614 [mac80211] [ 103.343185] ieee80211_iface_work+0x388/0x3f0 [mac80211] [ 103.348495] process_one_work+0x288/0x690 [ 103.352499] worker_thread+0x70/0x464 [ 103.356157] kthread+0x144/0x150 [ 103.359380] ret_from_fork+0x10/0x18 [ 103.362952] Code: 394008c3 52800220 394000e4 7100007f (39400023) Fixes: 37f4ca907c46 ("mt76: mt7915: register per-phy HE capabilities for each interface") Fixes: e57b7901469f ("mt76: add mac80211 driver for MT7915 PCIe-based chipsets") Signed-off-by: Lorenzo Bianconi Acked-by: Felix Fietkau Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/ddae419a740f1fb9e48afd432035e9f394f512ee.1637239456.git.lorenzo@kernel.org --- drivers/net/wireless/mediatek/mt76/mt7915/mcu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c index 899957b9d0f1..852d5d97c70b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c @@ -176,7 +176,7 @@ mt7915_get_phy_mode(struct ieee80211_vif *vif, struct ieee80211_sta *sta) if (ht_cap->ht_supported) mode |= PHY_MODE_GN; - if (he_cap->has_he) + if (he_cap && he_cap->has_he) mode |= PHY_MODE_AX_24G; } else if (band == NL80211_BAND_5GHZ) { mode |= PHY_MODE_A; @@ -187,7 +187,7 @@ mt7915_get_phy_mode(struct ieee80211_vif *vif, struct ieee80211_sta *sta) if (vht_cap->vht_supported) mode |= PHY_MODE_AC; - if (he_cap->has_he) + if (he_cap && he_cap->has_he) mode |= PHY_MODE_AX_5G; } -- cgit v1.2.3-59-g8ed1b From 16517829f2e02f096fb5ea9083d160381127faf3 Mon Sep 17 00:00:00 2001 From: Huang Pei Date: Tue, 23 Nov 2021 19:07:48 +0800 Subject: hamradio: fix macro redefine warning MIPS/IA64 define END as assembly function ending, which conflict with END definition in mkiss.c, just undef it at first Reported-by: lkp@intel.com Signed-off-by: Huang Pei Signed-off-by: David S. Miller --- drivers/net/hamradio/mkiss.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index e2b332b54f06..7da2bb8a443c 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -31,6 +31,8 @@ #define AX_MTU 236 +/* some arch define END as assembly function ending, just undef it */ +#undef END /* SLIP/KISS protocol characters. */ #define END 0300 /* indicates end of frame */ #define ESC 0333 /* indicates byte stuffing */ -- cgit v1.2.3-59-g8ed1b From e5b40668e930979bd1e82c7ed7c9029db635f0e4 Mon Sep 17 00:00:00 2001 From: Huang Pei Date: Tue, 23 Nov 2021 19:07:49 +0800 Subject: slip: fix macro redefine warning MIPS/IA64 define END as assembly function ending, which conflict with END definition in slip.h, just undef it at first Reported-by: lkp@intel.com Signed-off-by: Huang Pei Signed-off-by: David S. Miller --- drivers/net/slip/slip.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/slip/slip.h b/drivers/net/slip/slip.h index c420e5948522..3d7f88b330c1 100644 --- a/drivers/net/slip/slip.h +++ b/drivers/net/slip/slip.h @@ -40,6 +40,8 @@ insmod -oslip_maxdev=nnn */ #define SL_MTU 296 /* 296; I am used to 600- FvK */ +/* some arch define END as assembly function ending, just undef it */ +#undef END /* SLIP protocol characters. */ #define END 0300 /* indicates end of frame */ #define ESC 0333 /* indicates byte stuffing */ -- cgit v1.2.3-59-g8ed1b From ce4995bc6c8eec9685707e36c3f38aea3c8694fa Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Tue, 23 Nov 2021 09:52:55 +0200 Subject: mlxsw: spectrum: Allow driver to load with old firmware versions The driver fails to load with old firmware versions that cannot report the maximum number of RIF MAC profiles [1]. Fix this by defaulting to a maximum of a single profile in such situations, as multiple profiles are not supported by old firmware versions. [1] mlxsw_spectrum 0000:03:00.0: cannot register bus device mlxsw_spectrum: probe of 0000:03:00.0 failed with error -5 Fixes: 1c375ffb2efab ("mlxsw: spectrum_router: Expose RIF MAC profiles to devlink resource") Signed-off-by: Danielle Ratson Reported-by: Vadim Pasternak Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 5925db386b1b..738ee3b0d3c3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3290,10 +3290,10 @@ mlxsw_sp_resources_rif_mac_profile_register(struct mlxsw_core *mlxsw_core) u8 max_rif_mac_profiles; if (!MLXSW_CORE_RES_VALID(mlxsw_core, MAX_RIF_MAC_PROFILES)) - return -EIO; - - max_rif_mac_profiles = MLXSW_CORE_RES_GET(mlxsw_core, - MAX_RIF_MAC_PROFILES); + max_rif_mac_profiles = 1; + else + max_rif_mac_profiles = MLXSW_CORE_RES_GET(mlxsw_core, + MAX_RIF_MAC_PROFILES); devlink_resource_size_params_init(&size_params, max_rif_mac_profiles, max_rif_mac_profiles, 1, DEVLINK_RESOURCE_UNIT_ENTRY); -- cgit v1.2.3-59-g8ed1b From 63b08b1f6834bbb0b4f7783bf63b80c8c8e9a047 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Tue, 23 Nov 2021 09:52:56 +0200 Subject: mlxsw: spectrum: Protect driver from buggy firmware When processing port up/down events generated by the device's firmware, the driver protects itself from events reported for non-existent local ports, but not the CPU port (local port 0), which exists, but lacks a netdev. This can result in a NULL pointer dereference when calling netif_carrier_{on,off}(). Fix this by bailing early when processing an event reported for the CPU port. Problem was only observed when running on top of a buggy emulator. Fixes: 28b1987ef506 ("mlxsw: spectrum: Register CPU port with devlink") Signed-off-by: Amit Cohen Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 738ee3b0d3c3..03e5bad4e405 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2153,7 +2153,7 @@ static void mlxsw_sp_pude_event_func(const struct mlxsw_reg_info *reg, max_ports = mlxsw_core_max_ports(mlxsw_sp->core); local_port = mlxsw_reg_pude_local_port_get(pude_pl); - if (WARN_ON_ONCE(local_port >= max_ports)) + if (WARN_ON_ONCE(!local_port || local_port >= max_ports)) return; mlxsw_sp_port = mlxsw_sp->ports[local_port]; if (!mlxsw_sp_port) -- cgit v1.2.3-59-g8ed1b From 33a153100bb3459479bd95d3259c2915b53fefa8 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 22 Nov 2021 18:15:54 -0600 Subject: net: ipa: directly disable ipa-setup-ready interrupt We currently maintain a "disabled" Boolean flag to determine whether the "ipa-setup-ready" SMP2P IRQ handler does anything. That flag must be accessed under protection of a mutex. Instead, disable the SMP2P interrupt when requested, which prevents the interrupt handler from ever being called. More importantly, it synchronizes a thread disabling the interrupt with the completion of the interrupt handler in case they run concurrently. Use the IPA setup_complete flag rather than the disabled flag in the handler to determine whether to ignore any interrupts arriving after the first. Rename the "disabled" flag to be "setup_disabled", to be specific about its purpose. Fixes: 530f9216a953 ("soc: qcom: ipa: AP/modem communications") Signed-off-by: Alex Elder Signed-off-by: David S. Miller --- drivers/net/ipa/ipa_smp2p.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ipa/ipa_smp2p.c b/drivers/net/ipa/ipa_smp2p.c index df7639c39d71..24bc112a072c 100644 --- a/drivers/net/ipa/ipa_smp2p.c +++ b/drivers/net/ipa/ipa_smp2p.c @@ -53,7 +53,7 @@ * @setup_ready_irq: IPA interrupt triggered by modem to signal GSI ready * @power_on: Whether IPA power is on * @notified: Whether modem has been notified of power state - * @disabled: Whether setup ready interrupt handling is disabled + * @setup_disabled: Whether setup ready interrupt handler is disabled * @mutex: Mutex protecting ready-interrupt/shutdown interlock * @panic_notifier: Panic notifier structure */ @@ -67,7 +67,7 @@ struct ipa_smp2p { u32 setup_ready_irq; bool power_on; bool notified; - bool disabled; + bool setup_disabled; struct mutex mutex; struct notifier_block panic_notifier; }; @@ -155,11 +155,9 @@ static irqreturn_t ipa_smp2p_modem_setup_ready_isr(int irq, void *dev_id) struct device *dev; int ret; - mutex_lock(&smp2p->mutex); - - if (smp2p->disabled) - goto out_mutex_unlock; - smp2p->disabled = true; /* If any others arrive, ignore them */ + /* Ignore any (spurious) interrupts received after the first */ + if (smp2p->ipa->setup_complete) + return IRQ_HANDLED; /* Power needs to be active for setup */ dev = &smp2p->ipa->pdev->dev; @@ -176,8 +174,6 @@ static irqreturn_t ipa_smp2p_modem_setup_ready_isr(int irq, void *dev_id) out_power_put: pm_runtime_mark_last_busy(dev); (void)pm_runtime_put_autosuspend(dev); -out_mutex_unlock: - mutex_unlock(&smp2p->mutex); return IRQ_HANDLED; } @@ -322,7 +318,10 @@ void ipa_smp2p_disable(struct ipa *ipa) mutex_lock(&smp2p->mutex); - smp2p->disabled = true; + if (!smp2p->setup_disabled) { + disable_irq(smp2p->setup_ready_irq); + smp2p->setup_disabled = true; + } mutex_unlock(&smp2p->mutex); } -- cgit v1.2.3-59-g8ed1b From 8afc7e471ad3c92a9c96adc62d1b67de77378bb6 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 22 Nov 2021 18:15:55 -0600 Subject: net: ipa: separate disabling setup from modem stop The IPA setup_complete flag is set at the end of ipa_setup(), when the setup phase of initialization has completed successfully. This occurs as part of driver probe processing, or (if "modem-init" is specified in the DTS file) it is triggered by the "ipa-setup-ready" SMP2P interrupt generated by the modem. In the latter case, it's possible for driver shutdown (or remove) to begin while setup processing is underway, and this can't be allowed. The problem is that the setup_complete flag is not adequate to signal that setup is underway. If setup_complete is set, it will never be un-set, so that case is not a problem. But if setup_complete is false, there's a chance setup is underway. Because setup is triggered by an interrupt on a "modem-init" system, there is a simple way to ensure the value of setup_complete is safe to read. The threaded handler--if it is executing--will complete as part of a request to disable the "ipa-modem-ready" interrupt. This means that ipa_setup() (which is called from the handler) will run to completion if it was underway, or will never be called otherwise. The request to disable the "ipa-setup-ready" interrupt is currently made within ipa_modem_stop(). Instead, disable the interrupt outside that function in the two places it's called. In the case of ipa_remove(), this ensures the setup_complete flag is safe to read before we read it. Rename ipa_smp2p_disable() to be ipa_smp2p_irq_disable_setup(), to be more specific about its effect. Fixes: 530f9216a953 ("soc: qcom: ipa: AP/modem communications") Signed-off-by: Alex Elder Signed-off-by: David S. Miller --- drivers/net/ipa/ipa_main.c | 6 ++++++ drivers/net/ipa/ipa_modem.c | 6 +++--- drivers/net/ipa/ipa_smp2p.c | 2 +- drivers/net/ipa/ipa_smp2p.h | 7 +++---- 4 files changed, 13 insertions(+), 8 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c index cdfa98a76e1f..a448ec198bee 100644 --- a/drivers/net/ipa/ipa_main.c +++ b/drivers/net/ipa/ipa_main.c @@ -28,6 +28,7 @@ #include "ipa_reg.h" #include "ipa_mem.h" #include "ipa_table.h" +#include "ipa_smp2p.h" #include "ipa_modem.h" #include "ipa_uc.h" #include "ipa_interrupt.h" @@ -801,6 +802,11 @@ static int ipa_remove(struct platform_device *pdev) struct device *dev = &pdev->dev; int ret; + /* Prevent the modem from triggering a call to ipa_setup(). This + * also ensures a modem-initiated setup that's underway completes. + */ + ipa_smp2p_irq_disable_setup(ipa); + ret = pm_runtime_get_sync(dev); if (WARN_ON(ret < 0)) goto out_power_put; diff --git a/drivers/net/ipa/ipa_modem.c b/drivers/net/ipa/ipa_modem.c index ad116bcc0580..d0ab4d70c303 100644 --- a/drivers/net/ipa/ipa_modem.c +++ b/drivers/net/ipa/ipa_modem.c @@ -339,9 +339,6 @@ int ipa_modem_stop(struct ipa *ipa) if (state != IPA_MODEM_STATE_RUNNING) return -EBUSY; - /* Prevent the modem from triggering a call to ipa_setup() */ - ipa_smp2p_disable(ipa); - /* Clean up the netdev and endpoints if it was started */ if (netdev) { struct ipa_priv *priv = netdev_priv(netdev); @@ -369,6 +366,9 @@ static void ipa_modem_crashed(struct ipa *ipa) struct device *dev = &ipa->pdev->dev; int ret; + /* Prevent the modem from triggering a call to ipa_setup() */ + ipa_smp2p_irq_disable_setup(ipa); + ret = pm_runtime_get_sync(dev); if (ret < 0) { dev_err(dev, "error %d getting power to handle crash\n", ret); diff --git a/drivers/net/ipa/ipa_smp2p.c b/drivers/net/ipa/ipa_smp2p.c index 24bc112a072c..211233612039 100644 --- a/drivers/net/ipa/ipa_smp2p.c +++ b/drivers/net/ipa/ipa_smp2p.c @@ -309,7 +309,7 @@ void ipa_smp2p_exit(struct ipa *ipa) kfree(smp2p); } -void ipa_smp2p_disable(struct ipa *ipa) +void ipa_smp2p_irq_disable_setup(struct ipa *ipa) { struct ipa_smp2p *smp2p = ipa->smp2p; diff --git a/drivers/net/ipa/ipa_smp2p.h b/drivers/net/ipa/ipa_smp2p.h index 99a956789638..59cee31a7383 100644 --- a/drivers/net/ipa/ipa_smp2p.h +++ b/drivers/net/ipa/ipa_smp2p.h @@ -27,13 +27,12 @@ int ipa_smp2p_init(struct ipa *ipa, bool modem_init); void ipa_smp2p_exit(struct ipa *ipa); /** - * ipa_smp2p_disable() - Prevent "ipa-setup-ready" interrupt handling + * ipa_smp2p_irq_disable_setup() - Disable the "setup ready" interrupt * @ipa: IPA pointer * - * Prevent handling of the "setup ready" interrupt from the modem. - * This is used before initiating shutdown of the driver. + * Disable the "ipa-setup-ready" interrupt from the modem. */ -void ipa_smp2p_disable(struct ipa *ipa); +void ipa_smp2p_irq_disable_setup(struct ipa *ipa); /** * ipa_smp2p_notify_reset() - Reset modem notification state -- cgit v1.2.3-59-g8ed1b From c75a9ad43691de040bead75f1924928111571f9c Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 22 Nov 2021 22:35:33 +0100 Subject: r8169: fix incorrect mac address assignment The original changes brakes MAC address assignment on older chip versions (see bug report [0]), and it brakes random MAC assignment. is_valid_ether_addr() requires that its argument is word-aligned. Add the missing alignment to array mac_addr. [0] https://bugzilla.kernel.org/show_bug.cgi?id=215087 Fixes: 1c5d09d58748 ("ethernet: r8169: use eth_hw_addr_set()") Reported-by: Richard Herbert Tested-by: Richard Herbert Signed-off-by: Heiner Kallweit Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index bbe21db20417..86c44bc5f73f 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -5217,8 +5217,8 @@ static int rtl_get_ether_clk(struct rtl8169_private *tp) static void rtl_init_mac_address(struct rtl8169_private *tp) { + u8 mac_addr[ETH_ALEN] __aligned(2) = {}; struct net_device *dev = tp->dev; - u8 mac_addr[ETH_ALEN]; int rc; rc = eth_platform_get_mac_address(tp_to_dev(tp), mac_addr); @@ -5233,7 +5233,8 @@ static void rtl_init_mac_address(struct rtl8169_private *tp) if (is_valid_ether_addr(mac_addr)) goto done; - eth_hw_addr_random(dev); + eth_random_addr(mac_addr); + dev->addr_assign_type = NET_ADDR_RANDOM; dev_warn(tp_to_dev(tp), "can't read MAC address, setting random one\n"); done: eth_hw_addr_set(dev, mac_addr); -- cgit v1.2.3-59-g8ed1b From b82d71c0f84a2e5ccaaa7571dfd5c69e0e2cfb4a Mon Sep 17 00:00:00 2001 From: Zheyu Ma Date: Tue, 23 Nov 2021 02:21:50 +0000 Subject: net: chelsio: cxgb4vf: Fix an error code in cxgb4vf_pci_probe() During the process of driver probing, probe function should return < 0 for failure, otherwise kernel will treat value == 0 as success. Therefore, we should set err to -EINVAL when adapter->registered_device_map is NULL. Otherwise kernel will assume that driver has been successfully probed and will cause unexpected errors. Signed-off-by: Zheyu Ma Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index 64479c464b4e..ae9cca768d74 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -3196,6 +3196,7 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev, } if (adapter->registered_device_map == 0) { dev_err(&pdev->dev, "could not register any net devices\n"); + err = -EINVAL; goto err_disable_interrupts; } -- cgit v1.2.3-59-g8ed1b From a049a30fc27c1cb2e12889bbdbd463dbf750103a Mon Sep 17 00:00:00 2001 From: Martyn Welch Date: Mon, 22 Nov 2021 18:44:45 +0000 Subject: net: usb: Correct PHY handling of smsc95xx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The smsc95xx driver is dropping phy speed settings and causing a stack trace at device unbind: [  536.379147] smsc95xx 2-1:1.0 eth1: unregister 'smsc95xx' usb-ci_hdrc.2-1, smsc95xx USB 2.0 Ethernet [  536.425029] ------------[ cut here ]------------ [  536.429650] WARNING: CPU: 0 PID: 439 at fs/kernfs/dir.c:1535 kernfs_remove_by_name_ns+0xb8/0xc0 [  536.438416] kernfs: can not remove 'attached_dev', no directory [  536.444363] Modules linked in: xts dm_crypt dm_mod atmel_mxt_ts smsc95xx usbnet [  536.451748] CPU: 0 PID: 439 Comm: sh Tainted: G        W         5.15.0 #1 [  536.458636] Hardware name: Freescale i.MX53 (Device Tree Support) [  536.464735] Backtrace:  [  536.467190] [<80b1c904>] (dump_backtrace) from [<80b1cb48>] (show_stack+0x20/0x24) [  536.474787]  r7:000005ff r6:8035b294 r5:600f0013 r4:80d8af78 [  536.480449] [<80b1cb28>] (show_stack) from [<80b1f764>] (dump_stack_lvl+0x48/0x54) [  536.488035] [<80b1f71c>] (dump_stack_lvl) from [<80b1f788>] (dump_stack+0x18/0x1c) [  536.495620]  r5:00000009 r4:80d9b820 [  536.499198] [<80b1f770>] (dump_stack) from [<80124fac>] (__warn+0xfc/0x114) [  536.506187] [<80124eb0>] (__warn) from [<80b1d21c>] (warn_slowpath_fmt+0xa8/0xdc) [  536.513688]  r7:000005ff r6:80d9b820 r5:80d9b8e0 r4:83744000 [  536.519349] [<80b1d178>] (warn_slowpath_fmt) from [<8035b294>] (kernfs_remove_by_name_ns+0xb8/0xc0) [  536.528416]  r9:00000001 r8:00000000 r7:824926dc r6:00000000 r5:80df6c2c r4:00000000 [  536.536162] [<8035b1dc>] (kernfs_remove_by_name_ns) from [<80b1f56c>] (sysfs_remove_link+0x4c/0x50) [  536.545225]  r6:7f00f02c r5:80df6c2c r4:83306400 [  536.549845] [<80b1f520>] (sysfs_remove_link) from [<806f9c8c>] (phy_detach+0xfc/0x11c) [  536.557780]  r5:82492000 r4:83306400 [  536.561359] [<806f9b90>] (phy_detach) from [<806f9cf8>] (phy_disconnect+0x4c/0x58) [  536.568943]  r7:824926dc r6:7f00f02c r5:82492580 r4:83306400 [  536.574604] [<806f9cac>] (phy_disconnect) from [<7f00a310>] (smsc95xx_disconnect_phy+0x30/0x38 [smsc95xx]) [  536.584290]  r5:82492580 r4:82492580 [  536.587868] [<7f00a2e0>] (smsc95xx_disconnect_phy [smsc95xx]) from [<7f001570>] (usbnet_stop+0x70/0x1a0 [usbnet]) [  536.598161]  r5:82492580 r4:82492000 [  536.601740] [<7f001500>] (usbnet_stop [usbnet]) from [<808baa70>] (__dev_close_many+0xb4/0x12c) [  536.610466]  r8:83744000 r7:00000000 r6:83744000 r5:83745b74 r4:82492000 [  536.617170] [<808ba9bc>] (__dev_close_many) from [<808bab78>] (dev_close_many+0x90/0x120) [  536.625365]  r7:00000001 r6:83745b74 r5:83745b8c r4:82492000 [  536.631026] [<808baae8>] (dev_close_many) from [<808bf408>] (unregister_netdevice_many+0x15c/0x704) [  536.640094]  r9:00000001 r8:81130b98 r7:83745b74 r6:83745bc4 r5:83745b8c r4:82492000 [  536.647840] [<808bf2ac>] (unregister_netdevice_many) from [<808bfa50>] (unregister_netdevice_queue+0xa0/0xe8) [  536.657775]  r10:8112bcc0 r9:83306c00 r8:83306c80 r7:8291e420 r6:83744000 r5:00000000 [  536.665608]  r4:82492000 [  536.668143] [<808bf9b0>] (unregister_netdevice_queue) from [<808bfac0>] (unregister_netdev+0x28/0x30) [  536.677381]  r6:7f01003c r5:82492000 r4:82492000 [  536.682000] [<808bfa98>] (unregister_netdev) from [<7f000b40>] (usbnet_disconnect+0x64/0xdc [usbnet]) [  536.691241]  r5:82492000 r4:82492580 [  536.694819] [<7f000adc>] (usbnet_disconnect [usbnet]) from [<8076b958>] (usb_unbind_interface+0x80/0x248) [  536.704406]  r5:7f01003c r4:83306c80 [  536.707984] [<8076b8d8>] (usb_unbind_interface) from [<8061765c>] (device_release_driver_internal+0x1c4/0x1cc) [  536.718005]  r10:8112bcc0 r9:80dff1dc r8:83306c80 r7:83744000 r6:7f01003c r5:00000000 [  536.725838]  r4:8291e420 [  536.728373] [<80617498>] (device_release_driver_internal) from [<80617684>] (device_release_driver+0x20/0x24) [  536.738302]  r7:83744000 r6:810d4f4c r5:8291e420 r4:8176ae30 [  536.743963] [<80617664>] (device_release_driver) from [<806156cc>] (bus_remove_device+0xf0/0x148) [  536.752858] [<806155dc>] (bus_remove_device) from [<80610018>] (device_del+0x198/0x41c) [  536.760880]  r7:83744000 r6:8116e2e4 r5:8291e464 r4:8291e420 [  536.766542] [<8060fe80>] (device_del) from [<80768fe8>] (usb_disable_device+0xcc/0x1e0) [  536.774576]  r10:8112bcc0 r9:80dff1dc r8:00000001 r7:8112bc48 r6:8291e400 r5:00000001 [  536.782410]  r4:83306c00 [  536.784945] [<80768f1c>] (usb_disable_device) from [<80769c30>] (usb_set_configuration+0x514/0x8dc) [  536.794011]  r10:00000000 r9:00000000 r8:832c3600 r7:00000004 r6:810d5688 r5:00000000 [  536.801844]  r4:83306c00 [  536.804379] [<8076971c>] (usb_set_configuration) from [<80775fac>] (usb_generic_driver_disconnect+0x34/0x38) [  536.814236]  r10:832c3610 r9:83745ef8 r8:832c3600 r7:00000004 r6:810d5688 r5:83306c00 [  536.822069]  r4:83306c00 [  536.824605] [<80775f78>] (usb_generic_driver_disconnect) from [<8076b850>] (usb_unbind_device+0x30/0x70) [  536.834100]  r5:83306c00 r4:810d5688 [  536.837678] [<8076b820>] (usb_unbind_device) from [<8061765c>] (device_release_driver_internal+0x1c4/0x1cc) [  536.847432]  r5:822fb480 r4:83306c80 [  536.851009] [<80617498>] (device_release_driver_internal) from [<806176a8>] (device_driver_detach+0x20/0x24) [  536.860853]  r7:00000004 r6:810d4f4c r5:810d5688 r4:83306c80 [  536.866515] [<80617688>] (device_driver_detach) from [<80614d98>] (unbind_store+0x70/0xe4) [  536.874793] [<80614d28>] (unbind_store) from [<80614118>] (drv_attr_store+0x30/0x3c) [  536.882554]  r7:00000000 r6:00000000 r5:83739200 r4:80614d28 [  536.888217] [<806140e8>] (drv_attr_store) from [<8035cb68>] (sysfs_kf_write+0x48/0x54) [  536.896154]  r5:83739200 r4:806140e8 [  536.899732] [<8035cb20>] (sysfs_kf_write) from [<8035be84>] (kernfs_fop_write_iter+0x11c/0x1d4) [  536.908446]  r5:83739200 r4:00000004 [  536.912024] [<8035bd68>] (kernfs_fop_write_iter) from [<802b87fc>] (vfs_write+0x258/0x3e4) [  536.920317]  r10:00000000 r9:83745f58 r8:83744000 r7:00000000 r6:00000004 r5:00000000 [  536.928151]  r4:82adacc0 [  536.930687] [<802b85a4>] (vfs_write) from [<802b8b0c>] (ksys_write+0x74/0xf4) [  536.937842]  r10:00000004 r9:007767a0 r8:83744000 r7:00000000 r6:00000000 r5:82adacc0 [  536.945676]  r4:82adacc0 [  536.948213] [<802b8a98>] (ksys_write) from [<802b8ba4>] (sys_write+0x18/0x1c) [  536.955367]  r10:00000004 r9:83744000 r8:80100244 r7:00000004 r6:76f47b58 r5:76fc0350 [  536.963200]  r4:00000004 [  536.965735] [<802b8b8c>] (sys_write) from [<80100060>] (ret_fast_syscall+0x0/0x48) [  536.973320] Exception stack(0x83745fa8 to 0x83745ff0) [  536.978383] 5fa0:                   00000004 76fc0350 00000001 007767a0 00000004 00000000 [  536.986569] 5fc0: 00000004 76fc0350 76f47b58 00000004 76f47c7c 76f48114 00000000 7e87991c [  536.994753] 5fe0: 00000498 7e879908 76e6dce8 76eca2e8 [  536.999922] ---[ end trace 9b835d809816b435 ]--- The driver should not be connecting and disconnecting the PHY when the device is opened and closed, it should be stopping and starting the PHY. The phy should be connected as part of binding and disconnected during unbinding. As this results in the PHY not being reset during open, link speed, etc. settings set prior to the link coming up are now not being lost. It is necessary for phy_stop() to only be called when the phydev still exists (resolving the above stack trace). When unbinding, ".unbind" will be called prior to ".stop", with phy_disconnect() already having called phy_stop() before the phydev becomes inaccessible. Signed-off-by: Martyn Welch Cc: Steve Glendinning Cc: UNGLinuxDriver@microchip.com Cc: "David S. Miller" Cc: Jakub Kicinski Cc: stable@kernel.org # v5.15 Signed-off-by: David S. Miller --- drivers/net/usb/smsc95xx.c | 55 ++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 29 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 20fe4cd8f784..abe0149ed917 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -1050,6 +1050,14 @@ static const struct net_device_ops smsc95xx_netdev_ops = { .ndo_set_features = smsc95xx_set_features, }; +static void smsc95xx_handle_link_change(struct net_device *net) +{ + struct usbnet *dev = netdev_priv(net); + + phy_print_status(net->phydev); + usbnet_defer_kevent(dev, EVENT_LINK_CHANGE); +} + static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf) { struct smsc95xx_priv *pdata; @@ -1154,6 +1162,17 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf) dev->net->min_mtu = ETH_MIN_MTU; dev->net->max_mtu = ETH_DATA_LEN; dev->hard_mtu = dev->net->mtu + dev->net->hard_header_len; + + ret = phy_connect_direct(dev->net, pdata->phydev, + &smsc95xx_handle_link_change, + PHY_INTERFACE_MODE_MII); + if (ret) { + netdev_err(dev->net, "can't attach PHY to %s\n", pdata->mdiobus->id); + goto unregister_mdio; + } + + phy_attached_info(dev->net->phydev); + return 0; unregister_mdio: @@ -1171,47 +1190,25 @@ static void smsc95xx_unbind(struct usbnet *dev, struct usb_interface *intf) { struct smsc95xx_priv *pdata = dev->driver_priv; + phy_disconnect(dev->net->phydev); mdiobus_unregister(pdata->mdiobus); mdiobus_free(pdata->mdiobus); netif_dbg(dev, ifdown, dev->net, "free pdata\n"); kfree(pdata); } -static void smsc95xx_handle_link_change(struct net_device *net) -{ - struct usbnet *dev = netdev_priv(net); - - phy_print_status(net->phydev); - usbnet_defer_kevent(dev, EVENT_LINK_CHANGE); -} - static int smsc95xx_start_phy(struct usbnet *dev) { - struct smsc95xx_priv *pdata = dev->driver_priv; - struct net_device *net = dev->net; - int ret; + phy_start(dev->net->phydev); - ret = smsc95xx_reset(dev); - if (ret < 0) - return ret; - - ret = phy_connect_direct(net, pdata->phydev, - &smsc95xx_handle_link_change, - PHY_INTERFACE_MODE_MII); - if (ret) { - netdev_err(net, "can't attach PHY to %s\n", pdata->mdiobus->id); - return ret; - } - - phy_attached_info(net->phydev); - phy_start(net->phydev); return 0; } -static int smsc95xx_disconnect_phy(struct usbnet *dev) +static int smsc95xx_stop(struct usbnet *dev) { - phy_stop(dev->net->phydev); - phy_disconnect(dev->net->phydev); + if (dev->net->phydev) + phy_stop(dev->net->phydev); + return 0; } @@ -1966,7 +1963,7 @@ static const struct driver_info smsc95xx_info = { .unbind = smsc95xx_unbind, .link_reset = smsc95xx_link_reset, .reset = smsc95xx_start_phy, - .stop = smsc95xx_disconnect_phy, + .stop = smsc95xx_stop, .rx_fixup = smsc95xx_rx_fixup, .tx_fixup = smsc95xx_tx_fixup, .status = smsc95xx_status, -- cgit v1.2.3-59-g8ed1b From e4e9bfb7c93d7e78aa4ad7e1c411a8df15386062 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 22 Nov 2021 19:16:40 -0600 Subject: net: ipa: kill ipa_cmd_pipeline_clear() Calling ipa_cmd_pipeline_clear() after stopping the channel underlying the AP<-modem RX endpoint can lead to a deadlock. This occurs in the ->runtime_suspend device power operation for the IPA driver. While this callback is in progress, any other requests for power will block until the callback returns. Stopping the AP<-modem RX channel does not prevent the modem from sending another packet to this endpoint. If a packet arrives for an RX channel when the channel is stopped, an SUSPEND IPA interrupt condition will be pending. Handling an IPA interrupt requires power, so ipa_isr_thread() calls pm_runtime_get_sync() first thing. The problem occurs because a "pipeline clear" command will not complete while such a SUSPEND interrupt condition exists. So the SUSPEND IPA interrupt handler won't proceed until it gets power; that won't happen until the ->runtime_suspend callback (and its "pipeline clear" command) completes; and that can't happen while the SUSPEND interrupt condition exists. It turns out that in this case there is no need to use the "pipeline clear" command. There are scenarios in which clearing the pipeline is required while suspending, but those are not (yet) supported upstream. So a simple fix, avoiding the potential deadlock, is to stop calling ipa_cmd_pipeline_clear() in ipa_endpoint_suspend(). This removes the only user of ipa_cmd_pipeline_clear(), so get rid of that function. It can be restored again whenever it's needed. This is basically a manual revert along with an explanation for commit 6cb63ea6a39ea ("net: ipa: introduce ipa_cmd_tag_process()"). Fixes: 6cb63ea6a39ea ("net: ipa: introduce ipa_cmd_tag_process()") Signed-off-by: Alex Elder Signed-off-by: David S. Miller --- drivers/net/ipa/ipa_cmd.c | 16 ---------------- drivers/net/ipa/ipa_cmd.h | 6 ------ drivers/net/ipa/ipa_endpoint.c | 2 -- 3 files changed, 24 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ipa/ipa_cmd.c b/drivers/net/ipa/ipa_cmd.c index cff51731195a..d57472ea077f 100644 --- a/drivers/net/ipa/ipa_cmd.c +++ b/drivers/net/ipa/ipa_cmd.c @@ -661,22 +661,6 @@ void ipa_cmd_pipeline_clear_wait(struct ipa *ipa) wait_for_completion(&ipa->completion); } -void ipa_cmd_pipeline_clear(struct ipa *ipa) -{ - u32 count = ipa_cmd_pipeline_clear_count(); - struct gsi_trans *trans; - - trans = ipa_cmd_trans_alloc(ipa, count); - if (trans) { - ipa_cmd_pipeline_clear_add(trans); - gsi_trans_commit_wait(trans); - ipa_cmd_pipeline_clear_wait(ipa); - } else { - dev_err(&ipa->pdev->dev, - "error allocating %u entry tag transaction\n", count); - } -} - static struct ipa_cmd_info * ipa_cmd_info_alloc(struct ipa_endpoint *endpoint, u32 tre_count) { diff --git a/drivers/net/ipa/ipa_cmd.h b/drivers/net/ipa/ipa_cmd.h index 69cd085d427d..05ed7e42e184 100644 --- a/drivers/net/ipa/ipa_cmd.h +++ b/drivers/net/ipa/ipa_cmd.h @@ -163,12 +163,6 @@ u32 ipa_cmd_pipeline_clear_count(void); */ void ipa_cmd_pipeline_clear_wait(struct ipa *ipa); -/** - * ipa_cmd_pipeline_clear() - Clear the hardware pipeline - * @ipa: - IPA pointer - */ -void ipa_cmd_pipeline_clear(struct ipa *ipa); - /** * ipa_cmd_trans_alloc() - Allocate a transaction for the command TX endpoint * @ipa: IPA pointer diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index ef790fd0ab56..03a170993420 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -1636,8 +1636,6 @@ void ipa_endpoint_suspend(struct ipa *ipa) if (ipa->modem_netdev) ipa_modem_suspend(ipa->modem_netdev); - ipa_cmd_pipeline_clear(ipa); - ipa_endpoint_suspend_one(ipa->name_map[IPA_ENDPOINT_AP_LAN_RX]); ipa_endpoint_suspend_one(ipa->name_map[IPA_ENDPOINT_AP_COMMAND_TX]); } -- cgit v1.2.3-59-g8ed1b From 7b1b62bc1e6a7b2fd5ee7a4296268eb291d23aeb Mon Sep 17 00:00:00 2001 From: Marek Behún Date: Mon, 22 Nov 2021 21:08:34 +0100 Subject: net: marvell: mvpp2: increase MTU limit when XDP enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently mvpp2_xdp_setup won't allow attaching XDP program if mtu > ETH_DATA_LEN (1500). The mvpp2_change_mtu on the other hand checks whether MVPP2_RX_PKT_SIZE(mtu) > MVPP2_BM_LONG_PKT_SIZE. These two checks are semantically different. Moreover this limit can be increased to MVPP2_MAX_RX_BUF_SIZE, since in mvpp2_rx we have xdp.data = data + MVPP2_MH_SIZE + MVPP2_SKB_HEADROOM; xdp.frame_sz = PAGE_SIZE; Change the checks to check whether mtu > MVPP2_MAX_RX_BUF_SIZE Fixes: 07dd0a7aae7f ("mvpp2: add basic XDP support") Signed-off-by: Marek Behún Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 2b18d89d9756..ce486e16489c 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -5017,11 +5017,13 @@ static int mvpp2_change_mtu(struct net_device *dev, int mtu) mtu = ALIGN(MVPP2_RX_PKT_SIZE(mtu), 8); } + if (port->xdp_prog && mtu > MVPP2_MAX_RX_BUF_SIZE) { + netdev_err(dev, "Illegal MTU value %d (> %d) for XDP mode\n", + mtu, (int)MVPP2_MAX_RX_BUF_SIZE); + return -EINVAL; + } + if (MVPP2_RX_PKT_SIZE(mtu) > MVPP2_BM_LONG_PKT_SIZE) { - if (port->xdp_prog) { - netdev_err(dev, "Jumbo frames are not supported with XDP\n"); - return -EINVAL; - } if (priv->percpu_pools) { netdev_warn(dev, "mtu %d too high, switching to shared buffers", mtu); mvpp2_bm_switch_buffers(priv, false); @@ -5307,8 +5309,8 @@ static int mvpp2_xdp_setup(struct mvpp2_port *port, struct netdev_bpf *bpf) bool running = netif_running(port->dev); bool reset = !prog != !port->xdp_prog; - if (port->dev->mtu > ETH_DATA_LEN) { - NL_SET_ERR_MSG_MOD(bpf->extack, "XDP is not supported with jumbo frames enabled"); + if (port->dev->mtu > MVPP2_MAX_RX_BUF_SIZE) { + NL_SET_ERR_MSG_MOD(bpf->extack, "MTU too large for XDP"); return -EOPNOTSUPP; } -- cgit v1.2.3-59-g8ed1b From ebb75b1b43d3e2bafc4d33eb4b1ae9c8d2759771 Mon Sep 17 00:00:00 2001 From: Deren Wu Date: Mon, 22 Nov 2021 23:10:27 +0800 Subject: mt76: fix timestamp check in tx_status Should keep SKBs only if timeout timestamp is still after jiffies. Otherwise, report tx status and drop it direclty. Fixes: bd1e3e7b693c ("mt76: introduce packet_id idr") Signed-off-by: Deren Wu Acked-by: Felix Fietkau Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/7e3784949c0b29a00465966b89fdb0192bd0298e.1637593492.git.deren.wu@mediatek.com --- drivers/net/wireless/mediatek/mt76/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c index 11719ef034d8..6b8c9dc80542 100644 --- a/drivers/net/wireless/mediatek/mt76/tx.c +++ b/drivers/net/wireless/mediatek/mt76/tx.c @@ -173,7 +173,7 @@ mt76_tx_status_skb_get(struct mt76_dev *dev, struct mt76_wcid *wcid, int pktid, if (!(cb->flags & MT_TX_CB_DMA_DONE)) continue; - if (!time_is_after_jiffies(cb->jiffies + + if (time_is_after_jiffies(cb->jiffies + MT_TX_STATUS_SKB_TIMEOUT)) continue; } -- cgit v1.2.3-59-g8ed1b From 2a9e9857473bfc5721092ff274bc1e371e5a0d2f Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 22 Nov 2021 18:34:03 +0100 Subject: mt76: fix possible pktid leak Fix a possible idr pkt-id leak if the packet is dropped on tx side Fixes: bd1e3e7b693c ("mt76: introduce packet_id idr") Signed-off-by: Lorenzo Bianconi Acked-by: Felix Fietkau Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/a560caffcc24452fb48af53904bbe5c45ea5db93.1637602268.git.lorenzo@kernel.org --- .../net/wireless/mediatek/mt76/mt7615/pci_mac.c | 3 +-- .../net/wireless/mediatek/mt76/mt7615/usb_sdio.c | 23 ++++++++++++---------- .../net/wireless/mediatek/mt76/mt76x02_usb_core.c | 8 +++++++- drivers/net/wireless/mediatek/mt76/mt7915/mac.c | 15 +++++++------- .../net/wireless/mediatek/mt76/mt7921/sdio_mac.c | 16 +++++++++------ 5 files changed, 38 insertions(+), 27 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c index 5ee52cd70a4b..d1806f198aed 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c @@ -143,8 +143,6 @@ int mt7615_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, if (!wcid) wcid = &dev->mt76.global_wcid; - pid = mt76_tx_status_skb_add(mdev, wcid, tx_info->skb); - if ((info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) && msta) { struct mt7615_phy *phy = &dev->phy; @@ -164,6 +162,7 @@ int mt7615_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, if (id < 0) return id; + pid = mt76_tx_status_skb_add(mdev, wcid, tx_info->skb); mt7615_mac_write_txwi(dev, txwi_ptr, tx_info->skb, wcid, sta, pid, key, false); diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c b/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c index bd2939ebcbf4..bfe6c1579dc1 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c @@ -43,17 +43,11 @@ EXPORT_SYMBOL_GPL(mt7663_usb_sdio_reg_map); static void mt7663_usb_sdio_write_txwi(struct mt7615_dev *dev, struct mt76_wcid *wcid, enum mt76_txq_id qid, struct ieee80211_sta *sta, - struct sk_buff *skb) + int pid, struct sk_buff *skb) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_key_conf *key = info->control.hw_key; __le32 *txwi; - int pid; - - if (!wcid) - wcid = &dev->mt76.global_wcid; - - pid = mt76_tx_status_skb_add(&dev->mt76, wcid, skb); txwi = (__le32 *)(skb->data - MT_USB_TXD_SIZE); memset(txwi, 0, MT_USB_TXD_SIZE); @@ -195,9 +189,12 @@ int mt7663_usb_sdio_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, struct sk_buff *skb = tx_info->skb; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct mt7615_sta *msta; - int pad; + int pad, err, pktid; msta = wcid ? container_of(wcid, struct mt7615_sta, wcid) : NULL; + if (!wcid) + wcid = &dev->mt76.global_wcid; + if ((info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) && msta && !msta->rate_probe) { /* request to configure sampling rate */ @@ -207,7 +204,8 @@ int mt7663_usb_sdio_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, spin_unlock_bh(&dev->mt76.lock); } - mt7663_usb_sdio_write_txwi(dev, wcid, qid, sta, skb); + pktid = mt76_tx_status_skb_add(&dev->mt76, wcid, skb); + mt7663_usb_sdio_write_txwi(dev, wcid, qid, sta, pktid, skb); if (mt76_is_usb(mdev)) { u32 len = skb->len; @@ -217,7 +215,12 @@ int mt7663_usb_sdio_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, pad = round_up(skb->len, 4) - skb->len; } - return mt76_skb_adjust_pad(skb, pad); + err = mt76_skb_adjust_pad(skb, pad); + if (err) + /* Release pktid in case of error. */ + idr_remove(&wcid->pktid, pktid); + + return err; } EXPORT_SYMBOL_GPL(mt7663_usb_sdio_tx_prepare_skb); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c b/drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c index efd70ddc2fd1..2c6c03809b20 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c @@ -72,6 +72,7 @@ int mt76x02u_tx_prepare_skb(struct mt76_dev *mdev, void *data, bool ampdu = IEEE80211_SKB_CB(tx_info->skb)->flags & IEEE80211_TX_CTL_AMPDU; enum mt76_qsel qsel; u32 flags; + int err; mt76_insert_hdr_pad(tx_info->skb); @@ -106,7 +107,12 @@ int mt76x02u_tx_prepare_skb(struct mt76_dev *mdev, void *data, ewma_pktlen_add(&msta->pktlen, tx_info->skb->len); } - return mt76x02u_skb_dma_info(tx_info->skb, WLAN_PORT, flags); + err = mt76x02u_skb_dma_info(tx_info->skb, WLAN_PORT, flags); + if (err && wcid) + /* Release pktid in case of error. */ + idr_remove(&wcid->pktid, pid); + + return err; } EXPORT_SYMBOL_GPL(mt76x02u_tx_prepare_skb); diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c index 5fcf35f2d9fb..809dc18e5083 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c @@ -1151,8 +1151,14 @@ int mt7915_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, } } - pid = mt76_tx_status_skb_add(mdev, wcid, tx_info->skb); + t = (struct mt76_txwi_cache *)(txwi + mdev->drv->txwi_size); + t->skb = tx_info->skb; + + id = mt76_token_consume(mdev, &t); + if (id < 0) + return id; + pid = mt76_tx_status_skb_add(mdev, wcid, tx_info->skb); mt7915_mac_write_txwi(dev, txwi_ptr, tx_info->skb, wcid, pid, key, false); @@ -1178,13 +1184,6 @@ int mt7915_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, txp->bss_idx = mvif->idx; } - t = (struct mt76_txwi_cache *)(txwi + mdev->drv->txwi_size); - t->skb = tx_info->skb; - - id = mt76_token_consume(mdev, &t); - if (id < 0) - return id; - txp->token = cpu_to_le16(id); if (test_bit(MT_WCID_FLAG_4ADDR, &wcid->flags)) txp->rept_wds_wcid = cpu_to_le16(wcid->idx); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/sdio_mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/sdio_mac.c index 137f86a6dbf8..85b3d88f8ecc 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/sdio_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/sdio_mac.c @@ -142,14 +142,12 @@ out: static void mt7921s_write_txwi(struct mt7921_dev *dev, struct mt76_wcid *wcid, enum mt76_txq_id qid, struct ieee80211_sta *sta, - struct sk_buff *skb) + int pid, struct sk_buff *skb) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_key_conf *key = info->control.hw_key; __le32 *txwi; - int pid; - pid = mt76_tx_status_skb_add(&dev->mt76, wcid, skb); txwi = (__le32 *)(skb->data - MT_SDIO_TXD_SIZE); memset(txwi, 0, MT_SDIO_TXD_SIZE); mt7921_mac_write_txwi(dev, txwi, skb, wcid, key, pid, false); @@ -164,7 +162,7 @@ int mt7921s_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76); struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx_info->skb); struct sk_buff *skb = tx_info->skb; - int pad; + int err, pad, pktid; if (unlikely(tx_info->skb->len <= ETH_HLEN)) return -EINVAL; @@ -181,12 +179,18 @@ int mt7921s_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, } } - mt7921s_write_txwi(dev, wcid, qid, sta, skb); + pktid = mt76_tx_status_skb_add(&dev->mt76, wcid, skb); + mt7921s_write_txwi(dev, wcid, qid, sta, pktid, skb); mt7921_skb_add_sdio_hdr(skb, MT7921_SDIO_DATA); pad = round_up(skb->len, 4) - skb->len; - return mt76_skb_adjust_pad(skb, pad); + err = mt76_skb_adjust_pad(skb, pad); + if (err) + /* Release pktid in case of error. */ + idr_remove(&wcid->pktid, pktid); + + return err; } void mt7921s_tx_complete_skb(struct mt76_dev *mdev, struct mt76_queue_entry *e) -- cgit v1.2.3-59-g8ed1b From fcfb65f8a922c7dd25a2e9913601dae979ce6560 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 24 Nov 2021 18:44:13 -0500 Subject: Revert "virtio-net: don't let virtio core to validate used length" This reverts commit 816625c13652cef5b2c49082d652875da6f2ad7a. Attempts to validate length in the core did not work out. We'll drop them, so revert the dependent changes in drivers. Signed-off-by: Michael S. Tsirkin --- drivers/net/virtio_net.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 1771d6e5224f..55db6a336f7e 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3423,7 +3423,6 @@ static struct virtio_driver virtio_net_driver = { .feature_table_size = ARRAY_SIZE(features), .feature_table_legacy = features_legacy, .feature_table_size_legacy = ARRAY_SIZE(features_legacy), - .suppress_used_validation = true, .driver.name = KBUILD_MODNAME, .driver.owner = THIS_MODULE, .id_table = id_table, -- cgit v1.2.3-59-g8ed1b From ddb826c2c92d461f290a7bab89e7c28696191875 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 24 Nov 2021 08:16:25 +0100 Subject: lan743x: fix deadlock in lan743x_phy_link_status_change() Usage of phy_ethtool_get_link_ksettings() in the link status change handler isn't needed, and in combination with the referenced change it results in a deadlock. Simply remove the call and replace it with direct access to phydev->speed. The duplex argument of lan743x_phy_update_flowcontrol() isn't used and can be removed. Fixes: c10a485c3de5 ("phy: phy_ethtool_ksettings_get: Lock the phy for consistency") Reported-by: Alessandro B Maurici Tested-by: Alessandro B Maurici Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/40e27f76-0ba3-dcef-ee32-a78b9df38b0f@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan743x_main.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 4fc97823bc84..7d7647481f70 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -914,8 +914,7 @@ static int lan743x_phy_reset(struct lan743x_adapter *adapter) } static void lan743x_phy_update_flowcontrol(struct lan743x_adapter *adapter, - u8 duplex, u16 local_adv, - u16 remote_adv) + u16 local_adv, u16 remote_adv) { struct lan743x_phy *phy = &adapter->phy; u8 cap; @@ -943,7 +942,6 @@ static void lan743x_phy_link_status_change(struct net_device *netdev) phy_print_status(phydev); if (phydev->state == PHY_RUNNING) { - struct ethtool_link_ksettings ksettings; int remote_advertisement = 0; int local_advertisement = 0; @@ -980,18 +978,14 @@ static void lan743x_phy_link_status_change(struct net_device *netdev) } lan743x_csr_write(adapter, MAC_CR, data); - memset(&ksettings, 0, sizeof(ksettings)); - phy_ethtool_get_link_ksettings(netdev, &ksettings); local_advertisement = linkmode_adv_to_mii_adv_t(phydev->advertising); remote_advertisement = linkmode_adv_to_mii_adv_t(phydev->lp_advertising); - lan743x_phy_update_flowcontrol(adapter, - ksettings.base.duplex, - local_advertisement, + lan743x_phy_update_flowcontrol(adapter, local_advertisement, remote_advertisement); - lan743x_ptp_update_latency(adapter, ksettings.base.speed); + lan743x_ptp_update_latency(adapter, phydev->speed); } } -- cgit v1.2.3-59-g8ed1b From 80662f4fd4771bc9c7cc4abdfbe866ebd1179621 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 23 Nov 2021 16:44:02 +0100 Subject: net: phylink: Force link down and retrigger resolve on interface change MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On PHY state change the phylink_resolve() function can read stale information from the MAC and report incorrect link speed and duplex to the kernel message log. Example with a Marvell 88X3310 PHY connected to a SerDes port on Marvell 88E6393X switch: - PHY driver triggers state change due to PHY interface mode being changed from 10gbase-r to 2500base-x due to copper change in speed from 10Gbps to 2.5Gbps, but the PHY itself either hasn't yet changed its interface to the host, or the interrupt about loss of SerDes link hadn't arrived yet (there can be a delay of several milliseconds for this), so we still think that the 10gbase-r mode is up - phylink_resolve() - phylink_mac_pcs_get_state() - this fills in speed=10g link=up - interface mode is updated to 2500base-x but speed is left at 10Gbps - phylink_major_config() - interface is changed to 2500base-x - phylink_link_up() - mv88e6xxx_mac_link_up() - .port_set_speed_duplex() - speed is set to 10Gbps - reports "Link is Up - 10Gbps/Full" to dmesg Afterwards when the interrupt finally arrives for mv88e6xxx, another resolve is forced in which we get the correct speed from phylink_mac_pcs_get_state(), but since the interface is not being changed anymore, we don't call phylink_major_config() but only phylink_mac_config(), which does not set speed/duplex anymore. To fix this, we need to force the link down and trigger another resolve on PHY interface change event. Fixes: 9525ae83959b ("phylink: add phylink infrastructure") Signed-off-by: Russell King (Oracle) Signed-off-by: Marek Behún Signed-off-by: Jakub Kicinski --- drivers/net/phy/phylink.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 3ad7397b8119..708d2a984621 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -710,6 +710,7 @@ static void phylink_resolve(struct work_struct *w) struct phylink_link_state link_state; struct net_device *ndev = pl->netdev; bool mac_config = false; + bool retrigger = false; bool cur_link_state; mutex_lock(&pl->state_mutex); @@ -723,6 +724,7 @@ static void phylink_resolve(struct work_struct *w) link_state.link = false; } else if (pl->mac_link_dropped) { link_state.link = false; + retrigger = true; } else { switch (pl->cur_link_an_mode) { case MLO_AN_PHY: @@ -747,6 +749,15 @@ static void phylink_resolve(struct work_struct *w) /* Only update if the PHY link is up */ if (pl->phydev && pl->phy_state.link) { + /* If the interface has changed, force a + * link down event if the link isn't already + * down, and re-resolve. + */ + if (link_state.interface != + pl->phy_state.interface) { + retrigger = true; + link_state.link = false; + } link_state.interface = pl->phy_state.interface; /* If we have a PHY, we need to update with @@ -789,7 +800,7 @@ static void phylink_resolve(struct work_struct *w) else phylink_link_up(pl, link_state); } - if (!link_state.link && pl->mac_link_dropped) { + if (!link_state.link && retrigger) { pl->mac_link_dropped = false; queue_work(system_power_efficient_wq, &pl->resolve); } -- cgit v1.2.3-59-g8ed1b From dbae3388ea9ca33bd1d5eabc3b0ef17e69c74677 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 23 Nov 2021 16:44:03 +0100 Subject: net: phylink: Force retrigger in case of latched link-fail indicator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On mv88e6xxx 1G/2.5G PCS, the SerDes register 4.2001.2 has the following description: This register bit indicates when link was lost since the last read. For the current link status, read this register back-to-back. Thus to get current link state, we need to read the register twice. But doing that in the link change interrupt handler would lead to potentially ignoring link down events, which we really want to avoid. Thus this needs to be solved in phylink's resolve, by retriggering another resolve in the event when PCS reports link down and previous link was up, and by re-reading PCS state if the previous link was down. The wrong value is read when phylink requests change from sgmii to 2500base-x mode, and link won't come up. This fixes the bug. Fixes: 9525ae83959b ("phylink: add phylink infrastructure") Signed-off-by: Russell King (Oracle) Signed-off-by: Marek Behún Signed-off-by: Jakub Kicinski --- drivers/net/phy/phylink.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 708d2a984621..5904546acae6 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -741,6 +741,19 @@ static void phylink_resolve(struct work_struct *w) case MLO_AN_INBAND: phylink_mac_pcs_get_state(pl, &link_state); + /* The PCS may have a latching link-fail indicator. + * If the link was up, bring the link down and + * re-trigger the resolve. Otherwise, re-read the + * PCS state to get the current status of the link. + */ + if (!link_state.link) { + if (cur_link_state) + retrigger = true; + else + phylink_mac_pcs_get_state(pl, + &link_state); + } + /* If we have a phy, the "up" state is the union of * both the PHY and the MAC */ -- cgit v1.2.3-59-g8ed1b From eaeace60778e524a2820d0c0ad60bf80289e292c Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Tue, 23 Nov 2021 12:40:00 -0800 Subject: igb: fix netpoll exit with traffic Oleksandr brought a bug report where netpoll causes trace messages in the log on igb. Danielle brought this back up as still occurring, so we'll try again. [22038.710800] ------------[ cut here ]------------ [22038.710801] igb_poll+0x0/0x1440 [igb] exceeded budget in poll [22038.710802] WARNING: CPU: 12 PID: 40362 at net/core/netpoll.c:155 netpoll_poll_dev+0x18a/0x1a0 As Alex suggested, change the driver to return work_done at the exit of napi_poll, which should be safe to do in this driver because it is not polling multiple queues in this single napi context (multiple queues attached to one MSI-X vector). Several other drivers contain the same simple sequence, so I hope this will not create new problems. Fixes: 16eb8815c235 ("igb: Refactor clean_rx_irq to reduce overhead and improve performance") Reported-by: Oleksandr Natalenko Reported-by: Danielle Ratson Suggested-by: Alexander Duyck Signed-off-by: Jesse Brandeburg Tested-by: Oleksandr Natalenko Tested-by: Danielle Ratson Link: https://lore.kernel.org/r/20211123204000.1597971-1-jesse.brandeburg@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/igb/igb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 836be0d3b291..fd54d3ef890b 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -8026,7 +8026,7 @@ static int igb_poll(struct napi_struct *napi, int budget) if (likely(napi_complete_done(napi, work_done))) igb_ring_irq_enable(q_vector); - return min(work_done, budget - 1); + return work_done; } /** -- cgit v1.2.3-59-g8ed1b From 9dbe33cf371bd70330858370bdbc35c7668f00c3 Mon Sep 17 00:00:00 2001 From: Dylan Hung Date: Thu, 25 Nov 2021 10:44:32 +0800 Subject: mdio: aspeed: Fix "Link is Down" issue The issue happened randomly in runtime. The message "Link is Down" is popped but soon it recovered to "Link is Up". The "Link is Down" results from the incorrect read data for reading the PHY register via MDIO bus. The correct sequence for reading the data shall be: 1. fire the command 2. wait for command done (this step was missing) 3. wait for data idle 4. read data from data register Cc: stable@vger.kernel.org Fixes: f160e99462c6 ("net: phy: Add mdio-aspeed") Reviewed-by: Joel Stanley Signed-off-by: Dylan Hung Reviewed-by: Andrew Lunn Reviewed-by: Russell King (Oracle) Link: https://lore.kernel.org/r/20211125024432.15809-1-dylan_hung@aspeedtech.com Signed-off-by: Jakub Kicinski --- drivers/net/mdio/mdio-aspeed.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/mdio/mdio-aspeed.c b/drivers/net/mdio/mdio-aspeed.c index cad820568f75..966c3b4ad59d 100644 --- a/drivers/net/mdio/mdio-aspeed.c +++ b/drivers/net/mdio/mdio-aspeed.c @@ -61,6 +61,13 @@ static int aspeed_mdio_read(struct mii_bus *bus, int addr, int regnum) iowrite32(ctrl, ctx->base + ASPEED_MDIO_CTRL); + rc = readl_poll_timeout(ctx->base + ASPEED_MDIO_CTRL, ctrl, + !(ctrl & ASPEED_MDIO_CTRL_FIRE), + ASPEED_MDIO_INTERVAL_US, + ASPEED_MDIO_TIMEOUT_US); + if (rc < 0) + return rc; + rc = readl_poll_timeout(ctx->base + ASPEED_MDIO_DATA, data, data & ASPEED_MDIO_DATA_IDLE, ASPEED_MDIO_INTERVAL_US, -- cgit v1.2.3-59-g8ed1b From b270bfe697367776eca2e6759a71d700fb8d82a2 Mon Sep 17 00:00:00 2001 From: Yannick Vignon Date: Wed, 24 Nov 2021 16:47:31 +0100 Subject: net: stmmac: Disable Tx queues when reconfiguring the interface The Tx queues were not disabled in situations where the driver needed to stop the interface to apply a new configuration. This could result in a kernel panic when doing any of the 3 following actions: * reconfiguring the number of queues (ethtool -L) * reconfiguring the size of the ring buffers (ethtool -G) * installing/removing an XDP program (ip l set dev ethX xdp) Prevent the panic by making sure netif_tx_disable is called when stopping an interface. Without this patch, the following kernel panic can be observed when doing any of the actions above: Unable to handle kernel paging request at virtual address ffff80001238d040 [....] Call trace: dwmac4_set_addr+0x8/0x10 dev_hard_start_xmit+0xe4/0x1ac sch_direct_xmit+0xe8/0x39c __dev_queue_xmit+0x3ec/0xaf0 dev_queue_xmit+0x14/0x20 [...] [ end trace 0000000000000002 ]--- Fixes: 5fabb01207a2d ("net: stmmac: Add initial XDP support") Fixes: aa042f60e4961 ("net: stmmac: Add support to Ethtool get/set ring parameters") Fixes: 0366f7e06a6be ("net: stmmac: add ethtool support for get/set channels") Signed-off-by: Yannick Vignon Link: https://lore.kernel.org/r/20211124154731.1676949-1-yannick.vignon@oss.nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index f12097c8a485..748195697e5a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3802,6 +3802,8 @@ int stmmac_release(struct net_device *dev) struct stmmac_priv *priv = netdev_priv(dev); u32 chan; + netif_tx_disable(dev); + if (device_may_wakeup(priv->device)) phylink_speed_down(priv->phylink, false); /* Stop and disconnect the PHY */ -- cgit v1.2.3-59-g8ed1b From 0435a4d08032c8fba2966cebdac870e22238cacc Mon Sep 17 00:00:00 2001 From: zhangyue Date: Thu, 25 Nov 2021 19:36:10 +0800 Subject: net: qed: fix the array may be out of bound If the variable 'p_bit->flags' is always 0, the loop condition is always 0. The variable 'j' may be greater than or equal to 32. At this time, the array 'p_aeu->bits[32]' may be out of bound. Signed-off-by: zhangyue Link: https://lore.kernel.org/r/20211125113610.273841-1-zhangyue1@kylinos.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qed/qed_int.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index a97f691839e0..6958adeca86d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -1045,7 +1045,7 @@ static int qed_int_deassertion(struct qed_hwfn *p_hwfn, if (!parities) continue; - for (j = 0, bit_idx = 0; bit_idx < 32; j++) { + for (j = 0, bit_idx = 0; bit_idx < 32 && j < 32; j++) { struct aeu_invert_reg_bit *p_bit = &p_aeu->bits[j]; if (qed_int_is_parity_flag(p_hwfn, p_bit) && @@ -1083,7 +1083,7 @@ static int qed_int_deassertion(struct qed_hwfn *p_hwfn, * to current group, making them responsible for the * previous assertion. */ - for (j = 0, bit_idx = 0; bit_idx < 32; j++) { + for (j = 0, bit_idx = 0; bit_idx < 32 && j < 32; j++) { long unsigned int bitmask; u8 bit, bit_len; @@ -1382,7 +1382,7 @@ static void qed_int_sb_attn_init(struct qed_hwfn *p_hwfn, memset(sb_info->parity_mask, 0, sizeof(u32) * NUM_ATTN_REGS); for (i = 0; i < NUM_ATTN_REGS; i++) { /* j is array index, k is bit index */ - for (j = 0, k = 0; k < 32; j++) { + for (j = 0, k = 0; k < 32 && j < 32; j++) { struct aeu_invert_reg_bit *p_aeu; p_aeu = &aeu_descs[i].bits[j]; -- cgit v1.2.3-59-g8ed1b From 8d2ad993aa05c0768f00c886c9d369cd97a337ac Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Fri, 26 Nov 2021 20:03:15 +0800 Subject: net: hns3: fix VF RSS failed problem after PF enable multi-TCs When PF is set to multi-TCs and configured mapping relationship between priorities and TCs, the hardware will active these settings for this PF and its VFs. In this case when VF just uses one TC and its rx packets contain priority, and if the priority is not mapped to TC0, as other TCs of VF is not valid, hardware always put this kind of packets to the queue 0. It cause this kind of packets of VF can not be used RSS function. To fix this problem, set tc mode of all unused TCs of VF to the setting of TC0, then rx packet with priority which map to unused TC will be direct to TC0. Fixes: e2cb1dec9779 ("net: hns3: Add HNS3 VF HCL(Hardware Compatibility Layer) Support") Signed-off-by: Guangbin Huang Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 25c419d40066..41afaeea881b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -703,9 +703,9 @@ static int hclgevf_set_rss_tc_mode(struct hclgevf_dev *hdev, u16 rss_size) roundup_size = ilog2(roundup_size); for (i = 0; i < HCLGEVF_MAX_TC_NUM; i++) { - tc_valid[i] = !!(hdev->hw_tc_map & BIT(i)); + tc_valid[i] = 1; tc_size[i] = roundup_size; - tc_offset[i] = rss_size * i; + tc_offset[i] = (hdev->hw_tc_map & BIT(i)) ? rss_size * i : 0; } hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_RSS_TC_MODE, false); -- cgit v1.2.3-59-g8ed1b From b8af344cfea189cdc0fef41345e55aed76723615 Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Fri, 26 Nov 2021 20:03:16 +0800 Subject: net: hns3: add check NULL address for page pool When page pool is not enabled, its address value is still NULL and page pool should not be accessed, so add a check for it. Fixes: 850bfb912a6d ("net: hns3: debugfs add support dumping page pool info") Signed-off-by: Hao Chen Signed-off-by: Guangbin Huang Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index 67364ab63a1f..fbb8a5f08222 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -1106,6 +1106,11 @@ hns3_dbg_page_pool_info(struct hnae3_handle *h, char *buf, int len) return -EFAULT; } + if (!priv->ring[h->kinfo.num_tqps].page_pool) { + dev_err(&h->pdev->dev, "page pool is not initialized\n"); + return -EFAULT; + } + for (i = 0; i < ARRAY_SIZE(page_pool_info_items); i++) result[i] = &data_str[i][0]; -- cgit v1.2.3-59-g8ed1b From 9c14791748708d87c4d02ba74eb7e281e141d6e4 Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Fri, 26 Nov 2021 20:03:17 +0800 Subject: net: hns3: fix one incorrect value of page pool info when queried by debugfs Currently, when user queries page pool info by debugfs command "cat page_pool_info", the cnt of allocated page for page pool may be incorrect because of memory inconsistency problem caused by compiler optimization. So this patch uses READ_ONCE() to read value of pages_state_hold_cnt to fix this problem. Fixes: 850bfb912a6d ("net: hns3: debugfs add support dumping page pool info") Signed-off-by: Hao Chen Signed-off-by: Guangbin Huang Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index fbb8a5f08222..081295bff765 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -1081,7 +1081,8 @@ static void hns3_dump_page_pool_info(struct hns3_enet_ring *ring, u32 j = 0; sprintf(result[j++], "%u", index); - sprintf(result[j++], "%u", ring->page_pool->pages_state_hold_cnt); + sprintf(result[j++], "%u", + READ_ONCE(ring->page_pool->pages_state_hold_cnt)); sprintf(result[j++], "%u", atomic_read(&ring->page_pool->pages_state_release_cnt)); sprintf(result[j++], "%u", ring->page_pool->p.pool_size); -- cgit v1.2.3-59-g8ed1b From 82229c4dbb8a2780f05fa1bab29c97ef7bcd21bb Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Fri, 26 Nov 2021 20:03:18 +0800 Subject: net: hns3: fix incorrect components info of ethtool --reset command Currently, HNS3 driver doesn't clear the reset flags of components after successfully executing reset, it causes userspace info of "Components reset" and "Components not reset" is incorrect. So fix this problem by clear corresponding reset flag after reset process. Fixes: ddccc5e368a3 ("net: hns3: add support for triggering reset by ethtool") Signed-off-by: Jie Wang Signed-off-by: Guangbin Huang Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index c8442b86df94..c9b4568d7a8d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -987,6 +987,7 @@ static int hns3_set_reset(struct net_device *netdev, u32 *flags) struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev); const struct hnae3_ae_ops *ops = h->ae_algo->ops; const struct hns3_reset_type_map *rst_type_map; + enum ethtool_reset_flags rst_flags; u32 i, size; if (ops->ae_dev_resetting && ops->ae_dev_resetting(h)) @@ -1006,6 +1007,7 @@ static int hns3_set_reset(struct net_device *netdev, u32 *flags) for (i = 0; i < size; i++) { if (rst_type_map[i].rst_flags == *flags) { rst_type = rst_type_map[i].rst_type; + rst_flags = rst_type_map[i].rst_flags; break; } } @@ -1021,6 +1023,8 @@ static int hns3_set_reset(struct net_device *netdev, u32 *flags) ops->reset_event(h->pdev, h); + *flags &= ~rst_flags; + return 0; } -- cgit v1.2.3-59-g8ed1b From 8a075464d1e9317ffae0973dfe538a7511291a06 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 26 Nov 2021 19:28:41 +0200 Subject: net: mscc: ocelot: don't downgrade timestamping RX filters in SIOCSHWTSTAMP The ocelot driver, when asked to timestamp all receiving packets, 1588 v1 or NTP, says "nah, here's 1588 v2 for you". According to this discussion: https://patchwork.kernel.org/project/netdevbpf/patch/20211104133204.19757-8-martin.kaistra@linutronix.de/#24577647 drivers that downgrade from a wider request to a narrower response (or even a response where the intersection with the request is empty) are buggy, and should return -ERANGE instead. This patch fixes that. Fixes: 4e3b0468e6d7 ("net: mscc: PTP Hardware Clock (PHC) support") Suggested-by: Richard Cochran Signed-off-by: Vladimir Oltean Acked-by: Richard Cochran Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index e6c18b598d5c..bcc4f2f74ccc 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1320,12 +1320,6 @@ int ocelot_hwstamp_set(struct ocelot *ocelot, int port, struct ifreq *ifr) switch (cfg.rx_filter) { case HWTSTAMP_FILTER_NONE: break; - case HWTSTAMP_FILTER_ALL: - case HWTSTAMP_FILTER_SOME: - case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: - case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: - case HWTSTAMP_FILTER_NTP_ALL: case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: -- cgit v1.2.3-59-g8ed1b From 95706be13b9f755d93b5b82bdc782af439f1ec22 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 26 Nov 2021 19:28:42 +0200 Subject: net: mscc: ocelot: create a function that replaces an existing VCAP filter VCAP (Versatile Content Aware Processor) is the TCAM-based engine behind tc flower offload on ocelot, among other things. The ingress port mask on which VCAP rules match is present as a bit field in the actual key of the rule. This means that it is possible for a rule to be shared among multiple source ports. When the rule is added one by one on each desired port, that the ingress port mask of the key must be edited and rewritten to hardware. But the API in ocelot_vcap.c does not allow for this. For one thing, ocelot_vcap_filter_add() and ocelot_vcap_filter_del() are not symmetric, because ocelot_vcap_filter_add() works with a preallocated and prepopulated filter and programs it to hardware, and ocelot_vcap_filter_del() does both the job of removing the specified filter from hardware, as well as kfreeing it. That is to say, the only option of editing a filter in place, which is to delete it, modify the structure and add it back, does not work because it results in use-after-free. This patch introduces ocelot_vcap_filter_replace, which trivially reprograms a VCAP entry to hardware, at the exact same index at which it existed before, without modifying any list or allocating any memory. Signed-off-by: Vladimir Oltean Acked-by: Richard Cochran Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot_vcap.c | 16 ++++++++++++++++ include/soc/mscc/ocelot_vcap.h | 2 ++ 2 files changed, 18 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.c b/drivers/net/ethernet/mscc/ocelot_vcap.c index 99d7376a70a7..337cd08b1a54 100644 --- a/drivers/net/ethernet/mscc/ocelot_vcap.c +++ b/drivers/net/ethernet/mscc/ocelot_vcap.c @@ -1217,6 +1217,22 @@ int ocelot_vcap_filter_del(struct ocelot *ocelot, } EXPORT_SYMBOL(ocelot_vcap_filter_del); +int ocelot_vcap_filter_replace(struct ocelot *ocelot, + struct ocelot_vcap_filter *filter) +{ + struct ocelot_vcap_block *block = &ocelot->block[filter->block_id]; + int index; + + index = ocelot_vcap_block_get_filter_index(block, filter); + if (index < 0) + return index; + + vcap_entry_set(ocelot, index, filter); + + return 0; +} +EXPORT_SYMBOL(ocelot_vcap_filter_replace); + int ocelot_vcap_filter_stats_update(struct ocelot *ocelot, struct ocelot_vcap_filter *filter) { diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h index eeb1142aa1b1..4d1dfa1136b2 100644 --- a/include/soc/mscc/ocelot_vcap.h +++ b/include/soc/mscc/ocelot_vcap.h @@ -703,6 +703,8 @@ int ocelot_vcap_filter_add(struct ocelot *ocelot, struct netlink_ext_ack *extack); int ocelot_vcap_filter_del(struct ocelot *ocelot, struct ocelot_vcap_filter *rule); +int ocelot_vcap_filter_replace(struct ocelot *ocelot, + struct ocelot_vcap_filter *filter); struct ocelot_vcap_filter * ocelot_vcap_block_find_filter_by_id(struct ocelot_vcap_block *block, unsigned long cookie, bool tc_offload); -- cgit v1.2.3-59-g8ed1b From 96ca08c05838d1f5501003f0a6201824e4eede70 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 26 Nov 2021 19:28:44 +0200 Subject: net: mscc: ocelot: set up traps for PTP packets IEEE 1588 support was declared too soon for the Ocelot switch. Out of reset, this switch does not apply any special treatment for PTP packets, i.e. when an event message is received, the natural tendency is to forward it by MAC DA/VLAN ID. This poses a problem when the ingress port is under a bridge, since user space application stacks (written primarily for endpoint ports, not switches) like ptp4l expect that PTP messages are always received on AF_PACKET / AF_INET sockets (depending on the PTP transport being used), and never being autonomously forwarded. Any forwarding, if necessary (for example in Transparent Clock mode) is handled in software by ptp4l. Having the hardware forward these packets too will cause duplicates which will confuse endpoints connected to these switches. So PTP over L2 barely works, in the sense that PTP packets reach the CPU port, but they reach it via flooding, and therefore reach lots of other unwanted destinations too. But PTP over IPv4/IPv6 does not work at all. This is because the Ocelot switch have a separate destination port mask for unknown IP multicast (which PTP over IP is) flooding compared to unknown non-IP multicast (which PTP over L2 is) flooding. Specifically, the driver allows the CPU port to be in the PGID_MC port group, but not in PGID_MCIPV4 and PGID_MCIPV6. There are several presentations from Allan Nielsen which explain that the embedded MIPS CPU on Ocelot switches is not very powerful at all, so every penny they could save by not allowing flooding to the CPU port module matters. Unknown IP multicast did not make it. The de facto consensus is that when a switch is PTP-aware and an application stack for PTP is running, switches should have some sort of trapping mechanism for PTP packets, to extract them from the hardware data path. This avoids both problems: (a) PTP packets are no longer flooded to unwanted destinations (b) PTP over IP packets are no longer denied from reaching the CPU since they arrive there via a trap and not via flooding It is not the first time when this change is attempted. Last time, the feedback from Allan Nielsen and Andrew Lunn was that the traps should not be installed by default, and that PTP-unaware switching may be desired for some use cases: https://patchwork.ozlabs.org/project/netdev/patch/20190813025214.18601-5-yangbo.lu@nxp.com/ To address that feedback, the present patch adds the necessary packet traps according to the RX filter configuration transmitted by user space through the SIOCSHWTSTAMP ioctl. Trapping is done via VCAP IS2, where we keep 5 filters, which are amended each time RX timestamping is enabled or disabled on a port: - 1 for PTP over L2 - 2 for PTP over IPv4 (UDP ports 319 and 320) - 2 for PTP over IPv6 (UDP ports 319 and 320) The cookie by which these filters (invisible to tc) are identified is strategically chosen such that it does not collide with the filters used for the ocelot-8021q tagging protocol by the Felix driver, or with the MRP traps set up by the Ocelot library. Other alternatives were considered, like patching user space to do something, but there are so many ways in which PTP packets could be made to reach the CPU, generically speaking, that "do what?" is a very valid question. The ptp4l program from the linuxptp stack already attempts to do something: it calls setsockopt(IP_ADD_MEMBERSHIP) (and PACKET_ADD_MEMBERSHIP, respectively) which translates in both cases into a dev_mc_add() on the interface, in the kernel: https://github.com/richardcochran/linuxptp/blob/v3.1.1/udp.c#L73 https://github.com/richardcochran/linuxptp/blob/v3.1.1/raw.c Reality shows that this is not sufficient in case the interface belongs to a switchdev driver, as dev_mc_add() does not show the intention to trap a packet to the CPU, but rather the intention to not drop it (it is strictly for RX filtering, same as promiscuous does not mean to send all traffic to the CPU, but to not drop traffic with unknown MAC DA). This topic is a can of worms in itself, and it would be great if user space could just stay out of it. On the other hand, setting up PTP traps privately within the driver is not new by any stretch of the imagination: https://elixir.bootlin.com/linux/v5.16-rc2/source/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c#L833 https://elixir.bootlin.com/linux/v5.16-rc2/source/drivers/net/dsa/hirschmann/hellcreek.c#L1050 https://elixir.bootlin.com/linux/v5.16-rc2/source/include/linux/dsa/sja1105.h#L21 So this is the approach taken here as well. The difference here being that we prepare and destroy the traps per port, dynamically at runtime, as opposed to driver init time, because apparently, PTP-unaware forwarding is a use case. Fixes: 4e3b0468e6d7 ("net: mscc: PTP Hardware Clock (PHC) support") Reported-by: Po Liu Signed-off-by: Vladimir Oltean Acked-by: Richard Cochran Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot.c | 241 ++++++++++++++++++++++++++++++++++++- 1 file changed, 240 insertions(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index bcc4f2f74ccc..9b7be93cbb0d 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1278,6 +1278,225 @@ int ocelot_fdb_dump(struct ocelot *ocelot, int port, } EXPORT_SYMBOL(ocelot_fdb_dump); +static void ocelot_populate_l2_ptp_trap_key(struct ocelot_vcap_filter *trap) +{ + trap->key_type = OCELOT_VCAP_KEY_ETYPE; + *(__be16 *)trap->key.etype.etype.value = htons(ETH_P_1588); + *(__be16 *)trap->key.etype.etype.mask = htons(0xffff); +} + +static void +ocelot_populate_ipv4_ptp_event_trap_key(struct ocelot_vcap_filter *trap) +{ + trap->key_type = OCELOT_VCAP_KEY_IPV4; + trap->key.ipv4.dport.value = PTP_EV_PORT; + trap->key.ipv4.dport.mask = 0xffff; +} + +static void +ocelot_populate_ipv6_ptp_event_trap_key(struct ocelot_vcap_filter *trap) +{ + trap->key_type = OCELOT_VCAP_KEY_IPV6; + trap->key.ipv6.dport.value = PTP_EV_PORT; + trap->key.ipv6.dport.mask = 0xffff; +} + +static void +ocelot_populate_ipv4_ptp_general_trap_key(struct ocelot_vcap_filter *trap) +{ + trap->key_type = OCELOT_VCAP_KEY_IPV4; + trap->key.ipv4.dport.value = PTP_GEN_PORT; + trap->key.ipv4.dport.mask = 0xffff; +} + +static void +ocelot_populate_ipv6_ptp_general_trap_key(struct ocelot_vcap_filter *trap) +{ + trap->key_type = OCELOT_VCAP_KEY_IPV6; + trap->key.ipv6.dport.value = PTP_GEN_PORT; + trap->key.ipv6.dport.mask = 0xffff; +} + +static int ocelot_trap_add(struct ocelot *ocelot, int port, + unsigned long cookie, + void (*populate)(struct ocelot_vcap_filter *f)) +{ + struct ocelot_vcap_block *block_vcap_is2; + struct ocelot_vcap_filter *trap; + bool new = false; + int err; + + block_vcap_is2 = &ocelot->block[VCAP_IS2]; + + trap = ocelot_vcap_block_find_filter_by_id(block_vcap_is2, cookie, + false); + if (!trap) { + trap = kzalloc(sizeof(*trap), GFP_KERNEL); + if (!trap) + return -ENOMEM; + + populate(trap); + trap->prio = 1; + trap->id.cookie = cookie; + trap->id.tc_offload = false; + trap->block_id = VCAP_IS2; + trap->type = OCELOT_VCAP_FILTER_OFFLOAD; + trap->lookup = 0; + trap->action.cpu_copy_ena = true; + trap->action.mask_mode = OCELOT_MASK_MODE_PERMIT_DENY; + trap->action.port_mask = 0; + new = true; + } + + trap->ingress_port_mask |= BIT(port); + + if (new) + err = ocelot_vcap_filter_add(ocelot, trap, NULL); + else + err = ocelot_vcap_filter_replace(ocelot, trap); + if (err) { + trap->ingress_port_mask &= ~BIT(port); + if (!trap->ingress_port_mask) + kfree(trap); + return err; + } + + return 0; +} + +static int ocelot_trap_del(struct ocelot *ocelot, int port, + unsigned long cookie) +{ + struct ocelot_vcap_block *block_vcap_is2; + struct ocelot_vcap_filter *trap; + + block_vcap_is2 = &ocelot->block[VCAP_IS2]; + + trap = ocelot_vcap_block_find_filter_by_id(block_vcap_is2, cookie, + false); + if (!trap) + return 0; + + trap->ingress_port_mask &= ~BIT(port); + if (!trap->ingress_port_mask) + return ocelot_vcap_filter_del(ocelot, trap); + + return ocelot_vcap_filter_replace(ocelot, trap); +} + +static int ocelot_l2_ptp_trap_add(struct ocelot *ocelot, int port) +{ + unsigned long l2_cookie = ocelot->num_phys_ports + 1; + + return ocelot_trap_add(ocelot, port, l2_cookie, + ocelot_populate_l2_ptp_trap_key); +} + +static int ocelot_l2_ptp_trap_del(struct ocelot *ocelot, int port) +{ + unsigned long l2_cookie = ocelot->num_phys_ports + 1; + + return ocelot_trap_del(ocelot, port, l2_cookie); +} + +static int ocelot_ipv4_ptp_trap_add(struct ocelot *ocelot, int port) +{ + unsigned long ipv4_gen_cookie = ocelot->num_phys_ports + 2; + unsigned long ipv4_ev_cookie = ocelot->num_phys_ports + 3; + int err; + + err = ocelot_trap_add(ocelot, port, ipv4_ev_cookie, + ocelot_populate_ipv4_ptp_event_trap_key); + if (err) + return err; + + err = ocelot_trap_add(ocelot, port, ipv4_gen_cookie, + ocelot_populate_ipv4_ptp_general_trap_key); + if (err) + ocelot_trap_del(ocelot, port, ipv4_ev_cookie); + + return err; +} + +static int ocelot_ipv4_ptp_trap_del(struct ocelot *ocelot, int port) +{ + unsigned long ipv4_gen_cookie = ocelot->num_phys_ports + 2; + unsigned long ipv4_ev_cookie = ocelot->num_phys_ports + 3; + int err; + + err = ocelot_trap_del(ocelot, port, ipv4_ev_cookie); + err |= ocelot_trap_del(ocelot, port, ipv4_gen_cookie); + return err; +} + +static int ocelot_ipv6_ptp_trap_add(struct ocelot *ocelot, int port) +{ + unsigned long ipv6_gen_cookie = ocelot->num_phys_ports + 4; + unsigned long ipv6_ev_cookie = ocelot->num_phys_ports + 5; + int err; + + err = ocelot_trap_add(ocelot, port, ipv6_ev_cookie, + ocelot_populate_ipv6_ptp_event_trap_key); + if (err) + return err; + + err = ocelot_trap_add(ocelot, port, ipv6_gen_cookie, + ocelot_populate_ipv6_ptp_general_trap_key); + if (err) + ocelot_trap_del(ocelot, port, ipv6_ev_cookie); + + return err; +} + +static int ocelot_ipv6_ptp_trap_del(struct ocelot *ocelot, int port) +{ + unsigned long ipv6_gen_cookie = ocelot->num_phys_ports + 4; + unsigned long ipv6_ev_cookie = ocelot->num_phys_ports + 5; + int err; + + err = ocelot_trap_del(ocelot, port, ipv6_ev_cookie); + err |= ocelot_trap_del(ocelot, port, ipv6_gen_cookie); + return err; +} + +static int ocelot_setup_ptp_traps(struct ocelot *ocelot, int port, + bool l2, bool l4) +{ + int err; + + if (l2) + err = ocelot_l2_ptp_trap_add(ocelot, port); + else + err = ocelot_l2_ptp_trap_del(ocelot, port); + if (err) + return err; + + if (l4) { + err = ocelot_ipv4_ptp_trap_add(ocelot, port); + if (err) + goto err_ipv4; + + err = ocelot_ipv6_ptp_trap_add(ocelot, port); + if (err) + goto err_ipv6; + } else { + err = ocelot_ipv4_ptp_trap_del(ocelot, port); + + err |= ocelot_ipv6_ptp_trap_del(ocelot, port); + } + if (err) + return err; + + return 0; + +err_ipv6: + ocelot_ipv4_ptp_trap_del(ocelot, port); +err_ipv4: + if (l2) + ocelot_l2_ptp_trap_del(ocelot, port); + return err; +} + int ocelot_hwstamp_get(struct ocelot *ocelot, int port, struct ifreq *ifr) { return copy_to_user(ifr->ifr_data, &ocelot->hwtstamp_config, @@ -1288,7 +1507,9 @@ EXPORT_SYMBOL(ocelot_hwstamp_get); int ocelot_hwstamp_set(struct ocelot *ocelot, int port, struct ifreq *ifr) { struct ocelot_port *ocelot_port = ocelot->ports[port]; + bool l2 = false, l4 = false; struct hwtstamp_config cfg; + int err; if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) return -EFAULT; @@ -1323,19 +1544,37 @@ int ocelot_hwstamp_set(struct ocelot *ocelot, int port, struct ifreq *ifr) case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + l4 = true; + break; case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + l2 = true; + break; case HWTSTAMP_FILTER_PTP_V2_EVENT: case HWTSTAMP_FILTER_PTP_V2_SYNC: case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: - cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + l2 = true; + l4 = true; break; default: mutex_unlock(&ocelot->ptp_lock); return -ERANGE; } + err = ocelot_setup_ptp_traps(ocelot, port, l2, l4); + if (err) + return err; + + if (l2 && l4) + cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + else if (l2) + cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; + else if (l4) + cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT; + else + cfg.rx_filter = HWTSTAMP_FILTER_NONE; + /* Commit back the result & save it */ memcpy(&ocelot->hwtstamp_config, &cfg, sizeof(cfg)); mutex_unlock(&ocelot->ptp_lock); -- cgit v1.2.3-59-g8ed1b From c49a35eedfef08bffd46b53c25dbf9d6016a86ff Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 26 Nov 2021 19:28:45 +0200 Subject: net: mscc: ocelot: correctly report the timestamping RX filters in ethtool The driver doesn't support RX timestamping for non-PTP packets, but it declares that it does. Restrict the reported RX filters to PTP v2 over L2 and over L4. Fixes: 4e3b0468e6d7 ("net: mscc: PTP Hardware Clock (PHC) support") Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 9b7be93cbb0d..409cde1e59c6 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1677,7 +1677,10 @@ int ocelot_get_ts_info(struct ocelot *ocelot, int port, SOF_TIMESTAMPING_RAW_HARDWARE; info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON) | BIT(HWTSTAMP_TX_ONESTEP_SYNC); - info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | BIT(HWTSTAMP_FILTER_ALL); + info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | + BIT(HWTSTAMP_FILTER_PTP_V2_EVENT) | + BIT(HWTSTAMP_FILTER_PTP_V2_L2_EVENT) | + BIT(HWTSTAMP_FILTER_PTP_V2_L4_EVENT); return 0; } -- cgit v1.2.3-59-g8ed1b From b3612ccdf2841c64ae7a8dd9e780c91240093fe6 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 26 Nov 2021 13:39:26 +0100 Subject: net: dsa: microchip: implement multi-bridge support Current driver version is able to handle only one bridge at time. Configuring two bridges on two different ports would end up shorting this bridges by HW. To reproduce it: ip l a name br0 type bridge ip l a name br1 type bridge ip l s dev br0 up ip l s dev br1 up ip l s lan1 master br0 ip l s dev lan1 up ip l s lan2 master br1 ip l s dev lan2 up Ping on lan1 and get response on lan2, which should not happen. This happened, because current driver version is storing one global "Port VLAN Membership" and applying it to all ports which are members of any bridge. To solve this issue, we need to handle each port separately. This patch is dropping the global port member storage and calculating membership dynamically depending on STP state and bridge participation. Note: STP support was broken before this patch and should be fixed separately. Fixes: c2e866911e25 ("net: dsa: microchip: break KSZ9477 DSA driver into two files") Signed-off-by: Oleksij Rempel Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20211126123926.2981028-1-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz8795.c | 56 ++++------------------------- drivers/net/dsa/microchip/ksz9477.c | 66 +++++----------------------------- drivers/net/dsa/microchip/ksz_common.c | 50 ++++++++++++++------------ drivers/net/dsa/microchip/ksz_common.h | 4 --- 4 files changed, 43 insertions(+), 133 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c index 43fc3087aeb3..013e9c02be71 100644 --- a/drivers/net/dsa/microchip/ksz8795.c +++ b/drivers/net/dsa/microchip/ksz8795.c @@ -1002,57 +1002,32 @@ static void ksz8_cfg_port_member(struct ksz_device *dev, int port, u8 member) data &= ~PORT_VLAN_MEMBERSHIP; data |= (member & dev->port_mask); ksz_pwrite8(dev, port, P_MIRROR_CTRL, data); - dev->ports[port].member = member; } static void ksz8_port_stp_state_set(struct dsa_switch *ds, int port, u8 state) { struct ksz_device *dev = ds->priv; - int forward = dev->member; struct ksz_port *p; - int member = -1; u8 data; - p = &dev->ports[port]; - ksz_pread8(dev, port, P_STP_CTRL, &data); data &= ~(PORT_TX_ENABLE | PORT_RX_ENABLE | PORT_LEARN_DISABLE); switch (state) { case BR_STATE_DISABLED: data |= PORT_LEARN_DISABLE; - if (port < dev->phy_port_cnt) - member = 0; break; case BR_STATE_LISTENING: data |= (PORT_RX_ENABLE | PORT_LEARN_DISABLE); - if (port < dev->phy_port_cnt && - p->stp_state == BR_STATE_DISABLED) - member = dev->host_mask | p->vid_member; break; case BR_STATE_LEARNING: data |= PORT_RX_ENABLE; break; case BR_STATE_FORWARDING: data |= (PORT_TX_ENABLE | PORT_RX_ENABLE); - - /* This function is also used internally. */ - if (port == dev->cpu_port) - break; - - /* Port is a member of a bridge. */ - if (dev->br_member & BIT(port)) { - dev->member |= BIT(port); - member = dev->member; - } else { - member = dev->host_mask | p->vid_member; - } break; case BR_STATE_BLOCKING: data |= PORT_LEARN_DISABLE; - if (port < dev->phy_port_cnt && - p->stp_state == BR_STATE_DISABLED) - member = dev->host_mask | p->vid_member; break; default: dev_err(ds->dev, "invalid STP state: %d\n", state); @@ -1060,22 +1035,11 @@ static void ksz8_port_stp_state_set(struct dsa_switch *ds, int port, u8 state) } ksz_pwrite8(dev, port, P_STP_CTRL, data); + + p = &dev->ports[port]; p->stp_state = state; - /* Port membership may share register with STP state. */ - if (member >= 0 && member != p->member) - ksz8_cfg_port_member(dev, port, (u8)member); - - /* Check if forwarding needs to be updated. */ - if (state != BR_STATE_FORWARDING) { - if (dev->br_member & BIT(port)) - dev->member &= ~BIT(port); - } - /* When topology has changed the function ksz_update_port_member - * should be called to modify port forwarding behavior. - */ - if (forward != dev->member) - ksz_update_port_member(dev, port); + ksz_update_port_member(dev, port); } static void ksz8_flush_dyn_mac_table(struct ksz_device *dev, int port) @@ -1341,7 +1305,7 @@ static void ksz8795_cpu_interface_select(struct ksz_device *dev, int port) static void ksz8_port_setup(struct ksz_device *dev, int port, bool cpu_port) { - struct ksz_port *p = &dev->ports[port]; + struct dsa_switch *ds = dev->ds; struct ksz8 *ksz8 = dev->priv; const u32 *masks; u8 member; @@ -1368,10 +1332,11 @@ static void ksz8_port_setup(struct ksz_device *dev, int port, bool cpu_port) if (!ksz_is_ksz88x3(dev)) ksz8795_cpu_interface_select(dev, port); - member = dev->port_mask; + member = dsa_user_ports(ds); } else { - member = dev->host_mask | p->vid_member; + member = BIT(dsa_upstream_port(ds, port)); } + ksz8_cfg_port_member(dev, port, member); } @@ -1392,20 +1357,13 @@ static void ksz8_config_cpu_port(struct dsa_switch *ds) ksz_cfg(dev, regs[S_TAIL_TAG_CTRL], masks[SW_TAIL_TAG_ENABLE], true); p = &dev->ports[dev->cpu_port]; - p->vid_member = dev->port_mask; p->on = 1; ksz8_port_setup(dev, dev->cpu_port, true); - dev->member = dev->host_mask; for (i = 0; i < dev->phy_port_cnt; i++) { p = &dev->ports[i]; - /* Initialize to non-zero so that ksz_cfg_port_member() will - * be called. - */ - p->vid_member = BIT(i); - p->member = dev->port_mask; ksz8_port_stp_state_set(ds, i, BR_STATE_DISABLED); /* Last port may be disabled. */ diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index 854e25f43fa7..353b5f981740 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -391,7 +391,6 @@ static void ksz9477_cfg_port_member(struct ksz_device *dev, int port, u8 member) { ksz_pwrite32(dev, port, REG_PORT_VLAN_MEMBERSHIP__4, member); - dev->ports[port].member = member; } static void ksz9477_port_stp_state_set(struct dsa_switch *ds, int port, @@ -400,8 +399,6 @@ static void ksz9477_port_stp_state_set(struct dsa_switch *ds, int port, struct ksz_device *dev = ds->priv; struct ksz_port *p = &dev->ports[port]; u8 data; - int member = -1; - int forward = dev->member; ksz_pread8(dev, port, P_STP_CTRL, &data); data &= ~(PORT_TX_ENABLE | PORT_RX_ENABLE | PORT_LEARN_DISABLE); @@ -409,40 +406,18 @@ static void ksz9477_port_stp_state_set(struct dsa_switch *ds, int port, switch (state) { case BR_STATE_DISABLED: data |= PORT_LEARN_DISABLE; - if (port != dev->cpu_port) - member = 0; break; case BR_STATE_LISTENING: data |= (PORT_RX_ENABLE | PORT_LEARN_DISABLE); - if (port != dev->cpu_port && - p->stp_state == BR_STATE_DISABLED) - member = dev->host_mask | p->vid_member; break; case BR_STATE_LEARNING: data |= PORT_RX_ENABLE; break; case BR_STATE_FORWARDING: data |= (PORT_TX_ENABLE | PORT_RX_ENABLE); - - /* This function is also used internally. */ - if (port == dev->cpu_port) - break; - - member = dev->host_mask | p->vid_member; - mutex_lock(&dev->dev_mutex); - - /* Port is a member of a bridge. */ - if (dev->br_member & (1 << port)) { - dev->member |= (1 << port); - member = dev->member; - } - mutex_unlock(&dev->dev_mutex); break; case BR_STATE_BLOCKING: data |= PORT_LEARN_DISABLE; - if (port != dev->cpu_port && - p->stp_state == BR_STATE_DISABLED) - member = dev->host_mask | p->vid_member; break; default: dev_err(ds->dev, "invalid STP state: %d\n", state); @@ -451,23 +426,8 @@ static void ksz9477_port_stp_state_set(struct dsa_switch *ds, int port, ksz_pwrite8(dev, port, P_STP_CTRL, data); p->stp_state = state; - mutex_lock(&dev->dev_mutex); - /* Port membership may share register with STP state. */ - if (member >= 0 && member != p->member) - ksz9477_cfg_port_member(dev, port, (u8)member); - - /* Check if forwarding needs to be updated. */ - if (state != BR_STATE_FORWARDING) { - if (dev->br_member & (1 << port)) - dev->member &= ~(1 << port); - } - /* When topology has changed the function ksz_update_port_member - * should be called to modify port forwarding behavior. - */ - if (forward != dev->member) - ksz_update_port_member(dev, port); - mutex_unlock(&dev->dev_mutex); + ksz_update_port_member(dev, port); } static void ksz9477_flush_dyn_mac_table(struct ksz_device *dev, int port) @@ -1168,10 +1128,10 @@ static void ksz9477_phy_errata_setup(struct ksz_device *dev, int port) static void ksz9477_port_setup(struct ksz_device *dev, int port, bool cpu_port) { - u8 data8; - u8 member; - u16 data16; struct ksz_port *p = &dev->ports[port]; + struct dsa_switch *ds = dev->ds; + u8 data8, member; + u16 data16; /* enable tag tail for host port */ if (cpu_port) @@ -1250,12 +1210,12 @@ static void ksz9477_port_setup(struct ksz_device *dev, int port, bool cpu_port) ksz_pwrite8(dev, port, REG_PORT_XMII_CTRL_1, data8); p->phydev.duplex = 1; } - mutex_lock(&dev->dev_mutex); + if (cpu_port) - member = dev->port_mask; + member = dsa_user_ports(ds); else - member = dev->host_mask | p->vid_member; - mutex_unlock(&dev->dev_mutex); + member = BIT(dsa_upstream_port(ds, port)); + ksz9477_cfg_port_member(dev, port, member); /* clear pending interrupts */ @@ -1276,8 +1236,6 @@ static void ksz9477_config_cpu_port(struct dsa_switch *ds) const char *prev_mode; dev->cpu_port = i; - dev->host_mask = (1 << dev->cpu_port); - dev->port_mask |= dev->host_mask; p = &dev->ports[i]; /* Read from XMII register to determine host port @@ -1312,23 +1270,15 @@ static void ksz9477_config_cpu_port(struct dsa_switch *ds) /* enable cpu port */ ksz9477_port_setup(dev, i, true); - p->vid_member = dev->port_mask; p->on = 1; } } - dev->member = dev->host_mask; - for (i = 0; i < dev->port_cnt; i++) { if (i == dev->cpu_port) continue; p = &dev->ports[i]; - /* Initialize to non-zero so that ksz_cfg_port_member() will - * be called. - */ - p->vid_member = (1 << i); - p->member = dev->port_mask; ksz9477_port_stp_state_set(ds, i, BR_STATE_DISABLED); p->on = 1; if (i < dev->phy_port_cnt) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 7c2968a639eb..8a04302018dc 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -22,21 +22,40 @@ void ksz_update_port_member(struct ksz_device *dev, int port) { - struct ksz_port *p; + struct ksz_port *p = &dev->ports[port]; + struct dsa_switch *ds = dev->ds; + u8 port_member = 0, cpu_port; + const struct dsa_port *dp; int i; - for (i = 0; i < dev->port_cnt; i++) { - if (i == port || i == dev->cpu_port) + if (!dsa_is_user_port(ds, port)) + return; + + dp = dsa_to_port(ds, port); + cpu_port = BIT(dsa_upstream_port(ds, port)); + + for (i = 0; i < ds->num_ports; i++) { + const struct dsa_port *other_dp = dsa_to_port(ds, i); + struct ksz_port *other_p = &dev->ports[i]; + u8 val = 0; + + if (!dsa_is_user_port(ds, i)) continue; - p = &dev->ports[i]; - if (!(dev->member & (1 << i))) + if (port == i) + continue; + if (!dp->bridge_dev || dp->bridge_dev != other_dp->bridge_dev) continue; - /* Port is a member of the bridge and is forwarding. */ - if (p->stp_state == BR_STATE_FORWARDING && - p->member != dev->member) - dev->dev_ops->cfg_port_member(dev, i, dev->member); + if (other_p->stp_state == BR_STATE_FORWARDING && + p->stp_state == BR_STATE_FORWARDING) { + val |= BIT(port); + port_member |= BIT(i); + } + + dev->dev_ops->cfg_port_member(dev, i, val | cpu_port); } + + dev->dev_ops->cfg_port_member(dev, port, port_member | cpu_port); } EXPORT_SYMBOL_GPL(ksz_update_port_member); @@ -175,12 +194,6 @@ EXPORT_SYMBOL_GPL(ksz_get_ethtool_stats); int ksz_port_bridge_join(struct dsa_switch *ds, int port, struct net_device *br) { - struct ksz_device *dev = ds->priv; - - mutex_lock(&dev->dev_mutex); - dev->br_member |= (1 << port); - mutex_unlock(&dev->dev_mutex); - /* port_stp_state_set() will be called after to put the port in * appropriate state so there is no need to do anything. */ @@ -192,13 +205,6 @@ EXPORT_SYMBOL_GPL(ksz_port_bridge_join); void ksz_port_bridge_leave(struct dsa_switch *ds, int port, struct net_device *br) { - struct ksz_device *dev = ds->priv; - - mutex_lock(&dev->dev_mutex); - dev->br_member &= ~(1 << port); - dev->member &= ~(1 << port); - mutex_unlock(&dev->dev_mutex); - /* port_stp_state_set() will be called after to put the port in * forwarding state so there is no need to do anything. */ diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index 1597c63988b4..54b456bc8972 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -25,8 +25,6 @@ struct ksz_port_mib { }; struct ksz_port { - u16 member; - u16 vid_member; bool remove_tag; /* Remove Tag flag set, for ksz8795 only */ int stp_state; struct phy_device phydev; @@ -83,8 +81,6 @@ struct ksz_device { struct ksz_port *ports; struct delayed_work mib_read; unsigned long mib_read_interval; - u16 br_member; - u16 member; u16 mirror_rx; u16 mirror_tx; u32 features; /* chip specific features */ -- cgit v1.2.3-59-g8ed1b From 00169a9245f841ec666c70959bfd1dcacce74324 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 27 Nov 2021 07:44:42 -0800 Subject: vmxnet3: Use generic Kconfig option for page size limit Use the architecture independent Kconfig option PAGE_SIZE_LESS_THAN_64KB to indicate that VMXNET3 requires a page size smaller than 64kB. Signed-off-by: Guenter Roeck Signed-off-by: Linus Torvalds --- drivers/net/Kconfig | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 10506a4b66ef..6cccc3dc00bc 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -567,9 +567,7 @@ config XEN_NETDEV_BACKEND config VMXNET3 tristate "VMware VMXNET3 ethernet driver" depends on PCI && INET - depends on !(PAGE_SIZE_64KB || ARM64_64K_PAGES || \ - IA64_PAGE_SIZE_64KB || PARISC_PAGE_SIZE_64KB || \ - PPC_64K_PAGES) + depends on PAGE_SIZE_LESS_THAN_64KB help This driver supports VMware's vmxnet3 virtual ethernet NIC. To compile this driver as a module, choose M here: the -- cgit v1.2.3-59-g8ed1b From ed53ae75693096f1c10b4561edd31a07b631bd72 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 11 Nov 2021 15:10:03 +0100 Subject: rt2x00: do not mark device gone on EPROTO errors during start As reported by Exuvo is possible that we have lot's of EPROTO errors during device start i.e. firmware load. But after that device works correctly. Hence marking device gone by few EPROTO errors done by commit e383c70474db ("rt2x00: check number of EPROTO errors") caused regression - Exuvo device stop working after kernel update. To fix disable the check during device start. Link: https://lore.kernel.org/linux-wireless/bff7d309-a816-6a75-51b6-5928ef4f7a8c@exuvo.se/ Reported-and-tested-by: Exuvo Fixes: e383c70474db ("rt2x00: check number of EPROTO errors") Cc: stable@vger.kernel.org Signed-off-by: Stanislaw Gruszka Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20211111141003.GA134627@wp.pl --- drivers/net/wireless/ralink/rt2x00/rt2x00usb.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c b/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c index e4473a551241..74c3d8cb3100 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c @@ -25,6 +25,9 @@ static bool rt2x00usb_check_usb_error(struct rt2x00_dev *rt2x00dev, int status) if (status == -ENODEV || status == -ENOENT) return true; + if (!test_bit(DEVICE_STATE_STARTED, &rt2x00dev->flags)) + return false; + if (status == -EPROTO || status == -ETIMEDOUT) rt2x00dev->num_proto_errs++; else -- cgit v1.2.3-59-g8ed1b From f8e7dfd6fdabb831846ab1970a875746559d491b Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Fri, 26 Nov 2021 16:51:15 +0100 Subject: net: stmmac: Avoid DMA_CHAN_CONTROL write if no Split Header support The driver assumes that split headers can be enabled/disabled without stopping/starting the device, so it writes DMA_CHAN_CONTROL from stmmac_set_features(). However, on my system (IP v5.10a without Split Header support), simply writing DMA_CHAN_CONTROL when DMA is running (for example, with the commands below) leads to a TX watchdog timeout. host$ socat TCP-LISTEN:1024,fork,reuseaddr - & device$ ethtool -K eth0 tso off device$ ethtool -K eth0 tso on device$ dd if=/dev/zero bs=1M count=10 | socat - TCP4:host:1024 Note that since my IP is configured without Split Header support, the driver always just reads and writes the same value to the DMA_CHAN_CONTROL register. I don't have access to any platforms with Split Header support so I don't know if these writes to the DMA_CHAN_CONTROL while DMA is running actually work properly on such systems. I could not find anything in the databook that says that DMA_CHAN_CONTROL should not be written when the DMA is running. But on systems without Split Header support, there is in any case no need to call enable_sph() in stmmac_set_features() at all since SPH can never be toggled, so we can avoid the watchdog timeout there by skipping this call. Fixes: 8c6fc097a2f4acf ("net: stmmac: gmac4+: Add Split Header support") Signed-off-by: Vincent Whitchurch Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 748195697e5a..da8306f60730 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -5540,8 +5540,6 @@ static int stmmac_set_features(struct net_device *netdev, netdev_features_t features) { struct stmmac_priv *priv = netdev_priv(netdev); - bool sph_en; - u32 chan; /* Keep the COE Type in case of csum is supporting */ if (features & NETIF_F_RXCSUM) @@ -5553,10 +5551,13 @@ static int stmmac_set_features(struct net_device *netdev, */ stmmac_rx_ipc(priv, priv->hw); - sph_en = (priv->hw->rx_csum > 0) && priv->sph; + if (priv->sph_cap) { + bool sph_en = (priv->hw->rx_csum > 0) && priv->sph; + u32 chan; - for (chan = 0; chan < priv->plat->rx_queues_to_use; chan++) - stmmac_enable_sph(priv, priv->ioaddr, sph_en, chan); + for (chan = 0; chan < priv->plat->rx_queues_to_use; chan++) + stmmac_enable_sph(priv, priv->ioaddr, sph_en, chan); + } return 0; } -- cgit v1.2.3-59-g8ed1b From 1e89ad864d035001835ccf02acea7b1d3dc41819 Mon Sep 17 00:00:00 2001 From: Luiz Angelo Daros de Luca Date: Fri, 26 Nov 2021 17:13:55 -0300 Subject: net: dsa: realtek-smi: fix indirect reg access for ports>3 This switch family can have up to 8 UTP ports {0..7}. However, INDIRECT_ACCESS_ADDRESS_PHYNUM_MASK was using 2 bits instead of 3, dropping the most significant bit during indirect register reads and writes. Reading or writing ports 4, 5, 6, and 7 registers was actually manipulating, respectively, ports 0, 1, 2, and 3 registers. This is not sufficient but necessary to support any variant with more than 4 UTP ports, like RTL8367S. rtl8365mb_phy_{read,write} will now returns -EINVAL if phy is greater than 7. Fixes: 4af2950c50c8 ("net: dsa: realtek-smi: add rtl8365mb subdriver for RTL8365MB-VC") Signed-off-by: Luiz Angelo Daros de Luca Signed-off-by: David S. Miller --- drivers/net/dsa/rtl8365mb.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/dsa/rtl8365mb.c b/drivers/net/dsa/rtl8365mb.c index baaae97283c5..078ca4cd7160 100644 --- a/drivers/net/dsa/rtl8365mb.c +++ b/drivers/net/dsa/rtl8365mb.c @@ -107,6 +107,7 @@ #define RTL8365MB_LEARN_LIMIT_MAX_8365MB_VC 2112 /* Family-specific data and limits */ +#define RTL8365MB_PHYADDRMAX 7 #define RTL8365MB_NUM_PHYREGS 32 #define RTL8365MB_PHYREGMAX (RTL8365MB_NUM_PHYREGS - 1) #define RTL8365MB_MAX_NUM_PORTS (RTL8365MB_CPU_PORT_NUM_8365MB_VC + 1) @@ -176,7 +177,7 @@ #define RTL8365MB_INDIRECT_ACCESS_STATUS_REG 0x1F01 #define RTL8365MB_INDIRECT_ACCESS_ADDRESS_REG 0x1F02 #define RTL8365MB_INDIRECT_ACCESS_ADDRESS_OCPADR_5_1_MASK GENMASK(4, 0) -#define RTL8365MB_INDIRECT_ACCESS_ADDRESS_PHYNUM_MASK GENMASK(6, 5) +#define RTL8365MB_INDIRECT_ACCESS_ADDRESS_PHYNUM_MASK GENMASK(7, 5) #define RTL8365MB_INDIRECT_ACCESS_ADDRESS_OCPADR_9_6_MASK GENMASK(11, 8) #define RTL8365MB_PHY_BASE 0x2000 #define RTL8365MB_INDIRECT_ACCESS_WRITE_DATA_REG 0x1F03 @@ -679,6 +680,9 @@ static int rtl8365mb_phy_read(struct realtek_smi *smi, int phy, int regnum) u16 val; int ret; + if (phy > RTL8365MB_PHYADDRMAX) + return -EINVAL; + if (regnum > RTL8365MB_PHYREGMAX) return -EINVAL; @@ -704,6 +708,9 @@ static int rtl8365mb_phy_write(struct realtek_smi *smi, int phy, int regnum, u32 ocp_addr; int ret; + if (phy > RTL8365MB_PHYADDRMAX) + return -EINVAL; + if (regnum > RTL8365MB_PHYREGMAX) return -EINVAL; -- cgit v1.2.3-59-g8ed1b From 817b653160db9852d5a0498a31f047e18ce27e5b Mon Sep 17 00:00:00 2001 From: Sven Schuchmann Date: Sat, 27 Nov 2021 11:47:07 +0100 Subject: net: usb: lan78xx: lan78xx_phy_init(): use PHY_POLL instead of "0" if no IRQ is available On most systems request for IRQ 0 will fail, phylib will print an error message and fall back to polling. To fix this set the phydev->irq to PHY_POLL if no IRQ is available. Fixes: cc89c323a30e ("lan78xx: Use irq_domain for phy interrupt from USB Int. EP") Reviewed-by: Andrew Lunn Signed-off-by: Sven Schuchmann Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index f20376c1ef3f..8cd265fc1fd9 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2228,7 +2228,7 @@ static int lan78xx_phy_init(struct lan78xx_net *dev) if (dev->domain_data.phyirq > 0) phydev->irq = dev->domain_data.phyirq; else - phydev->irq = 0; + phydev->irq = PHY_POLL; netdev_dbg(dev->net, "phydev->irq = %d\n", phydev->irq); /* set to AUTOMDIX */ -- cgit v1.2.3-59-g8ed1b From 2191b1dfef7d45f44b5008d2148676d9f2c82874 Mon Sep 17 00:00:00 2001 From: Erik Ekman Date: Sun, 28 Nov 2021 13:37:11 +0100 Subject: net/mlx4_en: Update reported link modes for 1/10G When link modes were initially added in commit 2c762679435dc ("net/mlx4_en: Use PTYS register to query ethtool settings") and later updated for the new ethtool API in commit 3d8f7cc78d0eb ("net: mlx4: use new ETHTOOL_G/SSETTINGS API") the only 1/10G non-baseT link modes configured were 1000baseKX, 10000baseKX4 and 10000baseKR. It looks like these got picked to represent other modes since nothing better was available. Switch to using more specific link modes added in commit 5711a98221443 ("net: ethtool: add support for 1000BaseX and missing 10G link modes"). Tested with MCX311A-XCAT connected via DAC. Before: % sudo ethtool enp3s0 Settings for enp3s0: Supported ports: [ FIBRE ] Supported link modes: 1000baseKX/Full 10000baseKR/Full Supported pause frame use: Symmetric Receive-only Supports auto-negotiation: No Supported FEC modes: Not reported Advertised link modes: 1000baseKX/Full 10000baseKR/Full Advertised pause frame use: Symmetric Advertised auto-negotiation: No Advertised FEC modes: Not reported Speed: 10000Mb/s Duplex: Full Auto-negotiation: off Port: Direct Attach Copper PHYAD: 0 Transceiver: internal Supports Wake-on: d Wake-on: d Current message level: 0x00000014 (20) link ifdown Link detected: yes With this change: % sudo ethtool enp3s0 Settings for enp3s0: Supported ports: [ FIBRE ] Supported link modes: 1000baseX/Full 10000baseCR/Full 10000baseSR/Full Supported pause frame use: Symmetric Receive-only Supports auto-negotiation: No Supported FEC modes: Not reported Advertised link modes: 1000baseX/Full 10000baseCR/Full 10000baseSR/Full Advertised pause frame use: Symmetric Advertised auto-negotiation: No Advertised FEC modes: Not reported Speed: 10000Mb/s Duplex: Full Auto-negotiation: off Port: Direct Attach Copper PHYAD: 0 Transceiver: internal Supports Wake-on: d Wake-on: d Current message level: 0x00000014 (20) link ifdown Link detected: yes Tested-by: Michael Stapelberg Signed-off-by: Erik Ekman Reviewed-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 066d79e4ecfc..10238bedd694 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -670,7 +670,7 @@ void __init mlx4_en_init_ptys2ethtool_map(void) MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_1000BASE_T, SPEED_1000, ETHTOOL_LINK_MODE_1000baseT_Full_BIT); MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_1000BASE_CX_SGMII, SPEED_1000, - ETHTOOL_LINK_MODE_1000baseKX_Full_BIT); + ETHTOOL_LINK_MODE_1000baseX_Full_BIT); MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_1000BASE_KX, SPEED_1000, ETHTOOL_LINK_MODE_1000baseKX_Full_BIT); MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_T, SPEED_10000, @@ -682,9 +682,9 @@ void __init mlx4_en_init_ptys2ethtool_map(void) MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_KR, SPEED_10000, ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_CR, SPEED_10000, - ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); + ETHTOOL_LINK_MODE_10000baseCR_Full_BIT); MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_SR, SPEED_10000, - ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); + ETHTOOL_LINK_MODE_10000baseSR_Full_BIT); MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_20GBASE_KR2, SPEED_20000, ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT, ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT); -- cgit v1.2.3-59-g8ed1b From aa1dcb5646fdf34a15763facf4bf5e482a2814ca Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Mon, 29 Nov 2021 05:28:23 -0800 Subject: atlantic: Increase delay for fw transactions The max waiting period (of 1 ms) while reading the data from FW shared buffer is too small for certain types of data (e.g., stats). There's a chance that FW could be updating buffer at the same time and driver would be unsuccessful in reading data. Firmware manual recommends to have 1 sec timeout to fix this issue. Fixes: 5cfd54d7dc186 ("net: atlantic: minimal A2 fw_ops") Signed-off-by: Dmitry Bogdanov Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c index dd259c8f2f4f..b0e4119b9883 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c @@ -84,7 +84,7 @@ static int hw_atl2_shared_buffer_read_block(struct aq_hw_s *self, if (cnt > AQ_A2_FW_READ_TRY_MAX) return -ETIME; if (tid1.transaction_cnt_a != tid1.transaction_cnt_b) - udelay(1); + mdelay(1); } while (tid1.transaction_cnt_a != tid1.transaction_cnt_b); hw_atl2_mif_shared_buf_read(self, offset, (u32 *)data, dwords); @@ -339,8 +339,11 @@ static int aq_a2_fw_update_stats(struct aq_hw_s *self) { struct hw_atl2_priv *priv = (struct hw_atl2_priv *)self->priv; struct statistics_s stats; + int err; - hw_atl2_shared_buffer_read_safe(self, stats, &stats); + err = hw_atl2_shared_buffer_read_safe(self, stats, &stats); + if (err) + return err; #define AQ_SDELTA(_N_, _F_) (self->curr_stats._N_ += \ stats.msm._F_ - priv->last_stats.msm._F_) -- cgit v1.2.3-59-g8ed1b From aa685acd98eae25d5351e30288d6cfb65b9c80a5 Mon Sep 17 00:00:00 2001 From: Nikita Danilov Date: Mon, 29 Nov 2021 05:28:24 -0800 Subject: atlatnic: enable Nbase-t speeds with base-t When 2.5G is advertised, N-Base should be advertised against the T-base caps. N5G is out of use in baseline code and driver should treat both 5G and N5G (and also 2.5G and N2.5G) equally from user perspective. Fixes: 5cfd54d7dc186 ("net: atlantic: minimal A2 fw_ops") Signed-off-by: Nikita Danilov Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_common.h | 25 +++++++++++----------- .../aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c | 3 --- .../aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c | 4 +--- 3 files changed, 13 insertions(+), 19 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_common.h b/drivers/net/ethernet/aquantia/atlantic/aq_common.h index 23b2d390fcdd..4ad8f36fcade 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_common.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_common.h @@ -53,20 +53,19 @@ #define AQ_NIC_RATE_10G BIT(0) #define AQ_NIC_RATE_5G BIT(1) -#define AQ_NIC_RATE_5GSR BIT(2) -#define AQ_NIC_RATE_2G5 BIT(3) -#define AQ_NIC_RATE_1G BIT(4) -#define AQ_NIC_RATE_100M BIT(5) -#define AQ_NIC_RATE_10M BIT(6) -#define AQ_NIC_RATE_1G_HALF BIT(7) -#define AQ_NIC_RATE_100M_HALF BIT(8) -#define AQ_NIC_RATE_10M_HALF BIT(9) +#define AQ_NIC_RATE_2G5 BIT(2) +#define AQ_NIC_RATE_1G BIT(3) +#define AQ_NIC_RATE_100M BIT(4) +#define AQ_NIC_RATE_10M BIT(5) +#define AQ_NIC_RATE_1G_HALF BIT(6) +#define AQ_NIC_RATE_100M_HALF BIT(7) +#define AQ_NIC_RATE_10M_HALF BIT(8) -#define AQ_NIC_RATE_EEE_10G BIT(10) -#define AQ_NIC_RATE_EEE_5G BIT(11) -#define AQ_NIC_RATE_EEE_2G5 BIT(12) -#define AQ_NIC_RATE_EEE_1G BIT(13) -#define AQ_NIC_RATE_EEE_100M BIT(14) +#define AQ_NIC_RATE_EEE_10G BIT(9) +#define AQ_NIC_RATE_EEE_5G BIT(10) +#define AQ_NIC_RATE_EEE_2G5 BIT(11) +#define AQ_NIC_RATE_EEE_1G BIT(12) +#define AQ_NIC_RATE_EEE_100M BIT(13) #define AQ_NIC_RATE_EEE_MSK (AQ_NIC_RATE_EEE_10G |\ AQ_NIC_RATE_EEE_5G |\ AQ_NIC_RATE_EEE_2G5 |\ diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c index eac631c45c56..4d4cfbc91e19 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c @@ -132,9 +132,6 @@ static enum hw_atl_fw2x_rate link_speed_mask_2fw2x_ratemask(u32 speed) if (speed & AQ_NIC_RATE_5G) rate |= FW2X_RATE_5G; - if (speed & AQ_NIC_RATE_5GSR) - rate |= FW2X_RATE_5G; - if (speed & AQ_NIC_RATE_2G5) rate |= FW2X_RATE_2G5; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c index b0e4119b9883..b7a9b0ed6df3 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c @@ -154,7 +154,7 @@ static void a2_link_speed_mask2fw(u32 speed, { link_options->rate_10G = !!(speed & AQ_NIC_RATE_10G); link_options->rate_5G = !!(speed & AQ_NIC_RATE_5G); - link_options->rate_N5G = !!(speed & AQ_NIC_RATE_5GSR); + link_options->rate_N5G = link_options->rate_5G; link_options->rate_2P5G = !!(speed & AQ_NIC_RATE_2G5); link_options->rate_N2P5G = link_options->rate_2P5G; link_options->rate_1G = !!(speed & AQ_NIC_RATE_1G); @@ -192,8 +192,6 @@ static u32 a2_fw_lkp_to_mask(struct lkp_link_caps_s *lkp_link_caps) rate |= AQ_NIC_RATE_10G; if (lkp_link_caps->rate_5G) rate |= AQ_NIC_RATE_5G; - if (lkp_link_caps->rate_N5G) - rate |= AQ_NIC_RATE_5GSR; if (lkp_link_caps->rate_2P5G) rate |= AQ_NIC_RATE_2G5; if (lkp_link_caps->rate_1G) -- cgit v1.2.3-59-g8ed1b From 2465c802232bc8d2b5bd83b55b08d05c11808704 Mon Sep 17 00:00:00 2001 From: Sameer Saurabh Date: Mon, 29 Nov 2021 05:28:25 -0800 Subject: atlantic: Fix to display FW bundle version instead of FW mac version. The correct way to reflect firmware version is to use bundle version. Hence populating the same instead of MAC fw version. Fixes: c1be0bf092bd2 ("net: atlantic: common functions needed for basic A2 init/deinit hw_ops") Signed-off-by: Sameer Saurabh Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c index b7a9b0ed6df3..e164ac5b55a8 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c @@ -500,9 +500,9 @@ u32 hw_atl2_utils_get_fw_version(struct aq_hw_s *self) hw_atl2_shared_buffer_read_safe(self, version, &version); /* A2 FW version is stored in reverse order */ - return version.mac.major << 24 | - version.mac.minor << 16 | - version.mac.build; + return version.bundle.major << 24 | + version.bundle.minor << 16 | + version.bundle.build; } int hw_atl2_utils_get_action_resolve_table_caps(struct aq_hw_s *self, -- cgit v1.2.3-59-g8ed1b From 413d5e09caa5a11da9c7d72401ba0588466a04c0 Mon Sep 17 00:00:00 2001 From: Nikita Danilov Date: Mon, 29 Nov 2021 05:28:26 -0800 Subject: atlantic: Add missing DIDs and fix 115c. At the late production stages new dev ids were introduced. These are now in production, so its important for the driver to recognize these. And also fix the board caps for AQC115C adapter. Fixes: b3f0c79cba206 ("net: atlantic: A2 hw_ops skeleton") Signed-off-by: Nikita Danilov Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_common.h | 2 ++ drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c | 7 ++++++- .../net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c | 17 +++++++++++++++++ .../net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.h | 2 ++ 4 files changed, 27 insertions(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_common.h b/drivers/net/ethernet/aquantia/atlantic/aq_common.h index 4ad8f36fcade..ace691d7cd75 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_common.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_common.h @@ -40,10 +40,12 @@ #define AQ_DEVICE_ID_AQC113DEV 0x00C0 #define AQ_DEVICE_ID_AQC113CS 0x94C0 +#define AQ_DEVICE_ID_AQC113CA 0x34C0 #define AQ_DEVICE_ID_AQC114CS 0x93C0 #define AQ_DEVICE_ID_AQC113 0x04C0 #define AQ_DEVICE_ID_AQC113C 0x14C0 #define AQ_DEVICE_ID_AQC115C 0x12C0 +#define AQ_DEVICE_ID_AQC116C 0x11C0 #define HW_ATL_NIC_NAME "Marvell (aQuantia) AQtion 10Gbit Network Adapter" diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index d4b1976ee69b..797a95142d1f 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -49,6 +49,8 @@ static const struct pci_device_id aq_pci_tbl[] = { { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC113), }, { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC113C), }, { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC115C), }, + { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC113CA), }, + { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC116C), }, {} }; @@ -85,7 +87,10 @@ static const struct aq_board_revision_s hw_atl_boards[] = { { AQ_DEVICE_ID_AQC113CS, AQ_HWREV_ANY, &hw_atl2_ops, &hw_atl2_caps_aqc113, }, { AQ_DEVICE_ID_AQC114CS, AQ_HWREV_ANY, &hw_atl2_ops, &hw_atl2_caps_aqc113, }, { AQ_DEVICE_ID_AQC113C, AQ_HWREV_ANY, &hw_atl2_ops, &hw_atl2_caps_aqc113, }, - { AQ_DEVICE_ID_AQC115C, AQ_HWREV_ANY, &hw_atl2_ops, &hw_atl2_caps_aqc113, }, + { AQ_DEVICE_ID_AQC115C, AQ_HWREV_ANY, &hw_atl2_ops, &hw_atl2_caps_aqc115c, }, + { AQ_DEVICE_ID_AQC113CA, AQ_HWREV_ANY, &hw_atl2_ops, &hw_atl2_caps_aqc113, }, + { AQ_DEVICE_ID_AQC116C, AQ_HWREV_ANY, &hw_atl2_ops, &hw_atl2_caps_aqc116c, }, + }; MODULE_DEVICE_TABLE(pci, aq_pci_tbl); diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c index c98708bb044c..0a28428a0cb7 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c @@ -72,6 +72,23 @@ const struct aq_hw_caps_s hw_atl2_caps_aqc113 = { AQ_NIC_RATE_10M_HALF, }; +const struct aq_hw_caps_s hw_atl2_caps_aqc115c = { + DEFAULT_BOARD_BASIC_CAPABILITIES, + .media_type = AQ_HW_MEDIA_TYPE_TP, + .link_speed_msk = AQ_NIC_RATE_2G5 | + AQ_NIC_RATE_1G | + AQ_NIC_RATE_100M | + AQ_NIC_RATE_10M, +}; + +const struct aq_hw_caps_s hw_atl2_caps_aqc116c = { + DEFAULT_BOARD_BASIC_CAPABILITIES, + .media_type = AQ_HW_MEDIA_TYPE_TP, + .link_speed_msk = AQ_NIC_RATE_1G | + AQ_NIC_RATE_100M | + AQ_NIC_RATE_10M, +}; + static u32 hw_atl2_sem_act_rslvr_get(struct aq_hw_s *self) { return hw_atl_reg_glb_cpu_sem_get(self, HW_ATL2_FW_SM_ACT_RSLVR); diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.h index de8723f1c28a..346f0dc9912e 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.h @@ -9,6 +9,8 @@ #include "aq_common.h" extern const struct aq_hw_caps_s hw_atl2_caps_aqc113; +extern const struct aq_hw_caps_s hw_atl2_caps_aqc115c; +extern const struct aq_hw_caps_s hw_atl2_caps_aqc116c; extern const struct aq_hw_ops hw_atl2_ops; #endif /* HW_ATL2_H */ -- cgit v1.2.3-59-g8ed1b From 03fa512189eb9b55ded5f3e81ad638315555b340 Mon Sep 17 00:00:00 2001 From: Sameer Saurabh Date: Mon, 29 Nov 2021 05:28:27 -0800 Subject: Remove Half duplex mode speed capabilities. Since Half Duplex mode has been deprecated by the firmware, driver should not advertise Half Duplex speed in ethtool support link speed values. Fixes: 071a02046c262 ("net: atlantic: A2: half duplex support") Signed-off-by: Sameer Saurabh Signed-off-by: Igor Russkikh Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c index 0a28428a0cb7..5dfc751572ed 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c @@ -65,11 +65,8 @@ const struct aq_hw_caps_s hw_atl2_caps_aqc113 = { AQ_NIC_RATE_5G | AQ_NIC_RATE_2G5 | AQ_NIC_RATE_1G | - AQ_NIC_RATE_1G_HALF | AQ_NIC_RATE_100M | - AQ_NIC_RATE_100M_HALF | - AQ_NIC_RATE_10M | - AQ_NIC_RATE_10M_HALF, + AQ_NIC_RATE_10M, }; const struct aq_hw_caps_s hw_atl2_caps_aqc115c = { -- cgit v1.2.3-59-g8ed1b From 2087ced0fc3a6d45203925750a2b1bcd5402e639 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Mon, 29 Nov 2021 05:28:28 -0800 Subject: atlantic: Fix statistics logic for production hardware B0 is the main and widespread device revision of atlantic2 HW. In the current state, driver will incorrectly fetch the statistics for this revision. Fixes: 5cfd54d7dc186 ("net: atlantic: minimal A2 fw_ops") Signed-off-by: Dmitry Bogdanov Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_hw.h | 2 + drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 10 +- .../aquantia/atlantic/hw_atl/hw_atl_utils.c | 15 ++- .../aquantia/atlantic/hw_atl2/hw_atl2_utils.h | 38 +++++++- .../aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c | 101 +++++++++++++++++---- 5 files changed, 139 insertions(+), 27 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h index 062a300a566a..dbd284660135 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h @@ -80,6 +80,8 @@ struct aq_hw_link_status_s { }; struct aq_stats_s { + u64 brc; + u64 btc; u64 uprc; u64 mprc; u64 bprc; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 1acf544afeb4..02c4e3b4a6a5 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -905,8 +905,14 @@ u64 *aq_nic_get_stats(struct aq_nic_s *self, u64 *data) data[++i] = stats->mbtc; data[++i] = stats->bbrc; data[++i] = stats->bbtc; - data[++i] = stats->ubrc + stats->mbrc + stats->bbrc; - data[++i] = stats->ubtc + stats->mbtc + stats->bbtc; + if (stats->brc) + data[++i] = stats->brc; + else + data[++i] = stats->ubrc + stats->mbrc + stats->bbrc; + if (stats->btc) + data[++i] = stats->btc; + else + data[++i] = stats->ubtc + stats->mbtc + stats->bbtc; data[++i] = stats->dma_pkt_rc; data[++i] = stats->dma_pkt_tc; data[++i] = stats->dma_oct_rc; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c index 3f1704cbe1cb..7e88d7234b14 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c @@ -867,12 +867,20 @@ static int hw_atl_fw1x_deinit(struct aq_hw_s *self) int hw_atl_utils_update_stats(struct aq_hw_s *self) { struct aq_stats_s *cs = &self->curr_stats; + struct aq_stats_s curr_stats = *cs; struct hw_atl_utils_mbox mbox; + bool corrupted_stats = false; hw_atl_utils_mpi_read_stats(self, &mbox); -#define AQ_SDELTA(_N_) (self->curr_stats._N_ += \ - mbox.stats._N_ - self->last_stats._N_) +#define AQ_SDELTA(_N_) \ +do { \ + if (!corrupted_stats && \ + ((s64)(mbox.stats._N_ - self->last_stats._N_)) >= 0) \ + curr_stats._N_ += mbox.stats._N_ - self->last_stats._N_; \ + else \ + corrupted_stats = true; \ +} while (0) if (self->aq_link_status.mbps) { AQ_SDELTA(uprc); @@ -892,6 +900,9 @@ int hw_atl_utils_update_stats(struct aq_hw_s *self) AQ_SDELTA(bbrc); AQ_SDELTA(bbtc); AQ_SDELTA(dpc); + + if (!corrupted_stats) + *cs = curr_stats; } #undef AQ_SDELTA diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.h index b66fa346581c..6bad64c77b87 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.h @@ -239,7 +239,8 @@ struct version_s { u8 minor; u16 build; } phy; - u32 rsvd; + u32 drv_iface_ver:4; + u32 rsvd:28; }; struct link_status_s { @@ -424,7 +425,7 @@ struct cable_diag_status_s { u16 rsvd2; }; -struct statistics_s { +struct statistics_a0_s { struct { u32 link_up; u32 link_down; @@ -457,6 +458,33 @@ struct statistics_s { u32 reserve_fw_gap; }; +struct __packed statistics_b0_s { + u64 rx_good_octets; + u64 rx_pause_frames; + u64 rx_good_frames; + u64 rx_errors; + u64 rx_unicast_frames; + u64 rx_multicast_frames; + u64 rx_broadcast_frames; + + u64 tx_good_octets; + u64 tx_pause_frames; + u64 tx_good_frames; + u64 tx_errors; + u64 tx_unicast_frames; + u64 tx_multicast_frames; + u64 tx_broadcast_frames; + + u32 main_loop_cycles; +}; + +struct __packed statistics_s { + union __packed { + struct statistics_a0_s a0; + struct statistics_b0_s b0; + }; +}; + struct filter_caps_s { u8 l2_filters_base_index:6; u8 flexible_filter_mask:2; @@ -545,7 +573,7 @@ struct management_status_s { u32 rsvd5; }; -struct fw_interface_out { +struct __packed fw_interface_out { struct transaction_counter_s transaction_id; struct version_s version; struct link_status_s link_status; @@ -569,7 +597,6 @@ struct fw_interface_out { struct core_dump_s core_dump; u32 rsvd11; struct statistics_s stats; - u32 rsvd12; struct filter_caps_s filter_caps; struct device_caps_s device_caps; u32 rsvd13; @@ -592,6 +619,9 @@ struct fw_interface_out { #define AQ_HOST_MODE_LOW_POWER 3U #define AQ_HOST_MODE_SHUTDOWN 4U +#define AQ_A2_FW_INTERFACE_A0 0 +#define AQ_A2_FW_INTERFACE_B0 1 + int hw_atl2_utils_initfw(struct aq_hw_s *self, const struct aq_fw_ops **fw_ops); int hw_atl2_utils_soft_reset(struct aq_hw_s *self); diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c index e164ac5b55a8..58d426dda3ed 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c @@ -333,18 +333,22 @@ static int aq_a2_fw_get_mac_permanent(struct aq_hw_s *self, u8 *mac) return 0; } -static int aq_a2_fw_update_stats(struct aq_hw_s *self) +static void aq_a2_fill_a0_stats(struct aq_hw_s *self, + struct statistics_s *stats) { struct hw_atl2_priv *priv = (struct hw_atl2_priv *)self->priv; - struct statistics_s stats; - int err; - - err = hw_atl2_shared_buffer_read_safe(self, stats, &stats); - if (err) - return err; - -#define AQ_SDELTA(_N_, _F_) (self->curr_stats._N_ += \ - stats.msm._F_ - priv->last_stats.msm._F_) + struct aq_stats_s *cs = &self->curr_stats; + struct aq_stats_s curr_stats = *cs; + bool corrupted_stats = false; + +#define AQ_SDELTA(_N, _F) \ +do { \ + if (!corrupted_stats && \ + ((s64)(stats->a0.msm._F - priv->last_stats.a0.msm._F)) >= 0) \ + curr_stats._N += stats->a0.msm._F - priv->last_stats.a0.msm._F;\ + else \ + corrupted_stats = true; \ +} while (0) if (self->aq_link_status.mbps) { AQ_SDELTA(uprc, rx_unicast_frames); @@ -363,17 +367,76 @@ static int aq_a2_fw_update_stats(struct aq_hw_s *self) AQ_SDELTA(mbtc, tx_multicast_octets); AQ_SDELTA(bbrc, rx_broadcast_octets); AQ_SDELTA(bbtc, tx_broadcast_octets); + + if (!corrupted_stats) + *cs = curr_stats; + } +#undef AQ_SDELTA + +} + +static void aq_a2_fill_b0_stats(struct aq_hw_s *self, + struct statistics_s *stats) +{ + struct hw_atl2_priv *priv = (struct hw_atl2_priv *)self->priv; + struct aq_stats_s *cs = &self->curr_stats; + struct aq_stats_s curr_stats = *cs; + bool corrupted_stats = false; + +#define AQ_SDELTA(_N, _F) \ +do { \ + if (!corrupted_stats && \ + ((s64)(stats->b0._F - priv->last_stats.b0._F)) >= 0) \ + curr_stats._N += stats->b0._F - priv->last_stats.b0._F; \ + else \ + corrupted_stats = true; \ +} while (0) + + if (self->aq_link_status.mbps) { + AQ_SDELTA(uprc, rx_unicast_frames); + AQ_SDELTA(mprc, rx_multicast_frames); + AQ_SDELTA(bprc, rx_broadcast_frames); + AQ_SDELTA(erpr, rx_errors); + AQ_SDELTA(brc, rx_good_octets); + + AQ_SDELTA(uptc, tx_unicast_frames); + AQ_SDELTA(mptc, tx_multicast_frames); + AQ_SDELTA(bptc, tx_broadcast_frames); + AQ_SDELTA(erpt, tx_errors); + AQ_SDELTA(btc, tx_good_octets); + + if (!corrupted_stats) + *cs = curr_stats; } #undef AQ_SDELTA - self->curr_stats.dma_pkt_rc = - hw_atl_stats_rx_dma_good_pkt_counter_get(self); - self->curr_stats.dma_pkt_tc = - hw_atl_stats_tx_dma_good_pkt_counter_get(self); - self->curr_stats.dma_oct_rc = - hw_atl_stats_rx_dma_good_octet_counter_get(self); - self->curr_stats.dma_oct_tc = - hw_atl_stats_tx_dma_good_octet_counter_get(self); - self->curr_stats.dpc = hw_atl_rpb_rx_dma_drop_pkt_cnt_get(self); +} + +static int aq_a2_fw_update_stats(struct aq_hw_s *self) +{ + struct hw_atl2_priv *priv = (struct hw_atl2_priv *)self->priv; + struct aq_stats_s *cs = &self->curr_stats; + struct statistics_s stats; + struct version_s version; + int err; + + err = hw_atl2_shared_buffer_read_safe(self, version, &version); + if (err) + return err; + + err = hw_atl2_shared_buffer_read_safe(self, stats, &stats); + if (err) + return err; + + if (version.drv_iface_ver == AQ_A2_FW_INTERFACE_A0) + aq_a2_fill_a0_stats(self, &stats); + else + aq_a2_fill_b0_stats(self, &stats); + + cs->dma_pkt_rc = hw_atl_stats_rx_dma_good_pkt_counter_get(self); + cs->dma_pkt_tc = hw_atl_stats_tx_dma_good_pkt_counter_get(self); + cs->dma_oct_rc = hw_atl_stats_rx_dma_good_octet_counter_get(self); + cs->dma_oct_tc = hw_atl_stats_tx_dma_good_octet_counter_get(self); + cs->dpc = hw_atl_rpb_rx_dma_drop_pkt_cnt_get(self); memcpy(&priv->last_stats, &stats, sizeof(stats)); -- cgit v1.2.3-59-g8ed1b From 060a0fb721ec5bbe02ae322e434ec87dc25ed6e9 Mon Sep 17 00:00:00 2001 From: Sameer Saurabh Date: Mon, 29 Nov 2021 05:28:29 -0800 Subject: atlantic: Remove warn trace message. Remove the warn trace message - it's not a correct check here, because the function can still be called on the device in DOWN state Fixes: 508f2e3dce454 ("net: atlantic: split rx and tx per-queue stats") Signed-off-by: Sameer Saurabh Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_vec.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c index d281322d7dd2..f4774cf051c9 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c @@ -362,9 +362,6 @@ unsigned int aq_vec_get_sw_stats(struct aq_vec_s *self, const unsigned int tc, u { unsigned int count; - WARN_ONCE(!aq_vec_is_valid_tc(self, tc), - "Invalid tc %u (#rx=%u, #tx=%u)\n", - tc, self->rx_rings, self->tx_rings); if (!aq_vec_is_valid_tc(self, tc)) return 0; -- cgit v1.2.3-59-g8ed1b From 191587cd1a5f36852a0fc32cff2d5bc7680551db Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 29 Nov 2021 14:41:48 +0100 Subject: mt76: fix key pointer overwrite in mt7921s_write_txwi/mt7663_usb_sdio_write_txwi Fix pointer overwrite in mt7921s_tx_prepare_skb and mt7663_usb_sdio_tx_prepare_skb routines since in commit '2a9e9857473b ("mt76: fix possible pktid leak") mt76_tx_status_skb_add() has been moved out of mt7921s_write_txwi()/mt7663_usb_sdio_write_txwi() overwriting hw key pointer in ieee80211_tx_info structure. Fix the issue saving key pointer before running mt76_tx_status_skb_add(). Fixes: 2a9e9857473b ("mt76: fix possible pktid leak") Tested-by: Deren Wu Signed-off-by: Lorenzo Bianconi Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/eba40c84b6d114f618e2ae486cc6d0f2e9272cf9.1638193069.git.lorenzo@kernel.org --- drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c | 11 +++++------ drivers/net/wireless/mediatek/mt76/mt7921/sdio_mac.c | 11 +++++------ 2 files changed, 10 insertions(+), 12 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c b/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c index bfe6c1579dc1..5a6d7829c6e0 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c @@ -43,13 +43,11 @@ EXPORT_SYMBOL_GPL(mt7663_usb_sdio_reg_map); static void mt7663_usb_sdio_write_txwi(struct mt7615_dev *dev, struct mt76_wcid *wcid, enum mt76_txq_id qid, struct ieee80211_sta *sta, - int pid, struct sk_buff *skb) + struct ieee80211_key_conf *key, int pid, + struct sk_buff *skb) { - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - struct ieee80211_key_conf *key = info->control.hw_key; - __le32 *txwi; + __le32 *txwi = (__le32 *)(skb->data - MT_USB_TXD_SIZE); - txwi = (__le32 *)(skb->data - MT_USB_TXD_SIZE); memset(txwi, 0, MT_USB_TXD_SIZE); mt7615_mac_write_txwi(dev, txwi, skb, wcid, sta, pid, key, false); skb_push(skb, MT_USB_TXD_SIZE); @@ -188,6 +186,7 @@ int mt7663_usb_sdio_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, struct mt7615_dev *dev = container_of(mdev, struct mt7615_dev, mt76); struct sk_buff *skb = tx_info->skb; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_key_conf *key = info->control.hw_key; struct mt7615_sta *msta; int pad, err, pktid; @@ -205,7 +204,7 @@ int mt7663_usb_sdio_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, } pktid = mt76_tx_status_skb_add(&dev->mt76, wcid, skb); - mt7663_usb_sdio_write_txwi(dev, wcid, qid, sta, pktid, skb); + mt7663_usb_sdio_write_txwi(dev, wcid, qid, sta, key, pktid, skb); if (mt76_is_usb(mdev)) { u32 len = skb->len; diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/sdio_mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/sdio_mac.c index 85b3d88f8ecc..bdec508b6b9f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/sdio_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/sdio_mac.c @@ -142,13 +142,11 @@ out: static void mt7921s_write_txwi(struct mt7921_dev *dev, struct mt76_wcid *wcid, enum mt76_txq_id qid, struct ieee80211_sta *sta, - int pid, struct sk_buff *skb) + struct ieee80211_key_conf *key, int pid, + struct sk_buff *skb) { - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - struct ieee80211_key_conf *key = info->control.hw_key; - __le32 *txwi; + __le32 *txwi = (__le32 *)(skb->data - MT_SDIO_TXD_SIZE); - txwi = (__le32 *)(skb->data - MT_SDIO_TXD_SIZE); memset(txwi, 0, MT_SDIO_TXD_SIZE); mt7921_mac_write_txwi(dev, txwi, skb, wcid, key, pid, false); skb_push(skb, MT_SDIO_TXD_SIZE); @@ -161,6 +159,7 @@ int mt7921s_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, { struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76); struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx_info->skb); + struct ieee80211_key_conf *key = info->control.hw_key; struct sk_buff *skb = tx_info->skb; int err, pad, pktid; @@ -180,7 +179,7 @@ int mt7921s_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, } pktid = mt76_tx_status_skb_add(&dev->mt76, wcid, skb); - mt7921s_write_txwi(dev, wcid, qid, sta, pktid, skb); + mt7921s_write_txwi(dev, wcid, qid, sta, key, pktid, skb); mt7921_skb_add_sdio_hdr(skb, MT7921_SDIO_DATA); pad = round_up(skb->len, 4) - skb->len; -- cgit v1.2.3-59-g8ed1b From ae9287811ba75571cd69505d50ab0e612ace8572 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 29 Nov 2021 10:39:20 -0500 Subject: wireguard: allowedips: add missing __rcu annotation to satisfy sparse A __rcu annotation got lost during refactoring, which caused sparse to become enraged. Fixes: bf7b042dc62a ("wireguard: allowedips: free empty intermediate nodes when removing single node") Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- drivers/net/wireguard/allowedips.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/wireguard/allowedips.c b/drivers/net/wireguard/allowedips.c index b7197e80f226..9a4c8ff32d9d 100644 --- a/drivers/net/wireguard/allowedips.c +++ b/drivers/net/wireguard/allowedips.c @@ -163,7 +163,7 @@ static bool node_placement(struct allowedips_node __rcu *trie, const u8 *key, return exact; } -static inline void connect_node(struct allowedips_node **parent, u8 bit, struct allowedips_node *node) +static inline void connect_node(struct allowedips_node __rcu **parent, u8 bit, struct allowedips_node *node) { node->parent_bit_packed = (unsigned long)parent | bit; rcu_assign_pointer(*parent, node); -- cgit v1.2.3-59-g8ed1b From b251b711a92189d558b07fde5a7ccd5a7915ebdd Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 29 Nov 2021 10:39:23 -0500 Subject: wireguard: main: rename 'mod_init' & 'mod_exit' functions to be module-specific Rename module_init & module_exit functions that are named "mod_init" and "mod_exit" so that they are unique in both the System.map file and in initcall_debug output instead of showing up as almost anonymous "mod_init". This is helpful for debugging and in determining how long certain module_init calls take to execute. Signed-off-by: Randy Dunlap Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- drivers/net/wireguard/main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/wireguard/main.c b/drivers/net/wireguard/main.c index 75dbe77b0b4b..ee4da9ab8013 100644 --- a/drivers/net/wireguard/main.c +++ b/drivers/net/wireguard/main.c @@ -17,7 +17,7 @@ #include #include -static int __init mod_init(void) +static int __init wg_mod_init(void) { int ret; @@ -60,7 +60,7 @@ err_allowedips: return ret; } -static void __exit mod_exit(void) +static void __exit wg_mod_exit(void) { wg_genetlink_uninit(); wg_device_uninit(); @@ -68,8 +68,8 @@ static void __exit mod_exit(void) wg_allowedips_slab_uninit(); } -module_init(mod_init); -module_exit(mod_exit); +module_init(wg_mod_init); +module_exit(wg_mod_exit); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("WireGuard secure network tunnel"); MODULE_AUTHOR("Jason A. Donenfeld "); -- cgit v1.2.3-59-g8ed1b From 20ae1d6aa159eb91a9bf09ff92ccaa94dbea92c2 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 29 Nov 2021 10:39:25 -0500 Subject: wireguard: device: reset peer src endpoint when netns exits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each peer's endpoint contains a dst_cache entry that takes a reference to another netdev. When the containing namespace exits, we take down the socket and prevent future sockets from being created (by setting creating_net to NULL), which removes that potential reference on the netns. However, it doesn't release references to the netns that a netdev cached in dst_cache might be taking, so the netns still might fail to exit. Since the socket is gimped anyway, we can simply clear all the dst_caches (by way of clearing the endpoint src), which will release all references. However, the current dst_cache_reset function only releases those references lazily. But it turns out that all of our usages of wg_socket_clear_peer_endpoint_src are called from contexts that are not exactly high-speed or bottle-necked. For example, when there's connection difficulty, or when userspace is reconfiguring the interface. And in particular for this patch, when the netns is exiting. So for those cases, it makes more sense to call dst_release immediately. For that, we add a small helper function to dst_cache. This patch also adds a test to netns.sh from Hangbin Liu to ensure this doesn't regress. Tested-by: Hangbin Liu Reported-by: Xiumei Mu Cc: Toke Høiland-Jørgensen Cc: Paolo Abeni Fixes: 900575aa33a3 ("wireguard: device: avoid circular netns references") Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- drivers/net/wireguard/device.c | 3 +++ drivers/net/wireguard/socket.c | 2 +- include/net/dst_cache.h | 11 +++++++++++ net/core/dst_cache.c | 19 +++++++++++++++++++ tools/testing/selftests/wireguard/netns.sh | 24 +++++++++++++++++++++++- 5 files changed, 57 insertions(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c index 551ddaaaf540..77e64ea6be67 100644 --- a/drivers/net/wireguard/device.c +++ b/drivers/net/wireguard/device.c @@ -398,6 +398,7 @@ static struct rtnl_link_ops link_ops __read_mostly = { static void wg_netns_pre_exit(struct net *net) { struct wg_device *wg; + struct wg_peer *peer; rtnl_lock(); list_for_each_entry(wg, &device_list, device_list) { @@ -407,6 +408,8 @@ static void wg_netns_pre_exit(struct net *net) mutex_lock(&wg->device_update_lock); rcu_assign_pointer(wg->creating_net, NULL); wg_socket_reinit(wg, NULL, NULL); + list_for_each_entry(peer, &wg->peer_list, peer_list) + wg_socket_clear_peer_endpoint_src(peer); mutex_unlock(&wg->device_update_lock); } } diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c index 8c496b747108..6f07b949cb81 100644 --- a/drivers/net/wireguard/socket.c +++ b/drivers/net/wireguard/socket.c @@ -308,7 +308,7 @@ void wg_socket_clear_peer_endpoint_src(struct wg_peer *peer) { write_lock_bh(&peer->endpoint_lock); memset(&peer->endpoint.src6, 0, sizeof(peer->endpoint.src6)); - dst_cache_reset(&peer->endpoint_cache); + dst_cache_reset_now(&peer->endpoint_cache); write_unlock_bh(&peer->endpoint_lock); } diff --git a/include/net/dst_cache.h b/include/net/dst_cache.h index 67634675e919..df6622a5fe98 100644 --- a/include/net/dst_cache.h +++ b/include/net/dst_cache.h @@ -79,6 +79,17 @@ static inline void dst_cache_reset(struct dst_cache *dst_cache) dst_cache->reset_ts = jiffies; } +/** + * dst_cache_reset_now - invalidate the cache contents immediately + * @dst_cache: the cache + * + * The caller must be sure there are no concurrent users, as this frees + * all dst_cache users immediately, rather than waiting for the next + * per-cpu usage like dst_cache_reset does. Most callers should use the + * higher speed lazily-freed dst_cache_reset function instead. + */ +void dst_cache_reset_now(struct dst_cache *dst_cache); + /** * dst_cache_init - initialize the cache, allocating the required storage * @dst_cache: the cache diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c index be74ab4551c2..0ccfd5fa5cb9 100644 --- a/net/core/dst_cache.c +++ b/net/core/dst_cache.c @@ -162,3 +162,22 @@ void dst_cache_destroy(struct dst_cache *dst_cache) free_percpu(dst_cache->cache); } EXPORT_SYMBOL_GPL(dst_cache_destroy); + +void dst_cache_reset_now(struct dst_cache *dst_cache) +{ + int i; + + if (!dst_cache->cache) + return; + + dst_cache->reset_ts = jiffies; + for_each_possible_cpu(i) { + struct dst_cache_pcpu *idst = per_cpu_ptr(dst_cache->cache, i); + struct dst_entry *dst = idst->dst; + + idst->cookie = 0; + idst->dst = NULL; + dst_release(dst); + } +} +EXPORT_SYMBOL_GPL(dst_cache_reset_now); diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh index 2e5c1630885e..8a9461aa0878 100755 --- a/tools/testing/selftests/wireguard/netns.sh +++ b/tools/testing/selftests/wireguard/netns.sh @@ -613,6 +613,28 @@ ip0 link set wg0 up kill $ncat_pid ip0 link del wg0 +# Ensure that dst_cache references don't outlive netns lifetime +ip1 link add dev wg0 type wireguard +ip2 link add dev wg0 type wireguard +configure_peers +ip1 link add veth1 type veth peer name veth2 +ip1 link set veth2 netns $netns2 +ip1 addr add fd00:aa::1/64 dev veth1 +ip2 addr add fd00:aa::2/64 dev veth2 +ip1 link set veth1 up +ip2 link set veth2 up +waitiface $netns1 veth1 +waitiface $netns2 veth2 +ip1 -6 route add default dev veth1 via fd00:aa::2 +ip2 -6 route add default dev veth2 via fd00:aa::1 +n1 wg set wg0 peer "$pub2" endpoint [fd00:aa::2]:2 +n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::1]:1 +n1 ping6 -c 1 fd00::2 +pp ip netns delete $netns1 +pp ip netns delete $netns2 +pp ip netns add $netns1 +pp ip netns add $netns2 + # Ensure there aren't circular reference loops ip1 link add wg1 type wireguard ip2 link add wg2 type wireguard @@ -631,7 +653,7 @@ while read -t 0.1 -r line 2>/dev/null || [[ $? -ne 142 ]]; do done < /dev/kmsg alldeleted=1 for object in "${!objects[@]}"; do - if [[ ${objects["$object"]} != *createddestroyed ]]; then + if [[ ${objects["$object"]} != *createddestroyed && ${objects["$object"]} != *createdcreateddestroyeddestroyed ]]; then echo "Error: $object: merely ${objects["$object"]}" >&3 alldeleted=0 fi -- cgit v1.2.3-59-g8ed1b From 886fcee939adb5e2af92741b90643a59f2b54f97 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 29 Nov 2021 10:39:26 -0500 Subject: wireguard: receive: use ring buffer for incoming handshakes Apparently the spinlock on incoming_handshake's skb_queue is highly contended, and a torrent of handshake or cookie packets can bring the data plane to its knees, simply by virtue of enqueueing the handshake packets to be processed asynchronously. So, we try switching this to a ring buffer to hopefully have less lock contention. This alleviates the problem somewhat, though it still isn't perfect, so future patches will have to improve this further. However, it at least doesn't completely diminish the data plane. Reported-by: Streun Fabio Reported-by: Joel Wanner Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- drivers/net/wireguard/device.c | 36 ++++++++++++++++++------------------ drivers/net/wireguard/device.h | 9 +++------ drivers/net/wireguard/queueing.c | 6 +++--- drivers/net/wireguard/queueing.h | 2 +- drivers/net/wireguard/receive.c | 27 ++++++++++++--------------- 5 files changed, 37 insertions(+), 43 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c index 77e64ea6be67..a46067c38bf5 100644 --- a/drivers/net/wireguard/device.c +++ b/drivers/net/wireguard/device.c @@ -98,6 +98,7 @@ static int wg_stop(struct net_device *dev) { struct wg_device *wg = netdev_priv(dev); struct wg_peer *peer; + struct sk_buff *skb; mutex_lock(&wg->device_update_lock); list_for_each_entry(peer, &wg->peer_list, peer_list) { @@ -108,7 +109,9 @@ static int wg_stop(struct net_device *dev) wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake); } mutex_unlock(&wg->device_update_lock); - skb_queue_purge(&wg->incoming_handshakes); + while ((skb = ptr_ring_consume(&wg->handshake_queue.ring)) != NULL) + kfree_skb(skb); + atomic_set(&wg->handshake_queue_len, 0); wg_socket_reinit(wg, NULL, NULL); return 0; } @@ -235,14 +238,13 @@ static void wg_destruct(struct net_device *dev) destroy_workqueue(wg->handshake_receive_wq); destroy_workqueue(wg->handshake_send_wq); destroy_workqueue(wg->packet_crypt_wq); - wg_packet_queue_free(&wg->decrypt_queue); - wg_packet_queue_free(&wg->encrypt_queue); + wg_packet_queue_free(&wg->handshake_queue, true); + wg_packet_queue_free(&wg->decrypt_queue, false); + wg_packet_queue_free(&wg->encrypt_queue, false); rcu_barrier(); /* Wait for all the peers to be actually freed. */ wg_ratelimiter_uninit(); memzero_explicit(&wg->static_identity, sizeof(wg->static_identity)); - skb_queue_purge(&wg->incoming_handshakes); free_percpu(dev->tstats); - free_percpu(wg->incoming_handshakes_worker); kvfree(wg->index_hashtable); kvfree(wg->peer_hashtable); mutex_unlock(&wg->device_update_lock); @@ -298,7 +300,6 @@ static int wg_newlink(struct net *src_net, struct net_device *dev, init_rwsem(&wg->static_identity.lock); mutex_init(&wg->socket_update_lock); mutex_init(&wg->device_update_lock); - skb_queue_head_init(&wg->incoming_handshakes); wg_allowedips_init(&wg->peer_allowedips); wg_cookie_checker_init(&wg->cookie_checker, wg); INIT_LIST_HEAD(&wg->peer_list); @@ -316,16 +317,10 @@ static int wg_newlink(struct net *src_net, struct net_device *dev, if (!dev->tstats) goto err_free_index_hashtable; - wg->incoming_handshakes_worker = - wg_packet_percpu_multicore_worker_alloc( - wg_packet_handshake_receive_worker, wg); - if (!wg->incoming_handshakes_worker) - goto err_free_tstats; - wg->handshake_receive_wq = alloc_workqueue("wg-kex-%s", WQ_CPU_INTENSIVE | WQ_FREEZABLE, 0, dev->name); if (!wg->handshake_receive_wq) - goto err_free_incoming_handshakes; + goto err_free_tstats; wg->handshake_send_wq = alloc_workqueue("wg-kex-%s", WQ_UNBOUND | WQ_FREEZABLE, 0, dev->name); @@ -347,10 +342,15 @@ static int wg_newlink(struct net *src_net, struct net_device *dev, if (ret < 0) goto err_free_encrypt_queue; - ret = wg_ratelimiter_init(); + ret = wg_packet_queue_init(&wg->handshake_queue, wg_packet_handshake_receive_worker, + MAX_QUEUED_INCOMING_HANDSHAKES); if (ret < 0) goto err_free_decrypt_queue; + ret = wg_ratelimiter_init(); + if (ret < 0) + goto err_free_handshake_queue; + ret = register_netdevice(dev); if (ret < 0) goto err_uninit_ratelimiter; @@ -367,18 +367,18 @@ static int wg_newlink(struct net *src_net, struct net_device *dev, err_uninit_ratelimiter: wg_ratelimiter_uninit(); +err_free_handshake_queue: + wg_packet_queue_free(&wg->handshake_queue, false); err_free_decrypt_queue: - wg_packet_queue_free(&wg->decrypt_queue); + wg_packet_queue_free(&wg->decrypt_queue, false); err_free_encrypt_queue: - wg_packet_queue_free(&wg->encrypt_queue); + wg_packet_queue_free(&wg->encrypt_queue, false); err_destroy_packet_crypt: destroy_workqueue(wg->packet_crypt_wq); err_destroy_handshake_send: destroy_workqueue(wg->handshake_send_wq); err_destroy_handshake_receive: destroy_workqueue(wg->handshake_receive_wq); -err_free_incoming_handshakes: - free_percpu(wg->incoming_handshakes_worker); err_free_tstats: free_percpu(dev->tstats); err_free_index_hashtable: diff --git a/drivers/net/wireguard/device.h b/drivers/net/wireguard/device.h index 854bc3d97150..43c7cebbf50b 100644 --- a/drivers/net/wireguard/device.h +++ b/drivers/net/wireguard/device.h @@ -39,21 +39,18 @@ struct prev_queue { struct wg_device { struct net_device *dev; - struct crypt_queue encrypt_queue, decrypt_queue; + struct crypt_queue encrypt_queue, decrypt_queue, handshake_queue; struct sock __rcu *sock4, *sock6; struct net __rcu *creating_net; struct noise_static_identity static_identity; - struct workqueue_struct *handshake_receive_wq, *handshake_send_wq; - struct workqueue_struct *packet_crypt_wq; - struct sk_buff_head incoming_handshakes; - int incoming_handshake_cpu; - struct multicore_worker __percpu *incoming_handshakes_worker; + struct workqueue_struct *packet_crypt_wq,*handshake_receive_wq, *handshake_send_wq; struct cookie_checker cookie_checker; struct pubkey_hashtable *peer_hashtable; struct index_hashtable *index_hashtable; struct allowedips peer_allowedips; struct mutex device_update_lock, socket_update_lock; struct list_head device_list, peer_list; + atomic_t handshake_queue_len; unsigned int num_peers, device_update_gen; u32 fwmark; u16 incoming_port; diff --git a/drivers/net/wireguard/queueing.c b/drivers/net/wireguard/queueing.c index 48e7b982a307..1de413b19e34 100644 --- a/drivers/net/wireguard/queueing.c +++ b/drivers/net/wireguard/queueing.c @@ -38,11 +38,11 @@ int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function, return 0; } -void wg_packet_queue_free(struct crypt_queue *queue) +void wg_packet_queue_free(struct crypt_queue *queue, bool purge) { free_percpu(queue->worker); - WARN_ON(!__ptr_ring_empty(&queue->ring)); - ptr_ring_cleanup(&queue->ring, NULL); + WARN_ON(!purge && !__ptr_ring_empty(&queue->ring)); + ptr_ring_cleanup(&queue->ring, purge ? (void(*)(void*))kfree_skb : NULL); } #define NEXT(skb) ((skb)->prev) diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h index 4ef2944a68bc..e2388107f7fd 100644 --- a/drivers/net/wireguard/queueing.h +++ b/drivers/net/wireguard/queueing.h @@ -23,7 +23,7 @@ struct sk_buff; /* queueing.c APIs: */ int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function, unsigned int len); -void wg_packet_queue_free(struct crypt_queue *queue); +void wg_packet_queue_free(struct crypt_queue *queue, bool purge); struct multicore_worker __percpu * wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr); diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c index 7dc84bcca261..f4e537e3e8ec 100644 --- a/drivers/net/wireguard/receive.c +++ b/drivers/net/wireguard/receive.c @@ -116,8 +116,8 @@ static void wg_receive_handshake_packet(struct wg_device *wg, return; } - under_load = skb_queue_len(&wg->incoming_handshakes) >= - MAX_QUEUED_INCOMING_HANDSHAKES / 8; + under_load = atomic_read(&wg->handshake_queue_len) >= + MAX_QUEUED_INCOMING_HANDSHAKES / 8; if (under_load) { last_under_load = ktime_get_coarse_boottime_ns(); } else if (last_under_load) { @@ -212,13 +212,14 @@ static void wg_receive_handshake_packet(struct wg_device *wg, void wg_packet_handshake_receive_worker(struct work_struct *work) { - struct wg_device *wg = container_of(work, struct multicore_worker, - work)->ptr; + struct crypt_queue *queue = container_of(work, struct multicore_worker, work)->ptr; + struct wg_device *wg = container_of(queue, struct wg_device, handshake_queue); struct sk_buff *skb; - while ((skb = skb_dequeue(&wg->incoming_handshakes)) != NULL) { + while ((skb = ptr_ring_consume_bh(&queue->ring)) != NULL) { wg_receive_handshake_packet(wg, skb); dev_kfree_skb(skb); + atomic_dec(&wg->handshake_queue_len); cond_resched(); } } @@ -554,21 +555,17 @@ void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb) case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE): case cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE): { int cpu; - - if (skb_queue_len(&wg->incoming_handshakes) > - MAX_QUEUED_INCOMING_HANDSHAKES || - unlikely(!rng_is_initialized())) { + if (unlikely(!rng_is_initialized() || + ptr_ring_produce_bh(&wg->handshake_queue.ring, skb))) { net_dbg_skb_ratelimited("%s: Dropping handshake packet from %pISpfsc\n", wg->dev->name, skb); goto err; } - skb_queue_tail(&wg->incoming_handshakes, skb); - /* Queues up a call to packet_process_queued_handshake_ - * packets(skb): - */ - cpu = wg_cpumask_next_online(&wg->incoming_handshake_cpu); + atomic_inc(&wg->handshake_queue_len); + cpu = wg_cpumask_next_online(&wg->handshake_queue.last_cpu); + /* Queues up a call to packet_process_queued_handshake_packets(skb): */ queue_work_on(cpu, wg->handshake_receive_wq, - &per_cpu_ptr(wg->incoming_handshakes_worker, cpu)->work); + &per_cpu_ptr(wg->handshake_queue.worker, cpu)->work); break; } case cpu_to_le32(MESSAGE_DATA): -- cgit v1.2.3-59-g8ed1b From fb32f4f606c17b869805d7cede8b03d78339b50a Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 29 Nov 2021 10:39:27 -0500 Subject: wireguard: receive: drop handshakes if queue lock is contended If we're being delivered packets from multiple CPUs so quickly that the ring lock is contended for CPU tries, then it's safe to assume that the queue is near capacity anyway, so just drop the packet rather than spinning. This helps deal with multicore DoS that can interfere with data path performance. It _still_ does not completely fix the issue, but it again chips away at it. Reported-by: Streun Fabio Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- drivers/net/wireguard/receive.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c index f4e537e3e8ec..7b8df406c773 100644 --- a/drivers/net/wireguard/receive.c +++ b/drivers/net/wireguard/receive.c @@ -554,9 +554,19 @@ void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb) case cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION): case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE): case cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE): { - int cpu; - if (unlikely(!rng_is_initialized() || - ptr_ring_produce_bh(&wg->handshake_queue.ring, skb))) { + int cpu, ret = -EBUSY; + + if (unlikely(!rng_is_initialized())) + goto drop; + if (atomic_read(&wg->handshake_queue_len) > MAX_QUEUED_INCOMING_HANDSHAKES / 2) { + if (spin_trylock_bh(&wg->handshake_queue.ring.producer_lock)) { + ret = __ptr_ring_produce(&wg->handshake_queue.ring, skb); + spin_unlock_bh(&wg->handshake_queue.ring.producer_lock); + } + } else + ret = ptr_ring_produce_bh(&wg->handshake_queue.ring, skb); + if (ret) { + drop: net_dbg_skb_ratelimited("%s: Dropping handshake packet from %pISpfsc\n", wg->dev->name, skb); goto err; -- cgit v1.2.3-59-g8ed1b From 4e3fd721710553832460c179c2ee5ce67ef7f1e0 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 29 Nov 2021 10:39:28 -0500 Subject: wireguard: ratelimiter: use kvcalloc() instead of kvzalloc() Use 2-factor argument form kvcalloc() instead of kvzalloc(). Link: https://github.com/KSPP/linux/issues/162 Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") Signed-off-by: Gustavo A. R. Silva [Jason: Gustavo's link above is for KSPP, but this isn't actually a security fix, as table_size is bounded to 8192 anyway, and gcc realizes this, so the codegen comes out to be about the same.] Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- drivers/net/wireguard/ratelimiter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/wireguard/ratelimiter.c b/drivers/net/wireguard/ratelimiter.c index 3fedd1d21f5e..dd55e5c26f46 100644 --- a/drivers/net/wireguard/ratelimiter.c +++ b/drivers/net/wireguard/ratelimiter.c @@ -176,12 +176,12 @@ int wg_ratelimiter_init(void) (1U << 14) / sizeof(struct hlist_head))); max_entries = table_size * 8; - table_v4 = kvzalloc(table_size * sizeof(*table_v4), GFP_KERNEL); + table_v4 = kvcalloc(table_size, sizeof(*table_v4), GFP_KERNEL); if (unlikely(!table_v4)) goto err_kmemcache; #if IS_ENABLED(CONFIG_IPV6) - table_v6 = kvzalloc(table_size * sizeof(*table_v6), GFP_KERNEL); + table_v6 = kvcalloc(table_size, sizeof(*table_v6), GFP_KERNEL); if (unlikely(!table_v6)) { kvfree(table_v4); goto err_kmemcache; -- cgit v1.2.3-59-g8ed1b From 1a59c9c55585e1ec5b352d31b3f8402f196eae94 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 29 Nov 2021 15:16:52 +0000 Subject: net: mscc: ocelot: fix missing unlock on error in ocelot_hwstamp_set() Add the missing mutex_unlock before return from function ocelot_hwstamp_set() in the ocelot_setup_ptp_traps() error handling case. Fixes: 96ca08c05838 ("net: mscc: ocelot: set up traps for PTP packets") Reported-by: Hulk Robot Signed-off-by: Wei Yongjun Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20211129151652.1165433-1-weiyongjun1@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 409cde1e59c6..1e4ad953cffb 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1563,8 +1563,10 @@ int ocelot_hwstamp_set(struct ocelot *ocelot, int port, struct ifreq *ifr) } err = ocelot_setup_ptp_traps(ocelot, port, l2, l4); - if (err) + if (err) { + mutex_unlock(&ocelot->ptp_lock); return err; + } if (l2 && l4) cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; -- cgit v1.2.3-59-g8ed1b From b83f5ac7d922e69a109261f5f940eebbd4e514c4 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 29 Nov 2021 22:53:27 +0100 Subject: net: marvell: mvpp2: Fix the computation of shared CPUs 'bitmap_fill()' fills a bitmap one 'long' at a time. It is likely that an exact number of bits is expected. Use 'bitmap_set()' instead in order not to set unexpected bits. Fixes: e531f76757eb ("net: mvpp2: handle cases where more CPUs are available than s/w threads") Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index ce486e16489c..6480696c979b 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -7458,7 +7458,7 @@ static int mvpp2_probe(struct platform_device *pdev) shared = num_present_cpus() - priv->nthreads; if (shared > 0) - bitmap_fill(&priv->lock_map, + bitmap_set(&priv->lock_map, 0, min_t(int, shared, MVPP2_MAX_THREADS)); for (i = 0; i < MVPP2_MAX_THREADS; i++) { -- cgit v1.2.3-59-g8ed1b From d1ec975f9fa6d2211c1f403010361034a87e317f Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Mon, 29 Nov 2021 15:17:46 -0800 Subject: ice: xsk: clear status_error0 for each allocated desc Fix a bug in which the receiving of packets can stop in the zero-copy driver. Ice HW ignores 3 lower bits from QRX_TAIL register, which means that tail is bumped only on intervals of 8. Currently with XSK RX batching in place, ice_alloc_rx_bufs_zc() clears the status_error0 only of the last descriptor that has been allocated/taken from the XSK buffer pool. status_error0 includes DD bit that is looked upon by the ice_clean_rx_irq_zc() to tell if a descriptor can be processed. The bug can be triggered when driver updates the ntu but not the QRX_TAIL, so HW wouldn't have a chance to write to the ready descriptors. Later on driver moves the ntc to the mentioned set of descriptors and interprets them as a ready to be processed, since corresponding DD bits were not cleared nor any writeback has happened that would clear it. This can then lead to ntc == ntu case which means that ring is empty and no further packet processing. Fix the XSK traffic hang that can be observed when l2fwd scenario from xdpsock is used by making sure that status_error0 is cleared for each descriptor that is fed to HW and therefore we are sure that driver will not processed non-valid DD bits. This will also prevent the driver from processing the descriptors that were allocated in favor of the previously processed ones, but writeback didn't happen yet. Fixes: db804cfc21e9 ("ice: Use the xsk batched rx allocation interface") Signed-off-by: Maciej Fijalkowski Reviewed-by: Alexander Lobakin Signed-off-by: Tony Nguyen Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ice/ice_xsk.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index ff55cb415b11..bb9a80847298 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -383,6 +383,7 @@ bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) while (i--) { dma = xsk_buff_xdp_get_dma(*xdp); rx_desc->read.pkt_addr = cpu_to_le64(dma); + rx_desc->wb.status_error0 = 0; rx_desc++; xdp++; -- cgit v1.2.3-59-g8ed1b From f4a8adbfe4841491b60c14fe610571e1422359f9 Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Tue, 30 Nov 2021 12:05:54 +0800 Subject: dpaa2-eth: destroy workqueue at the end of remove function The commit c55211892f46 ("dpaa2-eth: support PTP Sync packet one-step timestamping") forgets to destroy workqueue at the end of remove function. Fix this by adding destroy_workqueue before fsl_mc_portal_free and free_netdev. Fixes: c55211892f46 ("dpaa2-eth: support PTP Sync packet one-step timestamping") Signed-off-by: Dongliang Mu Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 6451c8383639..8e643567abce 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -4550,6 +4550,8 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev) fsl_mc_portal_free(priv->mc_io); + destroy_workqueue(priv->dpaa2_ptp_wq); + dev_dbg(net_dev->dev.parent, "Removed interface %s\n", net_dev->name); free_netdev(net_dev); -- cgit v1.2.3-59-g8ed1b From b0f38e15979fa8851e88e8aa371367f264e7b6e9 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 29 Nov 2021 22:39:47 -0800 Subject: natsemi: xtensa: fix section mismatch warnings Fix section mismatch warnings in xtsonic. The first one appears to be bogus and after fixing the second one, the first one is gone. WARNING: modpost: vmlinux.o(.text+0x529adc): Section mismatch in reference from the function sonic_get_stats() to the function .init.text:set_reset_devices() The function sonic_get_stats() references the function __init set_reset_devices(). This is often because sonic_get_stats lacks a __init annotation or the annotation of set_reset_devices is wrong. WARNING: modpost: vmlinux.o(.text+0x529b3b): Section mismatch in reference from the function xtsonic_probe() to the function .init.text:sonic_probe1() The function xtsonic_probe() references the function __init sonic_probe1(). This is often because xtsonic_probe lacks a __init annotation or the annotation of sonic_probe1 is wrong. Fixes: 74f2a5f0ef64 ("xtensa: Add support for the Sonic Ethernet device for the XT2000 board.") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Christophe JAILLET Cc: Finn Thain Cc: Chris Zankel Cc: linux-xtensa@linux-xtensa.org Cc: Thomas Bogendoerfer Acked-by: Max Filippov Link: https://lore.kernel.org/r/20211130063947.7529-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/natsemi/xtsonic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/natsemi/xtsonic.c b/drivers/net/ethernet/natsemi/xtsonic.c index ca4686094701..0a02d8bd0a3e 100644 --- a/drivers/net/ethernet/natsemi/xtsonic.c +++ b/drivers/net/ethernet/natsemi/xtsonic.c @@ -120,7 +120,7 @@ static const struct net_device_ops xtsonic_netdev_ops = { .ndo_set_mac_address = eth_mac_addr, }; -static int __init sonic_probe1(struct net_device *dev) +static int sonic_probe1(struct net_device *dev) { unsigned int silicon_revision; struct sonic_local *lp = netdev_priv(dev); -- cgit v1.2.3-59-g8ed1b From c65d638ab39034cbaa36773b980d28106cfc81fa Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Wed, 17 Nov 2021 13:33:57 +0200 Subject: net/mlx5e: IPsec: Fix Software parser inner l3 type setting in case of encapsulation Current code wrongly uses the skb->protocol field which reflects the outer l3 protocol to set the inner l3 type in Software Parser (SWP) fields settings in the ethernet segment (eseg) in flows where inner l3 exists like in Vxlan over ESP flow, the above method wrongly use the outer protocol type instead of the inner one. thus breaking cases where inner and outer headers have different protocols. Fix by setting the inner l3 type in SWP according to the inner l3 ip header version. Fixes: 2ac9cfe78223 ("net/mlx5e: IPSec, Add Innova IPSec offload TX data path") Signed-off-by: Raed Salem Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c index fb5397324aa4..2db9573a3fe6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -191,7 +191,7 @@ static void mlx5e_ipsec_set_swp(struct sk_buff *skb, eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2; eseg->swp_inner_l4_offset = (skb->csum_start + skb->head - skb->data) / 2; - if (skb->protocol == htons(ETH_P_IPV6)) + if (inner_ip_hdr(skb)->version == 6) eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; break; default: -- cgit v1.2.3-59-g8ed1b From 51ebf5db67f5c6aed79c05f1aa5137bdf5ca6614 Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Thu, 8 Jul 2021 12:48:24 +0300 Subject: net/mlx5e: Fix missing IPsec statistics on uplink representor The cited patch added the IPsec support to uplink representor, however as uplink representors have his private statistics where IPsec stats is not part of it, that effectively makes IPsec stats hidden when uplink representor stats queried. Resolve by adding IPsec stats to uplink representor private statistics. Fixes: 5589b8f1a2c7 ("net/mlx5e: Add IPsec support to uplink representor") Signed-off-by: Raed Salem Reviewed-by: Alaa Hleihel Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index e58a9ec42553..48895d79796a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1080,6 +1080,10 @@ static mlx5e_stats_grp_t mlx5e_ul_rep_stats_grps[] = { &MLX5E_STATS_GRP(pme), &MLX5E_STATS_GRP(channels), &MLX5E_STATS_GRP(per_port_buff_congest), +#ifdef CONFIG_MLX5_EN_IPSEC + &MLX5E_STATS_GRP(ipsec_sw), + &MLX5E_STATS_GRP(ipsec_hw), +#endif }; static unsigned int mlx5e_ul_rep_stats_grps_num(struct mlx5e_priv *priv) -- cgit v1.2.3-59-g8ed1b From 4cce2ccf08fbc27ae34ce0e72db15166e7b5f6a7 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 13 Sep 2021 13:54:30 +0300 Subject: net/mlx5e: Sync TIR params updates against concurrent create/modify Transport Interface Receive (TIR) objects perform the packet processing and reassembly and is also responsible for demultiplexing the packets into the different RQs. There are certain TIR context attributes that propagate to the pointed RQs and applied to them (like packet_merge offloads (LRO/SHAMPO) and tunneled_offload_en). When TIRs do not agree on attributes values, a "last one wins" policy is applied. Hence, if not synced properly, a race between TIR params update and a concurrent TIR create/modify operation might yield to a mismatch between the shadow parameters in SW and the actual applied state of the RQs in HW. tunneled_offload_en is a fixed attribute per profile, while packet merge offload state might be toggled and get out-of-sync. When this happens, packet_merge offload might be working although not requested, or the opposite. All updates to packet_merge state and all create/modify operations of regular redirection/steering TIRs are done under the same priv->state_lock, so they do not run in parallel, and no race is possible. However, there are other kind of TIRs (acceleration offloads TIRs, like TLS TIRs) which are created on demand for each new connection without holding the coarse priv->state_lock, hence might race. Fix this by synchronizing all packet_merge state reads and writes against all TIR create/modify operations. Include the modify operations of the regular redirection steering TIRs under the new lock, for better code layering and division of responsibilities. Fixes: 1182f3659357 ("net/mlx5e: kTLS, Add kTLS RX HW offload support") Signed-off-by: Tariq Toukan Reviewed-by: Moshe Shemesh Reviewed-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en/rx_res.c | 41 +++++++++++++++++++++- .../net/ethernet/mellanox/mlx5/core/en/rx_res.h | 6 ++-- .../ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c | 24 +------------ 3 files changed, 44 insertions(+), 27 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c index 142953847996..0015a81eb9a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c @@ -13,6 +13,9 @@ struct mlx5e_rx_res { unsigned int max_nch; u32 drop_rqn; + struct mlx5e_packet_merge_param pkt_merge_param; + struct rw_semaphore pkt_merge_param_sem; + struct mlx5e_rss *rss[MLX5E_MAX_NUM_RSS]; bool rss_active; u32 rss_rqns[MLX5E_INDIR_RQT_SIZE]; @@ -392,6 +395,7 @@ static int mlx5e_rx_res_ptp_init(struct mlx5e_rx_res *res) if (err) goto out; + /* Separated from the channels RQs, does not share pkt_merge state with them */ mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, mlx5e_rqt_get_rqtn(&res->ptp.rqt), inner_ft_support); @@ -447,6 +451,9 @@ int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev, res->max_nch = max_nch; res->drop_rqn = drop_rqn; + res->pkt_merge_param = *init_pkt_merge_param; + init_rwsem(&res->pkt_merge_param_sem); + err = mlx5e_rx_res_rss_init_def(res, init_pkt_merge_param, init_nch); if (err) goto err_out; @@ -513,7 +520,7 @@ u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res) return mlx5e_tir_get_tirn(&res->ptp.tir); } -u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix) +static u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix) { return mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt); } @@ -656,6 +663,9 @@ int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res, if (!builder) return -ENOMEM; + down_write(&res->pkt_merge_param_sem); + res->pkt_merge_param = *pkt_merge_param; + mlx5e_tir_builder_build_packet_merge(builder, pkt_merge_param); final_err = 0; @@ -681,6 +691,7 @@ int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res, } } + up_write(&res->pkt_merge_param_sem); mlx5e_tir_builder_free(builder); return final_err; } @@ -689,3 +700,31 @@ struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res * { return mlx5e_rss_get_hash(res->rss[0]); } + +int mlx5e_rx_res_tls_tir_create(struct mlx5e_rx_res *res, unsigned int rxq, + struct mlx5e_tir *tir) +{ + bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; + struct mlx5e_tir_builder *builder; + u32 rqtn; + int err; + + builder = mlx5e_tir_builder_alloc(false); + if (!builder) + return -ENOMEM; + + rqtn = mlx5e_rx_res_get_rqtn_direct(res, rxq); + + mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, rqtn, + inner_ft_support); + mlx5e_tir_builder_build_direct(builder); + mlx5e_tir_builder_build_tls(builder); + down_read(&res->pkt_merge_param_sem); + mlx5e_tir_builder_build_packet_merge(builder, &res->pkt_merge_param); + err = mlx5e_tir_init(tir, builder, res->mdev, false); + up_read(&res->pkt_merge_param_sem); + + mlx5e_tir_builder_free(builder); + + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h index d09f7d174a51..b39b20a720e0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h @@ -37,9 +37,6 @@ u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt); u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res); -/* RQTN getters for modules that create their own TIRs */ -u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix); - /* Activate/deactivate API */ void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs); void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res); @@ -69,4 +66,7 @@ struct mlx5e_rss *mlx5e_rx_res_rss_get(struct mlx5e_rx_res *res, u32 rss_idx); /* Workaround for hairpin */ struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *res); +/* Accel TIRs */ +int mlx5e_rx_res_tls_tir_create(struct mlx5e_rx_res *res, unsigned int rxq, + struct mlx5e_tir *tir); #endif /* __MLX5_EN_RX_RES_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index a2a9f68579dd..15711814d2d2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -100,25 +100,6 @@ mlx5e_ktls_rx_resync_create_resp_list(void) return resp_list; } -static int mlx5e_ktls_create_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir, u32 rqtn) -{ - struct mlx5e_tir_builder *builder; - int err; - - builder = mlx5e_tir_builder_alloc(false); - if (!builder) - return -ENOMEM; - - mlx5e_tir_builder_build_rqt(builder, mdev->mlx5e_res.hw_objs.td.tdn, rqtn, false); - mlx5e_tir_builder_build_direct(builder); - mlx5e_tir_builder_build_tls(builder); - err = mlx5e_tir_init(tir, builder, mdev, false); - - mlx5e_tir_builder_free(builder); - - return err; -} - static void accel_rule_handle_work(struct work_struct *work) { struct mlx5e_ktls_offload_context_rx *priv_rx; @@ -609,7 +590,6 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk, struct mlx5_core_dev *mdev; struct mlx5e_priv *priv; int rxq, err; - u32 rqtn; tls_ctx = tls_get_ctx(sk); priv = netdev_priv(netdev); @@ -635,9 +615,7 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk, priv_rx->sw_stats = &priv->tls->sw_stats; mlx5e_set_ktls_rx_priv_ctx(tls_ctx, priv_rx); - rqtn = mlx5e_rx_res_get_rqtn_direct(priv->rx_res, rxq); - - err = mlx5e_ktls_create_tir(mdev, &priv_rx->tir, rqtn); + err = mlx5e_rx_res_tls_tir_create(priv->rx_res, rxq, &priv_rx->tir); if (err) goto err_create_tir; -- cgit v1.2.3-59-g8ed1b From e45c0b34493c24eeeebf89f63a5293aac7728ed7 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Fri, 5 Nov 2021 15:03:20 +0200 Subject: net/mlx5: Move MODIFY_RQT command to ignore list in internal error state When the device is in internal error state, command interface isn't accessible and the driver decides which commands to fail and which to ignore. Move the MODIFY_RQT command to the ignore list in order to avoid the following redundant warning messages in internal error state: mlx5_core 0000:82:00.1: mlx5e_rss_disable:419:(pid 23754): Failed to redirect RQT 0x0 to drop RQ 0xc00848: err = -5 mlx5_core 0000:82:00.1: mlx5e_rx_res_channels_deactivate:598:(pid 23754): Failed to redirect direct RQT 0x1 to drop RQ 0xc00848 (channel 0): err = -5 mlx5_core 0000:82:00.1: mlx5e_rx_res_channels_deactivate:607:(pid 23754): Failed to redirect XSK RQT 0x19 to drop RQ 0xc00848 (channel 0): err = -5 Fixes: 43ec0f41fa73 ("net/mlx5e: Hide all implementation details of mlx5e_rx_res") Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 8eaa24d865c5..a46284ca5172 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -341,6 +341,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_DEALLOC_SF: case MLX5_CMD_OP_DESTROY_UCTX: case MLX5_CMD_OP_DESTROY_UMEM: + case MLX5_CMD_OP_MODIFY_RQT: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -446,7 +447,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_MODIFY_TIS: case MLX5_CMD_OP_QUERY_TIS: case MLX5_CMD_OP_CREATE_RQT: - case MLX5_CMD_OP_MODIFY_RQT: case MLX5_CMD_OP_QUERY_RQT: case MLX5_CMD_OP_CREATE_FLOW_TABLE: -- cgit v1.2.3-59-g8ed1b From ffdf45315226926e5ae5faf0ff76caca68f6d39c Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 18 Nov 2021 12:29:15 +0200 Subject: net/mlx5: Lag, Fix recreation of VF LAG Driver needs to nullify the port select attributes of the LAG when port selection is destroyed, otherwise it breaks recreation of the LAG. It fixes the below kernel oops: [ 587.906377] BUG: kernel NULL pointer dereference, address: 0000000000000008 [ 587.908843] #PF: supervisor read access in kernel mode [ 587.910730] #PF: error_code(0x0000) - not-present page [ 587.912580] PGD 0 P4D 0 [ 587.913632] Oops: 0000 [#1] SMP PTI [ 587.914644] CPU: 5 PID: 165 Comm: kworker/u20:5 Tainted: G OE 5.9.0_mlnx #1 [ 587.916152] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [ 587.918332] Workqueue: mlx5_lag mlx5_do_bond_work [mlx5_core] [ 587.919479] RIP: 0010:mlx5_del_flow_rules+0x10/0x270 [mlx5_core] [ 587.920568] mlx5_core 0000:08:00.1 enp8s0f1: Link up [ 587.920680] Code: c0 09 80 a0 e8 cf 42 a4 e0 48 c7 c3 f4 ff ff ff e8 8a 88 dd e0 e9 ab fe ff ff 0f 1f 44 00 00 41 56 41 55 49 89 fd 41 54 55 53 <48> 8b 47 08 48 8b 68 28 48 85 ed 74 2e 48 8d 7d 38 e8 6a 64 34 e1 [ 587.925116] bond0: (slave enp8s0f1): Enslaving as an active interface with an up link [ 587.930415] RSP: 0018:ffffc9000048fd88 EFLAGS: 00010282 [ 587.930417] RAX: ffff88846c14fac0 RBX: ffff88846cddcb80 RCX: 0000000080400007 [ 587.930417] RDX: 0000000080400008 RSI: ffff88846cddcb80 RDI: 0000000000000000 [ 587.930419] RBP: ffff88845fd80140 R08: 0000000000000001 R09: ffffffffa074ba00 [ 587.938132] R10: ffff88846c14fec0 R11: 0000000000000001 R12: ffff88846c122f10 [ 587.939473] R13: 0000000000000000 R14: 0000000000000001 R15: ffff88846d7a0000 [ 587.940800] FS: 0000000000000000(0000) GS:ffff88846fa80000(0000) knlGS:0000000000000000 [ 587.942416] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 587.943536] CR2: 0000000000000008 CR3: 000000000240a002 CR4: 0000000000770ee0 [ 587.944904] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 587.946308] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 587.947639] PKRU: 55555554 [ 587.948236] Call Trace: [ 587.948834] mlx5_lag_destroy_definer.isra.3+0x16/0x90 [mlx5_core] [ 587.950033] mlx5_lag_destroy_definers+0x5b/0x80 [mlx5_core] [ 587.951128] mlx5_deactivate_lag+0x6e/0x80 [mlx5_core] [ 587.952146] mlx5_do_bond+0x150/0x450 [mlx5_core] [ 587.953086] mlx5_do_bond_work+0x3e/0x50 [mlx5_core] [ 587.954086] process_one_work+0x1eb/0x3e0 [ 587.954899] worker_thread+0x2d/0x3c0 [ 587.955656] ? process_one_work+0x3e0/0x3e0 [ 587.956493] kthread+0x115/0x130 [ 587.957174] ? kthread_park+0x90/0x90 [ 587.957929] ret_from_fork+0x1f/0x30 [ 587.973055] ---[ end trace 71ccd6eca89f5513 ]--- Fixes: b7267869e923 ("net/mlx5: Lag, add support to create/destroy/modify port selection") Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c index ad63dd45c8fb..a6592f9c3c05 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c @@ -608,4 +608,5 @@ void mlx5_lag_port_sel_destroy(struct mlx5_lag *ldev) if (port_sel->tunnel) mlx5_destroy_ttc_table(port_sel->inner.ttc); mlx5_lag_destroy_definers(ldev); + memset(port_sel, 0, sizeof(*port_sel)); } -- cgit v1.2.3-59-g8ed1b From 1e59b32e45e47c8ea5455182286ba010bfa87813 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Tue, 21 Sep 2021 15:47:33 +0300 Subject: net/mlx5: E-switch, Respect BW share of the new group To enable transmit schduler on vport FW require non-zero configuration for vport's TSAR. If vport added to the group which has configured BW share value and TX rate values of the vport are zero, then scheduler wouldn't be enabled on this vport. Fix that by calling BW normalization if BW share of the new group is configured. Fixes: 0fe132eac38c ("net/mlx5: E-switch, Allow to add vports to rate groups") Signed-off-by: Dmytro Linkin Reviewed-by: Roi Dayan Reviewed-by: Parav Pandit Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index c6cc67cb4f6a..4501e3d737f8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -423,7 +423,7 @@ static int esw_qos_vport_update_group(struct mlx5_eswitch *esw, return err; /* Recalculate bw share weights of old and new groups */ - if (vport->qos.bw_share) { + if (vport->qos.bw_share || new_group->bw_share) { esw_qos_normalize_vports_min_rate(esw, curr_group, extack); esw_qos_normalize_vports_min_rate(esw, new_group, extack); } -- cgit v1.2.3-59-g8ed1b From 43a0696f11567278b9412f947e43dd7906c831a8 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 21 Oct 2021 12:46:17 +0000 Subject: net/mlx5: E-Switch, fix single FDB creation on BlueField Always use MLX5_FLOW_TABLE_OTHER_VPORT flag when creating egress ACL table for single FDB. Not doing so on BlueField will make firmware fail the command. On BlueField the E-Switch manager is the ECPF (vport 0xFFFE) which is filled in the flow table creation command but as the other_vport field wasn't set the firmware complains about a bad parameter. This is different from a regular HCA where the E-Switch manager vport is the PF (vport 0x0). Passing MLX5_FLOW_TABLE_OTHER_VPORT will make the firmware happy both on BlueField and on regular HCAs without special condition for each. This fixes the bellow firmware syndrome: mlx5_cmd_check:819:(pid 571): CREATE_FLOW_TABLE(0x930) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0x754a4) Fixes: db202995f503 ("net/mlx5: E-Switch, add logic to enable shared FDB") Signed-off-by: Mark Bloch Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index a46455694f7a..275af1d2b4d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2512,6 +2512,7 @@ static int esw_set_master_egress_rule(struct mlx5_core_dev *master, struct mlx5_eswitch *esw = master->priv.eswitch; struct mlx5_flow_table_attr ft_attr = { .max_fte = 1, .prio = 0, .level = 0, + .flags = MLX5_FLOW_TABLE_OTHER_VPORT, }; struct mlx5_flow_namespace *egress_ns; struct mlx5_flow_table *acl; -- cgit v1.2.3-59-g8ed1b From 5c4e8ae7aa4875041102406801ee434e6c581aef Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 17 Nov 2021 11:47:21 +0200 Subject: net/mlx5: E-Switch, Check group pointer before reading bw_share value If log_esw_max_sched_depth is not supported group pointer of the vport is NULL. Hence, check the pointer before reading bw_share value. Fixes: 0fe132eac38c ("net/mlx5: E-switch, Allow to add vports to rate groups") Signed-off-by: Dmytro Linkin Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 4501e3d737f8..d377ddc70fc7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -130,7 +130,7 @@ static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw, /* If vports min rate divider is 0 but their group has bw_share configured, then * need to set bw_share for vports to minimal value. */ - if (!group_level && !max_guarantee && group->bw_share) + if (!group_level && !max_guarantee && group && group->bw_share) return 1; return 0; } -- cgit v1.2.3-59-g8ed1b From e219440da0c3a63b3cec23d08473436ae7d95fa6 Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Tue, 23 Nov 2021 14:37:11 +0200 Subject: net/mlx5: E-Switch, Use indirect table only if all destinations support it When adding rule with multiple destinations, indirect table is used for all of the destinations if at least one of the destinations support it, this can cause creation of invalid indirect tables for the destinations that doesn't support it. Fixed it by using indirect table only if all destinations support it. Fixes: a508728a4c8b ("net/mlx5e: VF tunnel RX traffic offloading") Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 275af1d2b4d3..32bc08a39925 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -329,14 +329,25 @@ static bool esw_is_indir_table(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr) { struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + bool result = false; int i; - for (i = esw_attr->split_count; i < esw_attr->out_count; i++) + /* Indirect table is supported only for flows with in_port uplink + * and the destination is vport on the same eswitch as the uplink, + * return false in case at least one of destinations doesn't meet + * this criteria. + */ + for (i = esw_attr->split_count; i < esw_attr->out_count; i++) { if (esw_attr->dests[i].rep && mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].rep->vport, - esw_attr->dests[i].mdev)) - return true; - return false; + esw_attr->dests[i].mdev)) { + result = true; + } else { + result = false; + break; + } + } + return result; } static int -- cgit v1.2.3-59-g8ed1b From 76091b0fb60970f610b7ba2d886cd7fb95c5eb2e Mon Sep 17 00:00:00 2001 From: Amir Tzin Date: Wed, 20 Oct 2021 12:45:05 +0300 Subject: net/mlx5: Fix use after free in mlx5_health_wait_pci_up The device health recovery flow calls mlx5_health_wait_pci_up() which queries the device for FW_RESET timeout after freeing the device timeouts structure on mlx5_function_teardown(). Fix this bug by moving timeouts structure init/cleanup to the device's init/uninit phases. Since it is necessary to reset default software timeouts on function reload, extract setting of defaults values from mlx5_tout_init() and call mlx5_tout_set_def_val() directly from mlx5_function_setup(). Fixes: 5945e1adeab5 ("net/mlx5: Read timeout values from init segment") Reported by: Niklas Schnelle Signed-off-by: Amir Tzin Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c | 5 ++--- drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/main.c | 22 ++++++++++++---------- 3 files changed, 15 insertions(+), 13 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c index 0dd96a6b140d..c1df0d3595d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c @@ -31,11 +31,11 @@ static void tout_set(struct mlx5_core_dev *dev, u64 val, enum mlx5_timeouts_type dev->timeouts->to[type] = val; } -static void tout_set_def_val(struct mlx5_core_dev *dev) +void mlx5_tout_set_def_val(struct mlx5_core_dev *dev) { int i; - for (i = MLX5_TO_FW_PRE_INIT_TIMEOUT_MS; i < MAX_TIMEOUT_TYPES; i++) + for (i = 0; i < MAX_TIMEOUT_TYPES; i++) tout_set(dev, tout_def_sw_val[i], i); } @@ -45,7 +45,6 @@ int mlx5_tout_init(struct mlx5_core_dev *dev) if (!dev->timeouts) return -ENOMEM; - tout_set_def_val(dev); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h index 31faa5c17aa9..1c42ead782fa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h @@ -34,6 +34,7 @@ int mlx5_tout_init(struct mlx5_core_dev *dev); void mlx5_tout_cleanup(struct mlx5_core_dev *dev); void mlx5_tout_query_iseg(struct mlx5_core_dev *dev); int mlx5_tout_query_dtor(struct mlx5_core_dev *dev); +void mlx5_tout_set_def_val(struct mlx5_core_dev *dev); u64 _mlx5_tout_ms(struct mlx5_core_dev *dev, enum mlx5_timeouts_types type); #define mlx5_tout_ms(dev, type) _mlx5_tout_ms(dev, MLX5_TO_##type##_MS) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index a92a92a52346..e127c0530b3a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -992,11 +992,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot) if (mlx5_core_is_pf(dev)) pcie_print_link_status(dev->pdev); - err = mlx5_tout_init(dev); - if (err) { - mlx5_core_err(dev, "Failed initializing timeouts, aborting\n"); - return err; - } + mlx5_tout_set_def_val(dev); /* wait for firmware to accept initialization segments configurations */ @@ -1005,13 +1001,13 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot) if (err) { mlx5_core_err(dev, "Firmware over %llu MS in pre-initializing state, aborting\n", mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT)); - goto err_tout_cleanup; + return err; } err = mlx5_cmd_init(dev); if (err) { mlx5_core_err(dev, "Failed initializing command interface, aborting\n"); - goto err_tout_cleanup; + return err; } mlx5_tout_query_iseg(dev); @@ -1094,8 +1090,6 @@ err_disable_hca: err_cmd_cleanup: mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); mlx5_cmd_cleanup(dev); -err_tout_cleanup: - mlx5_tout_cleanup(dev); return err; } @@ -1114,7 +1108,6 @@ static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot) mlx5_core_disable_hca(dev, 0); mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); mlx5_cmd_cleanup(dev); - mlx5_tout_cleanup(dev); return 0; } @@ -1476,6 +1469,12 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) mlx5_debugfs_root); INIT_LIST_HEAD(&priv->traps); + err = mlx5_tout_init(dev); + if (err) { + mlx5_core_err(dev, "Failed initializing timeouts, aborting\n"); + goto err_timeout_init; + } + err = mlx5_health_init(dev); if (err) goto err_health_init; @@ -1501,6 +1500,8 @@ err_adev_init: err_pagealloc_init: mlx5_health_cleanup(dev); err_health_init: + mlx5_tout_cleanup(dev); +err_timeout_init: debugfs_remove(dev->priv.dbg_root); mutex_destroy(&priv->pgdir_mutex); mutex_destroy(&priv->alloc_mutex); @@ -1518,6 +1519,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev) mlx5_adev_cleanup(dev); mlx5_pagealloc_cleanup(dev); mlx5_health_cleanup(dev); + mlx5_tout_cleanup(dev); debugfs_remove_recursive(dev->priv.dbg_root); mutex_destroy(&priv->pgdir_mutex); mutex_destroy(&priv->alloc_mutex); -- cgit v1.2.3-59-g8ed1b From 924cc4633f048b4fb4af3d1f9a51d10867625339 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sat, 6 Nov 2021 20:19:09 +0200 Subject: net/mlx5: Fix too early queueing of log timestamp work The log timestamp work should not be queued before the command interface is initialized, move it to a later stage in the init flow. Fixes: 5a1023deeed0 ("net/mlx5: Add periodic update of host time to firmware") Signed-off-by: Gal Pressman Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 64f1abc4dc36..380f50d5462d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -835,6 +835,9 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev) health->timer.expires = jiffies + msecs_to_jiffies(poll_interval_ms); add_timer(&health->timer); + + if (mlx5_core_is_pf(dev)) + queue_delayed_work(health->wq, &health->update_fw_log_ts_work, 0); } void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health) @@ -902,8 +905,6 @@ int mlx5_health_init(struct mlx5_core_dev *dev) INIT_WORK(&health->fatal_report_work, mlx5_fw_fatal_reporter_err_work); INIT_WORK(&health->report_work, mlx5_fw_reporter_err_work); INIT_DELAYED_WORK(&health->update_fw_log_ts_work, mlx5_health_log_ts_update); - if (mlx5_core_is_pf(dev)) - queue_delayed_work(health->wq, &health->update_fw_log_ts_work, 0); return 0; -- cgit v1.2.3-59-g8ed1b From 502e82b91361955c66c8453b5b7a905b0b5bd5a1 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Sun, 7 Nov 2021 17:21:45 +0200 Subject: net/mlx5: Fix access to a non-supported register Validate MRTC register is supported before triggering a delayed work which accesses it. Fixes: 5a1023deeed0 ("net/mlx5: Add periodic update of host time to firmware") Signed-off-by: Aya Levin Reviewed-by: Gal Pressman Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/main.c | 8 +++----- include/linux/mlx5/mlx5_ifc.h | 5 ++++- 3 files changed, 8 insertions(+), 7 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 380f50d5462d..3ca998874c50 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -836,7 +836,7 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev) health->timer.expires = jiffies + msecs_to_jiffies(poll_interval_ms); add_timer(&health->timer); - if (mlx5_core_is_pf(dev)) + if (mlx5_core_is_pf(dev) && MLX5_CAP_MCAM_REG(dev, mrtc)) queue_delayed_work(health->wq, &health->update_fw_log_ts_work, 0); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index e127c0530b3a..7df9c7f8d9c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1071,18 +1071,16 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot) mlx5_set_driver_version(dev); - mlx5_start_health_poll(dev); - err = mlx5_query_hca_caps(dev); if (err) { mlx5_core_err(dev, "query hca failed\n"); - goto stop_health; + goto reclaim_boot_pages; } + mlx5_start_health_poll(dev); + return 0; -stop_health: - mlx5_stop_health_poll(dev, boot); reclaim_boot_pages: mlx5_reclaim_startup_pages(dev); err_disable_hca: diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 3636df90899a..fbaab440a484 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -9698,7 +9698,10 @@ struct mlx5_ifc_mcam_access_reg_bits { u8 regs_84_to_68[0x11]; u8 tracer_registers[0x4]; - u8 regs_63_to_32[0x20]; + u8 regs_63_to_46[0x12]; + u8 mrtc[0x1]; + u8 regs_44_to_32[0xd]; + u8 regs_31_to_0[0x20]; }; -- cgit v1.2.3-59-g8ed1b From 8c8cf0382257b28378eeff535150c087a653ca19 Mon Sep 17 00:00:00 2001 From: Ben Ben-Ishay Date: Sun, 31 Oct 2021 18:31:02 +0200 Subject: net/mlx5e: SHAMPO, Fix constant expression result mlx5e_build_shampo_hd_umr uses counters i and index incorrectly as unsigned, thus the err state err_unmap could stuck in endless loop. Change i to int to solve the first issue. Reduce index check to solve the second issue, the caller function validates that index could not rotate. Fixes: 64509b052525 ("net/mlx5e: Add data path for SHAMPO feature") Signed-off-by: Ben Ben-Ishay Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 96967b0a2441..793511d5ee4c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -543,13 +543,13 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, u16 klm_entries, u16 index) { struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; - u16 entries, pi, i, header_offset, err, wqe_bbs, new_entries; + u16 entries, pi, header_offset, err, wqe_bbs, new_entries; u32 lkey = rq->mdev->mlx5e_res.hw_objs.mkey; struct page *page = shampo->last_page; u64 addr = shampo->last_addr; struct mlx5e_dma_info *dma_info; struct mlx5e_umr_wqe *umr_wqe; - int headroom; + int headroom, i; headroom = rq->buff.headroom; new_entries = klm_entries - (shampo->pi & (MLX5_UMR_KLM_ALIGNMENT - 1)); @@ -601,9 +601,7 @@ update_klm: err_unmap: while (--i >= 0) { - if (--index < 0) - index = shampo->hd_per_wq - 1; - dma_info = &shampo->info[index]; + dma_info = &shampo->info[--index]; if (!(i & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1))) { dma_info->addr = ALIGN_DOWN(dma_info->addr, PAGE_SIZE); mlx5e_page_release(rq, dma_info, true); -- cgit v1.2.3-59-g8ed1b From 21635d9203e1cf2b73b67e9a86059a62f62a3563 Mon Sep 17 00:00:00 2001 From: Marek Behún Date: Tue, 30 Nov 2021 18:01:46 +0100 Subject: net: dsa: mv88e6xxx: Fix application of erratum 4.8 for 88E6393X MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to SERDES scripts for 88E6393X, erratum 4.8 has to be applied every time before SerDes is powered on. Split the code for erratum 4.8 into separate function and call it in mv88e6393x_serdes_power(). Fixes: de776d0d316f ("net: dsa: mv88e6xxx: add support for mv88e6393x family") Signed-off-by: Marek Behún Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/serdes.c | 53 ++++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 20 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c index 6ea003678798..0658ee3b014c 100644 --- a/drivers/net/dsa/mv88e6xxx/serdes.c +++ b/drivers/net/dsa/mv88e6xxx/serdes.c @@ -1271,9 +1271,9 @@ void mv88e6390_serdes_get_regs(struct mv88e6xxx_chip *chip, int port, void *_p) } } -static int mv88e6393x_serdes_port_errata(struct mv88e6xxx_chip *chip, int lane) +static int mv88e6393x_serdes_erratum_4_6(struct mv88e6xxx_chip *chip, int lane) { - u16 reg, pcs; + u16 reg; int err; /* mv88e6393x family errata 4.6: @@ -1300,11 +1300,32 @@ static int mv88e6393x_serdes_port_errata(struct mv88e6xxx_chip *chip, int lane) if (err) return err; - err = mv88e6390_serdes_power_sgmii(chip, lane, false); - if (err) - return err; + return mv88e6390_serdes_power_sgmii(chip, lane, false); } + return 0; +} + +int mv88e6393x_serdes_setup_errata(struct mv88e6xxx_chip *chip) +{ + int err; + + err = mv88e6393x_serdes_erratum_4_6(chip, MV88E6393X_PORT0_LANE); + if (err) + return err; + + err = mv88e6393x_serdes_erratum_4_6(chip, MV88E6393X_PORT9_LANE); + if (err) + return err; + + return mv88e6393x_serdes_erratum_4_6(chip, MV88E6393X_PORT10_LANE); +} + +static int mv88e6393x_serdes_erratum_4_8(struct mv88e6xxx_chip *chip, int lane) +{ + u16 reg, pcs; + int err; + /* mv88e6393x family errata 4.8: * When a SERDES port is operating in 1000BASE-X or SGMII mode link may * not come up after hardware reset or software reset of SERDES core. @@ -1334,29 +1355,21 @@ static int mv88e6393x_serdes_port_errata(struct mv88e6xxx_chip *chip, int lane) MV88E6393X_ERRATA_4_8_REG, reg); } -int mv88e6393x_serdes_setup_errata(struct mv88e6xxx_chip *chip) -{ - int err; - - err = mv88e6393x_serdes_port_errata(chip, MV88E6393X_PORT0_LANE); - if (err) - return err; - - err = mv88e6393x_serdes_port_errata(chip, MV88E6393X_PORT9_LANE); - if (err) - return err; - - return mv88e6393x_serdes_port_errata(chip, MV88E6393X_PORT10_LANE); -} - int mv88e6393x_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane, bool on) { u8 cmode = chip->ports[port].cmode; + int err; if (port != 0 && port != 9 && port != 10) return -EOPNOTSUPP; + if (on) { + err = mv88e6393x_serdes_erratum_4_8(chip, lane); + if (err) + return err; + } + switch (cmode) { case MV88E6XXX_PORT_STS_CMODE_SGMII: case MV88E6XXX_PORT_STS_CMODE_1000BASEX: -- cgit v1.2.3-59-g8ed1b From 8c3318b4874e2dee867f5ae8f6d38f78e044bf71 Mon Sep 17 00:00:00 2001 From: Marek Behún Date: Tue, 30 Nov 2021 18:01:47 +0100 Subject: net: dsa: mv88e6xxx: Drop unnecessary check in mv88e6393x_serdes_erratum_4_6() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The check for lane is unnecessary, since the function is called only with allowed lane argument. Signed-off-by: Marek Behún Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/serdes.c | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c index 0658ee3b014c..3a6244596a67 100644 --- a/drivers/net/dsa/mv88e6xxx/serdes.c +++ b/drivers/net/dsa/mv88e6xxx/serdes.c @@ -1284,26 +1284,20 @@ static int mv88e6393x_serdes_erratum_4_6(struct mv88e6xxx_chip *chip, int lane) * It seems that after this workaround the SERDES is automatically * powered up (the bit is cleared), so power it down. */ - if (lane == MV88E6393X_PORT0_LANE || lane == MV88E6393X_PORT9_LANE || - lane == MV88E6393X_PORT10_LANE) { - err = mv88e6390_serdes_read(chip, lane, - MDIO_MMD_PHYXS, - MV88E6393X_SERDES_POC, ®); - if (err) - return err; + err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS, + MV88E6393X_SERDES_POC, ®); + if (err) + return err; - reg &= ~MV88E6393X_SERDES_POC_PDOWN; - reg |= MV88E6393X_SERDES_POC_RESET; + reg &= ~MV88E6393X_SERDES_POC_PDOWN; + reg |= MV88E6393X_SERDES_POC_RESET; - err = mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS, - MV88E6393X_SERDES_POC, reg); - if (err) - return err; - - return mv88e6390_serdes_power_sgmii(chip, lane, false); - } + err = mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS, + MV88E6393X_SERDES_POC, reg); + if (err) + return err; - return 0; + return mv88e6390_serdes_power_sgmii(chip, lane, false); } int mv88e6393x_serdes_setup_errata(struct mv88e6xxx_chip *chip) -- cgit v1.2.3-59-g8ed1b From 7527d66260ac0c603c6baca5146748061fcddbd6 Mon Sep 17 00:00:00 2001 From: Marek Behún Date: Tue, 30 Nov 2021 18:01:48 +0100 Subject: net: dsa: mv88e6xxx: Save power by disabling SerDes trasmitter and receiver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Save power on 88E6393X by disabling SerDes receiver and transmitter after SerDes is SerDes is disabled. Signed-off-by: Marek Behún Cc: stable@vger.kernel.org # de776d0d316f ("net: dsa: mv88e6xxx: add support for mv88e6393x family") Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/serdes.c | 46 ++++++++++++++++++++++++++++++++++---- drivers/net/dsa/mv88e6xxx/serdes.h | 3 +++ 2 files changed, 45 insertions(+), 4 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c index 3a6244596a67..ceb63d7f1f97 100644 --- a/drivers/net/dsa/mv88e6xxx/serdes.c +++ b/drivers/net/dsa/mv88e6xxx/serdes.c @@ -1271,6 +1271,28 @@ void mv88e6390_serdes_get_regs(struct mv88e6xxx_chip *chip, int port, void *_p) } } +static int mv88e6393x_serdes_power_lane(struct mv88e6xxx_chip *chip, int lane, + bool on) +{ + u16 reg; + int err; + + err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS, + MV88E6393X_SERDES_CTRL1, ®); + if (err) + return err; + + if (on) + reg &= ~(MV88E6393X_SERDES_CTRL1_TX_PDOWN | + MV88E6393X_SERDES_CTRL1_RX_PDOWN); + else + reg |= MV88E6393X_SERDES_CTRL1_TX_PDOWN | + MV88E6393X_SERDES_CTRL1_RX_PDOWN; + + return mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS, + MV88E6393X_SERDES_CTRL1, reg); +} + static int mv88e6393x_serdes_erratum_4_6(struct mv88e6xxx_chip *chip, int lane) { u16 reg; @@ -1297,7 +1319,11 @@ static int mv88e6393x_serdes_erratum_4_6(struct mv88e6xxx_chip *chip, int lane) if (err) return err; - return mv88e6390_serdes_power_sgmii(chip, lane, false); + err = mv88e6390_serdes_power_sgmii(chip, lane, false); + if (err) + return err; + + return mv88e6393x_serdes_power_lane(chip, lane, false); } int mv88e6393x_serdes_setup_errata(struct mv88e6xxx_chip *chip) @@ -1362,17 +1388,29 @@ int mv88e6393x_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane, err = mv88e6393x_serdes_erratum_4_8(chip, lane); if (err) return err; + + err = mv88e6393x_serdes_power_lane(chip, lane, true); + if (err) + return err; } switch (cmode) { case MV88E6XXX_PORT_STS_CMODE_SGMII: case MV88E6XXX_PORT_STS_CMODE_1000BASEX: case MV88E6XXX_PORT_STS_CMODE_2500BASEX: - return mv88e6390_serdes_power_sgmii(chip, lane, on); + err = mv88e6390_serdes_power_sgmii(chip, lane, on); + break; case MV88E6393X_PORT_STS_CMODE_5GBASER: case MV88E6393X_PORT_STS_CMODE_10GBASER: - return mv88e6390_serdes_power_10g(chip, lane, on); + err = mv88e6390_serdes_power_10g(chip, lane, on); + break; } - return 0; + if (err) + return err; + + if (!on) + err = mv88e6393x_serdes_power_lane(chip, lane, false); + + return err; } diff --git a/drivers/net/dsa/mv88e6xxx/serdes.h b/drivers/net/dsa/mv88e6xxx/serdes.h index cbb3ba30caea..e9292c8beee4 100644 --- a/drivers/net/dsa/mv88e6xxx/serdes.h +++ b/drivers/net/dsa/mv88e6xxx/serdes.h @@ -93,6 +93,9 @@ #define MV88E6393X_SERDES_POC_PCS_MASK 0x0007 #define MV88E6393X_SERDES_POC_RESET BIT(15) #define MV88E6393X_SERDES_POC_PDOWN BIT(5) +#define MV88E6393X_SERDES_CTRL1 0xf003 +#define MV88E6393X_SERDES_CTRL1_TX_PDOWN BIT(9) +#define MV88E6393X_SERDES_CTRL1_RX_PDOWN BIT(8) #define MV88E6393X_ERRATA_4_8_REG 0xF074 #define MV88E6393X_ERRATA_4_8_BIT BIT(14) -- cgit v1.2.3-59-g8ed1b From 93fd8207bed80ce19aaf59932cbe1c03d418a37d Mon Sep 17 00:00:00 2001 From: Marek Behún Date: Tue, 30 Nov 2021 18:01:49 +0100 Subject: net: dsa: mv88e6xxx: Add fix for erratum 5.2 of 88E6393X family MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add fix for erratum 5.2 of the 88E6393X (Amethyst) family: for 10gbase-r mode, some undocumented registers need to be written some special values. Fixes: de776d0d316f ("net: dsa: mv88e6xxx: add support for mv88e6393x family") Signed-off-by: Marek Behún Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/serdes.c | 48 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c index ceb63d7f1f97..9e4f18a4adc2 100644 --- a/drivers/net/dsa/mv88e6xxx/serdes.c +++ b/drivers/net/dsa/mv88e6xxx/serdes.c @@ -1375,6 +1375,50 @@ static int mv88e6393x_serdes_erratum_4_8(struct mv88e6xxx_chip *chip, int lane) MV88E6393X_ERRATA_4_8_REG, reg); } +static int mv88e6393x_serdes_erratum_5_2(struct mv88e6xxx_chip *chip, int lane, + u8 cmode) +{ + static const struct { + u16 dev, reg, val, mask; + } fixes[] = { + { MDIO_MMD_VEND1, 0x8093, 0xcb5a, 0xffff }, + { MDIO_MMD_VEND1, 0x8171, 0x7088, 0xffff }, + { MDIO_MMD_VEND1, 0x80c9, 0x311a, 0xffff }, + { MDIO_MMD_VEND1, 0x80a2, 0x8000, 0xff7f }, + { MDIO_MMD_VEND1, 0x80a9, 0x0000, 0xfff0 }, + { MDIO_MMD_VEND1, 0x80a3, 0x0000, 0xf8ff }, + { MDIO_MMD_PHYXS, MV88E6393X_SERDES_POC, + MV88E6393X_SERDES_POC_RESET, MV88E6393X_SERDES_POC_RESET }, + }; + int err, i; + u16 reg; + + /* mv88e6393x family errata 5.2: + * For optimal signal integrity the following sequence should be applied + * to SERDES operating in 10G mode. These registers only apply to 10G + * operation and have no effect on other speeds. + */ + if (cmode != MV88E6393X_PORT_STS_CMODE_10GBASER) + return 0; + + for (i = 0; i < ARRAY_SIZE(fixes); ++i) { + err = mv88e6390_serdes_read(chip, lane, fixes[i].dev, + fixes[i].reg, ®); + if (err) + return err; + + reg &= ~fixes[i].mask; + reg |= fixes[i].val; + + err = mv88e6390_serdes_write(chip, lane, fixes[i].dev, + fixes[i].reg, reg); + if (err) + return err; + } + + return 0; +} + int mv88e6393x_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane, bool on) { @@ -1389,6 +1433,10 @@ int mv88e6393x_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane, if (err) return err; + err = mv88e6393x_serdes_erratum_5_2(chip, lane, cmode); + if (err) + return err; + err = mv88e6393x_serdes_power_lane(chip, lane, true); if (err) return err; -- cgit v1.2.3-59-g8ed1b From 163000dbc772c1eae9bdfe7c8fe30155db1efd74 Mon Sep 17 00:00:00 2001 From: Marek Behún Date: Tue, 30 Nov 2021 18:01:50 +0100 Subject: net: dsa: mv88e6xxx: Fix inband AN for 2500base-x on 88E6393X family MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Inband AN is broken on Amethyst in 2500base-x mode when set by standard mechanism (via cmode). (There probably is some weird setting done by default in the switch for this mode that make it cycle in some state or something, because when the peer is the mvneta controller, it receives link change interrupts every ~0.3ms, but the link is always down.) Get around this by configuring the PCS mode to 1000base-x (where inband AN works), and then changing the SerDes frequency while SerDes transmitter and receiver are disabled, before enabling SerDes PHY. After disabling SerDes PHY, change the PCS mode back to 2500base-x, to avoid confusing the device (if we leave it at 1000base-x PCS mode but with different frequency, and then change cmode to sgmii, the device won't change the frequency because it thinks it already has the correct one). The register which changes the frequency is undocumented. I discovered it by going through all registers in the ranges 4.f000-4.f100 and 1e.8000-1e.8200 for all SerDes cmodes (sgmii, 1000base-x, 2500base-x, 5gbase-r, 10gbase-r, usxgmii) and filtering out registers that didn't make sense (the value was the same for modes which have different frequency). The result of this was: reg sgmii 1000base-x 2500base-x 5gbase-r 10gbase-r usxgmii 04.f002 005b 0058 0059 005c 005d 005f 04.f076 3000 0000 1000 4000 5000 7000 04.f07c 0950 0950 1850 0550 0150 0150 1e.8000 0059 0059 0058 0055 0051 0051 1e.8140 0e20 0e20 0e28 0e21 0e42 0e42 Register 04.f002 is the documented Port Operational Confiuration register, it's last 3 bits select PCS type, so changing this register also changes the frequency to the appropriate value. Registers 04.f076 and 04.f07c are not writable. Undocumented register 1e.8000 was the one: changing bits 3:0 from 9 to 8 changed SerDes frequency to 3.125 GHz, while leaving the value of PCS mode in register 04.f002.2:0 at 1000base-x. Inband autonegotiation started working correctly. (I didn't try anything with register 1e.8140 since 1e.8000 solved the problem.) Since I don't have documentation for this register 1e.8000.3:0, I am using the constants without names, but my hypothesis is that this register selects PHY frequency. If in the future I have access to an oscilloscope able to handle these frequencies, I will try to test this hypothesis. Fixes: de776d0d316f ("net: dsa: mv88e6xxx: add support for mv88e6393x family") Signed-off-by: Marek Behún Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/serdes.c | 61 +++++++++++++++++++++++++++++++++++++- drivers/net/dsa/mv88e6xxx/serdes.h | 1 + 2 files changed, 61 insertions(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c index 9e4f18a4adc2..6f60376b932c 100644 --- a/drivers/net/dsa/mv88e6xxx/serdes.c +++ b/drivers/net/dsa/mv88e6xxx/serdes.c @@ -1419,6 +1419,54 @@ static int mv88e6393x_serdes_erratum_5_2(struct mv88e6xxx_chip *chip, int lane, return 0; } +static int mv88e6393x_serdes_fix_2500basex_an(struct mv88e6xxx_chip *chip, + int lane, u8 cmode, bool on) +{ + u16 reg; + int err; + + if (cmode != MV88E6XXX_PORT_STS_CMODE_2500BASEX) + return 0; + + /* Inband AN is broken on Amethyst in 2500base-x mode when set by + * standard mechanism (via cmode). + * We can get around this by configuring the PCS mode to 1000base-x + * and then writing value 0x58 to register 1e.8000. (This must be done + * while SerDes receiver and transmitter are disabled, which is, when + * this function is called.) + * It seem that when we do this configuration to 2500base-x mode (by + * changing PCS mode to 1000base-x and frequency to 3.125 GHz from + * 1.25 GHz) and then configure to sgmii or 1000base-x, the device + * thinks that it already has SerDes at 1.25 GHz and does not change + * the 1e.8000 register, leaving SerDes at 3.125 GHz. + * To avoid this, change PCS mode back to 2500base-x when disabling + * SerDes from 2500base-x mode. + */ + err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS, + MV88E6393X_SERDES_POC, ®); + if (err) + return err; + + reg &= ~(MV88E6393X_SERDES_POC_PCS_MASK | MV88E6393X_SERDES_POC_AN); + if (on) + reg |= MV88E6393X_SERDES_POC_PCS_1000BASEX | + MV88E6393X_SERDES_POC_AN; + else + reg |= MV88E6393X_SERDES_POC_PCS_2500BASEX; + reg |= MV88E6393X_SERDES_POC_RESET; + + err = mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS, + MV88E6393X_SERDES_POC, reg); + if (err) + return err; + + err = mv88e6390_serdes_write(chip, lane, MDIO_MMD_VEND1, 0x8000, 0x58); + if (err) + return err; + + return 0; +} + int mv88e6393x_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane, bool on) { @@ -1437,6 +1485,11 @@ int mv88e6393x_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane, if (err) return err; + err = mv88e6393x_serdes_fix_2500basex_an(chip, lane, cmode, + true); + if (err) + return err; + err = mv88e6393x_serdes_power_lane(chip, lane, true); if (err) return err; @@ -1457,8 +1510,14 @@ int mv88e6393x_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane, if (err) return err; - if (!on) + if (!on) { err = mv88e6393x_serdes_power_lane(chip, lane, false); + if (err) + return err; + + err = mv88e6393x_serdes_fix_2500basex_an(chip, lane, cmode, + false); + } return err; } diff --git a/drivers/net/dsa/mv88e6xxx/serdes.h b/drivers/net/dsa/mv88e6xxx/serdes.h index e9292c8beee4..8dd8ed225b45 100644 --- a/drivers/net/dsa/mv88e6xxx/serdes.h +++ b/drivers/net/dsa/mv88e6xxx/serdes.h @@ -93,6 +93,7 @@ #define MV88E6393X_SERDES_POC_PCS_MASK 0x0007 #define MV88E6393X_SERDES_POC_RESET BIT(15) #define MV88E6393X_SERDES_POC_PDOWN BIT(5) +#define MV88E6393X_SERDES_POC_AN BIT(3) #define MV88E6393X_SERDES_CTRL1 0xf003 #define MV88E6393X_SERDES_CTRL1_TX_PDOWN BIT(9) #define MV88E6393X_SERDES_CTRL1_RX_PDOWN BIT(8) -- cgit v1.2.3-59-g8ed1b From ede359d8843a2779d232ed30bc36089d4b5962e4 Mon Sep 17 00:00:00 2001 From: Marek Behún Date: Tue, 30 Nov 2021 18:01:51 +0100 Subject: net: dsa: mv88e6xxx: Link in pcs_get_state() if AN is bypassed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Function mv88e6xxx_serdes_pcs_get_state() currently does not report link up if AN is enabled, Link bit is set, but Speed and Duplex Resolved bit is not set, which testing shows is the case for when auto-negotiation was bypassed (we have AN enabled but link partner does not). An example of such link partner is Marvell 88X3310 PHY, when put into the mode where host interface changes between 10gbase-r, 5gbase-r, 2500base-x and sgmii according to copper speed. The 88X3310 does not enable AN in 2500base-x, and so SerDes on mv88e6xxx currently does not link with it. Fix this. Fixes: a5a6858b793f ("net: dsa: mv88e6xxx: extend phylink to Serdes PHYs") Signed-off-by: Marek Behún Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/serdes.c | 48 +++++++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 6 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c index 6f60376b932c..55273013bfb5 100644 --- a/drivers/net/dsa/mv88e6xxx/serdes.c +++ b/drivers/net/dsa/mv88e6xxx/serdes.c @@ -50,11 +50,22 @@ static int mv88e6390_serdes_write(struct mv88e6xxx_chip *chip, } static int mv88e6xxx_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, - u16 status, u16 lpa, + u16 ctrl, u16 status, u16 lpa, struct phylink_link_state *state) { + state->link = !!(status & MV88E6390_SGMII_PHY_STATUS_LINK); + if (status & MV88E6390_SGMII_PHY_STATUS_SPD_DPL_VALID) { - state->link = !!(status & MV88E6390_SGMII_PHY_STATUS_LINK); + /* The Spped and Duplex Resolved register is 1 if AN is enabled + * and complete, or if AN is disabled. So with disabled AN we + * still get here on link up. But we want to set an_complete + * only if AN was enabled, thus we look at BMCR_ANENABLE. + * (According to 802.3-2008 section 22.2.4.2.10, we should be + * able to get this same value from BMSR_ANEGCAPABLE, but tests + * show that these Marvell PHYs don't conform to this part of + * the specificaion - BMSR_ANEGCAPABLE is simply always 1.) + */ + state->an_complete = !!(ctrl & BMCR_ANENABLE); state->duplex = status & MV88E6390_SGMII_PHY_STATUS_DUPLEX_FULL ? DUPLEX_FULL : DUPLEX_HALF; @@ -81,6 +92,18 @@ static int mv88e6xxx_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, dev_err(chip->dev, "invalid PHY speed\n"); return -EINVAL; } + } else if (state->link && + state->interface != PHY_INTERFACE_MODE_SGMII) { + /* If Speed and Duplex Resolved register is 0 and link is up, it + * means that AN was enabled, but link partner had it disabled + * and the PHY invoked the Auto-Negotiation Bypass feature and + * linked anyway. + */ + state->duplex = DUPLEX_FULL; + if (state->interface == PHY_INTERFACE_MODE_2500BASEX) + state->speed = SPEED_2500; + else + state->speed = SPEED_1000; } else { state->link = false; } @@ -168,9 +191,15 @@ int mv88e6352_serdes_pcs_config(struct mv88e6xxx_chip *chip, int port, int mv88e6352_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port, int lane, struct phylink_link_state *state) { - u16 lpa, status; + u16 lpa, status, ctrl; int err; + err = mv88e6352_serdes_read(chip, MII_BMCR, &ctrl); + if (err) { + dev_err(chip->dev, "can't read Serdes PHY control: %d\n", err); + return err; + } + err = mv88e6352_serdes_read(chip, 0x11, &status); if (err) { dev_err(chip->dev, "can't read Serdes PHY status: %d\n", err); @@ -183,7 +212,7 @@ int mv88e6352_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port, return err; } - return mv88e6xxx_serdes_pcs_get_state(chip, status, lpa, state); + return mv88e6xxx_serdes_pcs_get_state(chip, ctrl, status, lpa, state); } int mv88e6352_serdes_pcs_an_restart(struct mv88e6xxx_chip *chip, int port, @@ -883,9 +912,16 @@ int mv88e6390_serdes_pcs_config(struct mv88e6xxx_chip *chip, int port, static int mv88e6390_serdes_pcs_get_state_sgmii(struct mv88e6xxx_chip *chip, int port, int lane, struct phylink_link_state *state) { - u16 lpa, status; + u16 lpa, status, ctrl; int err; + err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS, + MV88E6390_SGMII_BMCR, &ctrl); + if (err) { + dev_err(chip->dev, "can't read Serdes PHY control: %d\n", err); + return err; + } + err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS, MV88E6390_SGMII_PHY_STATUS, &status); if (err) { @@ -900,7 +936,7 @@ static int mv88e6390_serdes_pcs_get_state_sgmii(struct mv88e6xxx_chip *chip, return err; } - return mv88e6xxx_serdes_pcs_get_state(chip, status, lpa, state); + return mv88e6xxx_serdes_pcs_get_state(chip, ctrl, status, lpa, state); } static int mv88e6390_serdes_pcs_get_state_10g(struct mv88e6xxx_chip *chip, -- cgit v1.2.3-59-g8ed1b From 7e4dcc13965c57869684d57a1dc6dd7be589488c Mon Sep 17 00:00:00 2001 From: Mitch Williams Date: Fri, 4 Jun 2021 09:53:28 -0700 Subject: iavf: restore MSI state on reset If the PF experiences an FLR, the VF's MSI and MSI-X configuration will be conveniently and silently removed in the process. When this happens, reset recovery will appear to complete normally but no traffic will pass. The netdev watchdog will helpfully notify everyone of this issue. To prevent such public embarrassment, restore MSI configuration at every reset. For normal resets, this will do no harm, but for VF resets resulting from a PF FLR, this will keep the VF working. Fixes: 5eae00c57f5e ("i40evf: main driver core") Signed-off-by: Mitch Williams Tested-by: George Kuruvinakunnel Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 14934a7a13ef..cfdbf8c08d18 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -2248,6 +2248,7 @@ static void iavf_reset_task(struct work_struct *work) } pci_set_master(adapter->pdev); + pci_restore_msi_state(adapter->pdev); if (i == IAVF_RESET_WAIT_COMPLETE_COUNT) { dev_err(&adapter->pdev->dev, "Reset never finished (%x)\n", -- cgit v1.2.3-59-g8ed1b From e2dabc4f7e7b60299c20a36d6a7b24ed9bf8e572 Mon Sep 17 00:00:00 2001 From: Zhou Qingyang Date: Tue, 30 Nov 2021 19:08:48 +0800 Subject: net: qlogic: qlcnic: Fix a NULL pointer dereference in qlcnic_83xx_add_rings() In qlcnic_83xx_add_rings(), the indirect function of ahw->hw_ops->alloc_mbx_args will be called to allocate memory for cmd.req.arg, and there is a dereference of it in qlcnic_83xx_add_rings(), which could lead to a NULL pointer dereference on failure of the indirect function like qlcnic_83xx_alloc_mbx_args(). Fix this bug by adding a check of alloc_mbx_args(), this patch imitates the logic of mbx_cmd()'s failure handling. This bug was found by a static analyzer. The analysis employs differential checking to identify inconsistent security operations (e.g., checks or kfrees) between two code paths and confirms that the inconsistent operations are not recovered in the current function or the callers, so they constitute bugs. Note that, as a bug found by static analysis, it can be a false positive or hard to trigger. Multiple researchers have cross-reviewed the bug. Builds with CONFIG_QLCNIC=m show no new warnings, and our static analyzer no longer warns about this code. Fixes: 7f9664525f9c ("qlcnic: 83xx memory map and HW access routine") Signed-off-by: Zhou Qingyang Link: https://lore.kernel.org/r/20211130110848.109026-1-zhou1615@umn.edu Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index d51bac7ba5af..bd0607680329 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -1077,8 +1077,14 @@ static int qlcnic_83xx_add_rings(struct qlcnic_adapter *adapter) sds_mbx_size = sizeof(struct qlcnic_sds_mbx); context_id = recv_ctx->context_id; num_sds = adapter->drv_sds_rings - QLCNIC_MAX_SDS_RINGS; - ahw->hw_ops->alloc_mbx_args(&cmd, adapter, - QLCNIC_CMD_ADD_RCV_RINGS); + err = ahw->hw_ops->alloc_mbx_args(&cmd, adapter, + QLCNIC_CMD_ADD_RCV_RINGS); + if (err) { + dev_err(&adapter->pdev->dev, + "Failed to alloc mbx args %d\n", err); + return err; + } + cmd.req.arg[1] = 0 | (num_sds << 8) | (context_id << 16); /* set up status rings, mbx 2-81 */ -- cgit v1.2.3-59-g8ed1b From ee201011c1e1563c114a55c86eb164b236f18e84 Mon Sep 17 00:00:00 2001 From: Stephen Suryaputra Date: Tue, 30 Nov 2021 11:26:37 -0500 Subject: vrf: Reset IPCB/IP6CB when processing outbound pkts in vrf dev xmit IPCB/IP6CB need to be initialized when processing outbound v4 or v6 pkts in the codepath of vrf device xmit function so that leftover garbage doesn't cause futher code that uses the CB to incorrectly process the pkt. One occasion of the issue might occur when MPLS route uses the vrf device as the outgoing device such as when the route is added using "ip -f mpls route add