From 400f6ebbc175286576c7f7fddf3c347d09d12310 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 15 Dec 2023 11:13:34 +0100 Subject: wifi: iwlwifi: pcie: don't synchronize IRQs from IRQ On older devices (before unified image!) we can end up calling stop_device from an rfkill interrupt. However, in stop_device we attempt to synchronize IRQs, which then of course deadlocks. Avoid this by checking the context, if running from the IRQ thread then don't synchronize. This wouldn't be correct on a new device since RSS is supported, but older devices only have a single interrupt/queue. Fixes: 37fb29bd1f90 ("wifi: iwlwifi: pcie: synchronize IRQs before NAPI") Reviewed-by: Miri Korenblit Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: Kalle Valo Link: https://msgid.link/20231215111335.59aab00baed7.Iadfe154d6248e7f9dfd69522e5429dbbd72925d7@changeid --- drivers/net/wireless/intel/iwlwifi/pcie/internal.h | 4 ++-- drivers/net/wireless/intel/iwlwifi/pcie/rx.c | 8 ++++---- drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 17 +++++++++-------- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index 56def20374f3..7805a42948af 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -770,7 +770,7 @@ static inline void iwl_enable_rfkill_int(struct iwl_trans *trans) } } -void iwl_pcie_handle_rfkill_irq(struct iwl_trans *trans); +void iwl_pcie_handle_rfkill_irq(struct iwl_trans *trans, bool from_irq); static inline bool iwl_is_rfkill_set(struct iwl_trans *trans) { @@ -817,7 +817,7 @@ static inline bool iwl_pcie_dbg_on(struct iwl_trans *trans) return (trans->dbg.dest_tlv || iwl_trans_dbg_ini_valid(trans)); } -void iwl_trans_pcie_rf_kill(struct iwl_trans *trans, bool state); +void iwl_trans_pcie_rf_kill(struct iwl_trans *trans, bool state, bool from_irq); void iwl_trans_pcie_dump_regs(struct iwl_trans *trans); #ifdef CONFIG_IWLWIFI_DEBUGFS diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c index bc6a9f861711..07931c2db494 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c @@ -1783,7 +1783,7 @@ static u32 iwl_pcie_int_cause_ict(struct iwl_trans *trans) return inta; } -void iwl_pcie_handle_rfkill_irq(struct iwl_trans *trans) +void iwl_pcie_handle_rfkill_irq(struct iwl_trans *trans, bool from_irq) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct isr_statistics *isr_stats = &trans_pcie->isr_stats; @@ -1807,7 +1807,7 @@ void iwl_pcie_handle_rfkill_irq(struct iwl_trans *trans) isr_stats->rfkill++; if (prev != report) - iwl_trans_pcie_rf_kill(trans, report); + iwl_trans_pcie_rf_kill(trans, report, from_irq); mutex_unlock(&trans_pcie->mutex); if (hw_rfkill) { @@ -1947,7 +1947,7 @@ irqreturn_t iwl_pcie_irq_handler(int irq, void *dev_id) /* HW RF KILL switch toggled */ if (inta & CSR_INT_BIT_RF_KILL) { - iwl_pcie_handle_rfkill_irq(trans); + iwl_pcie_handle_rfkill_irq(trans, true); handled |= CSR_INT_BIT_RF_KILL; } @@ -2370,7 +2370,7 @@ irqreturn_t iwl_pcie_irq_msix_handler(int irq, void *dev_id) /* HW RF KILL switch toggled */ if (inta_hw & MSIX_HW_INT_CAUSES_REG_RF_KILL) - iwl_pcie_handle_rfkill_irq(trans); + iwl_pcie_handle_rfkill_irq(trans, true); if (inta_hw & MSIX_HW_INT_CAUSES_REG_HW_ERR) { IWL_ERR(trans, diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 92253260f568..d10208075ae5 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -1082,7 +1082,7 @@ bool iwl_pcie_check_hw_rf_kill(struct iwl_trans *trans) report = test_bit(STATUS_RFKILL_OPMODE, &trans->status); if (prev != report) - iwl_trans_pcie_rf_kill(trans, report); + iwl_trans_pcie_rf_kill(trans, report, false); return hw_rfkill; } @@ -1237,7 +1237,7 @@ static void iwl_pcie_init_msix(struct iwl_trans_pcie *trans_pcie) trans_pcie->hw_mask = trans_pcie->hw_init_mask; } -static void _iwl_trans_pcie_stop_device(struct iwl_trans *trans) +static void _iwl_trans_pcie_stop_device(struct iwl_trans *trans, bool from_irq) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); @@ -1264,7 +1264,8 @@ static void _iwl_trans_pcie_stop_device(struct iwl_trans *trans) if (test_and_clear_bit(STATUS_DEVICE_ENABLED, &trans->status)) { IWL_DEBUG_INFO(trans, "DEVICE_ENABLED bit was set and is now cleared\n"); - iwl_pcie_synchronize_irqs(trans); + if (!from_irq) + iwl_pcie_synchronize_irqs(trans); iwl_pcie_rx_napi_sync(trans); iwl_pcie_tx_stop(trans); iwl_pcie_rx_stop(trans); @@ -1454,7 +1455,7 @@ void iwl_trans_pcie_handle_stop_rfkill(struct iwl_trans *trans, clear_bit(STATUS_RFKILL_OPMODE, &trans->status); } if (hw_rfkill != was_in_rfkill) - iwl_trans_pcie_rf_kill(trans, hw_rfkill); + iwl_trans_pcie_rf_kill(trans, hw_rfkill, false); } static void iwl_trans_pcie_stop_device(struct iwl_trans *trans) @@ -1469,12 +1470,12 @@ static void iwl_trans_pcie_stop_device(struct iwl_trans *trans) mutex_lock(&trans_pcie->mutex); trans_pcie->opmode_down = true; was_in_rfkill = test_bit(STATUS_RFKILL_OPMODE, &trans->status); - _iwl_trans_pcie_stop_device(trans); + _iwl_trans_pcie_stop_device(trans, false); iwl_trans_pcie_handle_stop_rfkill(trans, was_in_rfkill); mutex_unlock(&trans_pcie->mutex); } -void iwl_trans_pcie_rf_kill(struct iwl_trans *trans, bool state) +void iwl_trans_pcie_rf_kill(struct iwl_trans *trans, bool state, bool from_irq) { struct iwl_trans_pcie __maybe_unused *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); @@ -1487,7 +1488,7 @@ void iwl_trans_pcie_rf_kill(struct iwl_trans *trans, bool state) if (trans->trans_cfg->gen2) _iwl_trans_pcie_gen2_stop_device(trans); else - _iwl_trans_pcie_stop_device(trans); + _iwl_trans_pcie_stop_device(trans, from_irq); } } @@ -2887,7 +2888,7 @@ static ssize_t iwl_dbgfs_rfkill_write(struct file *file, IWL_WARN(trans, "changing debug rfkill %d->%d\n", trans_pcie->debug_rfkill, new_value); trans_pcie->debug_rfkill = new_value; - iwl_pcie_handle_rfkill_irq(trans); + iwl_pcie_handle_rfkill_irq(trans, false); return count; } -- cgit v1.2.3-59-g8ed1b From 044879ce54069cf5f0efe7b4e67200d92e23a7da Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 18 Dec 2023 13:11:05 +0100 Subject: MAINTAINERS: wifi: brcm80211: remove non-existing SHA-cyfmac-dev-list@infineon.com When sending an email to SHA-cyfmac-dev-list@infineon.com, the server responds '550 #5.1.0 Address rejected.' Signed-off-by: Lukas Bulwahn Signed-off-by: Kalle Valo Link: https://msgid.link/20231218121105.23882-1-lukas.bulwahn@gmail.com --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 509281e9e169..f72e21ff40fb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4138,7 +4138,6 @@ M: Franky Lin M: Hante Meuleman L: linux-wireless@vger.kernel.org L: brcm80211-dev-list.pdl@broadcom.com -L: SHA-cyfmac-dev-list@infineon.com S: Supported F: drivers/net/wireless/broadcom/brcm80211/ -- cgit v1.2.3-59-g8ed1b From 8c917f16eb6198073ff8bc5d5c83a1bef9fae813 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Wed, 20 Dec 2023 04:38:00 +0200 Subject: wifi: mac80211: do not re-add debugfs entries during resume The driver debugfs entries still exist when the interface is re-added during reconfiguration. This can be either because of a HW restart (in_reconfig) or because we are resuming. Fixes: a1f5dcb1c0c1 ("wifi: mac80211: add a driver callback to add vif debugfs") Signed-off-by: Benjamin Berg Reviewed-by: Gregory Greenman Signed-off-by: Miri Korenblit Link: https://msgid.link/20231220043149.ddd48c66ec6b.Ia81080d92129ceecf462eceb4966bab80df12060@changeid Signed-off-by: Johannes Berg --- net/mac80211/driver-ops.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index d3820333cd59..5e0435db2f9c 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -77,7 +77,7 @@ int drv_add_interface(struct ieee80211_local *local, sdata->flags |= IEEE80211_SDATA_IN_DRIVER; - if (!local->in_reconfig) { + if (!local->in_reconfig && !local->resuming) { drv_vif_add_debugfs(local, sdata); /* initially vif is not MLD */ ieee80211_link_debugfs_drv_add(&sdata->deflink); @@ -534,7 +534,7 @@ int drv_change_vif_links(struct ieee80211_local *local, if (ret) return ret; - if (!local->in_reconfig) { + if (!local->in_reconfig && !local->resuming) { for_each_set_bit(link_id, &links_to_add, IEEE80211_MLD_MAX_NUM_LINKS) { link = rcu_access_pointer(sdata->link[link_id]); @@ -590,7 +590,7 @@ int drv_change_sta_links(struct ieee80211_local *local, return ret; /* during reconfig don't add it to debugfs again */ - if (local->in_reconfig) + if (local->in_reconfig || local->resuming) return 0; for_each_set_bit(link_id, &links_to_add, IEEE80211_MLD_MAX_NUM_LINKS) { -- cgit v1.2.3-59-g8ed1b From 0a3d898ee9a8303d5b3982b97ef0703919c3ea76 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Wed, 20 Dec 2023 04:38:01 +0200 Subject: wifi: mac80211: add/remove driver debugfs entries as appropriate When an interface is removed, we should also be deleting the driver debugfs entries (as it might still exist in DOWN state in mac80211). At the same time, when adding an interface, we can check the IEEE80211_SDATA_IN_DRIVER flag to know whether the interface was previously known to the driver and is simply being reconfigured. Fixes: a1f5dcb1c0c1 ("wifi: mac80211: add a driver callback to add vif debugfs") Signed-off-by: Benjamin Berg Reviewed-by: Gregory Greenman Signed-off-by: Miri Korenblit Link: https://msgid.link/20231220043149.a9f64c359424.I7076526b5297ae8f832228079c999f7b8e147a4c@changeid Signed-off-by: Johannes Berg --- net/mac80211/debugfs_netdev.c | 9 ++++++--- net/mac80211/driver-ops.c | 10 +++++++--- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 80aeb25f1b68..dce5606ed66d 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -1043,9 +1043,12 @@ void ieee80211_debugfs_recreate_netdev(struct ieee80211_sub_if_data *sdata, { ieee80211_debugfs_remove_netdev(sdata); ieee80211_debugfs_add_netdev(sdata, mld_vif); - drv_vif_add_debugfs(sdata->local, sdata); - if (!mld_vif) - ieee80211_link_debugfs_drv_add(&sdata->deflink); + + if (sdata->flags & IEEE80211_SDATA_IN_DRIVER) { + drv_vif_add_debugfs(sdata->local, sdata); + if (!mld_vif) + ieee80211_link_debugfs_drv_add(&sdata->deflink); + } } void ieee80211_link_debugfs_add(struct ieee80211_link_data *link) diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index 5e0435db2f9c..3b7f70073fc3 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -75,9 +75,9 @@ int drv_add_interface(struct ieee80211_local *local, if (ret) return ret; - sdata->flags |= IEEE80211_SDATA_IN_DRIVER; + if (!(sdata->flags & IEEE80211_SDATA_IN_DRIVER)) { + sdata->flags |= IEEE80211_SDATA_IN_DRIVER; - if (!local->in_reconfig && !local->resuming) { drv_vif_add_debugfs(local, sdata); /* initially vif is not MLD */ ieee80211_link_debugfs_drv_add(&sdata->deflink); @@ -113,9 +113,13 @@ void drv_remove_interface(struct ieee80211_local *local, if (!check_sdata_in_driver(sdata)) return; + sdata->flags &= ~IEEE80211_SDATA_IN_DRIVER; + + /* Remove driver debugfs entries */ + ieee80211_debugfs_recreate_netdev(sdata, sdata->vif.valid_links); + trace_drv_remove_interface(local, sdata); local->ops->remove_interface(&local->hw, &sdata->vif); - sdata->flags &= ~IEEE80211_SDATA_IN_DRIVER; trace_drv_return_void(local); } -- cgit v1.2.3-59-g8ed1b From 0ae8e4cca78781401b17721bfb72718fdf7b4912 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 14 Dec 2023 11:50:12 +0100 Subject: netfilter: nf_tables: set transport offset from mac header for netdev/egress Before this patch, transport offset (pkt->thoff) provides an offset relative to the network header. This is fine for the inet families because skb->data points to the network header in such case. However, from netdev/egress, skb->data points to the mac header (if available), thus, pkt->thoff is missing the mac header length. Add skb_network_offset() to the transport offset (pkt->thoff) for netdev, so transport header mangling works as expected. Adjust payload fast eval function to use skb->data now that pkt->thoff provides an absolute offset. This explains why users report that matching on egress/netdev works but payload mangling does not. This patch implicitly fixes payload mangling for IPv4 packets in netdev/egress given skb_store_bits() requires an offset from skb->data to reach the transport header. I suspect that nft_exthdr and the trace infra were also broken from netdev/egress because they also take skb->data as start, and pkt->thoff was not correct. Note that IPv6 is fine because ipv6_find_hdr() already provides a transport offset starting from skb->data, which includes skb_network_offset(). The bridge family also uses nft_set_pktinfo_ipv4_validate(), but there skb_network_offset() is zero, so the update in this patch does not alter the existing behaviour. Fixes: 42df6e1d221d ("netfilter: Introduce egress hook") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_ipv4.h | 2 +- net/netfilter/nf_tables_core.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h index 947973623dc7..60a7d0ce3080 100644 --- a/include/net/netfilter/nf_tables_ipv4.h +++ b/include/net/netfilter/nf_tables_ipv4.h @@ -30,7 +30,7 @@ static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt) return -1; len = iph_totlen(pkt->skb, iph); - thoff = iph->ihl * 4; + thoff = skb_network_offset(pkt->skb) + (iph->ihl * 4); if (pkt->skb->len < len) return -1; else if (len < thoff) diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 8b536d7ef6c2..c3e635364701 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -158,7 +158,7 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr, else { if (!(pkt->flags & NFT_PKTINFO_L4PROTO)) return false; - ptr = skb_network_header(skb) + nft_thoff(pkt); + ptr = skb->data + nft_thoff(pkt); } ptr += priv->offset; -- cgit v1.2.3-59-g8ed1b From 7315dc1e122c85ffdfc8defffbb8f8b616c2eb1a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 19 Dec 2023 19:44:49 +0100 Subject: netfilter: nf_tables: skip set commit for deleted/destroyed sets NFT_MSG_DELSET deactivates all elements in the set, skip set->ops->commit() to avoid the unnecessary clone (for the pipapo case) as well as the sync GC cycle, which could deactivate again expired elements in such set. Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") Reported-by: Kevin Rich Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c5c17c6e80ed..be04af433988 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9887,7 +9887,7 @@ static void nft_set_commit_update(struct list_head *set_update_list) list_for_each_entry_safe(set, next, set_update_list, pending_update) { list_del_init(&set->pending_update); - if (!set->ops->commit) + if (!set->ops->commit || set->dead) continue; set->ops->commit(set); -- cgit v1.2.3-59-g8ed1b From c95f919567d6f1914f13350af61a1b044ac85014 Mon Sep 17 00:00:00 2001 From: Siddh Raman Pant Date: Tue, 19 Dec 2023 23:19:43 +0530 Subject: nfc: llcp_core: Hold a ref to llcp_local->dev when holding a ref to llcp_local llcp_sock_sendmsg() calls nfc_llcp_send_ui_frame() which in turn calls nfc_alloc_send_skb(), which accesses the nfc_dev from the llcp_sock for getting the headroom and tailroom needed for skb allocation. Parallelly the nfc_dev can be freed, as the refcount is decreased via nfc_free_device(), leading to a UAF reported by Syzkaller, which can be summarized as follows: (1) llcp_sock_sendmsg() -> nfc_llcp_send_ui_frame() -> nfc_alloc_send_skb() -> Dereference *nfc_dev (2) virtual_ncidev_close() -> nci_free_device() -> nfc_free_device() -> put_device() -> nfc_release() -> Free *nfc_dev When a reference to llcp_local is acquired, we do not acquire the same for the nfc_dev. This leads to freeing even when the llcp_local is in use, and this is the case with the UAF described above too. Thus, when we acquire a reference to llcp_local, we should acquire a reference to nfc_dev, and release the references appropriately later. References for llcp_local is initialized in nfc_llcp_register_device() (which is called by nfc_register_device()). Thus, we should acquire a reference to nfc_dev there. nfc_unregister_device() calls nfc_llcp_unregister_device() which in turn calls nfc_llcp_local_put(). Thus, the reference to nfc_dev is appropriately released later. Reported-and-tested-by: syzbot+bbe84a4010eeea00982d@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=bbe84a4010eeea00982d Fixes: c7aa12252f51 ("NFC: Take a reference on the LLCP local pointer when creating a socket") Reviewed-by: Suman Ghosh Signed-off-by: Siddh Raman Pant Reviewed-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- net/nfc/llcp_core.c | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index 1dac28136e6a..18be13fb9b75 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -145,6 +145,13 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool device, static struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local) { + /* Since using nfc_llcp_local may result in usage of nfc_dev, whenever + * we hold a reference to local, we also need to hold a reference to + * the device to avoid UAF. + */ + if (!nfc_get_device(local->dev->idx)) + return NULL; + kref_get(&local->ref); return local; @@ -177,10 +184,18 @@ static void local_release(struct kref *ref) int nfc_llcp_local_put(struct nfc_llcp_local *local) { + struct nfc_dev *dev; + int ret; + if (local == NULL) return 0; - return kref_put(&local->ref, local_release); + dev = local->dev; + + ret = kref_put(&local->ref, local_release); + nfc_put_device(dev); + + return ret; } static struct nfc_llcp_sock *nfc_llcp_sock_get(struct nfc_llcp_local *local, @@ -959,8 +974,17 @@ static void nfc_llcp_recv_connect(struct nfc_llcp_local *local, } new_sock = nfc_llcp_sock(new_sk); - new_sock->dev = local->dev; + new_sock->local = nfc_llcp_local_get(local); + if (!new_sock->local) { + reason = LLCP_DM_REJ; + sock_put(&new_sock->sk); + release_sock(&sock->sk); + sock_put(&sock->sk); + goto fail; + } + + new_sock->dev = local->dev; new_sock->rw = sock->rw; new_sock->miux = sock->miux; new_sock->nfc_protocol = sock->nfc_protocol; @@ -1597,7 +1621,16 @@ int nfc_llcp_register_device(struct nfc_dev *ndev) if (local == NULL) return -ENOMEM; - local->dev = ndev; + /* As we are going to initialize local's refcount, we need to get the + * nfc_dev to avoid UAF, otherwise there is no point in continuing. + * See nfc_llcp_local_get(). + */ + local->dev = nfc_get_device(ndev->idx); + if (!local->dev) { + kfree(local); + return -ENODEV; + } + INIT_LIST_HEAD(&local->list); kref_init(&local->ref); mutex_init(&local->sdp_lock); -- cgit v1.2.3-59-g8ed1b From 6ec0d7527c4287369b52df3bcefd21a0c4fb2b7c Mon Sep 17 00:00:00 2001 From: Siddh Raman Pant Date: Tue, 19 Dec 2023 23:19:44 +0530 Subject: nfc: Do not send datagram if socket state isn't LLCP_BOUND As we know we cannot send the datagram (state can be set to LLCP_CLOSED by nfc_llcp_socket_release()), there is no need to proceed further. Thus, bail out early from llcp_sock_sendmsg(). Signed-off-by: Siddh Raman Pant Reviewed-by: Krzysztof Kozlowski Reviewed-by: Suman Ghosh Signed-off-by: David S. Miller --- net/nfc/llcp_sock.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 645677f84dba..819157bbb5a2 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -796,6 +796,11 @@ static int llcp_sock_sendmsg(struct socket *sock, struct msghdr *msg, } if (sk->sk_type == SOCK_DGRAM) { + if (sk->sk_state != LLCP_BOUND) { + release_sock(sk); + return -ENOTCONN; + } + DECLARE_SOCKADDR(struct sockaddr_nfc_llcp *, addr, msg->msg_name); -- cgit v1.2.3-59-g8ed1b From fea7b71b8751a0bfa7fdf9977e5782f061f71589 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Mon, 11 Dec 2023 13:31:44 +0100 Subject: idpf: fix corrupted frames and skb leaks in singleq mode idpf_ring::skb serves only for keeping an incomplete frame between several NAPI Rx polling cycles, as one cycle may end up before processing the end of packet descriptor. The pointer is taken from the ring onto the stack before entering the loop and gets written there after the loop exits. When inside the loop, only the onstack pointer is used. For some reason, the logics is broken in the singleq mode, where the pointer is taken from the ring each iteration. This means that if a frame got fragmented into several descriptors, each fragment will have its own skb, but only the last one will be passed up the stack (containing garbage), leaving the rest leaked. Then, on ifdown, rxq::skb is being freed only in the splitq mode, while it can point to a valid skb in singleq as well. This can lead to a yet another skb leak. Just don't touch the ring skb field inside the polling loop, letting the onstack skb pointer work as expected: build a new skb if it's the first frame descriptor and attach a frag otherwise. On ifdown, free rxq::skb unconditionally if the pointer is non-NULL. Fixes: a5ab9ee0df0b ("idpf: add singleq start_xmit and napi poll") Reviewed-by: Przemek Kitszel Reviewed-by: Michal Kubiak Reviewed-by: Simon Horman Reviewed-by: Eric Dumazet Signed-off-by: Alexander Lobakin Tested-by: Scott Register Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c | 1 - drivers/net/ethernet/intel/idpf/idpf_txrx.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c index 81288a17da2a..20c4b3a64710 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c @@ -1044,7 +1044,6 @@ static int idpf_rx_singleq_clean(struct idpf_queue *rx_q, int budget) } idpf_rx_sync_for_cpu(rx_buf, fields.size); - skb = rx_q->skb; if (skb) idpf_rx_add_frag(rx_buf, skb, fields.size); else diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 1f728a9004d9..9e942e5baf39 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -396,7 +396,7 @@ static void idpf_rx_desc_rel(struct idpf_queue *rxq, bool bufq, s32 q_model) if (!rxq) return; - if (!bufq && idpf_is_queue_model_split(q_model) && rxq->skb) { + if (rxq->skb) { dev_kfree_skb_any(rxq->skb); rxq->skb = NULL; } -- cgit v1.2.3-59-g8ed1b From a613fb464dc4d8902d1dbca836a0872a80d36296 Mon Sep 17 00:00:00 2001 From: Pavan Kumar Linga Date: Fri, 15 Dec 2023 15:48:07 -0800 Subject: idpf: avoid compiler introduced padding in virtchnl2_rss_key struct Size of the virtchnl2_rss_key struct should be 7 bytes but the compiler introduces a padding byte for the structure alignment. This results in idpf sending an additional byte of memory to the device control plane than the expected buffer size. As the control plane enforces virtchnl message size checks to validate the message, set RSS key message fails resulting in the driver load failure. Remove implicit compiler padding by using "__packed" structure attribute for the virtchnl2_rss_key struct. Also there is no need to use __DECLARE_FLEX_ARRAY macro for the 'key_flex' struct field. So drop it. Fixes: 0d7502a9b4a7 ("virtchnl: add virtchnl version 2 ops") Reviewed-by: Larysa Zaremba Signed-off-by: Pavan Kumar Linga Reviewed-by: Simon Horman Tested-by: Scott Register Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/virtchnl2.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/virtchnl2.h b/drivers/net/ethernet/intel/idpf/virtchnl2.h index 07e72c72d156..8dc837889723 100644 --- a/drivers/net/ethernet/intel/idpf/virtchnl2.h +++ b/drivers/net/ethernet/intel/idpf/virtchnl2.h @@ -1104,9 +1104,9 @@ struct virtchnl2_rss_key { __le32 vport_id; __le16 key_len; u8 pad; - __DECLARE_FLEX_ARRAY(u8, key_flex); -}; -VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_rss_key); + u8 key_flex[]; +} __packed; +VIRTCHNL2_CHECK_STRUCT_LEN(7, virtchnl2_rss_key); /** * struct virtchnl2_queue_chunk - chunk of contiguous queues -- cgit v1.2.3-59-g8ed1b From 0ee2384a5a0f3b4eeac8d10bb01a0609d245a4d1 Mon Sep 17 00:00:00 2001 From: Suman Ghosh Date: Tue, 19 Dec 2023 19:56:33 +0530 Subject: octeontx2-af: Fix marking couple of structure as __packed Couple of structures was not marked as __packed. This patch fixes the same and mark them as __packed. Fixes: 42006910b5ea ("octeontx2-af: cleanup KPU config data") Signed-off-by: Suman Ghosh Reviewed-by: Jacob Keller Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/npc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/npc.h index ab3e39eef2eb..8c0732c9a7ee 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/npc.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/npc.h @@ -528,7 +528,7 @@ struct npc_lt_def { u8 ltype_mask; u8 ltype_match; u8 lid; -}; +} __packed; struct npc_lt_def_ipsec { u8 ltype_mask; @@ -536,7 +536,7 @@ struct npc_lt_def_ipsec { u8 lid; u8 spi_offset; u8 spi_nz; -}; +} __packed; struct npc_lt_def_apad { u8 ltype_mask; -- cgit v1.2.3-59-g8ed1b From 6a8d8bb55e7001de2d50920381cc858f3a3e9fb7 Mon Sep 17 00:00:00 2001 From: Katarzyna Wieczerzycka Date: Fri, 15 Dec 2023 12:01:56 +0100 Subject: ice: Fix link_down_on_close message The driver should not report an error message when for a medialess port the link_down_on_close flag is enabled and the physical link cannot be set down. Fixes: 8ac7132704f3 ("ice: Fix interface being down after reset with link-down-on-close flag on") Reviewed-by: Przemek Kitszel Signed-off-by: Katarzyna Wieczerzycka Signed-off-by: Wojciech Drewek Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index fb9c93f37e84..270d68b69c59 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2146,7 +2146,7 @@ static int ice_configure_phy(struct ice_vsi *vsi) /* Ensure we have media as we cannot configure a medialess port */ if (!(phy->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) - return -EPERM; + return -ENOMEDIUM; ice_print_topo_conflict(vsi); @@ -9187,8 +9187,12 @@ int ice_stop(struct net_device *netdev) int link_err = ice_force_phys_link_state(vsi, false); if (link_err) { - netdev_err(vsi->netdev, "Failed to set physical link down, VSI %d error %d\n", - vsi->vsi_num, link_err); + if (link_err == -ENOMEDIUM) + netdev_info(vsi->netdev, "Skipping link reconfig - no media attached, VSI %d\n", + vsi->vsi_num); + else + netdev_err(vsi->netdev, "Failed to set physical link down, VSI %d error %d\n", + vsi->vsi_num, link_err); return -EIO; } } -- cgit v1.2.3-59-g8ed1b From 6d05ff55ef4f4954d28551236239f297bd52ea48 Mon Sep 17 00:00:00 2001 From: Ngai-Mint Kwan Date: Fri, 15 Dec 2023 12:01:57 +0100 Subject: ice: Shut down VSI with "link-down-on-close" enabled Disabling netdev with ethtool private flag "link-down-on-close" enabled can cause NULL pointer dereference bug. Shut down VSI regardless of "link-down-on-close" state. Fixes: 8ac7132704f3 ("ice: Fix interface being down after reset with link-down-on-close flag on") Reviewed-by: Przemek Kitszel Signed-off-by: Ngai-Mint Kwan Signed-off-by: Wojciech Drewek Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 270d68b69c59..adfdea1e2805 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -9193,6 +9193,8 @@ int ice_stop(struct net_device *netdev) else netdev_err(vsi->netdev, "Failed to set physical link down, VSI %d error %d\n", vsi->vsi_num, link_err); + + ice_vsi_close(vsi); return -EIO; } } -- cgit v1.2.3-59-g8ed1b From 8278a6a43d030a3aa8d7768148e74844331e39e3 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kubalewski Date: Mon, 18 Dec 2023 15:58:55 +0100 Subject: ice: dpll: fix phase offset value Stop dividing the phase_offset value received from firmware. This fault is present since the initial implementation. The phase_offset value received from firmware is in 0.01ps resolution. Dpll subsystem is using the value in 0.001ps, raw value is adjusted before providing it to the user. The user can observe the value of phase offset with response to `pin-get` netlink message of dpll subsystem for an active pin: $ ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/dpll.yaml \ --do pin-get --json '{"id":2}' Where example of correct response would be: {'board-label': 'C827_0-RCLKA', 'capabilities': 6, 'clock-id': 4658613174691613800, 'frequency': 1953125, 'id': 2, 'module-name': 'ice', 'parent-device': [{'direction': 'input', 'parent-id': 6, 'phase-offset': -216839550, 'prio': 9, 'state': 'connected'}, {'direction': 'input', 'parent-id': 7, 'phase-offset': -42930, 'prio': 8, 'state': 'connected'}], 'phase-adjust': 0, 'phase-adjust-max': 16723, 'phase-adjust-min': -16723, 'type': 'mux'} Provided phase-offset value (-42930) shall be divided by the user with DPLL_PHASE_OFFSET_DIVIDER to get actual value of -42.930 ps. Before the fix, the response was not correct: {'board-label': 'C827_0-RCLKA', 'capabilities': 6, 'clock-id': 4658613174691613800, 'frequency': 1953125, 'id': 2, 'module-name': 'ice', 'parent-device': [{'direction': 'input', 'parent-id': 6, 'phase-offset': -216839, 'prio': 9, 'state': 'connected'}, {'direction': 'input', 'parent-id': 7, 'phase-offset': -42, 'prio': 8, 'state': 'connected'}], 'phase-adjust': 0, 'phase-adjust-max': 16723, 'phase-adjust-min': -16723, 'type': 'mux'} Where phase-offset value (-42), after division (DPLL_PHASE_OFFSET_DIVIDER) would be: -0.042 ps. Fixes: 8a3a565ff210 ("ice: add admin commands to access cgu configuration") Fixes: 90e1c90750d7 ("ice: dpll: implement phase related callbacks") Reviewed-by: Aleksandr Loktionov Reviewed-by: Przemek Kitszel Signed-off-by: Arkadiusz Kubalewski Reviewed-by: Paul Menzel Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_common.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 9a6c25f98632..edac34c796ce 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -5332,7 +5332,6 @@ ice_aq_get_cgu_dpll_status(struct ice_hw *hw, u8 dpll_num, u8 *ref_state, u8 *eec_mode) { struct ice_aqc_get_cgu_dpll_status *cmd; - const s64 nsec_per_psec = 1000LL; struct ice_aq_desc desc; int status; @@ -5348,8 +5347,7 @@ ice_aq_get_cgu_dpll_status(struct ice_hw *hw, u8 dpll_num, u8 *ref_state, *phase_offset = le32_to_cpu(cmd->phase_offset_h); *phase_offset <<= 32; *phase_offset += le32_to_cpu(cmd->phase_offset_l); - *phase_offset = div64_s64(sign_extend64(*phase_offset, 47), - nsec_per_psec); + *phase_offset = sign_extend64(*phase_offset, 47); *eec_mode = cmd->eec_mode; } -- cgit v1.2.3-59-g8ed1b From 3e48041d9820c17e0a51599d12e66c6e12a8d08d Mon Sep 17 00:00:00 2001 From: Sudheer Mogilappagari Date: Wed, 29 Nov 2023 11:23:11 +0100 Subject: i40e: Fix filter input checks to prevent config with invalid values Prevent VF from configuring filters with unsupported actions or use REDIRECT action with invalid tc number. Current checks could cause out of bounds access on PF side. Fixes: e284fc280473 ("i40e: Add and delete cloud filter") Reviewed-by: Andrii Staikov Signed-off-by: Sudheer Mogilappagari Signed-off-by: Aleksandr Loktionov Reviewed-by: Simon Horman Tested-by: Bharathi Sreenivas Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 3f99eb198245..031b15cceab9 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -3521,16 +3521,16 @@ static int i40e_validate_cloud_filter(struct i40e_vf *vf, bool found = false; int bkt; - if (!tc_filter->action) { + if (tc_filter->action != VIRTCHNL_ACTION_TC_REDIRECT) { dev_info(&pf->pdev->dev, - "VF %d: Currently ADq doesn't support Drop Action\n", - vf->vf_id); + "VF %d: ADQ doesn't support this action (%d)\n", + vf->vf_id, tc_filter->action); goto err; } /* action_meta is TC number here to which the filter is applied */ if (!tc_filter->action_meta || - tc_filter->action_meta > I40E_MAX_VF_VSI) { + tc_filter->action_meta > vf->num_tc) { dev_info(&pf->pdev->dev, "VF %d: Invalid TC number %u\n", vf->vf_id, tc_filter->action_meta); goto err; -- cgit v1.2.3-59-g8ed1b From 088464abd48cf3735aee91f9e211b32da9d81117 Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Fri, 1 Dec 2023 08:50:42 +0100 Subject: igc: Report VLAN EtherType matching back to user Currently the driver allows to configure matching by VLAN EtherType. However, the retrieval function does not report it back to the user. Add it. Before: |root@host:~# ethtool -N enp3s0 flow-type ether vlan-etype 0x8100 action 0 |Added rule with ID 63 |root@host:~# ethtool --show-ntuple enp3s0 |4 RX rings available |Total 1 rules | |Filter: 63 | Flow Type: Raw Ethernet | Src MAC addr: 00:00:00:00:00:00 mask: FF:FF:FF:FF:FF:FF | Dest MAC addr: 00:00:00:00:00:00 mask: FF:FF:FF:FF:FF:FF | Ethertype: 0x0 mask: 0xFFFF | Action: Direct to queue 0 After: |root@host:~# ethtool -N enp3s0 flow-type ether vlan-etype 0x8100 action 0 |Added rule with ID 63 |root@host:~# ethtool --show-ntuple enp3s0 |4 RX rings available |Total 1 rules | |Filter: 63 | Flow Type: Raw Ethernet | Src MAC addr: 00:00:00:00:00:00 mask: FF:FF:FF:FF:FF:FF | Dest MAC addr: 00:00:00:00:00:00 mask: FF:FF:FF:FF:FF:FF | Ethertype: 0x0 mask: 0xFFFF | VLAN EtherType: 0x8100 mask: 0x0 | VLAN: 0x0 mask: 0xffff | User-defined: 0x0 mask: 0xffffffffffffffff | Action: Direct to queue 0 Fixes: 2b477d057e33 ("igc: Integrate flex filter into ethtool ops") Signed-off-by: Kurt Kanzenbach Acked-by: Vinicius Costa Gomes Reviewed-by: Simon Horman Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_ethtool.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 785eaa8e0ba8..69b2fd006293 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -980,6 +980,12 @@ static int igc_ethtool_get_nfc_rule(struct igc_adapter *adapter, fsp->m_u.ether_spec.h_proto = ETHER_TYPE_FULL_MASK; } + if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_ETYPE) { + fsp->flow_type |= FLOW_EXT; + fsp->h_ext.vlan_etype = rule->filter.vlan_etype; + fsp->m_ext.vlan_etype = ETHER_TYPE_FULL_MASK; + } + if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) { fsp->flow_type |= FLOW_EXT; fsp->h_ext.vlan_tci = htons(rule->filter.vlan_tci); -- cgit v1.2.3-59-g8ed1b From b5063cbe148b829e8eb97672c2cbccc058835476 Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Fri, 1 Dec 2023 08:50:43 +0100 Subject: igc: Check VLAN TCI mask Currently the driver accepts VLAN TCI steering rules regardless of the configured mask. And things might fail silently or with confusing error messages to the user. There are two ways to handle the VLAN TCI mask: 1. Match on the PCP field using a VLAN prio filter 2. Match on complete TCI field using a flex filter Therefore, add checks and code for that. For instance the following rule is invalid and will be converted into a VLAN prio rule which is not correct: |root@host:~# ethtool -N enp3s0 flow-type ether vlan 0x0001 m 0xf000 \ | action 1 |Added rule with ID 61 |root@host:~# ethtool --show-ntuple enp3s0 |4 RX rings available |Total 1 rules | |Filter: 61 | Flow Type: Raw Ethernet | Src MAC addr: 00:00:00:00:00:00 mask: FF:FF:FF:FF:FF:FF | Dest MAC addr: 00:00:00:00:00:00 mask: FF:FF:FF:FF:FF:FF | Ethertype: 0x0 mask: 0xFFFF | VLAN EtherType: 0x0 mask: 0xffff | VLAN: 0x1 mask: 0x1fff | User-defined: 0x0 mask: 0xffffffffffffffff | Action: Direct to queue 1 After: |root@host:~# ethtool -N enp3s0 flow-type ether vlan 0x0001 m 0xf000 \ | action 1 |rmgr: Cannot insert RX class rule: Operation not supported Fixes: 7991487ecb2d ("igc: Allow for Flex Filters to be installed") Signed-off-by: Kurt Kanzenbach Acked-by: Vinicius Costa Gomes Reviewed-by: Simon Horman Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc.h | 1 + drivers/net/ethernet/intel/igc/igc_ethtool.c | 28 +++++++++++++++++++++++++--- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index f48f82d5e274..85cc16396506 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -568,6 +568,7 @@ struct igc_nfc_filter { u16 etype; __be16 vlan_etype; u16 vlan_tci; + u16 vlan_tci_mask; u8 src_addr[ETH_ALEN]; u8 dst_addr[ETH_ALEN]; u8 user_data[8]; diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 69b2fd006293..b56b4f338bd3 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -958,6 +958,7 @@ static int igc_ethtool_set_coalesce(struct net_device *netdev, } #define ETHER_TYPE_FULL_MASK ((__force __be16)~0) +#define VLAN_TCI_FULL_MASK ((__force __be16)~0) static int igc_ethtool_get_nfc_rule(struct igc_adapter *adapter, struct ethtool_rxnfc *cmd) { @@ -989,7 +990,7 @@ static int igc_ethtool_get_nfc_rule(struct igc_adapter *adapter, if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) { fsp->flow_type |= FLOW_EXT; fsp->h_ext.vlan_tci = htons(rule->filter.vlan_tci); - fsp->m_ext.vlan_tci = htons(VLAN_PRIO_MASK); + fsp->m_ext.vlan_tci = htons(rule->filter.vlan_tci_mask); } if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) { @@ -1224,6 +1225,7 @@ static void igc_ethtool_init_nfc_rule(struct igc_nfc_rule *rule, if ((fsp->flow_type & FLOW_EXT) && fsp->m_ext.vlan_tci) { rule->filter.vlan_tci = ntohs(fsp->h_ext.vlan_tci); + rule->filter.vlan_tci_mask = ntohs(fsp->m_ext.vlan_tci); rule->filter.match_flags |= IGC_FILTER_FLAG_VLAN_TCI; } @@ -1261,11 +1263,19 @@ static void igc_ethtool_init_nfc_rule(struct igc_nfc_rule *rule, memcpy(rule->filter.user_mask, fsp->m_ext.data, sizeof(fsp->m_ext.data)); } - /* When multiple filter options or user data or vlan etype is set, use a - * flex filter. + /* The i225/i226 has various different filters. Flex filters provide a + * way to match up to the first 128 bytes of a packet. Use them for: + * a) For specific user data + * b) For VLAN EtherType + * c) For full TCI match + * d) Or in case multiple filter criteria are set + * + * Otherwise, use the simple MAC, VLAN PRIO or EtherType filters. */ if ((rule->filter.match_flags & IGC_FILTER_FLAG_USER_DATA) || (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_ETYPE) || + ((rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) && + rule->filter.vlan_tci_mask == ntohs(VLAN_TCI_FULL_MASK)) || (rule->filter.match_flags & (rule->filter.match_flags - 1))) rule->flex = true; else @@ -1335,6 +1345,18 @@ static int igc_ethtool_add_nfc_rule(struct igc_adapter *adapter, return -EINVAL; } + /* There are two ways to match the VLAN TCI: + * 1. Match on PCP field and use vlan prio filter for it + * 2. Match on complete TCI field and use flex filter for it + */ + if ((fsp->flow_type & FLOW_EXT) && + fsp->m_ext.vlan_tci && + fsp->m_ext.vlan_tci != htons(VLAN_PRIO_MASK) && + fsp->m_ext.vlan_tci != VLAN_TCI_FULL_MASK) { + netdev_dbg(netdev, "VLAN mask not supported\n"); + return -EOPNOTSUPP; + } + if (fsp->location >= IGC_MAX_RXNFC_RULES) { netdev_dbg(netdev, "Invalid location\n"); return -EINVAL; -- cgit v1.2.3-59-g8ed1b From 7afd49a38e73afd57ff62c8d1cf5af760c4d49c0 Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Wed, 6 Dec 2023 15:07:18 +0100 Subject: igc: Check VLAN EtherType mask Currently the driver accepts VLAN EtherType steering rules regardless of the configured mask. And things might fail silently or with confusing error messages to the user. The VLAN EtherType can only be matched by full mask. Therefore, add a check for that. For instance the following rule is invalid, but the driver accepts it and ignores the user specified mask: |root@host:~# ethtool -N enp3s0 flow-type ether vlan-etype 0x8100 \ | m 0x00ff action 0 |Added rule with ID 63 |root@host:~# ethtool --show-ntuple enp3s0 |4 RX rings available |Total 1 rules | |Filter: 63 | Flow Type: Raw Ethernet | Src MAC addr: 00:00:00:00:00:00 mask: FF:FF:FF:FF:FF:FF | Dest MAC addr: 00:00:00:00:00:00 mask: FF:FF:FF:FF:FF:FF | Ethertype: 0x0 mask: 0xFFFF | VLAN EtherType: 0x8100 mask: 0x0 | VLAN: 0x0 mask: 0xffff | User-defined: 0x0 mask: 0xffffffffffffffff | Action: Direct to queue 0 After: |root@host:~# ethtool -N enp3s0 flow-type ether vlan-etype 0x8100 \ | m 0x00ff action 0 |rmgr: Cannot insert RX class rule: Operation not supported Fixes: 2b477d057e33 ("igc: Integrate flex filter into ethtool ops") Suggested-by: Suman Ghosh Signed-off-by: Kurt Kanzenbach Acked-by: Vinicius Costa Gomes Reviewed-by: Simon Horman Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_ethtool.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index b56b4f338bd3..859b2636f3d9 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -1357,6 +1357,14 @@ static int igc_ethtool_add_nfc_rule(struct igc_adapter *adapter, return -EOPNOTSUPP; } + /* VLAN EtherType can only be matched by full mask. */ + if ((fsp->flow_type & FLOW_EXT) && + fsp->m_ext.vlan_etype && + fsp->m_ext.vlan_etype != ETHER_TYPE_FULL_MASK) { + netdev_dbg(netdev, "VLAN EtherType mask not supported\n"); + return -EOPNOTSUPP; + } + if (fsp->location >= IGC_MAX_RXNFC_RULES) { netdev_dbg(netdev, "Invalid location\n"); return -EINVAL; -- cgit v1.2.3-59-g8ed1b From 97417cd79ce179a774b245a3f5535cc3fbbaee50 Mon Sep 17 00:00:00 2001 From: Sagi Maimon Date: Wed, 20 Dec 2023 10:19:14 +0200 Subject: ptp: ocp: fix bug in unregistering the DPLL subsystem When unregistering the DPLL subsystem the priv pointer is different then the one used for registration which cause failure in unregistering. Fixes: 09eeb3aecc6c ("ptp_ocp: implement DPLL ops") Reviewed-by: Jiri Pirko Reviewed-by: Vadim Fedorenko Signed-off-by: David S. Miller --- drivers/ptp/ptp_ocp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index 4021d3d325f9..e7defce8cf48 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -4492,7 +4492,7 @@ ptp_ocp_remove(struct pci_dev *pdev) cancel_delayed_work_sync(&bp->sync_work); for (i = 0; i < OCP_SMA_NUM; i++) { if (bp->sma[i].dpll_pin) { - dpll_pin_unregister(bp->dpll, bp->sma[i].dpll_pin, &dpll_pins_ops, bp); + dpll_pin_unregister(bp->dpll, bp->sma[i].dpll_pin, &dpll_pins_ops, &bp->sma[i]); dpll_pin_put(bp->sma[i].dpll_pin); } } -- cgit v1.2.3-59-g8ed1b From dcea1bd45e6d111cc8fc1aaefa7e31694089bda3 Mon Sep 17 00:00:00 2001 From: David Thompson Date: Wed, 20 Dec 2023 18:47:39 -0500 Subject: mlxbf_gige: fix receive packet race condition Under heavy traffic, the BlueField Gigabit interface can become unresponsive. This is due to a possible race condition in the mlxbf_gige_rx_packet function, where the function exits with producer and consumer indices equal but there are remaining packet(s) to be processed. In order to prevent this situation, read receive consumer index *before* the HW replenish so that the mlxbf_gige_rx_packet function returns an accurate return value even if a packet is received into just-replenished buffer prior to exiting this routine. If the just-replenished buffer is received and occupies the last RX ring entry, the interface would not recover and instead would encounter RX packet drops related to internal buffer shortages since the driver RX logic is not being triggered to drain the RX ring. This patch will address and prevent this "ring full" condition. Fixes: f92e1869d74e ("Add Mellanox BlueField Gigabit Ethernet driver") Reviewed-by: Asmaa Mnebhi Signed-off-by: David Thompson Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c index 0d5a41a2ae01..227d01cace3f 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c @@ -267,6 +267,13 @@ static bool mlxbf_gige_rx_packet(struct mlxbf_gige *priv, int *rx_pkts) priv->stats.rx_truncate_errors++; } + /* Read receive consumer index before replenish so that this routine + * returns accurate return value even if packet is received into + * just-replenished buffer prior to exiting this routine. + */ + rx_ci = readq(priv->base + MLXBF_GIGE_RX_CQE_PACKET_CI); + rx_ci_rem = rx_ci % priv->rx_q_entries; + /* Let hardware know we've replenished one buffer */ rx_pi++; @@ -279,8 +286,6 @@ static bool mlxbf_gige_rx_packet(struct mlxbf_gige *priv, int *rx_pkts) rx_pi_rem = rx_pi % priv->rx_q_entries; if (rx_pi_rem == 0) priv->valid_polarity ^= 1; - rx_ci = readq(priv->base + MLXBF_GIGE_RX_CQE_PACKET_CI); - rx_ci_rem = rx_ci % priv->rx_q_entries; if (skb) netif_receive_skb(skb); -- cgit v1.2.3-59-g8ed1b From 8fcb0382af6f1ef50936f1be05b8149eb2f88496 Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Thu, 21 Dec 2023 10:25:31 +0800 Subject: net: sched: em_text: fix possible memory leak in em_text_destroy() m->data needs to be freed when em_text_destroy is called. Fixes: d675c989ed2d ("[PKT_SCHED]: Packet classification based on textsearch (ematch)") Acked-by: Jamal Hadi Salim Signed-off-by: Hangyu Hua Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/sched/em_text.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sched/em_text.c b/net/sched/em_text.c index 6f3c1fb2fb44..f176afb70559 100644 --- a/net/sched/em_text.c +++ b/net/sched/em_text.c @@ -97,8 +97,10 @@ retry: static void em_text_destroy(struct tcf_ematch *m) { - if (EM_TEXT_PRIV(m) && EM_TEXT_PRIV(m)->config) + if (EM_TEXT_PRIV(m) && EM_TEXT_PRIV(m)->config) { textsearch_destroy(EM_TEXT_PRIV(m)->config); + kfree(EM_TEXT_PRIV(m)); + } } static int em_text_dump(struct sk_buff *skb, struct tcf_ematch *m) -- cgit v1.2.3-59-g8ed1b From b901a4e276943f61e11ddb597a0abc1e7dfadf0f Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 22 Dec 2023 01:13:59 +0000 Subject: net/tcp_sigpool: Use kref_get_unless_zero() The freeing and re-allocation of algorithm are protected by cpool_mutex, so it doesn't fix an actual use-after-free, but avoids a deserved refcount_warn_saturate() warning. A trivial fix for the racy behavior. Fixes: 8c73b26315aa ("net/tcp: Prepare tcp_md5sig_pool for TCP-AO") Suggested-by: Eric Dumazet Signed-off-by: Dmitry Safonov Tested-by: Bagas Sanjaya Reported-by: syzbot Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_sigpool.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_sigpool.c b/net/ipv4/tcp_sigpool.c index 55b310a722c7..8512cb09ebc0 100644 --- a/net/ipv4/tcp_sigpool.c +++ b/net/ipv4/tcp_sigpool.c @@ -162,9 +162,8 @@ int tcp_sigpool_alloc_ahash(const char *alg, size_t scratch_size) if (strcmp(cpool[i].alg, alg)) continue; - if (kref_read(&cpool[i].kref) > 0) - kref_get(&cpool[i].kref); - else + /* pairs with tcp_sigpool_release() */ + if (!kref_get_unless_zero(&cpool[i].kref)) kref_init(&cpool[i].kref); ret = i; goto out; -- cgit v1.2.3-59-g8ed1b From 9c476269bff2908a20930c58085bf0b05ebd569a Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 22 Dec 2023 12:34:09 +0800 Subject: r8169: Fix PCI error on system resume Some r8168 NICs stop working upon system resume: [ 688.051096] r8169 0000:02:00.1 enp2s0f1: rtl_ep_ocp_read_cond == 0 (loop: 10, delay: 10000). [ 688.175131] r8169 0000:02:00.1 enp2s0f1: Link is Down ... [ 691.534611] r8169 0000:02:00.1 enp2s0f1: PCI error (cmd = 0x0407, status_errs = 0x0000) Not sure if it's related, but those NICs have a BMC device at function 0: 02:00.0 Unassigned class [ff00]: Realtek Semiconductor Co., Ltd. Realtek RealManage BMC [10ec:816e] (rev 1a) Trial and error shows that increase the loop wait on rtl_ep_ocp_read_cond to 30 can eliminate the issue, so let rtl8168ep_driver_start() to wait a bit longer. Fixes: e6d6ca6e1204 ("r8169: Add support for another RTL8168FP") Signed-off-by: Kai-Heng Feng Reviewed-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index bb787a52bc75..81fd31f6fac4 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -1211,7 +1211,7 @@ static void rtl8168ep_driver_start(struct rtl8169_private *tp) { r8168ep_ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_START); r8168ep_ocp_write(tp, 0x01, 0x30, r8168ep_ocp_read(tp, 0x30) | 0x01); - rtl_loop_wait_high(tp, &rtl_ep_ocp_read_cond, 10000, 10); + rtl_loop_wait_high(tp, &rtl_ep_ocp_read_cond, 10000, 30); } static void rtl8168_driver_start(struct rtl8169_private *tp) -- cgit v1.2.3-59-g8ed1b From 82585d5e2af13608dd18fe2458126cd82e2cc255 Mon Sep 17 00:00:00 2001 From: "Radu Pirea (NXP OSS)" Date: Thu, 21 Dec 2023 17:44:19 +0200 Subject: MAINTAINERS: step down as TJA11XX C45 maintainer I am stepping down as TJA11XX C45 maintainer. Andrei Botila will take the responsibility to maintain and improve the support for TJA11XX C45 PHYs. Signed-off-by: Radu Pirea (NXP OSS) Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 61f9c0f018dd..5ae0191d9a90 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15424,7 +15424,7 @@ F: Documentation/devicetree/bindings/net/bluetooth/nxp,88w8987-bt.yaml F: drivers/bluetooth/btnxpuart.c NXP C45 TJA11XX PHY DRIVER -M: Radu Pirea +M: Andrei Botila L: netdev@vger.kernel.org S: Maintained F: drivers/net/phy/nxp-c45-tja11xx.c -- cgit v1.2.3-59-g8ed1b From 9bf2e9165f90dc9f416af53c902be7e33930f728 Mon Sep 17 00:00:00 2001 From: Sarannya S Date: Thu, 21 Dec 2023 15:36:51 +0530 Subject: net: qrtr: ns: Return 0 if server port is not present When a 'DEL_CLIENT' message is received from the remote, the corresponding server port gets deleted. A DEL_SERVER message is then announced for this server. As part of handling the subsequent DEL_SERVER message, the name- server attempts to delete the server port which results in a '-ENOENT' error. The return value from server_del() is then propagated back to qrtr_ns_worker, causing excessive error prints. To address this, return 0 from control_cmd_del_server() without checking the return value of server_del(), since the above scenario is not an error case and hence server_del() doesn't have any other error return value. Signed-off-by: Sarannya Sasikumar Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/qrtr/ns.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c index b1db0b519179..abb0c70ffc8b 100644 --- a/net/qrtr/ns.c +++ b/net/qrtr/ns.c @@ -512,7 +512,9 @@ static int ctrl_cmd_del_server(struct sockaddr_qrtr *from, if (!node) return -ENOENT; - return server_del(node, port, true); + server_del(node, port, true); + + return 0; } static int ctrl_cmd_new_lookup(struct sockaddr_qrtr *from, -- cgit v1.2.3-59-g8ed1b From 7f6ca95d16b96567ce4cf458a2790ff17fa620c3 Mon Sep 17 00:00:00 2001 From: Jörn-Thorben Hinz Date: Fri, 22 Dec 2023 00:19:01 +0100 Subject: net: Implement missing getsockopt(SO_TIMESTAMPING_NEW) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 9718475e6908 ("socket: Add SO_TIMESTAMPING_NEW") added the new socket option SO_TIMESTAMPING_NEW. Setting the option is handled in sk_setsockopt(), querying it was not handled in sk_getsockopt(), though. Following remarks on an earlier submission of this patch, keep the old behavior of getsockopt(SO_TIMESTAMPING_OLD) which returns the active flags even if they actually have been set through SO_TIMESTAMPING_NEW. The new getsockopt(SO_TIMESTAMPING_NEW) is stricter, returning flags only if they have been set through the same option. Fixes: 9718475e6908 ("socket: Add SO_TIMESTAMPING_NEW") Link: https://lore.kernel.org/lkml/20230703175048.151683-1-jthinz@mailbox.tu-berlin.de/ Link: https://lore.kernel.org/netdev/0d7cddc9-03fa-43db-a579-14f3e822615b@app.fastmail.com/ Signed-off-by: Jörn-Thorben Hinz Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/core/sock.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index fef349dd72fa..51d52859e942 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1711,9 +1711,16 @@ int sk_getsockopt(struct sock *sk, int level, int optname, break; case SO_TIMESTAMPING_OLD: + case SO_TIMESTAMPING_NEW: lv = sizeof(v.timestamping); - v.timestamping.flags = READ_ONCE(sk->sk_tsflags); - v.timestamping.bind_phc = READ_ONCE(sk->sk_bind_phc); + /* For the later-added case SO_TIMESTAMPING_NEW: Be strict about only + * returning the flags when they were set through the same option. + * Don't change the beviour for the old case SO_TIMESTAMPING_OLD. + */ + if (optname == SO_TIMESTAMPING_OLD || sock_flag(sk, SOCK_TSTAMP_NEW)) { + v.timestamping.flags = READ_ONCE(sk->sk_tsflags); + v.timestamping.bind_phc = READ_ONCE(sk->sk_bind_phc); + } break; case SO_RCVTIMEO_OLD: -- cgit v1.2.3-59-g8ed1b From 059d37b718d38d26087121c754691df77acfc66b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 22 Dec 2023 21:06:13 -0800 Subject: net: phy: linux/phy.h: fix Excess kernel-doc description warning Remove the @phy_timer: line to prevent the kernel-doc warning: include/linux/phy.h:768: warning: Excess struct member 'phy_timer' description in 'phy_device' Signed-off-by: Randy Dunlap Cc: Andrew Lunn Cc: Heiner Kallweit Cc: Russell King Cc: netdev@vger.kernel.org Reviewed-by: Russell King (Oracle) Signed-off-by: David S. Miller --- include/linux/phy.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/phy.h b/include/linux/phy.h index 3cc52826f18e..bd285950972c 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -568,7 +568,6 @@ struct macsec_ops; * - Bits [31:24] are reserved for defining generic * PHY driver behavior. * @irq: IRQ number of the PHY's interrupt (-1 if none) - * @phy_timer: The timer for handling the state machine * @phylink: Pointer to phylink instance for this PHY * @sfp_bus_attached: Flag indicating whether the SFP bus has been attached * @sfp_bus: SFP bus attached to this PHY's fiber port -- cgit v1.2.3-59-g8ed1b From c46bfba1337d301661dbb23cfd905d4cb51f27ca Mon Sep 17 00:00:00 2001 From: wangkeqi Date: Sat, 23 Dec 2023 14:50:32 +0800 Subject: connector: Fix proc_event_num_listeners count not cleared When we register a cn_proc listening event, the proc_event_num_listener variable will be incremented by one, but if PROC_CN_MCAST_IGNORE is not called, the count will not decrease. This will cause the proc_*_connector function to take the wrong path. It will reappear when the forkstat tool exits via ctrl + c. We solve this problem by determining whether there are still listeners to clear proc_event_num_listener. Signed-off-by: wangkeqi Signed-off-by: David S. Miller --- drivers/connector/cn_proc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c index 44b19e696176..3d5e6d705fc6 100644 --- a/drivers/connector/cn_proc.c +++ b/drivers/connector/cn_proc.c @@ -108,8 +108,9 @@ static inline void send_msg(struct cn_msg *msg) filter_data[1] = 0; } - cn_netlink_send_mult(msg, msg->len, 0, CN_IDX_PROC, GFP_NOWAIT, - cn_filter, (void *)filter_data); + if (cn_netlink_send_mult(msg, msg->len, 0, CN_IDX_PROC, GFP_NOWAIT, + cn_filter, (void *)filter_data) == -ESRCH) + atomic_set(&proc_event_num_listeners, 0); local_unlock(&local_event.lock); } -- cgit v1.2.3-59-g8ed1b From 61fa2493ca76fd7bb74e13f0205274f4ab0aa696 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sat, 23 Dec 2023 20:59:22 +0800 Subject: selftests: bonding: do not set port down when adding to bond Similar to commit be809424659c ("selftests: bonding: do not set port down before adding to bond"). The bond-arp-interval-causes-panic test failed after commit a4abfa627c38 ("net: rtnetlink: Enslave device before bringing it up") as the kernel will set the port down _after_ adding to bond if setting port down specifically. Fix it by removing the link down operation when adding to bond. Fixes: 2ffd57327ff1 ("selftests: bonding: cause oops in bond_rr_gen_slave_id") Signed-off-by: Hangbin Liu Tested-by: Benjamin Poirier Signed-off-by: David S. Miller --- .../selftests/drivers/net/bonding/bond-arp-interval-causes-panic.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/drivers/net/bonding/bond-arp-interval-causes-panic.sh b/tools/testing/selftests/drivers/net/bonding/bond-arp-interval-causes-panic.sh index 4917dbb35a44..5667febee328 100755 --- a/tools/testing/selftests/drivers/net/bonding/bond-arp-interval-causes-panic.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond-arp-interval-causes-panic.sh @@ -30,16 +30,16 @@ ip netns exec server ip addr add ${server_ip4}/24 dev eth0 ip netns exec client ip link add dev bond0 down type bond mode 1 \ miimon 100 all_slaves_active 1 -ip netns exec client ip link set dev eth0 down master bond0 +ip netns exec client ip link set dev eth0 master bond0 ip netns exec client ip link set dev bond0 up ip netns exec client ip addr add ${client_ip4}/24 dev bond0 ip netns exec client ping -c 5 $server_ip4 >/dev/null -ip netns exec client ip link set dev eth0 down nomaster +ip netns exec client ip link set dev eth0 nomaster ip netns exec client ip link set dev bond0 down ip netns exec client ip link set dev bond0 type bond mode 0 \ arp_interval 1000 arp_ip_target "+${server_ip4}" -ip netns exec client ip link set dev eth0 down master bond0 +ip netns exec client ip link set dev eth0 master bond0 ip netns exec client ip link set dev bond0 up ip netns exec client ping -c 5 $server_ip4 >/dev/null -- cgit v1.2.3-59-g8ed1b From 8cdafdd94654ba418648d039c48e7a90508c1982 Mon Sep 17 00:00:00 2001 From: Alex Henrie Date: Fri, 29 Dec 2023 21:32:44 -0700 Subject: Revert "net: ipv6/addrconf: clamp preferred_lft to the minimum required" The commit had a bug and might not have been the right approach anyway. Fixes: 629df6701c8a ("net: ipv6/addrconf: clamp preferred_lft to the minimum required") Fixes: ec575f885e3e ("Documentation: networking: explain what happens if temp_prefered_lft is too small or too large") Reported-by: Dan Moulding Closes: https://lore.kernel.org/netdev/20231221231115.12402-1-dan@danm.net/ Link: https://lore.kernel.org/netdev/CAMMLpeTdYhd=7hhPi2Y7pwdPCgnnW5JYh-bu3hSc7im39uxnEA@mail.gmail.com/ Signed-off-by: Alex Henrie Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20231230043252.10530-1-alexhenrie24@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/networking/ip-sysctl.rst | 2 +- net/ipv6/addrconf.c | 18 +++++------------- 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 4dfe0d9a57bb..7afff42612e9 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -2511,7 +2511,7 @@ temp_valid_lft - INTEGER temp_prefered_lft - INTEGER Preferred lifetime (in seconds) for temporary addresses. If temp_prefered_lft is less than the minimum required lifetime (typically - 5 seconds), the preferred lifetime is the minimum required. If + 5 seconds), temporary addresses will not be created. If temp_prefered_lft is greater than temp_valid_lft, the preferred lifetime is temp_valid_lft. diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2692a7b24c40..733ace18806c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1407,23 +1407,15 @@ retry: write_unlock_bh(&idev->lock); - /* From RFC 4941: - * - * A temporary address is created only if this calculated Preferred - * Lifetime is greater than REGEN_ADVANCE time units. In - * particular, an implementation must not create a temporary address - * with a zero Preferred Lifetime. - * - * Clamp the preferred lifetime to a minimum of regen_advance, unless - * that would exceed valid_lft. - * + /* A temporary address is created only if this calculated Preferred + * Lifetime is greater than REGEN_ADVANCE time units. In particular, + * an implementation must not create a temporary address with a zero + * Preferred Lifetime. * Use age calculation as in addrconf_verify to avoid unnecessary * temporary addresses being generated. */ age = (now - tmp_tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; - if (cfg.preferred_lft <= regen_advance + age) - cfg.preferred_lft = regen_advance + age + 1; - if (cfg.preferred_lft > cfg.valid_lft) { + if (cfg.preferred_lft <= regen_advance + age) { in6_ifa_put(ifp); in6_dev_put(idev); ret = -1; -- cgit v1.2.3-59-g8ed1b From d5a306aedba34e640b11d7026dbbafb78ee3a5f6 Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Mon, 25 Dec 2023 19:29:14 +0800 Subject: sfc: fix a double-free bug in efx_probe_filters In efx_probe_filters, the channel->rps_flow_id is freed in a efx_for_each_channel marco when success equals to 0. However, after the following call chain: ef100_net_open |-> efx_probe_filters |-> ef100_net_stop |-> efx_remove_filters The channel->rps_flow_id is freed again in the efx_for_each_channel of efx_remove_filters, triggering a double-free bug. Fixes: a9dc3d5612ce ("sfc_ef100: RX filter table management and related gubbins") Reviewed-by: Simon Horman Reviewed-by: Edward Cree Signed-off-by: Zhipeng Lu Link: https://lore.kernel.org/r/20231225112915.3544581-1-alexious@zju.edu.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/rx_common.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/rx_common.c b/drivers/net/ethernet/sfc/rx_common.c index d2f35ee15eff..fac227d372db 100644 --- a/drivers/net/ethernet/sfc/rx_common.c +++ b/drivers/net/ethernet/sfc/rx_common.c @@ -823,8 +823,10 @@ int efx_probe_filters(struct efx_nic *efx) } if (!success) { - efx_for_each_channel(channel, efx) + efx_for_each_channel(channel, efx) { kfree(channel->rps_flow_id); + channel->rps_flow_id = NULL; + } efx->type->filter_table_remove(efx); rc = -ENOMEM; goto out_unlock; -- cgit v1.2.3-59-g8ed1b From fe6d8300a7af390667359c661d78a24b733dc5ad Mon Sep 17 00:00:00 2001 From: Marcin Wojtas Date: Mon, 25 Dec 2023 23:52:45 +0100 Subject: MAINTAINERS: Update mvpp2 driver email I no longer use mw@semihalf.com email. Update mvpp2 driver entry with my alternative address. Signed-off-by: Marcin Wojtas Link: https://lore.kernel.org/r/20231225225245.1606-1-marcin.s.wojtas@gmail.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 5ae0191d9a90..409946ed5e98 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12814,7 +12814,7 @@ S: Maintained F: drivers/net/ethernet/marvell/mvneta.* MARVELL MVPP2 ETHERNET DRIVER -M: Marcin Wojtas +M: Marcin Wojtas M: Russell King L: netdev@vger.kernel.org S: Maintained -- cgit v1.2.3-59-g8ed1b From 118ba479d02c5a55e1c1d3c7a43de8937680e67d Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Tue, 26 Dec 2023 13:10:17 +0100 Subject: MAINTAINERS: add Geliang as reviewer for MPTCP For a long time now, Geliang has contributed to a lot of code and reviews related to MPTCP. So let's reflect that in the MAINTAINERS file. This should also encourage patch submitters to add him to the CC list. Acked-by: Geliang Tang Acked-by: Mat Martineau Signed-off-by: Matthieu Baerts Link: https://lore.kernel.org/r/20231226-upstream-net-20231226-mptcp-prevent-warn-v1-1-1404dcc431ea@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 409946ed5e98..14cb0732c375 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15077,6 +15077,7 @@ K: \bmdo_ NETWORKING [MPTCP] M: Matthieu Baerts M: Mat Martineau +R: Geliang Tang L: netdev@vger.kernel.org L: mptcp@lists.linux.dev S: Maintained -- cgit v1.2.3-59-g8ed1b From 4c0288299fd09ee7c6fbe2f57421f314d8c981db Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 26 Dec 2023 13:10:18 +0100 Subject: mptcp: prevent tcp diag from closing listener subflows The MPTCP protocol does not expect that any other entity could change the first subflow status when such socket is listening. Unfortunately the TCP diag interface allows aborting any TCP socket, including MPTCP listeners subflows. As reported by syzbot, that trigger a WARN() and could lead to later bigger trouble. The MPTCP protocol needs to do some MPTCP-level cleanup actions to properly shutdown the listener. To keep the fix simple, prevent entirely the diag interface from stopping such listeners. We could refine the diag callback in a later, larger patch targeting net-next. Fixes: 57fc0f1ceaa4 ("mptcp: ensure listener is unhashed before updating the sk status") Cc: stable@vger.kernel.org Reported-by: Closes: https://lore.kernel.org/netdev/0000000000004f4579060c68431b@google.com/ Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Link: https://lore.kernel.org/r/20231226-upstream-net-20231226-mptcp-prevent-warn-v1-2-1404dcc431ea@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/subflow.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 6d7684c35e93..852b3f4af000 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1982,6 +1982,17 @@ static void tcp_release_cb_override(struct sock *ssk) tcp_release_cb(ssk); } +static int tcp_abort_override(struct sock *ssk, int err) +{ + /* closing a listener subflow requires a great deal of care. + * keep it simple and just prevent such operation + */ + if (inet_sk_state_load(ssk) == TCP_LISTEN) + return -EINVAL; + + return tcp_abort(ssk, err); +} + static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = { .name = "mptcp", .owner = THIS_MODULE, @@ -2026,6 +2037,7 @@ void __init mptcp_subflow_init(void) tcp_prot_override = tcp_prot; tcp_prot_override.release_cb = tcp_release_cb_override; + tcp_prot_override.diag_destroy = tcp_abort_override; #if IS_ENABLED(CONFIG_MPTCP_IPV6) /* In struct mptcp_subflow_request_sock, we assume the TCP request sock @@ -2061,6 +2073,7 @@ void __init mptcp_subflow_init(void) tcpv6_prot_override = tcpv6_prot; tcpv6_prot_override.release_cb = tcp_release_cb_override; + tcpv6_prot_override.diag_destroy = tcp_abort_override; #endif mptcp_diag_subflow_init(&subflow_ulp_ops); -- cgit v1.2.3-59-g8ed1b From e584f2ff1e6cc9b1d99e8a6b0f3415940d1b3eb3 Mon Sep 17 00:00:00 2001 From: Adrian Cinal Date: Thu, 28 Dec 2023 14:56:38 +0100 Subject: net: bcmgenet: Fix FCS generation for fragmented skbuffs The flag DMA_TX_APPEND_CRC was only written to the first DMA descriptor in the TX path, where each descriptor corresponds to a single skbuff fragment (or the skbuff head). This led to packets with no FCS appearing on the wire if the kernel allocated the packet in fragments, which would always happen when using PACKET_MMAP/TPACKET (cf. tpacket_fill_skb() in net/af_packet.c). Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Adrian Cinal Acked-by: Doug Berger Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/20231228135638.1339245-1-adriancinal1@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 9282403d1bf6..2d7ae71287b1 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2132,8 +2132,10 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev) /* Note: if we ever change from DMA_TX_APPEND_CRC below we * will need to restore software padding of "runt" packets */ + len_stat |= DMA_TX_APPEND_CRC; + if (!i) { - len_stat |= DMA_TX_APPEND_CRC | DMA_SOP; + len_stat |= DMA_SOP; if (skb->ip_summed == CHECKSUM_PARTIAL) len_stat |= DMA_TX_DO_CSUM; } -- cgit v1.2.3-59-g8ed1b From e6345d2824a3f58aab82428d11645e0da861ac13 Mon Sep 17 00:00:00 2001 From: Brad Cowie Date: Fri, 22 Dec 2023 11:43:11 +1300 Subject: netfilter: nf_nat: fix action not being set for all ct states This fixes openvswitch's handling of nat packets in the related state. In nf_ct_nat_execute(), which is called from nf_ct_nat(), ICMP/ICMPv6 packets in the IP_CT_RELATED or IP_CT_RELATED_REPLY state, which have not been dropped, will follow the goto, however the placement of the goto label means that updating the action bit field will be bypassed. This causes ovs_nat_update_key() to not be called from ovs_ct_nat() which means the openvswitch match key for the ICMP/ICMPv6 packet is not updated and the pre-nat value will be retained for the key, which will result in the wrong openflow rule being matched for that packet. Move the goto label above where the action bit field is being set so that it is updated in all cases where the packet is accepted. Fixes: ebddb1404900 ("net: move the nat function to nf_nat_ovs for ovs and tc") Signed-off-by: Brad Cowie Reviewed-by: Simon Horman Acked-by: Xin Long Acked-by: Aaron Conole Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_ovs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_nat_ovs.c b/net/netfilter/nf_nat_ovs.c index 551abd2da614..0f9a559f6207 100644 --- a/net/netfilter/nf_nat_ovs.c +++ b/net/netfilter/nf_nat_ovs.c @@ -75,9 +75,10 @@ static int nf_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, } err = nf_nat_packet(ct, ctinfo, hooknum, skb); +out: if (err == NF_ACCEPT) *action |= BIT(maniptype); -out: + return err; } -- cgit v1.2.3-59-g8ed1b From b29be0ca8e816119ccdf95cc7d7c7be9bde005f1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 1 Jan 2024 20:15:33 +0100 Subject: netfilter: nft_immediate: drop chain reference counter on error In the init path, nft_data_init() bumps the chain reference counter, decrement it on error by following the error path which calls nft_data_release() to restore it. Fixes: 4bedf9eee016 ("netfilter: nf_tables: fix chain binding transaction logic") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_immediate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index fccb3cf7749c..6475c7abc1fe 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -78,7 +78,7 @@ static int nft_immediate_init(const struct nft_ctx *ctx, case NFT_GOTO: err = nf_tables_bind_chain(ctx, chain); if (err < 0) - return err; + goto err1; break; default: break; -- cgit v1.2.3-59-g8ed1b From 01b2885d9415152bcb12ff1f7788f500a74ea0ed Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Thu, 21 Dec 2023 09:12:30 -0400 Subject: net: Save and restore msg_namelen in sock_sendmsg Commit 86a7e0b69bd5 ("net: prevent rewrite of msg_name in sock_sendmsg()") made sock_sendmsg save the incoming msg_name pointer and restore it before returning, to insulate the caller against msg_name being changed by the called code. If the address length was also changed however, we may return with an inconsistent structure where the length doesn't match the address, and attempts to reuse it may lead to lost packets. For example, a kernel that doesn't have commit 1c5950fc6fe9 ("udp6: fix potential access to stale information") will replace a v4 mapped address with its ipv4 equivalent, and shorten namelen accordingly from 28 to 16. If the caller attempts to reuse the resulting msg structure, it will have the original ipv6 (v4 mapped) address but an incorrect v4 length. Fixes: 86a7e0b69bd5 ("net: prevent rewrite of msg_name in sock_sendmsg()") Signed-off-by: Marc Dionne Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/socket.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/socket.c b/net/socket.c index 3379c64217a4..89d79205bf50 100644 --- a/net/socket.c +++ b/net/socket.c @@ -757,6 +757,7 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg) { struct sockaddr_storage *save_addr = (struct sockaddr_storage *)msg->msg_name; struct sockaddr_storage address; + int save_len = msg->msg_namelen; int ret; if (msg->msg_name) { @@ -766,6 +767,7 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg) ret = __sock_sendmsg(sock, msg); msg->msg_name = save_addr; + msg->msg_namelen = save_len; return ret; } -- cgit v1.2.3-59-g8ed1b From 6a15584e99db8918b60e507539c7446375dcf366 Mon Sep 17 00:00:00 2001 From: Ke Xiao Date: Mon, 18 Dec 2023 15:08:50 +0800 Subject: i40e: fix use-after-free in i40e_aqc_add_filters() Commit 3116f59c12bd ("i40e: fix use-after-free in i40e_sync_filters_subtask()") avoided use-after-free issues, by increasing refcount during update the VSI filter list to the HW. However, it missed the unicast situation. When deleting an unicast FDB entry, the i40e driver will release the mac_filter, and i40e_service_task will concurrently request firmware to add the mac_filter, which will lead to the following use-after-free issue. Fix again for both netdev->uc and netdev->mc. BUG: KASAN: use-after-free in i40e_aqc_add_filters+0x55c/0x5b0 [i40e] Read of size 2 at addr ffff888eb3452d60 by task kworker/8:7/6379 CPU: 8 PID: 6379 Comm: kworker/8:7 Kdump: loaded Tainted: G Workqueue: i40e i40e_service_task [i40e] Call Trace: dump_stack+0x71/0xab print_address_description+0x6b/0x290 kasan_report+0x14a/0x2b0 i40e_aqc_add_filters+0x55c/0x5b0 [i40e] i40e_sync_vsi_filters+0x1676/0x39c0 [i40e] i40e_service_task+0x1397/0x2bb0 [i40e] process_one_work+0x56a/0x11f0 worker_thread+0x8f/0xf40 kthread+0x2a0/0x390 ret_from_fork+0x1f/0x40 Allocated by task 21948: kasan_kmalloc+0xa6/0xd0 kmem_cache_alloc_trace+0xdb/0x1c0 i40e_add_filter+0x11e/0x520 [i40e] i40e_addr_sync+0x37/0x60 [i40e] __hw_addr_sync_dev+0x1f5/0x2f0 i40e_set_rx_mode+0x61/0x1e0 [i40e] dev_uc_add_excl+0x137/0x190 i40e_ndo_fdb_add+0x161/0x260 [i40e] rtnl_fdb_add+0x567/0x950 rtnetlink_rcv_msg+0x5db/0x880 netlink_rcv_skb+0x254/0x380 netlink_unicast+0x454/0x610 netlink_sendmsg+0x747/0xb00 sock_sendmsg+0xe2/0x120 __sys_sendto+0x1ae/0x290 __x64_sys_sendto+0xdd/0x1b0 do_syscall_64+0xa0/0x370 entry_SYSCALL_64_after_hwframe+0x65/0xca Freed by task 21948: __kasan_slab_free+0x137/0x190 kfree+0x8b/0x1b0 __i40e_del_filter+0x116/0x1e0 [i40e] i40e_del_mac_filter+0x16c/0x300 [i40e] i40e_addr_unsync+0x134/0x1b0 [i40e] __hw_addr_sync_dev+0xff/0x2f0 i40e_set_rx_mode+0x61/0x1e0 [i40e] dev_uc_del+0x77/0x90 rtnl_fdb_del+0x6a5/0x860 rtnetlink_rcv_msg+0x5db/0x880 netlink_rcv_skb+0x254/0x380 netlink_unicast+0x454/0x610 netlink_sendmsg+0x747/0xb00 sock_sendmsg+0xe2/0x120 __sys_sendto+0x1ae/0x290 __x64_sys_sendto+0xdd/0x1b0 do_syscall_64+0xa0/0x370 entry_SYSCALL_64_after_hwframe+0x65/0xca Fixes: 3116f59c12bd ("i40e: fix use-after-free in i40e_sync_filters_subtask()") Fixes: 41c445ff0f48 ("i40e: main driver core") Signed-off-by: Ke Xiao Signed-off-by: Ding Hui Cc: Di Zhu Reviewed-by: Jan Sokolowski Reviewed-by: Simon Horman Reviewed-by: Jacob Keller Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 1ab8dbe2d880..d5633a440cca 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -107,12 +107,18 @@ static struct workqueue_struct *i40e_wq; static void netdev_hw_addr_refcnt(struct i40e_mac_filter *f, struct net_device *netdev, int delta) { + struct netdev_hw_addr_list *ha_list; struct netdev_hw_addr *ha; if (!f || !netdev) return; - netdev_for_each_mc_addr(ha, netdev) { + if (is_unicast_ether_addr(f->macaddr) || is_link_local_ether_addr(f->macaddr)) + ha_list = &netdev->uc; + else + ha_list = &netdev->mc; + + netdev_hw_addr_list_for_each(ha, ha_list) { if (ether_addr_equal(ha->addr, f->macaddr)) { ha->refcount += delta; if (ha->refcount <= 0) -- cgit v1.2.3-59-g8ed1b From 371e576ff3e8580d91d49026e5d5faebf5565558 Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Thu, 21 Dec 2023 14:27:35 +0100 Subject: i40e: Restore VF MSI-X state during PCI reset During a PCI FLR the MSI-X Enable flag in the VF PCI MSI-X capability register will be cleared. This can lead to issues when a VF is assigned to a VM because in these cases the VF driver receives no indication of the PF PCI error/reset and additionally it is incapable of restoring the cleared flag in the hypervisor configuration space without fully reinitializing the driver interrupt functionality. Since the VF driver is unable to easily resolve this condition on its own, restore the VF MSI-X flag during the PF PCI reset handling. Fixes: 19b7960b2da1 ("i40e: implement split PCI error reset handler") Co-developed-by: Karen Ostrowska Signed-off-by: Karen Ostrowska Co-developed-by: Mateusz Palczewski Signed-off-by: Mateusz Palczewski Reviewed-by: Wojciech Drewek Reviewed-by: Przemek Kitszel Signed-off-by: Andrii Staikov Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 3 +++ drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 26 ++++++++++++++++++++++ drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h | 3 +++ 3 files changed, 32 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index d5633a440cca..d5519af34657 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -16518,6 +16518,9 @@ static void i40e_pci_error_reset_done(struct pci_dev *pdev) return; i40e_reset_and_rebuild(pf, false, false); +#ifdef CONFIG_PCI_IOV + i40e_restore_all_vfs_msi_state(pdev); +#endif /* CONFIG_PCI_IOV */ } /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 3f99eb198245..d60f5419d6bd 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -154,6 +154,32 @@ void i40e_vc_notify_reset(struct i40e_pf *pf) (u8 *)&pfe, sizeof(struct virtchnl_pf_event)); } +#ifdef CONFIG_PCI_IOV +void i40e_restore_all_vfs_msi_state(struct pci_dev *pdev) +{ + u16 vf_id; + u16 pos; + + /* Continue only if this is a PF */ + if (!pdev->is_physfn) + return; + + if (!pci_num_vf(pdev)) + return; + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); + if (pos) { + struct pci_dev *vf_dev = NULL; + + pci_read_config_word(pdev, pos + PCI_SRIOV_VF_DID, &vf_id); + while ((vf_dev = pci_get_device(pdev->vendor, vf_id, vf_dev))) { + if (vf_dev->is_virtfn && vf_dev->physfn == pdev) + pci_restore_msi_state(vf_dev); + } + } +} +#endif /* CONFIG_PCI_IOV */ + /** * i40e_vc_notify_vf_reset * @vf: pointer to the VF structure diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index 2ee0f8a23248..5fd607c0de0a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -137,6 +137,9 @@ int i40e_ndo_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool enable); void i40e_vc_notify_link_state(struct i40e_pf *pf); void i40e_vc_notify_reset(struct i40e_pf *pf); +#ifdef CONFIG_PCI_IOV +void i40e_restore_all_vfs_msi_state(struct pci_dev *pdev); +#endif /* CONFIG_PCI_IOV */ int i40e_get_vf_stats(struct net_device *netdev, int vf_id, struct ifla_vf_stats *vf_stats); -- cgit v1.2.3-59-g8ed1b From 9fb266dcb6aab27867b22f7bc9e4ad6bd14b2ddc Mon Sep 17 00:00:00 2001 From: Paul Greenwalt Date: Thu, 21 Dec 2023 16:40:28 -0500 Subject: ice: fix Get link status data length Get link status version 2 (opcode 0x0607) is returning an error because FW expects a data length of 56 bytes, and this is causing the driver to fail probe. Update the get link status version 2 data length to 56 bytes by adding 5 byte reserved5 field to the end of struct ice_aqc_get_link_status_data and passing it as parameter to offsetofend() to the fix error. Fixes: 2777d24ec6d1 ("ice: Add ice_get_link_status_datalen") Reviewed-by: Przemek Kitszel Signed-off-by: Paul Greenwalt Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_adminq_cmd.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index d7fdb7ba7268..fbd5d92182d3 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -1359,8 +1359,9 @@ struct ice_aqc_get_link_status_data { u8 lp_flowcontrol; #define ICE_AQ_LINK_LP_PAUSE_ADV BIT(0) #define ICE_AQ_LINK_LP_ASM_DIR_ADV BIT(1) + u8 reserved5[5]; #define ICE_AQC_LS_DATA_SIZE_V2 \ - offsetofend(struct ice_aqc_get_link_status_data, lp_flowcontrol) + offsetofend(struct ice_aqc_get_link_status_data, reserved5) } __packed; /* Set event mask command (direct 0x0613) */ -- cgit v1.2.3-59-g8ed1b From 947dfc8138dfaeb6e966e2d661de89eb203e3064 Mon Sep 17 00:00:00 2001 From: Rodrigo Cataldo Date: Fri, 8 Dec 2023 15:58:16 +0100 Subject: igc: Fix hicredit calculation According to the Intel Software Manual for I225, Section 7.5.2.7, hicredit should be multiplied by the constant link-rate value, 0x7736. Currently, the old constant link-rate value, 0x7735, from the boards supported on igb are being used, most likely due to a copy'n'paste, as the rest of the logic is the same for both drivers. Update hicredit accordingly. Fixes: 1ab011b0bf07 ("igc: Add support for CBS offloading") Reviewed-by: Kurt Kanzenbach Signed-off-by: Rodrigo Cataldo Acked-by: Vinicius Costa Gomes Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_tsn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c index a9c08321aca9..22cefb1eeedf 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.c +++ b/drivers/net/ethernet/intel/igc/igc_tsn.c @@ -227,7 +227,7 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter) wr32(IGC_TQAVCC(i), tqavcc); wr32(IGC_TQAVHC(i), - 0x80000000 + ring->hicredit * 0x7735); + 0x80000000 + ring->hicredit * 0x7736); } else { /* Disable any CBS for the queue */ txqctl &= ~(IGC_TXQCTL_QAV_SEL_MASK); -- cgit v1.2.3-59-g8ed1b From 2311e06b9bf3d44e15f9175af177a782806f688f Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Tue, 26 Dec 2023 17:43:33 +0800 Subject: virtio_net: fix missing dma unmap for resize For rq, we have three cases getting buffers from virtio core: 1. virtqueue_get_buf{,_ctx} 2. virtqueue_detach_unused_buf 3. callback for virtqueue_resize But in commit 295525e29a5b("virtio_net: merge dma operations when filling mergeable buffers"), I missed the dma unmap for the #3 case. That will leak some memory, because I did not release the pages referred by the unused buffers. If we do such script, we will make the system OOM. while true do ethtool -G ens4 rx 128 ethtool -G ens4 rx 256 free -m done Fixes: 295525e29a5b ("virtio_net: merge dma operations when filling mergeable buffers") Signed-off-by: Xuan Zhuo Acked-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/20231226094333.47740-1-xuanzhuo@linux.alibaba.com Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 60 ++++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index d16f592c2061..51b1868d2f22 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -334,7 +334,6 @@ struct virtio_net_common_hdr { }; }; -static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf); static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf); static bool is_xdp_frame(void *ptr) @@ -408,6 +407,17 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask) return p; } +static void virtnet_rq_free_buf(struct virtnet_info *vi, + struct receive_queue *rq, void *buf) +{ + if (vi->mergeable_rx_bufs) + put_page(virt_to_head_page(buf)); + else if (vi->big_packets) + give_pages(rq, buf); + else + put_page(virt_to_head_page(buf)); +} + static void enable_delayed_refill(struct virtnet_info *vi) { spin_lock_bh(&vi->refill_lock); @@ -634,17 +644,6 @@ static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx) return buf; } -static void *virtnet_rq_detach_unused_buf(struct receive_queue *rq) -{ - void *buf; - - buf = virtqueue_detach_unused_buf(rq->vq); - if (buf && rq->do_dma) - virtnet_rq_unmap(rq, buf, 0); - - return buf; -} - static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *buf, u32 len) { struct virtnet_rq_dma *dma; @@ -744,6 +743,20 @@ static void virtnet_rq_set_premapped(struct virtnet_info *vi) } } +static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf) +{ + struct virtnet_info *vi = vq->vdev->priv; + struct receive_queue *rq; + int i = vq2rxq(vq); + + rq = &vi->rq[i]; + + if (rq->do_dma) + virtnet_rq_unmap(rq, buf, 0); + + virtnet_rq_free_buf(vi, rq, buf); +} + static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi) { unsigned int len; @@ -1764,7 +1777,7 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, if (unlikely(len < vi->hdr_len + ETH_HLEN)) { pr_debug("%s: short packet %i\n", dev->name, len); DEV_STATS_INC(dev, rx_length_errors); - virtnet_rq_free_unused_buf(rq->vq, buf); + virtnet_rq_free_buf(vi, rq, buf); return; } @@ -2392,7 +2405,7 @@ static int virtnet_rx_resize(struct virtnet_info *vi, if (running) napi_disable(&rq->napi); - err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_free_unused_buf); + err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_unmap_free_buf); if (err) netdev_err(vi->dev, "resize rx fail: rx queue index: %d err: %d\n", qindex, err); @@ -4031,19 +4044,6 @@ static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf) xdp_return_frame(ptr_to_xdp(buf)); } -static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf) -{ - struct virtnet_info *vi = vq->vdev->priv; - int i = vq2rxq(vq); - - if (vi->mergeable_rx_bufs) - put_page(virt_to_head_page(buf)); - else if (vi->big_packets) - give_pages(&vi->rq[i], buf); - else - put_page(virt_to_head_page(buf)); -} - static void free_unused_bufs(struct virtnet_info *vi) { void *buf; @@ -4057,10 +4057,10 @@ static void free_unused_bufs(struct virtnet_info *vi) } for (i = 0; i < vi->max_queue_pairs; i++) { - struct receive_queue *rq = &vi->rq[i]; + struct virtqueue *vq = vi->rq[i].vq; - while ((buf = virtnet_rq_detach_unused_buf(rq)) != NULL) - virtnet_rq_free_unused_buf(rq->vq, buf); + while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) + virtnet_rq_unmap_free_buf(vq, buf); cond_resched(); } } -- cgit v1.2.3-59-g8ed1b From 89f45c30172c80e55c887f32f1af8e184124577b Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Wed, 27 Dec 2023 15:02:27 +0800 Subject: net/qla3xxx: fix potential memleak in ql_alloc_buffer_queues When dma_alloc_coherent() fails, we should free qdev->lrg_buf to prevent potential memleak. Fixes: 1357bfcf7106 ("qla3xxx: Dynamically size the rx buffer queue based on the MTU.") Signed-off-by: Dinghao Liu Link: https://lore.kernel.org/r/20231227070227.10527-1-dinghao.liu@zju.edu.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qla3xxx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index 0d57ffcedf0c..fc78bc959ded 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -2591,6 +2591,7 @@ static int ql_alloc_buffer_queues(struct ql3_adapter *qdev) if (qdev->lrg_buf_q_alloc_virt_addr == NULL) { netdev_err(qdev->ndev, "lBufQ failed\n"); + kfree(qdev->lrg_buf); return -ENOMEM; } qdev->lrg_buf_q_virt_addr = qdev->lrg_buf_q_alloc_virt_addr; @@ -2615,6 +2616,7 @@ static int ql_alloc_buffer_queues(struct ql3_adapter *qdev) qdev->lrg_buf_q_alloc_size, qdev->lrg_buf_q_alloc_virt_addr, qdev->lrg_buf_q_alloc_phy_addr); + kfree(qdev->lrg_buf); return -ENOMEM; } -- cgit v1.2.3-59-g8ed1b From 9dbe086c69b8902c85cece394760ac212e9e4ccc Mon Sep 17 00:00:00 2001 From: Wen Gu Date: Wed, 27 Dec 2023 15:40:35 +0800 Subject: net/smc: fix invalid link access in dumping SMC-R connections A crash was found when dumping SMC-R connections. It can be reproduced by following steps: - environment: two RNICs on both sides. - run SMC-R between two sides, now a SMC_LGR_SYMMETRIC type link group will be created. - set the first RNIC down on either side and link group will turn to SMC_LGR_ASYMMETRIC_LOCAL then. - run 'smcss -R' and the crash will be triggered. BUG: kernel NULL pointer dereference, address: 0000000000000010 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 8000000101fdd067 P4D 8000000101fdd067 PUD 10ce46067 PMD 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 3 PID: 1810 Comm: smcss Kdump: loaded Tainted: G W E 6.7.0-rc6+ #51 RIP: 0010:__smc_diag_dump.constprop.0+0x36e/0x620 [smc_diag] Call Trace: ? __die+0x24/0x70 ? page_fault_oops+0x66/0x150 ? exc_page_fault+0x69/0x140 ? asm_exc_page_fault+0x26/0x30 ? __smc_diag_dump.constprop.0+0x36e/0x620 [smc_diag] smc_diag_dump_proto+0xd0/0xf0 [smc_diag] smc_diag_dump+0x26/0x60 [smc_diag] netlink_dump+0x19f/0x320 __netlink_dump_start+0x1dc/0x300 smc_diag_handler_dump+0x6a/0x80 [smc_diag] ? __pfx_smc_diag_dump+0x10/0x10 [smc_diag] sock_diag_rcv_msg+0x121/0x140 ? __pfx_sock_diag_rcv_msg+0x10/0x10 netlink_rcv_skb+0x5a/0x110 sock_diag_rcv+0x28/0x40 netlink_unicast+0x22a/0x330 netlink_sendmsg+0x240/0x4a0 __sock_sendmsg+0xb0/0xc0 ____sys_sendmsg+0x24e/0x300 ? copy_msghdr_from_user+0x62/0x80 ___sys_sendmsg+0x7c/0xd0 ? __do_fault+0x34/0x1a0 ? do_read_fault+0x5f/0x100 ? do_fault+0xb0/0x110 __sys_sendmsg+0x4d/0x80 do_syscall_64+0x45/0xf0 entry_SYSCALL_64_after_hwframe+0x6e/0x76 When the first RNIC is set down, the lgr->lnk[0] will be cleared and an asymmetric link will be allocated in lgr->link[SMC_LINKS_PER_LGR_MAX - 1] by smc_llc_alloc_alt_link(). Then when we try to dump SMC-R connections in __smc_diag_dump(), the invalid lgr->lnk[0] will be accessed, resulting in this issue. So fix it by accessing the right link. Fixes: f16a7dd5cf27 ("smc: netlink interface for SMC sockets") Reported-by: henaumars Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=7616 Signed-off-by: Wen Gu Reviewed-by: Tony Lu Link: https://lore.kernel.org/r/1703662835-53416-1-git-send-email-guwen@linux.alibaba.com Signed-off-by: Jakub Kicinski --- net/smc/smc_diag.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index a584613aca12..5cc376834c57 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -153,8 +153,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, .lnk[0].link_id = link->link_id, }; - memcpy(linfo.lnk[0].ibname, - smc->conn.lgr->lnk[0].smcibdev->ibdev->name, + memcpy(linfo.lnk[0].ibname, link->smcibdev->ibdev->name, sizeof(link->smcibdev->ibdev->name)); smc_gid_be16_convert(linfo.lnk[0].gid, link->gid); smc_gid_be16_convert(linfo.lnk[0].peer_gid, link->peer_gid); -- cgit v1.2.3-59-g8ed1b From a0d9528f6daf7fe8de217fa80a94d2989d2a57a7 Mon Sep 17 00:00:00 2001 From: Naveen Mamindlapalli Date: Tue, 2 Jan 2024 15:26:43 +0530 Subject: octeontx2-af: Always configure NIX TX link credits based on max frame size Currently the NIX TX link credits are initialized based on the max frame size that can be transmitted on a link but when the MTU is changed, the NIX TX link credits are reprogrammed by the SW based on the new MTU value. Since SMQ max packet length is programmed to max frame size by default, there is a chance that NIX TX may stall while sending a max frame sized packet on the link with insufficient credits to send the packet all at once. This patch avoids stall issue by not changing the link credits dynamically when the MTU is changed. Fixes: 1c74b89171c3 ("octeontx2-af: Wait for TX link idle for credits change") Signed-off-by: Naveen Mamindlapalli Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: Nithin Kumar Dabilpuram Signed-off-by: David S. Miller --- .../net/ethernet/marvell/octeontx2/af/rvu_nix.c | 110 +-------------------- 1 file changed, 3 insertions(+), 107 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 4227ebb4a758..2b6ab748ce25 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -4143,90 +4143,18 @@ static void nix_find_link_frs(struct rvu *rvu, req->minlen = minlen; } -static int -nix_config_link_credits(struct rvu *rvu, int blkaddr, int link, - u16 pcifunc, u64 tx_credits) -{ - struct rvu_hwinfo *hw = rvu->hw; - int pf = rvu_get_pf(pcifunc); - u8 cgx_id = 0, lmac_id = 0; - unsigned long poll_tmo; - bool restore_tx_en = 0; - struct nix_hw *nix_hw; - u64 cfg, sw_xoff = 0; - u32 schq = 0; - u32 credits; - int rc; - - nix_hw = get_nix_hw(rvu->hw, blkaddr); - if (!nix_hw) - return NIX_AF_ERR_INVALID_NIXBLK; - - if (tx_credits == nix_hw->tx_credits[link]) - return 0; - - /* Enable cgx tx if disabled for credits to be back */ - if (is_pf_cgxmapped(rvu, pf)) { - rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); - restore_tx_en = !rvu_cgx_config_tx(rvu_cgx_pdata(cgx_id, rvu), - lmac_id, true); - } - - mutex_lock(&rvu->rsrc_lock); - /* Disable new traffic to link */ - if (hw->cap.nix_shaping) { - schq = nix_get_tx_link(rvu, pcifunc); - sw_xoff = rvu_read64(rvu, blkaddr, NIX_AF_TL1X_SW_XOFF(schq)); - rvu_write64(rvu, blkaddr, - NIX_AF_TL1X_SW_XOFF(schq), BIT_ULL(0)); - } - - rc = NIX_AF_ERR_LINK_CREDITS; - poll_tmo = jiffies + usecs_to_jiffies(200000); - /* Wait for credits to return */ - do { - if (time_after(jiffies, poll_tmo)) - goto exit; - usleep_range(100, 200); - - cfg = rvu_read64(rvu, blkaddr, - NIX_AF_TX_LINKX_NORM_CREDIT(link)); - credits = (cfg >> 12) & 0xFFFFFULL; - } while (credits != nix_hw->tx_credits[link]); - - cfg &= ~(0xFFFFFULL << 12); - cfg |= (tx_credits << 12); - rvu_write64(rvu, blkaddr, NIX_AF_TX_LINKX_NORM_CREDIT(link), cfg); - rc = 0; - - nix_hw->tx_credits[link] = tx_credits; - -exit: - /* Enable traffic back */ - if (hw->cap.nix_shaping && !sw_xoff) - rvu_write64(rvu, blkaddr, NIX_AF_TL1X_SW_XOFF(schq), 0); - - /* Restore state of cgx tx */ - if (restore_tx_en) - rvu_cgx_config_tx(rvu_cgx_pdata(cgx_id, rvu), lmac_id, false); - - mutex_unlock(&rvu->rsrc_lock); - return rc; -} - int rvu_mbox_handler_nix_set_hw_frs(struct rvu *rvu, struct nix_frs_cfg *req, struct msg_rsp *rsp) { struct rvu_hwinfo *hw = rvu->hw; u16 pcifunc = req->hdr.pcifunc; int pf = rvu_get_pf(pcifunc); - int blkaddr, schq, link = -1; - struct nix_txsch *txsch; - u64 cfg, lmac_fifo_len; + int blkaddr, link = -1; struct nix_hw *nix_hw; struct rvu_pfvf *pfvf; u8 cgx = 0, lmac = 0; u16 max_mtu; + u64 cfg; blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc); if (blkaddr < 0) @@ -4247,25 +4175,6 @@ int rvu_mbox_handler_nix_set_hw_frs(struct rvu *rvu, struct nix_frs_cfg *req, if (req->update_minlen && req->minlen < NIC_HW_MIN_FRS) return NIX_AF_ERR_FRS_INVALID; - /* Check if requester wants to update SMQ's */ - if (!req->update_smq) - goto rx_frscfg; - - /* Update min/maxlen in each of the SMQ attached to this PF/VF */ - txsch = &nix_hw->txsch[NIX_TXSCH_LVL_SMQ]; - mutex_lock(&rvu->rsrc_lock); - for (schq = 0; schq < txsch->schq.max; schq++) { - if (TXSCH_MAP_FUNC(txsch->pfvf_map[schq]) != pcifunc) - continue; - cfg = rvu_read64(rvu, blkaddr, NIX_AF_SMQX_CFG(schq)); - cfg = (cfg & ~(0xFFFFULL << 8)) | ((u64)req->maxlen << 8); - if (req->update_minlen) - cfg = (cfg & ~0x7FULL) | ((u64)req->minlen & 0x7F); - rvu_write64(rvu, blkaddr, NIX_AF_SMQX_CFG(schq), cfg); - } - mutex_unlock(&rvu->rsrc_lock); - -rx_frscfg: /* Check if config is for SDP link */ if (req->sdp_link) { if (!hw->sdp_links) @@ -4288,7 +4197,6 @@ rx_frscfg: if (link < 0) return NIX_AF_ERR_RX_LINK_INVALID; - linkcfg: nix_find_link_frs(rvu, req, pcifunc); @@ -4298,19 +4206,7 @@ linkcfg: cfg = (cfg & ~0xFFFFULL) | req->minlen; rvu_write64(rvu, blkaddr, NIX_AF_RX_LINKX_CFG(link), cfg); - if (req->sdp_link || pf == 0) - return 0; - - /* Update transmit credits for CGX links */ - lmac_fifo_len = rvu_cgx_get_lmac_fifolen(rvu, cgx, lmac); - if (!lmac_fifo_len) { - dev_err(rvu->dev, - "%s: Failed to get CGX/RPM%d:LMAC%d FIFO size\n", - __func__, cgx, lmac); - return 0; - } - return nix_config_link_credits(rvu, blkaddr, link, pcifunc, - (lmac_fifo_len - req->maxlen) / 16); + return 0; } int rvu_mbox_handler_nix_set_rx_cfg(struct rvu *rvu, struct nix_rx_cfg *req, -- cgit v1.2.3-59-g8ed1b From 818ed8933bd17bc91a9fa8b94a898189c546fc1a Mon Sep 17 00:00:00 2001 From: Naveen Mamindlapalli Date: Tue, 2 Jan 2024 19:44:00 +0530 Subject: octeontx2-af: Re-enable MAC TX in otx2_stop processing During QoS scheduling testing with multiple strict priority flows, the netdev tx watchdog timeout routine is invoked when a low priority QoS queue doesn't get a chance to transmit the packets because other high priority flows are completely subscribing the transmit link. The netdev tx watchdog timeout routine will stop MAC RX and TX functionality in otx2_stop() routine before cleanup of HW TX queues which results in SMQ flush errors because the packets belonging to low priority queues will never gets flushed since MAC TX is disabled. This patch fixes the issue by re-enabling MAC TX to ensure the packets in HW pipeline gets flushed properly. Fixes: a7faa68b4e7f ("octeontx2-af: Start/Stop traffic in CGX along with NPC") Signed-off-by: Naveen Mamindlapalli Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu.h | 1 + drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c | 17 +++++++++++++++++ drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c | 8 +++++++- 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index cce2806aaa50..8802961b8889 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -905,6 +905,7 @@ u32 rvu_cgx_get_fifolen(struct rvu *rvu); void *rvu_first_cgx_pdata(struct rvu *rvu); int cgxlmac_to_pf(struct rvu *rvu, int cgx_id, int lmac_id); int rvu_cgx_config_tx(void *cgxd, int lmac_id, bool enable); +int rvu_cgx_tx_enable(struct rvu *rvu, u16 pcifunc, bool enable); int rvu_cgx_prio_flow_ctrl_cfg(struct rvu *rvu, u16 pcifunc, u8 tx_pause, u8 rx_pause, u16 pfc_en); int rvu_cgx_cfg_pause_frm(struct rvu *rvu, u16 pcifunc, u8 tx_pause, u8 rx_pause); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 15a319684ed3..38acdc7a73bb 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -465,6 +465,23 @@ int rvu_cgx_config_rxtx(struct rvu *rvu, u16 pcifunc, bool start) return mac_ops->mac_rx_tx_enable(cgxd, lmac_id, start); } +int rvu_cgx_tx_enable(struct rvu *rvu, u16 pcifunc, bool enable) +{ + int pf = rvu_get_pf(pcifunc); + struct mac_ops *mac_ops; + u8 cgx_id, lmac_id; + void *cgxd; + + if (!is_cgx_config_permitted(rvu, pcifunc)) + return LMAC_AF_ERR_PERM_DENIED; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + cgxd = rvu_cgx_pdata(cgx_id, rvu); + mac_ops = get_mac_ops(cgxd); + + return mac_ops->mac_tx_enable(cgxd, lmac_id, enable); +} + int rvu_cgx_config_tx(void *cgxd, int lmac_id, bool enable) { struct mac_ops *mac_ops; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 2b6ab748ce25..58744313f0eb 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -4737,7 +4737,13 @@ int rvu_mbox_handler_nix_lf_stop_rx(struct rvu *rvu, struct msg_req *req, pfvf = rvu_get_pfvf(rvu, pcifunc); clear_bit(NIXLF_INITIALIZED, &pfvf->flags); - return rvu_cgx_start_stop_io(rvu, pcifunc, false); + err = rvu_cgx_start_stop_io(rvu, pcifunc, false); + if (err) + return err; + + rvu_cgx_tx_enable(rvu, pcifunc, true); + + return 0; } #define RX_SA_BASE GENMASK_ULL(52, 7) -- cgit v1.2.3-59-g8ed1b From eaac6a2d26b65511e164772bec6918fcbc61938e Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Wed, 3 Jan 2024 03:35:34 +0000 Subject: asix: Add check for usbnet_get_endpoints Add check for usbnet_get_endpoints() and return the error if it fails in order to transfer the error. Fixes: 16626b0cc3d5 ("asix: Add a new driver for the AX88172A") Signed-off-by: Chen Ni Signed-off-by: David S. Miller --- drivers/net/usb/ax88172a.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/ax88172a.c b/drivers/net/usb/ax88172a.c index 3777c7e2e6fc..e47bb125048d 100644 --- a/drivers/net/usb/ax88172a.c +++ b/drivers/net/usb/ax88172a.c @@ -161,7 +161,9 @@ static int ax88172a_bind(struct usbnet *dev, struct usb_interface *intf) u8 buf[ETH_ALEN]; struct ax88172a_private *priv; - usbnet_get_endpoints(dev, intf); + ret = usbnet_get_endpoints(dev, intf); + if (ret) + return ret; priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) -- cgit v1.2.3-59-g8ed1b From 9039cd4c61635b2d541009a7cd5e2cc052402f28 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 3 Jan 2024 10:13:53 +0200 Subject: net: ravb: Wait for operating mode to be applied CSR.OPS bits specify the current operating mode and (according to documentation) they are updated by HW when the operating mode change request is processed. To comply with this check CSR.OPS before proceeding. Commit introduces ravb_set_opmode() that does all the necessities for setting the operating mode (set CCC.OPC (and CCC.GAC, CCC.CSEL, if any) and wait for CSR.OPS) and call it where needed. This should comply with all the HW manuals requirements as different manual variants specify that different modes need to be checked in CSR.OPS when setting CCC.OPC. If gPTP active in config mode is supported and it needs to be enabled, the CCC.GAC and CCC.CSEL needs to be configured along with CCC.OPC in the same write access. For this, ravb_set_opmode() allows passing GAC and CSEL as part of opmode and the function updates accordingly CCC register. Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper") Signed-off-by: Claudiu Beznea Reviewed-by: Sergey Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/ravb_main.c | 65 +++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 664eda4b5a11..8649b3e90edb 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -66,16 +66,27 @@ int ravb_wait(struct net_device *ndev, enum ravb_reg reg, u32 mask, u32 value) return -ETIMEDOUT; } -static int ravb_config(struct net_device *ndev) +static int ravb_set_opmode(struct net_device *ndev, u32 opmode) { + u32 csr_ops = 1U << (opmode & CCC_OPC); + u32 ccc_mask = CCC_OPC; int error; - /* Set config mode */ - ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG); - /* Check if the operating mode is changed to the config mode */ - error = ravb_wait(ndev, CSR, CSR_OPS, CSR_OPS_CONFIG); - if (error) - netdev_err(ndev, "failed to switch device to config mode\n"); + /* If gPTP active in config mode is supported it needs to be configured + * along with CSEL and operating mode in the same access. This is a + * hardware limitation. + */ + if (opmode & CCC_GAC) + ccc_mask |= CCC_GAC | CCC_CSEL; + + /* Set operating mode */ + ravb_modify(ndev, CCC, ccc_mask, opmode); + /* Check if the operating mode is changed to the requested one */ + error = ravb_wait(ndev, CSR, CSR_OPS, csr_ops); + if (error) { + netdev_err(ndev, "failed to switch device to requested mode (%u)\n", + opmode & CCC_OPC); + } return error; } @@ -673,7 +684,7 @@ static int ravb_dmac_init(struct net_device *ndev) int error; /* Set CONFIG mode */ - error = ravb_config(ndev); + error = ravb_set_opmode(ndev, CCC_OPC_CONFIG); if (error) return error; @@ -682,9 +693,7 @@ static int ravb_dmac_init(struct net_device *ndev) return error; /* Setting the control will start the AVB-DMAC process. */ - ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_OPERATION); - - return 0; + return ravb_set_opmode(ndev, CCC_OPC_OPERATION); } static void ravb_get_tx_tstamp(struct net_device *ndev) @@ -1046,7 +1055,7 @@ static int ravb_stop_dma(struct net_device *ndev) return error; /* Stop AVB-DMAC process */ - return ravb_config(ndev); + return ravb_set_opmode(ndev, CCC_OPC_CONFIG); } /* E-MAC interrupt handler */ @@ -2560,21 +2569,25 @@ static int ravb_set_gti(struct net_device *ndev) return 0; } -static void ravb_set_config_mode(struct net_device *ndev) +static int ravb_set_config_mode(struct net_device *ndev) { struct ravb_private *priv = netdev_priv(ndev); const struct ravb_hw_info *info = priv->info; + int error; if (info->gptp) { - ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG); + error = ravb_set_opmode(ndev, CCC_OPC_CONFIG); + if (error) + return error; /* Set CSEL value */ ravb_modify(ndev, CCC, CCC_CSEL, CCC_CSEL_HPB); } else if (info->ccc_gac) { - ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG | - CCC_GAC | CCC_CSEL_HPB); + error = ravb_set_opmode(ndev, CCC_OPC_CONFIG | CCC_GAC | CCC_CSEL_HPB); } else { - ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG); + error = ravb_set_opmode(ndev, CCC_OPC_CONFIG); } + + return error; } /* Set tx and rx clock internal delay modes */ @@ -2794,7 +2807,9 @@ static int ravb_probe(struct platform_device *pdev) ndev->ethtool_ops = &ravb_ethtool_ops; /* Set AVB config mode */ - ravb_set_config_mode(ndev); + error = ravb_set_config_mode(ndev); + if (error) + goto out_disable_gptp_clk; if (info->gptp || info->ccc_gac) { /* Set GTI value */ @@ -2917,8 +2932,7 @@ static void ravb_remove(struct platform_device *pdev) dma_free_coherent(ndev->dev.parent, priv->desc_bat_size, priv->desc_bat, priv->desc_bat_dma); - /* Set reset mode */ - ravb_write(ndev, CCC_OPC_RESET, CCC); + ravb_set_opmode(ndev, CCC_OPC_RESET); clk_disable_unprepare(priv->gptp_clk); clk_disable_unprepare(priv->refclk); @@ -3000,8 +3014,11 @@ static int __maybe_unused ravb_resume(struct device *dev) int ret = 0; /* If WoL is enabled set reset mode to rearm the WoL logic */ - if (priv->wol_enabled) - ravb_write(ndev, CCC_OPC_RESET, CCC); + if (priv->wol_enabled) { + ret = ravb_set_opmode(ndev, CCC_OPC_RESET); + if (ret) + return ret; + } /* All register have been reset to default values. * Restore all registers which where setup at probe time and @@ -3009,7 +3026,9 @@ static int __maybe_unused ravb_resume(struct device *dev) */ /* Set AVB config mode */ - ravb_set_config_mode(ndev); + ret = ravb_set_config_mode(ndev); + if (ret) + return ret; if (info->gptp || info->ccc_gac) { /* Set GTI value */ -- cgit v1.2.3-59-g8ed1b From e009b2efb7a8850498796b360043ac25c8d3d28f Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 3 Jan 2024 16:59:24 -0800 Subject: bnxt_en: Remove mis-applied code from bnxt_cfg_ntp_filters() The 2 lines to check for the BNXT_HWRM_PF_UNLOAD_SP_EVENT bit was mis-applied to bnxt_cfg_ntp_filters() and should have been applied to bnxt_sp_task(). Fixes: 19241368443f ("bnxt_en: Send PF driver unload notification to all VFs.") Reviewed-by: Andy Gospodarek Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 579eebb6fc56..e1f1e646cf48 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -12093,6 +12093,8 @@ static void bnxt_sp_task(struct work_struct *work) bnxt_cfg_ntp_filters(bp); if (test_and_clear_bit(BNXT_HWRM_EXEC_FWD_REQ_SP_EVENT, &bp->sp_event)) bnxt_hwrm_exec_fwd_req(bp); + if (test_and_clear_bit(BNXT_HWRM_PF_UNLOAD_SP_EVENT, &bp->sp_event)) + netdev_info(bp->dev, "Receive PF driver unload event!\n"); if (test_and_clear_bit(BNXT_PERIODIC_STATS_SP_EVENT, &bp->sp_event)) { bnxt_hwrm_port_qstats(bp, 0); bnxt_hwrm_port_qstats_ext(bp, 0); @@ -13093,8 +13095,6 @@ static void bnxt_cfg_ntp_filters(struct bnxt *bp) } } } - if (test_and_clear_bit(BNXT_HWRM_PF_UNLOAD_SP_EVENT, &bp->sp_event)) - netdev_info(bp->dev, "Receive PF driver unload event!\n"); } #else -- cgit v1.2.3-59-g8ed1b From 382a32018b74f407008615e0e831d05ed28e81cd Mon Sep 17 00:00:00 2001 From: Thomas Lange Date: Thu, 4 Jan 2024 09:57:44 +0100 Subject: net: Implement missing SO_TIMESTAMPING_NEW cmsg support Commit 9718475e6908 ("socket: Add SO_TIMESTAMPING_NEW") added the new socket option SO_TIMESTAMPING_NEW. However, it was never implemented in __sock_cmsg_send thus breaking SO_TIMESTAMPING cmsg for platforms using SO_TIMESTAMPING_NEW. Fixes: 9718475e6908 ("socket: Add SO_TIMESTAMPING_NEW") Link: https://lore.kernel.org/netdev/6a7281bf-bc4a-4f75-bb88-7011908ae471@app.fastmail.com/ Signed-off-by: Thomas Lange Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20240104085744.49164-1-thomas@corelatus.se Signed-off-by: Jakub Kicinski --- net/core/sock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/sock.c b/net/core/sock.c index 51d52859e942..d02534c77413 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2813,6 +2813,7 @@ int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg, sockc->mark = *(u32 *)CMSG_DATA(cmsg); break; case SO_TIMESTAMPING_OLD: + case SO_TIMESTAMPING_NEW: if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) return -EINVAL; -- cgit v1.2.3-59-g8ed1b From 4c8530dc7d7da4abe97d65e8e038ce9852491369 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 4 Jan 2024 13:42:39 +0000 Subject: net/tcp: Only produce AO/MD5 logs if there are any keys User won't care about inproper hash options in the TCP header if they don't use neither TCP-AO nor TCP-MD5. Yet, those logs can add up in syslog, while not being a real concern to the host admin: > kernel: TCP: TCP segment has incorrect auth options set for XX.20.239.12.54681->XX.XX.90.103.80 [S] Keep silent and avoid logging when there aren't any keys in the system. Side-note: I also defined static_branch_tcp_*() helpers to avoid more ifdeffery, going to remove more ifdeffery further with their help. Reported-by: Christian Kujau Closes: https://lore.kernel.org/all/f6b59324-1417-566f-a976-ff2402718a8d@nerdbynature.de/ Signed-off-by: Dmitry Safonov Reviewed-by: Eric Dumazet Fixes: 2717b5adea9e ("net/tcp: Add tcp_hash_fail() ratelimited logs") Link: https://lore.kernel.org/r/20240104-tcp_hash_fail-logs-v1-1-ff3e1f6f9e72@arista.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 2 -- include/net/tcp_ao.h | 26 +++++++++++++++++++++++--- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 144ba48bb07b..87f0e6c2e1f2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1788,8 +1788,6 @@ struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk, const struct sock *addr_sk); #ifdef CONFIG_TCP_MD5SIG -#include -extern struct static_key_false_deferred tcp_md5_needed; struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index, const union tcp_md5_addr *addr, int family, bool any_l3index); diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index 647781080613..b04afced4cc9 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -127,12 +127,35 @@ struct tcp_ao_info { struct rcu_head rcu; }; +#ifdef CONFIG_TCP_MD5SIG +#include +extern struct static_key_false_deferred tcp_md5_needed; +#define static_branch_tcp_md5() static_branch_unlikely(&tcp_md5_needed.key) +#else +#define static_branch_tcp_md5() false +#endif +#ifdef CONFIG_TCP_AO +/* TCP-AO structures and functions */ +#include +extern struct static_key_false_deferred tcp_ao_needed; +#define static_branch_tcp_ao() static_branch_unlikely(&tcp_ao_needed.key) +#else +#define static_branch_tcp_ao() false +#endif + +static inline bool tcp_hash_should_produce_warnings(void) +{ + return static_branch_tcp_md5() || static_branch_tcp_ao(); +} + #define tcp_hash_fail(msg, family, skb, fmt, ...) \ do { \ const struct tcphdr *th = tcp_hdr(skb); \ char hdr_flags[6]; \ char *f = hdr_flags; \ \ + if (!tcp_hash_should_produce_warnings()) \ + break; \ if (th->fin) \ *f++ = 'F'; \ if (th->syn) \ @@ -159,9 +182,6 @@ do { \ #ifdef CONFIG_TCP_AO /* TCP-AO structures and functions */ -#include -extern struct static_key_false_deferred tcp_ao_needed; - struct tcp4_ao_context { __be32 saddr; __be32 daddr; -- cgit v1.2.3-59-g8ed1b