From 88f46b3fe2ac41c381770ebad9f2ee49346b57a2 Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Mon, 20 Jul 2020 22:33:15 +0800 Subject: ieee802154: fix one possible memleak in ca8210_dev_com_init We should call destroy_workqueue to destroy mlme_workqueue in error branch. Fixes: ded845a781a5 ("ieee802154: Add CA8210 IEEE 802.15.4 device driver") Signed-off-by: Liu Jian Link: https://lore.kernel.org/r/20200720143315.40523-1-liujian56@huawei.com Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/ca8210.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index e04c3b60cae7..4eb64709d44c 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -2925,6 +2925,7 @@ static int ca8210_dev_com_init(struct ca8210_priv *priv) ); if (!priv->irq_workqueue) { dev_crit(&priv->spi->dev, "alloc of irq_workqueue failed!\n"); + destroy_workqueue(priv->mlme_workqueue); return -ENOMEM; } -- cgit v1.2.3-59-g8ed1b From e3914ed6cf44bfe1f169e26241f8314556fd1ac1 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Sun, 2 Aug 2020 07:23:39 -0700 Subject: ieee802154/adf7242: check status of adf7242_read_reg Clang static analysis reports this error adf7242.c:887:6: warning: Assigned value is garbage or undefined len = len_u8; ^ ~~~~~~ len_u8 is set in adf7242_read_reg(lp, 0, &len_u8); When this call fails, len_u8 is not set. So check the return code. Fixes: 7302b9d90117 ("ieee802154/adf7242: Driver for ADF7242 MAC IEEE802154") Signed-off-by: Tom Rix Acked-by: Michael Hennerich Link: https://lore.kernel.org/r/20200802142339.21091-1-trix@redhat.com Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/adf7242.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/adf7242.c b/drivers/net/ieee802154/adf7242.c index c11f32f644db..7db9cbd0f5de 100644 --- a/drivers/net/ieee802154/adf7242.c +++ b/drivers/net/ieee802154/adf7242.c @@ -882,7 +882,9 @@ static int adf7242_rx(struct adf7242_local *lp) int ret; u8 lqi, len_u8, *data; - adf7242_read_reg(lp, 0, &len_u8); + ret = adf7242_read_reg(lp, 0, &len_u8); + if (ret) + return ret; len = len_u8; -- cgit v1.2.3-59-g8ed1b From 9a2a0862d973c9eab6a1f3efa0d4d18dcedbaf1a Mon Sep 17 00:00:00 2001 From: Wright Feng Date: Thu, 13 Aug 2020 02:00:16 -0500 Subject: brcmfmac: reserve tx credit only when txctl is ready to send The 4329 throughput drops from 40.2 Mbits/sec to 190 Kbits/sec in 2G 11n mode because the commit b41c232d33666 ("brcmfmac: reserve 2 credits for host tx control path"). To fix the issue, host driver only reserves tx control credit when there is a txctl frame is pending to send. And we also check available credit by using "not equal to 0" instead of "greater than 0" because tx_max and tx_seq are circled positive numbers. Reported-by: Dmitry Osipenko Fixes: b41c232d33666 ("brcmfmac: reserve 2 credits for host tx control path") Signed-off-by: Wright Feng Tested-by: Dmitry Osipenko Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20200813070017.89023-1-wright.feng@cypress.com --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index e8712ad3ac45..3c07d1bbe1c6 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -664,9 +664,15 @@ static void pkt_align(struct sk_buff *p, int len, int align) /* To check if there's window offered */ static bool data_ok(struct brcmf_sdio *bus) { - /* Reserve TXCTL_CREDITS credits for txctl */ - return (bus->tx_max - bus->tx_seq) > TXCTL_CREDITS && - ((bus->tx_max - bus->tx_seq) & 0x80) == 0; + u8 tx_rsv = 0; + + /* Reserve TXCTL_CREDITS credits for txctl when it is ready to send */ + if (bus->ctrl_frame_stat) + tx_rsv = TXCTL_CREDITS; + + return (bus->tx_max - bus->tx_seq - tx_rsv) != 0 && + ((bus->tx_max - bus->tx_seq - tx_rsv) & 0x80) == 0; + } /* To check if there's window offered */ -- cgit v1.2.3-59-g8ed1b From d1c9da9e4c938e8bbf8b0ef9e5772b97db5639e9 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 12 Aug 2020 12:23:32 +0200 Subject: mt76: mt7615: use v1 MCU API on MT7615 to fix issues with adding/removing stations The implementation of embedding WTBL update inside the STA_REC update is buggy on the MT7615 v2 firmware. This leads to connection issues after a station has connected and disconnected again. Switch to the v1 MCU API ops, since they have received much more testing and should be more stable. On MT7622 and later, the v2 API is more actively used, so we should keep using it as well. Fixes: 6849e29ed92e ("mt76: mt7615: add starec operating flow for firmware v2") Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20200812102332.11812-1-nbd@nbd.name --- drivers/net/wireless/mediatek/mt76/mt7615/mcu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c index d0cbb283982f..bd316dbd9041 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c @@ -2128,7 +2128,8 @@ static int mt7615_load_n9(struct mt7615_dev *dev, const char *name) sizeof(dev->mt76.hw->wiphy->fw_version), "%.10s-%.15s", hdr->fw_ver, hdr->build_date); - if (!strncmp(hdr->fw_ver, "2.0", sizeof(hdr->fw_ver))) { + if (!is_mt7615(&dev->mt76) && + !strncmp(hdr->fw_ver, "2.0", sizeof(hdr->fw_ver))) { dev->fw_ver = MT7615_FIRMWARE_V2; dev->mcu_ops = &sta_update_ops; } else { -- cgit v1.2.3-59-g8ed1b From b4be5a53ebf478ffcfb4c98c0ccc4a8d922b9a02 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 12 Aug 2020 16:49:43 +0200 Subject: mt76: mt7915: use ieee80211_free_txskb to free tx skbs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using dev_kfree_skb for tx skbs breaks AQL. This worked until now only by accident, because a mac80211 issue breaks AQL on drivers with firmware rate control that report the rate via ieee80211_tx_status_ext as struct rate_info. Signed-off-by: Felix Fietkau Acked-by: Toke Høiland-Jørgensen Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20200812144943.91974-1-nbd@nbd.name --- drivers/net/wireless/mediatek/mt76/mt7915/init.c | 8 ++++++-- drivers/net/wireless/mediatek/mt76/mt7915/mac.c | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c index e90d0087e377..8d6ceb3b67b4 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c @@ -699,8 +699,12 @@ void mt7915_unregister_device(struct mt7915_dev *dev) spin_lock_bh(&dev->token_lock); idr_for_each_entry(&dev->token, txwi, id) { mt7915_txp_skb_unmap(&dev->mt76, txwi); - if (txwi->skb) - dev_kfree_skb_any(txwi->skb); + if (txwi->skb) { + struct ieee80211_hw *hw; + + hw = mt76_tx_status_get_hw(&dev->mt76, txwi->skb); + ieee80211_free_txskb(hw, txwi->skb); + } mt76_put_txwi(&dev->mt76, txwi); } spin_unlock_bh(&dev->token_lock); diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c index 6825afca1efb..036207f828f3 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c @@ -841,7 +841,7 @@ mt7915_tx_complete_status(struct mt76_dev *mdev, struct sk_buff *skb, if (sta || !(info->flags & IEEE80211_TX_CTL_NO_ACK)) mt7915_tx_status(sta, hw, info, NULL); - dev_kfree_skb(skb); + ieee80211_free_txskb(hw, skb); } void mt7915_txp_skb_unmap(struct mt76_dev *dev, -- cgit v1.2.3-59-g8ed1b From 4afc850e2e9e781976fb2c7852ce7bac374af938 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Tue, 25 Aug 2020 17:38:29 +0200 Subject: mwifiex: Increase AES key storage size to 256 bits Following commit e18696786548 ("mwifiex: Prevent memory corruption handling keys") the mwifiex driver fails to authenticate with certain networks, specifically networks with 256 bit keys, and repeatedly asks for the password. The kernel log repeats the following lines (id and bssid redacted): mwifiex_pcie 0000:01:00.0: info: trying to associate to '' bssid mwifiex_pcie 0000:01:00.0: info: associated to bssid successfully mwifiex_pcie 0000:01:00.0: crypto keys added mwifiex_pcie 0000:01:00.0: info: successfully disconnected from : reason code 3 Tracking down this problem lead to the overflow check introduced by the aforementioned commit into mwifiex_ret_802_11_key_material_v2(). This check fails on networks with 256 bit keys due to the current storage size for AES keys in struct mwifiex_aes_param being only 128 bit. To fix this issue, increase the storage size for AES keys to 256 bit. Fixes: e18696786548 ("mwifiex: Prevent memory corruption handling keys") Signed-off-by: Maximilian Luz Reported-by: Kaloyan Nikolov Tested-by: Kaloyan Nikolov Reviewed-by: Dan Carpenter Reviewed-by: Brian Norris Tested-by: Brian Norris Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20200825153829.38043-1-luzmaximilian@gmail.com --- drivers/net/wireless/marvell/mwifiex/fw.h | 2 +- drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/fw.h b/drivers/net/wireless/marvell/mwifiex/fw.h index 8047e307892e..d9f8bdbc817b 100644 --- a/drivers/net/wireless/marvell/mwifiex/fw.h +++ b/drivers/net/wireless/marvell/mwifiex/fw.h @@ -954,7 +954,7 @@ struct mwifiex_tkip_param { struct mwifiex_aes_param { u8 pn[WPA_PN_SIZE]; __le16 key_len; - u8 key[WLAN_KEY_LEN_CCMP]; + u8 key[WLAN_KEY_LEN_CCMP_256]; } __packed; struct mwifiex_wapi_param { diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c index 962d8bfe6f10..119ccacd1fcc 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c @@ -619,7 +619,7 @@ static int mwifiex_ret_802_11_key_material_v2(struct mwifiex_private *priv, key_v2 = &resp->params.key_material_v2; len = le16_to_cpu(key_v2->key_param_set.key_params.aes.key_len); - if (len > WLAN_KEY_LEN_CCMP) + if (len > sizeof(key_v2->key_param_set.key_params.aes.key)) return -EINVAL; if (le16_to_cpu(key_v2->action) == HostCmd_ACT_GEN_SET) { @@ -635,7 +635,7 @@ static int mwifiex_ret_802_11_key_material_v2(struct mwifiex_private *priv, return 0; memset(priv->aes_key_v2.key_param_set.key_params.aes.key, 0, - WLAN_KEY_LEN_CCMP); + sizeof(key_v2->key_param_set.key_params.aes.key)); priv->aes_key_v2.key_param_set.key_params.aes.key_len = cpu_to_le16(len); memcpy(priv->aes_key_v2.key_param_set.key_params.aes.key, -- cgit v1.2.3-59-g8ed1b From 097930e85f90f252c44dc0d084598265dd44ca48 Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Thu, 27 Aug 2020 17:34:48 +0200 Subject: batman-adv: bla: fix type misuse for backbone_gw hash indexing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It seems that due to a copy & paste error the void pointer in batadv_choose_backbone_gw() is cast to the wrong type. Fixing this by using "struct batadv_bla_backbone_gw" instead of "struct batadv_bla_claim" which better matches the caller's side. For now it seems that we were lucky because the two structs both have their orig/vid and addr/vid in the beginning. However I stumbled over this issue when I was trying to add some debug variables in front of "orig" in batadv_backbone_gw, which caused hash lookups to fail. Fixes: 07568d0369f9 ("batman-adv: don't rely on positions in struct for hashing") Signed-off-by: Linus Lüssing Signed-off-by: Sven Eckelmann --- net/batman-adv/bridge_loop_avoidance.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 8500f56cbd10..d8c5d3170676 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -83,11 +83,12 @@ static inline u32 batadv_choose_claim(const void *data, u32 size) */ static inline u32 batadv_choose_backbone_gw(const void *data, u32 size) { - const struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data; + const struct batadv_bla_backbone_gw *gw; u32 hash = 0; - hash = jhash(&claim->addr, sizeof(claim->addr), hash); - hash = jhash(&claim->vid, sizeof(claim->vid), hash); + gw = (struct batadv_bla_backbone_gw *)data; + hash = jhash(&gw->orig, sizeof(gw->orig), hash); + hash = jhash(&gw->vid, sizeof(gw->vid), hash); return hash % size; } -- cgit v1.2.3-59-g8ed1b From 3168c158ad3535af1cd7423c9f8cd5ac549f2f9c Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Sun, 30 Aug 2020 17:03:04 -0700 Subject: libbpf: Fix build failure from uninitialized variable warning While compiling libbpf, some GCC versions (at least 8.4.0) have difficulty determining control flow and a emit warning for potentially uninitialized usage of 'map', which results in a build error if using "-Werror": In file included from libbpf.c:56: libbpf.c: In function '__bpf_object__open': libbpf_internal.h:59:2: warning: 'map' may be used uninitialized in this function [-Wmaybe-uninitialized] libbpf_print(level, "libbpf: " fmt, ##__VA_ARGS__); \ ^~~~~~~~~~~~ libbpf.c:5032:18: note: 'map' was declared here struct bpf_map *map, *targ_map; ^~~ The warning/error is false based on code inspection, so silence it with a NULL initialization. Fixes: 646f02ffdd49 ("libbpf: Add BTF-defined map-in-map support") Reference: 063e68813391 ("libbpf: Fix false uninitialized variable warning") Signed-off-by: Tony Ambardar Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200831000304.1696435-1-Tony.Ambardar@gmail.com --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 0ad0b0491e1f..7253b833576c 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -5203,8 +5203,8 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj, int i, j, nrels, new_sz; const struct btf_var_secinfo *vi = NULL; const struct btf_type *sec, *var, *def; + struct bpf_map *map = NULL, *targ_map; const struct btf_member *member; - struct bpf_map *map, *targ_map; const char *name, *mname; Elf_Data *symbols; unsigned int moff; -- cgit v1.2.3-59-g8ed1b From 1eb832ac2dee8a350f3fcd5544886f07a3ef8869 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Tue, 1 Sep 2020 16:43:43 +0200 Subject: tools/bpf: build: Make sure resolve_btfids cleans up after itself MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new resolve_btfids tool did not clean up the feature detection folder on 'make clean', and also was not called properly from the clean rule in tools/make/ folder on its 'make clean'. This lead to stale objects being left around, which could cause feature detection to fail on subsequent builds. Fixes: fbbb68de80a4 ("bpf: Add resolve_btfids tool to resolve BTF IDs in ELF object") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Daniel Borkmann Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20200901144343.179552-1-toke@redhat.com --- tools/bpf/Makefile | 4 ++-- tools/bpf/resolve_btfids/Makefile | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile index 0a6d09a3e91f..39bb322707b4 100644 --- a/tools/bpf/Makefile +++ b/tools/bpf/Makefile @@ -38,7 +38,7 @@ FEATURE_TESTS = libbfd disassembler-four-args FEATURE_DISPLAY = libbfd disassembler-four-args check_feat := 1 -NON_CHECK_FEAT_TARGETS := clean bpftool_clean runqslower_clean +NON_CHECK_FEAT_TARGETS := clean bpftool_clean runqslower_clean resolve_btfids_clean ifdef MAKECMDGOALS ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),) check_feat := 0 @@ -89,7 +89,7 @@ $(OUTPUT)bpf_exp.lex.c: $(OUTPUT)bpf_exp.yacc.c $(OUTPUT)bpf_exp.yacc.o: $(OUTPUT)bpf_exp.yacc.c $(OUTPUT)bpf_exp.lex.o: $(OUTPUT)bpf_exp.lex.c -clean: bpftool_clean runqslower_clean +clean: bpftool_clean runqslower_clean resolve_btfids_clean $(call QUIET_CLEAN, bpf-progs) $(Q)$(RM) -r -- $(OUTPUT)*.o $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg \ $(OUTPUT)bpf_asm $(OUTPUT)bpf_exp.yacc.* $(OUTPUT)bpf_exp.lex.* diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index a88cd4426398..fe8eb537688b 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -80,6 +80,7 @@ libbpf-clean: clean: libsubcmd-clean libbpf-clean fixdep-clean $(call msg,CLEAN,$(BINARY)) $(Q)$(RM) -f $(BINARY); \ + $(RM) -rf $(if $(OUTPUT),$(OUTPUT),.)/feature; \ find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM) tags: -- cgit v1.2.3-59-g8ed1b From 21e9ba5373fc2cec608fd68301a1dbfd14df3172 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 2 Sep 2020 14:12:46 +0530 Subject: libbpf: Remove arch-specific include path in Makefile Ubuntu mainline builds for ppc64le are failing with the below error (*): CALL /home/kernel/COD/linux/scripts/atomic/check-atomics.sh DESCEND bpf/resolve_btfids Auto-detecting system features: ... libelf: [ [32mon[m ] ... zlib: [ [32mon[m ] ... bpf: [ [31mOFF[m ] BPF API too old make[6]: *** [Makefile:295: bpfdep] Error 1 make[5]: *** [Makefile:54: /home/kernel/COD/linux/debian/build/build-generic/tools/bpf/resolve_btfids//libbpf.a] Error 2 make[4]: *** [Makefile:71: bpf/resolve_btfids] Error 2 make[3]: *** [/home/kernel/COD/linux/Makefile:1890: tools/bpf/resolve_btfids] Error 2 make[2]: *** [/home/kernel/COD/linux/Makefile:335: __build_one_by_one] Error 2 make[2]: Leaving directory '/home/kernel/COD/linux/debian/build/build-generic' make[1]: *** [Makefile:185: __sub-make] Error 2 make[1]: Leaving directory '/home/kernel/COD/linux' resolve_btfids needs to be build as a host binary and it needs libbpf. However, libbpf Makefile hardcodes an include path utilizing $(ARCH). This results in mixing of cross-architecture headers resulting in a build failure. The specific header include path doesn't seem necessary for a libbpf build. Hence, remove the same. (*) https://kernel.ubuntu.com/~kernel-ppa/mainline/v5.9-rc3/ppc64el/log Reported-by: Vaidyanathan Srinivasan Signed-off-by: Naveen N. Rao Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200902084246.1513055-1-naveen.n.rao@linux.vnet.ibm.com --- tools/lib/bpf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index bf8ed134cb8a..b78484e7a608 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -59,7 +59,7 @@ FEATURE_USER = .libbpf FEATURE_TESTS = libelf libelf-mmap zlib bpf reallocarray FEATURE_DISPLAY = libelf zlib bpf -INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi +INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES) check_feat := 1 -- cgit v1.2.3-59-g8ed1b From dc0988bbe1bd41e2fa555e4a6f890b819a34b49b Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 2 Sep 2020 16:53:40 -0700 Subject: bpf: Do not use bucket_lock for hashmap iterator Currently, for hashmap, the bpf iterator will grab a bucket lock, a spinlock, before traversing the elements in the bucket. This can ensure all bpf visted elements are valid. But this mechanism may cause deadlock if update/deletion happens to the same bucket of the visited map in the program. For example, if we added bpf_map_update_elem() call to the same visited element in selftests bpf_iter_bpf_hash_map.c, we will have the following deadlock: ============================================ WARNING: possible recursive locking detected 5.9.0-rc1+ #841 Not tainted -------------------------------------------- test_progs/1750 is trying to acquire lock: ffff9a5bb73c5e70 (&htab->buckets[i].raw_lock){....}-{2:2}, at: htab_map_update_elem+0x1cf/0x410 but task is already holding lock: ffff9a5bb73c5e20 (&htab->buckets[i].raw_lock){....}-{2:2}, at: bpf_hash_map_seq_find_next+0x94/0x120 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&htab->buckets[i].raw_lock); lock(&htab->buckets[i].raw_lock); *** DEADLOCK *** ... Call Trace: dump_stack+0x78/0xa0 __lock_acquire.cold.74+0x209/0x2e3 lock_acquire+0xba/0x380 ? htab_map_update_elem+0x1cf/0x410 ? __lock_acquire+0x639/0x20c0 _raw_spin_lock_irqsave+0x3b/0x80 ? htab_map_update_elem+0x1cf/0x410 htab_map_update_elem+0x1cf/0x410 ? lock_acquire+0xba/0x380 bpf_prog_ad6dab10433b135d_dump_bpf_hash_map+0x88/0xa9c ? find_held_lock+0x34/0xa0 bpf_iter_run_prog+0x81/0x16e __bpf_hash_map_seq_show+0x145/0x180 bpf_seq_read+0xff/0x3d0 vfs_read+0xad/0x1c0 ksys_read+0x5f/0xe0 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 ... The bucket_lock first grabbed in seq_ops->next() called by bpf_seq_read(), and then grabbed again in htab_map_update_elem() in the bpf program, causing deadlocks. Actually, we do not need bucket_lock here, we can just use rcu_read_lock() similar to netlink iterator where the rcu_read_{lock,unlock} likes below: seq_ops->start(): rcu_read_lock(); seq_ops->next(): rcu_read_unlock(); /* next element */ rcu_read_lock(); seq_ops->stop(); rcu_read_unlock(); Compared to old bucket_lock mechanism, if concurrent updata/delete happens, we may visit stale elements, miss some elements, or repeat some elements. I think this is a reasonable compromise. For users wanting to avoid stale, missing/repeated accesses, bpf_map batch access syscall interface can be used. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200902235340.2001375-1-yhs@fb.com --- kernel/bpf/hashtab.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 78dfff6a501b..7df28a45c66b 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -1622,7 +1622,6 @@ struct bpf_iter_seq_hash_map_info { struct bpf_map *map; struct bpf_htab *htab; void *percpu_value_buf; // non-zero means percpu hash - unsigned long flags; u32 bucket_id; u32 skip_elems; }; @@ -1632,7 +1631,6 @@ bpf_hash_map_seq_find_next(struct bpf_iter_seq_hash_map_info *info, struct htab_elem *prev_elem) { const struct bpf_htab *htab = info->htab; - unsigned long flags = info->flags; u32 skip_elems = info->skip_elems; u32 bucket_id = info->bucket_id; struct hlist_nulls_head *head; @@ -1656,19 +1654,18 @@ bpf_hash_map_seq_find_next(struct bpf_iter_seq_hash_map_info *info, /* not found, unlock and go to the next bucket */ b = &htab->buckets[bucket_id++]; - htab_unlock_bucket(htab, b, flags); + rcu_read_unlock(); skip_elems = 0; } for (i = bucket_id; i < htab->n_buckets; i++) { b = &htab->buckets[i]; - flags = htab_lock_bucket(htab, b); + rcu_read_lock(); count = 0; head = &b->head; hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) { if (count >= skip_elems) { - info->flags = flags; info->bucket_id = i; info->skip_elems = count; return elem; @@ -1676,7 +1673,7 @@ bpf_hash_map_seq_find_next(struct bpf_iter_seq_hash_map_info *info, count++; } - htab_unlock_bucket(htab, b, flags); + rcu_read_unlock(); skip_elems = 0; } @@ -1754,14 +1751,10 @@ static int bpf_hash_map_seq_show(struct seq_file *seq, void *v) static void bpf_hash_map_seq_stop(struct seq_file *seq, void *v) { - struct bpf_iter_seq_hash_map_info *info = seq->private; - if (!v) (void)__bpf_hash_map_seq_show(seq, NULL); else - htab_unlock_bucket(info->htab, - &info->htab->buckets[info->bucket_id], - info->flags); + rcu_read_unlock(); } static int bpf_iter_init_hash_map(void *priv_data, -- cgit v1.2.3-59-g8ed1b From 4daab7132731ac5ec9384c8a070cdb9607dc38c8 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 2 Sep 2020 16:53:41 -0700 Subject: selftests/bpf: Add bpf_{update, delete}_map_elem in hashmap iter program Added bpf_{updata,delete}_map_elem to the very map element the iter program is visiting. Due to rcu protection, the visited map elements, although stale, should still contain correct values. $ ./test_progs -n 4/18 #4/18 bpf_hash_map:OK #4 bpf_iter:OK Summary: 1/1 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200902235341.2001534-1-yhs@fb.com --- tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c index 07ddbfdbcab7..6dfce3fd68bc 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c @@ -47,7 +47,10 @@ int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx) __u32 seq_num = ctx->meta->seq_num; struct bpf_map *map = ctx->map; struct key_t *key = ctx->key; + struct key_t tmp_key; __u64 *val = ctx->value; + __u64 tmp_val = 0; + int ret; if (in_test_mode) { /* test mode is used by selftests to @@ -61,6 +64,18 @@ int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx) if (key == (void *)0 || val == (void *)0) return 0; + /* update the value and then delete the pair. + * it should not impact the existing 'val' which is still + * accessible under rcu. + */ + __builtin_memcpy(&tmp_key, key, sizeof(struct key_t)); + ret = bpf_map_update_elem(&hashmap1, &tmp_key, &tmp_val, 0); + if (ret) + return 0; + ret = bpf_map_delete_elem(&hashmap1, &tmp_key); + if (ret) + return 0; + key_sum_a += key->a; key_sum_b += key->b; key_sum_c += key->c; -- cgit v1.2.3-59-g8ed1b From 44a049c42681de71c783d75cd6e56b4e339488b0 Mon Sep 17 00:00:00 2001 From: Xie He Date: Wed, 2 Sep 2020 17:06:58 -0700 Subject: drivers/net/wan/hdlc_fr: Add needed_headroom for PVC devices PVC devices are virtual devices in this driver stacked on top of the actual HDLC device. They are the devices normal users would use. PVC devices have two types: normal PVC devices and Ethernet-emulating PVC devices. When transmitting data with PVC devices, the ndo_start_xmit function will prepend a header of 4 or 10 bytes. Currently this driver requests this headroom to be reserved for normal PVC devices by setting their hard_header_len to 10. However, this does not work when these devices are used with AF_PACKET/RAW sockets. Also, this driver does not request this headroom for Ethernet-emulating PVC devices (but deals with this problem by reallocating the skb when needed, which is not optimal). This patch replaces hard_header_len with needed_headroom, and set needed_headroom for Ethernet-emulating PVC devices, too. This makes the driver to request headroom for all PVC devices in all cases. Cc: Krzysztof Halasa Cc: Martin Schiller Signed-off-by: Xie He Signed-off-by: Jakub Kicinski --- drivers/net/wan/hdlc_fr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c index 9acad651ea1f..12b35404cd8e 100644 --- a/drivers/net/wan/hdlc_fr.c +++ b/drivers/net/wan/hdlc_fr.c @@ -1041,7 +1041,7 @@ static void pvc_setup(struct net_device *dev) { dev->type = ARPHRD_DLCI; dev->flags = IFF_POINTOPOINT; - dev->hard_header_len = 10; + dev->hard_header_len = 0; dev->addr_len = 2; netif_keep_dst(dev); } @@ -1093,6 +1093,7 @@ static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type) dev->mtu = HDLC_MAX_MTU; dev->min_mtu = 68; dev->max_mtu = HDLC_MAX_MTU; + dev->needed_headroom = 10; dev->priv_flags |= IFF_NO_QUEUE; dev->ml_priv = pvc; -- cgit v1.2.3-59-g8ed1b From c2b947879ca320ac5505c6c29a731ff17da5e805 Mon Sep 17 00:00:00 2001 From: Jing Xiangfeng Date: Fri, 4 Sep 2020 10:51:03 +0800 Subject: atm: eni: fix the missed pci_disable_device() for eni_init_one() eni_init_one() misses to call pci_disable_device() in an error path. Jump to err_disable to fix it. Fixes: ede58ef28e10 ("atm: remove deprecated use of pci api") Signed-off-by: Jing Xiangfeng Signed-off-by: Jakub Kicinski --- drivers/atm/eni.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index 39be444534d0..316a9947541f 100644 --- a/drivers/atm/eni.c +++ b/drivers/atm/eni.c @@ -2224,7 +2224,7 @@ static int eni_init_one(struct pci_dev *pci_dev, rc = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32)); if (rc < 0) - goto out; + goto err_disable; rc = -ENOMEM; eni_dev = kmalloc(sizeof(struct eni_dev), GFP_KERNEL); -- cgit v1.2.3-59-g8ed1b From cc8e58f8325cdf14b9516b61c384cdfd02a4f408 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 3 Sep 2020 19:10:11 -0700 Subject: act_ife: load meta modules before tcf_idr_check_alloc() The following deadlock scenario is triggered by syzbot: Thread A: Thread B: tcf_idr_check_alloc() ... populate_metalist() rtnl_unlock() rtnl_lock() ... request_module() tcf_idr_check_alloc() rtnl_lock() At this point, thread A is waiting for thread B to release RTNL lock, while thread B is waiting for thread A to commit the IDR change with tcf_idr_insert() later. Break this deadlock situation by preloading ife modules earlier, before tcf_idr_check_alloc(), this is fine because we only need to load modules we need potentially. Reported-and-tested-by: syzbot+80e32b5d1f9923f8ace6@syzkaller.appspotmail.com Fixes: 0190c1d452a9 ("net: sched: atomically check-allocate action") Cc: Jamal Hadi Salim Cc: Vlad Buslov Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: Jakub Kicinski --- net/sched/act_ife.c | 44 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index c1fcd85719d6..5c568757643b 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -436,6 +436,25 @@ static void tcf_ife_cleanup(struct tc_action *a) kfree_rcu(p, rcu); } +static int load_metalist(struct nlattr **tb, bool rtnl_held) +{ + int i; + + for (i = 1; i < max_metacnt; i++) { + if (tb[i]) { + void *val = nla_data(tb[i]); + int len = nla_len(tb[i]); + int rc; + + rc = load_metaops_and_vet(i, val, len, rtnl_held); + if (rc != 0) + return rc; + } + } + + return 0; +} + static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, bool exists, bool rtnl_held) { @@ -449,10 +468,6 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, val = nla_data(tb[i]); len = nla_len(tb[i]); - rc = load_metaops_and_vet(i, val, len, rtnl_held); - if (rc != 0) - return rc; - rc = add_metainfo(ife, i, val, len, exists); if (rc) return rc; @@ -509,6 +524,21 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, if (!p) return -ENOMEM; + if (tb[TCA_IFE_METALST]) { + err = nla_parse_nested_deprecated(tb2, IFE_META_MAX, + tb[TCA_IFE_METALST], NULL, + NULL); + if (err) { + kfree(p); + return err; + } + err = load_metalist(tb2, rtnl_held); + if (err) { + kfree(p); + return err; + } + } + index = parm->index; err = tcf_idr_check_alloc(tn, &index, a, bind); if (err < 0) { @@ -570,15 +600,9 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, } if (tb[TCA_IFE_METALST]) { - err = nla_parse_nested_deprecated(tb2, IFE_META_MAX, - tb[TCA_IFE_METALST], NULL, - NULL); - if (err) - goto metadata_parse_err; err = populate_metalist(ife, tb2, exists, rtnl_held); if (err) goto metadata_parse_err; - } else { /* if no passed metadata allow list or passed allow-all * then here we process by adding as many supported metadatum -- cgit v1.2.3-59-g8ed1b From 7dda5b3384121181c4e79f6eaeac2b94c0622c8d Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Fri, 4 Sep 2020 20:28:00 +0200 Subject: batman-adv: mcast/TT: fix wrongly dropped or rerouted packets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The unicast packet rerouting code makes several assumptions. For instance it assumes that there is always exactly one destination in the TT. This breaks for multicast frames in a unicast packets in several ways: For one thing if there is actually no TT entry and the destination node was selected due to the multicast tvlv flags it announced. Then an intermediate node will wrongly drop the packet. For another thing if there is a TT entry but the TTVN of this entry is newer than the originally addressed destination node: Then the intermediate node will wrongly redirect the packet, leading to duplicated multicast packets at a multicast listener and missing packets at other multicast listeners or multicast routers. Fixing this by not applying the unicast packet rerouting to batman-adv unicast packets with a multicast payload. We are not able to detect a roaming multicast listener at the moment and will just continue to send the multicast frame to both the new and old destination for a while in case of such a roaming multicast listener. Fixes: a73105b8d4c7 ("batman-adv: improved client announcement mechanism") Signed-off-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/routing.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 27cdf5e4349a..9e5c71e406ff 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -826,6 +826,10 @@ static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, vid = batadv_get_vid(skb, hdr_len); ethhdr = (struct ethhdr *)(skb->data + hdr_len); + /* do not reroute multicast frames in a unicast header */ + if (is_multicast_ether_addr(ethhdr->h_dest)) + return true; + /* check if the destination client was served by this node and it is now * roaming. In this case, it means that the node has got a ROAM_ADV * message and that it knows the new destination in the mesh to re-route -- cgit v1.2.3-59-g8ed1b From 4e4269ebe7e18038fffacf113e2dd5ded6d49942 Mon Sep 17 00:00:00 2001 From: Luo bin Date: Fri, 4 Sep 2020 16:37:28 +0800 Subject: hinic: bump up the timeout of SET_FUNC_STATE cmd We free memory regardless of the return value of SET_FUNC_STATE cmd in hinic_close function to avoid memory leak and this cmd may timeout when fw is busy with handling other cmds, so we bump up the timeout of this cmd to ensure it won't return failure. Fixes: 00e57a6d4ad3 ("net-next/hinic: Add Tx operation") Signed-off-by: Luo bin Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c index c6ce5966284c..0d56c6ceccd9 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c @@ -47,6 +47,8 @@ #define MGMT_MSG_TIMEOUT 5000 +#define SET_FUNC_PORT_MBOX_TIMEOUT 30000 + #define SET_FUNC_PORT_MGMT_TIMEOUT 25000 #define mgmt_to_pfhwdev(pf_mgmt) \ @@ -361,16 +363,20 @@ int hinic_msg_to_mgmt(struct hinic_pf_to_mgmt *pf_to_mgmt, return -EINVAL; } - if (cmd == HINIC_PORT_CMD_SET_FUNC_STATE) - timeout = SET_FUNC_PORT_MGMT_TIMEOUT; + if (HINIC_IS_VF(hwif)) { + if (cmd == HINIC_PORT_CMD_SET_FUNC_STATE) + timeout = SET_FUNC_PORT_MBOX_TIMEOUT; - if (HINIC_IS_VF(hwif)) return hinic_mbox_to_pf(pf_to_mgmt->hwdev, mod, cmd, buf_in, - in_size, buf_out, out_size, 0); - else + in_size, buf_out, out_size, timeout); + } else { + if (cmd == HINIC_PORT_CMD_SET_FUNC_STATE) + timeout = SET_FUNC_PORT_MGMT_TIMEOUT; + return msg_to_mgmt_sync(pf_to_mgmt, mod, cmd, buf_in, in_size, buf_out, out_size, MGMT_DIRECT_SEND, MSG_NOT_RESP, timeout); + } } static void recv_mgmt_msg_work_handler(struct work_struct *work) -- cgit v1.2.3-59-g8ed1b From 0c97ee5fbdca1be3cc7af730832170f4f7c38e68 Mon Sep 17 00:00:00 2001 From: Luo bin Date: Fri, 4 Sep 2020 16:37:29 +0800 Subject: hinic: bump up the timeout of UPDATE_FW cmd Firmware erases the entire flash region which may take several seconds before flashing, so we bump up the timeout to ensure this cmd won't return failure. Fixes: 5e126e7c4e52 ("hinic: add firmware update support") Signed-off-by: Luo bin Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c index 0d56c6ceccd9..2ebae6cb5db5 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c @@ -51,6 +51,8 @@ #define SET_FUNC_PORT_MGMT_TIMEOUT 25000 +#define UPDATE_FW_MGMT_TIMEOUT 20000 + #define mgmt_to_pfhwdev(pf_mgmt) \ container_of(pf_mgmt, struct hinic_pfhwdev, pf_to_mgmt) @@ -372,6 +374,8 @@ int hinic_msg_to_mgmt(struct hinic_pf_to_mgmt *pf_to_mgmt, } else { if (cmd == HINIC_PORT_CMD_SET_FUNC_STATE) timeout = SET_FUNC_PORT_MGMT_TIMEOUT; + else if (cmd == HINIC_PORT_CMD_UPDATE_FW) + timeout = UPDATE_FW_MGMT_TIMEOUT; return msg_to_mgmt_sync(pf_to_mgmt, mod, cmd, buf_in, in_size, buf_out, out_size, MGMT_DIRECT_SEND, -- cgit v1.2.3-59-g8ed1b From 94cc242a067a869c29800aa789d38b7676136e50 Mon Sep 17 00:00:00 2001 From: Ganji Aravind Date: Fri, 4 Sep 2020 15:58:18 +0530 Subject: cxgb4: Fix offset when clearing filter byte counters Pass the correct offset to clear the stale filter hit bytes counter. Otherwise, the counter starts incrementing from the stale information, instead of 0. Fixes: 12b276fbf6e0 ("cxgb4: add support to create hash filters") Signed-off-by: Ganji Aravind Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 650db92cb11c..481498585ead 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -1911,13 +1911,16 @@ out: static int configure_filter_tcb(struct adapter *adap, unsigned int tid, struct filter_entry *f) { - if (f->fs.hitcnts) + if (f->fs.hitcnts) { set_tcb_field(adap, f, tid, TCB_TIMESTAMP_W, - TCB_TIMESTAMP_V(TCB_TIMESTAMP_M) | + TCB_TIMESTAMP_V(TCB_TIMESTAMP_M), + TCB_TIMESTAMP_V(0ULL), + 1); + set_tcb_field(adap, f, tid, TCB_RTT_TS_RECENT_AGE_W, TCB_RTT_TS_RECENT_AGE_V(TCB_RTT_TS_RECENT_AGE_M), - TCB_TIMESTAMP_V(0ULL) | TCB_RTT_TS_RECENT_AGE_V(0ULL), 1); + } if (f->fs.newdmac) set_tcb_tflag(adap, f, tid, TF_CCTRL_ECE_S, 1, -- cgit v1.2.3-59-g8ed1b From d7739b0b6d15ef9ad5c79424736b8ded5ed3e913 Mon Sep 17 00:00:00 2001 From: Parshuram Thombare Date: Sat, 5 Sep 2020 10:21:33 +0200 Subject: net: macb: fix for pause frame receive enable bit PAE bit of NCFGR register, when set, pauses transmission if a non-zero 802.3 classic pause frame is received. Fixes: 7897b071ac3b ("net: macb: convert to phylink") Signed-off-by: Parshuram Thombare Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 6761f404b8aa..9179f7b0b900 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -647,8 +647,7 @@ static void macb_mac_link_up(struct phylink_config *config, ctrl |= GEM_BIT(GBE); } - /* We do not support MLO_PAUSE_RX yet */ - if (tx_pause) + if (rx_pause) ctrl |= MACB_BIT(PAE); macb_set_tx_clk(bp->tx_clk, speed, ndev); -- cgit v1.2.3-59-g8ed1b From 4ddcaf1ebb5e4e99240f29d531ee69d4244fe416 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 5 Sep 2020 12:32:33 +0200 Subject: net: dsa: rtl8366: Properly clear member config When removing a port from a VLAN we are just erasing the member config for the VLAN, which is wrong: other ports can be using it. Just mask off the port and only zero out the rest of the member config once ports using of the VLAN are removed from it. Reported-by: Florian Fainelli Fixes: d8652956cf37 ("net: dsa: realtek-smi: Add Realtek SMI driver") Signed-off-by: Linus Walleij Signed-off-by: Jakub Kicinski --- drivers/net/dsa/rtl8366.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/net/dsa/rtl8366.c b/drivers/net/dsa/rtl8366.c index 8f40fbf70a82..a8c5a934c3d3 100644 --- a/drivers/net/dsa/rtl8366.c +++ b/drivers/net/dsa/rtl8366.c @@ -452,13 +452,19 @@ int rtl8366_vlan_del(struct dsa_switch *ds, int port, return ret; if (vid == vlanmc.vid) { - /* clear VLAN member configurations */ - vlanmc.vid = 0; - vlanmc.priority = 0; - vlanmc.member = 0; - vlanmc.untag = 0; - vlanmc.fid = 0; - + /* Remove this port from the VLAN */ + vlanmc.member &= ~BIT(port); + vlanmc.untag &= ~BIT(port); + /* + * If no ports are members of this VLAN + * anymore then clear the whole member + * config so it can be reused. + */ + if (!vlanmc.member && vlanmc.untag) { + vlanmc.vid = 0; + vlanmc.priority = 0; + vlanmc.fid = 0; + } ret = smi->ops->set_vlan_mc(smi, i, &vlanmc); if (ret) { dev_err(smi->dev, -- cgit v1.2.3-59-g8ed1b From 1264c1e0cfe55e2d6c35e869244093195529af37 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 26 Aug 2020 07:49:34 +0200 Subject: Revert "wlcore: Adding suppoprt for IGTK key in wlcore driver" This patch causes a regression betwen Kernel 5.7 and 5.8 at wlcore: with it applied, WiFi stops working, and the Kernel starts printing this message every second: wlcore: PHY firmware version: Rev 8.2.0.0.242 wlcore: firmware booted (Rev 8.9.0.0.79) wlcore: ERROR command execute failure 14 ------------[ cut here ]------------ WARNING: CPU: 0 PID: 133 at drivers/net/wireless/ti/wlcore/main.c:795 wl12xx_queue_recovery_work.part.0+0x6c/0x74 [wlcore] Modules linked in: wl18xx wlcore mac80211 libarc4 cfg80211 rfkill snd_soc_hdmi_codec crct10dif_ce wlcore_sdio adv7511 cec kirin9xx_drm(C) kirin9xx_dw_drm_dsi(C) drm_kms_helper drm ip_tables x_tables ipv6 nf_defrag_ipv6 CPU: 0 PID: 133 Comm: kworker/0:1 Tainted: G WC 5.8.0+ #186 Hardware name: HiKey970 (DT) Workqueue: events_freezable ieee80211_restart_work [mac80211] pstate: 60000005 (nZCv daif -PAN -UAO BTYPE=--) pc : wl12xx_queue_recovery_work.part.0+0x6c/0x74 [wlcore] lr : wl12xx_queue_recovery_work+0x24/0x30 [wlcore] sp : ffff8000126c3a60 x29: ffff8000126c3a60 x28: 00000000000025de x27: 0000000000000010 x26: 0000000000000005 x25: ffff0001a5d49e80 x24: ffff8000092cf580 x23: ffff0001b7c12623 x22: ffff0001b6fcf2e8 x21: ffff0001b7e46200 x20: 00000000fffffffb x19: ffff0001a78e6400 x18: 0000000000000030 x17: 0000000000000001 x16: 0000000000000001 x15: ffff0001b7e46670 x14: ffffffffffffffff x13: ffff8000926c37d7 x12: ffff8000126c37e0 x11: ffff800011e01000 x10: ffff8000120526d0 x9 : 0000000000000000 x8 : 3431206572756c69 x7 : 6166206574756365 x6 : 0000000000000c2c x5 : 0000000000000000 x4 : ffff0001bf1361e8 x3 : ffff0001bf1790b0 x2 : 0000000000000000 x1 : ffff0001a5d49e80 x0 : 0000000000000001 Call trace: wl12xx_queue_recovery_work.part.0+0x6c/0x74 [wlcore] wl12xx_queue_recovery_work+0x24/0x30 [wlcore] wl1271_cmd_set_sta_key+0x258/0x25c [wlcore] wl1271_set_key+0x7c/0x2dc [wlcore] wlcore_set_key+0xe4/0x360 [wlcore] wl18xx_set_key+0x48/0x1d0 [wl18xx] wlcore_op_set_key+0xa4/0x180 [wlcore] ieee80211_key_enable_hw_accel+0xb0/0x2d0 [mac80211] ieee80211_reenable_keys+0x70/0x110 [mac80211] ieee80211_reconfig+0xa00/0xca0 [mac80211] ieee80211_restart_work+0xc4/0xfc [mac80211] process_one_work+0x1cc/0x350 worker_thread+0x13c/0x470 kthread+0x154/0x160 ret_from_fork+0x10/0x30 ---[ end trace b1f722abf9af5919 ]--- wlcore: WARNING could not set keys wlcore: ERROR Could not add or replace key wlan0: failed to set key (4, ff:ff:ff:ff:ff:ff) to hardware (-5) wlcore: Hardware recovery in progress. FW ver: Rev 8.9.0.0.79 wlcore: pc: 0x0, hint_sts: 0x00000040 count: 39 wlcore: down wlcore: down ieee80211 phy0: Hardware restart was requested mmc_host mmc0: Bus speed (slot 0) = 400000Hz (slot req 400000Hz, actual 400000HZ div = 0) mmc_host mmc0: Bus speed (slot 0) = 25000000Hz (slot req 25000000Hz, actual 25000000HZ div = 0) wlcore: PHY firmware version: Rev 8.2.0.0.242 wlcore: firmware booted (Rev 8.9.0.0.79) wlcore: ERROR command execute failure 14 ------------[ cut here ]------------ Tested on Hikey 970. This reverts commit 2b7aadd3b9e17e8b81eeb8d9cc46756ae4658265. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/f0a2cb7ea606f1a284d4c23cbf983da2954ce9b6.1598420968.git.mchehab+huawei@kernel.org --- drivers/net/wireless/ti/wlcore/cmd.h | 1 - drivers/net/wireless/ti/wlcore/main.c | 4 ---- 2 files changed, 5 deletions(-) diff --git a/drivers/net/wireless/ti/wlcore/cmd.h b/drivers/net/wireless/ti/wlcore/cmd.h index 9acd8a41ea61..f2609d5b6bf7 100644 --- a/drivers/net/wireless/ti/wlcore/cmd.h +++ b/drivers/net/wireless/ti/wlcore/cmd.h @@ -458,7 +458,6 @@ enum wl1271_cmd_key_type { KEY_TKIP = 2, KEY_AES = 3, KEY_GEM = 4, - KEY_IGTK = 5, }; struct wl1271_cmd_set_keys { diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 821ad1acd505..d2bbd5108f7e 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -3559,9 +3559,6 @@ int wlcore_set_key(struct wl1271 *wl, enum set_key_cmd cmd, case WL1271_CIPHER_SUITE_GEM: key_type = KEY_GEM; break; - case WLAN_CIPHER_SUITE_AES_CMAC: - key_type = KEY_IGTK; - break; default: wl1271_error("Unknown key algo 0x%x", key_conf->cipher); @@ -6231,7 +6228,6 @@ static int wl1271_init_ieee80211(struct wl1271 *wl) WLAN_CIPHER_SUITE_TKIP, WLAN_CIPHER_SUITE_CCMP, WL1271_CIPHER_SUITE_GEM, - WLAN_CIPHER_SUITE_AES_CMAC, }; /* The tx descriptor buffer */ -- cgit v1.2.3-59-g8ed1b From b340dc680ed48dcc05b56e1ebe1b9535813c3ee0 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Sat, 5 Sep 2020 22:55:36 -0400 Subject: bnxt_en: Avoid sending firmware messages when AER error is detected. When the driver goes through PCIe AER reset in error state, all firmware messages will timeout because the PCIe bus is no longer accessible. This can lead to AER reset taking many minutes to complete as each firmware command takes time to timeout. Define a new macro BNXT_NO_FW_ACCESS() to skip these firmware messages when either firmware is in fatal error state or when pci_channel_offline() is true. It now takes a more reasonable 20 to 30 seconds to complete AER recovery. Fixes: b4fff2079d10 ("bnxt_en: Do not send firmware messages if firmware is in error state.") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 +++--- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 4 ++++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b167066af450..619eb556683d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4305,7 +4305,7 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len, u32 bar_offset = BNXT_GRCPF_REG_CHIMP_COMM; u16 dst = BNXT_HWRM_CHNL_CHIMP; - if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state)) + if (BNXT_NO_FW_ACCESS(bp)) return -EBUSY; if (msg_len > BNXT_HWRM_MAX_REQ_LEN) { @@ -5723,7 +5723,7 @@ static int hwrm_ring_free_send_msg(struct bnxt *bp, struct hwrm_ring_free_output *resp = bp->hwrm_cmd_resp_addr; u16 error_code; - if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state)) + if (BNXT_NO_FW_ACCESS(bp)) return 0; bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_RING_FREE, cmpl_ring_id, -1); @@ -7817,7 +7817,7 @@ static int bnxt_set_tpa(struct bnxt *bp, bool set_tpa) if (set_tpa) tpa_flags = bp->flags & BNXT_FLAG_TPA; - else if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state)) + else if (BNXT_NO_FW_ACCESS(bp)) return 0; for (i = 0; i < bp->nr_vnics; i++) { rc = bnxt_hwrm_vnic_set_tpa(bp, i, tpa_flags); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 5a13eb66beda..0ef89dabfd61 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1737,6 +1737,10 @@ struct bnxt { #define BNXT_STATE_FW_FATAL_COND 6 #define BNXT_STATE_DRV_REGISTERED 7 +#define BNXT_NO_FW_ACCESS(bp) \ + (test_bit(BNXT_STATE_FW_FATAL_COND, &(bp)->state) || \ + pci_channel_offline((bp)->pdev)) + struct bnxt_irq *irq_tbl; int total_irqs; u8 mac_addr[ETH_ALEN]; -- cgit v1.2.3-59-g8ed1b From b16939b59cc00231a75d224fd058d22c9d064976 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Sat, 5 Sep 2020 22:55:37 -0400 Subject: bnxt_en: Fix NULL ptr dereference crash in bnxt_fw_reset_task() bnxt_fw_reset_task() which runs from a workqueue can race with bnxt_remove_one(). For example, if firmware reset and VF FLR are happening at about the same time. bnxt_remove_one() already cancels the workqueue and waits for it to finish, but we need to do this earlier before the devlink reporters are destroyed. This will guarantee that the devlink reporters will always be valid when bnxt_fw_reset_task() is still running. Fixes: b148bb238c02 ("bnxt_en: Fix possible crash in bnxt_fw_reset_task().") Reviewed-by: Edwin Peer Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 619eb556683d..8eb73fe39e84 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11779,6 +11779,10 @@ static void bnxt_remove_one(struct pci_dev *pdev) if (BNXT_PF(bp)) bnxt_sriov_disable(bp); + clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); + bnxt_cancel_sp_work(bp); + bp->sp_event = 0; + bnxt_dl_fw_reporters_destroy(bp, true); if (BNXT_PF(bp)) devlink_port_type_clear(&bp->dl_port); @@ -11786,9 +11790,6 @@ static void bnxt_remove_one(struct pci_dev *pdev) unregister_netdev(dev); bnxt_dl_unregister(bp); bnxt_shutdown_tc(bp); - clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); - bnxt_cancel_sp_work(bp); - bp->sp_event = 0; bnxt_clear_int_mode(bp); bnxt_hwrm_func_drv_unrgtr(bp); -- cgit v1.2.3-59-g8ed1b From eb02d39ad3099c3dcd96c5b3fdc0303541766ed1 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 6 Sep 2020 20:31:16 -0700 Subject: netdevice.h: fix proto_down_reason kernel-doc warning Fix kernel-doc warning in : ../include/linux/netdevice.h:2158: warning: Function parameter or member 'proto_down_reason' not described in 'net_device' Fixes: 829eb208e80d ("rtnetlink: add support for protodown reason") Signed-off-by: Randy Dunlap Acked-by: Roopa Prabhu Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b0e303f6603f..bf0e313486be 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1784,6 +1784,7 @@ enum netdev_priv_flags { * the watchdog (see dev_watchdog()) * @watchdog_timer: List of timers * + * @proto_down_reason: reason a netdev interface is held down * @pcpu_refcnt: Number of references to this device * @todo_list: Delayed register/unregister * @link_watch_list: XXX: need comments on this one -- cgit v1.2.3-59-g8ed1b From ffa59b0b396cb2c0ea6712ff30ee05c35d4c9c8d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 6 Sep 2020 20:32:30 -0700 Subject: netdevice.h: fix xdp_state kernel-doc warning Fix kernel-doc warning in : ../include/linux/netdevice.h:2158: warning: Function parameter or member 'xdp_state' not described in 'net_device' Fixes: 7f0a838254bd ("bpf, xdp: Maintain info on attached XDP BPF programs in net_device") Signed-off-by: Randy Dunlap Cc: Andrii Nakryiko Cc: Alexei Starovoitov Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bf0e313486be..7bd4fcdd0738 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1849,6 +1849,7 @@ enum netdev_priv_flags { * @udp_tunnel_nic_info: static structure describing the UDP tunnel * offload capabilities of the device * @udp_tunnel_nic: UDP tunnel offload state + * @xdp_state: stores info on attached XDP BPF programs * * FIXME: cleanup struct net_device such that network protocol info * moves out. -- cgit v1.2.3-59-g8ed1b From 8ae4dff882eb879c17bf46574201bd37fc6bc8b5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 4 Sep 2020 21:07:49 -0700 Subject: ibmvnic: add missing parenthesis in do_reset() Indentation and logic clearly show that this code is missing parenthesis. Fixes: 9f1345737790 ("ibmvnic fix NULL tx_pools and rx_tools issue at do_reset") Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index d3a774331afc..1b702a43a5d0 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2032,16 +2032,18 @@ static int do_reset(struct ibmvnic_adapter *adapter, } else { rc = reset_tx_pools(adapter); - if (rc) + if (rc) { netdev_dbg(adapter->netdev, "reset tx pools failed (%d)\n", rc); goto out; + } rc = reset_rx_pools(adapter); - if (rc) + if (rc) { netdev_dbg(adapter->netdev, "reset rx pools failed (%d)\n", rc); goto out; + } } ibmvnic_disable_irqs(adapter); } -- cgit v1.2.3-59-g8ed1b From e1f469cd5866499ac40bfdca87411e1c525a10c7 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 7 Sep 2020 15:54:41 +0000 Subject: Revert "netns: don't disable BHs when locking "nsid_lock"" This reverts commit 8d7e5dee972f1cde2ba96c621f1541fa36e7d4f4. To protect netns id, the nsid_lock is used when netns id is being allocated and removed by peernet2id_alloc() and unhash_nsid(). The nsid_lock can be used in BH context but only spin_lock() is used in this code. Using spin_lock() instead of spin_lock_bh() can result in a deadlock in the following scenario reported by the lockdep. In order to avoid a deadlock, the spin_lock_bh() should be used instead of spin_lock() to acquire nsid_lock. Test commands: ip netns del nst ip netns add nst ip link add veth1 type veth peer name veth2 ip link set veth1 netns nst ip netns exec nst ip link add name br1 type bridge vlan_filtering 1 ip netns exec nst ip link set dev br1 up ip netns exec nst ip link set dev veth1 master br1 ip netns exec nst ip link set dev veth1 up ip netns exec nst ip link add macvlan0 link br1 up type macvlan Splat looks like: [ 33.615860][ T607] WARNING: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected [ 33.617194][ T607] 5.9.0-rc1+ #665 Not tainted [ ... ] [ 33.670615][ T607] Chain exists of: [ 33.670615][ T607] &mc->mca_lock --> &bridge_netdev_addr_lock_key --> &net->nsid_lock [ 33.670615][ T607] [ 33.673118][ T607] Possible interrupt unsafe locking scenario: [ 33.673118][ T607] [ 33.674599][ T607] CPU0 CPU1 [ 33.675557][ T607] ---- ---- [ 33.676516][ T607] lock(&net->nsid_lock); [ 33.677306][ T607] local_irq_disable(); [ 33.678517][ T607] lock(&mc->mca_lock); [ 33.679725][ T607] lock(&bridge_netdev_addr_lock_key); [ 33.681166][ T607] [ 33.681791][ T607] lock(&mc->mca_lock); [ 33.682579][ T607] [ 33.682579][ T607] *** DEADLOCK *** [ ... ] [ 33.922046][ T607] stack backtrace: [ 33.922999][ T607] CPU: 3 PID: 607 Comm: ip Not tainted 5.9.0-rc1+ #665 [ 33.924099][ T607] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 [ 33.925714][ T607] Call Trace: [ 33.926238][ T607] dump_stack+0x78/0xab [ 33.926905][ T607] check_irq_usage+0x70b/0x720 [ 33.927708][ T607] ? iterate_chain_key+0x60/0x60 [ 33.928507][ T607] ? check_path+0x22/0x40 [ 33.929201][ T607] ? check_noncircular+0xcf/0x180 [ 33.930024][ T607] ? __lock_acquire+0x1952/0x1f20 [ 33.930860][ T607] __lock_acquire+0x1952/0x1f20 [ 33.931667][ T607] lock_acquire+0xaf/0x3a0 [ 33.932366][ T607] ? peernet2id_alloc+0x3a/0x170 [ 33.933147][ T607] ? br_port_fill_attrs+0x54c/0x6b0 [bridge] [ 33.934140][ T607] ? br_port_fill_attrs+0x5de/0x6b0 [bridge] [ 33.935113][ T607] ? kvm_sched_clock_read+0x14/0x30 [ 33.935974][ T607] _raw_spin_lock+0x30/0x70 [ 33.936728][ T607] ? peernet2id_alloc+0x3a/0x170 [ 33.937523][ T607] peernet2id_alloc+0x3a/0x170 [ 33.938313][ T607] rtnl_fill_ifinfo+0xb5e/0x1400 [ 33.939091][ T607] rtmsg_ifinfo_build_skb+0x8a/0xf0 [ 33.939953][ T607] rtmsg_ifinfo_event.part.39+0x17/0x50 [ 33.940863][ T607] rtmsg_ifinfo+0x1f/0x30 [ 33.941571][ T607] __dev_notify_flags+0xa5/0xf0 [ 33.942376][ T607] ? __irq_work_queue_local+0x49/0x50 [ 33.943249][ T607] ? irq_work_queue+0x1d/0x30 [ 33.943993][ T607] ? __dev_set_promiscuity+0x7b/0x1a0 [ 33.944878][ T607] __dev_set_promiscuity+0x7b/0x1a0 [ 33.945758][ T607] dev_set_promiscuity+0x1e/0x50 [ 33.946582][ T607] br_port_set_promisc+0x1f/0x40 [bridge] [ 33.947487][ T607] br_manage_promisc+0x8b/0xe0 [bridge] [ 33.948388][ T607] __dev_set_promiscuity+0x123/0x1a0 [ 33.949244][ T607] __dev_set_rx_mode+0x68/0x90 [ 33.950021][ T607] dev_uc_add+0x50/0x60 [ 33.950720][ T607] macvlan_open+0x18e/0x1f0 [macvlan] [ 33.951601][ T607] __dev_open+0xd6/0x170 [ 33.952269][ T607] __dev_change_flags+0x181/0x1d0 [ 33.953056][ T607] rtnl_configure_link+0x2f/0xa0 [ 33.953884][ T607] __rtnl_newlink+0x6b9/0x8e0 [ 33.954665][ T607] ? __lock_acquire+0x95d/0x1f20 [ 33.955450][ T607] ? lock_acquire+0xaf/0x3a0 [ 33.956193][ T607] ? is_bpf_text_address+0x5/0xe0 [ 33.956999][ T607] rtnl_newlink+0x47/0x70 Acked-by: Guillaume Nault Fixes: 8d7e5dee972f ("netns: don't disable BHs when locking "nsid_lock"") Reported-by: syzbot+3f960c64a104eaa2c813@syzkaller.appspotmail.com Signed-off-by: Taehee Yoo Signed-off-by: Jakub Kicinski --- net/core/net_namespace.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index dcd61aca343e..944ab214e5ae 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -251,10 +251,10 @@ int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp) if (refcount_read(&net->count) == 0) return NETNSA_NSID_NOT_ASSIGNED; - spin_lock(&net->nsid_lock); + spin_lock_bh(&net->nsid_lock); id = __peernet2id(net, peer); if (id >= 0) { - spin_unlock(&net->nsid_lock); + spin_unlock_bh(&net->nsid_lock); return id; } @@ -264,12 +264,12 @@ int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp) * just been idr_remove()'d from there in cleanup_net(). */ if (!maybe_get_net(peer)) { - spin_unlock(&net->nsid_lock); + spin_unlock_bh(&net->nsid_lock); return NETNSA_NSID_NOT_ASSIGNED; } id = alloc_netid(net, peer, -1); - spin_unlock(&net->nsid_lock); + spin_unlock_bh(&net->nsid_lock); put_net(peer); if (id < 0) @@ -534,20 +534,20 @@ static void unhash_nsid(struct net *net, struct net *last) for_each_net(tmp) { int id; - spin_lock(&tmp->nsid_lock); + spin_lock_bh(&tmp->nsid_lock); id = __peernet2id(tmp, net); if (id >= 0) idr_remove(&tmp->netns_ids, id); - spin_unlock(&tmp->nsid_lock); + spin_unlock_bh(&tmp->nsid_lock); if (id >= 0) rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL, GFP_KERNEL); if (tmp == last) break; } - spin_lock(&net->nsid_lock); + spin_lock_bh(&net->nsid_lock); idr_destroy(&net->netns_ids); - spin_unlock(&net->nsid_lock); + spin_unlock_bh(&net->nsid_lock); } static LLIST_HEAD(cleanup_list); @@ -760,9 +760,9 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh, return PTR_ERR(peer); } - spin_lock(&net->nsid_lock); + spin_lock_bh(&net->nsid_lock); if (__peernet2id(net, peer) >= 0) { - spin_unlock(&net->nsid_lock); + spin_unlock_bh(&net->nsid_lock); err = -EEXIST; NL_SET_BAD_ATTR(extack, nla); NL_SET_ERR_MSG(extack, @@ -771,7 +771,7 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh, } err = alloc_netid(net, peer, nsid); - spin_unlock(&net->nsid_lock); + spin_unlock_bh(&net->nsid_lock); if (err >= 0) { rtnl_net_notifyid(net, RTM_NEWNSID, err, NETLINK_CB(skb).portid, nlh, GFP_KERNEL); -- cgit v1.2.3-59-g8ed1b From 19162fd4063a3211843b997a454b505edb81d5ce Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Mon, 7 Sep 2020 00:13:39 -0700 Subject: hv_netvsc: Fix hibernation for mlx5 VF driver mlx5_suspend()/resume() keep the network interface, so during hibernation netvsc_unregister_vf() and netvsc_register_vf() are not called, and hence netvsc_resume() should call netvsc_vf_changed() to switch the data path back to the VF after hibernation. Note: after we close and re-open the vmbus channel of the netvsc NIC in netvsc_suspend() and netvsc_resume(), the data path is implicitly switched to the netvsc NIC. Similarly, netvsc_suspend() should not call netvsc_unregister_vf(), otherwise the VF can no longer be used after hibernation. For mlx4, since the VF network interafce is explicitly destroyed and re-created during hibernation (see mlx4_suspend()/resume()), hv_netvsc already explicitly switches the data path from and to the VF automatically via netvsc_register_vf() and netvsc_unregister_vf(), so mlx4 doesn't need this fix. Note: mlx4 can still work with the fix because in netvsc_suspend()/resume() ndev_ctx->vf_netdev is NULL for mlx4. Fixes: 0efeea5fb153 ("hv_netvsc: Add the support of hibernation") Signed-off-by: Dexuan Cui Signed-off-by: Jakub Kicinski --- drivers/net/hyperv/netvsc_drv.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 64b0a74c1523..81c5c70b616a 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2587,8 +2587,8 @@ static int netvsc_remove(struct hv_device *dev) static int netvsc_suspend(struct hv_device *dev) { struct net_device_context *ndev_ctx; - struct net_device *vf_netdev, *net; struct netvsc_device *nvdev; + struct net_device *net; int ret; net = hv_get_drvdata(dev); @@ -2604,10 +2604,6 @@ static int netvsc_suspend(struct hv_device *dev) goto out; } - vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev); - if (vf_netdev) - netvsc_unregister_vf(vf_netdev); - /* Save the current config info */ ndev_ctx->saved_netvsc_dev_info = netvsc_devinfo_get(nvdev); @@ -2623,6 +2619,7 @@ static int netvsc_resume(struct hv_device *dev) struct net_device *net = hv_get_drvdata(dev); struct net_device_context *net_device_ctx; struct netvsc_device_info *device_info; + struct net_device *vf_netdev; int ret; rtnl_lock(); @@ -2635,6 +2632,15 @@ static int netvsc_resume(struct hv_device *dev) netvsc_devinfo_put(device_info); net_device_ctx->saved_netvsc_dev_info = NULL; + /* A NIC driver (e.g. mlx5) may keep the VF network interface across + * hibernation, but here the data path is implicitly switched to the + * netvsc NIC since the vmbus channel is closed and re-opened, so + * netvsc_vf_changed() must be used to switch the data path to the VF. + */ + vf_netdev = rtnl_dereference(net_device_ctx->vf_netdev); + if (vf_netdev && netvsc_vf_changed(vf_netdev) != NOTIFY_OK) + ret = -EINVAL; + rtnl_unlock(); return ret; -- cgit v1.2.3-59-g8ed1b From 1cc5ef91d2ff94d2bf2de3b3585423e8a1051cb6 Mon Sep 17 00:00:00 2001 From: Will McVicker Date: Mon, 24 Aug 2020 19:38:32 +0000 Subject: netfilter: ctnetlink: add a range check for l3/l4 protonum The indexes to the nf_nat_l[34]protos arrays come from userspace. So check the tuple's family, e.g. l3num, when creating the conntrack in order to prevent an OOB memory access during setup. Here is an example kernel panic on 4.14.180 when userspace passes in an index greater than NFPROTO_NUMPROTO. Internal error: Oops - BUG: 0 [#1] PREEMPT SMP Modules linked in:... Process poc (pid: 5614, stack limit = 0x00000000a3933121) CPU: 4 PID: 5614 Comm: poc Tainted: G S W O 4.14.180-g051355490483 Hardware name: Qualcomm Technologies, Inc. SM8150 V2 PM8150 Google Inc. MSM task: 000000002a3dfffe task.stack: 00000000a3933121 pc : __cfi_check_fail+0x1c/0x24 lr : __cfi_check_fail+0x1c/0x24 ... Call trace: __cfi_check_fail+0x1c/0x24 name_to_dev_t+0x0/0x468 nfnetlink_parse_nat_setup+0x234/0x258 ctnetlink_parse_nat_setup+0x4c/0x228 ctnetlink_new_conntrack+0x590/0xc40 nfnetlink_rcv_msg+0x31c/0x4d4 netlink_rcv_skb+0x100/0x184 nfnetlink_rcv+0xf4/0x180 netlink_unicast+0x360/0x770 netlink_sendmsg+0x5a0/0x6a4 ___sys_sendmsg+0x314/0x46c SyS_sendmsg+0xb4/0x108 el0_svc_naked+0x34/0x38 This crash is not happening since 5.4+, however, ctnetlink still allows for creating entries with unsupported layer 3 protocol number. Fixes: c1d10adb4a521 ("[NETFILTER]: Add ctnetlink port for nf_conntrack") Signed-off-by: Will McVicker [pablo@netfilter.org: rebased original patch on top of nf.git] Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 832eabecfbdd..d65846aa8059 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1404,7 +1404,8 @@ ctnetlink_parse_tuple_filter(const struct nlattr * const cda[], if (err < 0) return err; - + if (l3num != NFPROTO_IPV4 && l3num != NFPROTO_IPV6) + return -EOPNOTSUPP; tuple->src.l3num = l3num; if (flags & CTA_FILTER_FLAG(CTA_IP_DST) || -- cgit v1.2.3-59-g8ed1b From 67cc570edaa02016a8685a06a0ee91f05a6277d9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 27 Aug 2020 19:28:42 +0200 Subject: netfilter: nf_tables: coalesce multiple notifications into one skbuff On x86_64, each notification results in one skbuff allocation which consumes at least 768 bytes due to the skbuff overhead. This patch coalesces several notifications into one single skbuff, so each notification consumes at least ~211 bytes, that ~3.5 times less memory consumption. As a result, this is reducing the chances to exhaust the netlink socket receive buffer. Rule of thumb is that each notification batch only contains netlink messages whose report flag is the same, nfnetlink_send() requires this to do appropriate delivery to userspace, either via unicast (echo mode) or multicast (monitor mode). The skbuff control buffer is used to annotate the report flag for later handling at the new coalescing routine. The batch skbuff notification size is NLMSG_GOODSIZE, using a larger skbuff would allow for more socket receiver buffer savings (to amortize the cost of the skbuff even more), however, going over that size might break userspace applications, so let's be conservative and stick to NLMSG_GOODSIZE. Reported-by: Phil Sutter Acked-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netns/nftables.h | 1 + net/netfilter/nf_tables_api.c | 70 +++++++++++++++++++++++++++++++++++-------- 2 files changed, 58 insertions(+), 13 deletions(-) diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h index a1a8d45adb42..6c0806bd8d1e 100644 --- a/include/net/netns/nftables.h +++ b/include/net/netns/nftables.h @@ -8,6 +8,7 @@ struct netns_nftables { struct list_head tables; struct list_head commit_list; struct list_head module_list; + struct list_head notify_list; struct mutex commit_mutex; unsigned int base_seq; u8 gencursor; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index b7dc1cbf40ea..4603b667973a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -684,6 +684,18 @@ nla_put_failure: return -1; } +struct nftnl_skb_parms { + bool report; +}; +#define NFT_CB(skb) (*(struct nftnl_skb_parms*)&((skb)->cb)) + +static void nft_notify_enqueue(struct sk_buff *skb, bool report, + struct list_head *notify_list) +{ + NFT_CB(skb).report = report; + list_add_tail(&skb->list, notify_list); +} + static void nf_tables_table_notify(const struct nft_ctx *ctx, int event) { struct sk_buff *skb; @@ -715,8 +727,7 @@ static void nf_tables_table_notify(const struct nft_ctx *ctx, int event) goto err; } - nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, - ctx->report, GFP_KERNEL); + nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); return; err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -1468,8 +1479,7 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event) goto err; } - nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, - ctx->report, GFP_KERNEL); + nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); return; err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -2807,8 +2817,7 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx, goto err; } - nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, - ctx->report, GFP_KERNEL); + nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); return; err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -3837,8 +3846,7 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx, goto err; } - nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, ctx->report, - gfp_flags); + nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); return; err: nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -4959,8 +4967,7 @@ static void nf_tables_setelem_notify(const struct nft_ctx *ctx, goto err; } - nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, ctx->report, - GFP_KERNEL); + nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); return; err: nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -6275,7 +6282,7 @@ void nft_obj_notify(struct net *net, const struct nft_table *table, goto err; } - nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, gfp); + nft_notify_enqueue(skb, report, &net->nft.notify_list); return; err: nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -7085,8 +7092,7 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx, goto err; } - nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, - ctx->report, GFP_KERNEL); + nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); return; err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -7695,6 +7701,41 @@ static void nf_tables_commit_release(struct net *net) mutex_unlock(&net->nft.commit_mutex); } +static void nft_commit_notify(struct net *net, u32 portid) +{ + struct sk_buff *batch_skb = NULL, *nskb, *skb; + unsigned char *data; + int len; + + list_for_each_entry_safe(skb, nskb, &net->nft.notify_list, list) { + if (!batch_skb) { +new_batch: + batch_skb = skb; + len = NLMSG_GOODSIZE - skb->len; + list_del(&skb->list); + continue; + } + len -= skb->len; + if (len > 0 && NFT_CB(skb).report == NFT_CB(batch_skb).report) { + data = skb_put(batch_skb, skb->len); + memcpy(data, skb->data, skb->len); + list_del(&skb->list); + kfree_skb(skb); + continue; + } + nfnetlink_send(batch_skb, net, portid, NFNLGRP_NFTABLES, + NFT_CB(batch_skb).report, GFP_KERNEL); + goto new_batch; + } + + if (batch_skb) { + nfnetlink_send(batch_skb, net, portid, NFNLGRP_NFTABLES, + NFT_CB(batch_skb).report, GFP_KERNEL); + } + + WARN_ON_ONCE(!list_empty(&net->nft.notify_list)); +} + static int nf_tables_commit(struct net *net, struct sk_buff *skb) { struct nft_trans *trans, *next; @@ -7897,6 +7938,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) } } + nft_commit_notify(net, NETLINK_CB(skb).portid); nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); nf_tables_commit_release(net); @@ -8721,6 +8763,7 @@ static int __net_init nf_tables_init_net(struct net *net) INIT_LIST_HEAD(&net->nft.tables); INIT_LIST_HEAD(&net->nft.commit_list); INIT_LIST_HEAD(&net->nft.module_list); + INIT_LIST_HEAD(&net->nft.notify_list); mutex_init(&net->nft.commit_mutex); net->nft.base_seq = 1; net->nft.validate_state = NFT_VALIDATE_SKIP; @@ -8737,6 +8780,7 @@ static void __net_exit nf_tables_exit_net(struct net *net) mutex_unlock(&net->nft.commit_mutex); WARN_ON_ONCE(!list_empty(&net->nft.tables)); WARN_ON_ONCE(!list_empty(&net->nft.module_list)); + WARN_ON_ONCE(!list_empty(&net->nft.notify_list)); } static struct pernet_operations nf_tables_net_ops = { -- cgit v1.2.3-59-g8ed1b From 6c0d95d1238d944fe54f0bbfc7ec017d78435daa Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Tue, 1 Sep 2020 08:56:19 +0200 Subject: netfilter: ctnetlink: fix mark based dump filtering regression conntrack mark based dump filtering may falsely skip entries if a mask is given: If the mask-based check does not filter out the entry, the else-if check is always true and compares the mark without considering the mask. The if/else-if logic seems wrong. Given that the mask during filter setup is implicitly set to 0xffffffff if not specified explicitly, the mark filtering flags seem to just complicate things. Restore the previously used approach by always matching against a zero mask is no filter mark is given. Fixes: cb8aa9a3affb ("netfilter: ctnetlink: add kernel side filtering for dump") Signed-off-by: Martin Willi Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index d65846aa8059..c3a4214dc958 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -851,7 +851,6 @@ static int ctnetlink_done(struct netlink_callback *cb) } struct ctnetlink_filter { - u_int32_t cta_flags; u8 family; u_int32_t orig_flags; @@ -906,10 +905,6 @@ static int ctnetlink_parse_tuple_filter(const struct nlattr * const cda[], struct nf_conntrack_zone *zone, u_int32_t flags); -/* applied on filters */ -#define CTA_FILTER_F_CTA_MARK (1 << 0) -#define CTA_FILTER_F_CTA_MARK_MASK (1 << 1) - static struct ctnetlink_filter * ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family) { @@ -930,14 +925,10 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family) #ifdef CONFIG_NF_CONNTRACK_MARK if (cda[CTA_MARK]) { filter->mark.val = ntohl(nla_get_be32(cda[CTA_MARK])); - filter->cta_flags |= CTA_FILTER_FLAG(CTA_MARK); - - if (cda[CTA_MARK_MASK]) { + if (cda[CTA_MARK_MASK]) filter->mark.mask = ntohl(nla_get_be32(cda[CTA_MARK_MASK])); - filter->cta_flags |= CTA_FILTER_FLAG(CTA_MARK_MASK); - } else { + else filter->mark.mask = 0xffffffff; - } } else if (cda[CTA_MARK_MASK]) { err = -EINVAL; goto err_filter; @@ -1117,11 +1108,7 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data) } #ifdef CONFIG_NF_CONNTRACK_MARK - if ((filter->cta_flags & CTA_FILTER_FLAG(CTA_MARK_MASK)) && - (ct->mark & filter->mark.mask) != filter->mark.val) - goto ignore_entry; - else if ((filter->cta_flags & CTA_FILTER_FLAG(CTA_MARK)) && - ct->mark != filter->mark.val) + if ((ct->mark & filter->mark.mask) != filter->mark.val) goto ignore_entry; #endif -- cgit v1.2.3-59-g8ed1b From 526e81b990e53e31ba40ba304a2285ffd098721f Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Tue, 1 Sep 2020 16:56:02 +0200 Subject: netfilter: conntrack: nf_conncount_init is failing with IPv6 disabled The openvswitch module fails initialization when used in a kernel without IPv6 enabled. nf_conncount_init() fails because the ct code unconditionally tries to initialize the netns IPv6 related bit, regardless of the build option. The change below ignores the IPv6 part if not enabled. Note that the corresponding _put() function already has this IPv6 configuration check. Fixes: 11efd5cb04a1 ("openvswitch: Support conntrack zone limit") Signed-off-by: Eelco Chaudron Reviewed-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 95f79980348c..47e9319d2cf3 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -565,6 +565,7 @@ static int nf_ct_netns_inet_get(struct net *net) int err; err = nf_ct_netns_do_get(net, NFPROTO_IPV4); +#if IS_ENABLED(CONFIG_IPV6) if (err < 0) goto err1; err = nf_ct_netns_do_get(net, NFPROTO_IPV6); @@ -575,6 +576,7 @@ static int nf_ct_netns_inet_get(struct net *net) err2: nf_ct_netns_put(net, NFPROTO_IPV4); err1: +#endif return err; } -- cgit v1.2.3-59-g8ed1b From 0c92411bb81de9bc516d6924f50289d8d5f880e5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 3 Sep 2020 19:00:52 +0200 Subject: netfilter: nft_meta: use socket user_ns to retrieve skuid and skgid ... instead of using init_user_ns. Fixes: 96518518cc41 ("netfilter: add nftables") Tested-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_meta.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 7bc6537f3ccb..b37bd02448d8 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -147,11 +147,11 @@ nft_meta_get_eval_skugid(enum nft_meta_keys key, switch (key) { case NFT_META_SKUID: - *dest = from_kuid_munged(&init_user_ns, + *dest = from_kuid_munged(sock_net(sk)->user_ns, sock->file->f_cred->fsuid); break; case NFT_META_SKGID: - *dest = from_kgid_munged(&init_user_ns, + *dest = from_kgid_munged(sock_net(sk)->user_ns, sock->file->f_cred->fsgid); break; default: -- cgit v1.2.3-59-g8ed1b From 0ff4628f4c6c1ab87eef9f16b25355cadc426d64 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 8 Sep 2020 03:40:25 -0700 Subject: mac802154: tx: fix use-after-free syzbot reported a bug in ieee802154_tx() [1] A similar issue in ieee802154_xmit_worker() is also fixed in this patch. [1] BUG: KASAN: use-after-free in ieee802154_tx+0x3d2/0x480 net/mac802154/tx.c:88 Read of size 4 at addr ffff8880251a8c70 by task syz-executor.3/928 CPU: 0 PID: 928 Comm: syz-executor.3 Not tainted 5.9.0-rc3-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x198/0x1fd lib/dump_stack.c:118 print_address_description.constprop.0.cold+0xae/0x497 mm/kasan/report.c:383 __kasan_report mm/kasan/report.c:513 [inline] kasan_report.cold+0x1f/0x37 mm/kasan/report.c:530 ieee802154_tx+0x3d2/0x480 net/mac802154/tx.c:88 ieee802154_subif_start_xmit+0xbe/0xe4 net/mac802154/tx.c:130 __netdev_start_xmit include/linux/netdevice.h:4634 [inline] netdev_start_xmit include/linux/netdevice.h:4648 [inline] dev_direct_xmit+0x4e9/0x6e0 net/core/dev.c:4203 packet_snd net/packet/af_packet.c:2989 [inline] packet_sendmsg+0x2413/0x5290 net/packet/af_packet.c:3014 sock_sendmsg_nosec net/socket.c:651 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:671 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2353 ___sys_sendmsg+0xf3/0x170 net/socket.c:2407 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2440 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x45d5b9 Code: 5d b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 2b b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fc98e749c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 000000000002ccc0 RCX: 000000000045d5b9 RDX: 0000000000000000 RSI: 0000000020007780 RDI: 000000000000000b RBP: 000000000118d020 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 000000000118cfec R13: 00007fff690c720f R14: 00007fc98e74a9c0 R15: 000000000118cfec Allocated by task 928: kasan_save_stack+0x1b/0x40 mm/kasan/common.c:48 kasan_set_track mm/kasan/common.c:56 [inline] __kasan_kmalloc.constprop.0+0xbf/0xd0 mm/kasan/common.c:461 slab_post_alloc_hook mm/slab.h:518 [inline] slab_alloc_node mm/slab.c:3254 [inline] kmem_cache_alloc_node+0x136/0x3e0 mm/slab.c:3574 __alloc_skb+0x71/0x550 net/core/skbuff.c:198 alloc_skb include/linux/skbuff.h:1094 [inline] alloc_skb_with_frags+0x92/0x570 net/core/skbuff.c:5771 sock_alloc_send_pskb+0x72a/0x880 net/core/sock.c:2348 packet_alloc_skb net/packet/af_packet.c:2837 [inline] packet_snd net/packet/af_packet.c:2932 [inline] packet_sendmsg+0x19fb/0x5290 net/packet/af_packet.c:3014 sock_sendmsg_nosec net/socket.c:651 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:671 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2353 ___sys_sendmsg+0xf3/0x170 net/socket.c:2407 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2440 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Freed by task 928: kasan_save_stack+0x1b/0x40 mm/kasan/common.c:48 kasan_set_track+0x1c/0x30 mm/kasan/common.c:56 kasan_set_free_info+0x1b/0x30 mm/kasan/generic.c:355 __kasan_slab_free+0xd8/0x120 mm/kasan/common.c:422 __cache_free mm/slab.c:3418 [inline] kmem_cache_free.part.0+0x74/0x1e0 mm/slab.c:3693 kfree_skbmem+0xef/0x1b0 net/core/skbuff.c:622 __kfree_skb net/core/skbuff.c:679 [inline] consume_skb net/core/skbuff.c:838 [inline] consume_skb+0xcf/0x160 net/core/skbuff.c:832 __dev_kfree_skb_any+0x9c/0xc0 net/core/dev.c:3107 fakelb_hw_xmit+0x20e/0x2a0 drivers/net/ieee802154/fakelb.c:81 drv_xmit_async net/mac802154/driver-ops.h:16 [inline] ieee802154_tx+0x282/0x480 net/mac802154/tx.c:81 ieee802154_subif_start_xmit+0xbe/0xe4 net/mac802154/tx.c:130 __netdev_start_xmit include/linux/netdevice.h:4634 [inline] netdev_start_xmit include/linux/netdevice.h:4648 [inline] dev_direct_xmit+0x4e9/0x6e0 net/core/dev.c:4203 packet_snd net/packet/af_packet.c:2989 [inline] packet_sendmsg+0x2413/0x5290 net/packet/af_packet.c:3014 sock_sendmsg_nosec net/socket.c:651 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:671 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2353 ___sys_sendmsg+0xf3/0x170 net/socket.c:2407 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2440 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The buggy address belongs to the object at ffff8880251a8c00 which belongs to the cache skbuff_head_cache of size 224 The buggy address is located 112 bytes inside of 224-byte region [ffff8880251a8c00, ffff8880251a8ce0) The buggy address belongs to the page: page:0000000062b6a4f1 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x251a8 flags: 0xfffe0000000200(slab) raw: 00fffe0000000200 ffffea0000435c88 ffffea00028b6c08 ffff8880a9055d00 raw: 0000000000000000 ffff8880251a80c0 000000010000000c 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8880251a8b00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8880251a8b80: fb fb fb fb fc fc fc fc fc fc fc fc fc fc fc fc >ffff8880251a8c00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8880251a8c80: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc ffff8880251a8d00: fc fc fc fc fc fc fc fc fa fb fb fb fb fb fb fb Fixes: 409c3b0c5f03 ("mac802154: tx: move stats tx increment") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Alexander Aring Cc: Stefan Schmidt Cc: linux-wpan@vger.kernel.org Link: https://lore.kernel.org/r/20200908104025.4009085-1-edumazet@google.com Signed-off-by: Stefan Schmidt --- net/mac802154/tx.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c index ab52811523e9..c829e4a75325 100644 --- a/net/mac802154/tx.c +++ b/net/mac802154/tx.c @@ -34,11 +34,11 @@ void ieee802154_xmit_worker(struct work_struct *work) if (res) goto err_tx; - ieee802154_xmit_complete(&local->hw, skb, false); - dev->stats.tx_packets++; dev->stats.tx_bytes += skb->len; + ieee802154_xmit_complete(&local->hw, skb, false); + return; err_tx: @@ -78,6 +78,8 @@ ieee802154_tx(struct ieee802154_local *local, struct sk_buff *skb) /* async is priority, otherwise sync is fallback */ if (local->ops->xmit_async) { + unsigned int len = skb->len; + ret = drv_xmit_async(local, skb); if (ret) { ieee802154_wake_queue(&local->hw); @@ -85,7 +87,7 @@ ieee802154_tx(struct ieee802154_local *local, struct sk_buff *skb) } dev->stats.tx_packets++; - dev->stats.tx_bytes += skb->len; + dev->stats.tx_bytes += len; } else { local->tx_skb = skb; queue_work(local->workqueue, &local->tx_work); -- cgit v1.2.3-59-g8ed1b From e6a18d36118bea3bf497c9df4d9988b6df120689 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 8 Sep 2020 00:04:10 +0200 Subject: bpf: Fix clobbering of r2 in bpf_gen_ld_abs Bryce reported that he saw the following with: 0: r6 = r1 1: r1 = 12 2: r0 = *(u16 *)skb[r1] The xlated sequence was incorrectly clobbering r2 with pointer value of r6 ... 0: (bf) r6 = r1 1: (b7) r1 = 12 2: (bf) r1 = r6 3: (bf) r2 = r1 4: (85) call bpf_skb_load_helper_16_no_cache#7692160 ... and hence call to the load helper never succeeded given the offset was too high. Fix it by reordering the load of r6 to r1. Other than that the insn has similar calling convention than BPF helpers, that is, r0 - r5 are scratch regs, so nothing else affected after the insn. Fixes: e0cea7ce988c ("bpf: implement ld_abs/ld_ind in native bpf") Reported-by: Bryce Kahle Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/cace836e4d07bb63b1a53e49c5dfb238a040c298.1599512096.git.daniel@iogearbox.net --- net/core/filter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index b2df52086445..2d62c25e0395 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7065,8 +7065,6 @@ static int bpf_gen_ld_abs(const struct bpf_insn *orig, bool indirect = BPF_MODE(orig->code) == BPF_IND; struct bpf_insn *insn = insn_buf; - /* We're guaranteed here that CTX is in R6. */ - *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_CTX); if (!indirect) { *insn++ = BPF_MOV64_IMM(BPF_REG_2, orig->imm); } else { @@ -7074,6 +7072,8 @@ static int bpf_gen_ld_abs(const struct bpf_insn *orig, if (orig->imm) *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, orig->imm); } + /* We're guaranteed here that CTX is in R6. */ + *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_CTX); switch (BPF_SIZE(orig->code)) { case BPF_B: -- cgit v1.2.3-59-g8ed1b From 746f534a4809e07f427f7d13d10f3a6a9641e5c3 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Sat, 5 Sep 2020 14:48:31 -0700 Subject: tools/libbpf: Avoid counting local symbols in ABI check Encountered the following failure building libbpf from kernel 5.8.5 sources with GCC 8.4.0 and binutils 2.34: (long paths shortened) Warning: Num of global symbols in sharedobjs/libbpf-in.o (234) does NOT match with num of versioned symbols in libbpf.so (236). Please make sure all LIBBPF_API symbols are versioned in libbpf.map. --- libbpf_global_syms.tmp 2020-09-02 07:30:58.920084380 +0000 +++ libbpf_versioned_syms.tmp 2020-09-02 07:30:58.924084388 +0000 @@ -1,3 +1,5 @@ +_fini +_init bpf_btf_get_fd_by_id bpf_btf_get_next_id bpf_create_map make[4]: *** [Makefile:210: check_abi] Error 1 Investigation shows _fini and _init are actually local symbols counted amongst global ones: $ readelf --dyn-syms --wide libbpf.so|head -10 Symbol table '.dynsym' contains 343 entries: Num: Value Size Type Bind Vis Ndx Name 0: 00000000 0 NOTYPE LOCAL DEFAULT UND 1: 00004098 0 SECTION LOCAL DEFAULT 11 2: 00004098 8 FUNC LOCAL DEFAULT 11 _init@@LIBBPF_0.0.1 3: 00023040 8 FUNC LOCAL DEFAULT 14 _fini@@LIBBPF_0.0.1 4: 00000000 0 OBJECT GLOBAL DEFAULT ABS LIBBPF_0.0.4 5: 00000000 0 OBJECT GLOBAL DEFAULT ABS LIBBPF_0.0.1 6: 0000ffa4 8 FUNC GLOBAL DEFAULT 12 bpf_object__find_map_by_offset@@LIBBPF_0.0.1 A previous commit filtered global symbols in sharedobjs/libbpf-in.o. Do the same with the libbpf.so DSO for consistent comparison. Fixes: 306b267cb3c4 ("libbpf: Verify versioned symbols") Signed-off-by: Tony Ambardar Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200905214831.1565465-1-Tony.Ambardar@gmail.com --- tools/lib/bpf/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index b78484e7a608..9ae8f4ef0aac 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -152,6 +152,7 @@ GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \ sort -u | wc -l) VERSIONED_SYM_COUNT = $(shell readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \ + awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \ grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l) CMD_TARGETS = $(LIB_TARGET) $(PC_FILE) @@ -219,6 +220,7 @@ check_abi: $(OUTPUT)libbpf.so awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \ sort -u > $(OUTPUT)libbpf_global_syms.tmp; \ readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \ + awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \ grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | \ sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; \ diff -u $(OUTPUT)libbpf_global_syms.tmp \ -- cgit v1.2.3-59-g8ed1b From 2f1e8ea726e9020e01e9e2ae29c2d5eb11133032 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 8 Sep 2020 02:48:42 +0300 Subject: net: dsa: link interfaces with the DSA master to get rid of lockdep warnings Since commit 845e0ebb4408 ("net: change addr_list_lock back to static key"), cascaded DSA setups (DSA switch port as DSA master for another DSA switch port) are emitting this lockdep warning: ============================================ WARNING: possible recursive locking detected 5.8.0-rc1-00133-g923e4b5032dd-dirty #208 Not tainted -------------------------------------------- dhcpcd/323 is trying to acquire lock: ffff000066dd4268 (&dsa_master_addr_list_lock_key/1){+...}-{2:2}, at: dev_mc_sync+0x44/0x90 but task is already holding lock: ffff00006608c268 (&dsa_master_addr_list_lock_key/1){+...}-{2:2}, at: dev_mc_sync+0x44/0x90 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&dsa_master_addr_list_lock_key/1); lock(&dsa_master_addr_list_lock_key/1); *** DEADLOCK *** May be due to missing lock nesting notation 3 locks held by dhcpcd/323: #0: ffffdbd1381dda18 (rtnl_mutex){+.+.}-{3:3}, at: rtnl_lock+0x24/0x30 #1: ffff00006614b268 (_xmit_ETHER){+...}-{2:2}, at: dev_set_rx_mode+0x28/0x48 #2: ffff00006608c268 (&dsa_master_addr_list_lock_key/1){+...}-{2:2}, at: dev_mc_sync+0x44/0x90 stack backtrace: Call trace: dump_backtrace+0x0/0x1e0 show_stack+0x20/0x30 dump_stack+0xec/0x158 __lock_acquire+0xca0/0x2398 lock_acquire+0xe8/0x440 _raw_spin_lock_nested+0x64/0x90 dev_mc_sync+0x44/0x90 dsa_slave_set_rx_mode+0x34/0x50 __dev_set_rx_mode+0x60/0xa0 dev_mc_sync+0x84/0x90 dsa_slave_set_rx_mode+0x34/0x50 __dev_set_rx_mode+0x60/0xa0 dev_set_rx_mode+0x30/0x48 __dev_open+0x10c/0x180 __dev_change_flags+0x170/0x1c8 dev_change_flags+0x2c/0x70 devinet_ioctl+0x774/0x878 inet_ioctl+0x348/0x3b0 sock_do_ioctl+0x50/0x310 sock_ioctl+0x1f8/0x580 ksys_ioctl+0xb0/0xf0 __arm64_sys_ioctl+0x28/0x38 el0_svc_common.constprop.0+0x7c/0x180 do_el0_svc+0x2c/0x98 el0_sync_handler+0x9c/0x1b8 el0_sync+0x158/0x180 Since DSA never made use of the netdev API for describing links between upper devices and lower devices, the dev->lower_level value of a DSA switch interface would be 1, which would warn when it is a DSA master. We can use netdev_upper_dev_link() to describe the relationship between a DSA slave and a DSA master. To be precise, a DSA "slave" (switch port) is an "upper" to a DSA "master" (host port). The relationship is "many uppers to one lower", like in the case of VLAN. So, for that reason, we use the same function as VLAN uses. There might be a chance that somebody will try to take hold of this interface and use it immediately after register_netdev() and before netdev_upper_dev_link(). To avoid that, we do the registration and linkage while holding the RTNL, and we use the RTNL-locked cousin of register_netdev(), which is register_netdevice(). Since this warning was not there when lockdep was using dynamic keys for addr_list_lock, we are blaming the lockdep patch itself. The network stack _has_ been using static lockdep keys before, and it _is_ likely that stacked DSA setups have been triggering these lockdep warnings since forever, however I can't test very old kernels on this particular stacked DSA setup, to ensure I'm not in fact introducing regressions. Fixes: 845e0ebb4408 ("net: change addr_list_lock back to static key") Suggested-by: Cong Wang Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/slave.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 9af1a2d0cec4..16e5f98d4882 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1799,15 +1799,27 @@ int dsa_slave_create(struct dsa_port *port) dsa_slave_notify(slave_dev, DSA_PORT_REGISTER); - ret = register_netdev(slave_dev); + rtnl_lock(); + + ret = register_netdevice(slave_dev); if (ret) { netdev_err(master, "error %d registering interface %s\n", ret, slave_dev->name); + rtnl_unlock(); goto out_phy; } + ret = netdev_upper_dev_link(master, slave_dev, NULL); + + rtnl_unlock(); + + if (ret) + goto out_unregister; + return 0; +out_unregister: + unregister_netdev(slave_dev); out_phy: rtnl_lock(); phylink_disconnect_phy(p->dp->pl); @@ -1824,16 +1836,18 @@ out_free: void dsa_slave_destroy(struct net_device *slave_dev) { + struct net_device *master = dsa_slave_to_master(slave_dev); struct dsa_port *dp = dsa_slave_to_port(slave_dev); struct dsa_slave_priv *p = netdev_priv(slave_dev); netif_carrier_off(slave_dev); rtnl_lock(); + netdev_upper_dev_unlink(master, slave_dev); + unregister_netdevice(slave_dev); phylink_disconnect_phy(dp->pl); rtnl_unlock(); dsa_slave_notify(slave_dev, DSA_PORT_UNREGISTER); - unregister_netdev(slave_dev); phylink_destroy(dp->pl); gro_cells_destroy(&p->gcells); free_percpu(p->stats64); -- cgit v1.2.3-59-g8ed1b From 843d926b003ea692468c8cc5bea1f9f58dfa8c75 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 8 Sep 2020 01:20:23 -0700 Subject: ipv6: avoid lockdep issue in fib6_del() syzbot reported twice a lockdep issue in fib6_del() [1] which I think is caused by net->ipv6.fib6_null_entry having a NULL fib6_table pointer. fib6_del() already checks for fib6_null_entry special case, we only need to return earlier. Bug seems to occur very rarely, I have thus chosen a 'bug origin' that makes backports not too complex. [1] WARNING: suspicious RCU usage 5.9.0-rc4-syzkaller #0 Not tainted ----------------------------- net/ipv6/ip6_fib.c:1996 suspicious rcu_dereference_protected() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 4 locks held by syz-executor.5/8095: #0: ffffffff8a7ea708 (rtnl_mutex){+.+.}-{3:3}, at: ppp_release+0x178/0x240 drivers/net/ppp/ppp_generic.c:401 #1: ffff88804c422dd8 (&net->ipv6.fib6_gc_lock){+.-.}-{2:2}, at: spin_trylock_bh include/linux/spinlock.h:414 [inline] #1: ffff88804c422dd8 (&net->ipv6.fib6_gc_lock){+.-.}-{2:2}, at: fib6_run_gc+0x21b/0x2d0 net/ipv6/ip6_fib.c:2312 #2: ffffffff89bd6a40 (rcu_read_lock){....}-{1:2}, at: __fib6_clean_all+0x0/0x290 net/ipv6/ip6_fib.c:2613 #3: ffff8880a82e6430 (&tb->tb6_lock){+.-.}-{2:2}, at: spin_lock_bh include/linux/spinlock.h:359 [inline] #3: ffff8880a82e6430 (&tb->tb6_lock){+.-.}-{2:2}, at: __fib6_clean_all+0x107/0x290 net/ipv6/ip6_fib.c:2245 stack backtrace: CPU: 1 PID: 8095 Comm: syz-executor.5 Not tainted 5.9.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x198/0x1fd lib/dump_stack.c:118 fib6_del+0x12b4/0x1630 net/ipv6/ip6_fib.c:1996 fib6_clean_node+0x39b/0x570 net/ipv6/ip6_fib.c:2180 fib6_walk_continue+0x4aa/0x8e0 net/ipv6/ip6_fib.c:2102 fib6_walk+0x182/0x370 net/ipv6/ip6_fib.c:2150 fib6_clean_tree+0xdb/0x120 net/ipv6/ip6_fib.c:2230 __fib6_clean_all+0x120/0x290 net/ipv6/ip6_fib.c:2246 fib6_clean_all net/ipv6/ip6_fib.c:2257 [inline] fib6_run_gc+0x113/0x2d0 net/ipv6/ip6_fib.c:2320 ndisc_netdev_event+0x217/0x350 net/ipv6/ndisc.c:1805 notifier_call_chain+0xb5/0x200 kernel/notifier.c:83 call_netdevice_notifiers_info+0xb5/0x130 net/core/dev.c:2033 call_netdevice_notifiers_extack net/core/dev.c:2045 [inline] call_netdevice_notifiers net/core/dev.c:2059 [inline] dev_close_many+0x30b/0x650 net/core/dev.c:1634 rollback_registered_many+0x3a8/0x1210 net/core/dev.c:9261 rollback_registered net/core/dev.c:9329 [inline] unregister_netdevice_queue+0x2dd/0x570 net/core/dev.c:10410 unregister_netdevice include/linux/netdevice.h:2774 [inline] ppp_release+0x216/0x240 drivers/net/ppp/ppp_generic.c:403 __fput+0x285/0x920 fs/file_table.c:281 task_work_run+0xdd/0x190 kernel/task_work.c:141 tracehook_notify_resume include/linux/tracehook.h:188 [inline] exit_to_user_mode_loop kernel/entry/common.c:163 [inline] exit_to_user_mode_prepare+0x1e1/0x200 kernel/entry/common.c:190 syscall_exit_to_user_mode+0x7e/0x2e0 kernel/entry/common.c:265 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 421842edeaf6 ("net/ipv6: Add fib6_null_entry") Signed-off-by: Eric Dumazet Cc: David Ahern Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 25a90f3f705c..4a664ad4f4d4 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1993,14 +1993,19 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, /* Need to own table->tb6_lock */ int fib6_del(struct fib6_info *rt, struct nl_info *info) { - struct fib6_node *fn = rcu_dereference_protected(rt->fib6_node, - lockdep_is_held(&rt->fib6_table->tb6_lock)); - struct fib6_table *table = rt->fib6_table; struct net *net = info->nl_net; struct fib6_info __rcu **rtp; struct fib6_info __rcu **rtp_next; + struct fib6_table *table; + struct fib6_node *fn; + + if (rt == net->ipv6.fib6_null_entry) + return -ENOENT; - if (!fn || rt == net->ipv6.fib6_null_entry) + table = rt->fib6_table; + fn = rcu_dereference_protected(rt->fib6_node, + lockdep_is_held(&table->tb6_lock)); + if (!fn) return -ENOENT; WARN_ON(!(fn->fn_flags & RTN_RTINFO)); -- cgit v1.2.3-59-g8ed1b From 923f614cdba2842b6f7eaf0713cba1c5668c1b5f Mon Sep 17 00:00:00 2001 From: Brian Vazquez Date: Tue, 8 Sep 2020 09:18:12 -0700 Subject: fib: fix fib_rule_ops indirect call wrappers when CONFIG_IPV6=m If CONFIG_IPV6=m, the IPV6 functions won't be found by the linker: ld: net/core/fib_rules.o: in function `fib_rules_lookup': fib_rules.c:(.text+0x606): undefined reference to `fib6_rule_match' ld: fib_rules.c:(.text+0x611): undefined reference to `fib6_rule_match' ld: fib_rules.c:(.text+0x68c): undefined reference to `fib6_rule_action' ld: fib_rules.c:(.text+0x693): undefined reference to `fib6_rule_action' ld: fib_rules.c:(.text+0x6aa): undefined reference to `fib6_rule_suppress' ld: fib_rules.c:(.text+0x6bc): undefined reference to `fib6_rule_suppress' make: *** [Makefile:1166: vmlinux] Error 1 Reported-by: Sven Joachim Fixes: b9aaec8f0be5 ("fib: use indirect call wrappers in the most common fib_rules_ops") Acked-by: Randy Dunlap # build-tested Signed-off-by: Brian Vazquez Signed-off-by: David S. Miller --- net/core/fib_rules.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 51678a528f85..7bcfb16854cb 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -16,7 +16,7 @@ #include #include -#ifdef CONFIG_IPV6_MULTIPLE_TABLES +#if defined(CONFIG_IPV6) && defined(CONFIG_IPV6_MULTIPLE_TABLES) #ifdef CONFIG_IP_MULTIPLE_TABLES #define INDIRECT_CALL_MT(f, f2, f1, ...) \ INDIRECT_CALL_INET(f, f2, f1, __VA_ARGS__) -- cgit v1.2.3-59-g8ed1b From 2a154988aa4b2b87bef5dc0678df675829448e32 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 8 Sep 2020 09:30:12 -0700 Subject: MAINTAINERS: remove John Allen from ibmvnic John's email has bounced and Thomas confirms he no longer works on ibmvnic. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index dca9bfd8c888..12be7ae4d989 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8322,7 +8322,6 @@ F: drivers/pci/hotplug/rpaphp* IBM Power SRIOV Virtual NIC Device Driver M: Thomas Falcon -M: John Allen L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/ibm/ibmvnic.* -- cgit v1.2.3-59-g8ed1b From ba9e04a7ddf4f22a10e05bf9403db6b97743c7bf Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 8 Sep 2020 14:09:34 -0700 Subject: ip: fix tos reflection in ack and reset packets Currently, in tcp_v4_reqsk_send_ack() and tcp_v4_send_reset(), we echo the TOS value of the received packets in the response. However, we do not want to echo the lower 2 ECN bits in accordance with RFC 3168 6.1.5 robustness principles. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Wei Wang Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 61f802d5350c..e6f2ada9e7d5 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -74,6 +74,7 @@ #include #include #include +#include #include #include #include @@ -1703,7 +1704,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, if (IS_ERR(rt)) return; - inet_sk(sk)->tos = arg->tos; + inet_sk(sk)->tos = arg->tos & ~INET_ECN_MASK; sk->sk_protocol = ip_hdr(skb)->protocol; sk->sk_bound_dev_if = arg->bound_dev_if; -- cgit v1.2.3-59-g8ed1b From 3ca1a42a52ca4b4f02061683851692ad65fefac8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Sep 2020 01:27:39 -0700 Subject: net: qrtr: check skb_put_padto() return value If skb_put_padto() returns an error, skb has been freed. Better not touch it anymore, as reported by syzbot [1] Note to qrtr maintainers : this suggests qrtr_sendmsg() should adjust sock_alloc_send_skb() second parameter to account for the potential added alignment to avoid reallocation. [1] BUG: KASAN: use-after-free in __skb_insert include/linux/skbuff.h:1907 [inline] BUG: KASAN: use-after-free in __skb_queue_before include/linux/skbuff.h:2016 [inline] BUG: KASAN: use-after-free in __skb_queue_tail include/linux/skbuff.h:2049 [inline] BUG: KASAN: use-after-free in skb_queue_tail+0x6b/0x120 net/core/skbuff.c:3146 Write of size 8 at addr ffff88804d8ab3c0 by task syz-executor.4/4316 CPU: 1 PID: 4316 Comm: syz-executor.4 Not tainted 5.9.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1d6/0x29e lib/dump_stack.c:118 print_address_description+0x66/0x620 mm/kasan/report.c:383 __kasan_report mm/kasan/report.c:513 [inline] kasan_report+0x132/0x1d0 mm/kasan/report.c:530 __skb_insert include/linux/skbuff.h:1907 [inline] __skb_queue_before include/linux/skbuff.h:2016 [inline] __skb_queue_tail include/linux/skbuff.h:2049 [inline] skb_queue_tail+0x6b/0x120 net/core/skbuff.c:3146 qrtr_tun_send+0x1a/0x40 net/qrtr/tun.c:23 qrtr_node_enqueue+0x44f/0xc00 net/qrtr/qrtr.c:364 qrtr_bcast_enqueue+0xbe/0x140 net/qrtr/qrtr.c:861 qrtr_sendmsg+0x680/0x9c0 net/qrtr/qrtr.c:960 sock_sendmsg_nosec net/socket.c:651 [inline] sock_sendmsg net/socket.c:671 [inline] sock_write_iter+0x317/0x470 net/socket.c:998 call_write_iter include/linux/fs.h:1882 [inline] new_sync_write fs/read_write.c:503 [inline] vfs_write+0xa96/0xd10 fs/read_write.c:578 ksys_write+0x11b/0x220 fs/read_write.c:631 do_syscall_64+0x31/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x45d5b9 Code: 5d b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 2b b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f84b5b81c78 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000038b40 RCX: 000000000045d5b9 RDX: 0000000000000055 RSI: 0000000020001240 RDI: 0000000000000003 RBP: 00007f84b5b81ca0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 000000000000000f R13: 00007ffcbbf86daf R14: 00007f84b5b829c0 R15: 000000000118cf4c Allocated by task 4316: kasan_save_stack mm/kasan/common.c:48 [inline] kasan_set_track mm/kasan/common.c:56 [inline] __kasan_kmalloc+0x100/0x130 mm/kasan/common.c:461 slab_post_alloc_hook+0x3e/0x290 mm/slab.h:518 slab_alloc mm/slab.c:3312 [inline] kmem_cache_alloc+0x1c1/0x2d0 mm/slab.c:3482 skb_clone+0x1b2/0x370 net/core/skbuff.c:1449 qrtr_bcast_enqueue+0x6d/0x140 net/qrtr/qrtr.c:857 qrtr_sendmsg+0x680/0x9c0 net/qrtr/qrtr.c:960 sock_sendmsg_nosec net/socket.c:651 [inline] sock_sendmsg net/socket.c:671 [inline] sock_write_iter+0x317/0x470 net/socket.c:998 call_write_iter include/linux/fs.h:1882 [inline] new_sync_write fs/read_write.c:503 [inline] vfs_write+0xa96/0xd10 fs/read_write.c:578 ksys_write+0x11b/0x220 fs/read_write.c:631 do_syscall_64+0x31/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Freed by task 4316: kasan_save_stack mm/kasan/common.c:48 [inline] kasan_set_track+0x3d/0x70 mm/kasan/common.c:56 kasan_set_free_info+0x17/0x30 mm/kasan/generic.c:355 __kasan_slab_free+0xdd/0x110 mm/kasan/common.c:422 __cache_free mm/slab.c:3418 [inline] kmem_cache_free+0x82/0xf0 mm/slab.c:3693 __skb_pad+0x3f5/0x5a0 net/core/skbuff.c:1823 __skb_put_padto include/linux/skbuff.h:3233 [inline] skb_put_padto include/linux/skbuff.h:3252 [inline] qrtr_node_enqueue+0x62f/0xc00 net/qrtr/qrtr.c:360 qrtr_bcast_enqueue+0xbe/0x140 net/qrtr/qrtr.c:861 qrtr_sendmsg+0x680/0x9c0 net/qrtr/qrtr.c:960 sock_sendmsg_nosec net/socket.c:651 [inline] sock_sendmsg net/socket.c:671 [inline] sock_write_iter+0x317/0x470 net/socket.c:998 call_write_iter include/linux/fs.h:1882 [inline] new_sync_write fs/read_write.c:503 [inline] vfs_write+0xa96/0xd10 fs/read_write.c:578 ksys_write+0x11b/0x220 fs/read_write.c:631 do_syscall_64+0x31/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The buggy address belongs to the object at ffff88804d8ab3c0 which belongs to the cache skbuff_head_cache of size 224 The buggy address is located 0 bytes inside of 224-byte region [ffff88804d8ab3c0, ffff88804d8ab4a0) The buggy address belongs to the page: page:00000000ea8cccfb refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff88804d8abb40 pfn:0x4d8ab flags: 0xfffe0000000200(slab) raw: 00fffe0000000200 ffffea0002237ec8 ffffea00029b3388 ffff88821bb66800 raw: ffff88804d8abb40 ffff88804d8ab000 000000010000000b 0000000000000000 page dumped because: kasan: bad access detected Fixes: ce57785bf91b ("net: qrtr: fix len of skb_put_padto in qrtr_node_enqueue") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Carl Huang Cc: Wen Gong Cc: Bjorn Andersson Cc: Manivannan Sadhasivam Acked-by: Manivannan Sadhasivam Reviewed-by: Bjorn Andersson Signed-off-by: David S. Miller --- net/qrtr/qrtr.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 90c558f89d46..957aa9263ba4 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -332,8 +332,7 @@ static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb, { struct qrtr_hdr_v1 *hdr; size_t len = skb->len; - int rc = -ENODEV; - int confirm_rx; + int rc, confirm_rx; confirm_rx = qrtr_tx_wait(node, to->sq_node, to->sq_port, type); if (confirm_rx < 0) { @@ -357,15 +356,17 @@ static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb, hdr->size = cpu_to_le32(len); hdr->confirm_rx = !!confirm_rx; - skb_put_padto(skb, ALIGN(len, 4) + sizeof(*hdr)); - - mutex_lock(&node->ep_lock); - if (node->ep) - rc = node->ep->xmit(node->ep, skb); - else - kfree_skb(skb); - mutex_unlock(&node->ep_lock); + rc = skb_put_padto(skb, ALIGN(len, 4) + sizeof(*hdr)); + if (!rc) { + mutex_lock(&node->ep_lock); + rc = -ENODEV; + if (node->ep) + rc = node->ep->xmit(node->ep, skb); + else + kfree_skb(skb); + mutex_unlock(&node->ep_lock); + } /* Need to ensure that a subsequent message carries the otherwise lost * confirm_rx flag if we dropped this one */ if (rc && confirm_rx) -- cgit v1.2.3-59-g8ed1b From 4a009cb04aeca0de60b73f37b102573354214b52 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Sep 2020 01:27:40 -0700 Subject: net: add __must_check to skb_put_padto() skb_put_padto() and __skb_put_padto() callers must check return values or risk use-after-free. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ed9bea924dc3..04a18e01b362 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3223,8 +3223,9 @@ static inline int skb_padto(struct sk_buff *skb, unsigned int len) * is untouched. Otherwise it is extended. Returns zero on * success. The skb is freed on error if @free_on_error is true. */ -static inline int __skb_put_padto(struct sk_buff *skb, unsigned int len, - bool free_on_error) +static inline int __must_check __skb_put_padto(struct sk_buff *skb, + unsigned int len, + bool free_on_error) { unsigned int size = skb->len; @@ -3247,7 +3248,7 @@ static inline int __skb_put_padto(struct sk_buff *skb, unsigned int len, * is untouched. Otherwise it is extended. Returns zero on * success. The skb is freed on error. */ -static inline int skb_put_padto(struct sk_buff *skb, unsigned int len) +static inline int __must_check skb_put_padto(struct sk_buff *skb, unsigned int len) { return __skb_put_padto(skb, len, true); } -- cgit v1.2.3-59-g8ed1b From b87f9fe1ac9441b75656dfd95eba70ef9f0375e0 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Wed, 9 Sep 2020 17:38:21 +0800 Subject: hsr: avoid newline at end of message in NL_SET_ERR_MSG_MOD clean follow coccicheck warning: net//hsr/hsr_netlink.c:94:8-42: WARNING avoid newline at end of message in NL_SET_ERR_MSG_MOD net//hsr/hsr_netlink.c:87:30-57: WARNING avoid newline at end of message in NL_SET_ERR_MSG_MOD net//hsr/hsr_netlink.c:79:29-53: WARNING avoid newline at end of message in NL_SET_ERR_MSG_MOD Signed-off-by: Ye Bin Signed-off-by: David S. Miller --- net/hsr/hsr_netlink.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index 06c3cd988760..0e4681cf71db 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -76,7 +76,7 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, proto = nla_get_u8(data[IFLA_HSR_PROTOCOL]); if (proto >= HSR_PROTOCOL_MAX) { - NL_SET_ERR_MSG_MOD(extack, "Unsupported protocol\n"); + NL_SET_ERR_MSG_MOD(extack, "Unsupported protocol"); return -EINVAL; } @@ -84,14 +84,14 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, proto_version = HSR_V0; } else { if (proto == HSR_PROTOCOL_PRP) { - NL_SET_ERR_MSG_MOD(extack, "PRP version unsupported\n"); + NL_SET_ERR_MSG_MOD(extack, "PRP version unsupported"); return -EINVAL; } proto_version = nla_get_u8(data[IFLA_HSR_VERSION]); if (proto_version > HSR_V1) { NL_SET_ERR_MSG_MOD(extack, - "Only HSR version 0/1 supported\n"); + "Only HSR version 0/1 supported"); return -EINVAL; } } -- cgit v1.2.3-59-g8ed1b From 9179ba31367bcf481c3c79b5f028c94faad9f30a Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 9 Sep 2020 13:58:14 +0200 Subject: wireguard: noise: take lock when removing handshake entry from table Eric reported that syzkaller found a race of this variety: CPU 1 CPU 2 -------------------------------------------|--------------------------------------- wg_index_hashtable_replace(old, ...) | if (hlist_unhashed(&old->index_hash)) | | wg_index_hashtable_remove(old) | hlist_del_init_rcu(&old->index_hash) | old->index_hash.pprev = NULL hlist_replace_rcu(&old->index_hash, ...) | *old->index_hash.pprev | Syzbot wasn't actually able to reproduce this more than once or create a reproducer, because the race window between checking "hlist_unhashed" and calling "hlist_replace_rcu" is just so small. Adding an mdelay(5) or similar there helps make this demonstrable using this simple script: #!/bin/bash set -ex trap 'kill $pid1; kill $pid2; ip link del wg0; ip link del wg1' EXIT ip link add wg0 type wireguard ip link add wg1 type wireguard wg set wg0 private-key <(wg genkey) listen-port 9999 wg set wg1 private-key <(wg genkey) peer $(wg show wg0 public-key) endpoint 127.0.0.1:9999 persistent-keepalive 1 wg set wg0 peer $(wg show wg1 public-key) ip link set wg0 up yes link set wg1 up | ip -force -batch - & pid1=$! yes link set wg1 down | ip -force -batch - & pid2=$! wait The fundumental underlying problem is that we permit calls to wg_index_ hashtable_remove(handshake.entry) without requiring the caller to take the handshake mutex that is intended to protect members of handshake during mutations. This is consistently the case with calls to wg_index_ hashtable_insert(handshake.entry) and wg_index_hashtable_replace( handshake.entry), but it's missing from a pertinent callsite of wg_ index_hashtable_remove(handshake.entry). So, this patch makes sure that mutex is taken. The original code was a little bit funky though, in the form of: remove(handshake.entry) lock(), memzero(handshake.some_members), unlock() remove(handshake.entry) The original intention of that double removal pattern outside the lock appears to be some attempt to prevent insertions that might happen while locks are dropped during expensive crypto operations, but actually, all callers of wg_index_hashtable_insert(handshake.entry) take the write lock and then explicitly check handshake.state, as they should, which the aforementioned memzero clears, which means an insertion should already be impossible. And regardless, the original intention was necessarily racy, since it wasn't guaranteed that something else would run after the unlock() instead of after the remove(). So, from a soundness perspective, it seems positive to remove what looks like a hack at best. The crash from both syzbot and from the script above is as follows: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] CPU: 0 PID: 7395 Comm: kworker/0:3 Not tainted 5.9.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: wg-kex-wg1 wg_packet_handshake_receive_worker RIP: 0010:hlist_replace_rcu include/linux/rculist.h:505 [inline] RIP: 0010:wg_index_hashtable_replace+0x176/0x330 drivers/net/wireguard/peerlookup.c:174 Code: 00 fc ff df 48 89 f9 48 c1 e9 03 80 3c 01 00 0f 85 44 01 00 00 48 b9 00 00 00 00 00 fc ff df 48 8b 45 10 48 89 c6 48 c1 ee 03 <80> 3c 0e 00 0f 85 06 01 00 00 48 85 d2 4c 89 28 74 47 e8 a3 4f b5 RSP: 0018:ffffc90006a97bf8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff888050ffc4f8 RCX: dffffc0000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88808e04e010 RBP: ffff88808e04e000 R08: 0000000000000001 R09: ffff8880543d0000 R10: ffffed100a87a000 R11: 000000000000016e R12: ffff8880543d0000 R13: ffff88808e04e008 R14: ffff888050ffc508 R15: ffff888050ffc500 FS: 0000000000000000(0000) GS:ffff8880ae600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000f5505db0 CR3: 0000000097cf7000 CR4: 00000000001526f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: wg_noise_handshake_begin_session+0x752/0xc9a drivers/net/wireguard/noise.c:820 wg_receive_handshake_packet drivers/net/wireguard/receive.c:183 [inline] wg_packet_handshake_receive_worker+0x33b/0x730 drivers/net/wireguard/receive.c:220 process_one_work+0x94c/0x1670 kernel/workqueue.c:2269 worker_thread+0x64c/0x1120 kernel/workqueue.c:2415 kthread+0x3b5/0x4a0 kernel/kthread.c:292 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:294 Reported-by: syzbot Reported-by: Eric Dumazet Link: https://lore.kernel.org/wireguard/20200908145911.4090480-1-edumazet@google.com/ Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- drivers/net/wireguard/noise.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c index 3dd3b76790d0..c0cfd9b36c0b 100644 --- a/drivers/net/wireguard/noise.c +++ b/drivers/net/wireguard/noise.c @@ -87,15 +87,12 @@ static void handshake_zero(struct noise_handshake *handshake) void wg_noise_handshake_clear(struct noise_handshake *handshake) { + down_write(&handshake->lock); wg_index_hashtable_remove( handshake->entry.peer->device->index_hashtable, &handshake->entry); - down_write(&handshake->lock); handshake_zero(handshake); up_write(&handshake->lock); - wg_index_hashtable_remove( - handshake->entry.peer->device->index_hashtable, - &handshake->entry); } static struct noise_keypair *keypair_create(struct wg_peer *peer) -- cgit v1.2.3-59-g8ed1b From 6147f7b1e90ff09bd52afc8b9206a7fcd133daf7 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 9 Sep 2020 13:58:15 +0200 Subject: wireguard: peerlookup: take lock before checking hash in replace operation Eric's suggested fix for the previous commit's mentioned race condition was to simply take the table->lock in wg_index_hashtable_replace(). The table->lock of the hash table is supposed to protect the bucket heads, not the entires, but actually, since all the mutator functions are already taking it, it makes sense to take it too for the test to hlist_unhashed, as a defense in depth measure, so that it no longer races with deletions, regardless of what other locks are protecting individual entries. This is sensible from a performance perspective because, as Eric pointed out, the case of being unhashed is already the unlikely case, so this won't add common contention. And comparing instructions, this basically doesn't make much of a difference other than pushing and popping %r13, used by the new `bool ret`. More generally, I like the idea of locking consistency across table mutator functions, and this might let me rest slightly easier at night. Suggested-by: Eric Dumazet Link: https://lore.kernel.org/wireguard/20200908145911.4090480-1-edumazet@google.com/ Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- drivers/net/wireguard/peerlookup.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireguard/peerlookup.c b/drivers/net/wireguard/peerlookup.c index e4deb331476b..f2783aa7a88f 100644 --- a/drivers/net/wireguard/peerlookup.c +++ b/drivers/net/wireguard/peerlookup.c @@ -167,9 +167,13 @@ bool wg_index_hashtable_replace(struct index_hashtable *table, struct index_hashtable_entry *old, struct index_hashtable_entry *new) { - if (unlikely(hlist_unhashed(&old->index_hash))) - return false; + bool ret; + spin_lock_bh(&table->lock); + ret = !hlist_unhashed(&old->index_hash); + if (unlikely(!ret)) + goto out; + new->index = old->index; hlist_replace_rcu(&old->index_hash, &new->index_hash); @@ -180,8 +184,9 @@ bool wg_index_hashtable_replace(struct index_hashtable *table, * simply gets dropped, which isn't terrible. */ INIT_HLIST_NODE(&old->index_hash); +out: spin_unlock_bh(&table->lock); - return true; + return ret; } void wg_index_hashtable_remove(struct index_hashtable *table, -- cgit v1.2.3-59-g8ed1b From 2d2fe8433796603091ac8ea235b9165ac5a85f9a Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Wed, 9 Sep 2020 20:43:08 +0300 Subject: net: qed: Disable aRFS for NPAR and 100G In CMT and NPAR the PF is unknown when the GFS block processes the packet. Therefore cannot use searcher as it has a per PF database, and thus ARFS must be disabled. Fixes: d51e4af5c209 ("qed: aRFS infrastructure support") Signed-off-by: Manish Chopra Signed-off-by: Igor Russkikh Signed-off-by: Michal Kalderon Signed-off-by: Dmitry Bogdanov Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_dev.c | 11 ++++++++++- drivers/net/ethernet/qlogic/qed/qed_l2.c | 3 +++ drivers/net/ethernet/qlogic/qed/qed_main.c | 2 ++ include/linux/qed/qed_if.h | 1 + 4 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index b8f076e4e6b8..3db181f3617a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -4253,7 +4253,8 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) cdev->mf_bits = BIT(QED_MF_LLH_MAC_CLSS) | BIT(QED_MF_LLH_PROTO_CLSS) | BIT(QED_MF_LL2_NON_UNICAST) | - BIT(QED_MF_INTER_PF_SWITCH); + BIT(QED_MF_INTER_PF_SWITCH) | + BIT(QED_MF_DISABLE_ARFS); break; case NVM_CFG1_GLOB_MF_MODE_DEFAULT: cdev->mf_bits = BIT(QED_MF_LLH_MAC_CLSS) | @@ -4266,6 +4267,14 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) DP_INFO(p_hwfn, "Multi function mode is 0x%lx\n", cdev->mf_bits); + + /* In CMT the PF is unknown when the GFS block processes the + * packet. Therefore cannot use searcher as it has a per PF + * database, and thus ARFS must be disabled. + * + */ + if (QED_IS_CMT(cdev)) + cdev->mf_bits |= BIT(QED_MF_DISABLE_ARFS); } DP_INFO(p_hwfn, "Multi function mode is 0x%lx\n", diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 4c6ac8862744..07824bf9d68d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -1980,6 +1980,9 @@ void qed_arfs_mode_configure(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, struct qed_arfs_config_params *p_cfg_params) { + if (test_bit(QED_MF_DISABLE_ARFS, &p_hwfn->cdev->mf_bits)) + return; + if (p_cfg_params->mode != QED_FILTER_CONFIG_MODE_DISABLE) { qed_gft_config(p_hwfn, p_ptt, p_hwfn->rel_pf_id, p_cfg_params->tcp, diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index f39f629242a1..50e5eb22e60a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -444,6 +444,8 @@ int qed_fill_dev_info(struct qed_dev *cdev, dev_info->fw_eng = FW_ENGINEERING_VERSION; dev_info->b_inter_pf_switch = test_bit(QED_MF_INTER_PF_SWITCH, &cdev->mf_bits); + if (!test_bit(QED_MF_DISABLE_ARFS, &cdev->mf_bits)) + dev_info->b_arfs_capable = true; dev_info->tx_switching = true; if (hw_info->b_wol_support == QED_WOL_SUPPORT_PME) diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index cd6a5c7e56eb..cdd73afc4c46 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -623,6 +623,7 @@ struct qed_dev_info { #define QED_MFW_VERSION_3_OFFSET 24 u32 flash_size; + bool b_arfs_capable; bool b_inter_pf_switch; bool tx_switching; bool rdma_supported; -- cgit v1.2.3-59-g8ed1b From 0367f05885b9f21d062447bd2ba1302ba3cc7392 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Wed, 9 Sep 2020 20:43:09 +0300 Subject: net: qede: Disable aRFS for NPAR and 100G In some configurations ARFS cannot be used, so disable it if device is not capable. Fixes: e4917d46a653 ("qede: Add aRFS support") Signed-off-by: Manish Chopra Signed-off-by: Igor Russkikh Signed-off-by: Michal Kalderon Signed-off-by: Dmitry Bogdanov Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_filter.c | 3 +++ drivers/net/ethernet/qlogic/qede/qede_main.c | 11 +++++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index f961f65d9372..c59b72c90293 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -311,6 +311,9 @@ int qede_alloc_arfs(struct qede_dev *edev) { int i; + if (!edev->dev_info.common.b_arfs_capable) + return -EINVAL; + edev->arfs = vzalloc(sizeof(*edev->arfs)); if (!edev->arfs) return -ENOMEM; diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 140a392a81bb..9e1f41ba766c 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -804,7 +804,7 @@ static void qede_init_ndev(struct qede_dev *edev) NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_HW_TC; - if (!IS_VF(edev) && edev->dev_info.common.num_hwfns == 1) + if (edev->dev_info.common.b_arfs_capable) hw_features |= NETIF_F_NTUPLE; if (edev->dev_info.common.vxlan_enable || @@ -2274,7 +2274,7 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode, qede_vlan_mark_nonconfigured(edev); edev->ops->fastpath_stop(edev->cdev); - if (!IS_VF(edev) && edev->dev_info.common.num_hwfns == 1) { + if (edev->dev_info.common.b_arfs_capable) { qede_poll_for_freeing_arfs_filters(edev); qede_free_arfs(edev); } @@ -2341,10 +2341,9 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode, if (rc) goto err2; - if (!IS_VF(edev) && edev->dev_info.common.num_hwfns == 1) { - rc = qede_alloc_arfs(edev); - if (rc) - DP_NOTICE(edev, "aRFS memory allocation failed\n"); + if (qede_alloc_arfs(edev)) { + edev->ndev->features &= ~NETIF_F_NTUPLE; + edev->dev_info.common.b_arfs_capable = false; } qede_napi_add_enable(edev); -- cgit v1.2.3-59-g8ed1b From ce1cf9e5025f4e2d2198728391f1847b3e168bc6 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Wed, 9 Sep 2020 20:43:10 +0300 Subject: net: qed: RDMA personality shouldn't fail VF load Fix the assert during VF driver installation when the personality is iWARP Fixes: 1fe614d10f45 ("qed: Relax VF firmware requirements") Signed-off-by: Igor Russkikh Signed-off-by: Michal Kalderon Signed-off-by: Dmitry Bogdanov Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_sriov.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index f1f75b6d0421..b8dc5c4591ef 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -71,6 +71,7 @@ static int qed_sp_vf_start(struct qed_hwfn *p_hwfn, struct qed_vf_info *p_vf) p_ramrod->personality = PERSONALITY_ETH; break; case QED_PCI_ETH_ROCE: + case QED_PCI_ETH_IWARP: p_ramrod->personality = PERSONALITY_RDMA_AND_ETH; break; default: -- cgit v1.2.3-59-g8ed1b From e1e1b5356eb48dce4307f5cae10e4d6d5bd3df74 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Thu, 13 Aug 2020 13:26:38 +0200 Subject: i40e: fix return of uninitialized aq_ret in i40e_set_vsi_promisc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c: In function ‘i40e_set_vsi_promisc’: drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c:1176:14: error: ‘aq_ret’ may be used uninitialized in this function [-Werror=maybe-uninitialized] i40e_status aq_ret; In case the code inside the if statement and the for loop does not get executed aq_ret will be uninitialized when the variable gets returned at the end of the function. Avoid this by changing num_vlans from int to u16, so aq_ret always gets set. Making this change in additional places as num_vlans should never be negative. Fixes: 37d318d7805f ("i40e: Remove scheduling while atomic possibility") Signed-off-by: Stefan Assmann Acked-by: Jakub Kicinski Tested-by: Aaron Brown Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 8e133d6545bd..5defcb777e92 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1115,7 +1115,7 @@ static int i40e_quiesce_vf_pci(struct i40e_vf *vf) static int i40e_getnum_vf_vsi_vlan_filters(struct i40e_vsi *vsi) { struct i40e_mac_filter *f; - int num_vlans = 0, bkt; + u16 num_vlans = 0, bkt; hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) { if (f->vlan >= 0 && f->vlan <= I40E_MAX_VLANID) @@ -1134,8 +1134,8 @@ static int i40e_getnum_vf_vsi_vlan_filters(struct i40e_vsi *vsi) * * Called to get number of VLANs and VLAN list present in mac_filter_hash. **/ -static void i40e_get_vlan_list_sync(struct i40e_vsi *vsi, int *num_vlans, - s16 **vlan_list) +static void i40e_get_vlan_list_sync(struct i40e_vsi *vsi, u16 *num_vlans, + s16 **vlan_list) { struct i40e_mac_filter *f; int i = 0; @@ -1169,7 +1169,7 @@ err: **/ static i40e_status i40e_set_vsi_promisc(struct i40e_vf *vf, u16 seid, bool multi_enable, - bool unicast_enable, s16 *vl, int num_vlans) + bool unicast_enable, s16 *vl, u16 num_vlans) { struct i40e_pf *pf = vf->pf; struct i40e_hw *hw = &pf->hw; @@ -1258,7 +1258,7 @@ static i40e_status i40e_config_vf_promiscuous_mode(struct i40e_vf *vf, i40e_status aq_ret = I40E_SUCCESS; struct i40e_pf *pf = vf->pf; struct i40e_vsi *vsi; - int num_vlans; + u16 num_vlans; s16 *vl; vsi = i40e_find_vsi_from_id(pf, vsi_id); -- cgit v1.2.3-59-g8ed1b From b6f23d3817b965bcd6d72aab1f438ff6d16a0691 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Thu, 20 Aug 2020 13:53:12 +0200 Subject: i40e: always propagate error value in i40e_set_vsi_promisc() The for loop in i40e_set_vsi_promisc() reports errors via dev_err() but does not propagate the error up the call chain. Instead it continues the loop and potentially overwrites the reported error value. This results in the error being recorded in the log buffer, but the caller might never know anything went the wrong way. To avoid this situation i40e_set_vsi_promisc() needs to temporarily store the error after reporting it. This is still not optimal as multiple different errors may occur, so store the first error and hope that's the main issue. Fixes: 37d318d7805f (i40e: Remove scheduling while atomic possibility) Reported-by: Michal Schmidt Signed-off-by: Stefan Assmann Tested-by: Aaron Brown Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 5defcb777e92..47bfb2e95e2d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1171,9 +1171,9 @@ static i40e_status i40e_set_vsi_promisc(struct i40e_vf *vf, u16 seid, bool multi_enable, bool unicast_enable, s16 *vl, u16 num_vlans) { + i40e_status aq_ret, aq_tmp = 0; struct i40e_pf *pf = vf->pf; struct i40e_hw *hw = &pf->hw; - i40e_status aq_ret; int i; /* No VLAN to set promisc on, set on VSI */ @@ -1222,6 +1222,9 @@ i40e_set_vsi_promisc(struct i40e_vf *vf, u16 seid, bool multi_enable, vf->vf_id, i40e_stat_str(&pf->hw, aq_ret), i40e_aq_str(&pf->hw, aq_err)); + + if (!aq_tmp) + aq_tmp = aq_ret; } aq_ret = i40e_aq_set_vsi_uc_promisc_on_vlan(hw, seid, @@ -1235,8 +1238,15 @@ i40e_set_vsi_promisc(struct i40e_vf *vf, u16 seid, bool multi_enable, vf->vf_id, i40e_stat_str(&pf->hw, aq_ret), i40e_aq_str(&pf->hw, aq_err)); + + if (!aq_tmp) + aq_tmp = aq_ret; } } + + if (aq_tmp) + aq_ret = aq_tmp; + return aq_ret; } -- cgit v1.2.3-59-g8ed1b From f03369b9bfab8e23ac28ca7ab7e6631c374b7cbe Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Tue, 18 Aug 2020 16:40:01 -0700 Subject: igc: Fix wrong timestamp latency numbers The previous timestamping latency numbers were obtained by interpolating the i210 numbers with the i225 crystal clock value. That calculation was wrong. Use the correct values from real measurements. Fixes: 81b055205e8b ("igc: Add support for RX timestamping") Signed-off-by: Vinicius Costa Gomes Tested-by: Aaron Brown Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc.h | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 3070dfdb7eb4..2d566f3c827b 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -299,18 +299,14 @@ extern char igc_driver_name[]; #define IGC_RX_HDR_LEN IGC_RXBUFFER_256 /* Transmit and receive latency (for PTP timestamps) */ -/* FIXME: These values were estimated using the ones that i225 has as - * basis, they seem to provide good numbers with ptp4l/phc2sys, but we - * need to confirm them. - */ -#define IGC_I225_TX_LATENCY_10 9542 -#define IGC_I225_TX_LATENCY_100 1024 -#define IGC_I225_TX_LATENCY_1000 178 -#define IGC_I225_TX_LATENCY_2500 64 -#define IGC_I225_RX_LATENCY_10 20662 -#define IGC_I225_RX_LATENCY_100 2213 -#define IGC_I225_RX_LATENCY_1000 448 -#define IGC_I225_RX_LATENCY_2500 160 +#define IGC_I225_TX_LATENCY_10 240 +#define IGC_I225_TX_LATENCY_100 58 +#define IGC_I225_TX_LATENCY_1000 80 +#define IGC_I225_TX_LATENCY_2500 1325 +#define IGC_I225_RX_LATENCY_10 6450 +#define IGC_I225_RX_LATENCY_100 185 +#define IGC_I225_RX_LATENCY_1000 300 +#define IGC_I225_RX_LATENCY_2500 1485 /* RX and TX descriptor control thresholds. * PTHRESH - MAC will consider prefetch if it has fewer than this number of -- cgit v1.2.3-59-g8ed1b From 4406e977a4a1e997818b6d77c3218ef8d15b2abf Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Tue, 18 Aug 2020 16:40:02 -0700 Subject: igc: Fix not considering the TX delay for timestamps When timestamping a packet there's a delay between the start of the packet and the point where the hardware actually captures the timestamp. This difference needs to be considered if we want accurate timestamps. This was done on the RX side, but not on the TX side. Fixes: 2c344ae24501 ("igc: Add support for TX timestamping") Signed-off-by: Vinicius Costa Gomes Tested-by: Aaron Brown Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_ptp.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 36c999250fcc..6a9b5102aa55 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -364,6 +364,7 @@ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) struct sk_buff *skb = adapter->ptp_tx_skb; struct skb_shared_hwtstamps shhwtstamps; struct igc_hw *hw = &adapter->hw; + int adjust = 0; u64 regval; if (WARN_ON_ONCE(!skb)) @@ -373,6 +374,24 @@ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) regval |= (u64)rd32(IGC_TXSTMPH) << 32; igc_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval); + switch (adapter->link_speed) { + case SPEED_10: + adjust = IGC_I225_TX_LATENCY_10; + break; + case SPEED_100: + adjust = IGC_I225_TX_LATENCY_100; + break; + case SPEED_1000: + adjust = IGC_I225_TX_LATENCY_1000; + break; + case SPEED_2500: + adjust = IGC_I225_TX_LATENCY_2500; + break; + } + + shhwtstamps.hwtstamp = + ktime_add_ns(shhwtstamps.hwtstamp, adjust); + /* Clear the lock early before calling skb_tstamp_tx so that * applications are not woken up before the lock bit is clear. We use * a copy of the skb pointer to ensure other threads can't change it -- cgit v1.2.3-59-g8ed1b From 46cf789b68b25744be3dc99efd4afce4a5381b5c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 10 Sep 2020 08:40:13 -0700 Subject: connector: Move maintainence under networking drivers umbrella. Evgeniy does not have the time nor capacity to maintain the connector subsystem any longer, so just move it under networking as that is effectively what has been happening lately. Signed-off-by: David S. Miller --- MAINTAINERS | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 12be7ae4d989..2783a5f68d2c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4407,12 +4407,6 @@ T: git git://git.infradead.org/users/hch/configfs.git F: fs/configfs/ F: include/linux/configfs.h -CONNECTOR -M: Evgeniy Polyakov -L: netdev@vger.kernel.org -S: Maintained -F: drivers/connector/ - CONSOLE SUBSYSTEM M: Greg Kroah-Hartman S: Supported @@ -12046,6 +12040,7 @@ Q: http://patchwork.ozlabs.org/project/netdev/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git F: Documentation/devicetree/bindings/net/ +F: drivers/connector/ F: drivers/net/ F: include/linux/etherdevice.h F: include/linux/fcdevice.h -- cgit v1.2.3-59-g8ed1b From a4b5cc9e10803ecba64a7d54c0f47e4564b4a980 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sat, 5 Sep 2020 15:14:47 +0900 Subject: tipc: fix shutdown() of connection oriented socket I confirmed that the problem fixed by commit 2a63866c8b51a3f7 ("tipc: fix shutdown() of connectionless socket") also applies to stream socket. ---------- #include #include #include int main(int argc, char *argv[]) { int fds[2] = { -1, -1 }; socketpair(PF_TIPC, SOCK_STREAM /* or SOCK_DGRAM */, 0, fds); if (fork() == 0) _exit(read(fds[0], NULL, 1)); shutdown(fds[0], SHUT_RDWR); /* This must make read() return. */ wait(NULL); /* To be woken up by _exit(). */ return 0; } ---------- Since shutdown(SHUT_RDWR) should affect all processes sharing that socket, unconditionally setting sk->sk_shutdown to SHUTDOWN_MASK will be the right behavior. Signed-off-by: Tetsuo Handa Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/socket.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index ebd280e767bd..11b27ddc75ba 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2771,10 +2771,7 @@ static int tipc_shutdown(struct socket *sock, int how) trace_tipc_sk_shutdown(sk, NULL, TIPC_DUMP_ALL, " "); __tipc_shutdown(sock, TIPC_CONN_SHUTDOWN); - if (tipc_sk_type_connectionless(sk)) - sk->sk_shutdown = SHUTDOWN_MASK; - else - sk->sk_shutdown = SEND_SHUTDOWN; + sk->sk_shutdown = SHUTDOWN_MASK; if (sk->sk_state == TIPC_DISCONNECTING) { /* Discard any unreceived messages */ -- cgit v1.2.3-59-g8ed1b From 57025817eaa42c0b6e2a907f28a125c74e3b2fc6 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 8 Sep 2020 10:49:38 +0800 Subject: mptcp: fix subflow's local_id issues In mptcp_pm_nl_get_local_id, skc_local is the same as msk_local, so it always return 0. Thus every subflow's local_id is 0. It's incorrect. This patch fixed this issue. Also, we need to ignore the zero address here, like 0.0.0.0 in IPv4. When we use the zero address as a local address, it means that we can use any one of the local addresses. The zero address is not a new address, we don't need to add it to PM, so this patch added a new function address_zero to check whether an address is the zero address, if it is, we ignore this address. Fixes: 01cacb00b35cb ("mptcp: add netlink-based PM") Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts Reported-by: kernel test robot Signed-off-by: David S. Miller --- net/mptcp/pm_netlink.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index c8820c4156e6..6b41d1d939a0 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -66,6 +66,16 @@ static bool addresses_equal(const struct mptcp_addr_info *a, return a->port == b->port; } +static bool address_zero(const struct mptcp_addr_info *addr) +{ + struct mptcp_addr_info zero; + + memset(&zero, 0, sizeof(zero)); + zero.family = addr->family; + + return addresses_equal(addr, &zero, false); +} + static void local_address(const struct sock_common *skc, struct mptcp_addr_info *addr) { @@ -323,10 +333,13 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) * addr */ local_address((struct sock_common *)msk, &msk_local); - local_address((struct sock_common *)msk, &skc_local); + local_address((struct sock_common *)skc, &skc_local); if (addresses_equal(&msk_local, &skc_local, false)) return 0; + if (address_zero(&skc_local)) + return 0; + pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id); rcu_read_lock(); -- cgit v1.2.3-59-g8ed1b From 2ff0e566faa4e92cba8138c5b396d6ba96a215f1 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 8 Sep 2020 10:49:39 +0800 Subject: mptcp: fix subflow's remote_id issues This patch set the init remote_id to zero, otherwise it will be a random number. Then it added the missing subflow's remote_id setting code both in __mptcp_subflow_connect and in subflow_ulp_clone. Fixes: 01cacb00b35cb ("mptcp: add netlink-based PM") Fixes: ec3edaa7ca6ce ("mptcp: Add handling of outgoing MP_JOIN requests") Fixes: f296234c98a8f ("mptcp: Add handling of incoming MP_JOIN requests") Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts Signed-off-by: David S. Miller --- net/mptcp/pm_netlink.c | 2 +- net/mptcp/subflow.c | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 6b41d1d939a0..5ea121d0222e 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -181,9 +181,9 @@ static void check_work_pending(struct mptcp_sock *msk) static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) { + struct mptcp_addr_info remote = { 0 }; struct sock *sk = (struct sock *)msk; struct mptcp_pm_addr_entry *local; - struct mptcp_addr_info remote; struct pm_nl_pernet *pernet; pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index e8cac2655c82..9ead43f79023 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1063,6 +1063,7 @@ int __mptcp_subflow_connect(struct sock *sk, int ifindex, struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_subflow_context *subflow; struct sockaddr_storage addr; + int remote_id = remote->id; int local_id = loc->id; struct socket *sf; struct sock *ssk; @@ -1107,10 +1108,11 @@ int __mptcp_subflow_connect(struct sock *sk, int ifindex, goto failed; mptcp_crypto_key_sha(subflow->remote_key, &remote_token, NULL); - pr_debug("msk=%p remote_token=%u local_id=%d", msk, remote_token, - local_id); + pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk, + remote_token, local_id, remote_id); subflow->remote_token = remote_token; subflow->local_id = local_id; + subflow->remote_id = remote_id; subflow->request_join = 1; subflow->request_bkup = 1; mptcp_info2sockaddr(remote, &addr); @@ -1347,6 +1349,7 @@ static void subflow_ulp_clone(const struct request_sock *req, new_ctx->fully_established = 1; new_ctx->backup = subflow_req->backup; new_ctx->local_id = subflow_req->local_id; + new_ctx->remote_id = subflow_req->remote_id; new_ctx->token = subflow_req->token; new_ctx->thmac = subflow_req->thmac; } -- cgit v1.2.3-59-g8ed1b From f612eb76f349919128d5f5cc4e8cc4251a16bf30 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 9 Sep 2020 11:01:24 +0800 Subject: mptcp: fix kmalloc flag in mptcp_pm_nl_get_local_id mptcp_pm_nl_get_local_id may be called in interrupt context, so we need to use GFP_ATOMIC flag to allocate memory to avoid sleeping in atomic context. [ 280.209809] BUG: sleeping function called from invalid context at mm/slab.h:498 [ 280.209812] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 1680, name: kworker/1:3 [ 280.209814] INFO: lockdep is turned off. [ 280.209816] CPU: 1 PID: 1680 Comm: kworker/1:3 Tainted: G W 5.9.0-rc3-mptcp+ #146 [ 280.209818] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 280.209820] Workqueue: events mptcp_worker [ 280.209822] Call Trace: [ 280.209824] [ 280.209826] dump_stack+0x77/0xa0 [ 280.209829] ___might_sleep.cold+0xa6/0xb6 [ 280.209832] kmem_cache_alloc_trace+0x1d1/0x290 [ 280.209835] mptcp_pm_nl_get_local_id+0x23c/0x410 [ 280.209840] subflow_init_req+0x1e9/0x2ea [ 280.209843] ? inet_reqsk_alloc+0x1c/0x120 [ 280.209845] ? kmem_cache_alloc+0x264/0x290 [ 280.209849] tcp_conn_request+0x303/0xae0 [ 280.209854] ? printk+0x53/0x6a [ 280.209857] ? tcp_rcv_state_process+0x28f/0x1374 [ 280.209859] tcp_rcv_state_process+0x28f/0x1374 [ 280.209864] ? tcp_v4_do_rcv+0xb3/0x1f0 [ 280.209866] tcp_v4_do_rcv+0xb3/0x1f0 [ 280.209869] tcp_v4_rcv+0xed6/0xfa0 [ 280.209873] ip_protocol_deliver_rcu+0x28/0x270 [ 280.209875] ip_local_deliver_finish+0x89/0x120 [ 280.209877] ip_local_deliver+0x180/0x220 [ 280.209881] ip_rcv+0x166/0x210 [ 280.209885] __netif_receive_skb_one_core+0x82/0x90 [ 280.209888] process_backlog+0xd6/0x230 [ 280.209891] net_rx_action+0x13a/0x410 [ 280.209895] __do_softirq+0xcf/0x468 [ 280.209899] asm_call_on_stack+0x12/0x20 [ 280.209901] [ 280.209903] ? ip_finish_output2+0x240/0x9a0 [ 280.209906] do_softirq_own_stack+0x4d/0x60 [ 280.209908] do_softirq.part.0+0x2b/0x60 [ 280.209911] __local_bh_enable_ip+0x9a/0xa0 [ 280.209913] ip_finish_output2+0x264/0x9a0 [ 280.209916] ? rcu_read_lock_held+0x4d/0x60 [ 280.209920] ? ip_output+0x7a/0x250 [ 280.209922] ip_output+0x7a/0x250 [ 280.209925] ? __ip_finish_output+0x330/0x330 [ 280.209928] __ip_queue_xmit+0x1dc/0x5a0 [ 280.209931] __tcp_transmit_skb+0xa0f/0xc70 [ 280.209937] tcp_connect+0xb03/0xff0 [ 280.209939] ? lockdep_hardirqs_on_prepare+0xe7/0x190 [ 280.209942] ? ktime_get_with_offset+0x125/0x150 [ 280.209944] ? trace_hardirqs_on+0x1c/0xe0 [ 280.209948] tcp_v4_connect+0x449/0x550 [ 280.209953] __inet_stream_connect+0xbb/0x320 [ 280.209955] ? mark_held_locks+0x49/0x70 [ 280.209958] ? lockdep_hardirqs_on_prepare+0xe7/0x190 [ 280.209960] ? __local_bh_enable_ip+0x6b/0xa0 [ 280.209963] inet_stream_connect+0x32/0x50 [ 280.209966] __mptcp_subflow_connect+0x1fd/0x242 [ 280.209972] mptcp_pm_create_subflow_or_signal_addr+0x2db/0x600 [ 280.209975] mptcp_worker+0x543/0x7a0 [ 280.209980] process_one_work+0x26d/0x5b0 [ 280.209984] ? process_one_work+0x5b0/0x5b0 [ 280.209987] worker_thread+0x48/0x3d0 [ 280.209990] ? process_one_work+0x5b0/0x5b0 [ 280.209993] kthread+0x117/0x150 [ 280.209996] ? kthread_park+0x80/0x80 [ 280.209998] ret_from_fork+0x22/0x30 Fixes: 01cacb00b35cb ("mptcp: add netlink-based PM") Signed-off-by: Geliang Tang Signed-off-by: David S. Miller --- net/mptcp/pm_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 5ea121d0222e..770da3627848 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -354,7 +354,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) return ret; /* address not found, add to local list */ - entry = kmalloc(sizeof(*entry), GFP_KERNEL); + entry = kmalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) return -ENOMEM; -- cgit v1.2.3-59-g8ed1b From edecfa98f602a597666e3c5cab2677ada38d93c5 Mon Sep 17 00:00:00 2001 From: Helmut Grohne Date: Tue, 8 Sep 2020 10:01:38 +0200 Subject: net: dsa: microchip: look for phy-mode in port nodes Documentation/devicetree/bindings/net/dsa/dsa.txt says that the phy-mode property should be specified on port nodes. However, the microchip drivers read it from the switch node. Let the driver use the per-port property and fall back to the old location with a warning. Fix in-tree users. Signed-off-by: Helmut Grohne Link: https://lore.kernel.org/netdev/20200617082235.GA1523@laureti-dev/ Acked-by: Alexandre Belloni Signed-off-by: David S. Miller --- arch/arm/boot/dts/at91-sama5d2_icp.dts | 2 +- drivers/net/dsa/microchip/ksz8795.c | 18 +++++++++++++----- drivers/net/dsa/microchip/ksz9477.c | 29 +++++++++++++++++++---------- drivers/net/dsa/microchip/ksz_common.c | 13 ++++++++++++- drivers/net/dsa/microchip/ksz_common.h | 3 ++- 5 files changed, 47 insertions(+), 18 deletions(-) diff --git a/arch/arm/boot/dts/at91-sama5d2_icp.dts b/arch/arm/boot/dts/at91-sama5d2_icp.dts index 8d19925fc09e..6783cf16ff81 100644 --- a/arch/arm/boot/dts/at91-sama5d2_icp.dts +++ b/arch/arm/boot/dts/at91-sama5d2_icp.dts @@ -116,7 +116,6 @@ switch0: ksz8563@0 { compatible = "microchip,ksz8563"; reg = <0>; - phy-mode = "mii"; reset-gpios = <&pioA PIN_PD4 GPIO_ACTIVE_LOW>; spi-max-frequency = <500000>; @@ -140,6 +139,7 @@ reg = <2>; label = "cpu"; ethernet = <&macb0>; + phy-mode = "mii"; fixed-link { speed = <100>; full-duplex; diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c index 8f1d15ea15d9..f7d59d7b2cbe 100644 --- a/drivers/net/dsa/microchip/ksz8795.c +++ b/drivers/net/dsa/microchip/ksz8795.c @@ -932,11 +932,19 @@ static void ksz8795_port_setup(struct ksz_device *dev, int port, bool cpu_port) ksz_port_cfg(dev, port, P_PRIO_CTRL, PORT_802_1P_ENABLE, true); if (cpu_port) { + if (!p->interface && dev->compat_interface) { + dev_warn(dev->dev, + "Using legacy switch \"phy-mode\" property, because it is missing on port %d node. " + "Please update your device tree.\n", + port); + p->interface = dev->compat_interface; + } + /* Configure MII interface for proper network communication. */ ksz_read8(dev, REG_PORT_5_CTRL_6, &data8); data8 &= ~PORT_INTERFACE_TYPE; data8 &= ~PORT_GMII_1GPS_MODE; - switch (dev->interface) { + switch (p->interface) { case PHY_INTERFACE_MODE_MII: p->phydev.speed = SPEED_100; break; @@ -952,11 +960,11 @@ static void ksz8795_port_setup(struct ksz_device *dev, int port, bool cpu_port) default: data8 &= ~PORT_RGMII_ID_IN_ENABLE; data8 &= ~PORT_RGMII_ID_OUT_ENABLE; - if (dev->interface == PHY_INTERFACE_MODE_RGMII_ID || - dev->interface == PHY_INTERFACE_MODE_RGMII_RXID) + if (p->interface == PHY_INTERFACE_MODE_RGMII_ID || + p->interface == PHY_INTERFACE_MODE_RGMII_RXID) data8 |= PORT_RGMII_ID_IN_ENABLE; - if (dev->interface == PHY_INTERFACE_MODE_RGMII_ID || - dev->interface == PHY_INTERFACE_MODE_RGMII_TXID) + if (p->interface == PHY_INTERFACE_MODE_RGMII_ID || + p->interface == PHY_INTERFACE_MODE_RGMII_TXID) data8 |= PORT_RGMII_ID_OUT_ENABLE; data8 |= PORT_GMII_1GPS_MODE; data8 |= PORT_INTERFACE_RGMII; diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index 3cb22d149813..2f5506ac7d19 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -1208,7 +1208,7 @@ static void ksz9477_port_setup(struct ksz_device *dev, int port, bool cpu_port) /* configure MAC to 1G & RGMII mode */ ksz_pread8(dev, port, REG_PORT_XMII_CTRL_1, &data8); - switch (dev->interface) { + switch (p->interface) { case PHY_INTERFACE_MODE_MII: ksz9477_set_xmii(dev, 0, &data8); ksz9477_set_gbit(dev, false, &data8); @@ -1229,11 +1229,11 @@ static void ksz9477_port_setup(struct ksz_device *dev, int port, bool cpu_port) ksz9477_set_gbit(dev, true, &data8); data8 &= ~PORT_RGMII_ID_IG_ENABLE; data8 &= ~PORT_RGMII_ID_EG_ENABLE; - if (dev->interface == PHY_INTERFACE_MODE_RGMII_ID || - dev->interface == PHY_INTERFACE_MODE_RGMII_RXID) + if (p->interface == PHY_INTERFACE_MODE_RGMII_ID || + p->interface == PHY_INTERFACE_MODE_RGMII_RXID) data8 |= PORT_RGMII_ID_IG_ENABLE; - if (dev->interface == PHY_INTERFACE_MODE_RGMII_ID || - dev->interface == PHY_INTERFACE_MODE_RGMII_TXID) + if (p->interface == PHY_INTERFACE_MODE_RGMII_ID || + p->interface == PHY_INTERFACE_MODE_RGMII_TXID) data8 |= PORT_RGMII_ID_EG_ENABLE; p->phydev.speed = SPEED_1000; break; @@ -1269,23 +1269,32 @@ static void ksz9477_config_cpu_port(struct dsa_switch *ds) dev->cpu_port = i; dev->host_mask = (1 << dev->cpu_port); dev->port_mask |= dev->host_mask; + p = &dev->ports[i]; /* Read from XMII register to determine host port * interface. If set specifically in device tree * note the difference to help debugging. */ interface = ksz9477_get_interface(dev, i); - if (!dev->interface) - dev->interface = interface; - if (interface && interface != dev->interface) + if (!p->interface) { + if (dev->compat_interface) { + dev_warn(dev->dev, + "Using legacy switch \"phy-mode\" property, because it is missing on port %d node. " + "Please update your device tree.\n", + i); + p->interface = dev->compat_interface; + } else { + p->interface = interface; + } + } + if (interface && interface != p->interface) dev_info(dev->dev, "use %s instead of %s\n", - phy_modes(dev->interface), + phy_modes(p->interface), phy_modes(interface)); /* enable cpu port */ ksz9477_port_setup(dev, i, true); - p = &dev->ports[dev->cpu_port]; p->vid_member = dev->port_mask; p->on = 1; } diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 8d53b12d40a8..8e755b50c9c1 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -388,6 +388,8 @@ int ksz_switch_register(struct ksz_device *dev, const struct ksz_dev_ops *ops) { phy_interface_t interface; + struct device_node *port; + unsigned int port_num; int ret; if (dev->pdata) @@ -421,10 +423,19 @@ int ksz_switch_register(struct ksz_device *dev, /* Host port interface will be self detected, or specifically set in * device tree. */ + for (port_num = 0; port_num < dev->port_cnt; ++port_num) + dev->ports[port_num].interface = PHY_INTERFACE_MODE_NA; if (dev->dev->of_node) { ret = of_get_phy_mode(dev->dev->of_node, &interface); if (ret == 0) - dev->interface = interface; + dev->compat_interface = interface; + for_each_available_child_of_node(dev->dev->of_node, port) { + if (of_property_read_u32(port, "reg", &port_num)) + continue; + if (port_num >= dev->port_cnt) + return -EINVAL; + of_get_phy_mode(port, &dev->ports[port_num].interface); + } dev->synclko_125 = of_property_read_bool(dev->dev->of_node, "microchip,synclko-125"); } diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index 206838160f49..cf866e48ff66 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -39,6 +39,7 @@ struct ksz_port { u32 freeze:1; /* MIB counter freeze is enabled */ struct ksz_port_mib mib; + phy_interface_t interface; }; struct ksz_device { @@ -72,7 +73,7 @@ struct ksz_device { int mib_cnt; int mib_port_cnt; int last_port; /* ports after that not used */ - phy_interface_t interface; + phy_interface_t compat_interface; u32 regs_size; bool phy_errata_9477; bool synclko_125; -- cgit v1.2.3-59-g8ed1b From 2fb541c862c987d02dfdf28f1545016deecfa0d5 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Tue, 8 Sep 2020 19:02:34 +0800 Subject: net: sch_generic: aviod concurrent reset and enqueue op for lockless qdisc Currently there is concurrent reset and enqueue operation for the same lockless qdisc when there is no lock to synchronize the q->enqueue() in __dev_xmit_skb() with the qdisc reset operation in qdisc_deactivate() called by dev_deactivate_queue(), which may cause out-of-bounds access for priv->ring[] in hns3 driver if user has requested a smaller queue num when __dev_xmit_skb() still enqueue a skb with a larger queue_mapping after the corresponding qdisc is reset, and call hns3_nic_net_xmit() with that skb later. Reused the existing synchronize_net() in dev_deactivate_many() to make sure skb with larger queue_mapping enqueued to old qdisc(which is saved in dev_queue->qdisc_sleeping) will always be reset when dev_reset_queue() is called. Fixes: 6b3ba9146fe6 ("net: sched: allow qdiscs to handle locking") Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 48 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 265a61d011df..54c417244642 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1131,24 +1131,10 @@ EXPORT_SYMBOL(dev_activate); static void qdisc_deactivate(struct Qdisc *qdisc) { - bool nolock = qdisc->flags & TCQ_F_NOLOCK; - if (qdisc->flags & TCQ_F_BUILTIN) return; - if (test_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state)) - return; - - if (nolock) - spin_lock_bh(&qdisc->seqlock); - spin_lock_bh(qdisc_lock(qdisc)); set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state); - - qdisc_reset(qdisc); - - spin_unlock_bh(qdisc_lock(qdisc)); - if (nolock) - spin_unlock_bh(&qdisc->seqlock); } static void dev_deactivate_queue(struct net_device *dev, @@ -1165,6 +1151,30 @@ static void dev_deactivate_queue(struct net_device *dev, } } +static void dev_reset_queue(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_unused) +{ + struct Qdisc *qdisc; + bool nolock; + + qdisc = dev_queue->qdisc_sleeping; + if (!qdisc) + return; + + nolock = qdisc->flags & TCQ_F_NOLOCK; + + if (nolock) + spin_lock_bh(&qdisc->seqlock); + spin_lock_bh(qdisc_lock(qdisc)); + + qdisc_reset(qdisc); + + spin_unlock_bh(qdisc_lock(qdisc)); + if (nolock) + spin_unlock_bh(&qdisc->seqlock); +} + static bool some_qdisc_is_busy(struct net_device *dev) { unsigned int i; @@ -1213,12 +1223,20 @@ void dev_deactivate_many(struct list_head *head) dev_watchdog_down(dev); } - /* Wait for outstanding qdisc-less dev_queue_xmit calls. + /* Wait for outstanding qdisc-less dev_queue_xmit calls or + * outstanding qdisc enqueuing calls. * This is avoided if all devices are in dismantle phase : * Caller will call synchronize_net() for us */ synchronize_net(); + list_for_each_entry(dev, head, close_list) { + netdev_for_each_tx_queue(dev, dev_reset_queue, NULL); + + if (dev_ingress_queue(dev)) + dev_reset_queue(dev, dev_ingress_queue(dev), NULL); + } + /* Wait for outstanding qdisc_run calls. */ list_for_each_entry(dev, head, close_list) { while (some_qdisc_is_busy(dev)) { -- cgit v1.2.3-59-g8ed1b From de214e52de1bba5392b5b7054924a08dbd57c2f6 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Tue, 8 Sep 2020 21:07:32 -0700 Subject: hv_netvsc: Switch the data path at the right time during hibernation When netvsc_resume() is called, the mlx5 VF NIC has not been resumed yet, so in the future the host might sliently fail the call netvsc_vf_changed() -> netvsc_switch_datapath() there, even if the call works now. Call netvsc_vf_changed() in the NETDEV_CHANGE event handler: at that time the mlx5 VF NIC has been resumed. Fixes: 19162fd4063a ("hv_netvsc: Fix hibernation for mlx5 VF driver") Signed-off-by: Dexuan Cui Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 81c5c70b616a..4a25886e2346 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2619,7 +2619,6 @@ static int netvsc_resume(struct hv_device *dev) struct net_device *net = hv_get_drvdata(dev); struct net_device_context *net_device_ctx; struct netvsc_device_info *device_info; - struct net_device *vf_netdev; int ret; rtnl_lock(); @@ -2632,15 +2631,6 @@ static int netvsc_resume(struct hv_device *dev) netvsc_devinfo_put(device_info); net_device_ctx->saved_netvsc_dev_info = NULL; - /* A NIC driver (e.g. mlx5) may keep the VF network interface across - * hibernation, but here the data path is implicitly switched to the - * netvsc NIC since the vmbus channel is closed and re-opened, so - * netvsc_vf_changed() must be used to switch the data path to the VF. - */ - vf_netdev = rtnl_dereference(net_device_ctx->vf_netdev); - if (vf_netdev && netvsc_vf_changed(vf_netdev) != NOTIFY_OK) - ret = -EINVAL; - rtnl_unlock(); return ret; @@ -2701,6 +2691,7 @@ static int netvsc_netdev_event(struct notifier_block *this, return netvsc_unregister_vf(event_dev); case NETDEV_UP: case NETDEV_DOWN: + case NETDEV_CHANGE: return netvsc_vf_changed(event_dev); default: return NOTIFY_DONE; -- cgit v1.2.3-59-g8ed1b From da26658c3d7005aa67a706dceff7b2807b59e123 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Tue, 8 Sep 2020 21:08:19 -0700 Subject: hv_netvsc: Cache the current data path to avoid duplicate call and message The previous change "hv_netvsc: Switch the data path at the right time during hibernation" adds the call of netvsc_vf_changed() upon NETDEV_CHANGE, so it's necessary to avoid the duplicate call and message when the VF is brought UP or DOWN. Signed-off-by: Dexuan Cui Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 3 +++ drivers/net/hyperv/netvsc_drv.c | 21 ++++++++++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 2181d4538ab7..ff33f27cdcd3 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -974,6 +974,9 @@ struct net_device_context { /* Serial number of the VF to team with */ u32 vf_serial; + /* Is the current data path through the VF NIC? */ + bool data_path_is_vf; + /* Used to temporarily save the config info across hibernation */ struct netvsc_device_info *saved_netvsc_dev_info; }; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 4a25886e2346..b7db3766f5b9 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2366,7 +2366,16 @@ static int netvsc_register_vf(struct net_device *vf_netdev) return NOTIFY_OK; } -/* VF up/down change detected, schedule to change data path */ +/* Change the data path when VF UP/DOWN/CHANGE are detected. + * + * Typically a UP or DOWN event is followed by a CHANGE event, so + * net_device_ctx->data_path_is_vf is used to cache the current data path + * to avoid the duplicate call of netvsc_switch_datapath() and the duplicate + * message. + * + * During hibernation, if a VF NIC driver (e.g. mlx5) preserves the network + * interface, there is only the CHANGE event and no UP or DOWN event. + */ static int netvsc_vf_changed(struct net_device *vf_netdev) { struct net_device_context *net_device_ctx; @@ -2383,6 +2392,10 @@ static int netvsc_vf_changed(struct net_device *vf_netdev) if (!netvsc_dev) return NOTIFY_DONE; + if (net_device_ctx->data_path_is_vf == vf_is_up) + return NOTIFY_OK; + net_device_ctx->data_path_is_vf = vf_is_up; + netvsc_switch_datapath(ndev, vf_is_up); netdev_info(ndev, "Data path switched %s VF: %s\n", vf_is_up ? "to" : "from", vf_netdev->name); @@ -2624,6 +2637,12 @@ static int netvsc_resume(struct hv_device *dev) rtnl_lock(); net_device_ctx = netdev_priv(net); + + /* Reset the data path to the netvsc NIC before re-opening the vmbus + * channel. Later netvsc_netdev_event() will switch the data path to + * the VF upon the UP or CHANGE event. + */ + net_device_ctx->data_path_is_vf = false; device_info = net_device_ctx->saved_netvsc_dev_info; ret = netvsc_attach(net, device_info); -- cgit v1.2.3-59-g8ed1b From 7d3ba9360c6dac7c077fbd6631e08f32ea2bcd53 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 9 Sep 2020 14:43:14 +0900 Subject: net: phy: call phy_disable_interrupts() in phy_attach_direct() instead Since the micrel phy driver calls phy_init_hw() as a workaround, the commit 9886a4dbd2aa ("net: phy: call phy_disable_interrupts() in phy_init_hw()") disables the interrupt unexpectedly. So, call phy_disable_interrupts() in phy_attach_direct() instead. Otherwise, the phy cannot link up after the ethernet cable was disconnected. Note that other drivers (like at803x.c) also calls phy_init_hw(). So, perhaps, the driver caused a similar issue too. Fixes: 9886a4dbd2aa ("net: phy: call phy_disable_interrupts() in phy_init_hw()") Signed-off-by: Yoshihiro Shimoda Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 8adfbad0a1e8..b93b40cda54a 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1143,10 +1143,6 @@ int phy_init_hw(struct phy_device *phydev) if (ret < 0) return ret; - ret = phy_disable_interrupts(phydev); - if (ret) - return ret; - if (phydev->drv->config_init) ret = phydev->drv->config_init(phydev); @@ -1423,6 +1419,10 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, if (err) goto error; + err = phy_disable_interrupts(phydev); + if (err) + return err; + phy_resume(phydev); phy_led_triggers_register(phydev); -- cgit v1.2.3-59-g8ed1b From 66d42ed8b25b64eb63111a2b8582c5afc8bf1105 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 9 Sep 2020 12:46:48 +0300 Subject: hdlc_ppp: add range checks in ppp_cp_parse_cr() There are a couple bugs here: 1) If opt[1] is zero then this results in a forever loop. If the value is less than 2 then it is invalid. 2) It assumes that "len" is more than sizeof(valid_accm) or 6 which can result in memory corruption. In the case of LCP_OPTION_ACCM, then we should check "opt[1]" instead of "len" because, if "opt[1]" is less than sizeof(valid_accm) then "nak_len" gets out of sync and it can lead to memory corruption in the next iterations through the loop. In case of LCP_OPTION_MAGIC, the only valid value for opt[1] is 6, but the code is trying to log invalid data so we should only discard the data when "len" is less than 6 because that leads to a read overflow. Reported-by: ChenNan Of Chaitin Security Research Lab Fixes: e022c2f07ae5 ("WAN: new synchronous PPP implementation for generic HDLC.") Signed-off-by: Dan Carpenter Reviewed-by: Eric Dumazet Reviewed-by: Greg Kroah-Hartman Signed-off-by: David S. Miller --- drivers/net/wan/hdlc_ppp.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c index 48ced3912576..16f33d1ffbfb 100644 --- a/drivers/net/wan/hdlc_ppp.c +++ b/drivers/net/wan/hdlc_ppp.c @@ -383,11 +383,8 @@ static void ppp_cp_parse_cr(struct net_device *dev, u16 pid, u8 id, } for (opt = data; len; len -= opt[1], opt += opt[1]) { - if (len < 2 || len < opt[1]) { - dev->stats.rx_errors++; - kfree(out); - return; /* bad packet, drop silently */ - } + if (len < 2 || opt[1] < 2 || len < opt[1]) + goto err_out; if (pid == PID_LCP) switch (opt[0]) { @@ -395,6 +392,8 @@ static void ppp_cp_parse_cr(struct net_device *dev, u16 pid, u8 id, continue; /* MRU always OK and > 1500 bytes? */ case LCP_OPTION_ACCM: /* async control character map */ + if (opt[1] < sizeof(valid_accm)) + goto err_out; if (!memcmp(opt, valid_accm, sizeof(valid_accm))) continue; @@ -406,6 +405,8 @@ static void ppp_cp_parse_cr(struct net_device *dev, u16 pid, u8 id, } break; case LCP_OPTION_MAGIC: + if (len < 6) + goto err_out; if (opt[1] != 6 || (!opt[2] && !opt[3] && !opt[4] && !opt[5])) break; /* reject invalid magic number */ @@ -424,6 +425,11 @@ static void ppp_cp_parse_cr(struct net_device *dev, u16 pid, u8 id, ppp_cp_event(dev, pid, RCR_GOOD, CP_CONF_ACK, id, req_len, data); kfree(out); + return; + +err_out: + dev->stats.rx_errors++; + kfree(out); } static int ppp_rx(struct sk_buff *skb) -- cgit v1.2.3-59-g8ed1b From 1be107de2ee4b3f0808e2071529364cf4d9a67b9 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 10 Sep 2020 04:41:53 -0400 Subject: net: Correct the comment of dst_dev_put() Since commit 8d7017fd621d ("blackhole_netdev: use blackhole_netdev to invalidate dst entries"), we use blackhole_netdev to invalidate dst entries instead of loopback device anymore. Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- net/core/dst.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dst.c b/net/core/dst.c index d6b6ced0d451..0c01bd8d9d81 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -144,7 +144,7 @@ static void dst_destroy_rcu(struct rcu_head *head) /* Operations to mark dst as DEAD and clean up the net device referenced * by dst: - * 1. put the dst under loopback interface and discard all tx/rx packets + * 1. put the dst under blackhole interface and discard all tx/rx packets * on this route. * 2. release the net_device * This function should be called when removing routes from the fib tree -- cgit v1.2.3-59-g8ed1b From 83896b0bd8223ac33bcc609bcc82a57a587002ff Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 10 Sep 2020 04:46:40 -0400 Subject: net: Fix broken NETIF_F_CSUM_MASK spell in netdev_features.h Remove the weird space inside the NETIF_F_CSUM_MASK. Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 2cc3cf80b49a..0b17c4322b09 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -193,7 +193,7 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start) #define NETIF_F_GSO_MASK (__NETIF_F_BIT(NETIF_F_GSO_LAST + 1) - \ __NETIF_F_BIT(NETIF_F_GSO_SHIFT)) -/* List of IP checksum features. Note that NETIF_F_ HW_CSUM should not be +/* List of IP checksum features. Note that NETIF_F_HW_CSUM should not be * set in features when NETIF_F_IP_CSUM or NETIF_F_IPV6_CSUM are set-- * this would be contradictory */ -- cgit v1.2.3-59-g8ed1b From 5bf490e6807bf56f49b5991b4be817407dd32656 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 10 Sep 2020 11:05:18 +0200 Subject: s390/qeth: delay draining the TX buffers Wait until the QDIO data connection is severed. Otherwise the device might still be processing the buffers, and end up accessing skb data that we already freed. Fixes: 8b5026bc1693 ("s390/qeth: fix qdio teardown after early init error") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l2_main.c | 2 +- drivers/s390/net/qeth_l3_main.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 3a94f6cad167..6384f7adba66 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -284,11 +284,11 @@ static void qeth_l2_stop_card(struct qeth_card *card) if (card->state == CARD_STATE_SOFTSETUP) { qeth_clear_ipacmd_list(card); - qeth_drain_output_queues(card); card->state = CARD_STATE_DOWN; } qeth_qdio_clear_card(card, 0); + qeth_drain_output_queues(card); qeth_clear_working_pool_list(card); flush_workqueue(card->event_wq); qeth_flush_local_addrs(card); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 4d461960370d..09ef518ca1ea 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1168,11 +1168,11 @@ static void qeth_l3_stop_card(struct qeth_card *card) if (card->state == CARD_STATE_SOFTSETUP) { qeth_l3_clear_ip_htable(card, 1); qeth_clear_ipacmd_list(card); - qeth_drain_output_queues(card); card->state = CARD_STATE_DOWN; } qeth_qdio_clear_card(card, 0); + qeth_drain_output_queues(card); qeth_clear_working_pool_list(card); flush_workqueue(card->event_wq); qeth_flush_local_addrs(card); -- cgit v1.2.3-59-g8ed1b From 9d3b2d3e4942cb82630f0d638de4b2253c0af56d Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 10 Sep 2020 11:08:01 +0200 Subject: net: mvneta: fix possible use-after-free in mvneta_xdp_put_buff Release first buffer as last one since it contains references to subsequent fragments. This code will be optimized introducing multi-buffer bit in xdp_buff structure. Fixes: ca0e014609f05 ("net: mvneta: move skb build after descriptors processing") Signed-off-by: Lorenzo Bianconi Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index dfcb1767acbb..69a900081165 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2029,11 +2029,11 @@ mvneta_xdp_put_buff(struct mvneta_port *pp, struct mvneta_rx_queue *rxq, struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); int i; - page_pool_put_page(rxq->page_pool, virt_to_head_page(xdp->data), - sync_len, napi); for (i = 0; i < sinfo->nr_frags; i++) page_pool_put_full_page(rxq->page_pool, skb_frag_page(&sinfo->frags[i]), napi); + page_pool_put_page(rxq->page_pool, virt_to_head_page(xdp->data), + sync_len, napi); } static int -- cgit v1.2.3-59-g8ed1b From e1b9efe6baebe79019a2183176686a0e709388ae Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 10 Sep 2020 14:01:26 +0300 Subject: net: Fix bridge enslavement failure When a netdev is enslaved to a bridge, its parent identifier is queried. This is done so that packets that were already forwarded in hardware will not be forwarded again by the bridge device between netdevs belonging to the same hardware instance. The operation fails when the netdev is an upper of netdevs with different parent identifiers. Instead of failing the enslavement, have dev_get_port_parent_id() return '-EOPNOTSUPP' which will signal the bridge to skip the query operation. Other callers of the function are not affected by this change. Fixes: 7e1146e8c10c ("net: devlink: introduce devlink_compat_switch_id_get() helper") Signed-off-by: Ido Schimmel Reported-by: Vasundhara Volam Reviewed-by: Jiri Pirko Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 4086d335978c..266073e300b5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8647,7 +8647,7 @@ int dev_get_port_parent_id(struct net_device *dev, if (!first.id_len) first = *ppid; else if (memcmp(&first, ppid, sizeof(*ppid))) - return -ENODATA; + return -EOPNOTSUPP; } return err; -- cgit v1.2.3-59-g8ed1b From 6374a5606990ea4de7fe0f7035dd04422c4690f5 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 10 Sep 2020 14:01:27 +0300 Subject: selftests: rtnetlink: Test bridge enslavement with different parent IDs Test that an upper device of netdevs with different parent IDs can be enslaved to a bridge. The test fails without previous commit. Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- tools/testing/selftests/net/rtnetlink.sh | 47 ++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 7c38a909f8b8..8a2fe6d64bf2 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -1175,6 +1175,51 @@ kci_test_neigh_get() echo "PASS: neigh get" } +kci_test_bridge_parent_id() +{ + local ret=0 + sysfsnet=/sys/bus/netdevsim/devices/netdevsim + probed=false + + if [ ! -w /sys/bus/netdevsim/new_device ] ; then + modprobe -q netdevsim + check_err $? + if [ $ret -ne 0 ]; then + echo "SKIP: bridge_parent_id can't load netdevsim" + return $ksft_skip + fi + probed=true + fi + + echo "10 1" > /sys/bus/netdevsim/new_device + while [ ! -d ${sysfsnet}10 ] ; do :; done + echo "20 1" > /sys/bus/netdevsim/new_device + while [ ! -d ${sysfsnet}20 ] ; do :; done + udevadm settle + dev10=`ls ${sysfsnet}10/net/` + dev20=`ls ${sysfsnet}20/net/` + + ip link add name test-bond0 type bond mode 802.3ad + ip link set dev $dev10 master test-bond0 + ip link set dev $dev20 master test-bond0 + ip link add name test-br0 type bridge + ip link set dev test-bond0 master test-br0 + check_err $? + + # clean up any leftovers + ip link del dev test-br0 + ip link del dev test-bond0 + echo 20 > /sys/bus/netdevsim/del_device + echo 10 > /sys/bus/netdevsim/del_device + $probed && rmmod netdevsim + + if [ $ret -ne 0 ]; then + echo "FAIL: bridge_parent_id" + return 1 + fi + echo "PASS: bridge_parent_id" +} + kci_test_rtnl() { local ret=0 @@ -1224,6 +1269,8 @@ kci_test_rtnl() check_err $? kci_test_neigh_get check_err $? + kci_test_bridge_parent_id + check_err $? kci_del_dummy return $ret -- cgit v1.2.3-59-g8ed1b From 297e77e53eadb332d5062913447b104a772dc33b Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 10 Sep 2020 14:09:05 +0200 Subject: net: DCB: Validate DCB_ATTR_DCB_BUFFER argument The parameter passed via DCB_ATTR_DCB_BUFFER is a struct dcbnl_buffer. The field prio2buffer is an array of IEEE_8021Q_MAX_PRIORITIES bytes, where each value is a number of a buffer to direct that priority's traffic to. That value is however never validated to lie within the bounds set by DCBX_MAX_BUFFERS. The only driver that currently implements the callback is mlx5 (maintainers CCd), and that does not do any validation either, in particual allowing incorrect configuration if the prio2buffer value does not fit into 4 bits. Instead of offloading the need to validate the buffer index to drivers, do it right there in core, and bounce the request if the value is too large. CC: Parav Pandit CC: Saeed Mahameed Fixes: e549f6f9c098 ("net/dcb: Add dcbnl buffer attribute") Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/dcb/dcbnl.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 84dde5a2066e..16014ad19406 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1426,6 +1426,7 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh, { const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; struct nlattr *ieee[DCB_ATTR_IEEE_MAX + 1]; + int prio; int err; if (!ops) @@ -1475,6 +1476,13 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh, struct dcbnl_buffer *buffer = nla_data(ieee[DCB_ATTR_DCB_BUFFER]); + for (prio = 0; prio < ARRAY_SIZE(buffer->prio2buffer); prio++) { + if (buffer->prio2buffer[prio] >= DCBX_MAX_BUFFERS) { + err = -EINVAL; + goto err; + } + } + err = ops->dcbnl_setbuffer(netdev, buffer); if (err) goto err; -- cgit v1.2.3-59-g8ed1b From 553d87b658fed0e22a0f86b4f1b093c39d3e3074 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 10 Sep 2020 15:34:39 +0200 Subject: netlink: fix doc about nlmsg_parse/nla_validate There is no @validate argument. CC: Johannes Berg Fixes: 3de644035446 ("netlink: re-add parse/validate functions in strict mode") Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/net/netlink.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/net/netlink.h b/include/net/netlink.h index c0411f14fb53..8e0eb2c9c528 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -726,7 +726,6 @@ static inline int __nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen, * @hdrlen: length of family specific header * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected - * @validate: validation strictness * @extack: extended ACK report struct * * See nla_parse() @@ -824,7 +823,6 @@ static inline int nla_validate_deprecated(const struct nlattr *head, int len, * @len: length of attribute stream * @maxtype: maximum attribute type to be expected * @policy: validation policy - * @validate: validation strictness * @extack: extended ACK report struct * * Validates all attributes in the specified attribute stream against the -- cgit v1.2.3-59-g8ed1b From ee460417d254d941dfea5fb7cff841f589643992 Mon Sep 17 00:00:00 2001 From: Lucy Yan Date: Thu, 10 Sep 2020 12:05:09 -0700 Subject: net: dec: de2104x: Increase receive ring size for Tulip Increase Rx ring size to address issue where hardware is reaching the receive work limit. Before: [ 102.223342] de2104x 0000:17:00.0 eth0: rx work limit reached [ 102.245695] de2104x 0000:17:00.0 eth0: rx work limit reached [ 102.251387] de2104x 0000:17:00.0 eth0: rx work limit reached [ 102.267444] de2104x 0000:17:00.0 eth0: rx work limit reached Signed-off-by: Lucy Yan Reviewed-by: Moritz Fischer Signed-off-by: David S. Miller --- drivers/net/ethernet/dec/tulip/de2104x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/dec/tulip/de2104x.c b/drivers/net/ethernet/dec/tulip/de2104x.c index cb116b530f5e..2610efe4f873 100644 --- a/drivers/net/ethernet/dec/tulip/de2104x.c +++ b/drivers/net/ethernet/dec/tulip/de2104x.c @@ -85,7 +85,7 @@ MODULE_PARM_DESC (rx_copybreak, "de2104x Breakpoint at which Rx packets are copi #define DSL CONFIG_DE2104X_DSL #endif -#define DE_RX_RING_SIZE 64 +#define DE_RX_RING_SIZE 128 #define DE_TX_RING_SIZE 64 #define DE_RING_BYTES \ ((sizeof(struct de_desc) * DE_RX_RING_SIZE) + \ -- cgit v1.2.3-59-g8ed1b From fde6dedfb794ec8a89c3c980a245a2e43dad2411 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 10 Sep 2020 15:52:45 -0700 Subject: docs/bpf: Fix ringbuf documentation Remove link to litmus tests that didn't make it to upstream. Fix ringbuf benchmark link. I wasn't able to test this with `make htmldocs`, unfortunately, because of Sphinx dependencies. But bench_ringbufs.c path is certainly correct now. Fixes: 97abb2b39682 ("docs/bpf: Add BPF ring buffer design notes") Reported-by: Mauro Carvalho Chehab Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200910225245.2896991-1-andriin@fb.com --- Documentation/bpf/ringbuf.rst | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Documentation/bpf/ringbuf.rst b/Documentation/bpf/ringbuf.rst index 75f943f0009d..4d4f3bcb1477 100644 --- a/Documentation/bpf/ringbuf.rst +++ b/Documentation/bpf/ringbuf.rst @@ -182,9 +182,6 @@ in the order of reservations, but only after all previous records where already committed. It is thus possible for slow producers to temporarily hold off submitted records, that were reserved later. -Reservation/commit/consumer protocol is verified by litmus tests in -Documentation/litmus_tests/bpf-rb/_. - One interesting implementation bit, that significantly simplifies (and thus speeds up as well) implementation of both producers and consumers is how data area is mapped twice contiguously back-to-back in the virtual memory. This @@ -200,7 +197,7 @@ a self-pacing notifications of new data being availability. being available after commit only if consumer has already caught up right up to the record being committed. If not, consumer still has to catch up and thus will see new data anyways without needing an extra poll notification. -Benchmarks (see tools/testing/selftests/bpf/benchs/bench_ringbuf.c_) show that +Benchmarks (see tools/testing/selftests/bpf/benchs/bench_ringbufs.c_) show that this allows to achieve a very high throughput without having to resort to tricks like "notify only every Nth sample", which are necessary with perf buffer. For extreme cases, when BPF program wants more manual control of -- cgit v1.2.3-59-g8ed1b From cdb0e6dccc1fd0f7096d333d90cad0ac81c2b342 Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Fri, 11 Sep 2020 13:16:35 +0300 Subject: enetc: Fix mdio bus removal on PF probe bailout This is the correct resolution for the conflict from merging the "net" tree fix: commit 26cb7085c898 ("enetc: Remove the mdio bus on PF probe bailout") with the "net-next" new work: commit 07095c025ac2 ("net: enetc: Use DT protocol information to set up the ports") that moved mdio bus allocation to an ealier stage of the PF probing routine. Fixes: a57066b1a019 ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net") Signed-off-by: Claudiu Manoil Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/enetc_pf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index 26d5981b798f..177334f0adb1 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -1053,7 +1053,6 @@ static int enetc_pf_probe(struct pci_dev *pdev, err_reg_netdev: enetc_teardown_serdes(priv); - enetc_mdio_remove(pf); enetc_free_msix(priv); err_alloc_msix: enetc_free_si_resources(priv); @@ -1061,6 +1060,7 @@ err_alloc_si_res: si->ndev = NULL; free_netdev(ndev); err_alloc_netdev: + enetc_mdio_remove(pf); enetc_of_put_phy(pf); err_map_pf_space: enetc_pci_remove(pdev); -- cgit v1.2.3-59-g8ed1b From b5b73b26b3ca34574124ed7ae9c5ba8391a7f176 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Wed, 9 Sep 2020 17:03:11 -0700 Subject: taprio: Fix allowing too small intervals It's possible that the user specifies an interval that couldn't allow any packet to be transmitted. This also avoids the issue of the hrtimer handler starving the other threads because it's running too often. The solution is to reject interval sizes that according to the current link speed wouldn't allow any packet to be transmitted. Reported-by: syzbot+8267241609ae8c23b248@syzkaller.appspotmail.com Fixes: 5a781ccbd19e ("tc: Add support for configuring the taprio scheduler") Signed-off-by: Vinicius Costa Gomes Signed-off-by: David S. Miller --- net/sched/sch_taprio.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index fe53c1e38c7d..b0ad7687ee2c 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -777,9 +777,11 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { [TCA_TAPRIO_ATTR_TXTIME_DELAY] = { .type = NLA_U32 }, }; -static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry, +static int fill_sched_entry(struct taprio_sched *q, struct nlattr **tb, + struct sched_entry *entry, struct netlink_ext_ack *extack) { + int min_duration = length_to_duration(q, ETH_ZLEN); u32 interval = 0; if (tb[TCA_TAPRIO_SCHED_ENTRY_CMD]) @@ -794,7 +796,10 @@ static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry, interval = nla_get_u32( tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]); - if (interval == 0) { + /* The interval should allow at least the minimum ethernet + * frame to go out. + */ + if (interval < min_duration) { NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry"); return -EINVAL; } @@ -804,8 +809,9 @@ static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry, return 0; } -static int parse_sched_entry(struct nlattr *n, struct sched_entry *entry, - int index, struct netlink_ext_ack *extack) +static int parse_sched_entry(struct taprio_sched *q, struct nlattr *n, + struct sched_entry *entry, int index, + struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { }; int err; @@ -819,10 +825,10 @@ static int parse_sched_entry(struct nlattr *n, struct sched_entry *entry, entry->index = index; - return fill_sched_entry(tb, entry, extack); + return fill_sched_entry(q, tb, entry, extack); } -static int parse_sched_list(struct nlattr *list, +static int parse_sched_list(struct taprio_sched *q, struct nlattr *list, struct sched_gate_list *sched, struct netlink_ext_ack *extack) { @@ -847,7 +853,7 @@ static int parse_sched_list(struct nlattr *list, return -ENOMEM; } - err = parse_sched_entry(n, entry, i, extack); + err = parse_sched_entry(q, n, entry, i, extack); if (err < 0) { kfree(entry); return err; @@ -862,7 +868,7 @@ static int parse_sched_list(struct nlattr *list, return i; } -static int parse_taprio_schedule(struct nlattr **tb, +static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb, struct sched_gate_list *new, struct netlink_ext_ack *extack) { @@ -883,8 +889,8 @@ static int parse_taprio_schedule(struct nlattr **tb, new->cycle_time = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]); if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST]) - err = parse_sched_list( - tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST], new, extack); + err = parse_sched_list(q, tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST], + new, extack); if (err < 0) return err; @@ -1473,7 +1479,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, goto free_sched; } - err = parse_taprio_schedule(tb, new_admin, extack); + err = parse_taprio_schedule(q, tb, new_admin, extack); if (err < 0) goto free_sched; -- cgit v1.2.3-59-g8ed1b From a1b80e0143a1b878f8e21d82fd55f3f46f0014be Mon Sep 17 00:00:00 2001 From: Luo bin Date: Thu, 10 Sep 2020 22:04:40 +0800 Subject: hinic: fix rewaking txq after netif_tx_disable When calling hinic_close in hinic_set_channels, all queues are stopped after netif_tx_disable, but some queue may be rewaken in free_tx_poll by mistake while drv is handling tx irq. If one queue is rewaken core may call hinic_xmit_frame to send pkt after netif_tx_disable within a short time which may results in accessing memory that has been already freed in hinic_close. So we call napi_disable before netif_tx_disable in hinic_close to fix this bug. Fixes: 2eed5a8b614b ("hinic: add set_channels ethtool_ops support") Signed-off-by: Luo bin Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/huawei/hinic/hinic_main.c | 24 ++++++++++++++++++++++++ drivers/net/ethernet/huawei/hinic/hinic_tx.c | 20 ++++---------------- 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index 501056fd32ee..28581bd8ce07 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -174,6 +174,24 @@ err_init_txq: return err; } +static void enable_txqs_napi(struct hinic_dev *nic_dev) +{ + int num_txqs = hinic_hwdev_num_qps(nic_dev->hwdev); + int i; + + for (i = 0; i < num_txqs; i++) + napi_enable(&nic_dev->txqs[i].napi); +} + +static void disable_txqs_napi(struct hinic_dev *nic_dev) +{ + int num_txqs = hinic_hwdev_num_qps(nic_dev->hwdev); + int i; + + for (i = 0; i < num_txqs; i++) + napi_disable(&nic_dev->txqs[i].napi); +} + /** * free_txqs - Free the Logical Tx Queues of specific NIC device * @nic_dev: the specific NIC device @@ -400,6 +418,8 @@ int hinic_open(struct net_device *netdev) goto err_create_txqs; } + enable_txqs_napi(nic_dev); + err = create_rxqs(nic_dev); if (err) { netif_err(nic_dev, drv, netdev, @@ -484,6 +504,7 @@ err_port_state: } err_create_rxqs: + disable_txqs_napi(nic_dev); free_txqs(nic_dev); err_create_txqs: @@ -497,6 +518,9 @@ int hinic_close(struct net_device *netdev) struct hinic_dev *nic_dev = netdev_priv(netdev); unsigned int flags; + /* Disable txq napi firstly to aviod rewaking txq in free_tx_poll */ + disable_txqs_napi(nic_dev); + down(&nic_dev->mgmt_lock); flags = nic_dev->flags; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c index a97498ee6914..2b418b568767 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c @@ -745,18 +745,6 @@ static int free_tx_poll(struct napi_struct *napi, int budget) return budget; } -static void tx_napi_add(struct hinic_txq *txq, int weight) -{ - netif_napi_add(txq->netdev, &txq->napi, free_tx_poll, weight); - napi_enable(&txq->napi); -} - -static void tx_napi_del(struct hinic_txq *txq) -{ - napi_disable(&txq->napi); - netif_napi_del(&txq->napi); -} - static irqreturn_t tx_irq(int irq, void *data) { struct hinic_txq *txq = data; @@ -790,7 +778,7 @@ static int tx_request_irq(struct hinic_txq *txq) qp = container_of(sq, struct hinic_qp, sq); - tx_napi_add(txq, nic_dev->tx_weight); + netif_napi_add(txq->netdev, &txq->napi, free_tx_poll, nic_dev->tx_weight); hinic_hwdev_msix_set(nic_dev->hwdev, sq->msix_entry, TX_IRQ_NO_PENDING, TX_IRQ_NO_COALESC, @@ -807,14 +795,14 @@ static int tx_request_irq(struct hinic_txq *txq) if (err) { netif_err(nic_dev, drv, txq->netdev, "Failed to set TX interrupt coalescing attribute\n"); - tx_napi_del(txq); + netif_napi_del(&txq->napi); return err; } err = request_irq(sq->irq, tx_irq, 0, txq->irq_name, txq); if (err) { dev_err(&pdev->dev, "Failed to request Tx irq\n"); - tx_napi_del(txq); + netif_napi_del(&txq->napi); return err; } @@ -826,7 +814,7 @@ static void tx_free_irq(struct hinic_txq *txq) struct hinic_sq *sq = txq->sq; free_irq(sq->irq, txq); - tx_napi_del(txq); + netif_napi_del(&txq->napi); } /** -- cgit v1.2.3-59-g8ed1b From c047dc1d260f2593035d63747d616c3512f9d6b6 Mon Sep 17 00:00:00 2001 From: Vadym Kochan Date: Thu, 10 Sep 2020 18:41:52 +0300 Subject: net: ipa: fix u32_replace_bits by u32p_xxx version Looks like u32p_replace_bits() should be used instead of u32_replace_bits() which does not modifies the value but returns the modified version. Fixes: 2b9feef2b6c2 ("soc: qcom: ipa: filter and routing tables") Signed-off-by: Vadym Kochan Reviewed-by: Alex Elder Signed-off-by: David S. Miller --- drivers/net/ipa/ipa_table.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ipa/ipa_table.c b/drivers/net/ipa/ipa_table.c index 2098ca2f2c90..b3790aa952a1 100644 --- a/drivers/net/ipa/ipa_table.c +++ b/drivers/net/ipa/ipa_table.c @@ -521,7 +521,7 @@ static void ipa_filter_tuple_zero(struct ipa_endpoint *endpoint) val = ioread32(endpoint->ipa->reg_virt + offset); /* Zero all filter-related fields, preserving the rest */ - u32_replace_bits(val, 0, IPA_REG_ENDP_FILTER_HASH_MSK_ALL); + u32p_replace_bits(&val, 0, IPA_REG_ENDP_FILTER_HASH_MSK_ALL); iowrite32(val, endpoint->ipa->reg_virt + offset); } @@ -573,7 +573,7 @@ static void ipa_route_tuple_zero(struct ipa *ipa, u32 route_id) val = ioread32(ipa->reg_virt + offset); /* Zero all route-related fields, preserving the rest */ - u32_replace_bits(val, 0, IPA_REG_ENDP_ROUTER_HASH_MSK_ALL); + u32p_replace_bits(&val, 0, IPA_REG_ENDP_ROUTER_HASH_MSK_ALL); iowrite32(val, ipa->reg_virt + offset); } -- cgit v1.2.3-59-g8ed1b From 5760d9acbe9514eec68eb70821d6fa5764f57042 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Thu, 10 Sep 2020 23:52:29 +0300 Subject: net: ethernet: ti: cpsw_new: fix suspend/resume Add missed suspend/resume callbacks to properly restore networking after suspend/resume cycle. Fixes: ed3525eda4c4 ("net: ethernet: ti: introduce cpsw switchdev based driver part 1 - dual-emac") Signed-off-by: Grygorii Strashko Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw_new.c | 53 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index 8ed78577cded..15672d0a4de6 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -2070,9 +2071,61 @@ static int cpsw_remove(struct platform_device *pdev) return 0; } +static int __maybe_unused cpsw_suspend(struct device *dev) +{ + struct cpsw_common *cpsw = dev_get_drvdata(dev); + int i; + + rtnl_lock(); + + for (i = 0; i < cpsw->data.slaves; i++) { + struct net_device *ndev = cpsw->slaves[i].ndev; + + if (!(ndev && netif_running(ndev))) + continue; + + cpsw_ndo_stop(ndev); + } + + rtnl_unlock(); + + /* Select sleep pin state */ + pinctrl_pm_select_sleep_state(dev); + + return 0; +} + +static int __maybe_unused cpsw_resume(struct device *dev) +{ + struct cpsw_common *cpsw = dev_get_drvdata(dev); + int i; + + /* Select default pin state */ + pinctrl_pm_select_default_state(dev); + + /* shut up ASSERT_RTNL() warning in netif_set_real_num_tx/rx_queues */ + rtnl_lock(); + + for (i = 0; i < cpsw->data.slaves; i++) { + struct net_device *ndev = cpsw->slaves[i].ndev; + + if (!(ndev && netif_running(ndev))) + continue; + + cpsw_ndo_open(ndev); + } + + rtnl_unlock(); + + return 0; +} + +static SIMPLE_DEV_PM_OPS(cpsw_pm_ops, cpsw_suspend, cpsw_resume); + static struct platform_driver cpsw_driver = { .driver = { .name = "cpsw-switch", + .pm = &cpsw_pm_ops, .of_match_table = cpsw_of_mtable, }, .probe = cpsw_probe, -- cgit v1.2.3-59-g8ed1b From 4bba9dab86b6ac15ca560ef1f2b5aa4529cbf784 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 14 Sep 2020 13:58:16 +0200 Subject: batman-adv: Add missing include for in_interrupt() The fix for receiving (internally generated) bla packets outside the interrupt context introduced the usage of in_interrupt(). But this functionality is only defined in linux/preempt.h which was not included with the same patch. Fixes: 279e89b2281a ("batman-adv: bla: use netif_rx_ni when not in interrupt context") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bridge_loop_avoidance.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index d8c5d3170676..08419a2e6b95 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3-59-g8ed1b From 4202c9fdf03d79dedaa94b2c4cf574f25793d669 Mon Sep 17 00:00:00 2001 From: Olympia Giannou Date: Fri, 11 Sep 2020 14:17:24 +0000 Subject: rndis_host: increase sleep time in the query-response loop Some WinCE devices face connectivity issues via the NDIS interface. They fail to register, resulting in -110 timeout errors and failures during the probe procedure. In this kind of WinCE devices, the Windows-side ndis driver needs quite more time to be loaded and configured, so that the linux rndis host queries to them fail to be responded correctly on time. More specifically, when INIT is called on the WinCE side - no other requests can be served by the Client and this results in a failed QUERY afterwards. The increase of the waiting time on the side of the linux rndis host in the command-response loop leaves the INIT process to complete and respond to a QUERY, which comes afterwards. The WinCE devices with this special "feature" in their ndis driver are satisfied by this fix. Signed-off-by: Olympia Giannou Signed-off-by: David S. Miller --- drivers/net/usb/rndis_host.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c index bd9c07888ebb..6fa7a009a24a 100644 --- a/drivers/net/usb/rndis_host.c +++ b/drivers/net/usb/rndis_host.c @@ -201,7 +201,7 @@ int rndis_command(struct usbnet *dev, struct rndis_msg_hdr *buf, int buflen) dev_dbg(&info->control->dev, "rndis response error, code %d\n", retval); } - msleep(20); + msleep(40); } dev_dbg(&info->control->dev, "rndis response timeout\n"); return -ETIMEDOUT; -- cgit v1.2.3-59-g8ed1b From dea36631e6f186d4b853af67a4aef2e35cfa8bb7 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Sat, 12 Sep 2020 21:36:26 +0200 Subject: net: lantiq: Wake TX queue again The call to netif_wake_queue() when the TX descriptors were freed was missing. When there are no TX buffers available the TX queue will be stopped, but it was not started again when they are available again, this is fixed in this patch. Fixes: fe1a56420cf2 ("net: lantiq: Add Lantiq / Intel VRX200 Ethernet driver") Signed-off-by: Hauke Mehrtens Signed-off-by: David S. Miller --- drivers/net/ethernet/lantiq_xrx200.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c index 1645e4e7ebdb..1feb9fc710e0 100644 --- a/drivers/net/ethernet/lantiq_xrx200.c +++ b/drivers/net/ethernet/lantiq_xrx200.c @@ -268,6 +268,9 @@ static int xrx200_tx_housekeeping(struct napi_struct *napi, int budget) net_dev->stats.tx_bytes += bytes; netdev_completed_queue(ch->priv->net_dev, pkts, bytes); + if (netif_queue_stopped(net_dev)) + netif_wake_queue(net_dev); + if (pkts < budget) { napi_complete(&ch->napi); ltq_dma_enable_irq(&ch->dma); -- cgit v1.2.3-59-g8ed1b From 74c7b80e222b58d3cea731d31e2a31a77fea8345 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Sat, 12 Sep 2020 21:36:27 +0200 Subject: net: lantiq: use netif_tx_napi_add() for TX NAPI netif_tx_napi_add() should be used for NAPI in the TX direction instead of the netif_napi_add() function. Signed-off-by: Hauke Mehrtens Signed-off-by: David S. Miller --- drivers/net/ethernet/lantiq_xrx200.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c index 1feb9fc710e0..f34e4dc8c661 100644 --- a/drivers/net/ethernet/lantiq_xrx200.c +++ b/drivers/net/ethernet/lantiq_xrx200.c @@ -502,7 +502,7 @@ static int xrx200_probe(struct platform_device *pdev) /* setup NAPI */ netif_napi_add(net_dev, &priv->chan_rx.napi, xrx200_poll_rx, 32); - netif_napi_add(net_dev, &priv->chan_tx.napi, xrx200_tx_housekeeping, 32); + netif_tx_napi_add(net_dev, &priv->chan_tx.napi, xrx200_tx_housekeeping, 32); platform_set_drvdata(pdev, priv); -- cgit v1.2.3-59-g8ed1b From c582a7fea9dad4d309437d1a7e22e6d2cb380e2e Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Sat, 12 Sep 2020 21:36:28 +0200 Subject: net: lantiq: Use napi_complete_done() Use napi_complete_done() and activate the interrupts when this function returns true. This way the generic NAPI code can take care of activating the interrupts. Signed-off-by: Hauke Mehrtens Signed-off-by: David S. Miller --- drivers/net/ethernet/lantiq_xrx200.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c index f34e4dc8c661..abee7d61074c 100644 --- a/drivers/net/ethernet/lantiq_xrx200.c +++ b/drivers/net/ethernet/lantiq_xrx200.c @@ -230,8 +230,8 @@ static int xrx200_poll_rx(struct napi_struct *napi, int budget) } if (rx < budget) { - napi_complete(&ch->napi); - ltq_dma_enable_irq(&ch->dma); + if (napi_complete_done(&ch->napi, rx)) + ltq_dma_enable_irq(&ch->dma); } return rx; @@ -272,8 +272,8 @@ static int xrx200_tx_housekeeping(struct napi_struct *napi, int budget) netif_wake_queue(net_dev); if (pkts < budget) { - napi_complete(&ch->napi); - ltq_dma_enable_irq(&ch->dma); + if (napi_complete_done(&ch->napi, pkts)) + ltq_dma_enable_irq(&ch->dma); } return pkts; -- cgit v1.2.3-59-g8ed1b From 9423361da52356cb68642db5b2729b6b85aad330 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Sat, 12 Sep 2020 21:36:29 +0200 Subject: net: lantiq: Disable IRQs only if NAPI gets scheduled The napi_schedule() call will only schedule the NAPI if it is not already running. To make sure that we do not deactivate interrupts without scheduling NAPI only deactivate the interrupts in case NAPI also gets scheduled. Signed-off-by: Hauke Mehrtens Signed-off-by: David S. Miller --- drivers/net/ethernet/lantiq_xrx200.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c index abee7d61074c..635ff3a5dcfb 100644 --- a/drivers/net/ethernet/lantiq_xrx200.c +++ b/drivers/net/ethernet/lantiq_xrx200.c @@ -345,10 +345,12 @@ static irqreturn_t xrx200_dma_irq(int irq, void *ptr) { struct xrx200_chan *ch = ptr; - ltq_dma_disable_irq(&ch->dma); - ltq_dma_ack_irq(&ch->dma); + if (napi_schedule_prep(&ch->napi)) { + __napi_schedule(&ch->napi); + ltq_dma_disable_irq(&ch->dma); + } - napi_schedule(&ch->napi); + ltq_dma_ack_irq(&ch->dma); return IRQ_HANDLED; } -- cgit v1.2.3-59-g8ed1b From 1869e226a7b3ef75b4f70ede2f1b7229f7157fa4 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 13 Sep 2020 12:43:39 -0600 Subject: ipv4: Initialize flowi4_multipath_hash in data path flowi4_multipath_hash was added by the commit referenced below for tunnels. Unfortunately, the patch did not initialize the new field for several fast path lookups that do not initialize the entire flow struct to 0. Fix those locations. Currently, flowi4_multipath_hash is random garbage and affects the hash value computed by fib_multipath_hash for multipath selection. Fixes: 24ba14406c5c ("route: Add multipath_hash in flowi_common to make user-define hash") Signed-off-by: David Ahern Cc: wenxu Signed-off-by: David S. Miller --- include/net/flow.h | 1 + net/core/filter.c | 1 + net/ipv4/fib_frontend.c | 1 + net/ipv4/route.c | 1 + 4 files changed, 4 insertions(+) diff --git a/include/net/flow.h b/include/net/flow.h index 929d3ca614d0..b2531df3f65f 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -116,6 +116,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, fl4->saddr = saddr; fl4->fl4_dport = dport; fl4->fl4_sport = sport; + fl4->flowi4_multipath_hash = 0; } /* Reset some input parameters after previous lookup */ diff --git a/net/core/filter.c b/net/core/filter.c index 1f647ab986b6..1b168371ba96 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4838,6 +4838,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, fl4.saddr = params->ipv4_src; fl4.fl4_sport = params->sport; fl4.fl4_dport = params->dport; + fl4.flowi4_multipath_hash = 0; if (flags & BPF_FIB_LOOKUP_DIRECT) { u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 41079490a118..86a23e4a6a50 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -362,6 +362,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, fl4.flowi4_tun_key.tun_id = 0; fl4.flowi4_flags = 0; fl4.flowi4_uid = sock_net_uid(net, NULL); + fl4.flowi4_multipath_hash = 0; no_addr = idev->ifa_list == NULL; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8ca6bcab7b03..e5f210d00851 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2147,6 +2147,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, fl4.daddr = daddr; fl4.saddr = saddr; fl4.flowi4_uid = sock_net_uid(net, NULL); + fl4.flowi4_multipath_hash = 0; if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) { flkeys = &_flkeys; -- cgit v1.2.3-59-g8ed1b From bb3a420d47ab00d7e1e5083286cab15235a96680 Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Sun, 13 Sep 2020 04:06:05 -0400 Subject: tipc: Fix memory leak in tipc_group_create_member() tipc_group_add_to_tree() returns silently if `key` matches `nkey` of an existing node, causing tipc_group_create_member() to leak memory. Let tipc_group_add_to_tree() return an error in such a case, so that tipc_group_create_member() can handle it properly. Fixes: 75da2163dbb6 ("tipc: introduce communication groups") Reported-and-tested-by: syzbot+f95d90c454864b3b5bc9@syzkaller.appspotmail.com Cc: Hillf Danton Link: https://syzkaller.appspot.com/bug?id=048390604fe1b60df34150265479202f10e13aff Signed-off-by: Peilin Ye Signed-off-by: David S. Miller --- net/tipc/group.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/tipc/group.c b/net/tipc/group.c index 588c2d2b0c69..b1fcd2ad5ecf 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -273,8 +273,8 @@ static struct tipc_member *tipc_group_find_node(struct tipc_group *grp, return NULL; } -static void tipc_group_add_to_tree(struct tipc_group *grp, - struct tipc_member *m) +static int tipc_group_add_to_tree(struct tipc_group *grp, + struct tipc_member *m) { u64 nkey, key = (u64)m->node << 32 | m->port; struct rb_node **n, *parent = NULL; @@ -291,10 +291,11 @@ static void tipc_group_add_to_tree(struct tipc_group *grp, else if (key > nkey) n = &(*n)->rb_right; else - return; + return -EEXIST; } rb_link_node(&m->tree_node, parent, n); rb_insert_color(&m->tree_node, &grp->members); + return 0; } static struct tipc_member *tipc_group_create_member(struct tipc_group *grp, @@ -302,6 +303,7 @@ static struct tipc_member *tipc_group_create_member(struct tipc_group *grp, u32 instance, int state) { struct tipc_member *m; + int ret; m = kzalloc(sizeof(*m), GFP_ATOMIC); if (!m) @@ -314,8 +316,12 @@ static struct tipc_member *tipc_group_create_member(struct tipc_group *grp, m->port = port; m->instance = instance; m->bc_acked = grp->bc_snd_nxt - 1; + ret = tipc_group_add_to_tree(grp, m); + if (ret < 0) { + kfree(m); + return NULL; + } grp->member_cnt++; - tipc_group_add_to_tree(grp, m); tipc_nlist_add(&grp->dests, m->node); m->state = state; return m; -- cgit v1.2.3-59-g8ed1b From ff48b6222e65ebdba5a403ef1deba6214e749193 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 13 Sep 2020 19:37:31 +0800 Subject: tipc: use skb_unshare() instead in tipc_buf_append() In tipc_buf_append() it may change skb's frag_list, and it causes problems when this skb is cloned. skb_unclone() doesn't really make this skb's flag_list available to change. Shuang Li has reported an use-after-free issue because of this when creating quite a few macvlan dev over the same dev, where the broadcast packets will be cloned and go up to the stack: [ ] BUG: KASAN: use-after-free in pskb_expand_head+0x86d/0xea0 [ ] Call Trace: [ ] dump_stack+0x7c/0xb0 [ ] print_address_description.constprop.7+0x1a/0x220 [ ] kasan_report.cold.10+0x37/0x7c [ ] check_memory_region+0x183/0x1e0 [ ] pskb_expand_head+0x86d/0xea0 [ ] process_backlog+0x1df/0x660 [ ] net_rx_action+0x3b4/0xc90 [ ] [ ] Allocated by task 1786: [ ] kmem_cache_alloc+0xbf/0x220 [ ] skb_clone+0x10a/0x300 [ ] macvlan_broadcast+0x2f6/0x590 [macvlan] [ ] macvlan_process_broadcast+0x37c/0x516 [macvlan] [ ] process_one_work+0x66a/0x1060 [ ] worker_thread+0x87/0xb10 [ ] [ ] Freed by task 3253: [ ] kmem_cache_free+0x82/0x2a0 [ ] skb_release_data+0x2c3/0x6e0 [ ] kfree_skb+0x78/0x1d0 [ ] tipc_recvmsg+0x3be/0xa40 [tipc] So fix it by using skb_unshare() instead, which would create a new skb for the cloned frag and it'll be safe to change its frag_list. The similar things were also done in sctp_make_reassembled_event(), which is using skb_copy(). Reported-by: Shuang Li Fixes: 37e22164a8a3 ("tipc: rename and move message reassembly function") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/tipc/msg.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 848fae674532..52e93ba4d8e2 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -150,7 +150,8 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) if (fragid == FIRST_FRAGMENT) { if (unlikely(head)) goto err; - if (unlikely(skb_unclone(frag, GFP_ATOMIC))) + frag = skb_unshare(frag, GFP_ATOMIC); + if (unlikely(!frag)) goto err; head = *headbuf = frag; *buf = NULL; -- cgit v1.2.3-59-g8ed1b From 13e6ce98aa65ab5ce19351c020419360dfe8af29 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 13 Sep 2020 19:51:50 +0800 Subject: net: sched: only keep the available bits when setting vxlan md->gbp As we can see from vxlan_build/parse_gbp_hdr(), when processing metadata on vxlan rx/tx path, only dont_learn/policy_applied/policy_id fields can be set to or parse from the packet for vxlan gbp option. So we'd better do the mask when set it in act_tunnel_key and cls_flower. Otherwise, when users don't know these bits, they may configure with a value which can never be matched. Reported-by: Shuang Li Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/vxlan.h | 3 +++ net/sched/act_tunnel_key.c | 1 + net/sched/cls_flower.c | 4 +++- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 3a41627cbdfe..08537aa14f7c 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -121,6 +121,9 @@ struct vxlanhdr_gbp { #define VXLAN_GBP_POLICY_APPLIED (BIT(3) << 16) #define VXLAN_GBP_ID_MASK (0xFFFF) +#define VXLAN_GBP_MASK (VXLAN_GBP_DONT_LEARN | VXLAN_GBP_POLICY_APPLIED | \ + VXLAN_GBP_ID_MASK) + /* * VXLAN Generic Protocol Extension (VXLAN_F_GPE): * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 536c4bc31be6..37f1e10f35e0 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -156,6 +156,7 @@ tunnel_key_copy_vxlan_opt(const struct nlattr *nla, void *dst, int dst_len, struct vxlan_metadata *md = dst; md->gbp = nla_get_u32(tb[TCA_TUNNEL_KEY_ENC_OPT_VXLAN_GBP]); + md->gbp &= VXLAN_GBP_MASK; } return sizeof(struct vxlan_metadata); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index a4f7ef1de7e7..e8fda1bd4d9d 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1175,8 +1175,10 @@ static int fl_set_vxlan_opt(const struct nlattr *nla, struct fl_flow_key *key, return -EINVAL; } - if (tb[TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP]) + if (tb[TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP]) { md->gbp = nla_get_u32(tb[TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP]); + md->gbp &= VXLAN_GBP_MASK; + } return sizeof(*md); } -- cgit v1.2.3-59-g8ed1b From 681d2cfb790339a3e95b98bc140baf1f816a896a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 13 Sep 2020 19:51:51 +0800 Subject: lwtunnel: only keep the available bits when setting vxlan md->gbp As we can see from vxlan_build/parse_gbp_hdr(), when processing metadata on vxlan rx/tx path, only dont_learn/policy_applied/policy_id fields can be set to or parse from the packet for vxlan gbp option. So do the mask when set it in lwtunnel, as it does in act_tunnel_key and cls_flower. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 75c6013ff9a4..b2ea1a8c5fd6 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -554,6 +554,7 @@ static int ip_tun_parse_opts_vxlan(struct nlattr *attr, attr = tb[LWTUNNEL_IP_OPT_VXLAN_GBP]; md->gbp = nla_get_u32(attr); + md->gbp &= VXLAN_GBP_MASK; info->key.tun_flags |= TUNNEL_VXLAN_OPT; } -- cgit v1.2.3-59-g8ed1b From 8e1b3ac4786680c2d2b5a24e38a2d714c3bcd1ef Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 13 Sep 2020 19:43:03 +0800 Subject: net: sched: initialize with 0 before setting erspan md->u In fl_set_erspan_opt(), all bits of erspan md was set 1, as this function is also used to set opt MASK. However, when setting for md->u.index for opt VALUE, the rest bits of the union md->u will be left 1. It would cause to fail the match of the whole md when version is 1 and only index is set. This patch is to fix by initializing with 0 before setting erspan md->u. Reported-by: Shuang Li Fixes: 79b1011cb33d ("net: sched: allow flower to match erspan options") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sched/cls_flower.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index e8fda1bd4d9d..fed18fd2c50b 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1223,6 +1223,7 @@ static int fl_set_erspan_opt(const struct nlattr *nla, struct fl_flow_key *key, } if (tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX]) { nla = tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX]; + memset(&md->u, 0x00, sizeof(md->u)); md->u.index = nla_get_be32(nla); } } else if (md->version == 2) { -- cgit v1.2.3-59-g8ed1b From 2b1667e54caf95e1e4249d9068eea7a3089a5229 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Thu, 10 Sep 2020 09:56:09 +0200 Subject: xsk: Fix number of pinned pages/umem size discrepancy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For AF_XDP sockets, there was a discrepancy between the number of of pinned pages and the size of the umem region. The size of the umem region is used to validate the AF_XDP descriptor addresses. The logic that pinned the pages covered by the region only took whole pages into consideration, creating a mismatch between the size and pinned pages. A user could then pass AF_XDP addresses outside the range of pinned pages, but still within the size of the region, crashing the kernel. This change correctly calculates the number of pages to be pinned. Further, the size check for the aligned mode is simplified. Now the code simply checks if the size is divisible by the chunk size. Fixes: bbff2f321a86 ("xsk: new descriptor addressing scheme") Reported-by: Ciara Loftus Signed-off-by: Björn Töpel Signed-off-by: Alexei Starovoitov Tested-by: Ciara Loftus Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200910075609.7904-1-bjorn.topel@gmail.com --- net/xdp/xdp_umem.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index e97db37354e4..b010bfde0149 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -303,10 +303,10 @@ static int xdp_umem_account_pages(struct xdp_umem *umem) static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) { + u32 npgs_rem, chunk_size = mr->chunk_size, headroom = mr->headroom; bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG; - u32 chunk_size = mr->chunk_size, headroom = mr->headroom; u64 npgs, addr = mr->addr, size = mr->len; - unsigned int chunks, chunks_per_page; + unsigned int chunks, chunks_rem; int err; if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) { @@ -336,19 +336,18 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) if ((addr + size) < addr) return -EINVAL; - npgs = size >> PAGE_SHIFT; + npgs = div_u64_rem(size, PAGE_SIZE, &npgs_rem); + if (npgs_rem) + npgs++; if (npgs > U32_MAX) return -EINVAL; - chunks = (unsigned int)div_u64(size, chunk_size); + chunks = (unsigned int)div_u64_rem(size, chunk_size, &chunks_rem); if (chunks == 0) return -EINVAL; - if (!unaligned_chunks) { - chunks_per_page = PAGE_SIZE / chunk_size; - if (chunks < chunks_per_page || chunks % chunks_per_page) - return -EINVAL; - } + if (!unaligned_chunks && chunks_rem) + return -EINVAL; if (headroom >= chunk_size - XDP_PACKET_HEADROOM) return -EINVAL; -- cgit v1.2.3-59-g8ed1b From 65dce596e2b839bc324b9543eac99a923eb6d794 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 14 Sep 2020 17:50:31 -0700 Subject: docs/bpf: Remove source code links Make path to bench_ringbufs.c just a text, not a special link. Fixes: 97abb2b39682 ("docs/bpf: Add BPF ring buffer design notes") Reported-by: Mauro Carvalho Chehab Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200915005031.2748397-1-andriin@fb.com --- Documentation/bpf/ringbuf.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/bpf/ringbuf.rst b/Documentation/bpf/ringbuf.rst index 4d4f3bcb1477..6a615cd62bda 100644 --- a/Documentation/bpf/ringbuf.rst +++ b/Documentation/bpf/ringbuf.rst @@ -197,7 +197,7 @@ a self-pacing notifications of new data being availability. being available after commit only if consumer has already caught up right up to the record being committed. If not, consumer still has to catch up and thus will see new data anyways without needing an extra poll notification. -Benchmarks (see tools/testing/selftests/bpf/benchs/bench_ringbufs.c_) show that +Benchmarks (see tools/testing/selftests/bpf/benchs/bench_ringbufs.c) show that this allows to achieve a very high throughput without having to resort to tricks like "notify only every Nth sample", which are necessary with perf buffer. For extreme cases, when BPF program wants more manual control of -- cgit v1.2.3-59-g8ed1b From 3236d215ad38a3f5372e65cd1e0a52cf93d3c6a2 Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Tue, 15 Sep 2020 09:54:08 +0200 Subject: batman-adv: mcast: fix duplicate mcast packets in BLA backbone from LAN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Scenario: * Multicast frame send from a BLA backbone (multiple nodes with their bat0 bridged together, with BLA enabled) Issue: * BLA backbone nodes receive the frame multiple times on bat0 For multicast frames received via batman-adv broadcast packets the originator of the broadcast packet is checked before decapsulating and forwarding the frame to bat0 (batadv_bla_is_backbone_gw()-> batadv_recv_bcast_packet()). If it came from a node which shares the same BLA backbone with us then it is not forwarded to bat0 to avoid a loop. When sending a multicast frame in a non-4-address batman-adv unicast packet we are currently missing this check - and cannot do so because the batman-adv unicast packet has no originator address field. However, we can simply fix this on the sender side by only sending the multicast frame via unicasts to interested nodes which do not share the same BLA backbone with us. This also nicely avoids some unnecessary transmissions on mesh side. Note that no infinite loop was observed, probably because of dropping via batadv_interface_tx()->batadv_bla_tx(). However the duplicates still utterly confuse switches/bridges, ICMPv6 duplicate address detection and neighbor discovery and therefore leads to long delays before being able to establish TCP connections, for instance. And it also leads to the Linux bridge printing messages like: "br-lan: received packet on eth1 with own address as source address ..." Fixes: 2d3f6ccc4ea5 ("batman-adv: Modified forwarding behaviour for multicast packets") Signed-off-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/multicast.c | 46 ++++++++++++++++++++++++++++++++--------- net/batman-adv/multicast.h | 15 ++++++++++++++ net/batman-adv/soft-interface.c | 5 ++--- 3 files changed, 53 insertions(+), 13 deletions(-) diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index bdc4a1fba1c6..ca24a2e522b7 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -51,6 +51,7 @@ #include #include +#include "bridge_loop_avoidance.h" #include "hard-interface.h" #include "hash.h" #include "log.h" @@ -1434,6 +1435,35 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, return BATADV_FORW_ALL; } +/** + * batadv_mcast_forw_send_orig() - send a multicast packet to an originator + * @bat_priv: the bat priv with all the soft interface information + * @skb: the multicast packet to send + * @vid: the vlan identifier + * @orig_node: the originator to send the packet to + * + * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. + */ +int batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv, + struct sk_buff *skb, + unsigned short vid, + struct batadv_orig_node *orig_node) +{ + /* Avoid sending multicast-in-unicast packets to other BLA + * gateways - they already got the frame from the LAN side + * we share with them. + * TODO: Refactor to take BLA into account earlier, to avoid + * reducing the mcast_fanout count. + */ + if (batadv_bla_is_backbone_gw_orig(bat_priv, orig_node->orig, vid)) { + dev_kfree_skb(skb); + return NET_XMIT_SUCCESS; + } + + return batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST, 0, + orig_node, vid); +} + /** * batadv_mcast_forw_tt() - forwards a packet to multicast listeners * @bat_priv: the bat priv with all the soft interface information @@ -1471,8 +1501,8 @@ batadv_mcast_forw_tt(struct batadv_priv *bat_priv, struct sk_buff *skb, break; } - batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0, - orig_entry->orig_node, vid); + batadv_mcast_forw_send_orig(bat_priv, newskb, vid, + orig_entry->orig_node); } rcu_read_unlock(); @@ -1513,8 +1543,7 @@ batadv_mcast_forw_want_all_ipv4(struct batadv_priv *bat_priv, break; } - batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0, - orig_node, vid); + batadv_mcast_forw_send_orig(bat_priv, newskb, vid, orig_node); } rcu_read_unlock(); return ret; @@ -1551,8 +1580,7 @@ batadv_mcast_forw_want_all_ipv6(struct batadv_priv *bat_priv, break; } - batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0, - orig_node, vid); + batadv_mcast_forw_send_orig(bat_priv, newskb, vid, orig_node); } rcu_read_unlock(); return ret; @@ -1618,8 +1646,7 @@ batadv_mcast_forw_want_all_rtr4(struct batadv_priv *bat_priv, break; } - batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0, - orig_node, vid); + batadv_mcast_forw_send_orig(bat_priv, newskb, vid, orig_node); } rcu_read_unlock(); return ret; @@ -1656,8 +1683,7 @@ batadv_mcast_forw_want_all_rtr6(struct batadv_priv *bat_priv, break; } - batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0, - orig_node, vid); + batadv_mcast_forw_send_orig(bat_priv, newskb, vid, orig_node); } rcu_read_unlock(); return ret; diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h index ebf825991ecd..3e114bc5ca3b 100644 --- a/net/batman-adv/multicast.h +++ b/net/batman-adv/multicast.h @@ -46,6 +46,11 @@ enum batadv_forw_mode batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, struct batadv_orig_node **mcast_single_orig); +int batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv, + struct sk_buff *skb, + unsigned short vid, + struct batadv_orig_node *orig_node); + int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid); @@ -71,6 +76,16 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, return BATADV_FORW_ALL; } +static inline int +batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv, + struct sk_buff *skb, + unsigned short vid, + struct batadv_orig_node *orig_node) +{ + kfree_skb(skb); + return NET_XMIT_DROP; +} + static inline int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 23833a0ba5e6..3d037b17f3a7 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -364,9 +364,8 @@ send: goto dropped; ret = batadv_send_skb_via_gw(bat_priv, skb, vid); } else if (mcast_single_orig) { - ret = batadv_send_skb_unicast(bat_priv, skb, - BATADV_UNICAST, 0, - mcast_single_orig, vid); + ret = batadv_mcast_forw_send_orig(bat_priv, skb, vid, + mcast_single_orig); } else if (forw_mode == BATADV_FORW_SOME) { ret = batadv_mcast_forw_send(bat_priv, skb, vid); } else { -- cgit v1.2.3-59-g8ed1b From 74c09b7275126da1b642b90c9cdc3ae8b729ad4b Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Tue, 15 Sep 2020 09:54:09 +0200 Subject: batman-adv: mcast: fix duplicate mcast packets in BLA backbone from mesh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Scenario: * Multicast frame send from mesh to a BLA backbone (multiple nodes with their bat0 bridged together, with BLA enabled) Issue: * BLA backbone nodes receive the frame multiple times on bat0, once from mesh->bat0 and once from each backbone_gw from LAN For unicast, a node will send only to the best backbone gateway according to the TQ. However for multicast we currently cannot determine if multiple destination nodes share the same backbone if they don't share the same backbone with us. So we need to keep sending the unicasts to all backbone gateways and let the backbone gateways decide which one will forward the frame. We can use the CLAIM mechanism to make this decision. One catch: The batman-adv gateway feature for DHCP packets potentially sends multicast packets in the same batman-adv unicast header as the multicast optimizations code. And we are not allowed to drop those even if we did not claim the source address of the sender, as for such packets there is only this one multicast-in-unicast packet. How can we distinguish the two cases? The gateway feature uses a batman-adv unicast 4 address header. While the multicast-to-unicasts feature uses a simple, 3 address batman-adv unicast header. So let's use this to distinguish. Fixes: fe2da6ff27c7 ("batman-adv: check incoming packet type for bla") Signed-off-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bridge_loop_avoidance.c | 34 +++++++++++++++++++++++++--------- net/batman-adv/bridge_loop_avoidance.h | 4 ++-- net/batman-adv/soft-interface.c | 6 +++--- 3 files changed, 30 insertions(+), 14 deletions(-) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 08419a2e6b95..68715783a742 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1814,7 +1814,7 @@ batadv_bla_loopdetect_check(struct batadv_priv *bat_priv, struct sk_buff *skb, * @bat_priv: the bat priv with all the soft interface information * @skb: the frame to be checked * @vid: the VLAN ID of the frame - * @is_bcast: the packet came in a broadcast packet type. + * @packet_type: the batman packet type this frame came in * * batadv_bla_rx avoidance checks if: * * we have to race for a claim @@ -1826,7 +1826,7 @@ batadv_bla_loopdetect_check(struct batadv_priv *bat_priv, struct sk_buff *skb, * further process the skb. */ bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, - unsigned short vid, bool is_bcast) + unsigned short vid, int packet_type) { struct batadv_bla_backbone_gw *backbone_gw; struct ethhdr *ethhdr; @@ -1848,9 +1848,24 @@ bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, goto handled; if (unlikely(atomic_read(&bat_priv->bla.num_requests))) - /* don't allow broadcasts while requests are in flight */ - if (is_multicast_ether_addr(ethhdr->h_dest) && is_bcast) - goto handled; + /* don't allow multicast packets while requests are in flight */ + if (is_multicast_ether_addr(ethhdr->h_dest)) + /* Both broadcast flooding or multicast-via-unicasts + * delivery might send to multiple backbone gateways + * sharing the same LAN and therefore need to coordinate + * which backbone gateway forwards into the LAN, + * by claiming the payload source address. + * + * Broadcast flooding and multicast-via-unicasts + * delivery use the following two batman packet types. + * Note: explicitly exclude BATADV_UNICAST_4ADDR, + * as the DHCP gateway feature will send explicitly + * to only one BLA gateway, so the claiming process + * should be avoided there. + */ + if (packet_type == BATADV_BCAST || + packet_type == BATADV_UNICAST) + goto handled; ether_addr_copy(search_claim.addr, ethhdr->h_source); search_claim.vid = vid; @@ -1885,13 +1900,14 @@ bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, goto allow; } - /* if it is a broadcast ... */ - if (is_multicast_ether_addr(ethhdr->h_dest) && is_bcast) { + /* if it is a multicast ... */ + if (is_multicast_ether_addr(ethhdr->h_dest) && + (packet_type == BATADV_BCAST || packet_type == BATADV_UNICAST)) { /* ... drop it. the responsible gateway is in charge. * - * We need to check is_bcast because with the gateway + * We need to check packet type because with the gateway * feature, broadcasts (like DHCP requests) may be sent - * using a unicast packet type. + * using a unicast 4 address packet type. See comment above. */ goto handled; } else { diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index 41edb2c4a327..a81c41b636f9 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -35,7 +35,7 @@ static inline bool batadv_bla_is_loopdetect_mac(const uint8_t *mac) #ifdef CONFIG_BATMAN_ADV_BLA bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, - unsigned short vid, bool is_bcast); + unsigned short vid, int packet_type); bool batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid); bool batadv_bla_is_backbone_gw(struct sk_buff *skb, @@ -66,7 +66,7 @@ bool batadv_bla_check_claim(struct batadv_priv *bat_priv, u8 *addr, static inline bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid, - bool is_bcast) + int packet_type) { return false; } diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 3d037b17f3a7..cdde943c1b83 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -424,10 +424,10 @@ void batadv_interface_rx(struct net_device *soft_iface, struct vlan_ethhdr *vhdr; struct ethhdr *ethhdr; unsigned short vid; - bool is_bcast; + int packet_type; batadv_bcast_packet = (struct batadv_bcast_packet *)skb->data; - is_bcast = (batadv_bcast_packet->packet_type == BATADV_BCAST); + packet_type = batadv_bcast_packet->packet_type; skb_pull_rcsum(skb, hdr_size); skb_reset_mac_header(skb); @@ -470,7 +470,7 @@ void batadv_interface_rx(struct net_device *soft_iface, /* Let the bridge loop avoidance check the packet. If will * not handle it, we can safely push it up. */ - if (batadv_bla_rx(bat_priv, skb, vid, is_bcast)) + if (batadv_bla_rx(bat_priv, skb, vid, packet_type)) goto out; if (orig_node) -- cgit v1.2.3-59-g8ed1b From 2369e827046920ef0599e6a36b975ac5c0a359c2 Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Tue, 15 Sep 2020 09:54:10 +0200 Subject: batman-adv: mcast: fix duplicate mcast packets from BLA backbone to mesh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Scenario: * Multicast frame send from BLA backbone gateways (multiple nodes with their bat0 bridged together, with BLA enabled) sharing the same LAN to nodes in the mesh Issue: * Nodes receive the frame multiple times on bat0 from the mesh, once from each foreign BLA backbone gateway which shares the same LAN with another For multicast frames via batman-adv broadcast packets coming from the same BLA backbone but from different backbone gateways duplicates are currently detected via a CRC history of previously received packets. However this CRC so far was not performed for multicast frames received via batman-adv unicast packets. Fixing this by appyling the same check for such packets, too. Room for improvements in the future: Ideally we would introduce the possibility to not only claim a client, but a complete originator, too. This would allow us to only send a multicast-in-unicast packet from a BLA backbone gateway claiming the node and by that avoid potential redundant transmissions in the first place. Fixes: 279e89b2281a ("batman-adv: add broadcast duplicate check") Signed-off-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bridge_loop_avoidance.c | 103 ++++++++++++++++++++++++++++----- 1 file changed, 87 insertions(+), 16 deletions(-) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 68715783a742..c350ab63cd54 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1581,13 +1581,16 @@ int batadv_bla_init(struct batadv_priv *bat_priv) } /** - * batadv_bla_check_bcast_duplist() - Check if a frame is in the broadcast dup. + * batadv_bla_check_duplist() - Check if a frame is in the broadcast dup. * @bat_priv: the bat priv with all the soft interface information - * @skb: contains the bcast_packet to be checked + * @skb: contains the multicast packet to be checked + * @payload_ptr: pointer to position inside the head buffer of the skb + * marking the start of the data to be CRC'ed + * @orig: originator mac address, NULL if unknown * - * check if it is on our broadcast list. Another gateway might - * have sent the same packet because it is connected to the same backbone, - * so we have to remove this duplicate. + * Check if it is on our broadcast list. Another gateway might have sent the + * same packet because it is connected to the same backbone, so we have to + * remove this duplicate. * * This is performed by checking the CRC, which will tell us * with a good chance that it is the same packet. If it is furthermore @@ -1596,19 +1599,17 @@ int batadv_bla_init(struct batadv_priv *bat_priv) * * Return: true if a packet is in the duplicate list, false otherwise. */ -bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, - struct sk_buff *skb) +static bool batadv_bla_check_duplist(struct batadv_priv *bat_priv, + struct sk_buff *skb, u8 *payload_ptr, + const u8 *orig) { - int i, curr; - __be32 crc; - struct batadv_bcast_packet *bcast_packet; struct batadv_bcast_duplist_entry *entry; bool ret = false; - - bcast_packet = (struct batadv_bcast_packet *)skb->data; + int i, curr; + __be32 crc; /* calculate the crc ... */ - crc = batadv_skb_crc32(skb, (u8 *)(bcast_packet + 1)); + crc = batadv_skb_crc32(skb, payload_ptr); spin_lock_bh(&bat_priv->bla.bcast_duplist_lock); @@ -1627,8 +1628,21 @@ bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, if (entry->crc != crc) continue; - if (batadv_compare_eth(entry->orig, bcast_packet->orig)) - continue; + /* are the originators both known and not anonymous? */ + if (orig && !is_zero_ether_addr(orig) && + !is_zero_ether_addr(entry->orig)) { + /* If known, check if the new frame came from + * the same originator: + * We are safe to take identical frames from the + * same orig, if known, as multiplications in + * the mesh are detected via the (orig, seqno) pair. + * So we can be a bit more liberal here and allow + * identical frames from the same orig which the source + * host might have sent multiple times on purpose. + */ + if (batadv_compare_eth(entry->orig, orig)) + continue; + } /* this entry seems to match: same crc, not too old, * and from another gw. therefore return true to forbid it. @@ -1644,7 +1658,14 @@ bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, entry = &bat_priv->bla.bcast_duplist[curr]; entry->crc = crc; entry->entrytime = jiffies; - ether_addr_copy(entry->orig, bcast_packet->orig); + + /* known originator */ + if (orig) + ether_addr_copy(entry->orig, orig); + /* anonymous originator */ + else + eth_zero_addr(entry->orig); + bat_priv->bla.bcast_duplist_curr = curr; out: @@ -1653,6 +1674,48 @@ out: return ret; } +/** + * batadv_bla_check_ucast_duplist() - Check if a frame is in the broadcast dup. + * @bat_priv: the bat priv with all the soft interface information + * @skb: contains the multicast packet to be checked, decapsulated from a + * unicast_packet + * + * Check if it is on our broadcast list. Another gateway might have sent the + * same packet because it is connected to the same backbone, so we have to + * remove this duplicate. + * + * Return: true if a packet is in the duplicate list, false otherwise. + */ +static bool batadv_bla_check_ucast_duplist(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + return batadv_bla_check_duplist(bat_priv, skb, (u8 *)skb->data, NULL); +} + +/** + * batadv_bla_check_bcast_duplist() - Check if a frame is in the broadcast dup. + * @bat_priv: the bat priv with all the soft interface information + * @skb: contains the bcast_packet to be checked + * + * Check if it is on our broadcast list. Another gateway might have sent the + * same packet because it is connected to the same backbone, so we have to + * remove this duplicate. + * + * Return: true if a packet is in the duplicate list, false otherwise. + */ +bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + struct batadv_bcast_packet *bcast_packet; + u8 *payload_ptr; + + bcast_packet = (struct batadv_bcast_packet *)skb->data; + payload_ptr = (u8 *)(bcast_packet + 1); + + return batadv_bla_check_duplist(bat_priv, skb, payload_ptr, + bcast_packet->orig); +} + /** * batadv_bla_is_backbone_gw_orig() - Check if the originator is a gateway for * the VLAN identified by vid. @@ -1867,6 +1930,14 @@ bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, packet_type == BATADV_UNICAST) goto handled; + /* potential duplicates from foreign BLA backbone gateways via + * multicast-in-unicast packets + */ + if (is_multicast_ether_addr(ethhdr->h_dest) && + packet_type == BATADV_UNICAST && + batadv_bla_check_ucast_duplist(bat_priv, skb)) + goto handled; + ether_addr_copy(search_claim.addr, ethhdr->h_source); search_claim.vid = vid; claim = batadv_claim_hash_find(bat_priv, &search_claim); -- cgit v1.2.3-59-g8ed1b From d3f2ef1887e18a091ac25e3ec8985ecb9a5c76fc Mon Sep 17 00:00:00 2001 From: Dany Madden Date: Mon, 14 Sep 2020 20:35:35 -0400 Subject: ibmvnic: update MAINTAINERS Update supporters for IBM Power SRIOV Virtual NIC Device Driver. Thomas Falcon is moving on to other works. Dany Madden, Lijun Pan and Sukadev Bhattiprolu are the current supporters. Signed-off-by: Dany Madden Signed-off-by: David S. Miller --- MAINTAINERS | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2783a5f68d2c..923c69ad4eec 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8315,7 +8315,9 @@ S: Supported F: drivers/pci/hotplug/rpaphp* IBM Power SRIOV Virtual NIC Device Driver -M: Thomas Falcon +M: Dany Madden +M: Lijun Pan +M: Sukadev Bhattiprolu L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/ibm/ibmvnic.* -- cgit v1.2.3-59-g8ed1b From 2e5117ba9f582262e93a1fdf8e1a7b9affd5121c Mon Sep 17 00:00:00 2001 From: Lu Wei Date: Tue, 15 Sep 2020 10:39:55 +0800 Subject: net: tipc: kerneldoc fixes Fix parameter description of tipc_link_bc_create() Reported-by: Hulk Robot Fixes: 16ad3f4022bb ("tipc: introduce variable window congestion control") Signed-off-by: Lu Wei Signed-off-by: David S. Miller --- net/tipc/link.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index b7362556da95..cef38a910107 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -532,7 +532,8 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id, * tipc_link_bc_create - create new link to be used for broadcast * @net: pointer to associated network namespace * @mtu: mtu to be used initially if no peers - * @window: send window to be used + * @min_win: minimal send window to be used by link + * @max_win: maximal send window to be used by link * @inputq: queue to put messages ready for delivery * @namedq: queue to put binding table update messages ready for delivery * @link: return value, pointer to put the created link -- cgit v1.2.3-59-g8ed1b From 2fbc6e89b2f1403189e624cabaf73e189c5e50c6 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 14 Sep 2020 21:03:54 -0600 Subject: ipv4: Update exception handling for multipath routes via same device Kfir reported that pmtu exceptions are not created properly for deployments where multipath routes use the same device. After some digging I see 2 compounding problems: 1. ip_route_output_key_hash_rcu is updating the flowi4_oif *after* the route lookup. This is the second use case where this has been a problem (the first is related to use of vti devices with VRF). I can not find any reason for the oif to be changed after the lookup; the code goes back to the start of git. It does not seem logical so remove it. 2. fib_lookups for exceptions do not call fib_select_path to handle multipath route selection based on the hash. The end result is that the fib_lookup used to add the exception always creates it based using the first leg of the route. An example topology showing the problem: | host1 +------+ | eth0 | .209 +------+ | +------+ switch | br0 | +------+ | +---------+---------+ | host2 | host3 +------+ +------+ | eth0 | .250 | eth0 | 192.168.252.252 +------+ +------+ +-----+ +-----+ | vti | .2 | vti | 192.168.247.3 +-----+ +-----+ \ / ================================= tunnels 192.168.247.1/24 for h in host1 host2 host3; do ip netns add ${h} ip -netns ${h} link set lo up ip netns exec ${h} sysctl -wq net.ipv4.ip_forward=1 done ip netns add switch ip -netns switch li set lo up ip -netns switch link add br0 type bridge stp 0 ip -netns switch link set br0 up for n in 1 2 3; do ip -netns switch link add eth-sw type veth peer name eth-h${n} ip -netns switch li set eth-h${n} master br0 up ip -netns switch li set eth-sw netns host${n} name eth0 done ip -netns host1 addr add 192.168.252.209/24 dev eth0 ip -netns host1 link set dev eth0 up ip -netns host1 route add 192.168.247.0/24 \ nexthop via 192.168.252.250 dev eth0 nexthop via 192.168.252.252 dev eth0 ip -netns host2 addr add 192.168.252.250/24 dev eth0 ip -netns host2 link set dev eth0 up ip -netns host2 addr add 192.168.252.252/24 dev eth0 ip -netns host3 link set dev eth0 up ip netns add tunnel ip -netns tunnel li set lo up ip -netns tunnel li add br0 type bridge ip -netns tunnel li set br0 up for n in $(seq 11 20); do ip -netns tunnel addr add dev br0 192.168.247.${n}/24 done for n in 2 3 do ip -netns tunnel link add vti${n} type veth peer name eth${n} ip -netns tunnel link set eth${n} mtu 1360 master br0 up ip -netns tunnel link set vti${n} netns host${n} mtu 1360 up ip -netns host${n} addr add dev vti${n} 192.168.247.${n}/24 done ip -netns tunnel ro add default nexthop via 192.168.247.2 nexthop via 192.168.247.3 ip netns exec host1 ping -M do -s 1400 -c3 -I 192.168.252.209 192.168.247.11 ip netns exec host1 ping -M do -s 1400 -c3 -I 192.168.252.209 192.168.247.15 ip -netns host1 ro ls cache Before this patch the cache always shows exceptions against the first leg in the multipath route; 192.168.252.250 per this example. Since the hash has an initial random seed, you may need to vary the final octet more than what is listed. In my tests, using addresses between 11 and 19 usually found 1 that used both legs. With this patch, the cache will have exceptions for both legs. Fixes: 4895c771c7f0 ("ipv4: Add FIB nexthop exceptions") Reported-by: Kfir Itzhak Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/route.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e5f210d00851..58642b29a499 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -786,8 +786,10 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow neigh_event_send(n, NULL); } else { if (fib_lookup(net, fl4, &res, 0) == 0) { - struct fib_nh_common *nhc = FIB_RES_NHC(res); + struct fib_nh_common *nhc; + fib_select_path(net, &res, fl4, skb); + nhc = FIB_RES_NHC(res); update_or_create_fnhe(nhc, fl4->daddr, new_gw, 0, false, jiffies + ip_rt_gc_timeout); @@ -1013,6 +1015,7 @@ out: kfree_skb(skb); static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) { struct dst_entry *dst = &rt->dst; + struct net *net = dev_net(dst->dev); u32 old_mtu = ipv4_mtu(dst); struct fib_result res; bool lock = false; @@ -1033,9 +1036,11 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) return; rcu_read_lock(); - if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) { - struct fib_nh_common *nhc = FIB_RES_NHC(res); + if (fib_lookup(net, fl4, &res, 0) == 0) { + struct fib_nh_common *nhc; + fib_select_path(net, &res, fl4, NULL); + nhc = FIB_RES_NHC(res); update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock, jiffies + ip_rt_mtu_expires); } @@ -2668,8 +2673,6 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4, fib_select_path(net, res, fl4, skb); dev_out = FIB_RES_DEV(*res); - fl4->flowi4_oif = dev_out->ifindex; - make_route: rth = __mkroute_output(res, fl4, orig_oif, dev_out, flags); -- cgit v1.2.3-59-g8ed1b From 8c33dadc3e0eef1599811a55d748a0b95da0317d Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 15 Sep 2020 11:29:59 -0700 Subject: bpf: Bpf_skc_to_* casting helpers require a NULL check on sk The bpf_skc_to_* type casting helpers are available to BPF_PROG_TYPE_TRACING. The traced PTR_TO_BTF_ID may be NULL. For example, the skb->sk may be NULL. Thus, these casting helpers need to check "!sk" also and this patch fixes them. Fixes: 0d4fad3e57df ("bpf: Add bpf_skc_to_udp6_sock() helper") Fixes: 478cfbdf5f13 ("bpf: Add bpf_skc_to_{tcp, tcp_timewait, tcp_request}_sock() helpers") Fixes: af7ec1383361 ("bpf: Add bpf_skc_to_tcp6_sock() helper") Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200915182959.241101-1-kafai@fb.com --- net/core/filter.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index 2d62c25e0395..23e8ded0ec97 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -9522,7 +9522,7 @@ BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk) * trigger an explicit type generation here. */ BTF_TYPE_EMIT(struct tcp6_sock); - if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && + if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && sk->sk_family == AF_INET6) return (unsigned long)sk; @@ -9540,7 +9540,7 @@ const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = { BPF_CALL_1(bpf_skc_to_tcp_sock, struct sock *, sk) { - if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) + if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) return (unsigned long)sk; return (unsigned long)NULL; @@ -9558,12 +9558,12 @@ const struct bpf_func_proto bpf_skc_to_tcp_sock_proto = { BPF_CALL_1(bpf_skc_to_tcp_timewait_sock, struct sock *, sk) { #ifdef CONFIG_INET - if (sk->sk_prot == &tcp_prot && sk->sk_state == TCP_TIME_WAIT) + if (sk && sk->sk_prot == &tcp_prot && sk->sk_state == TCP_TIME_WAIT) return (unsigned long)sk; #endif #if IS_BUILTIN(CONFIG_IPV6) - if (sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_TIME_WAIT) + if (sk && sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_TIME_WAIT) return (unsigned long)sk; #endif @@ -9582,12 +9582,12 @@ const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto = { BPF_CALL_1(bpf_skc_to_tcp_request_sock, struct sock *, sk) { #ifdef CONFIG_INET - if (sk->sk_prot == &tcp_prot && sk->sk_state == TCP_NEW_SYN_RECV) + if (sk && sk->sk_prot == &tcp_prot && sk->sk_state == TCP_NEW_SYN_RECV) return (unsigned long)sk; #endif #if IS_BUILTIN(CONFIG_IPV6) - if (sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_NEW_SYN_RECV) + if (sk && sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_NEW_SYN_RECV) return (unsigned long)sk; #endif @@ -9609,7 +9609,7 @@ BPF_CALL_1(bpf_skc_to_udp6_sock, struct sock *, sk) * trigger an explicit type generation here. */ BTF_TYPE_EMIT(struct udp6_sock); - if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_UDP && + if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_UDP && sk->sk_type == SOCK_DGRAM && sk->sk_family == AF_INET6) return (unsigned long)sk; -- cgit v1.2.3-59-g8ed1b From ce880cb825fcc22d4e39046a6c3a3a7f6603883d Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 15 Sep 2020 17:44:01 -0700 Subject: bpf: Fix a rcu warning for bpffs map pretty-print Running selftest ./btf_btf -p the kernel had the following warning: [ 51.528185] WARNING: CPU: 3 PID: 1756 at kernel/bpf/hashtab.c:717 htab_map_get_next_key+0x2eb/0x300 [ 51.529217] Modules linked in: [ 51.529583] CPU: 3 PID: 1756 Comm: test_btf Not tainted 5.9.0-rc1+ #878 [ 51.530346] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.3-1.el7.centos 04/01/2014 [ 51.531410] RIP: 0010:htab_map_get_next_key+0x2eb/0x300 ... [ 51.542826] Call Trace: [ 51.543119] map_seq_next+0x53/0x80 [ 51.543528] seq_read+0x263/0x400 [ 51.543932] vfs_read+0xad/0x1c0 [ 51.544311] ksys_read+0x5f/0xe0 [ 51.544689] do_syscall_64+0x33/0x40 [ 51.545116] entry_SYSCALL_64_after_hwframe+0x44/0xa9 The related source code in kernel/bpf/hashtab.c: 709 static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) 710 { 711 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 712 struct hlist_nulls_head *head; 713 struct htab_elem *l, *next_l; 714 u32 hash, key_size; 715 int i = 0; 716 717 WARN_ON_ONCE(!rcu_read_lock_held()); In kernel/bpf/inode.c, bpffs map pretty print calls map->ops->map_get_next_key() without holding a rcu_read_lock(), hence causing the above warning. To fix the issue, just surrounding map->ops->map_get_next_key() with rcu read lock. Fixes: a26ca7c982cb ("bpf: btf: Add pretty print support to the basic arraymap") Reported-by: Alexei Starovoitov Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Cc: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200916004401.146277-1-yhs@fb.com --- kernel/bpf/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index fb878ba3f22f..18f4969552ac 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -226,10 +226,12 @@ static void *map_seq_next(struct seq_file *m, void *v, loff_t *pos) else prev_key = key; + rcu_read_lock(); if (map->ops->map_get_next_key(map, prev_key, key)) { map_iter(m)->done = true; - return NULL; + key = NULL; } + rcu_read_unlock(); return key; } -- cgit v1.2.3-59-g8ed1b From 5f1ab0f493f81517a7b47d859cd5d32e6af1eb9f Mon Sep 17 00:00:00 2001 From: Lu Wei Date: Wed, 16 Sep 2020 10:04:28 +0800 Subject: net: hns: kerneldoc fixes Fix some parameter description or spelling mistakes. Signed-off-by: Lu Wei Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_ethtool.c | 40 ++++++++++++------------ 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index 4eb50296f653..14e60c9e491d 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -463,8 +463,8 @@ static int __lb_clean_rings(struct hns_nic_priv *priv, /** * nic_run_loopback_test - run loopback test - * @nic_dev: net device - * @loopback_type: loopback type + * @ndev: net device + * @loop_mode: loopback mode */ static int __lb_run_test(struct net_device *ndev, enum hnae_loop loop_mode) @@ -572,7 +572,7 @@ static int __lb_down(struct net_device *ndev, enum hnae_loop loop) /** * hns_nic_self_test - self test - * @dev: net device + * @ndev: net device * @eth_test: test cmd * @data: test result */ @@ -633,7 +633,7 @@ static void hns_nic_self_test(struct net_device *ndev, /** * hns_nic_get_drvinfo - get net driver info - * @dev: net device + * @net_dev: net device * @drvinfo: driver info */ static void hns_nic_get_drvinfo(struct net_device *net_dev, @@ -658,7 +658,7 @@ static void hns_nic_get_drvinfo(struct net_device *net_dev, /** * hns_get_ringparam - get ring parameter - * @dev: net device + * @net_dev: net device * @param: ethtool parameter */ static void hns_get_ringparam(struct net_device *net_dev, @@ -683,7 +683,7 @@ static void hns_get_ringparam(struct net_device *net_dev, /** * hns_get_pauseparam - get pause parameter - * @dev: net device + * @net_dev: net device * @param: pause parameter */ static void hns_get_pauseparam(struct net_device *net_dev, @@ -701,7 +701,7 @@ static void hns_get_pauseparam(struct net_device *net_dev, /** * hns_set_pauseparam - set pause parameter - * @dev: net device + * @net_dev: net device * @param: pause parameter * * Return 0 on success, negative on failure @@ -725,7 +725,7 @@ static int hns_set_pauseparam(struct net_device *net_dev, /** * hns_get_coalesce - get coalesce info. - * @dev: net device + * @net_dev: net device * @ec: coalesce info. * * Return 0 on success, negative on failure. @@ -769,7 +769,7 @@ static int hns_get_coalesce(struct net_device *net_dev, /** * hns_set_coalesce - set coalesce info. - * @dev: net device + * @net_dev: net device * @ec: coalesce info. * * Return 0 on success, negative on failure. @@ -808,7 +808,7 @@ static int hns_set_coalesce(struct net_device *net_dev, /** * hns_get_channels - get channel info. - * @dev: net device + * @net_dev: net device * @ch: channel info. */ static void @@ -825,7 +825,7 @@ hns_get_channels(struct net_device *net_dev, struct ethtool_channels *ch) /** * get_ethtool_stats - get detail statistics. - * @dev: net device + * @netdev: net device * @stats: statistics info. * @data: statistics data. */ @@ -883,8 +883,8 @@ static void hns_get_ethtool_stats(struct net_device *netdev, /** * get_strings: Return a set of strings that describe the requested objects - * @dev: net device - * @stats: string set ID. + * @netdev: net device + * @stringset: string set ID. * @data: objects data. */ static void hns_get_strings(struct net_device *netdev, u32 stringset, u8 *data) @@ -972,7 +972,7 @@ static void hns_get_strings(struct net_device *netdev, u32 stringset, u8 *data) /** * nic_get_sset_count - get string set count witch returned by nic_get_strings. - * @dev: net device + * @netdev: net device * @stringset: string set index, 0: self test string; 1: statistics string. * * Return string set count. @@ -1006,7 +1006,7 @@ static int hns_get_sset_count(struct net_device *netdev, int stringset) /** * hns_phy_led_set - set phy LED status. - * @dev: net device + * @netdev: net device * @value: LED state. * * Return 0 on success, negative on failure. @@ -1028,7 +1028,7 @@ static int hns_phy_led_set(struct net_device *netdev, int value) /** * nic_set_phys_id - set phy identify LED. - * @dev: net device + * @netdev: net device * @state: LED state. * * Return 0 on success, negative on failure. @@ -1104,9 +1104,9 @@ hns_set_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state) /** * hns_get_regs - get net device register - * @dev: net device + * @net_dev: net device * @cmd: ethtool cmd - * @date: register data + * @data: register data */ static void hns_get_regs(struct net_device *net_dev, struct ethtool_regs *cmd, void *data) @@ -1126,7 +1126,7 @@ static void hns_get_regs(struct net_device *net_dev, struct ethtool_regs *cmd, /** * nic_get_regs_len - get total register len. - * @dev: net device + * @net_dev: net device * * Return total register len. */ @@ -1151,7 +1151,7 @@ static int hns_get_regs_len(struct net_device *net_dev) /** * hns_nic_nway_reset - nway reset - * @dev: net device + * @netdev: net device * * Return 0 on success, negative on failure */ -- cgit v1.2.3-59-g8ed1b From 34beb21594519ce64a55a498c2fe7d567bc1ca20 Mon Sep 17 00:00:00 2001 From: Mark Gray Date: Wed, 16 Sep 2020 05:19:35 -0400 Subject: geneve: add transport ports in route lookup for geneve This patch adds transport ports information for route lookup so that IPsec can select Geneve tunnel traffic to do encryption. This is needed for OVS/OVN IPsec with encrypted Geneve tunnels. This can be tested by configuring a host-host VPN using an IKE daemon and specifying port numbers. For example, for an Openswan-type configuration, the following parameters should be configured on both hosts and IPsec set up as-per normal: $ cat /etc/ipsec.conf conn in ... left=$IP1 right=$IP2 ... leftprotoport=udp/6081 rightprotoport=udp ... conn out ... left=$IP1 right=$IP2 ... leftprotoport=udp rightprotoport=udp/6081 ... The tunnel can then be setup using "ip" on both hosts (but changing the relevant IP addresses): $ ip link add tun type geneve id 1000 remote $IP2 $ ip addr add 192.168.0.1/24 dev tun $ ip link set tun up This can then be tested by pinging from $IP1: $ ping 192.168.0.2 Without this patch the traffic is unencrypted on the wire. Fixes: 2d07dc79fe04 ("geneve: add initial netdev driver for GENEVE tunnels") Signed-off-by: Qiuyu Xiao Signed-off-by: Mark Gray Reviewed-by: Greg Rose Signed-off-by: David S. Miller --- drivers/net/geneve.c | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index c71f994fbc73..974a244f45ba 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -777,7 +777,8 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb, struct net_device *dev, struct geneve_sock *gs4, struct flowi4 *fl4, - const struct ip_tunnel_info *info) + const struct ip_tunnel_info *info, + __be16 dport, __be16 sport) { bool use_cache = ip_tunnel_dst_cache_usable(skb, info); struct geneve_dev *geneve = netdev_priv(dev); @@ -793,6 +794,8 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb, fl4->flowi4_proto = IPPROTO_UDP; fl4->daddr = info->key.u.ipv4.dst; fl4->saddr = info->key.u.ipv4.src; + fl4->fl4_dport = dport; + fl4->fl4_sport = sport; tos = info->key.tos; if ((tos == 1) && !geneve->cfg.collect_md) { @@ -827,7 +830,8 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, struct net_device *dev, struct geneve_sock *gs6, struct flowi6 *fl6, - const struct ip_tunnel_info *info) + const struct ip_tunnel_info *info, + __be16 dport, __be16 sport) { bool use_cache = ip_tunnel_dst_cache_usable(skb, info); struct geneve_dev *geneve = netdev_priv(dev); @@ -843,6 +847,9 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, fl6->flowi6_proto = IPPROTO_UDP; fl6->daddr = info->key.u.ipv6.dst; fl6->saddr = info->key.u.ipv6.src; + fl6->fl6_dport = dport; + fl6->fl6_sport = sport; + prio = info->key.tos; if ((prio == 1) && !geneve->cfg.collect_md) { prio = ip_tunnel_get_dsfield(ip_hdr(skb), skb); @@ -889,7 +896,9 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; - rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info); + sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); + rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info, + geneve->cfg.info.key.tp_dst, sport); if (IS_ERR(rt)) return PTR_ERR(rt); @@ -919,7 +928,6 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, return -EMSGSIZE; } - sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); if (geneve->cfg.collect_md) { tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); ttl = key->ttl; @@ -974,7 +982,9 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; - dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info); + sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); + dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info, + geneve->cfg.info.key.tp_dst, sport); if (IS_ERR(dst)) return PTR_ERR(dst); @@ -1003,7 +1013,6 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, return -EMSGSIZE; } - sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); if (geneve->cfg.collect_md) { prio = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); ttl = key->ttl; @@ -1085,13 +1094,18 @@ static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) { struct ip_tunnel_info *info = skb_tunnel_info(skb); struct geneve_dev *geneve = netdev_priv(dev); + __be16 sport; if (ip_tunnel_info_af(info) == AF_INET) { struct rtable *rt; struct flowi4 fl4; + struct geneve_sock *gs4 = rcu_dereference(geneve->sock4); + sport = udp_flow_src_port(geneve->net, skb, + 1, USHRT_MAX, true); - rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info); + rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info, + geneve->cfg.info.key.tp_dst, sport); if (IS_ERR(rt)) return PTR_ERR(rt); @@ -1101,9 +1115,13 @@ static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) } else if (ip_tunnel_info_af(info) == AF_INET6) { struct dst_entry *dst; struct flowi6 fl6; + struct geneve_sock *gs6 = rcu_dereference(geneve->sock6); + sport = udp_flow_src_port(geneve->net, skb, + 1, USHRT_MAX, true); - dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info); + dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info, + geneve->cfg.info.key.tp_dst, sport); if (IS_ERR(dst)) return PTR_ERR(dst); @@ -1114,8 +1132,7 @@ static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) return -EINVAL; } - info->key.tp_src = udp_flow_src_port(geneve->net, skb, - 1, USHRT_MAX, true); + info->key.tp_src = sport; info->key.tp_dst = geneve->cfg.info.key.tp_dst; return 0; } -- cgit v1.2.3-59-g8ed1b From fd944dc24336922656a48f4608bfb41abdcdc4aa Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Wed, 16 Sep 2020 12:08:39 +0200 Subject: net: dsa: microchip: ksz8795: really set the correct number of ports The KSZ9477 and KSZ8795 use the port_cnt field differently: For the KSZ9477, it includes the CPU port(s), while for the KSZ8795, it doesn't. It would be a good cleanup to make the handling of both drivers match, but as a first step, fix the recently broken assignment of num_ports in the KSZ8795 driver (which completely broke probing, as the CPU port index was always failing the num_ports check). Fixes: af199a1a9cb0 ("net: dsa: microchip: set the correct number of ports") Signed-off-by: Matthias Schiffer Reviewed-by: Codrin Ciubotariu Signed-off-by: David S. Miller --- drivers/net/dsa/microchip/ksz8795.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c index f7d59d7b2cbe..f5779e152377 100644 --- a/drivers/net/dsa/microchip/ksz8795.c +++ b/drivers/net/dsa/microchip/ksz8795.c @@ -1260,7 +1260,7 @@ static int ksz8795_switch_init(struct ksz_device *dev) } /* set the real number of ports */ - dev->ds->num_ports = dev->port_cnt; + dev->ds->num_ports = dev->port_cnt + 1; return 0; } -- cgit v1.2.3-59-g8ed1b From 44144185951a0ff9b50bf21c0cd1f79ff688e5ca Mon Sep 17 00:00:00 2001 From: Andres Beltran Date: Wed, 16 Sep 2020 11:47:27 +0200 Subject: hv_netvsc: Add validation for untrusted Hyper-V values For additional robustness in the face of Hyper-V errors or malicious behavior, validate all values that originate from packets that Hyper-V has sent to the guest in the host-to-guest ring buffer. Ensure that invalid values cannot cause indexing off the end of an array, or subvert an existing validation via integer overflow. Ensure that outgoing packets do not have any leftover guest memory that has not been zeroed out. Signed-off-by: Andres Beltran Co-developed-by: Andrea Parri (Microsoft) Signed-off-by: Andrea Parri (Microsoft) Cc: "David S. Miller" Cc: Jakub Kicinski Cc: netdev@vger.kernel.org Reviewed-by: Haiyang Zhang Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 4 ++ drivers/net/hyperv/netvsc.c | 124 ++++++++++++++++++++++++++++++++++---- drivers/net/hyperv/netvsc_drv.c | 7 +++ drivers/net/hyperv/rndis_filter.c | 73 +++++++++++++++++++--- 4 files changed, 188 insertions(+), 20 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index ff33f27cdcd3..a0f338cf1424 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -847,6 +847,10 @@ struct nvsp_message { #define NETVSC_XDP_HDRM 256 +#define NETVSC_XFER_HEADER_SIZE(rng_cnt) \ + (offsetof(struct vmtransfer_page_packet_header, ranges) + \ + (rng_cnt) * sizeof(struct vmtransfer_page_range)) + struct multi_send_data { struct sk_buff *skb; /* skb containing the pkt */ struct hv_netvsc_packet *pkt; /* netvsc pkt pending */ diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 41f5cf0bb997..5a57d1985bae 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -388,6 +388,15 @@ static int netvsc_init_buf(struct hv_device *device, net_device->recv_section_size = resp->sections[0].sub_alloc_size; net_device->recv_section_cnt = resp->sections[0].num_sub_allocs; + /* Ensure buffer will not overflow */ + if (net_device->recv_section_size < NETVSC_MTU_MIN || (u64)net_device->recv_section_size * + (u64)net_device->recv_section_cnt > (u64)buf_size) { + netdev_err(ndev, "invalid recv_section_size %u\n", + net_device->recv_section_size); + ret = -EINVAL; + goto cleanup; + } + /* Setup receive completion ring. * Add 1 to the recv_section_cnt because at least one entry in a * ring buffer has to be empty. @@ -460,6 +469,12 @@ static int netvsc_init_buf(struct hv_device *device, /* Parse the response */ net_device->send_section_size = init_packet->msg. v1_msg.send_send_buf_complete.section_size; + if (net_device->send_section_size < NETVSC_MTU_MIN) { + netdev_err(ndev, "invalid send_section_size %u\n", + net_device->send_section_size); + ret = -EINVAL; + goto cleanup; + } /* Section count is simply the size divided by the section size. */ net_device->send_section_cnt = buf_size / net_device->send_section_size; @@ -731,12 +746,49 @@ static void netvsc_send_completion(struct net_device *ndev, int budget) { const struct nvsp_message *nvsp_packet = hv_pkt_data(desc); + u32 msglen = hv_pkt_datalen(desc); + + /* Ensure packet is big enough to read header fields */ + if (msglen < sizeof(struct nvsp_message_header)) { + netdev_err(ndev, "nvsp_message length too small: %u\n", msglen); + return; + } switch (nvsp_packet->hdr.msg_type) { case NVSP_MSG_TYPE_INIT_COMPLETE: + if (msglen < sizeof(struct nvsp_message_header) + + sizeof(struct nvsp_message_init_complete)) { + netdev_err(ndev, "nvsp_msg length too small: %u\n", + msglen); + return; + } + fallthrough; + case NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE: + if (msglen < sizeof(struct nvsp_message_header) + + sizeof(struct nvsp_1_message_send_receive_buffer_complete)) { + netdev_err(ndev, "nvsp_msg1 length too small: %u\n", + msglen); + return; + } + fallthrough; + case NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE: + if (msglen < sizeof(struct nvsp_message_header) + + sizeof(struct nvsp_1_message_send_send_buffer_complete)) { + netdev_err(ndev, "nvsp_msg1 length too small: %u\n", + msglen); + return; + } + fallthrough; + case NVSP_MSG5_TYPE_SUBCHANNEL: + if (msglen < sizeof(struct nvsp_message_header) + + sizeof(struct nvsp_5_subchannel_complete)) { + netdev_err(ndev, "nvsp_msg5 length too small: %u\n", + msglen); + return; + } /* Copy the response back */ memcpy(&net_device->channel_init_pkt, nvsp_packet, sizeof(struct nvsp_message)); @@ -1117,19 +1169,28 @@ static void enq_receive_complete(struct net_device *ndev, static int netvsc_receive(struct net_device *ndev, struct netvsc_device *net_device, struct netvsc_channel *nvchan, - const struct vmpacket_descriptor *desc, - const struct nvsp_message *nvsp) + const struct vmpacket_descriptor *desc) { struct net_device_context *net_device_ctx = netdev_priv(ndev); struct vmbus_channel *channel = nvchan->channel; const struct vmtransfer_page_packet_header *vmxferpage_packet = container_of(desc, const struct vmtransfer_page_packet_header, d); + const struct nvsp_message *nvsp = hv_pkt_data(desc); + u32 msglen = hv_pkt_datalen(desc); u16 q_idx = channel->offermsg.offer.sub_channel_index; char *recv_buf = net_device->recv_buf; u32 status = NVSP_STAT_SUCCESS; int i; int count = 0; + /* Ensure packet is big enough to read header fields */ + if (msglen < sizeof(struct nvsp_message_header)) { + netif_err(net_device_ctx, rx_err, ndev, + "invalid nvsp header, length too small: %u\n", + msglen); + return 0; + } + /* Make sure this is a valid nvsp packet */ if (unlikely(nvsp->hdr.msg_type != NVSP_MSG1_TYPE_SEND_RNDIS_PKT)) { netif_err(net_device_ctx, rx_err, ndev, @@ -1138,6 +1199,14 @@ static int netvsc_receive(struct net_device *ndev, return 0; } + /* Validate xfer page pkt header */ + if ((desc->offset8 << 3) < sizeof(struct vmtransfer_page_packet_header)) { + netif_err(net_device_ctx, rx_err, ndev, + "Invalid xfer page pkt, offset too small: %u\n", + desc->offset8 << 3); + return 0; + } + if (unlikely(vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID)) { netif_err(net_device_ctx, rx_err, ndev, "Invalid xfer page set id - expecting %x got %x\n", @@ -1148,6 +1217,14 @@ static int netvsc_receive(struct net_device *ndev, count = vmxferpage_packet->range_cnt; + /* Check count for a valid value */ + if (NETVSC_XFER_HEADER_SIZE(count) > desc->offset8 << 3) { + netif_err(net_device_ctx, rx_err, ndev, + "Range count is not valid: %d\n", + count); + return 0; + } + /* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */ for (i = 0; i < count; i++) { u32 offset = vmxferpage_packet->ranges[i].byte_offset; @@ -1155,7 +1232,8 @@ static int netvsc_receive(struct net_device *ndev, void *data; int ret; - if (unlikely(offset + buflen > net_device->recv_buf_size)) { + if (unlikely(offset > net_device->recv_buf_size || + buflen > net_device->recv_buf_size - offset)) { nvchan->rsc.cnt = 0; status = NVSP_STAT_FAIL; netif_err(net_device_ctx, rx_err, ndev, @@ -1194,6 +1272,13 @@ static void netvsc_send_table(struct net_device *ndev, u32 count, offset, *tab; int i; + /* Ensure packet is big enough to read send_table fields */ + if (msglen < sizeof(struct nvsp_message_header) + + sizeof(struct nvsp_5_send_indirect_table)) { + netdev_err(ndev, "nvsp_v5_msg length too small: %u\n", msglen); + return; + } + count = nvmsg->msg.v5_msg.send_table.count; offset = nvmsg->msg.v5_msg.send_table.offset; @@ -1225,10 +1310,18 @@ static void netvsc_send_table(struct net_device *ndev, } static void netvsc_send_vf(struct net_device *ndev, - const struct nvsp_message *nvmsg) + const struct nvsp_message *nvmsg, + u32 msglen) { struct net_device_context *net_device_ctx = netdev_priv(ndev); + /* Ensure packet is big enough to read its fields */ + if (msglen < sizeof(struct nvsp_message_header) + + sizeof(struct nvsp_4_send_vf_association)) { + netdev_err(ndev, "nvsp_v4_msg length too small: %u\n", msglen); + return; + } + net_device_ctx->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated; net_device_ctx->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial; netdev_info(ndev, "VF slot %u %s\n", @@ -1238,16 +1331,24 @@ static void netvsc_send_vf(struct net_device *ndev, static void netvsc_receive_inband(struct net_device *ndev, struct netvsc_device *nvscdev, - const struct nvsp_message *nvmsg, - u32 msglen) + const struct vmpacket_descriptor *desc) { + const struct nvsp_message *nvmsg = hv_pkt_data(desc); + u32 msglen = hv_pkt_datalen(desc); + + /* Ensure packet is big enough to read header fields */ + if (msglen < sizeof(struct nvsp_message_header)) { + netdev_err(ndev, "inband nvsp_message length too small: %u\n", msglen); + return; + } + switch (nvmsg->hdr.msg_type) { case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE: netvsc_send_table(ndev, nvscdev, nvmsg, msglen); break; case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION: - netvsc_send_vf(ndev, nvmsg); + netvsc_send_vf(ndev, nvmsg, msglen); break; } } @@ -1261,23 +1362,20 @@ static int netvsc_process_raw_pkt(struct hv_device *device, { struct vmbus_channel *channel = nvchan->channel; const struct nvsp_message *nvmsg = hv_pkt_data(desc); - u32 msglen = hv_pkt_datalen(desc); trace_nvsp_recv(ndev, channel, nvmsg); switch (desc->type) { case VM_PKT_COMP: - netvsc_send_completion(ndev, net_device, channel, - desc, budget); + netvsc_send_completion(ndev, net_device, channel, desc, budget); break; case VM_PKT_DATA_USING_XFER_PAGES: - return netvsc_receive(ndev, net_device, nvchan, - desc, nvmsg); + return netvsc_receive(ndev, net_device, nvchan, desc); break; case VM_PKT_DATA_INBAND: - netvsc_receive_inband(ndev, net_device, nvmsg, msglen); + netvsc_receive_inband(ndev, net_device, desc); break; default: diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index b7db3766f5b9..9869e390875e 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -748,6 +748,13 @@ void netvsc_linkstatus_callback(struct net_device *net, struct netvsc_reconfig *event; unsigned long flags; + /* Ensure the packet is big enough to access its fields */ + if (resp->msg_len - RNDIS_HEADER_SIZE < sizeof(struct rndis_indicate_status)) { + netdev_err(net, "invalid rndis_indicate_status packet, len: %u\n", + resp->msg_len); + return; + } + /* Update the physical link speed when changing to another vSwitch */ if (indicate->status == RNDIS_STATUS_LINK_SPEED_CHANGE) { u32 speed; diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index b81ceba38218..12ad471ac5e1 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -275,6 +275,16 @@ static void rndis_filter_receive_response(struct net_device *ndev, return; } + /* Ensure the packet is big enough to read req_id. Req_id is the 1st + * field in any request/response message, so the payload should have at + * least sizeof(u32) bytes + */ + if (resp->msg_len - RNDIS_HEADER_SIZE < sizeof(u32)) { + netdev_err(ndev, "rndis msg_len too small: %u\n", + resp->msg_len); + return; + } + spin_lock_irqsave(&dev->request_lock, flags); list_for_each_entry(request, &dev->req_list, list_ent) { /* @@ -331,8 +341,9 @@ static void rndis_filter_receive_response(struct net_device *ndev, * Get the Per-Packet-Info with the specified type * return NULL if not found. */ -static inline void *rndis_get_ppi(struct rndis_packet *rpkt, - u32 type, u8 internal) +static inline void *rndis_get_ppi(struct net_device *ndev, + struct rndis_packet *rpkt, + u32 rpkt_len, u32 type, u8 internal) { struct rndis_per_packet_info *ppi; int len; @@ -340,11 +351,36 @@ static inline void *rndis_get_ppi(struct rndis_packet *rpkt, if (rpkt->per_pkt_info_offset == 0) return NULL; + /* Validate info_offset and info_len */ + if (rpkt->per_pkt_info_offset < sizeof(struct rndis_packet) || + rpkt->per_pkt_info_offset > rpkt_len) { + netdev_err(ndev, "Invalid per_pkt_info_offset: %u\n", + rpkt->per_pkt_info_offset); + return NULL; + } + + if (rpkt->per_pkt_info_len > rpkt_len - rpkt->per_pkt_info_offset) { + netdev_err(ndev, "Invalid per_pkt_info_len: %u\n", + rpkt->per_pkt_info_len); + return NULL; + } + ppi = (struct rndis_per_packet_info *)((ulong)rpkt + rpkt->per_pkt_info_offset); len = rpkt->per_pkt_info_len; while (len > 0) { + /* Validate ppi_offset and ppi_size */ + if (ppi->size > len) { + netdev_err(ndev, "Invalid ppi size: %u\n", ppi->size); + continue; + } + + if (ppi->ppi_offset >= ppi->size) { + netdev_err(ndev, "Invalid ppi_offset: %u\n", ppi->ppi_offset); + continue; + } + if (ppi->type == type && ppi->internal == internal) return (void *)((ulong)ppi + ppi->ppi_offset); len -= ppi->size; @@ -388,14 +424,29 @@ static int rndis_filter_receive_data(struct net_device *ndev, const struct ndis_pkt_8021q_info *vlan; const struct rndis_pktinfo_id *pktinfo_id; const u32 *hash_info; - u32 data_offset; + u32 data_offset, rpkt_len; void *data; bool rsc_more = false; int ret; + /* Ensure data_buflen is big enough to read header fields */ + if (data_buflen < RNDIS_HEADER_SIZE + sizeof(struct rndis_packet)) { + netdev_err(ndev, "invalid rndis pkt, data_buflen too small: %u\n", + data_buflen); + return NVSP_STAT_FAIL; + } + + /* Validate rndis_pkt offset */ + if (rndis_pkt->data_offset >= data_buflen - RNDIS_HEADER_SIZE) { + netdev_err(ndev, "invalid rndis packet offset: %u\n", + rndis_pkt->data_offset); + return NVSP_STAT_FAIL; + } + /* Remove the rndis header and pass it back up the stack */ data_offset = RNDIS_HEADER_SIZE + rndis_pkt->data_offset; + rpkt_len = data_buflen - RNDIS_HEADER_SIZE; data_buflen -= data_offset; /* @@ -410,13 +461,13 @@ static int rndis_filter_receive_data(struct net_device *ndev, return NVSP_STAT_FAIL; } - vlan = rndis_get_ppi(rndis_pkt, IEEE_8021Q_INFO, 0); + vlan = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, IEEE_8021Q_INFO, 0); - csum_info = rndis_get_ppi(rndis_pkt, TCPIP_CHKSUM_PKTINFO, 0); + csum_info = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, TCPIP_CHKSUM_PKTINFO, 0); - hash_info = rndis_get_ppi(rndis_pkt, NBL_HASH_VALUE, 0); + hash_info = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, NBL_HASH_VALUE, 0); - pktinfo_id = rndis_get_ppi(rndis_pkt, RNDIS_PKTINFO_ID, 1); + pktinfo_id = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, RNDIS_PKTINFO_ID, 1); data = (void *)msg + data_offset; @@ -474,6 +525,14 @@ int rndis_filter_receive(struct net_device *ndev, if (netif_msg_rx_status(net_device_ctx)) dump_rndis_message(ndev, rndis_msg); + /* Validate incoming rndis_message packet */ + if (buflen < RNDIS_HEADER_SIZE || rndis_msg->msg_len < RNDIS_HEADER_SIZE || + buflen < rndis_msg->msg_len) { + netdev_err(ndev, "Invalid rndis_msg (buflen: %u, msg_len: %u)\n", + buflen, rndis_msg->msg_len); + return NVSP_STAT_FAIL; + } + switch (rndis_msg->ndis_msg_type) { case RNDIS_MSG_PACKET: return rndis_filter_receive_data(ndev, net_dev, nvchan, -- cgit v1.2.3-59-g8ed1b From f4a26a9b311d7ff9db461278faf2869d06496ef8 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Wed, 16 Sep 2020 21:50:39 +0530 Subject: cxgb4: fix memory leak during module unload Fix the memory leak in mps during module unload path by freeing mps reference entries if the list adpter->mps_ref is not already empty Fixes: 28b3870578ef ("cxgb4: Re-work the logic for mps refcounting") Signed-off-by: Raju Rangoju Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_mps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_mps.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_mps.c index b1a073eea60b..a020e8490681 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_mps.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_mps.c @@ -229,7 +229,7 @@ void cxgb4_free_mps_ref_entries(struct adapter *adap) { struct mps_entries_ref *mps_entry, *tmp; - if (!list_empty(&adap->mps_ref)) + if (list_empty(&adap->mps_ref)) return; spin_lock(&adap->mps_ref_lock); -- cgit v1.2.3-59-g8ed1b From 83f9a9c8c1edc222846dc1bde6e3479703e8e5a3 Mon Sep 17 00:00:00 2001 From: Xie He Date: Wed, 16 Sep 2020 09:49:18 -0700 Subject: drivers/net/wan/lapbether: Make skb->protocol consistent with the header This driver is a virtual driver stacked on top of Ethernet interfaces. When this driver transmits data on the Ethernet device, the skb->protocol setting is inconsistent with the Ethernet header prepended to the skb. This causes a user listening on the Ethernet interface with an AF_PACKET socket, to see different sll_protocol values for incoming and outgoing frames, because incoming frames would have this value set by parsing the Ethernet header. This patch changes the skb->protocol value for outgoing Ethernet frames, making it consistent with the Ethernet header prepended. This makes a user listening on the Ethernet device with an AF_PACKET socket, to see the same sll_protocol value for incoming and outgoing frames. Cc: Martin Schiller Signed-off-by: Xie He Signed-off-by: David S. Miller --- drivers/net/wan/lapbether.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c index 732a6c1851f5..b6be2454b8bd 100644 --- a/drivers/net/wan/lapbether.c +++ b/drivers/net/wan/lapbether.c @@ -198,8 +198,6 @@ static void lapbeth_data_transmit(struct net_device *ndev, struct sk_buff *skb) struct net_device *dev; int size = skb->len; - skb->protocol = htons(ETH_P_X25); - ptr = skb_push(skb, 2); *ptr++ = size % 256; @@ -210,6 +208,8 @@ static void lapbeth_data_transmit(struct net_device *ndev, struct sk_buff *skb) skb->dev = dev = lapbeth->ethdev; + skb->protocol = htons(ETH_P_DEC); + skb_reset_network_header(skb); dev_hard_header(skb, dev, ETH_P_DEC, bcast_addr, NULL, 0); -- cgit v1.2.3-59-g8ed1b From 9fb030a70431a2a2a1b292dbf0b2f399cc072c16 Mon Sep 17 00:00:00 2001 From: Xie He Date: Wed, 16 Sep 2020 14:25:07 -0700 Subject: drivers/net/wan/hdlc: Set skb->protocol before transmitting This patch sets skb->protocol before transmitting frames on the HDLC device, so that a user listening on the HDLC device with an AF_PACKET socket will see outgoing frames' sll_protocol field correctly set and consistent with that of incoming frames. 1. Control frames in hdlc_cisco and hdlc_ppp When these drivers send control frames, skb->protocol is not set. This value should be set to htons(ETH_P_HDLC), because when receiving control frames, their skb->protocol is set to htons(ETH_P_HDLC). When receiving, hdlc_type_trans in hdlc.h is called, which then calls cisco_type_trans or ppp_type_trans. The skb->protocol of control frames is set to htons(ETH_P_HDLC) so that the control frames can be received by hdlc_rcv in hdlc.c, which calls cisco_rx or ppp_rx to process the control frames. 2. hdlc_fr When this driver sends control frames, skb->protocol is set to internal values used in this driver. When this driver sends data frames (from upper stacked PVC devices), skb->protocol is the same as that of the user data packet being sent on the upper PVC device (for normal PVC devices), or is htons(ETH_P_802_3) (for Ethernet-emulating PVC devices). However, skb->protocol for both control frames and data frames should be set to htons(ETH_P_HDLC), because when receiving, all frames received on the HDLC device will have their skb->protocol set to htons(ETH_P_HDLC). When receiving, hdlc_type_trans in hdlc.h is called, and because this driver doesn't provide a type_trans function in struct hdlc_proto, all frames will have their skb->protocol set to htons(ETH_P_HDLC). The frames are then received by hdlc_rcv in hdlc.c, which calls fr_rx to process the frames (control frames are consumed and data frames are re-received on upper PVC devices). Cc: Krzysztof Halasa Signed-off-by: Xie He Signed-off-by: David S. Miller --- drivers/net/wan/hdlc_cisco.c | 1 + drivers/net/wan/hdlc_fr.c | 3 +++ drivers/net/wan/hdlc_ppp.c | 1 + 3 files changed, 5 insertions(+) diff --git a/drivers/net/wan/hdlc_cisco.c b/drivers/net/wan/hdlc_cisco.c index 444130655d8e..cb5898f7d68c 100644 --- a/drivers/net/wan/hdlc_cisco.c +++ b/drivers/net/wan/hdlc_cisco.c @@ -118,6 +118,7 @@ static void cisco_keepalive_send(struct net_device *dev, u32 type, skb_put(skb, sizeof(struct cisco_packet)); skb->priority = TC_PRIO_CONTROL; skb->dev = dev; + skb->protocol = htons(ETH_P_HDLC); skb_reset_network_header(skb); dev_queue_xmit(skb); diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c index 12b35404cd8e..d6cfd51613ed 100644 --- a/drivers/net/wan/hdlc_fr.c +++ b/drivers/net/wan/hdlc_fr.c @@ -433,6 +433,8 @@ static netdev_tx_t pvc_xmit(struct sk_buff *skb, struct net_device *dev) if (pvc->state.fecn) /* TX Congestion counter */ dev->stats.tx_compressed++; skb->dev = pvc->frad; + skb->protocol = htons(ETH_P_HDLC); + skb_reset_network_header(skb); dev_queue_xmit(skb); return NETDEV_TX_OK; } @@ -555,6 +557,7 @@ static void fr_lmi_send(struct net_device *dev, int fullrep) skb_put(skb, i); skb->priority = TC_PRIO_CONTROL; skb->dev = dev; + skb->protocol = htons(ETH_P_HDLC); skb_reset_network_header(skb); dev_queue_xmit(skb); diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c index 16f33d1ffbfb..64f855651336 100644 --- a/drivers/net/wan/hdlc_ppp.c +++ b/drivers/net/wan/hdlc_ppp.c @@ -251,6 +251,7 @@ static void ppp_tx_cp(struct net_device *dev, u16 pid, u8 code, skb->priority = TC_PRIO_CONTROL; skb->dev = dev; + skb->protocol = htons(ETH_P_HDLC); skb_reset_network_header(skb); skb_queue_tail(&tx_queue, skb); } -- cgit v1.2.3-59-g8ed1b From 19a83d36f9837e8bd27435ebb31564a717a5d15a Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 17 Sep 2020 01:04:10 +0200 Subject: ethtool: add and use message type for tunnel info reply Tunnel offload info code uses ETHTOOL_MSG_TUNNEL_INFO_GET message type (cmd field in genetlink header) for replies to tunnel info netlink request, i.e. the same value as the request have. This is a problem because we are using two separate enums for userspace to kernel and kernel to userspace message types so that this ETHTOOL_MSG_TUNNEL_INFO_GET (28) collides with ETHTOOL_MSG_CABLE_TEST_TDR_NTF which is what message type 28 means for kernel to userspace messages. As the tunnel info request reached mainline in 5.9 merge window, we should still be able to fix the reply message type without breaking backward compatibility. Fixes: c7d759eb7b12 ("ethtool: add tunnel info interface") Signed-off-by: Michal Kubecek Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/networking/ethtool-netlink.rst | 3 +++ include/uapi/linux/ethtool_netlink.h | 1 + net/ethtool/tunnels.c | 4 ++-- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index d53bcb31645a..b5a79881551f 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -206,6 +206,7 @@ Userspace to kernel: ``ETHTOOL_MSG_TSINFO_GET`` get timestamping info ``ETHTOOL_MSG_CABLE_TEST_ACT`` action start cable test ``ETHTOOL_MSG_CABLE_TEST_TDR_ACT`` action start raw TDR cable test + ``ETHTOOL_MSG_TUNNEL_INFO_GET`` get tunnel offload info ===================================== ================================ Kernel to userspace: @@ -239,6 +240,7 @@ Kernel to userspace: ``ETHTOOL_MSG_TSINFO_GET_REPLY`` timestamping info ``ETHTOOL_MSG_CABLE_TEST_NTF`` Cable test results ``ETHTOOL_MSG_CABLE_TEST_TDR_NTF`` Cable test TDR results + ``ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY`` tunnel offload info ===================================== ================================= ``GET`` requests are sent by userspace applications to retrieve device @@ -1363,4 +1365,5 @@ are netlink only. ``ETHTOOL_SFECPARAM`` n/a n/a ''ETHTOOL_MSG_CABLE_TEST_ACT'' n/a ''ETHTOOL_MSG_CABLE_TEST_TDR_ACT'' + n/a ``ETHTOOL_MSG_TUNNEL_INFO_GET`` =================================== ===================================== diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 5dcd24cb33ea..72ba36be9655 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -79,6 +79,7 @@ enum { ETHTOOL_MSG_TSINFO_GET_REPLY, ETHTOOL_MSG_CABLE_TEST_NTF, ETHTOOL_MSG_CABLE_TEST_TDR_NTF, + ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, diff --git a/net/ethtool/tunnels.c b/net/ethtool/tunnels.c index 84f23289475b..d93bf2da0f34 100644 --- a/net/ethtool/tunnels.c +++ b/net/ethtool/tunnels.c @@ -200,7 +200,7 @@ int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info) reply_len = ret + ethnl_reply_header_size(); rskb = ethnl_reply_init(reply_len, req_info.dev, - ETHTOOL_MSG_TUNNEL_INFO_GET, + ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY, ETHTOOL_A_TUNNEL_INFO_HEADER, info, &reply_payload); if (!rskb) { @@ -273,7 +273,7 @@ int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb) goto cont; ehdr = ethnl_dump_put(skb, cb, - ETHTOOL_MSG_TUNNEL_INFO_GET); + ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY); if (!ehdr) { ret = -EMSGSIZE; goto out; -- cgit v1.2.3-59-g8ed1b From c2b727df7caa33876e7066bde090f40001b6d643 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 16 Sep 2020 20:43:09 -0700 Subject: net: phy: Avoid NPD upon phy_detach() when driver is unbound If we have unbound the PHY driver prior to calling phy_detach() (often via phy_disconnect()) then we can cause a NULL pointer de-reference accessing the driver owner member. The steps to reproduce are: echo unimac-mdio-0:01 > /sys/class/net/eth0/phydev/driver/unbind ip link set eth0 down Fixes: cafe8df8b9bc ("net: phy: Fix lack of reference count on PHY driver") Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index b93b40cda54a..5dab6be6fc38 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1682,7 +1682,8 @@ void phy_detach(struct phy_device *phydev) phy_led_triggers_unregister(phydev); - module_put(phydev->mdio.dev.driver->owner); + if (phydev->mdio.dev.driver) + module_put(phydev->mdio.dev.driver->owner); /* If the device had no specific driver before (i.e. - it * was using the generic driver), we unbind the device -- cgit v1.2.3-59-g8ed1b From 5116a8ade333b6c2e180782139c9c516a437b21c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 16 Sep 2020 20:43:10 -0700 Subject: net: phy: Do not warn in phy_stop() on PHY_DOWN When phy_is_started() was added to catch incorrect PHY states, phy_stop() would not be qualified against PHY_DOWN. It is possible to reach that state when the PHY driver has been unbound and the network device is then brought down. Fixes: 2b3e88ea6528 ("net: phy: improve phy state checking") Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 735a806045ac..8947d58f2a25 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -996,7 +996,7 @@ void phy_stop(struct phy_device *phydev) { struct net_device *dev = phydev->attached_dev; - if (!phy_is_started(phydev)) { + if (!phy_is_started(phydev) && phydev->state != PHY_DOWN) { WARN(1, "called from state %s\n", phy_state_to_str(phydev->state)); return; -- cgit v1.2.3-59-g8ed1b From ce000c61b0bf96506ad4a537f3218a716b6fadcd Mon Sep 17 00:00:00 2001 From: Wei Li Date: Thu, 17 Sep 2020 20:29:50 +0800 Subject: hinic: fix potential resource leak In rx_request_irq(), it will just return what irq_set_affinity_hint() returns. If it is failed, the napi and irq requested are not freed properly. So add exits for failures to handle these. Signed-off-by: Wei Li Signed-off-by: David S. Miller --- drivers/net/ethernet/huawei/hinic/hinic_rx.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c index 5bee951fe9d4..d0072f5e7efc 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c @@ -543,18 +543,25 @@ static int rx_request_irq(struct hinic_rxq *rxq) if (err) { netif_err(nic_dev, drv, rxq->netdev, "Failed to set RX interrupt coalescing attribute\n"); - rx_del_napi(rxq); - return err; + goto err_req_irq; } err = request_irq(rq->irq, rx_irq, 0, rxq->irq_name, rxq); - if (err) { - rx_del_napi(rxq); - return err; - } + if (err) + goto err_req_irq; cpumask_set_cpu(qp->q_id % num_online_cpus(), &rq->affinity_mask); - return irq_set_affinity_hint(rq->irq, &rq->affinity_mask); + err = irq_set_affinity_hint(rq->irq, &rq->affinity_mask); + if (err) + goto err_irq_affinity; + + return 0; + +err_irq_affinity: + free_irq(rq->irq, rxq); +err_req_irq: + rx_del_napi(rxq); + return err; } static void rx_free_irq(struct hinic_rxq *rxq) -- cgit v1.2.3-59-g8ed1b From 5f6857e808a8bd078296575b417c4b9d160b9779 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 17 Sep 2020 10:52:57 -0700 Subject: nfp: use correct define to return NONE fec struct ethtool_fecparam carries bitmasks not bit numbers. We want to return 1 (NONE), not 0. Fixes: 0d0870938337 ("nfp: implement ethtool FEC mode settings") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Jesse Brandeburg Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 6eb9fb9a1814..9c9ae33d84ce 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -829,8 +829,8 @@ nfp_port_get_fecparam(struct net_device *netdev, struct nfp_eth_table_port *eth_port; struct nfp_port *port; - param->active_fec = ETHTOOL_FEC_NONE_BIT; - param->fec = ETHTOOL_FEC_NONE_BIT; + param->active_fec = ETHTOOL_FEC_NONE; + param->fec = ETHTOOL_FEC_NONE; port = nfp_port_from_netdev(netdev); eth_port = nfp_port_get_eth_port(port); -- cgit v1.2.3-59-g8ed1b From 1f38b8c564b8c24132428f6a4d04e05366fa10f4 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 15 Sep 2020 10:59:44 +0200 Subject: mac80211: extend AQL aggregation estimation to HE and fix unit mismatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The unit of the return value of ieee80211_get_rate_duration is nanoseconds, not microseconds. Adjust the duration checks to account for that. For higher data rates, allow larger estimated aggregation sizes, and add some values for HE as well, which can use much larger aggregates. Since small packets with high data rates can now lead to duration values too small for info->tx_time_est, return a minimum of 4us. Fixes: f01cfbaf9b29 ("mac80211: improve AQL aggregation estimation for low data rates") Signed-off-by: Felix Fietkau Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20200915085945.3782-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/airtime.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/net/mac80211/airtime.c b/net/mac80211/airtime.c index 314973033d03..45140e535151 100644 --- a/net/mac80211/airtime.c +++ b/net/mac80211/airtime.c @@ -668,20 +668,26 @@ u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw, * This will not be very accurate, but much better than simply * assuming un-aggregated tx in all cases. */ - if (duration > 400) /* <= VHT20 MCS2 1S */ + if (duration > 400 * 1024) /* <= VHT20 MCS2 1S */ agg_shift = 1; - else if (duration > 250) /* <= VHT20 MCS3 1S or MCS1 2S */ + else if (duration > 250 * 1024) /* <= VHT20 MCS3 1S or MCS1 2S */ agg_shift = 2; - else if (duration > 150) /* <= VHT20 MCS5 1S or MCS3 2S */ + else if (duration > 150 * 1024) /* <= VHT20 MCS5 1S or MCS2 2S */ agg_shift = 3; - else + else if (duration > 70 * 1024) /* <= VHT20 MCS5 2S */ agg_shift = 4; + else if (stat.encoding != RX_ENC_HE || + duration > 20 * 1024) /* <= HE40 MCS6 2S */ + agg_shift = 5; + else + agg_shift = 6; duration *= len; duration /= AVG_PKT_SIZE; duration /= 1024; + duration += (overhead >> agg_shift); - return duration + (overhead >> agg_shift); + return max_t(u32, duration, 4); } if (!conf) -- cgit v1.2.3-59-g8ed1b From 8e280369b9076dc08ce13c802c1687b81b4cccd4 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 15 Sep 2020 10:59:45 +0200 Subject: mac80211: add AQL support for VHT160 tx rates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When converting from struct ieee80211_tx_rate to ieee80211_rx_status, there was one check missing to fill in the bandwidth for 160 MHz Signed-off-by: Felix Fietkau Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20200915085945.3782-2-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/airtime.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/mac80211/airtime.c b/net/mac80211/airtime.c index 45140e535151..26d2f8ba7029 100644 --- a/net/mac80211/airtime.c +++ b/net/mac80211/airtime.c @@ -560,7 +560,9 @@ static int ieee80211_fill_rx_status(struct ieee80211_rx_status *stat, if (rate->idx < 0 || !rate->count) return -1; - if (rate->flags & IEEE80211_TX_RC_80_MHZ_WIDTH) + if (rate->flags & IEEE80211_TX_RC_160_MHZ_WIDTH) + stat->bw = RATE_INFO_BW_160; + else if (rate->flags & IEEE80211_TX_RC_80_MHZ_WIDTH) stat->bw = RATE_INFO_BW_80; else if (rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH) stat->bw = RATE_INFO_BW_40; -- cgit v1.2.3-59-g8ed1b From b959ba9f468b1c581f40e92661ad58b093abaa03 Mon Sep 17 00:00:00 2001 From: Necip Fazil Yildiran Date: Wed, 9 Sep 2020 12:54:53 +0300 Subject: lib80211: fix unmet direct dependendices config warning when !CRYPTO When LIB80211_CRYPT_CCMP is enabled and CRYPTO is disabled, it results in unmet direct dependencies config warning. The reason is that LIB80211_CRYPT_CCMP selects CRYPTO_AES and CRYPTO_CCM, which are subordinate to CRYPTO. This is reproducible with CRYPTO disabled and R8188EU enabled, where R8188EU selects LIB80211_CRYPT_CCMP but does not select or depend on CRYPTO. Honor the kconfig menu hierarchy to remove kconfig dependency warnings. Fixes: a11e2f85481c ("lib80211: use crypto API ccm(aes) transform for CCMP processing") Signed-off-by: Necip Fazil Yildiran Link: https://lore.kernel.org/r/20200909095452.3080-1-fazilyildiran@gmail.com Signed-off-by: Johannes Berg --- net/wireless/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index faf74850a1b5..27026f587fa6 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -217,6 +217,7 @@ config LIB80211_CRYPT_WEP config LIB80211_CRYPT_CCMP tristate + select CRYPTO select CRYPTO_AES select CRYPTO_CCM -- cgit v1.2.3-59-g8ed1b From 412a84b5714af56f3eb648bba155107b5edddfdf Mon Sep 17 00:00:00 2001 From: Aloka Dixit Date: Fri, 11 Sep 2020 01:11:35 +0000 Subject: mac80211: Fix radiotap header channel flag for 6GHz band Radiotap header field 'Channel flags' has '2 GHz spectrum' set to 'true' for 6GHz packet. Change it to 5GHz as there isn't a separate option available for 6GHz. Signed-off-by: Aloka Dixit Link: https://lore.kernel.org/r/010101747ab7b703-1d7c9851-1594-43bf-81f7-f79ce7a67cc6-000000@us-west-2.amazonses.com Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 836cde516a18..a959ebf56852 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -451,7 +451,8 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, else if (status->bw == RATE_INFO_BW_5) channel_flags |= IEEE80211_CHAN_QUARTER; - if (status->band == NL80211_BAND_5GHZ) + if (status->band == NL80211_BAND_5GHZ || + status->band == NL80211_BAND_6GHZ) channel_flags |= IEEE80211_CHAN_OFDM | IEEE80211_CHAN_5GHZ; else if (status->encoding != RX_ENC_LEGACY) channel_flags |= IEEE80211_CHAN_DYN | IEEE80211_CHAN_2GHZ; -- cgit v1.2.3-59-g8ed1b From 780a8c9efc65f6d86acd44794495cedcd32eeb26 Mon Sep 17 00:00:00 2001 From: Wen Gong Date: Fri, 11 Sep 2020 10:29:02 +0000 Subject: mac80211: do not disable HE if HT is missing on 2.4 GHz VHT is not supported on 2.4 GHz, but HE is; don't disable HE if HT is missing there, do that only on 5 GHz (6 GHz is only HE). Fixes: 57fa5e85d53ce51 ("mac80211: determine chandef from HE 6 GHz operation") Signed-off-by: Wen Gong Link: https://lore.kernel.org/r/010101747cb617f2-593c5410-1648-4a42-97a0-f3646a5a6dd1-000000@us-west-2.amazonses.com [rewrite the commit message] Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ac870309b911..2e400b0ff696 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4861,6 +4861,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband; struct cfg80211_chan_def chandef; bool is_6ghz = cbss->channel->band == NL80211_BAND_6GHZ; + bool is_5ghz = cbss->channel->band == NL80211_BAND_5GHZ; struct ieee80211_bss *bss = (void *)cbss->priv; int ret; u32 i; @@ -4879,7 +4880,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, ifmgd->flags |= IEEE80211_STA_DISABLE_HE; } - if (!sband->vht_cap.vht_supported && !is_6ghz) { + if (!sband->vht_cap.vht_supported && is_5ghz) { ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; ifmgd->flags |= IEEE80211_STA_DISABLE_HE; } -- cgit v1.2.3-59-g8ed1b From c0de8776af6543e10d1a5c8969679fd9f6b66fa9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 17 Sep 2020 11:52:23 +0200 Subject: cfg80211: fix 6 GHz channel conversion We shouldn't accept any channels bigger than 233, fix that. Reported-by: Amar Fixes: d1a1646c0de7 ("cfg80211: adapt to new channelization of the 6GHz band") Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20200917115222.312ba6f1d461.I3a8c8fbcc3cc019814fd9cd0aced7eb591626136@changeid Signed-off-by: Johannes Berg --- net/wireless/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/util.c b/net/wireless/util.c index 4a9ff9ef513f..6fa99df52f86 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -95,7 +95,7 @@ u32 ieee80211_channel_to_freq_khz(int chan, enum nl80211_band band) /* see 802.11ax D6.1 27.3.23.2 */ if (chan == 2) return MHZ_TO_KHZ(5935); - if (chan <= 253) + if (chan <= 233) return MHZ_TO_KHZ(5950 + chan * 5); break; case NL80211_BAND_60GHZ: -- cgit v1.2.3-59-g8ed1b From 3bd5c7a28a7c3aba07a2d300d43f8e988809e147 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 17 Sep 2020 14:50:31 +0200 Subject: mac80211: do not allow bigger VHT MPDUs than the hardware supports Limit maximum VHT MPDU size by local capability. Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20200917125031.45009-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/vht.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index 9c6045f9c24d..d1b64d0751f2 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -168,10 +168,7 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, /* take some capabilities as-is */ cap_info = le32_to_cpu(vht_cap_ie->vht_cap_info); vht_cap->cap = cap_info; - vht_cap->cap &= IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895 | - IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 | - IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 | - IEEE80211_VHT_CAP_RXLDPC | + vht_cap->cap &= IEEE80211_VHT_CAP_RXLDPC | IEEE80211_VHT_CAP_VHT_TXOP_PS | IEEE80211_VHT_CAP_HTC_VHT | IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK | @@ -180,6 +177,9 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN | IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN; + vht_cap->cap |= min_t(u32, cap_info & IEEE80211_VHT_CAP_MAX_MPDU_MASK, + own_cap.cap & IEEE80211_VHT_CAP_MAX_MPDU_MASK); + /* and some based on our own capabilities */ switch (own_cap.cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) { case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ: -- cgit v1.2.3-59-g8ed1b From 75bcbd6913de649601f4e7d3fb6d2b5effc24e9e Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 18 Sep 2020 13:53:04 +0200 Subject: mac80211: fix 80 MHz association to 160/80+80 AP on 6 GHz When trying to associate to an AP support 180 or 80+80 MHz on 6 GHz with a STA that only has 80 Mhz support the cf2 field inside the chandef will get set causing the association to fail when trying to validate the chandef. Fix this by checking the support flags prior to setting cf2. Fixes: 57fa5e85d53ce ("mac80211: determine chandef from HE 6 GHz operation") Signed-off-by: John Crispin Link: https://lore.kernel.org/r/20200918115304.1135693-1-john@phrozen.org [reword commit message a bit] Signed-off-by: Johannes Berg --- net/mac80211/util.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index c8504ffc71a1..8d3bfc0fe176 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3353,9 +3353,10 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata, he_chandef.center_freq1 = ieee80211_channel_to_frequency(he_6ghz_oper->ccfs0, NL80211_BAND_6GHZ); - he_chandef.center_freq2 = - ieee80211_channel_to_frequency(he_6ghz_oper->ccfs1, - NL80211_BAND_6GHZ); + if (support_80_80 || support_160) + he_chandef.center_freq2 = + ieee80211_channel_to_frequency(he_6ghz_oper->ccfs1, + NL80211_BAND_6GHZ); } if (!cfg80211_chandef_valid(&he_chandef)) { -- cgit v1.2.3-59-g8ed1b From 9dda66acddcbcd5c9e94a1292c679738ba999345 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 18 Sep 2020 04:07:23 +0300 Subject: net: mscc: ocelot: fix race condition with TX timestamping The TX-timestampable skb is added late to the ocelot_port->tx_skbs. It is in a race with the TX timestamp IRQ, which checks that queue trying to match the timestamp with the skb by the ts_id. The skb should be added to the queue before the IRQ can fire. Fixes: 4e3b0468e6d7 ("net: mscc: PTP Hardware Clock (PHC) support") Signed-off-by: Vladimir Oltean Reviewed-by: Horatiu Vultur Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot_net.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index 0668d23cdbfa..cacabc23215a 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -330,6 +330,7 @@ static int ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev) u8 grp = 0; /* Send everything on CPU group 0 */ unsigned int i, count, last; int port = priv->chip_port; + bool do_tstamp; val = ocelot_read(ocelot, QS_INJ_STATUS); if (!(val & QS_INJ_STATUS_FIFO_RDY(BIT(grp))) || @@ -344,10 +345,14 @@ static int ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev) info.vid = skb_vlan_tag_get(skb); /* Check if timestamping is needed */ + do_tstamp = (ocelot_port_add_txtstamp_skb(ocelot_port, skb) == 0); + if (ocelot->ptp && shinfo->tx_flags & SKBTX_HW_TSTAMP) { info.rew_op = ocelot_port->ptp_cmd; - if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) + if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) { info.rew_op |= (ocelot_port->ts_id % 4) << 3; + ocelot_port->ts_id++; + } } ocelot_gen_ifh(ifh, &info); @@ -380,12 +385,9 @@ static int ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev) dev->stats.tx_packets++; dev->stats.tx_bytes += skb->len; - if (!ocelot_port_add_txtstamp_skb(ocelot_port, skb)) { - ocelot_port->ts_id++; - return NETDEV_TX_OK; - } + if (!do_tstamp) + dev_kfree_skb_any(skb); - dev_kfree_skb_any(skb); return NETDEV_TX_OK; } -- cgit v1.2.3-59-g8ed1b From 6565243c0677aa2befa5a953cf11bc7b4a6f0a47 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 18 Sep 2020 04:07:24 +0300 Subject: net: mscc: ocelot: add locking for the port TX timestamp ID The ocelot_port->ts_id is used to: (a) populate skb->cb[0] for matching the TX timestamp in the PTP IRQ with an skb. (b) populate the REW_OP from the injection header of the ongoing skb. Only then is ocelot_port->ts_id incremented. This is a problem because, at least theoretically, another timestampable skb might use the same ocelot_port->ts_id before that is incremented. Normally all transmit calls are serialized by the netdev transmit spinlock, but in this case, ocelot_port_add_txtstamp_skb() is also called by DSA, which has started declaring the NETIF_F_LLTX feature since commit 2b86cb829976 ("net: dsa: declare lockless TX feature for slave ports"). So the logic of using and incrementing the timestamp id should be atomic per port. The solution is to use the global ocelot_port->ts_id only while protected by the associated ocelot_port->ts_id_lock. That's where we populate skb->cb[0]. Note that for ocelot, ocelot_port_add_txtstamp_skb is called for the actual skb, but for felix, it is called for the skb's clone. That is something which will also be changed in the future. Signed-off-by: Vladimir Oltean Reviewed-by: Horatiu Vultur Reviewed-by: Florian Fainelli Tested-by: Alexandre Belloni Reviewed-by: Alexandre Belloni Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot.c | 8 +++++++- drivers/net/ethernet/mscc/ocelot_net.c | 6 ++---- include/soc/mscc/ocelot.h | 1 + net/dsa/tag_ocelot.c | 11 +++++++---- 4 files changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 5abb7d2b0a9e..83eb7c325061 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -421,10 +421,15 @@ int ocelot_port_add_txtstamp_skb(struct ocelot_port *ocelot_port, if (ocelot->ptp && shinfo->tx_flags & SKBTX_HW_TSTAMP && ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) { + spin_lock(&ocelot_port->ts_id_lock); + shinfo->tx_flags |= SKBTX_IN_PROGRESS; /* Store timestamp ID in cb[0] of sk_buff */ - skb->cb[0] = ocelot_port->ts_id % 4; + skb->cb[0] = ocelot_port->ts_id; + ocelot_port->ts_id = (ocelot_port->ts_id + 1) % 4; skb_queue_tail(&ocelot_port->tx_skbs, skb); + + spin_unlock(&ocelot_port->ts_id_lock); return 0; } return -ENODATA; @@ -1300,6 +1305,7 @@ void ocelot_init_port(struct ocelot *ocelot, int port) struct ocelot_port *ocelot_port = ocelot->ports[port]; skb_queue_head_init(&ocelot_port->tx_skbs); + spin_lock_init(&ocelot_port->ts_id_lock); /* Basic L2 initialization */ diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index cacabc23215a..8490e42e9e2d 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -349,10 +349,8 @@ static int ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev) if (ocelot->ptp && shinfo->tx_flags & SKBTX_HW_TSTAMP) { info.rew_op = ocelot_port->ptp_cmd; - if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) { - info.rew_op |= (ocelot_port->ts_id % 4) << 3; - ocelot_port->ts_id++; - } + if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) + info.rew_op |= skb->cb[0] << 3; } ocelot_gen_ifh(ifh, &info); diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index da369b12005f..4521dd602ddc 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -566,6 +566,7 @@ struct ocelot_port { u8 ptp_cmd; struct sk_buff_head tx_skbs; u8 ts_id; + spinlock_t ts_id_lock; phy_interface_t phy_mode; diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c index 42f327c06dca..b4fc05cafaa6 100644 --- a/net/dsa/tag_ocelot.c +++ b/net/dsa/tag_ocelot.c @@ -160,11 +160,14 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb, packing(injection, &qos_class, 19, 17, OCELOT_TAG_LEN, PACK, 0); if (ocelot->ptp && (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { + struct sk_buff *clone = DSA_SKB_CB(skb)->clone; + rew_op = ocelot_port->ptp_cmd; - if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) { - rew_op |= (ocelot_port->ts_id % 4) << 3; - ocelot_port->ts_id++; - } + /* Retrieve timestamp ID populated inside skb->cb[0] of the + * clone by ocelot_port_add_txtstamp_skb + */ + if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) + rew_op |= clone->cb[0] << 3; packing(injection, &rew_op, 125, 117, OCELOT_TAG_LEN, PACK, 0); } -- cgit v1.2.3-59-g8ed1b From a63ed92d217fcc2a36a0fbb77d80d8b9ee31aac1 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 18 Sep 2020 04:07:25 +0300 Subject: net: dsa: seville: fix buffer size of the queue system The VSC9953 Seville switch has 2 megabits of buffer split into 4360 words of 60 bytes each. Signed-off-by: Vladimir Oltean Reviewed-by: Horatiu Vultur Reviewed-by: Florian Fainelli Tested-by: Alexandre Belloni Reviewed-by: Alexandre Belloni Signed-off-by: David S. Miller --- drivers/net/dsa/ocelot/seville_vsc9953.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/ocelot/seville_vsc9953.c b/drivers/net/dsa/ocelot/seville_vsc9953.c index 625b1891d955..0fdeff22a76c 100644 --- a/drivers/net/dsa/ocelot/seville_vsc9953.c +++ b/drivers/net/dsa/ocelot/seville_vsc9953.c @@ -1008,7 +1008,7 @@ static const struct felix_info seville_info_vsc9953 = { .vcap_is2_keys = vsc9953_vcap_is2_keys, .vcap_is2_actions = vsc9953_vcap_is2_actions, .vcap = vsc9953_vcap_props, - .shared_queue_sz = 128 * 1024, + .shared_queue_sz = 2048 * 1024, .num_mact_rows = 2048, .num_ports = 10, .mdio_bus_alloc = vsc9953_mdio_bus_alloc, -- cgit v1.2.3-59-g8ed1b From c9d4b2cf16021d1f65e764f4035d7d1616e3bda3 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 18 Sep 2020 04:07:26 +0300 Subject: net: mscc: ocelot: check for errors on memory allocation of ports Do not proceed probing if we couldn't allocate memory for the ports array, just error out. Signed-off-by: Vladimir Oltean Reviewed-by: Horatiu Vultur Reviewed-by: Florian Fainelli Tested-by: Alexandre Belloni Reviewed-by: Alexandre Belloni Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot_vsc7514.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index 65408bc994c4..904ea299a5e8 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -993,6 +993,10 @@ static int mscc_ocelot_probe(struct platform_device *pdev) ocelot->ports = devm_kcalloc(&pdev->dev, ocelot->num_phys_ports, sizeof(struct ocelot_port *), GFP_KERNEL); + if (!ocelot->ports) { + err = -ENOMEM; + goto out_put_ports; + } ocelot->vcap_is2_keys = vsc7514_vcap_is2_keys; ocelot->vcap_is2_actions = vsc7514_vcap_is2_actions; -- cgit v1.2.3-59-g8ed1b From d1cc0e932039cc40357baf037ab403d1d2e228fd Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 18 Sep 2020 04:07:27 +0300 Subject: net: mscc: ocelot: error checking when calling ocelot_init() ocelot_init() allocates memory, resets the switch and polls for a status register, things which can fail. Stop probing the driver in that case, and propagate the error result. Signed-off-by: Vladimir Oltean Reviewed-by: Horatiu Vultur Reviewed-by: Florian Fainelli Tested-by: Alexandre Belloni Reviewed-by: Alexandre Belloni Signed-off-by: David S. Miller --- drivers/net/dsa/ocelot/felix.c | 5 ++++- drivers/net/ethernet/mscc/ocelot_vsc7514.c | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 04bfa6e465ff..66da447c4096 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -585,7 +585,10 @@ static int felix_setup(struct dsa_switch *ds) if (err) return err; - ocelot_init(ocelot); + err = ocelot_init(ocelot); + if (err) + return err; + if (ocelot->ptp) { err = ocelot_init_timestamp(ocelot, &ocelot_ptp_clock_info); if (err) { diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index 904ea299a5e8..a1cbb20a7757 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -1002,7 +1002,10 @@ static int mscc_ocelot_probe(struct platform_device *pdev) ocelot->vcap_is2_actions = vsc7514_vcap_is2_actions; ocelot->vcap = vsc7514_vcap_props; - ocelot_init(ocelot); + err = ocelot_init(ocelot); + if (err) + goto out_put_ports; + if (ocelot->ptp) { err = ocelot_init_timestamp(ocelot, &ocelot_ptp_clock_info); if (err) { -- cgit v1.2.3-59-g8ed1b From 7c411799e1b3e0efa3a50e44e98cce6a771441c6 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 18 Sep 2020 04:07:28 +0300 Subject: net: mscc: ocelot: refactor ports parsing code into a dedicated function mscc_ocelot_probe() is already pretty large and hard to follow. So move the code for parsing ports in a separate function. This makes it easier for the next patch to just call mscc_ocelot_release_ports from the error path of mscc_ocelot_init_ports. Signed-off-by: Vladimir Oltean Reviewed-by: Horatiu Vultur Reviewed-by: Florian Fainelli Tested-by: Alexandre Belloni Reviewed-by: Alexandre Belloni Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot_vsc7514.c | 209 +++++++++++++++-------------- 1 file changed, 110 insertions(+), 99 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index a1cbb20a7757..ff4a01424953 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -896,11 +896,115 @@ static struct ptp_clock_info ocelot_ptp_clock_info = { .enable = ocelot_ptp_enable, }; +static int mscc_ocelot_init_ports(struct platform_device *pdev, + struct device_node *ports) +{ + struct ocelot *ocelot = platform_get_drvdata(pdev); + struct device_node *portnp; + int err; + + ocelot->ports = devm_kcalloc(ocelot->dev, ocelot->num_phys_ports, + sizeof(struct ocelot_port *), GFP_KERNEL); + if (!ocelot->ports) + return -ENOMEM; + + /* No NPI port */ + ocelot_configure_cpu(ocelot, -1, OCELOT_TAG_PREFIX_NONE, + OCELOT_TAG_PREFIX_NONE); + + for_each_available_child_of_node(ports, portnp) { + struct ocelot_port_private *priv; + struct ocelot_port *ocelot_port; + struct device_node *phy_node; + phy_interface_t phy_mode; + struct phy_device *phy; + struct regmap *target; + struct resource *res; + struct phy *serdes; + char res_name[8]; + u32 port; + + if (of_property_read_u32(portnp, "reg", &port)) + continue; + + snprintf(res_name, sizeof(res_name), "port%d", port); + + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, + res_name); + target = ocelot_regmap_init(ocelot, res); + if (IS_ERR(target)) + continue; + + phy_node = of_parse_phandle(portnp, "phy-handle", 0); + if (!phy_node) + continue; + + phy = of_phy_find_device(phy_node); + of_node_put(phy_node); + if (!phy) + continue; + + err = ocelot_probe_port(ocelot, port, target, phy); + if (err) { + of_node_put(portnp); + return err; + } + + ocelot_port = ocelot->ports[port]; + priv = container_of(ocelot_port, struct ocelot_port_private, + port); + + of_get_phy_mode(portnp, &phy_mode); + + ocelot_port->phy_mode = phy_mode; + + switch (ocelot_port->phy_mode) { + case PHY_INTERFACE_MODE_NA: + continue; + case PHY_INTERFACE_MODE_SGMII: + break; + case PHY_INTERFACE_MODE_QSGMII: + /* Ensure clock signals and speed is set on all + * QSGMII links + */ + ocelot_port_writel(ocelot_port, + DEV_CLOCK_CFG_LINK_SPEED + (OCELOT_SPEED_1000), + DEV_CLOCK_CFG); + break; + default: + dev_err(ocelot->dev, + "invalid phy mode for port%d, (Q)SGMII only\n", + port); + of_node_put(portnp); + return -EINVAL; + } + + serdes = devm_of_phy_get(ocelot->dev, portnp, NULL); + if (IS_ERR(serdes)) { + err = PTR_ERR(serdes); + if (err == -EPROBE_DEFER) + dev_dbg(ocelot->dev, "deferring probe\n"); + else + dev_err(ocelot->dev, + "missing SerDes phys for port%d\n", + port); + + of_node_put(portnp); + return err; + } + + priv->serdes = serdes; + } + + return 0; +} + static int mscc_ocelot_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; - struct device_node *ports, *portnp; int err, irq_xtr, irq_ptp_rdy; + struct device_node *ports; struct ocelot *ocelot; struct regmap *hsio; unsigned int i; @@ -985,19 +1089,12 @@ static int mscc_ocelot_probe(struct platform_device *pdev) ports = of_get_child_by_name(np, "ethernet-ports"); if (!ports) { - dev_err(&pdev->dev, "no ethernet-ports child node found\n"); + dev_err(ocelot->dev, "no ethernet-ports child node found\n"); return -ENODEV; } ocelot->num_phys_ports = of_get_child_count(ports); - ocelot->ports = devm_kcalloc(&pdev->dev, ocelot->num_phys_ports, - sizeof(struct ocelot_port *), GFP_KERNEL); - if (!ocelot->ports) { - err = -ENOMEM; - goto out_put_ports; - } - ocelot->vcap_is2_keys = vsc7514_vcap_is2_keys; ocelot->vcap_is2_actions = vsc7514_vcap_is2_actions; ocelot->vcap = vsc7514_vcap_props; @@ -1006,6 +1103,10 @@ static int mscc_ocelot_probe(struct platform_device *pdev) if (err) goto out_put_ports; + err = mscc_ocelot_init_ports(pdev, ports); + if (err) + goto out_put_ports; + if (ocelot->ptp) { err = ocelot_init_timestamp(ocelot, &ocelot_ptp_clock_info); if (err) { @@ -1015,96 +1116,6 @@ static int mscc_ocelot_probe(struct platform_device *pdev) } } - /* No NPI port */ - ocelot_configure_cpu(ocelot, -1, OCELOT_TAG_PREFIX_NONE, - OCELOT_TAG_PREFIX_NONE); - - for_each_available_child_of_node(ports, portnp) { - struct ocelot_port_private *priv; - struct ocelot_port *ocelot_port; - struct device_node *phy_node; - phy_interface_t phy_mode; - struct phy_device *phy; - struct regmap *target; - struct resource *res; - struct phy *serdes; - char res_name[8]; - u32 port; - - if (of_property_read_u32(portnp, "reg", &port)) - continue; - - snprintf(res_name, sizeof(res_name), "port%d", port); - - res = platform_get_resource_byname(pdev, IORESOURCE_MEM, - res_name); - target = ocelot_regmap_init(ocelot, res); - if (IS_ERR(target)) - continue; - - phy_node = of_parse_phandle(portnp, "phy-handle", 0); - if (!phy_node) - continue; - - phy = of_phy_find_device(phy_node); - of_node_put(phy_node); - if (!phy) - continue; - - err = ocelot_probe_port(ocelot, port, target, phy); - if (err) { - of_node_put(portnp); - goto out_put_ports; - } - - ocelot_port = ocelot->ports[port]; - priv = container_of(ocelot_port, struct ocelot_port_private, - port); - - of_get_phy_mode(portnp, &phy_mode); - - ocelot_port->phy_mode = phy_mode; - - switch (ocelot_port->phy_mode) { - case PHY_INTERFACE_MODE_NA: - continue; - case PHY_INTERFACE_MODE_SGMII: - break; - case PHY_INTERFACE_MODE_QSGMII: - /* Ensure clock signals and speed is set on all - * QSGMII links - */ - ocelot_port_writel(ocelot_port, - DEV_CLOCK_CFG_LINK_SPEED - (OCELOT_SPEED_1000), - DEV_CLOCK_CFG); - break; - default: - dev_err(ocelot->dev, - "invalid phy mode for port%d, (Q)SGMII only\n", - port); - of_node_put(portnp); - err = -EINVAL; - goto out_put_ports; - } - - serdes = devm_of_phy_get(ocelot->dev, portnp, NULL); - if (IS_ERR(serdes)) { - err = PTR_ERR(serdes); - if (err == -EPROBE_DEFER) - dev_dbg(ocelot->dev, "deferring probe\n"); - else - dev_err(ocelot->dev, - "missing SerDes phys for port%d\n", - port); - - of_node_put(portnp); - goto out_put_ports; - } - - priv->serdes = serdes; - } - register_netdevice_notifier(&ocelot_netdevice_nb); register_switchdev_notifier(&ocelot_switchdev_nb); register_switchdev_blocking_notifier(&ocelot_switchdev_blocking_nb); -- cgit v1.2.3-59-g8ed1b From 22cdb493de54ca4cb161553376f7b02fcaca2328 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 18 Sep 2020 04:07:29 +0300 Subject: net: mscc: ocelot: unregister net devices on unbind This driver was not unregistering its network interfaces on unbind. Now it is. Signed-off-by: Vladimir Oltean Reviewed-by: Horatiu Vultur Reviewed-by: Florian Fainelli Tested-by: Alexandre Belloni Reviewed-by: Alexandre Belloni Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot_vsc7514.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index ff4a01424953..252c49b5f22b 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -896,6 +896,26 @@ static struct ptp_clock_info ocelot_ptp_clock_info = { .enable = ocelot_ptp_enable, }; +static void mscc_ocelot_release_ports(struct ocelot *ocelot) +{ + int port; + + for (port = 0; port < ocelot->num_phys_ports; port++) { + struct ocelot_port_private *priv; + struct ocelot_port *ocelot_port; + + ocelot_port = ocelot->ports[port]; + if (!ocelot_port) + continue; + + priv = container_of(ocelot_port, struct ocelot_port_private, + port); + + unregister_netdev(priv->dev); + free_netdev(priv->dev); + } +} + static int mscc_ocelot_init_ports(struct platform_device *pdev, struct device_node *ports) { @@ -1132,6 +1152,7 @@ static int mscc_ocelot_remove(struct platform_device *pdev) struct ocelot *ocelot = platform_get_drvdata(pdev); ocelot_deinit_timestamp(ocelot); + mscc_ocelot_release_ports(ocelot); ocelot_deinit(ocelot); unregister_switchdev_blocking_notifier(&ocelot_switchdev_blocking_nb); unregister_switchdev_notifier(&ocelot_switchdev_nb); -- cgit v1.2.3-59-g8ed1b From e5fb512d81d021b7c7a0c2547c3dafb9de759285 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 18 Sep 2020 04:07:30 +0300 Subject: net: mscc: ocelot: deinitialize only initialized ports Currently mscc_ocelot_init_ports() will skip initializing a port when it doesn't have a phy-handle, so the ocelot->ports[port] pointer will be NULL. Take this into consideration when tearing down the driver, and add a new function ocelot_deinit_port() to the switch library, mirror of ocelot_init_port(), which needs to be called by the driver for all ports it has initialized. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Tested-by: Alexandre Belloni Reviewed-by: Alexandre Belloni Signed-off-by: David S. Miller --- drivers/net/dsa/ocelot/felix.c | 3 +++ drivers/net/ethernet/mscc/ocelot.c | 16 ++++++++-------- drivers/net/ethernet/mscc/ocelot_vsc7514.c | 2 ++ include/soc/mscc/ocelot.h | 1 + 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 66da447c4096..01427cd08448 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -643,10 +643,13 @@ static void felix_teardown(struct dsa_switch *ds) { struct ocelot *ocelot = ds->priv; struct felix *felix = ocelot_to_felix(ocelot); + int port; if (felix->info->mdio_bus_free) felix->info->mdio_bus_free(ocelot); + for (port = 0; port < ocelot->num_phys_ports; port++) + ocelot_deinit_port(ocelot, port); ocelot_deinit_timestamp(ocelot); /* stop workqueue thread */ ocelot_deinit(ocelot); diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 83eb7c325061..8518e1d60da4 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1550,18 +1550,18 @@ EXPORT_SYMBOL(ocelot_init); void ocelot_deinit(struct ocelot *ocelot) { - struct ocelot_port *port; - int i; - cancel_delayed_work(&ocelot->stats_work); destroy_workqueue(ocelot->stats_queue); mutex_destroy(&ocelot->stats_lock); - - for (i = 0; i < ocelot->num_phys_ports; i++) { - port = ocelot->ports[i]; - skb_queue_purge(&port->tx_skbs); - } } EXPORT_SYMBOL(ocelot_deinit); +void ocelot_deinit_port(struct ocelot *ocelot, int port) +{ + struct ocelot_port *ocelot_port = ocelot->ports[port]; + + skb_queue_purge(&ocelot_port->tx_skbs); +} +EXPORT_SYMBOL(ocelot_deinit_port); + MODULE_LICENSE("Dual MIT/GPL"); diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index 252c49b5f22b..e02fb8bfab63 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -908,6 +908,8 @@ static void mscc_ocelot_release_ports(struct ocelot *ocelot) if (!ocelot_port) continue; + ocelot_deinit_port(ocelot, port); + priv = container_of(ocelot_port, struct ocelot_port_private, port); diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 4521dd602ddc..0ac4e7fba086 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -678,6 +678,7 @@ void ocelot_configure_cpu(struct ocelot *ocelot, int npi, int ocelot_init(struct ocelot *ocelot); void ocelot_deinit(struct ocelot *ocelot); void ocelot_init_port(struct ocelot *ocelot, int port); +void ocelot_deinit_port(struct ocelot *ocelot, int port); /* DSA callbacks */ void ocelot_port_enable(struct ocelot *ocelot, int port, -- cgit v1.2.3-59-g8ed1b From fc25fa97976b1c2fd226ae2b43e1f5abb61b77c9 Mon Sep 17 00:00:00 2001 From: Luo bin Date: Fri, 18 Sep 2020 12:09:38 +0800 Subject: hinic: fix sending pkts from core while self testing Call netif_tx_disable firstly before starting doing self-test to avoid sending packet from networking core and self-test packet simultaneously which may cause self-test failure or hw abnormal. Fixes: 4aa218a4fe77 ("hinic: add self test support") Signed-off-by: Luo bin Signed-off-by: David S. Miller --- drivers/net/ethernet/huawei/hinic/hinic_ethtool.c | 4 ++++ drivers/net/ethernet/huawei/hinic/hinic_tx.c | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c index 6bb65ade1d77..c340d9acba80 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c @@ -1654,6 +1654,7 @@ static void hinic_diag_test(struct net_device *netdev, } netif_carrier_off(netdev); + netif_tx_disable(netdev); err = do_lp_test(nic_dev, eth_test->flags, LP_DEFAULT_TIME, &test_index); @@ -1662,9 +1663,12 @@ static void hinic_diag_test(struct net_device *netdev, data[test_index] = 1; } + netif_tx_wake_all_queues(netdev); + err = hinic_port_link_state(nic_dev, &link_state); if (!err && link_state == HINIC_LINK_STATE_UP) netif_carrier_on(netdev); + } static int hinic_set_phys_id(struct net_device *netdev, diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c index 2b418b568767..c1f81e9144a1 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c @@ -717,8 +717,8 @@ static int free_tx_poll(struct napi_struct *napi, int budget) netdev_txq = netdev_get_tx_queue(txq->netdev, qp->q_id); __netif_tx_lock(netdev_txq, smp_processor_id()); - - netif_wake_subqueue(nic_dev->netdev, qp->q_id); + if (!netif_testing(nic_dev->netdev)) + netif_wake_subqueue(nic_dev->netdev, qp->q_id); __netif_tx_unlock(netdev_txq); -- cgit v1.2.3-59-g8ed1b From a3a94156c15737513a8ddda2b8770d2659bbc617 Mon Sep 17 00:00:00 2001 From: Lu Wei Date: Fri, 18 Sep 2020 14:36:46 +0800 Subject: net: hns: kerneldoc fixes Fix some parameter description mistakes. Signed-off-by: Lu Wei Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c index ed3829ae4ef1..a769273b36f7 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c @@ -334,7 +334,7 @@ static void hns_dsaf_xge_srst_by_port_acpi(struct dsaf_device *dsaf_dev, * bit6-11 for ppe0-5 * bit12-17 for roce0-5 * bit18-19 for com/dfx - * @enable: false - request reset , true - drop reset + * @dereset: false - request reset , true - drop reset */ static void hns_dsaf_srst_chns(struct dsaf_device *dsaf_dev, u32 msk, bool dereset) @@ -357,7 +357,7 @@ hns_dsaf_srst_chns(struct dsaf_device *dsaf_dev, u32 msk, bool dereset) * bit6-11 for ppe0-5 * bit12-17 for roce0-5 * bit18-19 for com/dfx - * @enable: false - request reset , true - drop reset + * @dereset: false - request reset , true - drop reset */ static void hns_dsaf_srst_chns_acpi(struct dsaf_device *dsaf_dev, u32 msk, bool dereset) -- cgit v1.2.3-59-g8ed1b From a128592799b89d3985331d261fa4d41868ee4b04 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Fri, 18 Sep 2020 17:22:25 +0800 Subject: dpaa2-eth: fix a build warning in dpmac.c Fix below sparse warning in dpmac.c. warning: cast to restricted __le64 Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa2/dpmac-cmd.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpmac-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dpmac-cmd.h index 3ea51dd9374b..a24b20f76938 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpmac-cmd.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpmac-cmd.h @@ -66,8 +66,8 @@ struct dpmac_cmd_get_counter { }; struct dpmac_rsp_get_counter { - u64 pad; - u64 counter; + __le64 pad; + __le64 counter; }; #endif /* _FSL_DPMAC_CMD_H */ -- cgit v1.2.3-59-g8ed1b From db7cd91a4be15e1485d6b58c6afc8761c59c4efb Mon Sep 17 00:00:00 2001 From: Necip Fazil Yildiran Date: Thu, 17 Sep 2020 19:46:43 +0300 Subject: net: ipv6: fix kconfig dependency warning for IPV6_SEG6_HMAC When IPV6_SEG6_HMAC is enabled and CRYPTO is disabled, it results in the following Kbuild warning: WARNING: unmet direct dependencies detected for CRYPTO_HMAC Depends on [n]: CRYPTO [=n] Selected by [y]: - IPV6_SEG6_HMAC [=y] && NET [=y] && INET [=y] && IPV6 [=y] WARNING: unmet direct dependencies detected for CRYPTO_SHA1 Depends on [n]: CRYPTO [=n] Selected by [y]: - IPV6_SEG6_HMAC [=y] && NET [=y] && INET [=y] && IPV6 [=y] WARNING: unmet direct dependencies detected for CRYPTO_SHA256 Depends on [n]: CRYPTO [=n] Selected by [y]: - IPV6_SEG6_HMAC [=y] && NET [=y] && INET [=y] && IPV6 [=y] The reason is that IPV6_SEG6_HMAC selects CRYPTO_HMAC, CRYPTO_SHA1, and CRYPTO_SHA256 without depending on or selecting CRYPTO while those configs are subordinate to CRYPTO. Honor the kconfig menu hierarchy to remove kconfig dependency warnings. Fixes: bf355b8d2c30 ("ipv6: sr: add core files for SR HMAC support") Signed-off-by: Necip Fazil Yildiran Signed-off-by: David S. Miller --- net/ipv6/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 76bff79d6fed..747f56e0c636 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -303,6 +303,7 @@ config IPV6_SEG6_LWTUNNEL config IPV6_SEG6_HMAC bool "IPv6: Segment Routing HMAC support" depends on IPV6 + select CRYPTO select CRYPTO_HMAC select CRYPTO_SHA1 select CRYPTO_SHA256 -- cgit v1.2.3-59-g8ed1b From f13d783a184e4868c5fdbdf20c90a8e323f66dd7 Mon Sep 17 00:00:00 2001 From: Cristobal Forno Date: Fri, 18 Sep 2020 13:47:43 -0500 Subject: MAINTAINERS: Update ibmveth maintainer Removed Thomas Falcon. Added myself (Cristobal Forno) as the maintainer of ibmveth. Signed-off-by: Cristobal Forno Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 923c69ad4eec..ad9a35b91325 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8331,7 +8331,7 @@ F: arch/powerpc/platforms/powernv/copy-paste.h F: arch/powerpc/platforms/powernv/vas* IBM Power Virtual Ethernet Device Driver -M: Thomas Falcon +M: Cristobal Forno L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/ibm/ibmveth.* -- cgit v1.2.3-59-g8ed1b From 769f5083c5e2de371d9a451e3bb9d4aaa24f3346 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 18 Sep 2020 22:51:26 +0100 Subject: rhashtable: fix indentation of a continue statement A continue statement is indented incorrectly, add in the missing tab. Signed-off-by: Colin Ian King Acked-by: Herbert Xu Signed-off-by: David S. Miller --- lib/test_rhashtable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index c5a6fef7b45d..76c607ee6db5 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -434,7 +434,7 @@ static int __init test_rhltable(unsigned int entries) } else { if (WARN(err != -ENOENT, "removed non-existent element, error %d not %d", err, -ENOENT)) - continue; + continue; } } -- cgit v1.2.3-59-g8ed1b From b6e11785cf957f4f4dd7da7f5a1f41ca4f1ea913 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 18 Sep 2020 23:25:56 +0200 Subject: net: mvneta: recycle the page in case of out-of-order Recycle the received page into the page_pool cache if the dma descriptors arrived in a wrong order Fixes: ca0e014609f05 ("net: mvneta: move skb build after descriptors processing") Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 69a900081165..c4345e3d616f 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2383,8 +2383,12 @@ static int mvneta_rx_swbm(struct napi_struct *napi, mvneta_swbm_rx_frame(pp, rx_desc, rxq, &xdp_buf, &size, page, &ps); } else { - if (unlikely(!xdp_buf.data_hard_start)) + if (unlikely(!xdp_buf.data_hard_start)) { + rx_desc->buf_phys_addr = 0; + page_pool_put_full_page(rxq->page_pool, page, + true); continue; + } mvneta_swbm_add_rx_fragment(pp, rx_desc, rxq, &xdp_buf, &size, page); -- cgit v1.2.3-59-g8ed1b From fe81d9f6182d1160e625894eecb3d7ff0222cac5 Mon Sep 17 00:00:00 2001 From: Henry Ptasinski Date: Sat, 19 Sep 2020 00:12:11 +0000 Subject: net: sctp: Fix IPv6 ancestor_size calc in sctp_copy_descendant When calculating ancestor_size with IPv6 enabled, simply using sizeof(struct ipv6_pinfo) doesn't account for extra bytes needed for alignment in the struct sctp6_sock. On x86, there aren't any extra bytes, but on ARM the ipv6_pinfo structure is aligned on an 8-byte boundary so there were 4 pad bytes that were omitted from the ancestor_size calculation. This would lead to corruption of the pd_lobby pointers, causing an oops when trying to free the sctp structure on socket close. Fixes: 636d25d557d1 ("sctp: not copy sctp_sock pd_lobby in sctp_copy_descendant") Signed-off-by: Henry Ptasinski Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 8 +++++--- net/sctp/socket.c | 9 +++------ 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index b33f1aefad09..0bdff38eb4bb 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -226,12 +226,14 @@ struct sctp_sock { data_ready_signalled:1; atomic_t pd_mode; + + /* Fields after this point will be skipped on copies, like on accept + * and peeloff operations + */ + /* Receive to here while partial delivery is in effect. */ struct sk_buff_head pd_lobby; - /* These must be the last fields, as they will skipped on copies, - * like on accept and peeloff operations - */ struct list_head auto_asconf_list; int do_auto_asconf; }; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 836615f71a7d..53d0a4161df3 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -9220,13 +9220,10 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, static inline void sctp_copy_descendant(struct sock *sk_to, const struct sock *sk_from) { - int ancestor_size = sizeof(struct inet_sock) + - sizeof(struct sctp_sock) - - offsetof(struct sctp_sock, pd_lobby); - - if (sk_from->sk_family == PF_INET6) - ancestor_size += sizeof(struct ipv6_pinfo); + size_t ancestor_size = sizeof(struct inet_sock); + ancestor_size += sk_from->sk_prot->obj_size; + ancestor_size -= offsetof(struct sctp_sock, pd_lobby); __inet_sk_copy_descendant(sk_to, sk_from, ancestor_size); } -- cgit v1.2.3-59-g8ed1b From 492adcf481292521ee8df1a482dc12acdb28aa15 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Sun, 20 Sep 2020 21:08:54 -0400 Subject: bnxt_en: Use memcpy to copy VPD field info. Using strlcpy() to copy from VPD is not correct because VPD strings are not necessarily NULL terminated. Use memcpy() to copy the VPD length up to the destination buffer size - 1. The destination is zeroed memory so it will always be NULL terminated. Fixes: a0d0fd70fed5 ("bnxt_en: Read partno and serialno of the board from VPD") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 8eb73fe39e84..4af42b1da827 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -12090,7 +12090,7 @@ static int bnxt_init_mac_addr(struct bnxt *bp) static void bnxt_vpd_read_info(struct bnxt *bp) { struct pci_dev *pdev = bp->pdev; - int i, len, pos, ro_size; + int i, len, pos, ro_size, size; ssize_t vpd_size; u8 *vpd_data; @@ -12125,7 +12125,8 @@ static void bnxt_vpd_read_info(struct bnxt *bp) if (len + pos > vpd_size) goto read_sn; - strlcpy(bp->board_partno, &vpd_data[pos], min(len, BNXT_VPD_FLD_LEN)); + size = min(len, BNXT_VPD_FLD_LEN - 1); + memcpy(bp->board_partno, &vpd_data[pos], size); read_sn: pos = pci_vpd_find_info_keyword(vpd_data, i, ro_size, @@ -12138,7 +12139,8 @@ read_sn: if (len + pos > vpd_size) goto exit; - strlcpy(bp->board_serialno, &vpd_data[pos], min(len, BNXT_VPD_FLD_LEN)); + size = min(len, BNXT_VPD_FLD_LEN - 1); + memcpy(bp->board_serialno, &vpd_data[pos], size); exit: kfree(vpd_data); } -- cgit v1.2.3-59-g8ed1b From d69753fa1ecb3218b56b022722f7a5822735b876 Mon Sep 17 00:00:00 2001 From: Edwin Peer Date: Sun, 20 Sep 2020 21:08:55 -0400 Subject: bnxt_en: return proper error codes in bnxt_show_temp Returning "unknown" as a temperature value violates the hwmon interface rules. Appropriate error codes should be returned via device_attribute show instead. These will ultimately be propagated to the user via the file system interface. In addition to the corrected error handling, it is an even better idea to not present the sensor in sysfs at all if it is known that the read will definitely fail. Given that temp1_input is currently the only sensor reported, ensure no hwmon registration if TEMP_MONITOR_QUERY is not supported or if it will fail due to access permissions. Something smarter may be needed if and when other sensors are added. Fixes: 12cce90b934b ("bnxt_en: fix HWRM error when querying VF temperature") Signed-off-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 4af42b1da827..2865e248e35f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -9311,18 +9311,16 @@ static ssize_t bnxt_show_temp(struct device *dev, struct hwrm_temp_monitor_query_output *resp; struct bnxt *bp = dev_get_drvdata(dev); u32 len = 0; + int rc; resp = bp->hwrm_cmd_resp_addr; bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_TEMP_MONITOR_QUERY, -1, -1); mutex_lock(&bp->hwrm_cmd_lock); - if (!_hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT)) + rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (!rc) len = sprintf(buf, "%u\n", resp->temp * 1000); /* display millidegree */ mutex_unlock(&bp->hwrm_cmd_lock); - - if (len) - return len; - - return sprintf(buf, "unknown\n"); + return rc ?: len; } static SENSOR_DEVICE_ATTR(temp1_input, 0444, bnxt_show_temp, NULL, 0); @@ -9342,7 +9340,16 @@ static void bnxt_hwmon_close(struct bnxt *bp) static void bnxt_hwmon_open(struct bnxt *bp) { + struct hwrm_temp_monitor_query_input req = {0}; struct pci_dev *pdev = bp->pdev; + int rc; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_TEMP_MONITOR_QUERY, -1, -1); + rc = hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (rc == -EACCES || rc == -EOPNOTSUPP) { + bnxt_hwmon_close(bp); + return; + } if (bp->hwmon_dev) return; -- cgit v1.2.3-59-g8ed1b From a53906908148d64423398a62c4435efb0d09652c Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 20 Sep 2020 21:08:56 -0400 Subject: bnxt_en: Protect bnxt_set_eee() and bnxt_set_pauseparam() with mutex. All changes related to bp->link_info require the protection of the link_lock mutex. It's not sufficient to rely just on RTNL. Fixes: 163e9ef63641 ("bnxt_en: Fix race when modifying pause settings.") Reviewed-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 31 +++++++++++++++-------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index d0928334bdc8..6c751013798d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1788,9 +1788,12 @@ static int bnxt_set_pauseparam(struct net_device *dev, if (!BNXT_PHY_CFG_ABLE(bp)) return -EOPNOTSUPP; + mutex_lock(&bp->link_lock); if (epause->autoneg) { - if (!(link_info->autoneg & BNXT_AUTONEG_SPEED)) - return -EINVAL; + if (!(link_info->autoneg & BNXT_AUTONEG_SPEED)) { + rc = -EINVAL; + goto pause_exit; + } link_info->autoneg |= BNXT_AUTONEG_FLOW_CTRL; if (bp->hwrm_spec_code >= 0x10201) @@ -1811,11 +1814,11 @@ static int bnxt_set_pauseparam(struct net_device *dev, if (epause->tx_pause) link_info->req_flow_ctrl |= BNXT_LINK_PAUSE_TX; - if (netif_running(dev)) { - mutex_lock(&bp->link_lock); + if (netif_running(dev)) rc = bnxt_hwrm_set_pause(bp); - mutex_unlock(&bp->link_lock); - } + +pause_exit: + mutex_unlock(&bp->link_lock); return rc; } @@ -2552,8 +2555,7 @@ static int bnxt_set_eee(struct net_device *dev, struct ethtool_eee *edata) struct bnxt *bp = netdev_priv(dev); struct ethtool_eee *eee = &bp->eee; struct bnxt_link_info *link_info = &bp->link_info; - u32 advertising = - _bnxt_fw_to_ethtool_adv_spds(link_info->advertising, 0); + u32 advertising; int rc = 0; if (!BNXT_PHY_CFG_ABLE(bp)) @@ -2562,19 +2564,23 @@ static int bnxt_set_eee(struct net_device *dev, struct ethtool_eee *edata) if (!(bp->flags & BNXT_FLAG_EEE_CAP)) return -EOPNOTSUPP; + mutex_lock(&bp->link_lock); + advertising = _bnxt_fw_to_ethtool_adv_spds(link_info->advertising, 0); if (!edata->eee_enabled) goto eee_ok; if (!(link_info->autoneg & BNXT_AUTONEG_SPEED)) { netdev_warn(dev, "EEE requires autoneg\n"); - return -EINVAL; + rc = -EINVAL; + goto eee_exit; } if (edata->tx_lpi_enabled) { if (bp->lpi_tmr_hi && (edata->tx_lpi_timer > bp->lpi_tmr_hi || edata->tx_lpi_timer < bp->lpi_tmr_lo)) { netdev_warn(dev, "Valid LPI timer range is %d and %d microsecs\n", bp->lpi_tmr_lo, bp->lpi_tmr_hi); - return -EINVAL; + rc = -EINVAL; + goto eee_exit; } else if (!bp->lpi_tmr_hi) { edata->tx_lpi_timer = eee->tx_lpi_timer; } @@ -2584,7 +2590,8 @@ static int bnxt_set_eee(struct net_device *dev, struct ethtool_eee *edata) } else if (edata->advertised & ~advertising) { netdev_warn(dev, "EEE advertised %x must be a subset of autoneg advertised speeds %x\n", edata->advertised, advertising); - return -EINVAL; + rc = -EINVAL; + goto eee_exit; } eee->advertised = edata->advertised; @@ -2596,6 +2603,8 @@ eee_ok: if (netif_running(dev)) rc = bnxt_hwrm_set_link_setting(bp, false, true); +eee_exit: + mutex_unlock(&bp->link_lock); return rc; } -- cgit v1.2.3-59-g8ed1b From f0f47b2f8cbc22e6e611ef6cc988f5452e5aec3f Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Sun, 20 Sep 2020 21:08:57 -0400 Subject: bnxt_en: Return -EOPNOTSUPP for ETHTOOL_GREGS on VFs. Debug firmware commands are not supported on VFs to read registers. This patch avoids logging unnecessary access_denied error on VFs when user calls ETHTOOL_GREGS. By returning error in get_regs_len() method on the VF, the get_regs() method will not be called. Fixes: b5d600b027eb ("bnxt_en: Add support for 'ethtool -d'") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 6c751013798d..fecdfd875af1 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1322,6 +1322,9 @@ static int bnxt_get_regs_len(struct net_device *dev) struct bnxt *bp = netdev_priv(dev); int reg_len; + if (!BNXT_PF(bp)) + return -EOPNOTSUPP; + reg_len = BNXT_PXP_REG_LEN; if (bp->fw_cap & BNXT_FW_CAP_PCIE_STATS_SUPPORTED) -- cgit v1.2.3-59-g8ed1b From d2b42d010f2941c2a85d970500b9d4ba79765593 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 20 Sep 2020 21:08:58 -0400 Subject: bnxt_en: Fix HWRM_FUNC_QSTATS_EXT firmware call. Fix it to set the required fid input parameter. The firmware call fails without this patch. Fixes: d752d0536c97 ("bnxt_en: Retrieve hardware counter masks from firmware if available.") Reviewed-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 2865e248e35f..d527c9c0fc9d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3782,6 +3782,7 @@ static int bnxt_hwrm_func_qstat_ext(struct bnxt *bp, return -EOPNOTSUPP; bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_QSTATS_EXT, -1, -1); + req.fid = cpu_to_le16(0xffff); req.flags = FUNC_QSTATS_EXT_REQ_FLAGS_COUNTER_MASK; mutex_lock(&bp->hwrm_cmd_lock); rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); -- cgit v1.2.3-59-g8ed1b From c07fa08f02f4053b51dae1a6ee08bc644dc7846d Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 20 Sep 2020 21:08:59 -0400 Subject: bnxt_en: Fix wrong flag value passed to HWRM_PORT_QSTATS_EXT fw call. The wrong flag value caused the firmware call to return actual port counters instead of the counter masks. This messed up the counter overflow logic and caused erratic extended port counters to be displayed under ethtool -S. Fixes: 531d1d269c1d ("bnxt_en: Retrieve hardware masks for port counters.") Reviewed-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index d527c9c0fc9d..7b7e8b7883c8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3853,7 +3853,7 @@ static void bnxt_init_stats(struct bnxt *bp) tx_masks = stats->hw_masks; tx_count = sizeof(struct tx_port_stats_ext) / 8; - flags = FUNC_QSTATS_EXT_REQ_FLAGS_COUNTER_MASK; + flags = PORT_QSTATS_EXT_REQ_FLAGS_COUNTER_MASK; rc = bnxt_hwrm_port_qstats_ext(bp, flags); if (rc) { mask = (1ULL << 40) - 1; -- cgit v1.2.3-59-g8ed1b From 58ed68b592377f7483bcbff937567d8cbaab38ed Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 18 Sep 2020 17:33:11 +0300 Subject: sfc: Fix error code in probe This failure path should return a negative error code but it currently returns success. Fixes: 51b35a454efd ("sfc: skeleton EF100 PF driver") Signed-off-by: Dan Carpenter Acked-by: Edward Cree Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef100.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/sfc/ef100.c b/drivers/net/ethernet/sfc/ef100.c index c54b7f8243f3..ffdb36715a49 100644 --- a/drivers/net/ethernet/sfc/ef100.c +++ b/drivers/net/ethernet/sfc/ef100.c @@ -490,6 +490,7 @@ static int ef100_pci_probe(struct pci_dev *pci_dev, if (fcw.offset > pci_resource_len(efx->pci_dev, fcw.bar) - ESE_GZ_FCW_LEN) { netif_err(efx, probe, efx->net_dev, "Func control window overruns BAR\n"); + rc = -EIO; goto fail; } -- cgit v1.2.3-59-g8ed1b From 91b2c9a0fdb56e3d7eb6bf06a39ad7a50f000916 Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Mon, 21 Sep 2020 06:38:56 +0000 Subject: ipv6: route: convert comma to semicolon Replace a comma between expression statements by a semicolon. Signed-off-by: Xu Wang Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 5e7e25e2523a..fb075d9545b9 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4202,7 +4202,7 @@ static struct fib6_info *rt6_add_route_info(struct net *net, .fc_nlinfo.nl_net = net, }; - cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO, + cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO; cfg.fc_dst = *prefix; cfg.fc_gateway = *gwaddr; -- cgit v1.2.3-59-g8ed1b From cefc23554fc259114e78a7b0908aac4610ee18eb Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 31 Aug 2020 20:50:42 +0300 Subject: net/mlx5: Fix FTE cleanup Currently, when an FTE is allocated, its refcount is decreased to 0 with the purpose it will not be a stand alone steering object and every rule (destination) of the FTE would increase the refcount. When mlx5_cleanup_fs is called while not all rules were deleted by the steering users, it hit refcount underflow on the FTE once clean_tree calls to tree_remove_node after the deleted rules already decreased the refcount to 0. FTE is no longer destroyed implicitly when the last rule (destination) is deleted. mlx5_del_flow_rules avoids it by increasing the refcount on the FTE and destroy it explicitly after all rules were deleted. So we can avoid the refcount underflow by making FTE as stand alone object. In addition need to set del_hw_func to FTE so the HW object will be destroyed when the FTE is deleted from the cleanup_tree flow. refcount_t: underflow; use-after-free. WARNING: CPU: 2 PID: 15715 at lib/refcount.c:28 refcount_warn_saturate+0xd9/0xe0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: tree_put_node+0xf2/0x140 [mlx5_core] clean_tree+0x4e/0xf0 [mlx5_core] clean_tree+0x4e/0xf0 [mlx5_core] clean_tree+0x4e/0xf0 [mlx5_core] clean_tree+0x5f/0xf0 [mlx5_core] clean_tree+0x4e/0xf0 [mlx5_core] clean_tree+0x5f/0xf0 [mlx5_core] mlx5_cleanup_fs+0x26/0x270 [mlx5_core] mlx5_unload+0x2e/0xa0 [mlx5_core] mlx5_unload_one+0x51/0x120 [mlx5_core] mlx5_devlink_reload_down+0x51/0x90 [mlx5_core] devlink_reload+0x39/0x120 ? devlink_nl_cmd_reload+0x43/0x220 genl_rcv_msg+0x1e4/0x420 ? genl_family_rcv_msg_attrs_parse+0x100/0x100 netlink_rcv_skb+0x47/0x110 genl_rcv+0x24/0x40 netlink_unicast+0x217/0x2f0 netlink_sendmsg+0x30f/0x430 sock_sendmsg+0x30/0x40 __sys_sendto+0x10e/0x140 ? handle_mm_fault+0xc4/0x1f0 ? do_page_fault+0x33f/0x630 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x48/0x130 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 718ce4d601db ("net/mlx5: Consolidate update FTE for all removal changes") Fixes: bd71b08ec2ee ("net/mlx5: Support multiple updates of steering rules in parallel") Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 9ccec5f8b92a..75fa44eee434 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -654,7 +654,7 @@ static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft, fte->action = *flow_act; fte->flow_context = spec->flow_context; - tree_init_node(&fte->node, NULL, del_sw_fte); + tree_init_node(&fte->node, del_hw_fte, del_sw_fte); return fte; } @@ -1792,7 +1792,6 @@ skip_search: up_write_ref_node(&g->node, false); rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte); up_write_ref_node(&fte->node, false); - tree_put_node(&fte->node, false); return rule; } rule = ERR_PTR(-ENOENT); @@ -1891,7 +1890,6 @@ search_again_locked: up_write_ref_node(&g->node, false); rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte); up_write_ref_node(&fte->node, false); - tree_put_node(&fte->node, false); tree_put_node(&g->node, false); return rule; @@ -2001,7 +1999,9 @@ void mlx5_del_flow_rules(struct mlx5_flow_handle *handle) up_write_ref_node(&fte->node, false); } else { del_hw_fte(&fte->node); - up_write(&fte->node.lock); + /* Avoid double call to del_hw_fte */ + fte->node.del_hw_func = NULL; + up_write_ref_node(&fte->node, false); tree_put_node(&fte->node, false); } kfree(handle); -- cgit v1.2.3-59-g8ed1b From fe45386a208277cae4648106133c08246eecd012 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Thu, 11 Jun 2020 13:55:19 +0300 Subject: net/mlx5e: Use RCU to protect rq->xdp_prog Currently, the RQs are temporarily deactivated while hot-replacing the XDP program, and napi_synchronize is used to make sure rq->xdp_prog is not in use. However, napi_synchronize is not ideal: instead of waiting till the end of a NAPI cycle, it polls and waits until NAPI is not running, sleeping for 1ms between the periodic checks. Under heavy workloads, this loop will never end, which may even lead to a kernel panic if the kernel detects the hangup. Such workloads include XSK TX and possibly also heavy RX (XSK or normal). The fix is inspired by commit 326fe02d1ed6 ("net/mlx4_en: protect ring->xdp_prog with rcu_read_lock"). As mlx5e_xdp_handle is already protected by rcu_read_lock, and bpf_prog_put uses call_rcu to free the program, there is no need for additional synchronization if proper RCU functions are used to access the pointer. This patch converts all accesses to rq->xdp_prog to use RCU functions. Fixes: 86994156c736 ("net/mlx5e: XDP fast RX drop bpf programs support") Fixes: db05815b36cb ("net/mlx5e: Add XSK zero-copy support") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 53 +++++++++++------------ 3 files changed, 27 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 0cc2080fd847..5d7b79518449 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -600,7 +600,7 @@ struct mlx5e_rq { struct dim dim; /* Dynamic Interrupt Moderation */ /* XDP */ - struct bpf_prog *xdp_prog; + struct bpf_prog __rcu *xdp_prog; struct mlx5e_xdpsq *xdpsq; DECLARE_BITMAP(flags, 8); struct page_pool *page_pool; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 0e6946fc121f..b28df21981a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -122,7 +122,7 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq, bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, u32 *len, struct xdp_buff *xdp) { - struct bpf_prog *prog = READ_ONCE(rq->xdp_prog); + struct bpf_prog *prog = rcu_dereference(rq->xdp_prog); u32 act; int err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index aebcf73f8546..bde97b108db5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -399,7 +399,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, if (params->xdp_prog) bpf_prog_inc(params->xdp_prog); - rq->xdp_prog = params->xdp_prog; + RCU_INIT_POINTER(rq->xdp_prog, params->xdp_prog); rq_xdp_ix = rq->ix; if (xsk) @@ -408,7 +408,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, if (err < 0) goto err_rq_wq_destroy; - rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; + rq->buff.map_dir = params->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk); pool_size = 1 << params->log_rq_mtu_frames; @@ -564,8 +564,8 @@ err_free: } err_rq_wq_destroy: - if (rq->xdp_prog) - bpf_prog_put(rq->xdp_prog); + if (params->xdp_prog) + bpf_prog_put(params->xdp_prog); xdp_rxq_info_unreg(&rq->xdp_rxq); page_pool_destroy(rq->page_pool); mlx5_wq_destroy(&rq->wq_ctrl); @@ -575,10 +575,16 @@ err_rq_wq_destroy: static void mlx5e_free_rq(struct mlx5e_rq *rq) { + struct mlx5e_channel *c = rq->channel; + struct bpf_prog *old_prog = NULL; int i; - if (rq->xdp_prog) - bpf_prog_put(rq->xdp_prog); + /* drop_rq has neither channel nor xdp_prog. */ + if (c) + old_prog = rcu_dereference_protected(rq->xdp_prog, + lockdep_is_held(&c->priv->state_lock)); + if (old_prog) + bpf_prog_put(old_prog); switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: @@ -4330,6 +4336,16 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog) return 0; } +static void mlx5e_rq_replace_xdp_prog(struct mlx5e_rq *rq, struct bpf_prog *prog) +{ + struct bpf_prog *old_prog; + + old_prog = rcu_replace_pointer(rq->xdp_prog, prog, + lockdep_is_held(&rq->channel->priv->state_lock)); + if (old_prog) + bpf_prog_put(old_prog); +} + static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -4388,29 +4404,10 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) */ for (i = 0; i < priv->channels.num; i++) { struct mlx5e_channel *c = priv->channels.c[i]; - bool xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state); - - clear_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state); - if (xsk_open) - clear_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); - napi_synchronize(&c->napi); - /* prevent mlx5e_poll_rx_cq from accessing rq->xdp_prog */ - - old_prog = xchg(&c->rq.xdp_prog, prog); - if (old_prog) - bpf_prog_put(old_prog); - - if (xsk_open) { - old_prog = xchg(&c->xskrq.xdp_prog, prog); - if (old_prog) - bpf_prog_put(old_prog); - } - set_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state); - if (xsk_open) - set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); - /* napi_schedule in case we have missed anything */ - napi_schedule(&c->napi); + mlx5e_rq_replace_xdp_prog(&c->rq, prog); + if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + mlx5e_rq_replace_xdp_prog(&c->xskrq, prog); } unlock: -- cgit v1.2.3-59-g8ed1b From 9c25a22dfb00270372224721fed646965420323a Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Thu, 11 Jun 2020 14:25:19 +0300 Subject: net/mlx5e: Use synchronize_rcu to sync with NAPI As described in the previous commit, napi_synchronize doesn't quite fit the purpose when we just need to wait until the currently running NAPI quits. Its implementation waits until NAPI is not running by polling and waiting for 1ms in between. In cases where we need to deactivate one queue (e.g., recovery flows) or where we deactivate them one-by-one (deactivate channel flow), we may get stuck in napi_synchronize forever if other queues keep NAPI active, causing a soft lockup. Depending on kernel configuration (CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC), it may result in a kernel panic. To fix the issue, use synchronize_rcu to wait for NAPI to quit, and wrap the whole NAPI in rcu_read_lock. Fixes: acc6c5953af1 ("net/mlx5e: Split open/close channels to stages") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c | 14 ++------------ drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c | 3 +-- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 12 ++++-------- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 12 ++---------- drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 17 +++++++++++++---- 5 files changed, 22 insertions(+), 36 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c index a33a1f762c70..40db27bf790b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c @@ -31,7 +31,6 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, { struct xdp_buff *xdp = wi->umr.dma_info[page_idx].xsk; u32 cqe_bcnt32 = cqe_bcnt; - bool consumed; /* Check packet size. Note LRO doesn't use linear SKB */ if (unlikely(cqe_bcnt > rq->hw_mtu)) { @@ -51,10 +50,6 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, xsk_buff_dma_sync_for_cpu(xdp); prefetch(xdp->data); - rcu_read_lock(); - consumed = mlx5e_xdp_handle(rq, NULL, &cqe_bcnt32, xdp); - rcu_read_unlock(); - /* Possible flows: * - XDP_REDIRECT to XSKMAP: * The page is owned by the userspace from now. @@ -70,7 +65,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, * allocated first from the Reuse Ring, so it has enough space. */ - if (likely(consumed)) { + if (likely(mlx5e_xdp_handle(rq, NULL, &cqe_bcnt32, xdp))) { if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))) __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */ return NULL; /* page/packet was consumed by XDP */ @@ -88,7 +83,6 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, u32 cqe_bcnt) { struct xdp_buff *xdp = wi->di->xsk; - bool consumed; /* wi->offset is not used in this function, because xdp->data and the * DMA address point directly to the necessary place. Furthermore, the @@ -107,11 +101,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, return NULL; } - rcu_read_lock(); - consumed = mlx5e_xdp_handle(rq, NULL, &cqe_bcnt, xdp); - rcu_read_unlock(); - - if (likely(consumed)) + if (likely(mlx5e_xdp_handle(rq, NULL, &cqe_bcnt, xdp))) return NULL; /* page/packet was consumed by XDP */ /* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index dd9df519d383..55e65a438de7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -106,8 +106,7 @@ err_free_cparam: void mlx5e_close_xsk(struct mlx5e_channel *c) { clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state); - napi_synchronize(&c->napi); - synchronize_rcu(); /* Sync with the XSK wakeup. */ + synchronize_rcu(); /* Sync with the XSK wakeup and with NAPI. */ mlx5e_close_rq(&c->xskrq); mlx5e_close_cq(&c->xskrq.cq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index bde97b108db5..917c28e7f29e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -873,7 +873,7 @@ void mlx5e_activate_rq(struct mlx5e_rq *rq) void mlx5e_deactivate_rq(struct mlx5e_rq *rq) { clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); - napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */ + synchronize_rcu(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */ } void mlx5e_close_rq(struct mlx5e_rq *rq) @@ -1318,12 +1318,10 @@ void mlx5e_tx_disable_queue(struct netdev_queue *txq) static void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq) { - struct mlx5e_channel *c = sq->channel; struct mlx5_wq_cyc *wq = &sq->wq; clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); - /* prevent netif_tx_wake_queue */ - napi_synchronize(&c->napi); + synchronize_rcu(); /* Sync with NAPI to prevent netif_tx_wake_queue. */ mlx5e_tx_disable_queue(sq->txq); @@ -1398,10 +1396,8 @@ void mlx5e_activate_icosq(struct mlx5e_icosq *icosq) void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq) { - struct mlx5e_channel *c = icosq->channel; - clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state); - napi_synchronize(&c->napi); + synchronize_rcu(); /* Sync with NAPI. */ } void mlx5e_close_icosq(struct mlx5e_icosq *sq) @@ -1480,7 +1476,7 @@ void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq) struct mlx5e_channel *c = sq->channel; clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); - napi_synchronize(&c->napi); + synchronize_rcu(); /* Sync with NAPI. */ mlx5e_destroy_sq(c->mdev, sq->sqn); mlx5e_free_xdpsq_descs(sq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 65828af120b7..99d102c035b0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1132,7 +1132,6 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, struct xdp_buff xdp; struct sk_buff *skb; void *va, *data; - bool consumed; u32 frag_size; va = page_address(di->page) + wi->offset; @@ -1144,11 +1143,8 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, prefetchw(va); /* xdp_frame data area */ prefetch(data); - rcu_read_lock(); mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp); - consumed = mlx5e_xdp_handle(rq, di, &cqe_bcnt, &xdp); - rcu_read_unlock(); - if (consumed) + if (mlx5e_xdp_handle(rq, di, &cqe_bcnt, &xdp)) return NULL; /* page/packet was consumed by XDP */ rx_headroom = xdp.data - xdp.data_hard_start; @@ -1438,7 +1434,6 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, struct sk_buff *skb; void *va, *data; u32 frag_size; - bool consumed; /* Check packet size. Note LRO doesn't use linear SKB */ if (unlikely(cqe_bcnt > rq->hw_mtu)) { @@ -1455,11 +1450,8 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, prefetchw(va); /* xdp_frame data area */ prefetch(data); - rcu_read_lock(); mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt32, &xdp); - consumed = mlx5e_xdp_handle(rq, di, &cqe_bcnt32, &xdp); - rcu_read_unlock(); - if (consumed) { + if (mlx5e_xdp_handle(rq, di, &cqe_bcnt32, &xdp)) { if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */ return NULL; /* page/packet was consumed by XDP */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index de10b06bade5..d5868670f8a5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -121,13 +121,17 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) struct mlx5e_xdpsq *xsksq = &c->xsksq; struct mlx5e_rq *xskrq = &c->xskrq; struct mlx5e_rq *rq = &c->rq; - bool xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state); bool aff_change = false; bool busy_xsk = false; bool busy = false; int work_done = 0; + bool xsk_open; int i; + rcu_read_lock(); + + xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state); + ch_stats->poll++; for (i = 0; i < c->num_tc; i++) @@ -167,8 +171,10 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) busy |= busy_xsk; if (busy) { - if (likely(mlx5e_channel_no_affinity_change(c))) - return budget; + if (likely(mlx5e_channel_no_affinity_change(c))) { + work_done = budget; + goto out; + } ch_stats->aff_change++; aff_change = true; if (budget && work_done == budget) @@ -176,7 +182,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) } if (unlikely(!napi_complete_done(napi, work_done))) - return work_done; + goto out; ch_stats->arm++; @@ -203,6 +209,9 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) ch_stats->force_irq++; } +out: + rcu_read_unlock(); + return work_done; } -- cgit v1.2.3-59-g8ed1b From 12a240a41427d37b5e70570700704e84c827452f Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 7 Jul 2020 06:16:24 +0000 Subject: net/mlx5e: Fix memory leak of tunnel info when rule under multipath not ready When deleting vxlan flow rule under multipath, tun_info in parse_attr is not freed when the rule is not ready. Fixes: ef06c9ee8933 ("net/mlx5e: Allow one failure when offloading tc encap rules under multipath") Signed-off-by: Jianbo Liu Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index fd53d101d8fd..7be282d2ddde 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1290,11 +1290,8 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, mlx5e_put_flow_tunnel_id(flow); - if (flow_flag_test(flow, NOT_READY)) { + if (flow_flag_test(flow, NOT_READY)) remove_unready_flow(flow); - kvfree(attr->parse_attr); - return; - } if (mlx5e_is_offloaded_flow(flow)) { if (flow_flag_test(flow, SLOW)) -- cgit v1.2.3-59-g8ed1b From 4c8594adb9d9250120cffc57a7175a1a09f2b084 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Sun, 26 Jul 2020 16:37:47 +0300 Subject: net/mlx5e: CT: Fix freeing ct_label mapping Add missing mapping remove call when removing ct rule, as the mapping was allocated when ct rule was adding with ct_label. Also there is a missing mapping remove call in error flow. Fixes: 54b154ecfb8c ("net/mlx5e: CT: Map 128 bits labels to 32 bit map ID") Signed-off-by: Roi Dayan Reviewed-by: Eli Britstein Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 21 ++++++++++++----- drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h | 26 +++++++++++++--------- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 +++-- 3 files changed, 36 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index c6bc9224c3b1..bc5f72ec3623 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -699,6 +699,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, err_rule: mlx5e_mod_hdr_detach(ct_priv->esw->dev, &esw->offloads.mod_hdr, zone_rule->mh); + mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id); err_mod_hdr: kfree(spec); return err; @@ -958,12 +959,22 @@ mlx5_tc_ct_add_no_trk_match(struct mlx5e_priv *priv, return 0; } +void mlx5_tc_ct_match_del(struct mlx5e_priv *priv, struct mlx5_ct_attr *ct_attr) +{ + struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); + + if (!ct_priv || !ct_attr->ct_labels_id) + return; + + mapping_remove(ct_priv->labels_mapping, ct_attr->ct_labels_id); +} + int -mlx5_tc_ct_parse_match(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - struct flow_cls_offload *f, - struct mlx5_ct_attr *ct_attr, - struct netlink_ext_ack *extack) +mlx5_tc_ct_match_add(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + struct mlx5_ct_attr *ct_attr, + struct netlink_ext_ack *extack) { struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); struct flow_rule *rule = flow_cls_offload_flow_rule(f); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h index 3baef917a677..708c216325d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h @@ -87,12 +87,15 @@ mlx5_tc_ct_init(struct mlx5_rep_uplink_priv *uplink_priv); void mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv); +void +mlx5_tc_ct_match_del(struct mlx5e_priv *priv, struct mlx5_ct_attr *ct_attr); + int -mlx5_tc_ct_parse_match(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - struct flow_cls_offload *f, - struct mlx5_ct_attr *ct_attr, - struct netlink_ext_ack *extack); +mlx5_tc_ct_match_add(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + struct mlx5_ct_attr *ct_attr, + struct netlink_ext_ack *extack); int mlx5_tc_ct_add_no_trk_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec); @@ -130,12 +133,15 @@ mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv) { } +static inline void +mlx5_tc_ct_match_del(struct mlx5e_priv *priv, struct mlx5_ct_attr *ct_attr) {} + static inline int -mlx5_tc_ct_parse_match(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - struct flow_cls_offload *f, - struct mlx5_ct_attr *ct_attr, - struct netlink_ext_ack *extack) +mlx5_tc_ct_match_add(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + struct mlx5_ct_attr *ct_attr, + struct netlink_ext_ack *extack) { struct flow_rule *rule = flow_cls_offload_flow_rule(f); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 7be282d2ddde..bf0c6f063941 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1312,6 +1312,8 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, } kvfree(attr->parse_attr); + mlx5_tc_ct_match_del(priv, &flow->esw_attr->ct_attr); + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) mlx5e_detach_mod_hdr(priv, flow); @@ -4399,8 +4401,8 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, goto err_free; /* actions validation depends on parsing the ct matches first */ - err = mlx5_tc_ct_parse_match(priv, &parse_attr->spec, f, - &flow->esw_attr->ct_attr, extack); + err = mlx5_tc_ct_match_add(priv, &parse_attr->spec, f, + &flow->esw_attr->ct_attr, extack); if (err) goto err_free; -- cgit v1.2.3-59-g8ed1b From 6cec0229ab1959259e71e9a5bbe47c04577950b1 Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Wed, 5 Aug 2020 17:56:04 +0300 Subject: net/mlx5e: Enable adding peer miss rules only if merged eswitch is supported The cited commit creates peer miss group during switchdev mode initialization in order to handle miss packets correctly while in VF LAG mode. This is done regardless of FW support of such groups which could cause rules setups failure later on. Fix by adding FW capability check before creating peer groups/rule. Fixes: ac004b832128 ("net/mlx5e: E-Switch, Add peer miss rules") Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Reviewed-by: Raed Salem Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 52 ++++++++++++---------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index d2516922d867..1bcf2609dca8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1219,35 +1219,37 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw) } esw->fdb_table.offloads.send_to_vport_grp = g; - /* create peer esw miss group */ - memset(flow_group_in, 0, inlen); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) { + /* create peer esw miss group */ + memset(flow_group_in, 0, inlen); - esw_set_flow_group_source_port(esw, flow_group_in); + esw_set_flow_group_source_port(esw, flow_group_in); - if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) { - match_criteria = MLX5_ADDR_OF(create_flow_group_in, - flow_group_in, - match_criteria); + if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) { + match_criteria = MLX5_ADDR_OF(create_flow_group_in, + flow_group_in, + match_criteria); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - misc_parameters.source_eswitch_owner_vhca_id); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_eswitch_owner_vhca_id); - MLX5_SET(create_flow_group_in, flow_group_in, - source_eswitch_owner_vhca_id_valid, 1); - } + MLX5_SET(create_flow_group_in, flow_group_in, + source_eswitch_owner_vhca_id_valid, 1); + } - MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, - ix + esw->total_vports - 1); - ix += esw->total_vports; + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + ix + esw->total_vports - 1); + ix += esw->total_vports; - g = mlx5_create_flow_group(fdb, flow_group_in); - if (IS_ERR(g)) { - err = PTR_ERR(g); - esw_warn(dev, "Failed to create peer miss flow group err(%d)\n", err); - goto peer_miss_err; + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create peer miss flow group err(%d)\n", err); + goto peer_miss_err; + } + esw->fdb_table.offloads.peer_miss_grp = g; } - esw->fdb_table.offloads.peer_miss_grp = g; /* create miss group */ memset(flow_group_in, 0, inlen); @@ -1281,7 +1283,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw) miss_rule_err: mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); miss_err: - mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp); peer_miss_err: mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); send_vport_err: @@ -1305,7 +1308,8 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi); mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni); mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); - mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp); mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); mlx5_esw_chains_destroy(esw); -- cgit v1.2.3-59-g8ed1b From 82198d8bcdeff01d19215d712aa55031e21bccbc Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Wed, 2 Sep 2020 16:49:52 +0300 Subject: net/mlx5e: Fix endianness when calculating pedit mask first bit The field mask value is provided in network byte order and has to be converted to host byte order before calculating pedit mask first bit. Fixes: 88f30bbcbaaa ("net/mlx5e: Bit sized fields rewrite support") Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 34 +++++++++++++++---------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index bf0c6f063941..1c93f92d9210 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2624,6 +2624,22 @@ static struct mlx5_fields fields[] = { OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest, 0, udp_dport), }; +static unsigned long mask_to_le(unsigned long mask, int size) +{ + __be32 mask_be32; + __be16 mask_be16; + + if (size == 32) { + mask_be32 = (__force __be32)(mask); + mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32)); + } else if (size == 16) { + mask_be32 = (__force __be32)(mask); + mask_be16 = *(__be16 *)&mask_be32; + mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16)); + } + + return mask; +} static int offload_pedit_fields(struct mlx5e_priv *priv, int namespace, struct pedit_headers_action *hdrs, @@ -2637,9 +2653,7 @@ static int offload_pedit_fields(struct mlx5e_priv *priv, u32 *s_masks_p, *a_masks_p, s_mask, a_mask; struct mlx5e_tc_mod_hdr_acts *mod_acts; struct mlx5_fields *f; - unsigned long mask; - __be32 mask_be32; - __be16 mask_be16; + unsigned long mask, field_mask; int err; u8 cmd; @@ -2705,14 +2719,7 @@ static int offload_pedit_fields(struct mlx5e_priv *priv, if (skip) continue; - if (f->field_bsize == 32) { - mask_be32 = (__force __be32)(mask); - mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32)); - } else if (f->field_bsize == 16) { - mask_be32 = (__force __be32)(mask); - mask_be16 = *(__be16 *)&mask_be32; - mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16)); - } + mask = mask_to_le(mask, f->field_bsize); first = find_first_bit(&mask, f->field_bsize); next_z = find_next_zero_bit(&mask, f->field_bsize, first); @@ -2743,9 +2750,10 @@ static int offload_pedit_fields(struct mlx5e_priv *priv, if (cmd == MLX5_ACTION_TYPE_SET) { int start; + field_mask = mask_to_le(f->field_mask, f->field_bsize); + /* if field is bit sized it can start not from first bit */ - start = find_first_bit((unsigned long *)&f->field_mask, - f->field_bsize); + start = find_first_bit(&field_mask, f->field_bsize); MLX5_SET(set_action_in, action, offset, first - start); /* length is num of bits to be written, zero means length of 32 */ -- cgit v1.2.3-59-g8ed1b From 47c97e6b10a1e3680cad539929da092bfa535446 Mon Sep 17 00:00:00 2001 From: Ron Diskin Date: Sun, 10 May 2020 14:39:51 +0300 Subject: net/mlx5e: Fix multicast counter not up-to-date in "ip -s" Currently the FW does not generate events for counters other than error counters. Unlike ".get_ethtool_stats", ".ndo_get_stats64" (which ip -s uses) might run in atomic context, while the FW interface is non atomic. Thus, 'ip' is not allowed to issue FW commands, so it will only display cached counters in the driver. Add a SW counter (mcast_packets) in the driver to count rx multicast packets. The counter also counts broadcast packets, as we consider it a special case of multicast. Use the counter value when calling "ip -s"/"ifconfig". Fixes: f62b8bb8f2d3 ("net/mlx5: Extend mlx5_core to support ConnectX-4 Ethernet functionality") Signed-off-by: Ron Diskin Reviewed-by: Tariq Toukan Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 5 +++++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 8 +------- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 4 ++++ drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 2 ++ 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index 9334c9c3e208..24336c60123a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -20,6 +20,11 @@ enum mlx5e_icosq_wqe_type { }; /* General */ +static inline bool mlx5e_skb_is_multicast(struct sk_buff *skb) +{ + return skb->pkt_type == PACKET_MULTICAST || skb->pkt_type == PACKET_BROADCAST; +} + void mlx5e_trigger_irq(struct mlx5e_icosq *sq); void mlx5e_completion_event(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe); void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 917c28e7f29e..d46047235568 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3569,6 +3569,7 @@ void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s) s->rx_packets += rq_stats->packets + xskrq_stats->packets; s->rx_bytes += rq_stats->bytes + xskrq_stats->bytes; + s->multicast += rq_stats->mcast_packets + xskrq_stats->mcast_packets; for (j = 0; j < priv->max_opened_tc; j++) { struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j]; @@ -3584,7 +3585,6 @@ void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5e_vport_stats *vstats = &priv->stats.vport; struct mlx5e_pport_stats *pstats = &priv->stats.pport; /* In switchdev mode, monitor counters doesn't monitor @@ -3619,12 +3619,6 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) stats->rx_errors = stats->rx_length_errors + stats->rx_crc_errors + stats->rx_frame_errors; stats->tx_errors = stats->tx_aborted_errors + stats->tx_carrier_errors; - - /* vport multicast also counts packets that are dropped due to steering - * or rx out of buffer - */ - stats->multicast = - VPORT_COUNTER_GET(vstats, received_eth_multicast.packets); } static void mlx5e_set_rx_mode(struct net_device *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 99d102c035b0..64c8ac5eabf6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -53,6 +53,7 @@ #include "en/xsk/rx.h" #include "en/health.h" #include "en/params.h" +#include "en/txrx.h" static struct sk_buff * mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, @@ -1080,6 +1081,9 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, mlx5e_enable_ecn(rq, skb); skb->protocol = eth_type_trans(skb, netdev); + + if (unlikely(mlx5e_skb_is_multicast(skb))) + stats->mcast_packets++; } static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 2e1cca1923b9..be7cda283781 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -119,6 +119,7 @@ struct mlx5e_sw_stats { u64 tx_nop; u64 rx_lro_packets; u64 rx_lro_bytes; + u64 rx_mcast_packets; u64 rx_ecn_mark; u64 rx_removed_vlan_packets; u64 rx_csum_unnecessary; @@ -298,6 +299,7 @@ struct mlx5e_rq_stats { u64 csum_none; u64 lro_packets; u64 lro_bytes; + u64 mcast_packets; u64 ecn_mark; u64 removed_vlan_packets; u64 xdp_drop; -- cgit v1.2.3-59-g8ed1b From b521105b68a2e3334d69202fb354d7b711b01aa3 Mon Sep 17 00:00:00 2001 From: Alaa Hleihel Date: Tue, 25 Aug 2020 10:41:50 +0300 Subject: net/mlx5e: Fix using wrong stats_grps in mlx5e_update_ndo_stats() The cited commit started to reuse function mlx5e_update_ndo_stats() for the representors as well. However, the function is hard-coded to work on mlx5e_nic_stats_grps only. Due to this issue, the representors statistics were not updated in the output of "ip -s". Fix it to work with the correct group by extracting it from the caller's profile. Also, while at it and since this function became generic, move it to en_stats.c and rename it accordingly. Fixes: 8a236b15144b ("net/mlx5e: Convert rep stats to mlx5e_stats_grp-based infra") Signed-off-by: Alaa Hleihel Reviewed-by: Vlad Buslov Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 - drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 12 +----------- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 12 ++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 1 + 6 files changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 5d7b79518449..90d5caabd6af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1005,7 +1005,6 @@ int mlx5e_update_nic_rx(struct mlx5e_priv *priv); void mlx5e_update_carrier(struct mlx5e_priv *priv); int mlx5e_close(struct net_device *netdev); int mlx5e_open(struct net_device *netdev); -void mlx5e_update_ndo_stats(struct mlx5e_priv *priv); void mlx5e_queue_update_stats(struct mlx5e_priv *priv); int mlx5e_bits_invert(unsigned long a, int size); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c index 8fe8b4d6ad1c..254c84739046 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c @@ -51,7 +51,7 @@ static void mlx5e_monitor_counters_work(struct work_struct *work) monitor_counters_work); mutex_lock(&priv->state_lock); - mlx5e_update_ndo_stats(priv); + mlx5e_stats_update_ndo_stats(priv); mutex_unlock(&priv->state_lock); mlx5e_monitor_counter_arm(priv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index d46047235568..b3cda7b6e5e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -158,16 +158,6 @@ static void mlx5e_update_carrier_work(struct work_struct *work) mutex_unlock(&priv->state_lock); } -void mlx5e_update_ndo_stats(struct mlx5e_priv *priv) -{ - int i; - - for (i = mlx5e_nic_stats_grps_num(priv) - 1; i >= 0; i--) - if (mlx5e_nic_stats_grps[i]->update_stats_mask & - MLX5E_NDO_UPDATE_STATS) - mlx5e_nic_stats_grps[i]->update_stats(priv); -} - static void mlx5e_update_stats_work(struct work_struct *work) { struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, @@ -5187,7 +5177,7 @@ static const struct mlx5e_profile mlx5e_nic_profile = { .enable = mlx5e_nic_enable, .disable = mlx5e_nic_disable, .update_rx = mlx5e_update_nic_rx, - .update_stats = mlx5e_update_ndo_stats, + .update_stats = mlx5e_stats_update_ndo_stats, .update_carrier = mlx5e_update_carrier, .rx_handlers = &mlx5e_rx_handlers_nic, .max_tc = MLX5E_MAX_NUM_TC, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index e13e5d1b3eae..e979bff64c49 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1171,7 +1171,7 @@ static const struct mlx5e_profile mlx5e_rep_profile = { .cleanup_tx = mlx5e_cleanup_rep_tx, .enable = mlx5e_rep_enable, .update_rx = mlx5e_update_rep_rx, - .update_stats = mlx5e_update_ndo_stats, + .update_stats = mlx5e_stats_update_ndo_stats, .rx_handlers = &mlx5e_rx_handlers_rep, .max_tc = 1, .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR), @@ -1189,7 +1189,7 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = { .enable = mlx5e_uplink_rep_enable, .disable = mlx5e_uplink_rep_disable, .update_rx = mlx5e_update_rep_rx, - .update_stats = mlx5e_update_ndo_stats, + .update_stats = mlx5e_stats_update_ndo_stats, .update_carrier = mlx5e_update_carrier, .rx_handlers = &mlx5e_rx_handlers_rep, .max_tc = MLX5E_MAX_NUM_TC, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index e3b2f59408e6..f6383bc2bc3f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -54,6 +54,18 @@ unsigned int mlx5e_stats_total_num(struct mlx5e_priv *priv) return total; } +void mlx5e_stats_update_ndo_stats(struct mlx5e_priv *priv) +{ + mlx5e_stats_grp_t *stats_grps = priv->profile->stats_grps; + const unsigned int num_stats_grps = stats_grps_num(priv); + int i; + + for (i = num_stats_grps - 1; i >= 0; i--) + if (stats_grps[i]->update_stats && + stats_grps[i]->update_stats_mask & MLX5E_NDO_UPDATE_STATS) + stats_grps[i]->update_stats(priv); +} + void mlx5e_stats_update(struct mlx5e_priv *priv) { mlx5e_stats_grp_t *stats_grps = priv->profile->stats_grps; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index be7cda283781..562263d62141 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -103,6 +103,7 @@ unsigned int mlx5e_stats_total_num(struct mlx5e_priv *priv); void mlx5e_stats_update(struct mlx5e_priv *priv); void mlx5e_stats_fill(struct mlx5e_priv *priv, u64 *data, int idx); void mlx5e_stats_fill_strings(struct mlx5e_priv *priv, u8 *data); +void mlx5e_stats_update_ndo_stats(struct mlx5e_priv *priv); /* Concrete NIC Stats */ -- cgit v1.2.3-59-g8ed1b From 8f0bcd19b1da3f264223abea985b9462e85a3718 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 28 Jun 2020 13:06:06 +0300 Subject: net/mlx5e: TLS, Do not expose FPGA TLS counter if not supported The set of TLS TX global SW counters in mlx5e_tls_sw_stats_desc is updated from all rings by using atomic ops. This set of stats is used only in the FPGA TLS use case, not in the Connect-X TLS one, where regular per-ring counters are used. Do not expose them in the Connect-X use case, as this would cause counter duplication. For example, tx_tls_drop_no_sync_data would appear twice in the ethtool stats. Fixes: d2ead1f360e8 ("net/mlx5e: Add kTLS TX HW offload support") Signed-off-by: Tariq Toukan Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c index 01468ec27446..b949b9a7538b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c @@ -35,7 +35,6 @@ #include #include "en.h" -#include "accel/tls.h" #include "fpga/sdk.h" #include "en_accel/tls.h" @@ -51,9 +50,14 @@ static const struct counter_desc mlx5e_tls_sw_stats_desc[] = { #define NUM_TLS_SW_COUNTERS ARRAY_SIZE(mlx5e_tls_sw_stats_desc) +static bool is_tls_atomic_stats(struct mlx5e_priv *priv) +{ + return priv->tls && !mlx5_accel_is_ktls_device(priv->mdev); +} + int mlx5e_tls_get_count(struct mlx5e_priv *priv) { - if (!priv->tls) + if (!is_tls_atomic_stats(priv)) return 0; return NUM_TLS_SW_COUNTERS; @@ -63,7 +67,7 @@ int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data) { unsigned int i, idx = 0; - if (!priv->tls) + if (!is_tls_atomic_stats(priv)) return 0; for (i = 0; i < NUM_TLS_SW_COUNTERS; i++) @@ -77,7 +81,7 @@ int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data) { int i, idx = 0; - if (!priv->tls) + if (!is_tls_atomic_stats(priv)) return 0; for (i = 0; i < NUM_TLS_SW_COUNTERS; i++) -- cgit v1.2.3-59-g8ed1b From 6e8de0b6b4690ba7a685db33801bf7536bac674f Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 10 Aug 2020 15:59:41 +0300 Subject: net/mlx5e: kTLS, Fix napi sync and possible use-after-free Using synchronize_rcu() is sufficient to wait until running NAPI quits. See similar upstream fix with detailed explanation: ("net/mlx5e: Use synchronize_rcu to sync with NAPI") This change also fixes a possible use-after-free as the NAPI might be already released at this stage. Fixes: 0419d8c9d8f8 ("net/mlx5e: kTLS, Add kTLS RX resync support") Signed-off-by: Tariq Toukan Reviewed-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index acf6d80a6bb7..f95aa50ab51a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -659,7 +659,7 @@ void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx) priv_rx = mlx5e_get_ktls_rx_priv_ctx(tls_ctx); set_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags); mlx5e_set_ktls_rx_priv_ctx(tls_ctx, NULL); - napi_synchronize(&priv->channels.c[priv_rx->rxq]->napi); + synchronize_rcu(); /* Sync with NAPI */ if (!cancel_work_sync(&priv_rx->rule.work)) /* completion is needed, as the priv_rx in the add flow * is maintained on the wqe info (wi), not on the socket. -- cgit v1.2.3-59-g8ed1b From 66ce5fc05713e6b90f74c01439f2aaa137ffd9f4 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 7 Sep 2020 22:58:50 -0700 Subject: net/mlx5e: kTLS, Add missing dma_unmap in RX resync Progress params dma address is never unmapped, unmap it when completion handling is over. Fixes: 0419d8c9d8f8 ("net/mlx5e: kTLS, Add kTLS RX resync support") Signed-off-by: Saeed Mahameed Reviewed-by: Tariq Toukan --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index f95aa50ab51a..fb4e4f2ebe02 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -386,16 +386,17 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, struct mlx5e_ktls_offload_context_rx *priv_rx; struct mlx5e_ktls_rx_resync_ctx *resync; u8 tracker_state, auth_state, *ctx; + struct device *dev; u32 hw_seq; priv_rx = buf->priv_rx; resync = &priv_rx->resync; - + dev = resync->priv->mdev->device; if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) goto out; - dma_sync_single_for_cpu(resync->priv->mdev->device, buf->dma_addr, - PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE); + dma_sync_single_for_cpu(dev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE, + DMA_FROM_DEVICE); ctx = buf->progress.ctx; tracker_state = MLX5_GET(tls_progress_params, ctx, record_tracker_state); @@ -411,6 +412,7 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, priv_rx->stats->tls_resync_req_end++; out: refcount_dec(&resync->refcnt); + dma_unmap_single(dev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE); kfree(buf); } -- cgit v1.2.3-59-g8ed1b From 581642f32f33017619355b7d91704c152df1fc2d Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 10 Sep 2020 21:16:04 -0700 Subject: net/mlx5e: kTLS, Fix leak on resync error flow Resync progress params buffer and dma weren't released on error, Add missing error unwinding for resync_post_get_progress_params(). Fixes: 0419d8c9d8f8 ("net/mlx5e: kTLS, Add kTLS RX resync support") Signed-off-by: Saeed Mahameed Reviewed-by: Tariq Toukan --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index fb4e4f2ebe02..e85411bd1fed 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -258,7 +258,7 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(pdev, buf->dma_addr))) { err = -ENOMEM; - goto err_out; + goto err_free; } buf->priv_rx = priv_rx; @@ -266,7 +266,7 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq, BUILD_BUG_ON(MLX5E_KTLS_GET_PROGRESS_WQEBBS != 1); if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, 1))) { err = -ENOSPC; - goto err_out; + goto err_dma_unmap; } pi = mlx5e_icosq_get_next_pi(sq, 1); @@ -297,6 +297,10 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq, return cseg; +err_dma_unmap: + dma_unmap_single(pdev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE); +err_free: + kfree(buf); err_out: priv_rx->stats->tls_resync_req_skip++; return ERR_PTR(err); -- cgit v1.2.3-59-g8ed1b From 94c4fed710c372313157f20b5eced39d04a96f88 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 7 Sep 2020 22:54:43 -0700 Subject: net/mlx5e: kTLS, Avoid kzalloc(GFP_KERNEL) under spinlock The spinlock only needed when accessing the channel's icosq, grab the lock after the buf allocation in resync_post_get_progress_params() to avoid kzalloc(GFP_KERNEL) in atomic context. Fixes: 0419d8c9d8f8 ("net/mlx5e: kTLS, Add kTLS RX resync support") Reported-by: YueHaibing Signed-off-by: Saeed Mahameed Reviewed-by: Tariq Toukan --- .../ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c | 25 +++++++++------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index e85411bd1fed..6bbfcf18107d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -234,7 +234,7 @@ mlx5e_get_ktls_rx_priv_ctx(struct tls_context *tls_ctx) /* Re-sync */ /* Runs in work context */ -static struct mlx5_wqe_ctrl_seg * +static int resync_post_get_progress_params(struct mlx5e_icosq *sq, struct mlx5e_ktls_offload_context_rx *priv_rx) { @@ -264,7 +264,11 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq, buf->priv_rx = priv_rx; BUILD_BUG_ON(MLX5E_KTLS_GET_PROGRESS_WQEBBS != 1); + + spin_lock(&sq->channel->async_icosq_lock); + if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, 1))) { + spin_unlock(&sq->channel->async_icosq_lock); err = -ENOSPC; goto err_dma_unmap; } @@ -294,8 +298,10 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq, }; icosq_fill_wi(sq, pi, &wi); sq->pc++; + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg); + spin_unlock(&sq->channel->async_icosq_lock); - return cseg; + return 0; err_dma_unmap: dma_unmap_single(pdev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE); @@ -303,7 +309,7 @@ err_free: kfree(buf); err_out: priv_rx->stats->tls_resync_req_skip++; - return ERR_PTR(err); + return err; } /* Function is called with elevated refcount. @@ -313,10 +319,8 @@ static void resync_handle_work(struct work_struct *work) { struct mlx5e_ktls_offload_context_rx *priv_rx; struct mlx5e_ktls_rx_resync_ctx *resync; - struct mlx5_wqe_ctrl_seg *cseg; struct mlx5e_channel *c; struct mlx5e_icosq *sq; - struct mlx5_wq_cyc *wq; resync = container_of(work, struct mlx5e_ktls_rx_resync_ctx, work); priv_rx = container_of(resync, struct mlx5e_ktls_offload_context_rx, resync); @@ -328,18 +332,9 @@ static void resync_handle_work(struct work_struct *work) c = resync->priv->channels.c[priv_rx->rxq]; sq = &c->async_icosq; - wq = &sq->wq; - spin_lock(&c->async_icosq_lock); - - cseg = resync_post_get_progress_params(sq, priv_rx); - if (IS_ERR(cseg)) { + if (resync_post_get_progress_params(sq, priv_rx)) refcount_dec(&resync->refcnt); - goto unlock; - } - mlx5e_notify_hw(wq, sq->pc, sq->uar_map, cseg); -unlock: - spin_unlock(&c->async_icosq_lock); } static void resync_init(struct mlx5e_ktls_rx_resync_ctx *resync, -- cgit v1.2.3-59-g8ed1b From cb39ccc5cbe1011d8d21886b75e2468070ac672c Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Fri, 11 Sep 2020 11:00:06 -0700 Subject: net/mlx5e: mlx5e_fec_in_caps() returns a boolean Returning errno is a bug, fix that. Also fixes smatch warnings: drivers/net/ethernet/mellanox/mlx5/core/en/port.c:453 mlx5e_fec_in_caps() warn: signedness bug returning '(-95)' Fixes: 2132b71f78d2 ("net/mlx5e: Advertise globaly supported FEC modes") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Saeed Mahameed Reviewed-by: Moshe Shemesh Reviewed-by: Aya Levin --- drivers/net/ethernet/mellanox/mlx5/core/en/port.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index 5de1cb9f5330..96608dbb9314 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -490,11 +490,8 @@ bool mlx5e_fec_in_caps(struct mlx5_core_dev *dev, int fec_policy) int err; int i; - if (!MLX5_CAP_GEN(dev, pcam_reg)) - return -EOPNOTSUPP; - - if (!MLX5_CAP_PCAM_REG(dev, pplm)) - return -EOPNOTSUPP; + if (!MLX5_CAP_GEN(dev, pcam_reg) || !MLX5_CAP_PCAM_REG(dev, pplm)) + return false; MLX5_SET(pplm_reg, in, local_port, 1); err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0); -- cgit v1.2.3-59-g8ed1b From 2b617c11d7c0791a18402407222ca9f2c343c47b Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Tue, 22 Sep 2020 07:09:23 +0800 Subject: net: Update MAINTAINERS for MediaTek switch driver Update maintainers for MediaTek switch driver with Landen Chao who is familiar with MediaTek MT753x switch devices and will help maintenance from the vendor side. Cc: Steven Liu Signed-off-by: Sean Wang Signed-off-by: Landen Chao Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index ad9a35b91325..8f04b81e19a5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11029,6 +11029,7 @@ F: drivers/char/hw_random/mtk-rng.c MEDIATEK SWITCH DRIVER M: Sean Wang +M: Landen Chao L: netdev@vger.kernel.org S: Maintained F: drivers/net/dsa/mt7530.* -- cgit v1.2.3-59-g8ed1b From 99f62a746066fa436aa15d4606a538569540db08 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 22 Sep 2020 01:07:09 +0300 Subject: net: bridge: br_vlan_get_pvid_rcu() should dereference the VLAN group under RCU When calling the RCU brother of br_vlan_get_pvid(), lockdep warns: ============================= WARNING: suspicious RCU usage 5.9.0-rc3-01631-g13c17acb8e38-dirty #814 Not tainted ----------------------------- net/bridge/br_private.h:1054 suspicious rcu_dereference_protected() usage! Call trace: lockdep_rcu_suspicious+0xd4/0xf8 __br_vlan_get_pvid+0xc0/0x100 br_vlan_get_pvid_rcu+0x78/0x108 The warning is because br_vlan_get_pvid_rcu() calls nbp_vlan_group() which calls rtnl_dereference() instead of rcu_dereference(). In turn, rtnl_dereference() calls rcu_dereference_protected() which assumes operation under an RCU write-side critical section, which obviously is not the case here. So, when the incorrect primitive is used to access the RCU-protected VLAN group pointer, READ_ONCE() is not used, which may cause various unexpected problems. I'm sad to say that br_vlan_get_pvid() and br_vlan_get_pvid_rcu() cannot share the same implementation. So fix the bug by splitting the 2 functions, and making br_vlan_get_pvid_rcu() retrieve the VLAN groups under proper locking annotations. Fixes: 7582f5b70f9a ("bridge: add br_vlan_get_pvid_rcu()") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- net/bridge/br_vlan.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index f9092c71225f..61c94cefa843 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -1288,11 +1288,13 @@ void br_vlan_get_stats(const struct net_bridge_vlan *v, } } -static int __br_vlan_get_pvid(const struct net_device *dev, - struct net_bridge_port *p, u16 *p_pvid) +int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid) { struct net_bridge_vlan_group *vg; + struct net_bridge_port *p; + ASSERT_RTNL(); + p = br_port_get_check_rtnl(dev); if (p) vg = nbp_vlan_group(p); else if (netif_is_bridge_master(dev)) @@ -1303,18 +1305,23 @@ static int __br_vlan_get_pvid(const struct net_device *dev, *p_pvid = br_get_pvid(vg); return 0; } - -int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid) -{ - ASSERT_RTNL(); - - return __br_vlan_get_pvid(dev, br_port_get_check_rtnl(dev), p_pvid); -} EXPORT_SYMBOL_GPL(br_vlan_get_pvid); int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid) { - return __br_vlan_get_pvid(dev, br_port_get_check_rcu(dev), p_pvid); + struct net_bridge_vlan_group *vg; + struct net_bridge_port *p; + + p = br_port_get_check_rcu(dev); + if (p) + vg = nbp_vlan_group_rcu(p); + else if (netif_is_bridge_master(dev)) + vg = br_vlan_group_rcu(netdev_priv(dev)); + else + return -EINVAL; + + *p_pvid = br_get_pvid(vg); + return 0; } EXPORT_SYMBOL_GPL(br_vlan_get_pvid_rcu); -- cgit v1.2.3-59-g8ed1b From d5e4d0a5e692a942f0c212e37dc6aeac47ecbdea Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 21 Sep 2020 07:27:20 -0700 Subject: inet_diag: validate INET_DIAG_REQ_PROTOCOL attribute User space could send an invalid INET_DIAG_REQ_PROTOCOL attribute as caught by syzbot. BUG: KMSAN: uninit-value in inet_diag_lock_handler net/ipv4/inet_diag.c:55 [inline] BUG: KMSAN: uninit-value in __inet_diag_dump+0x58c/0x720 net/ipv4/inet_diag.c:1147 CPU: 0 PID: 8505 Comm: syz-executor174 Not tainted 5.9.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x21c/0x280 lib/dump_stack.c:118 kmsan_report+0xf7/0x1e0 mm/kmsan/kmsan_report.c:122 __msan_warning+0x58/0xa0 mm/kmsan/kmsan_instr.c:219 inet_diag_lock_handler net/ipv4/inet_diag.c:55 [inline] __inet_diag_dump+0x58c/0x720 net/ipv4/inet_diag.c:1147 inet_diag_dump_compat+0x2a5/0x380 net/ipv4/inet_diag.c:1254 netlink_dump+0xb73/0x1cb0 net/netlink/af_netlink.c:2246 __netlink_dump_start+0xcf2/0xea0 net/netlink/af_netlink.c:2354 netlink_dump_start include/linux/netlink.h:246 [inline] inet_diag_rcv_msg_compat+0x5da/0x6c0 net/ipv4/inet_diag.c:1288 sock_diag_rcv_msg+0x24f/0x620 net/core/sock_diag.c:256 netlink_rcv_skb+0x6d7/0x7e0 net/netlink/af_netlink.c:2470 sock_diag_rcv+0x63/0x80 net/core/sock_diag.c:275 netlink_unicast_kernel net/netlink/af_netlink.c:1304 [inline] netlink_unicast+0x11c8/0x1490 net/netlink/af_netlink.c:1330 netlink_sendmsg+0x173a/0x1840 net/netlink/af_netlink.c:1919 sock_sendmsg_nosec net/socket.c:651 [inline] sock_sendmsg net/socket.c:671 [inline] ____sys_sendmsg+0xc82/0x1240 net/socket.c:2353 ___sys_sendmsg net/socket.c:2407 [inline] __sys_sendmsg+0x6d1/0x820 net/socket.c:2440 __do_sys_sendmsg net/socket.c:2449 [inline] __se_sys_sendmsg+0x97/0xb0 net/socket.c:2447 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2447 do_syscall_64+0x9f/0x140 arch/x86/entry/common.c:48 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x441389 Code: e8 fc ab 02 00 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 1b 09 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fff3b02ce98 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 0000000000441389 RDX: 0000000000000000 RSI: 0000000020001500 RDI: 0000000000000003 RBP: 00000000006cb018 R08: 00000000004002c8 R09: 00000000004002c8 R10: 0000000000000004 R11: 0000000000000246 R12: 0000000000402130 R13: 00000000004021c0 R14: 0000000000000000 R15: 0000000000000000 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:143 [inline] kmsan_internal_poison_shadow+0x66/0xd0 mm/kmsan/kmsan.c:126 kmsan_slab_alloc+0x8a/0xe0 mm/kmsan/kmsan_hooks.c:80 slab_alloc_node mm/slub.c:2907 [inline] __kmalloc_node_track_caller+0x9aa/0x12f0 mm/slub.c:4511 __kmalloc_reserve net/core/skbuff.c:142 [inline] __alloc_skb+0x35f/0xb30 net/core/skbuff.c:210 alloc_skb include/linux/skbuff.h:1094 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1176 [inline] netlink_sendmsg+0xdb9/0x1840 net/netlink/af_netlink.c:1894 sock_sendmsg_nosec net/socket.c:651 [inline] sock_sendmsg net/socket.c:671 [inline] ____sys_sendmsg+0xc82/0x1240 net/socket.c:2353 ___sys_sendmsg net/socket.c:2407 [inline] __sys_sendmsg+0x6d1/0x820 net/socket.c:2440 __do_sys_sendmsg net/socket.c:2449 [inline] __se_sys_sendmsg+0x97/0xb0 net/socket.c:2447 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2447 do_syscall_64+0x9f/0x140 arch/x86/entry/common.c:48 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 3f935c75eb52 ("inet_diag: support for wider protocol numbers") Signed-off-by: Eric Dumazet Cc: Paolo Abeni Cc: Christoph Paasch Cc: Mat Martineau Acked-by: Paolo Abeni Signed-off-by: David S. Miller --- net/ipv4/inet_diag.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 4a98dd736270..f1bd95f243b3 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -186,8 +186,8 @@ errout: } EXPORT_SYMBOL_GPL(inet_diag_msg_attrs_fill); -static void inet_diag_parse_attrs(const struct nlmsghdr *nlh, int hdrlen, - struct nlattr **req_nlas) +static int inet_diag_parse_attrs(const struct nlmsghdr *nlh, int hdrlen, + struct nlattr **req_nlas) { struct nlattr *nla; int remaining; @@ -195,9 +195,13 @@ static void inet_diag_parse_attrs(const struct nlmsghdr *nlh, int hdrlen, nlmsg_for_each_attr(nla, nlh, hdrlen, remaining) { int type = nla_type(nla); + if (type == INET_DIAG_REQ_PROTOCOL && nla_len(nla) != sizeof(u32)) + return -EINVAL; + if (type < __INET_DIAG_REQ_MAX) req_nlas[type] = nla; } + return 0; } static int inet_diag_get_protocol(const struct inet_diag_req_v2 *req, @@ -574,7 +578,10 @@ static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb, int err, protocol; memset(&dump_data, 0, sizeof(dump_data)); - inet_diag_parse_attrs(nlh, hdrlen, dump_data.req_nlas); + err = inet_diag_parse_attrs(nlh, hdrlen, dump_data.req_nlas); + if (err) + return err; + protocol = inet_diag_get_protocol(req, &dump_data); handler = inet_diag_lock_handler(protocol); @@ -1180,8 +1187,11 @@ static int __inet_diag_dump_start(struct netlink_callback *cb, int hdrlen) if (!cb_data) return -ENOMEM; - inet_diag_parse_attrs(nlh, hdrlen, cb_data->req_nlas); - + err = inet_diag_parse_attrs(nlh, hdrlen, cb_data->req_nlas); + if (err) { + kfree(cb_data); + return err; + } nla = cb_data->inet_diag_nla_bc; if (nla) { err = inet_diag_bc_audit(nla, skb); -- cgit v1.2.3-59-g8ed1b From 8b9e03cd08250c60409099c791e858157838d9eb Mon Sep 17 00:00:00 2001 From: Xiaoliang Yang Date: Tue, 22 Sep 2020 01:56:36 +0300 Subject: net: dsa: felix: fix some key offsets for IP4_TCP_UDP VCAP IS2 entries Some of the IS2 IP4_TCP_UDP keys are not correct, like L4_DPORT, L4_SPORT and other L4 keys. This prevents offloaded tc-flower rules from matching on src_port and dst_port for TCP and UDP packets. Signed-off-by: Xiaoliang Yang Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/ocelot/felix_vsc9959.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 9b720c8ddfc3..6855c94256f8 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -645,17 +645,17 @@ static struct vcap_field vsc9959_vcap_is2_keys[] = { [VCAP_IS2_HK_DIP_EQ_SIP] = {118, 1}, /* IP4_TCP_UDP (TYPE=100) */ [VCAP_IS2_HK_TCP] = {119, 1}, - [VCAP_IS2_HK_L4_SPORT] = {120, 16}, - [VCAP_IS2_HK_L4_DPORT] = {136, 16}, + [VCAP_IS2_HK_L4_DPORT] = {120, 16}, + [VCAP_IS2_HK_L4_SPORT] = {136, 16}, [VCAP_IS2_HK_L4_RNG] = {152, 8}, [VCAP_IS2_HK_L4_SPORT_EQ_DPORT] = {160, 1}, [VCAP_IS2_HK_L4_SEQUENCE_EQ0] = {161, 1}, - [VCAP_IS2_HK_L4_URG] = {162, 1}, - [VCAP_IS2_HK_L4_ACK] = {163, 1}, - [VCAP_IS2_HK_L4_PSH] = {164, 1}, - [VCAP_IS2_HK_L4_RST] = {165, 1}, - [VCAP_IS2_HK_L4_SYN] = {166, 1}, - [VCAP_IS2_HK_L4_FIN] = {167, 1}, + [VCAP_IS2_HK_L4_FIN] = {162, 1}, + [VCAP_IS2_HK_L4_SYN] = {163, 1}, + [VCAP_IS2_HK_L4_RST] = {164, 1}, + [VCAP_IS2_HK_L4_PSH] = {165, 1}, + [VCAP_IS2_HK_L4_ACK] = {166, 1}, + [VCAP_IS2_HK_L4_URG] = {167, 1}, [VCAP_IS2_HK_L4_1588_DOM] = {168, 8}, [VCAP_IS2_HK_L4_1588_VER] = {176, 4}, /* IP4_OTHER (TYPE=101) */ -- cgit v1.2.3-59-g8ed1b From 7a0230759ea6795af037f3fd9526d00dc896ce19 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 22 Sep 2020 01:56:37 +0300 Subject: net: dsa: seville: fix some key offsets for IP4_TCP_UDP VCAP IS2 entries Since these were copied from the Felix VCAP IS2 code, and only the offsets were adjusted, the order of the bit fields is still wrong. Fix it. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/ocelot/seville_vsc9953.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/dsa/ocelot/seville_vsc9953.c b/drivers/net/dsa/ocelot/seville_vsc9953.c index 0fdeff22a76c..29df0797ecf5 100644 --- a/drivers/net/dsa/ocelot/seville_vsc9953.c +++ b/drivers/net/dsa/ocelot/seville_vsc9953.c @@ -659,17 +659,17 @@ static struct vcap_field vsc9953_vcap_is2_keys[] = { [VCAP_IS2_HK_DIP_EQ_SIP] = {122, 1}, /* IP4_TCP_UDP (TYPE=100) */ [VCAP_IS2_HK_TCP] = {123, 1}, - [VCAP_IS2_HK_L4_SPORT] = {124, 16}, - [VCAP_IS2_HK_L4_DPORT] = {140, 16}, + [VCAP_IS2_HK_L4_DPORT] = {124, 16}, + [VCAP_IS2_HK_L4_SPORT] = {140, 16}, [VCAP_IS2_HK_L4_RNG] = {156, 8}, [VCAP_IS2_HK_L4_SPORT_EQ_DPORT] = {164, 1}, [VCAP_IS2_HK_L4_SEQUENCE_EQ0] = {165, 1}, - [VCAP_IS2_HK_L4_URG] = {166, 1}, - [VCAP_IS2_HK_L4_ACK] = {167, 1}, - [VCAP_IS2_HK_L4_PSH] = {168, 1}, - [VCAP_IS2_HK_L4_RST] = {169, 1}, - [VCAP_IS2_HK_L4_SYN] = {170, 1}, - [VCAP_IS2_HK_L4_FIN] = {171, 1}, + [VCAP_IS2_HK_L4_FIN] = {166, 1}, + [VCAP_IS2_HK_L4_SYN] = {167, 1}, + [VCAP_IS2_HK_L4_RST] = {168, 1}, + [VCAP_IS2_HK_L4_PSH] = {169, 1}, + [VCAP_IS2_HK_L4_ACK] = {170, 1}, + [VCAP_IS2_HK_L4_URG] = {171, 1}, /* IP4_OTHER (TYPE=101) */ [VCAP_IS2_HK_IP4_L3_PROTO] = {123, 8}, [VCAP_IS2_HK_L3_PAYLOAD] = {131, 56}, -- cgit v1.2.3-59-g8ed1b From 8194d8fa719f78007c0c4d5c721b6b0dfa290f81 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 22 Sep 2020 01:56:38 +0300 Subject: net: mscc: ocelot: fix some key offsets for IP4_TCP_UDP VCAP IS2 entries The IS2 IP4_TCP_UDP key offsets do not correspond to the VSC7514 datasheet. Whether they work or not is unknown to me. On VSC9959 and VSC9953, with the same mistake and same discrepancy from the documentation, tc-flower src_port and dst_port rules did not work, so I am assuming the same is true here. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot_vsc7514.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index e02fb8bfab63..dfb1535f26f2 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -806,17 +806,17 @@ static const struct vcap_field vsc7514_vcap_is2_keys[] = { [VCAP_IS2_HK_DIP_EQ_SIP] = {123, 1}, /* IP4_TCP_UDP (TYPE=100) */ [VCAP_IS2_HK_TCP] = {124, 1}, - [VCAP_IS2_HK_L4_SPORT] = {125, 16}, - [VCAP_IS2_HK_L4_DPORT] = {141, 16}, + [VCAP_IS2_HK_L4_DPORT] = {125, 16}, + [VCAP_IS2_HK_L4_SPORT] = {141, 16}, [VCAP_IS2_HK_L4_RNG] = {157, 8}, [VCAP_IS2_HK_L4_SPORT_EQ_DPORT] = {165, 1}, [VCAP_IS2_HK_L4_SEQUENCE_EQ0] = {166, 1}, - [VCAP_IS2_HK_L4_URG] = {167, 1}, - [VCAP_IS2_HK_L4_ACK] = {168, 1}, - [VCAP_IS2_HK_L4_PSH] = {169, 1}, - [VCAP_IS2_HK_L4_RST] = {170, 1}, - [VCAP_IS2_HK_L4_SYN] = {171, 1}, - [VCAP_IS2_HK_L4_FIN] = {172, 1}, + [VCAP_IS2_HK_L4_FIN] = {167, 1}, + [VCAP_IS2_HK_L4_SYN] = {168, 1}, + [VCAP_IS2_HK_L4_RST] = {169, 1}, + [VCAP_IS2_HK_L4_PSH] = {170, 1}, + [VCAP_IS2_HK_L4_ACK] = {171, 1}, + [VCAP_IS2_HK_L4_URG] = {172, 1}, [VCAP_IS2_HK_L4_1588_DOM] = {173, 8}, [VCAP_IS2_HK_L4_1588_VER] = {181, 4}, /* IP4_OTHER (TYPE=101) */ -- cgit v1.2.3-59-g8ed1b