From e6a41c23df0d5da01540d2abef41591589c0b4be Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Wed, 12 Feb 2020 17:45:38 +0100 Subject: net: macb: ensure interface is not suspended on at91rm9200 Because of autosuspend, at91ether_start is called with clocks disabled. Ensure that pm_runtime doesn't suspend the interface as soon as it is opened as there is no pm_runtime support is the other relevant parts of the platform support for at91rm9200. Fixes: d54f89af6cc4 ("net: macb: Add pm runtime support") Signed-off-by: Alexandre Belloni Reviewed-by: Claudiu Beznea Acked-by: Nicolas Ferre Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 4508f0d150da..def94e91883a 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3790,6 +3790,10 @@ static int at91ether_open(struct net_device *dev) u32 ctl; int ret; + ret = pm_runtime_get_sync(&lp->pdev->dev); + if (ret < 0) + return ret; + /* Clear internal statistics */ ctl = macb_readl(lp, NCR); macb_writel(lp, NCR, ctl | MACB_BIT(CLRSTAT)); @@ -3854,7 +3858,7 @@ static int at91ether_close(struct net_device *dev) q->rx_buffers, q->rx_buffers_dma); q->rx_buffers = NULL; - return 0; + return pm_runtime_put(&lp->pdev->dev); } /* Transmit packet */ -- cgit v1.2.3-59-g8ed1b From 15beab0a9d797be1b7c67458da007a62269be29a Mon Sep 17 00:00:00 2001 From: Dmitry Bezrukov Date: Fri, 14 Feb 2020 18:44:51 +0300 Subject: net: atlantic: checksum compat issue Yet another checksum offload compatibility issue was found. The known issue is that AQC HW marks tcp packets with 0xFFFF checksum as invalid (1). This is workarounded in driver, passing all the suspicious packets up to the stack for further csum validation. Another HW problem (2) is that it hides invalid csum of LRO aggregated packets inside of the individual descriptors. That was workarounded by forced scan of all LRO descriptors for checksum errors. However the scan logic was joint for both LRO and multi-descriptor packets (jumbos). And this causes the issue. We have to drop LRO packets with the detected bad checksum because of (2), but we have to pass jumbo packets to stack because of (1). When using windows tcp partner with jumbo frames but with LSO disabled driver discards such frames as bad checksummed. But only LRO frames should be dropped, not jumbos. On such a configurations tcp stream have a chance of drops and stucks. (1) 76f254d4afe2 ("net: aquantia: tcp checksum 0xffff being handled incorrectly") (2) d08b9a0a3ebd ("net: aquantia: do not pass lro session with invalid tcp checksum") Fixes: d08b9a0a3ebd ("net: aquantia: do not pass lro session with invalid tcp checksum") Signed-off-by: Dmitry Bezrukov Signed-off-by: Igor Russkikh Signed-off-by: Dmitry Bogdanov Signed-off-by: David S. 
Miller --- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 3 ++- drivers/net/ethernet/aquantia/atlantic/aq_ring.h | 3 ++- drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 5 +++-- 3 files changed, 7 insertions(+), 4 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 951d86f8b66e..6941999ae845 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -351,7 +351,8 @@ int aq_ring_rx_clean(struct aq_ring_s *self, err = 0; goto err_exit; } - if (buff->is_error || buff->is_cso_err) { + if (buff->is_error || + (buff->is_lro && buff->is_cso_err)) { buff_ = buff; do { next_ = buff_->next, diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h index 991e4d31b094..2c96f20f6289 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h @@ -78,7 +78,8 @@ struct __packed aq_ring_buff_s { u32 is_cleaned:1; u32 is_error:1; u32 is_vlan:1; - u32 rsvd3:4; + u32 is_lro:1; + u32 rsvd3:3; u16 eop_index; u16 rsvd4; }; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index ec041f78d063..5784da26f868 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -823,6 +823,8 @@ static int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self, } } + buff->is_lro = !!(HW_ATL_B0_RXD_WB_STAT2_RSCCNT & + rxd_wb->status); if (HW_ATL_B0_RXD_WB_STAT2_EOP & rxd_wb->status) { buff->len = rxd_wb->pkt_len % AQ_CFG_RX_FRAME_MAX; @@ -835,8 +837,7 @@ static int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self, rxd_wb->pkt_len > AQ_CFG_RX_FRAME_MAX ? AQ_CFG_RX_FRAME_MAX : rxd_wb->pkt_len; - if (HW_ATL_B0_RXD_WB_STAT2_RSCCNT & - rxd_wb->status) { + if (buff->is_lro) { /* LRO */ buff->next = rxd_wb->next_desc_ptr; ++ring->stats.rx.lro_packets; -- cgit v1.2.3-59-g8ed1b From e7b5f97e6574dc4918e375d5f8d24ec31653cd6d Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Fri, 14 Feb 2020 18:44:52 +0300 Subject: net: atlantic: check rpc result and wait for rpc address Artificial HW reliability tests revealed a possible hangup in the driver. Normally, when device disappears from bus, all register reads returns 0xFFFFFFFF. At remote procedure invocation towards FW there is a logic where result is compared with -1 in a loop. That caused an infinite loop if hardware due to some issues disappears from bus. Add extra result checks to prevent this. Signed-off-by: Dmitry Bogdanov Signed-off-by: Igor Russkikh Signed-off-by: David S. 
Miller --- .../ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c index f547baa6c954..354705f9bc49 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c @@ -22,6 +22,7 @@ #define HW_ATL_MIF_ADDR 0x0208U #define HW_ATL_MIF_VAL 0x020CU +#define HW_ATL_MPI_RPC_ADDR 0x0334U #define HW_ATL_RPC_CONTROL_ADR 0x0338U #define HW_ATL_RPC_STATE_ADR 0x033CU @@ -53,15 +54,14 @@ enum mcp_area { }; static int hw_atl_utils_ver_match(u32 ver_expected, u32 ver_actual); - static int hw_atl_utils_mpi_set_state(struct aq_hw_s *self, enum hal_atl_utils_fw_state_e state); - static u32 hw_atl_utils_get_mpi_mbox_tid(struct aq_hw_s *self); static u32 hw_atl_utils_mpi_get_state(struct aq_hw_s *self); static u32 hw_atl_utils_mif_cmd_get(struct aq_hw_s *self); static u32 hw_atl_utils_mif_addr_get(struct aq_hw_s *self); static u32 hw_atl_utils_rpc_state_get(struct aq_hw_s *self); +static u32 aq_fw1x_rpc_get(struct aq_hw_s *self); int hw_atl_utils_initfw(struct aq_hw_s *self, const struct aq_fw_ops **fw_ops) { @@ -476,6 +476,10 @@ static int hw_atl_utils_init_ucp(struct aq_hw_s *self, self, self->mbox_addr, self->mbox_addr != 0U, 1000U, 10000U); + err = readx_poll_timeout_atomic(aq_fw1x_rpc_get, self, + self->rpc_addr, + self->rpc_addr != 0U, + 1000U, 100000U); return err; } @@ -531,6 +535,12 @@ int hw_atl_utils_fw_rpc_wait(struct aq_hw_s *self, self, fw.val, sw.tid == fw.tid, 1000U, 100000U); + if (err < 0) + goto err_exit; + + err = aq_hw_err_from_flags(self); + if (err < 0) + goto err_exit; if (fw.len == 0xFFFFU) { err = hw_atl_utils_fw_rpc_call(self, sw.len); @@ -1025,6 +1035,11 @@ static u32 hw_atl_utils_rpc_state_get(struct aq_hw_s *self) return aq_hw_read_reg(self, HW_ATL_RPC_STATE_ADR); } +static u32 aq_fw1x_rpc_get(struct aq_hw_s *self) +{ + return aq_hw_read_reg(self, HW_ATL_MPI_RPC_ADDR); +} + const struct aq_fw_ops aq_fw_1x_ops = { .init = hw_atl_utils_mpi_create, .deinit = hw_atl_fw1x_deinit, -- cgit v1.2.3-59-g8ed1b From f08a464c27ca0a4050333baa271504b27ce834b7 Mon Sep 17 00:00:00 2001 From: Egor Pomozov Date: Fri, 14 Feb 2020 18:44:53 +0300 Subject: net: atlantic: ptp gpio adjustments Clock adjustment data should be passed to FW as well, otherwise in some cases a drift was observed when using GPIO features. Signed-off-by: Egor Pomozov Signed-off-by: Igor Russkikh Signed-off-by: Dmitry Bogdanov Signed-off-by: David S. 
Miller --- drivers/net/ethernet/aquantia/atlantic/aq_hw.h | 2 ++ drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 4 +++- .../ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c | 12 ++++++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h index cc70c606b6ef..251767c31f7e 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h @@ -337,6 +337,8 @@ struct aq_fw_ops { void (*enable_ptp)(struct aq_hw_s *self, int enable); + void (*adjust_ptp)(struct aq_hw_s *self, uint64_t adj); + int (*set_eee_rate)(struct aq_hw_s *self, u32 speed); int (*get_eee_rate)(struct aq_hw_s *self, u32 *rate, diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 5784da26f868..9acdb3fbb750 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -1162,6 +1162,8 @@ static int hw_atl_b0_adj_sys_clock(struct aq_hw_s *self, s64 delta) { self->ptp_clk_offset += delta; + self->aq_fw_ops->adjust_ptp(self, self->ptp_clk_offset); + return 0; } @@ -1212,7 +1214,7 @@ static int hw_atl_b0_gpio_pulse(struct aq_hw_s *self, u32 index, fwreq.ptp_gpio_ctrl.index = index; fwreq.ptp_gpio_ctrl.period = period; /* Apply time offset */ - fwreq.ptp_gpio_ctrl.start = start - self->ptp_clk_offset; + fwreq.ptp_gpio_ctrl.start = start; size = sizeof(fwreq.msg_id) + sizeof(fwreq.ptp_gpio_ctrl); return self->aq_fw_ops->send_fw_request(self, &fwreq, size); diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c index 97ebf849695f..77a4ed64830f 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c @@ -30,6 +30,9 @@ #define HW_ATL_FW3X_EXT_CONTROL_ADDR 0x378 #define HW_ATL_FW3X_EXT_STATE_ADDR 0x37c +#define HW_ATL_FW3X_PTP_ADJ_LSW_ADDR 0x50a0 +#define HW_ATL_FW3X_PTP_ADJ_MSW_ADDR 0x50a4 + #define HW_ATL_FW2X_CAP_PAUSE BIT(CAPS_HI_PAUSE) #define HW_ATL_FW2X_CAP_ASYM_PAUSE BIT(CAPS_HI_ASYMMETRIC_PAUSE) #define HW_ATL_FW2X_CAP_SLEEP_PROXY BIT(CAPS_HI_SLEEP_PROXY) @@ -475,6 +478,14 @@ static void aq_fw3x_enable_ptp(struct aq_hw_s *self, int enable) aq_hw_write_reg(self, HW_ATL_FW3X_EXT_CONTROL_ADDR, ptp_opts); } +static void aq_fw3x_adjust_ptp(struct aq_hw_s *self, uint64_t adj) +{ + aq_hw_write_reg(self, HW_ATL_FW3X_PTP_ADJ_LSW_ADDR, + (adj >> 0) & 0xffffffff); + aq_hw_write_reg(self, HW_ATL_FW3X_PTP_ADJ_MSW_ADDR, + (adj >> 32) & 0xffffffff); +} + static int aq_fw2x_led_control(struct aq_hw_s *self, u32 mode) { if (self->fw_ver_actual < HW_ATL_FW_VER_LED) @@ -633,4 +644,5 @@ const struct aq_fw_ops aq_fw_2x_ops = { .enable_ptp = aq_fw3x_enable_ptp, .led_control = aq_fw2x_led_control, .set_phyloopback = aq_fw2x_set_phyloopback, + .adjust_ptp = aq_fw3x_adjust_ptp, }; -- cgit v1.2.3-59-g8ed1b From b42726fcf76e9367e524392e0ead7e672cc0791c Mon Sep 17 00:00:00 2001 From: Nikita Danilov Date: Fri, 14 Feb 2020 18:44:54 +0300 Subject: net: atlantic: better loopback mode handling Add checks to not enable multiple loopback modes simultaneously, It was also discovered that for dma loopback to function correctly promisc mode should be enabled on device. 
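(For context -- an illustrative usage sketch, not part of the original commit message.) The loopback modes in question are exposed as ethtool private flags, so the new check surfaces to userspace roughly as follows; the flag name below is a placeholder, the real names are listed by --show-priv-flags:

  # list the driver's private flags and their current state
  ethtool --show-priv-flags eth0
  # with this patch, enabling a second loopback flag while another one is
  # already active is rejected with -EINVAL and a "Can't enable more than
  # one loopback simultaneously" note in the kernel log
  ethtool --set-priv-flags eth0 <loopback-flag> on
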
Fixes: ea4b4d7fc106 ("net: atlantic: loopback tests via private flags") Signed-off-by: Nikita Danilov Signed-off-by: Igor Russkikh Signed-off-by: Dmitry Bogdanov Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c | 5 +++++ drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 13 ++++++++----- 2 files changed, 13 insertions(+), 5 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c index a1f99bef4a68..7b55633d2cb9 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c @@ -722,6 +722,11 @@ static int aq_ethtool_set_priv_flags(struct net_device *ndev, u32 flags) if (flags & ~AQ_PRIV_FLAGS_MASK) return -EOPNOTSUPP; + if (hweight32((flags | priv_flags) & AQ_HW_LOOPBACK_MASK) > 1) { + netdev_info(ndev, "Can't enable more than one loopback simultaneously\n"); + return -EINVAL; + } + cfg->priv_flags = flags; if ((priv_flags ^ flags) & BIT(AQ_HW_LOOPBACK_DMA_NET)) { diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 9acdb3fbb750..d20d91cdece8 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -885,13 +885,16 @@ static int hw_atl_b0_hw_packet_filter_set(struct aq_hw_s *self, { struct aq_nic_cfg_s *cfg = self->aq_nic_cfg; unsigned int i = 0U; + u32 vlan_promisc; + u32 l2_promisc; - hw_atl_rpfl2promiscuous_mode_en_set(self, - IS_FILTER_ENABLED(IFF_PROMISC)); + l2_promisc = IS_FILTER_ENABLED(IFF_PROMISC) || + !!(cfg->priv_flags & BIT(AQ_HW_LOOPBACK_DMA_NET)); + vlan_promisc = l2_promisc || cfg->is_vlan_force_promisc; - hw_atl_rpf_vlan_prom_mode_en_set(self, - IS_FILTER_ENABLED(IFF_PROMISC) || - cfg->is_vlan_force_promisc); + hw_atl_rpfl2promiscuous_mode_en_set(self, l2_promisc); + + hw_atl_rpf_vlan_prom_mode_en_set(self, vlan_promisc); hw_atl_rpfl2multicast_flr_en_set(self, IS_FILTER_ENABLED(IFF_ALLMULTI) && -- cgit v1.2.3-59-g8ed1b From a4980919ad6a7be548d499bc5338015e1a9191c6 Mon Sep 17 00:00:00 2001 From: Pavel Belous Date: Fri, 14 Feb 2020 18:44:55 +0300 Subject: net: atlantic: fix use after free kasan warn skb->len is used to calculate statistics after xmit invocation. Under a stress load it may happen that skb will be xmited, rx interrupt will come and skb will be freed, all before xmit function is even returned. Eventually, skb->len will access unallocated area. Moving stats calculation into tx_clean routine. Fixes: 018423e90bee ("net: ethernet: aquantia: Add ring support code") Reported-by: Christophe Vu-Brugier Signed-off-by: Igor Russkikh Signed-off-by: Pavel Belous Signed-off-by: Dmitry Bogdanov Signed-off-by: David S. 
Miller --- drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 4 ---- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 7 +++++-- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index c85e3e29012c..263beea1859c 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -655,10 +655,6 @@ int aq_nic_xmit(struct aq_nic_s *self, struct sk_buff *skb) if (likely(frags)) { err = self->aq_hw_ops->hw_ring_tx_xmit(self->aq_hw, ring, frags); - if (err >= 0) { - ++ring->stats.tx.packets; - ring->stats.tx.bytes += skb->len; - } } else { err = NETDEV_TX_BUSY; } diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 6941999ae845..bae95a618560 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -272,9 +272,12 @@ bool aq_ring_tx_clean(struct aq_ring_s *self) } } - if (unlikely(buff->is_eop)) - dev_kfree_skb_any(buff->skb); + if (unlikely(buff->is_eop)) { + ++self->stats.rx.packets; + self->stats.tx.bytes += buff->skb->len; + dev_kfree_skb_any(buff->skb); + } buff->pa = 0U; buff->eop_index = 0xffffU; self->sw_head = aq_ring_next_dx(self, self->sw_head); -- cgit v1.2.3-59-g8ed1b From 380ec5b9af7f0d57dbf6ac067fd9f33cff2fef71 Mon Sep 17 00:00:00 2001 From: Pavel Belous Date: Fri, 14 Feb 2020 18:44:56 +0300 Subject: net: atlantic: fix potential error handling Code inspection found that in case of mapping error we do return current 'ret' value. But beside error, it is used to count number of descriptors allocated for the packet. In that case map_skb function could return '1'. Changing it to return zero (number of mapped descriptors for skb) Fixes: 018423e90bee ("net: ethernet: aquantia: Add ring support code") Signed-off-by: Pavel Belous Signed-off-by: Igor Russkikh Signed-off-by: Dmitry Bogdanov Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 263beea1859c..e95f6a6bef73 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -533,8 +533,10 @@ unsigned int aq_nic_map_skb(struct aq_nic_s *self, struct sk_buff *skb, dx_buff->len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(aq_nic_get_dev(self), dx_buff->pa))) + if (unlikely(dma_mapping_error(aq_nic_get_dev(self), dx_buff->pa))) { + ret = 0; goto exit; + } first = dx_buff; dx_buff->len_pkt = skb->len; -- cgit v1.2.3-59-g8ed1b From 52a22f4d6ff95e8bdca557765c04893eb5dd83fd Mon Sep 17 00:00:00 2001 From: Pavel Belous Date: Fri, 14 Feb 2020 18:44:57 +0300 Subject: net: atlantic: possible fault in transition to hibernation during hibernation freeze, aq_nic_stop could be invoked on a stopped device. That may cause panic on access to not yet allocated vector/ring structures. Add a check to stop device if it is not yet stopped. Similiarly after freeze in hibernation thaw, aq_nic_start could be invoked on a not initialized net device. Result will be the same. Add a check to start device if it is initialized. In our case, this is the same as started. 
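(A hypothetical reproduction sketch, not taken from the commit.) One way to reach the affected path is to enter hibernation while the interface is administratively down, so the freeze handler runs against an already-stopped device; eth0 is a placeholder interface name:

  ip link set eth0 down
  echo disk > /sys/power/state
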
Fixes: 8aaa112a57c1 ("net: atlantic: refactoring pm logic") Signed-off-by: Pavel Belous Signed-off-by: Nikita Danilov Signed-off-by: Igor Russkikh Signed-off-by: Dmitry Bogdanov Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index 6b27af0db499..78b6f3248756 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -359,7 +359,8 @@ static int aq_suspend_common(struct device *dev, bool deep) netif_device_detach(nic->ndev); netif_tx_stop_all_queues(nic->ndev); - aq_nic_stop(nic); + if (netif_running(nic->ndev)) + aq_nic_stop(nic); if (deep) { aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol); @@ -375,7 +376,7 @@ static int atl_resume_common(struct device *dev, bool deep) { struct pci_dev *pdev = to_pci_dev(dev); struct aq_nic_s *nic; - int ret; + int ret = 0; nic = pci_get_drvdata(pdev); @@ -390,9 +391,11 @@ static int atl_resume_common(struct device *dev, bool deep) goto err_exit; } - ret = aq_nic_start(nic); - if (ret) - goto err_exit; + if (netif_running(nic->ndev)) { + ret = aq_nic_start(nic); + if (ret) + goto err_exit; + } netif_device_attach(nic->ndev); netif_tx_start_all_queues(nic->ndev); -- cgit v1.2.3-59-g8ed1b From 5a292c89a84d49b598f8978f154bdda48b1072c0 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Fri, 14 Feb 2020 18:44:58 +0300 Subject: net: atlantic: fix out of range usage of active_vlans array fix static checker warning: drivers/net/ethernet/aquantia/atlantic/aq_filters.c:166 aq_check_approve_fvlan() error: passing untrusted data to 'test_bit()' Reported-by: Dan Carpenter Fixes: 7975d2aff5af: ("net: aquantia: add support of rx-vlan-filter offload") Signed-off-by: Dmitry Bogdanov Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_filters.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c index 6102251bb909..03ff92bc4a7f 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c @@ -163,7 +163,7 @@ aq_check_approve_fvlan(struct aq_nic_s *aq_nic, } if ((aq_nic->ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER) && - (!test_bit(be16_to_cpu(fsp->h_ext.vlan_tci), + (!test_bit(be16_to_cpu(fsp->h_ext.vlan_tci) & VLAN_VID_MASK, aq_nic->active_vlans))) { netdev_err(aq_nic->ndev, "ethtool: unknown vlan-id specified"); -- cgit v1.2.3-59-g8ed1b From 6f08e98d62799e53c89dbf2c9a49d77e20ca648c Mon Sep 17 00:00:00 2001 From: Arun Parameswaran Date: Fri, 14 Feb 2020 13:47:46 -0800 Subject: net: phy: restore mdio regs in the iproc mdio driver The mii management register in iproc mdio block does not have a retention register so it is lost on suspend. Save and restore value of register while resuming from suspend. Fixes: bb1a619735b4 ("net: phy: Initialize mdio clock at probe function") Signed-off-by: Arun Parameswaran Signed-off-by: Scott Branden Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/phy/mdio-bcm-iproc.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/phy/mdio-bcm-iproc.c b/drivers/net/phy/mdio-bcm-iproc.c index 7e9975d25066..f1ded03f0229 100644 --- a/drivers/net/phy/mdio-bcm-iproc.c +++ b/drivers/net/phy/mdio-bcm-iproc.c @@ -178,6 +178,23 @@ static int iproc_mdio_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM_SLEEP +int iproc_mdio_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct iproc_mdio_priv *priv = platform_get_drvdata(pdev); + + /* restore the mii clock configuration */ + iproc_mdio_config_clk(priv->base); + + return 0; +} + +static const struct dev_pm_ops iproc_mdio_pm_ops = { + .resume = iproc_mdio_resume +}; +#endif /* CONFIG_PM_SLEEP */ + static const struct of_device_id iproc_mdio_of_match[] = { { .compatible = "brcm,iproc-mdio", }, { /* sentinel */ }, @@ -188,6 +205,9 @@ static struct platform_driver iproc_mdio_driver = { .driver = { .name = "iproc-mdio", .of_match_table = iproc_mdio_of_match, +#ifdef CONFIG_PM_SLEEP + .pm = &iproc_mdio_pm_ops, +#endif }, .probe = iproc_mdio_probe, .remove = iproc_mdio_remove, -- cgit v1.2.3-59-g8ed1b From 2a8a4df36462aa85b0db87b7c5ea145ba67e34a8 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 14 Feb 2020 23:57:21 +0100 Subject: wireguard: receive: reset last_under_load to zero This is a small optimization that prevents more expensive comparisons from happening when they are no longer necessary, by clearing the last_under_load variable whenever we wind up in a state where we were under load but we no longer are. Signed-off-by: Jason A. Donenfeld Suggested-by: Matt Dunwoodie Signed-off-by: David S. Miller --- drivers/net/wireguard/receive.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c index 9c6bab9c981f..4a153894cee2 100644 --- a/drivers/net/wireguard/receive.c +++ b/drivers/net/wireguard/receive.c @@ -118,10 +118,13 @@ static void wg_receive_handshake_packet(struct wg_device *wg, under_load = skb_queue_len(&wg->incoming_handshakes) >= MAX_QUEUED_INCOMING_HANDSHAKES / 8; - if (under_load) + if (under_load) { last_under_load = ktime_get_coarse_boottime_ns(); - else if (last_under_load) + } else if (last_under_load) { under_load = !wg_birthdate_has_expired(last_under_load, 1); + if (!under_load) + last_under_load = 0; + } mac_state = wg_cookie_validate_packet(&wg->cookie_checker, skb, under_load); if ((under_load && mac_state == VALID_MAC_WITH_COOKIE) || -- cgit v1.2.3-59-g8ed1b From 175f1ca9a9ed8689d2028da1a7c624bb4fb4ff7e Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 14 Feb 2020 23:57:22 +0100 Subject: wireguard: send: account for mtu=0 devices It turns out there's an easy way to get packets queued up while still having an MTU of zero, and that's via persistent keep alive. This commit makes sure that in whatever condition, we don't wind up dividing by zero. Note that an MTU of zero for a wireguard interface is something quasi-valid, so I don't think the correct fix is to limit it via min_mtu. 
This can be reproduced easily with: ip link add wg0 type wireguard ip link add wg1 type wireguard ip link set wg0 up mtu 0 ip link set wg1 up wg set wg0 private-key <(wg genkey) wg set wg1 listen-port 1 private-key <(wg genkey) peer $(wg show wg0 public-key) wg set wg0 peer $(wg show wg1 public-key) persistent-keepalive 1 endpoint 127.0.0.1:1 However, while min_mtu=0 seems fine, it makes sense to restrict the max_mtu. This commit also restricts the maximum MTU to the greatest number for which rounding up to the padding multiple won't overflow a signed integer. Packets this large were always rejected anyway eventually, due to checks deeper in, but it seems more sound not to even let the administrator configure something that won't work anyway. We use this opportunity to clean up this function a bit so that it's clear which paths we're expecting. Signed-off-by: Jason A. Donenfeld Cc: Eric Dumazet Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/wireguard/device.c | 7 ++++--- drivers/net/wireguard/send.c | 16 +++++++++++----- 2 files changed, 15 insertions(+), 8 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c index 43db442b1373..cdc96968b0f4 100644 --- a/drivers/net/wireguard/device.c +++ b/drivers/net/wireguard/device.c @@ -258,6 +258,8 @@ static void wg_setup(struct net_device *dev) enum { WG_NETDEV_FEATURES = NETIF_F_HW_CSUM | NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO | NETIF_F_GSO_SOFTWARE | NETIF_F_HIGHDMA }; + const int overhead = MESSAGE_MINIMUM_LENGTH + sizeof(struct udphdr) + + max(sizeof(struct ipv6hdr), sizeof(struct iphdr)); dev->netdev_ops = &netdev_ops; dev->hard_header_len = 0; @@ -271,9 +273,8 @@ static void wg_setup(struct net_device *dev) dev->features |= WG_NETDEV_FEATURES; dev->hw_features |= WG_NETDEV_FEATURES; dev->hw_enc_features |= WG_NETDEV_FEATURES; - dev->mtu = ETH_DATA_LEN - MESSAGE_MINIMUM_LENGTH - - sizeof(struct udphdr) - - max(sizeof(struct ipv6hdr), sizeof(struct iphdr)); + dev->mtu = ETH_DATA_LEN - overhead; + dev->max_mtu = round_down(INT_MAX, MESSAGE_PADDING_MULTIPLE) - overhead; SET_NETDEV_DEVTYPE(dev, &device_type); diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c index c13260563446..7348c10cbae3 100644 --- a/drivers/net/wireguard/send.c +++ b/drivers/net/wireguard/send.c @@ -143,16 +143,22 @@ static void keep_key_fresh(struct wg_peer *peer) static unsigned int calculate_skb_padding(struct sk_buff *skb) { + unsigned int padded_size, last_unit = skb->len; + + if (unlikely(!PACKET_CB(skb)->mtu)) + return ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE) - last_unit; + /* We do this modulo business with the MTU, just in case the networking * layer gives us a packet that's bigger than the MTU. In that case, we * wouldn't want the final subtraction to overflow in the case of the - * padded_size being clamped. + * padded_size being clamped. Fortunately, that's very rarely the case, + * so we optimize for that not happening. */ - unsigned int last_unit = skb->len % PACKET_CB(skb)->mtu; - unsigned int padded_size = ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE); + if (unlikely(last_unit > PACKET_CB(skb)->mtu)) + last_unit %= PACKET_CB(skb)->mtu; - if (padded_size > PACKET_CB(skb)->mtu) - padded_size = PACKET_CB(skb)->mtu; + padded_size = min(PACKET_CB(skb)->mtu, + ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE)); return padded_size - last_unit; } -- cgit v1.2.3-59-g8ed1b From 1fbc33b0a7feb6ca72bf7dc8a05d81485ee8ee2e Mon Sep 17 00:00:00 2001 From: "Jason A. 
Donenfeld" Date: Fri, 14 Feb 2020 23:57:23 +0100 Subject: wireguard: socket: remove extra call to synchronize_net synchronize_net() is a wrapper around synchronize_rcu(), so there's no point in having synchronize_net and synchronize_rcu back to back, despite the documentation comment suggesting maybe it's somewhat useful, "Wait for packets currently being received to be done." This commit removes the extra call. Signed-off-by: Jason A. Donenfeld Suggested-by: Eric Dumazet Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/wireguard/socket.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c index 262f3b5c819d..b0d6541582d3 100644 --- a/drivers/net/wireguard/socket.c +++ b/drivers/net/wireguard/socket.c @@ -432,7 +432,6 @@ void wg_socket_reinit(struct wg_device *wg, struct sock *new4, wg->incoming_port = ntohs(inet_sk(new4)->inet_sport); mutex_unlock(&wg->socket_update_lock); synchronize_rcu(); - synchronize_net(); sock_free(old4); sock_free(old6); } -- cgit v1.2.3-59-g8ed1b From d965a5432d4c3e6b9c3d2bc1d4a800013bbf76f6 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 14 Feb 2020 15:26:19 -0800 Subject: net: dsa: b53: Ensure the default VID is untagged We need to ensure that the default VID is untagged otherwise the switch will be sending tagged frames and the results can be problematic. This is especially true with b53 switches that use VID 0 as their default VLAN since VID 0 has a special meaning. Fixes: fea83353177a ("net: dsa: b53: Fix default VLAN ID") Fixes: 061f6a505ac3 ("net: dsa: Add ndo_vlan_rx_{add, kill}_vid implementation") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 449a22172e07..1a69286daa8d 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1366,6 +1366,9 @@ void b53_vlan_add(struct dsa_switch *ds, int port, b53_get_vlan_entry(dev, vid, vl); + if (vid == 0 && vid == b53_default_pvid(dev)) + untagged = true; + vl->members |= BIT(port); if (untagged && !dsa_is_cpu_port(ds, port)) vl->untag |= BIT(port); -- cgit v1.2.3-59-g8ed1b From 064ff66e2bef84f1153087612032b5b9eab005bd Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sat, 15 Feb 2020 10:50:08 +0000 Subject: bonding: add missing netdev_update_lockdep_key() After bond_release(), netdev_update_lockdep_key() should be called. But both ioctl path and attribute path don't call netdev_update_lockdep_key(). This patch adds missing netdev_update_lockdep_key(). 
Test commands: ip link add bond0 type bond ip link add bond1 type bond ifenslave bond0 bond1 ifenslave -d bond0 bond1 ifenslave bond1 bond0 Splat looks like: [ 29.501182][ T1046] WARNING: possible circular locking dependency detected [ 29.501945][ T1039] hardirqs last disabled at (1962): [] handle_mm_fault+0x13f/0x700 [ 29.503442][ T1046] 5.5.0+ #322 Not tainted [ 29.503447][ T1046] ------------------------------------------------------ [ 29.504277][ T1039] softirqs last enabled at (1180): [] __do_softirq+0x678/0x981 [ 29.505443][ T1046] ifenslave/1046 is trying to acquire lock: [ 29.505886][ T1039] softirqs last disabled at (1169): [] irq_exit+0x17a/0x1a0 [ 29.509997][ T1046] ffff88805d5da280 (&dev->addr_list_lock_key#3){+...}, at: dev_mc_sync_multiple+0x95/0x120 [ 29.511243][ T1046] [ 29.511243][ T1046] but task is already holding lock: [ 29.512192][ T1046] ffff8880460f2280 (&dev->addr_list_lock_key#4){+...}, at: bond_enslave+0x4482/0x47b0 [bonding] [ 29.514124][ T1046] [ 29.514124][ T1046] which lock already depends on the new lock. [ 29.514124][ T1046] [ 29.517297][ T1046] [ 29.517297][ T1046] the existing dependency chain (in reverse order) is: [ 29.518231][ T1046] [ 29.518231][ T1046] -> #1 (&dev->addr_list_lock_key#4){+...}: [ 29.519076][ T1046] _raw_spin_lock+0x30/0x70 [ 29.519588][ T1046] dev_mc_sync_multiple+0x95/0x120 [ 29.520208][ T1046] bond_enslave+0x448d/0x47b0 [bonding] [ 29.520862][ T1046] bond_option_slaves_set+0x1a3/0x370 [bonding] [ 29.521640][ T1046] __bond_opt_set+0x1ff/0xbb0 [bonding] [ 29.522438][ T1046] __bond_opt_set_notify+0x2b/0xf0 [bonding] [ 29.523251][ T1046] bond_opt_tryset_rtnl+0x92/0xf0 [bonding] [ 29.524082][ T1046] bonding_sysfs_store_option+0x8a/0xf0 [bonding] [ 29.524959][ T1046] kernfs_fop_write+0x276/0x410 [ 29.525620][ T1046] vfs_write+0x197/0x4a0 [ 29.526218][ T1046] ksys_write+0x141/0x1d0 [ 29.526818][ T1046] do_syscall_64+0x99/0x4f0 [ 29.527430][ T1046] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 29.528265][ T1046] [ 29.528265][ T1046] -> #0 (&dev->addr_list_lock_key#3){+...}: [ 29.529272][ T1046] __lock_acquire+0x2d8d/0x3de0 [ 29.529935][ T1046] lock_acquire+0x164/0x3b0 [ 29.530638][ T1046] _raw_spin_lock+0x30/0x70 [ 29.531187][ T1046] dev_mc_sync_multiple+0x95/0x120 [ 29.531790][ T1046] bond_enslave+0x448d/0x47b0 [bonding] [ 29.532451][ T1046] bond_option_slaves_set+0x1a3/0x370 [bonding] [ 29.533163][ T1046] __bond_opt_set+0x1ff/0xbb0 [bonding] [ 29.533789][ T1046] __bond_opt_set_notify+0x2b/0xf0 [bonding] [ 29.534595][ T1046] bond_opt_tryset_rtnl+0x92/0xf0 [bonding] [ 29.535500][ T1046] bonding_sysfs_store_option+0x8a/0xf0 [bonding] [ 29.536379][ T1046] kernfs_fop_write+0x276/0x410 [ 29.537057][ T1046] vfs_write+0x197/0x4a0 [ 29.537640][ T1046] ksys_write+0x141/0x1d0 [ 29.538251][ T1046] do_syscall_64+0x99/0x4f0 [ 29.538870][ T1046] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 29.539659][ T1046] [ 29.539659][ T1046] other info that might help us debug this: [ 29.539659][ T1046] [ 29.540953][ T1046] Possible unsafe locking scenario: [ 29.540953][ T1046] [ 29.541883][ T1046] CPU0 CPU1 [ 29.542540][ T1046] ---- ---- [ 29.543209][ T1046] lock(&dev->addr_list_lock_key#4); [ 29.543880][ T1046] lock(&dev->addr_list_lock_key#3); [ 29.544873][ T1046] lock(&dev->addr_list_lock_key#4); [ 29.545863][ T1046] lock(&dev->addr_list_lock_key#3); [ 29.546525][ T1046] [ 29.546525][ T1046] *** DEADLOCK *** [ 29.546525][ T1046] [ 29.547542][ T1046] 5 locks held by ifenslave/1046: [ 29.548196][ T1046] #0: ffff88806044c478 (sb_writers#5){.+.+}, at: 
vfs_write+0x3bb/0x4a0 [ 29.549248][ T1046] #1: ffff88805af00890 (&of->mutex){+.+.}, at: kernfs_fop_write+0x1cf/0x410 [ 29.550343][ T1046] #2: ffff88805b8b54b0 (kn->count#157){.+.+}, at: kernfs_fop_write+0x1f2/0x410 [ 29.551575][ T1046] #3: ffffffffaecf4cf0 (rtnl_mutex){+.+.}, at: bond_opt_tryset_rtnl+0x5f/0xf0 [bonding] [ 29.552819][ T1046] #4: ffff8880460f2280 (&dev->addr_list_lock_key#4){+...}, at: bond_enslave+0x4482/0x47b0 [bonding] [ 29.554175][ T1046] [ 29.554175][ T1046] stack backtrace: [ 29.554907][ T1046] CPU: 0 PID: 1046 Comm: ifenslave Not tainted 5.5.0+ #322 [ 29.555854][ T1046] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 29.557064][ T1046] Call Trace: [ 29.557504][ T1046] dump_stack+0x96/0xdb [ 29.558054][ T1046] check_noncircular+0x371/0x450 [ 29.558723][ T1046] ? print_circular_bug.isra.35+0x310/0x310 [ 29.559486][ T1046] ? hlock_class+0x130/0x130 [ 29.560100][ T1046] ? __lock_acquire+0x2d8d/0x3de0 [ 29.560761][ T1046] __lock_acquire+0x2d8d/0x3de0 [ 29.561366][ T1046] ? register_lock_class+0x14d0/0x14d0 [ 29.562045][ T1046] ? find_held_lock+0x39/0x1d0 [ 29.562641][ T1046] lock_acquire+0x164/0x3b0 [ 29.563199][ T1046] ? dev_mc_sync_multiple+0x95/0x120 [ 29.563872][ T1046] _raw_spin_lock+0x30/0x70 [ 29.564464][ T1046] ? dev_mc_sync_multiple+0x95/0x120 [ 29.565146][ T1046] dev_mc_sync_multiple+0x95/0x120 [ 29.565793][ T1046] bond_enslave+0x448d/0x47b0 [bonding] [ 29.566487][ T1046] ? bond_update_slave_arr+0x940/0x940 [bonding] [ 29.567279][ T1046] ? bstr_printf+0xc20/0xc20 [ 29.567857][ T1046] ? stack_trace_consume_entry+0x160/0x160 [ 29.568614][ T1046] ? deactivate_slab.isra.77+0x2c5/0x800 [ 29.569320][ T1046] ? check_chain_key+0x236/0x5d0 [ 29.569939][ T1046] ? sscanf+0x93/0xc0 [ 29.570442][ T1046] ? vsscanf+0x1e20/0x1e20 [ 29.571003][ T1046] bond_option_slaves_set+0x1a3/0x370 [bonding] [ ... ] Fixes: ab92d68fc22f ("net: core: add generic lockdep keys") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 ++ drivers/net/bonding/bond_options.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 48d5ec770b94..1e9d5d35fc78 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3640,6 +3640,8 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd case BOND_RELEASE_OLD: case SIOCBONDRELEASE: res = bond_release(bond_dev, slave_dev); + if (!res) + netdev_update_lockdep_key(slave_dev); break; case BOND_SETHWADDR_OLD: case SIOCBONDSETHWADDR: diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index ddb3916d3506..215c10923289 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -1398,6 +1398,8 @@ static int bond_option_slaves_set(struct bonding *bond, case '-': slave_dbg(bond->dev, dev, "Releasing interface\n"); ret = bond_release(bond->dev, dev); + if (!ret) + netdev_update_lockdep_key(dev); break; default: -- cgit v1.2.3-59-g8ed1b From b3e80d44f5b1b470dd9e2dbc6816e63a5c519709 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sat, 15 Feb 2020 10:50:40 +0000 Subject: bonding: fix lockdep warning in bond_get_stats() In the "struct bonding", there is stats_lock. This lock protects "bond_stats" in the "struct bonding". bond_stats is updated in the bond_get_stats() and this function would be executed concurrently. So, the lock is needed. Bonding interfaces would be nested. 
So, either stats_lock should use dynamic lockdep class key or stats_lock should be used by spin_lock_nested(). In the current code, stats_lock is using a dynamic lockdep class key. But there is no updating stats_lock_key routine So, lockdep warning will occur. Test commands: ip link add bond0 type bond ip link add bond1 type bond ip link set bond0 master bond1 ip link set bond0 nomaster ip link set bond1 master bond0 Splat looks like: [ 38.420603][ T957] 5.5.0+ #394 Not tainted [ 38.421074][ T957] ------------------------------------------------------ [ 38.421837][ T957] ip/957 is trying to acquire lock: [ 38.422399][ T957] ffff888063262cd8 (&bond->stats_lock_key#2){+.+.}, at: bond_get_stats+0x90/0x4d0 [bonding] [ 38.423528][ T957] [ 38.423528][ T957] but task is already holding lock: [ 38.424526][ T957] ffff888065fd2cd8 (&bond->stats_lock_key){+.+.}, at: bond_get_stats+0x90/0x4d0 [bonding] [ 38.426075][ T957] [ 38.426075][ T957] which lock already depends on the new lock. [ 38.426075][ T957] [ 38.428536][ T957] [ 38.428536][ T957] the existing dependency chain (in reverse order) is: [ 38.429475][ T957] [ 38.429475][ T957] -> #1 (&bond->stats_lock_key){+.+.}: [ 38.430273][ T957] _raw_spin_lock+0x30/0x70 [ 38.430812][ T957] bond_get_stats+0x90/0x4d0 [bonding] [ 38.431451][ T957] dev_get_stats+0x1ec/0x270 [ 38.432088][ T957] bond_get_stats+0x1a5/0x4d0 [bonding] [ 38.432767][ T957] dev_get_stats+0x1ec/0x270 [ 38.433322][ T957] rtnl_fill_stats+0x44/0xbe0 [ 38.433866][ T957] rtnl_fill_ifinfo+0xeb2/0x3720 [ 38.434474][ T957] rtmsg_ifinfo_build_skb+0xca/0x170 [ 38.435081][ T957] rtmsg_ifinfo_event.part.33+0x1b/0xb0 [ 38.436848][ T957] rtnetlink_event+0xcd/0x120 [ 38.437455][ T957] notifier_call_chain+0x90/0x160 [ 38.438067][ T957] netdev_change_features+0x74/0xa0 [ 38.438708][ T957] bond_compute_features.isra.45+0x4e6/0x6f0 [bonding] [ 38.439522][ T957] bond_enslave+0x3639/0x47b0 [bonding] [ 38.440225][ T957] do_setlink+0xaab/0x2ef0 [ 38.440786][ T957] __rtnl_newlink+0x9c5/0x1270 [ 38.441463][ T957] rtnl_newlink+0x65/0x90 [ 38.442075][ T957] rtnetlink_rcv_msg+0x4a8/0x890 [ 38.442774][ T957] netlink_rcv_skb+0x121/0x350 [ 38.443451][ T957] netlink_unicast+0x42e/0x610 [ 38.444282][ T957] netlink_sendmsg+0x65a/0xb90 [ 38.444992][ T957] ____sys_sendmsg+0x5ce/0x7a0 [ 38.445679][ T957] ___sys_sendmsg+0x10f/0x1b0 [ 38.446365][ T957] __sys_sendmsg+0xc6/0x150 [ 38.447007][ T957] do_syscall_64+0x99/0x4f0 [ 38.447668][ T957] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 38.448538][ T957] [ 38.448538][ T957] -> #0 (&bond->stats_lock_key#2){+.+.}: [ 38.449554][ T957] __lock_acquire+0x2d8d/0x3de0 [ 38.450148][ T957] lock_acquire+0x164/0x3b0 [ 38.450711][ T957] _raw_spin_lock+0x30/0x70 [ 38.451292][ T957] bond_get_stats+0x90/0x4d0 [bonding] [ 38.451950][ T957] dev_get_stats+0x1ec/0x270 [ 38.452425][ T957] bond_get_stats+0x1a5/0x4d0 [bonding] [ 38.453362][ T957] dev_get_stats+0x1ec/0x270 [ 38.453825][ T957] rtnl_fill_stats+0x44/0xbe0 [ 38.454390][ T957] rtnl_fill_ifinfo+0xeb2/0x3720 [ 38.456257][ T957] rtmsg_ifinfo_build_skb+0xca/0x170 [ 38.456998][ T957] rtmsg_ifinfo_event.part.33+0x1b/0xb0 [ 38.459351][ T957] rtnetlink_event+0xcd/0x120 [ 38.460086][ T957] notifier_call_chain+0x90/0x160 [ 38.460829][ T957] netdev_change_features+0x74/0xa0 [ 38.461752][ T957] bond_compute_features.isra.45+0x4e6/0x6f0 [bonding] [ 38.462705][ T957] bond_enslave+0x3639/0x47b0 [bonding] [ 38.463476][ T957] do_setlink+0xaab/0x2ef0 [ 38.464141][ T957] __rtnl_newlink+0x9c5/0x1270 [ 38.464897][ T957] rtnl_newlink+0x65/0x90 [ 38.465522][ 
T957] rtnetlink_rcv_msg+0x4a8/0x890 [ 38.466215][ T957] netlink_rcv_skb+0x121/0x350 [ 38.466895][ T957] netlink_unicast+0x42e/0x610 [ 38.467583][ T957] netlink_sendmsg+0x65a/0xb90 [ 38.468285][ T957] ____sys_sendmsg+0x5ce/0x7a0 [ 38.469202][ T957] ___sys_sendmsg+0x10f/0x1b0 [ 38.469884][ T957] __sys_sendmsg+0xc6/0x150 [ 38.470587][ T957] do_syscall_64+0x99/0x4f0 [ 38.471245][ T957] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 38.472093][ T957] [ 38.472093][ T957] other info that might help us debug this: [ 38.472093][ T957] [ 38.473438][ T957] Possible unsafe locking scenario: [ 38.473438][ T957] [ 38.474898][ T957] CPU0 CPU1 [ 38.476234][ T957] ---- ---- [ 38.480171][ T957] lock(&bond->stats_lock_key); [ 38.480808][ T957] lock(&bond->stats_lock_key#2); [ 38.481791][ T957] lock(&bond->stats_lock_key); [ 38.482754][ T957] lock(&bond->stats_lock_key#2); [ 38.483416][ T957] [ 38.483416][ T957] *** DEADLOCK *** [ 38.483416][ T957] [ 38.484505][ T957] 3 locks held by ip/957: [ 38.485048][ T957] #0: ffffffffbccf6230 (rtnl_mutex){+.+.}, at: rtnetlink_rcv_msg+0x457/0x890 [ 38.486198][ T957] #1: ffff888065fd2cd8 (&bond->stats_lock_key){+.+.}, at: bond_get_stats+0x90/0x4d0 [bonding] [ 38.487625][ T957] #2: ffffffffbc9254c0 (rcu_read_lock){....}, at: bond_get_stats+0x5/0x4d0 [bonding] [ 38.488897][ T957] [ 38.488897][ T957] stack backtrace: [ 38.489646][ T957] CPU: 1 PID: 957 Comm: ip Not tainted 5.5.0+ #394 [ 38.490497][ T957] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 38.492810][ T957] Call Trace: [ 38.493219][ T957] dump_stack+0x96/0xdb [ 38.493709][ T957] check_noncircular+0x371/0x450 [ 38.494344][ T957] ? lookup_address+0x60/0x60 [ 38.494923][ T957] ? print_circular_bug.isra.35+0x310/0x310 [ 38.495699][ T957] ? hlock_class+0x130/0x130 [ 38.496334][ T957] ? __lock_acquire+0x2d8d/0x3de0 [ 38.496979][ T957] __lock_acquire+0x2d8d/0x3de0 [ 38.497607][ T957] ? register_lock_class+0x14d0/0x14d0 [ 38.498333][ T957] ? check_chain_key+0x236/0x5d0 [ 38.499003][ T957] lock_acquire+0x164/0x3b0 [ 38.499800][ T957] ? bond_get_stats+0x90/0x4d0 [bonding] [ 38.500706][ T957] _raw_spin_lock+0x30/0x70 [ 38.501435][ T957] ? bond_get_stats+0x90/0x4d0 [bonding] [ 38.502311][ T957] bond_get_stats+0x90/0x4d0 [bonding] [ ... ] But, there is another problem. The dynamic lockdep class key is protected by RTNL, but bond_get_stats() would be called outside of RTNL. So, it would use an invalid dynamic lockdep class key. In order to fix this issue, stats_lock uses spin_lock_nested() instead of a dynamic lockdep key. The bond_get_stats() calls bond_get_lowest_level_rcu() to get the correct nest level value, which will be used by spin_lock_nested(). The "dev->lower_level" indicates lower nest level value, but this value is invalid outside of RTNL. So, bond_get_lowest_level_rcu() returns valid lower nest level value in the RCU critical section. bond_get_lowest_level_rcu() will be work only when LOCKDEP is enabled. Fixes: 089bca2caed0 ("bonding: use dynamic lockdep key instead of subclass") Signed-off-by: Taehee Yoo Signed-off-by: David S. 
Miller --- drivers/net/bonding/bond_main.c | 53 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 3 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 1e9d5d35fc78..d10805e5e623 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3526,6 +3526,47 @@ static void bond_fold_stats(struct rtnl_link_stats64 *_res, } } +#ifdef CONFIG_LOCKDEP +static int bond_get_lowest_level_rcu(struct net_device *dev) +{ + struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; + struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; + int cur = 0, max = 0; + + now = dev; + iter = &dev->adj_list.lower; + + while (1) { + next = NULL; + while (1) { + ldev = netdev_next_lower_dev_rcu(now, &iter); + if (!ldev) + break; + + next = ldev; + niter = &ldev->adj_list.lower; + dev_stack[cur] = now; + iter_stack[cur++] = iter; + if (max <= cur) + max = cur; + break; + } + + if (!next) { + if (!cur) + return max; + next = dev_stack[--cur]; + niter = iter_stack[cur]; + } + + now = next; + iter = niter; + } + + return max; +} +#endif + static void bond_get_stats(struct net_device *bond_dev, struct rtnl_link_stats64 *stats) { @@ -3533,11 +3574,17 @@ static void bond_get_stats(struct net_device *bond_dev, struct rtnl_link_stats64 temp; struct list_head *iter; struct slave *slave; + int nest_level = 0; - spin_lock(&bond->stats_lock); - memcpy(stats, &bond->bond_stats, sizeof(*stats)); rcu_read_lock(); +#ifdef CONFIG_LOCKDEP + nest_level = bond_get_lowest_level_rcu(bond_dev); +#endif + + spin_lock_nested(&bond->stats_lock, nest_level); + memcpy(stats, &bond->bond_stats, sizeof(*stats)); + bond_for_each_slave_rcu(bond, slave, iter) { const struct rtnl_link_stats64 *new = dev_get_stats(slave->dev, &temp); @@ -3547,10 +3594,10 @@ static void bond_get_stats(struct net_device *bond_dev, /* save off the slave stats for the next run */ memcpy(&slave->slave_stats, new, sizeof(*new)); } - rcu_read_unlock(); memcpy(&bond->bond_stats, stats, sizeof(*stats)); spin_unlock(&bond->stats_lock); + rcu_read_unlock(); } static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd) -- cgit v1.2.3-59-g8ed1b From 69233bba6543a37755158ca3382765387b8078df Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 15 Feb 2020 17:54:17 +0100 Subject: net: ks8851-ml: Remove 8-bit bus accessors This driver is mixing 8-bit and 16-bit bus accessors for reasons unknown, however the speculation is that this was some sort of attempt to support the 8-bit bus mode. As per the KS8851-16MLL documentation, all two registers accessed via the 8-bit accessors are internally 16-bit registers, so reading them using 16-bit accessors is fine. The KS_CCR read can be converted to 16-bit read outright, as it is already a concatenation of two 8-bit reads of that register. The KS_RXQCR accesses are 8-bit only, however writing the top 8 bits of the register is OK as well, since the driver caches the entire 16-bit register value anyway. Finally, the driver is not used by any hardware in the kernel right now. The only hardware available to me is one with 16-bit bus, so I have no way to test the 8-bit bus mode, however it is unlikely this ever really worked anyway. If the 8-bit bus mode is ever required, it can be easily added by adjusting the 16-bit accessors to do 2 consecutive accesses, which is how this should have been done from the beginning. Signed-off-by: Marek Vasut Cc: David S. 
Miller Cc: Lukas Wunner Cc: Petr Stetiar Cc: YueHaibing Signed-off-by: David S. Miller --- drivers/net/ethernet/micrel/ks8851_mll.c | 45 ++++---------------------------- 1 file changed, 5 insertions(+), 40 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c index a41a90c589db..e2fb20154511 100644 --- a/drivers/net/ethernet/micrel/ks8851_mll.c +++ b/drivers/net/ethernet/micrel/ks8851_mll.c @@ -156,24 +156,6 @@ static int msg_enable; * chip is busy transferring packet data (RX/TX FIFO accesses). */ -/** - * ks_rdreg8 - read 8 bit register from device - * @ks : The chip information - * @offset: The register address - * - * Read a 8bit register from the chip, returning the result - */ -static u8 ks_rdreg8(struct ks_net *ks, int offset) -{ - u16 data; - u8 shift_bit = offset & 0x03; - u8 shift_data = (offset & 1) << 3; - ks->cmd_reg_cache = (u16) offset | (u16)(BE0 << shift_bit); - iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); - data = ioread16(ks->hw_addr); - return (u8)(data >> shift_data); -} - /** * ks_rdreg16 - read 16 bit register from device * @ks : The chip information @@ -189,22 +171,6 @@ static u16 ks_rdreg16(struct ks_net *ks, int offset) return ioread16(ks->hw_addr); } -/** - * ks_wrreg8 - write 8bit register value to chip - * @ks: The chip information - * @offset: The register address - * @value: The value to write - * - */ -static void ks_wrreg8(struct ks_net *ks, int offset, u8 value) -{ - u8 shift_bit = (offset & 0x03); - u16 value_write = (u16)(value << ((offset & 1) << 3)); - ks->cmd_reg_cache = (u16)offset | (BE0 << shift_bit); - iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); - iowrite16(value_write, ks->hw_addr); -} - /** * ks_wrreg16 - write 16bit register value to chip * @ks: The chip information @@ -324,8 +290,7 @@ static void ks_read_config(struct ks_net *ks) u16 reg_data = 0; /* Regardless of bus width, 8 bit read should always work.*/ - reg_data = ks_rdreg8(ks, KS_CCR) & 0x00FF; - reg_data |= ks_rdreg8(ks, KS_CCR+1) << 8; + reg_data = ks_rdreg16(ks, KS_CCR); /* addr/data bus are multiplexed */ ks->sharedbus = (reg_data & CCR_SHARED) == CCR_SHARED; @@ -429,7 +394,7 @@ static inline void ks_read_qmu(struct ks_net *ks, u16 *buf, u32 len) /* 1. set sudo DMA mode */ ks_wrreg16(ks, KS_RXFDPR, RXFDPR_RXFPAI); - ks_wrreg8(ks, KS_RXQCR, (ks->rc_rxqcr | RXQCR_SDA) & 0xff); + ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA); /* 2. read prepend data */ /** @@ -446,7 +411,7 @@ static inline void ks_read_qmu(struct ks_net *ks, u16 *buf, u32 len) ks_inblk(ks, buf, ALIGN(len, 4)); /* 4. reset sudo DMA Mode */ - ks_wrreg8(ks, KS_RXQCR, ks->rc_rxqcr); + ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr); } /** @@ -679,13 +644,13 @@ static void ks_write_qmu(struct ks_net *ks, u8 *pdata, u16 len) ks->txh.txw[1] = cpu_to_le16(len); /* 1. set sudo-DMA mode */ - ks_wrreg8(ks, KS_RXQCR, (ks->rc_rxqcr | RXQCR_SDA) & 0xff); + ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA); /* 2. write status/lenth info */ ks_outblk(ks, ks->txh.txw, 4); /* 3. write pkt data */ ks_outblk(ks, (u16 *)pdata, ALIGN(len, 4)); /* 4. reset sudo-DMA mode */ - ks_wrreg8(ks, KS_RXQCR, ks->rc_rxqcr); + ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr); /* 5. Enqueue Tx(move the pkt from TX buffer into TXQ) */ ks_wrreg16(ks, KS_TXQCR, TXQCR_METFE); /* 6. 
wait until TXQCR_METFE is auto-cleared */ -- cgit v1.2.3-59-g8ed1b From edacb098ea9c31589276152f09b4439052c0f2b1 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 15 Feb 2020 17:54:18 +0100 Subject: net: ks8851-ml: Fix 16-bit data access The packet data written to and read from Micrel KSZ8851-16MLLI must be byte-swapped in 16-bit mode, add this byte-swapping. Signed-off-by: Marek Vasut Cc: David S. Miller Cc: Lukas Wunner Cc: Petr Stetiar Cc: YueHaibing Signed-off-by: David S. Miller --- drivers/net/ethernet/micrel/ks8851_mll.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c index e2fb20154511..5ae206ae5d2b 100644 --- a/drivers/net/ethernet/micrel/ks8851_mll.c +++ b/drivers/net/ethernet/micrel/ks8851_mll.c @@ -197,7 +197,7 @@ static inline void ks_inblk(struct ks_net *ks, u16 *wptr, u32 len) { len >>= 1; while (len--) - *wptr++ = (u16)ioread16(ks->hw_addr); + *wptr++ = be16_to_cpu(ioread16(ks->hw_addr)); } /** @@ -211,7 +211,7 @@ static inline void ks_outblk(struct ks_net *ks, u16 *wptr, u32 len) { len >>= 1; while (len--) - iowrite16(*wptr++, ks->hw_addr); + iowrite16(cpu_to_be16(*wptr++), ks->hw_addr); } static void ks_disable_int(struct ks_net *ks) -- cgit v1.2.3-59-g8ed1b From 58292104832fef6cb4a89f736012c0e0724c3442 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 15 Feb 2020 17:54:19 +0100 Subject: net: ks8851-ml: Fix 16-bit IO operation The Micrel KSZ8851-16MLLI datasheet DS00002357B page 12 states that BE[3:0] signals are active high. This contradicts the measurements of the behavior of the actual chip, where these signals behave as active low. For example, to read the CIDER register, the bus must expose 0xc0c0 during the address phase, which means BE[3:0]=4'b1100. Signed-off-by: Marek Vasut Cc: David S. Miller Cc: Lukas Wunner Cc: Petr Stetiar Cc: YueHaibing Signed-off-by: David S. Miller --- drivers/net/ethernet/micrel/ks8851_mll.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c index 5ae206ae5d2b..1c9e70c8cc30 100644 --- a/drivers/net/ethernet/micrel/ks8851_mll.c +++ b/drivers/net/ethernet/micrel/ks8851_mll.c @@ -166,7 +166,7 @@ static int msg_enable; static u16 ks_rdreg16(struct ks_net *ks, int offset) { - ks->cmd_reg_cache = (u16)offset | ((BE1 | BE0) << (offset & 0x02)); + ks->cmd_reg_cache = (u16)offset | ((BE3 | BE2) >> (offset & 0x02)); iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); return ioread16(ks->hw_addr); } @@ -181,7 +181,7 @@ static u16 ks_rdreg16(struct ks_net *ks, int offset) static void ks_wrreg16(struct ks_net *ks, int offset, u16 value) { - ks->cmd_reg_cache = (u16)offset | ((BE1 | BE0) << (offset & 0x02)); + ks->cmd_reg_cache = (u16)offset | ((BE3 | BE2) >> (offset & 0x02)); iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); iowrite16(value, ks->hw_addr); } -- cgit v1.2.3-59-g8ed1b From 9a6a0dea16177ccaecc116f560232e63bec115f1 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Sun, 16 Feb 2020 16:39:43 -0300 Subject: net: ethernet: dm9000: Handle -EPROBE_DEFER in dm9000_parse_dt() The call to of_get_mac_address() can return -EPROBE_DEFER, for instance when the MAC address is read from a NVMEM driver that did not probe yet. Cc: H. Nikolaus Schaller Cc: Mathieu Malaterre Signed-off-by: Paul Cercueil Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- drivers/net/ethernet/davicom/dm9000.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c index 1ea3372775e6..e94ae9b94dbf 100644 --- a/drivers/net/ethernet/davicom/dm9000.c +++ b/drivers/net/ethernet/davicom/dm9000.c @@ -1405,6 +1405,8 @@ static struct dm9000_plat_data *dm9000_parse_dt(struct device *dev) mac_addr = of_get_mac_address(np); if (!IS_ERR(mac_addr)) ether_addr_copy(pdata->dev_addr, mac_addr); + else if (PTR_ERR(mac_addr) == -EPROBE_DEFER) + return ERR_CAST(mac_addr); return pdata; } -- cgit v1.2.3-59-g8ed1b From a81541041ceb55bcec9a8bb8ad3482263f0a205a Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Mon, 17 Feb 2020 09:31:33 +0100 Subject: net: mscc: fix in frame extraction Each extracted frame on Ocelot has an IFH. The frame and IFH are extracted by reading chuncks of 4 bytes from a register. In case the IFH and frames were read corretly it would try to read the next frame. In case there are no more frames in the queue, it checks if there were any previous errors and in that case clear the queue. But this check will always succeed also when there are no errors. Because when extracting the IFH the error is checked against 4(number of bytes read) and then the error is set only if the extraction of the frame failed. So in a happy case where there are no errors the err variable is still 4. So it could be a case where after the check that there are no more frames in the queue, a frame will arrive in the queue but because the error is not reseted, it would try to flush the queue. So the frame will be lost. The fix consist in resetting the error after reading the IFH. Signed-off-by: Horatiu Vultur Acked-by: Alexandre Belloni Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot_board.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mscc/ocelot_board.c b/drivers/net/ethernet/mscc/ocelot_board.c index b38820849faa..1135a18019c7 100644 --- a/drivers/net/ethernet/mscc/ocelot_board.c +++ b/drivers/net/ethernet/mscc/ocelot_board.c @@ -114,6 +114,14 @@ static irqreturn_t ocelot_xtr_irq_handler(int irq, void *arg) if (err != 4) break; + /* At this point the IFH was read correctly, so it is safe to + * presume that there is no error. The err needs to be reset + * otherwise a frame could come in CPU queue between the while + * condition and the check for error later on. And in that case + * the new frame is just removed and not processed. + */ + err = 0; + ocelot_parse_ifh(ifh, &info); ocelot_port = ocelot->ports[info.port]; -- cgit v1.2.3-59-g8ed1b From 82969e6ef0430a31d58342009e64c7634512eb53 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Fri, 14 Feb 2020 15:32:24 +0100 Subject: net: cnic: fix spelling mistake "reserverd" -> "reserved" The reserved member should be named reserved3. Signed-off-by: Alexandre Belloni Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/cnic_defs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/broadcom/cnic_defs.h b/drivers/net/ethernet/broadcom/cnic_defs.h index b38499774071..99e2c6d4d8c3 100644 --- a/drivers/net/ethernet/broadcom/cnic_defs.h +++ b/drivers/net/ethernet/broadcom/cnic_defs.h @@ -543,13 +543,13 @@ struct l4_kwq_update_pg { #define L4_KWQ_UPDATE_PG_RESERVERD2_SHIFT 2 #endif #if defined(__BIG_ENDIAN) - u16 reserverd3; + u16 reserved3; u8 da0; u8 da1; #elif defined(__LITTLE_ENDIAN) u8 da1; u8 da0; - u16 reserverd3; + u16 reserved3; #endif #if defined(__BIG_ENDIAN) u8 da2; -- cgit v1.2.3-59-g8ed1b From af6565adb02d3129d3fae4d9d5da945abaf4417a Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Mon, 17 Feb 2020 13:37:18 +0200 Subject: qede: Fix race between rdma destroy workqueue and link change event If an event is added while the rdma workqueue is being destroyed it could lead to several races, list corruption, null pointer dereference during queue_work or init_queue. This fixes the race between the two flows which can occur during shutdown. A kref object and a completion object are added to the rdma_dev structure, these are initialized before the workqueue is created. The refcnt is used to indicate work is being added to the workqueue and ensures the cleanup flow won't start while we're in the middle of adding the event. Once the work is added, the refcnt is decreased and the cleanup flow is safe to run. Fixes: cee9fbd8e2e ("qede: Add qedr framework") Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede.h | 2 ++ drivers/net/ethernet/qlogic/qede/qede_rdma.c | 29 +++++++++++++++++++++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index e8a1b27db84d..234c6f30effb 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -163,6 +163,8 @@ struct qede_rdma_dev { struct list_head entry; struct list_head rdma_event_list; struct workqueue_struct *rdma_wq; + struct kref refcnt; + struct completion event_comp; bool exp_recovery; }; diff --git a/drivers/net/ethernet/qlogic/qede/qede_rdma.c b/drivers/net/ethernet/qlogic/qede/qede_rdma.c index ffabc2d2f082..2d873ae8a234 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_rdma.c +++ b/drivers/net/ethernet/qlogic/qede/qede_rdma.c @@ -59,6 +59,9 @@ static void _qede_rdma_dev_add(struct qede_dev *edev) static int qede_rdma_create_wq(struct qede_dev *edev) { INIT_LIST_HEAD(&edev->rdma_info.rdma_event_list); + kref_init(&edev->rdma_info.refcnt); + init_completion(&edev->rdma_info.event_comp); + edev->rdma_info.rdma_wq = create_singlethread_workqueue("rdma_wq"); if (!edev->rdma_info.rdma_wq) { DP_NOTICE(edev, "qedr: Could not create workqueue\n"); @@ -83,8 +86,23 @@ static void qede_rdma_cleanup_event(struct qede_dev *edev) } } +static void qede_rdma_complete_event(struct kref *ref) +{ + struct qede_rdma_dev *rdma_dev = + container_of(ref, struct qede_rdma_dev, refcnt); + + /* no more events will be added after this */ + complete(&rdma_dev->event_comp); +} + static void qede_rdma_destroy_wq(struct qede_dev *edev) { + /* Avoid race with add_event flow, make sure it finishes before + * we start accessing the list and cleaning up the work + */ + kref_put(&edev->rdma_info.refcnt, qede_rdma_complete_event); + 
wait_for_completion(&edev->rdma_info.event_comp); + qede_rdma_cleanup_event(edev); destroy_workqueue(edev->rdma_info.rdma_wq); } @@ -310,15 +328,24 @@ static void qede_rdma_add_event(struct qede_dev *edev, if (!edev->rdma_info.qedr_dev) return; + /* We don't want the cleanup flow to start while we're allocating and + * scheduling the work + */ + if (!kref_get_unless_zero(&edev->rdma_info.refcnt)) + return; /* already being destroyed */ + event_node = qede_rdma_get_free_event_node(edev); if (!event_node) - return; + goto out; event_node->event = event; event_node->ptr = edev; INIT_WORK(&event_node->work, qede_rdma_handle_event); queue_work(edev->rdma_info.rdma_wq, &event_node->work); + +out: + kref_put(&edev->rdma_info.refcnt, qede_rdma_complete_event); } void qede_rdma_dev_event_open(struct qede_dev *edev) -- cgit v1.2.3-59-g8ed1b From 29f20dd6258a6f9c434992a0f1fc522caecda7ef Mon Sep 17 00:00:00 2001 From: Jonathan Neuschäfer Date: Tue, 18 Feb 2020 16:47:01 +0100 Subject: net: phy: broadcom: Fix a typo ("firsly") MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jonathan Neuschäfer Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/broadcom.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 7d68b28bb893..a62229a8b1a4 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -410,7 +410,7 @@ static int bcm5481_config_aneg(struct phy_device *phydev) struct device_node *np = phydev->mdio.dev.of_node; int ret; - /* Aneg firsly. */ + /* Aneg firstly. */ ret = genphy_config_aneg(phydev); /* Then we can set up the delay. */ @@ -463,7 +463,7 @@ static int bcm54616s_config_aneg(struct phy_device *phydev) { int ret; - /* Aneg firsly. */ + /* Aneg firstly. */ if (phydev->dev_flags & PHY_BCM_FLAGS_MODE_1000BX) ret = genphy_c37_config_aneg(phydev); else -- cgit v1.2.3-59-g8ed1b From 3d9c5e023a0dbf3e117bb416cfefd9405bf5af0c Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Mon, 3 Feb 2020 16:32:18 -0600 Subject: net/mlx5: Fix sleep while atomic in mlx5_eswitch_get_vepa rtnl_bridge_getlink is protected by rcu lock, so mlx5_eswitch_get_vepa cannot take mutex lock. Two possible issues can happen: 1. User at the same time change vepa mode via RTM_SETLINK command. 2. User at the same time change the switchdev mode via devlink netlink interface. Case 1 cannot happen because rtnl executes one message in order. Case 2 can happen but we do not expect user to change the switchdev mode when changing vepa. Even if a user does it, so he will read a value which is no longer valid. 
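For illustration, a minimal C sketch of the pattern this message describes, a getter that must not sleep because its caller holds rcu_read_lock(); the struct and function names below are hypothetical and are not part of the mlx5 driver:

    #include <linux/types.h>
    #include <linux/errno.h>
    #include <linux/compiler.h>

    struct foo_esw {
            bool vepa_enabled;      /* updated by the writer under its own lock */
    };

    /* Called under rcu_read_lock(), so it must not sleep.  Taking a mutex
     * here is what triggers "sleeping while atomic"; reading a single flag
     * with READ_ONCE() instead returns a possibly stale but consistent
     * value, which the commit argues is acceptable for this query path.
     */
    static int foo_esw_get_vepa(struct foo_esw *esw, u8 *setting)
    {
            if (!esw)
                    return -EINVAL;

            *setting = READ_ONCE(esw->vepa_enabled) ? 1 : 0;
            return 0;
    }
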
Fixes: 8da202b24913 ("net/mlx5: E-Switch, Add support for VEPA in legacy mode.") Signed-off-by: Huy Nguyen Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 5acf60b1bbfe..564d42605892 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -2452,25 +2452,17 @@ out: int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting) { - int err = 0; - if (!esw) return -EOPNOTSUPP; if (!ESW_ALLOWED(esw)) return -EPERM; - mutex_lock(&esw->state_lock); - if (esw->mode != MLX5_ESWITCH_LEGACY) { - err = -EOPNOTSUPP; - goto out; - } + if (esw->mode != MLX5_ESWITCH_LEGACY) + return -EOPNOTSUPP; *setting = esw->fdb_table.legacy.vepa_uplink_rule ? 1 : 0; - -out: - mutex_unlock(&esw->state_lock); - return err; + return 0; } int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, -- cgit v1.2.3-59-g8ed1b From 5ee090ed0da649b1febae2b7c285ac77d1e55a0c Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Mon, 9 Dec 2019 14:08:18 +0200 Subject: net/mlx5e: Reset RQ doorbell counter before moving RQ state from RST to RDY Initialize RQ doorbell counters to zero prior to moving an RQ from RST to RDY state. Per HW spec, when RQ is back to RDY state, the descriptor ID on the completion is reset. The doorbell record must comply. Fixes: 8276ea1353a4 ("net/mlx5e: Report and recover from CQE with error on RQ") Signed-off-by: Aya Levin Reported-by: Tariq Toukan Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 8 +++++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 ++ drivers/net/ethernet/mellanox/mlx5/core/wq.c | 39 +++++++++++++++++------ drivers/net/ethernet/mellanox/mlx5/core/wq.h | 2 ++ 4 files changed, 43 insertions(+), 9 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index 7c8796d9743f..a226277b0980 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -179,6 +179,14 @@ mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma) } } +static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq) +{ + if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) + mlx5_wq_ll_reset(&rq->mpwqe.wq); + else + mlx5_wq_cyc_reset(&rq->wqe.wq); +} + /* SW parser related functions */ struct mlx5e_swp_spec { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 454d3459bd8b..966983674663 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -712,6 +712,9 @@ int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state) if (!in) return -ENOMEM; + if (curr_state == MLX5_RQC_STATE_RST && next_state == MLX5_RQC_STATE_RDY) + mlx5e_rqwq_reset(rq); + rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); MLX5_SET(modify_rq_in, in, rq_state, curr_state); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c index 02f7e4a39578..01f075fac276 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c @@ -94,6 +94,13 @@ void 
mlx5_wq_cyc_wqe_dump(struct mlx5_wq_cyc *wq, u16 ix, u8 nstrides) print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1, wqe, len, false); } +void mlx5_wq_cyc_reset(struct mlx5_wq_cyc *wq) +{ + wq->wqe_ctr = 0; + wq->cur_sz = 0; + mlx5_wq_cyc_update_db_record(wq); +} + int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *qpc, struct mlx5_wq_qp *wq, struct mlx5_wq_ctrl *wq_ctrl) @@ -192,6 +199,19 @@ err_db_free: return err; } +static void mlx5_wq_ll_init_list(struct mlx5_wq_ll *wq) +{ + struct mlx5_wqe_srq_next_seg *next_seg; + int i; + + for (i = 0; i < wq->fbc.sz_m1; i++) { + next_seg = mlx5_wq_ll_get_wqe(wq, i); + next_seg->next_wqe_index = cpu_to_be16(i + 1); + } + next_seg = mlx5_wq_ll_get_wqe(wq, i); + wq->tail_next = &next_seg->next_wqe_index; +} + int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *wqc, struct mlx5_wq_ll *wq, struct mlx5_wq_ctrl *wq_ctrl) @@ -199,9 +219,7 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, u8 log_wq_stride = MLX5_GET(wq, wqc, log_wq_stride); u8 log_wq_sz = MLX5_GET(wq, wqc, log_wq_sz); struct mlx5_frag_buf_ctrl *fbc = &wq->fbc; - struct mlx5_wqe_srq_next_seg *next_seg; int err; - int i; err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); if (err) { @@ -220,13 +238,7 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, fbc); - for (i = 0; i < fbc->sz_m1; i++) { - next_seg = mlx5_wq_ll_get_wqe(wq, i); - next_seg->next_wqe_index = cpu_to_be16(i + 1); - } - next_seg = mlx5_wq_ll_get_wqe(wq, i); - wq->tail_next = &next_seg->next_wqe_index; - + mlx5_wq_ll_init_list(wq); wq_ctrl->mdev = mdev; return 0; @@ -237,6 +249,15 @@ err_db_free: return err; } +void mlx5_wq_ll_reset(struct mlx5_wq_ll *wq) +{ + wq->head = 0; + wq->wqe_ctr = 0; + wq->cur_sz = 0; + mlx5_wq_ll_init_list(wq); + mlx5_wq_ll_update_db_record(wq); +} + void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl) { mlx5_frag_buf_free(wq_ctrl->mdev, &wq_ctrl->buf); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h index d9a94bc223c0..4cadc336593f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h @@ -80,6 +80,7 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *wqc, struct mlx5_wq_cyc *wq, struct mlx5_wq_ctrl *wq_ctrl); void mlx5_wq_cyc_wqe_dump(struct mlx5_wq_cyc *wq, u16 ix, u8 nstrides); +void mlx5_wq_cyc_reset(struct mlx5_wq_cyc *wq); int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *qpc, struct mlx5_wq_qp *wq, @@ -92,6 +93,7 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *wqc, struct mlx5_wq_ll *wq, struct mlx5_wq_ctrl *wq_ctrl); +void mlx5_wq_ll_reset(struct mlx5_wq_ll *wq); void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl); -- cgit v1.2.3-59-g8ed1b From 1ad6c43c6a7b8627240c6cc19c69e31fedc596a7 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Wed, 12 Feb 2020 15:17:25 +0200 Subject: net/mlx5e: Fix crash in recovery flow without devlink reporter When health reporters are not supported, recovery function is invoked directly, not via devlink health reporters. In this direct flow, the recover function input parameter was passed incorrectly and is causing a kernel oops. 
This patch is fixing the input parameter. Following call trace is observed on rx error health reporting. Internal error: Oops: 96000007 [#1] PREEMPT SMP Process kworker/u16:4 (pid: 4584, stack limit = 0x00000000c9e45703) Call trace: mlx5e_rx_reporter_err_rq_cqe_recover+0x30/0x164 [mlx5_core] mlx5e_health_report+0x60/0x6c [mlx5_core] mlx5e_reporter_rq_cqe_err+0x6c/0x90 [mlx5_core] mlx5e_rq_err_cqe_work+0x20/0x2c [mlx5_core] process_one_work+0x168/0x3d0 worker_thread+0x58/0x3d0 kthread+0x108/0x134 Fixes: c50de4af1d63 ("net/mlx5e: Generalize tx reporter's functionality") Signed-off-by: Aya Levin Signed-off-by: Parav Pandit Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/health.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c index 3a975641f902..20b907dc1e29 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c @@ -200,7 +200,7 @@ int mlx5e_health_report(struct mlx5e_priv *priv, netdev_err(priv->netdev, err_str); if (!reporter) - return err_ctx->recover(&err_ctx->ctx); + return err_ctx->recover(err_ctx->ctx); return devlink_health_report(reporter, err_str, err_ctx); } -- cgit v1.2.3-59-g8ed1b From 52d214976d4f64504c1bbb52d47b46a5a3d5ee42 Mon Sep 17 00:00:00 2001 From: Hamdan Igbaria Date: Wed, 5 Feb 2020 14:31:12 +0200 Subject: net/mlx5: DR, Fix matching on vport gvmi Set vport gvmi in the tag, only when source gvmi is set in the bit mask. Fixes: 26d688e3 ("net/mlx5: DR, Add Steering entry (STE) utilities") Signed-off-by: Hamdan Igbaria Reviewed-by: Alex Vesker Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c index c6c7d1defbd7..aade62a9ee5c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c @@ -2307,7 +2307,9 @@ static int dr_ste_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value, struct mlx5dr_cmd_vport_cap *vport_cap; struct mlx5dr_domain *dmn = sb->dmn; struct mlx5dr_cmd_caps *caps; + u8 *bit_mask = sb->bit_mask; u8 *tag = hw_ste->tag; + bool source_gvmi_set; DR_STE_SET_TAG(src_gvmi_qp, tag, source_qp, misc, source_sqn); @@ -2328,7 +2330,8 @@ static int dr_ste_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value, if (!vport_cap) return -EINVAL; - if (vport_cap->vport_gvmi) + source_gvmi_set = MLX5_GET(ste_src_gvmi_qp, bit_mask, source_gvmi); + if (vport_cap->vport_gvmi && source_gvmi_set) MLX5_SET(ste_src_gvmi_qp, tag, source_gvmi, vport_cap->vport_gvmi); misc->source_eswitch_owner_vhca_id = 0; -- cgit v1.2.3-59-g8ed1b From 383de108157c881074f32914b61125e299820bd2 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 12 Feb 2020 11:32:39 +0200 Subject: net/mlx5e: Don't clear the whole vf config when switching modes There is no need to reset all vf config (except link state) between legacy and switchdev modes changes. Also, set link state to AUTO, when legacy enabled. 
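As an aside, a small C sketch of the intent; the field and function names are invented for illustration and do not match struct mlx5_vport exactly. Only the per-VF link-state policy is re-armed when legacy mode comes back, while MAC, trust and spoof-check settings survive the legacy/switchdev transition:

    #include <linux/types.h>
    #include <linux/if_ether.h>

    struct vf_admin_cfg {
            u8   mac[ETH_ALEN];     /* preserved across mode changes */
            bool trusted;           /* preserved */
            bool spoofchk;          /* preserved */
            int  link_state;        /* re-armed to "auto" on legacy enable */
    };

    static void legacy_enable_rearm_links(struct vf_admin_cfg *cfg, int num_vfs)
    {
            int i;

            /* Touch only the link-state policy, nothing else */
            for (i = 0; i < num_vfs; i++)
                    cfg[i].link_state = 0; /* 0 stands for "auto" in this sketch */
    }
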
Fixes: 3b83b6c2e024 ("net/mlx5e: Clear VF config when switching modes") Signed-off-by: Dmytro Linkin Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 6 +++++- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 564d42605892..e49acd0c5da5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -459,12 +459,16 @@ static void esw_destroy_legacy_table(struct mlx5_eswitch *esw) static int esw_legacy_enable(struct mlx5_eswitch *esw) { - int ret; + struct mlx5_vport *vport; + int ret, i; ret = esw_create_legacy_table(esw); if (ret) return ret; + mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) + vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; + ret = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_LEGACY_SRIOV_VPORT_EVENTS); if (ret) esw_destroy_legacy_table(esw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 979f13bdc203..1a57b2bd74b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1172,7 +1172,7 @@ static int esw_offloads_start(struct mlx5_eswitch *esw, return -EINVAL; } - mlx5_eswitch_disable(esw, true); + mlx5_eswitch_disable(esw, false); mlx5_eswitch_update_num_of_vfs(esw, esw->dev->priv.sriov.num_vfs); err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS); if (err) { @@ -2065,7 +2065,7 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, { int err, err1; - mlx5_eswitch_disable(esw, true); + mlx5_eswitch_disable(esw, false); err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy"); -- cgit v1.2.3-59-g8ed1b From 76781623f009d5615b67f0675230ef90eaa9272a Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 31 Dec 2019 17:04:15 +0200 Subject: net/mlx5: Fix lowest FDB pool size The pool sizes represent the pool sizes in the fw. when we request a pool size from fw, it will return the next possible group. We track how many pools the fw has left and start requesting groups from the big to the small. When we start request 4k group, which doesn't exists in fw, fw wants to allocate the next possible size, 64k, but will fail since its exhausted. The correct smallest pool size in fw is 128 and not 4k. 
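To make the size-matching concrete, a short C sketch of the behaviour described above; the table values mirror the ESW_POOLS array in the patch below, while the helper itself is hypothetical. The firmware satisfies a request with the smallest supported pool that is at least as large as the request, so asking for a 4K group escalates to the 64K pool and fails once that pool is exhausted, whereas 128 is a size the firmware actually supports:

    #include <linux/kernel.h>

    static const unsigned int fw_pool_sizes[] = {
            4 * 1024 * 1024, 1 * 1024 * 1024, 64 * 1024, 128,
    };

    /* Return the pool size the firmware would actually allocate for a
     * requested group size, or 0 if even the largest pool is too small.
     */
    static unsigned int fw_pool_for(unsigned int requested)
    {
            int i;

            for (i = ARRAY_SIZE(fw_pool_sizes) - 1; i >= 0; i--)
                    if (fw_pool_sizes[i] >= requested)
                            return fw_pool_sizes[i];

            return 0;
    }
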
Fixes: 39ac237ce009 ("net/mlx5: E-Switch, Refactor chains and priorities") Signed-off-by: Paul Blakey Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c index c5a446e295aa..4276194b633f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c @@ -35,7 +35,7 @@ static const unsigned int ESW_POOLS[] = { 4 * 1024 * 1024, 1 * 1024 * 1024, 64 * 1024, - 4 * 1024, }; + 128 }; struct mlx5_esw_chains_priv { struct rhashtable chains_ht; -- cgit v1.2.3-59-g8ed1b From 13a7e459a41a56d788ab33d825c6205379bbb711 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Tue, 14 Jan 2020 09:27:27 +0200 Subject: net/mlx5: DR, Handle reformat capability over sw-steering tables On flow table creation, send the relevant flags according to what the FW currently supports. When FW doesn't support reformat option over SW-steering managed table, the driver shouldn't pass this. Fixes: 988fd6b32d07 ("net/mlx5: DR, Pass table flags at creation to lower layer") Signed-off-by: Erez Shitrit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c | 9 +++++++-- include/linux/mlx5/mlx5_ifc.h | 5 ++++- 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c index 3abfc8125926..c2027192e21e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c @@ -66,15 +66,20 @@ static int mlx5_cmd_dr_create_flow_table(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *next_ft) { struct mlx5dr_table *tbl; + u32 flags; int err; if (mlx5_dr_is_fw_table(ft->flags)) return mlx5_fs_cmd_get_fw_cmds()->create_flow_table(ns, ft, log_size, next_ft); + flags = ft->flags; + /* turn off encap/decap if not supported for sw-str by fw */ + if (!MLX5_CAP_FLOWTABLE(ns->dev, sw_owner_reformat_supported)) + flags = ft->flags & ~(MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | + MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); - tbl = mlx5dr_table_create(ns->fs_dr_domain.dr_domain, - ft->level, ft->flags); + tbl = mlx5dr_table_create(ns->fs_dr_domain.dr_domain, ft->level, flags); if (!tbl) { mlx5_core_err(ns->dev, "Failed creating dr flow_table\n"); return -EINVAL; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index ff8c9d527bb4..bfdf41537cf1 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -688,7 +688,10 @@ struct mlx5_ifc_flow_table_nic_cap_bits { u8 nic_rx_multi_path_tirs[0x1]; u8 nic_rx_multi_path_tirs_fts[0x1]; u8 allow_sniffer_and_nic_rx_shared_tir[0x1]; - u8 reserved_at_3[0x1d]; + u8 reserved_at_3[0x4]; + u8 sw_owner_reformat_supported[0x1]; + u8 reserved_at_8[0x18]; + u8 encap_general_header[0x1]; u8 reserved_at_21[0xa]; u8 log_max_packet_reformat_context[0x5]; -- cgit v1.2.3-59-g8ed1b From 840f8ad0aaf20044e2fb099095bbce27c02f58da Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Thu, 13 Feb 2020 13:31:23 -0800 Subject: ice: Don't reject odd values of usecs set by user Currently if a user sets an odd [tx|rx]-usecs value through ethtool, the request is denied because the 
hardware is set to have an ITR granularity of 2us. This caused poor customer experience. Fix this by aligning to a register allowed value, which results in rounding down. Also, print a once per ring container type message to be clear about our intentions. Also, change the ITR_TO_REG define to be the bitwise and of the ITR setting and the ICE_ITR_MASK. This makes the purpose of ITR_TO_REG more obvious. Signed-off-by: Brett Creeley Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_ethtool.c | 49 +++++++++++++++++++++------- drivers/net/ethernet/intel/ice/ice_txrx.h | 2 +- 2 files changed, 39 insertions(+), 12 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index b002ab4e5838..a88763066681 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -3489,21 +3489,13 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, return -EINVAL; } - /* hardware only supports an ITR granularity of 2us */ - if (coalesce_usecs % 2 != 0) { - netdev_info(vsi->netdev, "Invalid value, %s-usecs must be even\n", - c_type_str); - return -EINVAL; - } - if (use_adaptive_coalesce) { rc->itr_setting |= ICE_ITR_DYNAMIC; } else { - /* store user facing value how it was set */ + /* save the user set usecs */ rc->itr_setting = coalesce_usecs; - /* set to static and convert to value HW understands */ - rc->target_itr = - ITR_TO_REG(ITR_REG_ALIGN(rc->itr_setting)); + /* device ITR granularity is in 2 usec increments */ + rc->target_itr = ITR_REG_ALIGN(rc->itr_setting); } return 0; @@ -3596,6 +3588,30 @@ ice_is_coalesce_param_invalid(struct net_device *netdev, return 0; } +/** + * ice_print_if_odd_usecs - print message if user tries to set odd [tx|rx]-usecs + * @netdev: netdev used for print + * @itr_setting: previous user setting + * @use_adaptive_coalesce: if adaptive coalesce is enabled or being enabled + * @coalesce_usecs: requested value of [tx|rx]-usecs + * @c_type_str: either "rx" or "tx" to match user set field of [tx|rx]-usecs + */ +static void +ice_print_if_odd_usecs(struct net_device *netdev, u16 itr_setting, + u32 use_adaptive_coalesce, u32 coalesce_usecs, + const char *c_type_str) +{ + if (use_adaptive_coalesce) + return; + + itr_setting = ITR_TO_REG(itr_setting); + + if (itr_setting != coalesce_usecs && (coalesce_usecs % 2)) + netdev_info(netdev, "User set %s-usecs to %d, device only supports even values. Rounding down and attempting to set %s-usecs to %d\n", + c_type_str, coalesce_usecs, c_type_str, + ITR_REG_ALIGN(coalesce_usecs)); +} + /** * __ice_set_coalesce - set ITR/INTRL values for the device * @netdev: pointer to the netdev associated with this query @@ -3616,8 +3632,19 @@ __ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, return -EINVAL; if (q_num < 0) { + struct ice_q_vector *q_vector = vsi->q_vectors[0]; int v_idx; + if (q_vector) { + ice_print_if_odd_usecs(netdev, q_vector->rx.itr_setting, + ec->use_adaptive_rx_coalesce, + ec->rx_coalesce_usecs, "rx"); + + ice_print_if_odd_usecs(netdev, q_vector->tx.itr_setting, + ec->use_adaptive_tx_coalesce, + ec->tx_coalesce_usecs, "tx"); + } + ice_for_each_q_vector(vsi, v_idx) { /* In some cases if DCB is configured the num_[rx|tx]q * can be less than vsi->num_q_vectors. 
This check diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 14a1bf445889..7ee00a128663 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -222,7 +222,7 @@ enum ice_rx_dtype { #define ICE_ITR_GRAN_S 1 /* ITR granularity is always 2us */ #define ICE_ITR_GRAN_US BIT(ICE_ITR_GRAN_S) #define ICE_ITR_MASK 0x1FFE /* ITR register value alignment mask */ -#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~ICE_ITR_MASK) +#define ITR_REG_ALIGN(setting) ((setting) & ICE_ITR_MASK) #define ICE_ITR_ADAPTIVE_MIN_INC 0x0002 #define ICE_ITR_ADAPTIVE_MIN_USECS 0x0002 -- cgit v1.2.3-59-g8ed1b From 8a55c08d3bbc9ffc9639f69f742e59ebd99f913b Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Thu, 13 Feb 2020 13:31:24 -0800 Subject: ice: Don't tell the OS that link is going down Remove code that tell the OS that link is going down when user change flow control via ethtool. When link is up it isn't certain that link goes down after 0x0605 aq command. If link doesn't go down, OS thinks that link is down, but physical link is up. To reset this state user have to take interface down and up. If link goes down after 0x0605 command, FW send information about that and after that driver tells the OS that the link goes down. So this code in ethtool is unnecessary. Signed-off-by: Michal Swiatkowski Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_ethtool.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index a88763066681..77c412a7e7a4 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -2936,13 +2936,6 @@ ice_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) else return -EINVAL; - /* Tell the OS link is going down, the link will go back up when fw - * says it is ready asynchronously - */ - ice_print_link_msg(vsi, false); - netif_carrier_off(netdev); - netif_tx_stop_all_queues(netdev); - /* Set the FC mode and only restart AN if link is up */ status = ice_set_fc(pi, &aq_failures, link_up); -- cgit v1.2.3-59-g8ed1b From c54d209c78b8a3d0a75e710993833ebe1eb3273b Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Tue, 18 Feb 2020 13:22:06 -0800 Subject: ice: Wait for VF to be reset/ready before configuration The configuration/command below is failing when the VF in the xml file is already bound to the host iavf driver. pci_0000_af_0_0.xml:
> virsh attach-device domain_name pci_0000_af_0_0.xml error: Failed to attach device from pci_0000_af_0_0.xml error: Cannot set interface MAC/vlanid to 00:de:ad:00:11:01/0 for ifname ens1f1 vf 0: Device or resource busy This is failing because the VF has not been completely removed/reset after being unbound (via the virsh command above) from the host iavf driver and ice_set_vf_mac() checks if the VF is disabled before waiting for the reset to finish. Fix this by waiting for the VF remove/reset process to happen before checking if the VF is disabled. Also, since many functions for VF administration on the PF were more or less calling the same 3 functions (ice_wait_on_vf_reset(), ice_is_vf_disabled(), and ice_check_vf_init()) move these into the helper function ice_check_vf_ready_for_cfg(). Then call this function in any flow that attempts to configure/query a VF from the PF. Lastly, increase the maximum wait time in ice_wait_on_vf_reset() to 800ms, and modify/add the #define(s) that determine the wait time. This was done for robustness because in rare/stress cases VF removal can take a max of ~800ms and previously the wait was a max of ~300ms. Signed-off-by: Brett Creeley Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 134 +++++++++++++---------- drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h | 3 +- 2 files changed, 76 insertions(+), 61 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 262714d5f54a..75c70d432c72 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -1873,6 +1873,48 @@ error_param: NULL, 0); } +/** + * ice_wait_on_vf_reset - poll to make sure a given VF is ready after reset + * @vf: The VF being resseting + * + * The max poll time is about ~800ms, which is about the maximum time it takes + * for a VF to be reset and/or a VF driver to be removed. + */ +static void ice_wait_on_vf_reset(struct ice_vf *vf) +{ + int i; + + for (i = 0; i < ICE_MAX_VF_RESET_TRIES; i++) { + if (test_bit(ICE_VF_STATE_INIT, vf->vf_states)) + break; + msleep(ICE_MAX_VF_RESET_SLEEP_MS); + } +} + +/** + * ice_check_vf_ready_for_cfg - check if VF is ready to be configured/queried + * @vf: VF to check if it's ready to be configured/queried + * + * The purpose of this function is to make sure the VF is not in reset, not + * disabled, and initialized so it can be configured and/or queried by a host + * administrator. 
+ */ +static int ice_check_vf_ready_for_cfg(struct ice_vf *vf) +{ + struct ice_pf *pf; + + ice_wait_on_vf_reset(vf); + + if (ice_is_vf_disabled(vf)) + return -EINVAL; + + pf = vf->pf; + if (ice_check_vf_init(pf, vf)) + return -EBUSY; + + return 0; +} + /** * ice_set_vf_spoofchk * @netdev: network interface device structure @@ -1890,16 +1932,16 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena) enum ice_status status; struct device *dev; struct ice_vf *vf; - int ret = 0; + int ret; dev = ice_pf_to_dev(pf); if (ice_validate_vf_id(pf, vf_id)) return -EINVAL; vf = &pf->vf[vf_id]; - - if (ice_check_vf_init(pf, vf)) - return -EBUSY; + ret = ice_check_vf_ready_for_cfg(vf); + if (ret) + return ret; vf_vsi = pf->vsi[vf->lan_vsi_idx]; if (!vf_vsi) { @@ -2696,7 +2738,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, struct ice_vsi *vsi; struct device *dev; struct ice_vf *vf; - int ret = 0; + int ret; dev = ice_pf_to_dev(pf); if (ice_validate_vf_id(pf, vf_id)) @@ -2714,13 +2756,15 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, vf = &pf->vf[vf_id]; vsi = pf->vsi[vf->lan_vsi_idx]; - if (ice_check_vf_init(pf, vf)) - return -EBUSY; + + ret = ice_check_vf_ready_for_cfg(vf); + if (ret) + return ret; if (le16_to_cpu(vsi->info.pvid) == vlanprio) { /* duplicate request, so just return success */ dev_dbg(dev, "Duplicate pvid %d request\n", vlanprio); - return ret; + return 0; } /* If PVID, then remove all filters on the old VLAN */ @@ -2731,7 +2775,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, if (vlan_id || qos) { ret = ice_vsi_manage_pvid(vsi, vlanprio, true); if (ret) - goto error_set_pvid; + return ret; } else { ice_vsi_manage_pvid(vsi, 0, false); vsi->info.pvid = 0; @@ -2744,7 +2788,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, /* add new VLAN filter for each MAC */ ret = ice_vsi_add_vlan(vsi, vlan_id); if (ret) - goto error_set_pvid; + return ret; } /* The Port VLAN needs to be saved across resets the same as the @@ -2752,8 +2796,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, */ vf->port_vlan_id = le16_to_cpu(vsi->info.pvid); -error_set_pvid: - return ret; + return 0; } /** @@ -3236,23 +3279,6 @@ ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi) return 0; } -/** - * ice_wait_on_vf_reset - * @vf: The VF being resseting - * - * Poll to make sure a given VF is ready after reset - */ -static void ice_wait_on_vf_reset(struct ice_vf *vf) -{ - int i; - - for (i = 0; i < ICE_MAX_VF_RESET_WAIT; i++) { - if (test_bit(ICE_VF_STATE_INIT, vf->vf_states)) - break; - msleep(20); - } -} - /** * ice_set_vf_mac * @netdev: network interface device structure @@ -3265,29 +3291,21 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) { struct ice_pf *pf = ice_netdev_to_pf(netdev); struct ice_vf *vf; - int ret = 0; + int ret; if (ice_validate_vf_id(pf, vf_id)) return -EINVAL; - vf = &pf->vf[vf_id]; - /* Don't set MAC on disabled VF */ - if (ice_is_vf_disabled(vf)) - return -EINVAL; - - /* In case VF is in reset mode, wait until it is completed. 
Depending - * on factors like queue disabling routine, this could take ~250ms - */ - ice_wait_on_vf_reset(vf); - - if (ice_check_vf_init(pf, vf)) - return -EBUSY; - if (is_zero_ether_addr(mac) || is_multicast_ether_addr(mac)) { netdev_err(netdev, "%pM not a valid unicast address\n", mac); return -EINVAL; } + vf = &pf->vf[vf_id]; + ret = ice_check_vf_ready_for_cfg(vf); + if (ret) + return ret; + /* copy MAC into dflt_lan_addr and trigger a VF reset. The reset * flow will use the updated dflt_lan_addr and add a MAC filter * using ice_add_mac. Also set pf_set_mac to indicate that the PF has @@ -3299,7 +3317,7 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) vf_id, mac); ice_vc_reset_vf(vf); - return ret; + return 0; } /** @@ -3314,22 +3332,15 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted) { struct ice_pf *pf = ice_netdev_to_pf(netdev); struct ice_vf *vf; + int ret; if (ice_validate_vf_id(pf, vf_id)) return -EINVAL; vf = &pf->vf[vf_id]; - /* Don't set Trusted Mode on disabled VF */ - if (ice_is_vf_disabled(vf)) - return -EINVAL; - - /* In case VF is in reset mode, wait until it is completed. Depending - * on factors like queue disabling routine, this could take ~250ms - */ - ice_wait_on_vf_reset(vf); - - if (ice_check_vf_init(pf, vf)) - return -EBUSY; + ret = ice_check_vf_ready_for_cfg(vf); + if (ret) + return ret; /* Check if already trusted */ if (trusted == vf->trusted) @@ -3355,13 +3366,15 @@ int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state) { struct ice_pf *pf = ice_netdev_to_pf(netdev); struct ice_vf *vf; + int ret; if (ice_validate_vf_id(pf, vf_id)) return -EINVAL; vf = &pf->vf[vf_id]; - if (ice_check_vf_init(pf, vf)) - return -EBUSY; + ret = ice_check_vf_ready_for_cfg(vf); + if (ret) + return ret; switch (link_state) { case IFLA_VF_LINK_STATE_AUTO: @@ -3397,14 +3410,15 @@ int ice_get_vf_stats(struct net_device *netdev, int vf_id, struct ice_eth_stats *stats; struct ice_vsi *vsi; struct ice_vf *vf; + int ret; if (ice_validate_vf_id(pf, vf_id)) return -EINVAL; vf = &pf->vf[vf_id]; - - if (ice_check_vf_init(pf, vf)) - return -EBUSY; + ret = ice_check_vf_ready_for_cfg(vf); + if (ret) + return ret; vsi = pf->vsi[vf->lan_vsi_idx]; if (!vsi) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h index 4647d636ed36..ac67982751df 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h @@ -38,7 +38,8 @@ #define ICE_MAX_POLICY_INTR_PER_VF 33 #define ICE_MIN_INTR_PER_VF (ICE_MIN_QS_PER_VF + 1) #define ICE_DFLT_INTR_PER_VF (ICE_DFLT_QS_PER_VF + 1) -#define ICE_MAX_VF_RESET_WAIT 15 +#define ICE_MAX_VF_RESET_TRIES 40 +#define ICE_MAX_VF_RESET_SLEEP_MS 20 #define ice_for_each_vf(pf, i) \ for ((i) = 0; (i) < (pf)->num_alloc_vfs; (i)++) -- cgit v1.2.3-59-g8ed1b From ac2fcfa9fd26db67d7000677c05629c34cc94564 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Wed, 19 Feb 2020 15:15:51 +0100 Subject: net: macb: Properly handle phylink on at91rm9200 at91ether_init was handling the phy mode and speed but since the switch to phylink, the NCFGR register got overwritten by macb_mac_config(). The issue is that the RM9200_RMII bit and the MACB_CLK_DIV32 field are cleared but never restored as they conflict with the PAE, GBE and PCSSEL bits. Add new capability to differentiate between EMAC and the other versions of the IP and use it to set and avoid clearing the relevant bits. 
Also, this fixes a NULL pointer dereference in macb_mac_link_up as the EMAC doesn't use any rings/bufffers/queues. Fixes: 7897b071ac3b ("net: macb: convert to phylink") Signed-off-by: Alexandre Belloni Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb.h | 1 + drivers/net/ethernet/cadence/macb_main.c | 60 +++++++++++++++++--------------- 2 files changed, 33 insertions(+), 28 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index dbf7070fcdba..a3f0f27fc79a 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -652,6 +652,7 @@ #define MACB_CAPS_GEM_HAS_PTP 0x00000040 #define MACB_CAPS_BD_RD_PREFETCH 0x00000080 #define MACB_CAPS_NEEDS_RSTONUBR 0x00000100 +#define MACB_CAPS_MACB_IS_EMAC 0x08000000 #define MACB_CAPS_FIFO_MODE 0x10000000 #define MACB_CAPS_GIGABIT_MODE_AVAILABLE 0x20000000 #define MACB_CAPS_SG_DISABLED 0x40000000 diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index def94e91883a..2c28da1737fe 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -572,8 +572,21 @@ static void macb_mac_config(struct phylink_config *config, unsigned int mode, old_ctrl = ctrl = macb_or_gem_readl(bp, NCFGR); /* Clear all the bits we might set later */ - ctrl &= ~(GEM_BIT(GBE) | MACB_BIT(SPD) | MACB_BIT(FD) | MACB_BIT(PAE) | - GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL)); + ctrl &= ~(MACB_BIT(SPD) | MACB_BIT(FD) | MACB_BIT(PAE)); + + if (bp->caps & MACB_CAPS_MACB_IS_EMAC) { + if (state->interface == PHY_INTERFACE_MODE_RMII) + ctrl |= MACB_BIT(RM9200_RMII); + } else { + ctrl &= ~(GEM_BIT(GBE) | GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL)); + + /* We do not support MLO_PAUSE_RX yet */ + if (state->pause & MLO_PAUSE_TX) + ctrl |= MACB_BIT(PAE); + + if (state->interface == PHY_INTERFACE_MODE_SGMII) + ctrl |= GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL); + } if (state->speed == SPEED_1000) ctrl |= GEM_BIT(GBE); @@ -583,13 +596,6 @@ static void macb_mac_config(struct phylink_config *config, unsigned int mode, if (state->duplex) ctrl |= MACB_BIT(FD); - /* We do not support MLO_PAUSE_RX yet */ - if (state->pause & MLO_PAUSE_TX) - ctrl |= MACB_BIT(PAE); - - if (state->interface == PHY_INTERFACE_MODE_SGMII) - ctrl |= GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL); - /* Apply the new configuration, if any */ if (old_ctrl ^ ctrl) macb_or_gem_writel(bp, NCFGR, ctrl); @@ -608,9 +614,10 @@ static void macb_mac_link_down(struct phylink_config *config, unsigned int mode, unsigned int q; u32 ctrl; - for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) - queue_writel(queue, IDR, - bp->rx_intr_mask | MACB_TX_INT_FLAGS | MACB_BIT(HRESP)); + if (!(bp->caps & MACB_CAPS_MACB_IS_EMAC)) + for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) + queue_writel(queue, IDR, + bp->rx_intr_mask | MACB_TX_INT_FLAGS | MACB_BIT(HRESP)); /* Disable Rx and Tx */ ctrl = macb_readl(bp, NCR) & ~(MACB_BIT(RE) | MACB_BIT(TE)); @@ -627,17 +634,19 @@ static void macb_mac_link_up(struct phylink_config *config, unsigned int mode, struct macb_queue *queue; unsigned int q; - macb_set_tx_clk(bp->tx_clk, bp->speed, ndev); + if (!(bp->caps & MACB_CAPS_MACB_IS_EMAC)) { + macb_set_tx_clk(bp->tx_clk, bp->speed, ndev); - /* Initialize rings & buffers as clearing MACB_BIT(TE) in link down - * cleared the pipeline and control registers. 
- */ - bp->macbgem_ops.mog_init_rings(bp); - macb_init_buffers(bp); + /* Initialize rings & buffers as clearing MACB_BIT(TE) in link down + * cleared the pipeline and control registers. + */ + bp->macbgem_ops.mog_init_rings(bp); + macb_init_buffers(bp); - for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) - queue_writel(queue, IER, - bp->rx_intr_mask | MACB_TX_INT_FLAGS | MACB_BIT(HRESP)); + for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) + queue_writel(queue, IER, + bp->rx_intr_mask | MACB_TX_INT_FLAGS | MACB_BIT(HRESP)); + } /* Enable Rx and Tx */ macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(RE) | MACB_BIT(TE)); @@ -4041,7 +4050,6 @@ static int at91ether_init(struct platform_device *pdev) struct net_device *dev = platform_get_drvdata(pdev); struct macb *bp = netdev_priv(dev); int err; - u32 reg; bp->queues[0].bp = bp; @@ -4055,11 +4063,7 @@ static int at91ether_init(struct platform_device *pdev) macb_writel(bp, NCR, 0); - reg = MACB_BF(CLK, MACB_CLK_DIV32) | MACB_BIT(BIG); - if (bp->phy_interface == PHY_INTERFACE_MODE_RMII) - reg |= MACB_BIT(RM9200_RMII); - - macb_writel(bp, NCFGR, reg); + macb_writel(bp, NCFGR, MACB_BF(CLK, MACB_CLK_DIV32) | MACB_BIT(BIG)); return 0; } @@ -4218,7 +4222,7 @@ static const struct macb_config sama5d4_config = { }; static const struct macb_config emac_config = { - .caps = MACB_CAPS_NEEDS_RSTONUBR, + .caps = MACB_CAPS_NEEDS_RSTONUBR | MACB_CAPS_MACB_IS_EMAC, .clk_init = at91ether_clk_init, .init = at91ether_init, }; -- cgit v1.2.3-59-g8ed1b From 68b759a75d6257759d1e37ff13f2d0659baf1112 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Wed, 19 Feb 2020 14:59:42 -0800 Subject: ionic: fix fw_status read The fw_status field is only 8 bits, so fix the read. Also, we only want to look at the one status bit, to allow for future use of the other bits, and watch for a bad PCI read. Fixes: 97ca486592c0 ("ionic: add heartbeat check") Signed-off-by: Shannon Nelson Signed-off-by: David S. 
Miller --- drivers/net/ethernet/pensando/ionic/ionic_dev.c | 11 +++++++---- drivers/net/ethernet/pensando/ionic/ionic_if.h | 1 + 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c index 87f82f36812f..46107de5e6c3 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c @@ -103,7 +103,7 @@ int ionic_heartbeat_check(struct ionic *ionic) { struct ionic_dev *idev = &ionic->idev; unsigned long hb_time; - u32 fw_status; + u8 fw_status; u32 hb; /* wait a little more than one second before testing again */ @@ -111,9 +111,12 @@ int ionic_heartbeat_check(struct ionic *ionic) if (time_before(hb_time, (idev->last_hb_time + ionic->watchdog_period))) return 0; - /* firmware is useful only if fw_status is non-zero */ - fw_status = ioread32(&idev->dev_info_regs->fw_status); - if (!fw_status) + /* firmware is useful only if the running bit is set and + * fw_status != 0xff (bad PCI read) + */ + fw_status = ioread8(&idev->dev_info_regs->fw_status); + if (fw_status == 0xff || + !(fw_status & IONIC_FW_STS_F_RUNNING)) return -ENXIO; /* early FW has no heartbeat, else FW will return non-zero */ diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h index ce07c2931a72..54547d53b0f2 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_if.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h @@ -2445,6 +2445,7 @@ union ionic_dev_info_regs { u8 version; u8 asic_type; u8 asic_rev; +#define IONIC_FW_STS_F_RUNNING 0x1 u8 fw_status; u32 fw_heartbeat; char fw_version[IONIC_DEVINFO_FWVERS_BUFLEN]; -- cgit v1.2.3-59-g8ed1b From 971617c3b761c876d686a2188220a33898c90e99 Mon Sep 17 00:00:00 2001 From: Tim Harvey Date: Wed, 19 Feb 2020 15:19:36 -0800 Subject: net: thunderx: workaround BGX TX Underflow issue While it is not yet understood why a TX underflow can easily occur for SGMII interfaces resulting in a TX wedge. It has been found that disabling/re-enabling the LMAC resolves the issue. Signed-off-by: Tim Harvey Reviewed-by: Robert Jones Signed-off-by: David S. 
Miller --- drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 62 +++++++++++++++++++++-- drivers/net/ethernet/cavium/thunder/thunder_bgx.h | 9 ++++ 2 files changed, 68 insertions(+), 3 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 17a4110c2e49..8ff28ed04b7f 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -410,10 +410,19 @@ void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable) lmac = &bgx->lmac[lmacid]; cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG); - if (enable) + if (enable) { cfg |= CMR_PKT_RX_EN | CMR_PKT_TX_EN; - else + + /* enable TX FIFO Underflow interrupt */ + bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_TXX_INT_ENA_W1S, + GMI_TXX_INT_UNDFLW); + } else { cfg &= ~(CMR_PKT_RX_EN | CMR_PKT_TX_EN); + + /* Disable TX FIFO Underflow interrupt */ + bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_TXX_INT_ENA_W1C, + GMI_TXX_INT_UNDFLW); + } bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg); if (bgx->is_rgx) @@ -1535,6 +1544,48 @@ static int bgx_init_phy(struct bgx *bgx) return bgx_init_of_phy(bgx); } +static irqreturn_t bgx_intr_handler(int irq, void *data) +{ + struct bgx *bgx = (struct bgx *)data; + u64 status, val; + int lmac; + + for (lmac = 0; lmac < bgx->lmac_count; lmac++) { + status = bgx_reg_read(bgx, lmac, BGX_GMP_GMI_TXX_INT); + if (status & GMI_TXX_INT_UNDFLW) { + pci_err(bgx->pdev, "BGX%d lmac%d UNDFLW\n", + bgx->bgx_id, lmac); + val = bgx_reg_read(bgx, lmac, BGX_CMRX_CFG); + val &= ~CMR_EN; + bgx_reg_write(bgx, lmac, BGX_CMRX_CFG, val); + val |= CMR_EN; + bgx_reg_write(bgx, lmac, BGX_CMRX_CFG, val); + } + /* clear interrupts */ + bgx_reg_write(bgx, lmac, BGX_GMP_GMI_TXX_INT, status); + } + + return IRQ_HANDLED; +} + +static void bgx_register_intr(struct pci_dev *pdev) +{ + struct bgx *bgx = pci_get_drvdata(pdev); + int ret; + + ret = pci_alloc_irq_vectors(pdev, BGX_LMAC_VEC_OFFSET, + BGX_LMAC_VEC_OFFSET, PCI_IRQ_ALL_TYPES); + if (ret < 0) { + pci_err(pdev, "Req for #%d msix vectors failed\n", + BGX_LMAC_VEC_OFFSET); + return; + } + ret = pci_request_irq(pdev, GMPX_GMI_TX_INT, bgx_intr_handler, NULL, + bgx, "BGX%d", bgx->bgx_id); + if (ret) + pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx); +} + static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { int err; @@ -1550,7 +1601,7 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, bgx); - err = pci_enable_device(pdev); + err = pcim_enable_device(pdev); if (err) { dev_err(dev, "Failed to enable PCI device\n"); pci_set_drvdata(pdev, NULL); @@ -1604,6 +1655,8 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) bgx_init_hw(bgx); + bgx_register_intr(pdev); + /* Enable all LMACs */ for (lmac = 0; lmac < bgx->lmac_count; lmac++) { err = bgx_lmac_enable(bgx, lmac); @@ -1620,6 +1673,7 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err_enable: bgx_vnic[bgx->bgx_id] = NULL; + pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx); err_release_regions: pci_release_regions(pdev); err_disable_device: @@ -1637,6 +1691,8 @@ static void bgx_remove(struct pci_dev *pdev) for (lmac = 0; lmac < bgx->lmac_count; lmac++) bgx_lmac_disable(bgx, lmac); + pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx); + bgx_vnic[bgx->bgx_id] = NULL; pci_release_regions(pdev); pci_disable_device(pdev); diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h 
b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h index 25888706bdcd..cdea49392185 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h @@ -180,6 +180,15 @@ #define BGX_GMP_GMI_TXX_BURST 0x38228 #define BGX_GMP_GMI_TXX_MIN_PKT 0x38240 #define BGX_GMP_GMI_TXX_SGMII_CTL 0x38300 +#define BGX_GMP_GMI_TXX_INT 0x38500 +#define BGX_GMP_GMI_TXX_INT_W1S 0x38508 +#define BGX_GMP_GMI_TXX_INT_ENA_W1C 0x38510 +#define BGX_GMP_GMI_TXX_INT_ENA_W1S 0x38518 +#define GMI_TXX_INT_PTP_LOST BIT_ULL(4) +#define GMI_TXX_INT_LATE_COL BIT_ULL(3) +#define GMI_TXX_INT_XSDEF BIT_ULL(2) +#define GMI_TXX_INT_XSCOL BIT_ULL(1) +#define GMI_TXX_INT_UNDFLW BIT_ULL(0) #define BGX_MSIX_VEC_0_29_ADDR 0x400000 /* +(0..29) << 4 */ #define BGX_MSIX_VEC_0_29_CTL 0x400008 -- cgit v1.2.3-59-g8ed1b From 5567ae4a8d569d996d0d88d0eceb76205e4c7ce5 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Thu, 20 Feb 2020 17:26:34 -0500 Subject: bnxt_en: Improve device shutdown method. Especially when bnxt_shutdown() is called during kexec, we need to disable MSIX and disable Bus Master to completely quiesce the device. Make these 2 calls unconditionally in the shutdown method. Fixes: c20dc142dd7b ("bnxt_en: Disable bus master during PCI shutdown and driver unload.") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 597e6fd5bfea..2ad007e5ee7f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11983,10 +11983,10 @@ static void bnxt_shutdown(struct pci_dev *pdev) dev_close(dev); bnxt_ulp_shutdown(bp); + bnxt_clear_int_mode(bp); + pci_disable_device(pdev); if (system_state == SYSTEM_POWER_OFF) { - bnxt_clear_int_mode(bp); - pci_disable_device(pdev); pci_wake_from_d3(pdev, bp->wol); pci_set_power_state(pdev, PCI_D3hot); } -- cgit v1.2.3-59-g8ed1b From 8743db4a9acfd51f805ac0c87bcaae92c42d1061 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Thu, 20 Feb 2020 17:26:35 -0500 Subject: bnxt_en: Issue PCIe FLR in kdump kernel to cleanup pending DMAs. If crashed kernel does not shutdown the NIC properly, PCIe FLR is required in the kdump kernel in order to initialize all the functions properly. Fixes: d629522e1d66 ("bnxt_en: Reduce memory usage when running in kdump kernel.") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 2ad007e5ee7f..fd6e0e48cd51 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11786,6 +11786,14 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (version_printed++ == 0) pr_info("%s", version); + /* Clear any pending DMA transactions from crash kernel + * while loading driver in capture kernel. 
+ */ + if (is_kdump_kernel()) { + pci_clear_master(pdev); + pcie_flr(pdev); + } + max_irqs = bnxt_get_max_irq(pdev); dev = alloc_etherdev_mq(sizeof(*bp), max_irqs); if (!dev) -- cgit v1.2.3-59-g8ed1b From eae7172f8141eb98e64e6e81acc9e9d5b2add127 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Fri, 21 Feb 2020 14:17:05 +0100 Subject: net: usb: qmi_wwan: restore mtu min/max values after raw_ip switch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit usbnet creates network interfaces with min_mtu = 0 and max_mtu = ETH_MAX_MTU. These values are not modified by qmi_wwan when the network interface is created initially, allowing, for example, to set mtu greater than 1500. When a raw_ip switch is done (raw_ip set to 'Y', then set to 'N') the mtu values for the network interface are set through ether_setup, with min_mtu = ETH_MIN_MTU and max_mtu = ETH_DATA_LEN, not allowing anymore to set mtu greater than 1500 (error: mtu greater than device maximum). The patch restores the original min/max mtu values set by usbnet after a raw_ip switch. Signed-off-by: Daniele Palmas Acked-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 3b7a3b8a5e06..5754bb6ca0ee 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -337,6 +337,9 @@ static void qmi_wwan_netdev_setup(struct net_device *net) netdev_dbg(net, "mode: raw IP\n"); } else if (!net->header_ops) { /* don't bother if already set */ ether_setup(net); + /* Restoring min/max mtu values set originally by usbnet */ + net->min_mtu = 0; + net->max_mtu = ETH_MAX_MTU; clear_bit(EVENT_NO_IP_ALIGN, &dev->flags); netdev_dbg(net, "mode: Ethernet\n"); } -- cgit v1.2.3-59-g8ed1b From f6f13c125e05603f68f5bf31f045b95e6d493598 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Fri, 21 Feb 2020 08:32:18 -0800 Subject: hv_netvsc: Fix unwanted wakeup in netvsc_attach() When netvsc_attach() is called by operations like changing MTU, etc., an extra wakeup may happen while netvsc_attach() calling rndis_filter_device_add() which sends rndis messages when queue is stopped in netvsc_detach(). The completion message will wake up queue 0. We can reproduce the issue by changing MTU etc., then the wake_queue counter from "ethtool -S" will increase beyond stop_queue counter: stop_queue: 0 wake_queue: 1 The issue causes queue wake up, and counter increment, no other ill effects in current code. So we didn't see any network problem for now. To fix this, initialize tx_disable to true, and set it to false when the NIC is ready to be attached or registered. Fixes: 7b2ee50c0cd5 ("hv_netvsc: common detach logic") Signed-off-by: Haiyang Zhang Signed-off-by: David S. 
Miller --- drivers/net/hyperv/netvsc.c | 2 +- drivers/net/hyperv/netvsc_drv.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index ae3f3084c2ed..1b320bcf150a 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -99,7 +99,7 @@ static struct netvsc_device *alloc_net_device(void) init_waitqueue_head(&net_device->wait_drain); net_device->destroy = false; - net_device->tx_disable = false; + net_device->tx_disable = true; net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT; net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 65e12cb07f45..2c0a24c606fc 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1068,6 +1068,7 @@ static int netvsc_attach(struct net_device *ndev, } /* In any case device is now ready */ + nvdev->tx_disable = false; netif_device_attach(ndev); /* Note: enable and attach happen when sub-channels setup */ @@ -2476,6 +2477,8 @@ static int netvsc_probe(struct hv_device *dev, else net->max_mtu = ETH_DATA_LEN; + nvdev->tx_disable = false; + ret = register_netdevice(net); if (ret != 0) { pr_err("Unable to register netdev.\n"); -- cgit v1.2.3-59-g8ed1b From 44343418d0f2f623cb9da6f5000df793131cbe3b Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sun, 23 Feb 2020 14:38:40 +0100 Subject: net: ks8851-ml: Fix IRQ handling and locking The KS8851 requires that packet RX and TX are mutually exclusive. Currently, the driver hopes to achieve this by disabling interrupt from the card by writing the card registers and by disabling the interrupt on the interrupt controller. This however is racy on SMP. Replace this approach by expanding the spinlock used around the ks_start_xmit() TX path to ks_irq() RX path to assure true mutual exclusion and remove the interrupt enabling/disabling, which is now not needed anymore. Furthermore, disable interrupts also in ks_net_stop(), which was missing before. Note that a massive improvement here would be to re-use the KS8851 driver approach, which is to move the TX path into a worker thread, interrupt handling to threaded interrupt, and synchronize everything with mutexes, but that would be a much bigger rework, for a separate patch. Signed-off-by: Marek Vasut Cc: David S. Miller Cc: Lukas Wunner Cc: Petr Stetiar Cc: YueHaibing Signed-off-by: David S. 
Miller --- drivers/net/ethernet/micrel/ks8851_mll.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c index 1c9e70c8cc30..58579baf3f7a 100644 --- a/drivers/net/ethernet/micrel/ks8851_mll.c +++ b/drivers/net/ethernet/micrel/ks8851_mll.c @@ -513,14 +513,17 @@ static irqreturn_t ks_irq(int irq, void *pw) { struct net_device *netdev = pw; struct ks_net *ks = netdev_priv(netdev); + unsigned long flags; u16 status; + spin_lock_irqsave(&ks->statelock, flags); /*this should be the first in IRQ handler */ ks_save_cmd_reg(ks); status = ks_rdreg16(ks, KS_ISR); if (unlikely(!status)) { ks_restore_cmd_reg(ks); + spin_unlock_irqrestore(&ks->statelock, flags); return IRQ_NONE; } @@ -546,6 +549,7 @@ static irqreturn_t ks_irq(int irq, void *pw) ks->netdev->stats.rx_over_errors++; /* this should be the last in IRQ handler*/ ks_restore_cmd_reg(ks); + spin_unlock_irqrestore(&ks->statelock, flags); return IRQ_HANDLED; } @@ -615,6 +619,7 @@ static int ks_net_stop(struct net_device *netdev) /* shutdown RX/TX QMU */ ks_disable_qmu(ks); + ks_disable_int(ks); /* set powermode to soft power down to save power */ ks_set_powermode(ks, PMECR_PM_SOFTDOWN); @@ -671,10 +676,9 @@ static netdev_tx_t ks_start_xmit(struct sk_buff *skb, struct net_device *netdev) { netdev_tx_t retv = NETDEV_TX_OK; struct ks_net *ks = netdev_priv(netdev); + unsigned long flags; - disable_irq(netdev->irq); - ks_disable_int(ks); - spin_lock(&ks->statelock); + spin_lock_irqsave(&ks->statelock, flags); /* Extra space are required: * 4 byte for alignment, 4 for status/length, 4 for CRC @@ -688,9 +692,7 @@ static netdev_tx_t ks_start_xmit(struct sk_buff *skb, struct net_device *netdev) dev_kfree_skb(skb); } else retv = NETDEV_TX_BUSY; - spin_unlock(&ks->statelock); - ks_enable_int(ks); - enable_irq(netdev->irq); + spin_unlock_irqrestore(&ks->statelock, flags); return retv; } -- cgit v1.2.3-59-g8ed1b From 503ba7c6961034ff0047707685644cad9287c226 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 20 Feb 2020 15:34:53 -0800 Subject: net: phy: Avoid multiple suspends It is currently possible for a PHY device to be suspended as part of a network device driver's suspend call while it is still being attached to that net_device, either via phy_suspend() or implicitly via phy_stop(). Later on, when the MDIO bus controller get suspended, we would attempt to suspend again the PHY because it is still attached to a network device. This is both a waste of time and creates an opportunity for improper clock/power management bugs to creep in. Fixes: 803dd9c77ac3 ("net: phy: avoid suspending twice a PHY") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 6a5056e0ae77..6131aca79823 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -247,7 +247,7 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) * MDIO bus driver and clock gated at this point. 
*/ if (!netdev) - return !phydev->suspended; + goto out; if (netdev->wol_enabled) return false; @@ -267,7 +267,8 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) if (device_may_wakeup(&netdev->dev)) return false; - return true; +out: + return !phydev->suspended; } static int mdio_bus_phy_suspend(struct device *dev) -- cgit v1.2.3-59-g8ed1b From 84823ff80f7403752b59e00bb198724100dc611c Mon Sep 17 00:00:00 2001 From: Esben Haabendal Date: Fri, 21 Feb 2020 07:47:21 +0100 Subject: net: ll_temac: Fix race condition causing TX hang It is possible that the interrupt handler fires and frees up space in the TX ring in between checking for sufficient TX ring space and stopping the TX queue in temac_start_xmit. If this happens, the queue wake from the interrupt handler will occur before the queue is stopped, causing a lost wakeup and the adapter's transmit hanging. To avoid this, after stopping the queue, check again whether there is sufficient space in the TX ring. If so, wake up the queue again. This is a port of the similar fix in axienet driver, commit 7de44285c1f6 ("net: axienet: Fix race condition causing TX hang"). Fixes: 23ecc4bde21f ("net: ll_temac: fix checksum offload logic") Signed-off-by: Esben Haabendal Signed-off-by: David S. Miller --- drivers/net/ethernet/xilinx/ll_temac_main.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 6f11f52c9a9e..996004ef8bd4 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -788,6 +788,9 @@ static void temac_start_xmit_done(struct net_device *ndev) stat = be32_to_cpu(cur_p->app0); } + /* Matches barrier in temac_start_xmit */ + smp_mb(); + netif_wake_queue(ndev); } @@ -830,9 +833,19 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev) cur_p = &lp->tx_bd_v[lp->tx_bd_tail]; if (temac_check_tx_bd_space(lp, num_frag + 1)) { - if (!netif_queue_stopped(ndev)) - netif_stop_queue(ndev); - return NETDEV_TX_BUSY; + if (netif_queue_stopped(ndev)) + return NETDEV_TX_BUSY; + + netif_stop_queue(ndev); + + /* Matches barrier in temac_start_xmit_done */ + smp_mb(); + + /* Space might have just been freed - check again */ + if (temac_check_tx_bd_space(lp, num_frag)) + return NETDEV_TX_BUSY; + + netif_wake_queue(ndev); } cur_p->app0 = 0; -- cgit v1.2.3-59-g8ed1b From d07c849cd2b97d6809430dfb7e738ad31088037a Mon Sep 17 00:00:00 2001 From: Esben Haabendal Date: Fri, 21 Feb 2020 07:47:33 +0100 Subject: net: ll_temac: Add more error handling of dma_map_single() calls This adds error handling to the remaining dma_map_single() calls, so that behavior is well defined if/when we run out of DMA memory. Fixes: 92744989533c ("net: add Xilinx ll_temac device driver") Signed-off-by: Esben Haabendal Signed-off-by: David S. 
Miller --- drivers/net/ethernet/xilinx/ll_temac_main.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 996004ef8bd4..c368c3914bda 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -367,6 +367,8 @@ static int temac_dma_bd_init(struct net_device *ndev) skb_dma_addr = dma_map_single(ndev->dev.parent, skb->data, XTE_MAX_JUMBO_FRAME_SIZE, DMA_FROM_DEVICE); + if (dma_mapping_error(ndev->dev.parent, skb_dma_addr)) + goto out; lp->rx_bd_v[i].phys = cpu_to_be32(skb_dma_addr); lp->rx_bd_v[i].len = cpu_to_be32(XTE_MAX_JUMBO_FRAME_SIZE); lp->rx_bd_v[i].app0 = cpu_to_be32(STS_CTRL_APP0_IRQONEND); @@ -863,12 +865,13 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev) skb_dma_addr = dma_map_single(ndev->dev.parent, skb->data, skb_headlen(skb), DMA_TO_DEVICE); cur_p->len = cpu_to_be32(skb_headlen(skb)); + if (WARN_ON_ONCE(dma_mapping_error(ndev->dev.parent, skb_dma_addr))) + return NETDEV_TX_BUSY; cur_p->phys = cpu_to_be32(skb_dma_addr); ptr_to_txbd((void *)skb, cur_p); for (ii = 0; ii < num_frag; ii++) { - lp->tx_bd_tail++; - if (lp->tx_bd_tail >= TX_BD_NUM) + if (++lp->tx_bd_tail >= TX_BD_NUM) lp->tx_bd_tail = 0; cur_p = &lp->tx_bd_v[lp->tx_bd_tail]; @@ -876,6 +879,25 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev) skb_frag_address(frag), skb_frag_size(frag), DMA_TO_DEVICE); + if (dma_mapping_error(ndev->dev.parent, skb_dma_addr)) { + if (--lp->tx_bd_tail < 0) + lp->tx_bd_tail = TX_BD_NUM - 1; + cur_p = &lp->tx_bd_v[lp->tx_bd_tail]; + while (--ii >= 0) { + --frag; + dma_unmap_single(ndev->dev.parent, + be32_to_cpu(cur_p->phys), + skb_frag_size(frag), + DMA_TO_DEVICE); + if (--lp->tx_bd_tail < 0) + lp->tx_bd_tail = TX_BD_NUM - 1; + cur_p = &lp->tx_bd_v[lp->tx_bd_tail]; + } + dma_unmap_single(ndev->dev.parent, + be32_to_cpu(cur_p->phys), + skb_headlen(skb), DMA_TO_DEVICE); + return NETDEV_TX_BUSY; + } cur_p->phys = cpu_to_be32(skb_dma_addr); cur_p->len = cpu_to_be32(skb_frag_size(frag)); cur_p->app0 = 0; -- cgit v1.2.3-59-g8ed1b From 770d9c67974c4c71af4beb786dc43162ad2a15ba Mon Sep 17 00:00:00 2001 From: Esben Haabendal Date: Fri, 21 Feb 2020 07:47:45 +0100 Subject: net: ll_temac: Fix RX buffer descriptor handling on GFP_ATOMIC pressure Failures caused by GFP_ATOMIC memory pressure have been observed, and due to the missing error handling, results in kernel crash such as [1876998.350133] kernel BUG at mm/slub.c:3952! 
[1876998.350141] invalid opcode: 0000 [#1] PREEMPT SMP PTI [1876998.350147] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 5.3.0-scnxt #1 [1876998.350150] Hardware name: N/A N/A/COMe-bIP2, BIOS CCR2R920 03/01/2017 [1876998.350160] RIP: 0010:kfree+0x1ca/0x220 [1876998.350164] Code: 85 db 74 49 48 8b 95 68 01 00 00 48 31 c2 48 89 10 e9 d7 fe ff ff 49 8b 04 24 a9 00 00 01 00 75 0b 49 8b 44 24 08 a8 01 75 02 <0f> 0b 49 8b 04 24 31 f6 a9 00 00 01 00 74 06 41 0f b6 74 24 5b [1876998.350172] RSP: 0018:ffffc900000f0df0 EFLAGS: 00010246 [1876998.350177] RAX: ffffea00027f0708 RBX: ffff888008d78000 RCX: 0000000000391372 [1876998.350181] RDX: 0000000000000000 RSI: ffffe8ffffd01400 RDI: ffff888008d78000 [1876998.350185] RBP: ffff8881185a5d00 R08: ffffc90000087dd8 R09: 000000000000280a [1876998.350189] R10: 0000000000000002 R11: 0000000000000000 R12: ffffea0000235e00 [1876998.350193] R13: ffff8881185438a0 R14: 0000000000000000 R15: ffff888118543870 [1876998.350198] FS: 0000000000000000(0000) GS:ffff88811f300000(0000) knlGS:0000000000000000 [1876998.350203] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 s#1 Part1 [1876998.350206] CR2: 00007f8dac7b09f0 CR3: 000000011e20a006 CR4: 00000000001606e0 [1876998.350210] Call Trace: [1876998.350215] [1876998.350224] ? __netif_receive_skb_core+0x70a/0x920 [1876998.350229] kfree_skb+0x32/0xb0 [1876998.350234] __netif_receive_skb_core+0x70a/0x920 [1876998.350240] __netif_receive_skb_one_core+0x36/0x80 [1876998.350245] process_backlog+0x8b/0x150 [1876998.350250] net_rx_action+0xf7/0x340 [1876998.350255] __do_softirq+0x10f/0x353 [1876998.350262] irq_exit+0xb2/0xc0 [1876998.350265] do_IRQ+0x77/0xd0 [1876998.350271] common_interrupt+0xf/0xf [1876998.350274] In order to handle such failures more graceful, this change splits the receive loop into one for consuming the received buffers, and one for allocating new buffers. When GFP_ATOMIC allocations fail, the receive will continue with the buffers that is still there, and with the expectation that the allocations will succeed in a later call to receive. Fixes: 92744989533c ("net: add Xilinx ll_temac device driver") Signed-off-by: Esben Haabendal Signed-off-by: David S. 
Miller --- drivers/net/ethernet/xilinx/ll_temac.h | 1 + drivers/net/ethernet/xilinx/ll_temac_main.c | 112 ++++++++++++++++++++-------- 2 files changed, 82 insertions(+), 31 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/xilinx/ll_temac.h b/drivers/net/ethernet/xilinx/ll_temac.h index 276292bca334..99fe059e5c7f 100644 --- a/drivers/net/ethernet/xilinx/ll_temac.h +++ b/drivers/net/ethernet/xilinx/ll_temac.h @@ -375,6 +375,7 @@ struct temac_local { int tx_bd_next; int tx_bd_tail; int rx_bd_ci; + int rx_bd_tail; /* DMA channel control setup */ u32 tx_chnl_ctrl; diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index c368c3914bda..255207f2fd27 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -389,12 +389,13 @@ static int temac_dma_bd_init(struct net_device *ndev) lp->tx_bd_next = 0; lp->tx_bd_tail = 0; lp->rx_bd_ci = 0; + lp->rx_bd_tail = RX_BD_NUM - 1; /* Enable RX DMA transfers */ wmb(); lp->dma_out(lp, RX_CURDESC_PTR, lp->rx_bd_p); lp->dma_out(lp, RX_TAILDESC_PTR, - lp->rx_bd_p + (sizeof(*lp->rx_bd_v) * (RX_BD_NUM - 1))); + lp->rx_bd_p + (sizeof(*lp->rx_bd_v) * lp->rx_bd_tail)); /* Prepare for TX DMA transfer */ lp->dma_out(lp, TX_CURDESC_PTR, lp->tx_bd_p); @@ -923,27 +924,41 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev) static void ll_temac_recv(struct net_device *ndev) { struct temac_local *lp = netdev_priv(ndev); - struct sk_buff *skb, *new_skb; - unsigned int bdstat; - struct cdmac_bd *cur_p; - dma_addr_t tail_p, skb_dma_addr; - int length; unsigned long flags; + int rx_bd; + bool update_tail = false; spin_lock_irqsave(&lp->rx_lock, flags); - tail_p = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_ci; - cur_p = &lp->rx_bd_v[lp->rx_bd_ci]; - - bdstat = be32_to_cpu(cur_p->app0); - while ((bdstat & STS_CTRL_APP0_CMPLT)) { + /* Process all received buffers, passing them on network + * stack. After this, the buffer descriptors will be in an + * un-allocated stage, where no skb is allocated for it, and + * they are therefore not available for TEMAC/DMA. + */ + do { + struct cdmac_bd *bd = &lp->rx_bd_v[lp->rx_bd_ci]; + struct sk_buff *skb = lp->rx_skb[lp->rx_bd_ci]; + unsigned int bdstat = be32_to_cpu(bd->app0); + int length; + + /* While this should not normally happen, we can end + * here when GFP_ATOMIC allocations fail, and we + * therefore have un-allocated buffers. + */ + if (!skb) + break; - skb = lp->rx_skb[lp->rx_bd_ci]; - length = be32_to_cpu(cur_p->app4) & 0x3FFF; + /* Loop over all completed buffer descriptors */ + if (!(bdstat & STS_CTRL_APP0_CMPLT)) + break; - dma_unmap_single(ndev->dev.parent, be32_to_cpu(cur_p->phys), + dma_unmap_single(ndev->dev.parent, be32_to_cpu(bd->phys), XTE_MAX_JUMBO_FRAME_SIZE, DMA_FROM_DEVICE); + /* The buffer is not valid for DMA anymore */ + bd->phys = 0; + bd->len = 0; + length = be32_to_cpu(bd->app4) & 0x3FFF; skb_put(skb, length); skb->protocol = eth_type_trans(skb, ndev); skb_checksum_none_assert(skb); @@ -958,39 +973,74 @@ static void ll_temac_recv(struct net_device *ndev) * (back) for proper IP checksum byte order * (be16). 
*/ - skb->csum = htons(be32_to_cpu(cur_p->app3) & 0xFFFF); + skb->csum = htons(be32_to_cpu(bd->app3) & 0xFFFF); skb->ip_summed = CHECKSUM_COMPLETE; } if (!skb_defer_rx_timestamp(skb)) netif_rx(skb); + /* The skb buffer is now owned by network stack above */ + lp->rx_skb[lp->rx_bd_ci] = NULL; ndev->stats.rx_packets++; ndev->stats.rx_bytes += length; - new_skb = netdev_alloc_skb_ip_align(ndev, - XTE_MAX_JUMBO_FRAME_SIZE); - if (!new_skb) { - spin_unlock_irqrestore(&lp->rx_lock, flags); - return; + rx_bd = lp->rx_bd_ci; + if (++lp->rx_bd_ci >= RX_BD_NUM) + lp->rx_bd_ci = 0; + } while (rx_bd != lp->rx_bd_tail); + + /* Allocate new buffers for those buffer descriptors that were + * passed to network stack. Note that GFP_ATOMIC allocations + * can fail (e.g. when a larger burst of GFP_ATOMIC + * allocations occurs), so while we try to allocate all + * buffers in the same interrupt where they were processed, we + * continue with what we could get in case of allocation + * failure. Allocation of remaining buffers will be retried + * in following calls. + */ + while (1) { + struct sk_buff *skb; + struct cdmac_bd *bd; + dma_addr_t skb_dma_addr; + + rx_bd = lp->rx_bd_tail + 1; + if (rx_bd >= RX_BD_NUM) + rx_bd = 0; + bd = &lp->rx_bd_v[rx_bd]; + + if (bd->phys) + break; /* All skb's allocated */ + + skb = netdev_alloc_skb_ip_align(ndev, XTE_MAX_JUMBO_FRAME_SIZE); + if (!skb) { + dev_warn(&ndev->dev, "skb alloc failed\n"); + break; } - cur_p->app0 = cpu_to_be32(STS_CTRL_APP0_IRQONEND); - skb_dma_addr = dma_map_single(ndev->dev.parent, new_skb->data, + skb_dma_addr = dma_map_single(ndev->dev.parent, skb->data, XTE_MAX_JUMBO_FRAME_SIZE, DMA_FROM_DEVICE); - cur_p->phys = cpu_to_be32(skb_dma_addr); - cur_p->len = cpu_to_be32(XTE_MAX_JUMBO_FRAME_SIZE); - lp->rx_skb[lp->rx_bd_ci] = new_skb; + if (WARN_ON_ONCE(dma_mapping_error(ndev->dev.parent, + skb_dma_addr))) { + dev_kfree_skb_any(skb); + break; + } - lp->rx_bd_ci++; - if (lp->rx_bd_ci >= RX_BD_NUM) - lp->rx_bd_ci = 0; + bd->phys = cpu_to_be32(skb_dma_addr); + bd->len = cpu_to_be32(XTE_MAX_JUMBO_FRAME_SIZE); + bd->app0 = cpu_to_be32(STS_CTRL_APP0_IRQONEND); + lp->rx_skb[rx_bd] = skb; + + lp->rx_bd_tail = rx_bd; + update_tail = true; + } - cur_p = &lp->rx_bd_v[lp->rx_bd_ci]; - bdstat = be32_to_cpu(cur_p->app0); + /* Move tail pointer when buffers have been allocated */ + if (update_tail) { + lp->dma_out(lp, RX_TAILDESC_PTR, + lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_tail); } - lp->dma_out(lp, RX_TAILDESC_PTR, tail_p); spin_unlock_irqrestore(&lp->rx_lock, flags); } -- cgit v1.2.3-59-g8ed1b From 1d63b8d66d146deaaedbe16c80de105f685ea012 Mon Sep 17 00:00:00 2001 From: Esben Haabendal Date: Fri, 21 Feb 2020 07:47:58 +0100 Subject: net: ll_temac: Handle DMA halt condition caused by buffer underrun The SDMA engine used by TEMAC halts operation when it has finished processing of the last buffer descriptor in the buffer ring. Unfortunately, no interrupt event is generated when this happens, so we need to setup another mechanism to make sure DMA operation is restarted when enough buffers have been added to the ring. Fixes: 92744989533c ("net: add Xilinx ll_temac device driver") Signed-off-by: Esben Haabendal Signed-off-by: David S. 
Miller --- drivers/net/ethernet/xilinx/ll_temac.h | 3 ++ drivers/net/ethernet/xilinx/ll_temac_main.c | 58 ++++++++++++++++++++++++++--- 2 files changed, 56 insertions(+), 5 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/xilinx/ll_temac.h b/drivers/net/ethernet/xilinx/ll_temac.h index 99fe059e5c7f..53fb8141f1a6 100644 --- a/drivers/net/ethernet/xilinx/ll_temac.h +++ b/drivers/net/ethernet/xilinx/ll_temac.h @@ -380,6 +380,9 @@ struct temac_local { /* DMA channel control setup */ u32 tx_chnl_ctrl; u32 rx_chnl_ctrl; + u8 coalesce_count_rx; + + struct delayed_work restart_work; }; /* Wrappers for temac_ior()/temac_iow() function pointers above */ diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 255207f2fd27..9461acec6f70 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include @@ -866,8 +867,11 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev) skb_dma_addr = dma_map_single(ndev->dev.parent, skb->data, skb_headlen(skb), DMA_TO_DEVICE); cur_p->len = cpu_to_be32(skb_headlen(skb)); - if (WARN_ON_ONCE(dma_mapping_error(ndev->dev.parent, skb_dma_addr))) - return NETDEV_TX_BUSY; + if (WARN_ON_ONCE(dma_mapping_error(ndev->dev.parent, skb_dma_addr))) { + dev_kfree_skb_any(skb); + ndev->stats.tx_dropped++; + return NETDEV_TX_OK; + } cur_p->phys = cpu_to_be32(skb_dma_addr); ptr_to_txbd((void *)skb, cur_p); @@ -897,7 +901,9 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev) dma_unmap_single(ndev->dev.parent, be32_to_cpu(cur_p->phys), skb_headlen(skb), DMA_TO_DEVICE); - return NETDEV_TX_BUSY; + dev_kfree_skb_any(skb); + ndev->stats.tx_dropped++; + return NETDEV_TX_OK; } cur_p->phys = cpu_to_be32(skb_dma_addr); cur_p->len = cpu_to_be32(skb_frag_size(frag)); @@ -920,6 +926,17 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev) return NETDEV_TX_OK; } +static int ll_temac_recv_buffers_available(struct temac_local *lp) +{ + int available; + + if (!lp->rx_skb[lp->rx_bd_ci]) + return 0; + available = 1 + lp->rx_bd_tail - lp->rx_bd_ci; + if (available <= 0) + available += RX_BD_NUM; + return available; +} static void ll_temac_recv(struct net_device *ndev) { @@ -990,6 +1007,18 @@ static void ll_temac_recv(struct net_device *ndev) lp->rx_bd_ci = 0; } while (rx_bd != lp->rx_bd_tail); + /* DMA operations will halt when the last buffer descriptor is + * processed (ie. the one pointed to by RX_TAILDESC_PTR). + * When that happens, no more interrupt events will be + * generated. No IRQ_COAL or IRQ_DLY, and not even an + * IRQ_ERR. To avoid stalling, we schedule a delayed work + * when there is a potential risk of that happening. The work + * will call this function, and thus re-schedule itself until + * enough buffers are available again. + */ + if (ll_temac_recv_buffers_available(lp) < lp->coalesce_count_rx) + schedule_delayed_work(&lp->restart_work, HZ / 1000); + /* Allocate new buffers for those buffer descriptors that were * passed to network stack. Note that GFP_ATOMIC allocations * can fail (e.g. when a larger burst of GFP_ATOMIC @@ -1045,6 +1074,18 @@ static void ll_temac_recv(struct net_device *ndev) spin_unlock_irqrestore(&lp->rx_lock, flags); } +/* Function scheduled to ensure a restart in case of DMA halt + * condition caused by running out of buffer descriptors. 
+ */ +static void ll_temac_restart_work_func(struct work_struct *work) +{ + struct temac_local *lp = container_of(work, struct temac_local, + restart_work.work); + struct net_device *ndev = lp->ndev; + + ll_temac_recv(ndev); +} + static irqreturn_t ll_temac_tx_irq(int irq, void *_ndev) { struct net_device *ndev = _ndev; @@ -1137,6 +1178,8 @@ static int temac_stop(struct net_device *ndev) dev_dbg(&ndev->dev, "temac_close()\n"); + cancel_delayed_work_sync(&lp->restart_work); + free_irq(lp->tx_irq, ndev); free_irq(lp->rx_irq, ndev); @@ -1258,6 +1301,7 @@ static int temac_probe(struct platform_device *pdev) lp->dev = &pdev->dev; lp->options = XTE_OPTION_DEFAULTS; spin_lock_init(&lp->rx_lock); + INIT_DELAYED_WORK(&lp->restart_work, ll_temac_restart_work_func); /* Setup mutex for synchronization of indirect register access */ if (pdata) { @@ -1364,6 +1408,7 @@ static int temac_probe(struct platform_device *pdev) */ lp->tx_chnl_ctrl = 0x10220000; lp->rx_chnl_ctrl = 0xff070000; + lp->coalesce_count_rx = 0x07; /* Finished with the DMA node; drop the reference */ of_node_put(dma_np); @@ -1395,11 +1440,14 @@ static int temac_probe(struct platform_device *pdev) (pdata->tx_irq_count << 16); else lp->tx_chnl_ctrl = 0x10220000; - if (pdata->rx_irq_timeout || pdata->rx_irq_count) + if (pdata->rx_irq_timeout || pdata->rx_irq_count) { lp->rx_chnl_ctrl = (pdata->rx_irq_timeout << 24) | (pdata->rx_irq_count << 16); - else + lp->coalesce_count_rx = pdata->rx_irq_count; + } else { lp->rx_chnl_ctrl = 0xff070000; + lp->coalesce_count_rx = 0x07; + } } /* Error handle returned DMA RX and TX interrupts */ -- cgit v1.2.3-59-g8ed1b From 9a005c3898aa07cd5cdca77b7096814e6c478c92 Mon Sep 17 00:00:00 2001 From: Jonathan Lemon Date: Mon, 24 Feb 2020 15:29:09 -0800 Subject: bnxt_en: add newline to netdev_*() format strings Add missing newlines to netdev_* format strings so the lines aren't buffered by the printk subsystem. Nitpicked-by: Jakub Kicinski Signed-off-by: Jonathan Lemon Acked-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 +- drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c | 10 ++--- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 4 +- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 48 +++++++++++------------ drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c | 10 ++--- 5 files changed, 38 insertions(+), 38 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index fd6e0e48cd51..f9a8151f092c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11252,7 +11252,7 @@ static void bnxt_cfg_ntp_filters(struct bnxt *bp) } } if (test_and_clear_bit(BNXT_HWRM_PF_UNLOAD_SP_EVENT, &bp->sp_event)) - netdev_info(bp->dev, "Receive PF driver unload event!"); + netdev_info(bp->dev, "Receive PF driver unload event!\n"); } #else @@ -11759,7 +11759,7 @@ static int bnxt_pcie_dsn_get(struct bnxt *bp, u8 dsn[]) u32 dw; if (!pos) { - netdev_info(bp->dev, "Unable do read adapter's DSN"); + netdev_info(bp->dev, "Unable do read adapter's DSN\n"); return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index eec0168330b7..d3c93ccee86a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -641,14 +641,14 @@ static int bnxt_dl_params_register(struct bnxt *bp) rc = devlink_params_register(bp->dl, bnxt_dl_params, ARRAY_SIZE(bnxt_dl_params)); if (rc) { - netdev_warn(bp->dev, "devlink_params_register failed. rc=%d", + netdev_warn(bp->dev, "devlink_params_register failed. rc=%d\n", rc); return rc; } rc = devlink_port_params_register(&bp->dl_port, bnxt_dl_port_params, ARRAY_SIZE(bnxt_dl_port_params)); if (rc) { - netdev_err(bp->dev, "devlink_port_params_register failed"); + netdev_err(bp->dev, "devlink_port_params_register failed\n"); devlink_params_unregister(bp->dl, bnxt_dl_params, ARRAY_SIZE(bnxt_dl_params)); return rc; @@ -679,7 +679,7 @@ int bnxt_dl_register(struct bnxt *bp) else dl = devlink_alloc(&bnxt_vf_dl_ops, sizeof(struct bnxt_dl)); if (!dl) { - netdev_warn(bp->dev, "devlink_alloc failed"); + netdev_warn(bp->dev, "devlink_alloc failed\n"); return -ENOMEM; } @@ -692,7 +692,7 @@ int bnxt_dl_register(struct bnxt *bp) rc = devlink_register(dl, &bp->pdev->dev); if (rc) { - netdev_warn(bp->dev, "devlink_register failed. rc=%d", rc); + netdev_warn(bp->dev, "devlink_register failed. 
rc=%d\n", rc); goto err_dl_free; } @@ -704,7 +704,7 @@ int bnxt_dl_register(struct bnxt *bp) sizeof(bp->dsn)); rc = devlink_port_register(dl, &bp->dl_port, bp->pf.port_id); if (rc) { - netdev_err(bp->dev, "devlink_port_register failed"); + netdev_err(bp->dev, "devlink_port_register failed\n"); goto err_dl_unreg; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 6171fa8b3677..e8fc1671c581 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -2028,7 +2028,7 @@ int bnxt_flash_package_from_file(struct net_device *dev, const char *filename, } if (fw->size > item_len) { - netdev_err(dev, "PKG insufficient update area in nvram: %lu", + netdev_err(dev, "PKG insufficient update area in nvram: %lu\n", (unsigned long)fw->size); rc = -EFBIG; } else { @@ -3338,7 +3338,7 @@ err: kfree(coredump.data); *dump_len += sizeof(struct bnxt_coredump_record); if (rc == -ENOBUFS) - netdev_err(bp->dev, "Firmware returned large coredump buffer"); + netdev_err(bp->dev, "Firmware returned large coredump buffer\n"); return rc; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 0cc6ec51f45f..9bec256b0934 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -50,7 +50,7 @@ static u16 bnxt_flow_get_dst_fid(struct bnxt *pf_bp, struct net_device *dev) /* check if dev belongs to the same switch */ if (!netdev_port_same_parent_id(pf_bp->dev, dev)) { - netdev_info(pf_bp->dev, "dev(ifindex=%d) not on same switch", + netdev_info(pf_bp->dev, "dev(ifindex=%d) not on same switch\n", dev->ifindex); return BNXT_FID_INVALID; } @@ -70,7 +70,7 @@ static int bnxt_tc_parse_redir(struct bnxt *bp, struct net_device *dev = act->dev; if (!dev) { - netdev_info(bp->dev, "no dev in mirred action"); + netdev_info(bp->dev, "no dev in mirred action\n"); return -EINVAL; } @@ -106,7 +106,7 @@ static int bnxt_tc_parse_tunnel_set(struct bnxt *bp, const struct ip_tunnel_key *tun_key = &tun_info->key; if (ip_tunnel_info_af(tun_info) != AF_INET) { - netdev_info(bp->dev, "only IPv4 tunnel-encap is supported"); + netdev_info(bp->dev, "only IPv4 tunnel-encap is supported\n"); return -EOPNOTSUPP; } @@ -295,7 +295,7 @@ static int bnxt_tc_parse_actions(struct bnxt *bp, int i, rc; if (!flow_action_has_entries(flow_action)) { - netdev_info(bp->dev, "no actions"); + netdev_info(bp->dev, "no actions\n"); return -EINVAL; } @@ -370,7 +370,7 @@ static int bnxt_tc_parse_flow(struct bnxt *bp, /* KEY_CONTROL and KEY_BASIC are needed for forming a meaningful key */ if ((dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL)) == 0 || (dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_BASIC)) == 0) { - netdev_info(bp->dev, "cannot form TC key: used_keys = 0x%x", + netdev_info(bp->dev, "cannot form TC key: used_keys = 0x%x\n", dissector->used_keys); return -EOPNOTSUPP; } @@ -508,7 +508,7 @@ static int bnxt_hwrm_cfa_flow_free(struct bnxt *bp, rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); if (rc) - netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc); + netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc); return rc; } @@ -841,7 +841,7 @@ static int hwrm_cfa_decap_filter_alloc(struct bnxt *bp, resp = bnxt_get_hwrm_resp_addr(bp, &req); *decap_filter_handle = resp->decap_filter_id; } else { - netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc); + netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, 
rc); } mutex_unlock(&bp->hwrm_cmd_lock); @@ -859,7 +859,7 @@ static int hwrm_cfa_decap_filter_free(struct bnxt *bp, rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); if (rc) - netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc); + netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc); return rc; } @@ -906,7 +906,7 @@ static int hwrm_cfa_encap_record_alloc(struct bnxt *bp, resp = bnxt_get_hwrm_resp_addr(bp, &req); *encap_record_handle = resp->encap_record_id; } else { - netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc); + netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc); } mutex_unlock(&bp->hwrm_cmd_lock); @@ -924,7 +924,7 @@ static int hwrm_cfa_encap_record_free(struct bnxt *bp, rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); if (rc) - netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc); + netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc); return rc; } @@ -943,7 +943,7 @@ static int bnxt_tc_put_l2_node(struct bnxt *bp, tc_info->l2_ht_params); if (rc) netdev_err(bp->dev, - "Error: %s: rhashtable_remove_fast: %d", + "Error: %s: rhashtable_remove_fast: %d\n", __func__, rc); kfree_rcu(l2_node, rcu); } @@ -972,7 +972,7 @@ bnxt_tc_get_l2_node(struct bnxt *bp, struct rhashtable *l2_table, if (rc) { kfree_rcu(l2_node, rcu); netdev_err(bp->dev, - "Error: %s: rhashtable_insert_fast: %d", + "Error: %s: rhashtable_insert_fast: %d\n", __func__, rc); return NULL; } @@ -1031,7 +1031,7 @@ static bool bnxt_tc_can_offload(struct bnxt *bp, struct bnxt_tc_flow *flow) if ((flow->flags & BNXT_TC_FLOW_FLAGS_PORTS) && (flow->l4_key.ip_proto != IPPROTO_TCP && flow->l4_key.ip_proto != IPPROTO_UDP)) { - netdev_info(bp->dev, "Cannot offload non-TCP/UDP (%d) ports", + netdev_info(bp->dev, "Cannot offload non-TCP/UDP (%d) ports\n", flow->l4_key.ip_proto); return false; } @@ -1088,7 +1088,7 @@ static int bnxt_tc_put_tunnel_node(struct bnxt *bp, rc = rhashtable_remove_fast(tunnel_table, &tunnel_node->node, *ht_params); if (rc) { - netdev_err(bp->dev, "rhashtable_remove_fast rc=%d", rc); + netdev_err(bp->dev, "rhashtable_remove_fast rc=%d\n", rc); rc = -1; } kfree_rcu(tunnel_node, rcu); @@ -1129,7 +1129,7 @@ bnxt_tc_get_tunnel_node(struct bnxt *bp, struct rhashtable *tunnel_table, tunnel_node->refcount++; return tunnel_node; err: - netdev_info(bp->dev, "error rc=%d", rc); + netdev_info(bp->dev, "error rc=%d\n", rc); return NULL; } @@ -1187,7 +1187,7 @@ static void bnxt_tc_put_decap_l2_node(struct bnxt *bp, &decap_l2_node->node, tc_info->decap_l2_ht_params); if (rc) - netdev_err(bp->dev, "rhashtable_remove_fast rc=%d", rc); + netdev_err(bp->dev, "rhashtable_remove_fast rc=%d\n", rc); kfree_rcu(decap_l2_node, rcu); } } @@ -1227,7 +1227,7 @@ static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp, rt = ip_route_output_key(dev_net(real_dst_dev), &flow); if (IS_ERR(rt)) { - netdev_info(bp->dev, "no route to %pI4b", &flow.daddr); + netdev_info(bp->dev, "no route to %pI4b\n", &flow.daddr); return -EOPNOTSUPP; } @@ -1241,7 +1241,7 @@ static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp, if (vlan->real_dev != real_dst_dev) { netdev_info(bp->dev, - "dst_dev(%s) doesn't use PF-if(%s)", + "dst_dev(%s) doesn't use PF-if(%s)\n", netdev_name(dst_dev), netdev_name(real_dst_dev)); rc = -EOPNOTSUPP; @@ -1253,7 +1253,7 @@ static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp, #endif } else if (dst_dev != real_dst_dev) { netdev_info(bp->dev, - "dst_dev(%s) for %pI4b is not PF-if(%s)", + "dst_dev(%s) for %pI4b is not PF-if(%s)\n", netdev_name(dst_dev), &flow.daddr, 
netdev_name(real_dst_dev)); rc = -EOPNOTSUPP; @@ -1262,7 +1262,7 @@ static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp, nbr = dst_neigh_lookup(&rt->dst, &flow.daddr); if (!nbr) { - netdev_info(bp->dev, "can't lookup neighbor for %pI4b", + netdev_info(bp->dev, "can't lookup neighbor for %pI4b\n", &flow.daddr); rc = -EOPNOTSUPP; goto put_rt; @@ -1472,7 +1472,7 @@ static int __bnxt_tc_del_flow(struct bnxt *bp, rc = rhashtable_remove_fast(&tc_info->flow_table, &flow_node->node, tc_info->flow_ht_params); if (rc) - netdev_err(bp->dev, "Error: %s: rhashtable_remove_fast rc=%d", + netdev_err(bp->dev, "Error: %s: rhashtable_remove_fast rc=%d\n", __func__, rc); kfree_rcu(flow_node, rcu); @@ -1587,7 +1587,7 @@ unlock: free_node: kfree_rcu(new_node, rcu); done: - netdev_err(bp->dev, "Error: %s: cookie=0x%lx error=%d", + netdev_err(bp->dev, "Error: %s: cookie=0x%lx error=%d\n", __func__, tc_flow_cmd->cookie, rc); return rc; } @@ -1700,7 +1700,7 @@ bnxt_hwrm_cfa_flow_stats_get(struct bnxt *bp, int num_flows, le64_to_cpu(resp_bytes[i]); } } else { - netdev_info(bp->dev, "error rc=%d", rc); + netdev_info(bp->dev, "error rc=%d\n", rc); } mutex_unlock(&bp->hwrm_cmd_lock); @@ -1970,7 +1970,7 @@ static int bnxt_tc_indr_block_event(struct notifier_block *nb, bp); if (rc) netdev_info(bp->dev, - "Failed to register indirect blk: dev: %s", + "Failed to register indirect blk: dev: %s\n", netdev->name); break; case NETDEV_UNREGISTER: diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c index b010b34cdaf8..6f2faf81c1ae 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c @@ -43,7 +43,7 @@ static int hwrm_cfa_vfr_alloc(struct bnxt *bp, u16 vf_idx, netdev_dbg(bp->dev, "tx_cfa_action=0x%x, rx_cfa_code=0x%x", *tx_cfa_action, *rx_cfa_code); } else { - netdev_info(bp->dev, "%s error rc=%d", __func__, rc); + netdev_info(bp->dev, "%s error rc=%d\n", __func__, rc); } mutex_unlock(&bp->hwrm_cmd_lock); @@ -60,7 +60,7 @@ static int hwrm_cfa_vfr_free(struct bnxt *bp, u16 vf_idx) rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); if (rc) - netdev_info(bp->dev, "%s error rc=%d", __func__, rc); + netdev_info(bp->dev, "%s error rc=%d\n", __func__, rc); return rc; } @@ -465,7 +465,7 @@ static int bnxt_vf_reps_create(struct bnxt *bp) return 0; err: - netdev_info(bp->dev, "%s error=%d", __func__, rc); + netdev_info(bp->dev, "%s error=%d\n", __func__, rc); kfree(cfa_code_map); __bnxt_vf_reps_destroy(bp); return rc; @@ -488,7 +488,7 @@ int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode, mutex_lock(&bp->sriov_lock); if (bp->eswitch_mode == mode) { - netdev_info(bp->dev, "already in %s eswitch mode", + netdev_info(bp->dev, "already in %s eswitch mode\n", mode == DEVLINK_ESWITCH_MODE_LEGACY ? 
"legacy" : "switchdev"); rc = -EINVAL; @@ -508,7 +508,7 @@ int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode, } if (pci_num_vf(bp->pdev) == 0) { - netdev_info(bp->dev, "Enable VFs before setting switchdev mode"); + netdev_info(bp->dev, "Enable VFs before setting switchdev mode\n"); rc = -EPERM; goto done; } -- cgit v1.2.3-59-g8ed1b From 98c5f7d44fef309e692c24c6d71131ee0f0871fb Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 24 Feb 2020 15:56:32 -0800 Subject: net: dsa: bcm_sf2: Forcibly configure IMP port for 1Gb/sec We are still experiencing some packet loss with the existing advanced congestion buffering (ACB) settings with the IMP port configured for 2Gb/sec, so revert to conservative link speeds that do not produce packet loss until this is resolved. Fixes: 8f1880cbe8d0 ("net: dsa: bcm_sf2: Configure IMP port for 2Gb/sec") Fixes: de34d7084edd ("net: dsa: bcm_sf2: Only 7278 supports 2Gb/sec IMP port") Signed-off-by: Florian Fainelli Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index d1955543acd1..b0f5280a83cb 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -69,8 +69,7 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port) /* Force link status for IMP port */ reg = core_readl(priv, offset); reg |= (MII_SW_OR | LINK_STS); - if (priv->type == BCM7278_DEVICE_ID) - reg |= GMII_SPEED_UP_2G; + reg &= ~GMII_SPEED_UP_2G; core_writel(priv, reg, offset); /* Enable Broadcast, Multicast, Unicast forwarding to IMP port */ -- cgit v1.2.3-59-g8ed1b From 402482a6a78e5c61d8a2ec6311fc5b4aca392cd6 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Tue, 25 Feb 2020 14:11:59 +0100 Subject: net: bcmgenet: Clear ID_MODE_DIS in EXT_RGMII_OOB_CTRL when not needed Outdated Raspberry Pi 4 firmware might configure the external PHY as rgmii although the kernel currently sets it as rgmii-rxid. This makes connections unreliable as ID_MODE_DIS is left enabled. To avoid this, explicitly clear that bit whenever we don't need it. Fixes: da38802211cc ("net: bcmgenet: Add RGMII_RXID support") Signed-off-by: Nicolas Saenz Julienne Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmmii.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 6392a2530183..10244941a7a6 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -294,6 +294,7 @@ int bcmgenet_mii_config(struct net_device *dev, bool init) */ if (priv->ext_phy) { reg = bcmgenet_ext_readl(priv, EXT_RGMII_OOB_CTRL); + reg &= ~ID_MODE_DIS; reg |= id_mode_dis; if (GENET_IS_V1(priv) || GENET_IS_V2(priv) || GENET_IS_V3(priv)) reg |= RGMII_MODE_EN_V123; -- cgit v1.2.3-59-g8ed1b From f596c87005f7b1baeb7d62d9a9e25d68c3dfae10 Mon Sep 17 00:00:00 2001 From: yangerkun Date: Wed, 26 Feb 2020 11:54:35 +0800 Subject: slip: not call free_netdev before rtnl_unlock in slip_open As the description before netdev_run_todo, we cannot call free_netdev before rtnl_unlock, fix it by reorder the code. Signed-off-by: yangerkun Reviewed-by: Oliver Hartkopp Signed-off-by: David S. 
Miller --- drivers/net/slip/slip.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index 6f4d7ba8b109..babb01888b78 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -863,7 +863,10 @@ err_free_chan: tty->disc_data = NULL; clear_bit(SLF_INUSE, &sl->flags); sl_free_netdev(sl->dev); + /* do not call free_netdev before rtnl_unlock */ + rtnl_unlock(); free_netdev(sl->dev); + return err; err_exit: rtnl_unlock(); -- cgit v1.2.3-59-g8ed1b From 4f31c532ad400a34dbdd836c204ed964d1ec2da5 Mon Sep 17 00:00:00 2001 From: Sudheesh Mavila Date: Wed, 26 Feb 2020 12:40:45 +0530 Subject: net: phy: corrected the return value for genphy_check_and_restart_aneg and genphy_c45_check_and_restart_aneg When auto-negotiation is not required, the return value should be zero. Changes v1->v2: - improved comments and code as suggested by Andrew Lunn and Heiner Kallweit - fixed issue in genphy_c45_check_and_restart_aneg as suggested by Russell King. Fixes: 2a10ab043ac5 ("net: phy: add genphy_check_and_restart_aneg()") Fixes: 1af9f16840e9 ("net: phy: add genphy_c45_check_and_restart_aneg()") Signed-off-by: Sudheesh Mavila Reviewed-by: Heiner Kallweit Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy-c45.c | 6 +++--- drivers/net/phy/phy_device.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index a1caeee12236..dd2e23fb67c0 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -167,7 +167,7 @@ EXPORT_SYMBOL_GPL(genphy_c45_restart_aneg); */ int genphy_c45_check_and_restart_aneg(struct phy_device *phydev, bool restart) { - int ret = 0; + int ret; if (!restart) { /* Configure and restart aneg if it wasn't set before */ @@ -180,9 +180,9 @@ int genphy_c45_check_and_restart_aneg(struct phy_device *phydev, bool restart) } if (restart) - ret = genphy_c45_restart_aneg(phydev); + return genphy_c45_restart_aneg(phydev); - return ret; + return 0; } EXPORT_SYMBOL_GPL(genphy_c45_check_and_restart_aneg); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 6131aca79823..c8b0c34030d3 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1793,7 +1793,7 @@ EXPORT_SYMBOL(genphy_restart_aneg); */ int genphy_check_and_restart_aneg(struct phy_device *phydev, bool restart) { - int ret = 0; + int ret; if (!restart) { /* Advertisement hasn't changed, but maybe aneg was never on to @@ -1808,9 +1808,9 @@ int genphy_check_and_restart_aneg(struct phy_device *phydev, bool restart) } if (restart) - ret = genphy_restart_aneg(phydev); + return genphy_restart_aneg(phydev); - return ret; + return 0; } EXPORT_SYMBOL(genphy_check_and_restart_aneg); -- cgit v1.2.3-59-g8ed1b From c87a9d6fc6d555e4981f2ded77d9a8cce950743e Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Wed, 26 Feb 2020 16:26:50 +0100 Subject: net: phy: mscc: fix firmware paths The firmware paths for the VSC8584 PHYs do not contain the leading 'microchip/' directory, as used in linux-firmware, resulting in an error when probing the driver. This patch fixes it. Fixes: a5afc1678044 ("net: phy: mscc: add support for VSC8584 PHY") Signed-off-by: Antoine Tenart Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- drivers/net/phy/mscc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c index 937ac7da2789..f686f40f6bdc 100644 --- a/drivers/net/phy/mscc.c +++ b/drivers/net/phy/mscc.c @@ -345,11 +345,11 @@ enum macsec_bank { BIT(VSC8531_FORCE_LED_OFF) | \ BIT(VSC8531_FORCE_LED_ON)) -#define MSCC_VSC8584_REVB_INT8051_FW "mscc_vsc8584_revb_int8051_fb48.bin" +#define MSCC_VSC8584_REVB_INT8051_FW "microchip/mscc_vsc8584_revb_int8051_fb48.bin" #define MSCC_VSC8584_REVB_INT8051_FW_START_ADDR 0xe800 #define MSCC_VSC8584_REVB_INT8051_FW_CRC 0xfb48 -#define MSCC_VSC8574_REVB_INT8051_FW "mscc_vsc8574_revb_int8051_29e8.bin" +#define MSCC_VSC8574_REVB_INT8051_FW "microchip/mscc_vsc8574_revb_int8051_29e8.bin" #define MSCC_VSC8574_REVB_INT8051_FW_START_ADDR 0x4000 #define MSCC_VSC8574_REVB_INT8051_FW_CRC 0x29e8 -- cgit v1.2.3-59-g8ed1b From 474a31e13a4e9749fb3ee55794d69d0f17ee0998 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Wed, 26 Feb 2020 18:49:01 +0200 Subject: net: stmmac: fix notifier registration We cannot register the same netdev notifier multiple times when probing stmmac devices. Register the notifier only once in module init, and also make debugfs creation/deletion safe against simultaneous notifier call. Fixes: 481a7d154cbb ("stmmac: debugfs entry name is not be changed when udev rename device name.") Signed-off-by: Aaro Koskinen Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 5836b21edd7e..7da18c9afa01 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4405,6 +4405,8 @@ static void stmmac_init_fs(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); + rtnl_lock(); + /* Create per netdev entries */ priv->dbgfs_dir = debugfs_create_dir(dev->name, stmmac_fs_dir); @@ -4416,14 +4418,13 @@ static void stmmac_init_fs(struct net_device *dev) debugfs_create_file("dma_cap", 0444, priv->dbgfs_dir, dev, &stmmac_dma_cap_fops); - register_netdevice_notifier(&stmmac_notifier); + rtnl_unlock(); } static void stmmac_exit_fs(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); - unregister_netdevice_notifier(&stmmac_notifier); debugfs_remove_recursive(priv->dbgfs_dir); } #endif /* CONFIG_DEBUG_FS */ @@ -4940,14 +4941,14 @@ int stmmac_dvr_remove(struct device *dev) netdev_info(priv->dev, "%s: removing driver", __func__); -#ifdef CONFIG_DEBUG_FS - stmmac_exit_fs(ndev); -#endif stmmac_stop_all_dma(priv); stmmac_mac_set(priv, priv->ioaddr, false); netif_carrier_off(ndev); unregister_netdev(ndev); +#ifdef CONFIG_DEBUG_FS + stmmac_exit_fs(ndev); +#endif phylink_destroy(priv->phylink); if (priv->plat->stmmac_rst) reset_control_assert(priv->plat->stmmac_rst); @@ -5166,6 +5167,7 @@ static int __init stmmac_init(void) /* Create debugfs main directory if it doesn't exist yet */ if (!stmmac_fs_dir) stmmac_fs_dir = debugfs_create_dir(STMMAC_RESOURCE_NAME, NULL); + register_netdevice_notifier(&stmmac_notifier); #endif return 0; @@ -5174,6 +5176,7 @@ static int __init stmmac_init(void) static void __exit stmmac_exit(void) { #ifdef CONFIG_DEBUG_FS + unregister_netdevice_notifier(&stmmac_notifier); debugfs_remove_recursive(stmmac_fs_dir); #endif } -- cgit v1.2.3-59-g8ed1b 
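The stmmac change above reflects a general rule: a netdevice notifier shared by all probed devices cannot be registered once per device, it has to be registered exactly once from module init and removed in module exit. A minimal, hypothetical sketch of that pattern follows; the foo_* names are placeholders and not taken from any driver in this series.

#include <linux/module.h>
#include <linux/netdevice.h>

/* One notifier block for the whole module, shared by every device. */
static int foo_netdev_event(struct notifier_block *nb,
			    unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);

	if (event == NETDEV_CHANGENAME)
		netdev_dbg(dev, "interface renamed\n");

	return NOTIFY_DONE;
}

static struct notifier_block foo_netdev_notifier = {
	.notifier_call = foo_netdev_event,
};

static int __init foo_init(void)
{
	/* Registered exactly once here, never from the probe path. */
	return register_netdevice_notifier(&foo_netdev_notifier);
}

static void __exit foo_exit(void)
{
	unregister_netdevice_notifier(&foo_netdev_notifier);
}

module_init(foo_init);
module_exit(foo_exit);
MODULE_LICENSE("GPL");

Per-device work can still be done inside the single callback, since the notifier is handed the net_device the event refers to.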
From 0bff777bd0cba73ad4cd0145696ad284d7e6a99f Mon Sep 17 00:00:00 2001 From: Luo bin Date: Thu, 27 Feb 2020 06:34:42 +0000 Subject: hinic: fix an irq affinity bug A local variable cannot be used as an input parameter of irq_set_affinity_hint. Signed-off-by: Luo bin Signed-off-by: David S. Miller --- drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h | 1 + drivers/net/ethernet/huawei/hinic/hinic_rx.c | 5 ++--- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h index f4a339b10b10..79091e131418 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h @@ -94,6 +94,7 @@ struct hinic_rq { struct hinic_wq *wq; + struct cpumask affinity_mask; u32 irq; u16 msix_entry; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c index 56ea6d692f1c..2695ad69fca6 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c @@ -475,7 +475,6 @@ static int rx_request_irq(struct hinic_rxq *rxq) struct hinic_hwdev *hwdev = nic_dev->hwdev; struct hinic_rq *rq = rxq->rq; struct hinic_qp *qp; - struct cpumask mask; int err; rx_add_napi(rxq); @@ -492,8 +491,8 @@ static int rx_request_irq(struct hinic_rxq *rxq) } qp = container_of(rq, struct hinic_qp, rq); - cpumask_set_cpu(qp->q_id % num_online_cpus(), &mask); - return irq_set_affinity_hint(rq->irq, &mask); + cpumask_set_cpu(qp->q_id % num_online_cpus(), &rq->affinity_mask); + return irq_set_affinity_hint(rq->irq, &rq->affinity_mask); } static void rx_free_irq(struct hinic_rxq *rxq) -- cgit v1.2.3-59-g8ed1b
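The underlying constraint in the fix above is that irq_set_affinity_hint() stores the pointer it is given rather than copying the mask, so the cpumask must live at least as long as the IRQ; a mask on the stack of the setup function becomes a dangling pointer. A minimal, hypothetical sketch of the corrected pattern, using placeholder foo_* names rather than the hinic structures:

#include <linux/cpumask.h>
#include <linux/interrupt.h>

struct foo_queue {
	unsigned int irq;
	u16 qid;
	/* Long-lived storage: irq_set_affinity_hint() keeps this pointer. */
	struct cpumask affinity_mask;
};

static int foo_queue_set_affinity(struct foo_queue *q)
{
	cpumask_clear(&q->affinity_mask);
	cpumask_set_cpu(q->qid % num_online_cpus(), &q->affinity_mask);

	/* Would be a use-after-scope if affinity_mask were a local variable. */
	return irq_set_affinity_hint(q->irq, &q->affinity_mask);
}

static void foo_queue_clear_affinity(struct foo_queue *q)
{
	/* Drop the hint before the queue structure is freed. */
	irq_set_affinity_hint(q->irq, NULL);
}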
From d2ed69ce9ed3477e2a9527e6b89fe4689d99510e Mon Sep 17 00:00:00 2001 From: Luo bin Date: Thu, 27 Feb 2020 06:34:43 +0000 Subject: hinic: fix a bug of setting hw_ioctxt A reserved field is used to signify the prime physical function index in the latest firmware version, so we must assign a value to it correctly. Signed-off-by: Luo bin Signed-off-by: David S. Miller --- drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c | 1 + drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h | 2 +- drivers/net/ethernet/huawei/hinic/hinic_hw_if.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c index 6f2cf569a283..79b3d53f2fbf 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c @@ -297,6 +297,7 @@ static int set_hw_ioctxt(struct hinic_hwdev *hwdev, unsigned int rq_depth, } hw_ioctxt.func_idx = HINIC_HWIF_FUNC_IDX(hwif); + hw_ioctxt.ppf_idx = HINIC_HWIF_PPF_IDX(hwif); hw_ioctxt.set_cmdq_depth = HW_IOCTXT_SET_CMDQ_DEPTH_DEFAULT; hw_ioctxt.cmdq_depth = 0; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h index b069045de416..66fd2340d447 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h @@ -151,8 +151,8 @@ struct hinic_cmd_hw_ioctxt { u8 lro_en; u8 rsvd3; + u8 ppf_idx; u8 rsvd4; - u8 rsvd5; u16 rq_depth; u16 rx_buf_sz_idx; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h index 517794509eb2..c7bb9ceca72c 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h @@ -137,6 +137,7 @@ #define HINIC_HWIF_FUNC_IDX(hwif) ((hwif)->attr.func_idx) #define HINIC_HWIF_PCI_INTF(hwif) ((hwif)->attr.pci_intf_idx) #define HINIC_HWIF_PF_IDX(hwif) ((hwif)->attr.pf_idx) +#define HINIC_HWIF_PPF_IDX(hwif) ((hwif)->attr.ppf_idx) #define HINIC_FUNC_TYPE(hwif) ((hwif)->attr.func_type) #define HINIC_IS_PF(hwif) (HINIC_FUNC_TYPE(hwif) == HINIC_PF) -- cgit v1.2.3-59-g8ed1b From 386d4716fd91869e07c731657f2cde5a33086516 Mon Sep 17 00:00:00 2001 From: Luo bin Date: Thu, 27 Feb 2020 06:34:44 +0000 Subject: hinic: fix a bug of rss configuration The real receive queue number should be used to configure the hw rss indirect table rather than the maximal queue number. Signed-off-by: Luo bin Signed-off-by: David S. Miller --- drivers/net/ethernet/huawei/hinic/hinic_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index 02a14f5e7fe3..13560975c103 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -356,7 +356,8 @@ static void hinic_enable_rss(struct hinic_dev *nic_dev) if (!num_cpus) num_cpus = num_online_cpus(); - nic_dev->num_qps = min_t(u16, nic_dev->max_qps, num_cpus); + nic_dev->num_qps = hinic_hwdev_num_qps(hwdev); + nic_dev->num_qps = min_t(u16, nic_dev->num_qps, num_cpus); nic_dev->rss_limit = nic_dev->num_qps; nic_dev->num_rss = nic_dev->num_qps; -- cgit v1.2.3-59-g8ed1b From e387f7d5fccf95299135c88b799184c3bef6a705 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 27 Feb 2020 08:22:10 +0100 Subject: mlx5: register lag notifier for init network namespace only The current code causes problems when the unregistering netdevice could be different than the registering one. Since the check in mlx5_lag_netdev_event() does not allow any other network namespace anyway, fix this by registering the lag notifier for the init network namespace only. 
Fixes: d48834f9d4b4 ("mlx5: Use dev_net netdevice notifier registrations") Signed-off-by: Jiri Pirko Tested-by: Aya Levin Acked-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 +-- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 3 +-- drivers/net/ethernet/mellanox/mlx5/core/lag.c | 11 +++-------- drivers/net/ethernet/mellanox/mlx5/core/lag.h | 1 - drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 2 +- 5 files changed, 6 insertions(+), 14 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 966983674663..21de4764d4c0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5147,7 +5147,6 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) static void mlx5e_nic_disable(struct mlx5e_priv *priv) { - struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = priv->mdev; #ifdef CONFIG_MLX5_CORE_EN_DCB @@ -5168,7 +5167,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) mlx5e_monitor_counter_cleanup(priv); mlx5e_disable_async_events(priv); - mlx5_lag_remove(mdev, netdev); + mlx5_lag_remove(mdev); } int mlx5e_update_nic_rx(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 7b48ccacebe2..6ed307d7f191 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1861,7 +1861,6 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) { - struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -1870,7 +1869,7 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) #endif mlx5_notifier_unregister(mdev, &priv->events_nb); cancel_work_sync(&rpriv->uplink_priv.reoffload_flows_work); - mlx5_lag_remove(mdev, netdev); + mlx5_lag_remove(mdev); } static MLX5E_DEFINE_STATS_GRP(sw_rep, 0); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index b91eabc09fbc..8e19f6ab8393 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -464,9 +464,6 @@ static int mlx5_lag_netdev_event(struct notifier_block *this, struct mlx5_lag *ldev; int changed = 0; - if (!net_eq(dev_net(ndev), &init_net)) - return NOTIFY_DONE; - if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE)) return NOTIFY_DONE; @@ -586,8 +583,7 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) if (!ldev->nb.notifier_call) { ldev->nb.notifier_call = mlx5_lag_netdev_event; - if (register_netdevice_notifier_dev_net(netdev, &ldev->nb, - &ldev->nn)) { + if (register_netdevice_notifier_net(&init_net, &ldev->nb)) { ldev->nb.notifier_call = NULL; mlx5_core_err(dev, "Failed to register LAG netdev notifier\n"); } @@ -600,7 +596,7 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) } /* Must be called with intf_mutex held */ -void mlx5_lag_remove(struct mlx5_core_dev *dev, struct net_device *netdev) +void mlx5_lag_remove(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; int i; @@ -620,8 +616,7 @@ void mlx5_lag_remove(struct mlx5_core_dev *dev, struct net_device *netdev) if (i == MLX5_MAX_PORTS) { if (ldev->nb.notifier_call) - 
unregister_netdevice_notifier_dev_net(netdev, &ldev->nb, - &ldev->nn); + unregister_netdevice_notifier_net(&init_net, &ldev->nb); mlx5_lag_mp_cleanup(ldev); cancel_delayed_work_sync(&ldev->bond_work); mlx5_lag_dev_free(ldev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag.h index 316ab09e2664..f1068aac6406 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.h @@ -44,7 +44,6 @@ struct mlx5_lag { struct workqueue_struct *wq; struct delayed_work bond_work; struct notifier_block nb; - struct netdev_net_notifier nn; struct lag_mp lag_mp; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index fcce9e0fc82c..da67b28d6e23 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -157,7 +157,7 @@ int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam, u8 feature_group, u8 access_reg_group); void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev); -void mlx5_lag_remove(struct mlx5_core_dev *dev, struct net_device *netdev); +void mlx5_lag_remove(struct mlx5_core_dev *dev); int mlx5_irq_table_init(struct mlx5_core_dev *dev); void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev); -- cgit v1.2.3-59-g8ed1b From b82cf17ff1957ec35eaee7dc519c365ecd06ba38 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 27 Feb 2020 09:44:49 +0000 Subject: net: phy: marvell: don't interpret PHY status unless resolved Don't attempt to interpret the PHY specific status register unless the PHY is indicating that the resolution is valid. Reviewed-by: Andrew Lunn Signed-off-by: Russell King Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/marvell.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 28e33ece4ce1..9a8badafea8a 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -1306,6 +1306,9 @@ static int marvell_read_status_page_an(struct phy_device *phydev, } } + if (!(status & MII_M1011_PHY_STATUS_RESOLVED)) + return 0; + if (status & MII_M1011_PHY_STATUS_FULLDUPLEX) phydev->duplex = DUPLEX_FULL; else @@ -1365,6 +1368,8 @@ static int marvell_read_status_page(struct phy_device *phydev, int page) linkmode_zero(phydev->lp_advertising); phydev->pause = 0; phydev->asym_pause = 0; + phydev->speed = SPEED_UNKNOWN; + phydev->duplex = DUPLEX_UNKNOWN; if (phydev->autoneg == AUTONEG_ENABLE) err = marvell_read_status_page_an(phydev, fiber, status); -- cgit v1.2.3-59-g8ed1b From 93b5cbfa9636d385126f211dca9efa7e3f683202 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 27 Feb 2020 12:23:52 +0000 Subject: net: rmnet: fix NULL pointer dereference in rmnet_newlink() rmnet registers IFLA_LINK interface as a lower interface. But, IFLA_LINK could be NULL. In the current code, rmnet doesn't check IFLA_LINK. So, panic would occur. 
Test commands: modprobe rmnet ip link add rmnet0 type rmnet mux_id 1 Splat looks like: [ 36.826109][ T1115] general protection fault, probably for non-canonical address 0xdffffc0000000000I [ 36.838817][ T1115] KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] [ 36.839908][ T1115] CPU: 1 PID: 1115 Comm: ip Not tainted 5.6.0-rc1+ #447 [ 36.840569][ T1115] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 36.841408][ T1115] RIP: 0010:rmnet_newlink+0x54/0x510 [rmnet] [ 36.841986][ T1115] Code: 83 ec 18 48 c1 e9 03 80 3c 01 00 0f 85 d4 03 00 00 48 8b 6a 28 48 b8 00 00 00 00 00 c [ 36.843923][ T1115] RSP: 0018:ffff8880b7e0f1c0 EFLAGS: 00010247 [ 36.844756][ T1115] RAX: dffffc0000000000 RBX: ffff8880d14cca00 RCX: 1ffff11016fc1e99 [ 36.845859][ T1115] RDX: 0000000000000000 RSI: ffff8880c3d04000 RDI: 0000000000000004 [ 36.846961][ T1115] RBP: 0000000000000000 R08: ffff8880b7e0f8b0 R09: ffff8880b6ac2d90 [ 36.848020][ T1115] R10: ffffffffc0589a40 R11: ffffed1016d585b7 R12: ffffffff88ceaf80 [ 36.848788][ T1115] R13: ffff8880c3d04000 R14: ffff8880b7e0f8b0 R15: ffff8880c3d04000 [ 36.849546][ T1115] FS: 00007f50ab3360c0(0000) GS:ffff8880da000000(0000) knlGS:0000000000000000 [ 36.851784][ T1115] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 36.852422][ T1115] CR2: 000055871afe5ab0 CR3: 00000000ae246001 CR4: 00000000000606e0 [ 36.853181][ T1115] Call Trace: [ 36.853514][ T1115] __rtnl_newlink+0xbdb/0x1270 [ 36.853967][ T1115] ? lock_downgrade+0x6e0/0x6e0 [ 36.854420][ T1115] ? rtnl_link_unregister+0x220/0x220 [ 36.854936][ T1115] ? lock_acquire+0x164/0x3b0 [ 36.855376][ T1115] ? is_bpf_image_address+0xff/0x1d0 [ 36.855884][ T1115] ? rtnl_newlink+0x4c/0x90 [ 36.856304][ T1115] ? kernel_text_address+0x111/0x140 [ 36.856857][ T1115] ? __kernel_text_address+0xe/0x30 [ 36.857440][ T1115] ? unwind_get_return_address+0x5f/0xa0 [ 36.858063][ T1115] ? create_prof_cpu_mask+0x20/0x20 [ 36.858644][ T1115] ? arch_stack_walk+0x83/0xb0 [ 36.859171][ T1115] ? stack_trace_save+0x82/0xb0 [ 36.859710][ T1115] ? stack_trace_consume_entry+0x160/0x160 [ 36.860357][ T1115] ? deactivate_slab.isra.78+0x2c5/0x800 [ 36.860928][ T1115] ? kasan_unpoison_shadow+0x30/0x40 [ 36.861520][ T1115] ? kmem_cache_alloc_trace+0x135/0x350 [ 36.862125][ T1115] ? rtnl_newlink+0x4c/0x90 [ 36.864073][ T1115] rtnl_newlink+0x65/0x90 [ ... ] Fixes: ceed73a2cf4a ("drivers: net: ethernet: qualcomm: rmnet: Initial implementation") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index 06de59521fc4..471e3b2a1403 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -135,6 +135,11 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, int err = 0; u16 mux_id; + if (!tb[IFLA_LINK]) { + NL_SET_ERR_MSG_MOD(extack, "link not specified"); + return -EINVAL; + } + real_dev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); if (!real_dev || !dev) return -ENODEV; -- cgit v1.2.3-59-g8ed1b From 1eb1f43a6e37282348a41e3d68f5e9a6a4359212 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 27 Feb 2020 12:24:26 +0000 Subject: net: rmnet: fix NULL pointer dereference in rmnet_changelink() In the rmnet_changelink(), it uses IFLA_LINK without checking NULL pointer. 
tb[IFLA_LINK] could be NULL pointer. So, NULL-ptr-deref could occur. rmnet already has a lower interface (real_dev). So, after this patch, rmnet_changelink() does not use IFLA_LINK anymore. Test commands: modprobe rmnet ip link add dummy0 type dummy ip link add rmnet0 link dummy0 type rmnet mux_id 1 ip link set rmnet0 type rmnet mux_id 2 Splat looks like: [ 90.578726][ T1131] general protection fault, probably for non-canonical address 0xdffffc0000000000I [ 90.581121][ T1131] KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] [ 90.582380][ T1131] CPU: 2 PID: 1131 Comm: ip Not tainted 5.6.0-rc1+ #447 [ 90.584285][ T1131] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 90.587506][ T1131] RIP: 0010:rmnet_changelink+0x5a/0x8a0 [rmnet] [ 90.588546][ T1131] Code: 83 ec 20 48 c1 ea 03 80 3c 02 00 0f 85 6f 07 00 00 48 8b 5e 28 48 b8 00 00 00 00 00 0 [ 90.591447][ T1131] RSP: 0018:ffff8880ce78f1b8 EFLAGS: 00010247 [ 90.592329][ T1131] RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffff8880ce78f8b0 [ 90.593253][ T1131] RDX: 0000000000000000 RSI: ffff8880ce78f4a0 RDI: 0000000000000004 [ 90.594058][ T1131] RBP: ffff8880cf543e00 R08: 0000000000000002 R09: 0000000000000002 [ 90.594859][ T1131] R10: ffffffffc0586a40 R11: 0000000000000000 R12: ffff8880ca47c000 [ 90.595690][ T1131] R13: ffff8880ca47c000 R14: ffff8880cf545000 R15: 0000000000000000 [ 90.596553][ T1131] FS: 00007f21f6c7e0c0(0000) GS:ffff8880da400000(0000) knlGS:0000000000000000 [ 90.597504][ T1131] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 90.599418][ T1131] CR2: 0000556e413db458 CR3: 00000000c917a002 CR4: 00000000000606e0 [ 90.600289][ T1131] Call Trace: [ 90.600631][ T1131] __rtnl_newlink+0x922/0x1270 [ 90.601194][ T1131] ? lock_downgrade+0x6e0/0x6e0 [ 90.601724][ T1131] ? rtnl_link_unregister+0x220/0x220 [ 90.602309][ T1131] ? lock_acquire+0x164/0x3b0 [ 90.602784][ T1131] ? is_bpf_image_address+0xff/0x1d0 [ 90.603331][ T1131] ? rtnl_newlink+0x4c/0x90 [ 90.603810][ T1131] ? kernel_text_address+0x111/0x140 [ 90.604419][ T1131] ? __kernel_text_address+0xe/0x30 [ 90.604981][ T1131] ? unwind_get_return_address+0x5f/0xa0 [ 90.605616][ T1131] ? create_prof_cpu_mask+0x20/0x20 [ 90.606304][ T1131] ? arch_stack_walk+0x83/0xb0 [ 90.606985][ T1131] ? stack_trace_save+0x82/0xb0 [ 90.607656][ T1131] ? stack_trace_consume_entry+0x160/0x160 [ 90.608503][ T1131] ? deactivate_slab.isra.78+0x2c5/0x800 [ 90.609336][ T1131] ? kasan_unpoison_shadow+0x30/0x40 [ 90.610096][ T1131] ? kmem_cache_alloc_trace+0x135/0x350 [ 90.610889][ T1131] ? rtnl_newlink+0x4c/0x90 [ 90.611512][ T1131] rtnl_newlink+0x65/0x90 [ ... ] Fixes: 23790ef12082 ("net: qualcomm: rmnet: Allow to configure flags for existing devices") Signed-off-by: Taehee Yoo Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index 471e3b2a1403..ac58f584190b 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -300,10 +300,8 @@ static int rmnet_changelink(struct net_device *dev, struct nlattr *tb[], if (!dev) return -ENODEV; - real_dev = __dev_get_by_index(dev_net(dev), - nla_get_u32(tb[IFLA_LINK])); - - if (!real_dev || !rmnet_is_real_dev_registered(real_dev)) + real_dev = priv->real_dev; + if (!rmnet_is_real_dev_registered(real_dev)) return -ENODEV; port = rmnet_get_port_rtnl(real_dev); -- cgit v1.2.3-59-g8ed1b From 102210f7664442d8c0ce332c006ea90626df745b Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 27 Feb 2020 12:24:45 +0000 Subject: net: rmnet: fix suspicious RCU usage rmnet_get_port() internally calls rcu_dereference_rtnl(), which checks RTNL. But rmnet_get_port() could be called by packet path. The packet path is not protected by RTNL. So, the suspicious RCU usage problem occurs. Test commands: modprobe rmnet ip netns add nst ip link add veth0 type veth peer name veth1 ip link set veth1 netns nst ip link add rmnet0 link veth0 type rmnet mux_id 1 ip netns exec nst ip link add rmnet1 link veth1 type rmnet mux_id 1 ip netns exec nst ip link set veth1 up ip netns exec nst ip link set rmnet1 up ip netns exec nst ip a a 192.168.100.2/24 dev rmnet1 ip link set veth0 up ip link set rmnet0 up ip a a 192.168.100.1/24 dev rmnet0 ping 192.168.100.2 Splat looks like: [ 146.630958][ T1174] WARNING: suspicious RCU usage [ 146.631735][ T1174] 5.6.0-rc1+ #447 Not tainted [ 146.632387][ T1174] ----------------------------- [ 146.633151][ T1174] drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c:386 suspicious rcu_dereference_check() ! [ 146.634742][ T1174] [ 146.634742][ T1174] other info that might help us debug this: [ 146.634742][ T1174] [ 146.645992][ T1174] [ 146.645992][ T1174] rcu_scheduler_active = 2, debug_locks = 1 [ 146.646937][ T1174] 5 locks held by ping/1174: [ 146.647609][ T1174] #0: ffff8880c31dea70 (sk_lock-AF_INET){+.+.}, at: raw_sendmsg+0xab8/0x2980 [ 146.662463][ T1174] #1: ffffffff93925660 (rcu_read_lock_bh){....}, at: ip_finish_output2+0x243/0x2150 [ 146.671696][ T1174] #2: ffffffff93925660 (rcu_read_lock_bh){....}, at: __dev_queue_xmit+0x213/0x2940 [ 146.673064][ T1174] #3: ffff8880c19ecd58 (&dev->qdisc_running_key#7){+...}, at: ip_finish_output2+0x714/0x2150 [ 146.690358][ T1174] #4: ffff8880c5796898 (&dev->qdisc_xmit_lock_key#3){+.-.}, at: sch_direct_xmit+0x1e2/0x1020 [ 146.699875][ T1174] [ 146.699875][ T1174] stack backtrace: [ 146.701091][ T1174] CPU: 0 PID: 1174 Comm: ping Not tainted 5.6.0-rc1+ #447 [ 146.705215][ T1174] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 146.706565][ T1174] Call Trace: [ 146.707102][ T1174] dump_stack+0x96/0xdb [ 146.708007][ T1174] rmnet_get_port.part.9+0x76/0x80 [rmnet] [ 146.709233][ T1174] rmnet_egress_handler+0x107/0x420 [rmnet] [ 146.710492][ T1174] ? sch_direct_xmit+0x1e2/0x1020 [ 146.716193][ T1174] rmnet_vnd_start_xmit+0x3d/0xa0 [rmnet] [ 146.717012][ T1174] dev_hard_start_xmit+0x160/0x740 [ 146.717854][ T1174] sch_direct_xmit+0x265/0x1020 [ 146.718577][ T1174] ? register_lock_class+0x14d0/0x14d0 [ 146.719429][ T1174] ? dev_watchdog+0xac0/0xac0 [ 146.723738][ T1174] ? 
__dev_queue_xmit+0x15fd/0x2940 [ 146.724469][ T1174] ? lock_acquire+0x164/0x3b0 [ 146.725172][ T1174] __dev_queue_xmit+0x20c7/0x2940 [ ... ] Fixes: ceed73a2cf4a ("drivers: net: ethernet: qualcomm: rmnet: Initial implementation") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 13 ++++++------- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h | 2 +- drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c | 4 ++-- 3 files changed, 9 insertions(+), 10 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index ac58f584190b..fc68ecdd804b 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -382,11 +382,10 @@ struct rtnl_link_ops rmnet_link_ops __read_mostly = { .fill_info = rmnet_fill_info, }; -/* Needs either rcu_read_lock() or rtnl lock */ -struct rmnet_port *rmnet_get_port(struct net_device *real_dev) +struct rmnet_port *rmnet_get_port_rcu(struct net_device *real_dev) { if (rmnet_is_real_dev_registered(real_dev)) - return rcu_dereference_rtnl(real_dev->rx_handler_data); + return rcu_dereference_bh(real_dev->rx_handler_data); else return NULL; } @@ -412,7 +411,7 @@ int rmnet_add_bridge(struct net_device *rmnet_dev, struct rmnet_port *port, *slave_port; int err; - port = rmnet_get_port(real_dev); + port = rmnet_get_port_rtnl(real_dev); /* If there is more than one rmnet dev attached, its probably being * used for muxing. Skip the briding in that case @@ -427,7 +426,7 @@ int rmnet_add_bridge(struct net_device *rmnet_dev, if (err) return -EBUSY; - slave_port = rmnet_get_port(slave_dev); + slave_port = rmnet_get_port_rtnl(slave_dev); slave_port->rmnet_mode = RMNET_EPMODE_BRIDGE; slave_port->bridge_ep = real_dev; @@ -445,11 +444,11 @@ int rmnet_del_bridge(struct net_device *rmnet_dev, struct net_device *real_dev = priv->real_dev; struct rmnet_port *port, *slave_port; - port = rmnet_get_port(real_dev); + port = rmnet_get_port_rtnl(real_dev); port->rmnet_mode = RMNET_EPMODE_VND; port->bridge_ep = NULL; - slave_port = rmnet_get_port(slave_dev); + slave_port = rmnet_get_port_rtnl(slave_dev); rmnet_unregister_real_device(slave_dev, slave_port); netdev_dbg(slave_dev, "removed from rmnet as slave\n"); diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h index cd0a6bcbe74a..0d568dcfd65a 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h @@ -65,7 +65,7 @@ struct rmnet_priv { struct rmnet_priv_stats stats; }; -struct rmnet_port *rmnet_get_port(struct net_device *real_dev); +struct rmnet_port *rmnet_get_port_rcu(struct net_device *real_dev); struct rmnet_endpoint *rmnet_get_endpoint(struct rmnet_port *port, u8 mux_id); int rmnet_add_bridge(struct net_device *rmnet_dev, struct net_device *slave_dev, diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c index 1b74bc160402..074a8b326c30 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c @@ -184,7 +184,7 @@ rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb) return RX_HANDLER_PASS; dev = skb->dev; - port = rmnet_get_port(dev); + port = rmnet_get_port_rcu(dev); switch (port->rmnet_mode) { case RMNET_EPMODE_VND: @@ -217,7 +217,7 @@ void 
rmnet_egress_handler(struct sk_buff *skb) skb->dev = priv->real_dev; mux_id = priv->mux_id; - port = rmnet_get_port(skb->dev); + port = rmnet_get_port_rcu(skb->dev); if (!port) goto drop; -- cgit v1.2.3-59-g8ed1b From c026d970102e9af9958edefb4a015702c6aab636 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 27 Feb 2020 12:25:05 +0000 Subject: net: rmnet: remove rcu_read_lock in rmnet_force_unassociate_device() The notifier_call() of the slave interface removes rmnet interface with unregister_netdevice_queue(). But, before calling unregister_netdevice_queue(), it acquires rcu readlock. In the RCU critical section, sleeping isn't be allowed. But, unregister_netdevice_queue() internally calls synchronize_net(), which would sleep. So, suspicious RCU usage warning occurs. Test commands: modprobe rmnet ip link add dummy0 type dummy ip link add dummy1 type dummy ip link add rmnet0 link dummy0 type rmnet mux_id 1 ip link set dummy1 master rmnet0 ip link del dummy0 Splat looks like: [ 79.639245][ T1195] ============================= [ 79.640134][ T1195] WARNING: suspicious RCU usage [ 79.640852][ T1195] 5.6.0-rc1+ #447 Not tainted [ 79.641657][ T1195] ----------------------------- [ 79.642472][ T1195] ./include/linux/rcupdate.h:273 Illegal context switch in RCU read-side critical section! [ 79.644043][ T1195] [ 79.644043][ T1195] other info that might help us debug this: [ 79.644043][ T1195] [ 79.645682][ T1195] [ 79.645682][ T1195] rcu_scheduler_active = 2, debug_locks = 1 [ 79.646980][ T1195] 2 locks held by ip/1195: [ 79.647629][ T1195] #0: ffffffffa3cf64f0 (rtnl_mutex){+.+.}, at: rtnetlink_rcv_msg+0x457/0x890 [ 79.649312][ T1195] #1: ffffffffa39256c0 (rcu_read_lock){....}, at: rmnet_config_notify_cb+0xf0/0x590 [rmnet] [ 79.651717][ T1195] [ 79.651717][ T1195] stack backtrace: [ 79.652650][ T1195] CPU: 3 PID: 1195 Comm: ip Not tainted 5.6.0-rc1+ #447 [ 79.653702][ T1195] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 79.655037][ T1195] Call Trace: [ 79.655560][ T1195] dump_stack+0x96/0xdb [ 79.656252][ T1195] ___might_sleep+0x345/0x440 [ 79.656994][ T1195] synchronize_net+0x18/0x30 [ 79.661132][ T1195] netdev_rx_handler_unregister+0x40/0xb0 [ 79.666266][ T1195] rmnet_unregister_real_device+0x42/0xb0 [rmnet] [ 79.667211][ T1195] rmnet_config_notify_cb+0x1f7/0x590 [rmnet] [ 79.668121][ T1195] ? rmnet_unregister_bridge.isra.6+0xf0/0xf0 [rmnet] [ 79.669166][ T1195] ? rmnet_unregister_bridge.isra.6+0xf0/0xf0 [rmnet] [ 79.670286][ T1195] ? __module_text_address+0x13/0x140 [ 79.671139][ T1195] notifier_call_chain+0x90/0x160 [ 79.671973][ T1195] rollback_registered_many+0x660/0xcf0 [ 79.672893][ T1195] ? netif_set_real_num_tx_queues+0x780/0x780 [ 79.675091][ T1195] ? __lock_acquire+0xdfe/0x3de0 [ 79.675825][ T1195] ? memset+0x1f/0x40 [ 79.676367][ T1195] ? __nla_validate_parse+0x98/0x1ab0 [ 79.677290][ T1195] unregister_netdevice_many.part.133+0x13/0x1b0 [ 79.678163][ T1195] rtnl_delete_link+0xbc/0x100 [ ... ] Fixes: ceed73a2cf4a ("drivers: net: ethernet: qualcomm: rmnet: Initial implementation") Signed-off-by: Taehee Yoo Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index fc68ecdd804b..0ad64aa66592 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -230,7 +230,6 @@ static void rmnet_force_unassociate_device(struct net_device *dev) port = rmnet_get_port_rtnl(dev); - rcu_read_lock(); rmnet_unregister_bridge(dev, port); hash_for_each_safe(port->muxed_ep, bkt_ep, tmp_ep, ep, hlnode) { @@ -241,7 +240,6 @@ static void rmnet_force_unassociate_device(struct net_device *dev) kfree(ep); } - rcu_read_unlock(); unregister_netdevice_many(&list); rmnet_unregister_real_device(real_dev, port); -- cgit v1.2.3-59-g8ed1b From 1dc49e9d164cd7e11c81279c83db84a147e14740 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 27 Feb 2020 12:25:19 +0000 Subject: net: rmnet: do not allow to change mux id if mux id is duplicated Duplicate mux ids are not allowed: creating an rmnet device fails if the requested mux id already exists. The changelink routine, however, does not perform this duplicate check. Test commands: modprobe rmnet ip link add dummy0 type dummy ip link add rmnet0 link dummy0 type rmnet mux_id 1 ip link add rmnet1 link dummy0 type rmnet mux_id 2 ip link set rmnet1 type rmnet mux_id 1 Fixes: 23790ef12082 ("net: qualcomm: rmnet: Allow to configure flags for existing devices") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index 0ad64aa66592..3c0e6d24d083 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -306,6 +306,10 @@ static int rmnet_changelink(struct net_device *dev, struct nlattr *tb[], if (data[IFLA_RMNET_MUX_ID]) { mux_id = nla_get_u16(data[IFLA_RMNET_MUX_ID]); + if (rmnet_get_endpoint(port, mux_id)) { + NL_SET_ERR_MSG_MOD(extack, "MUX ID already exists"); + return -EINVAL; + } ep = rmnet_get_endpoint(port, priv->mux_id); if (!ep) return -ENODEV; -- cgit v1.2.3-59-g8ed1b From 037f9cdf72fb8a7ff9ec2b5dd05336ec1492bdf1 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 27 Feb 2020 12:25:43 +0000 Subject: net: rmnet: use upper/lower device infrastructure netdev_upper_dev_link() is useful for managing lower/upper interfaces, and it internally validates against loops and excessive stacking depth. Most virtual interfaces that sit on top of a real interface (e.g. macsec, macvlan, ipvlan, etc.) already use the lower/upper infrastructure. Test commands: modprobe rmnet ip link add dummy0 type dummy ip link add rmnet1 link dummy0 type rmnet mux_id 1 for i in {2..100} do let A=$i-1 ip link add rmnet$i link rmnet$A type rmnet mux_id $i done ip link del dummy0 The purpose of the test commands is to trigger a stack overflow. 
Splat looks like: [ 52.411438][ T1395] BUG: KASAN: slab-out-of-bounds in find_busiest_group+0x27e/0x2c00 [ 52.413218][ T1395] Write of size 64 at addr ffff8880c774bde0 by task ip/1395 [ 52.414841][ T1395] [ 52.430720][ T1395] CPU: 1 PID: 1395 Comm: ip Not tainted 5.6.0-rc1+ #447 [ 52.496511][ T1395] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 52.513597][ T1395] Call Trace: [ 52.546516][ T1395] [ 52.558773][ T1395] Allocated by task 3171537984: [ 52.588290][ T1395] BUG: unable to handle page fault for address: ffffffffb999e260 [ 52.589311][ T1395] #PF: supervisor read access in kernel mode [ 52.590529][ T1395] #PF: error_code(0x0000) - not-present page [ 52.591374][ T1395] PGD d6818067 P4D d6818067 PUD d6819063 PMD 0 [ 52.592288][ T1395] Thread overran stack, or stack corrupted [ 52.604980][ T1395] Oops: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI [ 52.605856][ T1395] CPU: 1 PID: 1395 Comm: ip Not tainted 5.6.0-rc1+ #447 [ 52.611764][ T1395] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 52.621520][ T1395] RIP: 0010:stack_depot_fetch+0x10/0x30 [ 52.622296][ T1395] Code: ff e9 f9 fe ff ff 48 89 df e8 9c 1d 91 ff e9 ca fe ff ff cc cc cc cc cc cc cc 89 f8 0 [ 52.627887][ T1395] RSP: 0018:ffff8880c774bb60 EFLAGS: 00010006 [ 52.628735][ T1395] RAX: 00000000001f8880 RBX: ffff8880c774d140 RCX: 0000000000000000 [ 52.631773][ T1395] RDX: 000000000000001d RSI: ffff8880c774bb68 RDI: 0000000000003ff0 [ 52.649584][ T1395] RBP: ffffea00031dd200 R08: ffffed101b43e403 R09: ffffed101b43e403 [ 52.674857][ T1395] R10: 0000000000000001 R11: ffffed101b43e402 R12: ffff8880d900e5c0 [ 52.678257][ T1395] R13: ffff8880c774c000 R14: 0000000000000000 R15: dffffc0000000000 [ 52.694541][ T1395] FS: 00007fe867f6e0c0(0000) GS:ffff8880da000000(0000) knlGS:0000000000000000 [ 52.764039][ T1395] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 52.815008][ T1395] CR2: ffffffffb999e260 CR3: 00000000c26aa005 CR4: 00000000000606e0 [ 52.862312][ T1395] Call Trace: [ 52.887133][ T1395] Modules linked in: dummy rmnet veth openvswitch nsh nf_conncount nf_nat nf_conntrack nf_dex [ 52.936749][ T1395] CR2: ffffffffb999e260 [ 52.965695][ T1395] ---[ end trace 7e32ca99482dbb31 ]--- [ 52.966556][ T1395] RIP: 0010:stack_depot_fetch+0x10/0x30 [ 52.971083][ T1395] Code: ff e9 f9 fe ff ff 48 89 df e8 9c 1d 91 ff e9 ca fe ff ff cc cc cc cc cc cc cc 89 f8 0 [ 53.003650][ T1395] RSP: 0018:ffff8880c774bb60 EFLAGS: 00010006 [ 53.043183][ T1395] RAX: 00000000001f8880 RBX: ffff8880c774d140 RCX: 0000000000000000 [ 53.076480][ T1395] RDX: 000000000000001d RSI: ffff8880c774bb68 RDI: 0000000000003ff0 [ 53.093858][ T1395] RBP: ffffea00031dd200 R08: ffffed101b43e403 R09: ffffed101b43e403 [ 53.112795][ T1395] R10: 0000000000000001 R11: ffffed101b43e402 R12: ffff8880d900e5c0 [ 53.139837][ T1395] R13: ffff8880c774c000 R14: 0000000000000000 R15: dffffc0000000000 [ 53.141500][ T1395] FS: 00007fe867f6e0c0(0000) GS:ffff8880da000000(0000) knlGS:0000000000000000 [ 53.143343][ T1395] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 53.152007][ T1395] CR2: ffffffffb999e260 CR3: 00000000c26aa005 CR4: 00000000000606e0 [ 53.156459][ T1395] Kernel panic - not syncing: Fatal exception [ 54.213570][ T1395] Shutting down cpus with NMI [ 54.354112][ T1395] Kernel Offset: 0x33000000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0x) [ 54.355687][ T1395] Rebooting in 5 seconds.. 
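Before the actual patch below, a rough sketch of what "using the upper/lower device infrastructure" means in practice, assuming the lower device has already been resolved from IFLA_LINK. The helper names example_link_lower()/example_unlink_lower() are hypothetical; netdev_upper_dev_link()/netdev_upper_dev_unlink() and the unwind order mirror the calls the patch adds. Linking through the core is what lets it refuse loops and overly deep rmnet-on-rmnet stacks like the one built by the loop above.

/* Sketch, not driver code: pair the upper/lower link with registration and
 * undo it in reverse order on teardown or failure.
 */
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

static int example_link_lower(struct net_device *dev,	/* new upper dev */
			      struct net_device *lower,	/* real device */
			      struct netlink_ext_ack *extack)
{
	int err;

	err = register_netdevice(dev);
	if (err)
		return err;

	/* The netdev core validates the relationship here and returns an
	 * error for loops or an excessive stacking depth.
	 */
	err = netdev_upper_dev_link(lower, dev, extack);
	if (err) {
		unregister_netdevice(dev);
		return err;
	}
	return 0;
}

static void example_unlink_lower(struct net_device *dev,
				 struct net_device *lower,
				 struct list_head *head)
{
	netdev_upper_dev_unlink(lower, dev);
	unregister_netdevice_queue(dev, head);
}

The ->dellink() side unlinks before queueing the device for unregistration, which is the ordering the patch below applies in rmnet_dellink().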
Fixes: b37f78f234bf ("net: qualcomm: rmnet: Fix crash on real dev unregistration") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 35 ++++++++++------------ 1 file changed, 16 insertions(+), 19 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index 3c0e6d24d083..e3fbf2331b96 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -61,9 +61,6 @@ static int rmnet_unregister_real_device(struct net_device *real_dev, kfree(port); - /* release reference on real_dev */ - dev_put(real_dev); - netdev_dbg(real_dev, "Removed from rmnet\n"); return 0; } @@ -89,9 +86,6 @@ static int rmnet_register_real_device(struct net_device *real_dev) return -EBUSY; } - /* hold on to real dev for MAP data */ - dev_hold(real_dev); - for (entry = 0; entry < RMNET_MAX_LOGICAL_EP; entry++) INIT_HLIST_HEAD(&port->muxed_ep[entry]); @@ -162,6 +156,10 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, if (err) goto err1; + err = netdev_upper_dev_link(real_dev, dev, extack); + if (err < 0) + goto err2; + port->rmnet_mode = mode; hlist_add_head_rcu(&ep->hlnode, &port->muxed_ep[mux_id]); @@ -178,6 +176,8 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, return 0; +err2: + unregister_netdevice(dev); err1: rmnet_unregister_real_device(real_dev, port); err0: @@ -209,33 +209,30 @@ static void rmnet_dellink(struct net_device *dev, struct list_head *head) rmnet_vnd_dellink(mux_id, port, ep); kfree(ep); } + netdev_upper_dev_unlink(real_dev, dev); rmnet_unregister_real_device(real_dev, port); unregister_netdevice_queue(dev, head); } -static void rmnet_force_unassociate_device(struct net_device *dev) +static void rmnet_force_unassociate_device(struct net_device *real_dev) { - struct net_device *real_dev = dev; struct hlist_node *tmp_ep; struct rmnet_endpoint *ep; struct rmnet_port *port; unsigned long bkt_ep; LIST_HEAD(list); - if (!rmnet_is_real_dev_registered(real_dev)) - return; - ASSERT_RTNL(); - port = rmnet_get_port_rtnl(dev); + port = rmnet_get_port_rtnl(real_dev); - rmnet_unregister_bridge(dev, port); + rmnet_unregister_bridge(real_dev, port); hash_for_each_safe(port->muxed_ep, bkt_ep, tmp_ep, ep, hlnode) { + netdev_upper_dev_unlink(real_dev, ep->egress_dev); unregister_netdevice_queue(ep->egress_dev, &list); rmnet_vnd_dellink(ep->mux_id, port, ep); - hlist_del_init_rcu(&ep->hlnode); kfree(ep); } @@ -248,15 +245,15 @@ static void rmnet_force_unassociate_device(struct net_device *dev) static int rmnet_config_notify_cb(struct notifier_block *nb, unsigned long event, void *data) { - struct net_device *dev = netdev_notifier_info_to_dev(data); + struct net_device *real_dev = netdev_notifier_info_to_dev(data); - if (!dev) + if (!rmnet_is_real_dev_registered(real_dev)) return NOTIFY_DONE; switch (event) { case NETDEV_UNREGISTER: - netdev_dbg(dev, "Kernel unregister\n"); - rmnet_force_unassociate_device(dev); + netdev_dbg(real_dev, "Kernel unregister\n"); + rmnet_force_unassociate_device(real_dev); break; default: @@ -477,8 +474,8 @@ static int __init rmnet_init(void) static void __exit rmnet_exit(void) { - unregister_netdevice_notifier(&rmnet_dev_notifier); rtnl_link_unregister(&rmnet_link_ops); + unregister_netdevice_notifier(&rmnet_dev_notifier); } module_init(rmnet_init) -- cgit v1.2.3-59-g8ed1b From d939b6d30bea1a2322bc536b12be0a7c4c2bccd7 Mon Sep 
17 00:00:00 2001 From: Taehee Yoo Date: Thu, 27 Feb 2020 12:26:02 +0000 Subject: net: rmnet: fix bridge mode bugs In order to attach a bridge interface to the rmnet interface, "master" operation is used. (e.g. ip link set dummy1 master rmnet0) But, in the rmnet_add_bridge(), which is a callback of ->ndo_add_slave() doesn't register lower interface. So, ->ndo_del_slave() doesn't work. There are other problems too. 1. It couldn't detect circular upper/lower interface relationship. 2. It couldn't prevent stack overflow because of too deep depth of upper/lower interface 3. It doesn't check the number of lower interfaces. 4. Panics because of several reasons. The root problem of these issues is actually the same. So, in this patch, these all problems will be fixed. Test commands: modprobe rmnet ip link add dummy0 type dummy ip link add rmnet0 link dummy0 type rmnet mux_id 1 ip link add dummy1 master rmnet0 type dummy ip link add dummy2 master rmnet0 type dummy ip link del rmnet0 ip link del dummy2 ip link del dummy1 Splat looks like: [ 41.867595][ T1164] general protection fault, probably for non-canonical address 0xdffffc0000000101I [ 41.869993][ T1164] KASAN: null-ptr-deref in range [0x0000000000000808-0x000000000000080f] [ 41.872950][ T1164] CPU: 0 PID: 1164 Comm: ip Not tainted 5.6.0-rc1+ #447 [ 41.873915][ T1164] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 41.875161][ T1164] RIP: 0010:rmnet_unregister_bridge.isra.6+0x71/0xf0 [rmnet] [ 41.876178][ T1164] Code: 48 89 ef 48 89 c6 5b 5d e9 fc fe ff ff e8 f7 f3 ff ff 48 8d b8 08 08 00 00 48 ba 00 7 [ 41.878925][ T1164] RSP: 0018:ffff8880c4d0f188 EFLAGS: 00010202 [ 41.879774][ T1164] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000101 [ 41.887689][ T1164] RDX: dffffc0000000000 RSI: ffffffffb8cf64f0 RDI: 0000000000000808 [ 41.888727][ T1164] RBP: ffff8880c40e4000 R08: ffffed101b3c0e3c R09: 0000000000000001 [ 41.889749][ T1164] R10: 0000000000000001 R11: ffffed101b3c0e3b R12: 1ffff110189a1e3c [ 41.890783][ T1164] R13: ffff8880c4d0f200 R14: ffffffffb8d56160 R15: ffff8880ccc2c000 [ 41.891794][ T1164] FS: 00007f4300edc0c0(0000) GS:ffff8880d9c00000(0000) knlGS:0000000000000000 [ 41.892953][ T1164] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 41.893800][ T1164] CR2: 00007f43003bc8c0 CR3: 00000000ca53e001 CR4: 00000000000606f0 [ 41.894824][ T1164] Call Trace: [ 41.895274][ T1164] ? rcu_is_watching+0x2c/0x80 [ 41.895895][ T1164] rmnet_config_notify_cb+0x1f7/0x590 [rmnet] [ 41.896687][ T1164] ? rmnet_unregister_bridge.isra.6+0xf0/0xf0 [rmnet] [ 41.897611][ T1164] ? rmnet_unregister_bridge.isra.6+0xf0/0xf0 [rmnet] [ 41.898508][ T1164] ? __module_text_address+0x13/0x140 [ 41.899162][ T1164] notifier_call_chain+0x90/0x160 [ 41.899814][ T1164] rollback_registered_many+0x660/0xcf0 [ 41.900544][ T1164] ? netif_set_real_num_tx_queues+0x780/0x780 [ 41.901316][ T1164] ? __lock_acquire+0xdfe/0x3de0 [ 41.901958][ T1164] ? memset+0x1f/0x40 [ 41.902468][ T1164] ? __nla_validate_parse+0x98/0x1ab0 [ 41.903166][ T1164] unregister_netdevice_many.part.133+0x13/0x1b0 [ 41.903988][ T1164] rtnl_delete_link+0xbc/0x100 [ ... ] Fixes: 60d58f971c10 ("net: qualcomm: rmnet: Implement bridge mode") Signed-off-by: Taehee Yoo Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 131 ++++++++++----------- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h | 1 + drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c | 8 -- drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h | 1 - 4 files changed, 64 insertions(+), 77 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index e3fbf2331b96..fbf4cbcf1a65 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -13,25 +13,6 @@ #include "rmnet_vnd.h" #include "rmnet_private.h" -/* Locking scheme - - * The shared resource which needs to be protected is realdev->rx_handler_data. - * For the writer path, this is using rtnl_lock(). The writer paths are - * rmnet_newlink(), rmnet_dellink() and rmnet_force_unassociate_device(). These - * paths are already called with rtnl_lock() acquired in. There is also an - * ASSERT_RTNL() to ensure that we are calling with rtnl acquired. For - * dereference here, we will need to use rtnl_dereference(). Dev list writing - * needs to happen with rtnl_lock() acquired for netdev_master_upper_dev_link(). - * For the reader path, the real_dev->rx_handler_data is called in the TX / RX - * path. We only need rcu_read_lock() for these scenarios. In these cases, - * the rcu_read_lock() is held in __dev_queue_xmit() and - * netif_receive_skb_internal(), so readers need to use rcu_dereference_rtnl() - * to get the relevant information. For dev list reading, we again acquire - * rcu_read_lock() in rmnet_dellink() for netdev_master_upper_dev_get_rcu(). - * We also use unregister_netdevice_many() to free all rmnet devices in - * rmnet_force_unassociate_device() so we dont lose the rtnl_lock() and free in - * same context. 
- */ - /* Local Definitions and Declarations */ static const struct nla_policy rmnet_policy[IFLA_RMNET_MAX + 1] = { @@ -51,9 +32,10 @@ rmnet_get_port_rtnl(const struct net_device *real_dev) return rtnl_dereference(real_dev->rx_handler_data); } -static int rmnet_unregister_real_device(struct net_device *real_dev, - struct rmnet_port *port) +static int rmnet_unregister_real_device(struct net_device *real_dev) { + struct rmnet_port *port = rmnet_get_port_rtnl(real_dev); + if (port->nr_rmnet_devs) return -EINVAL; @@ -93,28 +75,33 @@ static int rmnet_register_real_device(struct net_device *real_dev) return 0; } -static void rmnet_unregister_bridge(struct net_device *dev, - struct rmnet_port *port) +static void rmnet_unregister_bridge(struct rmnet_port *port) { - struct rmnet_port *bridge_port; - struct net_device *bridge_dev; + struct net_device *bridge_dev, *real_dev, *rmnet_dev; + struct rmnet_port *real_port; if (port->rmnet_mode != RMNET_EPMODE_BRIDGE) return; - /* bridge slave handling */ + rmnet_dev = port->rmnet_dev; if (!port->nr_rmnet_devs) { - bridge_dev = port->bridge_ep; + /* bridge device */ + real_dev = port->bridge_ep; + bridge_dev = port->dev; - bridge_port = rmnet_get_port_rtnl(bridge_dev); - bridge_port->bridge_ep = NULL; - bridge_port->rmnet_mode = RMNET_EPMODE_VND; + real_port = rmnet_get_port_rtnl(real_dev); + real_port->bridge_ep = NULL; + real_port->rmnet_mode = RMNET_EPMODE_VND; } else { + /* real device */ bridge_dev = port->bridge_ep; - bridge_port = rmnet_get_port_rtnl(bridge_dev); - rmnet_unregister_real_device(bridge_dev, bridge_port); + port->bridge_ep = NULL; + port->rmnet_mode = RMNET_EPMODE_VND; } + + netdev_upper_dev_unlink(bridge_dev, rmnet_dev); + rmnet_unregister_real_device(bridge_dev); } static int rmnet_newlink(struct net *src_net, struct net_device *dev, @@ -161,6 +148,7 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, goto err2; port->rmnet_mode = mode; + port->rmnet_dev = dev; hlist_add_head_rcu(&ep->hlnode, &port->muxed_ep[mux_id]); @@ -178,8 +166,9 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, err2: unregister_netdevice(dev); + rmnet_vnd_dellink(mux_id, port, ep); err1: - rmnet_unregister_real_device(real_dev, port); + rmnet_unregister_real_device(real_dev); err0: kfree(ep); return err; @@ -188,30 +177,32 @@ err0: static void rmnet_dellink(struct net_device *dev, struct list_head *head) { struct rmnet_priv *priv = netdev_priv(dev); - struct net_device *real_dev; + struct net_device *real_dev, *bridge_dev; + struct rmnet_port *real_port, *bridge_port; struct rmnet_endpoint *ep; - struct rmnet_port *port; - u8 mux_id; + u8 mux_id = priv->mux_id; real_dev = priv->real_dev; - if (!real_dev || !rmnet_is_real_dev_registered(real_dev)) + if (!rmnet_is_real_dev_registered(real_dev)) return; - port = rmnet_get_port_rtnl(real_dev); - - mux_id = rmnet_vnd_get_mux(dev); + real_port = rmnet_get_port_rtnl(real_dev); + bridge_dev = real_port->bridge_ep; + if (bridge_dev) { + bridge_port = rmnet_get_port_rtnl(bridge_dev); + rmnet_unregister_bridge(bridge_port); + } - ep = rmnet_get_endpoint(port, mux_id); + ep = rmnet_get_endpoint(real_port, mux_id); if (ep) { hlist_del_init_rcu(&ep->hlnode); - rmnet_unregister_bridge(dev, port); - rmnet_vnd_dellink(mux_id, port, ep); + rmnet_vnd_dellink(mux_id, real_port, ep); kfree(ep); } - netdev_upper_dev_unlink(real_dev, dev); - rmnet_unregister_real_device(real_dev, port); + netdev_upper_dev_unlink(real_dev, dev); + rmnet_unregister_real_device(real_dev); 
unregister_netdevice_queue(dev, head); } @@ -223,23 +214,23 @@ static void rmnet_force_unassociate_device(struct net_device *real_dev) unsigned long bkt_ep; LIST_HEAD(list); - ASSERT_RTNL(); - port = rmnet_get_port_rtnl(real_dev); - rmnet_unregister_bridge(real_dev, port); - - hash_for_each_safe(port->muxed_ep, bkt_ep, tmp_ep, ep, hlnode) { - netdev_upper_dev_unlink(real_dev, ep->egress_dev); - unregister_netdevice_queue(ep->egress_dev, &list); - rmnet_vnd_dellink(ep->mux_id, port, ep); - hlist_del_init_rcu(&ep->hlnode); - kfree(ep); + if (port->nr_rmnet_devs) { + /* real device */ + rmnet_unregister_bridge(port); + hash_for_each_safe(port->muxed_ep, bkt_ep, tmp_ep, ep, hlnode) { + unregister_netdevice_queue(ep->egress_dev, &list); + netdev_upper_dev_unlink(real_dev, ep->egress_dev); + rmnet_vnd_dellink(ep->mux_id, port, ep); + hlist_del_init_rcu(&ep->hlnode); + kfree(ep); + } + rmnet_unregister_real_device(real_dev); + unregister_netdevice_many(&list); + } else { + rmnet_unregister_bridge(port); } - - unregister_netdevice_many(&list); - - rmnet_unregister_real_device(real_dev, port); } static int rmnet_config_notify_cb(struct notifier_block *nb, @@ -418,6 +409,9 @@ int rmnet_add_bridge(struct net_device *rmnet_dev, if (port->nr_rmnet_devs > 1) return -EINVAL; + if (port->rmnet_mode != RMNET_EPMODE_VND) + return -EINVAL; + if (rmnet_is_real_dev_registered(slave_dev)) return -EBUSY; @@ -425,9 +419,17 @@ int rmnet_add_bridge(struct net_device *rmnet_dev, if (err) return -EBUSY; + err = netdev_master_upper_dev_link(slave_dev, rmnet_dev, NULL, NULL, + extack); + if (err) { + rmnet_unregister_real_device(slave_dev); + return err; + } + slave_port = rmnet_get_port_rtnl(slave_dev); slave_port->rmnet_mode = RMNET_EPMODE_BRIDGE; slave_port->bridge_ep = real_dev; + slave_port->rmnet_dev = rmnet_dev; port->rmnet_mode = RMNET_EPMODE_BRIDGE; port->bridge_ep = slave_dev; @@ -439,16 +441,9 @@ int rmnet_add_bridge(struct net_device *rmnet_dev, int rmnet_del_bridge(struct net_device *rmnet_dev, struct net_device *slave_dev) { - struct rmnet_priv *priv = netdev_priv(rmnet_dev); - struct net_device *real_dev = priv->real_dev; - struct rmnet_port *port, *slave_port; - - port = rmnet_get_port_rtnl(real_dev); - port->rmnet_mode = RMNET_EPMODE_VND; - port->bridge_ep = NULL; + struct rmnet_port *port = rmnet_get_port_rtnl(slave_dev); - slave_port = rmnet_get_port_rtnl(slave_dev); - rmnet_unregister_real_device(slave_dev, slave_port); + rmnet_unregister_bridge(port); netdev_dbg(slave_dev, "removed from rmnet as slave\n"); return 0; diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h index 0d568dcfd65a..be515982d628 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h @@ -28,6 +28,7 @@ struct rmnet_port { u8 rmnet_mode; struct hlist_head muxed_ep[RMNET_MAX_LOGICAL_EP]; struct net_device *bridge_ep; + struct net_device *rmnet_dev; }; extern struct rtnl_link_ops rmnet_link_ops; diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c index 509dfc895a33..26ad40f19c64 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c @@ -266,14 +266,6 @@ int rmnet_vnd_dellink(u8 id, struct rmnet_port *port, return 0; } -u8 rmnet_vnd_get_mux(struct net_device *rmnet_dev) -{ - struct rmnet_priv *priv; - - priv = netdev_priv(rmnet_dev); - return priv->mux_id; -} - int 
rmnet_vnd_do_flow_control(struct net_device *rmnet_dev, int enable) { netdev_dbg(rmnet_dev, "Setting VND TX queue state to %d\n", enable); diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h index 54cbaf3c3bc4..14d77c709d4a 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h @@ -16,6 +16,5 @@ int rmnet_vnd_dellink(u8 id, struct rmnet_port *port, struct rmnet_endpoint *ep); void rmnet_vnd_rx_fixup(struct sk_buff *skb, struct net_device *dev); void rmnet_vnd_tx_fixup(struct sk_buff *skb, struct net_device *dev); -u8 rmnet_vnd_get_mux(struct net_device *rmnet_dev); void rmnet_vnd_setup(struct net_device *dev); #endif /* _RMNET_VND_H_ */ -- cgit v1.2.3-59-g8ed1b From ad3cc31b599ea80f06b29ebdc18b3a39878a48d6 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 27 Feb 2020 12:26:15 +0000 Subject: net: rmnet: fix packet forwarding in rmnet bridge mode Packet forwarding is not working in rmnet bridge mode. Because when a packet is forwarded, skb_push() for an ethernet header is needed. But it doesn't call skb_push(). So, the ethernet header will be lost. Test commands: modprobe rmnet ip netns add nst ip netns add nst2 ip link add veth0 type veth peer name veth1 ip link add veth2 type veth peer name veth3 ip link set veth1 netns nst ip link set veth3 netns nst2 ip link add rmnet0 link veth0 type rmnet mux_id 1 ip link set veth2 master rmnet0 ip link set veth0 up ip link set veth2 up ip link set rmnet0 up ip a a 192.168.100.1/24 dev rmnet0 ip netns exec nst ip link set veth1 up ip netns exec nst ip a a 192.168.100.2/24 dev veth1 ip netns exec nst2 ip link set veth3 up ip netns exec nst2 ip a a 192.168.100.3/24 dev veth3 ip netns exec nst2 ping 192.168.100.2 Fixes: 60d58f971c10 ("net: qualcomm: rmnet: Implement bridge mode") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c index 074a8b326c30..29a7bfa2584d 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c @@ -159,6 +159,9 @@ static int rmnet_map_egress_handler(struct sk_buff *skb, static void rmnet_bridge_handler(struct sk_buff *skb, struct net_device *bridge_dev) { + if (skb_mac_header_was_set(skb)) + skb_push(skb, skb->mac_len); + if (bridge_dev) { skb->dev = bridge_dev; dev_queue_xmit(skb); -- cgit v1.2.3-59-g8ed1b From 23797b98909f34b75fd130369bde86f760db69d0 Mon Sep 17 00:00:00 2001 From: "Alex Maftei (amaftei)" Date: Wed, 26 Feb 2020 17:33:19 +0000 Subject: sfc: fix timestamp reconstruction at 16-bit rollover points We can't just use the top bits of the last sync event as they could be off-by-one every 65,536 seconds, giving an error in reconstruction of 65,536 seconds. This patch uses the difference in the bottom 16 bits (mod 2^16) to calculate an offset that needs to be applied to the last sync event to get to the current time. Signed-off-by: Alexandru-Mihai Maftei Acked-by: Martin Habets Signed-off-by: David S. 
Miller --- drivers/net/ethernet/sfc/ptp.c | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index af15a737c675..59b4f16896a8 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -560,13 +560,45 @@ efx_ptp_mac_nic_to_ktime_correction(struct efx_nic *efx, u32 nic_major, u32 nic_minor, s32 correction) { + u32 sync_timestamp; ktime_t kt = { 0 }; + s16 delta; if (!(nic_major & 0x80000000)) { WARN_ON_ONCE(nic_major >> 16); - /* Use the top bits from the latest sync event. */ - nic_major &= 0xffff; - nic_major |= (last_sync_timestamp_major(efx) & 0xffff0000); + + /* Medford provides 48 bits of timestamp, so we must get the top + * 16 bits from the timesync event state. + * + * We only have the lower 16 bits of the time now, but we do + * have a full resolution timestamp at some point in past. As + * long as the difference between the (real) now and the sync + * is less than 2^15, then we can reconstruct the difference + * between those two numbers using only the lower 16 bits of + * each. + * + * Put another way + * + * a - b = ((a mod k) - b) mod k + * + * when -k/2 < (a-b) < k/2. In our case k is 2^16. We know + * (a mod k) and b, so can calculate the delta, a - b. + * + */ + sync_timestamp = last_sync_timestamp_major(efx); + + /* Because delta is s16 this does an implicit mask down to + * 16 bits which is what we need, assuming + * MEDFORD_TX_SECS_EVENT_BITS is 16. delta is signed so that + * we can deal with the (unlikely) case of sync timestamps + * arriving from the future. + */ + delta = nic_major - sync_timestamp; + + /* Recover the fully specified time now, by applying the offset + * to the (fully specified) sync time. + */ + nic_major = sync_timestamp + delta; kt = ptp->nic_to_kernel_time(nic_major, nic_minor, correction); -- cgit v1.2.3-59-g8ed1b From ac004e84164e27d69017731a97b11402a69d854b Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Thu, 27 Feb 2020 21:07:53 +0100 Subject: mlxsw: pci: Wait longer before accessing the device after reset During initialization the driver issues a reset to the device and waits for 100ms before checking if the firmware is ready. The waiting is necessary because before that the device is irresponsive and the first read can result in a completion timeout. While 100ms is sufficient for Spectrum-1 and Spectrum-2, it is insufficient for Spectrum-3. Fix this by increasing the timeout to 200ms. Fixes: da382875c616 ("mlxsw: spectrum: Extend to support Spectrum-3 ASIC") Signed-off-by: Amit Cohen Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/pci_hw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h index e0d7d2d9a0c8..43fa8c85b5d9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h @@ -28,7 +28,7 @@ #define MLXSW_PCI_SW_RESET 0xF0010 #define MLXSW_PCI_SW_RESET_RST_BIT BIT(0) #define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 900000 -#define MLXSW_PCI_SW_RESET_WAIT_MSECS 100 +#define MLXSW_PCI_SW_RESET_WAIT_MSECS 200 #define MLXSW_PCI_FW_READY 0xA1844 #define MLXSW_PCI_FW_READY_MASK 0xFFFF #define MLXSW_PCI_FW_READY_MAGIC 0x5E -- cgit v1.2.3-59-g8ed1b From 3ee339eb28959629db33aaa2b8cde4c63c6289eb Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 27 Feb 2020 21:20:49 +0100 Subject: net: dsa: mv88e6xxx: Fix masking of egress port Add missing ~ to the usage of the mask. Reported-by: Kevin Benson Reported-by: Chris Healy Fixes: 5c74c54ce6ff ("net: dsa: mv88e6xxx: Split monitor port configuration") Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/global1.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/dsa/mv88e6xxx/global1.c b/drivers/net/dsa/mv88e6xxx/global1.c index b016cc205f81..ca3a7a7a73c3 100644 --- a/drivers/net/dsa/mv88e6xxx/global1.c +++ b/drivers/net/dsa/mv88e6xxx/global1.c @@ -278,13 +278,13 @@ int mv88e6095_g1_set_egress_port(struct mv88e6xxx_chip *chip, switch (direction) { case MV88E6XXX_EGRESS_DIR_INGRESS: dest_port_chip = &chip->ingress_dest_port; - reg &= MV88E6185_G1_MONITOR_CTL_INGRESS_DEST_MASK; + reg &= ~MV88E6185_G1_MONITOR_CTL_INGRESS_DEST_MASK; reg |= port << __bf_shf(MV88E6185_G1_MONITOR_CTL_INGRESS_DEST_MASK); break; case MV88E6XXX_EGRESS_DIR_EGRESS: dest_port_chip = &chip->egress_dest_port; - reg &= MV88E6185_G1_MONITOR_CTL_EGRESS_DEST_MASK; + reg &= ~MV88E6185_G1_MONITOR_CTL_EGRESS_DEST_MASK; reg |= port << __bf_shf(MV88E6185_G1_MONITOR_CTL_EGRESS_DEST_MASK); break; -- cgit v1.2.3-59-g8ed1b
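As a closing illustration of the one-character fix above: a register field update must clear the field with the complement of its mask before OR-ing in the new value, otherwise every bit outside the field is wiped. The stand-alone demo below is not driver code; the 6-bit field, its bit-0 position (the real driver also shifts the value by __bf_shf(mask)) and the sample values are made up.

/* Stand-alone illustration (not driver code): update a bit field inside a
 * 16-bit register value. Field mask and values are invented for the demo.
 */
#include <stdio.h>
#include <stdint.h>

#define FIELD_MASK 0x003Fu	/* hypothetical 6-bit destination-port field */

static uint16_t set_field_buggy(uint16_t reg, uint16_t port)
{
	reg &= FIELD_MASK;	/* BUG: keeps only the field, wipes the rest */
	reg |= port;
	return reg;
}

static uint16_t set_field_fixed(uint16_t reg, uint16_t port)
{
	reg &= ~FIELD_MASK;		/* clear just the field ... */
	reg |= port & FIELD_MASK;	/* ... then write the new value into it */
	return reg;
}

int main(void)
{
	uint16_t reg = 0xA515;	/* pretend this came from a register read */

	printf("buggy: 0x%04x\n", set_field_buggy(reg, 0x9));	/* other bits lost */
	printf("fixed: 0x%04x\n", set_field_fixed(reg, 0x9));	/* other bits kept */
	return 0;
}

Compiled with any C compiler, the buggy variant prints 0x001d (the bits outside the field are lost), while the fixed variant prints 0xa509.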