diff options
29 files changed, 166 insertions, 170 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 3563492e4eba..43d5f8709779 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20594,8 +20594,8 @@ F: Documentation/devicetree/bindings/i2c/renesas,iic-emev2.yaml F: drivers/i2c/busses/i2c-emev2.c RENESAS ETHERNET AVB DRIVER -M: Paul Barker <paul.barker.ct@bp.renesas.com> M: Niklas Söderlund <niklas.soderlund@ragnatech.se> +R: Paul Barker <paul@pbarker.dev> L: netdev@vger.kernel.org L: linux-renesas-soc@vger.kernel.org S: Maintained diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index a8e930d5dbb0..7564705d6478 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -20,6 +20,7 @@ #include <asm/byteorder.h> #include <linux/bitmap.h> #include <linux/auxiliary_bus.h> +#include <net/netdev_lock.h> #include "bnxt_hsi.h" #include "bnxt.h" @@ -309,14 +310,12 @@ void bnxt_ulp_irq_stop(struct bnxt *bp) if (!ulp->msix_requested) return; - netdev_lock(bp->dev); - ops = rcu_dereference(ulp->ulp_ops); + ops = netdev_lock_dereference(ulp->ulp_ops, bp->dev); if (!ops || !ops->ulp_irq_stop) return; if (test_bit(BNXT_STATE_FW_RESET_DET, &bp->state)) reset = true; ops->ulp_irq_stop(ulp->handle, reset); - netdev_unlock(bp->dev); } } @@ -335,8 +334,7 @@ void bnxt_ulp_irq_restart(struct bnxt *bp, int err) if (!ulp->msix_requested) return; - netdev_lock(bp->dev); - ops = rcu_dereference(ulp->ulp_ops); + ops = netdev_lock_dereference(ulp->ulp_ops, bp->dev); if (!ops || !ops->ulp_irq_restart) return; @@ -348,7 +346,6 @@ void bnxt_ulp_irq_restart(struct bnxt *bp, int err) bnxt_fill_msix_vecs(bp, ent); } ops->ulp_irq_restart(ulp->handle, ent); - netdev_unlock(bp->dev); kfree(ent); } } diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c index a0bcfb5a713d..ff3295b60a69 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c @@ -61,6 +61,8 @@ static int hbg_reset_prepare(struct hbg_priv *priv, enum hbg_reset_type type) return -EBUSY; } + netif_device_detach(priv->netdev); + priv->reset_type = type; set_bit(HBG_NIC_STATE_RESETTING, &priv->state); clear_bit(HBG_NIC_STATE_RESET_FAIL, &priv->state); @@ -91,6 +93,8 @@ static int hbg_reset_done(struct hbg_priv *priv, enum hbg_reset_type type) return ret; } + netif_device_attach(priv->netdev); + dev_info(&priv->pdev->dev, "reset done\n"); return ret; } @@ -117,16 +121,13 @@ void hbg_err_reset(struct hbg_priv *priv) if (running) dev_close(priv->netdev); - hbg_reset(priv); - - /* in hbg_pci_err_detected(), we will detach first, - * so we need to attach before open - */ - if (!netif_device_present(priv->netdev)) - netif_device_attach(priv->netdev); + if (hbg_reset(priv)) + goto err_unlock; if (running) dev_open(priv->netdev, NULL); + +err_unlock: rtnl_unlock(); } @@ -160,7 +161,6 @@ static pci_ers_result_t hbg_pci_err_slot_reset(struct pci_dev *pdev) pci_save_state(pdev); hbg_err_reset(priv); - netif_device_attach(netdev); return PCI_ERS_RESULT_RECOVERED; } diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c index 8f1107b85fbb..55520053270a 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c @@ -317,6 +317,9 @@ static void hbg_update_stats_by_info(struct hbg_priv *priv, const struct hbg_ethtool_stats *stats; u32 i; + if (test_bit(HBG_NIC_STATE_RESETTING, &priv->state)) + return; + for (i = 0; i < info_len; i++) { stats = &info[i]; if (!stats->reg) diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index 22371011c249..2410aee59fb2 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -1321,12 +1321,18 @@ static void ice_lag_changeupper_event(struct ice_lag *lag, void *ptr) */ if (!primary_lag) { lag->primary = true; + if (!ice_is_switchdev_running(lag->pf)) + return; + /* Configure primary's SWID to be shared */ ice_lag_primary_swid(lag, true); primary_lag = lag; } else { u16 swid; + if (!ice_is_switchdev_running(primary_lag->pf)) + return; + swid = primary_lag->pf->hw.port_info->sw_id; ice_lag_set_swid(swid, lag, true); ice_lag_add_prune_list(primary_lag, lag->pf); diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index 7c3006eb68dd..6446d0fcc052 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -4275,7 +4275,6 @@ static int ice_vc_repr_add_mac(struct ice_vf *vf, u8 *msg) } ice_vfhw_mac_add(vf, &al->list[i]); - vf->num_mac++; break; } diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h index aef0e9775a33..70dbf80f3bb7 100644 --- a/drivers/net/ethernet/intel/idpf/idpf.h +++ b/drivers/net/ethernet/intel/idpf/idpf.h @@ -143,6 +143,7 @@ enum idpf_vport_state { * @vport_id: Vport identifier * @link_speed_mbps: Link speed in mbps * @vport_idx: Relative vport index + * @max_tx_hdr_size: Max header length hardware can support * @state: See enum idpf_vport_state * @netstats: Packet and byte stats * @stats_lock: Lock to protect stats update @@ -153,6 +154,7 @@ struct idpf_netdev_priv { u32 vport_id; u32 link_speed_mbps; u16 vport_idx; + u16 max_tx_hdr_size; enum idpf_vport_state state; struct rtnl_link_stats64 netstats; spinlock_t stats_lock; diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 82f09b4030bc..3a033ce19cda 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -723,6 +723,7 @@ static int idpf_cfg_netdev(struct idpf_vport *vport) np->vport = vport; np->vport_idx = vport->idx; np->vport_id = vport->vport_id; + np->max_tx_hdr_size = idpf_get_max_tx_hdr_size(adapter); vport->netdev = netdev; return idpf_init_mac_addr(vport, netdev); @@ -740,6 +741,7 @@ static int idpf_cfg_netdev(struct idpf_vport *vport) np->adapter = adapter; np->vport_idx = vport->idx; np->vport_id = vport->vport_id; + np->max_tx_hdr_size = idpf_get_max_tx_hdr_size(adapter); spin_lock_init(&np->stats_lock); @@ -2203,8 +2205,8 @@ static netdev_features_t idpf_features_check(struct sk_buff *skb, struct net_device *netdev, netdev_features_t features) { - struct idpf_vport *vport = idpf_netdev_to_vport(netdev); - struct idpf_adapter *adapter = vport->adapter; + struct idpf_netdev_priv *np = netdev_priv(netdev); + u16 max_tx_hdr_size = np->max_tx_hdr_size; size_t len; /* No point in doing any of this if neither checksum nor GSO are @@ -2227,7 +2229,7 @@ static netdev_features_t idpf_features_check(struct sk_buff *skb, goto unsupported; len = skb_network_header_len(skb); - if (unlikely(len > idpf_get_max_tx_hdr_size(adapter))) + if (unlikely(len > max_tx_hdr_size)) goto unsupported; if (!skb->encapsulation) @@ -2240,7 +2242,7 @@ static netdev_features_t idpf_features_check(struct sk_buff *skb, /* IPLEN can support at most 127 dwords */ len = skb_inner_network_header_len(skb); - if (unlikely(len > idpf_get_max_tx_hdr_size(adapter))) + if (unlikely(len > max_tx_hdr_size)) goto unsupported; /* No need to validate L4LEN as TCP is the only protocol with a diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index bdf52cef3891..2d5f5c9f91ce 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -4025,6 +4025,14 @@ static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget) return budget; } + /* Switch to poll mode in the tear-down path after sending disable + * queues virtchnl message, as the interrupts will be disabled after + * that. + */ + if (unlikely(q_vector->num_txq && idpf_queue_has(POLL_MODE, + q_vector->tx[0]))) + return budget; + work_done = min_t(int, work_done, budget - 1); /* Exit the polling mode, but don't re-enable interrupts if stack might @@ -4035,15 +4043,7 @@ static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget) else idpf_vport_intr_set_wb_on_itr(q_vector); - /* Switch to poll mode in the tear-down path after sending disable - * queues virtchnl message, as the interrupts will be disabled after - * that - */ - if (unlikely(q_vector->num_txq && idpf_queue_has(POLL_MODE, - q_vector->tx[0]))) - return budget; - else - return work_done; + return work_done; } /** diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c index 7fa98aeb3663..4a3370a40dd8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c @@ -13,19 +13,26 @@ /* RVU LMTST */ #define LMT_TBL_OP_READ 0 #define LMT_TBL_OP_WRITE 1 -#define LMT_MAP_TABLE_SIZE (128 * 1024) #define LMT_MAPTBL_ENTRY_SIZE 16 +#define LMT_MAX_VFS 256 + +#define LMT_MAP_ENTRY_ENA BIT_ULL(20) +#define LMT_MAP_ENTRY_LINES GENMASK_ULL(18, 16) /* Function to perform operations (read/write) on lmtst map table */ static int lmtst_map_table_ops(struct rvu *rvu, u32 index, u64 *val, int lmt_tbl_op) { void __iomem *lmt_map_base; - u64 tbl_base; + u64 tbl_base, cfg; + int pfs, vfs; tbl_base = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_MAP_BASE); + cfg = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_CFG); + vfs = 1 << (cfg & 0xF); + pfs = 1 << ((cfg >> 4) & 0x7); - lmt_map_base = ioremap_wc(tbl_base, LMT_MAP_TABLE_SIZE); + lmt_map_base = ioremap_wc(tbl_base, pfs * vfs * LMT_MAPTBL_ENTRY_SIZE); if (!lmt_map_base) { dev_err(rvu->dev, "Failed to setup lmt map table mapping!!\n"); return -ENOMEM; @@ -35,6 +42,13 @@ static int lmtst_map_table_ops(struct rvu *rvu, u32 index, u64 *val, *val = readq(lmt_map_base + index); } else { writeq((*val), (lmt_map_base + index)); + + cfg = FIELD_PREP(LMT_MAP_ENTRY_ENA, 0x1); + /* 2048 LMTLINES */ + cfg |= FIELD_PREP(LMT_MAP_ENTRY_LINES, 0x6); + + writeq(cfg, (lmt_map_base + (index + 8))); + /* Flushing the AP interceptor cache to make APR_LMT_MAP_ENTRY_S * changes effective. Write 1 for flush and read is being used as a * barrier and sets up a data dependency. Write to 0 after a write @@ -52,7 +66,7 @@ static int lmtst_map_table_ops(struct rvu *rvu, u32 index, u64 *val, #define LMT_MAP_TBL_W1_OFF 8 static u32 rvu_get_lmtst_tbl_index(struct rvu *rvu, u16 pcifunc) { - return ((rvu_get_pf(pcifunc) * rvu->hw->total_vfs) + + return ((rvu_get_pf(pcifunc) * LMT_MAX_VFS) + (pcifunc & RVU_PFVF_FUNC_MASK)) * LMT_MAPTBL_ENTRY_SIZE; } @@ -69,7 +83,7 @@ static int rvu_get_lmtaddr(struct rvu *rvu, u16 pcifunc, mutex_lock(&rvu->rsrc_lock); rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_SMMU_ADDR_REQ, iova); - pf = rvu_get_pf(pcifunc) & 0x1F; + pf = rvu_get_pf(pcifunc) & RVU_PFVF_PF_MASK; val = BIT_ULL(63) | BIT_ULL(14) | BIT_ULL(13) | pf << 8 | ((pcifunc & RVU_PFVF_FUNC_MASK) & 0xFF); rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_SMMU_TXN_REQ, val); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index a1f9ec03c2ce..c827da626471 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -553,6 +553,7 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp, u64 lmt_addr, val, tbl_base; int pf, vf, num_vfs, hw_vfs; void __iomem *lmt_map_base; + int apr_pfs, apr_vfs; int buf_size = 10240; size_t off = 0; int index = 0; @@ -568,8 +569,12 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp, return -ENOMEM; tbl_base = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_MAP_BASE); + val = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_CFG); + apr_vfs = 1 << (val & 0xF); + apr_pfs = 1 << ((val >> 4) & 0x7); - lmt_map_base = ioremap_wc(tbl_base, 128 * 1024); + lmt_map_base = ioremap_wc(tbl_base, apr_pfs * apr_vfs * + LMT_MAPTBL_ENTRY_SIZE); if (!lmt_map_base) { dev_err(rvu->dev, "Failed to setup lmt map table mapping!!\n"); kfree(buf); @@ -591,7 +596,7 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp, off += scnprintf(&buf[off], buf_size - 1 - off, "PF%d \t\t\t", pf); - index = pf * rvu->hw->total_vfs * LMT_MAPTBL_ENTRY_SIZE; + index = pf * apr_vfs * LMT_MAPTBL_ENTRY_SIZE; off += scnprintf(&buf[off], buf_size - 1 - off, " 0x%llx\t\t", (tbl_base + index)); lmt_addr = readq(lmt_map_base + index); @@ -604,7 +609,7 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp, /* Reading num of VFs per PF */ rvu_get_pf_numvfs(rvu, pf, &num_vfs, &hw_vfs); for (vf = 0; vf < num_vfs; vf++) { - index = (pf * rvu->hw->total_vfs * 16) + + index = (pf * apr_vfs * LMT_MAPTBL_ENTRY_SIZE) + ((vf + 1) * LMT_MAPTBL_ENTRY_SIZE); off += scnprintf(&buf[off], buf_size - 1 - off, "PF%d:VF%d \t\t", pf, vf); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 7ef3ba477d49..9b28be4c4a5d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -729,9 +729,12 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) } #ifdef CONFIG_DCB - err = otx2_dcbnl_set_ops(netdev); - if (err) - goto err_free_zc_bmap; + /* Priority flow control is not supported for LBK and SDP vf(s) */ + if (!(is_otx2_lbkvf(vf->pdev) || is_otx2_sdp_rep(vf->pdev))) { + err = otx2_dcbnl_set_ops(netdev); + if (err) + goto err_free_zc_bmap; + } #endif otx2_qos_init(vf, qos_txqs); diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index e2d6bfb5d693..a70b88037a20 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -3495,6 +3495,7 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter, struct pci_dev *pdev) { struct lan743x_tx *tx; + u32 sgmii_ctl; int index; int ret; @@ -3507,6 +3508,15 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter, spin_lock_init(&adapter->eth_syslock_spinlock); mutex_init(&adapter->sgmii_rw_lock); pci11x1x_set_rfe_rd_fifo_threshold(adapter); + sgmii_ctl = lan743x_csr_read(adapter, SGMII_CTL); + if (adapter->is_sgmii_en) { + sgmii_ctl |= SGMII_CTL_SGMII_ENABLE_; + sgmii_ctl &= ~SGMII_CTL_SGMII_POWER_DN_; + } else { + sgmii_ctl &= ~SGMII_CTL_SGMII_ENABLE_; + sgmii_ctl |= SGMII_CTL_SGMII_POWER_DN_; + } + lan743x_csr_write(adapter, SGMII_CTL, sgmii_ctl); } else { adapter->max_tx_channels = LAN743X_MAX_TX_CHANNELS; adapter->used_tx_channels = LAN743X_USED_TX_CHANNELS; @@ -3558,7 +3568,6 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter, static int lan743x_mdiobus_init(struct lan743x_adapter *adapter) { - u32 sgmii_ctl; int ret; adapter->mdiobus = devm_mdiobus_alloc(&adapter->pdev->dev); @@ -3570,10 +3579,6 @@ static int lan743x_mdiobus_init(struct lan743x_adapter *adapter) adapter->mdiobus->priv = (void *)adapter; if (adapter->is_pci11x1x) { if (adapter->is_sgmii_en) { - sgmii_ctl = lan743x_csr_read(adapter, SGMII_CTL); - sgmii_ctl |= SGMII_CTL_SGMII_ENABLE_; - sgmii_ctl &= ~SGMII_CTL_SGMII_POWER_DN_; - lan743x_csr_write(adapter, SGMII_CTL, sgmii_ctl); netif_dbg(adapter, drv, adapter->netdev, "SGMII operation\n"); adapter->mdiobus->read = lan743x_mdiobus_read_c22; @@ -3584,10 +3589,6 @@ static int lan743x_mdiobus_init(struct lan743x_adapter *adapter) netif_dbg(adapter, drv, adapter->netdev, "lan743x-mdiobus-c45\n"); } else { - sgmii_ctl = lan743x_csr_read(adapter, SGMII_CTL); - sgmii_ctl &= ~SGMII_CTL_SGMII_ENABLE_; - sgmii_ctl |= SGMII_CTL_SGMII_POWER_DN_; - lan743x_csr_write(adapter, SGMII_CTL, sgmii_ctl); netif_dbg(adapter, drv, adapter->netdev, "RGMII operation\n"); // Only C22 support when RGMII I/F diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 85723a78793a..6c7e8655a7eb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -964,7 +964,7 @@ static int sun8i_dwmac_set_syscon(struct device *dev, /* of_mdio_parse_addr returns a valid (0 ~ 31) PHY * address. No need to mask it again. */ - reg |= 1 << H3_EPHY_ADDR_SHIFT; + reg |= ret << H3_EPHY_ADDR_SHIFT; } else { /* For SoCs without internal PHY the PHY selection bit should be * set to 0 (external PHY). diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 1e6d2335293d..30665ffe78cf 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -2685,7 +2685,7 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) port->slave.mac_addr); if (!is_valid_ether_addr(port->slave.mac_addr)) { eth_random_addr(port->slave.mac_addr); - dev_err(dev, "Use random MAC address\n"); + dev_info(dev, "Use random MAC address\n"); } } diff --git a/include/net/netdev_lock.h b/include/net/netdev_lock.h index c316b551df8d..0ee5bc766810 100644 --- a/include/net/netdev_lock.h +++ b/include/net/netdev_lock.h @@ -98,6 +98,9 @@ static inline int netdev_lock_cmp_fn(const struct lockdep_map *a, &qdisc_xmit_lock_key); \ } +#define netdev_lock_dereference(p, dev) \ + rcu_dereference_protected(p, lockdep_is_held(&(dev)->lock)) + int netdev_debug_event(struct notifier_block *nb, unsigned long event, void *ptr); diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 39365fd2ea17..06ab2a3d2ebd 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -236,7 +236,6 @@ struct xfrm_state { /* Data for encapsulator */ struct xfrm_encap_tmpl *encap; - struct sock __rcu *encap_sk; /* NAT keepalive */ u32 nat_keepalive_interval; /* seconds */ diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 0e4076866c0a..f14a41ee4aa1 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -120,47 +120,16 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb) } #ifdef CONFIG_INET_ESPINTCP -struct esp_tcp_sk { - struct sock *sk; - struct rcu_head rcu; -}; - -static void esp_free_tcp_sk(struct rcu_head *head) -{ - struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu); - - sock_put(esk->sk); - kfree(esk); -} - static struct sock *esp_find_tcp_sk(struct xfrm_state *x) { struct xfrm_encap_tmpl *encap = x->encap; struct net *net = xs_net(x); - struct esp_tcp_sk *esk; __be16 sport, dport; - struct sock *nsk; struct sock *sk; - sk = rcu_dereference(x->encap_sk); - if (sk && sk->sk_state == TCP_ESTABLISHED) - return sk; - spin_lock_bh(&x->lock); sport = encap->encap_sport; dport = encap->encap_dport; - nsk = rcu_dereference_protected(x->encap_sk, - lockdep_is_held(&x->lock)); - if (sk && sk == nsk) { - esk = kmalloc(sizeof(*esk), GFP_ATOMIC); - if (!esk) { - spin_unlock_bh(&x->lock); - return ERR_PTR(-ENOMEM); - } - RCU_INIT_POINTER(x->encap_sk, NULL); - esk->sk = sk; - call_rcu(&esk->rcu, esp_free_tcp_sk); - } spin_unlock_bh(&x->lock); sk = inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, x->id.daddr.a4, @@ -173,20 +142,6 @@ static struct sock *esp_find_tcp_sk(struct xfrm_state *x) return ERR_PTR(-EINVAL); } - spin_lock_bh(&x->lock); - nsk = rcu_dereference_protected(x->encap_sk, - lockdep_is_held(&x->lock)); - if (encap->encap_sport != sport || - encap->encap_dport != dport) { - sock_put(sk); - sk = nsk ?: ERR_PTR(-EREMCHG); - } else if (sk == nsk) { - sock_put(sk); - } else { - rcu_assign_pointer(x->encap_sk, sk); - } - spin_unlock_bh(&x->lock); - return sk; } @@ -199,8 +154,10 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb) sk = esp_find_tcp_sk(x); err = PTR_ERR_OR_ZERO(sk); - if (err) + if (err) { + kfree_skb(skb); goto out; + } bh_lock_sock(sk); if (sock_owned_by_user(sk)) @@ -209,6 +166,8 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb) err = espintcp_push_skb(sk, skb); bh_unlock_sock(sk); + sock_put(sk); + out: rcu_read_unlock(); return err; @@ -392,6 +351,8 @@ static struct ip_esp_hdr *esp_output_tcp_encap(struct xfrm_state *x, if (IS_ERR(sk)) return ERR_CAST(sk); + sock_put(sk); + *lenp = htons(len); esph = (struct ip_esp_hdr *)(lenp + 1); diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index b5b06323cfd9..0d31a8c108d4 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -182,11 +182,15 @@ struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, int offset = skb_gro_offset(skb); const struct net_offload *ops; struct sk_buff *pp = NULL; - int ret; - - offset = offset - sizeof(struct udphdr); + int len, dlen; + __u8 *udpdata; + __be32 *udpdata32; - if (!pskb_pull(skb, offset)) + len = skb->len - offset; + dlen = offset + min(len, 8); + udpdata = skb_gro_header(skb, dlen, offset); + udpdata32 = (__be32 *)udpdata; + if (unlikely(!udpdata)) return NULL; rcu_read_lock(); @@ -194,11 +198,10 @@ struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, if (!ops || !ops->callbacks.gro_receive) goto out; - ret = __xfrm4_udp_encap_rcv(sk, skb, false); - if (ret) + /* check if it is a keepalive or IKE packet */ + if (len <= sizeof(struct ip_esp_hdr) || udpdata32[0] == 0) goto out; - skb_push(skb, offset); NAPI_GRO_CB(skb)->proto = IPPROTO_UDP; pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); @@ -208,7 +211,6 @@ struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, out: rcu_read_unlock(); - skb_push(skb, offset); NAPI_GRO_CB(skb)->same_flow = 0; NAPI_GRO_CB(skb)->flush = 1; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 9e73944e3b53..72adfc107b55 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -137,47 +137,16 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb) } #ifdef CONFIG_INET6_ESPINTCP -struct esp_tcp_sk { - struct sock *sk; - struct rcu_head rcu; -}; - -static void esp_free_tcp_sk(struct rcu_head *head) -{ - struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu); - - sock_put(esk->sk); - kfree(esk); -} - static struct sock *esp6_find_tcp_sk(struct xfrm_state *x) { struct xfrm_encap_tmpl *encap = x->encap; struct net *net = xs_net(x); - struct esp_tcp_sk *esk; __be16 sport, dport; - struct sock *nsk; struct sock *sk; - sk = rcu_dereference(x->encap_sk); - if (sk && sk->sk_state == TCP_ESTABLISHED) - return sk; - spin_lock_bh(&x->lock); sport = encap->encap_sport; dport = encap->encap_dport; - nsk = rcu_dereference_protected(x->encap_sk, - lockdep_is_held(&x->lock)); - if (sk && sk == nsk) { - esk = kmalloc(sizeof(*esk), GFP_ATOMIC); - if (!esk) { - spin_unlock_bh(&x->lock); - return ERR_PTR(-ENOMEM); - } - RCU_INIT_POINTER(x->encap_sk, NULL); - esk->sk = sk; - call_rcu(&esk->rcu, esp_free_tcp_sk); - } spin_unlock_bh(&x->lock); sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, &x->id.daddr.in6, @@ -190,20 +159,6 @@ static struct sock *esp6_find_tcp_sk(struct xfrm_state *x) return ERR_PTR(-EINVAL); } - spin_lock_bh(&x->lock); - nsk = rcu_dereference_protected(x->encap_sk, - lockdep_is_held(&x->lock)); - if (encap->encap_sport != sport || - encap->encap_dport != dport) { - sock_put(sk); - sk = nsk ?: ERR_PTR(-EREMCHG); - } else if (sk == nsk) { - sock_put(sk); - } else { - rcu_assign_pointer(x->encap_sk, sk); - } - spin_unlock_bh(&x->lock); - return sk; } @@ -216,8 +171,10 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb) sk = esp6_find_tcp_sk(x); err = PTR_ERR_OR_ZERO(sk); - if (err) + if (err) { + kfree_skb(skb); goto out; + } bh_lock_sock(sk); if (sock_owned_by_user(sk)) @@ -226,6 +183,8 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb) err = espintcp_push_skb(sk, skb); bh_unlock_sock(sk); + sock_put(sk); + out: rcu_read_unlock(); return err; @@ -422,6 +381,8 @@ static struct ip_esp_hdr *esp6_output_tcp_encap(struct xfrm_state *x, if (IS_ERR(sk)) return ERR_CAST(sk); + sock_put(sk); + *lenp = htons(len); esph = (struct ip_esp_hdr *)(lenp + 1); diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 4abc5e9d6322..841c81abaaf4 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -179,14 +179,18 @@ struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, int offset = skb_gro_offset(skb); const struct net_offload *ops; struct sk_buff *pp = NULL; - int ret; + int len, dlen; + __u8 *udpdata; + __be32 *udpdata32; if (skb->protocol == htons(ETH_P_IP)) return xfrm4_gro_udp_encap_rcv(sk, head, skb); - offset = offset - sizeof(struct udphdr); - - if (!pskb_pull(skb, offset)) + len = skb->len - offset; + dlen = offset + min(len, 8); + udpdata = skb_gro_header(skb, dlen, offset); + udpdata32 = (__be32 *)udpdata; + if (unlikely(!udpdata)) return NULL; rcu_read_lock(); @@ -194,11 +198,10 @@ struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, if (!ops || !ops->callbacks.gro_receive) goto out; - ret = __xfrm6_udp_encap_rcv(sk, skb, false); - if (ret) + /* check if it is a keepalive or IKE packet */ + if (len <= sizeof(struct ip_esp_hdr) || udpdata32[0] == 0) goto out; - skb_push(skb, offset); NAPI_GRO_CB(skb)->proto = IPPROTO_UDP; pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); @@ -208,7 +211,6 @@ struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, out: rcu_read_unlock(); - skb_push(skb, offset); NAPI_GRO_CB(skb)->same_flow = 0; NAPI_GRO_CB(skb)->flush = 1; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index cb8c525ea20e..7986145a527c 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1569,6 +1569,9 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) return err; } + sch->qstats.backlog += len; + sch->q.qlen++; + if (first && !cl->cl_nactive) { if (cl->cl_flags & HFSC_RSC) init_ed(cl, len); @@ -1584,9 +1587,6 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) } - sch->qstats.backlog += len; - sch->q.qlen++; - return NET_XMIT_SUCCESS; } diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index c524421ec652..8584893b4785 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -817,12 +817,16 @@ static int tipc_aead_encrypt(struct tipc_aead *aead, struct sk_buff *skb, goto exit; } + /* Get net to avoid freed tipc_crypto when delete namespace */ + get_net(aead->crypto->net); + /* Now, do encrypt */ rc = crypto_aead_encrypt(req); if (rc == -EINPROGRESS || rc == -EBUSY) return rc; tipc_bearer_put(b); + put_net(aead->crypto->net); exit: kfree(ctx); @@ -860,6 +864,7 @@ static void tipc_aead_encrypt_done(void *data, int err) kfree(tx_ctx); tipc_bearer_put(b); tipc_aead_put(aead); + put_net(net); } /** diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 4abc81f33d3e..72c000c0ae5f 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -1304,7 +1304,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) xs->queue_id = qid; xp_add_xsk(xs->pool, xs); - if (xs->zc && qid < dev->real_num_rx_queues) { + if (qid < dev->real_num_rx_queues) { struct netdev_rx_queue *rxq; rxq = __netif_get_rx_queue(dev, qid); diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c index fe82e2d07300..fc7a603b04f1 100644 --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c @@ -171,8 +171,10 @@ int espintcp_queue_out(struct sock *sk, struct sk_buff *skb) struct espintcp_ctx *ctx = espintcp_getctx(sk); if (skb_queue_len(&ctx->out_queue) >= - READ_ONCE(net_hotdata.max_backlog)) + READ_ONCE(net_hotdata.max_backlog)) { + kfree_skb(skb); return -ENOBUFS; + } __skb_queue_tail(&ctx->out_queue, skb); diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index 0c1420534394..907c3ccb440d 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -48,7 +48,6 @@ static int ipcomp_post_acomp(struct sk_buff *skb, int err, int hlen) { struct acomp_req *req = ipcomp_cb(skb)->req; struct ipcomp_req_extra *extra; - const int plen = skb->data_len; struct scatterlist *dsg; int len, dlen; @@ -64,7 +63,7 @@ static int ipcomp_post_acomp(struct sk_buff *skb, int err, int hlen) /* Only update truesize on input. */ if (!hlen) - skb->truesize += dlen - plen; + skb->truesize += dlen; skb->data_len = dlen; skb->len += dlen; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 143ac3aa7537..f4bad8c895d6 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1581,6 +1581,9 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) struct xfrm_policy *delpol; struct hlist_head *chain; + /* Sanitize mark before store */ + policy->mark.v &= policy->mark.m; + spin_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); if (chain) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 341d79ecb5c2..07fe8e5daa32 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -838,9 +838,6 @@ int __xfrm_state_delete(struct xfrm_state *x) xfrm_nat_keepalive_state_updated(x); spin_unlock(&net->xfrm.xfrm_state_lock); - if (x->encap_sk) - sock_put(rcu_dereference_raw(x->encap_sk)); - xfrm_dev_state_delete(x); /* All xfrm_state objects are created by xfrm_state_alloc. @@ -1721,6 +1718,9 @@ static void __xfrm_state_insert(struct xfrm_state *x) list_add(&x->km.all, &net->xfrm.state_all); + /* Sanitize mark before store */ + x->mark.v &= x->mark.m; + h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr, x->props.reqid, x->props.family); XFRM_STATE_INSERT(bydst, &x->bydst, net->xfrm.state_bydst + h, diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index a951c0d33cd2..ddc97ecd8b39 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -573,5 +573,32 @@ "teardown": [ "$TC qdisc del dev $DEV1 handle 1: root" ] + }, + { + "id": "831d", + "name": "Test HFSC qlen accounting with DRR/NETEM/BLACKHOLE chain", + "category": ["qdisc", "hfsc", "drr", "netem", "blackhole"], + "plugins": { "requires": ["nsPlugin", "scapyPlugin"] }, + "setup": [ + "$IP link set dev $DEV1 up || true", + "$TC qdisc add dev $DEV1 root handle 1: drr", + "$TC filter add dev $DEV1 parent 1: basic classid 1:1", + "$TC class add dev $DEV1 parent 1: classid 1:1 drr", + "$TC qdisc add dev $DEV1 parent 1:1 handle 2: hfsc def 1", + "$TC class add dev $DEV1 parent 2: classid 2:1 hfsc rt m1 8 d 1 m2 0", + "$TC qdisc add dev $DEV1 parent 2:1 handle 3: netem", + "$TC qdisc add dev $DEV1 parent 3:1 handle 4: blackhole" + ], + "scapy": { + "iface": "$DEV0", + "count": 5, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + }, + "cmdUnderTest": "$TC -s qdisc show dev $DEV1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DEV1", + "matchPattern": "qdisc hfsc", + "matchCount": "1", + "teardown": ["$TC qdisc del dev $DEV1 root handle 1: drr"] } ] |