diff options
56 files changed, 847 insertions, 600 deletions
diff --git a/Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml b/Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml index 4c78c546343f..d6c957a33b48 100644 --- a/Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml +++ b/Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml @@ -16,6 +16,7 @@ properties: compatible: oneOf: - const: brcm,bcm5325 + - const: brcm,bcm53101 - const: brcm,bcm53115 - const: brcm,bcm53125 - const: brcm,bcm53128 @@ -77,6 +78,7 @@ allOf: contains: enum: - brcm,bcm5325 + - brcm,bcm53101 - brcm,bcm53115 - brcm,bcm53125 - brcm,bcm53128 diff --git a/Documentation/netlink/specs/rt_rule.yaml b/Documentation/netlink/specs/rt_rule.yaml index a9debac3058a..b30c924087fa 100644 --- a/Documentation/netlink/specs/rt_rule.yaml +++ b/Documentation/netlink/specs/rt_rule.yaml @@ -182,6 +182,14 @@ attribute-sets: type: u32 byte-order: big-endian display-hint: hex + - + name: sport-mask + type: u16 + display-hint: hex + - + name: dport-mask + type: u16 + display-hint: hex operations: enum-model: directional @@ -215,6 +223,8 @@ operations: - dscp - flowlabel - flowlabel-mask + - sport-mask + - dport-mask - name: newrule-ntf doc: Notify a rule creation diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 79dc77835681..61d164ffb3ae 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -2410,6 +2410,19 @@ static const struct b53_chip_data b53_switch_chips[] = { .jumbo_size_reg = B53_JUMBO_MAX_SIZE, }, { + .chip_id = BCM53101_DEVICE_ID, + .dev_name = "BCM53101", + .vlans = 4096, + .enabled_ports = 0x11f, + .arl_bins = 4, + .arl_buckets = 512, + .vta_regs = B53_VTA_REGS, + .imp_port = 8, + .duplex_reg = B53_DUPLEX_STAT_GE, + .jumbo_pm_reg = B53_JUMBO_PORT_MASK, + .jumbo_size_reg = B53_JUMBO_MAX_SIZE, + }, + { .chip_id = BCM53115_DEVICE_ID, .dev_name = "BCM53115", .vlans = 4096, @@ -2789,6 +2802,7 @@ int b53_switch_detect(struct b53_device *dev) return ret; switch (id32) { + case 
BCM53101_DEVICE_ID: case BCM53115_DEVICE_ID: case BCM53125_DEVICE_ID: case BCM53128_DEVICE_ID: diff --git a/drivers/net/dsa/b53/b53_mdio.c b/drivers/net/dsa/b53/b53_mdio.c index 31d070bf161a..43a3b37b731b 100644 --- a/drivers/net/dsa/b53/b53_mdio.c +++ b/drivers/net/dsa/b53/b53_mdio.c @@ -374,6 +374,7 @@ static void b53_mdio_shutdown(struct mdio_device *mdiodev) static const struct of_device_id b53_of_match[] = { { .compatible = "brcm,bcm5325" }, + { .compatible = "brcm,bcm53101" }, { .compatible = "brcm,bcm53115" }, { .compatible = "brcm,bcm53125" }, { .compatible = "brcm,bcm53128" }, diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 9e9b5bc0c5d6..0166c37a13a7 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -66,6 +66,7 @@ enum { BCM5395_DEVICE_ID = 0x95, BCM5397_DEVICE_ID = 0x97, BCM5398_DEVICE_ID = 0x98, + BCM53101_DEVICE_ID = 0x53101, BCM53115_DEVICE_ID = 0x53115, BCM53125_DEVICE_ID = 0x53125, BCM53128_DEVICE_ID = 0x53128, @@ -188,6 +189,7 @@ static inline int is531x5(struct b53_device *dev) { return dev->chip_id == BCM53115_DEVICE_ID || dev->chip_id == BCM53125_DEVICE_ID || + dev->chip_id == BCM53101_DEVICE_ID || dev->chip_id == BCM53128_DEVICE_ID || dev->chip_id == BCM53134_DEVICE_ID; } diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index be95336ce089..c15a5ef4674e 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -666,8 +666,11 @@ int mana_gd_create_hwc_queue(struct gdma_dev *gd, gmi = &queue->mem_info; err = mana_gd_alloc_memory(gc, spec->queue_size, gmi); - if (err) + if (err) { + dev_err(gc->dev, "GDMA queue type: %d, size: %u, gdma memory allocation err: %d\n", + spec->type, spec->queue_size, err); goto free_q; + } queue->head = 0; queue->tail = 0; @@ -688,6 +691,8 @@ int mana_gd_create_hwc_queue(struct gdma_dev *gd, *queue_ptr = queue; return 0; out: + 
dev_err(gc->dev, "Failed to create queue type %d of size %u, err: %d\n", + spec->type, spec->queue_size, err); mana_gd_free_memory(gmi); free_q: kfree(queue); @@ -770,7 +775,13 @@ static int mana_gd_create_dma_region(struct gdma_dev *gd, } gmi->dma_region_handle = resp.dma_region_handle; + dev_dbg(gc->dev, "Created DMA region handle 0x%llx\n", + gmi->dma_region_handle); out: + if (err) + dev_dbg(gc->dev, + "Failed to create DMA region of length: %u, page_type: %d, status: 0x%x, err: %d\n", + length, req->gdma_page_type, resp.hdr.status, err); kfree(req); return err; } @@ -793,8 +804,11 @@ int mana_gd_create_mana_eq(struct gdma_dev *gd, gmi = &queue->mem_info; err = mana_gd_alloc_memory(gc, spec->queue_size, gmi); - if (err) + if (err) { + dev_err(gc->dev, "GDMA queue type: %d, size: %u, gdma memory allocation err: %d\n", + spec->type, spec->queue_size, err); goto free_q; + } err = mana_gd_create_dma_region(gd, gmi); if (err) @@ -815,6 +829,8 @@ int mana_gd_create_mana_eq(struct gdma_dev *gd, *queue_ptr = queue; return 0; out: + dev_err(gc->dev, "Failed to create queue type %d of size: %u, err: %d\n", + spec->type, spec->queue_size, err); mana_gd_free_memory(gmi); free_q: kfree(queue); @@ -841,8 +857,11 @@ int mana_gd_create_mana_wq_cq(struct gdma_dev *gd, gmi = &queue->mem_info; err = mana_gd_alloc_memory(gc, spec->queue_size, gmi); - if (err) + if (err) { + dev_err(gc->dev, "GDMA queue type: %d, size: %u, memory allocation err: %d\n", + spec->type, spec->queue_size, err); goto free_q; + } err = mana_gd_create_dma_region(gd, gmi); if (err) @@ -862,6 +881,8 @@ int mana_gd_create_mana_wq_cq(struct gdma_dev *gd, *queue_ptr = queue; return 0; out: + dev_err(gc->dev, "Failed to create queue type %d of size: %u, err: %d\n", + spec->type, spec->queue_size, err); mana_gd_free_memory(gmi); free_q: kfree(queue); @@ -1157,8 +1178,11 @@ int mana_gd_post_and_ring(struct gdma_queue *queue, int err; err = mana_gd_post_work_request(queue, wqe_req, wqe_info); - if (err) + if (err) 
{ + dev_err(gc->dev, "Failed to post work req from queue type %d of size %u (err=%d)\n", + queue->type, queue->queue_size, err); return err; + } mana_gd_wq_ring_doorbell(gc, queue); @@ -1435,8 +1459,10 @@ static int mana_gd_setup(struct pci_dev *pdev) mana_smc_init(&gc->shm_channel, gc->dev, gc->shm_base); err = mana_gd_setup_irqs(pdev); - if (err) + if (err) { + dev_err(gc->dev, "Failed to setup IRQs: %d\n", err); return err; + } err = mana_hwc_create_channel(gc); if (err) @@ -1454,12 +1480,14 @@ static int mana_gd_setup(struct pci_dev *pdev) if (err) goto destroy_hwc; + dev_dbg(&pdev->dev, "mana gdma setup successful\n"); return 0; destroy_hwc: mana_hwc_destroy_channel(gc); remove_irq: mana_gd_remove_irqs(pdev); + dev_err(&pdev->dev, "%s failed (error %d)\n", __func__, err); return err; } @@ -1470,6 +1498,7 @@ static void mana_gd_cleanup(struct pci_dev *pdev) mana_hwc_destroy_channel(gc); mana_gd_remove_irqs(pdev); + dev_dbg(&pdev->dev, "mana gdma cleanup successful\n"); } static bool mana_is_pf(unsigned short dev_id) @@ -1488,8 +1517,10 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent) BUILD_BUG_ON(2 * MAX_PORTS_IN_MANA_DEV * GDMA_EQE_SIZE > EQ_SIZE); err = pci_enable_device(pdev); - if (err) + if (err) { + dev_err(&pdev->dev, "Failed to enable pci device (err=%d)\n", err); return -ENXIO; + } pci_set_master(pdev); @@ -1498,9 +1529,10 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto disable_dev; err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (err) + if (err) { + dev_err(&pdev->dev, "DMA set mask failed: %d\n", err); goto release_region; - + } dma_set_max_seg_size(&pdev->dev, UINT_MAX); err = -ENOMEM; @@ -1575,6 +1607,8 @@ static void mana_gd_remove(struct pci_dev *pdev) pci_release_regions(pdev); pci_disable_device(pdev); + + dev_dbg(&pdev->dev, "mana gdma remove successful\n"); } /* The 'state' parameter is not used. 
*/ diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c index a00f915c5188..1ba49602089b 100644 --- a/drivers/net/ethernet/microsoft/mana/hw_channel.c +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c @@ -440,7 +440,8 @@ static int mana_hwc_alloc_dma_buf(struct hw_channel_context *hwc, u16 q_depth, gmi = &dma_buf->mem_info; err = mana_gd_alloc_memory(gc, buf_size, gmi); if (err) { - dev_err(hwc->dev, "Failed to allocate DMA buffer: %d\n", err); + dev_err(hwc->dev, "Failed to allocate DMA buffer size: %u, err %d\n", + buf_size, err); goto out; } @@ -529,6 +530,9 @@ static int mana_hwc_create_wq(struct hw_channel_context *hwc, out: if (err) mana_hwc_destroy_wq(hwc, hwc_wq); + + dev_err(hwc->dev, "Failed to create HWC queue size= %u type= %d err= %d\n", + queue_size, q_type, err); return err; } diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index aa1e47233fe5..0411a1897f57 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -52,10 +52,12 @@ static int mana_open(struct net_device *ndev) { struct mana_port_context *apc = netdev_priv(ndev); int err; - err = mana_alloc_queues(ndev); - if (err) + + if (err) { + netdev_err(ndev, "%s failed to allocate queues: %d\n", __func__, err); return err; + } apc->port_is_up = true; @@ -64,7 +66,7 @@ static int mana_open(struct net_device *ndev) netif_carrier_on(ndev); netif_tx_wake_all_queues(ndev); - + netdev_dbg(ndev, "%s successful\n", __func__); return 0; } @@ -176,6 +178,9 @@ static int mana_map_skb(struct sk_buff *skb, struct mana_port_context *apc, return 0; frag_err: + if (net_ratelimit()) + netdev_err(apc->ndev, "Failed to map skb of size %u to DMA\n", + skb->len); for (i = sg_i - 1; i >= hsg; i--) dma_unmap_page(dev, ash->dma_handle[i], ash->size[i], DMA_TO_DEVICE); @@ -256,6 +261,9 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct 
net_device *ndev) if (skb_cow_head(skb, MANA_HEADROOM)) goto tx_drop_count; + if (unlikely(ipv6_hopopt_jumbo_remove(skb))) + goto tx_drop_count; + txq = &apc->tx_qp[txq_idx].txq; gdma_sq = txq->gdma_sq; cq = &apc->tx_qp[txq_idx].tx_cq; @@ -687,6 +695,7 @@ int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu, int num_qu return 0; error: + netdev_err(mpc->ndev, "Failed to pre-allocate RX buffers for %d queues\n", num_queues); mana_pre_dealloc_rxbufs(mpc); return -ENOMEM; } @@ -1304,8 +1313,10 @@ static int mana_create_eq(struct mana_context *ac) for (i = 0; i < gc->max_num_queues; i++) { spec.eq.msix_index = (i + 1) % gc->num_msix_usable; err = mana_gd_create_mana_eq(gd, &spec, &ac->eqs[i].eq); - if (err) + if (err) { + dev_err(gc->dev, "Failed to create EQ %d : %d\n", i, err); goto out; + } mana_create_eq_debugfs(ac, i); } @@ -2080,6 +2091,8 @@ static int mana_create_txq(struct mana_port_context *apc, return 0; out: + netdev_err(net, "Failed to create %d TX queues, %d\n", + apc->num_queues, err); mana_destroy_txq(apc); return err; } @@ -2415,6 +2428,7 @@ static int mana_add_rx_queues(struct mana_port_context *apc, rxq = mana_create_rxq(apc, i, &ac->eqs[i], ndev); if (!rxq) { err = -ENOMEM; + netdev_err(ndev, "Failed to create rxq %d : %d\n", i, err); goto out; } @@ -2661,12 +2675,18 @@ int mana_alloc_queues(struct net_device *ndev) int err; err = mana_create_vport(apc, ndev); - if (err) + if (err) { + netdev_err(ndev, "Failed to create vPort %u : %d\n", apc->port_idx, err); return err; + } err = netif_set_real_num_tx_queues(ndev, apc->num_queues); - if (err) + if (err) { + netdev_err(ndev, + "netif_set_real_num_tx_queues () failed for ndev with num_queues %u : %d\n", + apc->num_queues, err); goto destroy_vport; + } err = mana_add_rx_queues(apc, ndev); if (err) @@ -2675,14 +2695,20 @@ int mana_alloc_queues(struct net_device *ndev) apc->rss_state = apc->num_queues > 1 ? 
TRI_STATE_TRUE : TRI_STATE_FALSE; err = netif_set_real_num_rx_queues(ndev, apc->num_queues); - if (err) + if (err) { + netdev_err(ndev, + "netif_set_real_num_rx_queues () failed for ndev with num_queues %u : %d\n", + apc->num_queues, err); goto destroy_vport; + } mana_rss_table_init(apc); err = mana_config_rss(apc, TRI_STATE_TRUE, true, true); - if (err) + if (err) { + netdev_err(ndev, "Failed to configure RSS table: %d\n", err); goto destroy_vport; + } if (gd->gdma_context->is_pf) { err = mana_pf_register_filter(apc); @@ -2823,8 +2849,10 @@ int mana_detach(struct net_device *ndev, bool from_close) if (apc->port_st_save) { err = mana_dealloc_queues(ndev); - if (err) + if (err) { + netdev_err(ndev, "%s failed to deallocate queues: %d\n", __func__, err); return err; + } } if (!from_close) { @@ -2873,6 +2901,8 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, ndev->dev_port = port_idx; SET_NETDEV_DEV(ndev, gc->dev); + netif_set_tso_max_size(ndev, GSO_MAX_SIZE); + netif_carrier_off(ndev); netdev_rss_key_fill(apc->hashkey, MANA_HASH_KEY_SIZE); @@ -2968,6 +2998,8 @@ static int add_adev(struct gdma_dev *gd) goto add_fail; gd->adev = adev; + dev_dbg(gd->gdma_context->dev, + "Auxiliary device added successfully\n"); return 0; add_fail: @@ -3009,8 +3041,10 @@ int mana_probe(struct gdma_dev *gd, bool resuming) } err = mana_create_eq(ac); - if (err) + if (err) { + dev_err(dev, "Failed to create EQs: %d\n", err); goto out; + } err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION, MANA_MINOR_VERSION, MANA_MICRO_VERSION, &num_ports); @@ -3066,8 +3100,14 @@ int mana_probe(struct gdma_dev *gd, bool resuming) err = add_adev(gd); out: - if (err) + if (err) { mana_remove(gd, false); + } else { + dev_dbg(dev, "gd=%p, id=%u, num_ports=%d, type=%u, instance=%u\n", + gd, gd->dev_id.as_uint32, ac->num_ports, + gd->dev_id.type, gd->dev_id.instance); + dev_dbg(dev, "%s succeeded\n", __func__); + } return err; } @@ -3129,6 +3169,7 @@ out: gd->driver_data = NULL; 
gd->gdma_context = NULL; kfree(ac); + dev_dbg(dev, "%s succeeded\n", __func__); } struct net_device *mana_get_primary_netdev_rcu(struct mana_context *ac, u32 port_index) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c index 1fadb8ba1d2f..392574bdd4a4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c @@ -141,7 +141,7 @@ static int dwc_qos_probe(struct platform_device *pdev, #define AUTO_CAL_STATUS 0x880c #define AUTO_CAL_STATUS_ACTIVE BIT(31) -static void tegra_eqos_fix_speed(void *priv, unsigned int speed, unsigned int mode) +static void tegra_eqos_fix_speed(void *priv, int speed, unsigned int mode) { struct tegra_eqos *eqos = priv; bool needs_calibration = false; @@ -160,7 +160,7 @@ static void tegra_eqos_fix_speed(void *priv, unsigned int speed, unsigned int mo break; default: - dev_err(eqos->dev, "invalid speed %u\n", speed); + dev_err(eqos->dev, "invalid speed %d\n", speed); break; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c index 20d3a202bb8d..610204b51e3f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c @@ -51,7 +51,7 @@ struct imx_dwmac_ops { int (*fix_soc_reset)(void *priv, void __iomem *ioaddr); int (*set_intf_mode)(struct plat_stmmacenet_data *plat_dat); - void (*fix_mac_speed)(void *priv, unsigned int speed, unsigned int mode); + void (*fix_mac_speed)(void *priv, int speed, unsigned int mode); }; struct imx_priv_data { @@ -192,7 +192,7 @@ static void imx_dwmac_exit(struct platform_device *pdev, void *priv) /* nothing to do now */ } -static void imx_dwmac_fix_speed(void *priv, unsigned int speed, unsigned int mode) +static void imx_dwmac_fix_speed(void *priv, int speed, unsigned int mode) { struct plat_stmmacenet_data *plat_dat; struct imx_priv_data *dwmac = priv; 
@@ -208,7 +208,7 @@ static void imx_dwmac_fix_speed(void *priv, unsigned int speed, unsigned int mod rate = rgmii_clock(speed); if (rate < 0) { - dev_err(dwmac->dev, "invalid speed %u\n", speed); + dev_err(dwmac->dev, "invalid speed %d\n", speed); return; } @@ -217,7 +217,7 @@ static void imx_dwmac_fix_speed(void *priv, unsigned int speed, unsigned int mod dev_err(dwmac->dev, "failed to set tx rate %lu\n", rate); } -static void imx93_dwmac_fix_speed(void *priv, unsigned int speed, unsigned int mode) +static void imx93_dwmac_fix_speed(void *priv, int speed, unsigned int mode) { struct imx_priv_data *dwmac = priv; unsigned int iface; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c index ddee6154d40b..0591756a2100 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c @@ -22,13 +22,13 @@ struct intel_dwmac { }; struct intel_dwmac_data { - void (*fix_mac_speed)(void *priv, unsigned int speed, unsigned int mode); + void (*fix_mac_speed)(void *priv, int speed, unsigned int mode); unsigned long ptp_ref_clk_rate; unsigned long tx_clk_rate; bool tx_clk_en; }; -static void kmb_eth_fix_mac_speed(void *priv, unsigned int speed, unsigned int mode) +static void kmb_eth_fix_mac_speed(void *priv, int speed, unsigned int mode) { struct intel_dwmac *dwmac = priv; long rate; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c index 61227dcf56dc..7f4b9c1cc32b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c @@ -112,7 +112,7 @@ struct ipq806x_gmac { phy_interface_t phy_mode; }; -static int get_clk_div_sgmii(struct ipq806x_gmac *gmac, unsigned int speed) +static int get_clk_div_sgmii(struct ipq806x_gmac *gmac, int speed) { struct device *dev = &gmac->pdev->dev; int div; @@ -138,7 +138,7 @@ static int 
get_clk_div_sgmii(struct ipq806x_gmac *gmac, unsigned int speed) return div; } -static int get_clk_div_rgmii(struct ipq806x_gmac *gmac, unsigned int speed) +static int get_clk_div_rgmii(struct ipq806x_gmac *gmac, int speed) { struct device *dev = &gmac->pdev->dev; int div; @@ -164,7 +164,7 @@ static int get_clk_div_rgmii(struct ipq806x_gmac *gmac, unsigned int speed) return div; } -static int ipq806x_gmac_set_speed(struct ipq806x_gmac *gmac, unsigned int speed) +static int ipq806x_gmac_set_speed(struct ipq806x_gmac *gmac, int speed) { uint32_t clk_bits, val; int div; @@ -260,7 +260,7 @@ static int ipq806x_gmac_of_parse(struct ipq806x_gmac *gmac) return PTR_ERR_OR_ZERO(gmac->qsgmii_csr); } -static void ipq806x_gmac_fix_mac_speed(void *priv, unsigned int speed, unsigned int mode) +static void ipq806x_gmac_fix_mac_speed(void *priv, int speed, unsigned int mode) { struct ipq806x_gmac *gmac = priv; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c index 79acdf38c525..60a4e3330ccd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -149,8 +149,7 @@ static struct stmmac_pci_info loongson_gmac_pci_info = { .setup = loongson_gmac_data, }; -static void loongson_gnet_fix_speed(void *priv, unsigned int speed, - unsigned int mode) +static void loongson_gnet_fix_speed(void *priv, int speed, unsigned int mode) { struct loongson_data *ld = (struct loongson_data *)priv; struct net_device *ndev = dev_get_drvdata(ld->dev); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c index 5469fa1b429e..b115b7873cef 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c @@ -22,7 +22,7 @@ struct meson_dwmac { void __iomem *reg; }; -static void meson6_dwmac_fix_mac_speed(void *priv, unsigned int speed, unsigned int mode) 
+static void meson6_dwmac_fix_mac_speed(void *priv, int speed, unsigned int mode) { struct meson_dwmac *dwmac = priv; unsigned int val; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 2a5b38723635..192f270197c8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -111,7 +111,7 @@ struct qcom_ethqos { unsigned int link_clk_rate; struct clk *link_clk; struct phy *serdes_phy; - unsigned int speed; + int speed; int serdes_speed; phy_interface_t phy_mode; @@ -175,7 +175,7 @@ static void rgmii_dump(void *priv) #define RGMII_ID_MODE_10_LOW_SVS_CLK_FREQ (5 * 1000 * 1000UL) static void -ethqos_update_link_clk(struct qcom_ethqos *ethqos, unsigned int speed) +ethqos_update_link_clk(struct qcom_ethqos *ethqos, int speed) { if (!phy_interface_mode_is_rgmii(ethqos->phy_mode)) return; @@ -699,7 +699,7 @@ static int ethqos_configure(struct qcom_ethqos *ethqos) return ethqos->configure_func(ethqos); } -static void ethqos_fix_mac_speed(void *priv, unsigned int speed, unsigned int mode) +static void ethqos_fix_mac_speed(void *priv, int speed, unsigned int mode) { struct qcom_ethqos *ethqos = priv; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index a4dc89e23a68..83d104a274c5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -1920,7 +1920,7 @@ static void rk_gmac_powerdown(struct rk_priv_data *gmac) gmac_clk_enable(gmac, false); } -static void rk_fix_speed(void *priv, unsigned int speed, unsigned int mode) +static void rk_fix_speed(void *priv, int speed, unsigned int mode) { struct rk_priv_data *bsp_priv = priv; struct device *dev = &bsp_priv->pdev->dev; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c index 9cc0e5817416..6a498833b8ed 
100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c @@ -100,7 +100,7 @@ static void s32_gmac_exit(struct platform_device *pdev, void *priv) clk_disable_unprepare(gmac->rx_clk); } -static void s32_fix_mac_speed(void *priv, unsigned int speed, unsigned int mode) +static void s32_fix_mac_speed(void *priv, int speed, unsigned int mode) { struct s32_priv_data *gmac = priv; long tx_clk_rate; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 16020b72dec8..6b78ae730466 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -61,7 +61,7 @@ struct socfpga_dwmac { struct mdio_device *pcs_mdiodev; }; -static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed, unsigned int mode) +static void socfpga_dwmac_fix_mac_speed(void *priv, int speed, unsigned int mode) { struct socfpga_dwmac *dwmac = (struct socfpga_dwmac *)priv; void __iomem *splitter_base = dwmac->splitter_base; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c index 0a0a363d3730..282c846dad0b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c @@ -31,7 +31,7 @@ struct starfive_dwmac { const struct starfive_dwmac_data *data; }; -static void starfive_dwmac_fix_mac_speed(void *priv, unsigned int speed, unsigned int mode) +static void starfive_dwmac_fix_mac_speed(void *priv, int speed, unsigned int mode) { struct starfive_dwmac *dwmac = priv; long rate; @@ -39,7 +39,7 @@ static void starfive_dwmac_fix_mac_speed(void *priv, unsigned int speed, unsigne rate = rgmii_clock(speed); if (rate < 0) { - dev_err(dwmac->dev, "invalid speed %u\n", speed); + dev_err(dwmac->dev, "invalid speed %d\n", speed); return; } diff --git 
a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c index f25461c292fe..13b9c2a51fce 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c @@ -99,12 +99,12 @@ struct sti_dwmac { int clk_sel_reg; /* GMAC ext clk selection register */ struct regmap *regmap; bool gmac_en; - u32 speed; - void (*fix_retime_src)(void *priv, unsigned int speed, unsigned int mode); + int speed; + void (*fix_retime_src)(void *priv, int speed, unsigned int mode); }; struct sti_dwmac_of_data { - void (*fix_retime_src)(void *priv, unsigned int speed, unsigned int mode); + void (*fix_retime_src)(void *priv, int speed, unsigned int mode); }; static u32 phy_intf_sels[] = { @@ -132,7 +132,7 @@ static u32 stih4xx_tx_retime_val[] = { | STIH4XX_ETH_SEL_INTERNAL_NOTEXT_PHYCLK, }; -static void stih4xx_fix_retime_src(void *priv, u32 spd, unsigned int mode) +static void stih4xx_fix_retime_src(void *priv, int spd, unsigned int mode) { struct sti_dwmac *dwmac = priv; u32 src = dwmac->tx_retime_src; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c index 9ae318436c4a..1b1ce2888b2e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c @@ -72,7 +72,7 @@ static void sun7i_gmac_exit(struct platform_device *pdev, void *priv) regulator_disable(gmac->regulator); } -static void sun7i_fix_speed(void *priv, unsigned int speed, unsigned int mode) +static void sun7i_fix_speed(void *priv, int speed, unsigned int mode) { struct sunxi_priv_data *gmac = priv; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c index dce84ed184e9..ddb1d8aba321 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c @@ -104,7 +104,7 @@ static int thead_dwmac_set_txclk_dir(struct 
plat_stmmacenet_data *plat) return 0; } -static void thead_dwmac_fix_speed(void *priv, unsigned int speed, unsigned int mode) +static void thead_dwmac_fix_speed(void *priv, int speed, unsigned int mode) { struct plat_stmmacenet_data *plat; struct thead_dwmac *dwmac = priv; @@ -142,7 +142,7 @@ static void thead_dwmac_fix_speed(void *priv, unsigned int speed, unsigned int m div = rate * 10 / GMAC_MII_RATE; break; default: - dev_err(dwmac->dev, "invalid speed %u\n", speed); + dev_err(dwmac->dev, "invalid speed %d\n", speed); return; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c index eccf7f537467..33cf99797df5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c @@ -54,7 +54,7 @@ struct visconti_eth { spinlock_t lock; /* lock to protect register update */ }; -static void visconti_eth_fix_mac_speed(void *priv, unsigned int speed, unsigned int mode) +static void visconti_eth_fix_mac_speed(void *priv, int speed, unsigned int mode) { struct visconti_eth *dwmac = priv; struct net_device *netdev = dev_get_drvdata(dwmac->dev); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index f05cae103d83..3395188c198a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -282,8 +282,7 @@ struct stmmac_priv { struct stmmac_channel channel[STMMAC_CH_MAX]; int speed; - unsigned int flow_ctrl; - unsigned int pause; + unsigned int pause_time; struct mii_bus *mii; struct phylink_config phylink_config; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index fd8ca1524e43..4d542f482ecb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -88,13 +88,13 @@ MODULE_PARM_DESC(phyaddr, "Physical device 
address"); #define STMMAC_XDP_TX BIT(1) #define STMMAC_XDP_REDIRECT BIT(2) -static int flow_ctrl = FLOW_AUTO; +static int flow_ctrl = 0xdead; module_param(flow_ctrl, int, 0644); -MODULE_PARM_DESC(flow_ctrl, "Flow control ability [on/off]"); +MODULE_PARM_DESC(flow_ctrl, "Flow control ability [on/off] (obsolete)"); static int pause = PAUSE_TIME; module_param(pause, int, 0644); -MODULE_PARM_DESC(pause, "Flow Control Pause Time"); +MODULE_PARM_DESC(pause, "Flow Control Pause Time (units of 512 bit times)"); #define TC_DEFAULT 64 static int tc = TC_DEFAULT; @@ -188,12 +188,11 @@ static void stmmac_verify_args(void) watchdog = TX_TIMEO; if (unlikely((buf_sz < DEFAULT_BUFSIZE) || (buf_sz > BUF_SIZE_16KiB))) buf_sz = DEFAULT_BUFSIZE; - if (unlikely(flow_ctrl > 1)) - flow_ctrl = FLOW_AUTO; - else if (likely(flow_ctrl < 0)) - flow_ctrl = FLOW_OFF; if (unlikely((pause < 0) || (pause > 0xffff))) pause = PAUSE_TIME; + + if (flow_ctrl != 0xdead) + pr_warn("stmmac: module parameter 'flow_ctrl' is obsolete - please remove from your module configuration\n"); } static void __stmmac_disable_all_queues(struct stmmac_priv *priv) @@ -858,14 +857,16 @@ static void stmmac_release_ptp(struct stmmac_priv *priv) * stmmac_mac_flow_ctrl - Configure flow control in all queues * @priv: driver private structure * @duplex: duplex passed to the next function + * @flow_ctrl: desired flow control modes * Description: It is used for configuring the flow control in all queues */ -static void stmmac_mac_flow_ctrl(struct stmmac_priv *priv, u32 duplex) +static void stmmac_mac_flow_ctrl(struct stmmac_priv *priv, u32 duplex, + unsigned int flow_ctrl) { u32 tx_cnt = priv->plat->tx_queues_to_use; - stmmac_flow_ctrl(priv, priv->hw, duplex, priv->flow_ctrl, - priv->pause, tx_cnt); + stmmac_flow_ctrl(priv, priv->hw, duplex, flow_ctrl, priv->pause_time, + tx_cnt); } static unsigned long stmmac_mac_get_caps(struct phylink_config *config, @@ -925,6 +926,7 @@ static void stmmac_mac_link_up(struct phylink_config 
*config, bool tx_pause, bool rx_pause) { struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev)); + unsigned int flow_ctrl; u32 old_ctrl, ctrl; if ((priv->plat->flags & STMMAC_FLAG_SERDES_UP_AFTER_PHY_LINKUP) && @@ -1005,15 +1007,15 @@ static void stmmac_mac_link_up(struct phylink_config *config, /* Flow Control operation */ if (rx_pause && tx_pause) - priv->flow_ctrl = FLOW_AUTO; + flow_ctrl = FLOW_AUTO; else if (rx_pause && !tx_pause) - priv->flow_ctrl = FLOW_RX; + flow_ctrl = FLOW_RX; else if (!rx_pause && tx_pause) - priv->flow_ctrl = FLOW_TX; + flow_ctrl = FLOW_TX; else - priv->flow_ctrl = FLOW_OFF; + flow_ctrl = FLOW_OFF; - stmmac_mac_flow_ctrl(priv, duplex); + stmmac_mac_flow_ctrl(priv, duplex, flow_ctrl); if (ctrl != old_ctrl) writel(ctrl, priv->ioaddr + MAC_CTRL_REG); @@ -7404,7 +7406,7 @@ int stmmac_dvr_probe(struct device *device, return -ENOMEM; stmmac_set_ethtool_ops(ndev); - priv->pause = pause; + priv->pause_time = pause; priv->plat = plat_dat; priv->ioaddr = res->addr; priv->dev->base_addr = (unsigned long)res->addr; @@ -7600,9 +7602,6 @@ int stmmac_dvr_probe(struct device *device, "%s: warning: maxmtu having invalid value (%d)\n", __func__, priv->plat->maxmtu); - if (flow_ctrl) - priv->flow_ctrl = FLOW_AUTO; /* RX/TX pause on */ - ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; /* Setup channels NAPI */ diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 2806238629f8..3e671be95d6f 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -164,6 +164,7 @@ #define AM65_CPSW_CPPI_TX_PKT_TYPE 0x7 /* XDP */ +#define AM65_CPSW_XDP_TX BIT(2) #define AM65_CPSW_XDP_CONSUMED BIT(1) #define AM65_CPSW_XDP_REDIRECT BIT(0) #define AM65_CPSW_XDP_PASS 0 @@ -829,19 +830,19 @@ static void am65_cpsw_nuss_tx_cleanup(void *data, dma_addr_t desc_dma) { struct am65_cpsw_tx_chn *tx_chn = data; enum am65_cpsw_tx_buf_type buf_type; + struct am65_cpsw_tx_swdata *swdata; struct 
cppi5_host_desc_t *desc_tx; struct xdp_frame *xdpf; struct sk_buff *skb; - void **swdata; desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, desc_dma); swdata = cppi5_hdesc_get_swdata(desc_tx); buf_type = am65_cpsw_nuss_buf_type(tx_chn, desc_dma); if (buf_type == AM65_CPSW_TX_BUF_TYPE_SKB) { - skb = *(swdata); + skb = swdata->skb; dev_kfree_skb_any(skb); } else { - xdpf = *(swdata); + xdpf = swdata->xdpf; xdp_return_frame(xdpf); } @@ -1098,10 +1099,10 @@ static int am65_cpsw_xdp_tx_frame(struct net_device *ndev, struct am65_cpsw_common *common = am65_ndev_to_common(ndev); struct am65_cpsw_port *port = am65_ndev_to_port(ndev); struct cppi5_host_desc_t *host_desc; + struct am65_cpsw_tx_swdata *swdata; struct netdev_queue *netif_txq; dma_addr_t dma_desc, dma_buf; u32 pkt_len = xdpf->len; - void **swdata; int ret; host_desc = k3_cppi_desc_pool_alloc(tx_chn->desc_pool); @@ -1131,7 +1132,8 @@ static int am65_cpsw_xdp_tx_frame(struct net_device *ndev, cppi5_hdesc_attach_buf(host_desc, dma_buf, pkt_len, dma_buf, pkt_len); swdata = cppi5_hdesc_get_swdata(host_desc); - *(swdata) = xdpf; + swdata->ndev = ndev; + swdata->xdpf = xdpf; /* Report BQL before sending the packet */ netif_txq = netdev_get_tx_queue(ndev, tx_chn->id); @@ -1167,17 +1169,16 @@ pool_free: static int am65_cpsw_run_xdp(struct am65_cpsw_rx_flow *flow, struct am65_cpsw_port *port, - struct xdp_buff *xdp, - int cpu, int *len) + struct xdp_buff *xdp, int *len) { struct am65_cpsw_common *common = flow->common; struct net_device *ndev = port->ndev; int ret = AM65_CPSW_XDP_CONSUMED; struct am65_cpsw_tx_chn *tx_chn; struct netdev_queue *netif_txq; + int cpu = smp_processor_id(); struct xdp_frame *xdpf; struct bpf_prog *prog; - struct page *page; int pkt_len; u32 act; int err; @@ -1193,8 +1194,7 @@ static int am65_cpsw_run_xdp(struct am65_cpsw_rx_flow *flow, switch (act) { case XDP_PASS: - ret = AM65_CPSW_XDP_PASS; - goto out; + return AM65_CPSW_XDP_PASS; case XDP_TX: tx_chn = &common->tx_chns[cpu % 
AM65_CPSW_MAX_QUEUES]; netif_txq = netdev_get_tx_queue(ndev, tx_chn->id); @@ -1213,15 +1213,13 @@ static int am65_cpsw_run_xdp(struct am65_cpsw_rx_flow *flow, goto drop; dev_sw_netstats_rx_add(ndev, pkt_len); - ret = AM65_CPSW_XDP_CONSUMED; - goto out; + return AM65_CPSW_XDP_TX; case XDP_REDIRECT: if (unlikely(xdp_do_redirect(ndev, xdp, prog))) goto drop; dev_sw_netstats_rx_add(ndev, pkt_len); - ret = AM65_CPSW_XDP_REDIRECT; - goto out; + return AM65_CPSW_XDP_REDIRECT; default: bpf_warn_invalid_xdp_action(ndev, prog, act); fallthrough; @@ -1233,10 +1231,6 @@ drop: ndev->stats.rx_dropped++; } - page = virt_to_head_page(xdp->data); - am65_cpsw_put_page(flow, page, true); - -out: return ret; } @@ -1274,7 +1268,7 @@ static void am65_cpsw_nuss_rx_csum(struct sk_buff *skb, u32 csum_info) } static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow, - int cpu, int *xdp_state) + int *xdp_state) { struct am65_cpsw_rx_chn *rx_chn = &flow->common->rx_chns; u32 buf_dma_len, pkt_len, port_id = 0, csum_info; @@ -1334,8 +1328,13 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow, xdp_init_buff(&xdp, PAGE_SIZE, &port->xdp_rxq[flow->id]); xdp_prepare_buff(&xdp, page_addr, AM65_CPSW_HEADROOM, pkt_len, false); - *xdp_state = am65_cpsw_run_xdp(flow, port, &xdp, - cpu, &pkt_len); + *xdp_state = am65_cpsw_run_xdp(flow, port, &xdp, &pkt_len); + if (*xdp_state == AM65_CPSW_XDP_CONSUMED) { + page = virt_to_head_page(xdp.data); + am65_cpsw_put_page(flow, page, true); + goto allocate; + } + if (*xdp_state != AM65_CPSW_XDP_PASS) goto allocate; @@ -1401,7 +1400,6 @@ static int am65_cpsw_nuss_rx_poll(struct napi_struct *napi_rx, int budget) { struct am65_cpsw_rx_flow *flow = am65_cpsw_napi_to_rx_flow(napi_rx); struct am65_cpsw_common *common = flow->common; - int cpu = smp_processor_id(); int xdp_state_or = 0; int cur_budget, ret; int xdp_state; @@ -1410,7 +1408,7 @@ static int am65_cpsw_nuss_rx_poll(struct napi_struct *napi_rx, int budget) /* process only this flow 
*/ cur_budget = budget; while (cur_budget--) { - ret = am65_cpsw_nuss_rx_packets(flow, cpu, &xdp_state); + ret = am65_cpsw_nuss_rx_packets(flow, &xdp_state); xdp_state_or |= xdp_state; if (ret) break; @@ -1438,52 +1436,6 @@ static int am65_cpsw_nuss_rx_poll(struct napi_struct *napi_rx, int budget) return num_rx; } -static struct sk_buff * -am65_cpsw_nuss_tx_compl_packet_skb(struct am65_cpsw_tx_chn *tx_chn, - dma_addr_t desc_dma) -{ - struct cppi5_host_desc_t *desc_tx; - struct sk_buff *skb; - void **swdata; - - desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, - desc_dma); - swdata = cppi5_hdesc_get_swdata(desc_tx); - skb = *(swdata); - am65_cpsw_nuss_xmit_free(tx_chn, desc_tx); - - am65_cpts_tx_timestamp(tx_chn->common->cpts, skb); - - dev_sw_netstats_tx_add(skb->dev, 1, skb->len); - - return skb; -} - -static struct xdp_frame * -am65_cpsw_nuss_tx_compl_packet_xdp(struct am65_cpsw_common *common, - struct am65_cpsw_tx_chn *tx_chn, - dma_addr_t desc_dma, - struct net_device **ndev) -{ - struct cppi5_host_desc_t *desc_tx; - struct am65_cpsw_port *port; - struct xdp_frame *xdpf; - u32 port_id = 0; - void **swdata; - - desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, desc_dma); - cppi5_desc_get_tags_ids(&desc_tx->hdr, NULL, &port_id); - swdata = cppi5_hdesc_get_swdata(desc_tx); - xdpf = *(swdata); - am65_cpsw_nuss_xmit_free(tx_chn, desc_tx); - - port = am65_common_get_port(common, port_id); - dev_sw_netstats_tx_add(port->ndev, 1, xdpf->len); - *ndev = port->ndev; - - return xdpf; -} - static void am65_cpsw_nuss_tx_wake(struct am65_cpsw_tx_chn *tx_chn, struct net_device *ndev, struct netdev_queue *netif_txq) { @@ -1504,13 +1456,17 @@ static void am65_cpsw_nuss_tx_wake(struct am65_cpsw_tx_chn *tx_chn, struct net_d static int am65_cpsw_nuss_tx_compl_packets(struct am65_cpsw_common *common, int chn, unsigned int budget, bool *tdown) { + bool single_port = AM65_CPSW_IS_CPSW2G(common); enum am65_cpsw_tx_buf_type buf_type; + struct am65_cpsw_tx_swdata *swdata; + 
struct cppi5_host_desc_t *desc_tx; struct device *dev = common->dev; struct am65_cpsw_tx_chn *tx_chn; struct netdev_queue *netif_txq; unsigned int total_bytes = 0; struct net_device *ndev; struct xdp_frame *xdpf; + unsigned int pkt_len; struct sk_buff *skb; dma_addr_t desc_dma; int res, num_tx = 0; @@ -1518,9 +1474,12 @@ static int am65_cpsw_nuss_tx_compl_packets(struct am65_cpsw_common *common, tx_chn = &common->tx_chns[chn]; while (true) { - spin_lock(&tx_chn->lock); + if (!single_port) + spin_lock(&tx_chn->lock); res = k3_udma_glue_pop_tx_chn(tx_chn->tx_chn, &desc_dma); - spin_unlock(&tx_chn->lock); + if (!single_port) + spin_unlock(&tx_chn->lock); + if (res == -ENODATA) break; @@ -1531,27 +1490,43 @@ static int am65_cpsw_nuss_tx_compl_packets(struct am65_cpsw_common *common, break; } + desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, + desc_dma); + swdata = cppi5_hdesc_get_swdata(desc_tx); + ndev = swdata->ndev; buf_type = am65_cpsw_nuss_buf_type(tx_chn, desc_dma); if (buf_type == AM65_CPSW_TX_BUF_TYPE_SKB) { - skb = am65_cpsw_nuss_tx_compl_packet_skb(tx_chn, desc_dma); - ndev = skb->dev; - total_bytes = skb->len; + skb = swdata->skb; + am65_cpts_tx_timestamp(tx_chn->common->cpts, skb); + pkt_len = skb->len; napi_consume_skb(skb, budget); } else { - xdpf = am65_cpsw_nuss_tx_compl_packet_xdp(common, tx_chn, - desc_dma, &ndev); - total_bytes = xdpf->len; + xdpf = swdata->xdpf; + pkt_len = xdpf->len; if (buf_type == AM65_CPSW_TX_BUF_TYPE_XDP_TX) xdp_return_frame_rx_napi(xdpf); else xdp_return_frame(xdpf); } + + total_bytes += pkt_len; num_tx++; + am65_cpsw_nuss_xmit_free(tx_chn, desc_tx); + dev_sw_netstats_tx_add(ndev, 1, pkt_len); + if (!single_port) { + /* as packets from multi ports can be interleaved + * on the same channel, we have to figure out the + * port/queue at every packet and report it/wake queue. 
+ */ + netif_txq = netdev_get_tx_queue(ndev, chn); + netdev_tx_completed_queue(netif_txq, 1, pkt_len); + am65_cpsw_nuss_tx_wake(tx_chn, ndev, netif_txq); + } + } + if (single_port) { netif_txq = netdev_get_tx_queue(ndev, chn); - netdev_tx_completed_queue(netif_txq, num_tx, total_bytes); - am65_cpsw_nuss_tx_wake(tx_chn, ndev, netif_txq); } @@ -1560,66 +1535,6 @@ static int am65_cpsw_nuss_tx_compl_packets(struct am65_cpsw_common *common, return num_tx; } -static int am65_cpsw_nuss_tx_compl_packets_2g(struct am65_cpsw_common *common, - int chn, unsigned int budget, bool *tdown) -{ - enum am65_cpsw_tx_buf_type buf_type; - struct device *dev = common->dev; - struct am65_cpsw_tx_chn *tx_chn; - struct netdev_queue *netif_txq; - unsigned int total_bytes = 0; - struct net_device *ndev; - struct xdp_frame *xdpf; - struct sk_buff *skb; - dma_addr_t desc_dma; - int res, num_tx = 0; - - tx_chn = &common->tx_chns[chn]; - - while (true) { - res = k3_udma_glue_pop_tx_chn(tx_chn->tx_chn, &desc_dma); - if (res == -ENODATA) - break; - - if (cppi5_desc_is_tdcm(desc_dma)) { - if (atomic_dec_and_test(&common->tdown_cnt)) - complete(&common->tdown_complete); - *tdown = true; - break; - } - - buf_type = am65_cpsw_nuss_buf_type(tx_chn, desc_dma); - if (buf_type == AM65_CPSW_TX_BUF_TYPE_SKB) { - skb = am65_cpsw_nuss_tx_compl_packet_skb(tx_chn, desc_dma); - ndev = skb->dev; - total_bytes += skb->len; - napi_consume_skb(skb, budget); - } else { - xdpf = am65_cpsw_nuss_tx_compl_packet_xdp(common, tx_chn, - desc_dma, &ndev); - total_bytes += xdpf->len; - if (buf_type == AM65_CPSW_TX_BUF_TYPE_XDP_TX) - xdp_return_frame_rx_napi(xdpf); - else - xdp_return_frame(xdpf); - } - num_tx++; - } - - if (!num_tx) - return 0; - - netif_txq = netdev_get_tx_queue(ndev, chn); - - netdev_tx_completed_queue(netif_txq, num_tx, total_bytes); - - am65_cpsw_nuss_tx_wake(tx_chn, ndev, netif_txq); - - dev_dbg(dev, "%s:%u pkt:%d\n", __func__, chn, num_tx); - - return num_tx; -} - static enum hrtimer_restart 
am65_cpsw_nuss_tx_timer_callback(struct hrtimer *timer) { struct am65_cpsw_tx_chn *tx_chns = @@ -1635,13 +1550,8 @@ static int am65_cpsw_nuss_tx_poll(struct napi_struct *napi_tx, int budget) bool tdown = false; int num_tx; - if (AM65_CPSW_IS_CPSW2G(tx_chn->common)) - num_tx = am65_cpsw_nuss_tx_compl_packets_2g(tx_chn->common, tx_chn->id, - budget, &tdown); - else - num_tx = am65_cpsw_nuss_tx_compl_packets(tx_chn->common, - tx_chn->id, budget, &tdown); - + num_tx = am65_cpsw_nuss_tx_compl_packets(tx_chn->common, + tx_chn->id, budget, &tdown); if (num_tx >= budget) return budget; @@ -1685,12 +1595,12 @@ static netdev_tx_t am65_cpsw_nuss_ndo_slave_xmit(struct sk_buff *skb, struct am65_cpsw_common *common = am65_ndev_to_common(ndev); struct cppi5_host_desc_t *first_desc, *next_desc, *cur_desc; struct am65_cpsw_port *port = am65_ndev_to_port(ndev); + struct am65_cpsw_tx_swdata *swdata; struct device *dev = common->dev; struct am65_cpsw_tx_chn *tx_chn; struct netdev_queue *netif_txq; dma_addr_t desc_dma, buf_dma; int ret, q_idx, i; - void **swdata; u32 *psdata; u32 pkt_len; @@ -1736,7 +1646,8 @@ static netdev_tx_t am65_cpsw_nuss_ndo_slave_xmit(struct sk_buff *skb, k3_udma_glue_tx_dma_to_cppi5_addr(tx_chn->tx_chn, &buf_dma); cppi5_hdesc_attach_buf(first_desc, buf_dma, pkt_len, buf_dma, pkt_len); swdata = cppi5_hdesc_get_swdata(first_desc); - *(swdata) = skb; + swdata->ndev = ndev; + swdata->skb = skb; psdata = cppi5_hdesc_get_psdata(first_desc); /* HW csum offload if enabled */ @@ -3578,6 +3489,10 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev) __be64 id_temp; int ret, i; + BUILD_BUG_ON_MSG(sizeof(struct am65_cpsw_tx_swdata) > AM65_CPSW_NAV_SW_DATA_SIZE, + "TX SW_DATA size exceeds AM65_CPSW_NAV_SW_DATA_SIZE"); + BUILD_BUG_ON_MSG(sizeof(struct am65_cpsw_swdata) > AM65_CPSW_NAV_SW_DATA_SIZE, + "SW_DATA size exceeds AM65_CPSW_NAV_SW_DATA_SIZE"); common = devm_kzalloc(dev, sizeof(struct am65_cpsw_common), GFP_KERNEL); if (!common) return -ENOMEM; diff --git 
a/drivers/net/ethernet/ti/am65-cpsw-nuss.h b/drivers/net/ethernet/ti/am65-cpsw-nuss.h index e7832a5cf3cc..917c37e4e89b 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.h +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.h @@ -104,6 +104,14 @@ struct am65_cpsw_rx_flow { char name[32]; }; +struct am65_cpsw_tx_swdata { + struct net_device *ndev; + union { + struct sk_buff *skb; + struct xdp_frame *xdpf; + }; +}; + struct am65_cpsw_swdata { u32 flow_id; struct page *page; diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 234db693cefa..70f7cb383228 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -1166,6 +1166,8 @@ struct netvsc_device { u32 max_chn; u32 num_chn; + u32 netvsc_gso_max_size; + atomic_t open_chn; struct work_struct subchan_work; wait_queue_head_t subchan_open; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index d6c4abfc3a28..9c6501bf27bd 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2461,6 +2461,21 @@ static int netvsc_vf_changed(struct net_device *vf_netdev, unsigned long event) } else { netdev_info(ndev, "Data path switched %s VF: %s\n", vf_is_up ? "to" : "from", vf_netdev->name); + + /* In Azure, when accelerated networking is enabled, other NICs + * like MANA, MLX, are configured as a bonded nic with + * Netvsc(failover) NIC. For bonded NICs, the min of the max + * pkt aggregate size of the members is propagated in the stack. + * In order to allow these NICs (MANA/MLX) to use up to + * GSO_MAX_SIZE gso packet size, we need to allow Netvsc NIC to + * also support this in the guest.
+ * This value is only increased for netvsc NIC when datapath is + * switched over to the VF + */ + if (vf_is_up) + netif_set_tso_max_size(ndev, vf_netdev->tso_max_size); + else + netif_set_tso_max_size(ndev, netvsc_dev->netvsc_gso_max_size); } return NOTIFY_OK; diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index c0ceeef4fcd8..82747dfacd70 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -1356,9 +1356,10 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device, struct net_device_context *net_device_ctx = netdev_priv(net); struct ndis_offload hwcaps; struct ndis_offload_params offloads; - unsigned int gso_max_size = GSO_LEGACY_MAX_SIZE; int ret; + nvdev->netvsc_gso_max_size = GSO_LEGACY_MAX_SIZE; + /* Find HW offload capabilities */ ret = rndis_query_hwcaps(rndis_device, nvdev, &hwcaps); if (ret != 0) @@ -1390,8 +1391,8 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device, offloads.lso_v2_ipv4 = NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED; net->hw_features |= NETIF_F_TSO; - if (hwcaps.lsov2.ip4_maxsz < gso_max_size) - gso_max_size = hwcaps.lsov2.ip4_maxsz; + if (hwcaps.lsov2.ip4_maxsz < nvdev->netvsc_gso_max_size) + nvdev->netvsc_gso_max_size = hwcaps.lsov2.ip4_maxsz; } if (hwcaps.csum.ip4_txcsum & NDIS_TXCSUM_CAP_UDP4) { @@ -1411,8 +1412,8 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device, offloads.lso_v2_ipv6 = NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED; net->hw_features |= NETIF_F_TSO6; - if (hwcaps.lsov2.ip6_maxsz < gso_max_size) - gso_max_size = hwcaps.lsov2.ip6_maxsz; + if (hwcaps.lsov2.ip6_maxsz < nvdev->netvsc_gso_max_size) + nvdev->netvsc_gso_max_size = hwcaps.lsov2.ip6_maxsz; } if (hwcaps.csum.ip6_txcsum & NDIS_TXCSUM_CAP_UDP6) { @@ -1438,7 +1439,7 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device, */ net->features &= ~NETVSC_SUPPORTED_HW_FEATURES | net->hw_features; - netif_set_tso_max_size(net, gso_max_size); + 
netif_set_tso_max_size(net, nvdev->netvsc_gso_max_size); ret = rndis_filter_set_offload_params(net, nvdev, &offloads); diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 24422ac4e417..6d2aa77ea963 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -231,7 +231,7 @@ struct plat_stmmacenet_data { u8 tx_sched_algorithm; struct stmmac_rxq_cfg rx_queues_cfg[MTL_MAX_RX_QUEUES]; struct stmmac_txq_cfg tx_queues_cfg[MTL_MAX_TX_QUEUES]; - void (*fix_mac_speed)(void *priv, unsigned int speed, unsigned int mode); + void (*fix_mac_speed)(void *priv, int speed, unsigned int mode); int (*fix_soc_reset)(void *priv, void __iomem *ioaddr); int (*serdes_powerup)(struct net_device *ndev, void *priv); void (*serdes_powerdown)(struct net_device *ndev, void *priv); diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 710caacad9da..5927910ec06e 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -43,6 +43,8 @@ struct fib_rule { struct fib_kuid_range uid_range; struct fib_rule_port_range sport_range; struct fib_rule_port_range dport_range; + u16 sport_mask; + u16 dport_mask; struct rcu_head rcu; }; @@ -146,6 +148,17 @@ static inline bool fib_rule_port_inrange(const struct fib_rule_port_range *a, ntohs(port) <= a->end; } +static inline bool fib_rule_port_match(const struct fib_rule_port_range *range, + u16 port_mask, __be16 port) +{ + if ((range->start ^ ntohs(port)) & port_mask) + return false; + if (!port_mask && fib_rule_port_range_set(range) && + !fib_rule_port_inrange(range, port)) + return false; + return true; +} + static inline bool fib_rule_port_range_valid(const struct fib_rule_port_range *a) { return a->start != 0 && a->end != 0 && a->end < 0xffff && @@ -159,6 +172,12 @@ static inline bool fib_rule_port_range_compare(struct fib_rule_port_range *a, a->end == b->end; } +static inline bool +fib_rule_port_is_range(const struct fib_rule_port_range *range) +{ + return range->start != range->end; +} + static inline bool 
fib_rule_requires_fldissect(struct fib_rule *rule) { return rule->iifindex != LOOPBACK_IFINDEX && (rule->ip_proto || diff --git a/include/net/sock.h b/include/net/sock.h index fac65ed30983..edbb870e3f86 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1285,10 +1285,6 @@ struct proto { unsigned int inuse_idx; #endif -#if IS_ENABLED(CONFIG_MPTCP) - int (*forward_alloc_get)(const struct sock *sk); -#endif - bool (*stream_memory_free)(const struct sock *sk, int wake); bool (*sock_is_readable)(struct sock *sk); /* Memory pressure */ @@ -1349,15 +1345,6 @@ int sock_load_diag_module(int family, int protocol); INDIRECT_CALLABLE_DECLARE(bool tcp_stream_memory_free(const struct sock *sk, int wake)); -static inline int sk_forward_alloc_get(const struct sock *sk) -{ -#if IS_ENABLED(CONFIG_MPTCP) - if (sk->sk_prot->forward_alloc_get) - return sk->sk_prot->forward_alloc_get(sk); -#endif - return READ_ONCE(sk->sk_forward_alloc); -} - static inline bool __sk_stream_memory_free(const struct sock *sk, int wake) { if (READ_ONCE(sk->sk_wmem_queued) >= READ_ONCE(sk->sk_sndbuf)) diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index 00e9890ca3c0..95ec01b15c65 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -70,6 +70,8 @@ enum { FRA_DSCP, /* dscp */ FRA_FLOWLABEL, /* flowlabel */ FRA_FLOWLABEL_MASK, /* flowlabel mask */ + FRA_SPORT_MASK, /* sport mask */ + FRA_DPORT_MASK, /* dport mask */ __FRA_MAX }; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 424b4cd4e9e5..5ddd34cbe7f6 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -481,11 +481,17 @@ static struct fib_rule *rule_find(struct fib_rules_ops *ops, &rule->sport_range)) continue; + if (rule->sport_mask && r->sport_mask != rule->sport_mask) + continue; + if (fib_rule_port_range_set(&rule->dport_range) && !fib_rule_port_range_compare(&r->dport_range, &rule->dport_range)) continue; + if (rule->dport_mask && r->dport_mask != 
rule->dport_mask) + continue; + if (!ops->compare(r, frh, tb)) continue; return r; @@ -515,6 +521,33 @@ static int fib_nl2rule_l3mdev(struct nlattr *nla, struct fib_rule *nlrule, } #endif +static int fib_nl2rule_port_mask(const struct nlattr *mask_attr, + const struct fib_rule_port_range *range, + u16 *port_mask, + struct netlink_ext_ack *extack) +{ + if (!fib_rule_port_range_valid(range)) { + NL_SET_ERR_MSG_ATTR(extack, mask_attr, + "Cannot specify port mask without port value"); + return -EINVAL; + } + + if (fib_rule_port_is_range(range)) { + NL_SET_ERR_MSG_ATTR(extack, mask_attr, + "Cannot specify port mask for port range"); + return -EINVAL; + } + + if (range->start & ~nla_get_u16(mask_attr)) { + NL_SET_ERR_MSG_ATTR(extack, mask_attr, "Invalid port mask"); + return -EINVAL; + } + + *port_mask = nla_get_u16(mask_attr); + + return 0; +} + static int fib_nl2rule(struct net *net, struct nlmsghdr *nlh, struct netlink_ext_ack *extack, struct fib_rules_ops *ops, @@ -644,6 +677,16 @@ static int fib_nl2rule(struct net *net, struct nlmsghdr *nlh, NL_SET_ERR_MSG(extack, "Invalid sport range"); goto errout_free; } + if (!fib_rule_port_is_range(&nlrule->sport_range)) + nlrule->sport_mask = U16_MAX; + } + + if (tb[FRA_SPORT_MASK]) { + err = fib_nl2rule_port_mask(tb[FRA_SPORT_MASK], + &nlrule->sport_range, + &nlrule->sport_mask, extack); + if (err) + goto errout_free; } if (tb[FRA_DPORT_RANGE]) { @@ -653,6 +696,16 @@ static int fib_nl2rule(struct net *net, struct nlmsghdr *nlh, NL_SET_ERR_MSG(extack, "Invalid dport range"); goto errout_free; } + if (!fib_rule_port_is_range(&nlrule->dport_range)) + nlrule->dport_mask = U16_MAX; + } + + if (tb[FRA_DPORT_MASK]) { + err = fib_nl2rule_port_mask(tb[FRA_DPORT_MASK], + &nlrule->dport_range, + &nlrule->dport_mask, extack); + if (err) + goto errout_free; } *rule = nlrule; @@ -751,10 +804,16 @@ static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh, &rule->sport_range)) continue; + if (r->sport_mask != 
rule->sport_mask) + continue; + if (!fib_rule_port_range_compare(&r->dport_range, &rule->dport_range)) continue; + if (r->dport_mask != rule->dport_mask) + continue; + if (!ops->compare(r, frh, tb)) continue; return 1; @@ -784,6 +843,8 @@ static const struct nla_policy fib_rule_policy[FRA_MAX + 1] = { [FRA_DSCP] = NLA_POLICY_MAX(NLA_U8, INET_DSCP_MASK >> 2), [FRA_FLOWLABEL] = { .type = NLA_BE32 }, [FRA_FLOWLABEL_MASK] = { .type = NLA_BE32 }, + [FRA_SPORT_MASK] = { .type = NLA_U16 }, + [FRA_DPORT_MASK] = { .type = NLA_U16 }, }; int fib_newrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh, @@ -1049,7 +1110,9 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, + nla_total_size(1) /* FRA_PROTOCOL */ + nla_total_size(1) /* FRA_IP_PROTO */ + nla_total_size(sizeof(struct fib_rule_port_range)) /* FRA_SPORT_RANGE */ - + nla_total_size(sizeof(struct fib_rule_port_range)); /* FRA_DPORT_RANGE */ + + nla_total_size(sizeof(struct fib_rule_port_range)) /* FRA_DPORT_RANGE */ + + nla_total_size(2) /* FRA_SPORT_MASK */ + + nla_total_size(2); /* FRA_DPORT_MASK */ if (ops->nlmsg_payload) payload += ops->nlmsg_payload(rule); @@ -1117,8 +1180,12 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, nla_put_uid_range(skb, &rule->uid_range)) || (fib_rule_port_range_set(&rule->sport_range) && nla_put_port_range(skb, FRA_SPORT_RANGE, &rule->sport_range)) || + (rule->sport_mask && nla_put_u16(skb, FRA_SPORT_MASK, + rule->sport_mask)) || (fib_rule_port_range_set(&rule->dport_range) && nla_put_port_range(skb, FRA_DPORT_RANGE, &rule->dport_range)) || + (rule->dport_mask && nla_put_u16(skb, FRA_DPORT_MASK, + rule->dport_mask)) || (rule->ip_proto && nla_put_u8(skb, FRA_IP_PROTO, rule->ip_proto))) goto nla_put_failure; diff --git a/net/core/sock.c b/net/core/sock.c index 53c7af0038c4..0d385bf27b38 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3882,7 +3882,7 @@ void sk_get_meminfo(const struct sock *sk, u32 *mem) mem[SK_MEMINFO_RCVBUF] 
= READ_ONCE(sk->sk_rcvbuf); mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk); mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf); - mem[SK_MEMINFO_FWD_ALLOC] = sk_forward_alloc_get(sk); + mem[SK_MEMINFO_FWD_ALLOC] = READ_ONCE(sk->sk_forward_alloc); mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued); mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc); mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 21f46ee7b6e9..5df1f1325259 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -153,7 +153,7 @@ void inet_sock_destruct(struct sock *sk) WARN_ON_ONCE(atomic_read(&sk->sk_rmem_alloc)); WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc)); WARN_ON_ONCE(sk->sk_wmem_queued); - WARN_ON_ONCE(sk_forward_alloc_get(sk)); + WARN_ON_ONCE(sk->sk_forward_alloc); kfree(rcu_dereference_protected(inet->inet_opt, 1)); dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1)); diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 041c46787d94..6b3d6a957822 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -201,12 +201,12 @@ INDIRECT_CALLABLE_SCOPE int fib4_rule_match(struct fib_rule *rule, if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto)) return 0; - if (fib_rule_port_range_set(&rule->sport_range) && - !fib_rule_port_inrange(&rule->sport_range, fl4->fl4_sport)) + if (!fib_rule_port_match(&rule->sport_range, rule->sport_mask, + fl4->fl4_sport)) return 0; - if (fib_rule_port_range_set(&rule->dport_range) && - !fib_rule_port_inrange(&rule->dport_range, fl4->fl4_dport)) + if (!fib_rule_port_match(&rule->dport_range, rule->dport_mask, + fl4->fl4_dport)) return 0; return 1; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 321acc8abf17..efe2a085cf68 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -282,7 +282,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct inet_diag_meminfo minfo = { .idiag_rmem = 
sk_rmem_alloc_get(sk), .idiag_wmem = READ_ONCE(sk->sk_wmem_queued), - .idiag_fmem = sk_forward_alloc_get(sk), + .idiag_fmem = READ_ONCE(sk->sk_forward_alloc), .idiag_tmem = sk_wmem_alloc_get(sk), }; diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 40af8fd6efa7..0144d01417d9 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -340,12 +340,12 @@ INDIRECT_CALLABLE_SCOPE int fib6_rule_match(struct fib_rule *rule, if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto)) return 0; - if (fib_rule_port_range_set(&rule->sport_range) && - !fib_rule_port_inrange(&rule->sport_range, fl6->fl6_sport)) + if (!fib_rule_port_match(&rule->sport_range, rule->sport_mask, + fl6->fl6_sport)) return 0; - if (fib_rule_port_range_set(&rule->dport_range) && - !fib_rule_port_inrange(&rule->dport_range, fl6->fl6_dport)) + if (!fib_rule_port_match(&rule->dport_range, rule->dport_mask, + fl6->fl6_dport)) return 0; return 1; diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c index a29ff901df75..b9e451197902 100644 --- a/net/mptcp/fastopen.c +++ b/net/mptcp/fastopen.c @@ -40,17 +40,17 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf tp->copied_seq += skb->len; subflow->ssn_offset += skb->len; - /* initialize a dummy sequence number, we will update it at MPC - * completion, if needed - */ + /* Only the sequence delta is relevant */ MPTCP_SKB_CB(skb)->map_seq = -skb->len; MPTCP_SKB_CB(skb)->end_seq = 0; MPTCP_SKB_CB(skb)->offset = 0; MPTCP_SKB_CB(skb)->has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp; + MPTCP_SKB_CB(skb)->cant_coalesce = 1; mptcp_data_lock(sk); + DEBUG_NET_WARN_ON_ONCE(sock_owned_by_user_nocheck(sk)); - mptcp_set_owner_r(skb, sk); + skb_set_owner_r(skb, sk); __skb_queue_tail(&sk->sk_receive_queue, skb); mptcp_sk(sk)->bytes_received += skb->len; @@ -58,22 +58,3 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf mptcp_data_unlock(sk); } - -void 
__mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, - const struct mptcp_options_received *mp_opt) -{ - struct sock *sk = (struct sock *)msk; - struct sk_buff *skb; - - skb = skb_peek_tail(&sk->sk_receive_queue); - if (skb) { - WARN_ON_ONCE(MPTCP_SKB_CB(skb)->end_seq); - pr_debug("msk %p moving seq %llx -> %llx end_seq %llx -> %llx\n", sk, - MPTCP_SKB_CB(skb)->map_seq, MPTCP_SKB_CB(skb)->map_seq + msk->ack_seq, - MPTCP_SKB_CB(skb)->end_seq, MPTCP_SKB_CB(skb)->end_seq + msk->ack_seq); - MPTCP_SKB_CB(skb)->map_seq += msk->ack_seq; - MPTCP_SKB_CB(skb)->end_seq += msk->ack_seq; - } - - pr_debug("msk=%p ack_seq=%llx\n", msk, msk->ack_seq); -} diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 6bd819047470..6b61b7dee33b 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -118,24 +118,14 @@ static void mptcp_drop(struct sock *sk, struct sk_buff *skb) __kfree_skb(skb); } -static void mptcp_rmem_fwd_alloc_add(struct sock *sk, int size) -{ - WRITE_ONCE(mptcp_sk(sk)->rmem_fwd_alloc, - mptcp_sk(sk)->rmem_fwd_alloc + size); -} - -static void mptcp_rmem_charge(struct sock *sk, int size) -{ - mptcp_rmem_fwd_alloc_add(sk, -size); -} - static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to, struct sk_buff *from) { bool fragstolen; int delta; - if (MPTCP_SKB_CB(from)->offset || + if (unlikely(MPTCP_SKB_CB(to)->cant_coalesce) || + MPTCP_SKB_CB(from)->offset || ((to->len + from->len) > (sk->sk_rcvbuf >> 3)) || !skb_try_coalesce(to, from, &fragstolen, &delta)) return false; @@ -150,7 +140,7 @@ static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to, * negative one */ atomic_add(delta, &sk->sk_rmem_alloc); - mptcp_rmem_charge(sk, delta); + sk_mem_charge(sk, delta); kfree_skb_partial(from, fragstolen); return true; @@ -165,44 +155,6 @@ static bool mptcp_ooo_try_coalesce(struct mptcp_sock *msk, struct sk_buff *to, return mptcp_try_coalesce((struct sock *)msk, to, from); } -static void 
__mptcp_rmem_reclaim(struct sock *sk, int amount) -{ - amount >>= PAGE_SHIFT; - mptcp_rmem_charge(sk, amount << PAGE_SHIFT); - __sk_mem_reduce_allocated(sk, amount); -} - -static void mptcp_rmem_uncharge(struct sock *sk, int size) -{ - struct mptcp_sock *msk = mptcp_sk(sk); - int reclaimable; - - mptcp_rmem_fwd_alloc_add(sk, size); - reclaimable = msk->rmem_fwd_alloc - sk_unused_reserved_mem(sk); - - /* see sk_mem_uncharge() for the rationale behind the following schema */ - if (unlikely(reclaimable >= PAGE_SIZE)) - __mptcp_rmem_reclaim(sk, reclaimable); -} - -static void mptcp_rfree(struct sk_buff *skb) -{ - unsigned int len = skb->truesize; - struct sock *sk = skb->sk; - - atomic_sub(len, &sk->sk_rmem_alloc); - mptcp_rmem_uncharge(sk, len); -} - -void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk) -{ - skb_orphan(skb); - skb->sk = sk; - skb->destructor = mptcp_rfree; - atomic_add(skb->truesize, &sk->sk_rmem_alloc); - mptcp_rmem_charge(sk, skb->truesize); -} - /* "inspired" by tcp_data_queue_ofo(), main differences: * - use mptcp seqs * - don't cope with sacks @@ -315,25 +267,7 @@ merge_right: end: skb_condense(skb); - mptcp_set_owner_r(skb, sk); -} - -static bool mptcp_rmem_schedule(struct sock *sk, struct sock *ssk, int size) -{ - struct mptcp_sock *msk = mptcp_sk(sk); - int amt, amount; - - if (size <= msk->rmem_fwd_alloc) - return true; - - size -= msk->rmem_fwd_alloc; - amt = sk_mem_pages(size); - amount = amt << PAGE_SHIFT; - if (!__sk_mem_raise_allocated(sk, size, amt, SK_MEM_RECV)) - return false; - - mptcp_rmem_fwd_alloc_add(sk, amount); - return true; + skb_set_owner_r(skb, sk); } static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, @@ -351,7 +285,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, skb_orphan(skb); /* try to fetch required memory from subflow */ - if (!mptcp_rmem_schedule(sk, ssk, skb->truesize)) { + if (!sk_rmem_schedule(sk, skb, skb->truesize)) { MPTCP_INC_STATS(sock_net(sk), 
MPTCP_MIB_RCVPRUNED); goto drop; } @@ -366,6 +300,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, MPTCP_SKB_CB(skb)->end_seq = MPTCP_SKB_CB(skb)->map_seq + copy_len; MPTCP_SKB_CB(skb)->offset = offset; MPTCP_SKB_CB(skb)->has_rxtstamp = has_rxtstamp; + MPTCP_SKB_CB(skb)->cant_coalesce = 0; if (MPTCP_SKB_CB(skb)->map_seq == msk->ack_seq) { /* in sequence */ @@ -375,7 +310,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, if (tail && mptcp_try_coalesce(sk, tail, skb)) return true; - mptcp_set_owner_r(skb, sk); + skb_set_owner_r(skb, sk); __skb_queue_tail(&sk->sk_receive_queue, skb); return true; } else if (after64(MPTCP_SKB_CB(skb)->map_seq, msk->ack_seq)) { @@ -561,7 +496,7 @@ static void mptcp_cleanup_rbuf(struct mptcp_sock *msk, int copied) bool cleanup, rx_empty; cleanup = (space > 0) && (space >= (old_space << 1)) && copied; - rx_empty = !__mptcp_rmem(sk) && copied; + rx_empty = !sk_rmem_alloc_get(sk) && copied; mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); @@ -634,27 +569,13 @@ static void mptcp_dss_corruption(struct mptcp_sock *msk, struct sock *ssk) } static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, - struct sock *ssk, - unsigned int *bytes) + struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); struct sock *sk = (struct sock *)msk; - unsigned int moved = 0; bool more_data_avail; struct tcp_sock *tp; - bool done = false; - int sk_rbuf; - - sk_rbuf = READ_ONCE(sk->sk_rcvbuf); - - if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { - int ssk_rbuf = READ_ONCE(ssk->sk_rcvbuf); - - if (unlikely(ssk_rbuf > sk_rbuf)) { - WRITE_ONCE(sk->sk_rcvbuf, ssk_rbuf); - sk_rbuf = ssk_rbuf; - } - } + bool ret = false; pr_debug("msk=%p ssk=%p\n", msk, ssk); tp = tcp_sk(ssk); @@ -664,20 +585,16 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, struct sk_buff *skb; bool fin; + if (sk_rmem_alloc_get(sk) > sk->sk_rcvbuf) + 
break; + /* try to move as much data as available */ map_remaining = subflow->map_data_len - mptcp_subflow_get_map_offset(subflow); skb = skb_peek(&ssk->sk_receive_queue); - if (!skb) { - /* With racing move_skbs_to_msk() and __mptcp_move_skbs(), - * a different CPU can have already processed the pending - * data, stop here or we can enter an infinite loop - */ - if (!moved) - done = true; + if (unlikely(!skb)) break; - } if (__mptcp_check_fallback(msk)) { /* Under fallback skbs have no MPTCP extension and TCP could @@ -690,19 +607,13 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, offset = seq - TCP_SKB_CB(skb)->seq; fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; - if (fin) { - done = true; + if (fin) seq++; - } if (offset < skb->len) { size_t len = skb->len - offset; - if (tp->urg_data) - done = true; - - if (__mptcp_move_skb(msk, ssk, skb, offset, len)) - moved += len; + ret = __mptcp_move_skb(msk, ssk, skb, offset, len) || ret; seq += len; if (unlikely(map_remaining < len)) { @@ -716,22 +627,16 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, } sk_eat_skb(ssk, skb); - done = true; } WRITE_ONCE(tp->copied_seq, seq); more_data_avail = mptcp_subflow_data_available(ssk); - if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf) { - done = true; - break; - } } while (more_data_avail); - if (moved > 0) + if (ret) msk->last_data_recv = tcp_jiffies32; - *bytes += moved; - return done; + return ret; } static bool __mptcp_ofo_queue(struct mptcp_sock *msk) @@ -825,9 +730,9 @@ void __mptcp_error_report(struct sock *sk) static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk) { struct sock *sk = (struct sock *)msk; - unsigned int moved = 0; + bool moved; - __mptcp_move_skbs_from_subflow(msk, ssk, &moved); + moved = __mptcp_move_skbs_from_subflow(msk, ssk); __mptcp_ofo_queue(msk); if (unlikely(ssk->sk_err)) { if (!sock_owned_by_user(sk)) @@ -843,14 +748,29 @@ static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock 
*ssk) */ if (mptcp_pending_data_fin(sk, NULL)) mptcp_schedule_work(sk); - return moved > 0; + return moved; +} + +static void __mptcp_rcvbuf_update(struct sock *sk, struct sock *ssk) +{ + if (unlikely(ssk->sk_rcvbuf > sk->sk_rcvbuf)) + WRITE_ONCE(sk->sk_rcvbuf, ssk->sk_rcvbuf); +} + +static void __mptcp_data_ready(struct sock *sk, struct sock *ssk) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + + __mptcp_rcvbuf_update(sk, ssk); + + /* Wake-up the reader only for in-sequence data */ + if (move_skbs_to_msk(msk, ssk) && mptcp_epollin_ready(sk)) + sk->sk_data_ready(sk); } void mptcp_data_ready(struct sock *sk, struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); - struct mptcp_sock *msk = mptcp_sk(sk); - int sk_rbuf, ssk_rbuf; /* The peer can send data while we are shutting down this * subflow at msk destruction time, but we must avoid enqueuing @@ -859,19 +779,11 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk) if (unlikely(subflow->disposable)) return; - ssk_rbuf = READ_ONCE(ssk->sk_rcvbuf); - sk_rbuf = READ_ONCE(sk->sk_rcvbuf); - if (unlikely(ssk_rbuf > sk_rbuf)) - sk_rbuf = ssk_rbuf; - - /* over limit? 
can't append more skbs to msk, Also, no need to wake-up*/ - if (__mptcp_rmem(sk) > sk_rbuf) - return; - - /* Wake-up the reader only for in-sequence data */ mptcp_data_lock(sk); - if (move_skbs_to_msk(msk, ssk) && mptcp_epollin_ready(sk)) - sk->sk_data_ready(sk); + if (!sock_owned_by_user(sk)) + __mptcp_data_ready(sk, ssk); + else + __set_bit(MPTCP_DEQUEUE, &mptcp_sk(sk)->cb_flags); mptcp_data_unlock(sk); } @@ -950,20 +862,6 @@ bool mptcp_schedule_work(struct sock *sk) return false; } -static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk) -{ - struct mptcp_subflow_context *subflow; - - msk_owned_by_me(msk); - - mptcp_for_each_subflow(msk, subflow) { - if (READ_ONCE(subflow->data_avail)) - return mptcp_subflow_tcp_sock(subflow); - } - - return NULL; -} - static bool mptcp_skb_can_collapse_to(u64 write_seq, const struct sk_buff *skb, const struct mptcp_ext *mpext) @@ -1944,16 +1842,17 @@ do_error: static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied); -static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk, +static int __mptcp_recvmsg_mskq(struct sock *sk, struct msghdr *msg, size_t len, int flags, struct scm_timestamping_internal *tss, int *cmsg_flags) { + struct mptcp_sock *msk = mptcp_sk(sk); struct sk_buff *skb, *tmp; int copied = 0; - skb_queue_walk_safe(&msk->receive_queue, skb, tmp) { + skb_queue_walk_safe(&sk->sk_receive_queue, skb, tmp) { u32 offset = MPTCP_SKB_CB(skb)->offset; u32 data_len = skb->len - offset; u32 count = min_t(size_t, len - copied, data_len); @@ -1985,10 +1884,11 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk, } if (!(flags & MSG_PEEK)) { - /* we will bulk release the skb memory later */ + /* avoid the indirect call, we know the destructor is sock_wfree */ skb->destructor = NULL; - WRITE_ONCE(msk->rmem_released, msk->rmem_released + skb->truesize); - __skb_unlink(skb, &msk->receive_queue); + atomic_sub(skb->truesize, &sk->sk_rmem_alloc); + sk_mem_uncharge(sk, skb->truesize); + 
__skb_unlink(skb, &sk->sk_receive_queue); __kfree_skb(skb); msk->bytes_consumed += count; } @@ -2101,66 +2001,65 @@ new_measure: msk->rcvq_space.time = mstamp; } -static void __mptcp_update_rmem(struct sock *sk) +static struct mptcp_subflow_context * +__mptcp_first_ready_from(struct mptcp_sock *msk, + struct mptcp_subflow_context *subflow) { - struct mptcp_sock *msk = mptcp_sk(sk); + struct mptcp_subflow_context *start_subflow = subflow; - if (!msk->rmem_released) - return; - - atomic_sub(msk->rmem_released, &sk->sk_rmem_alloc); - mptcp_rmem_uncharge(sk, msk->rmem_released); - WRITE_ONCE(msk->rmem_released, 0); + while (!READ_ONCE(subflow->data_avail)) { + subflow = mptcp_next_subflow(msk, subflow); + if (subflow == start_subflow) + return NULL; + } + return subflow; } -static void __mptcp_splice_receive_queue(struct sock *sk) +static bool __mptcp_move_skbs(struct sock *sk) { + struct mptcp_subflow_context *subflow; struct mptcp_sock *msk = mptcp_sk(sk); + bool ret = false; - skb_queue_splice_tail_init(&sk->sk_receive_queue, &msk->receive_queue); -} + if (list_empty(&msk->conn_list)) + return false; -static bool __mptcp_move_skbs(struct mptcp_sock *msk) -{ - struct sock *sk = (struct sock *)msk; - unsigned int moved = 0; - bool ret, done; + /* verify we can move any data from the subflow, eventually updating */ + if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) + mptcp_for_each_subflow(msk, subflow) + __mptcp_rcvbuf_update(sk, subflow->tcp_sock); - do { - struct sock *ssk = mptcp_subflow_recv_lookup(msk); + subflow = list_first_entry(&msk->conn_list, + struct mptcp_subflow_context, node); + for (;;) { + struct sock *ssk; bool slowpath; - /* we can have data pending in the subflows only if the msk - * receive buffer was full at subflow_data_ready() time, - * that is an unlikely slow path. + /* + * As an optimization avoid traversing the subflows list + * and ev. 
acquiring the subflow socket lock before baling out */ - if (likely(!ssk)) + if (sk_rmem_alloc_get(sk) > sk->sk_rcvbuf) break; - slowpath = lock_sock_fast(ssk); - mptcp_data_lock(sk); - __mptcp_update_rmem(sk); - done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved); - mptcp_data_unlock(sk); + subflow = __mptcp_first_ready_from(msk, subflow); + if (!subflow) + break; + ssk = mptcp_subflow_tcp_sock(subflow); + slowpath = lock_sock_fast(ssk); + ret = __mptcp_move_skbs_from_subflow(msk, ssk) || ret; if (unlikely(ssk->sk_err)) __mptcp_error_report(sk); unlock_sock_fast(ssk, slowpath); - } while (!done); - /* acquire the data lock only if some input data is pending */ - ret = moved > 0; - if (!RB_EMPTY_ROOT(&msk->out_of_order_queue) || - !skb_queue_empty_lockless(&sk->sk_receive_queue)) { - mptcp_data_lock(sk); - __mptcp_update_rmem(sk); - ret |= __mptcp_ofo_queue(msk); - __mptcp_splice_receive_queue(sk); - mptcp_data_unlock(sk); + subflow = mptcp_next_subflow(msk, subflow); } + + __mptcp_ofo_queue(msk); if (ret) mptcp_check_data_fin((struct sock *)msk); - return !skb_queue_empty(&msk->receive_queue); + return ret; } static unsigned int mptcp_inq_hint(const struct sock *sk) @@ -2168,7 +2067,7 @@ static unsigned int mptcp_inq_hint(const struct sock *sk) const struct mptcp_sock *msk = mptcp_sk(sk); const struct sk_buff *skb; - skb = skb_peek(&msk->receive_queue); + skb = skb_peek(&sk->sk_receive_queue); if (skb) { u64 hint_val = READ_ONCE(msk->ack_seq) - MPTCP_SKB_CB(skb)->map_seq; @@ -2214,7 +2113,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, while (copied < len) { int err, bytes_read; - bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied, flags, &tss, &cmsg_flags); + bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags, &tss, &cmsg_flags); if (unlikely(bytes_read < 0)) { if (!copied) copied = bytes_read; @@ -2223,7 +2122,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, copied += bytes_read; 
- if (skb_queue_empty(&msk->receive_queue) && __mptcp_move_skbs(msk)) + if (skb_queue_empty(&sk->sk_receive_queue) && __mptcp_move_skbs(sk)) continue; /* only the MPTCP socket status is relevant here. The exit @@ -2249,7 +2148,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, /* race breaker: the shutdown could be after the * previous receive queue check */ - if (__mptcp_move_skbs(msk)) + if (__mptcp_move_skbs(sk)) continue; break; } @@ -2293,9 +2192,8 @@ out_err: } } - pr_debug("msk=%p rx queue empty=%d:%d copied=%d\n", - msk, skb_queue_empty_lockless(&sk->sk_receive_queue), - skb_queue_empty(&msk->receive_queue), copied); + pr_debug("msk=%p rx queue empty=%d copied=%d\n", + msk, skb_queue_empty(&sk->sk_receive_queue), copied); release_sock(sk); return copied; @@ -2822,11 +2720,8 @@ static void __mptcp_init_sock(struct sock *sk) INIT_LIST_HEAD(&msk->join_list); INIT_LIST_HEAD(&msk->rtx_queue); INIT_WORK(&msk->work, mptcp_worker); - __skb_queue_head_init(&msk->receive_queue); msk->out_of_order_queue = RB_ROOT; msk->first_pending = NULL; - WRITE_ONCE(msk->rmem_fwd_alloc, 0); - WRITE_ONCE(msk->rmem_released, 0); msk->timer_ival = TCP_RTO_MIN; msk->scaling_ratio = TCP_DEFAULT_SCALING_RATIO; @@ -3052,8 +2947,6 @@ static void __mptcp_destroy_sock(struct sock *sk) sk->sk_prot->destroy(sk); - WARN_ON_ONCE(READ_ONCE(msk->rmem_fwd_alloc)); - WARN_ON_ONCE(msk->rmem_released); sk_stream_kill_queues(sk); xfrm_sk_free_policy(sk); @@ -3405,18 +3298,12 @@ void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags) mptcp_for_each_subflow_safe(msk, subflow, tmp) __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow), subflow, flags); - /* move to sk_receive_queue, sk_stream_kill_queues will purge it */ - mptcp_data_lock(sk); - skb_queue_splice_tail_init(&msk->receive_queue, &sk->sk_receive_queue); __skb_queue_purge(&sk->sk_receive_queue); skb_rbtree_purge(&msk->out_of_order_queue); - mptcp_data_unlock(sk); /* move all the rx fwd alloc into the 
sk_mem_reclaim_final in * inet_sock_destruct() will dispose it */ - sk_forward_alloc_add(sk, msk->rmem_fwd_alloc); - WRITE_ONCE(msk->rmem_fwd_alloc, 0); mptcp_token_destroy(msk); mptcp_pm_free_anno_list(msk); mptcp_free_local_addr_list(msk); @@ -3453,7 +3340,8 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk) #define MPTCP_FLAGS_PROCESS_CTX_NEED (BIT(MPTCP_PUSH_PENDING) | \ BIT(MPTCP_RETRANSMIT) | \ - BIT(MPTCP_FLUSH_JOIN_LIST)) + BIT(MPTCP_FLUSH_JOIN_LIST) | \ + BIT(MPTCP_DEQUEUE)) /* processes deferred events and flush wmem */ static void mptcp_release_cb(struct sock *sk) @@ -3487,6 +3375,11 @@ static void mptcp_release_cb(struct sock *sk) __mptcp_push_pending(sk, 0); if (flags & BIT(MPTCP_RETRANSMIT)) __mptcp_retrans(sk); + if ((flags & BIT(MPTCP_DEQUEUE)) && __mptcp_move_skbs(sk)) { + /* notify ack seq update */ + mptcp_cleanup_rbuf(msk, 0); + sk->sk_data_ready(sk); + } cond_resched(); spin_lock_bh(&sk->sk_lock.slock); @@ -3506,8 +3399,6 @@ static void mptcp_release_cb(struct sock *sk) if (__test_and_clear_bit(MPTCP_SYNC_SNDBUF, &msk->cb_flags)) __mptcp_sync_sndbuf(sk); } - - __mptcp_update_rmem(sk); } /* MP_JOIN client subflow must wait for 4th ack before sending any data: @@ -3678,12 +3569,6 @@ static void mptcp_shutdown(struct sock *sk, int how) __mptcp_wr_shutdown(sk); } -static int mptcp_forward_alloc_get(const struct sock *sk) -{ - return READ_ONCE(sk->sk_forward_alloc) + - READ_ONCE(mptcp_sk(sk)->rmem_fwd_alloc); -} - static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v) { const struct sock *sk = (void *)msk; @@ -3724,7 +3609,8 @@ static int mptcp_ioctl(struct sock *sk, int cmd, int *karg) return -EINVAL; lock_sock(sk); - __mptcp_move_skbs(msk); + if (__mptcp_move_skbs(sk)) + mptcp_cleanup_rbuf(msk, 0); *karg = mptcp_inq_hint(sk); release_sock(sk); break; @@ -3841,7 +3727,6 @@ static struct proto mptcp_prot = { .hash = mptcp_hash, .unhash = mptcp_unhash, .get_port = mptcp_get_port, - .forward_alloc_get = mptcp_forward_alloc_get, 
.stream_memory_free = mptcp_stream_memory_free, .sockets_allocated = &mptcp_sockets_allocated, diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 37226cdd9e37..ca65f8bff632 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -124,12 +124,14 @@ #define MPTCP_FLUSH_JOIN_LIST 5 #define MPTCP_SYNC_STATE 6 #define MPTCP_SYNC_SNDBUF 7 +#define MPTCP_DEQUEUE 8 struct mptcp_skb_cb { u64 map_seq; u64 end_seq; u32 offset; - u8 has_rxtstamp:1; + u8 has_rxtstamp; + u8 cant_coalesce; }; #define MPTCP_SKB_CB(__skb) ((struct mptcp_skb_cb *)&((__skb)->cb[0])) @@ -279,7 +281,6 @@ struct mptcp_sock { u64 rcv_data_fin_seq; u64 bytes_retrans; u64 bytes_consumed; - int rmem_fwd_alloc; int snd_burst; int old_wspace; u64 recovery_snd_nxt; /* in recovery mode accept up to this seq; @@ -294,7 +295,6 @@ struct mptcp_sock { u32 last_ack_recv; unsigned long timer_ival; u32 token; - int rmem_released; unsigned long flags; unsigned long cb_flags; bool recovery; /* closing subflow write queue reinjected */ @@ -324,7 +324,6 @@ struct mptcp_sock { struct work_struct work; struct sk_buff *ooo_last_skb; struct rb_root out_of_order_queue; - struct sk_buff_head receive_queue; struct list_head conn_list; struct list_head rtx_queue; struct mptcp_data_frag *first_pending; @@ -355,6 +354,8 @@ struct mptcp_sock { list_for_each_entry(__subflow, &((__msk)->conn_list), node) #define mptcp_for_each_subflow_safe(__msk, __subflow, __tmp) \ list_for_each_entry_safe(__subflow, __tmp, &((__msk)->conn_list), node) +#define mptcp_next_subflow(__msk, __subflow) \ + list_next_entry_circular(__subflow, &((__msk)->conn_list), node) extern struct genl_family mptcp_genl_family; @@ -381,14 +382,6 @@ static inline void msk_owned_by_me(const struct mptcp_sock *msk) #define mptcp_sk(ptr) container_of_const(ptr, struct mptcp_sock, sk.icsk_inet.sk) #endif -/* the msk socket don't use the backlog, also account for the bulk - * free memory - */ -static inline int __mptcp_rmem(const struct sock *sk) -{ - 
return atomic_read(&sk->sk_rmem_alloc) - READ_ONCE(mptcp_sk(sk)->rmem_released); -} - static inline int mptcp_win_from_space(const struct sock *sk, int space) { return __tcp_win_from_space(mptcp_sk(sk)->scaling_ratio, space); @@ -401,7 +394,8 @@ static inline int mptcp_space_from_win(const struct sock *sk, int win) static inline int __mptcp_space(const struct sock *sk) { - return mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - __mptcp_rmem(sk)); + return mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - + sk_rmem_alloc_get(sk)); } static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk) @@ -1059,8 +1053,6 @@ void mptcp_event_pm_listener(const struct sock *ssk, enum mptcp_event_type event); bool mptcp_userspace_pm_active(const struct mptcp_sock *msk); -void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, - const struct mptcp_options_received *mp_opt); void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow, struct request_sock *req); int mptcp_nl_fill_addr(struct sk_buff *skb, diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index fd021cf8286e..d2caffa56bdd 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -802,9 +802,6 @@ void __mptcp_subflow_fully_established(struct mptcp_sock *msk, subflow_set_remote_key(msk, subflow, mp_opt); WRITE_ONCE(subflow->fully_established, true); WRITE_ONCE(msk->fully_established, true); - - if (subflow->is_mptfo) - __mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt); } static struct sock *subflow_syn_recv_sock(const struct sock *sk, @@ -1271,7 +1268,12 @@ out: subflow->map_valid = 0; } -/* sched mptcp worker to remove the subflow if no more data is pending */ +static bool subflow_is_done(const struct sock *sk) +{ + return sk->sk_shutdown & RCV_SHUTDOWN || sk->sk_state == TCP_CLOSE; +} + +/* sched mptcp worker for subflow cleanup if no more data is pending */ static void subflow_sched_work_if_closed(struct mptcp_sock 
*msk, struct sock *ssk) { struct sock *sk = (struct sock *)msk; @@ -1281,8 +1283,18 @@ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ss inet_sk_state_load(sk) != TCP_ESTABLISHED))) return; - if (skb_queue_empty(&ssk->sk_receive_queue) && - !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) + if (!skb_queue_empty(&ssk->sk_receive_queue)) + return; + + if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) + mptcp_schedule_work(sk); + + /* when the fallback subflow closes the rx side, trigger a 'dummy' + * ingress data fin, so that the msk state will follow along + */ + if (__mptcp_check_fallback(msk) && subflow_is_done(ssk) && + msk->first == ssk && + mptcp_update_rcv_data_fin(msk, READ_ONCE(msk->ack_seq), true)) mptcp_schedule_work(sk); } @@ -1842,11 +1854,6 @@ static void __subflow_state_change(struct sock *sk) rcu_read_unlock(); } -static bool subflow_is_done(const struct sock *sk) -{ - return sk->sk_shutdown & RCV_SHUTDOWN || sk->sk_state == TCP_CLOSE; -} - static void subflow_state_change(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); @@ -1873,13 +1880,6 @@ static void subflow_state_change(struct sock *sk) subflow_error_report(sk); subflow_sched_work_if_closed(mptcp_sk(parent), sk); - - /* when the fallback subflow closes the rx side, trigger a 'dummy' - * ingress data fin, so that the msk state will follow along - */ - if (__mptcp_check_fallback(msk) && subflow_is_done(sk) && msk->first == sk && - mptcp_update_rcv_data_fin(msk, READ_ONCE(msk->ack_seq), true)) - mptcp_schedule_work(parent); } void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk) diff --git a/net/nfc/hci/llc.c b/net/nfc/hci/llc.c index ba91284f4086..e6cf4eb06b46 100644 --- a/net/nfc/hci/llc.c +++ b/net/nfc/hci/llc.c @@ -78,17 +78,6 @@ static struct nfc_llc_engine *nfc_llc_name_to_engine(const char *name) return NULL; } -void nfc_llc_unregister(const char *name) -{ - struct 
nfc_llc_engine *llc_engine; - - llc_engine = nfc_llc_name_to_engine(name); - if (llc_engine == NULL) - return; - - nfc_llc_del_engine(llc_engine); -} - struct nfc_llc *nfc_llc_allocate(const char *name, struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv, rcv_to_hci_t rcv_to_hci, int tx_headroom, diff --git a/net/nfc/hci/llc.h b/net/nfc/hci/llc.h index d66271d211a5..09914608ec43 100644 --- a/net/nfc/hci/llc.h +++ b/net/nfc/hci/llc.h @@ -40,7 +40,6 @@ struct nfc_llc { void *nfc_llc_get_data(struct nfc_llc *llc); int nfc_llc_register(const char *name, const struct nfc_llc_ops *ops); -void nfc_llc_unregister(const char *name); int nfc_llc_nop_register(void); diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 8996c73c9779..3f2e707a11d1 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -460,7 +460,7 @@ META_COLLECTOR(int_sk_fwd_alloc) *err = -1; return; } - dst->value = sk_forward_alloc_get(sk); + dst->value = READ_ONCE(sk->sk_forward_alloc); } META_COLLECTOR(int_sk_sndbuf) diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index 21ba64ce1e34..ae783e18be83 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -15,6 +15,7 @@ TEST_PROGS = \ nic_performance.py \ pp_alloc_fail.py \ rss_ctx.py \ + tso.py \ # TEST_FILES := \ diff --git a/tools/testing/selftests/drivers/net/hw/csum.py b/tools/testing/selftests/drivers/net/hw/csum.py index cd477f3440ca..701aca1361e0 100755 --- a/tools/testing/selftests/drivers/net/hw/csum.py +++ b/tools/testing/selftests/drivers/net/hw/csum.py @@ -9,15 +9,12 @@ from lib.py import ksft_run, ksft_exit, KsftSkipEx from lib.py import EthtoolFamily, NetDrvEpEnv from lib.py import bkg, cmd, wait_port_listen -def test_receive(cfg, ipv4=False, extra_args=None): +def test_receive(cfg, ipver="6", extra_args=None): """Test local nic checksum receive. 
Remote host sends crafted packets.""" if not cfg.have_rx_csum: raise KsftSkipEx(f"Test requires rx checksum offload on {cfg.ifname}") - if ipv4: - ip_args = f"-4 -S {cfg.remote_v4} -D {cfg.v4}" - else: - ip_args = f"-6 -S {cfg.remote_v6} -D {cfg.v6}" + ip_args = f"-{ipver} -S {cfg.remote_addr_v[ipver]} -D {cfg.addr_v[ipver]}" rx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -n 100 {ip_args} -r 1 -R {extra_args}" tx_cmd = f"{cfg.bin_remote} -i {cfg.ifname} -n 100 {ip_args} -r 1 -T {extra_args}" @@ -27,17 +24,14 @@ def test_receive(cfg, ipv4=False, extra_args=None): cmd(tx_cmd, host=cfg.remote) -def test_transmit(cfg, ipv4=False, extra_args=None): +def test_transmit(cfg, ipver="6", extra_args=None): """Test local nic checksum transmit. Remote host verifies packets.""" if (not cfg.have_tx_csum_generic and - not (cfg.have_tx_csum_ipv4 and ipv4) and - not (cfg.have_tx_csum_ipv6 and not ipv4)): + not (cfg.have_tx_csum_ipv4 and ipver == "4") and + not (cfg.have_tx_csum_ipv6 and ipver == "6")): raise KsftSkipEx(f"Test requires tx checksum offload on {cfg.ifname}") - if ipv4: - ip_args = f"-4 -S {cfg.v4} -D {cfg.remote_v4}" - else: - ip_args = f"-6 -S {cfg.v6} -D {cfg.remote_v6}" + ip_args = f"-{ipver} -S {cfg.addr_v[ipver]} -D {cfg.remote_addr_v[ipver]}" # Cannot randomize input when calculating zero checksum if extra_args != "-U -Z": @@ -51,26 +45,20 @@ def test_transmit(cfg, ipv4=False, extra_args=None): cmd(tx_cmd) -def test_builder(name, cfg, ipv4=False, tx=False, extra_args=""): +def test_builder(name, cfg, ipver="6", tx=False, extra_args=""): """Construct specific tests from the common template. 
Most tests follow the same basic pattern, differing only in Direction of the test and optional flags passed to csum.""" def f(cfg): - if ipv4: - cfg.require_v4() - else: - cfg.require_v6() + cfg.require_ipver(ipver) if tx: - test_transmit(cfg, ipv4, extra_args) + test_transmit(cfg, ipver, extra_args) else: - test_receive(cfg, ipv4, extra_args) + test_receive(cfg, ipver, extra_args) - if ipv4: - f.__name__ = "ipv4_" + name - else: - f.__name__ = "ipv6_" + name + f.__name__ = f"ipv{ipver}_" + name return f @@ -104,15 +92,15 @@ def main() -> None: cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) cases = [] - for ipv4 in [True, False]: - cases.append(test_builder("rx_tcp", cfg, ipv4, False, "-t")) - cases.append(test_builder("rx_tcp_invalid", cfg, ipv4, False, "-t -E")) + for ipver in ["4", "6"]: + cases.append(test_builder("rx_tcp", cfg, ipver, False, "-t")) + cases.append(test_builder("rx_tcp_invalid", cfg, ipver, False, "-t -E")) - cases.append(test_builder("rx_udp", cfg, ipv4, False, "")) - cases.append(test_builder("rx_udp_invalid", cfg, ipv4, False, "-E")) + cases.append(test_builder("rx_udp", cfg, ipver, False, "")) + cases.append(test_builder("rx_udp_invalid", cfg, ipver, False, "-E")) - cases.append(test_builder("tx_udp_csum_offload", cfg, ipv4, True, "-U")) - cases.append(test_builder("tx_udp_zero_checksum", cfg, ipv4, True, "-U -Z")) + cases.append(test_builder("tx_udp_csum_offload", cfg, ipver, True, "-U")) + cases.append(test_builder("tx_udp_zero_checksum", cfg, ipver, True, "-U -Z")) ksft_run(cases=cases, args=(cfg, )) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py index 1223f0f5c10c..3947e9157115 100755 --- a/tools/testing/selftests/drivers/net/hw/devmem.py +++ b/tools/testing/selftests/drivers/net/hw/devmem.py @@ -21,15 +21,15 @@ def require_devmem(cfg): @ksft_disruptive def check_rx(cfg) -> None: - cfg.require_v6() + cfg.require_ipver("6") require_devmem(cfg) port = 
rand_port() - listen_cmd = f"./ncdevmem -l -f {cfg.ifname} -s {cfg.v6} -p {port}" + listen_cmd = f"./ncdevmem -l -f {cfg.ifname} -s {cfg.addr_v['6']} -p {port}" with bkg(listen_cmd) as socat: wait_port_listen(port) - cmd(f"echo -e \"hello\\nworld\"| socat -u - TCP6:[{cfg.v6}]:{port}", host=cfg.remote, shell=True) + cmd(f"echo -e \"hello\\nworld\"| socat -u - TCP6:[{cfg.addr_v['6']}]:{port}", host=cfg.remote, shell=True) ksft_eq(socat.stdout.strip(), "hello\nworld") diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py new file mode 100755 index 000000000000..e1ecb92f79d9 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/tso.py @@ -0,0 +1,241 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +"""Run the tools/testing/selftests/net/csum testsuite.""" + +import fcntl +import socket +import struct +import termios +import time + +from lib.py import ksft_pr, ksft_run, ksft_exit, KsftSkipEx, KsftXfailEx +from lib.py import ksft_eq, ksft_ge, ksft_lt +from lib.py import EthtoolFamily, NetdevFamily, NetDrvEpEnv +from lib.py import bkg, cmd, defer, ethtool, ip, rand_port, wait_port_listen + + +def sock_wait_drain(sock, max_wait=1000): + """Wait for all pending write data on the socket to get ACKed.""" + for _ in range(max_wait): + one = b'\0' * 4 + outq = fcntl.ioctl(sock.fileno(), termios.TIOCOUTQ, one) + outq = struct.unpack("I", outq)[0] + if outq == 0: + break + time.sleep(0.01) + ksft_eq(outq, 0) + + +def tcp_sock_get_retrans(sock): + """Get the number of retransmissions for the TCP socket.""" + info = sock.getsockopt(socket.SOL_TCP, socket.TCP_INFO, 512) + return struct.unpack("I", info[100:104])[0] + + +def run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso): + cfg.require_cmd("socat", remote=True) + + port = rand_port() + listen_cmd = f"socat -{ipver} -t 2 -u TCP-LISTEN:{port},reuseport /dev/null,ignoreeof" + + with bkg(listen_cmd, host=cfg.remote) as nc: + wait_port_listen(port, 
host=cfg.remote) + + if ipver == "4": + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.connect((remote_v4, port)) + else: + sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) + sock.connect((remote_v6, port)) + + # Small send to make sure the connection is working. + sock.send("ping".encode()) + sock_wait_drain(sock) + + # Send 4MB of data, record the LSO packet count. + qstat_old = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] + buf = b"0" * 1024 * 1024 * 4 + sock.send(buf) + sock_wait_drain(sock) + qstat_new = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] + + # No math behind the 10 here, but try to catch cases where + # TCP falls back to non-LSO. + ksft_lt(tcp_sock_get_retrans(sock), 10) + sock.close() + + # Check that at least 90% of the data was sent as LSO packets. + # System noise may cause false negatives. Also header overheads + # will add up to 5% of extra packes... The check is best effort. + total_lso_wire = len(buf) * 0.90 // cfg.dev["mtu"] + total_lso_super = len(buf) * 0.90 // cfg.dev["tso_max_size"] + if should_lso: + if cfg.have_stat_super_count: + ksft_ge(qstat_new['tx-hw-gso-packets'] - + qstat_old['tx-hw-gso-packets'], + total_lso_super, + comment="Number of LSO super-packets with LSO enabled") + if cfg.have_stat_wire_count: + ksft_ge(qstat_new['tx-hw-gso-wire-packets'] - + qstat_old['tx-hw-gso-wire-packets'], + total_lso_wire, + comment="Number of LSO wire-packets with LSO enabled") + else: + if cfg.have_stat_super_count: + ksft_lt(qstat_new['tx-hw-gso-packets'] - + qstat_old['tx-hw-gso-packets'], + 15, comment="Number of LSO super-packets with LSO disabled") + if cfg.have_stat_wire_count: + ksft_lt(qstat_new['tx-hw-gso-wire-packets'] - + qstat_old['tx-hw-gso-wire-packets'], + 500, comment="Number of LSO wire-packets with LSO disabled") + + +def build_tunnel(cfg, outer_ipver, tun_info): + local_v4 = NetDrvEpEnv.nsim_v4_pfx + "1" + local_v6 = NetDrvEpEnv.nsim_v6_pfx + "1" + remote_v4 = 
NetDrvEpEnv.nsim_v4_pfx + "2" + remote_v6 = NetDrvEpEnv.nsim_v6_pfx + "2" + + local_addr = cfg.addr_v[outer_ipver] + remote_addr = cfg.remote_addr_v[outer_ipver] + + tun_type = tun_info[0] + tun_arg = tun_info[2] + ip(f"link add {tun_type}-ksft type {tun_type} {tun_arg} local {local_addr} remote {remote_addr} dev {cfg.ifname}") + defer(ip, f"link del {tun_type}-ksft") + ip(f"link set dev {tun_type}-ksft up") + ip(f"addr add {local_v4}/24 dev {tun_type}-ksft") + ip(f"addr add {local_v6}/64 dev {tun_type}-ksft") + + ip(f"link add {tun_type}-ksft type {tun_type} {tun_arg} local {remote_addr} remote {local_addr} dev {cfg.remote_ifname}", + host=cfg.remote) + defer(ip, f"link del {tun_type}-ksft", host=cfg.remote) + ip(f"link set dev {tun_type}-ksft up", host=cfg.remote) + ip(f"addr add {remote_v4}/24 dev {tun_type}-ksft", host=cfg.remote) + ip(f"addr add {remote_v6}/64 dev {tun_type}-ksft", host=cfg.remote) + + return remote_v4, remote_v6 + + +def test_builder(name, cfg, outer_ipver, feature, tun=None, inner_ipver=None): + """Construct specific tests from the common template.""" + def f(cfg): + cfg.require_ipver(outer_ipver) + + if not cfg.have_stat_super_count and \ + not cfg.have_stat_wire_count: + raise KsftSkipEx(f"Device does not support LSO queue stats") + + ipver = outer_ipver + if tun: + remote_v4, remote_v6 = build_tunnel(cfg, ipver, tun) + ipver = inner_ipver + else: + remote_v4 = cfg.remote_addr_v["4"] + remote_v6 = cfg.remote_addr_v["6"] + + tun_partial = tun and tun[1] + # Tunnel which can silently fall back to gso-partial + has_gso_partial = tun and 'tx-gso-partial' in cfg.features + + # For TSO4 via partial we need mangleid + if ipver == "4" and feature in cfg.partial_features: + ksft_pr("Testing with mangleid enabled") + if 'tx-tcp-mangleid-segmentation' not in cfg.features: + ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation on") + defer(ethtool, f"-K {cfg.ifname} tx-tcp-mangleid-segmentation off") + + # First test without the feature enabled. 
+ ethtool(f"-K {cfg.ifname} {feature} off") + if has_gso_partial: + ethtool(f"-K {cfg.ifname} tx-gso-partial off") + run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=False) + + # Now test with the feature enabled. + # For compatible tunnels only - just GSO partial, not specific feature. + if has_gso_partial: + ethtool(f"-K {cfg.ifname} tx-gso-partial on") + run_one_stream(cfg, ipver, remote_v4, remote_v6, + should_lso=tun_partial) + + # Full feature enabled. + if feature in cfg.features: + ethtool(f"-K {cfg.ifname} {feature} on") + run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=True) + else: + raise KsftXfailEx(f"Device does not support {feature}") + + f.__name__ = name + ((outer_ipver + "_") if tun else "") + "ipv" + inner_ipver + return f + + +def query_nic_features(cfg) -> None: + """Query and cache the NIC features.""" + cfg.have_stat_super_count = False + cfg.have_stat_wire_count = False + + cfg.features = set() + features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}}) + for f in features["active"]["bits"]["bit"]: + cfg.features.add(f["name"]) + + # Check which features are supported via GSO partial + cfg.partial_features = set() + if 'tx-gso-partial' in cfg.features: + ethtool(f"-K {cfg.ifname} tx-gso-partial off") + + no_partial = set() + features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}}) + for f in features["active"]["bits"]["bit"]: + no_partial.add(f["name"]) + cfg.partial_features = cfg.features - no_partial + ethtool(f"-K {cfg.ifname} tx-gso-partial on") + + stats = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True) + if stats: + if 'tx-hw-gso-packets' in stats[0]: + ksft_pr("Detected qstat for LSO super-packets") + cfg.have_stat_super_count = True + if 'tx-hw-gso-wire-packets' in stats[0]: + ksft_pr("Detected qstat for LSO wire-packets") + cfg.have_stat_wire_count = True + + +def main() -> None: + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + cfg.ethnl = EthtoolFamily() + 
cfg.netnl = NetdevFamily() + + query_nic_features(cfg) + + test_info = ( + # name, v4/v6 ethtool_feature tun:(type, partial, args) + ("", "4", "tx-tcp-segmentation", None), + ("", "6", "tx-tcp6-segmentation", None), + ("vxlan", "", "tx-udp_tnl-segmentation", ("vxlan", True, "id 100 dstport 4789 noudpcsum")), + ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", False, "id 100 dstport 4789 udpcsum")), + ("gre", "4", "tx-gre-segmentation", ("ipgre", False, "")), + ("gre", "6", "tx-gre-segmentation", ("ip6gre", False, "")), + ) + + cases = [] + for outer_ipver in ["4", "6"]: + for info in test_info: + # Skip if test which only works for a specific IP version + if info[1] and outer_ipver != info[1]: + continue + + cases.append(test_builder(info[0], cfg, outer_ipver, info[2], + tun=info[3], inner_ipver="4")) + if info[3]: + cases.append(test_builder(info[0], cfg, outer_ipver, info[2], + tun=info[3], inner_ipver="6")) + + ksft_run(cases=cases, args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index 886b4904613c..96b33b5ef9dd 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -64,7 +64,7 @@ class NetDrvEnv(NetDrvEnvBase): self._ns = None if 'NETIF' in self.env: - self.dev = ip("link show dev " + self.env['NETIF'], json=True)[0] + self.dev = ip("-d link show dev " + self.env['NETIF'], json=True)[0] else: self._ns = NetdevSimDev(**kwargs) self.dev = self._ns.nsims[0].dev @@ -113,17 +113,20 @@ class NetDrvEpEnv(NetDrvEnvBase): self._ns = None self._ns_peer = None + self.addr_v = { "4": None, "6": None } + self.remote_addr_v = { "4": None, "6": None } + if "NETIF" in self.env: if nsim_test is True: raise KsftXfailEx("Test only works on netdevsim") self._check_env() - self.dev = ip("link show dev " + self.env['NETIF'], json=True)[0] + self.dev = ip("-d link show dev 
" + self.env['NETIF'], json=True)[0] - self.v4 = self.env.get("LOCAL_V4") - self.v6 = self.env.get("LOCAL_V6") - self.remote_v4 = self.env.get("REMOTE_V4") - self.remote_v6 = self.env.get("REMOTE_V6") + self.addr_v["4"] = self.env.get("LOCAL_V4") + self.addr_v["6"] = self.env.get("LOCAL_V6") + self.remote_addr_v["4"] = self.env.get("REMOTE_V4") + self.remote_addr_v["6"] = self.env.get("REMOTE_V6") kind = self.env["REMOTE_TYPE"] args = self.env["REMOTE_ARGS"] else: @@ -134,26 +137,29 @@ class NetDrvEpEnv(NetDrvEnvBase): self.dev = self._ns.nsims[0].dev - self.v4 = self.nsim_v4_pfx + "1" - self.v6 = self.nsim_v6_pfx + "1" - self.remote_v4 = self.nsim_v4_pfx + "2" - self.remote_v6 = self.nsim_v6_pfx + "2" + self.addr_v["4"] = self.nsim_v4_pfx + "1" + self.addr_v["6"] = self.nsim_v6_pfx + "1" + self.remote_addr_v["4"] = self.nsim_v4_pfx + "2" + self.remote_addr_v["6"] = self.nsim_v6_pfx + "2" kind = "netns" args = self._netns.name self.remote = Remote(kind, args, src_path) - self.addr = self.v6 if self.v6 else self.v4 - self.remote_addr = self.remote_v6 if self.remote_v6 else self.remote_v4 + self.addr_ipver = "6" if self.addr_v["6"] else "4" + self.addr = self.addr_v[self.addr_ipver] + self.remote_addr = self.remote_addr_v[self.addr_ipver] - self.addr_ipver = "6" if self.v6 else "4" # Bracketed addresses, some commands need IPv6 to be inside [] - self.baddr = f"[{self.v6}]" if self.v6 else self.v4 - self.remote_baddr = f"[{self.remote_v6}]" if self.remote_v6 else self.remote_v4 + self.baddr = f"[{self.addr_v['6']}]" if self.addr_v["6"] else self.addr_v["4"] + self.remote_baddr = f"[{self.remote_addr_v['6']}]" if self.remote_addr_v["6"] else self.remote_addr_v["4"] self.ifname = self.dev['ifname'] self.ifindex = self.dev['ifindex'] + # resolve remote interface name + self.remote_ifname = self.resolve_remote_ifc() + self._required_cmd = {} def create_local(self): @@ -200,6 +206,18 @@ class NetDrvEpEnv(NetDrvEnvBase): raise Exception("Invalid environment, missing 
configuration:", missing, "Please see tools/testing/selftests/drivers/net/README.rst") + def resolve_remote_ifc(self): + v4 = v6 = None + if self.remote_addr_v["4"]: + v4 = ip("addr show to " + self.remote_addr_v["4"], json=True, host=self.remote) + if self.remote_addr_v["6"]: + v6 = ip("addr show to " + self.remote_addr_v["6"], json=True, host=self.remote) + if v4 and v6 and v4[0]["ifname"] != v6[0]["ifname"]: + raise Exception("Can't resolve remote interface name, v4 and v6 don't match") + if (v4 and len(v4) > 1) or (v6 and len(v6) > 1): + raise Exception("Can't resolve remote interface name, multiple interfaces match") + return v6[0]["ifname"] if v6 else v4[0]["ifname"] + def __enter__(self): return self @@ -223,13 +241,9 @@ class NetDrvEpEnv(NetDrvEnvBase): del self.remote self.remote = None - def require_v4(self): - if not self.v4 or not self.remote_v4: - raise KsftSkipEx("Test requires IPv4 connectivity") - - def require_v6(self): - if not self.v6 or not self.remote_v6: - raise KsftSkipEx("Test requires IPv6 connectivity") + def require_ipver(self, ipver): + if not self.addr_v[ipver] or not self.remote_addr_v[ipver]: + raise KsftSkipEx(f"Test requires IPv{ipver} connectivity") def _require_cmd(self, comm, key, host=None): cached = self._required_cmd.get(comm, {}) diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py index eb83e7b48797..17dc11e9b6dd 100755 --- a/tools/testing/selftests/drivers/net/ping.py +++ b/tools/testing/selftests/drivers/net/ping.py @@ -8,17 +8,17 @@ from lib.py import bkg, cmd, wait_port_listen, rand_port def test_v4(cfg) -> None: - cfg.require_v4() + cfg.require_ipver("4") - cmd(f"ping -c 1 -W0.5 {cfg.remote_v4}") - cmd(f"ping -c 1 -W0.5 {cfg.v4}", host=cfg.remote) + cmd("ping -c 1 -W0.5 " + cfg.remote_addr_v["4"]) + cmd("ping -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote) def test_v6(cfg) -> None: - cfg.require_v6() + cfg.require_ipver("6") - cmd(f"ping -c 1 -W0.5 {cfg.remote_v6}") - 
cmd(f"ping -c 1 -W0.5 {cfg.v6}", host=cfg.remote) + cmd("ping -c 1 -W0.5 " + cfg.remote_addr_v["6"]) + cmd("ping -c 1 -W0.5 " + cfg.addr_v["6"], host=cfg.remote) def test_tcp(cfg) -> None: diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index 847936363a12..06c51d7ceb4a 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -256,6 +256,24 @@ fib_rule6_test() fib_rule6_test_match_n_redirect "$match" "$match" \ "$getnomatch" "sport and dport redirect to table" \ "sport and dport no redirect to table" + + match="sport 100-200 dport 300-400" + getmatch="sport 100 dport 400" + getnomatch="sport 100 dport 401" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" \ + "sport and dport range redirect to table" \ + "sport and dport range no redirect to table" + fi + + ip rule help 2>&1 | grep sport | grep -q MASK + if [ $? -eq 0 ]; then + match="sport 0x0f00/0xff00 dport 0x000f/0x00ff" + getmatch="sport 0x0f11 dport 0x220f" + getnomatch="sport 0x1f11 dport 0x221f" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "sport and dport masked redirect to table" \ + "sport and dport masked no redirect to table" fi fib_check_iproute_support "ipproto" "ipproto" @@ -525,6 +543,24 @@ fib_rule4_test() fib_rule4_test_match_n_redirect "$match" "$match" \ "$getnomatch" "sport and dport redirect to table" \ "sport and dport no redirect to table" + + match="sport 100-200 dport 300-400" + getmatch="sport 100 dport 400" + getnomatch="sport 100 dport 401" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" \ + "sport and dport range redirect to table" \ + "sport and dport range no redirect to table" + fi + + ip rule help 2>&1 | grep sport | grep -q MASK + if [ $? 
-eq 0 ]; then + match="sport 0x0f00/0xff00 dport 0x000f/0x00ff" + getmatch="sport 0x0f11 dport 0x220f" + getnomatch="sport 0x1f11 dport 0x221f" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "sport and dport masked redirect to table" \ + "sport and dport masked no redirect to table" fi fib_check_iproute_support "ipproto" "ipproto" diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 9c2a415976cb..2329c2f8519b 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -28,7 +28,7 @@ size=0 usage() { echo "Usage: $0 [ -b ] [ -c ] [ -d ] [ -i]" - echo -e "\t-b: bail out after first error, otherwise runs al testcases" + echo -e "\t-b: bail out after first error, otherwise runs all testcases" echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)" echo -e "\t-d: debug this script" echo -e "\t-i: use 'ip mptcp' instead of 'pm_nl_ctl'" diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c index 404a2ce759ab..221270cee3ea 100644 --- a/tools/testing/selftests/net/psock_tpacket.c +++ b/tools/testing/selftests/net/psock_tpacket.c @@ -12,7 +12,7 @@ * * Datapath: * Open a pair of packet sockets and send resp. receive an a priori known - * packet pattern accross the sockets and check if it was received resp. + * packet pattern across the sockets and check if it was received resp. * sent correctly. Fanout in combination with RX_RING is currently not * tested here. * |