diff options
49 files changed, 1285 insertions, 148 deletions
diff --git a/Documentation/devicetree/bindings/net/qca,ar803x.yaml b/Documentation/devicetree/bindings/net/qca,ar803x.yaml new file mode 100644 index 000000000000..5a6c9d20c0ba --- /dev/null +++ b/Documentation/devicetree/bindings/net/qca,ar803x.yaml @@ -0,0 +1,111 @@ +# SPDX-License-Identifier: GPL-2.0+ +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/qca,ar803x.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Atheros AR803x PHY + +maintainers: + - Andrew Lunn <andrew@lunn.ch> + - Florian Fainelli <f.fainelli@gmail.com> + - Heiner Kallweit <hkallweit1@gmail.com> + +description: | + Bindings for Qualcomm Atheros AR803x PHYs + +allOf: + - $ref: ethernet-phy.yaml# + +properties: + qca,clk-out-frequency: + description: Clock output frequency in Hertz. + allOf: + - $ref: /schemas/types.yaml#/definitions/uint32 + - enum: [ 25000000, 50000000, 62500000, 125000000 ] + + qca,clk-out-strength: + description: Clock output driver strength. + allOf: + - $ref: /schemas/types.yaml#/definitions/uint32 + - enum: [ 0, 1, 2 ] + + qca,keep-pll-enabled: + description: | + If set, keep the PLL enabled even if there is no link. Useful if you + want to use the clock output without an ethernet link. + + Only supported on the AR8031. + type: boolean + + vddio-supply: + description: | + RGMII I/O voltage regulator (see regulator/regulator.yaml). + + The PHY supports RGMII I/O voltages of 1.5V, 1.8V and 2.5V. You can + either connect this to the vddio-regulator (1.5V / 1.8V) or the + vddh-regulator (2.5V). + + Only supported on the AR8031. + + vddio-regulator: + type: object + description: + Initial data for the VDDIO regulator. Set this to 1.5V or 1.8V. + allOf: + - $ref: /schemas/regulator/regulator.yaml + + vddh-regulator: + type: object + description: + Dummy subnode to model the external connection of the PHY VDDH + regulator to VDDIO. + allOf: + - $ref: /schemas/regulator/regulator.yaml + + +examples: + - | + #include <dt-bindings/net/qca-ar803x.h> + + ethernet { + #address-cells = <1>; + #size-cells = <0>; + + phy-mode = "rgmii-id"; + + ethernet-phy@0 { + reg = <0>; + + qca,clk-out-frequency = <125000000>; + qca,clk-out-strength = <AR803X_STRENGTH_FULL>; + + vddio-supply = <&vddio>; + + vddio: vddio-regulator { + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + }; + }; + }; + - | + #include <dt-bindings/net/qca-ar803x.h> + + ethernet { + #address-cells = <1>; + #size-cells = <0>; + + phy-mode = "rgmii-id"; + + ethernet-phy@0 { + reg = <0>; + + qca,clk-out-frequency = <50000000>; + qca,keep-pll-enabled; + + vddio-supply = <&vddh>; + + vddh: vddh-regulator { + }; + }; + }; diff --git a/MAINTAINERS b/MAINTAINERS index c0024b296158..709c60aacb58 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6155,10 +6155,12 @@ S: Maintained F: Documentation/ABI/testing/sysfs-class-net-phydev F: Documentation/devicetree/bindings/net/ethernet-phy.yaml F: Documentation/devicetree/bindings/net/mdio* +F: Documentation/devicetree/bindings/net/qca,ar803x.yaml F: Documentation/networking/phy.rst F: drivers/net/phy/ F: drivers/of/of_mdio.c F: drivers/of/of_net.c +F: include/dt-bindings/net/qca-ar803x.h F: include/linux/*mdio*.h F: include/linux/of_net.h F: include/linux/phy.h diff --git a/drivers/crypto/chelsio/chtls/chtls_io.c b/drivers/crypto/chelsio/chtls/chtls_io.c index 98bc5a4cd5e7..599dec59c6cc 100644 --- a/drivers/crypto/chelsio/chtls/chtls_io.c +++ b/drivers/crypto/chelsio/chtls/chtls_io.c @@ -1437,7 +1437,7 @@ static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, csk->wr_max_credits)) sk->sk_write_space(sk); - if (copied >= target && !sk->sk_backlog.tail) + if (copied >= target && !READ_ONCE(sk->sk_backlog.tail)) break; if (copied) { @@ -1470,7 +1470,7 @@ static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, break; } } - if (sk->sk_backlog.tail) { + if (READ_ONCE(sk->sk_backlog.tail)) { release_sock(sk); lock_sock(sk); chtls_cleanup_rbuf(sk, copied); @@ -1615,7 +1615,7 @@ static int peekmsg(struct sock *sk, struct msghdr *msg, break; } - if (sk->sk_backlog.tail) { + if (READ_ONCE(sk->sk_backlog.tail)) { /* Do not sleep, just process backlog. */ release_sock(sk); lock_sock(sk); @@ -1743,7 +1743,7 @@ int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, csk->wr_max_credits)) sk->sk_write_space(sk); - if (copied >= target && !sk->sk_backlog.tail) + if (copied >= target && !READ_ONCE(sk->sk_backlog.tail)) break; if (copied) { @@ -1774,7 +1774,7 @@ int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, } } - if (sk->sk_backlog.tail) { + if (READ_ONCE(sk->sk_backlog.tail)) { release_sock(sk); lock_sock(sk); chtls_cleanup_rbuf(sk, copied); diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h index d80ad203d126..1f42ee656816 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.h +++ b/drivers/net/dsa/mv88e6xxx/global2.h @@ -532,7 +532,8 @@ static inline int mv88e6xxx_g2_atu_stats_set(struct mv88e6xxx_chip *chip, return -EOPNOTSUPP; } -static inline int mv88e6xxx_g2_atu_stats_get(struct mv88e6xxx_chip *chip) +static inline int mv88e6xxx_g2_atu_stats_get(struct mv88e6xxx_chip *chip, + u16 *stats) { return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c index 8175513e48c9..1f9eab74453e 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c @@ -1207,7 +1207,7 @@ int aq_ptp_init(struct aq_nic_s *aq_nic, unsigned int idx_vec) aq_ptp->ptp_info = aq_ptp_clock; aq_ptp_gpio_init(&aq_ptp->ptp_info, &mbox.info); clock = ptp_clock_register(&aq_ptp->ptp_info, &aq_nic->ndev->dev); - if (!clock || IS_ERR(clock)) { + if (IS_ERR(clock)) { netdev_err(aq_nic->ndev, "ptp_clock_register failed\n"); err = PTR_ERR(clock); goto err_exit; diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c index b713739f4804..d322123ed373 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c @@ -7,14 +7,19 @@ #define phylink_to_dpaa2_mac(config) \ container_of((config), struct dpaa2_mac, phylink_config) -static phy_interface_t phy_mode(enum dpmac_eth_if eth_if) +static int phy_mode(enum dpmac_eth_if eth_if, phy_interface_t *if_mode) { + *if_mode = PHY_INTERFACE_MODE_NA; + switch (eth_if) { case DPMAC_ETH_IF_RGMII: - return PHY_INTERFACE_MODE_RGMII; + *if_mode = PHY_INTERFACE_MODE_RGMII; + break; default: return -EINVAL; } + + return 0; } /* Caller must call of_node_put on the returned value */ @@ -51,11 +56,11 @@ static int dpaa2_mac_get_if_mode(struct device_node *node, if (!err) return if_mode; - if_mode = phy_mode(attr.eth_if); - if (if_mode >= 0) + err = phy_mode(attr.eth_if, &if_mode); + if (!err) return if_mode; - return -ENODEV; + return err; } static bool dpaa2_mac_phy_mode_mismatch(struct dpaa2_mac *mac, diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 867726d696e2..8f32db6d2c45 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1788,10 +1788,6 @@ static int axienet_probe(struct platform_device *pdev) /* Check for these resources directly on the Ethernet node. */ struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 1); - if (!res) { - dev_err(&pdev->dev, "unable to get DMA memory resource\n"); - goto free_netdev; - } lp->dma_regs = devm_ioremap_resource(&pdev->dev, res); lp->rx_irq = platform_get_irq(pdev, 1); lp->tx_irq = platform_get_irq(pdev, 0); diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index fe602648b99f..8bccadf17e60 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -282,11 +282,6 @@ config AX88796B_PHY Currently supports the Asix Electronics PHY found in the X-Surf 100 AX88796B package. -config AT803X_PHY - tristate "AT803X PHYs" - ---help--- - Currently supports the AT8030 and AT8035 model - config BCM63XX_PHY tristate "Broadcom 63xx SOCs internal PHY" depends on BCM63XX || COMPILE_TEST @@ -444,6 +439,11 @@ config NXP_TJA11XX_PHY ---help--- Currently supports the NXP TJA1100 and TJA1101 PHY. +config AT803X_PHY + tristate "Qualcomm Atheros AR803X PHYs" + help + Currently supports the AR8030, AR8031, AR8033 and AR8035 model + config QSEMI_PHY tristate "Quality Semiconductor PHYs" ---help--- diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index 8e30db28fd7d..aee62610bade 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -2,7 +2,7 @@ /* * drivers/net/phy/at803x.c * - * Driver for Atheros 803x PHY + * Driver for Qualcomm Atheros AR803x PHY * * Author: Matus Ujhelyi <ujhelyi.m@gmail.com> */ @@ -13,7 +13,12 @@ #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/of_gpio.h> +#include <linux/bitfield.h> #include <linux/gpio/consumer.h> +#include <linux/regulator/of_regulator.h> +#include <linux/regulator/driver.h> +#include <linux/regulator/consumer.h> +#include <dt-bindings/net/qca-ar803x.h> #define AT803X_SPECIFIC_STATUS 0x11 #define AT803X_SS_SPEED_MASK (3 << 14) @@ -62,16 +67,62 @@ #define AT803X_DEBUG_REG_5 0x05 #define AT803X_DEBUG_TX_CLK_DLY_EN BIT(8) +#define AT803X_DEBUG_REG_1F 0x1F +#define AT803X_DEBUG_PLL_ON BIT(2) +#define AT803X_DEBUG_RGMII_1V8 BIT(3) + +/* AT803x supports either the XTAL input pad, an internal PLL or the + * DSP as clock reference for the clock output pad. The XTAL reference + * is only used for 25 MHz output, all other frequencies need the PLL. + * The DSP as a clock reference is used in synchronous ethernet + * applications. + * + * By default the PLL is only enabled if there is a link. Otherwise + * the PHY will go into low power state and disabled the PLL. You can + * set the PLL_ON bit (see debug register 0x1f) to keep the PLL always + * enabled. + */ +#define AT803X_MMD7_CLK25M 0x8016 +#define AT803X_CLK_OUT_MASK GENMASK(4, 2) +#define AT803X_CLK_OUT_25MHZ_XTAL 0 +#define AT803X_CLK_OUT_25MHZ_DSP 1 +#define AT803X_CLK_OUT_50MHZ_PLL 2 +#define AT803X_CLK_OUT_50MHZ_DSP 3 +#define AT803X_CLK_OUT_62_5MHZ_PLL 4 +#define AT803X_CLK_OUT_62_5MHZ_DSP 5 +#define AT803X_CLK_OUT_125MHZ_PLL 6 +#define AT803X_CLK_OUT_125MHZ_DSP 7 + +/* The AR8035 has another mask which is compatible with the AR8031/AR8033 mask + * but doesn't support choosing between XTAL/PLL and DSP. + */ +#define AT8035_CLK_OUT_MASK GENMASK(4, 3) + +#define AT803X_CLK_OUT_STRENGTH_MASK GENMASK(8, 7) +#define AT803X_CLK_OUT_STRENGTH_FULL 0 +#define AT803X_CLK_OUT_STRENGTH_HALF 1 +#define AT803X_CLK_OUT_STRENGTH_QUARTER 2 + #define ATH9331_PHY_ID 0x004dd041 #define ATH8030_PHY_ID 0x004dd076 #define ATH8031_PHY_ID 0x004dd074 #define ATH8035_PHY_ID 0x004dd072 #define AT803X_PHY_ID_MASK 0xffffffef -MODULE_DESCRIPTION("Atheros 803x PHY driver"); +MODULE_DESCRIPTION("Qualcomm Atheros AR803x PHY driver"); MODULE_AUTHOR("Matus Ujhelyi"); MODULE_LICENSE("GPL"); +struct at803x_priv { + int flags; +#define AT803X_KEEP_PLL_ENABLED BIT(0) /* don't turn off internal PLL */ + u16 clk_25m_reg; + u16 clk_25m_mask; + struct regulator_dev *vddio_rdev; + struct regulator_dev *vddh_rdev; + struct regulator *vddio; +}; + struct at803x_context { u16 bmcr; u16 advertise; @@ -237,6 +288,240 @@ static int at803x_resume(struct phy_device *phydev) return phy_modify(phydev, MII_BMCR, BMCR_PDOWN | BMCR_ISOLATE, 0); } +static int at803x_rgmii_reg_set_voltage_sel(struct regulator_dev *rdev, + unsigned int selector) +{ + struct phy_device *phydev = rdev_get_drvdata(rdev); + + if (selector) + return at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_1F, + 0, AT803X_DEBUG_RGMII_1V8); + else + return at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_1F, + AT803X_DEBUG_RGMII_1V8, 0); +} + +static int at803x_rgmii_reg_get_voltage_sel(struct regulator_dev *rdev) +{ + struct phy_device *phydev = rdev_get_drvdata(rdev); + int val; + + val = at803x_debug_reg_read(phydev, AT803X_DEBUG_REG_1F); + if (val < 0) + return val; + + return (val & AT803X_DEBUG_RGMII_1V8) ? 1 : 0; +} + +static struct regulator_ops vddio_regulator_ops = { + .list_voltage = regulator_list_voltage_table, + .set_voltage_sel = at803x_rgmii_reg_set_voltage_sel, + .get_voltage_sel = at803x_rgmii_reg_get_voltage_sel, +}; + +static const unsigned int vddio_voltage_table[] = { + 1500000, + 1800000, +}; + +static const struct regulator_desc vddio_desc = { + .name = "vddio", + .of_match = of_match_ptr("vddio-regulator"), + .n_voltages = ARRAY_SIZE(vddio_voltage_table), + .volt_table = vddio_voltage_table, + .ops = &vddio_regulator_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, +}; + +static struct regulator_ops vddh_regulator_ops = { +}; + +static const struct regulator_desc vddh_desc = { + .name = "vddh", + .of_match = of_match_ptr("vddh-regulator"), + .n_voltages = 1, + .fixed_uV = 2500000, + .ops = &vddh_regulator_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, +}; + +static int at8031_register_regulators(struct phy_device *phydev) +{ + struct at803x_priv *priv = phydev->priv; + struct device *dev = &phydev->mdio.dev; + struct regulator_config config = { }; + + config.dev = dev; + config.driver_data = phydev; + + priv->vddio_rdev = devm_regulator_register(dev, &vddio_desc, &config); + if (IS_ERR(priv->vddio_rdev)) { + phydev_err(phydev, "failed to register VDDIO regulator\n"); + return PTR_ERR(priv->vddio_rdev); + } + + priv->vddh_rdev = devm_regulator_register(dev, &vddh_desc, &config); + if (IS_ERR(priv->vddh_rdev)) { + phydev_err(phydev, "failed to register VDDH regulator\n"); + return PTR_ERR(priv->vddh_rdev); + } + + return 0; +} + +static bool at803x_match_phy_id(struct phy_device *phydev, u32 phy_id) +{ + return (phydev->phy_id & phydev->drv->phy_id_mask) + == (phy_id & phydev->drv->phy_id_mask); +} + +static int at803x_parse_dt(struct phy_device *phydev) +{ + struct device_node *node = phydev->mdio.dev.of_node; + struct at803x_priv *priv = phydev->priv; + unsigned int sel, mask; + u32 freq, strength; + int ret; + + if (!IS_ENABLED(CONFIG_OF_MDIO)) + return 0; + + ret = of_property_read_u32(node, "qca,clk-out-frequency", &freq); + if (!ret) { + mask = AT803X_CLK_OUT_MASK; + switch (freq) { + case 25000000: + sel = AT803X_CLK_OUT_25MHZ_XTAL; + break; + case 50000000: + sel = AT803X_CLK_OUT_50MHZ_PLL; + break; + case 62500000: + sel = AT803X_CLK_OUT_62_5MHZ_PLL; + break; + case 125000000: + sel = AT803X_CLK_OUT_125MHZ_PLL; + break; + default: + phydev_err(phydev, "invalid qca,clk-out-frequency\n"); + return -EINVAL; + } + + priv->clk_25m_reg |= FIELD_PREP(mask, sel); + priv->clk_25m_mask |= mask; + + /* Fixup for the AR8030/AR8035. This chip has another mask and + * doesn't support the DSP reference. Eg. the lowest bit of the + * mask. The upper two bits select the same frequencies. Mask + * the lowest bit here. + * + * Warning: + * There was no datasheet for the AR8030 available so this is + * just a guess. But the AR8035 is listed as pin compatible + * to the AR8030 so there might be a good chance it works on + * the AR8030 too. + */ + if (at803x_match_phy_id(phydev, ATH8030_PHY_ID) || + at803x_match_phy_id(phydev, ATH8035_PHY_ID)) { + priv->clk_25m_reg &= ~AT8035_CLK_OUT_MASK; + priv->clk_25m_mask &= ~AT8035_CLK_OUT_MASK; + } + } + + ret = of_property_read_u32(node, "qca,clk-out-strength", &strength); + if (!ret) { + priv->clk_25m_mask |= AT803X_CLK_OUT_STRENGTH_MASK; + switch (strength) { + case AR803X_STRENGTH_FULL: + priv->clk_25m_reg |= AT803X_CLK_OUT_STRENGTH_FULL; + break; + case AR803X_STRENGTH_HALF: + priv->clk_25m_reg |= AT803X_CLK_OUT_STRENGTH_HALF; + break; + case AR803X_STRENGTH_QUARTER: + priv->clk_25m_reg |= AT803X_CLK_OUT_STRENGTH_QUARTER; + break; + default: + phydev_err(phydev, "invalid qca,clk-out-strength\n"); + return -EINVAL; + } + } + + /* Only supported on AR8031/AR8033, the AR8030/AR8035 use strapping + * options. + */ + if (at803x_match_phy_id(phydev, ATH8031_PHY_ID)) { + if (of_property_read_bool(node, "qca,keep-pll-enabled")) + priv->flags |= AT803X_KEEP_PLL_ENABLED; + + ret = at8031_register_regulators(phydev); + if (ret < 0) + return ret; + + priv->vddio = devm_regulator_get_optional(&phydev->mdio.dev, + "vddio"); + if (IS_ERR(priv->vddio)) { + phydev_err(phydev, "failed to get VDDIO regulator\n"); + return PTR_ERR(priv->vddio); + } + + ret = regulator_enable(priv->vddio); + if (ret < 0) + return ret; + } + + return 0; +} + +static int at803x_probe(struct phy_device *phydev) +{ + struct device *dev = &phydev->mdio.dev; + struct at803x_priv *priv; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + phydev->priv = priv; + + return at803x_parse_dt(phydev); +} + +static int at803x_clk_out_config(struct phy_device *phydev) +{ + struct at803x_priv *priv = phydev->priv; + int val; + + if (!priv->clk_25m_mask) + return 0; + + val = phy_read_mmd(phydev, MDIO_MMD_AN, AT803X_MMD7_CLK25M); + if (val < 0) + return val; + + val &= ~priv->clk_25m_mask; + val |= priv->clk_25m_reg; + + return phy_write_mmd(phydev, MDIO_MMD_AN, AT803X_MMD7_CLK25M, val); +} + +static int at8031_pll_config(struct phy_device *phydev) +{ + struct at803x_priv *priv = phydev->priv; + + /* The default after hardware reset is PLL OFF. After a soft reset, the + * values are retained. + */ + if (priv->flags & AT803X_KEEP_PLL_ENABLED) + return at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_1F, + 0, AT803X_DEBUG_PLL_ON); + else + return at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_1F, + AT803X_DEBUG_PLL_ON, 0); +} + static int at803x_config_init(struct phy_device *phydev) { int ret; @@ -259,8 +544,20 @@ static int at803x_config_init(struct phy_device *phydev) ret = at803x_enable_tx_delay(phydev); else ret = at803x_disable_tx_delay(phydev); + if (ret < 0) + return ret; - return ret; + ret = at803x_clk_out_config(phydev); + if (ret < 0) + return ret; + + if (at803x_match_phy_id(phydev, ATH8031_PHY_ID)) { + ret = at8031_pll_config(phydev); + if (ret < 0) + return ret; + } + + return 0; } static int at803x_ack_interrupt(struct phy_device *phydev) @@ -409,10 +706,11 @@ static int at803x_read_status(struct phy_device *phydev) static struct phy_driver at803x_driver[] = { { - /* ATHEROS 8035 */ + /* Qualcomm Atheros AR8035 */ .phy_id = ATH8035_PHY_ID, - .name = "Atheros 8035 ethernet", + .name = "Qualcomm Atheros AR8035", .phy_id_mask = AT803X_PHY_ID_MASK, + .probe = at803x_probe, .config_init = at803x_config_init, .set_wol = at803x_set_wol, .get_wol = at803x_get_wol, @@ -423,10 +721,11 @@ static struct phy_driver at803x_driver[] = { .ack_interrupt = at803x_ack_interrupt, .config_intr = at803x_config_intr, }, { - /* ATHEROS 8030 */ + /* Qualcomm Atheros AR8030 */ .phy_id = ATH8030_PHY_ID, - .name = "Atheros 8030 ethernet", + .name = "Qualcomm Atheros AR8030", .phy_id_mask = AT803X_PHY_ID_MASK, + .probe = at803x_probe, .config_init = at803x_config_init, .link_change_notify = at803x_link_change_notify, .set_wol = at803x_set_wol, @@ -437,10 +736,11 @@ static struct phy_driver at803x_driver[] = { .ack_interrupt = at803x_ack_interrupt, .config_intr = at803x_config_intr, }, { - /* ATHEROS 8031 */ + /* Qualcomm Atheros AR8031/AR8033 */ .phy_id = ATH8031_PHY_ID, - .name = "Atheros 8031 ethernet", + .name = "Qualcomm Atheros AR8031/AR8033", .phy_id_mask = AT803X_PHY_ID_MASK, + .probe = at803x_probe, .config_init = at803x_config_init, .set_wol = at803x_set_wol, .get_wol = at803x_get_wol, @@ -454,8 +754,7 @@ static struct phy_driver at803x_driver[] = { }, { /* ATHEROS AR9331 */ PHY_ID_MATCH_EXACT(ATH9331_PHY_ID), - .name = "Atheros AR9331 built-in PHY", - .config_init = at803x_config_init, + .name = "Qualcomm Atheros AR9331 built-in PHY", .suspend = at803x_suspend, .resume = at803x_resume, /* PHY_BASIC_FEATURES */ diff --git a/drivers/ptp/ptp_clockmatrix.c b/drivers/ptp/ptp_clockmatrix.c index cf5889b7d825..a5110b7b4ece 100644 --- a/drivers/ptp/ptp_clockmatrix.c +++ b/drivers/ptp/ptp_clockmatrix.c @@ -1294,8 +1294,10 @@ static int idtcm_probe(struct i2c_client *client, err = set_tod_write_overhead(idtcm); - if (err) + if (err) { + mutex_unlock(&idtcm->reg_lock); return err; + } err = idtcm_load_firmware(idtcm, &client->dev); diff --git a/include/dt-bindings/net/qca-ar803x.h b/include/dt-bindings/net/qca-ar803x.h new file mode 100644 index 000000000000..9c046c7242ed --- /dev/null +++ b/include/dt-bindings/net/qca-ar803x.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Device Tree constants for the Qualcomm Atheros AR803x PHYs + */ + +#ifndef _DT_BINDINGS_QCA_AR803X_H +#define _DT_BINDINGS_QCA_AR803X_H + +#define AR803X_STRENGTH_FULL 0 +#define AR803X_STRENGTH_HALF 1 +#define AR803X_STRENGTH_QUARTER 2 + +#endif diff --git a/include/net/arp.h b/include/net/arp.h index c8f580a0e6b1..4950191f6b2b 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -57,8 +57,8 @@ static inline void __ipv4_confirm_neigh(struct net_device *dev, u32 key) unsigned long now = jiffies; /* avoid dirtying neighbour */ - if (n->confirmed != now) - n->confirmed = now; + if (READ_ONCE(n->confirmed) != now) + WRITE_ONCE(n->confirmed, now); } rcu_read_unlock_bh(); } diff --git a/include/net/ndisc.h b/include/net/ndisc.h index b2f715ca0567..b5ebeb3b0de0 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -414,8 +414,8 @@ static inline void __ipv6_confirm_neigh(struct net_device *dev, unsigned long now = jiffies; /* avoid dirtying neighbour */ - if (n->confirmed != now) - n->confirmed = now; + if (READ_ONCE(n->confirmed) != now) + WRITE_ONCE(n->confirmed, now); } rcu_read_unlock_bh(); } @@ -431,8 +431,8 @@ static inline void __ipv6_confirm_neigh_stub(struct net_device *dev, unsigned long now = jiffies; /* avoid dirtying neighbour */ - if (n->confirmed != now) - n->confirmed = now; + if (READ_ONCE(n->confirmed) != now) + WRITE_ONCE(n->confirmed, now); } rcu_read_unlock_bh(); } diff --git a/include/net/sock.h b/include/net/sock.h index ac6042d0af32..bd210c78dc9d 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -859,17 +859,17 @@ static inline gfp_t sk_gfp_mask(const struct sock *sk, gfp_t gfp_mask) static inline void sk_acceptq_removed(struct sock *sk) { - sk->sk_ack_backlog--; + WRITE_ONCE(sk->sk_ack_backlog, sk->sk_ack_backlog - 1); } static inline void sk_acceptq_added(struct sock *sk) { - sk->sk_ack_backlog++; + WRITE_ONCE(sk->sk_ack_backlog, sk->sk_ack_backlog + 1); } static inline bool sk_acceptq_is_full(const struct sock *sk) { - return sk->sk_ack_backlog > sk->sk_max_ack_backlog; + return READ_ONCE(sk->sk_ack_backlog) > READ_ONCE(sk->sk_max_ack_backlog); } /* @@ -899,11 +899,11 @@ static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb) skb_dst_force(skb); if (!sk->sk_backlog.tail) - sk->sk_backlog.head = skb; + WRITE_ONCE(sk->sk_backlog.head, skb); else sk->sk_backlog.tail->next = skb; - sk->sk_backlog.tail = skb; + WRITE_ONCE(sk->sk_backlog.tail, skb); skb->next = NULL; } @@ -1939,8 +1939,8 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie); static inline void sk_dst_confirm(struct sock *sk) { - if (!sk->sk_dst_pending_confirm) - sk->sk_dst_pending_confirm = 1; + if (!READ_ONCE(sk->sk_dst_pending_confirm)) + WRITE_ONCE(sk->sk_dst_pending_confirm, 1); } static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n) @@ -1950,10 +1950,10 @@ static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n) unsigned long now = jiffies; /* avoid dirtying neighbour */ - if (n->confirmed != now) - n->confirmed = now; - if (sk && sk->sk_dst_pending_confirm) - sk->sk_dst_pending_confirm = 0; + if (READ_ONCE(n->confirmed) != now) + WRITE_ONCE(n->confirmed, now); + if (sk && READ_ONCE(sk->sk_dst_pending_confirm)) + WRITE_ONCE(sk->sk_dst_pending_confirm, 0); } } diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h index de696ca12f2c..f6035f737193 100644 --- a/include/uapi/linux/lwtunnel.h +++ b/include/uapi/linux/lwtunnel.h @@ -27,6 +27,7 @@ enum lwtunnel_ip_t { LWTUNNEL_IP_TOS, LWTUNNEL_IP_FLAGS, LWTUNNEL_IP_PAD, + LWTUNNEL_IP_OPTS, __LWTUNNEL_IP_MAX, }; @@ -41,12 +42,52 @@ enum lwtunnel_ip6_t { LWTUNNEL_IP6_TC, LWTUNNEL_IP6_FLAGS, LWTUNNEL_IP6_PAD, + LWTUNNEL_IP6_OPTS, __LWTUNNEL_IP6_MAX, }; #define LWTUNNEL_IP6_MAX (__LWTUNNEL_IP6_MAX - 1) enum { + LWTUNNEL_IP_OPTS_UNSPEC, + LWTUNNEL_IP_OPTS_GENEVE, + LWTUNNEL_IP_OPTS_VXLAN, + LWTUNNEL_IP_OPTS_ERSPAN, + __LWTUNNEL_IP_OPTS_MAX, +}; + +#define LWTUNNEL_IP_OPTS_MAX (__LWTUNNEL_IP_OPTS_MAX - 1) + +enum { + LWTUNNEL_IP_OPT_GENEVE_UNSPEC, + LWTUNNEL_IP_OPT_GENEVE_CLASS, + LWTUNNEL_IP_OPT_GENEVE_TYPE, + LWTUNNEL_IP_OPT_GENEVE_DATA, + __LWTUNNEL_IP_OPT_GENEVE_MAX, +}; + +#define LWTUNNEL_IP_OPT_GENEVE_MAX (__LWTUNNEL_IP_OPT_GENEVE_MAX - 1) + +enum { + LWTUNNEL_IP_OPT_VXLAN_UNSPEC, + LWTUNNEL_IP_OPT_VXLAN_GBP, + __LWTUNNEL_IP_OPT_VXLAN_MAX, +}; + +#define LWTUNNEL_IP_OPT_VXLAN_MAX (__LWTUNNEL_IP_OPT_VXLAN_MAX - 1) + +enum { + LWTUNNEL_IP_OPT_ERSPAN_UNSPEC, + LWTUNNEL_IP_OPT_ERSPAN_VER, + LWTUNNEL_IP_OPT_ERSPAN_INDEX, + LWTUNNEL_IP_OPT_ERSPAN_DIR, + LWTUNNEL_IP_OPT_ERSPAN_HWID, + __LWTUNNEL_IP_OPT_ERSPAN_MAX, +}; + +#define LWTUNNEL_IP_OPT_ERSPAN_MAX (__LWTUNNEL_IP_OPT_ERSPAN_MAX - 1) + +enum { LWT_BPF_PROG_UNSPEC, LWT_BPF_PROG_FD, LWT_BPF_PROG_NAME, diff --git a/net/atm/signaling.c b/net/atm/signaling.c index 6c11cdf4dd4c..fbd0c5e7b299 100644 --- a/net/atm/signaling.c +++ b/net/atm/signaling.c @@ -109,7 +109,7 @@ static int sigd_send(struct atm_vcc *vcc, struct sk_buff *skb) dev_kfree_skb(skb); goto as_indicate_complete; } - sk->sk_ack_backlog++; + sk_acceptq_added(sk); skb_queue_tail(&sk->sk_receive_queue, skb); pr_debug("waking sk_sleep(sk) 0x%p\n", sk_sleep(sk)); sk->sk_state_change(sk); diff --git a/net/atm/svc.c b/net/atm/svc.c index 908cbb8654f5..ba144d035e3d 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -381,7 +381,7 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags, msg->pvc.sap_addr.vpi, msg->pvc.sap_addr.vci); dev_kfree_skb(skb); - sk->sk_ack_backlog--; + sk_acceptq_removed(sk); if (error) { sigd_enq2(NULL, as_reject, old_vcc, NULL, NULL, &old_vcc->qos, error); diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index bb222b882b67..324306d6fde0 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1384,7 +1384,7 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags, /* Now attach up the new socket */ kfree_skb(skb); - sk->sk_ack_backlog--; + sk_acceptq_removed(sk); newsock->state = SS_CONNECTED; out: diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c index dcdbaeeb2358..cd6afe895db9 100644 --- a/net/ax25/ax25_in.c +++ b/net/ax25/ax25_in.c @@ -356,7 +356,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev, make->sk_state = TCP_ESTABLISHED; - sk->sk_ack_backlog++; + sk_acceptq_added(sk); bh_unlock_sock(sk); } else { if (!mine) diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 5f508c50649d..3fd124927d4d 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -173,7 +173,7 @@ void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh) else release_sock(sk); - parent->sk_ack_backlog++; + sk_acceptq_added(parent); } EXPORT_SYMBOL(bt_accept_enqueue); @@ -185,7 +185,7 @@ void bt_accept_unlink(struct sock *sk) BT_DBG("sk %p state %d", sk, sk->sk_state); list_del_init(&bt_sk(sk)->accept_q); - bt_sk(sk)->parent->sk_ack_backlog--; + sk_acceptq_removed(bt_sk(sk)->parent); bt_sk(sk)->parent = NULL; sock_put(sk); } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 5480edff0c86..8c82e95f7539 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2052,8 +2052,8 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, goto nla_put_failure; { unsigned long now = jiffies; - unsigned int flush_delta = now - tbl->last_flush; - unsigned int rand_delta = now - tbl->last_rand; + long flush_delta = now - tbl->last_flush; + long rand_delta = now - tbl->last_rand; struct neigh_hash_table *nht; struct ndt_config ndc = { .ndtc_key_len = tbl->key_len, diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 5bad08dc4316..a52e8ba1ced0 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -944,7 +944,7 @@ int inet_dccp_listen(struct socket *sock, int backlog) if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN))) goto out; - sk->sk_max_ack_backlog = backlog; + WRITE_ONCE(sk->sk_max_ack_backlog, backlog); /* Really, if the socket is already in listen state * we can only allow the backlog to be adjusted. */ diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 3349ea81f901..e19a92a62e14 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1091,7 +1091,7 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags, } cb = DN_SKB_CB(skb); - sk->sk_ack_backlog--; + sk_acceptq_removed(sk); newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation, kern); if (newsk == NULL) { release_sock(sk); diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index e4161e0c86aa..c68503a18025 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c @@ -328,7 +328,7 @@ static void dn_nsp_conn_init(struct sock *sk, struct sk_buff *skb) return; } - sk->sk_ack_backlog++; + sk_acceptq_added(sk); skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_state_change(sk); } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 70f92aaca411..53de8e00990e 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -208,7 +208,7 @@ int inet_listen(struct socket *sock, int backlog) if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN))) goto out; - sk->sk_max_ack_backlog = backlog; + WRITE_ONCE(sk->sk_max_ack_backlog, backlog); /* Really, if the socket is already in listen state * we can only allow the backlog to be adjusted. */ diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index eb30fc1770de..e4c6e8b40490 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -716,7 +716,7 @@ static void reqsk_timer_handler(struct timer_list *t) * ones are about to clog our table. */ qlen = reqsk_queue_len(queue); - if ((qlen << 1) > max(8U, sk_listener->sk_max_ack_backlog)) { + if ((qlen << 1) > max(8U, READ_ONCE(sk_listener->sk_max_ack_backlog))) { int young = reqsk_queue_len_young(queue) << 1; while (thresh > 2) { diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 7dc79b973e6e..af154977904c 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -226,17 +226,17 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, r->idiag_timer = 1; r->idiag_retrans = icsk->icsk_retransmits; r->idiag_expires = - jiffies_to_msecs(icsk->icsk_timeout - jiffies); + jiffies_delta_to_msecs(icsk->icsk_timeout - jiffies); } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { r->idiag_timer = 4; r->idiag_retrans = icsk->icsk_probes_out; r->idiag_expires = - jiffies_to_msecs(icsk->icsk_timeout - jiffies); + jiffies_delta_to_msecs(icsk->icsk_timeout - jiffies); } else if (timer_pending(&sk->sk_timer)) { r->idiag_timer = 2; r->idiag_retrans = icsk->icsk_probes_out; r->idiag_expires = - jiffies_to_msecs(sk->sk_timer.expires - jiffies); + jiffies_delta_to_msecs(sk->sk_timer.expires - jiffies); } else { r->idiag_timer = 0; r->idiag_expires = 0; @@ -342,16 +342,13 @@ static int inet_twsk_diag_fill(struct sock *sk, r = nlmsg_data(nlh); BUG_ON(tw->tw_state != TCP_TIME_WAIT); - tmo = tw->tw_timer.expires - jiffies; - if (tmo < 0) - tmo = 0; - inet_diag_msg_common_fill(r, sk); r->idiag_retrans = 0; r->idiag_state = tw->tw_substate; r->idiag_timer = 3; - r->idiag_expires = jiffies_to_msecs(tmo); + tmo = tw->tw_timer.expires - jiffies; + r->idiag_expires = jiffies_delta_to_msecs(tmo); r->idiag_rqueue = 0; r->idiag_wqueue = 0; r->idiag_uid = 0; @@ -385,7 +382,7 @@ static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb, offsetof(struct sock, sk_cookie)); tmo = inet_reqsk(sk)->rsk_timer.expires - jiffies; - r->idiag_expires = (tmo >= 0) ? jiffies_to_msecs(tmo) : 0; + r->idiag_expires = jiffies_delta_to_msecs(tmo); r->idiag_rqueue = 0; r->idiag_wqueue = 0; r->idiag_uid = 0; diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 1452a97914a0..d4f84bf9289a 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -34,6 +34,9 @@ #include <net/netns/generic.h> #include <net/rtnetlink.h> #include <net/dst_metadata.h> +#include <net/geneve.h> +#include <net/vxlan.h> +#include <net/erspan.h> const struct ip_tunnel_encap_ops __rcu * iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly; @@ -126,15 +129,14 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, if (!md || md->type != METADATA_IP_TUNNEL || md->u.tun_info.mode & IP_TUNNEL_INFO_TX) - return NULL; - res = metadata_dst_alloc(0, METADATA_IP_TUNNEL, flags); + src = &md->u.tun_info; + res = metadata_dst_alloc(src->options_len, METADATA_IP_TUNNEL, flags); if (!res) return NULL; dst = &res->u.tun_info; - src = &md->u.tun_info; dst->key.tun_id = src->key.tun_id; if (src->mode & IP_TUNNEL_INFO_IPV6) memcpy(&dst->key.u.ipv6.dst, &src->key.u.ipv6.src, @@ -143,6 +145,8 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, dst->key.u.ipv4.dst = src->key.u.ipv4.src; dst->key.tun_flags = src->key.tun_flags; dst->mode = src->mode | IP_TUNNEL_INFO_TX; + ip_tunnel_info_opts_set(dst, ip_tunnel_info_opts(src), + src->options_len, 0); return res; } @@ -217,24 +221,199 @@ static const struct nla_policy ip_tun_policy[LWTUNNEL_IP_MAX + 1] = { [LWTUNNEL_IP_TTL] = { .type = NLA_U8 }, [LWTUNNEL_IP_TOS] = { .type = NLA_U8 }, [LWTUNNEL_IP_FLAGS] = { .type = NLA_U16 }, + [LWTUNNEL_IP_OPTS] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy ip_opts_policy[LWTUNNEL_IP_OPTS_MAX + 1] = { + [LWTUNNEL_IP_OPTS_GENEVE] = { .type = NLA_NESTED }, + [LWTUNNEL_IP_OPTS_VXLAN] = { .type = NLA_NESTED }, + [LWTUNNEL_IP_OPTS_ERSPAN] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy +geneve_opt_policy[LWTUNNEL_IP_OPT_GENEVE_MAX + 1] = { + [LWTUNNEL_IP_OPT_GENEVE_CLASS] = { .type = NLA_U16 }, + [LWTUNNEL_IP_OPT_GENEVE_TYPE] = { .type = NLA_U8 }, + [LWTUNNEL_IP_OPT_GENEVE_DATA] = { .type = NLA_BINARY, .len = 128 }, +}; + +static const struct nla_policy +vxlan_opt_policy[LWTUNNEL_IP_OPT_VXLAN_MAX + 1] = { + [LWTUNNEL_IP_OPT_VXLAN_GBP] = { .type = NLA_U32 }, +}; + +static const struct nla_policy +erspan_opt_policy[LWTUNNEL_IP_OPT_ERSPAN_MAX + 1] = { + [LWTUNNEL_IP_OPT_ERSPAN_VER] = { .type = NLA_U8 }, + [LWTUNNEL_IP_OPT_ERSPAN_INDEX] = { .type = NLA_U32 }, + [LWTUNNEL_IP_OPT_ERSPAN_DIR] = { .type = NLA_U8 }, + [LWTUNNEL_IP_OPT_ERSPAN_HWID] = { .type = NLA_U8 }, }; +static int ip_tun_parse_opts_geneve(struct nlattr *attr, + struct ip_tunnel_info *info, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[LWTUNNEL_IP_OPT_GENEVE_MAX + 1]; + int data_len, err; + + err = nla_parse_nested_deprecated(tb, LWTUNNEL_IP_OPT_GENEVE_MAX, + attr, geneve_opt_policy, extack); + if (err) + return err; + + if (!tb[LWTUNNEL_IP_OPT_GENEVE_CLASS] || + !tb[LWTUNNEL_IP_OPT_GENEVE_TYPE] || + !tb[LWTUNNEL_IP_OPT_GENEVE_DATA]) + return -EINVAL; + + attr = tb[LWTUNNEL_IP_OPT_GENEVE_DATA]; + data_len = nla_len(attr); + if (data_len % 4) + return -EINVAL; + + if (info) { + struct geneve_opt *opt = ip_tunnel_info_opts(info); + + memcpy(opt->opt_data, nla_data(attr), data_len); + opt->length = data_len / 4; + attr = tb[LWTUNNEL_IP_OPT_GENEVE_CLASS]; + opt->opt_class = nla_get_be16(attr); + attr = tb[LWTUNNEL_IP_OPT_GENEVE_TYPE]; + opt->type = nla_get_u8(attr); + info->key.tun_flags |= TUNNEL_GENEVE_OPT; + } + + return sizeof(struct geneve_opt) + data_len; +} + +static int ip_tun_parse_opts_vxlan(struct nlattr *attr, + struct ip_tunnel_info *info, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[LWTUNNEL_IP_OPT_VXLAN_MAX + 1]; + int err; + + err = nla_parse_nested_deprecated(tb, LWTUNNEL_IP_OPT_VXLAN_MAX, + attr, vxlan_opt_policy, extack); + if (err) + return err; + + if (!tb[LWTUNNEL_IP_OPT_VXLAN_GBP]) + return -EINVAL; + + if (info) { + struct vxlan_metadata *md = ip_tunnel_info_opts(info); + + attr = tb[LWTUNNEL_IP_OPT_VXLAN_GBP]; + md->gbp = nla_get_u32(attr); + info->key.tun_flags |= TUNNEL_VXLAN_OPT; + } + + return sizeof(struct vxlan_metadata); +} + +static int ip_tun_parse_opts_erspan(struct nlattr *attr, + struct ip_tunnel_info *info, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[LWTUNNEL_IP_OPT_ERSPAN_MAX + 1]; + int err; + + err = nla_parse_nested_deprecated(tb, LWTUNNEL_IP_OPT_ERSPAN_MAX, + attr, erspan_opt_policy, extack); + if (err) + return err; + + if (!tb[LWTUNNEL_IP_OPT_ERSPAN_VER]) + return -EINVAL; + + if (info) { + struct erspan_metadata *md = ip_tunnel_info_opts(info); + + attr = tb[LWTUNNEL_IP_OPT_ERSPAN_VER]; + md->version = nla_get_u8(attr); + + if (md->version == 1 && tb[LWTUNNEL_IP_OPT_ERSPAN_INDEX]) { + attr = tb[LWTUNNEL_IP_OPT_ERSPAN_INDEX]; + md->u.index = nla_get_be32(attr); + } else if (md->version == 2 && tb[LWTUNNEL_IP_OPT_ERSPAN_DIR] && + tb[LWTUNNEL_IP_OPT_ERSPAN_HWID]) { + attr = tb[LWTUNNEL_IP_OPT_ERSPAN_DIR]; + md->u.md2.dir = nla_get_u8(attr); + attr = tb[LWTUNNEL_IP_OPT_ERSPAN_HWID]; + set_hwid(&md->u.md2, nla_get_u8(attr)); + } else { + return -EINVAL; + } + + info->key.tun_flags |= TUNNEL_ERSPAN_OPT; + } + + return sizeof(struct erspan_metadata); +} + +static int ip_tun_parse_opts(struct nlattr *attr, struct ip_tunnel_info *info, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[LWTUNNEL_IP_OPTS_MAX + 1]; + int err; + + if (!attr) + return 0; + + err = nla_parse_nested_deprecated(tb, LWTUNNEL_IP_OPTS_MAX, attr, + ip_opts_policy, extack); + if (err) + return err; + + if (tb[LWTUNNEL_IP_OPTS_GENEVE]) + err = ip_tun_parse_opts_geneve(tb[LWTUNNEL_IP_OPTS_GENEVE], + info, extack); + else if (tb[LWTUNNEL_IP_OPTS_VXLAN]) + err = ip_tun_parse_opts_vxlan(tb[LWTUNNEL_IP_OPTS_VXLAN], + info, extack); + else if (tb[LWTUNNEL_IP_OPTS_ERSPAN]) + err = ip_tun_parse_opts_erspan(tb[LWTUNNEL_IP_OPTS_ERSPAN], + info, extack); + else + err = -EINVAL; + + return err; +} + +static int ip_tun_get_optlen(struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + return ip_tun_parse_opts(attr, NULL, extack); +} + +static int ip_tun_set_opts(struct nlattr *attr, struct ip_tunnel_info *info, + struct netlink_ext_ack *extack) +{ + return ip_tun_parse_opts(attr, info, extack); +} + static int ip_tun_build_state(struct nlattr *attr, unsigned int family, const void *cfg, struct lwtunnel_state **ts, struct netlink_ext_ack *extack) { - struct ip_tunnel_info *tun_info; - struct lwtunnel_state *new_state; struct nlattr *tb[LWTUNNEL_IP_MAX + 1]; - int err; + struct lwtunnel_state *new_state; + struct ip_tunnel_info *tun_info; + int err, opt_len; err = nla_parse_nested_deprecated(tb, LWTUNNEL_IP_MAX, attr, ip_tun_policy, extack); if (err < 0) return err; - new_state = lwtunnel_state_alloc(sizeof(*tun_info)); + opt_len = ip_tun_get_optlen(tb[LWTUNNEL_IP_OPTS], extack); + if (opt_len < 0) + return opt_len; + + new_state = lwtunnel_state_alloc(sizeof(*tun_info) + opt_len); if (!new_state) return -ENOMEM; @@ -242,6 +421,12 @@ static int ip_tun_build_state(struct nlattr *attr, tun_info = lwt_tun_info(new_state); + err = ip_tun_set_opts(tb[LWTUNNEL_IP_OPTS], tun_info, extack); + if (err < 0) { + lwtstate_free(new_state); + return err; + } + #ifdef CONFIG_DST_CACHE err = dst_cache_init(&tun_info->dst_cache, GFP_KERNEL); if (err) { @@ -266,10 +451,10 @@ static int ip_tun_build_state(struct nlattr *attr, tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP_TOS]); if (tb[LWTUNNEL_IP_FLAGS]) - tun_info->key.tun_flags = nla_get_be16(tb[LWTUNNEL_IP_FLAGS]); + tun_info->key.tun_flags |= nla_get_be16(tb[LWTUNNEL_IP_FLAGS]); tun_info->mode = IP_TUNNEL_INFO_TX; - tun_info->options_len = 0; + tun_info->options_len = opt_len; *ts = new_state; @@ -285,6 +470,110 @@ static void ip_tun_destroy_state(struct lwtunnel_state *lwtstate) #endif } +static int ip_tun_fill_encap_opts_geneve(struct sk_buff *skb, + struct ip_tunnel_info *tun_info) +{ + struct geneve_opt *opt; + struct nlattr *nest; + + nest = nla_nest_start_noflag(skb, LWTUNNEL_IP_OPTS_GENEVE); + if (!nest) + return -ENOMEM; + + opt = ip_tunnel_info_opts(tun_info); + if (nla_put_be16(skb, LWTUNNEL_IP_OPT_GENEVE_CLASS, opt->opt_class) || + nla_put_u8(skb, LWTUNNEL_IP_OPT_GENEVE_TYPE, opt->type) || + nla_put(skb, LWTUNNEL_IP_OPT_GENEVE_DATA, opt->length * 4, + opt->opt_data)) { + nla_nest_cancel(skb, nest); + return -ENOMEM; + } + + nla_nest_end(skb, nest); + return 0; +} + +static int ip_tun_fill_encap_opts_vxlan(struct sk_buff *skb, + struct ip_tunnel_info *tun_info) +{ + struct vxlan_metadata *md; + struct nlattr *nest; + + nest = nla_nest_start_noflag(skb, LWTUNNEL_IP_OPTS_VXLAN); + if (!nest) + return -ENOMEM; + + md = ip_tunnel_info_opts(tun_info); + if (nla_put_u32(skb, LWTUNNEL_IP_OPT_VXLAN_GBP, md->gbp)) { + nla_nest_cancel(skb, nest); + return -ENOMEM; + } + + nla_nest_end(skb, nest); + return 0; +} + +static int ip_tun_fill_encap_opts_erspan(struct sk_buff *skb, + struct ip_tunnel_info *tun_info) +{ + struct erspan_metadata *md; + struct nlattr *nest; + + nest = nla_nest_start_noflag(skb, LWTUNNEL_IP_OPTS_ERSPAN); + if (!nest) + return -ENOMEM; + + md = ip_tunnel_info_opts(tun_info); + if (nla_put_u32(skb, LWTUNNEL_IP_OPT_ERSPAN_VER, md->version)) + goto err; + + if (md->version == 1 && + nla_put_be32(skb, LWTUNNEL_IP_OPT_ERSPAN_INDEX, md->u.index)) + goto err; + + if (md->version == 2 && + (nla_put_u8(skb, LWTUNNEL_IP_OPT_ERSPAN_DIR, md->u.md2.dir) || + nla_put_u8(skb, LWTUNNEL_IP_OPT_ERSPAN_HWID, + get_hwid(&md->u.md2)))) + goto err; + + nla_nest_end(skb, nest); + return 0; +err: + nla_nest_cancel(skb, nest); + return -ENOMEM; +} + +static int ip_tun_fill_encap_opts(struct sk_buff *skb, int type, + struct ip_tunnel_info *tun_info) +{ + struct nlattr *nest; + int err = 0; + + if (!(tun_info->key.tun_flags & + (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT))) + return 0; + + nest = nla_nest_start_noflag(skb, type); + if (!nest) + return -ENOMEM; + + if (tun_info->key.tun_flags & TUNNEL_GENEVE_OPT) + err = ip_tun_fill_encap_opts_geneve(skb, tun_info); + else if (tun_info->key.tun_flags & TUNNEL_VXLAN_OPT) + err = ip_tun_fill_encap_opts_vxlan(skb, tun_info); + else if (tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT) + err = ip_tun_fill_encap_opts_erspan(skb, tun_info); + + if (err) { + nla_nest_cancel(skb, nest); + return err; + } + + nla_nest_end(skb, nest); + return 0; +} + static int ip_tun_fill_encap_info(struct sk_buff *skb, struct lwtunnel_state *lwtstate) { @@ -296,12 +585,42 @@ static int ip_tun_fill_encap_info(struct sk_buff *skb, nla_put_in_addr(skb, LWTUNNEL_IP_SRC, tun_info->key.u.ipv4.src) || nla_put_u8(skb, LWTUNNEL_IP_TOS, tun_info->key.tos) || nla_put_u8(skb, LWTUNNEL_IP_TTL, tun_info->key.ttl) || - nla_put_be16(skb, LWTUNNEL_IP_FLAGS, tun_info->key.tun_flags)) + nla_put_be16(skb, LWTUNNEL_IP_FLAGS, tun_info->key.tun_flags) || + ip_tun_fill_encap_opts(skb, LWTUNNEL_IP_OPTS, tun_info)) return -ENOMEM; return 0; } +static int ip_tun_opts_nlsize(struct ip_tunnel_info *info) +{ + int opt_len; + + if (!(info->key.tun_flags & + (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT))) + return 0; + + opt_len = nla_total_size(0); /* LWTUNNEL_IP_OPTS */ + if (info->key.tun_flags & TUNNEL_GENEVE_OPT) { + struct geneve_opt *opt = ip_tunnel_info_opts(info); + + opt_len += nla_total_size(0) /* LWTUNNEL_IP_OPTS_GENEVE */ + + nla_total_size(2) /* OPT_GENEVE_CLASS */ + + nla_total_size(1) /* OPT_GENEVE_TYPE */ + + nla_total_size(opt->length * 4); + /* OPT_GENEVE_DATA */ + } else if (info->key.tun_flags & TUNNEL_VXLAN_OPT) { + opt_len += nla_total_size(0) /* LWTUNNEL_IP_OPTS_VXLAN */ + + nla_total_size(4); /* OPT_VXLAN_GBP */ + } else if (info->key.tun_flags & TUNNEL_ERSPAN_OPT) { + opt_len += nla_total_size(0) /* LWTUNNEL_IP_OPTS_ERSPAN */ + + nla_total_size(1) /* OPT_ERSPAN_VER */ + + nla_total_size(4); /* OPT_ERSPAN_INDEX/DIR/HWID */ + } + + return opt_len; +} + static int ip_tun_encap_nlsize(struct lwtunnel_state *lwtstate) { return nla_total_size_64bit(8) /* LWTUNNEL_IP_ID */ @@ -309,13 +628,21 @@ static int ip_tun_encap_nlsize(struct lwtunnel_state *lwtstate) + nla_total_size(4) /* LWTUNNEL_IP_SRC */ + nla_total_size(1) /* LWTUNNEL_IP_TOS */ + nla_total_size(1) /* LWTUNNEL_IP_TTL */ - + nla_total_size(2); /* LWTUNNEL_IP_FLAGS */ + + nla_total_size(2) /* LWTUNNEL_IP_FLAGS */ + + ip_tun_opts_nlsize(lwt_tun_info(lwtstate)); + /* LWTUNNEL_IP_OPTS */ } static int ip_tun_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b) { - return memcmp(lwt_tun_info(a), lwt_tun_info(b), - sizeof(struct ip_tunnel_info)); + struct ip_tunnel_info *info_a = lwt_tun_info(a); + struct ip_tunnel_info *info_b = lwt_tun_info(b); + + return memcmp(info_a, info_b, sizeof(info_a->key)) || + info_a->mode != info_b->mode || + info_a->options_len != info_b->options_len || + memcmp(ip_tunnel_info_opts(info_a), + ip_tunnel_info_opts(info_b), info_a->options_len); } static const struct lwtunnel_encap_ops ip_tun_lwt_ops = { @@ -341,17 +668,21 @@ static int ip6_tun_build_state(struct nlattr *attr, struct lwtunnel_state **ts, struct netlink_ext_ack *extack) { - struct ip_tunnel_info *tun_info; - struct lwtunnel_state *new_state; struct nlattr *tb[LWTUNNEL_IP6_MAX + 1]; - int err; + struct lwtunnel_state *new_state; + struct ip_tunnel_info *tun_info; + int err, opt_len; err = nla_parse_nested_deprecated(tb, LWTUNNEL_IP6_MAX, attr, ip6_tun_policy, extack); if (err < 0) return err; - new_state = lwtunnel_state_alloc(sizeof(*tun_info)); + opt_len = ip_tun_get_optlen(tb[LWTUNNEL_IP6_OPTS], extack); + if (opt_len < 0) + return opt_len; + + new_state = lwtunnel_state_alloc(sizeof(*tun_info) + opt_len); if (!new_state) return -ENOMEM; @@ -359,6 +690,12 @@ static int ip6_tun_build_state(struct nlattr *attr, tun_info = lwt_tun_info(new_state); + err = ip_tun_set_opts(tb[LWTUNNEL_IP6_OPTS], tun_info, extack); + if (err < 0) { + lwtstate_free(new_state); + return err; + } + if (tb[LWTUNNEL_IP6_ID]) tun_info->key.tun_id = nla_get_be64(tb[LWTUNNEL_IP6_ID]); @@ -375,10 +712,10 @@ static int ip6_tun_build_state(struct nlattr *attr, tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP6_TC]); if (tb[LWTUNNEL_IP6_FLAGS]) - tun_info->key.tun_flags = nla_get_be16(tb[LWTUNNEL_IP6_FLAGS]); + tun_info->key.tun_flags |= nla_get_be16(tb[LWTUNNEL_IP6_FLAGS]); tun_info->mode = IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_IPV6; - tun_info->options_len = 0; + tun_info->options_len = opt_len; *ts = new_state; @@ -396,7 +733,8 @@ static int ip6_tun_fill_encap_info(struct sk_buff *skb, nla_put_in6_addr(skb, LWTUNNEL_IP6_SRC, &tun_info->key.u.ipv6.src) || nla_put_u8(skb, LWTUNNEL_IP6_TC, tun_info->key.tos) || nla_put_u8(skb, LWTUNNEL_IP6_HOPLIMIT, tun_info->key.ttl) || - nla_put_be16(skb, LWTUNNEL_IP6_FLAGS, tun_info->key.tun_flags)) + nla_put_be16(skb, LWTUNNEL_IP6_FLAGS, tun_info->key.tun_flags) || + ip_tun_fill_encap_opts(skb, LWTUNNEL_IP6_OPTS, tun_info)) return -ENOMEM; return 0; @@ -409,7 +747,9 @@ static int ip6_tun_encap_nlsize(struct lwtunnel_state *lwtstate) + nla_total_size(16) /* LWTUNNEL_IP6_SRC */ + nla_total_size(1) /* LWTUNNEL_IP6_HOPLIMIT */ + nla_total_size(1) /* LWTUNNEL_IP6_TC */ - + nla_total_size(2); /* LWTUNNEL_IP6_FLAGS */ + + nla_total_size(2) /* LWTUNNEL_IP6_FLAGS */ + + ip_tun_opts_nlsize(lwt_tun_info(lwtstate)); + /* LWTUNNEL_IP6_OPTS */ } static const struct lwtunnel_encap_ops ip6_tun_lwt_ops = { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1dd25189d83f..9b48aec29aca 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1958,8 +1958,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, struct sk_buff *skb, *last; u32 urg_hole = 0; struct scm_timestamping_internal tss; - bool has_tss = false; - bool has_cmsg; + int cmsg_flags; if (unlikely(flags & MSG_ERRQUEUE)) return inet_recv_error(sk, msg, len, addr_len); @@ -1974,7 +1973,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, if (sk->sk_state == TCP_LISTEN) goto out; - has_cmsg = tp->recvmsg_inq; + cmsg_flags = tp->recvmsg_inq ? 1 : 0; timeo = sock_rcvtimeo(sk, nonblock); /* Urgent data needs to be handled specially. */ @@ -2047,7 +2046,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, /* Well, if we have backlog, try to process it now yet. */ - if (copied >= target && !sk->sk_backlog.tail) + if (copied >= target && !READ_ONCE(sk->sk_backlog.tail)) break; if (copied) { @@ -2157,8 +2156,7 @@ skip_copy: if (TCP_SKB_CB(skb)->has_rxtstamp) { tcp_update_recv_tstamps(skb, &tss); - has_tss = true; - has_cmsg = true; + cmsg_flags |= 2; } if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) goto found_fin_ok; @@ -2183,10 +2181,10 @@ found_fin_ok: release_sock(sk); - if (has_cmsg) { - if (has_tss) + if (cmsg_flags) { + if (cmsg_flags & 2) tcp_recv_timestamp(msg, sk, &tss); - if (tp->recvmsg_inq) { + if (cmsg_flags & 1) { inq = tcp_inq_hint(sk); put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq); } @@ -3225,8 +3223,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) * tcpi_unacked -> Number of children ready for accept() * tcpi_sacked -> max backlog */ - info->tcpi_unacked = sk->sk_ack_backlog; - info->tcpi_sacked = sk->sk_max_ack_backlog; + info->tcpi_unacked = READ_ONCE(sk->sk_ack_backlog); + info->tcpi_sacked = READ_ONCE(sk->sk_max_ack_backlog); return; } diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 549506162dde..0d08f9e2d8d0 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -21,8 +21,8 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, struct tcp_info *info = _info; if (inet_sk_state_load(sk) == TCP_LISTEN) { - r->idiag_rqueue = sk->sk_ack_backlog; - r->idiag_wqueue = sk->sk_max_ack_backlog; + r->idiag_rqueue = READ_ONCE(sk->sk_ack_backlog); + r->idiag_wqueue = READ_ONCE(sk->sk_max_ack_backlog); } else if (sk->sk_type == SOCK_STREAM) { const struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 899e100a68e6..92282f98dc82 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2451,7 +2451,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) state = inet_sk_state_load(sk); if (state == TCP_LISTEN) - rx_queue = sk->sk_ack_backlog; + rx_queue = READ_ONCE(sk->sk_ack_backlog); else /* Because we don't lock the socket, * we might find a transient negative value. diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 4804b6dc5e65..81f51335e326 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1891,7 +1891,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) state = inet_sk_state_load(sp); if (state == TCP_LISTEN) - rx_queue = sp->sk_ack_backlog; + rx_queue = READ_ONCE(sp->sk_ack_backlog); else /* Because we don't lock the socket, * we might find a transient negative value. diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index c74f44dfaa22..2922d4150d88 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -705,7 +705,7 @@ static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags, /* put original socket back into a clean listen state. */ sk->sk_state = TCP_LISTEN; - sk->sk_ack_backlog--; + sk_acceptq_removed(sk); dprintk("%s: ok success on %02X, client on %02X\n", __func__, llc_sk(sk)->addr.sllc_sap, newllc->daddr.lsap); frees: @@ -780,7 +780,7 @@ static int llc_ui_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, } /* Well, if we have backlog, try to process it now yet. */ - if (copied >= target && !sk->sk_backlog.tail) + if (copied >= target && !READ_ONCE(sk->sk_backlog.tail)) break; if (copied) { diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 3fc38d16c456..5da9392b03d6 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -403,8 +403,9 @@ u32 ovs_vport_find_upcall_portid(const struct vport *vport, struct sk_buff *skb) ids = rcu_dereference(vport->upcall_portids); - if (ids->n_ids == 1 && ids->ids[0] == 0) - return 0; + /* If there is only one portid, select it in the fast-path. */ + if (ids->n_ids == 1) + return ids->ids[0]; hash = skb_get_hash(skb); ids_index = hash - ids->n_ids * reciprocal_divide(hash, ids->rn_ids); diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 6a0df7c8a939..46b8ff24020d 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -906,7 +906,7 @@ static int rose_accept(struct socket *sock, struct socket *newsock, int flags, /* Now attach up the new socket */ skb->sk = NULL; kfree_skb(skb); - sk->sk_ack_backlog--; + sk_acceptq_removed(sk); out_release: release_sock(sk); @@ -1011,7 +1011,7 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros make_rose->va = 0; make_rose->vr = 0; make_rose->vl = 0; - sk->sk_ack_backlog++; + sk_acceptq_added(sk); rose_insert_socket(make); diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 3177dcb17316..d99966a55c84 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -521,7 +521,7 @@ META_COLLECTOR(int_sk_ack_bl) *err = -1; return; } - dst->value = sk->sk_ack_backlog; + dst->value = READ_ONCE(sk->sk_ack_backlog); } META_COLLECTOR(int_sk_max_ack_bl) @@ -532,7 +532,7 @@ META_COLLECTOR(int_sk_max_ack_bl) *err = -1; return; } - dst->value = sk->sk_max_ack_backlog; + dst->value = READ_ONCE(sk->sk_max_ack_backlog); } META_COLLECTOR(int_sk_prio) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 1ba893b85dad..1b9809ad7725 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -324,7 +324,7 @@ void sctp_association_free(struct sctp_association *asoc) * socket. */ if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING)) - sk->sk_ack_backlog--; + sk_acceptq_removed(sk); } /* Mark as dead, so other users can know this structure is @@ -1073,7 +1073,7 @@ void sctp_assoc_migrate(struct sctp_association *assoc, struct sock *newsk) /* Decrement the backlog value for a TCP-style socket. */ if (sctp_style(oldsk, TCP)) - oldsk->sk_ack_backlog--; + sk_acceptq_removed(oldsk); /* Release references to the old endpoint and the sock. */ sctp_endpoint_put(assoc->ep); diff --git a/net/sctp/diag.c b/net/sctp/diag.c index 0851166b9175..8a15146faaeb 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -425,8 +425,8 @@ static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, r->idiag_rqueue = atomic_read(&infox->asoc->rmem_alloc); r->idiag_wqueue = infox->asoc->sndbuf_used; } else { - r->idiag_rqueue = sk->sk_ack_backlog; - r->idiag_wqueue = sk->sk_max_ack_backlog; + r->idiag_rqueue = READ_ONCE(sk->sk_ack_backlog); + r->idiag_wqueue = READ_ONCE(sk->sk_max_ack_backlog); } if (infox->sctpinfo) sctp_get_sctp_info(sk, infox->asoc, infox->sctpinfo); diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index ea53049d1db6..9d05b2e7bce2 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -164,7 +164,7 @@ void sctp_endpoint_add_asoc(struct sctp_endpoint *ep, /* Increment the backlog value for a TCP-style listening socket. */ if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING)) - sk->sk_ack_backlog++; + sk_acceptq_added(sk); } /* Free the endpoint structure. Delay cleanup until diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ffd3262b7a41..53abb97e0061 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -8376,7 +8376,7 @@ static int sctp_listen_start(struct sock *sk, int backlog) } } - sk->sk_max_ack_backlog = backlog; + WRITE_ONCE(sk->sk_max_ack_backlog, backlog); return sctp_hash_endpoint(ep); } @@ -8430,7 +8430,7 @@ int sctp_inet_listen(struct socket *sock, int backlog) /* If we are already listening, just update the backlog */ if (sctp_sstate(sk, LISTENING)) - sk->sk_max_ack_backlog = backlog; + WRITE_ONCE(sk->sk_max_ack_backlog, backlog); else { err = sctp_listen_start(sk, backlog); if (err) diff --git a/net/tipc/link.c b/net/tipc/link.c index 038861bad72b..e7bb4cbb7716 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1087,7 +1087,7 @@ static bool link_retransmit_failure(struct tipc_link *l, struct tipc_link *r, return false; if (!time_after(jiffies, TIPC_SKB_CB(skb)->retr_stamp + - msecs_to_jiffies(r->tolerance))) + msecs_to_jiffies(r->tolerance * 10))) return false; hdr = buf_msg(skb); @@ -1731,21 +1731,6 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, return; __skb_queue_head_init(&tnlq); - __skb_queue_head_init(&tmpxq); - __skb_queue_head_init(&frags); - - /* At least one packet required for safe algorithm => add dummy */ - skb = tipc_msg_create(TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG, - BASIC_H_SIZE, 0, l->addr, tipc_own_addr(l->net), - 0, 0, TIPC_ERR_NO_PORT); - if (!skb) { - pr_warn("%sunable to create tunnel packet\n", link_co_err); - return; - } - __skb_queue_tail(&tnlq, skb); - tipc_link_xmit(l, &tnlq, &tmpxq); - __skb_queue_purge(&tmpxq); - /* Link Synching: * From now on, send only one single ("dummy") SYNCH message * to peer. The SYNCH message does not contain any data, just @@ -1771,6 +1756,20 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, return; } + __skb_queue_head_init(&tmpxq); + __skb_queue_head_init(&frags); + /* At least one packet required for safe algorithm => add dummy */ + skb = tipc_msg_create(TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG, + BASIC_H_SIZE, 0, l->addr, tipc_own_addr(l->net), + 0, 0, TIPC_ERR_NO_PORT); + if (!skb) { + pr_warn("%sunable to create tunnel packet\n", link_co_err); + return; + } + __skb_queue_tail(&tnlq, skb); + tipc_link_xmit(l, &tnlq, &tmpxq); + __skb_queue_purge(&tmpxq); + /* Initialize reusable tunnel packet header */ tipc_msg_init(tipc_own_addr(l->net), &tnlhdr, TUNNEL_PROTOCOL, mtyp, INT_H_SIZE, l->addr); diff --git a/net/tipc/node.c b/net/tipc/node.c index 4b60928049ea..1f1584518221 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -667,6 +667,11 @@ static bool tipc_node_cleanup(struct tipc_node *peer) } tipc_node_write_unlock(peer); + if (!deleted) { + spin_unlock_bh(&tn->node_list_lock); + return deleted; + } + /* Calculate cluster capabilities */ tn->capabilities = TIPC_NODE_CAPABILITIES; list_for_each_entry_rcu(temp_node, &tn->node_list, list) { @@ -2043,7 +2048,7 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info) struct net *net = sock_net(skb->sk); struct tipc_net *tn = net_generic(net, tipc_net_id); struct nlattr *attrs[TIPC_NLA_NET_MAX + 1]; - struct tipc_node *peer; + struct tipc_node *peer, *temp_node; u32 addr; int err; @@ -2084,6 +2089,11 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info) tipc_node_write_unlock(peer); tipc_node_delete(peer); + /* Calculate cluster capabilities */ + tn->capabilities = TIPC_NODE_CAPABILITIES; + list_for_each_entry_rcu(temp_node, &tn->node_list, list) { + tn->capabilities &= temp_node->capabilities; + } err = 0; err_out: tipc_node_put(peer); diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index c0856e74f44f..1f4fde4711b6 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -439,7 +439,7 @@ static void vsock_pending_work(struct work_struct *work) if (vsock_is_pending(sk)) { vsock_remove_pending(listener, sk); - listener->sk_ack_backlog--; + sk_acceptq_removed(listener); } else if (!vsk->rejected) { /* We are not on the pending list and accept() did not reject * us, so we must have been accepted by our user process. We @@ -1299,7 +1299,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags, err = -listener->sk_err; if (connected) { - listener->sk_ack_backlog--; + sk_acceptq_removed(listener); lock_sock_nested(connected, SINGLE_DEPTH_NESTING); vconnected = vsock_sk(connected); diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index bef8772116ec..7fa09c5e4625 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -428,7 +428,7 @@ static void hvs_open_connection(struct vmbus_channel *chan) if (conn_from_host) { new->sk_state = TCP_ESTABLISHED; - sk->sk_ack_backlog++; + sk_acceptq_added(sk); hvs_addr_init(&vnew->local_addr, if_type); hvs_remote_addr_init(&vnew->remote_addr, &vnew->local_addr); diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index d02c9b41a768..193f959e51ef 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -1066,7 +1066,7 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt) return -ENOMEM; } - sk->sk_ack_backlog++; + sk_acceptq_added(sk); lock_sock_nested(child, SINGLE_DEPTH_NESTING); diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 8c9c4ed90fa7..6ba98a1efe2e 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1098,7 +1098,7 @@ static int vmci_transport_recv_listen(struct sock *sk, } vsock_add_pending(sk, pending); - sk->sk_ack_backlog++; + sk_acceptq_added(sk); pending->sk_state = TCP_SYN_SENT; vmci_trans(vpending)->produce_size = diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 6aee9f5e8e71..c34f7d077604 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -891,7 +891,7 @@ static int x25_accept(struct socket *sock, struct socket *newsock, int flags, /* Now attach up the new socket */ skb->sk = NULL; kfree_skb(skb); - sk->sk_ack_backlog--; + sk_acceptq_removed(sk); newsock->state = SS_CONNECTED; rc = 0; out2: @@ -1062,7 +1062,7 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb, skb_copy_from_linear_data(skb, makex25->calluserdata.cuddata, skb->len); makex25->calluserdata.cudlength = skb->len; - sk->sk_ack_backlog++; + sk_acceptq_added(sk); x25_insert_socket(make); diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 0bd6b23c97ef..a8e04d665b69 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -10,7 +10,7 @@ TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh -TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh +TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh traceroute.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any diff --git a/tools/testing/selftests/net/traceroute.sh b/tools/testing/selftests/net/traceroute.sh new file mode 100755 index 000000000000..de9ca97abc30 --- /dev/null +++ b/tools/testing/selftests/net/traceroute.sh @@ -0,0 +1,322 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Run traceroute/traceroute6 tests +# + +VERBOSE=0 +PAUSE_ON_FAIL=no + +################################################################################ +# +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + printf "TEST: %-60s [ OK ]\n" "${msg}" + nsuccess=$((nsuccess+1)) + else + ret=1 + nfail=$((nfail+1)) + printf "TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +run_cmd() +{ + local ns + local cmd + local out + local rc + + ns="$1" + shift + cmd="$*" + + if [ "$VERBOSE" = "1" ]; then + printf " COMMAND: $cmd\n" + fi + + out=$(eval ip netns exec ${ns} ${cmd} 2>&1) + rc=$? + if [ "$VERBOSE" = "1" -a -n "$out" ]; then + echo " $out" + fi + + [ "$VERBOSE" = "1" ] && echo + + return $rc +} + +################################################################################ +# create namespaces and interconnects + +create_ns() +{ + local ns=$1 + local addr=$2 + local addr6=$3 + + [ -z "${addr}" ] && addr="-" + [ -z "${addr6}" ] && addr6="-" + + ip netns add ${ns} + + ip netns exec ${ns} ip link set lo up + if [ "${addr}" != "-" ]; then + ip netns exec ${ns} ip addr add dev lo ${addr} + fi + if [ "${addr6}" != "-" ]; then + ip netns exec ${ns} ip -6 addr add dev lo ${addr6} + fi + + ip netns exec ${ns} ip ro add unreachable default metric 8192 + ip netns exec ${ns} ip -6 ro add unreachable default metric 8192 + + ip netns exec ${ns} sysctl -qw net.ipv4.ip_forward=1 + ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 + ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.forwarding=1 + ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.forwarding=1 + ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.accept_dad=0 +} + +# create veth pair to connect namespaces and apply addresses. +connect_ns() +{ + local ns1=$1 + local ns1_dev=$2 + local ns1_addr=$3 + local ns1_addr6=$4 + local ns2=$5 + local ns2_dev=$6 + local ns2_addr=$7 + local ns2_addr6=$8 + + ip netns exec ${ns1} ip li add ${ns1_dev} type veth peer name tmp + ip netns exec ${ns1} ip li set ${ns1_dev} up + ip netns exec ${ns1} ip li set tmp netns ${ns2} name ${ns2_dev} + ip netns exec ${ns2} ip li set ${ns2_dev} up + + if [ "${ns1_addr}" != "-" ]; then + ip netns exec ${ns1} ip addr add dev ${ns1_dev} ${ns1_addr} + fi + + if [ "${ns2_addr}" != "-" ]; then + ip netns exec ${ns2} ip addr add dev ${ns2_dev} ${ns2_addr} + fi + + if [ "${ns1_addr6}" != "-" ]; then + ip netns exec ${ns1} ip addr add dev ${ns1_dev} ${ns1_addr6} + fi + + if [ "${ns2_addr6}" != "-" ]; then + ip netns exec ${ns2} ip addr add dev ${ns2_dev} ${ns2_addr6} + fi +} + +################################################################################ +# traceroute6 test +# +# Verify that in this scenario +# +# ------------------------ N2 +# | | +# ------ ------ N3 ---- +# | R1 | | R2 |------|H2| +# ------ ------ ---- +# | | +# ------------------------ N1 +# | +# ---- +# |H1| +# ---- +# +# where H1's default route goes through R1 and R1's default route goes +# through R2 over N2, traceroute6 from H1 to H2 reports R2's address +# on N2 and not N1. +# +# Addresses are assigned as follows: +# +# N1: 2000:101::/64 +# N2: 2000:102::/64 +# N3: 2000:103::/64 +# +# R1's host part of address: 1 +# R2's host part of address: 2 +# H1's host part of address: 3 +# H2's host part of address: 4 +# +# For example: +# the IPv6 address of R1's interface on N2 is 2000:102::1/64 + +cleanup_traceroute6() +{ + local ns + + for ns in host-1 host-2 router-1 router-2 + do + ip netns del ${ns} 2>/dev/null + done +} + +setup_traceroute6() +{ + brdev=br0 + + # start clean + cleanup_traceroute6 + + set -e + create_ns host-1 + create_ns host-2 + create_ns router-1 + create_ns router-2 + + # Setup N3 + connect_ns router-2 eth3 - 2000:103::2/64 host-2 eth3 - 2000:103::4/64 + ip netns exec host-2 ip route add default via 2000:103::2 + + # Setup N2 + connect_ns router-1 eth2 - 2000:102::1/64 router-2 eth2 - 2000:102::2/64 + ip netns exec router-1 ip route add default via 2000:102::2 + + # Setup N1. host-1 and router-2 connect to a bridge in router-1. + ip netns exec router-1 ip link add name ${brdev} type bridge + ip netns exec router-1 ip link set ${brdev} up + ip netns exec router-1 ip addr add 2000:101::1/64 dev ${brdev} + + connect_ns host-1 eth0 - 2000:101::3/64 router-1 eth0 - - + ip netns exec router-1 ip link set dev eth0 master ${brdev} + ip netns exec host-1 ip route add default via 2000:101::1 + + connect_ns router-2 eth1 - 2000:101::2/64 router-1 eth1 - - + ip netns exec router-1 ip link set dev eth1 master ${brdev} + + # Prime the network + ip netns exec host-1 ping6 -c5 2000:103::4 >/dev/null 2>&1 + + set +e +} + +run_traceroute6() +{ + if [ ! -x "$(command -v traceroute6)" ]; then + echo "SKIP: Could not run IPV6 test without traceroute6" + return + fi + + setup_traceroute6 + + # traceroute6 host-2 from host-1 (expects 2000:102::2) + run_cmd host-1 "traceroute6 2000:103::4 | grep -q 2000:102::2" + log_test $? 0 "IPV6 traceroute" + + cleanup_traceroute6 +} + +################################################################################ +# traceroute test +# +# Verify that traceroute from H1 to H2 shows 1.0.1.1 in this scenario +# +# 1.0.3.1/24 +# ---- 1.0.1.3/24 1.0.1.1/24 ---- 1.0.2.1/24 1.0.2.4/24 ---- +# |H1|--------------------------|R1|--------------------------|H2| +# ---- N1 ---- N2 ---- +# +# where net.ipv4.icmp_errors_use_inbound_ifaddr is set on R1 and +# 1.0.3.1/24 and 1.0.1.1/24 are respectively R1's primary and secondary +# address on N1. +# + +cleanup_traceroute() +{ + local ns + + for ns in host-1 host-2 router + do + ip netns del ${ns} 2>/dev/null + done +} + +setup_traceroute() +{ + # start clean + cleanup_traceroute + + set -e + create_ns host-1 + create_ns host-2 + create_ns router + + connect_ns host-1 eth0 1.0.1.3/24 - \ + router eth1 1.0.3.1/24 - + ip netns exec host-1 ip route add default via 1.0.1.1 + + ip netns exec router ip addr add 1.0.1.1/24 dev eth1 + ip netns exec router sysctl -qw \ + net.ipv4.icmp_errors_use_inbound_ifaddr=1 + + connect_ns host-2 eth0 1.0.2.4/24 - \ + router eth2 1.0.2.1/24 - + ip netns exec host-2 ip route add default via 1.0.2.1 + + # Prime the network + ip netns exec host-1 ping -c5 1.0.2.4 >/dev/null 2>&1 + + set +e +} + +run_traceroute() +{ + if [ ! -x "$(command -v traceroute)" ]; then + echo "SKIP: Could not run IPV4 test without traceroute" + return + fi + + setup_traceroute + + # traceroute host-2 from host-1 (expects 1.0.1.1). Takes a while. + run_cmd host-1 "traceroute 1.0.2.4 | grep -q 1.0.1.1" + log_test $? 0 "IPV4 traceroute" + + cleanup_traceroute +} + +################################################################################ +# Run tests + +run_tests() +{ + run_traceroute6 + run_traceroute +} + +################################################################################ +# main + +declare -i nfail=0 +declare -i nsuccess=0 + +while getopts :pv o +do + case $o in + p) PAUSE_ON_FAIL=yes;; + v) VERBOSE=$(($VERBOSE + 1));; + *) exit 1;; + esac +done + +run_tests + +printf "\nTests passed: %3d\n" ${nsuccess} +printf "Tests failed: %3d\n" ${nfail} |