aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/devicetree/bindings/net/qca,ar803x.yaml111
-rw-r--r--MAINTAINERS2
-rw-r--r--drivers/crypto/chelsio/chtls/chtls_io.c10
-rw-r--r--drivers/net/dsa/mv88e6xxx/global2.h3
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_ptp.c2
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c15
-rw-r--r--drivers/net/ethernet/xilinx/xilinx_axienet_main.c4
-rw-r--r--drivers/net/phy/Kconfig10
-rw-r--r--drivers/net/phy/at803x.c321
-rw-r--r--drivers/ptp/ptp_clockmatrix.c4
-rw-r--r--include/dt-bindings/net/qca-ar803x.h13
-rw-r--r--include/net/arp.h4
-rw-r--r--include/net/ndisc.h8
-rw-r--r--include/net/sock.h22
-rw-r--r--include/uapi/linux/lwtunnel.h41
-rw-r--r--net/atm/signaling.c2
-rw-r--r--net/atm/svc.c2
-rw-r--r--net/ax25/af_ax25.c2
-rw-r--r--net/ax25/ax25_in.c2
-rw-r--r--net/bluetooth/af_bluetooth.c4
-rw-r--r--net/core/neighbour.c4
-rw-r--r--net/dccp/proto.c2
-rw-r--r--net/decnet/af_decnet.c2
-rw-r--r--net/decnet/dn_nsp_in.c2
-rw-r--r--net/ipv4/af_inet.c2
-rw-r--r--net/ipv4/inet_connection_sock.c2
-rw-r--r--net/ipv4/inet_diag.c15
-rw-r--r--net/ipv4/ip_tunnel_core.c382
-rw-r--r--net/ipv4/tcp.c20
-rw-r--r--net/ipv4/tcp_diag.c4
-rw-r--r--net/ipv4/tcp_ipv4.c2
-rw-r--r--net/ipv6/tcp_ipv6.c2
-rw-r--r--net/llc/af_llc.c4
-rw-r--r--net/openvswitch/vport.c5
-rw-r--r--net/rose/af_rose.c4
-rw-r--r--net/sched/em_meta.c4
-rw-r--r--net/sctp/associola.c4
-rw-r--r--net/sctp/diag.c4
-rw-r--r--net/sctp/endpointola.c2
-rw-r--r--net/sctp/socket.c4
-rw-r--r--net/tipc/link.c31
-rw-r--r--net/tipc/node.c12
-rw-r--r--net/vmw_vsock/af_vsock.c4
-rw-r--r--net/vmw_vsock/hyperv_transport.c2
-rw-r--r--net/vmw_vsock/virtio_transport_common.c2
-rw-r--r--net/vmw_vsock/vmci_transport.c2
-rw-r--r--net/x25/af_x25.c4
-rw-r--r--tools/testing/selftests/net/Makefile2
-rwxr-xr-xtools/testing/selftests/net/traceroute.sh322
49 files changed, 1285 insertions, 148 deletions
diff --git a/Documentation/devicetree/bindings/net/qca,ar803x.yaml b/Documentation/devicetree/bindings/net/qca,ar803x.yaml
new file mode 100644
index 000000000000..5a6c9d20c0ba
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/qca,ar803x.yaml
@@ -0,0 +1,111 @@
+# SPDX-License-Identifier: GPL-2.0+
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/qca,ar803x.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Atheros AR803x PHY
+
+maintainers:
+ - Andrew Lunn <andrew@lunn.ch>
+ - Florian Fainelli <f.fainelli@gmail.com>
+ - Heiner Kallweit <hkallweit1@gmail.com>
+
+description: |
+ Bindings for Qualcomm Atheros AR803x PHYs
+
+allOf:
+ - $ref: ethernet-phy.yaml#
+
+properties:
+ qca,clk-out-frequency:
+ description: Clock output frequency in Hertz.
+ allOf:
+ - $ref: /schemas/types.yaml#/definitions/uint32
+ - enum: [ 25000000, 50000000, 62500000, 125000000 ]
+
+ qca,clk-out-strength:
+ description: Clock output driver strength.
+ allOf:
+ - $ref: /schemas/types.yaml#/definitions/uint32
+ - enum: [ 0, 1, 2 ]
+
+ qca,keep-pll-enabled:
+ description: |
+ If set, keep the PLL enabled even if there is no link. Useful if you
+ want to use the clock output without an ethernet link.
+
+ Only supported on the AR8031.
+ type: boolean
+
+ vddio-supply:
+ description: |
+ RGMII I/O voltage regulator (see regulator/regulator.yaml).
+
+ The PHY supports RGMII I/O voltages of 1.5V, 1.8V and 2.5V. You can
+ either connect this to the vddio-regulator (1.5V / 1.8V) or the
+ vddh-regulator (2.5V).
+
+ Only supported on the AR8031.
+
+ vddio-regulator:
+ type: object
+ description:
+ Initial data for the VDDIO regulator. Set this to 1.5V or 1.8V.
+ allOf:
+ - $ref: /schemas/regulator/regulator.yaml
+
+ vddh-regulator:
+ type: object
+ description:
+ Dummy subnode to model the external connection of the PHY VDDH
+ regulator to VDDIO.
+ allOf:
+ - $ref: /schemas/regulator/regulator.yaml
+
+
+examples:
+ - |
+ #include <dt-bindings/net/qca-ar803x.h>
+
+ ethernet {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ phy-mode = "rgmii-id";
+
+ ethernet-phy@0 {
+ reg = <0>;
+
+ qca,clk-out-frequency = <125000000>;
+ qca,clk-out-strength = <AR803X_STRENGTH_FULL>;
+
+ vddio-supply = <&vddio>;
+
+ vddio: vddio-regulator {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ };
+ };
+ };
+ - |
+ #include <dt-bindings/net/qca-ar803x.h>
+
+ ethernet {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ phy-mode = "rgmii-id";
+
+ ethernet-phy@0 {
+ reg = <0>;
+
+ qca,clk-out-frequency = <50000000>;
+ qca,keep-pll-enabled;
+
+ vddio-supply = <&vddh>;
+
+ vddh: vddh-regulator {
+ };
+ };
+ };
diff --git a/MAINTAINERS b/MAINTAINERS
index c0024b296158..709c60aacb58 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6155,10 +6155,12 @@ S: Maintained
F: Documentation/ABI/testing/sysfs-class-net-phydev
F: Documentation/devicetree/bindings/net/ethernet-phy.yaml
F: Documentation/devicetree/bindings/net/mdio*
+F: Documentation/devicetree/bindings/net/qca,ar803x.yaml
F: Documentation/networking/phy.rst
F: drivers/net/phy/
F: drivers/of/of_mdio.c
F: drivers/of/of_net.c
+F: include/dt-bindings/net/qca-ar803x.h
F: include/linux/*mdio*.h
F: include/linux/of_net.h
F: include/linux/phy.h
diff --git a/drivers/crypto/chelsio/chtls/chtls_io.c b/drivers/crypto/chelsio/chtls/chtls_io.c
index 98bc5a4cd5e7..599dec59c6cc 100644
--- a/drivers/crypto/chelsio/chtls/chtls_io.c
+++ b/drivers/crypto/chelsio/chtls/chtls_io.c
@@ -1437,7 +1437,7 @@ static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
csk->wr_max_credits))
sk->sk_write_space(sk);
- if (copied >= target && !sk->sk_backlog.tail)
+ if (copied >= target && !READ_ONCE(sk->sk_backlog.tail))
break;
if (copied) {
@@ -1470,7 +1470,7 @@ static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
break;
}
}
- if (sk->sk_backlog.tail) {
+ if (READ_ONCE(sk->sk_backlog.tail)) {
release_sock(sk);
lock_sock(sk);
chtls_cleanup_rbuf(sk, copied);
@@ -1615,7 +1615,7 @@ static int peekmsg(struct sock *sk, struct msghdr *msg,
break;
}
- if (sk->sk_backlog.tail) {
+ if (READ_ONCE(sk->sk_backlog.tail)) {
/* Do not sleep, just process backlog. */
release_sock(sk);
lock_sock(sk);
@@ -1743,7 +1743,7 @@ int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
csk->wr_max_credits))
sk->sk_write_space(sk);
- if (copied >= target && !sk->sk_backlog.tail)
+ if (copied >= target && !READ_ONCE(sk->sk_backlog.tail))
break;
if (copied) {
@@ -1774,7 +1774,7 @@ int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
}
}
- if (sk->sk_backlog.tail) {
+ if (READ_ONCE(sk->sk_backlog.tail)) {
release_sock(sk);
lock_sock(sk);
chtls_cleanup_rbuf(sk, copied);
diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h
index d80ad203d126..1f42ee656816 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.h
+++ b/drivers/net/dsa/mv88e6xxx/global2.h
@@ -532,7 +532,8 @@ static inline int mv88e6xxx_g2_atu_stats_set(struct mv88e6xxx_chip *chip,
return -EOPNOTSUPP;
}
-static inline int mv88e6xxx_g2_atu_stats_get(struct mv88e6xxx_chip *chip)
+static inline int mv88e6xxx_g2_atu_stats_get(struct mv88e6xxx_chip *chip,
+ u16 *stats)
{
return -EOPNOTSUPP;
}
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c
index 8175513e48c9..1f9eab74453e 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c
@@ -1207,7 +1207,7 @@ int aq_ptp_init(struct aq_nic_s *aq_nic, unsigned int idx_vec)
aq_ptp->ptp_info = aq_ptp_clock;
aq_ptp_gpio_init(&aq_ptp->ptp_info, &mbox.info);
clock = ptp_clock_register(&aq_ptp->ptp_info, &aq_nic->ndev->dev);
- if (!clock || IS_ERR(clock)) {
+ if (IS_ERR(clock)) {
netdev_err(aq_nic->ndev, "ptp_clock_register failed\n");
err = PTR_ERR(clock);
goto err_exit;
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
index b713739f4804..d322123ed373 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
@@ -7,14 +7,19 @@
#define phylink_to_dpaa2_mac(config) \
container_of((config), struct dpaa2_mac, phylink_config)
-static phy_interface_t phy_mode(enum dpmac_eth_if eth_if)
+static int phy_mode(enum dpmac_eth_if eth_if, phy_interface_t *if_mode)
{
+ *if_mode = PHY_INTERFACE_MODE_NA;
+
switch (eth_if) {
case DPMAC_ETH_IF_RGMII:
- return PHY_INTERFACE_MODE_RGMII;
+ *if_mode = PHY_INTERFACE_MODE_RGMII;
+ break;
default:
return -EINVAL;
}
+
+ return 0;
}
/* Caller must call of_node_put on the returned value */
@@ -51,11 +56,11 @@ static int dpaa2_mac_get_if_mode(struct device_node *node,
if (!err)
return if_mode;
- if_mode = phy_mode(attr.eth_if);
- if (if_mode >= 0)
+ err = phy_mode(attr.eth_if, &if_mode);
+ if (!err)
return if_mode;
- return -ENODEV;
+ return err;
}
static bool dpaa2_mac_phy_mode_mismatch(struct dpaa2_mac *mac,
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index 867726d696e2..8f32db6d2c45 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -1788,10 +1788,6 @@ static int axienet_probe(struct platform_device *pdev)
/* Check for these resources directly on the Ethernet node. */
struct resource *res = platform_get_resource(pdev,
IORESOURCE_MEM, 1);
- if (!res) {
- dev_err(&pdev->dev, "unable to get DMA memory resource\n");
- goto free_netdev;
- }
lp->dma_regs = devm_ioremap_resource(&pdev->dev, res);
lp->rx_irq = platform_get_irq(pdev, 1);
lp->tx_irq = platform_get_irq(pdev, 0);
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index fe602648b99f..8bccadf17e60 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -282,11 +282,6 @@ config AX88796B_PHY
Currently supports the Asix Electronics PHY found in the X-Surf 100
AX88796B package.
-config AT803X_PHY
- tristate "AT803X PHYs"
- ---help---
- Currently supports the AT8030 and AT8035 model
-
config BCM63XX_PHY
tristate "Broadcom 63xx SOCs internal PHY"
depends on BCM63XX || COMPILE_TEST
@@ -444,6 +439,11 @@ config NXP_TJA11XX_PHY
---help---
Currently supports the NXP TJA1100 and TJA1101 PHY.
+config AT803X_PHY
+ tristate "Qualcomm Atheros AR803X PHYs"
+ help
+ Currently supports the AR8030, AR8031, AR8033 and AR8035 model
+
config QSEMI_PHY
tristate "Quality Semiconductor PHYs"
---help---
diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index 8e30db28fd7d..aee62610bade 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -2,7 +2,7 @@
/*
* drivers/net/phy/at803x.c
*
- * Driver for Atheros 803x PHY
+ * Driver for Qualcomm Atheros AR803x PHY
*
* Author: Matus Ujhelyi <ujhelyi.m@gmail.com>
*/
@@ -13,7 +13,12 @@
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/of_gpio.h>
+#include <linux/bitfield.h>
#include <linux/gpio/consumer.h>
+#include <linux/regulator/of_regulator.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/consumer.h>
+#include <dt-bindings/net/qca-ar803x.h>
#define AT803X_SPECIFIC_STATUS 0x11
#define AT803X_SS_SPEED_MASK (3 << 14)
@@ -62,16 +67,62 @@
#define AT803X_DEBUG_REG_5 0x05
#define AT803X_DEBUG_TX_CLK_DLY_EN BIT(8)
+#define AT803X_DEBUG_REG_1F 0x1F
+#define AT803X_DEBUG_PLL_ON BIT(2)
+#define AT803X_DEBUG_RGMII_1V8 BIT(3)
+
+/* AT803x supports either the XTAL input pad, an internal PLL or the
+ * DSP as clock reference for the clock output pad. The XTAL reference
+ * is only used for 25 MHz output, all other frequencies need the PLL.
+ * The DSP as a clock reference is used in synchronous ethernet
+ * applications.
+ *
+ * By default the PLL is only enabled if there is a link. Otherwise
+ * the PHY will go into low power state and disabled the PLL. You can
+ * set the PLL_ON bit (see debug register 0x1f) to keep the PLL always
+ * enabled.
+ */
+#define AT803X_MMD7_CLK25M 0x8016
+#define AT803X_CLK_OUT_MASK GENMASK(4, 2)
+#define AT803X_CLK_OUT_25MHZ_XTAL 0
+#define AT803X_CLK_OUT_25MHZ_DSP 1
+#define AT803X_CLK_OUT_50MHZ_PLL 2
+#define AT803X_CLK_OUT_50MHZ_DSP 3
+#define AT803X_CLK_OUT_62_5MHZ_PLL 4
+#define AT803X_CLK_OUT_62_5MHZ_DSP 5
+#define AT803X_CLK_OUT_125MHZ_PLL 6
+#define AT803X_CLK_OUT_125MHZ_DSP 7
+
+/* The AR8035 has another mask which is compatible with the AR8031/AR8033 mask
+ * but doesn't support choosing between XTAL/PLL and DSP.
+ */
+#define AT8035_CLK_OUT_MASK GENMASK(4, 3)
+
+#define AT803X_CLK_OUT_STRENGTH_MASK GENMASK(8, 7)
+#define AT803X_CLK_OUT_STRENGTH_FULL 0
+#define AT803X_CLK_OUT_STRENGTH_HALF 1
+#define AT803X_CLK_OUT_STRENGTH_QUARTER 2
+
#define ATH9331_PHY_ID 0x004dd041
#define ATH8030_PHY_ID 0x004dd076
#define ATH8031_PHY_ID 0x004dd074
#define ATH8035_PHY_ID 0x004dd072
#define AT803X_PHY_ID_MASK 0xffffffef
-MODULE_DESCRIPTION("Atheros 803x PHY driver");
+MODULE_DESCRIPTION("Qualcomm Atheros AR803x PHY driver");
MODULE_AUTHOR("Matus Ujhelyi");
MODULE_LICENSE("GPL");
+struct at803x_priv {
+ int flags;
+#define AT803X_KEEP_PLL_ENABLED BIT(0) /* don't turn off internal PLL */
+ u16 clk_25m_reg;
+ u16 clk_25m_mask;
+ struct regulator_dev *vddio_rdev;
+ struct regulator_dev *vddh_rdev;
+ struct regulator *vddio;
+};
+
struct at803x_context {
u16 bmcr;
u16 advertise;
@@ -237,6 +288,240 @@ static int at803x_resume(struct phy_device *phydev)
return phy_modify(phydev, MII_BMCR, BMCR_PDOWN | BMCR_ISOLATE, 0);
}
+static int at803x_rgmii_reg_set_voltage_sel(struct regulator_dev *rdev,
+ unsigned int selector)
+{
+ struct phy_device *phydev = rdev_get_drvdata(rdev);
+
+ if (selector)
+ return at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_1F,
+ 0, AT803X_DEBUG_RGMII_1V8);
+ else
+ return at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_1F,
+ AT803X_DEBUG_RGMII_1V8, 0);
+}
+
+static int at803x_rgmii_reg_get_voltage_sel(struct regulator_dev *rdev)
+{
+ struct phy_device *phydev = rdev_get_drvdata(rdev);
+ int val;
+
+ val = at803x_debug_reg_read(phydev, AT803X_DEBUG_REG_1F);
+ if (val < 0)
+ return val;
+
+ return (val & AT803X_DEBUG_RGMII_1V8) ? 1 : 0;
+}
+
+static struct regulator_ops vddio_regulator_ops = {
+ .list_voltage = regulator_list_voltage_table,
+ .set_voltage_sel = at803x_rgmii_reg_set_voltage_sel,
+ .get_voltage_sel = at803x_rgmii_reg_get_voltage_sel,
+};
+
+static const unsigned int vddio_voltage_table[] = {
+ 1500000,
+ 1800000,
+};
+
+static const struct regulator_desc vddio_desc = {
+ .name = "vddio",
+ .of_match = of_match_ptr("vddio-regulator"),
+ .n_voltages = ARRAY_SIZE(vddio_voltage_table),
+ .volt_table = vddio_voltage_table,
+ .ops = &vddio_regulator_ops,
+ .type = REGULATOR_VOLTAGE,
+ .owner = THIS_MODULE,
+};
+
+static struct regulator_ops vddh_regulator_ops = {
+};
+
+static const struct regulator_desc vddh_desc = {
+ .name = "vddh",
+ .of_match = of_match_ptr("vddh-regulator"),
+ .n_voltages = 1,
+ .fixed_uV = 2500000,
+ .ops = &vddh_regulator_ops,
+ .type = REGULATOR_VOLTAGE,
+ .owner = THIS_MODULE,
+};
+
+static int at8031_register_regulators(struct phy_device *phydev)
+{
+ struct at803x_priv *priv = phydev->priv;
+ struct device *dev = &phydev->mdio.dev;
+ struct regulator_config config = { };
+
+ config.dev = dev;
+ config.driver_data = phydev;
+
+ priv->vddio_rdev = devm_regulator_register(dev, &vddio_desc, &config);
+ if (IS_ERR(priv->vddio_rdev)) {
+ phydev_err(phydev, "failed to register VDDIO regulator\n");
+ return PTR_ERR(priv->vddio_rdev);
+ }
+
+ priv->vddh_rdev = devm_regulator_register(dev, &vddh_desc, &config);
+ if (IS_ERR(priv->vddh_rdev)) {
+ phydev_err(phydev, "failed to register VDDH regulator\n");
+ return PTR_ERR(priv->vddh_rdev);
+ }
+
+ return 0;
+}
+
+static bool at803x_match_phy_id(struct phy_device *phydev, u32 phy_id)
+{
+ return (phydev->phy_id & phydev->drv->phy_id_mask)
+ == (phy_id & phydev->drv->phy_id_mask);
+}
+
+static int at803x_parse_dt(struct phy_device *phydev)
+{
+ struct device_node *node = phydev->mdio.dev.of_node;
+ struct at803x_priv *priv = phydev->priv;
+ unsigned int sel, mask;
+ u32 freq, strength;
+ int ret;
+
+ if (!IS_ENABLED(CONFIG_OF_MDIO))
+ return 0;
+
+ ret = of_property_read_u32(node, "qca,clk-out-frequency", &freq);
+ if (!ret) {
+ mask = AT803X_CLK_OUT_MASK;
+ switch (freq) {
+ case 25000000:
+ sel = AT803X_CLK_OUT_25MHZ_XTAL;
+ break;
+ case 50000000:
+ sel = AT803X_CLK_OUT_50MHZ_PLL;
+ break;
+ case 62500000:
+ sel = AT803X_CLK_OUT_62_5MHZ_PLL;
+ break;
+ case 125000000:
+ sel = AT803X_CLK_OUT_125MHZ_PLL;
+ break;
+ default:
+ phydev_err(phydev, "invalid qca,clk-out-frequency\n");
+ return -EINVAL;
+ }
+
+ priv->clk_25m_reg |= FIELD_PREP(mask, sel);
+ priv->clk_25m_mask |= mask;
+
+ /* Fixup for the AR8030/AR8035. This chip has another mask and
+ * doesn't support the DSP reference. Eg. the lowest bit of the
+ * mask. The upper two bits select the same frequencies. Mask
+ * the lowest bit here.
+ *
+ * Warning:
+ * There was no datasheet for the AR8030 available so this is
+ * just a guess. But the AR8035 is listed as pin compatible
+ * to the AR8030 so there might be a good chance it works on
+ * the AR8030 too.
+ */
+ if (at803x_match_phy_id(phydev, ATH8030_PHY_ID) ||
+ at803x_match_phy_id(phydev, ATH8035_PHY_ID)) {
+ priv->clk_25m_reg &= ~AT8035_CLK_OUT_MASK;
+ priv->clk_25m_mask &= ~AT8035_CLK_OUT_MASK;
+ }
+ }
+
+ ret = of_property_read_u32(node, "qca,clk-out-strength", &strength);
+ if (!ret) {
+ priv->clk_25m_mask |= AT803X_CLK_OUT_STRENGTH_MASK;
+ switch (strength) {
+ case AR803X_STRENGTH_FULL:
+ priv->clk_25m_reg |= AT803X_CLK_OUT_STRENGTH_FULL;
+ break;
+ case AR803X_STRENGTH_HALF:
+ priv->clk_25m_reg |= AT803X_CLK_OUT_STRENGTH_HALF;
+ break;
+ case AR803X_STRENGTH_QUARTER:
+ priv->clk_25m_reg |= AT803X_CLK_OUT_STRENGTH_QUARTER;
+ break;
+ default:
+ phydev_err(phydev, "invalid qca,clk-out-strength\n");
+ return -EINVAL;
+ }
+ }
+
+ /* Only supported on AR8031/AR8033, the AR8030/AR8035 use strapping
+ * options.
+ */
+ if (at803x_match_phy_id(phydev, ATH8031_PHY_ID)) {
+ if (of_property_read_bool(node, "qca,keep-pll-enabled"))
+ priv->flags |= AT803X_KEEP_PLL_ENABLED;
+
+ ret = at8031_register_regulators(phydev);
+ if (ret < 0)
+ return ret;
+
+ priv->vddio = devm_regulator_get_optional(&phydev->mdio.dev,
+ "vddio");
+ if (IS_ERR(priv->vddio)) {
+ phydev_err(phydev, "failed to get VDDIO regulator\n");
+ return PTR_ERR(priv->vddio);
+ }
+
+ ret = regulator_enable(priv->vddio);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int at803x_probe(struct phy_device *phydev)
+{
+ struct device *dev = &phydev->mdio.dev;
+ struct at803x_priv *priv;
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ phydev->priv = priv;
+
+ return at803x_parse_dt(phydev);
+}
+
+static int at803x_clk_out_config(struct phy_device *phydev)
+{
+ struct at803x_priv *priv = phydev->priv;
+ int val;
+
+ if (!priv->clk_25m_mask)
+ return 0;
+
+ val = phy_read_mmd(phydev, MDIO_MMD_AN, AT803X_MMD7_CLK25M);
+ if (val < 0)
+ return val;
+
+ val &= ~priv->clk_25m_mask;
+ val |= priv->clk_25m_reg;
+
+ return phy_write_mmd(phydev, MDIO_MMD_AN, AT803X_MMD7_CLK25M, val);
+}
+
+static int at8031_pll_config(struct phy_device *phydev)
+{
+ struct at803x_priv *priv = phydev->priv;
+
+ /* The default after hardware reset is PLL OFF. After a soft reset, the
+ * values are retained.
+ */
+ if (priv->flags & AT803X_KEEP_PLL_ENABLED)
+ return at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_1F,
+ 0, AT803X_DEBUG_PLL_ON);
+ else
+ return at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_1F,
+ AT803X_DEBUG_PLL_ON, 0);
+}
+
static int at803x_config_init(struct phy_device *phydev)
{
int ret;
@@ -259,8 +544,20 @@ static int at803x_config_init(struct phy_device *phydev)
ret = at803x_enable_tx_delay(phydev);
else
ret = at803x_disable_tx_delay(phydev);
+ if (ret < 0)
+ return ret;
- return ret;
+ ret = at803x_clk_out_config(phydev);
+ if (ret < 0)
+ return ret;
+
+ if (at803x_match_phy_id(phydev, ATH8031_PHY_ID)) {
+ ret = at8031_pll_config(phydev);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
}
static int at803x_ack_interrupt(struct phy_device *phydev)
@@ -409,10 +706,11 @@ static int at803x_read_status(struct phy_device *phydev)
static struct phy_driver at803x_driver[] = {
{
- /* ATHEROS 8035 */
+ /* Qualcomm Atheros AR8035 */
.phy_id = ATH8035_PHY_ID,
- .name = "Atheros 8035 ethernet",
+ .name = "Qualcomm Atheros AR8035",
.phy_id_mask = AT803X_PHY_ID_MASK,
+ .probe = at803x_probe,
.config_init = at803x_config_init,
.set_wol = at803x_set_wol,
.get_wol = at803x_get_wol,
@@ -423,10 +721,11 @@ static struct phy_driver at803x_driver[] = {
.ack_interrupt = at803x_ack_interrupt,
.config_intr = at803x_config_intr,
}, {
- /* ATHEROS 8030 */
+ /* Qualcomm Atheros AR8030 */
.phy_id = ATH8030_PHY_ID,
- .name = "Atheros 8030 ethernet",
+ .name = "Qualcomm Atheros AR8030",
.phy_id_mask = AT803X_PHY_ID_MASK,
+ .probe = at803x_probe,
.config_init = at803x_config_init,
.link_change_notify = at803x_link_change_notify,
.set_wol = at803x_set_wol,
@@ -437,10 +736,11 @@ static struct phy_driver at803x_driver[] = {
.ack_interrupt = at803x_ack_interrupt,
.config_intr = at803x_config_intr,
}, {
- /* ATHEROS 8031 */
+ /* Qualcomm Atheros AR8031/AR8033 */
.phy_id = ATH8031_PHY_ID,
- .name = "Atheros 8031 ethernet",
+ .name = "Qualcomm Atheros AR8031/AR8033",
.phy_id_mask = AT803X_PHY_ID_MASK,
+ .probe = at803x_probe,
.config_init = at803x_config_init,
.set_wol = at803x_set_wol,
.get_wol = at803x_get_wol,
@@ -454,8 +754,7 @@ static struct phy_driver at803x_driver[] = {
}, {
/* ATHEROS AR9331 */
PHY_ID_MATCH_EXACT(ATH9331_PHY_ID),
- .name = "Atheros AR9331 built-in PHY",
- .config_init = at803x_config_init,
+ .name = "Qualcomm Atheros AR9331 built-in PHY",
.suspend = at803x_suspend,
.resume = at803x_resume,
/* PHY_BASIC_FEATURES */
diff --git a/drivers/ptp/ptp_clockmatrix.c b/drivers/ptp/ptp_clockmatrix.c
index cf5889b7d825..a5110b7b4ece 100644
--- a/drivers/ptp/ptp_clockmatrix.c
+++ b/drivers/ptp/ptp_clockmatrix.c
@@ -1294,8 +1294,10 @@ static int idtcm_probe(struct i2c_client *client,
err = set_tod_write_overhead(idtcm);
- if (err)
+ if (err) {
+ mutex_unlock(&idtcm->reg_lock);
return err;
+ }
err = idtcm_load_firmware(idtcm, &client->dev);
diff --git a/include/dt-bindings/net/qca-ar803x.h b/include/dt-bindings/net/qca-ar803x.h
new file mode 100644
index 000000000000..9c046c7242ed
--- /dev/null
+++ b/include/dt-bindings/net/qca-ar803x.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Device Tree constants for the Qualcomm Atheros AR803x PHYs
+ */
+
+#ifndef _DT_BINDINGS_QCA_AR803X_H
+#define _DT_BINDINGS_QCA_AR803X_H
+
+#define AR803X_STRENGTH_FULL 0
+#define AR803X_STRENGTH_HALF 1
+#define AR803X_STRENGTH_QUARTER 2
+
+#endif
diff --git a/include/net/arp.h b/include/net/arp.h
index c8f580a0e6b1..4950191f6b2b 100644
--- a/include/net/arp.h
+++ b/include/net/arp.h
@@ -57,8 +57,8 @@ static inline void __ipv4_confirm_neigh(struct net_device *dev, u32 key)
unsigned long now = jiffies;
/* avoid dirtying neighbour */
- if (n->confirmed != now)
- n->confirmed = now;
+ if (READ_ONCE(n->confirmed) != now)
+ WRITE_ONCE(n->confirmed, now);
}
rcu_read_unlock_bh();
}
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index b2f715ca0567..b5ebeb3b0de0 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -414,8 +414,8 @@ static inline void __ipv6_confirm_neigh(struct net_device *dev,
unsigned long now = jiffies;
/* avoid dirtying neighbour */
- if (n->confirmed != now)
- n->confirmed = now;
+ if (READ_ONCE(n->confirmed) != now)
+ WRITE_ONCE(n->confirmed, now);
}
rcu_read_unlock_bh();
}
@@ -431,8 +431,8 @@ static inline void __ipv6_confirm_neigh_stub(struct net_device *dev,
unsigned long now = jiffies;
/* avoid dirtying neighbour */
- if (n->confirmed != now)
- n->confirmed = now;
+ if (READ_ONCE(n->confirmed) != now)
+ WRITE_ONCE(n->confirmed, now);
}
rcu_read_unlock_bh();
}
diff --git a/include/net/sock.h b/include/net/sock.h
index ac6042d0af32..bd210c78dc9d 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -859,17 +859,17 @@ static inline gfp_t sk_gfp_mask(const struct sock *sk, gfp_t gfp_mask)
static inline void sk_acceptq_removed(struct sock *sk)
{
- sk->sk_ack_backlog--;
+ WRITE_ONCE(sk->sk_ack_backlog, sk->sk_ack_backlog - 1);
}
static inline void sk_acceptq_added(struct sock *sk)
{
- sk->sk_ack_backlog++;
+ WRITE_ONCE(sk->sk_ack_backlog, sk->sk_ack_backlog + 1);
}
static inline bool sk_acceptq_is_full(const struct sock *sk)
{
- return sk->sk_ack_backlog > sk->sk_max_ack_backlog;
+ return READ_ONCE(sk->sk_ack_backlog) > READ_ONCE(sk->sk_max_ack_backlog);
}
/*
@@ -899,11 +899,11 @@ static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
skb_dst_force(skb);
if (!sk->sk_backlog.tail)
- sk->sk_backlog.head = skb;
+ WRITE_ONCE(sk->sk_backlog.head, skb);
else
sk->sk_backlog.tail->next = skb;
- sk->sk_backlog.tail = skb;
+ WRITE_ONCE(sk->sk_backlog.tail, skb);
skb->next = NULL;
}
@@ -1939,8 +1939,8 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie);
static inline void sk_dst_confirm(struct sock *sk)
{
- if (!sk->sk_dst_pending_confirm)
- sk->sk_dst_pending_confirm = 1;
+ if (!READ_ONCE(sk->sk_dst_pending_confirm))
+ WRITE_ONCE(sk->sk_dst_pending_confirm, 1);
}
static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n)
@@ -1950,10 +1950,10 @@ static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n)
unsigned long now = jiffies;
/* avoid dirtying neighbour */
- if (n->confirmed != now)
- n->confirmed = now;
- if (sk && sk->sk_dst_pending_confirm)
- sk->sk_dst_pending_confirm = 0;
+ if (READ_ONCE(n->confirmed) != now)
+ WRITE_ONCE(n->confirmed, now);
+ if (sk && READ_ONCE(sk->sk_dst_pending_confirm))
+ WRITE_ONCE(sk->sk_dst_pending_confirm, 0);
}
}
diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h
index de696ca12f2c..f6035f737193 100644
--- a/include/uapi/linux/lwtunnel.h
+++ b/include/uapi/linux/lwtunnel.h
@@ -27,6 +27,7 @@ enum lwtunnel_ip_t {
LWTUNNEL_IP_TOS,
LWTUNNEL_IP_FLAGS,
LWTUNNEL_IP_PAD,
+ LWTUNNEL_IP_OPTS,
__LWTUNNEL_IP_MAX,
};
@@ -41,12 +42,52 @@ enum lwtunnel_ip6_t {
LWTUNNEL_IP6_TC,
LWTUNNEL_IP6_FLAGS,
LWTUNNEL_IP6_PAD,
+ LWTUNNEL_IP6_OPTS,
__LWTUNNEL_IP6_MAX,
};
#define LWTUNNEL_IP6_MAX (__LWTUNNEL_IP6_MAX - 1)
enum {
+ LWTUNNEL_IP_OPTS_UNSPEC,
+ LWTUNNEL_IP_OPTS_GENEVE,
+ LWTUNNEL_IP_OPTS_VXLAN,
+ LWTUNNEL_IP_OPTS_ERSPAN,
+ __LWTUNNEL_IP_OPTS_MAX,
+};
+
+#define LWTUNNEL_IP_OPTS_MAX (__LWTUNNEL_IP_OPTS_MAX - 1)
+
+enum {
+ LWTUNNEL_IP_OPT_GENEVE_UNSPEC,
+ LWTUNNEL_IP_OPT_GENEVE_CLASS,
+ LWTUNNEL_IP_OPT_GENEVE_TYPE,
+ LWTUNNEL_IP_OPT_GENEVE_DATA,
+ __LWTUNNEL_IP_OPT_GENEVE_MAX,
+};
+
+#define LWTUNNEL_IP_OPT_GENEVE_MAX (__LWTUNNEL_IP_OPT_GENEVE_MAX - 1)
+
+enum {
+ LWTUNNEL_IP_OPT_VXLAN_UNSPEC,
+ LWTUNNEL_IP_OPT_VXLAN_GBP,
+ __LWTUNNEL_IP_OPT_VXLAN_MAX,
+};
+
+#define LWTUNNEL_IP_OPT_VXLAN_MAX (__LWTUNNEL_IP_OPT_VXLAN_MAX - 1)
+
+enum {
+ LWTUNNEL_IP_OPT_ERSPAN_UNSPEC,
+ LWTUNNEL_IP_OPT_ERSPAN_VER,
+ LWTUNNEL_IP_OPT_ERSPAN_INDEX,
+ LWTUNNEL_IP_OPT_ERSPAN_DIR,
+ LWTUNNEL_IP_OPT_ERSPAN_HWID,
+ __LWTUNNEL_IP_OPT_ERSPAN_MAX,
+};
+
+#define LWTUNNEL_IP_OPT_ERSPAN_MAX (__LWTUNNEL_IP_OPT_ERSPAN_MAX - 1)
+
+enum {
LWT_BPF_PROG_UNSPEC,
LWT_BPF_PROG_FD,
LWT_BPF_PROG_NAME,
diff --git a/net/atm/signaling.c b/net/atm/signaling.c
index 6c11cdf4dd4c..fbd0c5e7b299 100644
--- a/net/atm/signaling.c
+++ b/net/atm/signaling.c
@@ -109,7 +109,7 @@ static int sigd_send(struct atm_vcc *vcc, struct sk_buff *skb)
dev_kfree_skb(skb);
goto as_indicate_complete;
}
- sk->sk_ack_backlog++;
+ sk_acceptq_added(sk);
skb_queue_tail(&sk->sk_receive_queue, skb);
pr_debug("waking sk_sleep(sk) 0x%p\n", sk_sleep(sk));
sk->sk_state_change(sk);
diff --git a/net/atm/svc.c b/net/atm/svc.c
index 908cbb8654f5..ba144d035e3d 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -381,7 +381,7 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags,
msg->pvc.sap_addr.vpi,
msg->pvc.sap_addr.vci);
dev_kfree_skb(skb);
- sk->sk_ack_backlog--;
+ sk_acceptq_removed(sk);
if (error) {
sigd_enq2(NULL, as_reject, old_vcc, NULL, NULL,
&old_vcc->qos, error);
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index bb222b882b67..324306d6fde0 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1384,7 +1384,7 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags,
/* Now attach up the new socket */
kfree_skb(skb);
- sk->sk_ack_backlog--;
+ sk_acceptq_removed(sk);
newsock->state = SS_CONNECTED;
out:
diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c
index dcdbaeeb2358..cd6afe895db9 100644
--- a/net/ax25/ax25_in.c
+++ b/net/ax25/ax25_in.c
@@ -356,7 +356,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
make->sk_state = TCP_ESTABLISHED;
- sk->sk_ack_backlog++;
+ sk_acceptq_added(sk);
bh_unlock_sock(sk);
} else {
if (!mine)
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 5f508c50649d..3fd124927d4d 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -173,7 +173,7 @@ void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh)
else
release_sock(sk);
- parent->sk_ack_backlog++;
+ sk_acceptq_added(parent);
}
EXPORT_SYMBOL(bt_accept_enqueue);
@@ -185,7 +185,7 @@ void bt_accept_unlink(struct sock *sk)
BT_DBG("sk %p state %d", sk, sk->sk_state);
list_del_init(&bt_sk(sk)->accept_q);
- bt_sk(sk)->parent->sk_ack_backlog--;
+ sk_acceptq_removed(bt_sk(sk)->parent);
bt_sk(sk)->parent = NULL;
sock_put(sk);
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 5480edff0c86..8c82e95f7539 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2052,8 +2052,8 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
goto nla_put_failure;
{
unsigned long now = jiffies;
- unsigned int flush_delta = now - tbl->last_flush;
- unsigned int rand_delta = now - tbl->last_rand;
+ long flush_delta = now - tbl->last_flush;
+ long rand_delta = now - tbl->last_rand;
struct neigh_hash_table *nht;
struct ndt_config ndc = {
.ndtc_key_len = tbl->key_len,
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 5bad08dc4316..a52e8ba1ced0 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -944,7 +944,7 @@ int inet_dccp_listen(struct socket *sock, int backlog)
if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
goto out;
- sk->sk_max_ack_backlog = backlog;
+ WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
/* Really, if the socket is already in listen state
* we can only allow the backlog to be adjusted.
*/
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 3349ea81f901..e19a92a62e14 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1091,7 +1091,7 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags,
}
cb = DN_SKB_CB(skb);
- sk->sk_ack_backlog--;
+ sk_acceptq_removed(sk);
newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation, kern);
if (newsk == NULL) {
release_sock(sk);
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index e4161e0c86aa..c68503a18025 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -328,7 +328,7 @@ static void dn_nsp_conn_init(struct sock *sk, struct sk_buff *skb)
return;
}
- sk->sk_ack_backlog++;
+ sk_acceptq_added(sk);
skb_queue_tail(&sk->sk_receive_queue, skb);
sk->sk_state_change(sk);
}
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 70f92aaca411..53de8e00990e 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -208,7 +208,7 @@ int inet_listen(struct socket *sock, int backlog)
if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN)))
goto out;
- sk->sk_max_ack_backlog = backlog;
+ WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
/* Really, if the socket is already in listen state
* we can only allow the backlog to be adjusted.
*/
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index eb30fc1770de..e4c6e8b40490 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -716,7 +716,7 @@ static void reqsk_timer_handler(struct timer_list *t)
* ones are about to clog our table.
*/
qlen = reqsk_queue_len(queue);
- if ((qlen << 1) > max(8U, sk_listener->sk_max_ack_backlog)) {
+ if ((qlen << 1) > max(8U, READ_ONCE(sk_listener->sk_max_ack_backlog))) {
int young = reqsk_queue_len_young(queue) << 1;
while (thresh > 2) {
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 7dc79b973e6e..af154977904c 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -226,17 +226,17 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
r->idiag_timer = 1;
r->idiag_retrans = icsk->icsk_retransmits;
r->idiag_expires =
- jiffies_to_msecs(icsk->icsk_timeout - jiffies);
+ jiffies_delta_to_msecs(icsk->icsk_timeout - jiffies);
} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
r->idiag_timer = 4;
r->idiag_retrans = icsk->icsk_probes_out;
r->idiag_expires =
- jiffies_to_msecs(icsk->icsk_timeout - jiffies);
+ jiffies_delta_to_msecs(icsk->icsk_timeout - jiffies);
} else if (timer_pending(&sk->sk_timer)) {
r->idiag_timer = 2;
r->idiag_retrans = icsk->icsk_probes_out;
r->idiag_expires =
- jiffies_to_msecs(sk->sk_timer.expires - jiffies);
+ jiffies_delta_to_msecs(sk->sk_timer.expires - jiffies);
} else {
r->idiag_timer = 0;
r->idiag_expires = 0;
@@ -342,16 +342,13 @@ static int inet_twsk_diag_fill(struct sock *sk,
r = nlmsg_data(nlh);
BUG_ON(tw->tw_state != TCP_TIME_WAIT);
- tmo = tw->tw_timer.expires - jiffies;
- if (tmo < 0)
- tmo = 0;
-
inet_diag_msg_common_fill(r, sk);
r->idiag_retrans = 0;
r->idiag_state = tw->tw_substate;
r->idiag_timer = 3;
- r->idiag_expires = jiffies_to_msecs(tmo);
+ tmo = tw->tw_timer.expires - jiffies;
+ r->idiag_expires = jiffies_delta_to_msecs(tmo);
r->idiag_rqueue = 0;
r->idiag_wqueue = 0;
r->idiag_uid = 0;
@@ -385,7 +382,7 @@ static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb,
offsetof(struct sock, sk_cookie));
tmo = inet_reqsk(sk)->rsk_timer.expires - jiffies;
- r->idiag_expires = (tmo >= 0) ? jiffies_to_msecs(tmo) : 0;
+ r->idiag_expires = jiffies_delta_to_msecs(tmo);
r->idiag_rqueue = 0;
r->idiag_wqueue = 0;
r->idiag_uid = 0;
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 1452a97914a0..d4f84bf9289a 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -34,6 +34,9 @@
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/dst_metadata.h>
+#include <net/geneve.h>
+#include <net/vxlan.h>
+#include <net/erspan.h>
const struct ip_tunnel_encap_ops __rcu *
iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly;
@@ -126,15 +129,14 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
if (!md || md->type != METADATA_IP_TUNNEL ||
md->u.tun_info.mode & IP_TUNNEL_INFO_TX)
-
return NULL;
- res = metadata_dst_alloc(0, METADATA_IP_TUNNEL, flags);
+ src = &md->u.tun_info;
+ res = metadata_dst_alloc(src->options_len, METADATA_IP_TUNNEL, flags);
if (!res)
return NULL;
dst = &res->u.tun_info;
- src = &md->u.tun_info;
dst->key.tun_id = src->key.tun_id;
if (src->mode & IP_TUNNEL_INFO_IPV6)
memcpy(&dst->key.u.ipv6.dst, &src->key.u.ipv6.src,
@@ -143,6 +145,8 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
dst->key.u.ipv4.dst = src->key.u.ipv4.src;
dst->key.tun_flags = src->key.tun_flags;
dst->mode = src->mode | IP_TUNNEL_INFO_TX;
+ ip_tunnel_info_opts_set(dst, ip_tunnel_info_opts(src),
+ src->options_len, 0);
return res;
}
@@ -217,24 +221,199 @@ static const struct nla_policy ip_tun_policy[LWTUNNEL_IP_MAX + 1] = {
[LWTUNNEL_IP_TTL] = { .type = NLA_U8 },
[LWTUNNEL_IP_TOS] = { .type = NLA_U8 },
[LWTUNNEL_IP_FLAGS] = { .type = NLA_U16 },
+ [LWTUNNEL_IP_OPTS] = { .type = NLA_NESTED },
+};
+
+static const struct nla_policy ip_opts_policy[LWTUNNEL_IP_OPTS_MAX + 1] = {
+ [LWTUNNEL_IP_OPTS_GENEVE] = { .type = NLA_NESTED },
+ [LWTUNNEL_IP_OPTS_VXLAN] = { .type = NLA_NESTED },
+ [LWTUNNEL_IP_OPTS_ERSPAN] = { .type = NLA_NESTED },
+};
+
+static const struct nla_policy
+geneve_opt_policy[LWTUNNEL_IP_OPT_GENEVE_MAX + 1] = {
+ [LWTUNNEL_IP_OPT_GENEVE_CLASS] = { .type = NLA_U16 },
+ [LWTUNNEL_IP_OPT_GENEVE_TYPE] = { .type = NLA_U8 },
+ [LWTUNNEL_IP_OPT_GENEVE_DATA] = { .type = NLA_BINARY, .len = 128 },
+};
+
+static const struct nla_policy
+vxlan_opt_policy[LWTUNNEL_IP_OPT_VXLAN_MAX + 1] = {
+ [LWTUNNEL_IP_OPT_VXLAN_GBP] = { .type = NLA_U32 },
+};
+
+static const struct nla_policy
+erspan_opt_policy[LWTUNNEL_IP_OPT_ERSPAN_MAX + 1] = {
+ [LWTUNNEL_IP_OPT_ERSPAN_VER] = { .type = NLA_U8 },
+ [LWTUNNEL_IP_OPT_ERSPAN_INDEX] = { .type = NLA_U32 },
+ [LWTUNNEL_IP_OPT_ERSPAN_DIR] = { .type = NLA_U8 },
+ [LWTUNNEL_IP_OPT_ERSPAN_HWID] = { .type = NLA_U8 },
};
+static int ip_tun_parse_opts_geneve(struct nlattr *attr,
+ struct ip_tunnel_info *info,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[LWTUNNEL_IP_OPT_GENEVE_MAX + 1];
+ int data_len, err;
+
+ err = nla_parse_nested_deprecated(tb, LWTUNNEL_IP_OPT_GENEVE_MAX,
+ attr, geneve_opt_policy, extack);
+ if (err)
+ return err;
+
+ if (!tb[LWTUNNEL_IP_OPT_GENEVE_CLASS] ||
+ !tb[LWTUNNEL_IP_OPT_GENEVE_TYPE] ||
+ !tb[LWTUNNEL_IP_OPT_GENEVE_DATA])
+ return -EINVAL;
+
+ attr = tb[LWTUNNEL_IP_OPT_GENEVE_DATA];
+ data_len = nla_len(attr);
+ if (data_len % 4)
+ return -EINVAL;
+
+ if (info) {
+ struct geneve_opt *opt = ip_tunnel_info_opts(info);
+
+ memcpy(opt->opt_data, nla_data(attr), data_len);
+ opt->length = data_len / 4;
+ attr = tb[LWTUNNEL_IP_OPT_GENEVE_CLASS];
+ opt->opt_class = nla_get_be16(attr);
+ attr = tb[LWTUNNEL_IP_OPT_GENEVE_TYPE];
+ opt->type = nla_get_u8(attr);
+ info->key.tun_flags |= TUNNEL_GENEVE_OPT;
+ }
+
+ return sizeof(struct geneve_opt) + data_len;
+}
+
+static int ip_tun_parse_opts_vxlan(struct nlattr *attr,
+ struct ip_tunnel_info *info,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[LWTUNNEL_IP_OPT_VXLAN_MAX + 1];
+ int err;
+
+ err = nla_parse_nested_deprecated(tb, LWTUNNEL_IP_OPT_VXLAN_MAX,
+ attr, vxlan_opt_policy, extack);
+ if (err)
+ return err;
+
+ if (!tb[LWTUNNEL_IP_OPT_VXLAN_GBP])
+ return -EINVAL;
+
+ if (info) {
+ struct vxlan_metadata *md = ip_tunnel_info_opts(info);
+
+ attr = tb[LWTUNNEL_IP_OPT_VXLAN_GBP];
+ md->gbp = nla_get_u32(attr);
+ info->key.tun_flags |= TUNNEL_VXLAN_OPT;
+ }
+
+ return sizeof(struct vxlan_metadata);
+}
+
+static int ip_tun_parse_opts_erspan(struct nlattr *attr,
+ struct ip_tunnel_info *info,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[LWTUNNEL_IP_OPT_ERSPAN_MAX + 1];
+ int err;
+
+ err = nla_parse_nested_deprecated(tb, LWTUNNEL_IP_OPT_ERSPAN_MAX,
+ attr, erspan_opt_policy, extack);
+ if (err)
+ return err;
+
+ if (!tb[LWTUNNEL_IP_OPT_ERSPAN_VER])
+ return -EINVAL;
+
+ if (info) {
+ struct erspan_metadata *md = ip_tunnel_info_opts(info);
+
+ attr = tb[LWTUNNEL_IP_OPT_ERSPAN_VER];
+ md->version = nla_get_u8(attr);
+
+ if (md->version == 1 && tb[LWTUNNEL_IP_OPT_ERSPAN_INDEX]) {
+ attr = tb[LWTUNNEL_IP_OPT_ERSPAN_INDEX];
+ md->u.index = nla_get_be32(attr);
+ } else if (md->version == 2 && tb[LWTUNNEL_IP_OPT_ERSPAN_DIR] &&
+ tb[LWTUNNEL_IP_OPT_ERSPAN_HWID]) {
+ attr = tb[LWTUNNEL_IP_OPT_ERSPAN_DIR];
+ md->u.md2.dir = nla_get_u8(attr);
+ attr = tb[LWTUNNEL_IP_OPT_ERSPAN_HWID];
+ set_hwid(&md->u.md2, nla_get_u8(attr));
+ } else {
+ return -EINVAL;
+ }
+
+ info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
+ }
+
+ return sizeof(struct erspan_metadata);
+}
+
+static int ip_tun_parse_opts(struct nlattr *attr, struct ip_tunnel_info *info,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[LWTUNNEL_IP_OPTS_MAX + 1];
+ int err;
+
+ if (!attr)
+ return 0;
+
+ err = nla_parse_nested_deprecated(tb, LWTUNNEL_IP_OPTS_MAX, attr,
+ ip_opts_policy, extack);
+ if (err)
+ return err;
+
+ if (tb[LWTUNNEL_IP_OPTS_GENEVE])
+ err = ip_tun_parse_opts_geneve(tb[LWTUNNEL_IP_OPTS_GENEVE],
+ info, extack);
+ else if (tb[LWTUNNEL_IP_OPTS_VXLAN])
+ err = ip_tun_parse_opts_vxlan(tb[LWTUNNEL_IP_OPTS_VXLAN],
+ info, extack);
+ else if (tb[LWTUNNEL_IP_OPTS_ERSPAN])
+ err = ip_tun_parse_opts_erspan(tb[LWTUNNEL_IP_OPTS_ERSPAN],
+ info, extack);
+ else
+ err = -EINVAL;
+
+ return err;
+}
+
+static int ip_tun_get_optlen(struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ return ip_tun_parse_opts(attr, NULL, extack);
+}
+
+static int ip_tun_set_opts(struct nlattr *attr, struct ip_tunnel_info *info,
+ struct netlink_ext_ack *extack)
+{
+ return ip_tun_parse_opts(attr, info, extack);
+}
+
static int ip_tun_build_state(struct nlattr *attr,
unsigned int family, const void *cfg,
struct lwtunnel_state **ts,
struct netlink_ext_ack *extack)
{
- struct ip_tunnel_info *tun_info;
- struct lwtunnel_state *new_state;
struct nlattr *tb[LWTUNNEL_IP_MAX + 1];
- int err;
+ struct lwtunnel_state *new_state;
+ struct ip_tunnel_info *tun_info;
+ int err, opt_len;
err = nla_parse_nested_deprecated(tb, LWTUNNEL_IP_MAX, attr,
ip_tun_policy, extack);
if (err < 0)
return err;
- new_state = lwtunnel_state_alloc(sizeof(*tun_info));
+ opt_len = ip_tun_get_optlen(tb[LWTUNNEL_IP_OPTS], extack);
+ if (opt_len < 0)
+ return opt_len;
+
+ new_state = lwtunnel_state_alloc(sizeof(*tun_info) + opt_len);
if (!new_state)
return -ENOMEM;
@@ -242,6 +421,12 @@ static int ip_tun_build_state(struct nlattr *attr,
tun_info = lwt_tun_info(new_state);
+ err = ip_tun_set_opts(tb[LWTUNNEL_IP_OPTS], tun_info, extack);
+ if (err < 0) {
+ lwtstate_free(new_state);
+ return err;
+ }
+
#ifdef CONFIG_DST_CACHE
err = dst_cache_init(&tun_info->dst_cache, GFP_KERNEL);
if (err) {
@@ -266,10 +451,10 @@ static int ip_tun_build_state(struct nlattr *attr,
tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP_TOS]);
if (tb[LWTUNNEL_IP_FLAGS])
- tun_info->key.tun_flags = nla_get_be16(tb[LWTUNNEL_IP_FLAGS]);
+ tun_info->key.tun_flags |= nla_get_be16(tb[LWTUNNEL_IP_FLAGS]);
tun_info->mode = IP_TUNNEL_INFO_TX;
- tun_info->options_len = 0;
+ tun_info->options_len = opt_len;
*ts = new_state;
@@ -285,6 +470,110 @@ static void ip_tun_destroy_state(struct lwtunnel_state *lwtstate)
#endif
}
+static int ip_tun_fill_encap_opts_geneve(struct sk_buff *skb,
+ struct ip_tunnel_info *tun_info)
+{
+ struct geneve_opt *opt;
+ struct nlattr *nest;
+
+ nest = nla_nest_start_noflag(skb, LWTUNNEL_IP_OPTS_GENEVE);
+ if (!nest)
+ return -ENOMEM;
+
+ opt = ip_tunnel_info_opts(tun_info);
+ if (nla_put_be16(skb, LWTUNNEL_IP_OPT_GENEVE_CLASS, opt->opt_class) ||
+ nla_put_u8(skb, LWTUNNEL_IP_OPT_GENEVE_TYPE, opt->type) ||
+ nla_put(skb, LWTUNNEL_IP_OPT_GENEVE_DATA, opt->length * 4,
+ opt->opt_data)) {
+ nla_nest_cancel(skb, nest);
+ return -ENOMEM;
+ }
+
+ nla_nest_end(skb, nest);
+ return 0;
+}
+
+static int ip_tun_fill_encap_opts_vxlan(struct sk_buff *skb,
+ struct ip_tunnel_info *tun_info)
+{
+ struct vxlan_metadata *md;
+ struct nlattr *nest;
+
+ nest = nla_nest_start_noflag(skb, LWTUNNEL_IP_OPTS_VXLAN);
+ if (!nest)
+ return -ENOMEM;
+
+ md = ip_tunnel_info_opts(tun_info);
+ if (nla_put_u32(skb, LWTUNNEL_IP_OPT_VXLAN_GBP, md->gbp)) {
+ nla_nest_cancel(skb, nest);
+ return -ENOMEM;
+ }
+
+ nla_nest_end(skb, nest);
+ return 0;
+}
+
+static int ip_tun_fill_encap_opts_erspan(struct sk_buff *skb,
+ struct ip_tunnel_info *tun_info)
+{
+ struct erspan_metadata *md;
+ struct nlattr *nest;
+
+ nest = nla_nest_start_noflag(skb, LWTUNNEL_IP_OPTS_ERSPAN);
+ if (!nest)
+ return -ENOMEM;
+
+ md = ip_tunnel_info_opts(tun_info);
+ if (nla_put_u32(skb, LWTUNNEL_IP_OPT_ERSPAN_VER, md->version))
+ goto err;
+
+ if (md->version == 1 &&
+ nla_put_be32(skb, LWTUNNEL_IP_OPT_ERSPAN_INDEX, md->u.index))
+ goto err;
+
+ if (md->version == 2 &&
+ (nla_put_u8(skb, LWTUNNEL_IP_OPT_ERSPAN_DIR, md->u.md2.dir) ||
+ nla_put_u8(skb, LWTUNNEL_IP_OPT_ERSPAN_HWID,
+ get_hwid(&md->u.md2))))
+ goto err;
+
+ nla_nest_end(skb, nest);
+ return 0;
+err:
+ nla_nest_cancel(skb, nest);
+ return -ENOMEM;
+}
+
+static int ip_tun_fill_encap_opts(struct sk_buff *skb, int type,
+ struct ip_tunnel_info *tun_info)
+{
+ struct nlattr *nest;
+ int err = 0;
+
+ if (!(tun_info->key.tun_flags &
+ (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT)))
+ return 0;
+
+ nest = nla_nest_start_noflag(skb, type);
+ if (!nest)
+ return -ENOMEM;
+
+ if (tun_info->key.tun_flags & TUNNEL_GENEVE_OPT)
+ err = ip_tun_fill_encap_opts_geneve(skb, tun_info);
+ else if (tun_info->key.tun_flags & TUNNEL_VXLAN_OPT)
+ err = ip_tun_fill_encap_opts_vxlan(skb, tun_info);
+ else if (tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT)
+ err = ip_tun_fill_encap_opts_erspan(skb, tun_info);
+
+ if (err) {
+ nla_nest_cancel(skb, nest);
+ return err;
+ }
+
+ nla_nest_end(skb, nest);
+ return 0;
+}
+
static int ip_tun_fill_encap_info(struct sk_buff *skb,
struct lwtunnel_state *lwtstate)
{
@@ -296,12 +585,42 @@ static int ip_tun_fill_encap_info(struct sk_buff *skb,
nla_put_in_addr(skb, LWTUNNEL_IP_SRC, tun_info->key.u.ipv4.src) ||
nla_put_u8(skb, LWTUNNEL_IP_TOS, tun_info->key.tos) ||
nla_put_u8(skb, LWTUNNEL_IP_TTL, tun_info->key.ttl) ||
- nla_put_be16(skb, LWTUNNEL_IP_FLAGS, tun_info->key.tun_flags))
+ nla_put_be16(skb, LWTUNNEL_IP_FLAGS, tun_info->key.tun_flags) ||
+ ip_tun_fill_encap_opts(skb, LWTUNNEL_IP_OPTS, tun_info))
return -ENOMEM;
return 0;
}
+static int ip_tun_opts_nlsize(struct ip_tunnel_info *info)
+{
+ int opt_len;
+
+ if (!(info->key.tun_flags &
+ (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT)))
+ return 0;
+
+ opt_len = nla_total_size(0); /* LWTUNNEL_IP_OPTS */
+ if (info->key.tun_flags & TUNNEL_GENEVE_OPT) {
+ struct geneve_opt *opt = ip_tunnel_info_opts(info);
+
+ opt_len += nla_total_size(0) /* LWTUNNEL_IP_OPTS_GENEVE */
+ + nla_total_size(2) /* OPT_GENEVE_CLASS */
+ + nla_total_size(1) /* OPT_GENEVE_TYPE */
+ + nla_total_size(opt->length * 4);
+ /* OPT_GENEVE_DATA */
+ } else if (info->key.tun_flags & TUNNEL_VXLAN_OPT) {
+ opt_len += nla_total_size(0) /* LWTUNNEL_IP_OPTS_VXLAN */
+ + nla_total_size(4); /* OPT_VXLAN_GBP */
+ } else if (info->key.tun_flags & TUNNEL_ERSPAN_OPT) {
+ opt_len += nla_total_size(0) /* LWTUNNEL_IP_OPTS_ERSPAN */
+ + nla_total_size(1) /* OPT_ERSPAN_VER */
+ + nla_total_size(4); /* OPT_ERSPAN_INDEX/DIR/HWID */
+ }
+
+ return opt_len;
+}
+
static int ip_tun_encap_nlsize(struct lwtunnel_state *lwtstate)
{
return nla_total_size_64bit(8) /* LWTUNNEL_IP_ID */
@@ -309,13 +628,21 @@ static int ip_tun_encap_nlsize(struct lwtunnel_state *lwtstate)
+ nla_total_size(4) /* LWTUNNEL_IP_SRC */
+ nla_total_size(1) /* LWTUNNEL_IP_TOS */
+ nla_total_size(1) /* LWTUNNEL_IP_TTL */
- + nla_total_size(2); /* LWTUNNEL_IP_FLAGS */
+ + nla_total_size(2) /* LWTUNNEL_IP_FLAGS */
+ + ip_tun_opts_nlsize(lwt_tun_info(lwtstate));
+ /* LWTUNNEL_IP_OPTS */
}
static int ip_tun_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
{
- return memcmp(lwt_tun_info(a), lwt_tun_info(b),
- sizeof(struct ip_tunnel_info));
+ struct ip_tunnel_info *info_a = lwt_tun_info(a);
+ struct ip_tunnel_info *info_b = lwt_tun_info(b);
+
+ return memcmp(info_a, info_b, sizeof(info_a->key)) ||
+ info_a->mode != info_b->mode ||
+ info_a->options_len != info_b->options_len ||
+ memcmp(ip_tunnel_info_opts(info_a),
+ ip_tunnel_info_opts(info_b), info_a->options_len);
}
static const struct lwtunnel_encap_ops ip_tun_lwt_ops = {
@@ -341,17 +668,21 @@ static int ip6_tun_build_state(struct nlattr *attr,
struct lwtunnel_state **ts,
struct netlink_ext_ack *extack)
{
- struct ip_tunnel_info *tun_info;
- struct lwtunnel_state *new_state;
struct nlattr *tb[LWTUNNEL_IP6_MAX + 1];
- int err;
+ struct lwtunnel_state *new_state;
+ struct ip_tunnel_info *tun_info;
+ int err, opt_len;
err = nla_parse_nested_deprecated(tb, LWTUNNEL_IP6_MAX, attr,
ip6_tun_policy, extack);
if (err < 0)
return err;
- new_state = lwtunnel_state_alloc(sizeof(*tun_info));
+ opt_len = ip_tun_get_optlen(tb[LWTUNNEL_IP6_OPTS], extack);
+ if (opt_len < 0)
+ return opt_len;
+
+ new_state = lwtunnel_state_alloc(sizeof(*tun_info) + opt_len);
if (!new_state)
return -ENOMEM;
@@ -359,6 +690,12 @@ static int ip6_tun_build_state(struct nlattr *attr,
tun_info = lwt_tun_info(new_state);
+ err = ip_tun_set_opts(tb[LWTUNNEL_IP6_OPTS], tun_info, extack);
+ if (err < 0) {
+ lwtstate_free(new_state);
+ return err;
+ }
+
if (tb[LWTUNNEL_IP6_ID])
tun_info->key.tun_id = nla_get_be64(tb[LWTUNNEL_IP6_ID]);
@@ -375,10 +712,10 @@ static int ip6_tun_build_state(struct nlattr *attr,
tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP6_TC]);
if (tb[LWTUNNEL_IP6_FLAGS])
- tun_info->key.tun_flags = nla_get_be16(tb[LWTUNNEL_IP6_FLAGS]);
+ tun_info->key.tun_flags |= nla_get_be16(tb[LWTUNNEL_IP6_FLAGS]);
tun_info->mode = IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_IPV6;
- tun_info->options_len = 0;
+ tun_info->options_len = opt_len;
*ts = new_state;
@@ -396,7 +733,8 @@ static int ip6_tun_fill_encap_info(struct sk_buff *skb,
nla_put_in6_addr(skb, LWTUNNEL_IP6_SRC, &tun_info->key.u.ipv6.src) ||
nla_put_u8(skb, LWTUNNEL_IP6_TC, tun_info->key.tos) ||
nla_put_u8(skb, LWTUNNEL_IP6_HOPLIMIT, tun_info->key.ttl) ||
- nla_put_be16(skb, LWTUNNEL_IP6_FLAGS, tun_info->key.tun_flags))
+ nla_put_be16(skb, LWTUNNEL_IP6_FLAGS, tun_info->key.tun_flags) ||
+ ip_tun_fill_encap_opts(skb, LWTUNNEL_IP6_OPTS, tun_info))
return -ENOMEM;
return 0;
@@ -409,7 +747,9 @@ static int ip6_tun_encap_nlsize(struct lwtunnel_state *lwtstate)
+ nla_total_size(16) /* LWTUNNEL_IP6_SRC */
+ nla_total_size(1) /* LWTUNNEL_IP6_HOPLIMIT */
+ nla_total_size(1) /* LWTUNNEL_IP6_TC */
- + nla_total_size(2); /* LWTUNNEL_IP6_FLAGS */
+ + nla_total_size(2) /* LWTUNNEL_IP6_FLAGS */
+ + ip_tun_opts_nlsize(lwt_tun_info(lwtstate));
+ /* LWTUNNEL_IP6_OPTS */
}
static const struct lwtunnel_encap_ops ip6_tun_lwt_ops = {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1dd25189d83f..9b48aec29aca 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1958,8 +1958,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
struct sk_buff *skb, *last;
u32 urg_hole = 0;
struct scm_timestamping_internal tss;
- bool has_tss = false;
- bool has_cmsg;
+ int cmsg_flags;
if (unlikely(flags & MSG_ERRQUEUE))
return inet_recv_error(sk, msg, len, addr_len);
@@ -1974,7 +1973,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
if (sk->sk_state == TCP_LISTEN)
goto out;
- has_cmsg = tp->recvmsg_inq;
+ cmsg_flags = tp->recvmsg_inq ? 1 : 0;
timeo = sock_rcvtimeo(sk, nonblock);
/* Urgent data needs to be handled specially. */
@@ -2047,7 +2046,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
/* Well, if we have backlog, try to process it now yet. */
- if (copied >= target && !sk->sk_backlog.tail)
+ if (copied >= target && !READ_ONCE(sk->sk_backlog.tail))
break;
if (copied) {
@@ -2157,8 +2156,7 @@ skip_copy:
if (TCP_SKB_CB(skb)->has_rxtstamp) {
tcp_update_recv_tstamps(skb, &tss);
- has_tss = true;
- has_cmsg = true;
+ cmsg_flags |= 2;
}
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
goto found_fin_ok;
@@ -2183,10 +2181,10 @@ found_fin_ok:
release_sock(sk);
- if (has_cmsg) {
- if (has_tss)
+ if (cmsg_flags) {
+ if (cmsg_flags & 2)
tcp_recv_timestamp(msg, sk, &tss);
- if (tp->recvmsg_inq) {
+ if (cmsg_flags & 1) {
inq = tcp_inq_hint(sk);
put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq);
}
@@ -3225,8 +3223,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
* tcpi_unacked -> Number of children ready for accept()
* tcpi_sacked -> max backlog
*/
- info->tcpi_unacked = sk->sk_ack_backlog;
- info->tcpi_sacked = sk->sk_max_ack_backlog;
+ info->tcpi_unacked = READ_ONCE(sk->sk_ack_backlog);
+ info->tcpi_sacked = READ_ONCE(sk->sk_max_ack_backlog);
return;
}
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 549506162dde..0d08f9e2d8d0 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -21,8 +21,8 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
struct tcp_info *info = _info;
if (inet_sk_state_load(sk) == TCP_LISTEN) {
- r->idiag_rqueue = sk->sk_ack_backlog;
- r->idiag_wqueue = sk->sk_max_ack_backlog;
+ r->idiag_rqueue = READ_ONCE(sk->sk_ack_backlog);
+ r->idiag_wqueue = READ_ONCE(sk->sk_max_ack_backlog);
} else if (sk->sk_type == SOCK_STREAM) {
const struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 899e100a68e6..92282f98dc82 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2451,7 +2451,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
state = inet_sk_state_load(sk);
if (state == TCP_LISTEN)
- rx_queue = sk->sk_ack_backlog;
+ rx_queue = READ_ONCE(sk->sk_ack_backlog);
else
/* Because we don't lock the socket,
* we might find a transient negative value.
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 4804b6dc5e65..81f51335e326 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1891,7 +1891,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
state = inet_sk_state_load(sp);
if (state == TCP_LISTEN)
- rx_queue = sp->sk_ack_backlog;
+ rx_queue = READ_ONCE(sp->sk_ack_backlog);
else
/* Because we don't lock the socket,
* we might find a transient negative value.
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index c74f44dfaa22..2922d4150d88 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -705,7 +705,7 @@ static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags,
/* put original socket back into a clean listen state. */
sk->sk_state = TCP_LISTEN;
- sk->sk_ack_backlog--;
+ sk_acceptq_removed(sk);
dprintk("%s: ok success on %02X, client on %02X\n", __func__,
llc_sk(sk)->addr.sllc_sap, newllc->daddr.lsap);
frees:
@@ -780,7 +780,7 @@ static int llc_ui_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
}
/* Well, if we have backlog, try to process it now yet. */
- if (copied >= target && !sk->sk_backlog.tail)
+ if (copied >= target && !READ_ONCE(sk->sk_backlog.tail))
break;
if (copied) {
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 3fc38d16c456..5da9392b03d6 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -403,8 +403,9 @@ u32 ovs_vport_find_upcall_portid(const struct vport *vport, struct sk_buff *skb)
ids = rcu_dereference(vport->upcall_portids);
- if (ids->n_ids == 1 && ids->ids[0] == 0)
- return 0;
+ /* If there is only one portid, select it in the fast-path. */
+ if (ids->n_ids == 1)
+ return ids->ids[0];
hash = skb_get_hash(skb);
ids_index = hash - ids->n_ids * reciprocal_divide(hash, ids->rn_ids);
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 6a0df7c8a939..46b8ff24020d 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -906,7 +906,7 @@ static int rose_accept(struct socket *sock, struct socket *newsock, int flags,
/* Now attach up the new socket */
skb->sk = NULL;
kfree_skb(skb);
- sk->sk_ack_backlog--;
+ sk_acceptq_removed(sk);
out_release:
release_sock(sk);
@@ -1011,7 +1011,7 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros
make_rose->va = 0;
make_rose->vr = 0;
make_rose->vl = 0;
- sk->sk_ack_backlog++;
+ sk_acceptq_added(sk);
rose_insert_socket(make);
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 3177dcb17316..d99966a55c84 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -521,7 +521,7 @@ META_COLLECTOR(int_sk_ack_bl)
*err = -1;
return;
}
- dst->value = sk->sk_ack_backlog;
+ dst->value = READ_ONCE(sk->sk_ack_backlog);
}
META_COLLECTOR(int_sk_max_ack_bl)
@@ -532,7 +532,7 @@ META_COLLECTOR(int_sk_max_ack_bl)
*err = -1;
return;
}
- dst->value = sk->sk_max_ack_backlog;
+ dst->value = READ_ONCE(sk->sk_max_ack_backlog);
}
META_COLLECTOR(int_sk_prio)
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 1ba893b85dad..1b9809ad7725 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -324,7 +324,7 @@ void sctp_association_free(struct sctp_association *asoc)
* socket.
*/
if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))
- sk->sk_ack_backlog--;
+ sk_acceptq_removed(sk);
}
/* Mark as dead, so other users can know this structure is
@@ -1073,7 +1073,7 @@ void sctp_assoc_migrate(struct sctp_association *assoc, struct sock *newsk)
/* Decrement the backlog value for a TCP-style socket. */
if (sctp_style(oldsk, TCP))
- oldsk->sk_ack_backlog--;
+ sk_acceptq_removed(oldsk);
/* Release references to the old endpoint and the sock. */
sctp_endpoint_put(assoc->ep);
diff --git a/net/sctp/diag.c b/net/sctp/diag.c
index 0851166b9175..8a15146faaeb 100644
--- a/net/sctp/diag.c
+++ b/net/sctp/diag.c
@@ -425,8 +425,8 @@ static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
r->idiag_rqueue = atomic_read(&infox->asoc->rmem_alloc);
r->idiag_wqueue = infox->asoc->sndbuf_used;
} else {
- r->idiag_rqueue = sk->sk_ack_backlog;
- r->idiag_wqueue = sk->sk_max_ack_backlog;
+ r->idiag_rqueue = READ_ONCE(sk->sk_ack_backlog);
+ r->idiag_wqueue = READ_ONCE(sk->sk_max_ack_backlog);
}
if (infox->sctpinfo)
sctp_get_sctp_info(sk, infox->asoc, infox->sctpinfo);
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index ea53049d1db6..9d05b2e7bce2 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -164,7 +164,7 @@ void sctp_endpoint_add_asoc(struct sctp_endpoint *ep,
/* Increment the backlog value for a TCP-style listening socket. */
if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))
- sk->sk_ack_backlog++;
+ sk_acceptq_added(sk);
}
/* Free the endpoint structure. Delay cleanup until
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index ffd3262b7a41..53abb97e0061 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -8376,7 +8376,7 @@ static int sctp_listen_start(struct sock *sk, int backlog)
}
}
- sk->sk_max_ack_backlog = backlog;
+ WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
return sctp_hash_endpoint(ep);
}
@@ -8430,7 +8430,7 @@ int sctp_inet_listen(struct socket *sock, int backlog)
/* If we are already listening, just update the backlog */
if (sctp_sstate(sk, LISTENING))
- sk->sk_max_ack_backlog = backlog;
+ WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
else {
err = sctp_listen_start(sk, backlog);
if (err)
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 038861bad72b..e7bb4cbb7716 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1087,7 +1087,7 @@ static bool link_retransmit_failure(struct tipc_link *l, struct tipc_link *r,
return false;
if (!time_after(jiffies, TIPC_SKB_CB(skb)->retr_stamp +
- msecs_to_jiffies(r->tolerance)))
+ msecs_to_jiffies(r->tolerance * 10)))
return false;
hdr = buf_msg(skb);
@@ -1731,21 +1731,6 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,
return;
__skb_queue_head_init(&tnlq);
- __skb_queue_head_init(&tmpxq);
- __skb_queue_head_init(&frags);
-
- /* At least one packet required for safe algorithm => add dummy */
- skb = tipc_msg_create(TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG,
- BASIC_H_SIZE, 0, l->addr, tipc_own_addr(l->net),
- 0, 0, TIPC_ERR_NO_PORT);
- if (!skb) {
- pr_warn("%sunable to create tunnel packet\n", link_co_err);
- return;
- }
- __skb_queue_tail(&tnlq, skb);
- tipc_link_xmit(l, &tnlq, &tmpxq);
- __skb_queue_purge(&tmpxq);
-
/* Link Synching:
* From now on, send only one single ("dummy") SYNCH message
* to peer. The SYNCH message does not contain any data, just
@@ -1771,6 +1756,20 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,
return;
}
+ __skb_queue_head_init(&tmpxq);
+ __skb_queue_head_init(&frags);
+ /* At least one packet required for safe algorithm => add dummy */
+ skb = tipc_msg_create(TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG,
+ BASIC_H_SIZE, 0, l->addr, tipc_own_addr(l->net),
+ 0, 0, TIPC_ERR_NO_PORT);
+ if (!skb) {
+ pr_warn("%sunable to create tunnel packet\n", link_co_err);
+ return;
+ }
+ __skb_queue_tail(&tnlq, skb);
+ tipc_link_xmit(l, &tnlq, &tmpxq);
+ __skb_queue_purge(&tmpxq);
+
/* Initialize reusable tunnel packet header */
tipc_msg_init(tipc_own_addr(l->net), &tnlhdr, TUNNEL_PROTOCOL,
mtyp, INT_H_SIZE, l->addr);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 4b60928049ea..1f1584518221 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -667,6 +667,11 @@ static bool tipc_node_cleanup(struct tipc_node *peer)
}
tipc_node_write_unlock(peer);
+ if (!deleted) {
+ spin_unlock_bh(&tn->node_list_lock);
+ return deleted;
+ }
+
/* Calculate cluster capabilities */
tn->capabilities = TIPC_NODE_CAPABILITIES;
list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
@@ -2043,7 +2048,7 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info)
struct net *net = sock_net(skb->sk);
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct nlattr *attrs[TIPC_NLA_NET_MAX + 1];
- struct tipc_node *peer;
+ struct tipc_node *peer, *temp_node;
u32 addr;
int err;
@@ -2084,6 +2089,11 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info)
tipc_node_write_unlock(peer);
tipc_node_delete(peer);
+ /* Calculate cluster capabilities */
+ tn->capabilities = TIPC_NODE_CAPABILITIES;
+ list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
+ tn->capabilities &= temp_node->capabilities;
+ }
err = 0;
err_out:
tipc_node_put(peer);
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index c0856e74f44f..1f4fde4711b6 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -439,7 +439,7 @@ static void vsock_pending_work(struct work_struct *work)
if (vsock_is_pending(sk)) {
vsock_remove_pending(listener, sk);
- listener->sk_ack_backlog--;
+ sk_acceptq_removed(listener);
} else if (!vsk->rejected) {
/* We are not on the pending list and accept() did not reject
* us, so we must have been accepted by our user process. We
@@ -1299,7 +1299,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags,
err = -listener->sk_err;
if (connected) {
- listener->sk_ack_backlog--;
+ sk_acceptq_removed(listener);
lock_sock_nested(connected, SINGLE_DEPTH_NESTING);
vconnected = vsock_sk(connected);
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index bef8772116ec..7fa09c5e4625 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -428,7 +428,7 @@ static void hvs_open_connection(struct vmbus_channel *chan)
if (conn_from_host) {
new->sk_state = TCP_ESTABLISHED;
- sk->sk_ack_backlog++;
+ sk_acceptq_added(sk);
hvs_addr_init(&vnew->local_addr, if_type);
hvs_remote_addr_init(&vnew->remote_addr, &vnew->local_addr);
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index d02c9b41a768..193f959e51ef 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -1066,7 +1066,7 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt)
return -ENOMEM;
}
- sk->sk_ack_backlog++;
+ sk_acceptq_added(sk);
lock_sock_nested(child, SINGLE_DEPTH_NESTING);
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 8c9c4ed90fa7..6ba98a1efe2e 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -1098,7 +1098,7 @@ static int vmci_transport_recv_listen(struct sock *sk,
}
vsock_add_pending(sk, pending);
- sk->sk_ack_backlog++;
+ sk_acceptq_added(sk);
pending->sk_state = TCP_SYN_SENT;
vmci_trans(vpending)->produce_size =
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 6aee9f5e8e71..c34f7d077604 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -891,7 +891,7 @@ static int x25_accept(struct socket *sock, struct socket *newsock, int flags,
/* Now attach up the new socket */
skb->sk = NULL;
kfree_skb(skb);
- sk->sk_ack_backlog--;
+ sk_acceptq_removed(sk);
newsock->state = SS_CONNECTED;
rc = 0;
out2:
@@ -1062,7 +1062,7 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb,
skb_copy_from_linear_data(skb, makex25->calluserdata.cuddata, skb->len);
makex25->calluserdata.cudlength = skb->len;
- sk->sk_ack_backlog++;
+ sk_acceptq_added(sk);
x25_insert_socket(make);
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 0bd6b23c97ef..a8e04d665b69 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -10,7 +10,7 @@ TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh
TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh
TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh
-TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh
+TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh traceroute.sh
TEST_PROGS_EXTENDED := in_netns.sh
TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
diff --git a/tools/testing/selftests/net/traceroute.sh b/tools/testing/selftests/net/traceroute.sh
new file mode 100755
index 000000000000..de9ca97abc30
--- /dev/null
+++ b/tools/testing/selftests/net/traceroute.sh
@@ -0,0 +1,322 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run traceroute/traceroute6 tests
+#
+
+VERBOSE=0
+PAUSE_ON_FAIL=no
+
+################################################################################
+#
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf "TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+run_cmd()
+{
+ local ns
+ local cmd
+ local out
+ local rc
+
+ ns="$1"
+ shift
+ cmd="$*"
+
+ if [ "$VERBOSE" = "1" ]; then
+ printf " COMMAND: $cmd\n"
+ fi
+
+ out=$(eval ip netns exec ${ns} ${cmd} 2>&1)
+ rc=$?
+ if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+ echo " $out"
+ fi
+
+ [ "$VERBOSE" = "1" ] && echo
+
+ return $rc
+}
+
+################################################################################
+# create namespaces and interconnects
+
+create_ns()
+{
+ local ns=$1
+ local addr=$2
+ local addr6=$3
+
+ [ -z "${addr}" ] && addr="-"
+ [ -z "${addr6}" ] && addr6="-"
+
+ ip netns add ${ns}
+
+ ip netns exec ${ns} ip link set lo up
+ if [ "${addr}" != "-" ]; then
+ ip netns exec ${ns} ip addr add dev lo ${addr}
+ fi
+ if [ "${addr6}" != "-" ]; then
+ ip netns exec ${ns} ip -6 addr add dev lo ${addr6}
+ fi
+
+ ip netns exec ${ns} ip ro add unreachable default metric 8192
+ ip netns exec ${ns} ip -6 ro add unreachable default metric 8192
+
+ ip netns exec ${ns} sysctl -qw net.ipv4.ip_forward=1
+ ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
+ ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.forwarding=1
+ ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.forwarding=1
+ ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.accept_dad=0
+}
+
+# create veth pair to connect namespaces and apply addresses.
+connect_ns()
+{
+ local ns1=$1
+ local ns1_dev=$2
+ local ns1_addr=$3
+ local ns1_addr6=$4
+ local ns2=$5
+ local ns2_dev=$6
+ local ns2_addr=$7
+ local ns2_addr6=$8
+
+ ip netns exec ${ns1} ip li add ${ns1_dev} type veth peer name tmp
+ ip netns exec ${ns1} ip li set ${ns1_dev} up
+ ip netns exec ${ns1} ip li set tmp netns ${ns2} name ${ns2_dev}
+ ip netns exec ${ns2} ip li set ${ns2_dev} up
+
+ if [ "${ns1_addr}" != "-" ]; then
+ ip netns exec ${ns1} ip addr add dev ${ns1_dev} ${ns1_addr}
+ fi
+
+ if [ "${ns2_addr}" != "-" ]; then
+ ip netns exec ${ns2} ip addr add dev ${ns2_dev} ${ns2_addr}
+ fi
+
+ if [ "${ns1_addr6}" != "-" ]; then
+ ip netns exec ${ns1} ip addr add dev ${ns1_dev} ${ns1_addr6}
+ fi
+
+ if [ "${ns2_addr6}" != "-" ]; then
+ ip netns exec ${ns2} ip addr add dev ${ns2_dev} ${ns2_addr6}
+ fi
+}
+
+################################################################################
+# traceroute6 test
+#
+# Verify that in this scenario
+#
+# ------------------------ N2
+# | |
+# ------ ------ N3 ----
+# | R1 | | R2 |------|H2|
+# ------ ------ ----
+# | |
+# ------------------------ N1
+# |
+# ----
+# |H1|
+# ----
+#
+# where H1's default route goes through R1 and R1's default route goes
+# through R2 over N2, traceroute6 from H1 to H2 reports R2's address
+# on N2 and not N1.
+#
+# Addresses are assigned as follows:
+#
+# N1: 2000:101::/64
+# N2: 2000:102::/64
+# N3: 2000:103::/64
+#
+# R1's host part of address: 1
+# R2's host part of address: 2
+# H1's host part of address: 3
+# H2's host part of address: 4
+#
+# For example:
+# the IPv6 address of R1's interface on N2 is 2000:102::1/64
+
+cleanup_traceroute6()
+{
+ local ns
+
+ for ns in host-1 host-2 router-1 router-2
+ do
+ ip netns del ${ns} 2>/dev/null
+ done
+}
+
+setup_traceroute6()
+{
+ brdev=br0
+
+ # start clean
+ cleanup_traceroute6
+
+ set -e
+ create_ns host-1
+ create_ns host-2
+ create_ns router-1
+ create_ns router-2
+
+ # Setup N3
+ connect_ns router-2 eth3 - 2000:103::2/64 host-2 eth3 - 2000:103::4/64
+ ip netns exec host-2 ip route add default via 2000:103::2
+
+ # Setup N2
+ connect_ns router-1 eth2 - 2000:102::1/64 router-2 eth2 - 2000:102::2/64
+ ip netns exec router-1 ip route add default via 2000:102::2
+
+ # Setup N1. host-1 and router-2 connect to a bridge in router-1.
+ ip netns exec router-1 ip link add name ${brdev} type bridge
+ ip netns exec router-1 ip link set ${brdev} up
+ ip netns exec router-1 ip addr add 2000:101::1/64 dev ${brdev}
+
+ connect_ns host-1 eth0 - 2000:101::3/64 router-1 eth0 - -
+ ip netns exec router-1 ip link set dev eth0 master ${brdev}
+ ip netns exec host-1 ip route add default via 2000:101::1
+
+ connect_ns router-2 eth1 - 2000:101::2/64 router-1 eth1 - -
+ ip netns exec router-1 ip link set dev eth1 master ${brdev}
+
+ # Prime the network
+ ip netns exec host-1 ping6 -c5 2000:103::4 >/dev/null 2>&1
+
+ set +e
+}
+
+run_traceroute6()
+{
+ if [ ! -x "$(command -v traceroute6)" ]; then
+ echo "SKIP: Could not run IPV6 test without traceroute6"
+ return
+ fi
+
+ setup_traceroute6
+
+ # traceroute6 host-2 from host-1 (expects 2000:102::2)
+ run_cmd host-1 "traceroute6 2000:103::4 | grep -q 2000:102::2"
+ log_test $? 0 "IPV6 traceroute"
+
+ cleanup_traceroute6
+}
+
+################################################################################
+# traceroute test
+#
+# Verify that traceroute from H1 to H2 shows 1.0.1.1 in this scenario
+#
+# 1.0.3.1/24
+# ---- 1.0.1.3/24 1.0.1.1/24 ---- 1.0.2.1/24 1.0.2.4/24 ----
+# |H1|--------------------------|R1|--------------------------|H2|
+# ---- N1 ---- N2 ----
+#
+# where net.ipv4.icmp_errors_use_inbound_ifaddr is set on R1 and
+# 1.0.3.1/24 and 1.0.1.1/24 are respectively R1's primary and secondary
+# address on N1.
+#
+
+cleanup_traceroute()
+{
+ local ns
+
+ for ns in host-1 host-2 router
+ do
+ ip netns del ${ns} 2>/dev/null
+ done
+}
+
+setup_traceroute()
+{
+ # start clean
+ cleanup_traceroute
+
+ set -e
+ create_ns host-1
+ create_ns host-2
+ create_ns router
+
+ connect_ns host-1 eth0 1.0.1.3/24 - \
+ router eth1 1.0.3.1/24 -
+ ip netns exec host-1 ip route add default via 1.0.1.1
+
+ ip netns exec router ip addr add 1.0.1.1/24 dev eth1
+ ip netns exec router sysctl -qw \
+ net.ipv4.icmp_errors_use_inbound_ifaddr=1
+
+ connect_ns host-2 eth0 1.0.2.4/24 - \
+ router eth2 1.0.2.1/24 -
+ ip netns exec host-2 ip route add default via 1.0.2.1
+
+ # Prime the network
+ ip netns exec host-1 ping -c5 1.0.2.4 >/dev/null 2>&1
+
+ set +e
+}
+
+run_traceroute()
+{
+ if [ ! -x "$(command -v traceroute)" ]; then
+ echo "SKIP: Could not run IPV4 test without traceroute"
+ return
+ fi
+
+ setup_traceroute
+
+ # traceroute host-2 from host-1 (expects 1.0.1.1). Takes a while.
+ run_cmd host-1 "traceroute 1.0.2.4 | grep -q 1.0.1.1"
+ log_test $? 0 "IPV4 traceroute"
+
+ cleanup_traceroute
+}
+
+################################################################################
+# Run tests
+
+run_tests()
+{
+ run_traceroute6
+ run_traceroute
+}
+
+################################################################################
+# main
+
+declare -i nfail=0
+declare -i nsuccess=0
+
+while getopts :pv o
+do
+ case $o in
+ p) PAUSE_ON_FAIL=yes;;
+ v) VERBOSE=$(($VERBOSE + 1));;
+ *) exit 1;;
+ esac
+done
+
+run_tests
+
+printf "\nTests passed: %3d\n" ${nsuccess}
+printf "Tests failed: %3d\n" ${nfail}