diff options
Diffstat (limited to 'drivers')
374 files changed, 13444 insertions, 2383 deletions
diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c index f1a500205313..8fbd36eb8941 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -1414,12 +1414,14 @@ fore200e_open(struct atm_vcc *vcc) static void fore200e_close(struct atm_vcc* vcc) { - struct fore200e* fore200e = FORE200E_DEV(vcc->dev); struct fore200e_vcc* fore200e_vcc; + struct fore200e* fore200e; struct fore200e_vc_map* vc_map; unsigned long flags; ASSERT(vcc); + fore200e = FORE200E_DEV(vcc->dev); + ASSERT((vcc->vpi >= 0) && (vcc->vpi < 1<<FORE200E_VPI_BITS)); ASSERT((vcc->vci >= 0) && (vcc->vci < 1<<FORE200E_VCI_BITS)); @@ -1464,10 +1466,10 @@ fore200e_close(struct atm_vcc* vcc) static int fore200e_send(struct atm_vcc *vcc, struct sk_buff *skb) { - struct fore200e* fore200e = FORE200E_DEV(vcc->dev); - struct fore200e_vcc* fore200e_vcc = FORE200E_VCC(vcc); + struct fore200e* fore200e; + struct fore200e_vcc* fore200e_vcc; struct fore200e_vc_map* vc_map; - struct host_txq* txq = &fore200e->host_txq; + struct host_txq* txq; struct host_txq_entry* entry; struct tpd* tpd; struct tpd_haddr tpd_haddr; @@ -1480,9 +1482,18 @@ fore200e_send(struct atm_vcc *vcc, struct sk_buff *skb) unsigned char* data; unsigned long flags; - ASSERT(vcc); - ASSERT(fore200e); - ASSERT(fore200e_vcc); + if (!vcc) + return -EINVAL; + + fore200e = FORE200E_DEV(vcc->dev); + fore200e_vcc = FORE200E_VCC(vcc); + + if (!fore200e) + return -EINVAL; + + txq = &fore200e->host_txq; + if (!fore200e_vcc) + return -EINVAL; if (!test_bit(ATM_VF_READY, &vcc->flags)) { DPRINTK(1, "VC %d.%d.%d not ready for tx\n", vcc->itf, vcc->vpi, vcc->vpi); diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c index 8e05706fe5d9..0795a49edfae 100644 --- a/drivers/bluetooth/btbcm.c +++ b/drivers/bluetooth/btbcm.c @@ -107,6 +107,52 @@ int btbcm_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr) } EXPORT_SYMBOL_GPL(btbcm_set_bdaddr); +int btbcm_read_pcm_int_params(struct hci_dev *hdev, + struct bcm_set_pcm_int_params *params) +{ + struct sk_buff *skb; + int err = 0; + + skb = __hci_cmd_sync(hdev, 0xfc1d, 0, NULL, HCI_INIT_TIMEOUT); + if (IS_ERR(skb)) { + err = PTR_ERR(skb); + bt_dev_err(hdev, "BCM: Read PCM int params failed (%d)", err); + return err; + } + + if (skb->len != 6 || skb->data[0]) { + bt_dev_err(hdev, "BCM: Read PCM int params length mismatch"); + kfree_skb(skb); + return -EIO; + } + + if (params) + memcpy(params, skb->data + 1, 5); + + kfree_skb(skb); + + return 0; +} +EXPORT_SYMBOL_GPL(btbcm_read_pcm_int_params); + +int btbcm_write_pcm_int_params(struct hci_dev *hdev, + const struct bcm_set_pcm_int_params *params) +{ + struct sk_buff *skb; + int err; + + skb = __hci_cmd_sync(hdev, 0xfc1c, 5, params, HCI_INIT_TIMEOUT); + if (IS_ERR(skb)) { + err = PTR_ERR(skb); + bt_dev_err(hdev, "BCM: Write PCM int params failed (%d)", err); + return err; + } + kfree_skb(skb); + + return 0; +} +EXPORT_SYMBOL_GPL(btbcm_write_pcm_int_params); + int btbcm_patchram(struct hci_dev *hdev, const struct firmware *fw) { const struct hci_command_hdr *cmd; diff --git a/drivers/bluetooth/btbcm.h b/drivers/bluetooth/btbcm.h index d204be8a84bf..3c7dd0765837 100644 --- a/drivers/bluetooth/btbcm.h +++ b/drivers/bluetooth/btbcm.h @@ -54,6 +54,10 @@ struct bcm_set_pcm_format_params { int btbcm_check_bdaddr(struct hci_dev *hdev); int btbcm_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr); int btbcm_patchram(struct hci_dev *hdev, const struct firmware *fw); +int btbcm_read_pcm_int_params(struct hci_dev *hdev, + struct bcm_set_pcm_int_params *params); +int btbcm_write_pcm_int_params(struct hci_dev *hdev, + const struct bcm_set_pcm_int_params *params); int btbcm_setup_patchram(struct hci_dev *hdev); int btbcm_setup_apple(struct hci_dev *hdev); @@ -74,6 +78,18 @@ static inline int btbcm_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr) return -EOPNOTSUPP; } +int btbcm_read_pcm_int_params(struct hci_dev *hdev, + struct bcm_set_pcm_int_params *params) +{ + return -EOPNOTSUPP; +} + +int btbcm_write_pcm_int_params(struct hci_dev *hdev, + const struct bcm_set_pcm_int_params *params) +{ + return -EOPNOTSUPP; +} + static inline int btbcm_patchram(struct hci_dev *hdev, const struct firmware *fw) { return -EOPNOTSUPP; diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 70e385987d41..0eaeca0a64fb 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -552,9 +552,9 @@ static void btusb_rtl_cmd_timeout(struct hci_dev *hdev) } bt_dev_err(hdev, "Reset Realtek device via gpio"); - gpiod_set_value_cansleep(reset_gpio, 0); - msleep(200); gpiod_set_value_cansleep(reset_gpio, 1); + msleep(200); + gpiod_set_value_cansleep(reset_gpio, 0); } static inline void btusb_free_frags(struct btusb_data *data) @@ -2602,7 +2602,7 @@ static void btusb_mtk_wmt_recv(struct urb *urb) * and being processed the events from there then. */ if (test_bit(BTUSB_TX_WAIT_VND_EVT, &data->flags)) { - data->evt_skb = skb_clone(skb, GFP_KERNEL); + data->evt_skb = skb_clone(skb, GFP_ATOMIC); if (!data->evt_skb) goto err_out; } @@ -2867,7 +2867,7 @@ static int btusb_mtk_setup_firmware(struct hci_dev *hdev, const char *fwname) err = btusb_mtk_hci_wmt_sync(hdev, &wmt_params); if (err < 0) { bt_dev_err(hdev, "Failed to send wmt rst (%d)", err); - return err; + goto err_release_fw; } /* Wait a few moments for firmware activation done */ @@ -3832,6 +3832,10 @@ static int btusb_probe(struct usb_interface *intf, * (DEVICE_REMOTE_WAKEUP) */ set_bit(BTUSB_WAKEUP_DISABLE, &data->flags); + + err = usb_autopm_get_interface(intf); + if (err < 0) + goto out_free_dev; } if (id->driver_info & BTUSB_AMP) { diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index d2a6a4afdbbb..f8f5c593a05c 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -48,6 +48,14 @@ #define BCM_NUM_SUPPLIES 2 /** + * struct bcm_device_data - device specific data + * @no_early_set_baudrate: Disallow set baudrate before driver setup() + */ +struct bcm_device_data { + bool no_early_set_baudrate; +}; + +/** * struct bcm_device - device driver resources * @serdev_hu: HCI UART controller struct * @list: bcm_device_list node @@ -79,6 +87,7 @@ * @hu: pointer to HCI UART controller struct, * used to disable flow control during runtime suspend and system sleep * @is_suspended: whether flow control is currently disabled + * @no_early_set_baudrate: don't set_baudrate before setup() */ struct bcm_device { /* Must be the first member, hci_serdev.c expects this. */ @@ -112,6 +121,8 @@ struct bcm_device { struct hci_uart *hu; bool is_suspended; #endif + bool no_early_set_baudrate; + u8 pcm_int_params[5]; }; /* generic bcm uart resources */ @@ -447,7 +458,13 @@ out: if (bcm->dev) { hci_uart_set_flow_control(hu, true); hu->init_speed = bcm->dev->init_speed; - hu->oper_speed = bcm->dev->oper_speed; + + /* If oper_speed is set, ldisc/serdev will set the baudrate + * before calling setup() + */ + if (!bcm->dev->no_early_set_baudrate) + hu->oper_speed = bcm->dev->oper_speed; + err = bcm_gpio_set_power(bcm->dev, true); hci_uart_set_flow_control(hu, false); if (err) @@ -565,6 +582,8 @@ static int bcm_setup(struct hci_uart *hu) /* Operational speed if any */ if (hu->oper_speed) speed = hu->oper_speed; + else if (bcm->dev && bcm->dev->oper_speed) + speed = bcm->dev->oper_speed; else if (hu->proto->oper_speed) speed = hu->proto->oper_speed; else @@ -576,6 +595,16 @@ static int bcm_setup(struct hci_uart *hu) host_set_baudrate(hu, speed); } + /* PCM parameters if provided */ + if (bcm->dev && bcm->dev->pcm_int_params[0] != 0xff) { + struct bcm_set_pcm_int_params params; + + btbcm_read_pcm_int_params(hu->hdev, ¶ms); + + memcpy(¶ms, bcm->dev->pcm_int_params, 5); + btbcm_write_pcm_int_params(hu->hdev, ¶ms); + } + finalize: release_firmware(fw); @@ -1113,6 +1142,8 @@ static int bcm_acpi_probe(struct bcm_device *dev) static int bcm_of_probe(struct bcm_device *bdev) { device_property_read_u32(bdev->dev, "max-speed", &bdev->oper_speed); + device_property_read_u8_array(bdev->dev, "brcm,bt-pcm-int-params", + bdev->pcm_int_params, 5); return 0; } @@ -1128,6 +1159,9 @@ static int bcm_probe(struct platform_device *pdev) dev->dev = &pdev->dev; dev->irq = platform_get_irq(pdev, 0); + /* Initialize routing field to an unused value */ + dev->pcm_int_params[0] = 0xff; + if (has_acpi_companion(&pdev->dev)) { ret = bcm_acpi_probe(dev); if (ret) @@ -1374,6 +1408,7 @@ static struct platform_driver bcm_driver = { static int bcm_serdev_probe(struct serdev_device *serdev) { struct bcm_device *bcmdev; + const struct bcm_device_data *data; int err; bcmdev = devm_kzalloc(&serdev->dev, sizeof(*bcmdev), GFP_KERNEL); @@ -1387,6 +1422,9 @@ static int bcm_serdev_probe(struct serdev_device *serdev) bcmdev->serdev_hu.serdev = serdev; serdev_device_set_drvdata(serdev, bcmdev); + /* Initialize routing field to an unused value */ + bcmdev->pcm_int_params[0] = 0xff; + if (has_acpi_companion(&serdev->dev)) err = bcm_acpi_probe(bcmdev); else @@ -1408,6 +1446,10 @@ static int bcm_serdev_probe(struct serdev_device *serdev) if (err) dev_err(&serdev->dev, "Failed to power down\n"); + data = device_get_match_data(bcmdev->dev); + if (data) + bcmdev->no_early_set_baudrate = data->no_early_set_baudrate; + return hci_uart_register_device(&bcmdev->serdev_hu, &bcm_proto); } @@ -1419,12 +1461,16 @@ static void bcm_serdev_remove(struct serdev_device *serdev) } #ifdef CONFIG_OF +static struct bcm_device_data bcm4354_device_data = { + .no_early_set_baudrate = true, +}; + static const struct of_device_id bcm_bluetooth_of_match[] = { { .compatible = "brcm,bcm20702a1" }, { .compatible = "brcm,bcm4345c5" }, { .compatible = "brcm,bcm4330-bt" }, { .compatible = "brcm,bcm43438-bt" }, - { .compatible = "brcm,bcm43540-bt" }, + { .compatible = "brcm,bcm43540-bt", .data = &bcm4354_device_data }, { .compatible = "brcm,bcm4335a0" }, { }, }; diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c index 82f9a6a814ae..e342daa73d1b 100644 --- a/drivers/char/pcmcia/synclink_cs.c +++ b/drivers/char/pcmcia/synclink_cs.c @@ -4169,7 +4169,7 @@ static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) * * dev pointer to network device structure */ -static void hdlcdev_tx_timeout(struct net_device *dev) +static void hdlcdev_tx_timeout(struct net_device *dev, unsigned int txqueue) { MGSLPC_INFO *info = dev_to_port(dev); unsigned long flags; diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c index aca75237bbcf..72e5b0f65a91 100644 --- a/drivers/crypto/chelsio/chtls/chtls_cm.c +++ b/drivers/crypto/chelsio/chtls/chtls_cm.c @@ -1273,7 +1273,7 @@ static int chtls_pass_accept_req(struct chtls_dev *cdev, struct sk_buff *skb) ctx = (struct listen_ctx *)data; lsk = ctx->lsk; - if (unlikely(tid >= cdev->tids->ntids)) { + if (unlikely(tid_out_of_range(cdev->tids, tid))) { pr_info("passive open TID %u too large\n", tid); return 1; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index e5f438ab716c..4a0d3a9e72e1 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1182,7 +1182,7 @@ unref: return NETDEV_TX_OK; } -static void ipoib_timeout(struct net_device *dev) +static void ipoib_timeout(struct net_device *dev, unsigned int txqueue) { struct ipoib_dev_priv *priv = ipoib_priv(dev); diff --git a/drivers/message/fusion/mptlan.c b/drivers/message/fusion/mptlan.c index ebc00d47abf5..7d3784aa20e5 100644 --- a/drivers/message/fusion/mptlan.c +++ b/drivers/message/fusion/mptlan.c @@ -552,7 +552,7 @@ mpt_lan_close(struct net_device *dev) /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /* Tx timeout handler. */ static void -mpt_lan_tx_timeout(struct net_device *dev) +mpt_lan_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct mpt_lan_priv *priv = netdev_priv(dev); MPT_ADAPTER *mpt_dev = priv->mpt_dev; diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c index f7d610a22347..ada94e6a3c91 100644 --- a/drivers/misc/sgi-xp/xpnet.c +++ b/drivers/misc/sgi-xp/xpnet.c @@ -496,7 +496,7 @@ xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) * Deal with transmit timeouts coming from the network layer. */ static void -xpnet_dev_tx_timeout(struct net_device *dev) +xpnet_dev_tx_timeout(struct net_device *dev, unsigned int txqueue) { dev->stats.tx_errors++; } diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index d02f12a5254e..01e2657e4c26 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -71,6 +71,49 @@ config DUMMY To compile this driver as a module, choose M here: the module will be called dummy. +config WIREGUARD + tristate "WireGuard secure network tunnel" + depends on NET && INET + depends on IPV6 || !IPV6 + select NET_UDP_TUNNEL + select DST_CACHE + select CRYPTO + select CRYPTO_LIB_CURVE25519 + select CRYPTO_LIB_CHACHA20POLY1305 + select CRYPTO_LIB_BLAKE2S + select CRYPTO_CHACHA20_X86_64 if X86 && 64BIT + select CRYPTO_POLY1305_X86_64 if X86 && 64BIT + select CRYPTO_BLAKE2S_X86 if X86 && 64BIT + select CRYPTO_CURVE25519_X86 if X86 && 64BIT + select ARM_CRYPTO if ARM + select ARM64_CRYPTO if ARM64 + select CRYPTO_CHACHA20_NEON if (ARM || ARM64) && KERNEL_MODE_NEON + select CRYPTO_POLY1305_NEON if ARM64 && KERNEL_MODE_NEON + select CRYPTO_POLY1305_ARM if ARM + select CRYPTO_CURVE25519_NEON if ARM && KERNEL_MODE_NEON + select CRYPTO_CHACHA_MIPS if CPU_MIPS32_R2 + select CRYPTO_POLY1305_MIPS if CPU_MIPS32 || (CPU_MIPS64 && 64BIT) + help + WireGuard is a secure, fast, and easy to use replacement for IPSec + that uses modern cryptography and clever networking tricks. It's + designed to be fairly general purpose and abstract enough to fit most + use cases, while at the same time remaining extremely simple to + configure. See www.wireguard.com for more info. + + It's safe to say Y or M here, as the driver is very lightweight and + is only in use when an administrator chooses to add an interface. + +config WIREGUARD_DEBUG + bool "Debugging checks and verbose messages" + depends on WIREGUARD + help + This will write log messages for handshake and other events + that occur for a WireGuard interface. It will also perform some + extra validation checks and unit tests at various points. This is + only useful for debugging. + + Say N here unless you know what you're doing. + config EQUALIZER tristate "EQL (serial line load balancing) support" ---help--- diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 0d3ba056cda3..953b7c12f0b0 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_BONDING) += bonding/ obj-$(CONFIG_IPVLAN) += ipvlan/ obj-$(CONFIG_IPVTAP) += ipvlan/ obj-$(CONFIG_DUMMY) += dummy.o +obj-$(CONFIG_WIREGUARD) += wireguard/ obj-$(CONFIG_EQUALIZER) += eql.o obj-$(CONFIG_IFB) += ifb.o obj-$(CONFIG_MACSEC) += macsec.o diff --git a/drivers/net/appletalk/cops.c b/drivers/net/appletalk/cops.c index b3c63d2f16aa..18428e104445 100644 --- a/drivers/net/appletalk/cops.c +++ b/drivers/net/appletalk/cops.c @@ -189,7 +189,7 @@ static int cops_nodeid (struct net_device *dev, int nodeid); static irqreturn_t cops_interrupt (int irq, void *dev_id); static void cops_poll(struct timer_list *t); -static void cops_timeout(struct net_device *dev); +static void cops_timeout(struct net_device *dev, unsigned int txqueue); static void cops_rx (struct net_device *dev); static netdev_tx_t cops_send_packet (struct sk_buff *skb, struct net_device *dev); @@ -844,7 +844,7 @@ static void cops_rx(struct net_device *dev) netif_rx(skb); } -static void cops_timeout(struct net_device *dev) +static void cops_timeout(struct net_device *dev, unsigned int txqueue) { struct cops_local *lp = netdev_priv(dev); int ioaddr = dev->base_addr; diff --git a/drivers/net/arcnet/arcdevice.h b/drivers/net/arcnet/arcdevice.h index b0f5bc07aef5..22a49c6d7ae6 100644 --- a/drivers/net/arcnet/arcdevice.h +++ b/drivers/net/arcnet/arcdevice.h @@ -356,7 +356,7 @@ int arcnet_open(struct net_device *dev); int arcnet_close(struct net_device *dev); netdev_tx_t arcnet_send_packet(struct sk_buff *skb, struct net_device *dev); -void arcnet_timeout(struct net_device *dev); +void arcnet_timeout(struct net_device *dev, unsigned int txqueue); /* I/O equivalents */ diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c index 553776cc1d29..e04efc0a5c97 100644 --- a/drivers/net/arcnet/arcnet.c +++ b/drivers/net/arcnet/arcnet.c @@ -763,7 +763,7 @@ static int go_tx(struct net_device *dev) } /* Called by the kernel when transmit times out */ -void arcnet_timeout(struct net_device *dev) +void arcnet_timeout(struct net_device *dev, unsigned int txqueue) { unsigned long flags; struct arcnet_local *lp = netdev_priv(dev); diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index e3b25f310936..34bfe99641a3 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -31,16 +31,6 @@ #define AD_CHURN_DETECTION_TIME 60 #define AD_AGGREGATE_WAIT_TIME 2 -/* Port state definitions (43.4.2.2 in the 802.3ad standard) */ -#define AD_STATE_LACP_ACTIVITY 0x1 -#define AD_STATE_LACP_TIMEOUT 0x2 -#define AD_STATE_AGGREGATION 0x4 -#define AD_STATE_SYNCHRONIZATION 0x8 -#define AD_STATE_COLLECTING 0x10 -#define AD_STATE_DISTRIBUTING 0x20 -#define AD_STATE_DEFAULTED 0x40 -#define AD_STATE_EXPIRED 0x80 - /* Port Variables definitions used by the State Machines (43.4.7 in the * 802.3ad standard) */ diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c index bd40b114d6cd..d737ceb61203 100644 --- a/drivers/net/caif/caif_serial.c +++ b/drivers/net/caif/caif_serial.c @@ -270,7 +270,9 @@ static int caif_xmit(struct sk_buff *skb, struct net_device *dev) { struct ser_device *ser; - BUG_ON(dev == NULL); + if (WARN_ON(!dev)) + return -EINVAL; + ser = netdev_priv(dev); /* Send flow off once, on high water mark */ diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig index c7667645f04a..cbd74a72d0a1 100644 --- a/drivers/net/dsa/Kconfig +++ b/drivers/net/dsa/Kconfig @@ -54,6 +54,8 @@ source "drivers/net/dsa/mv88e6xxx/Kconfig" source "drivers/net/dsa/ocelot/Kconfig" +source "drivers/net/dsa/qca/Kconfig" + source "drivers/net/dsa/sja1105/Kconfig" config NET_DSA_QCA8K diff --git a/drivers/net/dsa/Makefile b/drivers/net/dsa/Makefile index 9d384a32b3a2..4a943ccc2ca4 100644 --- a/drivers/net/dsa/Makefile +++ b/drivers/net/dsa/Makefile @@ -21,4 +21,5 @@ obj-y += b53/ obj-y += microchip/ obj-y += mv88e6xxx/ obj-y += ocelot/ +obj-y += qca/ obj-y += sja1105/ diff --git a/drivers/net/dsa/qca/Kconfig b/drivers/net/dsa/qca/Kconfig new file mode 100644 index 000000000000..e3c8d715a18f --- /dev/null +++ b/drivers/net/dsa/qca/Kconfig @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0-only +config NET_DSA_AR9331 + tristate "Qualcomm Atheros AR9331 Ethernet switch support" + depends on NET_DSA + select NET_DSA_TAG_AR9331 + select REGMAP + ---help--- + This enables support for the Qualcomm Atheros AR9331 built-in Ethernet + switch. diff --git a/drivers/net/dsa/qca/Makefile b/drivers/net/dsa/qca/Makefile new file mode 100644 index 000000000000..274022319066 --- /dev/null +++ b/drivers/net/dsa/qca/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_NET_DSA_AR9331) += ar9331.o diff --git a/drivers/net/dsa/qca/ar9331.c b/drivers/net/dsa/qca/ar9331.c new file mode 100644 index 000000000000..0d1a7cd85fe8 --- /dev/null +++ b/drivers/net/dsa/qca/ar9331.c @@ -0,0 +1,855 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (c) 2019 Pengutronix, Oleksij Rempel <kernel@pengutronix.de> +/* + * +----------------------+ + * GMAC1----RGMII----|--MAC0 | + * \---MDIO1----|--REGs |----MDIO3----\ + * | | | +------+ + * | | +--| | + * | MAC1-|----RMII--M-----| PHY0 |-o P0 + * | | | | +------+ + * | | | +--| | + * | MAC2-|----RMII--------| PHY1 |-o P1 + * | | | | +------+ + * | | | +--| | + * | MAC3-|----RMII--------| PHY2 |-o P2 + * | | | | +------+ + * | | | +--| | + * | MAC4-|----RMII--------| PHY3 |-o P3 + * | | | | +------+ + * | | | +--| | + * | MAC5-|--+-RMII--M-----|-PHY4-|-o P4 + * | | | | +------+ + * +----------------------+ | \--CFG_SW_PHY_SWAP + * GMAC0---------------RMII--------------------/ \-CFG_SW_PHY_ADDR_SWAP + * \---MDIO0--NC + * + * GMAC0 and MAC5 are connected together and use same PHY. Depending on + * configuration it can be PHY4 (default) or PHY0. Only GMAC0 or MAC5 can be + * used at same time. If GMAC0 is used (default) then MAC5 should be disabled. + * + * CFG_SW_PHY_SWAP - swap connections of PHY0 and PHY4. If this bit is not set + * PHY4 is connected to GMAC0/MAC5 bundle and PHY0 is connected to MAC1. If this + * bit is set, PHY4 is connected to MAC1 and PHY0 is connected to GMAC0/MAC5 + * bundle. + * + * CFG_SW_PHY_ADDR_SWAP - swap addresses of PHY0 and PHY4 + * + * CFG_SW_PHY_SWAP and CFG_SW_PHY_ADDR_SWAP are part of SoC specific register + * set and not related to switch internal registers. + */ + +#include <linux/bitfield.h> +#include <linux/module.h> +#include <linux/of_irq.h> +#include <linux/of_mdio.h> +#include <linux/regmap.h> +#include <linux/reset.h> +#include <net/dsa.h> + +#define AR9331_SW_NAME "ar9331_switch" +#define AR9331_SW_PORTS 6 + +/* dummy reg to change page */ +#define AR9331_SW_REG_PAGE 0x40000 + +/* Global Interrupt */ +#define AR9331_SW_REG_GINT 0x10 +#define AR9331_SW_REG_GINT_MASK 0x14 +#define AR9331_SW_GINT_PHY_INT BIT(2) + +#define AR9331_SW_REG_FLOOD_MASK 0x2c +#define AR9331_SW_FLOOD_MASK_BROAD_TO_CPU BIT(26) + +#define AR9331_SW_REG_GLOBAL_CTRL 0x30 +#define AR9331_SW_GLOBAL_CTRL_MFS_M GENMASK(13, 0) + +#define AR9331_SW_REG_MDIO_CTRL 0x98 +#define AR9331_SW_MDIO_CTRL_BUSY BIT(31) +#define AR9331_SW_MDIO_CTRL_MASTER_EN BIT(30) +#define AR9331_SW_MDIO_CTRL_CMD_READ BIT(27) +#define AR9331_SW_MDIO_CTRL_PHY_ADDR_M GENMASK(25, 21) +#define AR9331_SW_MDIO_CTRL_REG_ADDR_M GENMASK(20, 16) +#define AR9331_SW_MDIO_CTRL_DATA_M GENMASK(16, 0) + +#define AR9331_SW_REG_PORT_STATUS(_port) (0x100 + (_port) * 0x100) + +/* FLOW_LINK_EN - enable mac flow control config auto-neg with phy. + * If not set, mac can be config by software. + */ +#define AR9331_SW_PORT_STATUS_FLOW_LINK_EN BIT(12) + +/* LINK_EN - If set, MAC is configured from PHY link status. + * If not set, MAC should be configured by software. + */ +#define AR9331_SW_PORT_STATUS_LINK_EN BIT(9) +#define AR9331_SW_PORT_STATUS_DUPLEX_MODE BIT(6) +#define AR9331_SW_PORT_STATUS_RX_FLOW_EN BIT(5) +#define AR9331_SW_PORT_STATUS_TX_FLOW_EN BIT(4) +#define AR9331_SW_PORT_STATUS_RXMAC BIT(3) +#define AR9331_SW_PORT_STATUS_TXMAC BIT(2) +#define AR9331_SW_PORT_STATUS_SPEED_M GENMASK(1, 0) +#define AR9331_SW_PORT_STATUS_SPEED_1000 2 +#define AR9331_SW_PORT_STATUS_SPEED_100 1 +#define AR9331_SW_PORT_STATUS_SPEED_10 0 + +#define AR9331_SW_PORT_STATUS_MAC_MASK \ + (AR9331_SW_PORT_STATUS_TXMAC | AR9331_SW_PORT_STATUS_RXMAC) + +#define AR9331_SW_PORT_STATUS_LINK_MASK \ + (AR9331_SW_PORT_STATUS_LINK_EN | AR9331_SW_PORT_STATUS_FLOW_LINK_EN | \ + AR9331_SW_PORT_STATUS_DUPLEX_MODE | \ + AR9331_SW_PORT_STATUS_RX_FLOW_EN | AR9331_SW_PORT_STATUS_TX_FLOW_EN | \ + AR9331_SW_PORT_STATUS_SPEED_M) + +/* Phy bypass mode + * ------------------------------------------------------------------------ + * Bit: | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |10 |11 |12 |13 |14 |15 | + * + * real | start | OP | PhyAddr | Reg Addr | TA | + * atheros| start | OP | 2'b00 |PhyAdd[2:0]| Reg Addr[4:0] | TA | + * + * + * Bit: |16 |17 |18 |19 |20 |21 |22 |23 |24 |25 |26 |27 |28 |29 |30 |31 | + * real | Data | + * atheros| Data | + * + * ------------------------------------------------------------------------ + * Page address mode + * ------------------------------------------------------------------------ + * Bit: | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |10 |11 |12 |13 |14 |15 | + * real | start | OP | PhyAddr | Reg Addr | TA | + * atheros| start | OP | 2'b11 | 8'b0 | TA | + * + * Bit: |16 |17 |18 |19 |20 |21 |22 |23 |24 |25 |26 |27 |28 |29 |30 |31 | + * real | Data | + * atheros| | Page [9:0] | + */ +/* In case of Page Address mode, Bit[18:9] of 32 bit register address should be + * written to bits[9:0] of mdio data register. + */ +#define AR9331_SW_ADDR_PAGE GENMASK(18, 9) + +/* ------------------------------------------------------------------------ + * Normal register access mode + * ------------------------------------------------------------------------ + * Bit: | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |10 |11 |12 |13 |14 |15 | + * real | start | OP | PhyAddr | Reg Addr | TA | + * atheros| start | OP | 2'b10 | low_addr[7:0] | TA | + * + * Bit: |16 |17 |18 |19 |20 |21 |22 |23 |24 |25 |26 |27 |28 |29 |30 |31 | + * real | Data | + * atheros| Data | + * ------------------------------------------------------------------------ + */ +#define AR9331_SW_LOW_ADDR_PHY GENMASK(8, 6) +#define AR9331_SW_LOW_ADDR_REG GENMASK(5, 1) + +#define AR9331_SW_MDIO_PHY_MODE_M GENMASK(4, 3) +#define AR9331_SW_MDIO_PHY_MODE_PAGE 3 +#define AR9331_SW_MDIO_PHY_MODE_REG 2 +#define AR9331_SW_MDIO_PHY_MODE_BYPASS 0 +#define AR9331_SW_MDIO_PHY_ADDR_M GENMASK(2, 0) + +/* Empirical determined values */ +#define AR9331_SW_MDIO_POLL_SLEEP_US 1 +#define AR9331_SW_MDIO_POLL_TIMEOUT_US 20 + +struct ar9331_sw_priv { + struct device *dev; + struct dsa_switch ds; + struct dsa_switch_ops ops; + struct irq_domain *irqdomain; + struct mii_bus *mbus; /* mdio master */ + struct mii_bus *sbus; /* mdio slave */ + struct regmap *regmap; + struct reset_control *sw_reset; +}; + +/* Warning: switch reset will reset last AR9331_SW_MDIO_PHY_MODE_PAGE request + * If some kind of optimization is used, the request should be repeated. + */ +static int ar9331_sw_reset(struct ar9331_sw_priv *priv) +{ + int ret; + + ret = reset_control_assert(priv->sw_reset); + if (ret) + goto error; + + /* AR9331 doc do not provide any information about proper reset + * sequence. The AR8136 (the closes switch to the AR9331) doc says: + * reset duration should be greater than 10ms. So, let's use this value + * for now. + */ + usleep_range(10000, 15000); + ret = reset_control_deassert(priv->sw_reset); + if (ret) + goto error; + /* There is no information on how long should we wait after reset. + * AR8136 has an EEPROM and there is an Interrupt for EEPROM load + * status. AR9331 has no EEPROM support. + * For now, do not wait. In case AR8136 will be needed, the after + * reset delay can be added as well. + */ + + return 0; +error: + dev_err_ratelimited(priv->dev, "%s: %i\n", __func__, ret); + return ret; +} + +static int ar9331_sw_mbus_write(struct mii_bus *mbus, int port, int regnum, + u16 data) +{ + struct ar9331_sw_priv *priv = mbus->priv; + struct regmap *regmap = priv->regmap; + u32 val; + int ret; + + ret = regmap_write(regmap, AR9331_SW_REG_MDIO_CTRL, + AR9331_SW_MDIO_CTRL_BUSY | + AR9331_SW_MDIO_CTRL_MASTER_EN | + FIELD_PREP(AR9331_SW_MDIO_CTRL_PHY_ADDR_M, port) | + FIELD_PREP(AR9331_SW_MDIO_CTRL_REG_ADDR_M, regnum) | + FIELD_PREP(AR9331_SW_MDIO_CTRL_DATA_M, data)); + if (ret) + goto error; + + ret = regmap_read_poll_timeout(regmap, AR9331_SW_REG_MDIO_CTRL, val, + !(val & AR9331_SW_MDIO_CTRL_BUSY), + AR9331_SW_MDIO_POLL_SLEEP_US, + AR9331_SW_MDIO_POLL_TIMEOUT_US); + if (ret) + goto error; + + return 0; +error: + dev_err_ratelimited(priv->dev, "PHY write error: %i\n", ret); + return ret; +} + +static int ar9331_sw_mbus_read(struct mii_bus *mbus, int port, int regnum) +{ + struct ar9331_sw_priv *priv = mbus->priv; + struct regmap *regmap = priv->regmap; + u32 val; + int ret; + + ret = regmap_write(regmap, AR9331_SW_REG_MDIO_CTRL, + AR9331_SW_MDIO_CTRL_BUSY | + AR9331_SW_MDIO_CTRL_MASTER_EN | + AR9331_SW_MDIO_CTRL_CMD_READ | + FIELD_PREP(AR9331_SW_MDIO_CTRL_PHY_ADDR_M, port) | + FIELD_PREP(AR9331_SW_MDIO_CTRL_REG_ADDR_M, regnum)); + if (ret) + goto error; + + ret = regmap_read_poll_timeout(regmap, AR9331_SW_REG_MDIO_CTRL, val, + !(val & AR9331_SW_MDIO_CTRL_BUSY), + AR9331_SW_MDIO_POLL_SLEEP_US, + AR9331_SW_MDIO_POLL_TIMEOUT_US); + if (ret) + goto error; + + ret = regmap_read(regmap, AR9331_SW_REG_MDIO_CTRL, &val); + if (ret) + goto error; + + return FIELD_GET(AR9331_SW_MDIO_CTRL_DATA_M, val); + +error: + dev_err_ratelimited(priv->dev, "PHY read error: %i\n", ret); + return ret; +} + +static int ar9331_sw_mbus_init(struct ar9331_sw_priv *priv) +{ + struct device *dev = priv->dev; + static struct mii_bus *mbus; + struct device_node *np, *mnp; + int ret; + + np = dev->of_node; + + mbus = devm_mdiobus_alloc(dev); + if (!mbus) + return -ENOMEM; + + mbus->name = np->full_name; + snprintf(mbus->id, MII_BUS_ID_SIZE, "%pOF", np); + + mbus->read = ar9331_sw_mbus_read; + mbus->write = ar9331_sw_mbus_write; + mbus->priv = priv; + mbus->parent = dev; + + mnp = of_get_child_by_name(np, "mdio"); + if (!mnp) + return -ENODEV; + + ret = of_mdiobus_register(mbus, mnp); + of_node_put(mnp); + if (ret) + return ret; + + priv->mbus = mbus; + + return 0; +} + +static int ar9331_sw_setup(struct dsa_switch *ds) +{ + struct ar9331_sw_priv *priv = (struct ar9331_sw_priv *)ds->priv; + struct regmap *regmap = priv->regmap; + int ret; + + ret = ar9331_sw_reset(priv); + if (ret) + return ret; + + /* Reset will set proper defaults. CPU - Port0 will be enabled and + * configured. All other ports (ports 1 - 5) are disabled + */ + ret = ar9331_sw_mbus_init(priv); + if (ret) + return ret; + + /* Do not drop broadcast frames */ + ret = regmap_write_bits(regmap, AR9331_SW_REG_FLOOD_MASK, + AR9331_SW_FLOOD_MASK_BROAD_TO_CPU, + AR9331_SW_FLOOD_MASK_BROAD_TO_CPU); + if (ret) + goto error; + + /* Set max frame size to the maximum supported value */ + ret = regmap_write_bits(regmap, AR9331_SW_REG_GLOBAL_CTRL, + AR9331_SW_GLOBAL_CTRL_MFS_M, + AR9331_SW_GLOBAL_CTRL_MFS_M); + if (ret) + goto error; + + return 0; +error: + dev_err_ratelimited(priv->dev, "%s: %i\n", __func__, ret); + return ret; +} + +static void ar9331_sw_port_disable(struct dsa_switch *ds, int port) +{ + struct ar9331_sw_priv *priv = (struct ar9331_sw_priv *)ds->priv; + struct regmap *regmap = priv->regmap; + int ret; + + ret = regmap_write(regmap, AR9331_SW_REG_PORT_STATUS(port), 0); + if (ret) + dev_err_ratelimited(priv->dev, "%s: %i\n", __func__, ret); +} + +static enum dsa_tag_protocol ar9331_sw_get_tag_protocol(struct dsa_switch *ds, + int port) +{ + return DSA_TAG_PROTO_AR9331; +} + +static void ar9331_sw_phylink_validate(struct dsa_switch *ds, int port, + unsigned long *supported, + struct phylink_link_state *state) +{ + __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; + + switch (port) { + case 0: + if (state->interface != PHY_INTERFACE_MODE_GMII) + goto unsupported; + + phylink_set(mask, 1000baseT_Full); + phylink_set(mask, 1000baseT_Half); + break; + case 1: + case 2: + case 3: + case 4: + case 5: + if (state->interface != PHY_INTERFACE_MODE_INTERNAL) + goto unsupported; + break; + default: + bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS); + dev_err(ds->dev, "Unsupported port: %i\n", port); + return; + } + + phylink_set_port_modes(mask); + phylink_set(mask, Pause); + phylink_set(mask, Asym_Pause); + + phylink_set(mask, 10baseT_Half); + phylink_set(mask, 10baseT_Full); + phylink_set(mask, 100baseT_Half); + phylink_set(mask, 100baseT_Full); + + bitmap_and(supported, supported, mask, + __ETHTOOL_LINK_MODE_MASK_NBITS); + bitmap_and(state->advertising, state->advertising, mask, + __ETHTOOL_LINK_MODE_MASK_NBITS); + + return; + +unsupported: + bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS); + dev_err(ds->dev, "Unsupported interface: %d, port: %d\n", + state->interface, port); +} + +static void ar9331_sw_phylink_mac_config(struct dsa_switch *ds, int port, + unsigned int mode, + const struct phylink_link_state *state) +{ + struct ar9331_sw_priv *priv = (struct ar9331_sw_priv *)ds->priv; + struct regmap *regmap = priv->regmap; + int ret; + u32 val; + + switch (state->speed) { + case SPEED_1000: + val = AR9331_SW_PORT_STATUS_SPEED_1000; + break; + case SPEED_100: + val = AR9331_SW_PORT_STATUS_SPEED_100; + break; + case SPEED_10: + val = AR9331_SW_PORT_STATUS_SPEED_10; + break; + default: + return; + } + + if (state->duplex) + val |= AR9331_SW_PORT_STATUS_DUPLEX_MODE; + + if (state->pause & MLO_PAUSE_TX) + val |= AR9331_SW_PORT_STATUS_TX_FLOW_EN; + + if (state->pause & MLO_PAUSE_RX) + val |= AR9331_SW_PORT_STATUS_RX_FLOW_EN; + + ret = regmap_update_bits(regmap, AR9331_SW_REG_PORT_STATUS(port), + AR9331_SW_PORT_STATUS_LINK_MASK, val); + if (ret) + dev_err_ratelimited(priv->dev, "%s: %i\n", __func__, ret); +} + +static void ar9331_sw_phylink_mac_link_down(struct dsa_switch *ds, int port, + unsigned int mode, + phy_interface_t interface) +{ + struct ar9331_sw_priv *priv = (struct ar9331_sw_priv *)ds->priv; + struct regmap *regmap = priv->regmap; + int ret; + + ret = regmap_update_bits(regmap, AR9331_SW_REG_PORT_STATUS(port), + AR9331_SW_PORT_STATUS_MAC_MASK, 0); + if (ret) + dev_err_ratelimited(priv->dev, "%s: %i\n", __func__, ret); +} + +static void ar9331_sw_phylink_mac_link_up(struct dsa_switch *ds, int port, + unsigned int mode, + phy_interface_t interface, + struct phy_device *phydev) +{ + struct ar9331_sw_priv *priv = (struct ar9331_sw_priv *)ds->priv; + struct regmap *regmap = priv->regmap; + int ret; + + ret = regmap_update_bits(regmap, AR9331_SW_REG_PORT_STATUS(port), + AR9331_SW_PORT_STATUS_MAC_MASK, + AR9331_SW_PORT_STATUS_MAC_MASK); + if (ret) + dev_err_ratelimited(priv->dev, "%s: %i\n", __func__, ret); +} + +static const struct dsa_switch_ops ar9331_sw_ops = { + .get_tag_protocol = ar9331_sw_get_tag_protocol, + .setup = ar9331_sw_setup, + .port_disable = ar9331_sw_port_disable, + .phylink_validate = ar9331_sw_phylink_validate, + .phylink_mac_config = ar9331_sw_phylink_mac_config, + .phylink_mac_link_down = ar9331_sw_phylink_mac_link_down, + .phylink_mac_link_up = ar9331_sw_phylink_mac_link_up, +}; + +static irqreturn_t ar9331_sw_irq(int irq, void *data) +{ + struct ar9331_sw_priv *priv = data; + struct regmap *regmap = priv->regmap; + u32 stat; + int ret; + + ret = regmap_read(regmap, AR9331_SW_REG_GINT, &stat); + if (ret) { + dev_err(priv->dev, "can't read interrupt status\n"); + return IRQ_NONE; + } + + if (!stat) + return IRQ_NONE; + + if (stat & AR9331_SW_GINT_PHY_INT) { + int child_irq; + + child_irq = irq_find_mapping(priv->irqdomain, 0); + handle_nested_irq(child_irq); + } + + ret = regmap_write(regmap, AR9331_SW_REG_GINT, stat); + if (ret) { + dev_err(priv->dev, "can't write interrupt status\n"); + return IRQ_NONE; + } + + return IRQ_HANDLED; +} + +static void ar9331_sw_mask_irq(struct irq_data *d) +{ + struct ar9331_sw_priv *priv = irq_data_get_irq_chip_data(d); + struct regmap *regmap = priv->regmap; + int ret; + + ret = regmap_update_bits(regmap, AR9331_SW_REG_GINT_MASK, + AR9331_SW_GINT_PHY_INT, 0); + if (ret) + dev_err(priv->dev, "could not mask IRQ\n"); +} + +static void ar9331_sw_unmask_irq(struct irq_data *d) +{ + struct ar9331_sw_priv *priv = irq_data_get_irq_chip_data(d); + struct regmap *regmap = priv->regmap; + int ret; + + ret = regmap_update_bits(regmap, AR9331_SW_REG_GINT_MASK, + AR9331_SW_GINT_PHY_INT, + AR9331_SW_GINT_PHY_INT); + if (ret) + dev_err(priv->dev, "could not unmask IRQ\n"); +} + +static struct irq_chip ar9331_sw_irq_chip = { + .name = AR9331_SW_NAME, + .irq_mask = ar9331_sw_mask_irq, + .irq_unmask = ar9331_sw_unmask_irq, +}; + +static int ar9331_sw_irq_map(struct irq_domain *domain, unsigned int irq, + irq_hw_number_t hwirq) +{ + irq_set_chip_data(irq, domain->host_data); + irq_set_chip_and_handler(irq, &ar9331_sw_irq_chip, handle_simple_irq); + irq_set_nested_thread(irq, 1); + irq_set_noprobe(irq); + + return 0; +} + +static void ar9331_sw_irq_unmap(struct irq_domain *d, unsigned int irq) +{ + irq_set_nested_thread(irq, 0); + irq_set_chip_and_handler(irq, NULL, NULL); + irq_set_chip_data(irq, NULL); +} + +static const struct irq_domain_ops ar9331_sw_irqdomain_ops = { + .map = ar9331_sw_irq_map, + .unmap = ar9331_sw_irq_unmap, + .xlate = irq_domain_xlate_onecell, +}; + +static int ar9331_sw_irq_init(struct ar9331_sw_priv *priv) +{ + struct device_node *np = priv->dev->of_node; + struct device *dev = priv->dev; + int ret, irq; + + irq = of_irq_get(np, 0); + if (irq <= 0) { + dev_err(dev, "failed to get parent IRQ\n"); + return irq ? irq : -EINVAL; + } + + ret = devm_request_threaded_irq(dev, irq, NULL, ar9331_sw_irq, + IRQF_ONESHOT, AR9331_SW_NAME, priv); + if (ret) { + dev_err(dev, "unable to request irq: %d\n", ret); + return ret; + } + + priv->irqdomain = irq_domain_add_linear(np, 1, &ar9331_sw_irqdomain_ops, + priv); + if (!priv->irqdomain) { + dev_err(dev, "failed to create IRQ domain\n"); + return -EINVAL; + } + + irq_set_parent(irq_create_mapping(priv->irqdomain, 0), irq); + + return 0; +} + +static int __ar9331_mdio_write(struct mii_bus *sbus, u8 mode, u16 reg, u16 val) +{ + u8 r, p; + + p = FIELD_PREP(AR9331_SW_MDIO_PHY_MODE_M, mode) | + FIELD_GET(AR9331_SW_LOW_ADDR_PHY, reg); + r = FIELD_GET(AR9331_SW_LOW_ADDR_REG, reg); + + return mdiobus_write(sbus, p, r, val); +} + +static int __ar9331_mdio_read(struct mii_bus *sbus, u16 reg) +{ + u8 r, p; + + p = FIELD_PREP(AR9331_SW_MDIO_PHY_MODE_M, AR9331_SW_MDIO_PHY_MODE_REG) | + FIELD_GET(AR9331_SW_LOW_ADDR_PHY, reg); + r = FIELD_GET(AR9331_SW_LOW_ADDR_REG, reg); + + return mdiobus_read(sbus, p, r); +} + +static int ar9331_mdio_read(void *ctx, const void *reg_buf, size_t reg_len, + void *val_buf, size_t val_len) +{ + struct ar9331_sw_priv *priv = ctx; + struct mii_bus *sbus = priv->sbus; + u32 reg = *(u32 *)reg_buf; + int ret; + + if (reg == AR9331_SW_REG_PAGE) { + /* We cannot read the page selector register from hardware and + * we cache its value in regmap. Return all bits set here, + * that regmap will always write the page on first use. + */ + *(u32 *)val_buf = GENMASK(9, 0); + return 0; + } + + ret = __ar9331_mdio_read(sbus, reg); + if (ret < 0) + goto error; + + *(u32 *)val_buf = ret; + ret = __ar9331_mdio_read(sbus, reg + 2); + if (ret < 0) + goto error; + + *(u32 *)val_buf |= ret << 16; + + return 0; +error: + dev_err_ratelimited(&sbus->dev, "Bus error. Failed to read register.\n"); + return ret; +} + +static int ar9331_mdio_write(void *ctx, u32 reg, u32 val) +{ + struct ar9331_sw_priv *priv = (struct ar9331_sw_priv *)ctx; + struct mii_bus *sbus = priv->sbus; + int ret; + + if (reg == AR9331_SW_REG_PAGE) { + ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_PAGE, + 0, val); + if (ret < 0) + goto error; + + return 0; + } + + ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg, val); + if (ret < 0) + goto error; + + ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg + 2, + val >> 16); + if (ret < 0) + goto error; + + return 0; +error: + dev_err_ratelimited(&sbus->dev, "Bus error. Failed to write register.\n"); + return ret; +} + +static int ar9331_sw_bus_write(void *context, const void *data, size_t count) +{ + u32 reg = *(u32 *)data; + u32 val = *((u32 *)data + 1); + + return ar9331_mdio_write(context, reg, val); +} + +static const struct regmap_range ar9331_valid_regs[] = { + regmap_reg_range(0x0, 0x0), + regmap_reg_range(0x10, 0x14), + regmap_reg_range(0x20, 0x24), + regmap_reg_range(0x2c, 0x30), + regmap_reg_range(0x40, 0x44), + regmap_reg_range(0x50, 0x78), + regmap_reg_range(0x80, 0x98), + + regmap_reg_range(0x100, 0x120), + regmap_reg_range(0x200, 0x220), + regmap_reg_range(0x300, 0x320), + regmap_reg_range(0x400, 0x420), + regmap_reg_range(0x500, 0x520), + regmap_reg_range(0x600, 0x620), + + regmap_reg_range(0x20000, 0x200a4), + regmap_reg_range(0x20100, 0x201a4), + regmap_reg_range(0x20200, 0x202a4), + regmap_reg_range(0x20300, 0x203a4), + regmap_reg_range(0x20400, 0x204a4), + regmap_reg_range(0x20500, 0x205a4), + + /* dummy page selector reg */ + regmap_reg_range(AR9331_SW_REG_PAGE, AR9331_SW_REG_PAGE), +}; + +static const struct regmap_range ar9331_nonvolatile_regs[] = { + regmap_reg_range(AR9331_SW_REG_PAGE, AR9331_SW_REG_PAGE), +}; + +static const struct regmap_range_cfg ar9331_regmap_range[] = { + { + .selector_reg = AR9331_SW_REG_PAGE, + .selector_mask = GENMASK(9, 0), + .selector_shift = 0, + + .window_start = 0, + .window_len = 512, + + .range_min = 0, + .range_max = AR9331_SW_REG_PAGE - 4, + }, +}; + +static const struct regmap_access_table ar9331_register_set = { + .yes_ranges = ar9331_valid_regs, + .n_yes_ranges = ARRAY_SIZE(ar9331_valid_regs), +}; + +static const struct regmap_access_table ar9331_volatile_set = { + .no_ranges = ar9331_nonvolatile_regs, + .n_no_ranges = ARRAY_SIZE(ar9331_nonvolatile_regs), +}; + +static const struct regmap_config ar9331_mdio_regmap_config = { + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 4, + .max_register = AR9331_SW_REG_PAGE, + + .ranges = ar9331_regmap_range, + .num_ranges = ARRAY_SIZE(ar9331_regmap_range), + + .volatile_table = &ar9331_volatile_set, + .wr_table = &ar9331_register_set, + .rd_table = &ar9331_register_set, + + .cache_type = REGCACHE_RBTREE, +}; + +static struct regmap_bus ar9331_sw_bus = { + .reg_format_endian_default = REGMAP_ENDIAN_NATIVE, + .val_format_endian_default = REGMAP_ENDIAN_NATIVE, + .read = ar9331_mdio_read, + .write = ar9331_sw_bus_write, + .max_raw_read = 4, + .max_raw_write = 4, +}; + +static int ar9331_sw_probe(struct mdio_device *mdiodev) +{ + struct ar9331_sw_priv *priv; + struct dsa_switch *ds; + int ret; + + priv = devm_kzalloc(&mdiodev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->regmap = devm_regmap_init(&mdiodev->dev, &ar9331_sw_bus, priv, + &ar9331_mdio_regmap_config); + if (IS_ERR(priv->regmap)) { + ret = PTR_ERR(priv->regmap); + dev_err(&mdiodev->dev, "regmap init failed: %d\n", ret); + return ret; + } + + priv->sw_reset = devm_reset_control_get(&mdiodev->dev, "switch"); + if (IS_ERR(priv->sw_reset)) { + dev_err(&mdiodev->dev, "missing switch reset\n"); + return PTR_ERR(priv->sw_reset); + } + + priv->sbus = mdiodev->bus; + priv->dev = &mdiodev->dev; + + ret = ar9331_sw_irq_init(priv); + if (ret) + return ret; + + ds = &priv->ds; + ds->dev = &mdiodev->dev; + ds->num_ports = AR9331_SW_PORTS; + ds->priv = priv; + priv->ops = ar9331_sw_ops; + ds->ops = &priv->ops; + dev_set_drvdata(&mdiodev->dev, priv); + + ret = dsa_register_switch(ds); + if (ret) + goto err_remove_irq; + + return 0; + +err_remove_irq: + irq_domain_remove(priv->irqdomain); + + return ret; +} + +static void ar9331_sw_remove(struct mdio_device *mdiodev) +{ + struct ar9331_sw_priv *priv = dev_get_drvdata(&mdiodev->dev); + + irq_domain_remove(priv->irqdomain); + mdiobus_unregister(priv->mbus); + dsa_unregister_switch(&priv->ds); + + reset_control_assert(priv->sw_reset); +} + +static const struct of_device_id ar9331_sw_of_match[] = { + { .compatible = "qca,ar9331-switch" }, + { }, +}; + +static struct mdio_driver ar9331_sw_mdio_driver = { + .probe = ar9331_sw_probe, + .remove = ar9331_sw_remove, + .mdiodrv.driver = { + .name = AR9331_SW_NAME, + .of_match_table = ar9331_sw_of_match, + }, +}; + +mdio_module_driver(ar9331_sw_mdio_driver); + +MODULE_AUTHOR("Oleksij Rempel <kernel@pengutronix.de>"); +MODULE_DESCRIPTION("Driver for Atheros AR9331 switch"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/ethernet/3com/3c509.c b/drivers/net/ethernet/3com/3c509.c index 3da97996bdf3..8cafd06ff0c4 100644 --- a/drivers/net/ethernet/3com/3c509.c +++ b/drivers/net/ethernet/3com/3c509.c @@ -196,7 +196,7 @@ static struct net_device_stats *el3_get_stats(struct net_device *dev); static int el3_rx(struct net_device *dev); static int el3_close(struct net_device *dev); static void set_multicast_list(struct net_device *dev); -static void el3_tx_timeout (struct net_device *dev); +static void el3_tx_timeout (struct net_device *dev, unsigned int txqueue); static void el3_down(struct net_device *dev); static void el3_up(struct net_device *dev); static const struct ethtool_ops ethtool_ops; @@ -689,7 +689,7 @@ el3_open(struct net_device *dev) } static void -el3_tx_timeout (struct net_device *dev) +el3_tx_timeout (struct net_device *dev, unsigned int txqueue) { int ioaddr = dev->base_addr; diff --git a/drivers/net/ethernet/3com/3c515.c b/drivers/net/ethernet/3com/3c515.c index b15752267c8d..1e233e2f0a5a 100644 --- a/drivers/net/ethernet/3com/3c515.c +++ b/drivers/net/ethernet/3com/3c515.c @@ -371,7 +371,7 @@ static void corkscrew_timer(struct timer_list *t); static netdev_tx_t corkscrew_start_xmit(struct sk_buff *skb, struct net_device *dev); static int corkscrew_rx(struct net_device *dev); -static void corkscrew_timeout(struct net_device *dev); +static void corkscrew_timeout(struct net_device *dev, unsigned int txqueue); static int boomerang_rx(struct net_device *dev); static irqreturn_t corkscrew_interrupt(int irq, void *dev_id); static int corkscrew_close(struct net_device *dev); @@ -961,7 +961,7 @@ static void corkscrew_timer(struct timer_list *t) #endif /* AUTOMEDIA */ } -static void corkscrew_timeout(struct net_device *dev) +static void corkscrew_timeout(struct net_device *dev, unsigned int txqueue) { int i; struct corkscrew_private *vp = netdev_priv(dev); diff --git a/drivers/net/ethernet/3com/3c574_cs.c b/drivers/net/ethernet/3com/3c574_cs.c index 3044a6f35f04..ef1c3151fbb2 100644 --- a/drivers/net/ethernet/3com/3c574_cs.c +++ b/drivers/net/ethernet/3com/3c574_cs.c @@ -234,7 +234,7 @@ static void update_stats(struct net_device *dev); static struct net_device_stats *el3_get_stats(struct net_device *dev); static int el3_rx(struct net_device *dev, int worklimit); static int el3_close(struct net_device *dev); -static void el3_tx_timeout(struct net_device *dev); +static void el3_tx_timeout(struct net_device *dev, unsigned int txqueue); static int el3_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static void set_rx_mode(struct net_device *dev); static void set_multicast_list(struct net_device *dev); @@ -690,7 +690,7 @@ static int el3_open(struct net_device *dev) return 0; } -static void el3_tx_timeout(struct net_device *dev) +static void el3_tx_timeout(struct net_device *dev, unsigned int txqueue) { unsigned int ioaddr = dev->base_addr; diff --git a/drivers/net/ethernet/3com/3c589_cs.c b/drivers/net/ethernet/3com/3c589_cs.c index 2b2695311bda..d47cde6c5f08 100644 --- a/drivers/net/ethernet/3com/3c589_cs.c +++ b/drivers/net/ethernet/3com/3c589_cs.c @@ -173,7 +173,7 @@ static void update_stats(struct net_device *dev); static struct net_device_stats *el3_get_stats(struct net_device *dev); static int el3_rx(struct net_device *dev); static int el3_close(struct net_device *dev); -static void el3_tx_timeout(struct net_device *dev); +static void el3_tx_timeout(struct net_device *dev, unsigned int txqueue); static void set_rx_mode(struct net_device *dev); static void set_multicast_list(struct net_device *dev); static const struct ethtool_ops netdev_ethtool_ops; @@ -526,7 +526,7 @@ static int el3_open(struct net_device *dev) return 0; } -static void el3_tx_timeout(struct net_device *dev) +static void el3_tx_timeout(struct net_device *dev, unsigned int txqueue) { unsigned int ioaddr = dev->base_addr; diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c index 8785c2ff3825..fc046797c0ea 100644 --- a/drivers/net/ethernet/3com/3c59x.c +++ b/drivers/net/ethernet/3com/3c59x.c @@ -776,7 +776,7 @@ static void set_rx_mode(struct net_device *dev); #ifdef CONFIG_PCI static int vortex_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); #endif -static void vortex_tx_timeout(struct net_device *dev); +static void vortex_tx_timeout(struct net_device *dev, unsigned int txqueue); static void acpi_set_WOL(struct net_device *dev); static const struct ethtool_ops vortex_ethtool_ops; static void set_8021q_mode(struct net_device *dev, int enable); @@ -1877,7 +1877,7 @@ leave_media_alone: iowrite16(FakeIntr, ioaddr + EL3_CMD); } -static void vortex_tx_timeout(struct net_device *dev) +static void vortex_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct vortex_private *vp = netdev_priv(dev); void __iomem *ioaddr = vp->ioaddr; diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c index be823c186517..14fce6658106 100644 --- a/drivers/net/ethernet/3com/typhoon.c +++ b/drivers/net/ethernet/3com/typhoon.c @@ -2013,7 +2013,7 @@ typhoon_stop_runtime(struct typhoon *tp, int wait_type) } static void -typhoon_tx_timeout(struct net_device *dev) +typhoon_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct typhoon *tp = netdev_priv(dev); diff --git a/drivers/net/ethernet/8390/8390.c b/drivers/net/ethernet/8390/8390.c index 78f3e532c600..0e0aa4016858 100644 --- a/drivers/net/ethernet/8390/8390.c +++ b/drivers/net/ethernet/8390/8390.c @@ -36,9 +36,9 @@ void ei_set_multicast_list(struct net_device *dev) } EXPORT_SYMBOL(ei_set_multicast_list); -void ei_tx_timeout(struct net_device *dev) +void ei_tx_timeout(struct net_device *dev, unsigned int txqueue) { - __ei_tx_timeout(dev); + __ei_tx_timeout(dev, txqueue); } EXPORT_SYMBOL(ei_tx_timeout); diff --git a/drivers/net/ethernet/8390/8390.h b/drivers/net/ethernet/8390/8390.h index 3e2f2c2e7b58..529c728f334a 100644 --- a/drivers/net/ethernet/8390/8390.h +++ b/drivers/net/ethernet/8390/8390.h @@ -32,7 +32,7 @@ void NS8390_init(struct net_device *dev, int startp); int ei_open(struct net_device *dev); int ei_close(struct net_device *dev); irqreturn_t ei_interrupt(int irq, void *dev_id); -void ei_tx_timeout(struct net_device *dev); +void ei_tx_timeout(struct net_device *dev, unsigned int txqueue); netdev_tx_t ei_start_xmit(struct sk_buff *skb, struct net_device *dev); void ei_set_multicast_list(struct net_device *dev); struct net_device_stats *ei_get_stats(struct net_device *dev); @@ -50,7 +50,7 @@ void NS8390p_init(struct net_device *dev, int startp); int eip_open(struct net_device *dev); int eip_close(struct net_device *dev); irqreturn_t eip_interrupt(int irq, void *dev_id); -void eip_tx_timeout(struct net_device *dev); +void eip_tx_timeout(struct net_device *dev, unsigned int txqueue); netdev_tx_t eip_start_xmit(struct sk_buff *skb, struct net_device *dev); void eip_set_multicast_list(struct net_device *dev); struct net_device_stats *eip_get_stats(struct net_device *dev); diff --git a/drivers/net/ethernet/8390/8390p.c b/drivers/net/ethernet/8390/8390p.c index 6cf36992a2c6..6834742057b3 100644 --- a/drivers/net/ethernet/8390/8390p.c +++ b/drivers/net/ethernet/8390/8390p.c @@ -41,9 +41,9 @@ void eip_set_multicast_list(struct net_device *dev) } EXPORT_SYMBOL(eip_set_multicast_list); -void eip_tx_timeout(struct net_device *dev) +void eip_tx_timeout(struct net_device *dev, unsigned int txqueue) { - __ei_tx_timeout(dev); + __ei_tx_timeout(dev, txqueue); } EXPORT_SYMBOL(eip_tx_timeout); diff --git a/drivers/net/ethernet/8390/axnet_cs.c b/drivers/net/ethernet/8390/axnet_cs.c index 0b6bbf63f7ca..aeae7966a082 100644 --- a/drivers/net/ethernet/8390/axnet_cs.c +++ b/drivers/net/ethernet/8390/axnet_cs.c @@ -83,7 +83,7 @@ static netdev_tx_t axnet_start_xmit(struct sk_buff *skb, struct net_device *dev); static struct net_device_stats *get_stats(struct net_device *dev); static void set_multicast_list(struct net_device *dev); -static void axnet_tx_timeout(struct net_device *dev); +static void axnet_tx_timeout(struct net_device *dev, unsigned int txqueue); static irqreturn_t ei_irq_wrapper(int irq, void *dev_id); static void ei_watchdog(struct timer_list *t); static void axnet_reset_8390(struct net_device *dev); @@ -903,7 +903,7 @@ static int ax_close(struct net_device *dev) * completed (or failed) - i.e. never posted a Tx related interrupt. */ -static void axnet_tx_timeout(struct net_device *dev) +static void axnet_tx_timeout(struct net_device *dev, unsigned int txqueue) { long e8390_base = dev->base_addr; struct ei_device *ei_local = netdev_priv(dev); diff --git a/drivers/net/ethernet/8390/lib8390.c b/drivers/net/ethernet/8390/lib8390.c index c9c55c9eab9f..babc92e2692e 100644 --- a/drivers/net/ethernet/8390/lib8390.c +++ b/drivers/net/ethernet/8390/lib8390.c @@ -251,7 +251,7 @@ static int __ei_close(struct net_device *dev) * completed (or failed) - i.e. never posted a Tx related interrupt. */ -static void __ei_tx_timeout(struct net_device *dev) +static void __ei_tx_timeout(struct net_device *dev, unsigned int txqueue) { unsigned long e8390_base = dev->base_addr; struct ei_device *ei_local = netdev_priv(dev); diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c index 816540e6beac..165d18405b0c 100644 --- a/drivers/net/ethernet/adaptec/starfire.c +++ b/drivers/net/ethernet/adaptec/starfire.c @@ -576,7 +576,7 @@ static int mdio_read(struct net_device *dev, int phy_id, int location); static void mdio_write(struct net_device *dev, int phy_id, int location, int value); static int netdev_open(struct net_device *dev); static void check_duplex(struct net_device *dev); -static void tx_timeout(struct net_device *dev); +static void tx_timeout(struct net_device *dev, unsigned int txqueue); static void init_ring(struct net_device *dev); static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); static irqreturn_t intr_handler(int irq, void *dev_instance); @@ -1105,7 +1105,7 @@ static void check_duplex(struct net_device *dev) } -static void tx_timeout(struct net_device *dev) +static void tx_timeout(struct net_device *dev, unsigned int txqueue) { struct netdev_private *np = netdev_priv(dev); void __iomem *ioaddr = np->base; diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c index 174344c450af..3c51d8c502ed 100644 --- a/drivers/net/ethernet/agere/et131x.c +++ b/drivers/net/ethernet/agere/et131x.c @@ -3811,7 +3811,7 @@ drop_err: * specified by the 'tx_timeo" element in the net_device structure (see * et131x_alloc_device() to see how this value is set). */ -static void et131x_tx_timeout(struct net_device *netdev) +static void et131x_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct et131x_adapter *adapter = netdev_priv(netdev); struct tx_ring *tx_ring = &adapter->tx_ring; diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.c b/drivers/net/ethernet/allwinner/sun4i-emac.c index 0537df06a9b5..5ea806423e4c 100644 --- a/drivers/net/ethernet/allwinner/sun4i-emac.c +++ b/drivers/net/ethernet/allwinner/sun4i-emac.c @@ -407,7 +407,7 @@ static void emac_init_device(struct net_device *dev) } /* Our watchdog timed out. Called by the networking layer */ -static void emac_timeout(struct net_device *dev) +static void emac_timeout(struct net_device *dev, unsigned int txqueue) { struct emac_board_info *db = netdev_priv(dev); unsigned long flags; diff --git a/drivers/net/ethernet/alteon/acenic.c b/drivers/net/ethernet/alteon/acenic.c index 46b4207d3266..f366faf88eee 100644 --- a/drivers/net/ethernet/alteon/acenic.c +++ b/drivers/net/ethernet/alteon/acenic.c @@ -437,7 +437,7 @@ static const struct ethtool_ops ace_ethtool_ops = { .set_link_ksettings = ace_set_link_ksettings, }; -static void ace_watchdog(struct net_device *dev); +static void ace_watchdog(struct net_device *dev, unsigned int txqueue); static const struct net_device_ops ace_netdev_ops = { .ndo_open = ace_open, @@ -1542,7 +1542,7 @@ static void ace_set_rxtx_parms(struct net_device *dev, int jumbo) } -static void ace_watchdog(struct net_device *data) +static void ace_watchdog(struct net_device *data, unsigned int txqueue) { struct net_device *dev = data; struct ace_private *ap = netdev_priv(dev); diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index a3250dcf7d53..745fffd422aa 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -744,7 +744,9 @@ static int ena_set_channels(struct net_device *netdev, struct ena_adapter *adapter = netdev_priv(netdev); u32 count = channels->combined_count; /* The check for max value is already done in ethtool */ - if (count < ENA_MIN_NUM_IO_QUEUES) + if (count < ENA_MIN_NUM_IO_QUEUES || + (ena_xdp_present(adapter) && + !ena_xdp_legal_queue_count(adapter, channels->combined_count))) return -EINVAL; return ena_update_queue_count(adapter, count); diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index d46a912002ff..26954fde4766 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -36,7 +36,6 @@ #include <linux/cpu_rmap.h> #endif /* CONFIG_RFS_ACCEL */ #include <linux/ethtool.h> -#include <linux/if_vlan.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/numa.h> @@ -47,6 +46,7 @@ #include <net/ip.h> #include "ena_netdev.h" +#include <linux/bpf_trace.h> #include "ena_pci_id_tbl.h" static char version[] = DEVICE_NAME " v" DRV_MODULE_VERSION "\n"; @@ -78,7 +78,37 @@ static void check_for_admin_com_state(struct ena_adapter *adapter); static void ena_destroy_device(struct ena_adapter *adapter, bool graceful); static int ena_restore_device(struct ena_adapter *adapter); -static void ena_tx_timeout(struct net_device *dev) +static void ena_init_io_rings(struct ena_adapter *adapter, + int first_index, int count); +static void ena_init_napi_in_range(struct ena_adapter *adapter, int first_index, + int count); +static void ena_del_napi_in_range(struct ena_adapter *adapter, int first_index, + int count); +static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid); +static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter, + int first_index, + int count); +static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid); +static void ena_free_tx_resources(struct ena_adapter *adapter, int qid); +static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget); +static void ena_destroy_all_tx_queues(struct ena_adapter *adapter); +static void ena_free_all_io_tx_resources(struct ena_adapter *adapter); +static void ena_napi_disable_in_range(struct ena_adapter *adapter, + int first_index, int count); +static void ena_napi_enable_in_range(struct ena_adapter *adapter, + int first_index, int count); +static int ena_up(struct ena_adapter *adapter); +static void ena_down(struct ena_adapter *adapter); +static void ena_unmask_interrupt(struct ena_ring *tx_ring, + struct ena_ring *rx_ring); +static void ena_update_ring_numa_node(struct ena_ring *tx_ring, + struct ena_ring *rx_ring); +static void ena_unmap_tx_buff(struct ena_ring *tx_ring, + struct ena_tx_buffer *tx_info); +static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter, + int first_index, int count); + +static void ena_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct ena_adapter *adapter = netdev_priv(dev); @@ -123,6 +153,451 @@ static int ena_change_mtu(struct net_device *dev, int new_mtu) return ret; } +static int ena_xmit_common(struct net_device *dev, + struct ena_ring *ring, + struct ena_tx_buffer *tx_info, + struct ena_com_tx_ctx *ena_tx_ctx, + u16 next_to_use, + u32 bytes) +{ + struct ena_adapter *adapter = netdev_priv(dev); + int rc, nb_hw_desc; + + if (unlikely(ena_com_is_doorbell_needed(ring->ena_com_io_sq, + ena_tx_ctx))) { + netif_dbg(adapter, tx_queued, dev, + "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n", + ring->qid); + ena_com_write_sq_doorbell(ring->ena_com_io_sq); + } + + /* prepare the packet's descriptors to dma engine */ + rc = ena_com_prepare_tx(ring->ena_com_io_sq, ena_tx_ctx, + &nb_hw_desc); + + /* In case there isn't enough space in the queue for the packet, + * we simply drop it. All other failure reasons of + * ena_com_prepare_tx() are fatal and therefore require a device reset. + */ + if (unlikely(rc)) { + netif_err(adapter, tx_queued, dev, + "failed to prepare tx bufs\n"); + u64_stats_update_begin(&ring->syncp); + ring->tx_stats.prepare_ctx_err++; + u64_stats_update_end(&ring->syncp); + if (rc != -ENOMEM) { + adapter->reset_reason = + ENA_REGS_RESET_DRIVER_INVALID_STATE; + set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + } + return rc; + } + + u64_stats_update_begin(&ring->syncp); + ring->tx_stats.cnt++; + ring->tx_stats.bytes += bytes; + u64_stats_update_end(&ring->syncp); + + tx_info->tx_descs = nb_hw_desc; + tx_info->last_jiffies = jiffies; + tx_info->print_once = 0; + + ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use, + ring->ring_size); + return 0; +} + +/* This is the XDP napi callback. XDP queues use a separate napi callback + * than Rx/Tx queues. + */ +static int ena_xdp_io_poll(struct napi_struct *napi, int budget) +{ + struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi); + u32 xdp_work_done, xdp_budget; + struct ena_ring *xdp_ring; + int napi_comp_call = 0; + int ret; + + xdp_ring = ena_napi->xdp_ring; + xdp_ring->first_interrupt = ena_napi->first_interrupt; + + xdp_budget = budget; + + if (!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags) || + test_bit(ENA_FLAG_TRIGGER_RESET, &xdp_ring->adapter->flags)) { + napi_complete_done(napi, 0); + return 0; + } + + xdp_work_done = ena_clean_xdp_irq(xdp_ring, xdp_budget); + + /* If the device is about to reset or down, avoid unmask + * the interrupt and return 0 so NAPI won't reschedule + */ + if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags))) { + napi_complete_done(napi, 0); + ret = 0; + } else if (xdp_budget > xdp_work_done) { + napi_comp_call = 1; + if (napi_complete_done(napi, xdp_work_done)) + ena_unmask_interrupt(xdp_ring, NULL); + ena_update_ring_numa_node(xdp_ring, NULL); + ret = xdp_work_done; + } else { + ret = xdp_budget; + } + + u64_stats_update_begin(&xdp_ring->syncp); + xdp_ring->tx_stats.napi_comp += napi_comp_call; + xdp_ring->tx_stats.tx_poll++; + u64_stats_update_end(&xdp_ring->syncp); + + return ret; +} + +static int ena_xdp_tx_map_buff(struct ena_ring *xdp_ring, + struct ena_tx_buffer *tx_info, + struct xdp_buff *xdp, + void **push_hdr, + u32 *push_len) +{ + struct ena_adapter *adapter = xdp_ring->adapter; + struct ena_com_buf *ena_buf; + dma_addr_t dma = 0; + u32 size; + + tx_info->xdpf = convert_to_xdp_frame(xdp); + size = tx_info->xdpf->len; + ena_buf = tx_info->bufs; + + /* llq push buffer */ + *push_len = min_t(u32, size, xdp_ring->tx_max_header_size); + *push_hdr = tx_info->xdpf->data; + + if (size - *push_len > 0) { + dma = dma_map_single(xdp_ring->dev, + *push_hdr + *push_len, + size - *push_len, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(xdp_ring->dev, dma))) + goto error_report_dma_error; + + tx_info->map_linear_data = 1; + tx_info->num_of_bufs = 1; + } + + ena_buf->paddr = dma; + ena_buf->len = size; + + return 0; + +error_report_dma_error: + u64_stats_update_begin(&xdp_ring->syncp); + xdp_ring->tx_stats.dma_mapping_err++; + u64_stats_update_end(&xdp_ring->syncp); + netdev_warn(adapter->netdev, "failed to map xdp buff\n"); + + xdp_return_frame_rx_napi(tx_info->xdpf); + tx_info->xdpf = NULL; + tx_info->num_of_bufs = 0; + + return -EINVAL; +} + +static int ena_xdp_xmit_buff(struct net_device *dev, + struct xdp_buff *xdp, + int qid, + struct ena_rx_buffer *rx_info) +{ + struct ena_adapter *adapter = netdev_priv(dev); + struct ena_com_tx_ctx ena_tx_ctx = {0}; + struct ena_tx_buffer *tx_info; + struct ena_ring *xdp_ring; + struct ena_ring *rx_ring; + u16 next_to_use, req_id; + int rc; + void *push_hdr; + u32 push_len; + + xdp_ring = &adapter->tx_ring[qid]; + next_to_use = xdp_ring->next_to_use; + req_id = xdp_ring->free_ids[next_to_use]; + tx_info = &xdp_ring->tx_buffer_info[req_id]; + tx_info->num_of_bufs = 0; + rx_ring = &xdp_ring->adapter->rx_ring[qid - + xdp_ring->adapter->xdp_first_ring]; + page_ref_inc(rx_info->page); + tx_info->xdp_rx_page = rx_info->page; + + rc = ena_xdp_tx_map_buff(xdp_ring, tx_info, xdp, &push_hdr, &push_len); + if (unlikely(rc)) + goto error_drop_packet; + + ena_tx_ctx.ena_bufs = tx_info->bufs; + ena_tx_ctx.push_header = push_hdr; + ena_tx_ctx.num_bufs = tx_info->num_of_bufs; + ena_tx_ctx.req_id = req_id; + ena_tx_ctx.header_len = push_len; + + rc = ena_xmit_common(dev, + xdp_ring, + tx_info, + &ena_tx_ctx, + next_to_use, + xdp->data_end - xdp->data); + if (rc) + goto error_unmap_dma; + /* trigger the dma engine. ena_com_write_sq_doorbell() + * has a mb + */ + ena_com_write_sq_doorbell(xdp_ring->ena_com_io_sq); + u64_stats_update_begin(&xdp_ring->syncp); + xdp_ring->tx_stats.doorbells++; + u64_stats_update_end(&xdp_ring->syncp); + + return NETDEV_TX_OK; + +error_unmap_dma: + ena_unmap_tx_buff(xdp_ring, tx_info); + tx_info->xdpf = NULL; +error_drop_packet: + + return NETDEV_TX_OK; +} + +static int ena_xdp_execute(struct ena_ring *rx_ring, + struct xdp_buff *xdp, + struct ena_rx_buffer *rx_info) +{ + struct bpf_prog *xdp_prog; + u32 verdict = XDP_PASS; + + rcu_read_lock(); + xdp_prog = READ_ONCE(rx_ring->xdp_bpf_prog); + + if (!xdp_prog) + goto out; + + verdict = bpf_prog_run_xdp(xdp_prog, xdp); + + if (verdict == XDP_TX) + ena_xdp_xmit_buff(rx_ring->netdev, + xdp, + rx_ring->qid + rx_ring->adapter->num_io_queues, + rx_info); + else if (unlikely(verdict == XDP_ABORTED)) + trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); + else if (unlikely(verdict > XDP_TX)) + bpf_warn_invalid_xdp_action(verdict); +out: + rcu_read_unlock(); + return verdict; +} + +static void ena_init_all_xdp_queues(struct ena_adapter *adapter) +{ + adapter->xdp_first_ring = adapter->num_io_queues; + adapter->xdp_num_queues = adapter->num_io_queues; + + ena_init_io_rings(adapter, + adapter->xdp_first_ring, + adapter->xdp_num_queues); +} + +static int ena_setup_and_create_all_xdp_queues(struct ena_adapter *adapter) +{ + int rc = 0; + + rc = ena_setup_tx_resources_in_range(adapter, adapter->xdp_first_ring, + adapter->xdp_num_queues); + if (rc) + goto setup_err; + + rc = ena_create_io_tx_queues_in_range(adapter, + adapter->xdp_first_ring, + adapter->xdp_num_queues); + if (rc) + goto create_err; + + return 0; + +create_err: + ena_free_all_io_tx_resources(adapter); +setup_err: + return rc; +} + +/* Provides a way for both kernel and bpf-prog to know + * more about the RX-queue a given XDP frame arrived on. + */ +static int ena_xdp_register_rxq_info(struct ena_ring *rx_ring) +{ + int rc; + + rc = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, rx_ring->qid); + + if (rc) { + netif_err(rx_ring->adapter, ifup, rx_ring->netdev, + "Failed to register xdp rx queue info. RX queue num %d rc: %d\n", + rx_ring->qid, rc); + goto err; + } + + rc = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, MEM_TYPE_PAGE_SHARED, + NULL); + + if (rc) { + netif_err(rx_ring->adapter, ifup, rx_ring->netdev, + "Failed to register xdp rx queue info memory model. RX queue num %d rc: %d\n", + rx_ring->qid, rc); + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); + } + +err: + return rc; +} + +static void ena_xdp_unregister_rxq_info(struct ena_ring *rx_ring) +{ + xdp_rxq_info_unreg_mem_model(&rx_ring->xdp_rxq); + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); +} + +void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter, + struct bpf_prog *prog, + int first, + int count) +{ + struct ena_ring *rx_ring; + int i = 0; + + for (i = first; i < count; i++) { + rx_ring = &adapter->rx_ring[i]; + xchg(&rx_ring->xdp_bpf_prog, prog); + if (prog) { + ena_xdp_register_rxq_info(rx_ring); + rx_ring->rx_headroom = XDP_PACKET_HEADROOM; + } else { + ena_xdp_unregister_rxq_info(rx_ring); + rx_ring->rx_headroom = 0; + } + } +} + +void ena_xdp_exchange_program(struct ena_adapter *adapter, + struct bpf_prog *prog) +{ + struct bpf_prog *old_bpf_prog = xchg(&adapter->xdp_bpf_prog, prog); + + ena_xdp_exchange_program_rx_in_range(adapter, + prog, + 0, + adapter->num_io_queues); + + if (old_bpf_prog) + bpf_prog_put(old_bpf_prog); +} + +static int ena_destroy_and_free_all_xdp_queues(struct ena_adapter *adapter) +{ + bool was_up; + int rc; + + was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); + + if (was_up) + ena_down(adapter); + + adapter->xdp_first_ring = 0; + adapter->xdp_num_queues = 0; + ena_xdp_exchange_program(adapter, NULL); + if (was_up) { + rc = ena_up(adapter); + if (rc) + return rc; + } + return 0; +} + +static int ena_xdp_set(struct net_device *netdev, struct netdev_bpf *bpf) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + struct bpf_prog *prog = bpf->prog; + struct bpf_prog *old_bpf_prog; + int rc, prev_mtu; + bool is_up; + + is_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); + rc = ena_xdp_allowed(adapter); + if (rc == ENA_XDP_ALLOWED) { + old_bpf_prog = adapter->xdp_bpf_prog; + if (prog) { + if (!is_up) { + ena_init_all_xdp_queues(adapter); + } else if (!old_bpf_prog) { + ena_down(adapter); + ena_init_all_xdp_queues(adapter); + } + ena_xdp_exchange_program(adapter, prog); + + if (is_up && !old_bpf_prog) { + rc = ena_up(adapter); + if (rc) + return rc; + } + } else if (old_bpf_prog) { + rc = ena_destroy_and_free_all_xdp_queues(adapter); + if (rc) + return rc; + } + + prev_mtu = netdev->max_mtu; + netdev->max_mtu = prog ? ENA_XDP_MAX_MTU : adapter->max_mtu; + + if (!old_bpf_prog) + netif_info(adapter, drv, adapter->netdev, + "xdp program set, changing the max_mtu from %d to %d", + prev_mtu, netdev->max_mtu); + + } else if (rc == ENA_XDP_CURRENT_MTU_TOO_LARGE) { + netif_err(adapter, drv, adapter->netdev, + "Failed to set xdp program, the current MTU (%d) is larger than the maximum allowed MTU (%lu) while xdp is on", + netdev->mtu, ENA_XDP_MAX_MTU); + NL_SET_ERR_MSG_MOD(bpf->extack, + "Failed to set xdp program, the current MTU is larger than the maximum allowed MTU. Check the dmesg for more info"); + return -EINVAL; + } else if (rc == ENA_XDP_NO_ENOUGH_QUEUES) { + netif_err(adapter, drv, adapter->netdev, + "Failed to set xdp program, the Rx/Tx channel count should be at most half of the maximum allowed channel count. The current queue count (%d), the maximal queue count (%d)\n", + adapter->num_io_queues, adapter->max_num_io_queues); + NL_SET_ERR_MSG_MOD(bpf->extack, + "Failed to set xdp program, there is no enough space for allocating XDP queues, Check the dmesg for more info"); + return -EINVAL; + } + + return 0; +} + +/* This is the main xdp callback, it's used by the kernel to set/unset the xdp + * program as well as to query the current xdp program id. + */ +static int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + + switch (bpf->command) { + case XDP_SETUP_PROG: + return ena_xdp_set(netdev, bpf); + case XDP_QUERY_PROG: + bpf->prog_id = adapter->xdp_bpf_prog ? + adapter->xdp_bpf_prog->aux->id : 0; + break; + default: + return -EINVAL; + } + return 0; +} + static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter) { #ifdef CONFIG_RFS_ACCEL @@ -164,7 +639,8 @@ static void ena_init_io_rings_common(struct ena_adapter *adapter, u64_stats_init(&ring->syncp); } -static void ena_init_io_rings(struct ena_adapter *adapter) +static void ena_init_io_rings(struct ena_adapter *adapter, + int first_index, int count) { struct ena_com_dev *ena_dev; struct ena_ring *txr, *rxr; @@ -172,13 +648,12 @@ static void ena_init_io_rings(struct ena_adapter *adapter) ena_dev = adapter->ena_dev; - for (i = 0; i < adapter->num_io_queues; i++) { + for (i = first_index; i < first_index + count; i++) { txr = &adapter->tx_ring[i]; rxr = &adapter->rx_ring[i]; - /* TX/RX common ring state */ + /* TX common ring state */ ena_init_io_rings_common(adapter, txr, i); - ena_init_io_rings_common(adapter, rxr, i); /* TX specific ring state */ txr->ring_size = adapter->requested_tx_ring_size; @@ -188,14 +663,20 @@ static void ena_init_io_rings(struct ena_adapter *adapter) txr->smoothed_interval = ena_com_get_nonadaptive_moderation_interval_tx(ena_dev); - /* RX specific ring state */ - rxr->ring_size = adapter->requested_rx_ring_size; - rxr->rx_copybreak = adapter->rx_copybreak; - rxr->sgl_size = adapter->max_rx_sgl_size; - rxr->smoothed_interval = - ena_com_get_nonadaptive_moderation_interval_rx(ena_dev); - rxr->empty_rx_queue = 0; - adapter->ena_napi[i].dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + /* Don't init RX queues for xdp queues */ + if (!ENA_IS_XDP_INDEX(adapter, i)) { + /* RX common ring state */ + ena_init_io_rings_common(adapter, rxr, i); + + /* RX specific ring state */ + rxr->ring_size = adapter->requested_rx_ring_size; + rxr->rx_copybreak = adapter->rx_copybreak; + rxr->sgl_size = adapter->max_rx_sgl_size; + rxr->smoothed_interval = + ena_com_get_nonadaptive_moderation_interval_rx(ena_dev); + rxr->empty_rx_queue = 0; + adapter->ena_napi[i].dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + } } } @@ -285,16 +766,13 @@ static void ena_free_tx_resources(struct ena_adapter *adapter, int qid) tx_ring->push_buf_intermediate_buf = NULL; } -/* ena_setup_all_tx_resources - allocate I/O Tx queues resources for All queues - * @adapter: private structure - * - * Return 0 on success, negative on failure - */ -static int ena_setup_all_tx_resources(struct ena_adapter *adapter) +static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter, + int first_index, + int count) { int i, rc = 0; - for (i = 0; i < adapter->num_io_queues; i++) { + for (i = first_index; i < first_index + count; i++) { rc = ena_setup_tx_resources(adapter, i); if (rc) goto err_setup_tx; @@ -308,11 +786,20 @@ err_setup_tx: "Tx queue %d: allocation failed\n", i); /* rewind the index freeing the rings as we go */ - while (i--) + while (first_index < i--) ena_free_tx_resources(adapter, i); return rc; } +static void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter, + int first_index, int count) +{ + int i; + + for (i = first_index; i < first_index + count; i++) + ena_free_tx_resources(adapter, i); +} + /* ena_free_all_io_tx_resources - Free I/O Tx Resources for All Queues * @adapter: board private structure * @@ -320,10 +807,10 @@ err_setup_tx: */ static void ena_free_all_io_tx_resources(struct ena_adapter *adapter) { - int i; - - for (i = 0; i < adapter->num_io_queues; i++) - ena_free_tx_resources(adapter, i); + ena_free_all_io_tx_resources_in_range(adapter, + 0, + adapter->xdp_num_queues + + adapter->num_io_queues); } static int validate_rx_req_id(struct ena_ring *rx_ring, u16 req_id) @@ -495,8 +982,8 @@ static int ena_alloc_rx_page(struct ena_ring *rx_ring, rx_info->page = page; rx_info->page_offset = 0; ena_buf = &rx_info->ena_buf; - ena_buf->paddr = dma; - ena_buf->len = ENA_PAGE_SIZE; + ena_buf->paddr = dma + rx_ring->rx_headroom; + ena_buf->len = ENA_PAGE_SIZE - rx_ring->rx_headroom; return 0; } @@ -513,7 +1000,9 @@ static void ena_free_rx_page(struct ena_ring *rx_ring, return; } - dma_unmap_page(rx_ring->dev, ena_buf->paddr, ENA_PAGE_SIZE, + dma_unmap_page(rx_ring->dev, + ena_buf->paddr - rx_ring->rx_headroom, + ENA_PAGE_SIZE, DMA_FROM_DEVICE); __free_page(page); @@ -620,8 +1109,8 @@ static void ena_free_all_rx_bufs(struct ena_adapter *adapter) ena_free_rx_bufs(adapter, i); } -static void ena_unmap_tx_skb(struct ena_ring *tx_ring, - struct ena_tx_buffer *tx_info) +static void ena_unmap_tx_buff(struct ena_ring *tx_ring, + struct ena_tx_buffer *tx_info) { struct ena_com_buf *ena_buf; u32 cnt; @@ -675,7 +1164,7 @@ static void ena_free_tx_bufs(struct ena_ring *tx_ring) tx_ring->qid, i); } - ena_unmap_tx_skb(tx_ring, tx_info); + ena_unmap_tx_buff(tx_ring, tx_info); dev_kfree_skb_any(tx_info->skb); } @@ -688,7 +1177,7 @@ static void ena_free_all_tx_bufs(struct ena_adapter *adapter) struct ena_ring *tx_ring; int i; - for (i = 0; i < adapter->num_io_queues; i++) { + for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) { tx_ring = &adapter->tx_ring[i]; ena_free_tx_bufs(tx_ring); } @@ -699,7 +1188,7 @@ static void ena_destroy_all_tx_queues(struct ena_adapter *adapter) u16 ena_qid; int i; - for (i = 0; i < adapter->num_io_queues; i++) { + for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) { ena_qid = ENA_IO_TXQ_IDX(i); ena_com_destroy_io_queue(adapter->ena_dev, ena_qid); } @@ -723,6 +1212,32 @@ static void ena_destroy_all_io_queues(struct ena_adapter *adapter) ena_destroy_all_rx_queues(adapter); } +static int handle_invalid_req_id(struct ena_ring *ring, u16 req_id, + struct ena_tx_buffer *tx_info, bool is_xdp) +{ + if (tx_info) + netif_err(ring->adapter, + tx_done, + ring->netdev, + "tx_info doesn't have valid %s", + is_xdp ? "xdp frame" : "skb"); + else + netif_err(ring->adapter, + tx_done, + ring->netdev, + "Invalid req_id: %hu\n", + req_id); + + u64_stats_update_begin(&ring->syncp); + ring->tx_stats.bad_req_id++; + u64_stats_update_end(&ring->syncp); + + /* Trigger device reset */ + ring->adapter->reset_reason = ENA_REGS_RESET_INV_TX_REQ_ID; + set_bit(ENA_FLAG_TRIGGER_RESET, &ring->adapter->flags); + return -EFAULT; +} + static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id) { struct ena_tx_buffer *tx_info = NULL; @@ -733,21 +1248,20 @@ static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id) return 0; } - if (tx_info) - netif_err(tx_ring->adapter, tx_done, tx_ring->netdev, - "tx_info doesn't have valid skb\n"); - else - netif_err(tx_ring->adapter, tx_done, tx_ring->netdev, - "Invalid req_id: %hu\n", req_id); + return handle_invalid_req_id(tx_ring, req_id, tx_info, false); +} - u64_stats_update_begin(&tx_ring->syncp); - tx_ring->tx_stats.bad_req_id++; - u64_stats_update_end(&tx_ring->syncp); +static int validate_xdp_req_id(struct ena_ring *xdp_ring, u16 req_id) +{ + struct ena_tx_buffer *tx_info = NULL; - /* Trigger device reset */ - tx_ring->adapter->reset_reason = ENA_REGS_RESET_INV_TX_REQ_ID; - set_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags); - return -EFAULT; + if (likely(req_id < xdp_ring->ring_size)) { + tx_info = &xdp_ring->tx_buffer_info[req_id]; + if (likely(tx_info->xdpf)) + return 0; + } + + return handle_invalid_req_id(xdp_ring, req_id, tx_info, true); } static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget) @@ -786,7 +1300,7 @@ static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget) tx_info->skb = NULL; tx_info->last_jiffies = 0; - ena_unmap_tx_skb(tx_ring, tx_info); + ena_unmap_tx_buff(tx_ring, tx_info); netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev, "tx_poll: q %d skb %p completed\n", tx_ring->qid, @@ -1037,6 +1551,33 @@ static void ena_set_rx_hash(struct ena_ring *rx_ring, } } +int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp) +{ + struct ena_rx_buffer *rx_info; + int ret; + + rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id]; + xdp->data = page_address(rx_info->page) + + rx_info->page_offset + rx_ring->rx_headroom; + xdp_set_data_meta_invalid(xdp); + xdp->data_hard_start = page_address(rx_info->page); + xdp->data_end = xdp->data + rx_ring->ena_bufs[0].len; + /* If for some reason we received a bigger packet than + * we expect, then we simply drop it + */ + if (unlikely(rx_ring->ena_bufs[0].len > ENA_XDP_MAX_MTU)) + return XDP_DROP; + + ret = ena_xdp_execute(rx_ring, xdp, rx_info); + + /* The xdp program might expand the headers */ + if (ret == XDP_PASS) { + rx_info->page_offset = xdp->data - xdp->data_hard_start; + rx_ring->ena_bufs[0].len = xdp->data_end - xdp->data; + } + + return ret; +} /* ena_clean_rx_irq - Cleanup RX irq * @rx_ring: RX ring to clean * @napi: napi handler @@ -1048,23 +1589,27 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, u32 budget) { u16 next_to_clean = rx_ring->next_to_clean; - u32 res_budget, work_done; - struct ena_com_rx_ctx ena_rx_ctx; struct ena_adapter *adapter; + u32 res_budget, work_done; + int rx_copybreak_pkt = 0; + int refill_threshold; struct sk_buff *skb; int refill_required; - int refill_threshold; - int rc = 0; + struct xdp_buff xdp; int total_len = 0; - int rx_copybreak_pkt = 0; + int xdp_verdict; + int rc = 0; int i; netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, "%s qid %d\n", __func__, rx_ring->qid); res_budget = budget; + xdp.rxq = &rx_ring->xdp_rxq; do { + xdp_verdict = XDP_PASS; + skb = NULL; ena_rx_ctx.ena_bufs = rx_ring->ena_bufs; ena_rx_ctx.max_bufs = rx_ring->sgl_size; ena_rx_ctx.descs = 0; @@ -1082,12 +1627,22 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto, ena_rx_ctx.l4_proto, ena_rx_ctx.hash); + if (ena_xdp_present_ring(rx_ring)) + xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp); + /* allocate skb and fill it */ - skb = ena_rx_skb(rx_ring, rx_ring->ena_bufs, ena_rx_ctx.descs, - &next_to_clean); + if (xdp_verdict == XDP_PASS) + skb = ena_rx_skb(rx_ring, + rx_ring->ena_bufs, + ena_rx_ctx.descs, + &next_to_clean); - /* exit if we failed to retrieve a buffer */ if (unlikely(!skb)) { + if (xdp_verdict == XDP_TX) { + ena_free_rx_page(rx_ring, + &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id]); + res_budget--; + } for (i = 0; i < ena_rx_ctx.descs; i++) { rx_ring->free_ids[next_to_clean] = rx_ring->ena_bufs[i].req_id; @@ -1095,6 +1650,8 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, ENA_RX_RING_IDX_NEXT(next_to_clean, rx_ring->ring_size); } + if (xdp_verdict == XDP_TX || xdp_verdict == XDP_DROP) + continue; break; } @@ -1188,9 +1745,14 @@ static void ena_unmask_interrupt(struct ena_ring *tx_ring, struct ena_ring *rx_ring) { struct ena_eth_io_intr_reg intr_reg; - u32 rx_interval = ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev) ? - rx_ring->smoothed_interval : - ena_com_get_nonadaptive_moderation_interval_rx(rx_ring->ena_dev); + u32 rx_interval = 0; + /* Rx ring can be NULL when for XDP tx queues which don't have an + * accompanying rx_ring pair. + */ + if (rx_ring) + rx_interval = ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev) ? + rx_ring->smoothed_interval : + ena_com_get_nonadaptive_moderation_interval_rx(rx_ring->ena_dev); /* Update intr register: rx intr delay, * tx intr delay and interrupt unmask @@ -1203,8 +1765,9 @@ static void ena_unmask_interrupt(struct ena_ring *tx_ring, /* It is a shared MSI-X. * Tx and Rx CQ have pointer to it. * So we use one of them to reach the intr reg + * The Tx ring is used because the rx_ring is NULL for XDP queues */ - ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg); + ena_com_unmask_intr(tx_ring->ena_com_io_cq, &intr_reg); } static void ena_update_ring_numa_node(struct ena_ring *tx_ring, @@ -1222,22 +1785,82 @@ static void ena_update_ring_numa_node(struct ena_ring *tx_ring, if (numa_node != NUMA_NO_NODE) { ena_com_update_numa_node(tx_ring->ena_com_io_cq, numa_node); - ena_com_update_numa_node(rx_ring->ena_com_io_cq, numa_node); + if (rx_ring) + ena_com_update_numa_node(rx_ring->ena_com_io_cq, + numa_node); } tx_ring->cpu = cpu; - rx_ring->cpu = cpu; + if (rx_ring) + rx_ring->cpu = cpu; return; out: put_cpu(); } +static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget) +{ + u32 total_done = 0; + u16 next_to_clean; + u32 tx_bytes = 0; + int tx_pkts = 0; + u16 req_id; + int rc; + + if (unlikely(!xdp_ring)) + return 0; + next_to_clean = xdp_ring->next_to_clean; + + while (tx_pkts < budget) { + struct ena_tx_buffer *tx_info; + struct xdp_frame *xdpf; + + rc = ena_com_tx_comp_req_id_get(xdp_ring->ena_com_io_cq, + &req_id); + if (rc) + break; + + rc = validate_xdp_req_id(xdp_ring, req_id); + if (rc) + break; + + tx_info = &xdp_ring->tx_buffer_info[req_id]; + xdpf = tx_info->xdpf; + + tx_info->xdpf = NULL; + tx_info->last_jiffies = 0; + ena_unmap_tx_buff(xdp_ring, tx_info); + + netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev, + "tx_poll: q %d skb %p completed\n", xdp_ring->qid, + xdpf); + + tx_bytes += xdpf->len; + tx_pkts++; + total_done += tx_info->tx_descs; + + __free_page(tx_info->xdp_rx_page); + xdp_ring->free_ids[next_to_clean] = req_id; + next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean, + xdp_ring->ring_size); + } + + xdp_ring->next_to_clean = next_to_clean; + ena_com_comp_ack(xdp_ring->ena_com_io_sq, total_done); + ena_com_update_dev_comp_head(xdp_ring->ena_com_io_cq); + + netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev, + "tx_poll: q %d done. total pkts: %d\n", + xdp_ring->qid, tx_pkts); + + return tx_pkts; +} + static int ena_io_poll(struct napi_struct *napi, int budget) { struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi); struct ena_ring *tx_ring, *rx_ring; - u32 tx_work_done; u32 rx_work_done; int tx_budget; @@ -1247,6 +1870,9 @@ static int ena_io_poll(struct napi_struct *napi, int budget) tx_ring = ena_napi->tx_ring; rx_ring = ena_napi->rx_ring; + tx_ring->first_interrupt = ena_napi->first_interrupt; + rx_ring->first_interrupt = ena_napi->first_interrupt; + tx_budget = tx_ring->ring_size / ENA_TX_POLL_BUDGET_DIVIDER; if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) || @@ -1318,8 +1944,7 @@ static irqreturn_t ena_intr_msix_io(int irq, void *data) { struct ena_napi *ena_napi = data; - ena_napi->tx_ring->first_interrupt = true; - ena_napi->rx_ring->first_interrupt = true; + ena_napi->first_interrupt = true; napi_schedule_irqoff(&ena_napi->napi); @@ -1394,10 +2019,12 @@ static void ena_setup_io_intr(struct ena_adapter *adapter) { struct net_device *netdev; int irq_idx, i, cpu; + int io_queue_count; netdev = adapter->netdev; + io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues; - for (i = 0; i < adapter->num_io_queues; i++) { + for (i = 0; i < io_queue_count; i++) { irq_idx = ENA_IO_IRQ_IDX(i); cpu = i % num_online_cpus(); @@ -1525,45 +2152,64 @@ static void ena_disable_io_intr_sync(struct ena_adapter *adapter) synchronize_irq(adapter->irq_tbl[i].vector); } -static void ena_del_napi(struct ena_adapter *adapter) +static void ena_del_napi_in_range(struct ena_adapter *adapter, + int first_index, + int count) { int i; - for (i = 0; i < adapter->num_io_queues; i++) - netif_napi_del(&adapter->ena_napi[i].napi); + for (i = first_index; i < first_index + count; i++) { + /* Check if napi was initialized before */ + if (!ENA_IS_XDP_INDEX(adapter, i) || + adapter->ena_napi[i].xdp_ring) + netif_napi_del(&adapter->ena_napi[i].napi); + else + WARN_ON(ENA_IS_XDP_INDEX(adapter, i) && + adapter->ena_napi[i].xdp_ring); + } } -static void ena_init_napi(struct ena_adapter *adapter) +static void ena_init_napi_in_range(struct ena_adapter *adapter, + int first_index, int count) { - struct ena_napi *napi; + struct ena_napi *napi = {0}; int i; - for (i = 0; i < adapter->num_io_queues; i++) { + for (i = first_index; i < first_index + count; i++) { napi = &adapter->ena_napi[i]; netif_napi_add(adapter->netdev, &adapter->ena_napi[i].napi, - ena_io_poll, + ENA_IS_XDP_INDEX(adapter, i) ? ena_xdp_io_poll : ena_io_poll, ENA_NAPI_BUDGET); - napi->rx_ring = &adapter->rx_ring[i]; - napi->tx_ring = &adapter->tx_ring[i]; + + if (!ENA_IS_XDP_INDEX(adapter, i)) { + napi->rx_ring = &adapter->rx_ring[i]; + napi->tx_ring = &adapter->tx_ring[i]; + } else { + napi->xdp_ring = &adapter->tx_ring[i]; + } napi->qid = i; } } -static void ena_napi_disable_all(struct ena_adapter *adapter) +static void ena_napi_disable_in_range(struct ena_adapter *adapter, + int first_index, + int count) { int i; - for (i = 0; i < adapter->num_io_queues; i++) + for (i = first_index; i < first_index + count; i++) napi_disable(&adapter->ena_napi[i].napi); } -static void ena_napi_enable_all(struct ena_adapter *adapter) +static void ena_napi_enable_in_range(struct ena_adapter *adapter, + int first_index, + int count) { int i; - for (i = 0; i < adapter->num_io_queues; i++) + for (i = first_index; i < first_index + count; i++) napi_enable(&adapter->ena_napi[i].napi); } @@ -1578,7 +2224,7 @@ static int ena_rss_configure(struct ena_adapter *adapter) rc = ena_rss_init_default(adapter); if (rc && (rc != -EOPNOTSUPP)) { netif_err(adapter, ifup, adapter->netdev, - "Failed to init RSS rc: %d\n", rc); + "Failed to init RSS rc: %d\n", rc); return rc; } } @@ -1616,7 +2262,9 @@ static int ena_up_complete(struct ena_adapter *adapter) /* enable transmits */ netif_tx_start_all_queues(adapter->netdev); - ena_napi_enable_all(adapter); + ena_napi_enable_in_range(adapter, + 0, + adapter->xdp_num_queues + adapter->num_io_queues); return 0; } @@ -1649,7 +2297,7 @@ static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid) if (rc) { netif_err(adapter, ifup, adapter->netdev, "Failed to create I/O TX queue num %d rc: %d\n", - qid, rc); + qid, rc); return rc; } @@ -1668,12 +2316,13 @@ static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid) return rc; } -static int ena_create_all_io_tx_queues(struct ena_adapter *adapter) +static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter, + int first_index, int count) { struct ena_com_dev *ena_dev = adapter->ena_dev; int rc, i; - for (i = 0; i < adapter->num_io_queues; i++) { + for (i = first_index; i < first_index + count; i++) { rc = ena_create_io_tx_queue(adapter, i); if (rc) goto create_err; @@ -1682,7 +2331,7 @@ static int ena_create_all_io_tx_queues(struct ena_adapter *adapter) return 0; create_err: - while (i--) + while (i-- > first_index) ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i)); return rc; @@ -1727,13 +2376,15 @@ static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid) netif_err(adapter, ifup, adapter->netdev, "Failed to get RX queue handlers. RX queue num %d rc: %d\n", qid, rc); - ena_com_destroy_io_queue(ena_dev, ena_qid); - return rc; + goto err; } ena_com_update_numa_node(rx_ring->ena_com_io_cq, ctx.numa_node); return rc; +err: + ena_com_destroy_io_queue(ena_dev, ena_qid); + return rc; } static int ena_create_all_io_rx_queues(struct ena_adapter *adapter) @@ -1760,7 +2411,8 @@ create_err: } static void set_io_rings_size(struct ena_adapter *adapter, - int new_tx_size, int new_rx_size) + int new_tx_size, + int new_rx_size) { int i; @@ -1794,14 +2446,24 @@ static int create_queues_with_size_backoff(struct ena_adapter *adapter) * ones due to past queue allocation failures. */ set_io_rings_size(adapter, adapter->requested_tx_ring_size, - adapter->requested_rx_ring_size); + adapter->requested_rx_ring_size); while (1) { - rc = ena_setup_all_tx_resources(adapter); + if (ena_xdp_present(adapter)) { + rc = ena_setup_and_create_all_xdp_queues(adapter); + + if (rc) + goto err_setup_tx; + } + rc = ena_setup_tx_resources_in_range(adapter, + 0, + adapter->num_io_queues); if (rc) goto err_setup_tx; - rc = ena_create_all_io_tx_queues(adapter); + rc = ena_create_io_tx_queues_in_range(adapter, + 0, + adapter->num_io_queues); if (rc) goto err_create_tx_queues; @@ -1825,7 +2487,7 @@ err_setup_tx: if (rc != -ENOMEM) { netif_err(adapter, ifup, adapter->netdev, "Queue creation failed with error code %d\n", - rc); + rc); return rc; } @@ -1848,7 +2510,7 @@ err_setup_tx: new_rx_ring_size = cur_rx_ring_size / 2; if (new_tx_ring_size < ENA_MIN_RING_SIZE || - new_rx_ring_size < ENA_MIN_RING_SIZE) { + new_rx_ring_size < ENA_MIN_RING_SIZE) { netif_err(adapter, ifup, adapter->netdev, "Queue creation failed with the smallest possible queue size of %d for both queues. Not retrying with smaller queues\n", ENA_MIN_RING_SIZE); @@ -1867,10 +2529,11 @@ err_setup_tx: static int ena_up(struct ena_adapter *adapter) { - int rc, i; + int io_queue_count, rc, i; netdev_dbg(adapter->netdev, "%s\n", __func__); + io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues; ena_setup_io_intr(adapter); /* napi poll functions should be initialized before running @@ -1878,7 +2541,7 @@ static int ena_up(struct ena_adapter *adapter) * interrupt, causing the ISR to fire immediately while the poll * function wasn't set yet, causing a null dereference */ - ena_init_napi(adapter); + ena_init_napi_in_range(adapter, 0, io_queue_count); rc = ena_request_io_irq(adapter); if (rc) @@ -1909,7 +2572,7 @@ static int ena_up(struct ena_adapter *adapter) /* schedule napi in case we had pending packets * from the last time we disable napi */ - for (i = 0; i < adapter->num_io_queues; i++) + for (i = 0; i < io_queue_count; i++) napi_schedule(&adapter->ena_napi[i].napi); return rc; @@ -1922,13 +2585,15 @@ err_up: err_create_queues_with_backoff: ena_free_io_irq(adapter); err_req_irq: - ena_del_napi(adapter); + ena_del_napi_in_range(adapter, 0, io_queue_count); return rc; } static void ena_down(struct ena_adapter *adapter) { + int io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues; + netif_info(adapter, ifdown, adapter->netdev, "%s\n", __func__); clear_bit(ENA_FLAG_DEV_UP, &adapter->flags); @@ -1941,7 +2606,7 @@ static void ena_down(struct ena_adapter *adapter) netif_tx_disable(adapter->netdev); /* After this point the napi handler won't enable the tx queue */ - ena_napi_disable_all(adapter); + ena_napi_disable_in_range(adapter, 0, io_queue_count); /* After destroy the queue there won't be any new interrupts */ @@ -1959,7 +2624,7 @@ static void ena_down(struct ena_adapter *adapter) ena_disable_io_intr_sync(adapter); ena_free_io_irq(adapter); - ena_del_napi(adapter); + ena_del_napi_in_range(adapter, 0, io_queue_count); ena_free_all_tx_bufs(adapter); ena_free_all_rx_bufs(adapter); @@ -2049,23 +2714,47 @@ int ena_update_queue_sizes(struct ena_adapter *adapter, ena_close(adapter->netdev); adapter->requested_tx_ring_size = new_tx_size; adapter->requested_rx_ring_size = new_rx_size; - ena_init_io_rings(adapter); + ena_init_io_rings(adapter, + 0, + adapter->xdp_num_queues + + adapter->num_io_queues); return dev_was_up ? ena_up(adapter) : 0; } int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count) { struct ena_com_dev *ena_dev = adapter->ena_dev; + int prev_channel_count; bool dev_was_up; dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); ena_close(adapter->netdev); + prev_channel_count = adapter->num_io_queues; adapter->num_io_queues = new_channel_count; + if (ena_xdp_present(adapter) && + ena_xdp_allowed(adapter) == ENA_XDP_ALLOWED) { + adapter->xdp_first_ring = new_channel_count; + adapter->xdp_num_queues = new_channel_count; + if (prev_channel_count > new_channel_count) + ena_xdp_exchange_program_rx_in_range(adapter, + NULL, + new_channel_count, + prev_channel_count); + else + ena_xdp_exchange_program_rx_in_range(adapter, + adapter->xdp_bpf_prog, + prev_channel_count, + new_channel_count); + } + /* We need to destroy the rss table so that the indirection * table will be reinitialized by ena_up() */ ena_com_rss_destroy(ena_dev); - ena_init_io_rings(adapter); + ena_init_io_rings(adapter, + 0, + adapter->xdp_num_queues + + adapter->num_io_queues); return dev_was_up ? ena_open(adapter->netdev) : 0; } @@ -2249,7 +2938,7 @@ error_report_dma_error: tx_info->skb = NULL; tx_info->num_of_bufs += i; - ena_unmap_tx_skb(tx_ring, tx_info); + ena_unmap_tx_buff(tx_ring, tx_info); return -EINVAL; } @@ -2264,7 +2953,7 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) struct netdev_queue *txq; void *push_hdr; u16 next_to_use, req_id, header_len; - int qid, rc, nb_hw_desc; + int qid, rc; netif_dbg(adapter, tx_queued, dev, "%s skb %p\n", __func__, skb); /* Determine which tx ring we will be placed on */ @@ -2299,50 +2988,17 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) /* set flags and meta data */ ena_tx_csum(&ena_tx_ctx, skb); - if (unlikely(ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, &ena_tx_ctx))) { - netif_dbg(adapter, tx_queued, dev, - "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n", - qid); - ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); - } - - /* prepare the packet's descriptors to dma engine */ - rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx, - &nb_hw_desc); - - /* ena_com_prepare_tx() can't fail due to overflow of tx queue, - * since the number of free descriptors in the queue is checked - * after sending the previous packet. In case there isn't enough - * space in the queue for the next packet, it is stopped - * until there is again enough available space in the queue. - * All other failure reasons of ena_com_prepare_tx() are fatal - * and therefore require a device reset. - */ - if (unlikely(rc)) { - netif_err(adapter, tx_queued, dev, - "failed to prepare tx bufs\n"); - u64_stats_update_begin(&tx_ring->syncp); - tx_ring->tx_stats.prepare_ctx_err++; - u64_stats_update_end(&tx_ring->syncp); - adapter->reset_reason = ENA_REGS_RESET_DRIVER_INVALID_STATE; - set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + rc = ena_xmit_common(dev, + tx_ring, + tx_info, + &ena_tx_ctx, + next_to_use, + skb->len); + if (rc) goto error_unmap_dma; - } netdev_tx_sent_queue(txq, skb->len); - u64_stats_update_begin(&tx_ring->syncp); - tx_ring->tx_stats.cnt++; - tx_ring->tx_stats.bytes += skb->len; - u64_stats_update_end(&tx_ring->syncp); - - tx_info->tx_descs = nb_hw_desc; - tx_info->last_jiffies = jiffies; - tx_info->print_once = 0; - - tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use, - tx_ring->ring_size); - /* stop the queue when no more space available, the packet can have up * to sgl_size + 2. one for the meta descriptor and one for header * (if the header is larger than tx_max_header_size). @@ -2389,7 +3045,7 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; error_unmap_dma: - ena_unmap_tx_skb(tx_ring, tx_info); + ena_unmap_tx_buff(tx_ring, tx_info); tx_info->skb = NULL; error_drop_packet: @@ -2568,6 +3224,7 @@ static const struct net_device_ops ena_netdev_ops = { .ndo_change_mtu = ena_change_mtu, .ndo_set_mac_address = NULL, .ndo_validate_addr = eth_validate_addr, + .ndo_bpf = ena_xdp, }; static int ena_device_validate_params(struct ena_adapter *adapter, @@ -2947,7 +3604,9 @@ static void check_for_missing_completions(struct ena_adapter *adapter) struct ena_ring *tx_ring; struct ena_ring *rx_ring; int i, budget, rc; + int io_queue_count; + io_queue_count = adapter->xdp_num_queues + adapter->num_io_queues; /* Make sure the driver doesn't turn the device in other process */ smp_rmb(); @@ -2962,7 +3621,7 @@ static void check_for_missing_completions(struct ena_adapter *adapter) budget = ENA_MONITORED_TX_QUEUES; - for (i = adapter->last_monitored_tx_qid; i < adapter->num_io_queues; i++) { + for (i = adapter->last_monitored_tx_qid; i < io_queue_count; i++) { tx_ring = &adapter->tx_ring[i]; rx_ring = &adapter->rx_ring[i]; @@ -2970,7 +3629,8 @@ static void check_for_missing_completions(struct ena_adapter *adapter) if (unlikely(rc)) return; - rc = check_for_rx_interrupt_queue(adapter, rx_ring); + rc = !ENA_IS_XDP_INDEX(adapter, i) ? + check_for_rx_interrupt_queue(adapter, rx_ring) : 0; if (unlikely(rc)) return; @@ -2979,7 +3639,7 @@ static void check_for_missing_completions(struct ena_adapter *adapter) break; } - adapter->last_monitored_tx_qid = i % adapter->num_io_queues; + adapter->last_monitored_tx_qid = i % io_queue_count; } /* trigger napi schedule after 2 consecutive detections */ @@ -3556,6 +4216,9 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->num_io_queues = max_num_io_queues; adapter->max_num_io_queues = max_num_io_queues; + adapter->xdp_first_ring = 0; + adapter->xdp_num_queues = 0; + adapter->last_monitored_tx_qid = 0; adapter->rx_copybreak = ENA_DEFAULT_RX_COPYBREAK; @@ -3569,7 +4232,10 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) "Failed to query interrupt moderation feature\n"); goto err_netdev_destroy; } - ena_init_io_rings(adapter); + ena_init_io_rings(adapter, + 0, + adapter->xdp_num_queues + + adapter->num_io_queues); netdev->netdev_ops = &ena_netdev_ops; netdev->watchdog_timeo = TX_TIMEOUT; diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index bffd778f2ce3..094324fd0edc 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -36,6 +36,7 @@ #include <linux/bitops.h> #include <linux/dim.h> #include <linux/etherdevice.h> +#include <linux/if_vlan.h> #include <linux/inetdevice.h> #include <linux/interrupt.h> #include <linux/netdevice.h> @@ -142,6 +143,18 @@ #define ENA_MMIO_DISABLE_REG_READ BIT(0) +/* The max MTU size is configured to be the ethernet frame size without + * the overhead of the ethernet header, which can have a VLAN header, and + * a frame check sequence (FCS). + * The buffer size we share with the device is defined to be ENA_PAGE_SIZE + */ + +#define ENA_XDP_MAX_MTU (ENA_PAGE_SIZE - ETH_HLEN - ETH_FCS_LEN - \ + VLAN_HLEN - XDP_PACKET_HEADROOM) + +#define ENA_IS_XDP_INDEX(adapter, index) (((index) >= (adapter)->xdp_first_ring) && \ + ((index) < (adapter)->xdp_first_ring + (adapter)->xdp_num_queues)) + struct ena_irq { irq_handler_t handler; void *data; @@ -155,6 +168,8 @@ struct ena_napi { struct napi_struct napi ____cacheline_aligned; struct ena_ring *tx_ring; struct ena_ring *rx_ring; + struct ena_ring *xdp_ring; + bool first_interrupt; u32 qid; struct dim dim; }; @@ -180,6 +195,17 @@ struct ena_tx_buffer { /* num of buffers used by this skb */ u32 num_of_bufs; + /* XDP buffer structure which is used for sending packets in + * the xdp queues + */ + struct xdp_frame *xdpf; + /* The rx page for the rx buffer that was received in rx and + * re transmitted on xdp tx queues as a result of XDP_TX action. + * We need to free the page once we finished cleaning the buffer in + * clean_xdp_irq() + */ + struct page *xdp_rx_page; + /* Indicate if bufs[0] map the linear data of the skb. */ u8 map_linear_data; @@ -258,10 +284,13 @@ struct ena_ring { struct ena_adapter *adapter; struct ena_com_io_cq *ena_com_io_cq; struct ena_com_io_sq *ena_com_io_sq; + struct bpf_prog *xdp_bpf_prog; + struct xdp_rxq_info xdp_rxq; u16 next_to_use; u16 next_to_clean; u16 rx_copybreak; + u16 rx_headroom; u16 qid; u16 mtu; u16 sgl_size; @@ -379,6 +408,10 @@ struct ena_adapter { u32 last_monitored_tx_qid; enum ena_regs_reset_reason_types reset_reason; + + struct bpf_prog *xdp_bpf_prog; + u32 xdp_first_ring; + u32 xdp_num_queues; }; void ena_set_ethtool_ops(struct net_device *netdev); @@ -390,8 +423,48 @@ void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf); int ena_update_queue_sizes(struct ena_adapter *adapter, u32 new_tx_size, u32 new_rx_size); + int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count); int ena_get_sset_count(struct net_device *netdev, int sset); +enum ena_xdp_errors_t { + ENA_XDP_ALLOWED = 0, + ENA_XDP_CURRENT_MTU_TOO_LARGE, + ENA_XDP_NO_ENOUGH_QUEUES, +}; + +static inline bool ena_xdp_queues_present(struct ena_adapter *adapter) +{ + return adapter->xdp_first_ring != 0; +} + +static inline bool ena_xdp_present(struct ena_adapter *adapter) +{ + return !!adapter->xdp_bpf_prog; +} + +static inline bool ena_xdp_present_ring(struct ena_ring *ring) +{ + return !!ring->xdp_bpf_prog; +} + +static inline int ena_xdp_legal_queue_count(struct ena_adapter *adapter, + u32 queues) +{ + return 2 * queues <= adapter->max_num_io_queues; +} + +static inline enum ena_xdp_errors_t ena_xdp_allowed(struct ena_adapter *adapter) +{ + enum ena_xdp_errors_t rc = ENA_XDP_ALLOWED; + + if (adapter->netdev->mtu > ENA_XDP_MAX_MTU) + rc = ENA_XDP_CURRENT_MTU_TOO_LARGE; + else if (!ena_xdp_legal_queue_count(adapter, adapter->num_io_queues)) + rc = ENA_XDP_NO_ENOUGH_QUEUES; + + return rc; +} + #endif /* !(ENA_H) */ diff --git a/drivers/net/ethernet/amd/7990.c b/drivers/net/ethernet/amd/7990.c index ab30761003da..cf3562e82ca9 100644 --- a/drivers/net/ethernet/amd/7990.c +++ b/drivers/net/ethernet/amd/7990.c @@ -527,7 +527,7 @@ int lance_close(struct net_device *dev) } EXPORT_SYMBOL_GPL(lance_close); -void lance_tx_timeout(struct net_device *dev) +void lance_tx_timeout(struct net_device *dev, unsigned int txqueue) { printk("lance_tx_timeout\n"); lance_reset(dev); diff --git a/drivers/net/ethernet/amd/7990.h b/drivers/net/ethernet/amd/7990.h index 741cdc392c6b..8266b3c1fefc 100644 --- a/drivers/net/ethernet/amd/7990.h +++ b/drivers/net/ethernet/amd/7990.h @@ -243,7 +243,7 @@ int lance_open(struct net_device *dev); int lance_close(struct net_device *dev); int lance_start_xmit(struct sk_buff *skb, struct net_device *dev); void lance_set_multicast(struct net_device *dev); -void lance_tx_timeout(struct net_device *dev); +void lance_tx_timeout(struct net_device *dev, unsigned int txqueue); #ifdef CONFIG_NET_POLL_CONTROLLER void lance_poll(struct net_device *dev); #endif diff --git a/drivers/net/ethernet/amd/a2065.c b/drivers/net/ethernet/amd/a2065.c index 212fe72a190b..a3faf4feb204 100644 --- a/drivers/net/ethernet/amd/a2065.c +++ b/drivers/net/ethernet/amd/a2065.c @@ -522,7 +522,7 @@ static inline int lance_reset(struct net_device *dev) return status; } -static void lance_tx_timeout(struct net_device *dev) +static void lance_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct lance_private *lp = netdev_priv(dev); volatile struct lance_regs *ll = lp->ll; diff --git a/drivers/net/ethernet/amd/am79c961a.c b/drivers/net/ethernet/amd/am79c961a.c index 0842da492a64..1c53408f5d47 100644 --- a/drivers/net/ethernet/amd/am79c961a.c +++ b/drivers/net/ethernet/amd/am79c961a.c @@ -422,7 +422,7 @@ static void am79c961_setmulticastlist (struct net_device *dev) spin_unlock_irqrestore(&priv->chip_lock, flags); } -static void am79c961_timeout(struct net_device *dev) +static void am79c961_timeout(struct net_device *dev, unsigned int txqueue) { printk(KERN_WARNING "%s: transmit timed out, network cable problem?\n", dev->name); diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c index 573e88fc8ede..0f3b743425e8 100644 --- a/drivers/net/ethernet/amd/amd8111e.c +++ b/drivers/net/ethernet/amd/amd8111e.c @@ -1569,7 +1569,7 @@ static int amd8111e_enable_link_change(struct amd8111e_priv *lp) * failed or the interface is locked up. This function will reinitialize * the hardware. */ -static void amd8111e_tx_timeout(struct net_device *dev) +static void amd8111e_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct amd8111e_priv *lp = netdev_priv(dev); int err; diff --git a/drivers/net/ethernet/amd/ariadne.c b/drivers/net/ethernet/amd/ariadne.c index 4b6a5cb85dd2..5e0f645f5bde 100644 --- a/drivers/net/ethernet/amd/ariadne.c +++ b/drivers/net/ethernet/amd/ariadne.c @@ -530,7 +530,7 @@ static inline void ariadne_reset(struct net_device *dev) netif_start_queue(dev); } -static void ariadne_tx_timeout(struct net_device *dev) +static void ariadne_tx_timeout(struct net_device *dev, unsigned int txqueue) { volatile struct Am79C960 *lance = (struct Am79C960 *)dev->base_addr; diff --git a/drivers/net/ethernet/amd/atarilance.c b/drivers/net/ethernet/amd/atarilance.c index d3d44e07afbc..4e36122609a3 100644 --- a/drivers/net/ethernet/amd/atarilance.c +++ b/drivers/net/ethernet/amd/atarilance.c @@ -346,7 +346,7 @@ static int lance_rx( struct net_device *dev ); static int lance_close( struct net_device *dev ); static void set_multicast_list( struct net_device *dev ); static int lance_set_mac_address( struct net_device *dev, void *addr ); -static void lance_tx_timeout (struct net_device *dev); +static void lance_tx_timeout (struct net_device *dev, unsigned int txqueue); /************************* End of Prototypes **************************/ @@ -727,7 +727,7 @@ static void lance_init_ring( struct net_device *dev ) /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ -static void lance_tx_timeout (struct net_device *dev) +static void lance_tx_timeout (struct net_device *dev, unsigned int txqueue) { struct lance_private *lp = netdev_priv(dev); struct lance_ioreg *IO = lp->iobase; diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c index 1793950f0582..d832c9f4d306 100644 --- a/drivers/net/ethernet/amd/au1000_eth.c +++ b/drivers/net/ethernet/amd/au1000_eth.c @@ -1014,7 +1014,7 @@ static netdev_tx_t au1000_tx(struct sk_buff *skb, struct net_device *dev) * The Tx ring has been full longer than the watchdog timeout * value. The transmitter must be hung? */ -static void au1000_tx_timeout(struct net_device *dev) +static void au1000_tx_timeout(struct net_device *dev, unsigned int txqueue) { netdev_err(dev, "au1000_tx_timeout: dev=%p\n", dev); au1000_reset_mac(dev); diff --git a/drivers/net/ethernet/amd/declance.c b/drivers/net/ethernet/amd/declance.c index dac4a2fcad6a..6592a2db9efb 100644 --- a/drivers/net/ethernet/amd/declance.c +++ b/drivers/net/ethernet/amd/declance.c @@ -884,7 +884,7 @@ static inline int lance_reset(struct net_device *dev) return status; } -static void lance_tx_timeout(struct net_device *dev) +static void lance_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct lance_private *lp = netdev_priv(dev); volatile struct lance_regs *ll = lp->ll; diff --git a/drivers/net/ethernet/amd/lance.c b/drivers/net/ethernet/amd/lance.c index f90b454b1642..aff44241988c 100644 --- a/drivers/net/ethernet/amd/lance.c +++ b/drivers/net/ethernet/amd/lance.c @@ -306,7 +306,7 @@ static irqreturn_t lance_interrupt(int irq, void *dev_id); static int lance_close(struct net_device *dev); static struct net_device_stats *lance_get_stats(struct net_device *dev); static void set_multicast_list(struct net_device *dev); -static void lance_tx_timeout (struct net_device *dev); +static void lance_tx_timeout (struct net_device *dev, unsigned int txqueue); @@ -913,7 +913,7 @@ lance_restart(struct net_device *dev, unsigned int csr0_bits, int must_reinit) } -static void lance_tx_timeout (struct net_device *dev) +static void lance_tx_timeout (struct net_device *dev, unsigned int txqueue) { struct lance_private *lp = (struct lance_private *) dev->ml_priv; int ioaddr = dev->base_addr; diff --git a/drivers/net/ethernet/amd/ni65.c b/drivers/net/ethernet/amd/ni65.c index c6c2a54c1121..c38edf6f03a3 100644 --- a/drivers/net/ethernet/amd/ni65.c +++ b/drivers/net/ethernet/amd/ni65.c @@ -254,7 +254,7 @@ static int ni65_lance_reinit(struct net_device *dev); static void ni65_init_lance(struct priv *p,unsigned char*,int,int); static netdev_tx_t ni65_send_packet(struct sk_buff *skb, struct net_device *dev); -static void ni65_timeout(struct net_device *dev); +static void ni65_timeout(struct net_device *dev, unsigned int txqueue); static int ni65_close(struct net_device *dev); static int ni65_alloc_buffer(struct net_device *dev); static void ni65_free_buffer(struct priv *p); @@ -1133,7 +1133,7 @@ static void ni65_recv_intr(struct net_device *dev,int csr0) * kick xmitter .. */ -static void ni65_timeout(struct net_device *dev) +static void ni65_timeout(struct net_device *dev, unsigned int txqueue) { int i; struct priv *p = dev->ml_priv; diff --git a/drivers/net/ethernet/amd/nmclan_cs.c b/drivers/net/ethernet/amd/nmclan_cs.c index 9c152d85840d..023aecf6ab30 100644 --- a/drivers/net/ethernet/amd/nmclan_cs.c +++ b/drivers/net/ethernet/amd/nmclan_cs.c @@ -407,7 +407,7 @@ static int mace_open(struct net_device *dev); static int mace_close(struct net_device *dev); static netdev_tx_t mace_start_xmit(struct sk_buff *skb, struct net_device *dev); -static void mace_tx_timeout(struct net_device *dev); +static void mace_tx_timeout(struct net_device *dev, unsigned int txqueue); static irqreturn_t mace_interrupt(int irq, void *dev_id); static struct net_device_stats *mace_get_stats(struct net_device *dev); static int mace_rx(struct net_device *dev, unsigned char RxCnt); @@ -837,7 +837,7 @@ mace_start_xmit failed, put skb back into a list." ---------------------------------------------------------------------------- */ -static void mace_tx_timeout(struct net_device *dev) +static void mace_tx_timeout(struct net_device *dev, unsigned int txqueue) { mace_private *lp = netdev_priv(dev); struct pcmcia_device *link = lp->p_dev; diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c index f5ad12c10934..dc7d88227e76 100644 --- a/drivers/net/ethernet/amd/pcnet32.c +++ b/drivers/net/ethernet/amd/pcnet32.c @@ -314,7 +314,7 @@ static int pcnet32_open(struct net_device *); static int pcnet32_init_ring(struct net_device *); static netdev_tx_t pcnet32_start_xmit(struct sk_buff *, struct net_device *); -static void pcnet32_tx_timeout(struct net_device *dev); +static void pcnet32_tx_timeout(struct net_device *dev, unsigned int txqueue); static irqreturn_t pcnet32_interrupt(int, void *); static int pcnet32_close(struct net_device *); static struct net_device_stats *pcnet32_get_stats(struct net_device *); @@ -2455,7 +2455,7 @@ static void pcnet32_restart(struct net_device *dev, unsigned int csr0_bits) lp->a->write_csr(ioaddr, CSR0, csr0_bits); } -static void pcnet32_tx_timeout(struct net_device *dev) +static void pcnet32_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct pcnet32_private *lp = netdev_priv(dev); unsigned long ioaddr = dev->base_addr, flags; diff --git a/drivers/net/ethernet/amd/sunlance.c b/drivers/net/ethernet/amd/sunlance.c index ebcbf8ca4829..b00e00881253 100644 --- a/drivers/net/ethernet/amd/sunlance.c +++ b/drivers/net/ethernet/amd/sunlance.c @@ -1097,7 +1097,7 @@ static void lance_piozero(void __iomem *dest, int len) sbus_writeb(0, piobuf); } -static void lance_tx_timeout(struct net_device *dev) +static void lance_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct lance_private *lp = netdev_priv(dev); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 98f8f2033154..b71f9b04a51e 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -2152,7 +2152,7 @@ static int xgbe_change_mtu(struct net_device *netdev, int mtu) return 0; } -static void xgbe_tx_timeout(struct net_device *netdev) +static void xgbe_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct xgbe_prv_data *pdata = netdev_priv(netdev); diff --git a/drivers/net/ethernet/apm/xgene-v2/main.c b/drivers/net/ethernet/apm/xgene-v2/main.c index 02b4f3af02b5..c48f60996761 100644 --- a/drivers/net/ethernet/apm/xgene-v2/main.c +++ b/drivers/net/ethernet/apm/xgene-v2/main.c @@ -575,7 +575,7 @@ static void xge_free_pending_skb(struct net_device *ndev) } } -static void xge_timeout(struct net_device *ndev) +static void xge_timeout(struct net_device *ndev, unsigned int txqueue) { struct xge_pdata *pdata = netdev_priv(ndev); diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index d8612131c55e..e284b6753725 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -859,7 +859,7 @@ static int xgene_enet_napi(struct napi_struct *napi, const int budget) return processed; } -static void xgene_enet_timeout(struct net_device *ndev) +static void xgene_enet_timeout(struct net_device *ndev, unsigned int txqueue) { struct xgene_enet_pdata *pdata = netdev_priv(ndev); struct netdev_queue *txq; diff --git a/drivers/net/ethernet/apple/macmace.c b/drivers/net/ethernet/apple/macmace.c index 8d03578d5e8c..95d3061c61be 100644 --- a/drivers/net/ethernet/apple/macmace.c +++ b/drivers/net/ethernet/apple/macmace.c @@ -91,7 +91,7 @@ static int mace_set_address(struct net_device *dev, void *addr); static void mace_reset(struct net_device *dev); static irqreturn_t mace_interrupt(int irq, void *dev_id); static irqreturn_t mace_dma_intr(int irq, void *dev_id); -static void mace_tx_timeout(struct net_device *dev); +static void mace_tx_timeout(struct net_device *dev, unsigned int txqueue); static void __mace_set_address(struct net_device *dev, void *addr); /* @@ -600,7 +600,7 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static void mace_tx_timeout(struct net_device *dev) +static void mace_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct mace_data *mp = netdev_priv(dev); volatile struct mace *mb = mp->mace; diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c index 8f5021091eee..ad8b0e3fcd2c 100644 --- a/drivers/net/ethernet/atheros/ag71xx.c +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -1409,7 +1409,7 @@ static void ag71xx_oom_timer_handler(struct timer_list *t) napi_schedule(&ag->napi); } -static void ag71xx_tx_timeout(struct net_device *ndev) +static void ag71xx_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct ag71xx *ag = netdev_priv(ndev); diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index d4bbcdfd691a..1dcbc486eca9 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -1553,7 +1553,7 @@ static netdev_tx_t alx_start_xmit(struct sk_buff *skb, return alx_start_xmit_ring(skb, alx_tx_queue_mapping(alx, skb)); } -static void alx_tx_timeout(struct net_device *dev) +static void alx_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct alx_priv *alx = netdev_priv(dev); diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index 2b239ecea05f..4c0b1f8551dd 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -350,7 +350,7 @@ static void atl1c_del_timer(struct atl1c_adapter *adapter) * atl1c_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure */ -static void atl1c_tx_timeout(struct net_device *netdev) +static void atl1c_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct atl1c_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c index 4f7b65825c15..e0d89942d537 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c @@ -251,7 +251,7 @@ static void atl1e_cancel_work(struct atl1e_adapter *adapter) * atl1e_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure */ -static void atl1e_tx_timeout(struct net_device *netdev) +static void atl1e_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct atl1e_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c index 3aba38322717..b81a4e0c5b57 100644 --- a/drivers/net/ethernet/atheros/atlx/atl2.c +++ b/drivers/net/ethernet/atheros/atlx/atl2.c @@ -1001,7 +1001,7 @@ static int atl2_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) * atl2_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure */ -static void atl2_tx_timeout(struct net_device *netdev) +static void atl2_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct atl2_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/atheros/atlx/atlx.c b/drivers/net/ethernet/atheros/atlx/atlx.c index 505a22c703f7..0941d07d0833 100644 --- a/drivers/net/ethernet/atheros/atlx/atlx.c +++ b/drivers/net/ethernet/atheros/atlx/atlx.c @@ -183,7 +183,7 @@ static void atlx_clear_phy_int(struct atlx_adapter *adapter) * atlx_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure */ -static void atlx_tx_timeout(struct net_device *netdev) +static void atlx_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct atlx_adapter *adapter = netdev_priv(netdev); /* Do the reset outside of interrupt context */ diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 035dbb1b2c98..5b3464c3e8d1 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -948,7 +948,7 @@ irq_ack: return IRQ_RETVAL(handled); } -static void b44_tx_timeout(struct net_device *dev) +static void b44_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct b44 *bp = netdev_priv(dev); diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 825af709708e..1907e47fd0af 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1354,7 +1354,7 @@ out: return ret; } -static void bcm_sysport_tx_timeout(struct net_device *dev) +static void bcm_sysport_tx_timeout(struct net_device *dev, unsigned int txqueue) { netdev_warn(dev, "transmit timeout!\n"); @@ -2427,6 +2427,14 @@ static int bcm_sysport_probe(struct platform_device *pdev) if (!of_id || !of_id->data) return -EINVAL; + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40)); + if (ret) + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (ret) { + dev_err(&pdev->dev, "unable to set DMA mask: %d\n", ret); + return ret; + } + /* Fairly quickly we need to know the type of adapter we have */ params = of_id->data; diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index fbc196b480b6..dbb7874607ca 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -6575,7 +6575,7 @@ bnx2_dump_state(struct bnx2 *bp) } static void -bnx2_tx_timeout(struct net_device *dev) +bnx2_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct bnx2 *bp = netdev_priv(dev); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 5e037a305b83..ee9e9290f112 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -4970,7 +4970,7 @@ int bnx2x_set_features(struct net_device *dev, netdev_features_t features) return 0; } -void bnx2x_tx_timeout(struct net_device *dev) +void bnx2x_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct bnx2x *bp = netdev_priv(dev); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index 8b08cb18e363..e35f48bfdc85 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -617,7 +617,7 @@ int bnx2x_set_features(struct net_device *dev, netdev_features_t features); * * @dev: net device */ -void bnx2x_tx_timeout(struct net_device *dev); +void bnx2x_tx_timeout(struct net_device *dev, unsigned int txqueue); /** bnx2x_get_c2s_mapping - read inner-to-outer vlan configuration * c2s_map should have BNX2X_MAX_PRIORITY entries. diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 85983f0e3134..4e34841906c7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -9976,7 +9976,7 @@ static void bnxt_reset_task(struct bnxt *bp, bool silent) } } -static void bnxt_tx_timeout(struct net_device *dev) +static void bnxt_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct bnxt *bp = netdev_priv(dev); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 120fa05a39ff..3ee7917e3fc0 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2,7 +2,7 @@ /* * Broadcom GENET (Gigabit Ethernet) controller driver * - * Copyright (c) 2014-2017 Broadcom + * Copyright (c) 2014-2019 Broadcom */ #define pr_fmt(fmt) "bcmgenet: " fmt @@ -508,8 +508,8 @@ static int bcmgenet_set_link_ksettings(struct net_device *dev, return phy_ethtool_ksettings_set(dev->phydev, cmd); } -static int bcmgenet_set_rx_csum(struct net_device *dev, - netdev_features_t wanted) +static void bcmgenet_set_rx_csum(struct net_device *dev, + netdev_features_t wanted) { struct bcmgenet_priv *priv = netdev_priv(dev); u32 rbuf_chk_ctrl; @@ -521,7 +521,7 @@ static int bcmgenet_set_rx_csum(struct net_device *dev, /* enable rx checksumming */ if (rx_csum_en) - rbuf_chk_ctrl |= RBUF_RXCHK_EN; + rbuf_chk_ctrl |= RBUF_RXCHK_EN | RBUF_L3_PARSE_DIS; else rbuf_chk_ctrl &= ~RBUF_RXCHK_EN; priv->desc_rxchk_en = rx_csum_en; @@ -535,12 +535,10 @@ static int bcmgenet_set_rx_csum(struct net_device *dev, rbuf_chk_ctrl &= ~RBUF_SKIP_FCS; bcmgenet_rbuf_writel(priv, rbuf_chk_ctrl, RBUF_CHK_CTRL); - - return 0; } -static int bcmgenet_set_tx_csum(struct net_device *dev, - netdev_features_t wanted) +static void bcmgenet_set_tx_csum(struct net_device *dev, + netdev_features_t wanted) { struct bcmgenet_priv *priv = netdev_priv(dev); bool desc_64b_en; @@ -549,7 +547,7 @@ static int bcmgenet_set_tx_csum(struct net_device *dev, tbuf_ctrl = bcmgenet_tbuf_ctrl_get(priv); rbuf_ctrl = bcmgenet_rbuf_readl(priv, RBUF_CTRL); - desc_64b_en = !!(wanted & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)); + desc_64b_en = !!(wanted & NETIF_F_HW_CSUM); /* enable 64 bytes descriptor in both directions (RBUF and TBUF) */ if (desc_64b_en) { @@ -563,21 +561,27 @@ static int bcmgenet_set_tx_csum(struct net_device *dev, bcmgenet_tbuf_ctrl_set(priv, tbuf_ctrl); bcmgenet_rbuf_writel(priv, rbuf_ctrl, RBUF_CTRL); - - return 0; } static int bcmgenet_set_features(struct net_device *dev, netdev_features_t features) { - netdev_features_t changed = features ^ dev->features; - netdev_features_t wanted = dev->wanted_features; - int ret = 0; + struct bcmgenet_priv *priv = netdev_priv(dev); + u32 reg; + int ret; - if (changed & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) - ret = bcmgenet_set_tx_csum(dev, wanted); - if (changed & (NETIF_F_RXCSUM)) - ret = bcmgenet_set_rx_csum(dev, wanted); + ret = clk_prepare_enable(priv->clk); + if (ret) + return ret; + + /* Make sure we reflect the value of CRC_CMD_FWD */ + reg = bcmgenet_umac_readl(priv, UMAC_CMD); + priv->crc_fwd_en = !!(reg & CMD_CRC_FWD); + + bcmgenet_set_tx_csum(dev, features); + bcmgenet_set_rx_csum(dev, features); + + clk_disable_unprepare(priv->clk); return ret; } @@ -857,6 +861,9 @@ static const struct bcmgenet_stats bcmgenet_gstrings_stats[] = { STAT_GENET_SOFT_MIB("alloc_rx_buff_failed", mib.alloc_rx_buff_failed), STAT_GENET_SOFT_MIB("rx_dma_failed", mib.rx_dma_failed), STAT_GENET_SOFT_MIB("tx_dma_failed", mib.tx_dma_failed), + STAT_GENET_SOFT_MIB("tx_realloc_tsb", mib.tx_realloc_tsb), + STAT_GENET_SOFT_MIB("tx_realloc_tsb_failed", + mib.tx_realloc_tsb_failed), /* Per TX queues */ STAT_GENET_Q(0), STAT_GENET_Q(1), @@ -1483,6 +1490,7 @@ static void bcmgenet_tx_reclaim_all(struct net_device *dev) static struct sk_buff *bcmgenet_put_tx_csum(struct net_device *dev, struct sk_buff *skb) { + struct bcmgenet_priv *priv = netdev_priv(dev); struct status_64 *status = NULL; struct sk_buff *new_skb; u16 offset; @@ -1495,12 +1503,15 @@ static struct sk_buff *bcmgenet_put_tx_csum(struct net_device *dev, * enough headroom for us to insert 64B status block. */ new_skb = skb_realloc_headroom(skb, sizeof(*status)); - dev_kfree_skb(skb); if (!new_skb) { + dev_kfree_skb_any(skb); + priv->mib.tx_realloc_tsb_failed++; dev->stats.tx_dropped++; return NULL; } + dev_consume_skb_any(skb); skb = new_skb; + priv->mib.tx_realloc_tsb++; } skb_push(skb, sizeof(*status)); @@ -1516,24 +1527,19 @@ static struct sk_buff *bcmgenet_put_tx_csum(struct net_device *dev, ip_proto = ipv6_hdr(skb)->nexthdr; break; default: - return skb; + /* don't use UDP flag */ + ip_proto = 0; + break; } offset = skb_checksum_start_offset(skb) - sizeof(*status); tx_csum_info = (offset << STATUS_TX_CSUM_START_SHIFT) | - (offset + skb->csum_offset); + (offset + skb->csum_offset) | + STATUS_TX_CSUM_LV; - /* Set the length valid bit for TCP and UDP and just set - * the special UDP flag for IPv4, else just set to 0. - */ - if (ip_proto == IPPROTO_TCP || ip_proto == IPPROTO_UDP) { - tx_csum_info |= STATUS_TX_CSUM_LV; - if (ip_proto == IPPROTO_UDP && - ip_ver == htons(ETH_P_IP)) - tx_csum_info |= STATUS_TX_CSUM_PROTO_UDP; - } else { - tx_csum_info = 0; - } + /* Set the special UDP flag for UDP */ + if (ip_proto == IPPROTO_UDP) + tx_csum_info |= STATUS_TX_CSUM_PROTO_UDP; status->tx_csum_info = tx_csum_info; } @@ -1744,7 +1750,6 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, unsigned int bytes_processed = 0; unsigned int p_index, mask; unsigned int discards; - unsigned int chksum_ok = 0; /* Clear status before servicing to reduce spurious interrupts */ if (ring->index == DESC_INDEX) { @@ -1795,9 +1800,15 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, dmadesc_get_length_status(priv, cb->bd_addr); } else { struct status_64 *status; + __be16 rx_csum; status = (struct status_64 *)skb->data; dma_length_status = status->length_status; + rx_csum = (__force __be16)(status->rx_csum & 0xffff); + if (priv->desc_rxchk_en) { + skb->csum = (__force __wsum)ntohs(rx_csum); + skb->ip_summed = CHECKSUM_COMPLETE; + } } /* DMA flags and length are still valid no matter how @@ -1840,18 +1851,12 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, goto next; } /* error packet */ - chksum_ok = (dma_flag & priv->dma_rx_chk_bit) && - priv->desc_rxchk_en; - skb_put(skb, len); if (priv->desc_64b_en) { skb_pull(skb, 64); len -= 64; } - if (likely(chksum_ok)) - skb->ip_summed = CHECKSUM_UNNECESSARY; - /* remove hardware 2bytes added for IP alignment */ skb_pull(skb, 2); len -= 2; @@ -2886,9 +2891,10 @@ static int bcmgenet_open(struct net_device *dev) init_umac(priv); - /* Make sure we reflect the value of CRC_CMD_FWD */ - reg = bcmgenet_umac_readl(priv, UMAC_CMD); - priv->crc_fwd_en = !!(reg & CMD_CRC_FWD); + /* Apply features again in case we changed them while interface was + * down + */ + bcmgenet_set_features(dev, dev->features); bcmgenet_set_hw_addr(priv, dev->dev_addr); @@ -3055,7 +3061,7 @@ static void bcmgenet_dump_tx_queue(struct bcmgenet_tx_ring *ring) ring->cb_ptr, ring->end_ptr); } -static void bcmgenet_timeout(struct net_device *dev) +static void bcmgenet_timeout(struct net_device *dev, unsigned int txqueue) { struct bcmgenet_priv *priv = netdev_priv(dev); u32 int0_enable = 0; @@ -3327,19 +3333,15 @@ static void bcmgenet_set_hw_params(struct bcmgenet_priv *priv) if (GENET_IS_V5(priv) || GENET_IS_V4(priv)) { bcmgenet_dma_regs = bcmgenet_dma_regs_v3plus; genet_dma_ring_regs = genet_dma_ring_regs_v4; - priv->dma_rx_chk_bit = DMA_RX_CHK_V3PLUS; } else if (GENET_IS_V3(priv)) { bcmgenet_dma_regs = bcmgenet_dma_regs_v3plus; genet_dma_ring_regs = genet_dma_ring_regs_v123; - priv->dma_rx_chk_bit = DMA_RX_CHK_V3PLUS; } else if (GENET_IS_V2(priv)) { bcmgenet_dma_regs = bcmgenet_dma_regs_v2; genet_dma_ring_regs = genet_dma_ring_regs_v123; - priv->dma_rx_chk_bit = DMA_RX_CHK_V12; } else if (GENET_IS_V1(priv)) { bcmgenet_dma_regs = bcmgenet_dma_regs_v1; genet_dma_ring_regs = genet_dma_ring_regs_v123; - priv->dma_rx_chk_bit = DMA_RX_CHK_V12; } /* enum genet_version starts at 1 */ @@ -3535,9 +3537,11 @@ static int bcmgenet_probe(struct platform_device *pdev) priv->msg_enable = netif_msg_init(-1, GENET_MSG_DEFAULT); - /* Set hardware features */ - dev->hw_features |= NETIF_F_SG | NETIF_F_IP_CSUM | - NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM; + /* Set default features */ + dev->features |= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | + NETIF_F_RXCSUM; + dev->hw_features |= dev->features; + dev->vlan_features |= dev->features; /* Request the WOL interrupt and advertise suspend if available */ priv->wol_irq_disabled = true; @@ -3574,6 +3578,14 @@ static int bcmgenet_probe(struct platform_device *pdev) bcmgenet_set_hw_params(priv); + err = -EIO; + if (priv->hw_params->flags & GENET_HAS_40BITS) + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40)); + if (err) + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (err) + goto err; + /* Mii wait queue */ init_waitqueue_head(&priv->wq); /* Always use RX_BUF_LENGTH (2KB) buffer for all chips */ @@ -3689,6 +3701,9 @@ static int bcmgenet_resume(struct device *d) genphy_config_aneg(dev->phydev); bcmgenet_mii_config(priv->dev, false); + /* Restore enabled features */ + bcmgenet_set_features(dev, dev->features); + bcmgenet_set_hw_addr(priv, dev->dev_addr); if (priv->internal_phy) { diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index a5659197598f..61a6fe9f4cec 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -144,6 +144,8 @@ struct bcmgenet_mib_counters { u32 alloc_rx_buff_failed; u32 rx_dma_failed; u32 tx_dma_failed; + u32 tx_realloc_tsb; + u32 tx_realloc_tsb_failed; }; #define UMAC_HD_BKP_CTRL 0x004 @@ -251,6 +253,7 @@ struct bcmgenet_mib_counters { #define RBUF_CHK_CTRL 0x14 #define RBUF_RXCHK_EN (1 << 0) #define RBUF_SKIP_FCS (1 << 4) +#define RBUF_L3_PARSE_DIS (1 << 5) #define RBUF_ENERGY_CTRL 0x9c #define RBUF_EEE_EN (1 << 0) @@ -663,7 +666,6 @@ struct bcmgenet_priv { bool desc_rxchk_en; bool crc_fwd_en; - unsigned int dma_rx_chk_bit; u32 dma_max_burst_length; u32 msg_enable; diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c index 1604ad32e920..80ff52527233 100644 --- a/drivers/net/ethernet/broadcom/sb1250-mac.c +++ b/drivers/net/ethernet/broadcom/sb1250-mac.c @@ -294,7 +294,7 @@ static int sbmac_set_duplex(struct sbmac_softc *s, enum sbmac_duplex duplex, enum sbmac_fc fc); static int sbmac_open(struct net_device *dev); -static void sbmac_tx_timeout (struct net_device *dev); +static void sbmac_tx_timeout (struct net_device *dev, unsigned int txqueue); static void sbmac_set_rx_mode(struct net_device *dev); static int sbmac_mii_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static int sbmac_close(struct net_device *dev); @@ -2419,7 +2419,7 @@ static void sbmac_mii_poll(struct net_device *dev) } -static void sbmac_tx_timeout (struct net_device *dev) +static void sbmac_tx_timeout (struct net_device *dev, unsigned int txqueue) { struct sbmac_softc *sc = netdev_priv(dev); unsigned long flags; diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index ca3aa1250dd1..460b4992914a 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -7645,7 +7645,7 @@ static void tg3_poll_controller(struct net_device *dev) } #endif -static void tg3_tx_timeout(struct net_device *dev) +static void tg3_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct tg3 *tp = netdev_priv(dev); diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index af04a2c81adb..05a3d067c3fc 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -1251,7 +1251,7 @@ static int xgmac_poll(struct napi_struct *napi, int budget) * netdev structure and arrange for the device to be reset to a sane state * in order to transmit a new packet. */ -static void xgmac_tx_timeout(struct net_device *dev) +static void xgmac_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct xgmac_priv *priv = netdev_priv(dev); schedule_work(&priv->tx_timeout_work); diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 7f3b2e3b0868..eab05b5534ea 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -2562,7 +2562,7 @@ lio_xmit_failed: /** \brief Network device Tx timeout * @param netdev pointer to network device */ -static void liquidio_tx_timeout(struct net_device *netdev) +static void liquidio_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct lio *lio; diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c index 370d76822ee0..7a77544a54f5 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c @@ -1628,7 +1628,7 @@ lio_xmit_failed: /** \brief Network device Tx timeout * @param netdev pointer to network device */ -static void liquidio_tx_timeout(struct net_device *netdev) +static void liquidio_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct lio *lio; diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c index f3f2e71431ac..600de587d7a9 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c @@ -31,7 +31,7 @@ static int lio_vf_rep_open(struct net_device *ndev); static int lio_vf_rep_stop(struct net_device *ndev); static netdev_tx_t lio_vf_rep_pkt_xmit(struct sk_buff *skb, struct net_device *ndev); -static void lio_vf_rep_tx_timeout(struct net_device *netdev); +static void lio_vf_rep_tx_timeout(struct net_device *netdev, unsigned int txqueue); static int lio_vf_rep_phys_port_name(struct net_device *dev, char *buf, size_t len); static void lio_vf_rep_get_stats64(struct net_device *dev, @@ -172,7 +172,7 @@ lio_vf_rep_stop(struct net_device *ndev) } static void -lio_vf_rep_tx_timeout(struct net_device *ndev) +lio_vf_rep_tx_timeout(struct net_device *ndev, unsigned int txqueue) { netif_trans_update(ndev); diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index f28409279ea4..016957285f99 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1741,7 +1741,7 @@ static void nicvf_get_stats64(struct net_device *netdev, } -static void nicvf_tx_timeout(struct net_device *dev) +static void nicvf_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct nicvf *nic = netdev_priv(dev); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index a70ac2097892..eda8e1269551 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -56,6 +56,7 @@ #include <asm/io.h> #include "t4_chip_type.h" #include "cxgb4_uld.h" +#include "t4fw_api.h" #define CH_WARN(adap, fmt, ...) dev_warn(adap->pdev_dev, fmt, ## __VA_ARGS__) extern struct list_head adapter_list; @@ -68,6 +69,16 @@ extern struct mutex uld_mutex; #define ETHTXQ_STOP_THRES \ (1 + DIV_ROUND_UP((3 * MAX_SKB_FRAGS) / 2 + (MAX_SKB_FRAGS & 1), 8)) +#define FW_PARAM_DEV(param) \ + (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) | \ + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_##param)) + +#define FW_PARAM_PFVF(param) \ + (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) | \ + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_##param) | \ + FW_PARAMS_PARAM_Y_V(0) | \ + FW_PARAMS_PARAM_Z_V(0)) + enum { MAX_NPORTS = 4, /* max # of ports */ SERNUM_LEN = 24, /* Serial # length */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index 93868dca186a..9a96b01dad56 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -3171,14 +3171,12 @@ static const struct file_operations mem_debugfs_fops = { static int tid_info_show(struct seq_file *seq, void *v) { - unsigned int tid_start = 0; struct adapter *adap = seq->private; - const struct tid_info *t = &adap->tids; - enum chip_type chip = CHELSIO_CHIP_VERSION(adap->params.chip); - - if (chip > CHELSIO_T5) - tid_start = t4_read_reg(adap, LE_DB_ACTIVE_TABLE_START_INDEX_A); + const struct tid_info *t; + enum chip_type chip; + t = &adap->tids; + chip = CHELSIO_CHIP_VERSION(adap->params.chip); if (t4_read_reg(adap, LE_DB_CONFIG_A) & HASHEN_F) { unsigned int sb; seq_printf(seq, "Connections in use: %u\n", @@ -3190,9 +3188,9 @@ static int tid_info_show(struct seq_file *seq, void *v) sb = t4_read_reg(adap, LE_DB_SRVR_START_INDEX_A); if (sb) { - seq_printf(seq, "TID range: %u..%u/%u..%u", tid_start, + seq_printf(seq, "TID range: %u..%u/%u..%u", t->tid_base, sb - 1, adap->tids.hash_base, - t->ntids - 1); + t->tid_base + t->ntids - 1); seq_printf(seq, ", in use: %u/%u\n", atomic_read(&t->tids_in_use), atomic_read(&t->hash_tids_in_use)); @@ -3201,14 +3199,14 @@ static int tid_info_show(struct seq_file *seq, void *v) t->aftid_base, t->aftid_end, adap->tids.hash_base, - t->ntids - 1); + t->tid_base + t->ntids - 1); seq_printf(seq, ", in use: %u/%u\n", atomic_read(&t->tids_in_use), atomic_read(&t->hash_tids_in_use)); } else { seq_printf(seq, "TID range: %u..%u", adap->tids.hash_base, - t->ntids - 1); + t->tid_base + t->ntids - 1); seq_printf(seq, ", in use: %u\n", atomic_read(&t->hash_tids_in_use)); } @@ -3216,8 +3214,8 @@ static int tid_info_show(struct seq_file *seq, void *v) seq_printf(seq, "Connections in use: %u\n", atomic_read(&t->conns_in_use)); - seq_printf(seq, "TID range: %u..%u", tid_start, - tid_start + t->ntids - 1); + seq_printf(seq, "TID range: %u..%u", t->tid_base, + t->tid_base + t->ntids - 1); seq_printf(seq, ", in use: %u\n", atomic_read(&t->tids_in_use)); } @@ -3240,6 +3238,9 @@ static int tid_info_show(struct seq_file *seq, void *v) seq_printf(seq, "SFTID range: %u..%u in use: %u\n", t->sftid_base, t->sftid_base + t->nsftids - 2, t->sftids_in_use); + if (t->nhpftids) + seq_printf(seq, "HPFTID range: %u..%u\n", t->hpftid_base, + t->hpftid_base + t->nhpftids - 1); if (t->ntids) seq_printf(seq, "HW TID usage: %u IP users, %u IPv6 users\n", t4_read_reg(adap, LE_DB_ACT_CNT_IPV4_A), diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 1d39fca11810..2a2938bbb93a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -361,20 +361,22 @@ static int get_filter_count(struct adapter *adapter, unsigned int fidx, tcb_base = t4_read_reg(adapter, TP_CMM_TCB_BASE_A); if (is_hashfilter(adapter) && hash) { - if (fidx < adapter->tids.ntids) { - f = adapter->tids.tid_tab[fidx]; - if (!f) - return -EINVAL; - } else { + if (tid_out_of_range(&adapter->tids, fidx)) return -E2BIG; - } + f = adapter->tids.tid_tab[fidx - adapter->tids.tid_base]; + if (!f) + return -EINVAL; } else { - if ((fidx != (adapter->tids.nftids + - adapter->tids.nsftids - 1)) && - fidx >= adapter->tids.nftids) + if ((fidx != (adapter->tids.nftids + adapter->tids.nsftids + + adapter->tids.nhpftids - 1)) && + fidx >= (adapter->tids.nftids + adapter->tids.nhpftids)) return -E2BIG; - f = &adapter->tids.ftid_tab[fidx]; + if (fidx < adapter->tids.nhpftids) + f = &adapter->tids.hpftid_tab[fidx]; + else + f = &adapter->tids.ftid_tab[fidx - + adapter->tids.nhpftids]; if (!f->valid) return -EINVAL; } @@ -480,6 +482,7 @@ int cxgb4_get_free_ftid(struct net_device *dev, int family) ftid -= n; } spin_unlock_bh(&t->ftid_lock); + ftid += t->nhpftids; return found ? ftid : -ENOMEM; } @@ -507,6 +510,24 @@ static int cxgb4_set_ftid(struct tid_info *t, int fidx, int family, return 0; } +static int cxgb4_set_hpftid(struct tid_info *t, int fidx, int family) +{ + spin_lock_bh(&t->ftid_lock); + + if (test_bit(fidx, t->hpftid_bmap)) { + spin_unlock_bh(&t->ftid_lock); + return -EBUSY; + } + + if (family == PF_INET) + __set_bit(fidx, t->hpftid_bmap); + else + bitmap_allocate_region(t->hpftid_bmap, fidx, 1); + + spin_unlock_bh(&t->ftid_lock); + return 0; +} + static void cxgb4_clear_ftid(struct tid_info *t, int fidx, int family, unsigned int chip_ver) { @@ -522,33 +543,58 @@ static void cxgb4_clear_ftid(struct tid_info *t, int fidx, int family, spin_unlock_bh(&t->ftid_lock); } +static void cxgb4_clear_hpftid(struct tid_info *t, int fidx, int family) +{ + spin_lock_bh(&t->ftid_lock); + + if (family == PF_INET) + __clear_bit(fidx, t->hpftid_bmap); + else + bitmap_release_region(t->hpftid_bmap, fidx, 1); + + spin_unlock_bh(&t->ftid_lock); +} + bool cxgb4_filter_prio_in_range(struct net_device *dev, u32 idx, u32 prio) { + struct filter_entry *prev_fe, *next_fe, *tab; struct adapter *adap = netdev2adap(dev); - struct filter_entry *prev_fe, *next_fe; + u32 prev_ftid, next_ftid, max_tid; struct tid_info *t = &adap->tids; - u32 prev_ftid, next_ftid; + unsigned long *bmap; bool valid = true; + if (idx < t->nhpftids) { + bmap = t->hpftid_bmap; + tab = t->hpftid_tab; + max_tid = t->nhpftids; + } else { + idx -= t->nhpftids; + bmap = t->ftid_bmap; + tab = t->ftid_tab; + max_tid = t->nftids; + } + /* Only insert the rule if both of the following conditions * are met: * 1. The immediate previous rule has priority <= @prio. * 2. The immediate next rule has priority >= @prio. */ spin_lock_bh(&t->ftid_lock); + /* Don't insert if there's a rule already present at @idx. */ - if (test_bit(idx, t->ftid_bmap)) { + if (test_bit(idx, bmap)) { valid = false; goto out_unlock; } - next_ftid = find_next_bit(t->ftid_bmap, t->nftids, idx); - if (next_ftid >= t->nftids) + next_ftid = find_next_bit(bmap, max_tid, idx); + if (next_ftid >= max_tid) next_ftid = idx; - next_fe = &adap->tids.ftid_tab[next_ftid]; + next_fe = &tab[next_ftid]; - prev_ftid = find_last_bit(t->ftid_bmap, idx); + prev_ftid = find_last_bit(bmap, idx); if (prev_ftid >= idx) prev_ftid = idx; @@ -558,13 +604,13 @@ bool cxgb4_filter_prio_in_range(struct net_device *dev, u32 idx, u32 prio) * accordingly. */ if (CHELSIO_CHIP_VERSION(adap->params.chip) < CHELSIO_T6) { - prev_fe = &adap->tids.ftid_tab[prev_ftid & ~0x3]; + prev_fe = &tab[prev_ftid & ~0x3]; if (!prev_fe->fs.type) - prev_fe = &adap->tids.ftid_tab[prev_ftid]; + prev_fe = &tab[prev_ftid]; } else { - prev_fe = &adap->tids.ftid_tab[prev_ftid & ~0x1]; + prev_fe = &tab[prev_ftid & ~0x1]; if (!prev_fe->fs.type) - prev_fe = &adap->tids.ftid_tab[prev_ftid]; + prev_fe = &tab[prev_ftid]; } if ((prev_fe->valid && prio < prev_fe->fs.tc_prio) || @@ -579,11 +625,16 @@ out_unlock: /* Delete the filter at a specified index. */ static int del_filter_wr(struct adapter *adapter, int fidx) { - struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; struct fw_filter_wr *fwr; + struct filter_entry *f; struct sk_buff *skb; unsigned int len; + if (fidx < adapter->tids.nhpftids) + f = &adapter->tids.hpftid_tab[fidx]; + else + f = &adapter->tids.ftid_tab[fidx - adapter->tids.nhpftids]; + len = sizeof(*fwr); skb = alloc_skb(len, GFP_KERNEL); @@ -609,10 +660,15 @@ static int del_filter_wr(struct adapter *adapter, int fidx) */ int set_filter_wr(struct adapter *adapter, int fidx) { - struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; struct fw_filter2_wr *fwr; + struct filter_entry *f; struct sk_buff *skb; + if (fidx < adapter->tids.nhpftids) + f = &adapter->tids.hpftid_tab[fidx]; + else + f = &adapter->tids.ftid_tab[fidx - adapter->tids.nhpftids]; + skb = alloc_skb(sizeof(*fwr), GFP_KERNEL); if (!skb) return -ENOMEM; @@ -762,10 +818,14 @@ int delete_filter(struct adapter *adapter, unsigned int fidx) struct filter_entry *f; int ret; - if (fidx >= adapter->tids.nftids + adapter->tids.nsftids) + if (fidx >= adapter->tids.nftids + adapter->tids.nsftids + + adapter->tids.nhpftids) return -EINVAL; - f = &adapter->tids.ftid_tab[fidx]; + if (fidx < adapter->tids.nhpftids) + f = &adapter->tids.hpftid_tab[fidx]; + else + f = &adapter->tids.ftid_tab[fidx - adapter->tids.nhpftids]; ret = writable_filter(f); if (ret) return ret; @@ -811,12 +871,22 @@ void clear_all_filters(struct adapter *adapter) struct net_device *dev = adapter->port[0]; unsigned int i; + if (adapter->tids.hpftid_tab) { + struct filter_entry *f = &adapter->tids.hpftid_tab[0]; + + for (i = 0; i < adapter->tids.nhpftids; i++, f++) + if (f->valid || f->pending) + cxgb4_del_filter(dev, i, &f->fs); + } + if (adapter->tids.ftid_tab) { struct filter_entry *f = &adapter->tids.ftid_tab[0]; unsigned int max_ftid = adapter->tids.nftids + - adapter->tids.nsftids; + adapter->tids.nsftids + + adapter->tids.nhpftids; + /* Clear all TCAM filters */ - for (i = 0; i < max_ftid; i++, f++) + for (i = adapter->tids.nhpftids; i < max_ftid; i++, f++) if (f->valid || f->pending) cxgb4_del_filter(dev, i, &f->fs); } @@ -1319,17 +1389,17 @@ out_err: * filter specification in order to facilitate signaling completion of the * operation. */ -int __cxgb4_set_filter(struct net_device *dev, int filter_id, +int __cxgb4_set_filter(struct net_device *dev, int ftid, struct ch_filter_specification *fs, struct filter_ctx *ctx) { struct adapter *adapter = netdev2adap(dev); - unsigned int chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip); - unsigned int max_fidx, fidx; - struct filter_entry *f; + unsigned int max_fidx, fidx, chip_ver; + int iq, ret, filter_id = ftid; + struct filter_entry *f, *tab; u32 iconf; - int iq, ret; + chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip); if (fs->hash) { if (is_hashfilter(adapter)) return cxgb4_set_hash_filter(dev, fs, ctx); @@ -1338,7 +1408,7 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, return -EINVAL; } - max_fidx = adapter->tids.nftids; + max_fidx = adapter->tids.nftids + adapter->tids.nhpftids; if (filter_id != (max_fidx + adapter->tids.nsftids - 1) && filter_id >= max_fidx) return -E2BIG; @@ -1353,6 +1423,13 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, if (iq < 0) return iq; + if (fs->prio) { + tab = &adapter->tids.hpftid_tab[0]; + } else { + tab = &adapter->tids.ftid_tab[0]; + filter_id = ftid - adapter->tids.nhpftids; + } + /* IPv6 filters occupy four slots and must be aligned on * four-slot boundaries. IPv4 filters only occupy a single * slot and have no alignment requirements but writing a new @@ -1373,9 +1450,8 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, else fidx = filter_id & ~0x1; - if (fidx != filter_id && - adapter->tids.ftid_tab[fidx].fs.type) { - f = &adapter->tids.ftid_tab[fidx]; + if (fidx != filter_id && tab[fidx].fs.type) { + f = &tab[fidx]; if (f->valid) { dev_err(adapter->pdev_dev, "Invalid location. IPv6 requires 4 slots and is occupying slots %u to %u\n", @@ -1399,7 +1475,7 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, */ for (fidx = filter_id + 1; fidx < filter_id + 4; fidx++) { - f = &adapter->tids.ftid_tab[fidx]; + f = &tab[fidx]; if (f->valid) { dev_err(adapter->pdev_dev, "Invalid location. IPv6 requires 4 slots and an IPv4 filter exists at %u\n", @@ -1415,7 +1491,7 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, return -EINVAL; /* Check overlapping IPv4 filter slot */ fidx = filter_id + 1; - f = &adapter->tids.ftid_tab[fidx]; + f = &tab[fidx]; if (f->valid) { pr_err("%s: IPv6 filter requires 2 indices. IPv4 filter already present at %d. Please remove IPv4 filter first.\n", __func__, fidx); @@ -1427,36 +1503,35 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, /* Check to make sure that provided filter index is not * already in use by someone else */ - f = &adapter->tids.ftid_tab[filter_id]; + f = &tab[filter_id]; if (f->valid) return -EBUSY; - fidx = filter_id + adapter->tids.ftid_base; - ret = cxgb4_set_ftid(&adapter->tids, filter_id, - fs->type ? PF_INET6 : PF_INET, - chip_ver); + if (fs->prio) { + fidx = filter_id + adapter->tids.hpftid_base; + ret = cxgb4_set_hpftid(&adapter->tids, filter_id, + fs->type ? PF_INET6 : PF_INET); + } else { + fidx = filter_id + adapter->tids.ftid_base; + ret = cxgb4_set_ftid(&adapter->tids, filter_id, + fs->type ? PF_INET6 : PF_INET, + chip_ver); + } + if (ret) return ret; /* Check t make sure the filter requested is writable ... */ ret = writable_filter(f); - if (ret) { - /* Clear the bits we have set above */ - cxgb4_clear_ftid(&adapter->tids, filter_id, - fs->type ? PF_INET6 : PF_INET, - chip_ver); - return ret; - } + if (ret) + goto free_tid; if (is_t6(adapter->params.chip) && fs->type && ipv6_addr_type((const struct in6_addr *)fs->val.lip) != IPV6_ADDR_ANY) { ret = cxgb4_clip_get(dev, (const u32 *)&fs->val.lip, 1); - if (ret) { - cxgb4_clear_ftid(&adapter->tids, filter_id, PF_INET6, - chip_ver); - return ret; - } + if (ret) + goto free_tid; } /* Convert the filter specification into our internal format. @@ -1487,7 +1562,7 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, f->fs.mask.vni, 0, 1, 1); if (ret < 0) - goto free_clip; + goto free_tid; f->fs.val.ovlan = ret; f->fs.mask.ovlan = 0x1ff; @@ -1501,21 +1576,22 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, */ f->ctx = ctx; f->tid = fidx; /* Save the actual tid */ - ret = set_filter_wr(adapter, filter_id); - if (ret) { + ret = set_filter_wr(adapter, ftid); + if (ret) + goto free_tid; + + return ret; + +free_tid: + if (f->fs.prio) + cxgb4_clear_hpftid(&adapter->tids, filter_id, + fs->type ? PF_INET6 : PF_INET); + else cxgb4_clear_ftid(&adapter->tids, filter_id, fs->type ? PF_INET6 : PF_INET, chip_ver); - clear_filter(adapter, f); - } - - return ret; -free_clip: - if (is_t6(adapter->params.chip) && f->fs.type) - cxgb4_clip_release(f->dev, (const u32 *)&f->fs.val.lip, 1); - cxgb4_clear_ftid(&adapter->tids, filter_id, - fs->type ? PF_INET6 : PF_INET, chip_ver); + clear_filter(adapter, f); return ret; } @@ -1537,7 +1613,7 @@ static int cxgb4_del_hash_filter(struct net_device *dev, int filter_id, netdev_dbg(dev, "%s: filter_id = %d ; nftids = %d\n", __func__, filter_id, adapter->tids.nftids); - if (filter_id > adapter->tids.ntids) + if (tid_out_of_range(t, filter_id)) return -E2BIG; f = lookup_tid(t, filter_id); @@ -1590,11 +1666,11 @@ int __cxgb4_del_filter(struct net_device *dev, int filter_id, struct filter_ctx *ctx) { struct adapter *adapter = netdev2adap(dev); - unsigned int chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip); + unsigned int max_fidx, chip_ver; struct filter_entry *f; - unsigned int max_fidx; int ret; + chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip); if (fs && fs->hash) { if (is_hashfilter(adapter)) return cxgb4_del_hash_filter(dev, filter_id, ctx); @@ -1603,21 +1679,31 @@ int __cxgb4_del_filter(struct net_device *dev, int filter_id, return -EINVAL; } - max_fidx = adapter->tids.nftids; + max_fidx = adapter->tids.nftids + adapter->tids.nhpftids; if (filter_id != (max_fidx + adapter->tids.nsftids - 1) && filter_id >= max_fidx) return -E2BIG; - f = &adapter->tids.ftid_tab[filter_id]; + if (filter_id < adapter->tids.nhpftids) + f = &adapter->tids.hpftid_tab[filter_id]; + else + f = &adapter->tids.ftid_tab[filter_id - adapter->tids.nhpftids]; + ret = writable_filter(f); if (ret) return ret; if (f->valid) { f->ctx = ctx; - cxgb4_clear_ftid(&adapter->tids, filter_id, - f->fs.type ? PF_INET6 : PF_INET, - chip_ver); + if (f->fs.prio) + cxgb4_clear_hpftid(&adapter->tids, + f->tid - adapter->tids.hpftid_base, + f->fs.type ? PF_INET6 : PF_INET); + else + cxgb4_clear_ftid(&adapter->tids, + f->tid - adapter->tids.ftid_base, + f->fs.type ? PF_INET6 : PF_INET, + chip_ver); return del_filter_wr(adapter, filter_id); } @@ -1842,11 +1928,18 @@ void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) max_fidx = adap->tids.nftids + adap->tids.nsftids; /* Get the corresponding filter entry for this tid */ if (adap->tids.ftid_tab) { - /* Check this in normal filter region */ - idx = tid - adap->tids.ftid_base; - if (idx >= max_fidx) - return; - f = &adap->tids.ftid_tab[idx]; + idx = tid - adap->tids.hpftid_base; + if (idx < adap->tids.nhpftids) { + f = &adap->tids.hpftid_tab[idx]; + } else { + /* Check this in normal filter region */ + idx = tid - adap->tids.ftid_base; + if (idx >= max_fidx) + return; + f = &adap->tids.ftid_tab[idx]; + idx += adap->tids.nhpftids; + } + if (f->tid != tid) return; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 12ff69b3ba91..1930e39f195e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -804,6 +804,26 @@ static int setup_ppod_edram(struct adapter *adap) return 0; } +static void adap_config_hpfilter(struct adapter *adapter) +{ + u32 param, val = 0; + int ret; + + /* Enable HP filter region. Older fw will fail this request and + * it is fine. + */ + param = FW_PARAM_DEV(HPFILTER_REGION_SUPPORT); + ret = t4_set_params(adapter, adapter->mbox, adapter->pf, 0, + 1, ¶m, &val); + + /* An error means FW doesn't know about HP filter support, + * it's not a problem, don't return an error. + */ + if (ret < 0) + dev_err(adapter->pdev_dev, + "HP filter region isn't supported by FW\n"); +} + /** * cxgb4_write_rss - write the RSS table for a given port * @pi: the port @@ -1427,8 +1447,8 @@ static void mk_tid_release(struct sk_buff *skb, unsigned int chan, static void cxgb4_queue_tid_release(struct tid_info *t, unsigned int chan, unsigned int tid) { - void **p = &t->tid_tab[tid]; struct adapter *adap = container_of(t, struct adapter, tids); + void **p = &t->tid_tab[tid - t->tid_base]; spin_lock_bh(&adap->tid_release_lock); *p = adap->tid_release_head; @@ -1480,13 +1500,13 @@ static void process_tid_release_list(struct work_struct *work) void cxgb4_remove_tid(struct tid_info *t, unsigned int chan, unsigned int tid, unsigned short family) { - struct sk_buff *skb; struct adapter *adap = container_of(t, struct adapter, tids); + struct sk_buff *skb; - WARN_ON(tid >= t->ntids); + WARN_ON(tid_out_of_range(&adap->tids, tid)); - if (t->tid_tab[tid]) { - t->tid_tab[tid] = NULL; + if (t->tid_tab[tid - adap->tids.tid_base]) { + t->tid_tab[tid - adap->tids.tid_base] = NULL; atomic_dec(&t->conns_in_use); if (t->hash_base && (tid >= t->hash_base)) { if (family == AF_INET6) @@ -1518,6 +1538,7 @@ static int tid_init(struct tid_info *t) struct adapter *adap = container_of(t, struct adapter, tids); unsigned int max_ftids = t->nftids + t->nsftids; unsigned int natids = t->natids; + unsigned int hpftid_bmap_size; unsigned int eotid_bmap_size; unsigned int stid_bmap_size; unsigned int ftid_bmap_size; @@ -1525,12 +1546,15 @@ static int tid_init(struct tid_info *t) stid_bmap_size = BITS_TO_LONGS(t->nstids + t->nsftids); ftid_bmap_size = BITS_TO_LONGS(t->nftids); + hpftid_bmap_size = BITS_TO_LONGS(t->nhpftids); eotid_bmap_size = BITS_TO_LONGS(t->neotids); size = t->ntids * sizeof(*t->tid_tab) + natids * sizeof(*t->atid_tab) + t->nstids * sizeof(*t->stid_tab) + t->nsftids * sizeof(*t->stid_tab) + stid_bmap_size * sizeof(long) + + t->nhpftids * sizeof(*t->hpftid_tab) + + hpftid_bmap_size * sizeof(long) + max_ftids * sizeof(*t->ftid_tab) + ftid_bmap_size * sizeof(long) + t->neotids * sizeof(*t->eotid_tab) + @@ -1543,7 +1567,9 @@ static int tid_init(struct tid_info *t) t->atid_tab = (union aopen_entry *)&t->tid_tab[t->ntids]; t->stid_tab = (struct serv_entry *)&t->atid_tab[natids]; t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids + t->nsftids]; - t->ftid_tab = (struct filter_entry *)&t->stid_bmap[stid_bmap_size]; + t->hpftid_tab = (struct filter_entry *)&t->stid_bmap[stid_bmap_size]; + t->hpftid_bmap = (unsigned long *)&t->hpftid_tab[t->nhpftids]; + t->ftid_tab = (struct filter_entry *)&t->hpftid_bmap[hpftid_bmap_size]; t->ftid_bmap = (unsigned long *)&t->ftid_tab[max_ftids]; t->eotid_tab = (struct eotid_entry *)&t->ftid_bmap[ftid_bmap_size]; t->eotid_bmap = (unsigned long *)&t->eotid_tab[t->neotids]; @@ -1578,6 +1604,8 @@ static int tid_init(struct tid_info *t) bitmap_zero(t->eotid_bmap, t->neotids); } + if (t->nhpftids) + bitmap_zero(t->hpftid_bmap, t->nhpftids); bitmap_zero(t->ftid_bmap, t->nftids); return 0; } @@ -4351,6 +4379,7 @@ static int adap_init0_config(struct adapter *adapter, int reset) "HMA configuration failed with error %d\n", ret); if (is_t6(adapter->params.chip)) { + adap_config_hpfilter(adapter); ret = setup_ppod_edram(adapter); if (!ret) dev_info(adapter->pdev_dev, "Successfully enabled " @@ -4660,16 +4689,6 @@ static int adap_init0(struct adapter *adap, int vpd_skip) /* * Grab some of our basic fundamental operating parameters. */ -#define FW_PARAM_DEV(param) \ - (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) | \ - FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_##param)) - -#define FW_PARAM_PFVF(param) \ - FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) | \ - FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_##param)| \ - FW_PARAMS_PARAM_Y_V(0) | \ - FW_PARAMS_PARAM_Z_V(0) - params[0] = FW_PARAM_PFVF(EQ_START); params[1] = FW_PARAM_PFVF(L2T_START); params[2] = FW_PARAM_PFVF(L2T_END); @@ -4687,6 +4706,16 @@ static int adap_init0(struct adapter *adap, int vpd_skip) adap->sge.ingr_start = val[5]; if (CHELSIO_CHIP_VERSION(adap->params.chip) > CHELSIO_T5) { + params[0] = FW_PARAM_PFVF(HPFILTER_START); + params[1] = FW_PARAM_PFVF(HPFILTER_END); + ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 2, + params, val); + if (ret < 0) + goto bye; + + adap->tids.hpftid_base = val[0]; + adap->tids.nhpftids = val[1] - val[0] + 1; + /* Read the raw mps entries. In T6, the last 2 tcam entries * are reserved for raw mac addresses (rawf = 2, one per port). */ @@ -4698,6 +4727,9 @@ static int adap_init0(struct adapter *adap, int vpd_skip) adap->rawf_start = val[0]; adap->rawf_cnt = val[1] - val[0] + 1; } + + adap->tids.tid_base = + t4_read_reg(adap, LE_DB_ACTIVE_TABLE_START_INDEX_A); } /* qids (ingress/egress) returned from firmware can be anywhere @@ -5050,8 +5082,6 @@ static int adap_init0(struct adapter *adap, int vpd_skip) } adap->params.crypto = ntohs(caps_cmd.cryptocaps); } -#undef FW_PARAM_PFVF -#undef FW_PARAM_DEV /* The MTU/MSS Table is initialized by now, so load their values. If * we're initializing the adapter, then we'll make any modifications diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index 0fa80bef575d..bb5513bdd293 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -672,10 +672,14 @@ int cxgb4_tc_flower_replace(struct net_device *dev, * 0 to driver. However, the hardware TCAM index * starts from 0. Hence, the -1 here. */ - if (cls->common.prio <= adap->tids.nftids) + if (cls->common.prio <= (adap->tids.nftids + + adap->tids.nhpftids)) { fidx = cls->common.prio - 1; - else + if (fidx < adap->tids.nhpftids) + fs->prio = 1; + } else { fidx = cxgb4_get_free_ftid(dev, inet_family); + } /* Only insert FLOWER rule if its priority doesn't * conflict with existing rules in the LETCAM. diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c index 102b370fbd3e..24c3c2dc7171 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c @@ -137,7 +137,7 @@ static int cxgb4_matchall_alloc_filter(struct net_device *dev, * -1 here. 1 slot is enough to create a wildcard matchall * VIID rule. */ - if (cls->common.prio <= adap->tids.nftids) + if (cls->common.prio <= (adap->tids.nftids + adap->tids.nhpftids)) fidx = cls->common.prio - 1; else fidx = cxgb4_get_free_ftid(dev, PF_INET); @@ -156,6 +156,8 @@ static int cxgb4_matchall_alloc_filter(struct net_device *dev, fs = &tc_port_matchall->ingress.fs; memset(fs, 0, sizeof(*fs)); + if (fidx < adap->tids.nhpftids) + fs->prio = 1; fs->tc_prio = cls->common.prio; fs->tc_cookie = cls->cookie; fs->hitcnts = 1; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c index 133f8623ba86..269b8d9e25e0 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c @@ -176,7 +176,7 @@ int cxgb4_config_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) /* Only insert U32 rule if its priority doesn't conflict with * existing rules in the LETCAM. */ - if (filter_id >= adapter->tids.nftids || + if (filter_id >= adapter->tids.nftids + adapter->tids.nhpftids || !cxgb4_filter_prio_in_range(dev, filter_id, cls->common.prio)) { NL_SET_ERR_MSG_MOD(extack, "No free LETCAM index available"); @@ -199,6 +199,8 @@ int cxgb4_config_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) memset(&fs, 0, sizeof(fs)); + if (filter_id < adapter->tids.nhpftids) + fs.prio = 1; fs.tc_prio = cls->common.prio; fs.tc_cookie = cls->knode.handle; @@ -355,6 +357,7 @@ int cxgb4_delete_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) unsigned int filter_id, max_tids, i, j; struct cxgb4_link *link = NULL; struct cxgb4_tc_u32_table *t; + struct filter_entry *f; u32 handle, uhtid; int ret; @@ -363,8 +366,15 @@ int cxgb4_delete_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) /* Fetch the location to delete the filter. */ filter_id = TC_U32_NODE(cls->knode.handle) - 1; - if (filter_id >= adapter->tids.nftids || - cls->knode.handle != adapter->tids.ftid_tab[filter_id].fs.tc_cookie) + if (filter_id >= adapter->tids.nftids + adapter->tids.nhpftids) + return -ERANGE; + + if (filter_id < adapter->tids.nhpftids) + f = &adapter->tids.hpftid_tab[filter_id]; + else + f = &adapter->tids.ftid_tab[filter_id - adapter->tids.nhpftids]; + + if (cls->knode.handle != f->fs.tc_cookie) return -ERANGE; t = adapter->tc_u32; @@ -445,7 +455,7 @@ void cxgb4_cleanup_tc_u32(struct adapter *adap) struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap) { - unsigned int max_tids = adap->tids.nftids; + unsigned int max_tids = adap->tids.nftids + adap->tids.nhpftids; struct cxgb4_tc_u32_table *t; unsigned int i; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index 861b25d28ed6..d9d27bc1ae67 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -99,6 +99,7 @@ struct eotid_entry { */ struct tid_info { void **tid_tab; + unsigned int tid_base; unsigned int ntids; struct serv_entry *stid_tab; @@ -111,6 +112,11 @@ struct tid_info { unsigned int natids; unsigned int atid_base; + struct filter_entry *hpftid_tab; + unsigned long *hpftid_bmap; + unsigned int nhpftids; + unsigned int hpftid_base; + struct filter_entry *ftid_tab; unsigned long *ftid_bmap; unsigned int nftids; @@ -147,9 +153,15 @@ struct tid_info { static inline void *lookup_tid(const struct tid_info *t, unsigned int tid) { + tid -= t->tid_base; return tid < t->ntids ? t->tid_tab[tid] : NULL; } +static inline bool tid_out_of_range(const struct tid_info *t, unsigned int tid) +{ + return ((tid - t->tid_base) >= t->ntids); +} + static inline void *lookup_atid(const struct tid_info *t, unsigned int atid) { return atid < t->natids ? t->atid_tab[atid].data : NULL; @@ -171,7 +183,7 @@ static inline void *lookup_stid(const struct tid_info *t, unsigned int stid) static inline void cxgb4_insert_tid(struct tid_info *t, void *data, unsigned int tid, unsigned short family) { - t->tid_tab[tid] = data; + t->tid_tab[tid - t->tid_base] = data; if (t->hash_base && (tid >= t->hash_base)) { if (family == AF_INET6) atomic_add(2, &t->hash_tids_in_use); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index ac4fb43bdec6..accad1101ad1 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -1321,6 +1321,7 @@ enum fw_params_param_dev { FW_PARAMS_PARAM_DEV_RDMA_WRITE_WITH_IMM = 0x21, FW_PARAMS_PARAM_DEV_PPOD_EDRAM = 0x23, FW_PARAMS_PARAM_DEV_RI_WRITE_CMPL_WR = 0x24, + FW_PARAMS_PARAM_DEV_HPFILTER_REGION_SUPPORT = 0x26, FW_PARAMS_PARAM_DEV_OPAQUE_VIID_SMT_EXTN = 0x27, FW_PARAMS_PARAM_DEV_HASHFILTER_WITH_OFLD = 0x28, FW_PARAMS_PARAM_DEV_DBQ_TIMER = 0x29, diff --git a/drivers/net/ethernet/cirrus/cs89x0.c b/drivers/net/ethernet/cirrus/cs89x0.c index c9aebcde403a..33ace3307059 100644 --- a/drivers/net/ethernet/cirrus/cs89x0.c +++ b/drivers/net/ethernet/cirrus/cs89x0.c @@ -1128,7 +1128,7 @@ net_get_stats(struct net_device *dev) return &dev->stats; } -static void net_timeout(struct net_device *dev) +static void net_timeout(struct net_device *dev, unsigned int txqueue) { /* If we get here, some higher level has decided we are broken. There should really be a "kick me" function call instead. */ diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index acb2856936d2..bbd7b3175f09 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1095,7 +1095,7 @@ static void enic_set_rx_mode(struct net_device *netdev) } /* netif_tx_lock held, BHs disabled */ -static void enic_tx_timeout(struct net_device *netdev) +static void enic_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct enic *enic = netdev_priv(netdev); schedule_work(&enic->tx_hang_reset); diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index a8f4c69252ff..de0b6e066eef 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -1296,7 +1296,7 @@ out_drop: return NETDEV_TX_OK; } -static void gmac_tx_timeout(struct net_device *netdev) +static void gmac_tx_timeout(struct net_device *netdev, unsigned int txqueue) { netdev_err(netdev, "Tx timeout\n"); gmac_dump_dma_state(netdev); diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c index cce90b5925d9..1ea3372775e6 100644 --- a/drivers/net/ethernet/davicom/dm9000.c +++ b/drivers/net/ethernet/davicom/dm9000.c @@ -964,7 +964,7 @@ dm9000_init_dm9000(struct net_device *dev) } /* Our watchdog timed out. Called by the networking layer */ -static void dm9000_timeout(struct net_device *dev) +static void dm9000_timeout(struct net_device *dev, unsigned int txqueue) { struct board_info *db = netdev_priv(dev); u8 reg_save; diff --git a/drivers/net/ethernet/dec/tulip/de2104x.c b/drivers/net/ethernet/dec/tulip/de2104x.c index f1a2da15dd0a..fd3c2abf74b5 100644 --- a/drivers/net/ethernet/dec/tulip/de2104x.c +++ b/drivers/net/ethernet/dec/tulip/de2104x.c @@ -1436,7 +1436,7 @@ static int de_close (struct net_device *dev) return 0; } -static void de_tx_timeout (struct net_device *dev) +static void de_tx_timeout (struct net_device *dev, unsigned int txqueue) { struct de_private *de = netdev_priv(dev); const int irq = de->pdev->irq; diff --git a/drivers/net/ethernet/dec/tulip/dmfe.c b/drivers/net/ethernet/dec/tulip/dmfe.c index 0efdbd1a4a6f..32d470d4122a 100644 --- a/drivers/net/ethernet/dec/tulip/dmfe.c +++ b/drivers/net/ethernet/dec/tulip/dmfe.c @@ -2214,15 +2214,16 @@ static int __init dmfe_init_module(void) if (cr6set) dmfe_cr6_user_set = cr6set; - switch(mode) { - case DMFE_10MHF: + switch (mode) { + case DMFE_10MHF: case DMFE_100MHF: case DMFE_10MFD: case DMFE_100MFD: case DMFE_1M_HPNA: dmfe_media_mode = mode; break; - default:dmfe_media_mode = DMFE_AUTO; + default: + dmfe_media_mode = DMFE_AUTO; break; } diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c index 3e3e08698876..9e9d9eee29d9 100644 --- a/drivers/net/ethernet/dec/tulip/tulip_core.c +++ b/drivers/net/ethernet/dec/tulip/tulip_core.c @@ -255,7 +255,7 @@ MODULE_DEVICE_TABLE(pci, tulip_pci_tbl); const char tulip_media_cap[32] = {0,0,0,16, 3,19,16,24, 27,4,7,5, 0,20,23,20, 28,31,0,0, }; -static void tulip_tx_timeout(struct net_device *dev); +static void tulip_tx_timeout(struct net_device *dev, unsigned int txqueue); static void tulip_init_ring(struct net_device *dev); static void tulip_free_ring(struct net_device *dev); static netdev_tx_t tulip_start_xmit(struct sk_buff *skb, @@ -534,7 +534,7 @@ free_ring: } -static void tulip_tx_timeout(struct net_device *dev) +static void tulip_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct tulip_private *tp = netdev_priv(dev); void __iomem *ioaddr = tp->base_addr; diff --git a/drivers/net/ethernet/dec/tulip/uli526x.c b/drivers/net/ethernet/dec/tulip/uli526x.c index b1f30b194300..117ffe08800d 100644 --- a/drivers/net/ethernet/dec/tulip/uli526x.c +++ b/drivers/net/ethernet/dec/tulip/uli526x.c @@ -1809,8 +1809,8 @@ static int __init uli526x_init_module(void) if (cr6set) uli526x_cr6_user_set = cr6set; - switch (mode) { - case ULI526X_10MHF: + switch (mode) { + case ULI526X_10MHF: case ULI526X_100MHF: case ULI526X_10MFD: case ULI526X_100MFD: diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c index 70cb2d689c2c..7f136488e67c 100644 --- a/drivers/net/ethernet/dec/tulip/winbond-840.c +++ b/drivers/net/ethernet/dec/tulip/winbond-840.c @@ -331,7 +331,7 @@ static void netdev_timer(struct timer_list *t); static void init_rxtx_rings(struct net_device *dev); static void free_rxtx_rings(struct netdev_private *np); static void init_registers(struct net_device *dev); -static void tx_timeout(struct net_device *dev); +static void tx_timeout(struct net_device *dev, unsigned int txqueue); static int alloc_ringdesc(struct net_device *dev); static void free_ringdesc(struct netdev_private *np); static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); @@ -921,7 +921,7 @@ static void init_registers(struct net_device *dev) iowrite32(0, ioaddr + RxStartDemand); } -static void tx_timeout(struct net_device *dev) +static void tx_timeout(struct net_device *dev, unsigned int txqueue) { struct netdev_private *np = netdev_priv(dev); void __iomem *ioaddr = np->base_addr; diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index 55e720d2ea0c..26c5da032b1e 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -66,7 +66,7 @@ static const int multicast_filter_limit = 0x40; static int rio_open (struct net_device *dev); static void rio_timer (struct timer_list *t); -static void rio_tx_timeout (struct net_device *dev); +static void rio_tx_timeout (struct net_device *dev, unsigned int txqueue); static netdev_tx_t start_xmit (struct sk_buff *skb, struct net_device *dev); static irqreturn_t rio_interrupt (int irq, void *dev_instance); static void rio_free_tx (struct net_device *dev, int irq); @@ -696,7 +696,7 @@ rio_timer (struct timer_list *t) } static void -rio_tx_timeout (struct net_device *dev) +rio_tx_timeout (struct net_device *dev, unsigned int txqueue) { struct netdev_private *np = netdev_priv(dev); void __iomem *ioaddr = np->ioaddr; diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c index 4a37a69764ce..b91387c456ba 100644 --- a/drivers/net/ethernet/dlink/sundance.c +++ b/drivers/net/ethernet/dlink/sundance.c @@ -432,7 +432,7 @@ static int mdio_wait_link(struct net_device *dev, int wait); static int netdev_open(struct net_device *dev); static void check_duplex(struct net_device *dev); static void netdev_timer(struct timer_list *t); -static void tx_timeout(struct net_device *dev); +static void tx_timeout(struct net_device *dev, unsigned int txqueue); static void init_ring(struct net_device *dev); static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); static int reset_tx (struct net_device *dev); @@ -969,7 +969,7 @@ static void netdev_timer(struct timer_list *t) add_timer(&np->timer); } -static void tx_timeout(struct net_device *dev) +static void tx_timeout(struct net_device *dev, unsigned int txqueue) { struct netdev_private *np = netdev_priv(dev); void __iomem *ioaddr = np->base; diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 39eb7d525043..56f59db6ebf2 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1417,7 +1417,7 @@ drop: return NETDEV_TX_OK; } -static void be_tx_timeout(struct net_device *netdev) +static void be_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct be_adapter *adapter = netdev_priv(netdev); struct device *dev = &adapter->pdev->dev; diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c index ea4f17f5cce7..66406da16b60 100644 --- a/drivers/net/ethernet/ethoc.c +++ b/drivers/net/ethernet/ethoc.c @@ -869,7 +869,7 @@ static int ethoc_change_mtu(struct net_device *dev, int new_mtu) return -ENOSYS; } -static void ethoc_tx_timeout(struct net_device *dev) +static void ethoc_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct ethoc *priv = netdev_priv(dev); u32 pending = ethoc_read(priv, INT_SOURCE); diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 8ed85037f021..48b3b72fe02e 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1545,7 +1545,7 @@ static int ftgmac100_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int return phy_mii_ioctl(netdev->phydev, ifr, cmd); } -static void ftgmac100_tx_timeout(struct net_device *netdev) +static void ftgmac100_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct ftgmac100 *priv = netdev_priv(netdev); diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c index c24fd56a2c71..84f10970299a 100644 --- a/drivers/net/ethernet/fealnx.c +++ b/drivers/net/ethernet/fealnx.c @@ -428,7 +428,7 @@ static void getlinktype(struct net_device *dev); static void getlinkstatus(struct net_device *dev); static void netdev_timer(struct timer_list *t); static void reset_timer(struct timer_list *t); -static void fealnx_tx_timeout(struct net_device *dev); +static void fealnx_tx_timeout(struct net_device *dev, unsigned int txqueue); static void init_ring(struct net_device *dev); static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); static irqreturn_t intr_handler(int irq, void *dev_instance); @@ -1191,7 +1191,7 @@ static void reset_timer(struct timer_list *t) } -static void fealnx_tx_timeout(struct net_device *dev) +static void fealnx_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct netdev_private *np = netdev_priv(dev); void __iomem *ioaddr = np->mem; diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 6a9d12dad5d9..a60fc3cfc06e 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -288,7 +288,7 @@ static int dpaa_stop(struct net_device *net_dev) return err; } -static void dpaa_tx_timeout(struct net_device *net_dev) +static void dpaa_tx_timeout(struct net_device *net_dev, unsigned int txqueue) { struct dpaa_percpu_priv *percpu_priv; const struct dpaa_priv *priv; diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c index a9503aea527f..a0061e929908 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c @@ -27,6 +27,20 @@ static int dpaa2_ptp_enable(struct ptp_clock_info *ptp, mc_dev = to_fsl_mc_device(dev); switch (rq->type) { + case PTP_CLK_REQ_EXTTS: + switch (rq->extts.index) { + case 0: + bit = DPRTC_EVENT_ETS1; + break; + case 1: + bit = DPRTC_EVENT_ETS2; + break; + default: + return -EINVAL; + } + if (on) + extts_clean_up(ptp_qoriq, rq->extts.index, false); + break; case PTP_CLK_REQ_PPS: bit = DPRTC_EVENT_PPS; break; @@ -96,6 +110,12 @@ static irqreturn_t dpaa2_ptp_irq_handler_thread(int irq, void *priv) ptp_clock_event(ptp_qoriq->clock, &event); } + if (status & DPRTC_EVENT_ETS1) + extts_clean_up(ptp_qoriq, 0, true); + + if (status & DPRTC_EVENT_ETS2) + extts_clean_up(ptp_qoriq, 1, true); + err = dprtc_clear_irq_status(mc_dev->mc_io, 0, mc_dev->mc_handle, DPRTC_IRQ_INDEX, status); if (unlikely(err)) { diff --git a/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h index 4ac05bfef338..96ffeb948f08 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h +++ b/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h @@ -9,9 +9,11 @@ /* Command versioning */ #define DPRTC_CMD_BASE_VERSION 1 +#define DPRTC_CMD_VERSION_2 2 #define DPRTC_CMD_ID_OFFSET 4 #define DPRTC_CMD(id) (((id) << DPRTC_CMD_ID_OFFSET) | DPRTC_CMD_BASE_VERSION) +#define DPRTC_CMD_V2(id) (((id) << DPRTC_CMD_ID_OFFSET) | DPRTC_CMD_VERSION_2) /* Command IDs */ #define DPRTC_CMDID_CLOSE DPRTC_CMD(0x800) @@ -19,7 +21,7 @@ #define DPRTC_CMDID_SET_IRQ_ENABLE DPRTC_CMD(0x012) #define DPRTC_CMDID_GET_IRQ_ENABLE DPRTC_CMD(0x013) -#define DPRTC_CMDID_SET_IRQ_MASK DPRTC_CMD(0x014) +#define DPRTC_CMDID_SET_IRQ_MASK DPRTC_CMD_V2(0x014) #define DPRTC_CMDID_GET_IRQ_MASK DPRTC_CMD(0x015) #define DPRTC_CMDID_GET_IRQ_STATUS DPRTC_CMD(0x016) #define DPRTC_CMDID_CLEAR_IRQ_STATUS DPRTC_CMD(0x017) diff --git a/drivers/net/ethernet/freescale/dpaa2/dprtc.h b/drivers/net/ethernet/freescale/dpaa2/dprtc.h index 311c184e1aef..05c413719e55 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dprtc.h +++ b/drivers/net/ethernet/freescale/dpaa2/dprtc.h @@ -20,6 +20,8 @@ struct fsl_mc_io; #define DPRTC_IRQ_INDEX 0 #define DPRTC_EVENT_PPS 0x08000000 +#define DPRTC_EVENT_ETS1 0x00800000 +#define DPRTC_EVENT_ETS2 0x00400000 int dprtc_open(struct fsl_mc_io *mc_io, u32 cmd_flags, diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 17739906c966..2ee4a2cd4780 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -227,6 +227,8 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb, enetc_bdr_idx_inc(tx_ring, &i); tx_ring->next_to_use = i; + skb_tx_timestamp(skb); + /* let H/W know BD ring has been updated */ enetc_wr_reg(tx_ring->tpir, i); /* includes wmb() */ diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index 880a8ed8bb47..301ee0dde02d 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -579,6 +579,7 @@ static int enetc_get_ts_info(struct net_device *ndev, (1 << HWTSTAMP_FILTER_ALL); #else info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_TX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE; #endif return 0; diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index 2e99438cb1bf..9190ffc9f6b2 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -192,7 +192,6 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data) u32 hi_credit_bit, hi_credit_reg; u32 max_interference_size; u32 port_frame_max_size; - u32 tc_max_sized_frame; u8 tc = cbs->queue; u8 prio_top, prio_next; int bw_sum = 0; @@ -250,7 +249,7 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data) return -EINVAL; } - tc_max_sized_frame = enetc_port_rd(&si->hw, ENETC_PTCMSDUR(tc)); + enetc_port_rd(&si->hw, ENETC_PTCMSDUR(tc)); /* For top prio TC, the max_interfrence_size is maxSizedFrame. * diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 05c1899f6628..798fed37be46 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1141,7 +1141,7 @@ fec_stop(struct net_device *ndev) static void -fec_timeout(struct net_device *ndev) +fec_timeout(struct net_device *ndev, unsigned int txqueue) { struct fec_enet_private *fep = netdev_priv(ndev); diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c index 30cdb246d020..de5278485062 100644 --- a/drivers/net/ethernet/freescale/fec_mpc52xx.c +++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c @@ -84,7 +84,7 @@ static int debug = -1; /* the above default */ module_param(debug, int, 0); MODULE_PARM_DESC(debug, "debugging messages level"); -static void mpc52xx_fec_tx_timeout(struct net_device *dev) +static void mpc52xx_fec_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct mpc52xx_fec_priv *priv = netdev_priv(dev); unsigned long flags; diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index 3981c06f082f..80903cd58468 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -641,7 +641,7 @@ static void fs_timeout_work(struct work_struct *work) netif_wake_queue(dev); } -static void fs_timeout(struct net_device *dev) +static void fs_timeout(struct net_device *dev, unsigned int txqueue) { struct fs_enet_private *fep = netdev_priv(dev); diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 72868a28b621..b636d83a7ee9 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -2093,7 +2093,7 @@ static void gfar_reset_task(struct work_struct *work) reset_gfar(priv->ndev); } -static void gfar_timeout(struct net_device *dev) +static void gfar_timeout(struct net_device *dev, unsigned int txqueue) { struct gfar_private *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index f839fa94ebdd..0d101c00286f 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -3545,7 +3545,7 @@ static void ucc_geth_timeout_work(struct work_struct *work) * ucc_geth_timeout gets called when a packet has not been * transmitted after a set amount of time. */ -static void ucc_geth_timeout(struct net_device *dev) +static void ucc_geth_timeout(struct net_device *dev, unsigned int txqueue) { struct ucc_geth_private *ugeth = netdev_priv(dev); diff --git a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c index 1eca0fdb9933..a7b7a4aace79 100644 --- a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c +++ b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c @@ -93,7 +93,7 @@ static irqreturn_t fjn_interrupt(int irq, void *dev_id); static void fjn_rx(struct net_device *dev); static void fjn_reset(struct net_device *dev); static void set_rx_mode(struct net_device *dev); -static void fjn_tx_timeout(struct net_device *dev); +static void fjn_tx_timeout(struct net_device *dev, unsigned int txqueue); static const struct ethtool_ops netdev_ethtool_ops; /* @@ -774,7 +774,7 @@ static irqreturn_t fjn_interrupt(int dummy, void *dev_id) /*====================================================================*/ -static void fjn_tx_timeout(struct net_device *dev) +static void fjn_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct local_info *lp = netdev_priv(dev); unsigned int ioaddr = dev->base_addr; diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 9b7a8db9860f..e032563ceefd 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -845,7 +845,7 @@ static void gve_turnup(struct gve_priv *priv) gve_set_napi_enabled(priv); } -static void gve_tx_timeout(struct net_device *dev) +static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct gve_priv *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index 3e9b6d543c77..dc8dd5fc1559 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -779,7 +779,7 @@ static int hip04_mac_stop(struct net_device *ndev) return 0; } -static void hip04_timeout(struct net_device *ndev) +static void hip04_timeout(struct net_device *ndev, unsigned int txqueue) { struct hip04_priv *priv = netdev_priv(ndev); diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c index 247de9105d10..4fb776920a93 100644 --- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c +++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c @@ -893,7 +893,7 @@ static void hix5hd2_tx_timeout_task(struct work_struct *work) hix5hd2_net_open(priv->netdev); } -static void hix5hd2_net_timeout(struct net_device *dev) +static void hix5hd2_net_timeout(struct net_device *dev, unsigned int txqueue) { struct hix5hd2_priv *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 14ab20491fd0..e45553ec114a 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -1485,7 +1485,7 @@ static int hns_nic_net_stop(struct net_device *ndev) static void hns_tx_timeout_reset(struct hns_nic_priv *priv); #define HNS_TX_TIMEO_LIMIT (40 * HZ) -static void hns_nic_net_timeout(struct net_device *ndev) +static void hns_nic_net_timeout(struct net_device *ndev, unsigned int txqueue) { struct hns_nic_priv *priv = netdev_priv(ndev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 69545dd6c938..aee5facc89b5 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -1556,6 +1556,37 @@ static int hns3_nic_set_features(struct net_device *netdev, return 0; } +static netdev_features_t hns3_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ +#define HNS3_MAX_HDR_LEN 480U +#define HNS3_MAX_L4_HDR_LEN 60U + + size_t len; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return features; + + if (skb->encapsulation) + len = skb_inner_transport_header(skb) - skb->data; + else + len = skb_transport_header(skb) - skb->data; + + /* Assume L4 is 60 byte as TCP is the only protocol with a + * a flexible value, and it's max len is 60 bytes. + */ + len += HNS3_MAX_L4_HDR_LEN; + + /* Hardware only supports checksum on the skb with a max header + * len of 480 bytes. + */ + if (len > HNS3_MAX_HDR_LEN) + features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); + + return features; +} + static void hns3_nic_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) { @@ -1869,7 +1900,7 @@ static bool hns3_get_tx_timeo_queue_info(struct net_device *ndev) return true; } -static void hns3_nic_net_timeout(struct net_device *ndev) +static void hns3_nic_net_timeout(struct net_device *ndev, unsigned int txqueue) { struct hns3_nic_priv *priv = netdev_priv(ndev); struct hnae3_handle *h = priv->ae_handle; @@ -1970,6 +2001,7 @@ static const struct net_device_ops hns3_nic_netdev_ops = { .ndo_do_ioctl = hns3_nic_do_ioctl, .ndo_change_mtu = hns3_nic_change_mtu, .ndo_set_features = hns3_nic_set_features, + .ndo_features_check = hns3_features_check, .ndo_get_stats64 = hns3_nic_get_stats64, .ndo_setup_tc = hns3_nic_setup_tc, .ndo_set_rx_mode = hns3_nic_set_rx_mode, @@ -2788,7 +2820,6 @@ static bool hns3_parse_vlan_tag(struct hns3_enet_ring *ring, static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length, unsigned char *va) { -#define HNS3_NEED_ADD_FRAG 1 struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_clean]; struct net_device *netdev = ring_to_netdev(ring); struct sk_buff *skb; @@ -2832,33 +2863,19 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length, desc_cb); ring_ptr_move_fw(ring, next_to_clean); - return HNS3_NEED_ADD_FRAG; + return 0; } -static int hns3_add_frag(struct hns3_enet_ring *ring, struct hns3_desc *desc, - bool pending) +static int hns3_add_frag(struct hns3_enet_ring *ring) { struct sk_buff *skb = ring->skb; struct sk_buff *head_skb = skb; struct sk_buff *new_skb; struct hns3_desc_cb *desc_cb; - struct hns3_desc *pre_desc; + struct hns3_desc *desc; u32 bd_base_info; - int pre_bd; - /* if there is pending bd, the SW param next_to_clean has moved - * to next and the next is NULL - */ - if (pending) { - pre_bd = (ring->next_to_clean - 1 + ring->desc_num) % - ring->desc_num; - pre_desc = &ring->desc[pre_bd]; - bd_base_info = le32_to_cpu(pre_desc->rx.bd_base_info); - } else { - bd_base_info = le32_to_cpu(desc->rx.bd_base_info); - } - - while (!(bd_base_info & BIT(HNS3_RXD_FE_B))) { + do { desc = &ring->desc[ring->next_to_clean]; desc_cb = &ring->desc_cb[ring->next_to_clean]; bd_base_info = le32_to_cpu(desc->rx.bd_base_info); @@ -2895,7 +2912,7 @@ static int hns3_add_frag(struct hns3_enet_ring *ring, struct hns3_desc *desc, hns3_nic_reuse_page(skb, ring->frag_num++, ring, 0, desc_cb); ring_ptr_move_fw(ring, next_to_clean); ring->pending_buf++; - } + } while (!(bd_base_info & BIT(HNS3_RXD_FE_B))); return 0; } @@ -3063,28 +3080,23 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring) if (ret < 0) /* alloc buffer fail */ return ret; - if (ret > 0) { /* need add frag */ - ret = hns3_add_frag(ring, desc, false); + if (!(bd_base_info & BIT(HNS3_RXD_FE_B))) { /* need add frag */ + ret = hns3_add_frag(ring); if (ret) return ret; - - /* As the head data may be changed when GRO enable, copy - * the head data in after other data rx completed - */ - memcpy(skb->data, ring->va, - ALIGN(ring->pull_len, sizeof(long))); } } else { - ret = hns3_add_frag(ring, desc, true); + ret = hns3_add_frag(ring); if (ret) return ret; + } - /* As the head data may be changed when GRO enable, copy - * the head data in after other data rx completed - */ + /* As the head data may be changed when GRO enable, copy + * the head data in after other data rx completed + */ + if (skb->len > HNS3_RX_HEAD_SIZE) memcpy(skb->data, ring->va, ALIGN(ring->pull_len, sizeof(long))); - } ret = hns3_handle_bdinfo(ring, skb); if (unlikely(ret)) { @@ -3590,7 +3602,12 @@ static void hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv) if (!tqp_vector->rx_group.ring && !tqp_vector->tx_group.ring) continue; - hns3_get_vector_ring_chain(tqp_vector, &vector_ring_chain); + /* Since the mapping can be overwritten, when fail to get the + * chain between vector and ring, we should go on to deal with + * the remaining options. + */ + if (hns3_get_vector_ring_chain(tqp_vector, &vector_ring_chain)) + dev_warn(priv->dev, "failed to get ring chain\n"); h->ae_algo->ops->unmap_ring_from_vector(h, tqp_vector->vector_irq, &vector_ring_chain); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c index 940ead3970d1..7f509eff562e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c @@ -479,19 +479,6 @@ static void hclge_cmd_uninit_regs(struct hclge_hw *hw) hclge_write_dev(hw, HCLGE_NIC_CRQ_TAIL_REG, 0); } -static void hclge_destroy_queue(struct hclge_cmq_ring *ring) -{ - spin_lock(&ring->lock); - hclge_free_cmd_desc(ring); - spin_unlock(&ring->lock); -} - -static void hclge_destroy_cmd_queue(struct hclge_hw *hw) -{ - hclge_destroy_queue(&hw->cmq.csq); - hclge_destroy_queue(&hw->cmq.crq); -} - void hclge_cmd_uninit(struct hclge_dev *hdev) { spin_lock_bh(&hdev->hw.cmq.csq.lock); @@ -501,5 +488,6 @@ void hclge_cmd_uninit(struct hclge_dev *hdev) spin_unlock(&hdev->hw.cmq.crq.lock); spin_unlock_bh(&hdev->hw.cmq.csq.lock); - hclge_destroy_cmd_queue(&hdev->hw); + hclge_free_cmd_desc(&hdev->hw.cmq.csq); + hclge_free_cmd_desc(&hdev->hw.cmq.crq); } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c index 112df34b3869..f3d4cbd28913 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c @@ -886,8 +886,8 @@ static void hclge_dbg_dump_mng_table(struct hclge_dev *hdev) } } -static void hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, u8 stage, - bool sel_x, u32 loc) +static int hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, u8 stage, + bool sel_x, u32 loc) { struct hclge_fd_tcam_config_1_cmd *req1; struct hclge_fd_tcam_config_2_cmd *req2; @@ -912,7 +912,7 @@ static void hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, u8 stage, ret = hclge_cmd_send(&hdev->hw, desc, 3); if (ret) - return; + return ret; dev_info(&hdev->pdev->dev, " read result tcam key %s(%u):\n", sel_x ? "x" : "y", loc); @@ -931,16 +931,76 @@ static void hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, u8 stage, req = (u32 *)req3->tcam_data; for (i = 0; i < 5; i++) dev_info(&hdev->pdev->dev, "%08x\n", *req++); + + return ret; +} + +static int hclge_dbg_get_rules_location(struct hclge_dev *hdev, u16 *rule_locs) +{ + struct hclge_fd_rule *rule; + struct hlist_node *node; + int cnt = 0; + + spin_lock_bh(&hdev->fd_rule_lock); + hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) { + rule_locs[cnt] = rule->location; + cnt++; + } + spin_unlock_bh(&hdev->fd_rule_lock); + + if (cnt != hdev->hclge_fd_rule_num) + return -EINVAL; + + return cnt; } static void hclge_dbg_fd_tcam(struct hclge_dev *hdev) { - u32 i; + int i, ret, rule_cnt; + u16 *rule_locs; - for (i = 0; i < hdev->fd_cfg.rule_num[0]; i++) { - hclge_dbg_fd_tcam_read(hdev, 0, true, i); - hclge_dbg_fd_tcam_read(hdev, 0, false, i); + if (!hnae3_dev_fd_supported(hdev)) { + dev_err(&hdev->pdev->dev, + "Only FD-supported dev supports dump fd tcam\n"); + return; } + + if (!hdev->hclge_fd_rule_num || + !hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]) + return; + + rule_locs = kcalloc(hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1], + sizeof(u16), GFP_KERNEL); + if (!rule_locs) + return; + + rule_cnt = hclge_dbg_get_rules_location(hdev, rule_locs); + if (rule_cnt <= 0) { + dev_err(&hdev->pdev->dev, + "failed to get rule number, ret = %d\n", rule_cnt); + kfree(rule_locs); + return; + } + + for (i = 0; i < rule_cnt; i++) { + ret = hclge_dbg_fd_tcam_read(hdev, 0, true, rule_locs[i]); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to get fd tcam key x, ret = %d\n", ret); + kfree(rule_locs); + return; + } + + ret = hclge_dbg_fd_tcam_read(hdev, 0, false, rule_locs[i]); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to get fd tcam key y, ret = %d\n", ret); + kfree(rule_locs); + return; + } + } + + kfree(rule_locs); } void hclge_dbg_dump_rst_info(struct hclge_dev *hdev) @@ -976,6 +1036,14 @@ void hclge_dbg_dump_rst_info(struct hclge_dev *hdev) dev_info(&hdev->pdev->dev, "hdev state: 0x%lx\n", hdev->state); } +static void hclge_dbg_dump_serv_info(struct hclge_dev *hdev) +{ + dev_info(&hdev->pdev->dev, "last_serv_processed: %lu\n", + hdev->last_serv_processed); + dev_info(&hdev->pdev->dev, "last_serv_cnt: %lu\n", + hdev->serv_processed_cnt); +} + static void hclge_dbg_get_m7_stats_info(struct hclge_dev *hdev) { struct hclge_desc *desc_src, *desc_tmp; @@ -1227,6 +1295,8 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf) hclge_dbg_dump_reg_cmd(hdev, &cmd_buf[sizeof(DUMP_REG)]); } else if (strncmp(cmd_buf, "dump reset info", 15) == 0) { hclge_dbg_dump_rst_info(hdev); + } else if (strncmp(cmd_buf, "dump serv info", 14) == 0) { + hclge_dbg_dump_serv_info(hdev); } else if (strncmp(cmd_buf, "dump m7 info", 12) == 0) { hclge_dbg_get_m7_stats_info(hdev); } else if (strncmp(cmd_buf, "dump ncl_config", 15) == 0) { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index d862e9ba27e1..a510f005209c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -72,6 +72,8 @@ static int hclge_set_default_loopback(struct hclge_dev *hdev); static struct hnae3_ae_algo ae_algo; +static struct workqueue_struct *hclge_wq; + static const struct pci_device_id ae_algo_pci_tbl[] = { {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_GE), 0}, {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE), 0}, @@ -416,7 +418,7 @@ static int hclge_mac_update_stats_defective(struct hclge_dev *hdev) { #define HCLGE_MAC_CMD_NUM 21 - u64 *data = (u64 *)(&hdev->hw_stats.mac_stats); + u64 *data = (u64 *)(&hdev->mac_stats); struct hclge_desc desc[HCLGE_MAC_CMD_NUM]; __le64 *desc_data; int i, k, n; @@ -453,7 +455,7 @@ static int hclge_mac_update_stats_defective(struct hclge_dev *hdev) static int hclge_mac_update_stats_complete(struct hclge_dev *hdev, u32 desc_num) { - u64 *data = (u64 *)(&hdev->hw_stats.mac_stats); + u64 *data = (u64 *)(&hdev->mac_stats); struct hclge_desc *desc; __le64 *desc_data; u16 i, k, n; @@ -802,7 +804,7 @@ static void hclge_get_stats(struct hnae3_handle *handle, u64 *data) struct hclge_dev *hdev = vport->back; u64 *p; - p = hclge_comm_get_stats(&hdev->hw_stats.mac_stats, g_mac_stats_string, + p = hclge_comm_get_stats(&hdev->mac_stats, g_mac_stats_string, ARRAY_SIZE(g_mac_stats_string), data); p = hclge_tqps_get_stats(handle, p); } @@ -815,8 +817,8 @@ static void hclge_get_mac_stat(struct hnae3_handle *handle, hclge_update_stats(handle, NULL); - mac_stats->tx_pause_cnt = hdev->hw_stats.mac_stats.mac_tx_mac_pause_num; - mac_stats->rx_pause_cnt = hdev->hw_stats.mac_stats.mac_rx_mac_pause_num; + mac_stats->tx_pause_cnt = hdev->mac_stats.mac_tx_mac_pause_num; + mac_stats->rx_pause_cnt = hdev->mac_stats.mac_rx_mac_pause_num; } static int hclge_parse_func_status(struct hclge_dev *hdev, @@ -2665,31 +2667,27 @@ static int hclge_mac_init(struct hclge_dev *hdev) static void hclge_mbx_task_schedule(struct hclge_dev *hdev) { - if (!test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state) && + if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) && !test_and_set_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state)) - queue_work_on(cpumask_first(&hdev->affinity_mask), system_wq, - &hdev->mbx_service_task); + mod_delayed_work_on(cpumask_first(&hdev->affinity_mask), + hclge_wq, &hdev->service_task, 0); } static void hclge_reset_task_schedule(struct hclge_dev *hdev) { if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) && !test_and_set_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state)) - queue_work_on(cpumask_first(&hdev->affinity_mask), system_wq, - &hdev->rst_service_task); + mod_delayed_work_on(cpumask_first(&hdev->affinity_mask), + hclge_wq, &hdev->service_task, 0); } void hclge_task_schedule(struct hclge_dev *hdev, unsigned long delay_time) { - if (!test_bit(HCLGE_STATE_DOWN, &hdev->state) && - !test_bit(HCLGE_STATE_REMOVING, &hdev->state) && - !test_and_set_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state)) { - hdev->hw_stats.stats_timer++; - hdev->fd_arfs_expire_timer++; + if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) && + !test_bit(HCLGE_STATE_RST_FAIL, &hdev->state)) mod_delayed_work_on(cpumask_first(&hdev->affinity_mask), - system_wq, &hdev->service_task, + hclge_wq, &hdev->service_task, delay_time); - } } static int hclge_get_mac_link_status(struct hclge_dev *hdev) @@ -2748,6 +2746,10 @@ static void hclge_update_link_status(struct hclge_dev *hdev) if (!client) return; + + if (test_and_set_bit(HCLGE_STATE_LINK_UPDATING, &hdev->state)) + return; + state = hclge_get_mac_phy_link(hdev); if (state != hdev->hw.mac.link) { for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { @@ -2761,6 +2763,8 @@ static void hclge_update_link_status(struct hclge_dev *hdev) } hdev->hw.mac.link = state; } + + clear_bit(HCLGE_STATE_LINK_UPDATING, &hdev->state); } static void hclge_update_port_capability(struct hclge_mac *mac) @@ -2940,6 +2944,9 @@ static int hclge_get_vf_config(struct hnae3_handle *handle, int vf, ivf->trusted = vport->vf_info.trusted; ivf->min_tx_rate = 0; ivf->max_tx_rate = vport->vf_info.max_tx_rate; + ivf->vlan = vport->port_base_vlan_cfg.vlan_info.vlan_tag; + ivf->vlan_proto = htons(vport->port_base_vlan_cfg.vlan_info.vlan_proto); + ivf->qos = vport->port_base_vlan_cfg.vlan_info.qos; ether_addr_copy(ivf->mac, vport->vf_info.mac); return 0; @@ -2998,8 +3005,6 @@ static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval) /* check for vector0 msix event source */ if (msix_src_reg & HCLGE_VECTOR0_REG_MSIX_MASK) { - dev_info(&hdev->pdev->dev, "received event 0x%x\n", - msix_src_reg); *clearval = msix_src_reg; return HCLGE_VECTOR0_EVENT_ERR; } @@ -3352,6 +3357,18 @@ static int hclge_set_all_vf_rst(struct hclge_dev *hdev, bool reset) return 0; } +static void hclge_mailbox_service_task(struct hclge_dev *hdev) +{ + if (!test_and_clear_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state) || + test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state) || + test_and_set_bit(HCLGE_STATE_MBX_HANDLING, &hdev->state)) + return; + + hclge_mbx_handler(hdev); + + clear_bit(HCLGE_STATE_MBX_HANDLING, &hdev->state); +} + static int hclge_func_reset_sync_vf(struct hclge_dev *hdev) { struct hclge_pf_rst_sync_cmd *req; @@ -3363,6 +3380,9 @@ static int hclge_func_reset_sync_vf(struct hclge_dev *hdev) hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_VF_RST_RDY, true); do { + /* vf need to down netdev by mbx during PF or FLR reset */ + hclge_mailbox_service_task(hdev); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); /* for compatible with old firmware, wait * 100 ms for VF to stop IO @@ -3483,10 +3503,15 @@ static enum hnae3_reset_type hclge_get_reset_level(struct hnae3_ae_dev *ae_dev, /* first, resolve any unknown reset type to the known type(s) */ if (test_bit(HNAE3_UNKNOWN_RESET, addr)) { + u32 msix_sts_reg = hclge_read_dev(&hdev->hw, + HCLGE_VECTOR0_PF_OTHER_INT_STS_REG); /* we will intentionally ignore any errors from this function * as we will end up in *some* reset request in any case */ - hclge_handle_hw_msix_error(hdev, addr); + if (hclge_handle_hw_msix_error(hdev, addr)) + dev_info(&hdev->pdev->dev, "received msix interrupt 0x%x\n", + msix_sts_reg); + clear_bit(HNAE3_UNKNOWN_RESET, addr); /* We defered the clearing of the error event which caused * interrupt since it was not posssible to do that in @@ -3672,6 +3697,8 @@ static bool hclge_reset_err_handle(struct hclge_dev *hdev) hclge_dbg_dump_rst_info(hdev); + set_bit(HCLGE_STATE_RST_FAIL, &hdev->state); + return false; } @@ -3825,6 +3852,7 @@ static void hclge_reset(struct hclge_dev *hdev) hdev->rst_stats.reset_fail_cnt = 0; hdev->rst_stats.reset_done_cnt++; ae_dev->reset_type = HNAE3_NONE_RESET; + clear_bit(HCLGE_STATE_RST_FAIL, &hdev->state); /* if default_reset_request has a higher level reset request, * it should be handled as soon as possible. since some errors @@ -3939,36 +3967,19 @@ static void hclge_reset_subtask(struct hclge_dev *hdev) hdev->reset_type = HNAE3_NONE_RESET; } -static void hclge_reset_service_task(struct work_struct *work) +static void hclge_reset_service_task(struct hclge_dev *hdev) { - struct hclge_dev *hdev = - container_of(work, struct hclge_dev, rst_service_task); + if (!test_and_clear_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state)) + return; if (test_and_set_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) return; - clear_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state); - hclge_reset_subtask(hdev); clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state); } -static void hclge_mailbox_service_task(struct work_struct *work) -{ - struct hclge_dev *hdev = - container_of(work, struct hclge_dev, mbx_service_task); - - if (test_and_set_bit(HCLGE_STATE_MBX_HANDLING, &hdev->state)) - return; - - clear_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state); - - hclge_mbx_handler(hdev); - - clear_bit(HCLGE_STATE_MBX_HANDLING, &hdev->state); -} - static void hclge_update_vport_alive(struct hclge_dev *hdev) { int i; @@ -3986,29 +3997,62 @@ static void hclge_update_vport_alive(struct hclge_dev *hdev) } } -static void hclge_service_task(struct work_struct *work) +static void hclge_periodic_service_task(struct hclge_dev *hdev) { - struct hclge_dev *hdev = - container_of(work, struct hclge_dev, service_task.work); + unsigned long delta = round_jiffies_relative(HZ); - clear_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state); + /* Always handle the link updating to make sure link state is + * updated when it is triggered by mbx. + */ + hclge_update_link_status(hdev); - if (hdev->hw_stats.stats_timer >= HCLGE_STATS_TIMER_INTERVAL) { - hclge_update_stats_for_all(hdev); - hdev->hw_stats.stats_timer = 0; + if (time_is_after_jiffies(hdev->last_serv_processed + HZ)) { + delta = jiffies - hdev->last_serv_processed; + + if (delta < round_jiffies_relative(HZ)) { + delta = round_jiffies_relative(HZ) - delta; + goto out; + } } - hclge_update_port_info(hdev); - hclge_update_link_status(hdev); + hdev->serv_processed_cnt++; hclge_update_vport_alive(hdev); + + if (test_bit(HCLGE_STATE_DOWN, &hdev->state)) { + hdev->last_serv_processed = jiffies; + goto out; + } + + if (!(hdev->serv_processed_cnt % HCLGE_STATS_TIMER_INTERVAL)) + hclge_update_stats_for_all(hdev); + + hclge_update_port_info(hdev); hclge_sync_vlan_filter(hdev); - if (hdev->fd_arfs_expire_timer >= HCLGE_FD_ARFS_EXPIRE_TIMER_INTERVAL) { + if (!(hdev->serv_processed_cnt % HCLGE_ARFS_EXPIRE_INTERVAL)) hclge_rfs_filter_expire(hdev); - hdev->fd_arfs_expire_timer = 0; - } - hclge_task_schedule(hdev, round_jiffies_relative(HZ)); + hdev->last_serv_processed = jiffies; + +out: + hclge_task_schedule(hdev, delta); +} + +static void hclge_service_task(struct work_struct *work) +{ + struct hclge_dev *hdev = + container_of(work, struct hclge_dev, service_task.work); + + hclge_reset_service_task(hdev); + hclge_mailbox_service_task(hdev); + hclge_periodic_service_task(hdev); + + /* Handle reset and mbx again in case periodical task delays the + * handling by calling hclge_task_schedule() in + * hclge_periodic_service_task(). + */ + hclge_reset_service_task(hdev); + hclge_mailbox_service_task(hdev); } struct hclge_vport *hclge_get_vport(struct hnae3_handle *handle) @@ -6734,6 +6778,19 @@ static void hclge_reset_tqp_stats(struct hnae3_handle *handle) } } +static void hclge_flush_link_update(struct hclge_dev *hdev) +{ +#define HCLGE_FLUSH_LINK_TIMEOUT 100000 + + unsigned long last = hdev->serv_processed_cnt; + int i = 0; + + while (test_bit(HCLGE_STATE_LINK_UPDATING, &hdev->state) && + i++ < HCLGE_FLUSH_LINK_TIMEOUT && + last == hdev->serv_processed_cnt) + usleep_range(1, 1); +} + static void hclge_set_timer_task(struct hnae3_handle *handle, bool enable) { struct hclge_vport *vport = hclge_get_vport(handle); @@ -6742,12 +6799,12 @@ static void hclge_set_timer_task(struct hnae3_handle *handle, bool enable) if (enable) { hclge_task_schedule(hdev, round_jiffies_relative(HZ)); } else { - /* Set the DOWN flag here to disable the service to be - * scheduled again - */ + /* Set the DOWN flag here to disable link updating */ set_bit(HCLGE_STATE_DOWN, &hdev->state); - cancel_delayed_work_sync(&hdev->service_task); - clear_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state); + + /* flush memory to make sure DOWN is seen by service task */ + smp_mb__before_atomic(); + hclge_flush_link_update(hdev); } } @@ -7483,7 +7540,6 @@ void hclge_uninit_vport_mac_table(struct hclge_dev *hdev) struct hclge_vport *vport; int i; - mutex_lock(&hdev->vport_cfg_mutex); for (i = 0; i < hdev->num_alloc_vport; i++) { vport = &hdev->vport[i]; list_for_each_entry_safe(mac, tmp, &vport->uc_mac_list, node) { @@ -7496,7 +7552,6 @@ void hclge_uninit_vport_mac_table(struct hclge_dev *hdev) kfree(mac); } } - mutex_unlock(&hdev->vport_cfg_mutex); } static int hclge_get_mac_ethertype_cmd_status(struct hclge_dev *hdev, @@ -8257,7 +8312,6 @@ void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev) struct hclge_vport *vport; int i; - mutex_lock(&hdev->vport_cfg_mutex); for (i = 0; i < hdev->num_alloc_vport; i++) { vport = &hdev->vport[i]; list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) { @@ -8265,7 +8319,6 @@ void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev) kfree(vlan); } } - mutex_unlock(&hdev->vport_cfg_mutex); } static void hclge_restore_vlan_table(struct hnae3_handle *handle) @@ -8277,7 +8330,6 @@ static void hclge_restore_vlan_table(struct hnae3_handle *handle) u16 state, vlan_id; int i; - mutex_lock(&hdev->vport_cfg_mutex); for (i = 0; i < hdev->num_alloc_vport; i++) { vport = &hdev->vport[i]; vlan_proto = vport->port_base_vlan_cfg.vlan_info.vlan_proto; @@ -8303,8 +8355,6 @@ static void hclge_restore_vlan_table(struct hnae3_handle *handle) break; } } - - mutex_unlock(&hdev->vport_cfg_mutex); } int hclge_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable) @@ -9256,6 +9306,7 @@ static void hclge_state_init(struct hclge_dev *hdev) set_bit(HCLGE_STATE_DOWN, &hdev->state); clear_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state); clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state); + clear_bit(HCLGE_STATE_RST_FAIL, &hdev->state); clear_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state); clear_bit(HCLGE_STATE_MBX_HANDLING, &hdev->state); } @@ -9269,10 +9320,6 @@ static void hclge_state_uninit(struct hclge_dev *hdev) del_timer_sync(&hdev->reset_timer); if (hdev->service_task.work.func) cancel_delayed_work_sync(&hdev->service_task); - if (hdev->rst_service_task.func) - cancel_work_sync(&hdev->rst_service_task); - if (hdev->mbx_service_task.func) - cancel_work_sync(&hdev->mbx_service_task); } static void hclge_flr_prepare(struct hnae3_ae_dev *ae_dev) @@ -9342,7 +9389,6 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) hdev->mps = ETH_FRAME_LEN + ETH_FCS_LEN + 2 * VLAN_HLEN; mutex_init(&hdev->vport_lock); - mutex_init(&hdev->vport_cfg_mutex); spin_lock_init(&hdev->fd_rule_lock); ret = hclge_pci_init(hdev); @@ -9477,8 +9523,6 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) timer_setup(&hdev->reset_timer, hclge_reset_timer, 0); INIT_DELAYED_WORK(&hdev->service_task, hclge_service_task); - INIT_WORK(&hdev->rst_service_task, hclge_reset_service_task); - INIT_WORK(&hdev->mbx_service_task, hclge_mailbox_service_task); /* Setup affinity after service timer setup because add_timer_on * is called in affinity notify. @@ -9512,6 +9556,8 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) dev_info(&hdev->pdev->dev, "%s driver initialization finished.\n", HCLGE_DRIVER_NAME); + hclge_task_schedule(hdev, round_jiffies_relative(HZ)); + return 0; err_mdiobus_unreg: @@ -9534,7 +9580,7 @@ out: static void hclge_stats_clear(struct hclge_dev *hdev) { - memset(&hdev->hw_stats, 0, sizeof(hdev->hw_stats)); + memset(&hdev->mac_stats, 0, sizeof(hdev->mac_stats)); } static int hclge_set_mac_spoofchk(struct hclge_dev *hdev, int vf, bool enable) @@ -9895,7 +9941,6 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev) mutex_destroy(&hdev->vport_lock); hclge_uninit_vport_mac_table(hdev); hclge_uninit_vport_vlan_table(hdev); - mutex_destroy(&hdev->vport_cfg_mutex); ae_dev->priv = NULL; } @@ -10611,6 +10656,12 @@ static int hclge_init(void) { pr_info("%s is initializing\n", HCLGE_NAME); + hclge_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, HCLGE_NAME); + if (!hclge_wq) { + pr_err("%s: failed to create workqueue\n", HCLGE_NAME); + return -ENOMEM; + } + hnae3_register_ae_algo(&ae_algo); return 0; @@ -10619,6 +10670,7 @@ static int hclge_init(void) static void hclge_exit(void) { hnae3_unregister_ae_algo(&ae_algo); + destroy_workqueue(hclge_wq); } module_init(hclge_init); module_exit(hclge_exit); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index ebb4c6e9aed3..4e5cfda16eb7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -208,13 +208,14 @@ enum HCLGE_DEV_STATE { HCLGE_STATE_NIC_REGISTERED, HCLGE_STATE_ROCE_REGISTERED, HCLGE_STATE_SERVICE_INITED, - HCLGE_STATE_SERVICE_SCHED, HCLGE_STATE_RST_SERVICE_SCHED, HCLGE_STATE_RST_HANDLING, HCLGE_STATE_MBX_SERVICE_SCHED, HCLGE_STATE_MBX_HANDLING, HCLGE_STATE_STATISTICS_UPDATING, HCLGE_STATE_CMD_DISABLE, + HCLGE_STATE_LINK_UPDATING, + HCLGE_STATE_RST_FAIL, HCLGE_STATE_MAX }; @@ -454,11 +455,7 @@ struct hclge_mac_stats { u64 mac_rx_ctrl_pkt_num; }; -#define HCLGE_STATS_TIMER_INTERVAL (60 * 5) -struct hclge_hw_stats { - struct hclge_mac_stats mac_stats; - u32 stats_timer; -}; +#define HCLGE_STATS_TIMER_INTERVAL 300UL struct hclge_vlan_type_cfg { u16 rx_ot_fst_vlan_type; @@ -549,7 +546,7 @@ struct key_info { /* assigned by firmware, the real filter number for each pf may be less */ #define MAX_FD_FILTER_NUM 4096 -#define HCLGE_FD_ARFS_EXPIRE_TIMER_INTERVAL 5 +#define HCLGE_ARFS_EXPIRE_INTERVAL 5UL enum HCLGE_FD_ACTIVE_RULE_TYPE { HCLGE_FD_RULE_NONE, @@ -712,7 +709,7 @@ struct hclge_dev { struct hnae3_ae_dev *ae_dev; struct hclge_hw hw; struct hclge_misc_vector misc_vector; - struct hclge_hw_stats hw_stats; + struct hclge_mac_stats mac_stats; unsigned long state; unsigned long flr_state; unsigned long last_reset_time; @@ -774,8 +771,6 @@ struct hclge_dev { unsigned long service_timer_previous; struct timer_list reset_timer; struct delayed_work service_task; - struct work_struct rst_service_task; - struct work_struct mbx_service_task; bool cur_promisc; int num_alloc_vfs; /* Actual number of VFs allocated */ @@ -811,7 +806,8 @@ struct hclge_dev { struct hlist_head fd_rule_list; spinlock_t fd_rule_lock; /* protect fd_rule_list and fd_bmap */ u16 hclge_fd_rule_num; - u16 fd_arfs_expire_timer; + unsigned long serv_processed_cnt; + unsigned long last_serv_processed; unsigned long fd_bmap[BITS_TO_LONGS(MAX_FD_FILTER_NUM)]; enum HCLGE_FD_ACTIVE_RULE_TYPE fd_active_type; u8 fd_en; @@ -825,8 +821,6 @@ struct hclge_dev { u16 share_umv_size; struct mutex umv_mutex; /* protect share_umv_size */ - struct mutex vport_cfg_mutex; /* Protect stored vf table */ - DECLARE_KFIFO(mac_tnl_log, struct hclge_mac_tnl_stats, HCLGE_MAC_TNL_LOG_SIZE); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index 0b433ebe6a2d..f905dd3386b3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -635,7 +635,6 @@ static void hclge_handle_link_change_event(struct hclge_dev *hdev, #define LINK_STATUS_OFFSET 1 #define LINK_FAIL_CODE_OFFSET 2 - clear_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state); hclge_task_schedule(hdev, 0); if (!req->msg[LINK_STATUS_OFFSET]) @@ -798,13 +797,11 @@ void hclge_mbx_handler(struct hclge_dev *hdev) hclge_get_link_mode(vport, req); break; case HCLGE_MBX_GET_VF_FLR_STATUS: - mutex_lock(&hdev->vport_cfg_mutex); hclge_rm_vport_all_mac_table(vport, true, HCLGE_MAC_ADDR_UC); hclge_rm_vport_all_mac_table(vport, true, HCLGE_MAC_ADDR_MC); hclge_rm_vport_all_vlan_table(vport, true); - mutex_unlock(&hdev->vport_cfg_mutex); break; case HCLGE_MBX_GET_MEDIA_TYPE: ret = hclge_get_vf_media_type(vport, req); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c index af2245e3bb95..f38d236ebf4f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c @@ -443,7 +443,7 @@ void hclgevf_cmd_uninit(struct hclgevf_dev *hdev) { spin_lock_bh(&hdev->hw.cmq.csq.lock); spin_lock(&hdev->hw.cmq.crq.lock); - clear_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state); + set_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state); hclgevf_cmd_uninit_regs(&hdev->hw); spin_unlock(&hdev->hw.cmq.crq.lock); spin_unlock_bh(&hdev->hw.cmq.csq.lock); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 25d78a5aaa34..c33b8027f801 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -16,6 +16,8 @@ static int hclgevf_reset_hdev(struct hclgevf_dev *hdev); static struct hnae3_ae_algo ae_algovf; +static struct workqueue_struct *hclgevf_wq; + static const struct pci_device_id ae_algovf_pci_tbl[] = { {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_VF), 0}, {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_DCB_PFC_VF), 0}, @@ -440,6 +442,9 @@ void hclgevf_update_link_status(struct hclgevf_dev *hdev, int link_state) struct hnae3_client *rclient; struct hnae3_client *client; + if (test_and_set_bit(HCLGEVF_STATE_LINK_UPDATING, &hdev->state)) + return; + client = handle->client; rclient = hdev->roce_client; @@ -452,6 +457,8 @@ void hclgevf_update_link_status(struct hclgevf_dev *hdev, int link_state) rclient->ops->link_status_change(rhandle, !!link_state); hdev->hw.mac.link = link_state; } + + clear_bit(HCLGEVF_STATE_LINK_UPDATING, &hdev->state); } static void hclgevf_update_link_mode(struct hclgevf_dev *hdev) @@ -1591,6 +1598,7 @@ static void hclgevf_reset_err_handle(struct hclgevf_dev *hdev) set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state); hclgevf_reset_task_schedule(hdev); } else { + set_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state); hclgevf_dump_rst_info(hdev); } } @@ -1652,6 +1660,7 @@ static int hclgevf_reset(struct hclgevf_dev *hdev) ae_dev->reset_type = HNAE3_NONE_RESET; hdev->rst_stats.rst_done_cnt++; hdev->rst_stats.rst_fail_cnt = 0; + clear_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state); return ret; err_reset_lock: @@ -1767,62 +1776,39 @@ static void hclgevf_get_misc_vector(struct hclgevf_dev *hdev) void hclgevf_reset_task_schedule(struct hclgevf_dev *hdev) { - if (!test_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state) && - !test_bit(HCLGEVF_STATE_REMOVING, &hdev->state)) { - set_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state); - schedule_work(&hdev->rst_service_task); - } + if (!test_bit(HCLGEVF_STATE_REMOVING, &hdev->state) && + !test_and_set_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, + &hdev->state)) + mod_delayed_work(hclgevf_wq, &hdev->service_task, 0); } void hclgevf_mbx_task_schedule(struct hclgevf_dev *hdev) { - if (!test_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state) && - !test_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state)) { - set_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state); - schedule_work(&hdev->mbx_service_task); - } + if (!test_bit(HCLGEVF_STATE_REMOVING, &hdev->state) && + !test_and_set_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, + &hdev->state)) + mod_delayed_work(hclgevf_wq, &hdev->service_task, 0); } -static void hclgevf_task_schedule(struct hclgevf_dev *hdev) +static void hclgevf_task_schedule(struct hclgevf_dev *hdev, + unsigned long delay) { - if (!test_bit(HCLGEVF_STATE_DOWN, &hdev->state) && - !test_and_set_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state)) - schedule_work(&hdev->service_task); + if (!test_bit(HCLGEVF_STATE_REMOVING, &hdev->state) && + !test_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state)) + mod_delayed_work(hclgevf_wq, &hdev->service_task, delay); } -static void hclgevf_deferred_task_schedule(struct hclgevf_dev *hdev) -{ - /* if we have any pending mailbox event then schedule the mbx task */ - if (hdev->mbx_event_pending) - hclgevf_mbx_task_schedule(hdev); - - if (test_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state)) - hclgevf_reset_task_schedule(hdev); -} - -static void hclgevf_service_timer(struct timer_list *t) -{ - struct hclgevf_dev *hdev = from_timer(hdev, t, service_timer); - - mod_timer(&hdev->service_timer, jiffies + - HCLGEVF_GENERAL_TASK_INTERVAL * HZ); - - hdev->stats_timer++; - hclgevf_task_schedule(hdev); -} - -static void hclgevf_reset_service_task(struct work_struct *work) +static void hclgevf_reset_service_task(struct hclgevf_dev *hdev) { #define HCLGEVF_MAX_RESET_ATTEMPTS_CNT 3 - struct hclgevf_dev *hdev = - container_of(work, struct hclgevf_dev, rst_service_task); int ret; - if (test_and_set_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) + if (!test_and_clear_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state)) return; - clear_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state); + if (test_and_set_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) + return; if (test_and_clear_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state)) { @@ -1885,39 +1871,24 @@ static void hclgevf_reset_service_task(struct work_struct *work) clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state); } -static void hclgevf_mailbox_service_task(struct work_struct *work) +static void hclgevf_mailbox_service_task(struct hclgevf_dev *hdev) { - struct hclgevf_dev *hdev; - - hdev = container_of(work, struct hclgevf_dev, mbx_service_task); + if (!test_and_clear_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state)) + return; if (test_and_set_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state)) return; - clear_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state); - hclgevf_mbx_async_handler(hdev); clear_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state); } -static void hclgevf_keep_alive_timer(struct timer_list *t) +static void hclgevf_keep_alive(struct hclgevf_dev *hdev) { - struct hclgevf_dev *hdev = from_timer(hdev, t, keep_alive_timer); - - schedule_work(&hdev->keep_alive_task); - mod_timer(&hdev->keep_alive_timer, jiffies + - HCLGEVF_KEEP_ALIVE_TASK_INTERVAL * HZ); -} - -static void hclgevf_keep_alive_task(struct work_struct *work) -{ - struct hclgevf_dev *hdev; u8 respmsg; int ret; - hdev = container_of(work, struct hclgevf_dev, keep_alive_task); - if (test_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state)) return; @@ -1928,19 +1899,32 @@ static void hclgevf_keep_alive_task(struct work_struct *work) "VF sends keep alive cmd failed(=%d)\n", ret); } -static void hclgevf_service_task(struct work_struct *work) +static void hclgevf_periodic_service_task(struct hclgevf_dev *hdev) { - struct hnae3_handle *handle; - struct hclgevf_dev *hdev; + unsigned long delta = round_jiffies_relative(HZ); + struct hnae3_handle *handle = &hdev->nic; - hdev = container_of(work, struct hclgevf_dev, service_task); - handle = &hdev->nic; + if (time_is_after_jiffies(hdev->last_serv_processed + HZ)) { + delta = jiffies - hdev->last_serv_processed; - if (hdev->stats_timer >= HCLGEVF_STATS_TIMER_INTERVAL) { - hclgevf_tqps_update_stats(handle); - hdev->stats_timer = 0; + if (delta < round_jiffies_relative(HZ)) { + delta = round_jiffies_relative(HZ) - delta; + goto out; + } + } + + hdev->serv_processed_cnt++; + if (!(hdev->serv_processed_cnt % HCLGEVF_KEEP_ALIVE_TASK_INTERVAL)) + hclgevf_keep_alive(hdev); + + if (test_bit(HCLGEVF_STATE_DOWN, &hdev->state)) { + hdev->last_serv_processed = jiffies; + goto out; } + if (!(hdev->serv_processed_cnt % HCLGEVF_STATS_TIMER_INTERVAL)) + hclgevf_tqps_update_stats(handle); + /* request the link status from the PF. PF would be able to tell VF * about such updates in future so we might remove this later */ @@ -1950,9 +1934,27 @@ static void hclgevf_service_task(struct work_struct *work) hclgevf_sync_vlan_filter(hdev); - hclgevf_deferred_task_schedule(hdev); + hdev->last_serv_processed = jiffies; - clear_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state); +out: + hclgevf_task_schedule(hdev, delta); +} + +static void hclgevf_service_task(struct work_struct *work) +{ + struct hclgevf_dev *hdev = container_of(work, struct hclgevf_dev, + service_task.work); + + hclgevf_reset_service_task(hdev); + hclgevf_mailbox_service_task(hdev); + hclgevf_periodic_service_task(hdev); + + /* Handle reset and mbx again in case periodical task delays the + * handling by calling hclgevf_task_schedule() in + * hclgevf_periodic_service_task() + */ + hclgevf_reset_service_task(hdev); + hclgevf_mailbox_service_task(hdev); } static void hclgevf_clear_event_cause(struct hclgevf_dev *hdev, u32 regclr) @@ -2189,16 +2191,31 @@ static int hclgevf_init_vlan_config(struct hclgevf_dev *hdev) false); } +static void hclgevf_flush_link_update(struct hclgevf_dev *hdev) +{ +#define HCLGEVF_FLUSH_LINK_TIMEOUT 100000 + + unsigned long last = hdev->serv_processed_cnt; + int i = 0; + + while (test_bit(HCLGEVF_STATE_LINK_UPDATING, &hdev->state) && + i++ < HCLGEVF_FLUSH_LINK_TIMEOUT && + last == hdev->serv_processed_cnt) + usleep_range(1, 1); +} + static void hclgevf_set_timer_task(struct hnae3_handle *handle, bool enable) { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); if (enable) { - mod_timer(&hdev->service_timer, jiffies + HZ); + hclgevf_task_schedule(hdev, 0); } else { - del_timer_sync(&hdev->service_timer); - cancel_work_sync(&hdev->service_task); - clear_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state); + set_bit(HCLGEVF_STATE_DOWN, &hdev->state); + + /* flush memory to make sure DOWN is seen by service task */ + smp_mb__before_atomic(); + hclgevf_flush_link_update(hdev); } } @@ -2245,16 +2262,12 @@ static int hclgevf_set_alive(struct hnae3_handle *handle, bool alive) static int hclgevf_client_start(struct hnae3_handle *handle) { - struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); int ret; ret = hclgevf_set_alive(handle, true); if (ret) return ret; - mod_timer(&hdev->keep_alive_timer, jiffies + - HCLGEVF_KEEP_ALIVE_TASK_INTERVAL * HZ); - return 0; } @@ -2267,25 +2280,15 @@ static void hclgevf_client_stop(struct hnae3_handle *handle) if (ret) dev_warn(&hdev->pdev->dev, "%s failed %d\n", __func__, ret); - - del_timer_sync(&hdev->keep_alive_timer); - cancel_work_sync(&hdev->keep_alive_task); } static void hclgevf_state_init(struct hclgevf_dev *hdev) { - /* setup tasks for the MBX */ - INIT_WORK(&hdev->mbx_service_task, hclgevf_mailbox_service_task); clear_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state); clear_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state); + clear_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state); - /* setup tasks for service timer */ - timer_setup(&hdev->service_timer, hclgevf_service_timer, 0); - - INIT_WORK(&hdev->service_task, hclgevf_service_task); - clear_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state); - - INIT_WORK(&hdev->rst_service_task, hclgevf_reset_service_task); + INIT_DELAYED_WORK(&hdev->service_task, hclgevf_service_task); mutex_init(&hdev->mbx_resp.mbx_mutex); @@ -2298,18 +2301,8 @@ static void hclgevf_state_uninit(struct hclgevf_dev *hdev) set_bit(HCLGEVF_STATE_DOWN, &hdev->state); set_bit(HCLGEVF_STATE_REMOVING, &hdev->state); - if (hdev->keep_alive_timer.function) - del_timer_sync(&hdev->keep_alive_timer); - if (hdev->keep_alive_task.func) - cancel_work_sync(&hdev->keep_alive_task); - if (hdev->service_timer.function) - del_timer_sync(&hdev->service_timer); - if (hdev->service_task.func) - cancel_work_sync(&hdev->service_task); - if (hdev->mbx_service_task.func) - cancel_work_sync(&hdev->mbx_service_task); - if (hdev->rst_service_task.func) - cancel_work_sync(&hdev->rst_service_task); + if (hdev->service_task.work.func) + cancel_delayed_work_sync(&hdev->service_task); mutex_destroy(&hdev->mbx_resp.mbx_mutex); } @@ -2807,6 +2800,8 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev) dev_info(&hdev->pdev->dev, "finished initializing %s driver\n", HCLGEVF_DRIVER_NAME); + hclgevf_task_schedule(hdev, round_jiffies_relative(HZ)); + return 0; err_config: @@ -2838,7 +2833,6 @@ static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev) static int hclgevf_init_ae_dev(struct hnae3_ae_dev *ae_dev) { struct pci_dev *pdev = ae_dev->pdev; - struct hclgevf_dev *hdev; int ret; ret = hclgevf_alloc_hdev(ae_dev); @@ -2853,10 +2847,6 @@ static int hclgevf_init_ae_dev(struct hnae3_ae_dev *ae_dev) return ret; } - hdev = ae_dev->priv; - timer_setup(&hdev->keep_alive_timer, hclgevf_keep_alive_timer, 0); - INIT_WORK(&hdev->keep_alive_task, hclgevf_keep_alive_task); - return 0; } @@ -3213,6 +3203,12 @@ static int hclgevf_init(void) { pr_info("%s is initializing\n", HCLGEVF_NAME); + hclgevf_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, HCLGEVF_NAME); + if (!hclgevf_wq) { + pr_err("%s: failed to create workqueue\n", HCLGEVF_NAME); + return -ENOMEM; + } + hnae3_register_ae_algo(&ae_algovf); return 0; @@ -3221,6 +3217,7 @@ static int hclgevf_init(void) static void hclgevf_exit(void) { hnae3_unregister_ae_algo(&ae_algovf); + destroy_workqueue(hclgevf_wq); } module_init(hclgevf_init); module_exit(hclgevf_exit); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index 2f4c81bf4169..003114f6db6c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -142,12 +142,13 @@ enum hclgevf_states { HCLGEVF_STATE_REMOVING, HCLGEVF_STATE_NIC_REGISTERED, /* task states */ - HCLGEVF_STATE_SERVICE_SCHED, HCLGEVF_STATE_RST_SERVICE_SCHED, HCLGEVF_STATE_RST_HANDLING, HCLGEVF_STATE_MBX_SERVICE_SCHED, HCLGEVF_STATE_MBX_HANDLING, HCLGEVF_STATE_CMD_DISABLE, + HCLGEVF_STATE_LINK_UPDATING, + HCLGEVF_STATE_RST_FAIL, }; struct hclgevf_mac { @@ -283,12 +284,7 @@ struct hclgevf_dev { struct hclgevf_mbx_resp_status mbx_resp; /* mailbox response */ struct hclgevf_mbx_arq_ring arq; /* mailbox async rx queue */ - struct timer_list service_timer; - struct timer_list keep_alive_timer; - struct work_struct service_task; - struct work_struct keep_alive_task; - struct work_struct rst_service_task; - struct work_struct mbx_service_task; + struct delayed_work service_task; struct hclgevf_tqp *htqp; @@ -298,7 +294,8 @@ struct hclgevf_dev { struct hnae3_client *nic_client; struct hnae3_client *roce_client; u32 flag; - u32 stats_timer; + unsigned long serv_processed_cnt; + unsigned long last_serv_processed; }; static inline bool hclgevf_is_reset_pending(struct hclgevf_dev *hdev) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index 2411ad270c98..02a14f5e7fe3 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -766,7 +766,7 @@ static void hinic_set_rx_mode(struct net_device *netdev) queue_work(nic_dev->workq, &rx_mode_work->work); } -static void hinic_tx_timeout(struct net_device *netdev) +static void hinic_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct hinic_dev *nic_dev = netdev_priv(netdev); diff --git a/drivers/net/ethernet/i825xx/82596.c b/drivers/net/ethernet/i825xx/82596.c index 92929750f832..bef676d93339 100644 --- a/drivers/net/ethernet/i825xx/82596.c +++ b/drivers/net/ethernet/i825xx/82596.c @@ -363,7 +363,7 @@ static netdev_tx_t i596_start_xmit(struct sk_buff *skb, struct net_device *dev); static irqreturn_t i596_interrupt(int irq, void *dev_id); static int i596_close(struct net_device *dev); static void i596_add_cmd(struct net_device *dev, struct i596_cmd *cmd); -static void i596_tx_timeout (struct net_device *dev); +static void i596_tx_timeout (struct net_device *dev, unsigned int txqueue); static void print_eth(unsigned char *buf, char *str); static void set_multicast_list(struct net_device *dev); @@ -1019,7 +1019,7 @@ err_irq_dev: return res; } -static void i596_tx_timeout (struct net_device *dev) +static void i596_tx_timeout (struct net_device *dev, unsigned int txqueue) { struct i596_private *lp = dev->ml_priv; int ioaddr = dev->base_addr; diff --git a/drivers/net/ethernet/i825xx/ether1.c b/drivers/net/ethernet/i825xx/ether1.c index bb3b8adbe4f0..a0bfb509e002 100644 --- a/drivers/net/ethernet/i825xx/ether1.c +++ b/drivers/net/ethernet/i825xx/ether1.c @@ -66,7 +66,7 @@ static netdev_tx_t ether1_sendpacket(struct sk_buff *skb, static irqreturn_t ether1_interrupt(int irq, void *dev_id); static int ether1_close(struct net_device *dev); static void ether1_setmulticastlist(struct net_device *dev); -static void ether1_timeout(struct net_device *dev); +static void ether1_timeout(struct net_device *dev, unsigned int txqueue); /* ------------------------------------------------------------------------- */ @@ -650,7 +650,7 @@ ether1_open (struct net_device *dev) } static void -ether1_timeout(struct net_device *dev) +ether1_timeout(struct net_device *dev, unsigned int txqueue) { printk(KERN_WARNING "%s: transmit timeout, network cable problem?\n", dev->name); diff --git a/drivers/net/ethernet/i825xx/lib82596.c b/drivers/net/ethernet/i825xx/lib82596.c index f9742af7f142..b03757e169e4 100644 --- a/drivers/net/ethernet/i825xx/lib82596.c +++ b/drivers/net/ethernet/i825xx/lib82596.c @@ -351,7 +351,7 @@ static netdev_tx_t i596_start_xmit(struct sk_buff *skb, struct net_device *dev); static irqreturn_t i596_interrupt(int irq, void *dev_id); static int i596_close(struct net_device *dev); static void i596_add_cmd(struct net_device *dev, struct i596_cmd *cmd); -static void i596_tx_timeout (struct net_device *dev); +static void i596_tx_timeout (struct net_device *dev, unsigned int txqueue); static void print_eth(unsigned char *buf, char *str); static void set_multicast_list(struct net_device *dev); static inline void ca(struct net_device *dev); @@ -936,7 +936,7 @@ out_remove_rx_bufs: return -EAGAIN; } -static void i596_tx_timeout (struct net_device *dev) +static void i596_tx_timeout (struct net_device *dev, unsigned int txqueue) { struct i596_private *lp = netdev_priv(dev); diff --git a/drivers/net/ethernet/i825xx/sun3_82586.c b/drivers/net/ethernet/i825xx/sun3_82586.c index 1a86184d44c0..4564ee02c95f 100644 --- a/drivers/net/ethernet/i825xx/sun3_82586.c +++ b/drivers/net/ethernet/i825xx/sun3_82586.c @@ -125,7 +125,7 @@ static netdev_tx_t sun3_82586_send_packet(struct sk_buff *, struct net_device *); static struct net_device_stats *sun3_82586_get_stats(struct net_device *dev); static void set_multicast_list(struct net_device *dev); -static void sun3_82586_timeout(struct net_device *dev); +static void sun3_82586_timeout(struct net_device *dev, unsigned int txqueue); #if 0 static void sun3_82586_dump(struct net_device *,void *); #endif @@ -965,7 +965,7 @@ static void startrecv586(struct net_device *dev) WAIT_4_SCB_CMD_RUC(); /* wait for accept cmd. (no timeout!!) */ } -static void sun3_82586_timeout(struct net_device *dev) +static void sun3_82586_timeout(struct net_device *dev, unsigned int txqueue) { struct priv *p = netdev_priv(dev); #ifndef NO_NOPCOMMANDS diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index 13e30eba5349..0273fb7a9d01 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -2786,7 +2786,7 @@ out: return; } -static void ehea_tx_watchdog(struct net_device *dev) +static void ehea_tx_watchdog(struct net_device *dev, unsigned int txqueue) { struct ehea_port *port = netdev_priv(dev); diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 2e40425d8a34..b7fc17756c51 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -776,7 +776,7 @@ static void emac_reset_work(struct work_struct *work) mutex_unlock(&dev->link_lock); } -static void emac_tx_timeout(struct net_device *ndev) +static void emac_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct emac_instance *dev = netdev_priv(ndev); diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index c90080781924..94b9d8913b66 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2282,7 +2282,7 @@ err: return -ret; } -static void ibmvnic_tx_timeout(struct net_device *dev) +static void ibmvnic_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct ibmvnic_adapter *adapter = netdev_priv(dev); diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index a65d5a9ba7db..1b8d015ebfb0 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -2316,7 +2316,7 @@ static void e100_down(struct nic *nic) e100_rx_clean_list(nic); } -static void e100_tx_timeout(struct net_device *netdev) +static void e100_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct nic *nic = netdev_priv(netdev); diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index aca97b084003..2bced34c19ba 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -134,7 +134,7 @@ static int e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd); static void e1000_enter_82542_rst(struct e1000_adapter *adapter); static void e1000_leave_82542_rst(struct e1000_adapter *adapter); -static void e1000_tx_timeout(struct net_device *dev); +static void e1000_tx_timeout(struct net_device *dev, unsigned int txqueue); static void e1000_reset_task(struct work_struct *work); static void e1000_smartspeed(struct e1000_adapter *adapter); static int e1000_82547_fifo_workaround(struct e1000_adapter *adapter, @@ -3488,7 +3488,7 @@ exit: * e1000_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure **/ -static void e1000_tx_timeout(struct net_device *netdev) +static void e1000_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct e1000_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index fe7997c18a10..4c220600ea9a 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -5929,7 +5929,7 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, * e1000_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure **/ -static void e1000_tx_timeout(struct net_device *netdev) +static void e1000_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct e1000_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index 68baee04dc58..ba2566e2123d 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -697,7 +697,7 @@ static netdev_tx_t fm10k_xmit_frame(struct sk_buff *skb, struct net_device *dev) * fm10k_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure **/ -static void fm10k_tx_timeout(struct net_device *netdev) +static void fm10k_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct fm10k_intfc *interface = netdev_priv(netdev); bool real_tx_hang = false; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 1ccabeafa44c..4c9ac6c80eb8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -301,7 +301,7 @@ void i40e_service_event_schedule(struct i40e_pf *pf) * device is munged, not just the one netdev port, so go for the full * reset. **/ -static void i40e_tx_timeout(struct net_device *netdev) +static void i40e_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 821987da5698..0a8824871618 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -159,7 +159,7 @@ void iavf_schedule_reset(struct iavf_adapter *adapter) * iavf_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure **/ -static void iavf_tx_timeout(struct net_device *netdev) +static void iavf_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct iavf_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 69bff085acf7..4d5220c9c721 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5060,7 +5060,7 @@ ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, * ice_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure */ -static void ice_tx_timeout(struct net_device *netdev) +static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_ring *tx_ring = NULL; diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 98346eb064d5..d11e64a58ed1 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -146,7 +146,7 @@ static int igb_poll(struct napi_struct *, int); static bool igb_clean_tx_irq(struct igb_q_vector *, int); static int igb_clean_rx_irq(struct igb_q_vector *, int); static int igb_ioctl(struct net_device *, struct ifreq *, int cmd); -static void igb_tx_timeout(struct net_device *); +static void igb_tx_timeout(struct net_device *, unsigned int txqueue); static void igb_reset_task(struct work_struct *); static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features); @@ -6184,7 +6184,7 @@ static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, * igb_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure **/ -static void igb_tx_timeout(struct net_device *netdev) +static void igb_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index 6003dc3ff5fd..5b1800c3ba82 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -2375,7 +2375,7 @@ static netdev_tx_t igbvf_xmit_frame(struct sk_buff *skb, * igbvf_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure **/ -static void igbvf_tx_timeout(struct net_device *netdev) +static void igbvf_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct igbvf_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c index 3d8c051dd327..b64e91ea3465 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c @@ -70,7 +70,7 @@ static int ixgb_clean(struct napi_struct *, int); static bool ixgb_clean_rx_irq(struct ixgb_adapter *, int *, int); static void ixgb_alloc_rx_buffers(struct ixgb_adapter *, int); -static void ixgb_tx_timeout(struct net_device *dev); +static void ixgb_tx_timeout(struct net_device *dev, unsigned int txqueue); static void ixgb_tx_timeout_task(struct work_struct *work); static void ixgb_vlan_strip_enable(struct ixgb_adapter *adapter); @@ -1538,7 +1538,7 @@ ixgb_xmit_frame(struct sk_buff *skb, struct net_device *netdev) **/ static void -ixgb_tx_timeout(struct net_device *netdev) +ixgb_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct ixgb_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c index 171cdc552961..5b1cf49df3d3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c @@ -166,7 +166,9 @@ static ssize_t ixgbe_dbg_netdev_ops_write(struct file *filp, ixgbe_dbg_netdev_ops_buf[len] = '\0'; if (strncmp(ixgbe_dbg_netdev_ops_buf, "tx_timeout", 10) == 0) { - adapter->netdev->netdev_ops->ndo_tx_timeout(adapter->netdev); + /* TX Queue number below is wrong, but ixgbe does not use it */ + adapter->netdev->netdev_ops->ndo_tx_timeout(adapter->netdev, + UINT_MAX); e_dev_info("tx_timeout called\n"); } else { e_dev_info("Unknown command: %s\n", ixgbe_dbg_netdev_ops_buf); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 25c097cd8100..8129ea2e94a8 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6158,7 +6158,7 @@ static void ixgbe_set_eee_capable(struct ixgbe_adapter *adapter) * ixgbe_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure **/ -static void ixgbe_tx_timeout(struct net_device *netdev) +static void ixgbe_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct ixgbe_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 076f2da36f27..fa286694ac2c 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -250,7 +250,7 @@ static void ixgbevf_tx_timeout_reset(struct ixgbevf_adapter *adapter) * ixgbevf_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure **/ -static void ixgbevf_tx_timeout(struct net_device *netdev) +static void ixgbevf_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct ixgbevf_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index 25aa400e2e3c..2e4975572e9f 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -2337,7 +2337,7 @@ jme_change_mtu(struct net_device *netdev, int new_mtu) } static void -jme_tx_timeout(struct net_device *netdev) +jme_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct jme_adapter *jme = netdev_priv(netdev); diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index ae195f8adff5..f98d9d627c71 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -917,7 +917,7 @@ static void korina_restart_task(struct work_struct *work) enable_irq(lp->rx_irq); } -static void korina_tx_timeout(struct net_device *dev) +static void korina_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct korina_private *lp = netdev_priv(dev); diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c index 6e73ffe6f928..028e3e6222e9 100644 --- a/drivers/net/ethernet/lantiq_etop.c +++ b/drivers/net/ethernet/lantiq_etop.c @@ -594,7 +594,7 @@ err_hw: } static void -ltq_etop_tx_timeout(struct net_device *dev) +ltq_etop_tx_timeout(struct net_device *dev, unsigned int txqueue) { int err; diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index d5b644131cff..c43820597218 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -2590,7 +2590,7 @@ static void tx_timeout_task(struct work_struct *ugly) } } -static void mv643xx_eth_tx_timeout(struct net_device *dev) +static void mv643xx_eth_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct mv643xx_eth_private *mp = netdev_priv(dev); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 62dc2f362a16..28eba99888c3 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -4792,6 +4792,8 @@ static void mvpp2_phylink_validate(struct phylink_config *config, phylink_set(mask, 10000baseER_Full); phylink_set(mask, 10000baseKR_Full); } + if (state->interface != PHY_INTERFACE_MODE_NA) + break; /* Fall-through */ case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_ID: @@ -4802,13 +4804,23 @@ static void mvpp2_phylink_validate(struct phylink_config *config, phylink_set(mask, 10baseT_Full); phylink_set(mask, 100baseT_Half); phylink_set(mask, 100baseT_Full); + phylink_set(mask, 1000baseT_Full); + phylink_set(mask, 1000baseX_Full); + if (state->interface != PHY_INTERFACE_MODE_NA) + break; /* Fall-through */ case PHY_INTERFACE_MODE_1000BASEX: case PHY_INTERFACE_MODE_2500BASEX: - phylink_set(mask, 1000baseT_Full); - phylink_set(mask, 1000baseX_Full); - phylink_set(mask, 2500baseT_Full); - phylink_set(mask, 2500baseX_Full); + if (port->comphy || + state->interface != PHY_INTERFACE_MODE_2500BASEX) { + phylink_set(mask, 1000baseT_Full); + phylink_set(mask, 1000baseX_Full); + } + if (port->comphy || + state->interface == PHY_INTERFACE_MODE_2500BASEX) { + phylink_set(mask, 2500baseT_Full); + phylink_set(mask, 2500baseX_Full); + } break; default: goto empty_set; @@ -4817,6 +4829,8 @@ static void mvpp2_phylink_validate(struct phylink_config *config, bitmap_and(supported, supported, mask, __ETHTOOL_LINK_MODE_MASK_NBITS); bitmap_and(state->advertising, state->advertising, mask, __ETHTOOL_LINK_MODE_MASK_NBITS); + + phylink_helper_basex_speed(state); return; empty_set: @@ -5411,6 +5425,16 @@ static int mvpp2_port_probe(struct platform_device *pdev, port->phylink = NULL; } + /* Cycle the comphy to power it down, saving 270mW per port - + * don't worry about an error powering it up. When the comphy + * driver does this, we can remove this code. + */ + if (port->comphy) { + err = mvpp22_comphy_init(port); + if (err == 0) + phy_power_off(port->comphy); + } + err = register_netdev(dev); if (err < 0) { dev_err(&pdev->dev, "failed to register netdev\n"); diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c index 3fb7ee3d4d13..1a6877902dd6 100644 --- a/drivers/net/ethernet/marvell/pxa168_eth.c +++ b/drivers/net/ethernet/marvell/pxa168_eth.c @@ -742,7 +742,7 @@ txq_reclaim_end: return released; } -static void pxa168_eth_tx_timeout(struct net_device *dev) +static void pxa168_eth_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct pxa168_eth_private *pep = netdev_priv(dev); diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c index 095f6c71b4fa..8ca15958e752 100644 --- a/drivers/net/ethernet/marvell/skge.c +++ b/drivers/net/ethernet/marvell/skge.c @@ -2884,7 +2884,7 @@ static void skge_tx_clean(struct net_device *dev) skge->tx_ring.to_clean = e; } -static void skge_tx_timeout(struct net_device *dev) +static void skge_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct skge_port *skge = netdev_priv(dev); diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 5f56ee83e3b1..acd1cba987fb 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -2358,7 +2358,7 @@ static void sky2_qlink_intr(struct sky2_hw *hw) /* Transmit timeout is only called if we are running, carrier is up * and tx queue is full (stopped). */ -static void sky2_tx_timeout(struct net_device *dev) +static void sky2_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct sky2_port *sky2 = netdev_priv(dev); struct sky2_hw *hw = sky2->hw; diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 527ad2aadcca..8c6cfd15481c 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -2081,7 +2081,7 @@ static void mtk_dma_free(struct mtk_eth *eth) kfree(eth->scratch_head); } -static void mtk_tx_timeout(struct net_device *dev) +static void mtk_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct mtk_mac *mac = netdev_priv(dev); struct mtk_eth *eth = mac->hw; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 7af75b63245f..43dcbd8214c6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1363,24 +1363,18 @@ static void mlx4_en_delete_rss_steer_rules(struct mlx4_en_priv *priv) } } -static void mlx4_en_tx_timeout(struct net_device *dev) +static void mlx4_en_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; - int i; + struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[TX][txqueue]; if (netif_msg_timer(priv)) en_warn(priv, "Tx timeout called on port:%d\n", priv->port); - for (i = 0; i < priv->tx_ring_num[TX]; i++) { - struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[TX][i]; - - if (!netif_tx_queue_stopped(netdev_get_tx_queue(dev, i))) - continue; - en_warn(priv, "TX timeout on queue: %d, QP: 0x%x, CQ: 0x%x, Cons: 0x%x, Prod: 0x%x\n", - i, tx_ring->qpn, tx_ring->sp_cqn, - tx_ring->cons, tx_ring->prod); - } + en_warn(priv, "TX timeout on queue: %d, QP: 0x%x, CQ: 0x%x, Cons: 0x%x, Prod: 0x%x\n", + txqueue, tx_ring->qpn, tx_ring->sp_cqn, + tx_ring->cons, tx_ring->prod); priv->port_stats.tx_timeout++; en_dbg(DRV, priv, "Scheduling watchdog\n"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 4980e80a5e85..68f1c8cb302b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4332,7 +4332,7 @@ unlock: rtnl_unlock(); } -static void mlx5e_tx_timeout(struct net_device *dev) +static void mlx5e_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct mlx5e_priv *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c index b70afa310ad2..416676c35b1f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c @@ -200,8 +200,6 @@ static void mlx5_lag_fib_update(struct work_struct *work) rtnl_lock(); switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ - case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_DEL: mlx5_lag_fib_route_event(ldev, fib_work->event, fib_work->fen_info.fi); @@ -259,8 +257,6 @@ static int mlx5_lag_fib_event(struct notifier_block *nb, switch (event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ - case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_DEL: fen_info = container_of(info, struct fib_entry_notifier_info, info); diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 5294a1622643..86a2d575ae73 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -3477,10 +3477,10 @@ MLXSW_REG_DEFINE(qeec, MLXSW_REG_QEEC_ID, MLXSW_REG_QEEC_LEN); MLXSW_ITEM32(reg, qeec, local_port, 0x00, 16, 8); enum mlxsw_reg_qeec_hr { - MLXSW_REG_QEEC_HIERARCY_PORT, - MLXSW_REG_QEEC_HIERARCY_GROUP, - MLXSW_REG_QEEC_HIERARCY_SUBGROUP, - MLXSW_REG_QEEC_HIERARCY_TC, + MLXSW_REG_QEEC_HR_PORT, + MLXSW_REG_QEEC_HR_GROUP, + MLXSW_REG_QEEC_HR_SUBGROUP, + MLXSW_REG_QEEC_HR_TC, }; /* reg_qeec_element_hierarchy @@ -3618,8 +3618,7 @@ static inline void mlxsw_reg_qeec_ptps_pack(char *payload, u8 local_port, { MLXSW_REG_ZERO(qeec, payload); mlxsw_reg_qeec_local_port_set(payload, local_port); - mlxsw_reg_qeec_element_hierarchy_set(payload, - MLXSW_REG_QEEC_HIERARCY_PORT); + mlxsw_reg_qeec_element_hierarchy_set(payload, MLXSW_REG_QEEC_HR_PORT); mlxsw_reg_qeec_ptps_set(payload, ptps); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 556dca328bb5..ea632042e609 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1796,6 +1796,8 @@ static int mlxsw_sp_setup_tc(struct net_device *dev, enum tc_setup_type type, return mlxsw_sp_setup_tc_red(mlxsw_sp_port, type_data); case TC_SETUP_QDISC_PRIO: return mlxsw_sp_setup_tc_prio(mlxsw_sp_port, type_data); + case TC_SETUP_QDISC_ETS: + return mlxsw_sp_setup_tc_ets(mlxsw_sp_port, type_data); default: return -EOPNOTSUPP; } @@ -3602,26 +3604,25 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) * one subgroup, which are all member in the same group. */ err = mlxsw_sp_port_ets_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_GROUP, 0, 0, false, - 0); + MLXSW_REG_QEEC_HR_GROUP, 0, 0, false, 0); if (err) return err; for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { err = mlxsw_sp_port_ets_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_SUBGROUP, i, + MLXSW_REG_QEEC_HR_SUBGROUP, i, 0, false, 0); if (err) return err; } for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { err = mlxsw_sp_port_ets_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_TC, i, i, + MLXSW_REG_QEEC_HR_TC, i, i, false, 0); if (err) return err; err = mlxsw_sp_port_ets_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_TC, + MLXSW_REG_QEEC_HR_TC, i + 8, i, true, 100); if (err) @@ -3633,13 +3634,13 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) * for the initial configuration. */ err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_PORT, 0, 0, + MLXSW_REG_QEEC_HR_PORT, 0, 0, MLXSW_REG_QEEC_MAS_DIS); if (err) return err; for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_SUBGROUP, + MLXSW_REG_QEEC_HR_SUBGROUP, i, 0, MLXSW_REG_QEEC_MAS_DIS); if (err) @@ -3647,14 +3648,14 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) } for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_TC, + MLXSW_REG_QEEC_HR_TC, i, i, MLXSW_REG_QEEC_MAS_DIS); if (err) return err; err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_TC, + MLXSW_REG_QEEC_HR_TC, i + 8, i, MLXSW_REG_QEEC_MAS_DIS); if (err) @@ -3664,7 +3665,7 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) /* Configure the min shaper for multicast TCs. */ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { err = mlxsw_sp_port_min_bw_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_TC, + MLXSW_REG_QEEC_HR_TC, i + 8, i, MLXSW_REG_QEEC_MIS_MIN); if (err) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 347bec9d1ecf..948ef4720d40 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -852,6 +852,8 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_red_qopt_offload *p); int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_prio_qopt_offload *p); +int mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_ets_qopt_offload *p); /* spectrum_fid.c */ bool mlxsw_sp_fid_is_dummy(struct mlxsw_sp *mlxsw_sp, u16 fid_index); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c index 21296fa7f7fb..fe3bbba90659 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c @@ -160,7 +160,7 @@ static int __mlxsw_sp_dcbnl_ieee_setets(struct mlxsw_sp_port *mlxsw_sp_port, u8 weight = ets->tc_tx_bw[i]; err = mlxsw_sp_port_ets_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_SUBGROUP, i, + MLXSW_REG_QEEC_HR_SUBGROUP, i, 0, dwrr, weight); if (err) { netdev_err(dev, "Failed to link subgroup ETS element %d to group\n", @@ -198,7 +198,7 @@ err_port_ets_set: u8 weight = my_ets->tc_tx_bw[i]; err = mlxsw_sp_port_ets_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_SUBGROUP, i, + MLXSW_REG_QEEC_HR_SUBGROUP, i, 0, dwrr, weight); } return err; @@ -507,7 +507,7 @@ static int mlxsw_sp_dcbnl_ieee_setmaxrate(struct net_device *dev, for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_SUBGROUP, + MLXSW_REG_QEEC_HR_SUBGROUP, i, 0, maxrate->tc_maxrate[i]); if (err) { @@ -523,7 +523,7 @@ static int mlxsw_sp_dcbnl_ieee_setmaxrate(struct net_device *dev, err_port_ets_maxrate_set: for (i--; i >= 0; i--) mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_SUBGROUP, + MLXSW_REG_QEEC_HR_SUBGROUP, i, 0, my_maxrate->tc_maxrate[i]); return err; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index 68cc6737d45c..81a2c087f534 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -18,6 +18,7 @@ enum mlxsw_sp_qdisc_type { MLXSW_SP_QDISC_NO_QDISC, MLXSW_SP_QDISC_RED, MLXSW_SP_QDISC_PRIO, + MLXSW_SP_QDISC_ETS, }; struct mlxsw_sp_qdisc_ops { @@ -471,14 +472,16 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, } static int -mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +__mlxsw_sp_qdisc_ets_destroy(struct mlxsw_sp_port *mlxsw_sp_port) { int i; for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, MLXSW_SP_PORT_DEFAULT_TCLASS); + mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_SUBGROUP, + i, 0, false, 0); mlxsw_sp_qdisc_destroy(mlxsw_sp_port, &mlxsw_sp_port->tclass_qdiscs[i]); mlxsw_sp_port->tclass_qdiscs[i].prio_bitmap = 0; @@ -488,36 +491,58 @@ mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port, } static int -mlxsw_sp_qdisc_prio_check_params(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, - void *params) +mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) { - struct tc_prio_qopt_offload_params *p = params; + return __mlxsw_sp_qdisc_ets_destroy(mlxsw_sp_port); +} - if (p->bands > IEEE_8021QAZ_MAX_TCS) +static int +__mlxsw_sp_qdisc_ets_check_params(unsigned int nbands) +{ + if (nbands > IEEE_8021QAZ_MAX_TCS) return -EOPNOTSUPP; return 0; } static int -mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, - void *params) +mlxsw_sp_qdisc_prio_check_params(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) { struct tc_prio_qopt_offload_params *p = params; + + return __mlxsw_sp_qdisc_ets_check_params(p->bands); +} + +static int +__mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, + unsigned int nbands, + const unsigned int *quanta, + const unsigned int *weights, + const u8 *priomap) +{ struct mlxsw_sp_qdisc *child_qdisc; int tclass, i, band, backlog; u8 old_priomap; int err; - for (band = 0; band < p->bands; band++) { + for (band = 0; band < nbands; band++) { tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band); child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass]; old_priomap = child_qdisc->prio_bitmap; child_qdisc->prio_bitmap = 0; + + err = mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_SUBGROUP, + tclass, 0, !!quanta[band], + weights[band]); + if (err) + return err; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - if (p->priomap[i] == band) { + if (priomap[i] == band) { child_qdisc->prio_bitmap |= BIT(i); if (BIT(i) & old_priomap) continue; @@ -540,21 +565,46 @@ mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port, child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass]; child_qdisc->prio_bitmap = 0; mlxsw_sp_qdisc_destroy(mlxsw_sp_port, child_qdisc); + mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_SUBGROUP, + tclass, 0, false, 0); } return 0; } +static int +mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_prio_qopt_offload_params *p = params; + unsigned int zeroes[TCQ_ETS_MAX_BANDS] = {0}; + + return __mlxsw_sp_qdisc_ets_replace(mlxsw_sp_port, p->bands, + zeroes, zeroes, p->priomap); +} + +static void +__mlxsw_sp_qdisc_ets_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct gnet_stats_queue *qstats) +{ + u64 backlog; + + backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_qdisc->stats_base.backlog); + qstats->backlog -= backlog; +} + static void mlxsw_sp_qdisc_prio_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params) { struct tc_prio_qopt_offload_params *p = params; - u64 backlog; - backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, - mlxsw_sp_qdisc->stats_base.backlog); - p->qstats->backlog -= backlog; + __mlxsw_sp_qdisc_ets_unoffload(mlxsw_sp_port, mlxsw_sp_qdisc, + p->qstats); } static int @@ -631,31 +681,97 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = { .clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats, }; -/* Grafting is not supported in mlxsw. It will result in un-offloading of the - * grafted qdisc as well as the qdisc in the qdisc new location. - * (However, if the graft is to the location where the qdisc is already at, it - * will be ignored completely and won't cause un-offloading). +static int +mlxsw_sp_qdisc_ets_check_params(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_ets_qopt_offload_replace_params *p = params; + + return __mlxsw_sp_qdisc_ets_check_params(p->bands); +} + +static int +mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_ets_qopt_offload_replace_params *p = params; + + return __mlxsw_sp_qdisc_ets_replace(mlxsw_sp_port, p->bands, + p->quanta, p->weights, p->priomap); +} + +static void +mlxsw_sp_qdisc_ets_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_ets_qopt_offload_replace_params *p = params; + + __mlxsw_sp_qdisc_ets_unoffload(mlxsw_sp_port, mlxsw_sp_qdisc, + p->qstats); +} + +static int +mlxsw_sp_qdisc_ets_destroy(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +{ + return __mlxsw_sp_qdisc_ets_destroy(mlxsw_sp_port); +} + +static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_ets = { + .type = MLXSW_SP_QDISC_ETS, + .check_params = mlxsw_sp_qdisc_ets_check_params, + .replace = mlxsw_sp_qdisc_ets_replace, + .unoffload = mlxsw_sp_qdisc_ets_unoffload, + .destroy = mlxsw_sp_qdisc_ets_destroy, + .get_stats = mlxsw_sp_qdisc_get_prio_stats, + .clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats, +}; + +/* Linux allows linking of Qdiscs to arbitrary classes (so long as the resulting + * graph is free of cycles). These operations do not change the parent handle + * though, which means it can be incomplete (if there is more than one class + * where the Qdisc in question is grafted) or outright wrong (if the Qdisc was + * linked to a different class and then removed from the original class). + * + * E.g. consider this sequence of operations: + * + * # tc qdisc add dev swp1 root handle 1: prio + * # tc qdisc add dev swp1 parent 1:3 handle 13: red limit 1000000 avpkt 10000 + * RED: set bandwidth to 10Mbit + * # tc qdisc link dev swp1 handle 13: parent 1:2 + * + * At this point, both 1:2 and 1:3 have the same RED Qdisc instance as their + * child. But RED will still only claim that 1:3 is its parent. If it's removed + * from that band, its only parent will be 1:2, but it will continue to claim + * that it is in fact 1:3. + * + * The notification for child Qdisc replace (e.g. TC_RED_REPLACE) comes before + * the notification for parent graft (e.g. TC_PRIO_GRAFT). We take the replace + * notification to offload the child Qdisc, based on its parent handle, and use + * the graft operation to validate that the class where the child is actually + * grafted corresponds to the parent handle. If the two don't match, we + * unoffload the child. */ static int -mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, - struct tc_prio_qopt_offload_graft_params *p) +__mlxsw_sp_qdisc_ets_graft(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + u8 band, u32 child_handle) { - int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->band); + int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(band); struct mlxsw_sp_qdisc *old_qdisc; - /* Check if the grafted qdisc is already in its "new" location. If so - - * nothing needs to be done. - */ - if (p->band < IEEE_8021QAZ_MAX_TCS && - mlxsw_sp_port->tclass_qdiscs[tclass_num].handle == p->child_handle) + if (band < IEEE_8021QAZ_MAX_TCS && + mlxsw_sp_port->tclass_qdiscs[tclass_num].handle == child_handle) return 0; /* See if the grafted qdisc is already offloaded on any tclass. If so, * unoffload it. */ old_qdisc = mlxsw_sp_qdisc_find_by_handle(mlxsw_sp_port, - p->child_handle); + child_handle); if (old_qdisc) mlxsw_sp_qdisc_destroy(mlxsw_sp_port, old_qdisc); @@ -664,6 +780,15 @@ mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port, return -EOPNOTSUPP; } +static int +mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct tc_prio_qopt_offload_graft_params *p) +{ + return __mlxsw_sp_qdisc_ets_graft(mlxsw_sp_port, mlxsw_sp_qdisc, + p->band, p->child_handle); +} + int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_prio_qopt_offload *p) { @@ -697,6 +822,40 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, } } +int mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_ets_qopt_offload *p) +{ + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; + + mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, true); + if (!mlxsw_sp_qdisc) + return -EOPNOTSUPP; + + if (p->command == TC_ETS_REPLACE) + return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle, + mlxsw_sp_qdisc, + &mlxsw_sp_qdisc_ops_ets, + &p->replace_params); + + if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle, + MLXSW_SP_QDISC_ETS)) + return -EOPNOTSUPP; + + switch (p->command) { + case TC_ETS_DESTROY: + return mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); + case TC_ETS_STATS: + return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc, + &p->stats); + case TC_ETS_GRAFT: + return __mlxsw_sp_qdisc_ets_graft(mlxsw_sp_port, mlxsw_sp_qdisc, + p->graft_params.band, + p->graft_params.child_handle); + default: + return -EOPNOTSUPP; + } +} + int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port) { struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 30bfe3880faf..bba1c8215d06 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3845,7 +3845,7 @@ static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp, key.fib_nh = fib_nh; nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key); - if (WARN_ON_ONCE(!nh)) + if (!nh) return; switch (event) { @@ -4780,95 +4780,6 @@ static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_vr_put(mlxsw_sp, vr); } -static struct mlxsw_sp_fib4_entry * -mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node, - const struct mlxsw_sp_fib4_entry *new4_entry) -{ - struct mlxsw_sp_fib4_entry *fib4_entry; - - list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) { - if (fib4_entry->tb_id > new4_entry->tb_id) - continue; - if (fib4_entry->tb_id != new4_entry->tb_id) - break; - if (fib4_entry->tos > new4_entry->tos) - continue; - if (fib4_entry->prio >= new4_entry->prio || - fib4_entry->tos < new4_entry->tos) - return fib4_entry; - } - - return NULL; -} - -static int -mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry, - struct mlxsw_sp_fib4_entry *new4_entry) -{ - struct mlxsw_sp_fib_node *fib_node; - - if (WARN_ON(!fib4_entry)) - return -EINVAL; - - fib_node = fib4_entry->common.fib_node; - list_for_each_entry_from(fib4_entry, &fib_node->entry_list, - common.list) { - if (fib4_entry->tb_id != new4_entry->tb_id || - fib4_entry->tos != new4_entry->tos || - fib4_entry->prio != new4_entry->prio) - break; - } - - list_add_tail(&new4_entry->common.list, &fib4_entry->common.list); - return 0; -} - -static int -mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry, - bool replace, bool append) -{ - struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node; - struct mlxsw_sp_fib4_entry *fib4_entry; - - fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry); - - if (append) - return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry); - if (replace && WARN_ON(!fib4_entry)) - return -EINVAL; - - /* Insert new entry before replaced one, so that we can later - * remove the second. - */ - if (fib4_entry) { - list_add_tail(&new4_entry->common.list, - &fib4_entry->common.list); - } else { - struct mlxsw_sp_fib4_entry *last; - - list_for_each_entry(last, &fib_node->entry_list, common.list) { - if (new4_entry->tb_id > last->tb_id) - break; - fib4_entry = last; - } - - if (fib4_entry) - list_add(&new4_entry->common.list, - &fib4_entry->common.list); - else - list_add(&new4_entry->common.list, - &fib_node->entry_list); - } - - return 0; -} - -static void -mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry) -{ - list_del(&fib4_entry->common.list); -} - static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry) { @@ -4912,14 +4823,12 @@ static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp, } static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib4_entry *fib4_entry, - bool replace, bool append) + struct mlxsw_sp_fib4_entry *fib4_entry) { + struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node; int err; - err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append); - if (err) - return err; + list_add(&fib4_entry->common.list, &fib_node->entry_list); err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common); if (err) @@ -4928,7 +4837,7 @@ static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp, return 0; err_fib_node_entry_add: - mlxsw_sp_fib4_node_list_remove(fib4_entry); + list_del(&fib4_entry->common.list); return err; } @@ -4937,20 +4846,19 @@ mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib4_entry *fib4_entry) { mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common); - mlxsw_sp_fib4_node_list_remove(fib4_entry); + list_del(&fib4_entry->common.list); if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common); } static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib4_entry *fib4_entry, - bool replace) + struct mlxsw_sp_fib4_entry *fib4_entry) { struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node; struct mlxsw_sp_fib4_entry *replaced; - if (!replace) + if (list_is_singular(&fib_node->entry_list)) return; /* We inserted the new entry before replaced one */ @@ -4962,9 +4870,8 @@ static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp, } static int -mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp, - const struct fib_entry_notifier_info *fen_info, - bool replace, bool append) +mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp, + const struct fib_entry_notifier_info *fen_info) { struct mlxsw_sp_fib4_entry *fib4_entry; struct mlxsw_sp_fib_node *fib_node; @@ -4989,14 +4896,13 @@ mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp, goto err_fib4_entry_create; } - err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace, - append); + err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry); if (err) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n"); goto err_fib4_node_entry_link; } - mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace); + mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry); return 0; @@ -6094,7 +6000,6 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) struct mlxsw_sp_fib_event_work *fib_work = container_of(work, struct mlxsw_sp_fib_event_work, work); struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; - bool replace, append; int err; /* Protect internal structures from changes */ @@ -6102,13 +6007,9 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) mlxsw_sp_span_respin(mlxsw_sp); switch (fib_work->event) { - case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ - case FIB_EVENT_ENTRY_ADD: - replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE; - append = fib_work->event == FIB_EVENT_ENTRY_APPEND; - err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info, - replace, append); + case FIB_EVENT_ENTRY_REPLACE: + err = mlxsw_sp_router_fib4_replace(mlxsw_sp, + &fib_work->fen_info); if (err) mlxsw_sp_router_fib_abort(mlxsw_sp); fib_info_put(fib_work->fen_info.fi); @@ -6211,8 +6112,6 @@ static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work, switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ - case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_DEL: fen_info = container_of(info, struct fib_entry_notifier_info, info); @@ -6343,9 +6242,8 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, err = mlxsw_sp_router_fib_rule_event(event, info, router->mlxsw_sp); return notifier_from_errno(err); - case FIB_EVENT_ENTRY_ADD: - case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ + case FIB_EVENT_ENTRY_ADD: /* fall through */ + case FIB_EVENT_ENTRY_REPLACE: if (router->aborted) { NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route"); return notifier_from_errno(-EINVAL); diff --git a/drivers/net/ethernet/micrel/ks8842.c b/drivers/net/ethernet/micrel/ks8842.c index da329ca115cc..f3f6dfe3eddc 100644 --- a/drivers/net/ethernet/micrel/ks8842.c +++ b/drivers/net/ethernet/micrel/ks8842.c @@ -1103,7 +1103,7 @@ static void ks8842_tx_timeout_work(struct work_struct *work) __ks8842_start_new_rx_dma(netdev); } -static void ks8842_tx_timeout(struct net_device *netdev) +static void ks8842_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct ks8842_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c index e102e1560ac7..d1444ba36e10 100644 --- a/drivers/net/ethernet/micrel/ksz884x.c +++ b/drivers/net/ethernet/micrel/ksz884x.c @@ -4896,7 +4896,7 @@ unlock: * triggered to free up resources so that the transmit routine can continue * sending out packets. The hardware is reset to correct the problem. */ -static void netdev_tx_timeout(struct net_device *dev) +static void netdev_tx_timeout(struct net_device *dev, unsigned int txqueue) { static unsigned long last_reset; diff --git a/drivers/net/ethernet/microchip/enc28j60.c b/drivers/net/ethernet/microchip/enc28j60.c index 0567e4f387a5..09cdc2f2e7ff 100644 --- a/drivers/net/ethernet/microchip/enc28j60.c +++ b/drivers/net/ethernet/microchip/enc28j60.c @@ -1325,7 +1325,7 @@ static irqreturn_t enc28j60_irq(int irq, void *dev_id) return IRQ_HANDLED; } -static void enc28j60_tx_timeout(struct net_device *ndev) +static void enc28j60_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct enc28j60_net *priv = netdev_priv(ndev); diff --git a/drivers/net/ethernet/microchip/encx24j600.c b/drivers/net/ethernet/microchip/encx24j600.c index 52c41d11f565..39925e4bf2ec 100644 --- a/drivers/net/ethernet/microchip/encx24j600.c +++ b/drivers/net/ethernet/microchip/encx24j600.c @@ -892,7 +892,7 @@ static netdev_tx_t encx24j600_tx(struct sk_buff *skb, struct net_device *dev) } /* Deal with a transmit timeout */ -static void encx24j600_tx_timeout(struct net_device *dev) +static void encx24j600_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct encx24j600_priv *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c index 1a2634cbbb69..d21d706b83a7 100644 --- a/drivers/net/ethernet/natsemi/natsemi.c +++ b/drivers/net/ethernet/natsemi/natsemi.c @@ -612,7 +612,7 @@ static void undo_cable_magic(struct net_device *dev); static void check_link(struct net_device *dev); static void netdev_timer(struct timer_list *t); static void dump_ring(struct net_device *dev); -static void ns_tx_timeout(struct net_device *dev); +static void ns_tx_timeout(struct net_device *dev, unsigned int txqueue); static int alloc_ring(struct net_device *dev); static void refill_rx(struct net_device *dev); static void init_ring(struct net_device *dev); @@ -1881,7 +1881,7 @@ static void dump_ring(struct net_device *dev) } } -static void ns_tx_timeout(struct net_device *dev) +static void ns_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct netdev_private *np = netdev_priv(dev); void __iomem * ioaddr = ns_ioaddr(dev); diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c index 6af9a7eee114..be5f62f06785 100644 --- a/drivers/net/ethernet/natsemi/ns83820.c +++ b/drivers/net/ethernet/natsemi/ns83820.c @@ -1549,7 +1549,7 @@ static int ns83820_stop(struct net_device *ndev) return 0; } -static void ns83820_tx_timeout(struct net_device *ndev) +static void ns83820_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct ns83820 *dev = PRIV(ndev); u32 tx_done_idx; @@ -1603,7 +1603,7 @@ static void ns83820_tx_watch(struct timer_list *t) ndev->name, dev->tx_done_idx, dev->tx_free_idx, atomic_read(&dev->nr_tx_skbs)); - ns83820_tx_timeout(ndev); + ns83820_tx_timeout(ndev, UINT_MAX); } mod_timer(&dev->tx_watchdog, jiffies + 2*HZ); diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index b339125b2f09..fdebc8598b22 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -161,7 +161,7 @@ static int sonic_close(struct net_device *dev) return 0; } -static void sonic_tx_timeout(struct net_device *dev) +static void sonic_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct sonic_local *lp = netdev_priv(dev); int i; diff --git a/drivers/net/ethernet/natsemi/sonic.h b/drivers/net/ethernet/natsemi/sonic.h index 2b27f7049acb..f1544481aac1 100644 --- a/drivers/net/ethernet/natsemi/sonic.h +++ b/drivers/net/ethernet/natsemi/sonic.h @@ -336,7 +336,7 @@ static int sonic_close(struct net_device *dev); static struct net_device_stats *sonic_get_stats(struct net_device *dev); static void sonic_multicast_list(struct net_device *dev); static int sonic_init(struct net_device *dev); -static void sonic_tx_timeout(struct net_device *dev); +static void sonic_tx_timeout(struct net_device *dev, unsigned int txqueue); static void sonic_msg_init(struct net_device *dev); /* Internal inlines for reading/writing DMA buffers. Note that bus diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index e0b2bf327905..0ec6b8e8b549 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -7238,7 +7238,7 @@ out_unlock: * void */ -static void s2io_tx_watchdog(struct net_device *dev) +static void s2io_tx_watchdog(struct net_device *dev, unsigned int txqueue) { struct s2io_nic *sp = netdev_priv(dev); struct swStat *swstats = &sp->mac_control.stats_info->sw_stat; diff --git a/drivers/net/ethernet/neterion/s2io.h b/drivers/net/ethernet/neterion/s2io.h index 0a921f30f98f..6fa3159a977f 100644 --- a/drivers/net/ethernet/neterion/s2io.h +++ b/drivers/net/ethernet/neterion/s2io.h @@ -1065,7 +1065,7 @@ static void s2io_txpic_intr_handle(struct s2io_nic *sp); static void tx_intr_handler(struct fifo_info *fifo_data); static void s2io_handle_errors(void * dev_id); -static void s2io_tx_watchdog(struct net_device *dev); +static void s2io_tx_watchdog(struct net_device *dev, unsigned int txqueue); static void s2io_set_multicast(struct net_device *dev); static int rx_osm_handler(struct ring_info *ring_data, struct RxD_t * rxdp); static void s2io_link(struct s2io_nic * sp, int link); diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index 1d334f2e0a56..9b63574b6202 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -3273,7 +3273,7 @@ static int vxge_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) * This function is triggered if the Tx Queue is stopped * for a pre-defined amount of time when the Interface is still up. */ -static void vxge_tx_watchdog(struct net_device *dev) +static void vxge_tx_watchdog(struct net_device *dev, unsigned int txqueue) { struct vxgedev *vdev; diff --git a/drivers/net/ethernet/netronome/nfp/abm/cls.c b/drivers/net/ethernet/netronome/nfp/abm/cls.c index 9f8a1f69c0c4..23ebddfb9532 100644 --- a/drivers/net/ethernet/netronome/nfp/abm/cls.c +++ b/drivers/net/ethernet/netronome/nfp/abm/cls.c @@ -176,10 +176,8 @@ nfp_abm_u32_knode_replace(struct nfp_abm_link *alink, u8 mask, val; int err; - if (!nfp_abm_u32_check_knode(alink->abm, knode, proto, extack)) { - err = -EOPNOTSUPP; + if (!nfp_abm_u32_check_knode(alink->abm, knode, proto, extack)) goto err_delete; - } tos_off = proto == htons(ETH_P_IP) ? 16 : 20; @@ -200,18 +198,14 @@ nfp_abm_u32_knode_replace(struct nfp_abm_link *alink, if ((iter->val & cmask) == (val & cmask) && iter->band != knode->res->classid) { NL_SET_ERR_MSG_MOD(extack, "conflict with already offloaded filter"); - err = -EOPNOTSUPP; goto err_delete; } } if (!match) { match = kzalloc(sizeof(*match), GFP_KERNEL); - if (!match) { - err = -ENOMEM; - goto err_delete; - } - + if (!match) + return -ENOMEM; list_add(&match->list, &alink->dscp_map); } match->handle = knode->handle; @@ -227,7 +221,7 @@ nfp_abm_u32_knode_replace(struct nfp_abm_link *alink, err_delete: nfp_abm_u32_knode_delete(alink, knode); - return err; + return -EOPNOTSUPP; } static int nfp_abm_setup_tc_block_cb(enum tc_setup_type type, diff --git a/drivers/net/ethernet/netronome/nfp/ccm.h b/drivers/net/ethernet/netronome/nfp/ccm.h index a460c75522be..d81d450be50e 100644 --- a/drivers/net/ethernet/netronome/nfp/ccm.h +++ b/drivers/net/ethernet/netronome/nfp/ccm.h @@ -26,6 +26,7 @@ enum nfp_ccm_type { NFP_CCM_TYPE_CRYPTO_ADD = 10, NFP_CCM_TYPE_CRYPTO_DEL = 11, NFP_CCM_TYPE_CRYPTO_UPDATE = 12, + NFP_CCM_TYPE_CRYPTO_RESYNC = 13, __NFP_CCM_TYPE_MAX, }; diff --git a/drivers/net/ethernet/netronome/nfp/crypto/crypto.h b/drivers/net/ethernet/netronome/nfp/crypto/crypto.h index 60372ddf69f0..bffe58bb2f27 100644 --- a/drivers/net/ethernet/netronome/nfp/crypto/crypto.h +++ b/drivers/net/ethernet/netronome/nfp/crypto/crypto.h @@ -4,6 +4,10 @@ #ifndef NFP_CRYPTO_H #define NFP_CRYPTO_H 1 +struct net_device; +struct nfp_net; +struct nfp_net_tls_resync_req; + struct nfp_net_tls_offload_ctx { __be32 fw_handle[2]; @@ -17,11 +21,22 @@ struct nfp_net_tls_offload_ctx { #ifdef CONFIG_TLS_DEVICE int nfp_net_tls_init(struct nfp_net *nn); +int nfp_net_tls_rx_resync_req(struct net_device *netdev, + struct nfp_net_tls_resync_req *req, + void *pkt, unsigned int pkt_len); #else static inline int nfp_net_tls_init(struct nfp_net *nn) { return 0; } + +static inline int +nfp_net_tls_rx_resync_req(struct net_device *netdev, + struct nfp_net_tls_resync_req *req, + void *pkt, unsigned int pkt_len) +{ + return -EOPNOTSUPP; +} #endif #endif diff --git a/drivers/net/ethernet/netronome/nfp/crypto/fw.h b/drivers/net/ethernet/netronome/nfp/crypto/fw.h index 67413d946c4a..8d1458896bcb 100644 --- a/drivers/net/ethernet/netronome/nfp/crypto/fw.h +++ b/drivers/net/ethernet/netronome/nfp/crypto/fw.h @@ -9,6 +9,14 @@ #define NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_ENC 0 #define NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_DEC 1 +struct nfp_net_tls_resync_req { + __be32 fw_handle[2]; + __be32 tcp_seq; + u8 l3_offset; + u8 l4_offset; + u8 resv[2]; +}; + struct nfp_crypto_reply_simple { struct nfp_ccm_hdr hdr; __be32 error; diff --git a/drivers/net/ethernet/netronome/nfp/crypto/tls.c b/drivers/net/ethernet/netronome/nfp/crypto/tls.c index 96a96b35c0ca..7c50e3dfb9d5 100644 --- a/drivers/net/ethernet/netronome/nfp/crypto/tls.c +++ b/drivers/net/ethernet/netronome/nfp/crypto/tls.c @@ -5,6 +5,7 @@ #include <linux/ipv6.h> #include <linux/skbuff.h> #include <linux/string.h> +#include <net/inet6_hashtables.h> #include <net/tls.h> #include "../ccm.h" @@ -391,8 +392,9 @@ nfp_net_tls_add(struct net_device *netdev, struct sock *sk, if (direction == TLS_OFFLOAD_CTX_DIR_TX) return 0; - tls_offload_rx_resync_set_type(sk, - TLS_OFFLOAD_SYNC_TYPE_CORE_NEXT_HINT); + if (!nn->tlv_caps.tls_resync_ss) + tls_offload_rx_resync_set_type(sk, TLS_OFFLOAD_SYNC_TYPE_CORE_NEXT_HINT); + return 0; err_fw_remove: @@ -424,6 +426,7 @@ nfp_net_tls_resync(struct net_device *netdev, struct sock *sk, u32 seq, struct nfp_net *nn = netdev_priv(netdev); struct nfp_net_tls_offload_ctx *ntls; struct nfp_crypto_req_update *req; + enum nfp_ccm_type type; struct sk_buff *skb; gfp_t flags; int err; @@ -442,15 +445,18 @@ nfp_net_tls_resync(struct net_device *netdev, struct sock *sk, u32 seq, req->tcp_seq = cpu_to_be32(seq); memcpy(req->rec_no, rcd_sn, sizeof(req->rec_no)); + type = NFP_CCM_TYPE_CRYPTO_UPDATE; if (direction == TLS_OFFLOAD_CTX_DIR_TX) { - err = nfp_net_tls_communicate_simple(nn, skb, "sync", - NFP_CCM_TYPE_CRYPTO_UPDATE); + err = nfp_net_tls_communicate_simple(nn, skb, "sync", type); if (err) return err; ntls->next_seq = seq; } else { - nfp_ccm_mbox_post(nn, skb, NFP_CCM_TYPE_CRYPTO_UPDATE, + if (nn->tlv_caps.tls_resync_ss) + type = NFP_CCM_TYPE_CRYPTO_RESYNC; + nfp_ccm_mbox_post(nn, skb, type, sizeof(struct nfp_crypto_reply_simple)); + atomic_inc(&nn->ktls_rx_resync_sent); } return 0; @@ -462,6 +468,79 @@ static const struct tlsdev_ops nfp_net_tls_ops = { .tls_dev_resync = nfp_net_tls_resync, }; +int nfp_net_tls_rx_resync_req(struct net_device *netdev, + struct nfp_net_tls_resync_req *req, + void *pkt, unsigned int pkt_len) +{ + struct nfp_net *nn = netdev_priv(netdev); + struct nfp_net_tls_offload_ctx *ntls; + struct ipv6hdr *ipv6h; + struct tcphdr *th; + struct iphdr *iph; + struct sock *sk; + __be32 tcp_seq; + int err; + + iph = pkt + req->l3_offset; + ipv6h = pkt + req->l3_offset; + th = pkt + req->l4_offset; + + if ((u8 *)&th[1] > (u8 *)pkt + pkt_len) { + netdev_warn_once(netdev, "invalid TLS RX resync request (l3_off: %hhu l4_off: %hhu pkt_len: %u)\n", + req->l3_offset, req->l4_offset, pkt_len); + err = -EINVAL; + goto err_cnt_ign; + } + + switch (iph->version) { + case 4: + sk = inet_lookup_established(dev_net(netdev), &tcp_hashinfo, + iph->saddr, th->source, iph->daddr, + th->dest, netdev->ifindex); + break; +#if IS_ENABLED(CONFIG_IPV6) + case 6: + sk = __inet6_lookup_established(dev_net(netdev), &tcp_hashinfo, + &ipv6h->saddr, th->source, + &ipv6h->daddr, ntohs(th->dest), + netdev->ifindex, 0); + break; +#endif + default: + netdev_warn_once(netdev, "invalid TLS RX resync request (l3_off: %hhu l4_off: %hhu ipver: %u)\n", + req->l3_offset, req->l4_offset, iph->version); + err = -EINVAL; + goto err_cnt_ign; + } + + err = 0; + if (!sk) + goto err_cnt_ign; + if (!tls_is_sk_rx_device_offloaded(sk) || + sk->sk_shutdown & RCV_SHUTDOWN) + goto err_put_sock; + + ntls = tls_driver_ctx(sk, TLS_OFFLOAD_CTX_DIR_RX); + /* some FW versions can't report the handle and report 0s */ + if (memchr_inv(&req->fw_handle, 0, sizeof(req->fw_handle)) && + memcmp(&req->fw_handle, &ntls->fw_handle, sizeof(ntls->fw_handle))) + goto err_put_sock; + + /* copy to ensure alignment */ + memcpy(&tcp_seq, &req->tcp_seq, sizeof(tcp_seq)); + tls_offload_rx_resync_request(sk, tcp_seq); + atomic_inc(&nn->ktls_rx_resync_req); + + sock_gen_put(sk); + return 0; + +err_put_sock: + sock_gen_put(sk); +err_cnt_ign: + atomic_inc(&nn->ktls_rx_resync_ign); + return err; +} + static int nfp_net_tls_reset(struct nfp_net *nn) { struct nfp_crypto_req_reset *req; diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 1b019fdfcd97..c06600fb47ff 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -22,8 +22,9 @@ #define NFP_FL_TUNNEL_CSUM cpu_to_be16(0x01) #define NFP_FL_TUNNEL_KEY cpu_to_be16(0x04) #define NFP_FL_TUNNEL_GENEVE_OPT cpu_to_be16(0x0800) -#define NFP_FL_SUPPORTED_TUNNEL_INFO_FLAGS IP_TUNNEL_INFO_TX -#define NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS (NFP_FL_TUNNEL_CSUM | \ +#define NFP_FL_SUPPORTED_TUNNEL_INFO_FLAGS (IP_TUNNEL_INFO_TX | \ + IP_TUNNEL_INFO_IPV6) +#define NFP_FL_SUPPORTED_UDP_TUN_FLAGS (NFP_FL_TUNNEL_CSUM | \ NFP_FL_TUNNEL_KEY | \ NFP_FL_TUNNEL_GENEVE_OPT) @@ -394,19 +395,26 @@ nfp_fl_push_geneve_options(struct nfp_fl_payload *nfp_fl, int *list_len, } static int -nfp_fl_set_ipv4_tun(struct nfp_app *app, struct nfp_fl_set_ipv4_tun *set_tun, - const struct flow_action_entry *act, - struct nfp_fl_pre_tunnel *pre_tun, - enum nfp_flower_tun_type tun_type, - struct net_device *netdev, struct netlink_ext_ack *extack) +nfp_fl_set_tun(struct nfp_app *app, struct nfp_fl_set_tun *set_tun, + const struct flow_action_entry *act, + struct nfp_fl_pre_tunnel *pre_tun, + enum nfp_flower_tun_type tun_type, + struct net_device *netdev, struct netlink_ext_ack *extack) { - size_t act_size = sizeof(struct nfp_fl_set_ipv4_tun); const struct ip_tunnel_info *ip_tun = act->tunnel; + bool ipv6 = ip_tunnel_info_af(ip_tun) == AF_INET6; + size_t act_size = sizeof(struct nfp_fl_set_tun); struct nfp_flower_priv *priv = app->priv; u32 tmp_set_ip_tun_type_index = 0; /* Currently support one pre-tunnel so index is always 0. */ int pretun_idx = 0; + if (!IS_ENABLED(CONFIG_IPV6) && ipv6) + return -EOPNOTSUPP; + + if (ipv6 && !(priv->flower_ext_feats & NFP_FL_FEATS_IPV6_TUN)) + return -EOPNOTSUPP; + BUILD_BUG_ON(NFP_FL_TUNNEL_CSUM != TUNNEL_CSUM || NFP_FL_TUNNEL_KEY != TUNNEL_KEY || NFP_FL_TUNNEL_GENEVE_OPT != TUNNEL_GENEVE_OPT); @@ -417,19 +425,35 @@ nfp_fl_set_ipv4_tun(struct nfp_app *app, struct nfp_fl_set_ipv4_tun *set_tun, return -EOPNOTSUPP; } - set_tun->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL; + set_tun->head.jump_id = NFP_FL_ACTION_OPCODE_SET_TUNNEL; set_tun->head.len_lw = act_size >> NFP_FL_LW_SIZ; /* Set tunnel type and pre-tunnel index. */ tmp_set_ip_tun_type_index |= - FIELD_PREP(NFP_FL_IPV4_TUNNEL_TYPE, tun_type) | - FIELD_PREP(NFP_FL_IPV4_PRE_TUN_INDEX, pretun_idx); + FIELD_PREP(NFP_FL_TUNNEL_TYPE, tun_type) | + FIELD_PREP(NFP_FL_PRE_TUN_INDEX, pretun_idx); set_tun->tun_type_index = cpu_to_be32(tmp_set_ip_tun_type_index); set_tun->tun_id = ip_tun->key.tun_id; if (ip_tun->key.ttl) { set_tun->ttl = ip_tun->key.ttl; +#ifdef CONFIG_IPV6 + } else if (ipv6) { + struct net *net = dev_net(netdev); + struct flowi6 flow = {}; + struct dst_entry *dst; + + flow.daddr = ip_tun->key.u.ipv6.dst; + flow.flowi4_proto = IPPROTO_UDP; + dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &flow, NULL); + if (!IS_ERR(dst)) { + set_tun->ttl = ip6_dst_hoplimit(dst); + dst_release(dst); + } else { + set_tun->ttl = net->ipv6.devconf_all->hop_limit; + } +#endif } else { struct net *net = dev_net(netdev); struct flowi4 flow = {}; @@ -455,7 +479,7 @@ nfp_fl_set_ipv4_tun(struct nfp_app *app, struct nfp_fl_set_ipv4_tun *set_tun, set_tun->tos = ip_tun->key.tos; if (!(ip_tun->key.tun_flags & NFP_FL_TUNNEL_KEY) || - ip_tun->key.tun_flags & ~NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS) { + ip_tun->key.tun_flags & ~NFP_FL_SUPPORTED_UDP_TUN_FLAGS) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support tunnel flag offload"); return -EOPNOTSUPP; } @@ -467,7 +491,12 @@ nfp_fl_set_ipv4_tun(struct nfp_app *app, struct nfp_fl_set_ipv4_tun *set_tun, } /* Complete pre_tunnel action. */ - pre_tun->ipv4_dst = ip_tun->key.u.ipv4.dst; + if (ipv6) { + pre_tun->flags |= cpu_to_be16(NFP_FL_PRE_TUN_IPV6); + pre_tun->ipv6_dst = ip_tun->key.u.ipv6.dst; + } else { + pre_tun->ipv4_dst = ip_tun->key.u.ipv4.dst; + } return 0; } @@ -956,8 +985,8 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, struct nfp_flower_pedit_acts *set_act, bool *pkt_host, struct netlink_ext_ack *extack, int act_idx) { - struct nfp_fl_set_ipv4_tun *set_tun; struct nfp_fl_pre_tunnel *pre_tun; + struct nfp_fl_set_tun *set_tun; struct nfp_fl_push_vlan *psh_v; struct nfp_fl_push_mpls *psh_m; struct nfp_fl_pop_vlan *pop_v; @@ -1032,7 +1061,7 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, * If none, the packet falls back before applying other actions. */ if (*a_len + sizeof(struct nfp_fl_pre_tunnel) + - sizeof(struct nfp_fl_set_ipv4_tun) > NFP_FL_MAX_A_SIZ) { + sizeof(struct nfp_fl_set_tun) > NFP_FL_MAX_A_SIZ) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at tunnel encap"); return -EOPNOTSUPP; } @@ -1046,11 +1075,11 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, return err; set_tun = (void *)&nfp_fl->action_data[*a_len]; - err = nfp_fl_set_ipv4_tun(app, set_tun, act, pre_tun, - *tun_type, netdev, extack); + err = nfp_fl_set_tun(app, set_tun, act, pre_tun, *tun_type, + netdev, extack); if (err) return err; - *a_len += sizeof(struct nfp_fl_set_ipv4_tun); + *a_len += sizeof(struct nfp_fl_set_tun); } break; case FLOW_ACTION_TUNNEL_DECAP: diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c index 05981b54eaab..a595ddb92bff 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c @@ -270,11 +270,17 @@ nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb) } goto err_default; case NFP_FLOWER_CMSG_TYPE_NO_NEIGH: - nfp_tunnel_request_route(app, skb); + nfp_tunnel_request_route_v4(app, skb); + break; + case NFP_FLOWER_CMSG_TYPE_NO_NEIGH_V6: + nfp_tunnel_request_route_v6(app, skb); break; case NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS: nfp_tunnel_keep_alive(app, skb); break; + case NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS_V6: + nfp_tunnel_keep_alive_v6(app, skb); + break; case NFP_FLOWER_CMSG_TYPE_QOS_STATS: nfp_flower_stats_rlim_reply(app, skb); break; @@ -361,7 +367,8 @@ void nfp_flower_cmsg_rx(struct nfp_app *app, struct sk_buff *skb) nfp_flower_process_mtu_ack(app, skb)) { /* Handle MTU acks outside wq to prevent RTNL conflict. */ dev_consume_skb_any(skb); - } else if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_TUN_NEIGH) { + } else if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_TUN_NEIGH || + cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6) { /* Acks from the NFP that the route is added - ignore. */ dev_consume_skb_any(skb); } else if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_PORT_REIFY) { diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index 7eb2ec8969c3..9b50d76bbc09 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -26,6 +26,7 @@ #define NFP_FLOWER_LAYER2_GRE BIT(0) #define NFP_FLOWER_LAYER2_GENEVE BIT(5) #define NFP_FLOWER_LAYER2_GENEVE_OP BIT(6) +#define NFP_FLOWER_LAYER2_TUN_IPV6 BIT(7) #define NFP_FLOWER_MASK_VLAN_PRIO GENMASK(15, 13) #define NFP_FLOWER_MASK_VLAN_PRESENT BIT(12) @@ -63,6 +64,7 @@ #define NFP_FL_MAX_GENEVE_OPT_ACT 32 #define NFP_FL_MAX_GENEVE_OPT_CNT 64 #define NFP_FL_MAX_GENEVE_OPT_KEY 32 +#define NFP_FL_MAX_GENEVE_OPT_KEY_V6 8 /* Action opcodes */ #define NFP_FL_ACTION_OPCODE_OUTPUT 0 @@ -70,7 +72,7 @@ #define NFP_FL_ACTION_OPCODE_POP_VLAN 2 #define NFP_FL_ACTION_OPCODE_PUSH_MPLS 3 #define NFP_FL_ACTION_OPCODE_POP_MPLS 4 -#define NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL 6 +#define NFP_FL_ACTION_OPCODE_SET_TUNNEL 6 #define NFP_FL_ACTION_OPCODE_SET_ETHERNET 7 #define NFP_FL_ACTION_OPCODE_SET_MPLS 8 #define NFP_FL_ACTION_OPCODE_SET_IPV4_ADDRS 9 @@ -99,8 +101,8 @@ /* Tunnel ports */ #define NFP_FL_PORT_TYPE_TUN 0x50000000 -#define NFP_FL_IPV4_TUNNEL_TYPE GENMASK(7, 4) -#define NFP_FL_IPV4_PRE_TUN_INDEX GENMASK(2, 0) +#define NFP_FL_TUNNEL_TYPE GENMASK(7, 4) +#define NFP_FL_PRE_TUN_INDEX GENMASK(2, 0) #define NFP_FLOWER_WORKQ_MAX_SKBS 30000 @@ -206,13 +208,16 @@ struct nfp_fl_pre_lag { struct nfp_fl_pre_tunnel { struct nfp_fl_act_head head; - __be16 reserved; - __be32 ipv4_dst; - /* reserved for use with IPv6 addresses */ - __be32 extra[3]; + __be16 flags; + union { + __be32 ipv4_dst; + struct in6_addr ipv6_dst; + }; }; -struct nfp_fl_set_ipv4_tun { +#define NFP_FL_PRE_TUN_IPV6 BIT(0) + +struct nfp_fl_set_tun { struct nfp_fl_act_head head; __be16 reserved; __be64 tun_id __packed; @@ -387,6 +392,11 @@ struct nfp_flower_tun_ipv4 { __be32 dst; }; +struct nfp_flower_tun_ipv6 { + struct in6_addr src; + struct in6_addr dst; +}; + struct nfp_flower_tun_ip_ext { u8 tos; u8 ttl; @@ -416,6 +426,42 @@ struct nfp_flower_ipv4_udp_tun { __be32 tun_id; }; +/* Flow Frame IPv6 UDP TUNNEL --> Tunnel details (11W/44B) + * ----------------------------------------------------------------- + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 31 - 0 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 63 - 32 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 95 - 64 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 127 - 96 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 31 - 0 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 63 - 32 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 95 - 64 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 127 - 96 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | tos | ttl | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | VNI | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_ipv6_udp_tun { + struct nfp_flower_tun_ipv6 ipv6; + __be16 reserved1; + struct nfp_flower_tun_ip_ext ip_ext; + __be32 reserved2; + __be32 tun_id; +}; + /* Flow Frame GRE TUNNEL --> Tunnel details (6W/24B) * ----------------------------------------------------------------- * 3 2 1 @@ -445,6 +491,46 @@ struct nfp_flower_ipv4_gre_tun { __be32 reserved2; }; +/* Flow Frame GRE TUNNEL V6 --> Tunnel details (12W/48B) + * ----------------------------------------------------------------- + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 31 - 0 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 63 - 32 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 95 - 64 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 127 - 96 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 31 - 0 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 63 - 32 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 95 - 64 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 127 - 96 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | tun_flags | tos | ttl | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | Ethertype | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Key | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_ipv6_gre_tun { + struct nfp_flower_tun_ipv6 ipv6; + __be16 tun_flags; + struct nfp_flower_tun_ip_ext ip_ext; + __be16 reserved1; + __be16 ethertype; + __be32 tun_key; + __be32 reserved2; +}; + struct nfp_flower_geneve_options { u8 data[NFP_FL_MAX_GENEVE_OPT_KEY]; }; @@ -485,6 +571,10 @@ enum nfp_flower_cmsg_type_port { NFP_FLOWER_CMSG_TYPE_QOS_DEL = 19, NFP_FLOWER_CMSG_TYPE_QOS_STATS = 20, NFP_FLOWER_CMSG_TYPE_PRE_TUN_RULE = 21, + NFP_FLOWER_CMSG_TYPE_TUN_IPS_V6 = 22, + NFP_FLOWER_CMSG_TYPE_NO_NEIGH_V6 = 23, + NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6 = 24, + NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS_V6 = 25, NFP_FLOWER_CMSG_TYPE_MAX = 32, }; diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index 31d94592a7c0..ddd7b7f5f5ab 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -43,6 +43,7 @@ struct nfp_app; #define NFP_FL_FEATS_VF_RLIM BIT(4) #define NFP_FL_FEATS_FLOW_MOD BIT(5) #define NFP_FL_FEATS_PRE_TUN_RULES BIT(6) +#define NFP_FL_FEATS_IPV6_TUN BIT(7) #define NFP_FL_FEATS_FLOW_MERGE BIT(30) #define NFP_FL_FEATS_LAG BIT(31) @@ -62,18 +63,26 @@ struct nfp_fl_stats_id { * struct nfp_fl_tunnel_offloads - priv data for tunnel offloads * @offloaded_macs: Hashtable of the offloaded MAC addresses * @ipv4_off_list: List of IPv4 addresses to offload - * @neigh_off_list: List of neighbour offloads + * @ipv6_off_list: List of IPv6 addresses to offload + * @neigh_off_list_v4: List of IPv4 neighbour offloads + * @neigh_off_list_v6: List of IPv6 neighbour offloads * @ipv4_off_lock: Lock for the IPv4 address list - * @neigh_off_lock: Lock for the neighbour address list + * @ipv6_off_lock: Lock for the IPv6 address list + * @neigh_off_lock_v4: Lock for the IPv4 neighbour address list + * @neigh_off_lock_v6: Lock for the IPv6 neighbour address list * @mac_off_ids: IDA to manage id assignment for offloaded MACs * @neigh_nb: Notifier to monitor neighbour state */ struct nfp_fl_tunnel_offloads { struct rhashtable offloaded_macs; struct list_head ipv4_off_list; - struct list_head neigh_off_list; + struct list_head ipv6_off_list; + struct list_head neigh_off_list_v4; + struct list_head neigh_off_list_v6; struct mutex ipv4_off_lock; - spinlock_t neigh_off_lock; + struct mutex ipv6_off_lock; + spinlock_t neigh_off_lock_v4; + spinlock_t neigh_off_lock_v6; struct ida mac_off_ids; struct notifier_block neigh_nb; }; @@ -273,12 +282,25 @@ struct nfp_fl_stats { u64 used; }; +/** + * struct nfp_ipv6_addr_entry - cached IPv6 addresses + * @ipv6_addr: IP address + * @ref_count: number of rules currently using this IP + * @list: list pointer + */ +struct nfp_ipv6_addr_entry { + struct in6_addr ipv6_addr; + int ref_count; + struct list_head list; +}; + struct nfp_fl_payload { struct nfp_fl_rule_metadata meta; unsigned long tc_flower_cookie; struct rhash_head fl_node; struct rcu_head rcu; __be32 nfp_tun_ipv4_addr; + struct nfp_ipv6_addr_entry *nfp_tun_ipv6; struct net_device *ingress_dev; char *unmasked_data; char *mask_data; @@ -396,8 +418,14 @@ int nfp_tunnel_mac_event_handler(struct nfp_app *app, unsigned long event, void *ptr); void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4); void nfp_tunnel_add_ipv4_off(struct nfp_app *app, __be32 ipv4); -void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb); +void +nfp_tunnel_put_ipv6_off(struct nfp_app *app, struct nfp_ipv6_addr_entry *entry); +struct nfp_ipv6_addr_entry * +nfp_tunnel_add_ipv6_off(struct nfp_app *app, struct in6_addr *ipv6); +void nfp_tunnel_request_route_v4(struct nfp_app *app, struct sk_buff *skb); +void nfp_tunnel_request_route_v6(struct nfp_app *app, struct sk_buff *skb); void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb); +void nfp_tunnel_keep_alive_v6(struct nfp_app *app, struct sk_buff *skb); void nfp_flower_lag_init(struct nfp_fl_lag *lag); void nfp_flower_lag_cleanup(struct nfp_fl_lag *lag); int nfp_flower_lag_reset(struct nfp_fl_lag *lag); diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c index 9cc3ba17ff69..546bc01d507d 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/match.c +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -10,9 +10,8 @@ static void nfp_flower_compile_meta_tci(struct nfp_flower_meta_tci *ext, struct nfp_flower_meta_tci *msk, - struct flow_cls_offload *flow, u8 key_type) + struct flow_rule *rule, u8 key_type) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); u16 tmp_tci; memset(ext, 0, sizeof(struct nfp_flower_meta_tci)); @@ -77,11 +76,8 @@ nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port, static void nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext, - struct nfp_flower_mac_mpls *msk, - struct flow_cls_offload *flow) + struct nfp_flower_mac_mpls *msk, struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); - memset(ext, 0, sizeof(struct nfp_flower_mac_mpls)); memset(msk, 0, sizeof(struct nfp_flower_mac_mpls)); @@ -130,10 +126,8 @@ nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext, static void nfp_flower_compile_tport(struct nfp_flower_tp_ports *ext, struct nfp_flower_tp_ports *msk, - struct flow_cls_offload *flow) + struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); - memset(ext, 0, sizeof(struct nfp_flower_tp_ports)); memset(msk, 0, sizeof(struct nfp_flower_tp_ports)); @@ -150,11 +144,8 @@ nfp_flower_compile_tport(struct nfp_flower_tp_ports *ext, static void nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *ext, - struct nfp_flower_ip_ext *msk, - struct flow_cls_offload *flow) + struct nfp_flower_ip_ext *msk, struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { struct flow_match_basic match; @@ -224,10 +215,8 @@ nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *ext, static void nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *ext, - struct nfp_flower_ipv4 *msk, - struct flow_cls_offload *flow) + struct nfp_flower_ipv4 *msk, struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); struct flow_match_ipv4_addrs match; memset(ext, 0, sizeof(struct nfp_flower_ipv4)); @@ -241,16 +230,13 @@ nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *ext, msk->ipv4_dst = match.mask->dst; } - nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, flow); + nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); } static void nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *ext, - struct nfp_flower_ipv6 *msk, - struct flow_cls_offload *flow) + struct nfp_flower_ipv6 *msk, struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); - memset(ext, 0, sizeof(struct nfp_flower_ipv6)); memset(msk, 0, sizeof(struct nfp_flower_ipv6)); @@ -264,16 +250,15 @@ nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *ext, msk->ipv6_dst = match.mask->dst; } - nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, flow); + nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); } static int -nfp_flower_compile_geneve_opt(void *ext, void *msk, - struct flow_cls_offload *flow) +nfp_flower_compile_geneve_opt(void *ext, void *msk, struct flow_rule *rule) { struct flow_match_enc_opts match; - flow_rule_match_enc_opts(flow->rule, &match); + flow_rule_match_enc_opts(rule, &match); memcpy(ext, match.key->data, match.key->len); memcpy(msk, match.mask->data, match.mask->len); @@ -283,10 +268,8 @@ nfp_flower_compile_geneve_opt(void *ext, void *msk, static void nfp_flower_compile_tun_ipv4_addrs(struct nfp_flower_tun_ipv4 *ext, struct nfp_flower_tun_ipv4 *msk, - struct flow_cls_offload *flow) + struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { struct flow_match_ipv4_addrs match; @@ -299,12 +282,26 @@ nfp_flower_compile_tun_ipv4_addrs(struct nfp_flower_tun_ipv4 *ext, } static void +nfp_flower_compile_tun_ipv6_addrs(struct nfp_flower_tun_ipv6 *ext, + struct nfp_flower_tun_ipv6 *msk, + struct flow_rule *rule) +{ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) { + struct flow_match_ipv6_addrs match; + + flow_rule_match_enc_ipv6_addrs(rule, &match); + ext->src = match.key->src; + ext->dst = match.key->dst; + msk->src = match.mask->src; + msk->dst = match.mask->dst; + } +} + +static void nfp_flower_compile_tun_ip_ext(struct nfp_flower_tun_ip_ext *ext, struct nfp_flower_tun_ip_ext *msk, - struct flow_cls_offload *flow) + struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) { struct flow_match_ip match; @@ -317,57 +314,97 @@ nfp_flower_compile_tun_ip_ext(struct nfp_flower_tun_ip_ext *ext, } static void -nfp_flower_compile_ipv4_gre_tun(struct nfp_flower_ipv4_gre_tun *ext, - struct nfp_flower_ipv4_gre_tun *msk, - struct flow_cls_offload *flow) +nfp_flower_compile_tun_udp_key(__be32 *key, __be32 *key_msk, + struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); - - memset(ext, 0, sizeof(struct nfp_flower_ipv4_gre_tun)); - memset(msk, 0, sizeof(struct nfp_flower_ipv4_gre_tun)); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_match_enc_keyid match; + u32 vni; - /* NVGRE is the only supported GRE tunnel type */ - ext->ethertype = cpu_to_be16(ETH_P_TEB); - msk->ethertype = cpu_to_be16(~0); + flow_rule_match_enc_keyid(rule, &match); + vni = be32_to_cpu(match.key->keyid) << NFP_FL_TUN_VNI_OFFSET; + *key = cpu_to_be32(vni); + vni = be32_to_cpu(match.mask->keyid) << NFP_FL_TUN_VNI_OFFSET; + *key_msk = cpu_to_be32(vni); + } +} +static void +nfp_flower_compile_tun_gre_key(__be32 *key, __be32 *key_msk, __be16 *flags, + __be16 *flags_msk, struct flow_rule *rule) +{ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { struct flow_match_enc_keyid match; flow_rule_match_enc_keyid(rule, &match); - ext->tun_key = match.key->keyid; - msk->tun_key = match.mask->keyid; + *key = match.key->keyid; + *key_msk = match.mask->keyid; - ext->tun_flags = cpu_to_be16(NFP_FL_GRE_FLAG_KEY); - msk->tun_flags = cpu_to_be16(NFP_FL_GRE_FLAG_KEY); + *flags = cpu_to_be16(NFP_FL_GRE_FLAG_KEY); + *flags_msk = cpu_to_be16(NFP_FL_GRE_FLAG_KEY); } +} + +static void +nfp_flower_compile_ipv4_gre_tun(struct nfp_flower_ipv4_gre_tun *ext, + struct nfp_flower_ipv4_gre_tun *msk, + struct flow_rule *rule) +{ + memset(ext, 0, sizeof(struct nfp_flower_ipv4_gre_tun)); + memset(msk, 0, sizeof(struct nfp_flower_ipv4_gre_tun)); + + /* NVGRE is the only supported GRE tunnel type */ + ext->ethertype = cpu_to_be16(ETH_P_TEB); + msk->ethertype = cpu_to_be16(~0); - nfp_flower_compile_tun_ipv4_addrs(&ext->ipv4, &msk->ipv4, flow); - nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, flow); + nfp_flower_compile_tun_ipv4_addrs(&ext->ipv4, &msk->ipv4, rule); + nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); + nfp_flower_compile_tun_gre_key(&ext->tun_key, &msk->tun_key, + &ext->tun_flags, &msk->tun_flags, rule); } static void nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *ext, struct nfp_flower_ipv4_udp_tun *msk, - struct flow_cls_offload *flow) + struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); - memset(ext, 0, sizeof(struct nfp_flower_ipv4_udp_tun)); memset(msk, 0, sizeof(struct nfp_flower_ipv4_udp_tun)); - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { - struct flow_match_enc_keyid match; - u32 temp_vni; + nfp_flower_compile_tun_ipv4_addrs(&ext->ipv4, &msk->ipv4, rule); + nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); + nfp_flower_compile_tun_udp_key(&ext->tun_id, &msk->tun_id, rule); +} - flow_rule_match_enc_keyid(rule, &match); - temp_vni = be32_to_cpu(match.key->keyid) << NFP_FL_TUN_VNI_OFFSET; - ext->tun_id = cpu_to_be32(temp_vni); - temp_vni = be32_to_cpu(match.mask->keyid) << NFP_FL_TUN_VNI_OFFSET; - msk->tun_id = cpu_to_be32(temp_vni); - } +static void +nfp_flower_compile_ipv6_udp_tun(struct nfp_flower_ipv6_udp_tun *ext, + struct nfp_flower_ipv6_udp_tun *msk, + struct flow_rule *rule) +{ + memset(ext, 0, sizeof(struct nfp_flower_ipv6_udp_tun)); + memset(msk, 0, sizeof(struct nfp_flower_ipv6_udp_tun)); + + nfp_flower_compile_tun_ipv6_addrs(&ext->ipv6, &msk->ipv6, rule); + nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); + nfp_flower_compile_tun_udp_key(&ext->tun_id, &msk->tun_id, rule); +} + +static void +nfp_flower_compile_ipv6_gre_tun(struct nfp_flower_ipv6_gre_tun *ext, + struct nfp_flower_ipv6_gre_tun *msk, + struct flow_rule *rule) +{ + memset(ext, 0, sizeof(struct nfp_flower_ipv6_gre_tun)); + memset(msk, 0, sizeof(struct nfp_flower_ipv6_gre_tun)); + + /* NVGRE is the only supported GRE tunnel type */ + ext->ethertype = cpu_to_be16(ETH_P_TEB); + msk->ethertype = cpu_to_be16(~0); - nfp_flower_compile_tun_ipv4_addrs(&ext->ipv4, &msk->ipv4, flow); - nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, flow); + nfp_flower_compile_tun_ipv6_addrs(&ext->ipv6, &msk->ipv6, rule); + nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); + nfp_flower_compile_tun_gre_key(&ext->tun_key, &msk->tun_key, + &ext->tun_flags, &msk->tun_flags, rule); } int nfp_flower_compile_flow_match(struct nfp_app *app, @@ -378,6 +415,7 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, enum nfp_flower_tun_type tun_type, struct netlink_ext_ack *extack) { + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); u32 port_id; int err; u8 *ext; @@ -393,7 +431,7 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, nfp_flower_compile_meta_tci((struct nfp_flower_meta_tci *)ext, (struct nfp_flower_meta_tci *)msk, - flow, key_ls->key_layer); + rule, key_ls->key_layer); ext += sizeof(struct nfp_flower_meta_tci); msk += sizeof(struct nfp_flower_meta_tci); @@ -425,7 +463,7 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, if (NFP_FLOWER_LAYER_MAC & key_ls->key_layer) { nfp_flower_compile_mac((struct nfp_flower_mac_mpls *)ext, (struct nfp_flower_mac_mpls *)msk, - flow); + rule); ext += sizeof(struct nfp_flower_mac_mpls); msk += sizeof(struct nfp_flower_mac_mpls); } @@ -433,7 +471,7 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, if (NFP_FLOWER_LAYER_TP & key_ls->key_layer) { nfp_flower_compile_tport((struct nfp_flower_tp_ports *)ext, (struct nfp_flower_tp_ports *)msk, - flow); + rule); ext += sizeof(struct nfp_flower_tp_ports); msk += sizeof(struct nfp_flower_tp_ports); } @@ -441,7 +479,7 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, if (NFP_FLOWER_LAYER_IPV4 & key_ls->key_layer) { nfp_flower_compile_ipv4((struct nfp_flower_ipv4 *)ext, (struct nfp_flower_ipv4 *)msk, - flow); + rule); ext += sizeof(struct nfp_flower_ipv4); msk += sizeof(struct nfp_flower_ipv4); } @@ -449,43 +487,83 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, if (NFP_FLOWER_LAYER_IPV6 & key_ls->key_layer) { nfp_flower_compile_ipv6((struct nfp_flower_ipv6 *)ext, (struct nfp_flower_ipv6 *)msk, - flow); + rule); ext += sizeof(struct nfp_flower_ipv6); msk += sizeof(struct nfp_flower_ipv6); } if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_GRE) { - __be32 tun_dst; - - nfp_flower_compile_ipv4_gre_tun((void *)ext, (void *)msk, flow); - tun_dst = ((struct nfp_flower_ipv4_gre_tun *)ext)->ipv4.dst; - ext += sizeof(struct nfp_flower_ipv4_gre_tun); - msk += sizeof(struct nfp_flower_ipv4_gre_tun); - - /* Store the tunnel destination in the rule data. - * This must be present and be an exact match. - */ - nfp_flow->nfp_tun_ipv4_addr = tun_dst; - nfp_tunnel_add_ipv4_off(app, tun_dst); + if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) { + struct nfp_flower_ipv6_gre_tun *gre_match; + struct nfp_ipv6_addr_entry *entry; + struct in6_addr *dst; + + nfp_flower_compile_ipv6_gre_tun((void *)ext, + (void *)msk, rule); + gre_match = (struct nfp_flower_ipv6_gre_tun *)ext; + dst = &gre_match->ipv6.dst; + ext += sizeof(struct nfp_flower_ipv6_gre_tun); + msk += sizeof(struct nfp_flower_ipv6_gre_tun); + + entry = nfp_tunnel_add_ipv6_off(app, dst); + if (!entry) + return -EOPNOTSUPP; + + nfp_flow->nfp_tun_ipv6 = entry; + } else { + __be32 dst; + + nfp_flower_compile_ipv4_gre_tun((void *)ext, + (void *)msk, rule); + dst = ((struct nfp_flower_ipv4_gre_tun *)ext)->ipv4.dst; + ext += sizeof(struct nfp_flower_ipv4_gre_tun); + msk += sizeof(struct nfp_flower_ipv4_gre_tun); + + /* Store the tunnel destination in the rule data. + * This must be present and be an exact match. + */ + nfp_flow->nfp_tun_ipv4_addr = dst; + nfp_tunnel_add_ipv4_off(app, dst); + } } if (key_ls->key_layer & NFP_FLOWER_LAYER_VXLAN || key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE) { - __be32 tun_dst; - - nfp_flower_compile_ipv4_udp_tun((void *)ext, (void *)msk, flow); - tun_dst = ((struct nfp_flower_ipv4_udp_tun *)ext)->ipv4.dst; - ext += sizeof(struct nfp_flower_ipv4_udp_tun); - msk += sizeof(struct nfp_flower_ipv4_udp_tun); - - /* Store the tunnel destination in the rule data. - * This must be present and be an exact match. - */ - nfp_flow->nfp_tun_ipv4_addr = tun_dst; - nfp_tunnel_add_ipv4_off(app, tun_dst); + if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) { + struct nfp_flower_ipv6_udp_tun *udp_match; + struct nfp_ipv6_addr_entry *entry; + struct in6_addr *dst; + + nfp_flower_compile_ipv6_udp_tun((void *)ext, + (void *)msk, rule); + udp_match = (struct nfp_flower_ipv6_udp_tun *)ext; + dst = &udp_match->ipv6.dst; + ext += sizeof(struct nfp_flower_ipv6_udp_tun); + msk += sizeof(struct nfp_flower_ipv6_udp_tun); + + entry = nfp_tunnel_add_ipv6_off(app, dst); + if (!entry) + return -EOPNOTSUPP; + + nfp_flow->nfp_tun_ipv6 = entry; + } else { + __be32 dst; + + nfp_flower_compile_ipv4_udp_tun((void *)ext, + (void *)msk, rule); + dst = ((struct nfp_flower_ipv4_udp_tun *)ext)->ipv4.dst; + ext += sizeof(struct nfp_flower_ipv4_udp_tun); + msk += sizeof(struct nfp_flower_ipv4_udp_tun); + + /* Store the tunnel destination in the rule data. + * This must be present and be an exact match. + */ + nfp_flow->nfp_tun_ipv4_addr = dst; + nfp_tunnel_add_ipv4_off(app, dst); + } if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) { - err = nfp_flower_compile_geneve_opt(ext, msk, flow); + err = nfp_flower_compile_geneve_opt(ext, msk, rule); if (err) return err; } diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 987ae221f6be..7ca5c1becfcf 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -54,6 +54,10 @@ (BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) +#define NFP_FLOWER_WHITELIST_TUN_DISSECTOR_V6_R \ + (BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) + #define NFP_FLOWER_MERGE_FIELDS \ (NFP_FLOWER_LAYER_PORT | \ NFP_FLOWER_LAYER_MAC | \ @@ -64,7 +68,8 @@ #define NFP_FLOWER_PRE_TUN_RULE_FIELDS \ (NFP_FLOWER_LAYER_PORT | \ NFP_FLOWER_LAYER_MAC | \ - NFP_FLOWER_LAYER_IPV4) + NFP_FLOWER_LAYER_IPV4 | \ + NFP_FLOWER_LAYER_IPV6) struct nfp_flower_merge_check { union { @@ -146,10 +151,11 @@ static bool nfp_flower_check_higher_than_l3(struct flow_cls_offload *f) static int nfp_flower_calc_opt_layer(struct flow_dissector_key_enc_opts *enc_opts, - u32 *key_layer_two, int *key_size, + u32 *key_layer_two, int *key_size, bool ipv6, struct netlink_ext_ack *extack) { - if (enc_opts->len > NFP_FL_MAX_GENEVE_OPT_KEY) { + if (enc_opts->len > NFP_FL_MAX_GENEVE_OPT_KEY || + (ipv6 && enc_opts->len > NFP_FL_MAX_GENEVE_OPT_KEY_V6)) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: geneve options exceed maximum length"); return -EOPNOTSUPP; } @@ -167,7 +173,7 @@ nfp_flower_calc_udp_tun_layer(struct flow_dissector_key_ports *enc_ports, struct flow_dissector_key_enc_opts *enc_op, u32 *key_layer_two, u8 *key_layer, int *key_size, struct nfp_flower_priv *priv, - enum nfp_flower_tun_type *tun_type, + enum nfp_flower_tun_type *tun_type, bool ipv6, struct netlink_ext_ack *extack) { int err; @@ -176,7 +182,15 @@ nfp_flower_calc_udp_tun_layer(struct flow_dissector_key_ports *enc_ports, case htons(IANA_VXLAN_UDP_PORT): *tun_type = NFP_FL_TUNNEL_VXLAN; *key_layer |= NFP_FLOWER_LAYER_VXLAN; - *key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + + if (ipv6) { + *key_layer |= NFP_FLOWER_LAYER_EXT_META; + *key_size += sizeof(struct nfp_flower_ext_meta); + *key_layer_two |= NFP_FLOWER_LAYER2_TUN_IPV6; + *key_size += sizeof(struct nfp_flower_ipv6_udp_tun); + } else { + *key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + } if (enc_op) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: encap options not supported on vxlan tunnels"); @@ -192,7 +206,13 @@ nfp_flower_calc_udp_tun_layer(struct flow_dissector_key_ports *enc_ports, *key_layer |= NFP_FLOWER_LAYER_EXT_META; *key_size += sizeof(struct nfp_flower_ext_meta); *key_layer_two |= NFP_FLOWER_LAYER2_GENEVE; - *key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + + if (ipv6) { + *key_layer_two |= NFP_FLOWER_LAYER2_TUN_IPV6; + *key_size += sizeof(struct nfp_flower_ipv6_udp_tun); + } else { + *key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + } if (!enc_op) break; @@ -200,8 +220,8 @@ nfp_flower_calc_udp_tun_layer(struct flow_dissector_key_ports *enc_ports, NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support geneve option offload"); return -EOPNOTSUPP; } - err = nfp_flower_calc_opt_layer(enc_op, key_layer_two, - key_size, extack); + err = nfp_flower_calc_opt_layer(enc_op, key_layer_two, key_size, + ipv6, extack); if (err) return err; break; @@ -237,6 +257,8 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, /* If any tun dissector is used then the required set must be used. */ if (dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR && + (dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR_V6_R) + != NFP_FLOWER_WHITELIST_TUN_DISSECTOR_V6_R && (dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R) != NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: tunnel match not supported"); @@ -268,8 +290,10 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { struct flow_match_enc_opts enc_op = { NULL, NULL }; struct flow_match_ipv4_addrs ipv4_addrs; + struct flow_match_ipv6_addrs ipv6_addrs; struct flow_match_control enc_ctl; struct flow_match_ports enc_ports; + bool ipv6_tun = false; flow_rule_match_enc_control(rule, &enc_ctl); @@ -277,38 +301,62 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, NL_SET_ERR_MSG_MOD(extack, "unsupported offload: wildcarded protocols on tunnels are not supported"); return -EOPNOTSUPP; } - if (enc_ctl.key->addr_type != FLOW_DISSECTOR_KEY_IPV4_ADDRS) { - NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only IPv4 tunnels are supported"); + + ipv6_tun = enc_ctl.key->addr_type == + FLOW_DISSECTOR_KEY_IPV6_ADDRS; + if (ipv6_tun && + !(priv->flower_ext_feats & NFP_FL_FEATS_IPV6_TUN)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: firmware does not support IPv6 tunnels"); return -EOPNOTSUPP; } - /* These fields are already verified as used. */ - flow_rule_match_enc_ipv4_addrs(rule, &ipv4_addrs); - if (ipv4_addrs.mask->dst != cpu_to_be32(~0)) { - NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only an exact match IPv4 destination address is supported"); + if (!ipv6_tun && + enc_ctl.key->addr_type != FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: tunnel address type not IPv4 or IPv6"); return -EOPNOTSUPP; } + if (ipv6_tun) { + flow_rule_match_enc_ipv6_addrs(rule, &ipv6_addrs); + if (memchr_inv(&ipv6_addrs.mask->dst, 0xff, + sizeof(ipv6_addrs.mask->dst))) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only an exact match IPv6 destination address is supported"); + return -EOPNOTSUPP; + } + } else { + flow_rule_match_enc_ipv4_addrs(rule, &ipv4_addrs); + if (ipv4_addrs.mask->dst != cpu_to_be32(~0)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only an exact match IPv4 destination address is supported"); + return -EOPNOTSUPP; + } + } + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS)) flow_rule_match_enc_opts(rule, &enc_op); - if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) { /* check if GRE, which has no enc_ports */ - if (netif_is_gretap(netdev)) { - *tun_type = NFP_FL_TUNNEL_GRE; - key_layer |= NFP_FLOWER_LAYER_EXT_META; - key_size += sizeof(struct nfp_flower_ext_meta); - key_layer_two |= NFP_FLOWER_LAYER2_GRE; - key_size += - sizeof(struct nfp_flower_ipv4_gre_tun); + if (!netif_is_gretap(netdev)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: an exact match on L4 destination port is required for non-GRE tunnels"); + return -EOPNOTSUPP; + } - if (enc_op.key) { - NL_SET_ERR_MSG_MOD(extack, "unsupported offload: encap options not supported on GRE tunnels"); - return -EOPNOTSUPP; - } + *tun_type = NFP_FL_TUNNEL_GRE; + key_layer |= NFP_FLOWER_LAYER_EXT_META; + key_size += sizeof(struct nfp_flower_ext_meta); + key_layer_two |= NFP_FLOWER_LAYER2_GRE; + + if (ipv6_tun) { + key_layer_two |= NFP_FLOWER_LAYER2_TUN_IPV6; + key_size += + sizeof(struct nfp_flower_ipv6_udp_tun); } else { - NL_SET_ERR_MSG_MOD(extack, "unsupported offload: an exact match on L4 destination port is required for non-GRE tunnels"); + key_size += + sizeof(struct nfp_flower_ipv4_udp_tun); + } + + if (enc_op.key) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: encap options not supported on GRE tunnels"); return -EOPNOTSUPP; } } else { @@ -323,7 +371,8 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, &key_layer_two, &key_layer, &key_size, priv, - tun_type, extack); + tun_type, ipv6_tun, + extack); if (err) return err; @@ -491,6 +540,7 @@ nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer) goto err_free_mask; flow_pay->nfp_tun_ipv4_addr = 0; + flow_pay->nfp_tun_ipv6 = NULL; flow_pay->meta.flags = 0; INIT_LIST_HEAD(&flow_pay->linked_flows); flow_pay->in_hw = false; @@ -517,10 +567,12 @@ nfp_flower_update_merge_with_actions(struct nfp_fl_payload *flow, struct nfp_fl_set_ip4_addrs *ipv4_add; struct nfp_fl_set_ipv6_addr *ipv6_add; struct nfp_fl_push_vlan *push_vlan; + struct nfp_fl_pre_tunnel *pre_tun; struct nfp_fl_set_tport *tport; struct nfp_fl_set_eth *eth; struct nfp_fl_act_head *a; unsigned int act_off = 0; + bool ipv6_tun = false; u8 act_id = 0; u8 *ports; int i; @@ -542,14 +594,18 @@ nfp_flower_update_merge_with_actions(struct nfp_fl_payload *flow, case NFP_FL_ACTION_OPCODE_POP_VLAN: merge->tci = cpu_to_be16(0); break; - case NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL: + case NFP_FL_ACTION_OPCODE_SET_TUNNEL: /* New tunnel header means l2 to l4 can be matched. */ eth_broadcast_addr(&merge->l2.mac_dst[0]); eth_broadcast_addr(&merge->l2.mac_src[0]); memset(&merge->l4, 0xff, sizeof(struct nfp_flower_tp_ports)); - memset(&merge->ipv4, 0xff, - sizeof(struct nfp_flower_ipv4)); + if (ipv6_tun) + memset(&merge->ipv6, 0xff, + sizeof(struct nfp_flower_ipv6)); + else + memset(&merge->ipv4, 0xff, + sizeof(struct nfp_flower_ipv4)); break; case NFP_FL_ACTION_OPCODE_SET_ETHERNET: eth = (struct nfp_fl_set_eth *)a; @@ -597,6 +653,10 @@ nfp_flower_update_merge_with_actions(struct nfp_fl_payload *flow, ports[i] |= tport->tp_port_mask[i]; break; case NFP_FL_ACTION_OPCODE_PRE_TUNNEL: + pre_tun = (struct nfp_fl_pre_tunnel *)a; + ipv6_tun = be16_to_cpu(pre_tun->flags) & + NFP_FL_PRE_TUN_IPV6; + break; case NFP_FL_ACTION_OPCODE_PRE_LAG: case NFP_FL_ACTION_OPCODE_PUSH_GENEVE: break; @@ -765,15 +825,15 @@ nfp_fl_verify_post_tun_acts(char *acts, int len, struct nfp_fl_push_vlan **vlan) static int nfp_fl_push_vlan_after_tun(char *acts, int len, struct nfp_fl_push_vlan *vlan) { - struct nfp_fl_set_ipv4_tun *tun; + struct nfp_fl_set_tun *tun; struct nfp_fl_act_head *a; unsigned int act_off = 0; while (act_off < len) { a = (struct nfp_fl_act_head *)&acts[act_off]; - if (a->jump_id == NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL) { - tun = (struct nfp_fl_set_ipv4_tun *)a; + if (a->jump_id == NFP_FL_ACTION_OPCODE_SET_TUNNEL) { + tun = (struct nfp_fl_set_tun *)a; tun->outer_vlan_tpid = vlan->vlan_tpid; tun->outer_vlan_tci = vlan->vlan_tci; @@ -1058,15 +1118,22 @@ nfp_flower_validate_pre_tun_rule(struct nfp_app *app, return -EOPNOTSUPP; } - if (key_layer & NFP_FLOWER_LAYER_IPV4) { + if (key_layer & NFP_FLOWER_LAYER_IPV4 || + key_layer & NFP_FLOWER_LAYER_IPV6) { + /* Flags and proto fields have same offset in IPv4 and IPv6. */ int ip_flags = offsetof(struct nfp_flower_ipv4, ip_ext.flags); int ip_proto = offsetof(struct nfp_flower_ipv4, ip_ext.proto); + int size; int i; + size = key_layer & NFP_FLOWER_LAYER_IPV4 ? + sizeof(struct nfp_flower_ipv4) : + sizeof(struct nfp_flower_ipv6); + mask += sizeof(struct nfp_flower_mac_mpls); /* Ensure proto and flags are the only IP layer fields. */ - for (i = 0; i < sizeof(struct nfp_flower_ipv4); i++) + for (i = 0; i < size; i++) if (mask[i] && i != ip_flags && i != ip_proto) { NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: only flags and proto can be matched in ip header"); return -EOPNOTSUPP; @@ -1195,6 +1262,8 @@ err_remove_rhash: err_release_metadata: nfp_modify_flow_metadata(app, flow_pay); err_destroy_flow: + if (flow_pay->nfp_tun_ipv6) + nfp_tunnel_put_ipv6_off(app, flow_pay->nfp_tun_ipv6); kfree(flow_pay->action_data); kfree(flow_pay->mask_data); kfree(flow_pay->unmasked_data); @@ -1311,6 +1380,9 @@ nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev, if (nfp_flow->nfp_tun_ipv4_addr) nfp_tunnel_del_ipv4_off(app, nfp_flow->nfp_tun_ipv4_addr); + if (nfp_flow->nfp_tun_ipv6) + nfp_tunnel_put_ipv6_off(app, nfp_flow->nfp_tun_ipv6); + if (!nfp_flow->in_hw) { err = 0; goto err_free_merge_flow; diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index 2600ce476d6b..2df3deedf9fd 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -55,6 +55,25 @@ struct nfp_tun_active_tuns { }; /** + * struct nfp_tun_active_tuns_v6 - periodic message of active IPv6 tunnels + * @seq: sequence number of the message + * @count: number of tunnels report in message + * @flags: options part of the request + * @tun_info.ipv6: dest IPv6 address of active route + * @tun_info.egress_port: port the encapsulated packet egressed + * @tun_info: tunnels that have sent traffic in reported period + */ +struct nfp_tun_active_tuns_v6 { + __be32 seq; + __be32 count; + __be32 flags; + struct route_ip_info_v6 { + struct in6_addr ipv6; + __be32 egress_port; + } tun_info[]; +}; + +/** * struct nfp_tun_neigh - neighbour/route entry on the NFP * @dst_ipv4: destination IPv4 address * @src_ipv4: source IPv4 address @@ -71,6 +90,22 @@ struct nfp_tun_neigh { }; /** + * struct nfp_tun_neigh_v6 - neighbour/route entry on the NFP + * @dst_ipv6: destination IPv6 address + * @src_ipv6: source IPv6 address + * @dst_addr: destination MAC address + * @src_addr: source MAC address + * @port_id: NFP port to output packet on - associated with source IPv6 + */ +struct nfp_tun_neigh_v6 { + struct in6_addr dst_ipv6; + struct in6_addr src_ipv6; + u8 dst_addr[ETH_ALEN]; + u8 src_addr[ETH_ALEN]; + __be32 port_id; +}; + +/** * struct nfp_tun_req_route_ipv4 - NFP requests a route/neighbour lookup * @ingress_port: ingress port of packet that signalled request * @ipv4_addr: destination ipv4 address for route @@ -83,13 +118,23 @@ struct nfp_tun_req_route_ipv4 { }; /** - * struct nfp_ipv4_route_entry - routes that are offloaded to the NFP - * @ipv4_addr: destination of route + * struct nfp_tun_req_route_ipv6 - NFP requests an IPv6 route/neighbour lookup + * @ingress_port: ingress port of packet that signalled request + * @ipv6_addr: destination ipv6 address for route + */ +struct nfp_tun_req_route_ipv6 { + __be32 ingress_port; + struct in6_addr ipv6_addr; +}; + +/** + * struct nfp_offloaded_route - routes that are offloaded to the NFP * @list: list pointer + * @ip_add: destination of route - can be IPv4 or IPv6 */ -struct nfp_ipv4_route_entry { - __be32 ipv4_addr; +struct nfp_offloaded_route { struct list_head list; + u8 ip_add[]; }; #define NFP_FL_IPV4_ADDRS_MAX 32 @@ -116,6 +161,18 @@ struct nfp_ipv4_addr_entry { struct list_head list; }; +#define NFP_FL_IPV6_ADDRS_MAX 4 + +/** + * struct nfp_tun_ipv6_addr - set the IP address list on the NFP + * @count: number of IPs populated in the array + * @ipv6_addr: array of IPV6_ADDRS_MAX 128 bit IPv6 addresses + */ +struct nfp_tun_ipv6_addr { + __be32 count; + struct in6_addr ipv6_addr[NFP_FL_IPV6_ADDRS_MAX]; +}; + #define NFP_TUN_MAC_OFFLOAD_DEL_FLAG 0x2 /** @@ -206,6 +263,49 @@ void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb) rcu_read_unlock(); } +void nfp_tunnel_keep_alive_v6(struct nfp_app *app, struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct nfp_tun_active_tuns_v6 *payload; + struct net_device *netdev; + int count, i, pay_len; + struct neighbour *n; + void *ipv6_add; + u32 port; + + payload = nfp_flower_cmsg_get_data(skb); + count = be32_to_cpu(payload->count); + if (count > NFP_FL_IPV6_ADDRS_MAX) { + nfp_flower_cmsg_warn(app, "IPv6 tunnel keep-alive request exceeds max routes.\n"); + return; + } + + pay_len = nfp_flower_cmsg_get_data_len(skb); + if (pay_len != struct_size(payload, tun_info, count)) { + nfp_flower_cmsg_warn(app, "Corruption in tunnel keep-alive message.\n"); + return; + } + + rcu_read_lock(); + for (i = 0; i < count; i++) { + ipv6_add = &payload->tun_info[i].ipv6; + port = be32_to_cpu(payload->tun_info[i].egress_port); + netdev = nfp_app_dev_get(app, port, NULL); + if (!netdev) + continue; + + n = neigh_lookup(&nd_tbl, ipv6_add, netdev); + if (!n) + continue; + + /* Update the used timestamp of neighbour */ + neigh_event_send(n, NULL); + neigh_release(n); + } + rcu_read_unlock(); +#endif +} + static int nfp_flower_xmit_tun_conf(struct nfp_app *app, u8 mtype, u16 plen, void *pdata, gfp_t flag) @@ -224,71 +324,126 @@ nfp_flower_xmit_tun_conf(struct nfp_app *app, u8 mtype, u16 plen, void *pdata, return 0; } -static bool nfp_tun_has_route(struct nfp_app *app, __be32 ipv4_addr) +static bool +__nfp_tun_has_route(struct list_head *route_list, spinlock_t *list_lock, + void *add, int add_len) { - struct nfp_flower_priv *priv = app->priv; - struct nfp_ipv4_route_entry *entry; - struct list_head *ptr, *storage; + struct nfp_offloaded_route *entry; - spin_lock_bh(&priv->tun.neigh_off_lock); - list_for_each_safe(ptr, storage, &priv->tun.neigh_off_list) { - entry = list_entry(ptr, struct nfp_ipv4_route_entry, list); - if (entry->ipv4_addr == ipv4_addr) { - spin_unlock_bh(&priv->tun.neigh_off_lock); + spin_lock_bh(list_lock); + list_for_each_entry(entry, route_list, list) + if (!memcmp(entry->ip_add, add, add_len)) { + spin_unlock_bh(list_lock); return true; } - } - spin_unlock_bh(&priv->tun.neigh_off_lock); + spin_unlock_bh(list_lock); return false; } -static void nfp_tun_add_route_to_cache(struct nfp_app *app, __be32 ipv4_addr) +static int +__nfp_tun_add_route_to_cache(struct list_head *route_list, + spinlock_t *list_lock, void *add, int add_len) { - struct nfp_flower_priv *priv = app->priv; - struct nfp_ipv4_route_entry *entry; - struct list_head *ptr, *storage; + struct nfp_offloaded_route *entry; - spin_lock_bh(&priv->tun.neigh_off_lock); - list_for_each_safe(ptr, storage, &priv->tun.neigh_off_list) { - entry = list_entry(ptr, struct nfp_ipv4_route_entry, list); - if (entry->ipv4_addr == ipv4_addr) { - spin_unlock_bh(&priv->tun.neigh_off_lock); - return; + spin_lock_bh(list_lock); + list_for_each_entry(entry, route_list, list) + if (!memcmp(entry->ip_add, add, add_len)) { + spin_unlock_bh(list_lock); + return 0; } - } - entry = kmalloc(sizeof(*entry), GFP_ATOMIC); + + entry = kmalloc(sizeof(*entry) + add_len, GFP_ATOMIC); if (!entry) { - spin_unlock_bh(&priv->tun.neigh_off_lock); - nfp_flower_cmsg_warn(app, "Mem error when storing new route.\n"); - return; + spin_unlock_bh(list_lock); + return -ENOMEM; } - entry->ipv4_addr = ipv4_addr; - list_add_tail(&entry->list, &priv->tun.neigh_off_list); - spin_unlock_bh(&priv->tun.neigh_off_lock); + memcpy(entry->ip_add, add, add_len); + list_add_tail(&entry->list, route_list); + spin_unlock_bh(list_lock); + + return 0; } -static void nfp_tun_del_route_from_cache(struct nfp_app *app, __be32 ipv4_addr) +static void +__nfp_tun_del_route_from_cache(struct list_head *route_list, + spinlock_t *list_lock, void *add, int add_len) { - struct nfp_flower_priv *priv = app->priv; - struct nfp_ipv4_route_entry *entry; - struct list_head *ptr, *storage; + struct nfp_offloaded_route *entry; - spin_lock_bh(&priv->tun.neigh_off_lock); - list_for_each_safe(ptr, storage, &priv->tun.neigh_off_list) { - entry = list_entry(ptr, struct nfp_ipv4_route_entry, list); - if (entry->ipv4_addr == ipv4_addr) { + spin_lock_bh(list_lock); + list_for_each_entry(entry, route_list, list) + if (!memcmp(entry->ip_add, add, add_len)) { list_del(&entry->list); kfree(entry); break; } - } - spin_unlock_bh(&priv->tun.neigh_off_lock); + spin_unlock_bh(list_lock); +} + +static bool nfp_tun_has_route_v4(struct nfp_app *app, __be32 *ipv4_addr) +{ + struct nfp_flower_priv *priv = app->priv; + + return __nfp_tun_has_route(&priv->tun.neigh_off_list_v4, + &priv->tun.neigh_off_lock_v4, ipv4_addr, + sizeof(*ipv4_addr)); +} + +static bool +nfp_tun_has_route_v6(struct nfp_app *app, struct in6_addr *ipv6_addr) +{ + struct nfp_flower_priv *priv = app->priv; + + return __nfp_tun_has_route(&priv->tun.neigh_off_list_v6, + &priv->tun.neigh_off_lock_v6, ipv6_addr, + sizeof(*ipv6_addr)); +} + +static void +nfp_tun_add_route_to_cache_v4(struct nfp_app *app, __be32 *ipv4_addr) +{ + struct nfp_flower_priv *priv = app->priv; + + __nfp_tun_add_route_to_cache(&priv->tun.neigh_off_list_v4, + &priv->tun.neigh_off_lock_v4, ipv4_addr, + sizeof(*ipv4_addr)); +} + +static void +nfp_tun_add_route_to_cache_v6(struct nfp_app *app, struct in6_addr *ipv6_addr) +{ + struct nfp_flower_priv *priv = app->priv; + + __nfp_tun_add_route_to_cache(&priv->tun.neigh_off_list_v6, + &priv->tun.neigh_off_lock_v6, ipv6_addr, + sizeof(*ipv6_addr)); } static void -nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app, - struct flowi4 *flow, struct neighbour *neigh, gfp_t flag) +nfp_tun_del_route_from_cache_v4(struct nfp_app *app, __be32 *ipv4_addr) +{ + struct nfp_flower_priv *priv = app->priv; + + __nfp_tun_del_route_from_cache(&priv->tun.neigh_off_list_v4, + &priv->tun.neigh_off_lock_v4, ipv4_addr, + sizeof(*ipv4_addr)); +} + +static void +nfp_tun_del_route_from_cache_v6(struct nfp_app *app, struct in6_addr *ipv6_addr) +{ + struct nfp_flower_priv *priv = app->priv; + + __nfp_tun_del_route_from_cache(&priv->tun.neigh_off_list_v6, + &priv->tun.neigh_off_lock_v6, ipv6_addr, + sizeof(*ipv6_addr)); +} + +static void +nfp_tun_write_neigh_v4(struct net_device *netdev, struct nfp_app *app, + struct flowi4 *flow, struct neighbour *neigh, gfp_t flag) { struct nfp_tun_neigh payload; u32 port_id; @@ -302,7 +457,7 @@ nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app, /* If entry has expired send dst IP with all other fields 0. */ if (!(neigh->nud_state & NUD_VALID) || neigh->dead) { - nfp_tun_del_route_from_cache(app, payload.dst_ipv4); + nfp_tun_del_route_from_cache_v4(app, &payload.dst_ipv4); /* Trigger ARP to verify invalid neighbour state. */ neigh_event_send(neigh, NULL); goto send_msg; @@ -314,7 +469,7 @@ nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app, neigh_ha_snapshot(payload.dst_addr, neigh, netdev); payload.port_id = cpu_to_be32(port_id); /* Add destination of new route to NFP cache. */ - nfp_tun_add_route_to_cache(app, payload.dst_ipv4); + nfp_tun_add_route_to_cache_v4(app, &payload.dst_ipv4); send_msg: nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH, @@ -322,16 +477,54 @@ send_msg: (unsigned char *)&payload, flag); } +static void +nfp_tun_write_neigh_v6(struct net_device *netdev, struct nfp_app *app, + struct flowi6 *flow, struct neighbour *neigh, gfp_t flag) +{ + struct nfp_tun_neigh_v6 payload; + u32 port_id; + + port_id = nfp_flower_get_port_id_from_netdev(app, netdev); + if (!port_id) + return; + + memset(&payload, 0, sizeof(struct nfp_tun_neigh_v6)); + payload.dst_ipv6 = flow->daddr; + + /* If entry has expired send dst IP with all other fields 0. */ + if (!(neigh->nud_state & NUD_VALID) || neigh->dead) { + nfp_tun_del_route_from_cache_v6(app, &payload.dst_ipv6); + /* Trigger probe to verify invalid neighbour state. */ + neigh_event_send(neigh, NULL); + goto send_msg; + } + + /* Have a valid neighbour so populate rest of entry. */ + payload.src_ipv6 = flow->saddr; + ether_addr_copy(payload.src_addr, netdev->dev_addr); + neigh_ha_snapshot(payload.dst_addr, neigh, netdev); + payload.port_id = cpu_to_be32(port_id); + /* Add destination of new route to NFP cache. */ + nfp_tun_add_route_to_cache_v6(app, &payload.dst_ipv6); + +send_msg: + nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6, + sizeof(struct nfp_tun_neigh_v6), + (unsigned char *)&payload, flag); +} + static int nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, void *ptr) { struct nfp_flower_priv *app_priv; struct netevent_redirect *redir; - struct flowi4 flow = {}; + struct flowi4 flow4 = {}; + struct flowi6 flow6 = {}; struct neighbour *n; struct nfp_app *app; struct rtable *rt; + bool ipv6 = false; int err; switch (event) { @@ -346,7 +539,13 @@ nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, return NOTIFY_DONE; } - flow.daddr = *(__be32 *)n->primary_key; + if (n->tbl->family == AF_INET6) + ipv6 = true; + + if (ipv6) + flow6.daddr = *(struct in6_addr *)n->primary_key; + else + flow4.daddr = *(__be32 *)n->primary_key; app_priv = container_of(nb, struct nfp_flower_priv, tun.neigh_nb); app = app_priv->app; @@ -356,28 +555,46 @@ nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, return NOTIFY_DONE; /* Only concerned with changes to routes already added to NFP. */ - if (!nfp_tun_has_route(app, flow.daddr)) + if ((ipv6 && !nfp_tun_has_route_v6(app, &flow6.daddr)) || + (!ipv6 && !nfp_tun_has_route_v4(app, &flow4.daddr))) return NOTIFY_DONE; #if IS_ENABLED(CONFIG_INET) - /* Do a route lookup to populate flow data. */ - rt = ip_route_output_key(dev_net(n->dev), &flow); - err = PTR_ERR_OR_ZERO(rt); - if (err) + if (ipv6) { +#if IS_ENABLED(CONFIG_IPV6) + struct dst_entry *dst; + + dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(n->dev), NULL, + &flow6, NULL); + if (IS_ERR(dst)) + return NOTIFY_DONE; + + dst_release(dst); + flow6.flowi6_proto = IPPROTO_UDP; + nfp_tun_write_neigh_v6(n->dev, app, &flow6, n, GFP_ATOMIC); +#else return NOTIFY_DONE; +#endif /* CONFIG_IPV6 */ + } else { + /* Do a route lookup to populate flow data. */ + rt = ip_route_output_key(dev_net(n->dev), &flow4); + err = PTR_ERR_OR_ZERO(rt); + if (err) + return NOTIFY_DONE; - ip_rt_put(rt); + ip_rt_put(rt); + + flow4.flowi4_proto = IPPROTO_UDP; + nfp_tun_write_neigh_v4(n->dev, app, &flow4, n, GFP_ATOMIC); + } #else return NOTIFY_DONE; -#endif - - flow.flowi4_proto = IPPROTO_UDP; - nfp_tun_write_neigh(n->dev, app, &flow, n, GFP_ATOMIC); +#endif /* CONFIG_INET */ return NOTIFY_OK; } -void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb) +void nfp_tunnel_request_route_v4(struct nfp_app *app, struct sk_buff *skb) { struct nfp_tun_req_route_ipv4 *payload; struct net_device *netdev; @@ -411,7 +628,7 @@ void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb) ip_rt_put(rt); if (!n) goto fail_rcu_unlock; - nfp_tun_write_neigh(n->dev, app, &flow, n, GFP_ATOMIC); + nfp_tun_write_neigh_v4(n->dev, app, &flow, n, GFP_ATOMIC); neigh_release(n); rcu_read_unlock(); return; @@ -421,6 +638,48 @@ fail_rcu_unlock: nfp_flower_cmsg_warn(app, "Requested route not found.\n"); } +void nfp_tunnel_request_route_v6(struct nfp_app *app, struct sk_buff *skb) +{ + struct nfp_tun_req_route_ipv6 *payload; + struct net_device *netdev; + struct flowi6 flow = {}; + struct dst_entry *dst; + struct neighbour *n; + + payload = nfp_flower_cmsg_get_data(skb); + + rcu_read_lock(); + netdev = nfp_app_dev_get(app, be32_to_cpu(payload->ingress_port), NULL); + if (!netdev) + goto fail_rcu_unlock; + + flow.daddr = payload->ipv6_addr; + flow.flowi6_proto = IPPROTO_UDP; + +#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) + dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(netdev), NULL, &flow, + NULL); + if (IS_ERR(dst)) + goto fail_rcu_unlock; +#else + goto fail_rcu_unlock; +#endif + + n = dst_neigh_lookup(dst, &flow.daddr); + dst_release(dst); + if (!n) + goto fail_rcu_unlock; + + nfp_tun_write_neigh_v6(n->dev, app, &flow, n, GFP_ATOMIC); + neigh_release(n); + rcu_read_unlock(); + return; + +fail_rcu_unlock: + rcu_read_unlock(); + nfp_flower_cmsg_warn(app, "Requested IPv6 route not found.\n"); +} + static void nfp_tun_write_ipv4_list(struct nfp_app *app) { struct nfp_flower_priv *priv = app->priv; @@ -502,6 +761,78 @@ void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4) nfp_tun_write_ipv4_list(app); } +static void nfp_tun_write_ipv6_list(struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv6_addr_entry *entry; + struct nfp_tun_ipv6_addr payload; + int count = 0; + + memset(&payload, 0, sizeof(struct nfp_tun_ipv6_addr)); + mutex_lock(&priv->tun.ipv6_off_lock); + list_for_each_entry(entry, &priv->tun.ipv6_off_list, list) { + if (count >= NFP_FL_IPV6_ADDRS_MAX) { + nfp_flower_cmsg_warn(app, "Too many IPv6 tunnel endpoint addresses, some cannot be offloaded.\n"); + break; + } + payload.ipv6_addr[count++] = entry->ipv6_addr; + } + mutex_unlock(&priv->tun.ipv6_off_lock); + payload.count = cpu_to_be32(count); + + nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_IPS_V6, + sizeof(struct nfp_tun_ipv6_addr), + &payload, GFP_KERNEL); +} + +struct nfp_ipv6_addr_entry * +nfp_tunnel_add_ipv6_off(struct nfp_app *app, struct in6_addr *ipv6) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv6_addr_entry *entry; + + mutex_lock(&priv->tun.ipv6_off_lock); + list_for_each_entry(entry, &priv->tun.ipv6_off_list, list) + if (!memcmp(&entry->ipv6_addr, ipv6, sizeof(*ipv6))) { + entry->ref_count++; + mutex_unlock(&priv->tun.ipv6_off_lock); + return entry; + } + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + mutex_unlock(&priv->tun.ipv6_off_lock); + nfp_flower_cmsg_warn(app, "Mem error when offloading IP address.\n"); + return NULL; + } + entry->ipv6_addr = *ipv6; + entry->ref_count = 1; + list_add_tail(&entry->list, &priv->tun.ipv6_off_list); + mutex_unlock(&priv->tun.ipv6_off_lock); + + nfp_tun_write_ipv6_list(app); + + return entry; +} + +void +nfp_tunnel_put_ipv6_off(struct nfp_app *app, struct nfp_ipv6_addr_entry *entry) +{ + struct nfp_flower_priv *priv = app->priv; + bool freed = false; + + mutex_lock(&priv->tun.ipv6_off_lock); + if (!--entry->ref_count) { + list_del(&entry->list); + kfree(entry); + freed = true; + } + mutex_unlock(&priv->tun.ipv6_off_lock); + + if (freed) + nfp_tun_write_ipv6_list(app); +} + static int __nfp_tunnel_offload_mac(struct nfp_app *app, u8 *mac, u16 idx, bool del) { @@ -1013,13 +1344,17 @@ int nfp_tunnel_config_start(struct nfp_app *app) ida_init(&priv->tun.mac_off_ids); - /* Initialise priv data for IPv4 offloading. */ + /* Initialise priv data for IPv4/v6 offloading. */ mutex_init(&priv->tun.ipv4_off_lock); INIT_LIST_HEAD(&priv->tun.ipv4_off_list); + mutex_init(&priv->tun.ipv6_off_lock); + INIT_LIST_HEAD(&priv->tun.ipv6_off_list); /* Initialise priv data for neighbour offloading. */ - spin_lock_init(&priv->tun.neigh_off_lock); - INIT_LIST_HEAD(&priv->tun.neigh_off_list); + spin_lock_init(&priv->tun.neigh_off_lock_v4); + INIT_LIST_HEAD(&priv->tun.neigh_off_list_v4); + spin_lock_init(&priv->tun.neigh_off_lock_v6); + INIT_LIST_HEAD(&priv->tun.neigh_off_list_v6); priv->tun.neigh_nb.notifier_call = nfp_tun_neigh_event_handler; err = register_netevent_notifier(&priv->tun.neigh_nb); @@ -1034,9 +1369,11 @@ int nfp_tunnel_config_start(struct nfp_app *app) void nfp_tunnel_config_stop(struct nfp_app *app) { + struct nfp_offloaded_route *route_entry, *temp; struct nfp_flower_priv *priv = app->priv; - struct nfp_ipv4_route_entry *route_entry; struct nfp_ipv4_addr_entry *ip_entry; + struct nfp_tun_neigh_v6 ipv6_route; + struct nfp_tun_neigh ipv4_route; struct list_head *ptr, *storage; unregister_netevent_notifier(&priv->tun.neigh_nb); @@ -1050,12 +1387,35 @@ void nfp_tunnel_config_stop(struct nfp_app *app) kfree(ip_entry); } - /* Free any memory that may be occupied by the route list. */ - list_for_each_safe(ptr, storage, &priv->tun.neigh_off_list) { - route_entry = list_entry(ptr, struct nfp_ipv4_route_entry, - list); + mutex_destroy(&priv->tun.ipv6_off_lock); + + /* Free memory in the route list and remove entries from fw cache. */ + list_for_each_entry_safe(route_entry, temp, + &priv->tun.neigh_off_list_v4, list) { + memset(&ipv4_route, 0, sizeof(ipv4_route)); + memcpy(&ipv4_route.dst_ipv4, &route_entry->ip_add, + sizeof(ipv4_route.dst_ipv4)); list_del(&route_entry->list); kfree(route_entry); + + nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH, + sizeof(struct nfp_tun_neigh), + (unsigned char *)&ipv4_route, + GFP_KERNEL); + } + + list_for_each_entry_safe(route_entry, temp, + &priv->tun.neigh_off_list_v6, list) { + memset(&ipv6_route, 0, sizeof(ipv6_route)); + memcpy(&ipv6_route.dst_ipv6, &route_entry->ip_add, + sizeof(ipv6_route.dst_ipv6)); + list_del(&route_entry->list); + kfree(route_entry); + + nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6, + sizeof(struct nfp_tun_neigh), + (unsigned char *)&ipv6_route, + GFP_KERNEL); } /* Destroy rhash. Entries should be cleaned on netdev notifier unreg. */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 250f510b1d21..ff4438478ea9 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -586,6 +586,9 @@ struct nfp_net_dp { * @ktls_conn_id_gen: Trivial generator for kTLS connection ids (for TX) * @ktls_no_space: Counter of firmware rejecting kTLS connection due to * lack of space + * @ktls_rx_resync_req: Counter of TLS RX resync requested + * @ktls_rx_resync_ign: Counter of TLS RX resync requests ignored + * @ktls_rx_resync_sent: Counter of TLS RX resync completed * @mbox_cmsg: Common Control Message via vNIC mailbox state * @mbox_cmsg.queue: CCM mbox queue of pending messages * @mbox_cmsg.wq: CCM mbox wait queue of waiting processes @@ -674,6 +677,9 @@ struct nfp_net { atomic64_t ktls_conn_id_gen; atomic_t ktls_no_space; + atomic_t ktls_rx_resync_req; + atomic_t ktls_rx_resync_ign; + atomic_t ktls_rx_resync_sent; struct { struct sk_buff_head queue; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index bcdcd6de7dea..9bfb3b077bc1 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -47,6 +47,7 @@ #include "nfp_net_sriov.h" #include "nfp_port.h" #include "crypto/crypto.h" +#include "crypto/fw.h" /** * nfp_net_get_fw_version() - Read and parse the FW version @@ -1321,17 +1322,11 @@ nfp_net_tx_ring_reset(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring) netdev_tx_reset_queue(nd_q); } -static void nfp_net_tx_timeout(struct net_device *netdev) +static void nfp_net_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct nfp_net *nn = netdev_priv(netdev); - int i; - for (i = 0; i < nn->dp.netdev->real_num_tx_queues; i++) { - if (!netif_tx_queue_stopped(netdev_get_tx_queue(netdev, i))) - continue; - nn_warn(nn, "TX timeout on ring: %d\n", i); - } - nn_warn(nn, "TX watchdog timeout\n"); + nn_warn(nn, "TX watchdog timeout on ring: %u\n", txqueue); } /* Receive processing @@ -1667,9 +1662,9 @@ nfp_net_set_hash_desc(struct net_device *netdev, struct nfp_meta_parsed *meta, &rx_hash->hash); } -static void * +static bool nfp_net_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta, - void *data, int meta_len) + void *data, void *pkt, unsigned int pkt_len, int meta_len) { u32 meta_info; @@ -1699,14 +1694,20 @@ nfp_net_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta, (__force __wsum)__get_unaligned_cpu32(data); data += 4; break; + case NFP_NET_META_RESYNC_INFO: + if (nfp_net_tls_rx_resync_req(netdev, data, pkt, + pkt_len)) + return NULL; + data += sizeof(struct nfp_net_tls_resync_req); + break; default: - return NULL; + return true; } meta_info >>= NFP_NET_META_FIELD_SIZE; } - return data; + return data != pkt; } static void @@ -1891,12 +1892,10 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) nfp_net_set_hash_desc(dp->netdev, &meta, rxbuf->frag + meta_off, rxd); } else if (meta_len) { - void *end; - - end = nfp_net_parse_meta(dp->netdev, &meta, - rxbuf->frag + meta_off, - meta_len); - if (unlikely(end != rxbuf->frag + pkt_off)) { + if (unlikely(nfp_net_parse_meta(dp->netdev, &meta, + rxbuf->frag + meta_off, + rxbuf->frag + pkt_off, + pkt_len, meta_len))) { nn_dp_warn(dp, "invalid RX packet metadata\n"); nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c index d835c14b7257..c3a763134e79 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c @@ -17,6 +17,30 @@ static void nfp_net_tlv_caps_reset(struct nfp_net_tlv_caps *caps) caps->mbox_len = NFP_NET_CFG_MBOX_VAL_MAX_SZ; } +static bool +nfp_net_tls_parse_crypto_ops(struct device *dev, struct nfp_net_tlv_caps *caps, + u8 __iomem *ctrl_mem, u8 __iomem *data, + unsigned int length, unsigned int offset, + bool rx_stream_scan) +{ + /* Ignore the legacy TLV if new one was already parsed */ + if (caps->tls_resync_ss && !rx_stream_scan) + return true; + + if (length < 32) { + dev_err(dev, + "CRYPTO OPS TLV should be at least 32B, is %dB offset:%u\n", + length, offset); + return false; + } + + caps->crypto_ops = readl(data); + caps->crypto_enable_off = data - ctrl_mem + 16; + caps->tls_resync_ss = rx_stream_scan; + + return true; +} + int nfp_net_tlv_caps_parse(struct device *dev, u8 __iomem *ctrl_mem, struct nfp_net_tlv_caps *caps) { @@ -104,15 +128,25 @@ int nfp_net_tlv_caps_parse(struct device *dev, u8 __iomem *ctrl_mem, caps->mbox_cmsg_types = readl(data); break; case NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS: - if (length < 32) { - dev_err(dev, - "CRYPTO OPS TLV should be at least 32B, is %dB offset:%u\n", - length, offset); + if (!nfp_net_tls_parse_crypto_ops(dev, caps, ctrl_mem, + data, length, offset, + false)) return -EINVAL; + break; + case NFP_NET_CFG_TLV_TYPE_VNIC_STATS: + if ((data - ctrl_mem) % 8) { + dev_warn(dev, "VNIC STATS TLV misaligned, ignoring offset:%u len:%u\n", + offset, length); + break; } - - caps->crypto_ops = readl(data); - caps->crypto_enable_off = data - ctrl_mem + 16; + caps->vnic_stats_off = data - ctrl_mem; + caps->vnic_stats_cnt = length / 10; + break; + case NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS_RX_SCAN: + if (!nfp_net_tls_parse_crypto_ops(dev, caps, ctrl_mem, + data, length, offset, + true)) + return -EINVAL; break; default: if (!FIELD_GET(NFP_NET_CFG_TLV_HEADER_REQUIRED, hdr)) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index ee6b24e4eacd..3d61a8cb60b0 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -45,6 +45,7 @@ #define NFP_NET_META_PORTID 5 #define NFP_NET_META_CSUM 6 /* checksum complete type */ #define NFP_NET_META_CONN_HANDLE 7 +#define NFP_NET_META_RESYNC_INFO 8 /* RX resync info request */ #define NFP_META_PORT_ID_CTRL ~0U @@ -479,6 +480,22 @@ * 8 words, bitmaps of supported and enabled crypto operations. * First 16B (4 words) contains a bitmap of supported crypto operations, * and next 16B contain the enabled operations. + * This capability is made obsolete by ones with better sync methods. + * + * %NFP_NET_CFG_TLV_TYPE_VNIC_STATS: + * Variable, per-vNIC statistics, data should be 8B aligned (FW should insert + * zero-length RESERVED TLV to pad). + * TLV data has two sections. First is an array of statistics' IDs (2B each). + * Second 8B statistics themselves. Statistics are 8B aligned, meaning there + * may be a padding between sections. + * Number of statistics can be determined as floor(tlv.length / (2 + 8)). + * This TLV overwrites %NFP_NET_CFG_STATS_* values (statistics in this TLV + * duplicate the old ones, so driver should be careful not to unnecessarily + * render both). + * + * %NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS_RX_SCAN: + * Same as %NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS, but crypto TLS does stream scan + * RX sync, rather than kernel-assisted sync. */ #define NFP_NET_CFG_TLV_TYPE_UNKNOWN 0 #define NFP_NET_CFG_TLV_TYPE_RESERVED 1 @@ -490,6 +507,8 @@ #define NFP_NET_CFG_TLV_TYPE_REPR_CAP 7 #define NFP_NET_CFG_TLV_TYPE_MBOX_CMSG_TYPES 10 #define NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS 11 /* see crypto/fw.h */ +#define NFP_NET_CFG_TLV_TYPE_VNIC_STATS 12 +#define NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS_RX_SCAN 13 struct device; @@ -502,6 +521,9 @@ struct device; * @mbox_cmsg_types: cmsgs which can be passed through the mailbox * @crypto_ops: supported crypto operations * @crypto_enable_off: offset of crypto ops enable region + * @vnic_stats_off: offset of vNIC stats area + * @vnic_stats_cnt: number of vNIC stats + * @tls_resync_ss: TLS resync will be performed via stream scan */ struct nfp_net_tlv_caps { u32 me_freq_mhz; @@ -511,6 +533,9 @@ struct nfp_net_tlv_caps { u32 mbox_cmsg_types; u32 crypto_ops; unsigned int crypto_enable_off; + unsigned int vnic_stats_off; + unsigned int vnic_stats_cnt; + unsigned int tls_resync_ss:1; }; int nfp_net_tlv_caps_parse(struct device *dev, u8 __iomem *ctrl_mem, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 1b840ee47339..d648e32c0520 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -148,11 +148,33 @@ static const struct nfp_et_stat nfp_mac_et_stats[] = { { "tx_pause_frames_class7", NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS7, }, }; +static const char nfp_tlv_stat_names[][ETH_GSTRING_LEN] = { + [1] = "dev_rx_discards", + [2] = "dev_rx_errors", + [3] = "dev_rx_bytes", + [4] = "dev_rx_uc_bytes", + [5] = "dev_rx_mc_bytes", + [6] = "dev_rx_bc_bytes", + [7] = "dev_rx_pkts", + [8] = "dev_rx_mc_pkts", + [9] = "dev_rx_bc_pkts", + + [10] = "dev_tx_discards", + [11] = "dev_tx_errors", + [12] = "dev_tx_bytes", + [13] = "dev_tx_uc_bytes", + [14] = "dev_tx_mc_bytes", + [15] = "dev_tx_bc_bytes", + [16] = "dev_tx_pkts", + [17] = "dev_tx_mc_pkts", + [18] = "dev_tx_bc_pkts", +}; + #define NN_ET_GLOBAL_STATS_LEN ARRAY_SIZE(nfp_net_et_stats) #define NN_ET_SWITCH_STATS_LEN 9 #define NN_RVEC_GATHER_STATS 13 #define NN_RVEC_PER_Q_STATS 3 -#define NN_CTRL_PATH_STATS 1 +#define NN_CTRL_PATH_STATS 4 #define SFP_SFF_REV_COMPLIANCE 1 @@ -454,6 +476,9 @@ static u8 *nfp_vnic_get_sw_stats_strings(struct net_device *netdev, u8 *data) data = nfp_pr_et(data, "tx_tls_drop_no_sync_data"); data = nfp_pr_et(data, "hw_tls_no_space"); + data = nfp_pr_et(data, "rx_tls_resync_req_ok"); + data = nfp_pr_et(data, "rx_tls_resync_req_ign"); + data = nfp_pr_et(data, "rx_tls_resync_sent"); return data; } @@ -502,6 +527,9 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data) *data++ = gathered_stats[j]; *data++ = atomic_read(&nn->ktls_no_space); + *data++ = atomic_read(&nn->ktls_rx_resync_req); + *data++ = atomic_read(&nn->ktls_rx_resync_ign); + *data++ = atomic_read(&nn->ktls_rx_resync_sent); return data; } @@ -560,6 +588,65 @@ nfp_vnic_get_hw_stats(u64 *data, u8 __iomem *mem, unsigned int num_vecs) return data; } +static unsigned int nfp_vnic_get_tlv_stats_count(struct nfp_net *nn) +{ + return nn->tlv_caps.vnic_stats_cnt + nn->max_r_vecs * 4; +} + +static u8 *nfp_vnic_get_tlv_stats_strings(struct nfp_net *nn, u8 *data) +{ + unsigned int i, id; + u8 __iomem *mem; + u64 id_word = 0; + + mem = nn->dp.ctrl_bar + nn->tlv_caps.vnic_stats_off; + for (i = 0; i < nn->tlv_caps.vnic_stats_cnt; i++) { + if (!(i % 4)) + id_word = readq(mem + i * 2); + + id = (u16)id_word; + id_word >>= 16; + + if (id < ARRAY_SIZE(nfp_tlv_stat_names) && + nfp_tlv_stat_names[id][0]) { + memcpy(data, nfp_tlv_stat_names[id], ETH_GSTRING_LEN); + data += ETH_GSTRING_LEN; + } else { + data = nfp_pr_et(data, "dev_unknown_stat%u", id); + } + } + + for (i = 0; i < nn->max_r_vecs; i++) { + data = nfp_pr_et(data, "rxq_%u_pkts", i); + data = nfp_pr_et(data, "rxq_%u_bytes", i); + data = nfp_pr_et(data, "txq_%u_pkts", i); + data = nfp_pr_et(data, "txq_%u_bytes", i); + } + + return data; +} + +static u64 *nfp_vnic_get_tlv_stats(struct nfp_net *nn, u64 *data) +{ + u8 __iomem *mem; + unsigned int i; + + mem = nn->dp.ctrl_bar + nn->tlv_caps.vnic_stats_off; + mem += roundup(2 * nn->tlv_caps.vnic_stats_cnt, 8); + for (i = 0; i < nn->tlv_caps.vnic_stats_cnt; i++) + *data++ = readq(mem + i * 8); + + mem = nn->dp.ctrl_bar; + for (i = 0; i < nn->max_r_vecs; i++) { + *data++ = readq(mem + NFP_NET_CFG_RXR_STATS(i)); + *data++ = readq(mem + NFP_NET_CFG_RXR_STATS(i) + 8); + *data++ = readq(mem + NFP_NET_CFG_TXR_STATS(i)); + *data++ = readq(mem + NFP_NET_CFG_TXR_STATS(i) + 8); + } + + return data; +} + static unsigned int nfp_mac_get_stats_count(struct net_device *netdev) { struct nfp_port *port; @@ -609,8 +696,12 @@ static void nfp_net_get_strings(struct net_device *netdev, switch (stringset) { case ETH_SS_STATS: data = nfp_vnic_get_sw_stats_strings(netdev, data); - data = nfp_vnic_get_hw_stats_strings(data, nn->max_r_vecs, - false); + if (!nn->tlv_caps.vnic_stats_off) + data = nfp_vnic_get_hw_stats_strings(data, + nn->max_r_vecs, + false); + else + data = nfp_vnic_get_tlv_stats_strings(nn, data); data = nfp_mac_get_stats_strings(netdev, data); data = nfp_app_port_get_stats_strings(nn->port, data); break; @@ -624,7 +715,11 @@ nfp_net_get_stats(struct net_device *netdev, struct ethtool_stats *stats, struct nfp_net *nn = netdev_priv(netdev); data = nfp_vnic_get_sw_stats(netdev, data); - data = nfp_vnic_get_hw_stats(data, nn->dp.ctrl_bar, nn->max_r_vecs); + if (!nn->tlv_caps.vnic_stats_off) + data = nfp_vnic_get_hw_stats(data, nn->dp.ctrl_bar, + nn->max_r_vecs); + else + data = nfp_vnic_get_tlv_stats(nn, data); data = nfp_mac_get_stats(netdev, data); data = nfp_app_port_get_stats(nn->port, data); } @@ -632,13 +727,18 @@ nfp_net_get_stats(struct net_device *netdev, struct ethtool_stats *stats, static int nfp_net_get_sset_count(struct net_device *netdev, int sset) { struct nfp_net *nn = netdev_priv(netdev); + unsigned int cnt; switch (sset) { case ETH_SS_STATS: - return nfp_vnic_get_sw_stats_count(netdev) + - nfp_vnic_get_hw_stats_count(nn->max_r_vecs) + - nfp_mac_get_stats_count(netdev) + - nfp_app_port_get_stats_count(nn->port); + cnt = nfp_vnic_get_sw_stats_count(netdev); + if (!nn->tlv_caps.vnic_stats_off) + cnt += nfp_vnic_get_hw_stats_count(nn->max_r_vecs); + else + cnt += nfp_vnic_get_tlv_stats_count(nn); + cnt += nfp_mac_get_stats_count(netdev); + cnt += nfp_app_port_get_stats_count(nn->port); + return cnt; default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index 6b54cb3b681d..2fc10a36afa4 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -2739,7 +2739,7 @@ static int nv_tx_done_optimized(struct net_device *dev, int limit) * nv_tx_timeout: dev->tx_timeout function * Called with netif_tx_lock held. */ -static void nv_tx_timeout(struct net_device *dev) +static void nv_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct fe_priv *np = netdev_priv(dev); u8 __iomem *base = get_hwbase(dev); diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index 18e6d87c607b..73ec195fbc30 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -2271,7 +2271,7 @@ static int pch_gbe_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) * pch_gbe_tx_timeout - Respond to a Tx Hang * @netdev: Network interface device structure */ -static void pch_gbe_tx_timeout(struct net_device *netdev) +static void pch_gbe_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct pch_gbe_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/packetengines/hamachi.c b/drivers/net/ethernet/packetengines/hamachi.c index eee883a2aa8d..70816d2e2990 100644 --- a/drivers/net/ethernet/packetengines/hamachi.c +++ b/drivers/net/ethernet/packetengines/hamachi.c @@ -548,7 +548,7 @@ static void mdio_write(struct net_device *dev, int phy_id, int location, int val static int hamachi_open(struct net_device *dev); static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static void hamachi_timer(struct timer_list *t); -static void hamachi_tx_timeout(struct net_device *dev); +static void hamachi_tx_timeout(struct net_device *dev, unsigned int txqueue); static void hamachi_init_ring(struct net_device *dev); static netdev_tx_t hamachi_start_xmit(struct sk_buff *skb, struct net_device *dev); @@ -1042,7 +1042,7 @@ static void hamachi_timer(struct timer_list *t) add_timer(&hmp->timer); } -static void hamachi_tx_timeout(struct net_device *dev) +static void hamachi_tx_timeout(struct net_device *dev, unsigned int txqueue) { int i; struct hamachi_private *hmp = netdev_priv(dev); diff --git a/drivers/net/ethernet/packetengines/yellowfin.c b/drivers/net/ethernet/packetengines/yellowfin.c index 5113ee647090..520779f05e1a 100644 --- a/drivers/net/ethernet/packetengines/yellowfin.c +++ b/drivers/net/ethernet/packetengines/yellowfin.c @@ -344,7 +344,7 @@ static void mdio_write(void __iomem *ioaddr, int phy_id, int location, int value static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static int yellowfin_open(struct net_device *dev); static void yellowfin_timer(struct timer_list *t); -static void yellowfin_tx_timeout(struct net_device *dev); +static void yellowfin_tx_timeout(struct net_device *dev, unsigned int txqueue); static int yellowfin_init_ring(struct net_device *dev); static netdev_tx_t yellowfin_start_xmit(struct sk_buff *skb, struct net_device *dev); @@ -677,7 +677,7 @@ static void yellowfin_timer(struct timer_list *t) add_timer(&yp->timer); } -static void yellowfin_tx_timeout(struct net_device *dev) +static void yellowfin_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct yellowfin_private *yp = netdev_priv(dev); void __iomem *ioaddr = yp->base; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index ef8258713369..a76108a9c7db 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -1285,7 +1285,7 @@ static void ionic_tx_timeout_work(struct work_struct *ws) rtnl_unlock(); } -static void ionic_tx_timeout(struct net_device *netdev) +static void ionic_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct ionic_lif *lif = netdev_priv(netdev); diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index c692a41e4548..8067ea04d455 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -49,7 +49,7 @@ static int netxen_nic_open(struct net_device *netdev); static int netxen_nic_close(struct net_device *netdev); static netdev_tx_t netxen_nic_xmit_frame(struct sk_buff *, struct net_device *); -static void netxen_tx_timeout(struct net_device *netdev); +static void netxen_tx_timeout(struct net_device *netdev, unsigned int txqueue); static void netxen_tx_timeout_task(struct work_struct *work); static void netxen_fw_poll_work(struct work_struct *work); static void netxen_schedule_work(struct netxen_adapter *adapter, @@ -2222,7 +2222,7 @@ static void netxen_nic_handle_phy_intr(struct netxen_adapter *adapter) netxen_advert_link_change(adapter, linkup); } -static void netxen_tx_timeout(struct net_device *netdev) +static void netxen_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct netxen_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c index 7e0b795230b2..900bc603e30a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c @@ -331,8 +331,8 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn, u8 sb_index = p_hwfn->p_eq->eq_sb_index; struct qed_spq_entry *p_ent = NULL; struct qed_sp_init_data init_data; - int rc = -EINVAL; u8 page_cnt, i; + int rc; /* update initial eq producer */ qed_eq_prod_update(p_hwfn, @@ -447,7 +447,7 @@ int qed_sp_pf_update(struct qed_hwfn *p_hwfn) { struct qed_spq_entry *p_ent = NULL; struct qed_sp_init_data init_data; - int rc = -EINVAL; + int rc; /* Get SPQ entry */ memset(&init_data, 0, sizeof(init_data)); @@ -471,7 +471,7 @@ int qed_sp_pf_update_ufp(struct qed_hwfn *p_hwfn) { struct qed_spq_entry *p_ent = NULL; struct qed_sp_init_data init_data; - int rc = -EOPNOTSUPP; + int rc; if (p_hwfn->ufp_info.pri_type == QED_UFP_PRI_UNKNOWN) { DP_INFO(p_hwfn, "Invalid priority type %d\n", @@ -509,7 +509,7 @@ int qed_sp_pf_update_tunn_cfg(struct qed_hwfn *p_hwfn, { struct qed_spq_entry *p_ent = NULL; struct qed_sp_init_data init_data; - int rc = -EINVAL; + int rc; if (IS_VF(p_hwfn->cdev)) return qed_vf_pf_tunnel_param_update(p_hwfn, p_tunn); @@ -546,7 +546,7 @@ int qed_sp_pf_stop(struct qed_hwfn *p_hwfn) { struct qed_spq_entry *p_ent = NULL; struct qed_sp_init_data init_data; - int rc = -EINVAL; + int rc; /* Get SPQ entry */ memset(&init_data, 0, sizeof(init_data)); diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index b4b8ba00ee01..bb864765c761 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -3602,7 +3602,7 @@ static int ql3xxx_set_mac_address(struct net_device *ndev, void *p) return 0; } -static void ql3xxx_tx_timeout(struct net_device *ndev) +static void ql3xxx_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct ql3_adapter *qdev = netdev_priv(ndev); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index c07438db30ba..9dd6cb36f366 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -56,7 +56,7 @@ static int qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent); static void qlcnic_remove(struct pci_dev *pdev); static int qlcnic_open(struct net_device *netdev); static int qlcnic_close(struct net_device *netdev); -static void qlcnic_tx_timeout(struct net_device *netdev); +static void qlcnic_tx_timeout(struct net_device *netdev, unsigned int txqueue); static void qlcnic_attach_work(struct work_struct *work); static void qlcnic_fwinit_work(struct work_struct *work); @@ -3068,7 +3068,7 @@ static void qlcnic_dump_rings(struct qlcnic_adapter *adapter) } -static void qlcnic_tx_timeout(struct net_device *netdev) +static void qlcnic_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct qlcnic_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index 98f92268cbaa..522fad4cb2cd 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -282,7 +282,7 @@ static int emac_close(struct net_device *netdev) } /* Respond to a TX hang */ -static void emac_tx_timeout(struct net_device *netdev) +static void emac_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct emac_adapter *adpt = netdev_priv(netdev); diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index baac016f3ec0..5a3b65a6eb4f 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -785,7 +785,7 @@ qcaspi_netdev_xmit(struct sk_buff *skb, struct net_device *dev) } static void -qcaspi_netdev_tx_timeout(struct net_device *dev) +qcaspi_netdev_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct qcaspi *qca = netdev_priv(dev); diff --git a/drivers/net/ethernet/qualcomm/qca_uart.c b/drivers/net/ethernet/qualcomm/qca_uart.c index 0981068504fa..375a844cd27c 100644 --- a/drivers/net/ethernet/qualcomm/qca_uart.c +++ b/drivers/net/ethernet/qualcomm/qca_uart.c @@ -248,7 +248,7 @@ out: return NETDEV_TX_OK; } -static void qcauart_netdev_tx_timeout(struct net_device *dev) +static void qcauart_netdev_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct qcauart *qca = netdev_priv(dev); diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c index 274e5b4bc4ac..c23cb61bbd30 100644 --- a/drivers/net/ethernet/rdc/r6040.c +++ b/drivers/net/ethernet/rdc/r6040.c @@ -410,7 +410,7 @@ static void r6040_init_mac_regs(struct net_device *dev) iowrite16(TM2TX, ioaddr + MTPR); } -static void r6040_tx_timeout(struct net_device *dev) +static void r6040_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct r6040_private *priv = netdev_priv(dev); void __iomem *ioaddr = priv->base; diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index 4f910c4f67b0..60d342f82fb3 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -1235,7 +1235,7 @@ static int cp_close (struct net_device *dev) return 0; } -static void cp_tx_timeout(struct net_device *dev) +static void cp_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct cp_private *cp = netdev_priv(dev); unsigned long flags; diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c index 55d01266e615..5caeb8368eab 100644 --- a/drivers/net/ethernet/realtek/8139too.c +++ b/drivers/net/ethernet/realtek/8139too.c @@ -642,7 +642,7 @@ static int mdio_read (struct net_device *dev, int phy_id, int location); static void mdio_write (struct net_device *dev, int phy_id, int location, int val); static void rtl8139_start_thread(struct rtl8139_private *tp); -static void rtl8139_tx_timeout (struct net_device *dev); +static void rtl8139_tx_timeout (struct net_device *dev, unsigned int txqueue); static void rtl8139_init_ring (struct net_device *dev); static netdev_tx_t rtl8139_start_xmit (struct sk_buff *skb, struct net_device *dev); @@ -1700,7 +1700,7 @@ static void rtl8139_tx_timeout_task (struct work_struct *work) spin_unlock_bh(&tp->rx_lock); } -static void rtl8139_tx_timeout (struct net_device *dev) +static void rtl8139_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct rtl8139_private *tp = netdev_priv(dev); diff --git a/drivers/net/ethernet/realtek/atp.c b/drivers/net/ethernet/realtek/atp.c index 58e0ca9093d3..9e3b35c97e63 100644 --- a/drivers/net/ethernet/realtek/atp.c +++ b/drivers/net/ethernet/realtek/atp.c @@ -204,7 +204,7 @@ static void net_rx(struct net_device *dev); static void read_block(long ioaddr, int length, unsigned char *buffer, int data_mode); static int net_close(struct net_device *dev); static void set_rx_mode(struct net_device *dev); -static void tx_timeout(struct net_device *dev); +static void tx_timeout(struct net_device *dev, unsigned int txqueue); /* A list of all installed ATP devices, for removing the driver module. */ @@ -533,7 +533,7 @@ static void write_packet(long ioaddr, int length, unsigned char *packet, int pad outb(Ctrl_HNibWrite | Ctrl_SelData | Ctrl_IRQEN, ioaddr + PAR_CONTROL); } -static void tx_timeout(struct net_device *dev) +static void tx_timeout(struct net_device *dev, unsigned int txqueue) { long ioaddr = dev->base_addr; diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 67a4d5d45e3a..a887b685d070 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -5435,7 +5435,7 @@ static void rtl_reset_work(struct rtl8169_private *tp) netif_wake_queue(dev); } -static void rtl8169_tx_timeout(struct net_device *dev) +static void rtl8169_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct rtl8169_private *tp = netdev_priv(dev); @@ -6825,6 +6825,15 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) int chipset, region; int jumbo_max, rc; + /* Some tools for creating an initramfs don't consider softdeps, then + * r8169.ko may be in initramfs, but realtek.ko not. Then the generic + * PHY driver is used that doesn't work with most chip versions. + */ + if (!driver_find("RTL8201CP Ethernet", &mdio_bus_type)) { + dev_err(&pdev->dev, "realtek.ko not loaded, maybe it needs to be added to initramfs?\n"); + return -ENOENT; + } + dev = devm_alloc_etherdev(&pdev->dev, sizeof (*tp)); if (!dev) return -ENOMEM; diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 4b13a184bfc7..067ad25553b9 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1425,7 +1425,7 @@ out_napi_off: } /* Timeout function for Ethernet AVB */ -static void ravb_tx_timeout(struct net_device *ndev) +static void ravb_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct ravb_private *priv = netdev_priv(ndev); diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index e19b49c4013e..cdd8ab2eb910 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2478,7 +2478,7 @@ out_napi_off: } /* Timeout function */ -static void sh_eth_tx_timeout(struct net_device *ndev) +static void sh_eth_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct sh_eth_private *mdp = netdev_priv(ndev); struct sh_eth_rxdesc *rxdesc; diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index bc4f951315da..7585cd2270ba 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -2159,7 +2159,7 @@ static void rocker_router_fib_event_work(struct work_struct *work) /* Protect internal structures from changes */ rtnl_lock(); switch (fib_work->event) { - case FIB_EVENT_ENTRY_ADD: + case FIB_EVENT_ENTRY_REPLACE: err = rocker_world_fib4_add(rocker, &fib_work->fen_info); if (err) rocker_world_fib4_abort(rocker); @@ -2201,7 +2201,7 @@ static int rocker_router_fib_event(struct notifier_block *nb, fib_work->event = event; switch (event) { - case FIB_EVENT_ENTRY_ADD: /* fall through */ + case FIB_EVENT_ENTRY_REPLACE: /* fall through */ case FIB_EVENT_ENTRY_DEL: if (info->family == AF_INET) { struct fib_entry_notifier_info *fen_info = ptr; diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c index c56fcbb37066..cd6e0de48248 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c @@ -1572,7 +1572,7 @@ static int sxgbe_poll(struct napi_struct *napi, int budget) * netdev structure and arrange for the device to be reset to a sane state * in order to transmit a new packet. */ -static void sxgbe_tx_timeout(struct net_device *dev) +static void sxgbe_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct sxgbe_priv_data *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/seeq/ether3.c b/drivers/net/ethernet/seeq/ether3.c index 632a7c85964d..128ee7cda1ed 100644 --- a/drivers/net/ethernet/seeq/ether3.c +++ b/drivers/net/ethernet/seeq/ether3.c @@ -79,7 +79,7 @@ static netdev_tx_t ether3_sendpacket(struct sk_buff *skb, static irqreturn_t ether3_interrupt (int irq, void *dev_id); static int ether3_close (struct net_device *dev); static void ether3_setmulticastlist (struct net_device *dev); -static void ether3_timeout(struct net_device *dev); +static void ether3_timeout(struct net_device *dev, unsigned int txqueue); #define BUS_16 2 #define BUS_8 1 @@ -450,7 +450,7 @@ static void ether3_setmulticastlist(struct net_device *dev) ether3_outw(priv(dev)->regs.config1 | CFG1_LOCBUFMEM, REG_CONFIG1); } -static void ether3_timeout(struct net_device *dev) +static void ether3_timeout(struct net_device *dev, unsigned int txqueue) { unsigned long flags; diff --git a/drivers/net/ethernet/seeq/sgiseeq.c b/drivers/net/ethernet/seeq/sgiseeq.c index 276c7cae7cee..8507ff242014 100644 --- a/drivers/net/ethernet/seeq/sgiseeq.c +++ b/drivers/net/ethernet/seeq/sgiseeq.c @@ -645,7 +645,7 @@ sgiseeq_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } -static void timeout(struct net_device *dev) +static void timeout(struct net_device *dev, unsigned int txqueue) { printk(KERN_NOTICE "%s: transmit timed out, resetting\n", dev->name); sgiseeq_reset(dev); diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 992c773620ec..f358709fab67 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -2395,7 +2395,7 @@ static void efx_net_stats(struct net_device *net_dev, } /* Context: netif_tx_lock held, BHs disabled. */ -static void efx_watchdog(struct net_device *net_dev) +static void efx_watchdog(struct net_device *net_dev, unsigned int txqueue) { struct efx_nic *efx = netdev_priv(net_dev); diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c index eecc348b1c32..bee4cd9d7135 100644 --- a/drivers/net/ethernet/sfc/falcon/efx.c +++ b/drivers/net/ethernet/sfc/falcon/efx.c @@ -2108,7 +2108,7 @@ static void ef4_net_stats(struct net_device *net_dev, } /* Context: netif_tx_lock held, BHs disabled. */ -static void ef4_watchdog(struct net_device *net_dev) +static void ef4_watchdog(struct net_device *net_dev, unsigned int txqueue) { struct ef4_nic *efx = netdev_priv(net_dev); diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c index d242906ae233..06637b03deed 100644 --- a/drivers/net/ethernet/sgi/ioc3-eth.c +++ b/drivers/net/ethernet/sgi/ioc3-eth.c @@ -114,7 +114,7 @@ struct ioc3_private { static int ioc3_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static void ioc3_set_multicast_list(struct net_device *dev); static netdev_tx_t ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev); -static void ioc3_timeout(struct net_device *dev); +static void ioc3_timeout(struct net_device *dev, unsigned int txqueue); static inline unsigned int ioc3_hash(const unsigned char *addr); static void ioc3_start(struct ioc3_private *ip); static inline void ioc3_stop(struct ioc3_private *ip); @@ -1479,7 +1479,7 @@ drop_packet: return NETDEV_TX_OK; } -static void ioc3_timeout(struct net_device *dev) +static void ioc3_timeout(struct net_device *dev, unsigned int txqueue) { struct ioc3_private *ip = netdev_priv(dev); diff --git a/drivers/net/ethernet/sgi/meth.c b/drivers/net/ethernet/sgi/meth.c index 539bc5db989c..0c396ecd3389 100644 --- a/drivers/net/ethernet/sgi/meth.c +++ b/drivers/net/ethernet/sgi/meth.c @@ -90,7 +90,7 @@ struct meth_private { spinlock_t meth_lock; }; -static void meth_tx_timeout(struct net_device *dev); +static void meth_tx_timeout(struct net_device *dev, unsigned int txqueue); static irqreturn_t meth_interrupt(int irq, void *dev_id); /* global, initialized in ip32-setup.c */ @@ -727,7 +727,7 @@ static netdev_tx_t meth_tx(struct sk_buff *skb, struct net_device *dev) /* * Deal with a transmit timeout. */ -static void meth_tx_timeout(struct net_device *dev) +static void meth_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct meth_private *priv = netdev_priv(dev); unsigned long flags; diff --git a/drivers/net/ethernet/silan/sc92031.c b/drivers/net/ethernet/silan/sc92031.c index c7641a236eb8..cb043eb1bdc1 100644 --- a/drivers/net/ethernet/silan/sc92031.c +++ b/drivers/net/ethernet/silan/sc92031.c @@ -1078,7 +1078,7 @@ static void sc92031_set_multicast_list(struct net_device *dev) spin_unlock_bh(&priv->lock); } -static void sc92031_tx_timeout(struct net_device *dev) +static void sc92031_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct sc92031_priv *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/sis/sis190.c b/drivers/net/ethernet/sis/sis190.c index 5b351beb78cb..5a4b6e3ab38f 100644 --- a/drivers/net/ethernet/sis/sis190.c +++ b/drivers/net/ethernet/sis/sis190.c @@ -1538,7 +1538,7 @@ err_out_0: goto out; } -static void sis190_tx_timeout(struct net_device *dev) +static void sis190_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct sis190_private *tp = netdev_priv(dev); void __iomem *ioaddr = tp->mmio_addr; diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index 85eaccbbbac1..81ed7589e33c 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -222,7 +222,7 @@ static int mdio_read(struct net_device *net_dev, int phy_id, int location); static void mdio_write(struct net_device *net_dev, int phy_id, int location, int val); static void sis900_timer(struct timer_list *t); static void sis900_check_mode (struct net_device *net_dev, struct mii_phy *mii_phy); -static void sis900_tx_timeout(struct net_device *net_dev); +static void sis900_tx_timeout(struct net_device *net_dev, unsigned int txqueue); static void sis900_init_tx_ring(struct net_device *net_dev); static void sis900_init_rx_ring(struct net_device *net_dev); static netdev_tx_t sis900_start_xmit(struct sk_buff *skb, @@ -1537,7 +1537,7 @@ static void sis900_read_mode(struct net_device *net_dev, int *speed, int *duplex * disable interrupts and do some tasks */ -static void sis900_tx_timeout(struct net_device *net_dev) +static void sis900_tx_timeout(struct net_device *net_dev, unsigned int txqueue) { struct sis900_private *sis_priv = netdev_priv(net_dev); void __iomem *ioaddr = sis_priv->ioaddr; diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c index be47d864f8b9..912760e8514c 100644 --- a/drivers/net/ethernet/smsc/epic100.c +++ b/drivers/net/ethernet/smsc/epic100.c @@ -291,7 +291,7 @@ static int mdio_read(struct net_device *dev, int phy_id, int location); static void mdio_write(struct net_device *dev, int phy_id, int loc, int val); static void epic_restart(struct net_device *dev); static void epic_timer(struct timer_list *t); -static void epic_tx_timeout(struct net_device *dev); +static void epic_tx_timeout(struct net_device *dev, unsigned int txqueue); static void epic_init_ring(struct net_device *dev); static netdev_tx_t epic_start_xmit(struct sk_buff *skb, struct net_device *dev); @@ -861,7 +861,7 @@ static void epic_timer(struct timer_list *t) add_timer(&ep->timer); } -static void epic_tx_timeout(struct net_device *dev) +static void epic_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct epic_private *ep = netdev_priv(dev); void __iomem *ioaddr = ep->ioaddr; diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c index 8d88e4083456..186c0bddbe5f 100644 --- a/drivers/net/ethernet/smsc/smc911x.c +++ b/drivers/net/ethernet/smsc/smc911x.c @@ -936,7 +936,7 @@ static void smc911x_phy_configure(struct work_struct *work) if (lp->ctl_rspeed != 100) my_ad_caps &= ~(ADVERTISE_100BASE4|ADVERTISE_100FULL|ADVERTISE_100HALF); - if (!lp->ctl_rfduplx) + if (!lp->ctl_rfduplx) my_ad_caps &= ~(ADVERTISE_100FULL|ADVERTISE_10FULL); /* Update our Auto-Neg Advertisement Register */ @@ -1245,7 +1245,7 @@ static void smc911x_poll_controller(struct net_device *dev) #endif /* Our watchdog timed out. Called by the networking layer */ -static void smc911x_timeout(struct net_device *dev) +static void smc911x_timeout(struct net_device *dev, unsigned int txqueue) { struct smc911x_local *lp = netdev_priv(dev); int status, mask; diff --git a/drivers/net/ethernet/smsc/smc9194.c b/drivers/net/ethernet/smsc/smc9194.c index d3bb2ba51f40..4b2330deed47 100644 --- a/drivers/net/ethernet/smsc/smc9194.c +++ b/drivers/net/ethernet/smsc/smc9194.c @@ -216,7 +216,7 @@ static int smc_open(struct net_device *dev); /* . Our watchdog timed out. Called by the networking layer */ -static void smc_timeout(struct net_device *dev); +static void smc_timeout(struct net_device *dev, unsigned int txqueue); /* . This is called by the kernel in response to 'ifconfig ethX down'. It @@ -1094,7 +1094,7 @@ static int smc_open(struct net_device *dev) .-------------------------------------------------------- */ -static void smc_timeout(struct net_device *dev) +static void smc_timeout(struct net_device *dev, unsigned int txqueue) { /* If we get here, some higher level has decided we are broken. There should really be a "kick me" function call instead. */ diff --git a/drivers/net/ethernet/smsc/smc91c92_cs.c b/drivers/net/ethernet/smsc/smc91c92_cs.c index a55f430f6a7b..f2a50eb3c1e0 100644 --- a/drivers/net/ethernet/smsc/smc91c92_cs.c +++ b/drivers/net/ethernet/smsc/smc91c92_cs.c @@ -271,7 +271,7 @@ static void smc91c92_release(struct pcmcia_device *link); static int smc_open(struct net_device *dev); static int smc_close(struct net_device *dev); static int smc_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); -static void smc_tx_timeout(struct net_device *dev); +static void smc_tx_timeout(struct net_device *dev, unsigned int txqueue); static netdev_tx_t smc_start_xmit(struct sk_buff *skb, struct net_device *dev); static irqreturn_t smc_interrupt(int irq, void *dev_id); @@ -1178,7 +1178,7 @@ static void smc_hardware_send_packet(struct net_device * dev) /*====================================================================*/ -static void smc_tx_timeout(struct net_device *dev) +static void smc_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct smc_private *smc = netdev_priv(dev); unsigned int ioaddr = dev->base_addr; diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 3a6761131f4c..90410f9d3b1a 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -1321,7 +1321,7 @@ static void smc_poll_controller(struct net_device *dev) #endif /* Our watchdog timed out. Called by the networking layer */ -static void smc_timeout(struct net_device *dev) +static void smc_timeout(struct net_device *dev, unsigned int txqueue) { struct smc_local *lp = netdev_priv(dev); void __iomem *ioaddr = lp->base; diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index b210e987a1db..09c72025ed3e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -363,6 +363,11 @@ struct dma_features { unsigned int dvlan; unsigned int l3l4fnum; unsigned int arpoffsel; + /* TSN Features */ + unsigned int estwid; + unsigned int estdep; + unsigned int estsel; + unsigned int fpesel; }; /* GMAC TX FIFO is 8K, Rx FIFO is 16K */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c index dd9967aeda22..2342d497348e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c @@ -40,7 +40,7 @@ struct tegra_eqos { static int dwc_eth_dwmac_config_dt(struct platform_device *pdev, struct plat_stmmacenet_data *plat_dat) { - struct device_node *np = pdev->dev.of_node; + struct device *dev = &pdev->dev; u32 burst_map = 0; u32 bit_index = 0; u32 a_index = 0; @@ -52,9 +52,10 @@ static int dwc_eth_dwmac_config_dt(struct platform_device *pdev, return -ENOMEM; } - plat_dat->axi->axi_lpi_en = of_property_read_bool(np, "snps,en-lpi"); - if (of_property_read_u32(np, "snps,write-requests", - &plat_dat->axi->axi_wr_osr_lmt)) { + plat_dat->axi->axi_lpi_en = device_property_read_bool(dev, + "snps,en-lpi"); + if (device_property_read_u32(dev, "snps,write-requests", + &plat_dat->axi->axi_wr_osr_lmt)) { /** * Since the register has a reset value of 1, if property * is missing, default to 1. @@ -68,8 +69,8 @@ static int dwc_eth_dwmac_config_dt(struct platform_device *pdev, plat_dat->axi->axi_wr_osr_lmt--; } - if (of_property_read_u32(np, "snps,read-requests", - &plat_dat->axi->axi_rd_osr_lmt)) { + if (device_property_read_u32(dev, "snps,read-requests", + &plat_dat->axi->axi_rd_osr_lmt)) { /** * Since the register has a reset value of 1, if property * is missing, default to 1. @@ -82,7 +83,7 @@ static int dwc_eth_dwmac_config_dt(struct platform_device *pdev, */ plat_dat->axi->axi_rd_osr_lmt--; } - of_property_read_u32(np, "snps,burst-map", &burst_map); + device_property_read_u32(dev, "snps,burst-map", &burst_map); /* converts burst-map bitmask to burst array */ for (bit_index = 0; bit_index < 7; bit_index++) { @@ -270,6 +271,7 @@ static void *tegra_eqos_probe(struct platform_device *pdev, struct plat_stmmacenet_data *data, struct stmmac_resources *res) { + struct device *dev = &pdev->dev; struct tegra_eqos *eqos; int err; @@ -282,6 +284,9 @@ static void *tegra_eqos_probe(struct platform_device *pdev, eqos->dev = &pdev->dev; eqos->regs = res->addr; + if (!is_of_node(dev->fwnode)) + goto bypass_clk_reset_gpio; + eqos->clk_master = devm_clk_get(&pdev->dev, "master_bus"); if (IS_ERR(eqos->clk_master)) { err = PTR_ERR(eqos->clk_master); @@ -354,6 +359,7 @@ static void *tegra_eqos_probe(struct platform_device *pdev, usleep_range(2000, 4000); +bypass_clk_reset_gpio: data->fix_mac_speed = tegra_eqos_fix_speed; data->init = tegra_eqos_init; data->bsp_priv = eqos; @@ -421,7 +427,7 @@ static int dwc_eth_dwmac_probe(struct platform_device *pdev) void *priv; int ret; - data = of_device_get_match_data(&pdev->dev); + data = device_get_match_data(&pdev->dev); memset(&stmmac_res, 0, sizeof(struct stmmac_resources)); @@ -478,7 +484,7 @@ static int dwc_eth_dwmac_remove(struct platform_device *pdev) const struct dwc_eth_dwmac_data *data; int err; - data = of_device_get_match_data(&pdev->dev); + data = device_get_match_data(&pdev->dev); err = stmmac_dvr_remove(&pdev->dev); if (err < 0) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c index bdb80421acac..9e4b83832938 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c @@ -55,6 +55,8 @@ struct mediatek_dwmac_plat_data { struct regmap *peri_regmap; struct device *dev; phy_interface_t phy_mode; + int num_clks_to_config; + bool rmii_clk_from_mac; bool rmii_rxc; }; @@ -73,21 +75,33 @@ struct mediatek_dwmac_variant { /* list of clocks required for mac */ static const char * const mt2712_dwmac_clk_l[] = { - "axi", "apb", "mac_main", "ptp_ref" + "axi", "apb", "mac_main", "ptp_ref", "rmii_internal" }; static int mt2712_set_interface(struct mediatek_dwmac_plat_data *plat) { + int rmii_clk_from_mac = plat->rmii_clk_from_mac ? RMII_CLK_SRC_INTERNAL : 0; int rmii_rxc = plat->rmii_rxc ? RMII_CLK_SRC_RXC : 0; u32 intf_val = 0; + /* The clock labeled as "rmii_internal" in mt2712_dwmac_clk_l is needed + * only in RMII(when MAC provides the reference clock), and useless for + * RGMII/MII/RMII(when PHY provides the reference clock). + * num_clks_to_config indicates the real number of clocks should be + * configured, equals to (plat->variant->num_clks - 1) in default for all the case, + * then +1 for rmii_clk_from_mac case. + */ + plat->num_clks_to_config = plat->variant->num_clks - 1; + /* select phy interface in top control domain */ switch (plat->phy_mode) { case PHY_INTERFACE_MODE_MII: intf_val |= PHY_INTF_MII; break; case PHY_INTERFACE_MODE_RMII: - intf_val |= (PHY_INTF_RMII | rmii_rxc); + if (plat->rmii_clk_from_mac) + plat->num_clks_to_config++; + intf_val |= (PHY_INTF_RMII | rmii_rxc | rmii_clk_from_mac); break; case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_TXID: @@ -173,35 +187,50 @@ static int mt2712_set_delay(struct mediatek_dwmac_plat_data *plat) delay_val |= FIELD_PREP(ETH_DLY_RXC_INV, mac_delay->rx_inv); break; case PHY_INTERFACE_MODE_RMII: - /* the rmii reference clock is from external phy, - * and the property "rmii_rxc" indicates which pin(TXC/RXC) - * the reference clk is connected to. The reference clock is a - * received signal, so rx_delay/rx_inv are used to indicate - * the reference clock timing adjustment - */ - if (plat->rmii_rxc) { - /* the rmii reference clock from outside is connected - * to RXC pin, the reference clock will be adjusted - * by RXC delay macro circuit. - */ - delay_val |= FIELD_PREP(ETH_DLY_RXC_ENABLE, !!mac_delay->rx_delay); - delay_val |= FIELD_PREP(ETH_DLY_RXC_STAGES, mac_delay->rx_delay); - delay_val |= FIELD_PREP(ETH_DLY_RXC_INV, mac_delay->rx_inv); - } else { - /* the rmii reference clock from outside is connected - * to TXC pin, the reference clock will be adjusted - * by TXC delay macro circuit. + if (plat->rmii_clk_from_mac) { + /* case 1: mac provides the rmii reference clock, + * and the clock output to TXC pin. + * The egress timing can be adjusted by GTXC delay macro circuit. + * The ingress timing can be adjusted by TXC delay macro circuit. */ delay_val |= FIELD_PREP(ETH_DLY_TXC_ENABLE, !!mac_delay->rx_delay); delay_val |= FIELD_PREP(ETH_DLY_TXC_STAGES, mac_delay->rx_delay); delay_val |= FIELD_PREP(ETH_DLY_TXC_INV, mac_delay->rx_inv); + + delay_val |= FIELD_PREP(ETH_DLY_GTXC_ENABLE, !!mac_delay->tx_delay); + delay_val |= FIELD_PREP(ETH_DLY_GTXC_STAGES, mac_delay->tx_delay); + delay_val |= FIELD_PREP(ETH_DLY_GTXC_INV, mac_delay->tx_inv); + } else { + /* case 2: the rmii reference clock is from external phy, + * and the property "rmii_rxc" indicates which pin(TXC/RXC) + * the reference clk is connected to. The reference clock is a + * received signal, so rx_delay/rx_inv are used to indicate + * the reference clock timing adjustment + */ + if (plat->rmii_rxc) { + /* the rmii reference clock from outside is connected + * to RXC pin, the reference clock will be adjusted + * by RXC delay macro circuit. + */ + delay_val |= FIELD_PREP(ETH_DLY_RXC_ENABLE, !!mac_delay->rx_delay); + delay_val |= FIELD_PREP(ETH_DLY_RXC_STAGES, mac_delay->rx_delay); + delay_val |= FIELD_PREP(ETH_DLY_RXC_INV, mac_delay->rx_inv); + } else { + /* the rmii reference clock from outside is connected + * to TXC pin, the reference clock will be adjusted + * by TXC delay macro circuit. + */ + delay_val |= FIELD_PREP(ETH_DLY_TXC_ENABLE, !!mac_delay->rx_delay); + delay_val |= FIELD_PREP(ETH_DLY_TXC_STAGES, mac_delay->rx_delay); + delay_val |= FIELD_PREP(ETH_DLY_TXC_INV, mac_delay->rx_inv); + } + /* tx_inv will inverse the tx clock inside mac relateive to + * reference clock from external phy, + * and this bit is located in the same register with fine-tune + */ + if (mac_delay->tx_inv) + fine_val = ETH_RMII_DLY_TX_INV; } - /* tx_inv will inverse the tx clock inside mac relateive to - * reference clock from external phy, - * and this bit is located in the same register with fine-tune - */ - if (mac_delay->tx_inv) - fine_val = ETH_RMII_DLY_TX_INV; break; case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_TXID: @@ -278,6 +307,7 @@ static int mediatek_dwmac_config_dt(struct mediatek_dwmac_plat_data *plat) mac_delay->tx_inv = of_property_read_bool(plat->np, "mediatek,txc-inverse"); mac_delay->rx_inv = of_property_read_bool(plat->np, "mediatek,rxc-inverse"); plat->rmii_rxc = of_property_read_bool(plat->np, "mediatek,rmii-rxc"); + plat->rmii_clk_from_mac = of_property_read_bool(plat->np, "mediatek,rmii-clk-from-mac"); return 0; } @@ -294,6 +324,8 @@ static int mediatek_dwmac_clk_init(struct mediatek_dwmac_plat_data *plat) for (i = 0; i < num; i++) plat->clks[i].id = variant->clk_list[i]; + plat->num_clks_to_config = variant->num_clks; + return devm_clk_bulk_get(plat->dev, num, plat->clks); } @@ -321,7 +353,7 @@ static int mediatek_dwmac_init(struct platform_device *pdev, void *priv) return ret; } - ret = clk_bulk_prepare_enable(variant->num_clks, plat->clks); + ret = clk_bulk_prepare_enable(plat->num_clks_to_config, plat->clks); if (ret) { dev_err(plat->dev, "failed to enable clks, err = %d\n", ret); return ret; @@ -336,9 +368,8 @@ static int mediatek_dwmac_init(struct platform_device *pdev, void *priv) static void mediatek_dwmac_exit(struct platform_device *pdev, void *priv) { struct mediatek_dwmac_plat_data *plat = priv; - const struct mediatek_dwmac_variant *variant = plat->variant; - clk_bulk_disable_unprepare(variant->num_clks, plat->clks); + clk_bulk_disable_unprepare(plat->num_clks_to_config, plat->clks); pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 1c8d84ed8410..6f834302fda3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -335,14 +335,30 @@ static void sun8i_dwmac_dump_mac_regs(struct mac_device_info *hw, } } -static void sun8i_dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan) +static void sun8i_dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan, + bool rx, bool tx) { - writel(EMAC_RX_INT | EMAC_TX_INT, ioaddr + EMAC_INT_EN); + u32 value = readl(ioaddr + EMAC_INT_EN); + + if (rx) + value |= EMAC_RX_INT; + if (tx) + value |= EMAC_TX_INT; + + writel(value, ioaddr + EMAC_INT_EN); } -static void sun8i_dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan) +static void sun8i_dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan, + bool rx, bool tx) { - writel(0, ioaddr + EMAC_INT_EN); + u32 value = readl(ioaddr + EMAC_INT_EN); + + if (rx) + value &= ~EMAC_RX_INT; + if (tx) + value &= ~EMAC_TX_INT; + + writel(value, ioaddr + EMAC_INT_EN); } static void sun8i_dwmac_dma_start_tx(void __iomem *ioaddr, u32 chan) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h index 2dc70d104161..2e6b60a476c6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h @@ -64,6 +64,8 @@ #define GMAC_RXQCTRL_MCBCQEN_SHIFT 20 #define GMAC_RXQCTRL_TACPQE BIT(21) #define GMAC_RXQCTRL_TACPQE_SHIFT 21 +#define GMAC_RXQCTRL_FPRQ GENMASK(26, 24) +#define GMAC_RXQCTRL_FPRQ_SHIFT 24 /* MAC Packet Filtering */ #define GMAC_PACKET_FILTER_PR BIT(0) @@ -176,6 +178,8 @@ enum power_event { #define GMAC_CONFIG_SARC GENMASK(30, 28) #define GMAC_CONFIG_SARC_SHIFT 28 #define GMAC_CONFIG_IPC BIT(27) +#define GMAC_CONFIG_IPG GENMASK(26, 24) +#define GMAC_CONFIG_IPG_SHIFT 24 #define GMAC_CONFIG_2K BIT(22) #define GMAC_CONFIG_ACS BIT(20) #define GMAC_CONFIG_BE BIT(18) @@ -183,6 +187,7 @@ enum power_event { #define GMAC_CONFIG_JE BIT(16) #define GMAC_CONFIG_PS BIT(15) #define GMAC_CONFIG_FES BIT(14) +#define GMAC_CONFIG_FES_SHIFT 14 #define GMAC_CONFIG_DM BIT(13) #define GMAC_CONFIG_LM BIT(12) #define GMAC_CONFIG_DCRS BIT(9) @@ -190,6 +195,9 @@ enum power_event { #define GMAC_CONFIG_RE BIT(0) /* MAC extended config */ +#define GMAC_CONFIG_EIPG GENMASK(29, 25) +#define GMAC_CONFIG_EIPG_SHIFT 25 +#define GMAC_CONFIG_EIPG_EN BIT(24) #define GMAC_CONFIG_HDSMS GENMASK(22, 20) #define GMAC_CONFIG_HDSMS_SHIFT 20 #define GMAC_CONFIG_HDSMS_256 (0x2 << GMAC_CONFIG_HDSMS_SHIFT) @@ -231,6 +239,10 @@ enum power_event { /* MAC HW features3 bitmap */ #define GMAC_HW_FEAT_ASP GENMASK(29, 28) +#define GMAC_HW_FEAT_FPESEL BIT(26) +#define GMAC_HW_FEAT_ESTWID GENMASK(21, 20) +#define GMAC_HW_FEAT_ESTDEP GENMASK(19, 17) +#define GMAC_HW_FEAT_ESTSEL BIT(16) #define GMAC_HW_FEAT_FRPES GENMASK(14, 13) #define GMAC_HW_FEAT_FRPBS GENMASK(12, 11) #define GMAC_HW_FEAT_FRPSEL BIT(10) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 40ca00e596dd..f0c0ea616032 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -984,6 +984,8 @@ const struct stmmac_ops dwmac410_ops = { .set_arp_offload = dwmac4_set_arp_offload, .config_l3_filter = dwmac4_config_l3_filter, .config_l4_filter = dwmac4_config_l4_filter, + .est_configure = dwmac5_est_configure, + .fpe_configure = dwmac5_fpe_configure, }; const struct stmmac_ops dwmac510_ops = { @@ -1027,6 +1029,8 @@ const struct stmmac_ops dwmac510_ops = { .set_arp_offload = dwmac4_set_arp_offload, .config_l3_filter = dwmac4_config_l3_filter, .config_l4_filter = dwmac4_config_l4_filter, + .est_configure = dwmac5_est_configure, + .fpe_configure = dwmac5_fpe_configure, }; int dwmac4_setup(struct stmmac_priv *priv) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c index c15409030710..213d44482ffa 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c @@ -404,6 +404,10 @@ static void dwmac4_get_hw_feature(void __iomem *ioaddr, /* 5.10 Features */ dma_cap->asp = (hw_cap & GMAC_HW_FEAT_ASP) >> 28; + dma_cap->fpesel = (hw_cap & GMAC_HW_FEAT_FPESEL) >> 26; + dma_cap->estwid = (hw_cap & GMAC_HW_FEAT_ESTWID) >> 20; + dma_cap->estdep = (hw_cap & GMAC_HW_FEAT_ESTDEP) >> 17; + dma_cap->estsel = (hw_cap & GMAC_HW_FEAT_ESTSEL) >> 16; dma_cap->frpes = (hw_cap & GMAC_HW_FEAT_FRPES) >> 13; dma_cap->frpbs = (hw_cap & GMAC_HW_FEAT_FRPBS) >> 11; dma_cap->frpsel = (hw_cap & GMAC_HW_FEAT_FRPSEL) >> 10; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h index 589931795847..bcb6d5190f3d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h @@ -168,6 +168,8 @@ /* DMA default interrupt mask for 4.00 */ #define DMA_CHAN_INTR_DEFAULT_MASK (DMA_CHAN_INTR_NORMAL | \ DMA_CHAN_INTR_ABNORMAL) +#define DMA_CHAN_INTR_DEFAULT_RX (DMA_CHAN_INTR_ENA_RIE) +#define DMA_CHAN_INTR_DEFAULT_TX (DMA_CHAN_INTR_ENA_TIE) #define DMA_CHAN_INTR_NORMAL_4_10 (DMA_CHAN_INTR_ENA_NIE_4_10 | \ DMA_CHAN_INTR_ENA_RIE | \ @@ -178,6 +180,8 @@ /* DMA default interrupt mask for 4.10a */ #define DMA_CHAN_INTR_DEFAULT_MASK_4_10 (DMA_CHAN_INTR_NORMAL_4_10 | \ DMA_CHAN_INTR_ABNORMAL_4_10) +#define DMA_CHAN_INTR_DEFAULT_RX_4_10 (DMA_CHAN_INTR_ENA_RIE) +#define DMA_CHAN_INTR_DEFAULT_TX_4_10 (DMA_CHAN_INTR_ENA_TIE) /* channel 0 specific fields */ #define DMA_CHAN0_DBG_STAT_TPS GENMASK(15, 12) @@ -186,9 +190,10 @@ #define DMA_CHAN0_DBG_STAT_RPS_SHIFT 8 int dwmac4_dma_reset(void __iomem *ioaddr); -void dwmac4_enable_dma_irq(void __iomem *ioaddr, u32 chan); -void dwmac410_enable_dma_irq(void __iomem *ioaddr, u32 chan); -void dwmac4_disable_dma_irq(void __iomem *ioaddr, u32 chan); +void dwmac4_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx); +void dwmac410_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx); +void dwmac4_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx); +void dwmac410_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx); void dwmac4_dma_start_tx(void __iomem *ioaddr, u32 chan); void dwmac4_dma_stop_tx(void __iomem *ioaddr, u32 chan); void dwmac4_dma_start_rx(void __iomem *ioaddr, u32 chan); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c index f2a29a90e085..9becca280074 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c @@ -97,21 +97,52 @@ void dwmac4_set_rx_ring_len(void __iomem *ioaddr, u32 len, u32 chan) writel(len, ioaddr + DMA_CHAN_RX_RING_LEN(chan)); } -void dwmac4_enable_dma_irq(void __iomem *ioaddr, u32 chan) +void dwmac4_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx) { - writel(DMA_CHAN_INTR_DEFAULT_MASK, ioaddr + - DMA_CHAN_INTR_ENA(chan)); + u32 value = readl(ioaddr + DMA_CHAN_INTR_ENA(chan)); + + if (rx) + value |= DMA_CHAN_INTR_DEFAULT_RX; + if (tx) + value |= DMA_CHAN_INTR_DEFAULT_TX; + + writel(value, ioaddr + DMA_CHAN_INTR_ENA(chan)); } -void dwmac410_enable_dma_irq(void __iomem *ioaddr, u32 chan) +void dwmac410_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx) { - writel(DMA_CHAN_INTR_DEFAULT_MASK_4_10, - ioaddr + DMA_CHAN_INTR_ENA(chan)); + u32 value = readl(ioaddr + DMA_CHAN_INTR_ENA(chan)); + + if (rx) + value |= DMA_CHAN_INTR_DEFAULT_RX_4_10; + if (tx) + value |= DMA_CHAN_INTR_DEFAULT_TX_4_10; + + writel(value, ioaddr + DMA_CHAN_INTR_ENA(chan)); } -void dwmac4_disable_dma_irq(void __iomem *ioaddr, u32 chan) +void dwmac4_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx) { - writel(0, ioaddr + DMA_CHAN_INTR_ENA(chan)); + u32 value = readl(ioaddr + DMA_CHAN_INTR_ENA(chan)); + + if (rx) + value &= ~DMA_CHAN_INTR_DEFAULT_RX; + if (tx) + value &= ~DMA_CHAN_INTR_DEFAULT_TX; + + writel(value, ioaddr + DMA_CHAN_INTR_ENA(chan)); +} + +void dwmac410_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx) +{ + u32 value = readl(ioaddr + DMA_CHAN_INTR_ENA(chan)); + + if (rx) + value &= ~DMA_CHAN_INTR_DEFAULT_RX_4_10; + if (tx) + value &= ~DMA_CHAN_INTR_DEFAULT_TX_4_10; + + writel(value, ioaddr + DMA_CHAN_INTR_ENA(chan)); } int dwmac4_dma_interrupt(void __iomem *ioaddr, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c index e436fa160c7d..5d4a3c2458ea 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c @@ -550,3 +550,121 @@ int dwmac5_flex_pps_config(void __iomem *ioaddr, int index, writel(val, ioaddr + MAC_PPS_CONTROL); return 0; } + +static int dwmac5_est_write(void __iomem *ioaddr, u32 reg, u32 val, bool gcl) +{ + u32 ctrl; + + writel(val, ioaddr + MTL_EST_GCL_DATA); + + ctrl = (reg << ADDR_SHIFT); + ctrl |= gcl ? 0 : GCRR; + + writel(ctrl, ioaddr + MTL_EST_GCL_CONTROL); + + ctrl |= SRWO; + writel(ctrl, ioaddr + MTL_EST_GCL_CONTROL); + + return readl_poll_timeout(ioaddr + MTL_EST_GCL_CONTROL, + ctrl, !(ctrl & SRWO), 100, 5000); +} + +int dwmac5_est_configure(void __iomem *ioaddr, struct stmmac_est *cfg, + unsigned int ptp_rate) +{ + u32 speed, total_offset, offset, ctrl, ctr_low; + u32 extcfg = readl(ioaddr + GMAC_EXT_CONFIG); + u32 mac_cfg = readl(ioaddr + GMAC_CONFIG); + int i, ret = 0x0; + u64 total_ctr; + + if (extcfg & GMAC_CONFIG_EIPG_EN) { + offset = (extcfg & GMAC_CONFIG_EIPG) >> GMAC_CONFIG_EIPG_SHIFT; + offset = 104 + (offset * 8); + } else { + offset = (mac_cfg & GMAC_CONFIG_IPG) >> GMAC_CONFIG_IPG_SHIFT; + offset = 96 - (offset * 8); + } + + speed = mac_cfg & (GMAC_CONFIG_PS | GMAC_CONFIG_FES); + speed = speed >> GMAC_CONFIG_FES_SHIFT; + + switch (speed) { + case 0x0: + offset = offset * 1000; /* 1G */ + break; + case 0x1: + offset = offset * 400; /* 2.5G */ + break; + case 0x2: + offset = offset * 100000; /* 10M */ + break; + case 0x3: + offset = offset * 10000; /* 100M */ + break; + default: + return -EINVAL; + } + + offset = offset / 1000; + + ret |= dwmac5_est_write(ioaddr, BTR_LOW, cfg->btr[0], false); + ret |= dwmac5_est_write(ioaddr, BTR_HIGH, cfg->btr[1], false); + ret |= dwmac5_est_write(ioaddr, TER, cfg->ter, false); + ret |= dwmac5_est_write(ioaddr, LLR, cfg->gcl_size, false); + if (ret) + return ret; + + total_offset = 0; + for (i = 0; i < cfg->gcl_size; i++) { + ret = dwmac5_est_write(ioaddr, i, cfg->gcl[i] + offset, true); + if (ret) + return ret; + + total_offset += offset; + } + + total_ctr = cfg->ctr[0] + cfg->ctr[1] * 1000000000; + total_ctr += total_offset; + + ctr_low = do_div(total_ctr, 1000000000); + + ret |= dwmac5_est_write(ioaddr, CTR_LOW, ctr_low, false); + ret |= dwmac5_est_write(ioaddr, CTR_HIGH, total_ctr, false); + if (ret) + return ret; + + ctrl = readl(ioaddr + MTL_EST_CONTROL); + ctrl &= ~PTOV; + ctrl |= ((1000000000 / ptp_rate) * 6) << PTOV_SHIFT; + if (cfg->enable) + ctrl |= EEST | SSWL; + else + ctrl &= ~EEST; + + writel(ctrl, ioaddr + MTL_EST_CONTROL); + return 0; +} + +void dwmac5_fpe_configure(void __iomem *ioaddr, u32 num_txq, u32 num_rxq, + bool enable) +{ + u32 value; + + if (!enable) { + value = readl(ioaddr + MAC_FPE_CTRL_STS); + + value &= ~EFPE; + + writel(value, ioaddr + MAC_FPE_CTRL_STS); + } + + value = readl(ioaddr + GMAC_RXQ_CTRL1); + value &= ~GMAC_RXQCTRL_FPRQ; + value |= (num_rxq - 1) << GMAC_RXQCTRL_FPRQ_SHIFT; + writel(value, ioaddr + GMAC_RXQ_CTRL1); + + value = readl(ioaddr + MAC_FPE_CTRL_STS); + value |= EFPE; + writel(value, ioaddr + MAC_FPE_CTRL_STS); +} diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h index 23fecf68f781..3e8faa96b4d4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h @@ -11,6 +11,9 @@ #define PRTYEN BIT(1) #define TMOUTEN BIT(0) +#define MAC_FPE_CTRL_STS 0x00000234 +#define EFPE BIT(0) + #define MAC_PPS_CONTROL 0x00000b70 #define PPS_MAXIDX(x) ((((x) + 1) * 8) - 1) #define PPS_MINIDX(x) ((x) * 8) @@ -30,6 +33,23 @@ #define MAC_PPSx_INTERVAL(x) (0x00000b88 + ((x) * 0x10)) #define MAC_PPSx_WIDTH(x) (0x00000b8c + ((x) * 0x10)) +#define MTL_EST_CONTROL 0x00000c50 +#define PTOV GENMASK(31, 24) +#define PTOV_SHIFT 24 +#define SSWL BIT(1) +#define EEST BIT(0) +#define MTL_EST_GCL_CONTROL 0x00000c80 +#define BTR_LOW 0x0 +#define BTR_HIGH 0x1 +#define CTR_LOW 0x2 +#define CTR_HIGH 0x3 +#define TER 0x4 +#define LLR 0x5 +#define ADDR_SHIFT 8 +#define GCRR BIT(2) +#define SRWO BIT(0) +#define MTL_EST_GCL_DATA 0x00000c84 + #define MTL_RXP_CONTROL_STATUS 0x00000ca0 #define RXPI BIT(31) #define NPE GENMASK(23, 16) @@ -83,5 +103,9 @@ int dwmac5_rxp_config(void __iomem *ioaddr, struct stmmac_tc_entry *entries, int dwmac5_flex_pps_config(void __iomem *ioaddr, int index, struct stmmac_pps_cfg *cfg, bool enable, u32 sub_second_inc, u32 systime_flags); +int dwmac5_est_configure(void __iomem *ioaddr, struct stmmac_est *cfg, + unsigned int ptp_rate); +void dwmac5_fpe_configure(void __iomem *ioaddr, u32 num_txq, u32 num_rxq, + bool enable); #endif /* __DWMAC5_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h index 292b880f3f9f..e5dbd0bc257e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h @@ -96,6 +96,8 @@ /* DMA default interrupt mask */ #define DMA_INTR_DEFAULT_MASK (DMA_INTR_NORMAL | DMA_INTR_ABNORMAL) +#define DMA_INTR_DEFAULT_RX (DMA_INTR_ENA_RIE) +#define DMA_INTR_DEFAULT_TX (DMA_INTR_ENA_TIE) /* DMA Status register defines */ #define DMA_STATUS_GLPII 0x40000000 /* GMAC LPI interrupt */ @@ -130,8 +132,8 @@ #define NUM_DWMAC1000_DMA_REGS 23 void dwmac_enable_dma_transmission(void __iomem *ioaddr); -void dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan); -void dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan); +void dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx); +void dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx); void dwmac_dma_start_tx(void __iomem *ioaddr, u32 chan); void dwmac_dma_stop_tx(void __iomem *ioaddr, u32 chan); void dwmac_dma_start_rx(void __iomem *ioaddr, u32 chan); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c index 1bc25aa86dbd..688d36095333 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c @@ -37,14 +37,28 @@ void dwmac_enable_dma_transmission(void __iomem *ioaddr) writel(1, ioaddr + DMA_XMT_POLL_DEMAND); } -void dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan) +void dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx) { - writel(DMA_INTR_DEFAULT_MASK, ioaddr + DMA_INTR_ENA); + u32 value = readl(ioaddr + DMA_INTR_ENA); + + if (rx) + value |= DMA_INTR_DEFAULT_RX; + if (tx) + value |= DMA_INTR_DEFAULT_TX; + + writel(value, ioaddr + DMA_INTR_ENA); } -void dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan) +void dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx) { - writel(0, ioaddr + DMA_INTR_ENA); + u32 value = readl(ioaddr + DMA_INTR_ENA); + + if (rx) + value &= ~DMA_INTR_DEFAULT_RX; + if (tx) + value &= ~DMA_INTR_DEFAULT_TX; + + writel(value, ioaddr + DMA_INTR_ENA); } void dwmac_dma_start_tx(void __iomem *ioaddr, u32 chan) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h index 3b6e559aa0b9..174b903a8211 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h @@ -73,6 +73,9 @@ #define XGMAC_RXQ_CTRL0 0x000000a0 #define XGMAC_RXQEN(x) GENMASK((x) * 2 + 1, (x) * 2) #define XGMAC_RXQEN_SHIFT(x) ((x) * 2) +#define XGMAC_RXQ_CTRL1 0x000000a4 +#define XGMAC_RQ GENMASK(7, 4) +#define XGMAC_RQ_SHIFT 4 #define XGMAC_RXQ_CTRL2 0x000000a8 #define XGMAC_RXQ_CTRL3 0x000000ac #define XGMAC_PSRQ(x) GENMASK((x) * 8 + 7, (x) * 8) @@ -136,6 +139,10 @@ #define XGMAC_HWFEAT_TXQCNT GENMASK(9, 6) #define XGMAC_HWFEAT_RXQCNT GENMASK(3, 0) #define XGMAC_HW_FEATURE3 0x00000128 +#define XGMAC_HWFEAT_FPESEL BIT(26) +#define XGMAC_HWFEAT_ESTWID GENMASK(24, 23) +#define XGMAC_HWFEAT_ESTDEP GENMASK(22, 20) +#define XGMAC_HWFEAT_ESTSEL BIT(19) #define XGMAC_HWFEAT_ASP GENMASK(15, 14) #define XGMAC_HWFEAT_DVLAN BIT(13) #define XGMAC_HWFEAT_FRPES GENMASK(12, 11) @@ -148,6 +155,8 @@ #define XGMAC_MDIO_ADDR 0x00000200 #define XGMAC_MDIO_DATA 0x00000204 #define XGMAC_MDIO_C22P 0x00000220 +#define XGMAC_FPE_CTRL_STS 0x00000280 +#define XGMAC_EFPE BIT(0) #define XGMAC_ADDRx_HIGH(x) (0x00000300 + (x) * 0x8) #define XGMAC_ADDR_MAX 32 #define XGMAC_AE BIT(31) @@ -237,6 +246,22 @@ #define XGMAC_TC_PRTY_MAP1 0x00001044 #define XGMAC_PSTC(x) GENMASK((x) * 8 + 7, (x) * 8) #define XGMAC_PSTC_SHIFT(x) ((x) * 8) +#define XGMAC_MTL_EST_CONTROL 0x00001050 +#define XGMAC_PTOV GENMASK(31, 23) +#define XGMAC_PTOV_SHIFT 23 +#define XGMAC_SSWL BIT(1) +#define XGMAC_EEST BIT(0) +#define XGMAC_MTL_EST_GCL_CONTROL 0x00001080 +#define XGMAC_BTR_LOW 0x0 +#define XGMAC_BTR_HIGH 0x1 +#define XGMAC_CTR_LOW 0x2 +#define XGMAC_CTR_HIGH 0x3 +#define XGMAC_TER 0x4 +#define XGMAC_LLR 0x5 +#define XGMAC_ADDR_SHIFT 8 +#define XGMAC_GCRR BIT(2) +#define XGMAC_SRWO BIT(0) +#define XGMAC_MTL_EST_GCL_DATA 0x00001084 #define XGMAC_MTL_RXP_CONTROL_STATUS 0x000010a0 #define XGMAC_RXPI BIT(31) #define XGMAC_NPE GENMASK(23, 16) @@ -361,6 +386,8 @@ #define XGMAC_TIE BIT(0) #define XGMAC_DMA_INT_DEFAULT_EN (XGMAC_NIE | XGMAC_AIE | XGMAC_RBUE | \ XGMAC_RIE | XGMAC_TIE) +#define XGMAC_DMA_INT_DEFAULT_RX (XGMAC_RBUE | XGMAC_RIE) +#define XGMAC_DMA_INT_DEFAULT_TX (XGMAC_TIE) #define XGMAC_DMA_CH_Rx_WATCHDOG(x) (0x0000313c + (0x80 * (x))) #define XGMAC_RWT GENMASK(7, 0) #define XGMAC_DMA_CH_STATUS(x) (0x00003160 + (0x80 * (x))) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index 082f5ee9e525..307105e8dea0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -1359,6 +1359,80 @@ static void dwxgmac2_set_arp_offload(struct mac_device_info *hw, bool en, writel(value, ioaddr + XGMAC_RX_CONFIG); } +static int dwxgmac3_est_write(void __iomem *ioaddr, u32 reg, u32 val, bool gcl) +{ + u32 ctrl; + + writel(val, ioaddr + XGMAC_MTL_EST_GCL_DATA); + + ctrl = (reg << XGMAC_ADDR_SHIFT); + ctrl |= gcl ? 0 : XGMAC_GCRR; + + writel(ctrl, ioaddr + XGMAC_MTL_EST_GCL_CONTROL); + + ctrl |= XGMAC_SRWO; + writel(ctrl, ioaddr + XGMAC_MTL_EST_GCL_CONTROL); + + return readl_poll_timeout_atomic(ioaddr + XGMAC_MTL_EST_GCL_CONTROL, + ctrl, !(ctrl & XGMAC_SRWO), 100, 5000); +} + +static int dwxgmac3_est_configure(void __iomem *ioaddr, struct stmmac_est *cfg, + unsigned int ptp_rate) +{ + int i, ret = 0x0; + u32 ctrl; + + ret |= dwxgmac3_est_write(ioaddr, XGMAC_BTR_LOW, cfg->btr[0], false); + ret |= dwxgmac3_est_write(ioaddr, XGMAC_BTR_HIGH, cfg->btr[1], false); + ret |= dwxgmac3_est_write(ioaddr, XGMAC_TER, cfg->ter, false); + ret |= dwxgmac3_est_write(ioaddr, XGMAC_LLR, cfg->gcl_size, false); + ret |= dwxgmac3_est_write(ioaddr, XGMAC_CTR_LOW, cfg->ctr[0], false); + ret |= dwxgmac3_est_write(ioaddr, XGMAC_CTR_HIGH, cfg->ctr[1], false); + if (ret) + return ret; + + for (i = 0; i < cfg->gcl_size; i++) { + ret = dwxgmac3_est_write(ioaddr, i, cfg->gcl[i], true); + if (ret) + return ret; + } + + ctrl = readl(ioaddr + XGMAC_MTL_EST_CONTROL); + ctrl &= ~XGMAC_PTOV; + ctrl |= ((1000000000 / ptp_rate) * 9) << XGMAC_PTOV_SHIFT; + if (cfg->enable) + ctrl |= XGMAC_EEST | XGMAC_SSWL; + else + ctrl &= ~XGMAC_EEST; + + writel(ctrl, ioaddr + XGMAC_MTL_EST_CONTROL); + return 0; +} + +static void dwxgmac3_fpe_configure(void __iomem *ioaddr, u32 num_txq, + u32 num_rxq, bool enable) +{ + u32 value; + + if (!enable) { + value = readl(ioaddr + XGMAC_FPE_CTRL_STS); + + value &= ~XGMAC_EFPE; + + writel(value, ioaddr + XGMAC_FPE_CTRL_STS); + } + + value = readl(ioaddr + XGMAC_RXQ_CTRL1); + value &= ~XGMAC_RQ; + value |= (num_rxq - 1) << XGMAC_RQ_SHIFT; + writel(value, ioaddr + XGMAC_RXQ_CTRL1); + + value = readl(ioaddr + XGMAC_FPE_CTRL_STS); + value |= XGMAC_EFPE; + writel(value, ioaddr + XGMAC_FPE_CTRL_STS); +} + const struct stmmac_ops dwxgmac210_ops = { .core_init = dwxgmac2_core_init, .set_mac = dwxgmac2_set_mac, @@ -1402,6 +1476,8 @@ const struct stmmac_ops dwxgmac210_ops = { .config_l3_filter = dwxgmac2_config_l3_filter, .config_l4_filter = dwxgmac2_config_l4_filter, .set_arp_offload = dwxgmac2_set_arp_offload, + .est_configure = dwxgmac3_est_configure, + .fpe_configure = dwxgmac3_fpe_configure, }; int dwxgmac2_setup(struct stmmac_priv *priv) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index 22a7f0cc1b90..c1ca73ebb0e7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -248,14 +248,30 @@ static void dwxgmac2_dma_tx_mode(void __iomem *ioaddr, int mode, writel(value, ioaddr + XGMAC_MTL_TXQ_OPMODE(channel)); } -static void dwxgmac2_enable_dma_irq(void __iomem *ioaddr, u32 chan) +static void dwxgmac2_enable_dma_irq(void __iomem *ioaddr, u32 chan, + bool rx, bool tx) { - writel(XGMAC_DMA_INT_DEFAULT_EN, ioaddr + XGMAC_DMA_CH_INT_EN(chan)); + u32 value = readl(ioaddr + XGMAC_DMA_CH_INT_EN(chan)); + + if (rx) + value |= XGMAC_DMA_INT_DEFAULT_RX; + if (tx) + value |= XGMAC_DMA_INT_DEFAULT_TX; + + writel(value, ioaddr + XGMAC_DMA_CH_INT_EN(chan)); } -static void dwxgmac2_disable_dma_irq(void __iomem *ioaddr, u32 chan) +static void dwxgmac2_disable_dma_irq(void __iomem *ioaddr, u32 chan, + bool rx, bool tx) { - writel(0, ioaddr + XGMAC_DMA_CH_INT_EN(chan)); + u32 value = readl(ioaddr + XGMAC_DMA_CH_INT_EN(chan)); + + if (rx) + value &= ~XGMAC_DMA_INT_DEFAULT_RX; + if (tx) + value &= ~XGMAC_DMA_INT_DEFAULT_TX; + + writel(value, ioaddr + XGMAC_DMA_CH_INT_EN(chan)); } static void dwxgmac2_dma_start_tx(void __iomem *ioaddr, u32 chan) @@ -413,6 +429,10 @@ static void dwxgmac2_get_hw_feature(void __iomem *ioaddr, /* MAC HW feature 3 */ hw_cap = readl(ioaddr + XGMAC_HW_FEATURE3); + dma_cap->fpesel = (hw_cap & XGMAC_HWFEAT_FPESEL) >> 26; + dma_cap->estwid = (hw_cap & XGMAC_HWFEAT_ESTWID) >> 23; + dma_cap->estdep = (hw_cap & XGMAC_HWFEAT_ESTDEP) >> 20; + dma_cap->estsel = (hw_cap & XGMAC_HWFEAT_ESTSEL) >> 19; dma_cap->asp = (hw_cap & XGMAC_HWFEAT_ASP) >> 14; dma_cap->dvlan = (hw_cap & XGMAC_HWFEAT_DVLAN) >> 13; dma_cap->frpes = (hw_cap & XGMAC_HWFEAT_FRPES) >> 11; diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index aa5b917398fe..905a6f0edaca 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -187,8 +187,10 @@ struct stmmac_dma_ops { void (*dma_diagnostic_fr) (void *data, struct stmmac_extra_stats *x, void __iomem *ioaddr); void (*enable_dma_transmission) (void __iomem *ioaddr); - void (*enable_dma_irq)(void __iomem *ioaddr, u32 chan); - void (*disable_dma_irq)(void __iomem *ioaddr, u32 chan); + void (*enable_dma_irq)(void __iomem *ioaddr, u32 chan, + bool rx, bool tx); + void (*disable_dma_irq)(void __iomem *ioaddr, u32 chan, + bool rx, bool tx); void (*start_tx)(void __iomem *ioaddr, u32 chan); void (*stop_tx)(void __iomem *ioaddr, u32 chan); void (*start_rx)(void __iomem *ioaddr, u32 chan); @@ -274,6 +276,7 @@ struct stmmac_safety_stats; struct stmmac_tc_entry; struct stmmac_pps_cfg; struct stmmac_rss; +struct stmmac_est; /* Helpers to program the MAC core */ struct stmmac_ops { @@ -371,6 +374,10 @@ struct stmmac_ops { bool en, bool udp, bool sa, bool inv, u32 match); void (*set_arp_offload)(struct mac_device_info *hw, bool en, u32 addr); + int (*est_configure)(void __iomem *ioaddr, struct stmmac_est *cfg, + unsigned int ptp_rate); + void (*fpe_configure)(void __iomem *ioaddr, u32 num_txq, u32 num_rxq, + bool enable); }; #define stmmac_core_init(__priv, __args...) \ @@ -457,6 +464,10 @@ struct stmmac_ops { stmmac_do_callback(__priv, mac, config_l4_filter, __args) #define stmmac_set_arp_offload(__priv, __args...) \ stmmac_do_void_callback(__priv, mac, set_arp_offload, __args) +#define stmmac_est_configure(__priv, __args...) \ + stmmac_do_callback(__priv, mac, est_configure, __args) +#define stmmac_fpe_configure(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, fpe_configure, __args) /* PTP and HW Timer helpers */ struct stmmac_hwtimestamp { @@ -514,6 +525,7 @@ struct stmmac_priv; struct tc_cls_u32_offload; struct tc_cbs_qopt_offload; struct flow_cls_offload; +struct tc_taprio_qopt_offload; struct stmmac_tc_ops { int (*init)(struct stmmac_priv *priv); @@ -523,6 +535,8 @@ struct stmmac_tc_ops { struct tc_cbs_qopt_offload *qopt); int (*setup_cls)(struct stmmac_priv *priv, struct flow_cls_offload *cls); + int (*setup_taprio)(struct stmmac_priv *priv, + struct tc_taprio_qopt_offload *qopt); }; #define stmmac_tc_init(__priv, __args...) \ @@ -533,6 +547,8 @@ struct stmmac_tc_ops { stmmac_do_callback(__priv, tc, setup_cbs, __args) #define stmmac_tc_setup_cls(__priv, __args...) \ stmmac_do_callback(__priv, tc, setup_cls, __args) +#define stmmac_tc_setup_taprio(__priv, __args...) \ + stmmac_do_callback(__priv, tc, setup_taprio, __args) struct stmmac_counters; diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c index 252cf48c5816..a57b0fa815ab 100644 --- a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c @@ -119,6 +119,13 @@ #define MMC_RX_ICMP_GD_OCTETS 0x180 #define MMC_RX_ICMP_ERR_OCTETS 0x184 +#define MMC_TX_FPE_FRAG 0x1a8 +#define MMC_TX_HOLD_REQ 0x1ac +#define MMC_RX_PKT_ASSEMBLY_ERR 0x1c8 +#define MMC_RX_PKT_SMD_ERR 0x1cc +#define MMC_RX_PKT_ASSEMBLY_OK 0x1d0 +#define MMC_RX_FPE_FRAG 0x1d4 + /* XGMAC MMC Registers */ #define MMC_XGMAC_TX_OCTET_GB 0x14 #define MMC_XGMAC_TX_PKT_GB 0x1c @@ -315,6 +322,15 @@ static void dwmac_mmc_read(void __iomem *mmcaddr, struct stmmac_counters *mmc) mmc->mmc_rx_tcp_err_octets += readl(mmcaddr + MMC_RX_TCP_ERR_OCTETS); mmc->mmc_rx_icmp_gd_octets += readl(mmcaddr + MMC_RX_ICMP_GD_OCTETS); mmc->mmc_rx_icmp_err_octets += readl(mmcaddr + MMC_RX_ICMP_ERR_OCTETS); + + mmc->mmc_tx_fpe_fragment_cntr += readl(mmcaddr + MMC_TX_FPE_FRAG); + mmc->mmc_tx_hold_req_cntr += readl(mmcaddr + MMC_TX_HOLD_REQ); + mmc->mmc_rx_packet_assembly_err_cntr += + readl(mmcaddr + MMC_RX_PKT_ASSEMBLY_ERR); + mmc->mmc_rx_packet_smd_err_cntr += readl(mmcaddr + MMC_RX_PKT_SMD_ERR); + mmc->mmc_rx_packet_assembly_ok_cntr += + readl(mmcaddr + MMC_RX_PKT_ASSEMBLY_OK); + mmc->mmc_rx_fpe_fragment_cntr += readl(mmcaddr + MMC_RX_FPE_FRAG); } const struct stmmac_mmc_ops dwmac_mmc_ops = { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index d993fc7e82c3..f98c5eefb382 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -88,6 +88,7 @@ struct stmmac_channel { struct napi_struct rx_napi ____cacheline_aligned_in_smp; struct napi_struct tx_napi ____cacheline_aligned_in_smp; struct stmmac_priv *priv_data; + spinlock_t lock; u32 index; }; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index bbc65bd332a8..18a959589cbc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1975,7 +1975,7 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue) /* We still have pending packets, let's call for a new scheduling */ if (tx_q->dirty_tx != tx_q->cur_tx) - mod_timer(&tx_q->txtimer, STMMAC_COAL_TIMER(10)); + mod_timer(&tx_q->txtimer, STMMAC_COAL_TIMER(priv->tx_coal_timer)); __netif_tx_unlock_bh(netdev_get_tx_queue(priv->dev, queue)); @@ -2069,17 +2069,25 @@ static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan) int status = stmmac_dma_interrupt_status(priv, priv->ioaddr, &priv->xstats, chan); struct stmmac_channel *ch = &priv->channel[chan]; + unsigned long flags; if ((status & handle_rx) && (chan < priv->plat->rx_queues_to_use)) { if (napi_schedule_prep(&ch->rx_napi)) { - stmmac_disable_dma_irq(priv, priv->ioaddr, chan); + spin_lock_irqsave(&ch->lock, flags); + stmmac_disable_dma_irq(priv, priv->ioaddr, chan, 1, 0); + spin_unlock_irqrestore(&ch->lock, flags); __napi_schedule_irqoff(&ch->rx_napi); - status |= handle_tx; } } - if ((status & handle_tx) && (chan < priv->plat->tx_queues_to_use)) - napi_schedule_irqoff(&ch->tx_napi); + if ((status & handle_tx) && (chan < priv->plat->tx_queues_to_use)) { + if (napi_schedule_prep(&ch->tx_napi)) { + spin_lock_irqsave(&ch->lock, flags); + stmmac_disable_dma_irq(priv, priv->ioaddr, chan, 0, 1); + spin_unlock_irqrestore(&ch->lock, flags); + __napi_schedule_irqoff(&ch->tx_napi); + } + } return status; } @@ -2274,14 +2282,14 @@ static void stmmac_tx_timer(struct timer_list *t) ch = &priv->channel[tx_q->queue_index]; - /* - * If NAPI is already running we can miss some events. Let's rearm - * the timer and try again. - */ - if (likely(napi_schedule_prep(&ch->tx_napi))) + if (likely(napi_schedule_prep(&ch->tx_napi))) { + unsigned long flags; + + spin_lock_irqsave(&ch->lock, flags); + stmmac_disable_dma_irq(priv, priv->ioaddr, ch->index, 0, 1); + spin_unlock_irqrestore(&ch->lock, flags); __napi_schedule(&ch->tx_napi); - else - mod_timer(&tx_q->txtimer, STMMAC_COAL_TIMER(10)); + } } /** @@ -3751,8 +3759,14 @@ static int stmmac_napi_poll_rx(struct napi_struct *napi, int budget) priv->xstats.napi_poll++; work_done = stmmac_rx(priv, budget, chan); - if (work_done < budget && napi_complete_done(napi, work_done)) - stmmac_enable_dma_irq(priv, priv->ioaddr, chan); + if (work_done < budget && napi_complete_done(napi, work_done)) { + unsigned long flags; + + spin_lock_irqsave(&ch->lock, flags); + stmmac_enable_dma_irq(priv, priv->ioaddr, chan, 1, 0); + spin_unlock_irqrestore(&ch->lock, flags); + } + return work_done; } @@ -3761,7 +3775,6 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget) struct stmmac_channel *ch = container_of(napi, struct stmmac_channel, tx_napi); struct stmmac_priv *priv = ch->priv_data; - struct stmmac_tx_queue *tx_q; u32 chan = ch->index; int work_done; @@ -3770,15 +3783,12 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget) work_done = stmmac_tx_clean(priv, DMA_TX_SIZE, chan); work_done = min(work_done, budget); - if (work_done < budget) - napi_complete_done(napi, work_done); + if (work_done < budget && napi_complete_done(napi, work_done)) { + unsigned long flags; - /* Force transmission restart */ - tx_q = &priv->tx_queue[chan]; - if (tx_q->cur_tx != tx_q->dirty_tx) { - stmmac_enable_dma_transmission(priv, priv->ioaddr); - stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, - chan); + spin_lock_irqsave(&ch->lock, flags); + stmmac_enable_dma_irq(priv, priv->ioaddr, chan, 0, 1); + spin_unlock_irqrestore(&ch->lock, flags); } return work_done; @@ -3792,7 +3802,7 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget) * netdev structure and arrange for the device to be reset to a sane state * in order to transmit a new packet. */ -static void stmmac_tx_timeout(struct net_device *dev) +static void stmmac_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct stmmac_priv *priv = netdev_priv(dev); @@ -4066,6 +4076,8 @@ static int stmmac_setup_tc(struct net_device *ndev, enum tc_setup_type type, priv, priv, true); case TC_SETUP_QDISC_CBS: return stmmac_tc_setup_cbs(priv, priv, type_data); + case TC_SETUP_QDISC_TAPRIO: + return stmmac_tc_setup_taprio(priv, priv, type_data); default: return -EOPNOTSUPP; } @@ -4238,9 +4250,38 @@ static int stmmac_dma_cap_show(struct seq_file *seq, void *v) priv->dma_cap.number_rx_channel); seq_printf(seq, "\tNumber of Additional TX channel: %d\n", priv->dma_cap.number_tx_channel); + seq_printf(seq, "\tNumber of Additional RX queues: %d\n", + priv->dma_cap.number_rx_queues); + seq_printf(seq, "\tNumber of Additional TX queues: %d\n", + priv->dma_cap.number_tx_queues); seq_printf(seq, "\tEnhanced descriptors: %s\n", (priv->dma_cap.enh_desc) ? "Y" : "N"); - + seq_printf(seq, "\tTX Fifo Size: %d\n", priv->dma_cap.tx_fifo_size); + seq_printf(seq, "\tRX Fifo Size: %d\n", priv->dma_cap.rx_fifo_size); + seq_printf(seq, "\tHash Table Size: %d\n", priv->dma_cap.hash_tb_sz); + seq_printf(seq, "\tTSO: %s\n", priv->dma_cap.tsoen ? "Y" : "N"); + seq_printf(seq, "\tNumber of PPS Outputs: %d\n", + priv->dma_cap.pps_out_num); + seq_printf(seq, "\tSafety Features: %s\n", + priv->dma_cap.asp ? "Y" : "N"); + seq_printf(seq, "\tFlexible RX Parser: %s\n", + priv->dma_cap.frpsel ? "Y" : "N"); + seq_printf(seq, "\tEnhanced Addressing: %d\n", + priv->dma_cap.addr64); + seq_printf(seq, "\tReceive Side Scaling: %s\n", + priv->dma_cap.rssen ? "Y" : "N"); + seq_printf(seq, "\tVLAN Hash Filtering: %s\n", + priv->dma_cap.vlhash ? "Y" : "N"); + seq_printf(seq, "\tSplit Header: %s\n", + priv->dma_cap.sphen ? "Y" : "N"); + seq_printf(seq, "\tVLAN TX Insertion: %s\n", + priv->dma_cap.vlins ? "Y" : "N"); + seq_printf(seq, "\tDouble VLAN: %s\n", + priv->dma_cap.dvlan ? "Y" : "N"); + seq_printf(seq, "\tNumber of L3/L4 Filters: %d\n", + priv->dma_cap.l3l4fnum); + seq_printf(seq, "\tARP Offloading: %s\n", + priv->dma_cap.arpoffsel ? "Y" : "N"); return 0; } DEFINE_SHOW_ATTRIBUTE(stmmac_dma_cap); @@ -4685,6 +4726,7 @@ int stmmac_dvr_probe(struct device *device, for (queue = 0; queue < maxq; queue++) { struct stmmac_channel *ch = &priv->channel[queue]; + spin_lock_init(&ch->lock); ch->priv_data = priv; ch->index = queue; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 7d972e0fd2b0..6c4686b77516 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -591,9 +591,146 @@ static int tc_setup_cls(struct stmmac_priv *priv, return ret; } +static int tc_setup_taprio(struct stmmac_priv *priv, + struct tc_taprio_qopt_offload *qopt) +{ + u32 size, wid = priv->dma_cap.estwid, dep = priv->dma_cap.estdep; + struct plat_stmmacenet_data *plat = priv->plat; + struct timespec64 time; + bool fpe = false; + int i, ret = 0; + u64 ctr; + + if (!priv->dma_cap.estsel) + return -EOPNOTSUPP; + + switch (wid) { + case 0x1: + wid = 16; + break; + case 0x2: + wid = 20; + break; + case 0x3: + wid = 24; + break; + default: + return -EOPNOTSUPP; + } + + switch (dep) { + case 0x1: + dep = 64; + break; + case 0x2: + dep = 128; + break; + case 0x3: + dep = 256; + break; + case 0x4: + dep = 512; + break; + case 0x5: + dep = 1024; + break; + default: + return -EOPNOTSUPP; + } + + if (!qopt->enable) + goto disable; + if (qopt->num_entries >= dep) + return -EINVAL; + if (!qopt->base_time) + return -ERANGE; + if (!qopt->cycle_time) + return -ERANGE; + + if (!plat->est) { + plat->est = devm_kzalloc(priv->device, sizeof(*plat->est), + GFP_KERNEL); + if (!plat->est) + return -ENOMEM; + } else { + memset(plat->est, 0, sizeof(*plat->est)); + } + + size = qopt->num_entries; + + priv->plat->est->gcl_size = size; + priv->plat->est->enable = qopt->enable; + + for (i = 0; i < size; i++) { + s64 delta_ns = qopt->entries[i].interval; + u32 gates = qopt->entries[i].gate_mask; + + if (delta_ns > GENMASK(wid, 0)) + return -ERANGE; + if (gates > GENMASK(31 - wid, 0)) + return -ERANGE; + + switch (qopt->entries[i].command) { + case TC_TAPRIO_CMD_SET_GATES: + if (fpe) + return -EINVAL; + break; + case TC_TAPRIO_CMD_SET_AND_HOLD: + gates |= BIT(0); + fpe = true; + break; + case TC_TAPRIO_CMD_SET_AND_RELEASE: + gates &= ~BIT(0); + fpe = true; + break; + default: + return -EOPNOTSUPP; + } + + priv->plat->est->gcl[i] = delta_ns | (gates << wid); + } + + /* Adjust for real system time */ + time = ktime_to_timespec64(qopt->base_time); + priv->plat->est->btr[0] = (u32)time.tv_nsec; + priv->plat->est->btr[1] = (u32)time.tv_sec; + + ctr = qopt->cycle_time; + priv->plat->est->ctr[0] = do_div(ctr, NSEC_PER_SEC); + priv->plat->est->ctr[1] = (u32)ctr; + + if (fpe && !priv->dma_cap.fpesel) + return -EOPNOTSUPP; + + ret = stmmac_fpe_configure(priv, priv->ioaddr, + priv->plat->tx_queues_to_use, + priv->plat->rx_queues_to_use, fpe); + if (ret && fpe) { + netdev_err(priv->dev, "failed to enable Frame Preemption\n"); + return ret; + } + + ret = stmmac_est_configure(priv, priv->ioaddr, priv->plat->est, + priv->plat->clk_ptp_rate); + if (ret) { + netdev_err(priv->dev, "failed to configure EST\n"); + goto disable; + } + + netdev_info(priv->dev, "configured EST\n"); + return 0; + +disable: + priv->plat->est->enable = false; + stmmac_est_configure(priv, priv->ioaddr, priv->plat->est, + priv->plat->clk_ptp_rate); + return ret; +} + const struct stmmac_tc_ops dwmac510_tc_ops = { .init = tc_init, .setup_cls_u32 = tc_setup_cls_u32, .setup_cbs = tc_setup_cbs, .setup_cls = tc_setup_cls, + .setup_taprio = tc_setup_taprio, }; diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index c91876f8c536..6ec9163e232c 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -2666,7 +2666,7 @@ static void cas_netpoll(struct net_device *dev) } #endif -static void cas_tx_timeout(struct net_device *dev) +static void cas_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct cas *cp = netdev_priv(dev); diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index f5fd1f3c07cc..9a5004f674c7 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -6517,7 +6517,7 @@ static void niu_reset_task(struct work_struct *work) spin_unlock_irqrestore(&np->lock, flags); } -static void niu_tx_timeout(struct net_device *dev) +static void niu_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct niu *np = netdev_priv(dev); diff --git a/drivers/net/ethernet/sun/sunbmac.c b/drivers/net/ethernet/sun/sunbmac.c index e9b757b03b56..c5add0b45eed 100644 --- a/drivers/net/ethernet/sun/sunbmac.c +++ b/drivers/net/ethernet/sun/sunbmac.c @@ -941,7 +941,7 @@ static int bigmac_close(struct net_device *dev) return 0; } -static void bigmac_tx_timeout(struct net_device *dev) +static void bigmac_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct bigmac *bp = netdev_priv(dev); diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c index 3e7631160384..8358064fbd48 100644 --- a/drivers/net/ethernet/sun/sungem.c +++ b/drivers/net/ethernet/sun/sungem.c @@ -970,7 +970,7 @@ static void gem_poll_controller(struct net_device *dev) } #endif -static void gem_tx_timeout(struct net_device *dev) +static void gem_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct gem *gp = netdev_priv(dev); diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c index d007dfeba5c3..f0fe7bb2a750 100644 --- a/drivers/net/ethernet/sun/sunhme.c +++ b/drivers/net/ethernet/sun/sunhme.c @@ -2246,7 +2246,7 @@ static int happy_meal_close(struct net_device *dev) #define SXD(x) #endif -static void happy_meal_tx_timeout(struct net_device *dev) +static void happy_meal_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct happy_meal *hp = netdev_priv(dev); diff --git a/drivers/net/ethernet/sun/sunqe.c b/drivers/net/ethernet/sun/sunqe.c index 1468fa0a54e9..2102b95ec347 100644 --- a/drivers/net/ethernet/sun/sunqe.c +++ b/drivers/net/ethernet/sun/sunqe.c @@ -544,7 +544,7 @@ static void qe_tx_reclaim(struct sunqe *qep) qep->tx_old = elem; } -static void qe_tx_timeout(struct net_device *dev) +static void qe_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct sunqe *qep = netdev_priv(dev); int tx_full; diff --git a/drivers/net/ethernet/sun/sunvnet_common.c b/drivers/net/ethernet/sun/sunvnet_common.c index 8b94d9ad9e2b..a601a306f9a5 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.c +++ b/drivers/net/ethernet/sun/sunvnet_common.c @@ -1539,7 +1539,7 @@ out_dropped: } EXPORT_SYMBOL_GPL(sunvnet_start_xmit_common); -void sunvnet_tx_timeout_common(struct net_device *dev) +void sunvnet_tx_timeout_common(struct net_device *dev, unsigned int txqueue) { /* XXX Implement me XXX */ } diff --git a/drivers/net/ethernet/sun/sunvnet_common.h b/drivers/net/ethernet/sun/sunvnet_common.h index 2b808d2482d6..5416a3cb9e7d 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.h +++ b/drivers/net/ethernet/sun/sunvnet_common.h @@ -135,7 +135,7 @@ int sunvnet_open_common(struct net_device *dev); int sunvnet_close_common(struct net_device *dev); void sunvnet_set_rx_mode_common(struct net_device *dev, struct vnet *vp); int sunvnet_set_mac_addr_common(struct net_device *dev, void *p); -void sunvnet_tx_timeout_common(struct net_device *dev); +void sunvnet_tx_timeout_common(struct net_device *dev, unsigned int txqueue); netdev_tx_t sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev, struct vnet_port *(*vnet_tx_port) diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c index a1f5a1e61040..07046a2370b3 100644 --- a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c +++ b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c @@ -689,7 +689,7 @@ static int xlgmac_close(struct net_device *netdev) return 0; } -static void xlgmac_tx_timeout(struct net_device *netdev) +static void xlgmac_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct xlgmac_pdata *pdata = netdev_priv(netdev); diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c index 3a655a4dc10e..5e1b8292cd3f 100644 --- a/drivers/net/ethernet/ti/cpmac.c +++ b/drivers/net/ethernet/ti/cpmac.c @@ -797,7 +797,7 @@ static irqreturn_t cpmac_irq(int irq, void *dev_id) return IRQ_HANDLED; } -static void cpmac_tx_timeout(struct net_device *dev) +static void cpmac_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct cpmac_priv *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c index 707d5eb480ce..97a058ca60ac 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.c +++ b/drivers/net/ethernet/ti/cpsw_priv.c @@ -272,7 +272,7 @@ void soft_reset(const char *module, void __iomem *reg) WARN(readl_relaxed(reg) & 1, "failed to soft-reset %s\n", module); } -void cpsw_ndo_tx_timeout(struct net_device *ndev) +void cpsw_ndo_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct cpsw_priv *priv = netdev_priv(ndev); struct cpsw_common *cpsw = priv->cpsw; diff --git a/drivers/net/ethernet/ti/cpsw_priv.h b/drivers/net/ethernet/ti/cpsw_priv.h index bc726356a72c..b8d7b924ee3d 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.h +++ b/drivers/net/ethernet/ti/cpsw_priv.h @@ -449,7 +449,7 @@ int cpsw_rx_poll(struct napi_struct *napi_rx, int budget); void cpsw_rx_vlan_encap(struct sk_buff *skb); void soft_reset(const char *module, void __iomem *reg); void cpsw_set_slave_mac(struct cpsw_slave *slave, struct cpsw_priv *priv); -void cpsw_ndo_tx_timeout(struct net_device *ndev); +void cpsw_ndo_tx_timeout(struct net_device *ndev, unsigned int txqueue); int cpsw_need_resplit(struct cpsw_common *cpsw); int cpsw_ndo_ioctl(struct net_device *dev, struct ifreq *req, int cmd); int cpsw_ndo_set_tx_maxrate(struct net_device *ndev, int queue, u32 rate); diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index ae27be85e363..75d4e16c692b 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -983,7 +983,7 @@ fail_tx: * error and re-initialize the TX channel for hardware operation * */ -static void emac_dev_tx_timeout(struct net_device *ndev) +static void emac_dev_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct emac_priv *priv = netdev_priv(ndev); struct device *emac_dev = &ndev->dev; diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 1b2702f74455..432645e86495 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -1811,7 +1811,7 @@ out: return (ret == 0) ? 0 : err; } -static void netcp_ndo_tx_timeout(struct net_device *ndev) +static void netcp_ndo_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct netcp_intf *netcp = netdev_priv(ndev); unsigned int descs = knav_pool_count(netcp->tx_pool); diff --git a/drivers/net/ethernet/ti/tlan.c b/drivers/net/ethernet/ti/tlan.c index 78f0f2d59e22..ad465202980a 100644 --- a/drivers/net/ethernet/ti/tlan.c +++ b/drivers/net/ethernet/ti/tlan.c @@ -161,7 +161,7 @@ static void tlan_set_multicast_list(struct net_device *); static int tlan_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static int tlan_probe1(struct pci_dev *pdev, long ioaddr, int irq, int rev, const struct pci_device_id *ent); -static void tlan_tx_timeout(struct net_device *dev); +static void tlan_tx_timeout(struct net_device *dev, unsigned int txqueue); static void tlan_tx_timeout_work(struct work_struct *work); static int tlan_init_one(struct pci_dev *pdev, const struct pci_device_id *ent); @@ -997,7 +997,7 @@ static int tlan_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) * **************************************************************/ -static void tlan_tx_timeout(struct net_device *dev) +static void tlan_tx_timeout(struct net_device *dev, unsigned int txqueue) { TLAN_DBG(TLAN_DEBUG_GNRL, "%s: Transmit timed out.\n", dev->name); @@ -1028,7 +1028,7 @@ static void tlan_tx_timeout_work(struct work_struct *work) struct tlan_priv *priv = container_of(work, struct tlan_priv, tlan_tqueue); - tlan_tx_timeout(priv->dev); + tlan_tx_timeout(priv->dev, UINT_MAX); } diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c index 9d9f8acb7ee3..070dd6fa9401 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c @@ -1405,7 +1405,7 @@ out: * * called, if tx hangs. Schedules a task that resets the interface */ -void gelic_net_tx_timeout(struct net_device *netdev) +void gelic_net_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct gelic_card *card; diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.h b/drivers/net/ethernet/toshiba/ps3_gelic_net.h index 051033580f0a..805903dbddcc 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.h +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.h @@ -359,7 +359,7 @@ int gelic_net_open(struct net_device *netdev); int gelic_net_stop(struct net_device *netdev); netdev_tx_t gelic_net_xmit(struct sk_buff *skb, struct net_device *netdev); void gelic_net_set_multi(struct net_device *netdev); -void gelic_net_tx_timeout(struct net_device *netdev); +void gelic_net_tx_timeout(struct net_device *netdev, unsigned int txqueue); int gelic_net_setup_netdev(struct net_device *netdev, struct gelic_card *card); /* shared ethtool ops */ diff --git a/drivers/net/ethernet/toshiba/spider_net.c b/drivers/net/ethernet/toshiba/spider_net.c index 538e70810d3d..6576271642c1 100644 --- a/drivers/net/ethernet/toshiba/spider_net.c +++ b/drivers/net/ethernet/toshiba/spider_net.c @@ -2180,7 +2180,7 @@ out: * called, if tx hangs. Schedules a task that resets the interface */ static void -spider_net_tx_timeout(struct net_device *netdev) +spider_net_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct spider_net_card *card; diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c index 12466a72cefc..708de826200e 100644 --- a/drivers/net/ethernet/toshiba/tc35815.c +++ b/drivers/net/ethernet/toshiba/tc35815.c @@ -483,7 +483,7 @@ static void tc35815_txdone(struct net_device *dev); static int tc35815_close(struct net_device *dev); static struct net_device_stats *tc35815_get_stats(struct net_device *dev); static void tc35815_set_multicast_list(struct net_device *dev); -static void tc35815_tx_timeout(struct net_device *dev); +static void tc35815_tx_timeout(struct net_device *dev, unsigned int txqueue); static int tc35815_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); #ifdef CONFIG_NET_POLL_CONTROLLER static void tc35815_poll_controller(struct net_device *dev); @@ -1189,7 +1189,7 @@ static void tc35815_schedule_restart(struct net_device *dev) spin_unlock_irqrestore(&lp->lock, flags); } -static void tc35815_tx_timeout(struct net_device *dev) +static void tc35815_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct tc35815_regs __iomem *tr = (struct tc35815_regs __iomem *)dev->base_addr; diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index ed12dbd156f0..803247d51fe9 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -506,7 +506,7 @@ static void mdio_write(struct net_device *dev, int phy_id, int location, int val static int rhine_open(struct net_device *dev); static void rhine_reset_task(struct work_struct *work); static void rhine_slow_event_task(struct work_struct *work); -static void rhine_tx_timeout(struct net_device *dev); +static void rhine_tx_timeout(struct net_device *dev, unsigned int txqueue); static netdev_tx_t rhine_start_tx(struct sk_buff *skb, struct net_device *dev); static irqreturn_t rhine_interrupt(int irq, void *dev_instance); @@ -1761,7 +1761,7 @@ out_unlock: mutex_unlock(&rp->task_lock); } -static void rhine_tx_timeout(struct net_device *dev) +static void rhine_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct rhine_private *rp = netdev_priv(dev); void __iomem *ioaddr = rp->base; diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c index bede1ff289c5..c0d181a7f83a 100644 --- a/drivers/net/ethernet/wiznet/w5100.c +++ b/drivers/net/ethernet/wiznet/w5100.c @@ -790,7 +790,7 @@ static void w5100_restart_work(struct work_struct *work) w5100_restart(priv->ndev); } -static void w5100_tx_timeout(struct net_device *ndev) +static void w5100_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct w5100_priv *priv = netdev_priv(ndev); diff --git a/drivers/net/ethernet/wiznet/w5300.c b/drivers/net/ethernet/wiznet/w5300.c index 6ba2747779ce..46aae30c4636 100644 --- a/drivers/net/ethernet/wiznet/w5300.c +++ b/drivers/net/ethernet/wiznet/w5300.c @@ -341,7 +341,7 @@ static void w5300_get_regs(struct net_device *ndev, } } -static void w5300_tx_timeout(struct net_device *ndev) +static void w5300_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct w5300_priv *priv = netdev_priv(ndev); diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index 0de52e70abcc..0c26f5bcc523 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -521,7 +521,7 @@ static int xemaclite_set_mac_address(struct net_device *dev, void *address) * * This function is called when Tx time out occurs for Emaclite device. */ -static void xemaclite_tx_timeout(struct net_device *dev) +static void xemaclite_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct net_local *lp = netdev_priv(dev); unsigned long flags; diff --git a/drivers/net/ethernet/xircom/xirc2ps_cs.c b/drivers/net/ethernet/xircom/xirc2ps_cs.c index fd5288ff53b5..480ab7251515 100644 --- a/drivers/net/ethernet/xircom/xirc2ps_cs.c +++ b/drivers/net/ethernet/xircom/xirc2ps_cs.c @@ -288,7 +288,7 @@ struct local_info { */ static netdev_tx_t do_start_xmit(struct sk_buff *skb, struct net_device *dev); -static void xirc_tx_timeout(struct net_device *dev); +static void xirc_tx_timeout(struct net_device *dev, unsigned int txqueue); static void xirc2ps_tx_timeout_task(struct work_struct *work); static void set_addresses(struct net_device *dev); static void set_multicast_list(struct net_device *dev); @@ -1203,7 +1203,7 @@ xirc2ps_tx_timeout_task(struct work_struct *work) } static void -xirc_tx_timeout(struct net_device *dev) +xirc_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct local_info *lp = netdev_priv(dev); dev->stats.tx_errors++; diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c index b517c1af9de0..309a74da2ec3 100644 --- a/drivers/net/fjes/fjes_main.c +++ b/drivers/net/fjes/fjes_main.c @@ -48,7 +48,7 @@ static void fjes_get_stats64(struct net_device *, struct rtnl_link_stats64 *); static int fjes_change_mtu(struct net_device *, int); static int fjes_vlan_rx_add_vid(struct net_device *, __be16 proto, u16); static int fjes_vlan_rx_kill_vid(struct net_device *, __be16 proto, u16); -static void fjes_tx_retry(struct net_device *); +static void fjes_tx_retry(struct net_device *, unsigned int txqueue); static int fjes_acpi_add(struct acpi_device *); static int fjes_acpi_remove(struct acpi_device *); @@ -792,7 +792,7 @@ fjes_xmit_frame(struct sk_buff *skb, struct net_device *netdev) return ret; } -static void fjes_tx_retry(struct net_device *netdev) +static void fjes_tx_retry(struct net_device *netdev, unsigned int txqueue) { struct netdev_queue *queue = netdev_get_tx_queue(netdev, 0); diff --git a/drivers/net/hamradio/hdlcdrv.c b/drivers/net/hamradio/hdlcdrv.c index df495b5595f5..e7413a643929 100644 --- a/drivers/net/hamradio/hdlcdrv.c +++ b/drivers/net/hamradio/hdlcdrv.c @@ -687,8 +687,6 @@ struct net_device *hdlcdrv_register(const struct hdlcdrv_ops *ops, struct hdlcdrv_state *s; int err; - BUG_ON(ops == NULL); - if (privsize < sizeof(struct hdlcdrv_state)) privsize = sizeof(struct hdlcdrv_state); diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c index 13540dee7364..4e02a4231fcb 100644 --- a/drivers/net/netdevsim/fib.c +++ b/drivers/net/netdevsim/fib.c @@ -177,10 +177,10 @@ static int nsim_fib_event_nb(struct notifier_block *nb, unsigned long event, event == FIB_EVENT_RULE_ADD); break; + case FIB_EVENT_ENTRY_REPLACE: /* fall through */ case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_DEL: - err = nsim_fib_event(data, info, - event == FIB_EVENT_ENTRY_ADD); + err = nsim_fib_event(data, info, event != FIB_EVENT_ENTRY_DEL); break; } diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 5848219005d7..2e016271e126 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -324,6 +324,12 @@ config BROADCOM_PHY Currently supports the BCM5411, BCM5421, BCM5461, BCM54616S, BCM5464, BCM5481, BCM54810 and BCM5482 PHYs. +config BCM84881_PHY + bool "Broadcom BCM84881 PHY" + depends on PHYLIB=y + ---help--- + Support the Broadcom BCM84881 PHY. + config CICADA_PHY tristate "Cicada PHYs" ---help--- diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index b433ec3bf9a6..d846b4dc1c68 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -62,6 +62,7 @@ obj-$(CONFIG_BCM87XX_PHY) += bcm87xx.o obj-$(CONFIG_BCM_CYGNUS_PHY) += bcm-cygnus.o obj-$(CONFIG_BCM_NET_PHYLIB) += bcm-phy-lib.o obj-$(CONFIG_BROADCOM_PHY) += broadcom.o +obj-$(CONFIG_BCM84881_PHY) += bcm84881.o obj-$(CONFIG_CICADA_PHY) += cicada.o obj-$(CONFIG_CORTINA_PHY) += cortina.o obj-$(CONFIG_DAVICOM_PHY) += davicom.o diff --git a/drivers/net/phy/bcm84881.c b/drivers/net/phy/bcm84881.c new file mode 100644 index 000000000000..db59911b9b3c --- /dev/null +++ b/drivers/net/phy/bcm84881.c @@ -0,0 +1,269 @@ +// SPDX-License-Identifier: GPL-2.0 +// Broadcom BCM84881 NBASE-T PHY driver, as found on a SFP+ module. +// Copyright (C) 2019 Russell King, Deep Blue Solutions Ltd. +// +// Like the Marvell 88x3310, the Broadcom 84881 changes its host-side +// interface according to the operating speed between 10GBASE-R, +// 2500BASE-X and SGMII (but unlike the 88x3310, without the control +// word). +// +// This driver only supports those aspects of the PHY that I'm able to +// observe and test with the SFP+ module, which is an incomplete subset +// of what this PHY is able to support. For example, I only assume it +// supports a single lane Serdes connection, but it may be that the PHY +// is able to support more than that. +#include <linux/delay.h> +#include <linux/module.h> +#include <linux/phy.h> + +enum { + MDIO_AN_C22 = 0xffe0, +}; + +static int bcm84881_wait_init(struct phy_device *phydev) +{ + unsigned int tries = 20; + int ret, val; + + do { + val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_CTRL1); + if (val < 0) { + ret = val; + break; + } + if (!(val & MDIO_CTRL1_RESET)) { + ret = 0; + break; + } + if (!--tries) { + ret = -ETIMEDOUT; + break; + } + msleep(100); + } while (1); + + if (ret) + phydev_err(phydev, "%s failed: %d\n", __func__, ret); + + return ret; +} + +static int bcm84881_config_init(struct phy_device *phydev) +{ + switch (phydev->interface) { + case PHY_INTERFACE_MODE_SGMII: + case PHY_INTERFACE_MODE_2500BASEX: + case PHY_INTERFACE_MODE_10GKR: + break; + default: + return -ENODEV; + } + return 0; +} + +static int bcm84881_probe(struct phy_device *phydev) +{ + /* This driver requires PMAPMD and AN blocks */ + const u32 mmd_mask = MDIO_DEVS_PMAPMD | MDIO_DEVS_AN; + + if (!phydev->is_c45 || + (phydev->c45_ids.devices_in_package & mmd_mask) != mmd_mask) + return -ENODEV; + + return 0; +} + +static int bcm84881_get_features(struct phy_device *phydev) +{ + int ret; + + ret = genphy_c45_pma_read_abilities(phydev); + if (ret) + return ret; + + /* Although the PHY sets bit 1.11.8, it does not support 10M modes */ + linkmode_clear_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, + phydev->supported); + linkmode_clear_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, + phydev->supported); + + return 0; +} + +static int bcm84881_config_aneg(struct phy_device *phydev) +{ + bool changed = false; + u32 adv; + int ret; + + /* Wait for the PHY to finish initialising, otherwise our + * advertisement may be overwritten. + */ + ret = bcm84881_wait_init(phydev); + if (ret) + return ret; + + /* We don't support manual MDI control */ + phydev->mdix_ctrl = ETH_TP_MDI_AUTO; + + /* disabled autoneg doesn't seem to work with this PHY */ + if (phydev->autoneg == AUTONEG_DISABLE) + return -EINVAL; + + ret = genphy_c45_an_config_aneg(phydev); + if (ret < 0) + return ret; + if (ret > 0) + changed = true; + + adv = linkmode_adv_to_mii_ctrl1000_t(phydev->advertising); + ret = phy_modify_mmd_changed(phydev, MDIO_MMD_AN, + MDIO_AN_C22 + MII_CTRL1000, + ADVERTISE_1000FULL | ADVERTISE_1000HALF, + adv); + if (ret < 0) + return ret; + if (ret > 0) + changed = true; + + return genphy_c45_check_and_restart_aneg(phydev, changed); +} + +static int bcm84881_aneg_done(struct phy_device *phydev) +{ + int bmsr, val; + + val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1); + if (val < 0) + return val; + + bmsr = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_C22 + MII_BMSR); + if (bmsr < 0) + return val; + + return !!(val & MDIO_AN_STAT1_COMPLETE) && + !!(bmsr & BMSR_ANEGCOMPLETE); +} + +static int bcm84881_read_status(struct phy_device *phydev) +{ + unsigned int mode; + int bmsr, val; + + val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_CTRL1); + if (val < 0) + return val; + + if (val & MDIO_AN_CTRL1_RESTART) { + phydev->link = 0; + return 0; + } + + val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1); + if (val < 0) + return val; + + bmsr = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_C22 + MII_BMSR); + if (bmsr < 0) + return val; + + phydev->autoneg_complete = !!(val & MDIO_AN_STAT1_COMPLETE) && + !!(bmsr & BMSR_ANEGCOMPLETE); + phydev->link = !!(val & MDIO_STAT1_LSTATUS) && + !!(bmsr & BMSR_LSTATUS); + if (phydev->autoneg == AUTONEG_ENABLE && !phydev->autoneg_complete) + phydev->link = false; + + if (!phydev->link) + return 0; + + linkmode_zero(phydev->lp_advertising); + phydev->speed = SPEED_UNKNOWN; + phydev->duplex = DUPLEX_UNKNOWN; + phydev->pause = 0; + phydev->asym_pause = 0; + phydev->mdix = 0; + + if (phydev->autoneg_complete) { + val = genphy_c45_read_lpa(phydev); + if (val < 0) + return val; + + val = phy_read_mmd(phydev, MDIO_MMD_AN, + MDIO_AN_C22 + MII_STAT1000); + if (val < 0) + return val; + + mii_stat1000_mod_linkmode_lpa_t(phydev->lp_advertising, val); + + if (phydev->autoneg == AUTONEG_ENABLE) + phy_resolve_aneg_linkmode(phydev); + } + + if (phydev->autoneg == AUTONEG_DISABLE) { + /* disabled autoneg doesn't seem to work, so force the link + * down. + */ + phydev->link = 0; + return 0; + } + + /* Set the host link mode - we set the phy interface mode and + * the speed according to this register so that downshift works. + * We leave the duplex setting as per the resolution from the + * above. + */ + val = phy_read_mmd(phydev, MDIO_MMD_VEND1, 0x4011); + mode = (val & 0x1e) >> 1; + if (mode == 1 || mode == 2) + phydev->interface = PHY_INTERFACE_MODE_SGMII; + else if (mode == 3) + phydev->interface = PHY_INTERFACE_MODE_10GKR; + else if (mode == 4) + phydev->interface = PHY_INTERFACE_MODE_2500BASEX; + switch (mode & 7) { + case 1: + phydev->speed = SPEED_100; + break; + case 2: + phydev->speed = SPEED_1000; + break; + case 3: + phydev->speed = SPEED_10000; + break; + case 4: + phydev->speed = SPEED_2500; + break; + case 5: + phydev->speed = SPEED_5000; + break; + } + + return genphy_c45_read_mdix(phydev); +} + +static struct phy_driver bcm84881_drivers[] = { + { + .phy_id = 0xae025150, + .phy_id_mask = 0xfffffff0, + .name = "Broadcom BCM84881", + .config_init = bcm84881_config_init, + .probe = bcm84881_probe, + .get_features = bcm84881_get_features, + .config_aneg = bcm84881_config_aneg, + .aneg_done = bcm84881_aneg_done, + .read_status = bcm84881_read_status, + }, +}; + +module_phy_driver(bcm84881_drivers); + +/* FIXME: module auto-loading for Clause 45 PHYs seems non-functional */ +static struct mdio_device_id __maybe_unused bcm84881_tbl[] = { + { 0xae025150, 0xfffffff0 }, + { }, +}; +MODULE_AUTHOR("Russell King"); +MODULE_DESCRIPTION("Broadcom BCM84881 PHY driver"); +MODULE_DEVICE_TABLE(mdio, bcm84881_tbl); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index 9cd9dcee4eb2..adda0d0eab80 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -93,9 +93,11 @@ #define DP83867_STRAP_STS2_CLK_SKEW_NONE BIT(2) /* PHY CTRL bits */ -#define DP83867_PHYCR_FIFO_DEPTH_SHIFT 14 +#define DP83867_PHYCR_TX_FIFO_DEPTH_SHIFT 14 +#define DP83867_PHYCR_RX_FIFO_DEPTH_SHIFT 12 #define DP83867_PHYCR_FIFO_DEPTH_MAX 0x03 -#define DP83867_PHYCR_FIFO_DEPTH_MASK GENMASK(15, 14) +#define DP83867_PHYCR_TX_FIFO_DEPTH_MASK GENMASK(15, 14) +#define DP83867_PHYCR_RX_FIFO_DEPTH_MASK GENMASK(13, 12) #define DP83867_PHYCR_RESERVED_MASK BIT(11) /* RGMIIDCTL bits */ @@ -131,7 +133,8 @@ enum { struct dp83867_private { u32 rx_id_delay; u32 tx_id_delay; - u32 fifo_depth; + u32 tx_fifo_depth; + u32 rx_fifo_depth; int io_impedance; int port_mirroring; bool rxctrl_strap_quirk; @@ -408,18 +411,32 @@ static int dp83867_of_init(struct phy_device *phydev) dp83867->port_mirroring = DP83867_PORT_MIRROING_DIS; ret = of_property_read_u32(of_node, "ti,fifo-depth", - &dp83867->fifo_depth); + &dp83867->tx_fifo_depth); if (ret) { - phydev_err(phydev, - "ti,fifo-depth property is required\n"); - return ret; + ret = of_property_read_u32(of_node, "tx-fifo-depth", + &dp83867->tx_fifo_depth); + if (ret) + dp83867->tx_fifo_depth = + DP83867_PHYCR_FIFO_DEPTH_4_B_NIB; } - if (dp83867->fifo_depth > DP83867_PHYCR_FIFO_DEPTH_MAX) { - phydev_err(phydev, - "ti,fifo-depth value %u out of range\n", - dp83867->fifo_depth); + + if (dp83867->tx_fifo_depth > DP83867_PHYCR_FIFO_DEPTH_MAX) { + phydev_err(phydev, "tx-fifo-depth value %u out of range\n", + dp83867->tx_fifo_depth); + return -EINVAL; + } + + ret = of_property_read_u32(of_node, "rx-fifo-depth", + &dp83867->rx_fifo_depth); + if (ret) + dp83867->rx_fifo_depth = DP83867_PHYCR_FIFO_DEPTH_4_B_NIB; + + if (dp83867->rx_fifo_depth > DP83867_PHYCR_FIFO_DEPTH_MAX) { + phydev_err(phydev, "rx-fifo-depth value %u out of range\n", + dp83867->rx_fifo_depth); return -EINVAL; } + return 0; } #else @@ -458,12 +475,31 @@ static int dp83867_config_init(struct phy_device *phydev) phy_clear_bits_mmd(phydev, DP83867_DEVADDR, DP83867_CFG4, BIT(7)); + if (phy_interface_is_rgmii(phydev) || + phydev->interface == PHY_INTERFACE_MODE_SGMII) { + val = phy_read(phydev, MII_DP83867_PHYCTRL); + if (val < 0) + return val; + + val &= ~DP83867_PHYCR_TX_FIFO_DEPTH_MASK; + val |= (dp83867->tx_fifo_depth << + DP83867_PHYCR_TX_FIFO_DEPTH_SHIFT); + + if (phydev->interface == PHY_INTERFACE_MODE_SGMII) { + val &= ~DP83867_PHYCR_RX_FIFO_DEPTH_MASK; + val |= (dp83867->rx_fifo_depth << + DP83867_PHYCR_RX_FIFO_DEPTH_SHIFT); + } + + ret = phy_write(phydev, MII_DP83867_PHYCTRL, val); + if (ret) + return ret; + } + if (phy_interface_is_rgmii(phydev)) { val = phy_read(phydev, MII_DP83867_PHYCTRL); if (val < 0) return val; - val &= ~DP83867_PHYCR_FIFO_DEPTH_MASK; - val |= (dp83867->fifo_depth << DP83867_PHYCR_FIFO_DEPTH_SHIFT); /* The code below checks if "port mirroring" N/A MODE4 has been * enabled during power on bootstrap. diff --git a/drivers/net/phy/dp83869.c b/drivers/net/phy/dp83869.c index 93021904c5e4..7996a4aea8d2 100644 --- a/drivers/net/phy/dp83869.c +++ b/drivers/net/phy/dp83869.c @@ -334,7 +334,7 @@ static int dp83869_configure_mode(struct phy_device *phydev, break; default: return -EINVAL; - }; + } return ret; } diff --git a/drivers/net/phy/lxt.c b/drivers/net/phy/lxt.c index 356bd6472f49..fec58ad69e02 100644 --- a/drivers/net/phy/lxt.c +++ b/drivers/net/phy/lxt.c @@ -190,27 +190,11 @@ static int lxt973a2_read_status(struct phy_device *phydev) phydev->duplex = DUPLEX_FULL; } - if (phydev->duplex == DUPLEX_FULL) { - phydev->pause = lpa & LPA_PAUSE_CAP ? 1 : 0; - phydev->asym_pause = lpa & LPA_PAUSE_ASYM ? 1 : 0; - } + phy_resolve_aneg_pause(phydev); } else { - int bmcr = phy_read(phydev, MII_BMCR); - - if (bmcr < 0) - return bmcr; - - if (bmcr & BMCR_FULLDPLX) - phydev->duplex = DUPLEX_FULL; - else - phydev->duplex = DUPLEX_HALF; - - if (bmcr & BMCR_SPEED1000) - phydev->speed = SPEED_1000; - else if (bmcr & BMCR_SPEED100) - phydev->speed = SPEED_100; - else - phydev->speed = SPEED_10; + err = genphy_read_status_fixed(phydev); + if (err < 0) + return err; phydev->pause = phydev->asym_pause = 0; linkmode_zero(phydev->lp_advertising); diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index b1fbd1937328..28e33ece4ce1 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -162,19 +162,9 @@ #define MII_88E1510_GEN_CTRL_REG_1_MODE_SGMII 0x1 /* SGMII to copper */ #define MII_88E1510_GEN_CTRL_REG_1_RESET 0x8000 /* Soft reset */ -#define LPA_FIBER_1000HALF 0x40 -#define LPA_FIBER_1000FULL 0x20 - #define LPA_PAUSE_FIBER 0x180 #define LPA_PAUSE_ASYM_FIBER 0x100 -#define ADVERTISE_FIBER_1000HALF 0x40 -#define ADVERTISE_FIBER_1000FULL 0x20 - -#define ADVERTISE_PAUSE_FIBER 0x180 -#define ADVERTISE_PAUSE_ASYM_FIBER 0x100 - -#define REGISTER_LINK_STATUS 0x400 #define NB_FIBER_STATS 1 MODULE_DESCRIPTION("Marvell PHY driver"); @@ -497,16 +487,15 @@ static inline u32 linkmode_adv_to_fiber_adv_t(unsigned long *advertise) u32 result = 0; if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, advertise)) - result |= ADVERTISE_FIBER_1000HALF; + result |= ADVERTISE_1000XHALF; if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, advertise)) - result |= ADVERTISE_FIBER_1000FULL; + result |= ADVERTISE_1000XFULL; if (linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, advertise) && linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, advertise)) - result |= LPA_PAUSE_ASYM_FIBER; + result |= ADVERTISE_1000XPSE_ASYM; else if (linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, advertise)) - result |= (ADVERTISE_PAUSE_FIBER - & (~ADVERTISE_PAUSE_ASYM_FIBER)); + result |= ADVERTISE_1000XPAUSE; return result; } @@ -524,7 +513,7 @@ static int marvell_config_aneg_fiber(struct phy_device *phydev) { int changed = 0; int err; - int adv, oldadv; + u16 adv; if (phydev->autoneg != AUTONEG_ENABLE) return genphy_setup_forced(phydev); @@ -533,44 +522,19 @@ static int marvell_config_aneg_fiber(struct phy_device *phydev) linkmode_and(phydev->advertising, phydev->advertising, phydev->supported); - /* Setup fiber advertisement */ - adv = phy_read(phydev, MII_ADVERTISE); - if (adv < 0) - return adv; - - oldadv = adv; - adv &= ~(ADVERTISE_FIBER_1000HALF | ADVERTISE_FIBER_1000FULL - | LPA_PAUSE_FIBER); - adv |= linkmode_adv_to_fiber_adv_t(phydev->advertising); - - if (adv != oldadv) { - err = phy_write(phydev, MII_ADVERTISE, adv); - if (err < 0) - return err; + adv = linkmode_adv_to_fiber_adv_t(phydev->advertising); + /* Setup fiber advertisement */ + err = phy_modify_changed(phydev, MII_ADVERTISE, + ADVERTISE_1000XHALF | ADVERTISE_1000XFULL | + ADVERTISE_1000XPAUSE | ADVERTISE_1000XPSE_ASYM, + adv); + if (err < 0) + return err; + if (err > 0) changed = 1; - } - - if (changed == 0) { - /* Advertisement hasn't changed, but maybe aneg was never on to - * begin with? Or maybe phy was isolated? - */ - int ctl = phy_read(phydev, MII_BMCR); - - if (ctl < 0) - return ctl; - - if (!(ctl & BMCR_ANENABLE) || (ctl & BMCR_ISOLATE)) - changed = 1; /* do restart aneg */ - } - /* Only restart aneg if we are advertising something different - * than we were before. - */ - if (changed > 0) - changed = genphy_restart_aneg(phydev); - - return changed; + return genphy_check_and_restart_aneg(phydev, changed); } static int m88e1510_config_aneg(struct phy_device *phydev) @@ -1302,93 +1266,29 @@ static int m88e6390_config_aneg(struct phy_device *phydev) static void fiber_lpa_mod_linkmode_lpa_t(unsigned long *advertising, u32 lpa) { linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, - advertising, lpa & LPA_FIBER_1000HALF); + advertising, lpa & LPA_1000XHALF); linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, - advertising, lpa & LPA_FIBER_1000FULL); -} - -/** - * marvell_update_link - update link status in real time in @phydev - * @phydev: target phy_device struct - * - * Description: Update the value in phydev->link to reflect the - * current link value. - */ -static int marvell_update_link(struct phy_device *phydev, int fiber) -{ - int status; - - /* Use the generic register for copper link, or specific - * register for fiber case - */ - if (fiber) { - status = phy_read(phydev, MII_M1011_PHY_STATUS); - if (status < 0) - return status; - - if ((status & REGISTER_LINK_STATUS) == 0) - phydev->link = 0; - else - phydev->link = 1; - } else { - return genphy_update_link(phydev); - } - - return 0; + advertising, lpa & LPA_1000XFULL); } static int marvell_read_status_page_an(struct phy_device *phydev, - int fiber) + int fiber, int status) { - int status; int lpa; - int lpagb; - - status = phy_read(phydev, MII_M1011_PHY_STATUS); - if (status < 0) - return status; - - lpa = phy_read(phydev, MII_LPA); - if (lpa < 0) - return lpa; - - lpagb = phy_read(phydev, MII_STAT1000); - if (lpagb < 0) - return lpagb; - - if (status & MII_M1011_PHY_STATUS_FULLDUPLEX) - phydev->duplex = DUPLEX_FULL; - else - phydev->duplex = DUPLEX_HALF; - - status = status & MII_M1011_PHY_STATUS_SPD_MASK; - phydev->pause = 0; - phydev->asym_pause = 0; - - switch (status) { - case MII_M1011_PHY_STATUS_1000: - phydev->speed = SPEED_1000; - break; - - case MII_M1011_PHY_STATUS_100: - phydev->speed = SPEED_100; - break; - - default: - phydev->speed = SPEED_10; - break; - } + int err; if (!fiber) { - mii_lpa_to_linkmode_lpa_t(phydev->lp_advertising, lpa); - mii_stat1000_mod_linkmode_lpa_t(phydev->lp_advertising, lpagb); + err = genphy_read_lpa(phydev); + if (err < 0) + return err; - if (phydev->duplex == DUPLEX_FULL) { - phydev->pause = lpa & LPA_PAUSE_CAP ? 1 : 0; - phydev->asym_pause = lpa & LPA_PAUSE_ASYM ? 1 : 0; - } + phy_resolve_aneg_pause(phydev); } else { + lpa = phy_read(phydev, MII_LPA); + if (lpa < 0) + return lpa; + /* The fiber link is only 1000M capable */ fiber_lpa_mod_linkmode_lpa_t(phydev->lp_advertising, lpa); @@ -1405,31 +1305,25 @@ static int marvell_read_status_page_an(struct phy_device *phydev, } } } - return 0; -} -static int marvell_read_status_page_fixed(struct phy_device *phydev) -{ - int bmcr = phy_read(phydev, MII_BMCR); - - if (bmcr < 0) - return bmcr; - - if (bmcr & BMCR_FULLDPLX) + if (status & MII_M1011_PHY_STATUS_FULLDUPLEX) phydev->duplex = DUPLEX_FULL; else phydev->duplex = DUPLEX_HALF; - if (bmcr & BMCR_SPEED1000) + switch (status & MII_M1011_PHY_STATUS_SPD_MASK) { + case MII_M1011_PHY_STATUS_1000: phydev->speed = SPEED_1000; - else if (bmcr & BMCR_SPEED100) + break; + + case MII_M1011_PHY_STATUS_100: phydev->speed = SPEED_100; - else - phydev->speed = SPEED_10; + break; - phydev->pause = 0; - phydev->asym_pause = 0; - linkmode_zero(phydev->lp_advertising); + default: + phydev->speed = SPEED_10; + break; + } return 0; } @@ -1444,25 +1338,38 @@ static int marvell_read_status_page_fixed(struct phy_device *phydev) */ static int marvell_read_status_page(struct phy_device *phydev, int page) { + int status; int fiber; int err; - /* Detect and update the link, but return if there - * was an error + status = phy_read(phydev, MII_M1011_PHY_STATUS); + if (status < 0) + return status; + + /* Use the generic register for copper link status, + * and the PHY status register for fiber link status. */ + if (page == MII_MARVELL_FIBER_PAGE) { + phydev->link = !!(status & MII_M1011_PHY_STATUS_LINK); + } else { + err = genphy_update_link(phydev); + if (err) + return err; + } + if (page == MII_MARVELL_FIBER_PAGE) fiber = 1; else fiber = 0; - err = marvell_update_link(phydev, fiber); - if (err) - return err; + linkmode_zero(phydev->lp_advertising); + phydev->pause = 0; + phydev->asym_pause = 0; if (phydev->autoneg == AUTONEG_ENABLE) - err = marvell_read_status_page_an(phydev, fiber); + err = marvell_read_status_page_an(phydev, fiber, status); else - err = marvell_read_status_page_fixed(phydev); + err = genphy_read_status_fixed(phydev); return err; } diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index 1bf13017d288..512f27b0b5cd 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -214,7 +214,7 @@ static int mv3310_sfp_insert(void *upstream, const struct sfp_eeprom_id *id) phy_interface_t iface; sfp_parse_support(phydev->sfp_bus, id, support); - iface = sfp_select_interface(phydev->sfp_bus, id, support); + iface = sfp_select_interface(phydev->sfp_bus, support); if (iface != PHY_INTERFACE_MODE_10GKR) { dev_err(&phydev->mdio.dev, "incompatible SFP module inserted\n"); diff --git a/drivers/net/phy/mdio-i2c.c b/drivers/net/phy/mdio-i2c.c index 0dce67672548..0746e2cc39ae 100644 --- a/drivers/net/phy/mdio-i2c.c +++ b/drivers/net/phy/mdio-i2c.c @@ -33,17 +33,24 @@ static int i2c_mii_read(struct mii_bus *bus, int phy_id, int reg) { struct i2c_adapter *i2c = bus->priv; struct i2c_msg msgs[2]; - u8 data[2], dev_addr = reg; + u8 addr[3], data[2], *p; int bus_addr, ret; if (!i2c_mii_valid_phy_id(phy_id)) return 0xffff; + p = addr; + if (reg & MII_ADDR_C45) { + *p++ = 0x20 | ((reg >> 16) & 31); + *p++ = reg >> 8; + } + *p++ = reg; + bus_addr = i2c_mii_phy_addr(phy_id); msgs[0].addr = bus_addr; msgs[0].flags = 0; - msgs[0].len = 1; - msgs[0].buf = &dev_addr; + msgs[0].len = p - addr; + msgs[0].buf = addr; msgs[1].addr = bus_addr; msgs[1].flags = I2C_M_RD; msgs[1].len = sizeof(data); @@ -61,18 +68,23 @@ static int i2c_mii_write(struct mii_bus *bus, int phy_id, int reg, u16 val) struct i2c_adapter *i2c = bus->priv; struct i2c_msg msg; int ret; - u8 data[3]; + u8 data[5], *p; if (!i2c_mii_valid_phy_id(phy_id)) return 0; - data[0] = reg; - data[1] = val >> 8; - data[2] = val; + p = data; + if (reg & MII_ADDR_C45) { + *p++ = (reg >> 16) & 31; + *p++ = reg >> 8; + } + *p++ = reg; + *p++ = val >> 8; + *p++ = val; msg.addr = i2c_mii_phy_addr(phy_id); msg.flags = 0; - msg.len = 3; + msg.len = p - data; msg.buf = data; ret = i2c_transfer(i2c, &msg, 1); diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c index d5f8f351d9ef..50214c081164 100644 --- a/drivers/net/phy/mscc.c +++ b/drivers/net/phy/mscc.c @@ -2404,7 +2404,6 @@ static struct phy_driver vsc85xx_driver[] = { .soft_reset = &genphy_soft_reset, .config_init = &vsc85xx_config_init, .config_aneg = &vsc85xx_config_aneg, - .aneg_done = &genphy_aneg_done, .read_status = &vsc85xx_read_status, .ack_interrupt = &vsc85xx_ack_interrupt, .config_intr = &vsc85xx_config_intr, @@ -2429,7 +2428,6 @@ static struct phy_driver vsc85xx_driver[] = { .soft_reset = &genphy_soft_reset, .config_init = &vsc85xx_config_init, .config_aneg = &vsc85xx_config_aneg, - .aneg_done = &genphy_aneg_done, .read_status = &vsc85xx_read_status, .ack_interrupt = &vsc85xx_ack_interrupt, .config_intr = &vsc85xx_config_intr, @@ -2454,7 +2452,6 @@ static struct phy_driver vsc85xx_driver[] = { .soft_reset = &genphy_soft_reset, .config_init = &vsc85xx_config_init, .config_aneg = &vsc85xx_config_aneg, - .aneg_done = &genphy_aneg_done, .read_status = &vsc85xx_read_status, .ack_interrupt = &vsc85xx_ack_interrupt, .config_intr = &vsc85xx_config_intr, @@ -2479,7 +2476,6 @@ static struct phy_driver vsc85xx_driver[] = { .soft_reset = &genphy_soft_reset, .config_init = &vsc85xx_config_init, .config_aneg = &vsc85xx_config_aneg, - .aneg_done = &genphy_aneg_done, .read_status = &vsc85xx_read_status, .ack_interrupt = &vsc85xx_ack_interrupt, .config_intr = &vsc85xx_config_intr, @@ -2504,7 +2500,6 @@ static struct phy_driver vsc85xx_driver[] = { .soft_reset = &genphy_soft_reset, .config_init = &vsc8584_config_init, .config_aneg = &vsc85xx_config_aneg, - .aneg_done = &genphy_aneg_done, .read_status = &vsc85xx_read_status, .ack_interrupt = &vsc85xx_ack_interrupt, .config_intr = &vsc85xx_config_intr, @@ -2530,7 +2525,6 @@ static struct phy_driver vsc85xx_driver[] = { .soft_reset = &genphy_soft_reset, .config_init = &vsc8584_config_init, .config_aneg = &vsc85xx_config_aneg, - .aneg_done = &genphy_aneg_done, .read_status = &vsc85xx_read_status, .ack_interrupt = &vsc85xx_ack_interrupt, .config_intr = &vsc85xx_config_intr, diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 0887ed2bb050..ec19efe7ea99 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1771,6 +1771,36 @@ int genphy_restart_aneg(struct phy_device *phydev) EXPORT_SYMBOL(genphy_restart_aneg); /** + * genphy_check_and_restart_aneg - Enable and restart auto-negotiation + * @phydev: target phy_device struct + * @restart: whether aneg restart is requested + * + * Check, and restart auto-negotiation if needed. + */ +int genphy_check_and_restart_aneg(struct phy_device *phydev, bool restart) +{ + int ret = 0; + + if (!restart) { + /* Advertisement hasn't changed, but maybe aneg was never on to + * begin with? Or maybe phy was isolated? + */ + ret = phy_read(phydev, MII_BMCR); + if (ret < 0) + return ret; + + if (!(ret & BMCR_ANENABLE) || (ret & BMCR_ISOLATE)) + restart = true; + } + + if (restart) + ret = genphy_restart_aneg(phydev); + + return ret; +} +EXPORT_SYMBOL(genphy_check_and_restart_aneg); + +/** * __genphy_config_aneg - restart auto-negotiation or write BMCR * @phydev: target phy_device struct * @changed: whether autoneg is requested @@ -1795,23 +1825,7 @@ int __genphy_config_aneg(struct phy_device *phydev, bool changed) else if (err) changed = true; - if (!changed) { - /* Advertisement hasn't changed, but maybe aneg was never on to - * begin with? Or maybe phy was isolated? - */ - int ctl = phy_read(phydev, MII_BMCR); - - if (ctl < 0) - return ctl; - - if (!(ctl & BMCR_ANENABLE) || (ctl & BMCR_ISOLATE)) - changed = true; /* do restart aneg */ - } - - /* Only restart aneg if we are advertising something different - * than we were before. - */ - return changed ? genphy_restart_aneg(phydev) : 0; + return genphy_check_and_restart_aneg(phydev, changed); } EXPORT_SYMBOL(__genphy_config_aneg); @@ -1979,6 +1993,36 @@ int genphy_read_lpa(struct phy_device *phydev) EXPORT_SYMBOL(genphy_read_lpa); /** + * genphy_read_status_fixed - read the link parameters for !aneg mode + * @phydev: target phy_device struct + * + * Read the current duplex and speed state for a PHY operating with + * autonegotiation disabled. + */ +int genphy_read_status_fixed(struct phy_device *phydev) +{ + int bmcr = phy_read(phydev, MII_BMCR); + + if (bmcr < 0) + return bmcr; + + if (bmcr & BMCR_FULLDPLX) + phydev->duplex = DUPLEX_FULL; + else + phydev->duplex = DUPLEX_HALF; + + if (bmcr & BMCR_SPEED1000) + phydev->speed = SPEED_1000; + else if (bmcr & BMCR_SPEED100) + phydev->speed = SPEED_100; + else + phydev->speed = SPEED_10; + + return 0; +} +EXPORT_SYMBOL(genphy_read_status_fixed); + +/** * genphy_read_status - check the link status and update current link state * @phydev: target phy_device struct * @@ -2012,22 +2056,9 @@ int genphy_read_status(struct phy_device *phydev) if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) { phy_resolve_aneg_linkmode(phydev); } else if (phydev->autoneg == AUTONEG_DISABLE) { - int bmcr = phy_read(phydev, MII_BMCR); - - if (bmcr < 0) - return bmcr; - - if (bmcr & BMCR_FULLDPLX) - phydev->duplex = DUPLEX_FULL; - else - phydev->duplex = DUPLEX_HALF; - - if (bmcr & BMCR_SPEED1000) - phydev->speed = SPEED_1000; - else if (bmcr & BMCR_SPEED100) - phydev->speed = SPEED_100; - else - phydev->speed = SPEED_10; + err = genphy_read_status_fixed(phydev); + if (err < 0) + return err; } return 0; @@ -2575,7 +2606,6 @@ static struct phy_driver genphy_driver = { .name = "Generic PHY", .soft_reset = genphy_no_soft_reset, .get_features = genphy_read_abilities, - .aneg_done = genphy_aneg_done, .suspend = genphy_suspend, .resume = genphy_resume, .set_loopback = genphy_loopback, diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 9a616d6bc4eb..a917f95372cd 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -48,7 +48,8 @@ struct phylink { unsigned long phylink_disable_state; /* bitmask of disables */ struct phy_device *phydev; phy_interface_t link_interface; /* PHY_INTERFACE_xxx */ - u8 link_an_mode; /* MLO_AN_xxx */ + u8 cfg_link_an_mode; /* MLO_AN_xxx */ + u8 cur_link_an_mode; u8 link_port; /* The current non-phy ethtool port */ __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); @@ -71,6 +72,9 @@ struct phylink { bool mac_link_dropped; struct sfp_bus *sfp_bus; + bool sfp_may_have_phy; + __ETHTOOL_DECLARE_LINK_MODE_MASK(sfp_support); + u8 sfp_port; }; #define phylink_printk(level, pl, fmt, ...) \ @@ -256,12 +260,12 @@ static int phylink_parse_mode(struct phylink *pl, struct fwnode_handle *fwnode) dn = fwnode_get_named_child_node(fwnode, "fixed-link"); if (dn || fwnode_property_present(fwnode, "fixed-link")) - pl->link_an_mode = MLO_AN_FIXED; + pl->cfg_link_an_mode = MLO_AN_FIXED; fwnode_handle_put(dn); if (fwnode_property_read_string(fwnode, "managed", &managed) == 0 && strcmp(managed, "in-band-status") == 0) { - if (pl->link_an_mode == MLO_AN_FIXED) { + if (pl->cfg_link_an_mode == MLO_AN_FIXED) { phylink_err(pl, "can't use both fixed-link and in-band-status\n"); return -EINVAL; @@ -273,7 +277,7 @@ static int phylink_parse_mode(struct phylink *pl, struct fwnode_handle *fwnode) phylink_set(pl->supported, Asym_Pause); phylink_set(pl->supported, Pause); pl->link_config.an_enabled = true; - pl->link_an_mode = MLO_AN_INBAND; + pl->cfg_link_an_mode = MLO_AN_INBAND; switch (pl->link_config.interface) { case PHY_INTERFACE_MODE_SGMII: @@ -333,14 +337,14 @@ static void phylink_mac_config(struct phylink *pl, { phylink_dbg(pl, "%s: mode=%s/%s/%s/%s adv=%*pb pause=%02x link=%u an=%u\n", - __func__, phylink_an_mode_str(pl->link_an_mode), + __func__, phylink_an_mode_str(pl->cur_link_an_mode), phy_modes(state->interface), phy_speed_to_str(state->speed), phy_duplex_to_str(state->duplex), __ETHTOOL_LINK_MODE_MASK_NBITS, state->advertising, state->pause, state->link, state->an_enabled); - pl->ops->mac_config(pl->config, pl->link_an_mode, state); + pl->ops->mac_config(pl->config, pl->cur_link_an_mode, state); } static void phylink_mac_config_up(struct phylink *pl, @@ -441,7 +445,7 @@ static void phylink_mac_link_up(struct phylink *pl, struct net_device *ndev = pl->netdev; pl->cur_interface = link_state.interface; - pl->ops->mac_link_up(pl->config, pl->link_an_mode, + pl->ops->mac_link_up(pl->config, pl->cur_link_an_mode, pl->phy_state.interface, pl->phydev); @@ -461,7 +465,7 @@ static void phylink_mac_link_down(struct phylink *pl) if (ndev) netif_carrier_off(ndev); - pl->ops->mac_link_down(pl->config, pl->link_an_mode, + pl->ops->mac_link_down(pl->config, pl->cur_link_an_mode, pl->cur_interface); phylink_info(pl, "Link is Down\n"); } @@ -480,7 +484,7 @@ static void phylink_resolve(struct work_struct *w) } else if (pl->mac_link_dropped) { link_state.link = false; } else { - switch (pl->link_an_mode) { + switch (pl->cur_link_an_mode) { case MLO_AN_PHY: link_state = pl->phy_state; phylink_resolve_flow(pl, &link_state); @@ -648,7 +652,7 @@ struct phylink *phylink_create(struct phylink_config *config, return ERR_PTR(ret); } - if (pl->link_an_mode == MLO_AN_FIXED) { + if (pl->cfg_link_an_mode == MLO_AN_FIXED) { ret = phylink_parse_fixedlink(pl, fwnode); if (ret < 0) { kfree(pl); @@ -656,6 +660,8 @@ struct phylink *phylink_create(struct phylink_config *config, } } + pl->cur_link_an_mode = pl->cfg_link_an_mode; + ret = phylink_register_sfp(pl, fwnode); if (ret < 0) { kfree(pl); @@ -711,7 +717,8 @@ static void phylink_phy_change(struct phy_device *phydev, bool up, phy_duplex_to_str(phydev->duplex)); } -static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy) +static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy, + phy_interface_t interface) { struct phylink_link_state config; __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); @@ -729,7 +736,19 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy) memset(&config, 0, sizeof(config)); linkmode_copy(supported, phy->supported); linkmode_copy(config.advertising, phy->advertising); - config.interface = pl->link_config.interface; + + /* Clause 45 PHYs switch their Serdes lane between several different + * modes, normally 10GBASE-R, SGMII. Some use 2500BASE-X for 2.5G + * speeds. We really need to know which interface modes the PHY and + * MAC supports to properly work out which linkmodes can be supported. + */ + if (phy->is_c45 && + interface != PHY_INTERFACE_MODE_RXAUI && + interface != PHY_INTERFACE_MODE_XAUI && + interface != PHY_INTERFACE_MODE_USXGMII) + config.interface = PHY_INTERFACE_MODE_NA; + else + config.interface = interface; ret = phylink_validate(pl, supported, &config); if (ret) @@ -745,6 +764,7 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy) mutex_lock(&phy->lock); mutex_lock(&pl->state_mutex); pl->phydev = phy; + pl->phy_state.interface = interface; linkmode_copy(pl->supported, supported); linkmode_copy(pl->link_config.advertising, config.advertising); @@ -764,28 +784,18 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy) return 0; } -static int __phylink_connect_phy(struct phylink *pl, struct phy_device *phy, - phy_interface_t interface) +static int phylink_attach_phy(struct phylink *pl, struct phy_device *phy, + phy_interface_t interface) { - int ret; - - if (WARN_ON(pl->link_an_mode == MLO_AN_FIXED || - (pl->link_an_mode == MLO_AN_INBAND && + if (WARN_ON(pl->cfg_link_an_mode == MLO_AN_FIXED || + (pl->cfg_link_an_mode == MLO_AN_INBAND && phy_interface_mode_is_8023z(interface)))) return -EINVAL; if (pl->phydev) return -EBUSY; - ret = phy_attach_direct(pl->netdev, phy, 0, interface); - if (ret) - return ret; - - ret = phylink_bringup_phy(pl, phy); - if (ret) - phy_detach(phy); - - return ret; + return phy_attach_direct(pl->netdev, phy, 0, interface); } /** @@ -805,13 +815,23 @@ static int __phylink_connect_phy(struct phylink *pl, struct phy_device *phy, */ int phylink_connect_phy(struct phylink *pl, struct phy_device *phy) { + int ret; + /* Use PHY device/driver interface */ if (pl->link_interface == PHY_INTERFACE_MODE_NA) { pl->link_interface = phy->interface; pl->link_config.interface = pl->link_interface; } - return __phylink_connect_phy(pl, phy, pl->link_interface); + ret = phylink_attach_phy(pl, phy, pl->link_interface); + if (ret < 0) + return ret; + + ret = phylink_bringup_phy(pl, phy, pl->link_config.interface); + if (ret) + phy_detach(phy); + + return ret; } EXPORT_SYMBOL_GPL(phylink_connect_phy); @@ -835,8 +855,8 @@ int phylink_of_phy_connect(struct phylink *pl, struct device_node *dn, int ret; /* Fixed links and 802.3z are handled without needing a PHY */ - if (pl->link_an_mode == MLO_AN_FIXED || - (pl->link_an_mode == MLO_AN_INBAND && + if (pl->cfg_link_an_mode == MLO_AN_FIXED || + (pl->cfg_link_an_mode == MLO_AN_INBAND && phy_interface_mode_is_8023z(pl->link_interface))) return 0; @@ -847,20 +867,23 @@ int phylink_of_phy_connect(struct phylink *pl, struct device_node *dn, phy_node = of_parse_phandle(dn, "phy-device", 0); if (!phy_node) { - if (pl->link_an_mode == MLO_AN_PHY) + if (pl->cfg_link_an_mode == MLO_AN_PHY) return -ENODEV; return 0; } - phy_dev = of_phy_attach(pl->netdev, phy_node, flags, - pl->link_interface); + phy_dev = of_phy_find_device(phy_node); /* We're done with the phy_node handle */ of_node_put(phy_node); - if (!phy_dev) return -ENODEV; - ret = phylink_bringup_phy(pl, phy_dev); + ret = phy_attach_direct(pl->netdev, phy_dev, flags, + pl->link_interface); + if (ret) + return ret; + + ret = phylink_bringup_phy(pl, phy_dev, pl->link_config.interface); if (ret) phy_detach(phy_dev); @@ -910,7 +933,7 @@ int phylink_fixed_state_cb(struct phylink *pl, /* It does not make sense to let the link be overriden unless we use * MLO_AN_FIXED */ - if (pl->link_an_mode != MLO_AN_FIXED) + if (pl->cfg_link_an_mode != MLO_AN_FIXED) return -EINVAL; mutex_lock(&pl->state_mutex); @@ -960,7 +983,7 @@ void phylink_start(struct phylink *pl) ASSERT_RTNL(); phylink_info(pl, "configuring for %s/%s link mode\n", - phylink_an_mode_str(pl->link_an_mode), + phylink_an_mode_str(pl->cur_link_an_mode), phy_modes(pl->link_config.interface)); /* Always set the carrier off */ @@ -983,7 +1006,7 @@ void phylink_start(struct phylink *pl) clear_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state); phylink_run_resolve(pl); - if (pl->link_an_mode == MLO_AN_FIXED && pl->link_gpio) { + if (pl->cfg_link_an_mode == MLO_AN_FIXED && pl->link_gpio) { int irq = gpiod_to_irq(pl->link_gpio); if (irq > 0) { @@ -998,7 +1021,7 @@ void phylink_start(struct phylink *pl) if (irq <= 0) mod_timer(&pl->link_poll, jiffies + HZ); } - if (pl->link_an_mode == MLO_AN_FIXED && pl->get_fixed_state) + if (pl->cfg_link_an_mode == MLO_AN_FIXED && pl->get_fixed_state) mod_timer(&pl->link_poll, jiffies + HZ); if (pl->phydev) phy_start(pl->phydev); @@ -1125,7 +1148,7 @@ int phylink_ethtool_ksettings_get(struct phylink *pl, linkmode_copy(kset->link_modes.supported, pl->supported); - switch (pl->link_an_mode) { + switch (pl->cur_link_an_mode) { case MLO_AN_FIXED: /* We are using fixed settings. Report these as the * current link settings - and note that these also @@ -1197,7 +1220,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl, /* If we have a fixed link (as specified by firmware), refuse * to change link parameters. */ - if (pl->link_an_mode == MLO_AN_FIXED && + if (pl->cur_link_an_mode == MLO_AN_FIXED && (s->speed != pl->link_config.speed || s->duplex != pl->link_config.duplex)) return -EINVAL; @@ -1209,7 +1232,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl, __clear_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, config.advertising); } else { /* If we have a fixed link, refuse to enable autonegotiation */ - if (pl->link_an_mode == MLO_AN_FIXED) + if (pl->cur_link_an_mode == MLO_AN_FIXED) return -EINVAL; config.speed = SPEED_UNKNOWN; @@ -1219,44 +1242,66 @@ int phylink_ethtool_ksettings_set(struct phylink *pl, __set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, config.advertising); } - if (phylink_validate(pl, support, &config)) - return -EINVAL; - - /* If autonegotiation is enabled, we must have an advertisement */ - if (config.an_enabled && phylink_is_empty_linkmode(config.advertising)) - return -EINVAL; - - our_kset = *kset; - linkmode_copy(our_kset.link_modes.advertising, config.advertising); - our_kset.base.speed = config.speed; - our_kset.base.duplex = config.duplex; - - /* If we have a PHY, configure the phy */ if (pl->phydev) { + /* If we have a PHY, we process the kset change via phylib. + * phylib will call our link state function if the PHY + * parameters have changed, which will trigger a resolve + * and update the MAC configuration. + */ + our_kset = *kset; + linkmode_copy(our_kset.link_modes.advertising, + config.advertising); + our_kset.base.speed = config.speed; + our_kset.base.duplex = config.duplex; + ret = phy_ethtool_ksettings_set(pl->phydev, &our_kset); if (ret) return ret; - } - mutex_lock(&pl->state_mutex); - /* Configure the MAC to match the new settings */ - linkmode_copy(pl->link_config.advertising, our_kset.link_modes.advertising); - pl->link_config.interface = config.interface; - pl->link_config.speed = our_kset.base.speed; - pl->link_config.duplex = our_kset.base.duplex; - pl->link_config.an_enabled = our_kset.base.autoneg != AUTONEG_DISABLE; + mutex_lock(&pl->state_mutex); + /* Save the new configuration */ + linkmode_copy(pl->link_config.advertising, + our_kset.link_modes.advertising); + pl->link_config.interface = config.interface; + pl->link_config.speed = our_kset.base.speed; + pl->link_config.duplex = our_kset.base.duplex; + pl->link_config.an_enabled = our_kset.base.autoneg != + AUTONEG_DISABLE; + mutex_unlock(&pl->state_mutex); + } else { + /* For a fixed link, this isn't able to change any parameters, + * which just leaves inband mode. + */ + if (phylink_validate(pl, support, &config)) + return -EINVAL; - /* If we have a PHY, phylib will call our link state function if the - * mode has changed, which will trigger a resolve and update the MAC - * configuration. For a fixed link, this isn't able to change any - * parameters, which just leaves inband mode. - */ - if (pl->link_an_mode == MLO_AN_INBAND && - !test_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state)) { - phylink_mac_config(pl, &pl->link_config); - phylink_mac_an_restart(pl); + /* If autonegotiation is enabled, we must have an advertisement */ + if (config.an_enabled && + phylink_is_empty_linkmode(config.advertising)) + return -EINVAL; + + mutex_lock(&pl->state_mutex); + linkmode_copy(pl->link_config.advertising, config.advertising); + pl->link_config.interface = config.interface; + pl->link_config.speed = config.speed; + pl->link_config.duplex = config.duplex; + pl->link_config.an_enabled = kset->base.autoneg != + AUTONEG_DISABLE; + + if (pl->cur_link_an_mode == MLO_AN_INBAND && + !test_bit(PHYLINK_DISABLE_STOPPED, + &pl->phylink_disable_state)) { + /* If in 802.3z mode, this updates the advertisement. + * + * If we are in SGMII mode without a PHY, there is no + * advertisement; the only thing we have is the pause + * modes which can only come from a PHY. + */ + phylink_mac_config(pl, &pl->link_config); + phylink_mac_an_restart(pl); + } + mutex_unlock(&pl->state_mutex); } - mutex_unlock(&pl->state_mutex); return 0; } @@ -1341,7 +1386,7 @@ int phylink_ethtool_set_pauseparam(struct phylink *pl, pause->tx_pause); } else if (!test_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state)) { - switch (pl->link_an_mode) { + switch (pl->cur_link_an_mode) { case MLO_AN_FIXED: /* Should we allow fixed links to change against the config? */ phylink_resolve_flow(pl, config); @@ -1548,7 +1593,7 @@ static int phylink_mii_read(struct phylink *pl, unsigned int phy_id, struct phylink_link_state state; int val = 0xffff; - switch (pl->link_an_mode) { + switch (pl->cur_link_an_mode) { case MLO_AN_FIXED: if (phy_id == 0) { phylink_get_fixed_state(pl, &state); @@ -1573,7 +1618,7 @@ static int phylink_mii_read(struct phylink *pl, unsigned int phy_id, static int phylink_mii_write(struct phylink *pl, unsigned int phy_id, unsigned int reg, unsigned int val) { - switch (pl->link_an_mode) { + switch (pl->cur_link_an_mode) { case MLO_AN_FIXED: break; @@ -1679,25 +1724,21 @@ static void phylink_sfp_detach(void *upstream, struct sfp_bus *bus) pl->netdev->sfp_bus = NULL; } -static int phylink_sfp_module_insert(void *upstream, - const struct sfp_eeprom_id *id) +static int phylink_sfp_config(struct phylink *pl, u8 mode, + const unsigned long *supported, + const unsigned long *advertising) { - struct phylink *pl = upstream; - __ETHTOOL_DECLARE_LINK_MODE_MASK(support) = { 0, }; __ETHTOOL_DECLARE_LINK_MODE_MASK(support1); + __ETHTOOL_DECLARE_LINK_MODE_MASK(support); struct phylink_link_state config; phy_interface_t iface; - int ret = 0; bool changed; - u8 port; - - ASSERT_RTNL(); + int ret; - sfp_parse_support(pl->sfp_bus, id, support); - port = sfp_parse_port(pl->sfp_bus, id, support); + linkmode_copy(support, supported); memset(&config, 0, sizeof(config)); - linkmode_copy(config.advertising, support); + linkmode_copy(config.advertising, advertising); config.interface = PHY_INTERFACE_MODE_NA; config.speed = SPEED_UNKNOWN; config.duplex = DUPLEX_UNKNOWN; @@ -1712,9 +1753,7 @@ static int phylink_sfp_module_insert(void *upstream, return ret; } - linkmode_copy(support1, support); - - iface = sfp_select_interface(pl->sfp_bus, id, config.advertising); + iface = sfp_select_interface(pl->sfp_bus, config.advertising); if (iface == PHY_INTERFACE_MODE_NA) { phylink_err(pl, "selection of interface failed, advertisement %*pb\n", @@ -1723,18 +1762,18 @@ static int phylink_sfp_module_insert(void *upstream, } config.interface = iface; + linkmode_copy(support1, support); ret = phylink_validate(pl, support1, &config); if (ret) { phylink_err(pl, "validation of %s/%s with support %*pb failed: %d\n", - phylink_an_mode_str(MLO_AN_INBAND), + phylink_an_mode_str(mode), phy_modes(config.interface), __ETHTOOL_LINK_MODE_MASK_NBITS, support, ret); return ret; } phylink_dbg(pl, "requesting link mode %s/%s with support %*pb\n", - phylink_an_mode_str(MLO_AN_INBAND), - phy_modes(config.interface), + phylink_an_mode_str(mode), phy_modes(config.interface), __ETHTOOL_LINK_MODE_MASK_NBITS, support); if (phy_interface_mode_is_8023z(iface) && pl->phydev) @@ -1746,19 +1785,19 @@ static int phylink_sfp_module_insert(void *upstream, linkmode_copy(pl->link_config.advertising, config.advertising); } - if (pl->link_an_mode != MLO_AN_INBAND || + if (pl->cur_link_an_mode != mode || pl->link_config.interface != config.interface) { pl->link_config.interface = config.interface; - pl->link_an_mode = MLO_AN_INBAND; + pl->cur_link_an_mode = mode; changed = true; phylink_info(pl, "switched to %s/%s link mode\n", - phylink_an_mode_str(MLO_AN_INBAND), + phylink_an_mode_str(mode), phy_modes(config.interface)); } - pl->link_port = port; + pl->link_port = pl->sfp_port; if (changed && !test_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state)) @@ -1767,6 +1806,55 @@ static int phylink_sfp_module_insert(void *upstream, return ret; } +static int phylink_sfp_module_insert(void *upstream, + const struct sfp_eeprom_id *id) +{ + struct phylink *pl = upstream; + unsigned long *support = pl->sfp_support; + + ASSERT_RTNL(); + + linkmode_zero(support); + sfp_parse_support(pl->sfp_bus, id, support); + pl->sfp_port = sfp_parse_port(pl->sfp_bus, id, support); + + /* If this module may have a PHY connecting later, defer until later */ + pl->sfp_may_have_phy = sfp_may_have_phy(pl->sfp_bus, id); + if (pl->sfp_may_have_phy) + return 0; + + return phylink_sfp_config(pl, MLO_AN_INBAND, support, support); +} + +static int phylink_sfp_module_start(void *upstream) +{ + struct phylink *pl = upstream; + + /* If this SFP module has a PHY, start the PHY now. */ + if (pl->phydev) { + phy_start(pl->phydev); + return 0; + } + + /* If the module may have a PHY but we didn't detect one we + * need to configure the MAC here. + */ + if (!pl->sfp_may_have_phy) + return 0; + + return phylink_sfp_config(pl, MLO_AN_INBAND, + pl->sfp_support, pl->sfp_support); +} + +static void phylink_sfp_module_stop(void *upstream) +{ + struct phylink *pl = upstream; + + /* If this SFP module has a PHY, stop it. */ + if (pl->phydev) + phy_stop(pl->phydev); +} + static void phylink_sfp_link_down(void *upstream) { struct phylink *pl = upstream; @@ -1786,11 +1874,51 @@ static void phylink_sfp_link_up(void *upstream) phylink_run_resolve(pl); } +/* The Broadcom BCM84881 in the Methode DM7052 is unable to provide a SGMII + * or 802.3z control word, so inband will not work. + */ +static bool phylink_phy_no_inband(struct phy_device *phy) +{ + return phy->is_c45 && + (phy->c45_ids.device_ids[1] & 0xfffffff0) == 0xae025150; +} + static int phylink_sfp_connect_phy(void *upstream, struct phy_device *phy) { struct phylink *pl = upstream; + phy_interface_t interface; + u8 mode; + int ret; - return __phylink_connect_phy(upstream, phy, pl->link_config.interface); + /* + * This is the new way of dealing with flow control for PHYs, + * as described by Timur Tabi in commit 529ed1275263 ("net: phy: + * phy drivers should not set SUPPORTED_[Asym_]Pause") except + * using our validate call to the MAC, we rely upon the MAC + * clearing the bits from both supported and advertising fields. + */ + phy_support_asym_pause(phy); + + if (phylink_phy_no_inband(phy)) + mode = MLO_AN_PHY; + else + mode = MLO_AN_INBAND; + + /* Do the initial configuration */ + ret = phylink_sfp_config(pl, mode, phy->supported, phy->advertising); + if (ret < 0) + return ret; + + interface = pl->link_config.interface; + ret = phylink_attach_phy(pl, phy, interface); + if (ret < 0) + return ret; + + ret = phylink_bringup_phy(pl, phy, interface); + if (ret) + phy_detach(phy); + + return ret; } static void phylink_sfp_disconnect_phy(void *upstream) @@ -1802,6 +1930,8 @@ static const struct sfp_upstream_ops sfp_phylink_ops = { .attach = phylink_sfp_attach, .detach = phylink_sfp_detach, .module_insert = phylink_sfp_module_insert, + .module_start = phylink_sfp_module_start, + .module_stop = phylink_sfp_module_stop, .link_up = phylink_sfp_link_up, .link_down = phylink_sfp_link_down, .connect_phy = phylink_sfp_connect_phy, diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index 5a72093ab6e7..06e6429b8b71 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -103,6 +103,7 @@ static const struct sfp_quirk *sfp_lookup_quirk(const struct sfp_eeprom_id *id) return NULL; } + /** * sfp_parse_port() - Parse the EEPROM base ID, setting the port type * @bus: a pointer to the &struct sfp_bus structure for the sfp module @@ -124,35 +125,35 @@ int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id, /* port is the physical connector, set this from the connector field. */ switch (id->base.connector) { - case SFP_CONNECTOR_SC: - case SFP_CONNECTOR_FIBERJACK: - case SFP_CONNECTOR_LC: - case SFP_CONNECTOR_MT_RJ: - case SFP_CONNECTOR_MU: - case SFP_CONNECTOR_OPTICAL_PIGTAIL: + case SFF8024_CONNECTOR_SC: + case SFF8024_CONNECTOR_FIBERJACK: + case SFF8024_CONNECTOR_LC: + case SFF8024_CONNECTOR_MT_RJ: + case SFF8024_CONNECTOR_MU: + case SFF8024_CONNECTOR_OPTICAL_PIGTAIL: + case SFF8024_CONNECTOR_MPO_1X12: + case SFF8024_CONNECTOR_MPO_2X16: port = PORT_FIBRE; break; - case SFP_CONNECTOR_RJ45: + case SFF8024_CONNECTOR_RJ45: port = PORT_TP; break; - case SFP_CONNECTOR_COPPER_PIGTAIL: + case SFF8024_CONNECTOR_COPPER_PIGTAIL: port = PORT_DA; break; - case SFP_CONNECTOR_UNSPEC: + case SFF8024_CONNECTOR_UNSPEC: if (id->base.e1000_base_t) { port = PORT_TP; break; } /* fallthrough */ - case SFP_CONNECTOR_SG: /* guess */ - case SFP_CONNECTOR_MPO_1X12: - case SFP_CONNECTOR_MPO_2X16: - case SFP_CONNECTOR_HSSDC_II: - case SFP_CONNECTOR_NOSEPARATE: - case SFP_CONNECTOR_MXC_2X16: + case SFF8024_CONNECTOR_SG: /* guess */ + case SFF8024_CONNECTOR_HSSDC_II: + case SFF8024_CONNECTOR_NOSEPARATE: + case SFF8024_CONNECTOR_MXC_2X16: port = PORT_OTHER; break; default: @@ -179,6 +180,33 @@ int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id, EXPORT_SYMBOL_GPL(sfp_parse_port); /** + * sfp_may_have_phy() - indicate whether the module may have a PHY + * @bus: a pointer to the &struct sfp_bus structure for the sfp module + * @id: a pointer to the module's &struct sfp_eeprom_id + * + * Parse the EEPROM identification given in @id, and return whether + * this module may have a PHY. + */ +bool sfp_may_have_phy(struct sfp_bus *bus, const struct sfp_eeprom_id *id) +{ + if (id->base.e1000_base_t) + return true; + + if (id->base.phys_id != SFF8024_ID_DWDM_SFP) { + switch (id->base.extended_cc) { + case SFF8024_ECC_10GBASE_T_SFI: + case SFF8024_ECC_10GBASE_T_SR: + case SFF8024_ECC_5GBASE_T: + case SFF8024_ECC_2_5GBASE_T: + return true; + } + } + + return false; +} +EXPORT_SYMBOL_GPL(sfp_may_have_phy); + +/** * sfp_parse_support() - Parse the eeprom id for supported link modes * @bus: a pointer to the &struct sfp_bus structure for the sfp module * @id: a pointer to the module's &struct sfp_eeprom_id @@ -261,22 +289,33 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, } switch (id->base.extended_cc) { - case 0x00: /* Unspecified */ + case SFF8024_ECC_UNSPEC: break; - case 0x02: /* 100Gbase-SR4 or 25Gbase-SR */ + case SFF8024_ECC_100GBASE_SR4_25GBASE_SR: phylink_set(modes, 100000baseSR4_Full); phylink_set(modes, 25000baseSR_Full); break; - case 0x03: /* 100Gbase-LR4 or 25Gbase-LR */ - case 0x04: /* 100Gbase-ER4 or 25Gbase-ER */ + case SFF8024_ECC_100GBASE_LR4_25GBASE_LR: + case SFF8024_ECC_100GBASE_ER4_25GBASE_ER: phylink_set(modes, 100000baseLR4_ER4_Full); break; - case 0x0b: /* 100Gbase-CR4 or 25Gbase-CR CA-L */ - case 0x0c: /* 25Gbase-CR CA-S */ - case 0x0d: /* 25Gbase-CR CA-N */ + case SFF8024_ECC_100GBASE_CR4: phylink_set(modes, 100000baseCR4_Full); + /* fallthrough */ + case SFF8024_ECC_25GBASE_CR_S: + case SFF8024_ECC_25GBASE_CR_N: phylink_set(modes, 25000baseCR_Full); break; + case SFF8024_ECC_10GBASE_T_SFI: + case SFF8024_ECC_10GBASE_T_SR: + phylink_set(modes, 10000baseT_Full); + break; + case SFF8024_ECC_5GBASE_T: + phylink_set(modes, 5000baseT_Full); + break; + case SFF8024_ECC_2_5GBASE_T: + phylink_set(modes, 2500baseT_Full); + break; default: dev_warn(bus->sfp_dev, "Unknown/unsupported extended compliance code: 0x%02x\n", @@ -301,7 +340,7 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, */ if (bitmap_empty(modes, __ETHTOOL_LINK_MODE_MASK_NBITS)) { /* If the encoding and bit rate allows 1000baseX */ - if (id->base.encoding == SFP_ENCODING_8B10B && br_nom && + if (id->base.encoding == SFF8024_ENCODING_8B10B && br_nom && br_min <= 1300 && br_max >= 1200) phylink_set(modes, 1000baseX_Full); } @@ -320,31 +359,27 @@ EXPORT_SYMBOL_GPL(sfp_parse_support); /** * sfp_select_interface() - Select appropriate phy_interface_t mode * @bus: a pointer to the &struct sfp_bus structure for the sfp module - * @id: a pointer to the module's &struct sfp_eeprom_id * @link_modes: ethtool link modes mask * - * Derive the phy_interface_t mode for the information found in the - * module's identifying EEPROM and the link modes mask. There is no - * standard or defined way to derive this information, so we decide - * based upon the link mode mask. + * Derive the phy_interface_t mode for the SFP module from the link + * modes mask. */ phy_interface_t sfp_select_interface(struct sfp_bus *bus, - const struct sfp_eeprom_id *id, unsigned long *link_modes) { if (phylink_test(link_modes, 10000baseCR_Full) || phylink_test(link_modes, 10000baseSR_Full) || phylink_test(link_modes, 10000baseLR_Full) || phylink_test(link_modes, 10000baseLRM_Full) || - phylink_test(link_modes, 10000baseER_Full)) + phylink_test(link_modes, 10000baseER_Full) || + phylink_test(link_modes, 10000baseT_Full)) return PHY_INTERFACE_MODE_10GKR; if (phylink_test(link_modes, 2500baseX_Full)) return PHY_INTERFACE_MODE_2500BASEX; - if (id->base.e1000_base_t || - id->base.e100_base_lx || - id->base.e100_base_fx) + if (phylink_test(link_modes, 1000baseT_Half) || + phylink_test(link_modes, 1000baseT_Full)) return PHY_INTERFACE_MODE_SGMII; if (phylink_test(link_modes, 1000baseX_Full)) @@ -705,6 +740,27 @@ void sfp_module_remove(struct sfp_bus *bus) } EXPORT_SYMBOL_GPL(sfp_module_remove); +int sfp_module_start(struct sfp_bus *bus) +{ + const struct sfp_upstream_ops *ops = sfp_get_upstream_ops(bus); + int ret = 0; + + if (ops && ops->module_start) + ret = ops->module_start(bus->upstream); + + return ret; +} +EXPORT_SYMBOL_GPL(sfp_module_start); + +void sfp_module_stop(struct sfp_bus *bus) +{ + const struct sfp_upstream_ops *ops = sfp_get_upstream_ops(bus); + + if (ops && ops->module_stop) + ops->module_stop(bus->upstream); +} +EXPORT_SYMBOL_GPL(sfp_module_stop); + static void sfp_socket_clear(struct sfp_bus *bus) { bus->sfp_dev = NULL; diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index c0b9a8e4e65a..73c2969f11a4 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -59,8 +59,10 @@ enum { SFP_DEV_UP, SFP_S_DOWN = 0, + SFP_S_FAIL, SFP_S_WAIT, SFP_S_INIT, + SFP_S_INIT_PHY, SFP_S_INIT_TX_FAULT, SFP_S_WAIT_LOS, SFP_S_LINK_UP, @@ -122,8 +124,10 @@ static const char *event_to_str(unsigned short event) static const char * const sm_state_strings[] = { [SFP_S_DOWN] = "down", + [SFP_S_FAIL] = "fail", [SFP_S_WAIT] = "wait", [SFP_S_INIT] = "init", + [SFP_S_INIT_PHY] = "init_phy", [SFP_S_INIT_TX_FAULT] = "init_tx_fault", [SFP_S_WAIT_LOS] = "wait_los", [SFP_S_LINK_UP] = "link_up", @@ -155,10 +159,34 @@ static const enum gpiod_flags gpio_flags[] = { GPIOD_ASIS, }; -#define T_WAIT msecs_to_jiffies(50) -#define T_INIT_JIFFIES msecs_to_jiffies(300) -#define T_RESET_US 10 -#define T_FAULT_RECOVER msecs_to_jiffies(1000) +/* t_start_up (SFF-8431) or t_init (SFF-8472) is the time required for a + * non-cooled module to initialise its laser safety circuitry. We wait + * an initial T_WAIT period before we check the tx fault to give any PHY + * on board (for a copper SFP) time to initialise. + */ +#define T_WAIT msecs_to_jiffies(50) +#define T_START_UP msecs_to_jiffies(300) +#define T_START_UP_BAD_GPON msecs_to_jiffies(60000) + +/* t_reset is the time required to assert the TX_DISABLE signal to reset + * an indicated TX_FAULT. + */ +#define T_RESET_US 10 +#define T_FAULT_RECOVER msecs_to_jiffies(1000) + +/* N_FAULT_INIT is the number of recovery attempts at module initialisation + * time. If the TX_FAULT signal is not deasserted after this number of + * attempts at clearing it, we decide that the module is faulty. + * N_FAULT is the same but after the module has initialised. + */ +#define N_FAULT_INIT 5 +#define N_FAULT 5 + +/* T_PHY_RETRY is the time interval between attempts to probe the PHY. + * R_PHY_RETRY is the number of attempts. + */ +#define T_PHY_RETRY msecs_to_jiffies(50) +#define R_PHY_RETRY 12 /* SFP module presence detection is poor: the three MOD DEF signals are * the same length on the PCB, which means it's possible for MOD DEF 0 to @@ -214,10 +242,12 @@ struct sfp { unsigned char sm_mod_tries; unsigned char sm_dev_state; unsigned short sm_state; - unsigned int sm_retries; + unsigned char sm_fault_retries; + unsigned char sm_phy_retries; struct sfp_eeprom_id id; unsigned int module_power_mW; + unsigned int module_t_start_up; #if IS_ENABLED(CONFIG_HWMON) struct sfp_diag diag; @@ -231,7 +261,7 @@ struct sfp { static bool sff_module_supported(const struct sfp_eeprom_id *id) { - return id->base.phys_id == SFP_PHYS_ID_SFF && + return id->base.phys_id == SFF8024_ID_SFF_8472 && id->base.phys_ext_id == SFP_PHYS_EXT_ID_SFP; } @@ -242,7 +272,7 @@ static const struct sff_data sff_data = { static bool sfp_module_supported(const struct sfp_eeprom_id *id) { - return id->base.phys_id == SFP_PHYS_ID_SFP && + return id->base.phys_id == SFF8024_ID_SFP && id->base.phys_ext_id == SFP_PHYS_EXT_ID_SFP; } @@ -412,13 +442,20 @@ static unsigned int sfp_soft_get_state(struct sfp *sfp) { unsigned int state = 0; u8 status; + int ret; - if (sfp_read(sfp, true, SFP_STATUS, &status, sizeof(status)) == - sizeof(status)) { + ret = sfp_read(sfp, true, SFP_STATUS, &status, sizeof(status)); + if (ret == sizeof(status)) { if (status & SFP_STATUS_RX_LOS) state |= SFP_F_LOS; if (status & SFP_STATUS_TX_FAULT) state |= SFP_F_TX_FAULT; + } else { + dev_err_ratelimited(sfp->dev, + "failed to read SFP soft status: %d\n", + ret); + /* Preserve the current state */ + state = sfp->state; } return state & sfp->state_soft_mask; @@ -1383,26 +1420,30 @@ static void sfp_sm_mod_next(struct sfp *sfp, unsigned int state, static void sfp_sm_phy_detach(struct sfp *sfp) { - phy_stop(sfp->mod_phy); sfp_remove_phy(sfp->sfp_bus); phy_device_remove(sfp->mod_phy); phy_device_free(sfp->mod_phy); sfp->mod_phy = NULL; } -static void sfp_sm_probe_phy(struct sfp *sfp) +static int sfp_sm_probe_phy(struct sfp *sfp, bool is_c45) { struct phy_device *phy; int err; - phy = mdiobus_scan(sfp->i2c_mii, SFP_PHY_ADDR); - if (phy == ERR_PTR(-ENODEV)) { - dev_info(sfp->dev, "no PHY detected\n"); - return; - } + phy = get_phy_device(sfp->i2c_mii, SFP_PHY_ADDR, is_c45); + if (phy == ERR_PTR(-ENODEV)) + return PTR_ERR(phy); if (IS_ERR(phy)) { dev_err(sfp->dev, "mdiobus scan returned %ld\n", PTR_ERR(phy)); - return; + return PTR_ERR(phy); + } + + err = phy_device_register(phy); + if (err) { + phy_device_free(phy); + dev_err(sfp->dev, "phy_device_register failed: %d\n", err); + return err; } err = sfp_add_phy(sfp->sfp_bus, phy); @@ -1410,11 +1451,12 @@ static void sfp_sm_probe_phy(struct sfp *sfp) phy_device_remove(phy); phy_device_free(phy); dev_err(sfp->dev, "sfp_add_phy failed: %d\n", err); - return; + return err; } sfp->mod_phy = phy; - phy_start(phy); + + return 0; } static void sfp_sm_link_up(struct sfp *sfp) @@ -1464,7 +1506,7 @@ static bool sfp_los_event_inactive(struct sfp *sfp, unsigned int event) static void sfp_sm_fault(struct sfp *sfp, unsigned int next_state, bool warn) { - if (sfp->sm_retries && !--sfp->sm_retries) { + if (sfp->sm_fault_retries && !--sfp->sm_fault_retries) { dev_err(sfp->dev, "module persistently indicates fault, disabling\n"); sfp_sm_next(sfp, SFP_S_TX_DISABLE, 0); @@ -1476,21 +1518,35 @@ static void sfp_sm_fault(struct sfp *sfp, unsigned int next_state, bool warn) } } -static void sfp_sm_probe_for_phy(struct sfp *sfp) +/* Probe a SFP for a PHY device if the module supports copper - the PHY + * normally sits at I2C bus address 0x56, and may either be a clause 22 + * or clause 45 PHY. + * + * Clause 22 copper SFP modules normally operate in Cisco SGMII mode with + * negotiation enabled, but some may be in 1000base-X - which is for the + * PHY driver to determine. + * + * Clause 45 copper SFP+ modules (10G) appear to switch their interface + * mode according to the negotiated line speed. + */ +static int sfp_sm_probe_for_phy(struct sfp *sfp) { - /* Setting the serdes link mode is guesswork: there's no - * field in the EEPROM which indicates what mode should - * be used. - * - * If it's a gigabit-only fiber module, it probably does - * not have a PHY, so switch to 802.3z negotiation mode. - * Otherwise, switch to SGMII mode (which is required to - * support non-gigabit speeds) and probe for a PHY. - */ - if (sfp->id.base.e1000_base_t || - sfp->id.base.e100_base_lx || - sfp->id.base.e100_base_fx) - sfp_sm_probe_phy(sfp); + int err = 0; + + switch (sfp->id.base.extended_cc) { + case SFF8024_ECC_10GBASE_T_SFI: + case SFF8024_ECC_10GBASE_T_SR: + case SFF8024_ECC_5GBASE_T: + case SFF8024_ECC_2_5GBASE_T: + err = sfp_sm_probe_phy(sfp, true); + break; + + default: + if (sfp->id.base.e1000_base_t) + err = sfp_sm_probe_phy(sfp, false); + break; + } + return err; } static int sfp_module_parse_power(struct sfp *sfp) @@ -1550,6 +1606,13 @@ static int sfp_sm_mod_hpower(struct sfp *sfp, bool enable) return -EAGAIN; } + /* DM7052 reports as a high power module, responds to reads (with + * all bytes 0xff) at 0x51 but does not accept writes. In any case, + * if the bit is already set, we're already in high power mode. + */ + if (!!(val & BIT(0)) == enable) + return 0; + if (enable) val |= BIT(0); else @@ -1655,6 +1718,12 @@ static int sfp_sm_mod_probe(struct sfp *sfp, bool report) if (ret < 0) return ret; + if (!memcmp(id.base.vendor_name, "ALCATELLUCENT ", 16) && + !memcmp(id.base.vendor_pn, "3FE46541AA ", 16)) + sfp->module_t_start_up = T_START_UP_BAD_GPON; + else + sfp->module_t_start_up = T_START_UP; + return 0; } @@ -1812,6 +1881,7 @@ static void sfp_sm_module(struct sfp *sfp, unsigned int event) static void sfp_sm_main(struct sfp *sfp, unsigned int event) { unsigned long timeout; + int ret; /* Some events are global */ if (sfp->sm_state != SFP_S_DOWN && @@ -1820,6 +1890,8 @@ static void sfp_sm_main(struct sfp *sfp, unsigned int event) if (sfp->sm_state == SFP_S_LINK_UP && sfp->sm_dev_state == SFP_DEV_UP) sfp_sm_link_down(sfp); + if (sfp->sm_state > SFP_S_INIT) + sfp_module_stop(sfp->sfp_bus); if (sfp->mod_phy) sfp_sm_phy_detach(sfp); sfp_module_tx_disable(sfp); @@ -1841,7 +1913,7 @@ static void sfp_sm_main(struct sfp *sfp, unsigned int event) sfp_module_tx_enable(sfp); /* Initialise the fault clearance retries */ - sfp->sm_retries = 5; + sfp->sm_fault_retries = N_FAULT_INIT; /* We need to check the TX_FAULT state, which is not defined * while TX_DISABLE is asserted. The earliest we want to do @@ -1855,11 +1927,12 @@ static void sfp_sm_main(struct sfp *sfp, unsigned int event) break; if (sfp->state & SFP_F_TX_FAULT) { - /* Wait t_init before indicating that the link is up, - * provided the current state indicates no TX_FAULT. If - * TX_FAULT clears before this time, that's fine too. + /* Wait up to t_init (SFF-8472) or t_start_up (SFF-8431) + * from the TX_DISABLE deassertion for the module to + * initialise, which is indicated by TX_FAULT + * deasserting. */ - timeout = T_INIT_JIFFIES; + timeout = sfp->module_t_start_up; if (timeout > T_WAIT) timeout -= T_WAIT; else @@ -1876,27 +1949,51 @@ static void sfp_sm_main(struct sfp *sfp, unsigned int event) case SFP_S_INIT: if (event == SFP_E_TIMEOUT && sfp->state & SFP_F_TX_FAULT) { - /* TX_FAULT is still asserted after t_init, so assume - * there is a fault. + /* TX_FAULT is still asserted after t_init or + * or t_start_up, so assume there is a fault. */ sfp_sm_fault(sfp, SFP_S_INIT_TX_FAULT, - sfp->sm_retries == 5); + sfp->sm_fault_retries == N_FAULT_INIT); } else if (event == SFP_E_TIMEOUT || event == SFP_E_TX_CLEAR) { - init_done: /* TX_FAULT deasserted or we timed out with TX_FAULT - * clear. Probe for the PHY and check the LOS state. - */ - sfp_sm_probe_for_phy(sfp); - sfp_sm_link_check_los(sfp); + init_done: + sfp->sm_phy_retries = R_PHY_RETRY; + goto phy_probe; + } + break; - /* Reset the fault retry count */ - sfp->sm_retries = 5; + case SFP_S_INIT_PHY: + if (event != SFP_E_TIMEOUT) + break; + phy_probe: + /* TX_FAULT deasserted or we timed out with TX_FAULT + * clear. Probe for the PHY and check the LOS state. + */ + ret = sfp_sm_probe_for_phy(sfp); + if (ret == -ENODEV) { + if (--sfp->sm_phy_retries) { + sfp_sm_next(sfp, SFP_S_INIT_PHY, T_PHY_RETRY); + break; + } else { + dev_info(sfp->dev, "no PHY detected\n"); + } + } else if (ret) { + sfp_sm_next(sfp, SFP_S_FAIL, 0); + break; } + if (sfp_module_start(sfp->sfp_bus)) { + sfp_sm_next(sfp, SFP_S_FAIL, 0); + break; + } + sfp_sm_link_check_los(sfp); + + /* Reset the fault retry count */ + sfp->sm_fault_retries = N_FAULT; break; case SFP_S_INIT_TX_FAULT: if (event == SFP_E_TIMEOUT) { sfp_module_tx_fault_reset(sfp); - sfp_sm_next(sfp, SFP_S_INIT, T_INIT_JIFFIES); + sfp_sm_next(sfp, SFP_S_INIT, sfp->module_t_start_up); } break; @@ -1920,7 +2017,7 @@ static void sfp_sm_main(struct sfp *sfp, unsigned int event) case SFP_S_TX_FAULT: if (event == SFP_E_TIMEOUT) { sfp_module_tx_fault_reset(sfp); - sfp_sm_next(sfp, SFP_S_REINIT, T_INIT_JIFFIES); + sfp_sm_next(sfp, SFP_S_REINIT, sfp->module_t_start_up); } break; diff --git a/drivers/net/phy/sfp.h b/drivers/net/phy/sfp.h index 64f54b0bbd8c..b83f70526270 100644 --- a/drivers/net/phy/sfp.h +++ b/drivers/net/phy/sfp.h @@ -22,6 +22,8 @@ void sfp_link_up(struct sfp_bus *bus); void sfp_link_down(struct sfp_bus *bus); int sfp_module_insert(struct sfp_bus *bus, const struct sfp_eeprom_id *id); void sfp_module_remove(struct sfp_bus *bus); +int sfp_module_start(struct sfp_bus *bus); +void sfp_module_stop(struct sfp_bus *bus); int sfp_link_configure(struct sfp_bus *bus, const struct sfp_eeprom_id *id); struct sfp_bus *sfp_register_socket(struct device *dev, struct sfp *sfp, const struct sfp_socket_ops *ops); diff --git a/drivers/net/phy/uPD60620.c b/drivers/net/phy/uPD60620.c index a32b3fd8a370..38834347a427 100644 --- a/drivers/net/phy/uPD60620.c +++ b/drivers/net/phy/uPD60620.c @@ -68,12 +68,7 @@ static int upd60620_read_status(struct phy_device *phydev) mii_lpa_to_linkmode_lpa_t(phydev->lp_advertising, phy_state); - if (phydev->duplex == DUPLEX_FULL) { - if (phy_state & LPA_PAUSE_CAP) - phydev->pause = 1; - if (phy_state & LPA_PAUSE_ASYM) - phydev->asym_pause = 1; - } + phy_resolve_aneg_pause(phydev); } } return 0; diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c index a7b9cf3269bf..29a0917a81e6 100644 --- a/drivers/net/ppp/ppp_async.c +++ b/drivers/net/ppp/ppp_async.c @@ -874,15 +874,15 @@ ppp_async_input(struct asyncppp *ap, const unsigned char *buf, skb = dev_alloc_skb(ap->mru + PPP_HDRLEN + 2); if (!skb) goto nomem; - ap->rpkt = skb; - } - if (skb->len == 0) { - /* Try to get the payload 4-byte aligned. - * This should match the - * PPP_ALLSTATIONS/PPP_UI/compressed tests in - * process_input_packet, but we do not have - * enough chars here to test buf[1] and buf[2]. - */ + ap->rpkt = skb; + } + if (skb->len == 0) { + /* Try to get the payload 4-byte aligned. + * This should match the + * PPP_ALLSTATIONS/PPP_UI/compressed tests in + * process_input_packet, but we do not have + * enough chars here to test buf[1] and buf[2]. + */ if (buf[0] != PPP_ALLSTATIONS) skb_reserve(skb, 2 + (buf[0] & 1)); } diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index 2a91c192659f..317d3a8df316 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -457,7 +457,7 @@ static void slip_write_wakeup(struct tty_struct *tty) schedule_work(&sl->tx_work); } -static void sl_tx_timeout(struct net_device *dev) +static void sl_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct slip *sl = netdev_priv(dev); diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c index 1e58702c737f..d387bc7ac1b6 100644 --- a/drivers/net/usb/catc.c +++ b/drivers/net/usb/catc.c @@ -447,7 +447,7 @@ static netdev_tx_t catc_start_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } -static void catc_tx_timeout(struct net_device *netdev) +static void catc_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct catc *catc = netdev_priv(netdev); diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index ca827802f291..417e42c9fd03 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -820,7 +820,7 @@ static const struct ethtool_ops ops = { }; /* called when a packet did not ack after watchdogtimeout */ -static void hso_net_tx_timeout(struct net_device *net) +static void hso_net_tx_timeout(struct net_device *net, unsigned int txqueue) { struct hso_net *odev = netdev_priv(net); diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c index 8c01fbf68a89..c792d65dd7b4 100644 --- a/drivers/net/usb/ipheth.c +++ b/drivers/net/usb/ipheth.c @@ -400,7 +400,7 @@ static int ipheth_tx(struct sk_buff *skb, struct net_device *net) return NETDEV_TX_OK; } -static void ipheth_tx_timeout(struct net_device *net) +static void ipheth_tx_timeout(struct net_device *net, unsigned int txqueue) { struct ipheth_device *dev = netdev_priv(net); diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c index 8e210ba4a313..ed01dc964c99 100644 --- a/drivers/net/usb/kaweth.c +++ b/drivers/net/usb/kaweth.c @@ -894,7 +894,7 @@ static void kaweth_async_set_rx_mode(struct kaweth_device *kaweth) /**************************************************************** * kaweth_tx_timeout ****************************************************************/ -static void kaweth_tx_timeout(struct net_device *net) +static void kaweth_tx_timeout(struct net_device *net, unsigned int txqueue) { struct kaweth_device *kaweth = netdev_priv(net); diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index cf1f3f0a4b9b..0c8b9363366b 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -3662,7 +3662,7 @@ static void lan78xx_disconnect(struct usb_interface *intf) usb_put_dev(udev); } -static void lan78xx_tx_timeout(struct net_device *net) +static void lan78xx_tx_timeout(struct net_device *net, unsigned int txqueue) { struct lan78xx_net *dev = netdev_priv(net); diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index f7d117d80cfb..8783e2ab3ec0 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -693,7 +693,7 @@ static void intr_callback(struct urb *urb) "can't resubmit interrupt urb, %d\n", res); } -static void pegasus_tx_timeout(struct net_device *net) +static void pegasus_tx_timeout(struct net_device *net, unsigned int txqueue) { pegasus_t *pegasus = netdev_priv(net); netif_warn(pegasus, timer, net, "tx timeout\n"); diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index c5ebf35d2488..9ec1da429514 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -2507,7 +2507,7 @@ static void rtl_drop_queued_tx(struct r8152 *tp) } } -static void rtl8152_tx_timeout(struct net_device *netdev) +static void rtl8152_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct r8152 *tp = netdev_priv(netdev); diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c index 13e51ccf0214..e7c630d37589 100644 --- a/drivers/net/usb/rtl8150.c +++ b/drivers/net/usb/rtl8150.c @@ -655,7 +655,7 @@ static void disable_net_traffic(rtl8150_t * dev) set_registers(dev, CR, 1, &cr); } -static void rtl8150_tx_timeout(struct net_device *netdev) +static void rtl8150_tx_timeout(struct net_device *netdev, unsigned int txqueue) { rtl8150_t *dev = netdev_priv(netdev); dev_warn(&netdev->dev, "Tx timeout.\n"); diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 30e511c2c8d0..bc88923db369 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1293,7 +1293,7 @@ static void tx_complete (struct urb *urb) /*-------------------------------------------------------------------------*/ -void usbnet_tx_timeout (struct net_device *net) +void usbnet_tx_timeout (struct net_device *net, unsigned int txqueue) { struct usbnet *dev = netdev_priv(net); diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 216acf37ca7c..18f152fa0068 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -3198,7 +3198,7 @@ vmxnet3_free_intr_resources(struct vmxnet3_adapter *adapter) static void -vmxnet3_tx_timeout(struct net_device *netdev) +vmxnet3_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct vmxnet3_adapter *adapter = netdev_priv(netdev); adapter->tx_timeout_count++; diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c index af539151d663..5d6532ad6b78 100644 --- a/drivers/net/wan/cosa.c +++ b/drivers/net/wan/cosa.c @@ -268,7 +268,7 @@ static int cosa_net_attach(struct net_device *dev, unsigned short encoding, unsigned short parity); static int cosa_net_open(struct net_device *d); static int cosa_net_close(struct net_device *d); -static void cosa_net_timeout(struct net_device *d); +static void cosa_net_timeout(struct net_device *d, unsigned int txqueue); static netdev_tx_t cosa_net_tx(struct sk_buff *skb, struct net_device *d); static char *cosa_net_setup_rx(struct channel_data *channel, int size); static int cosa_net_rx_done(struct channel_data *channel); @@ -670,7 +670,7 @@ static netdev_tx_t cosa_net_tx(struct sk_buff *skb, return NETDEV_TX_OK; } -static void cosa_net_timeout(struct net_device *dev) +static void cosa_net_timeout(struct net_device *dev, unsigned int txqueue) { struct channel_data *chan = dev_to_chan(dev); diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c index 1901ec7948d8..7916efce7188 100644 --- a/drivers/net/wan/farsync.c +++ b/drivers/net/wan/farsync.c @@ -2239,7 +2239,7 @@ fst_attach(struct net_device *dev, unsigned short encoding, unsigned short parit } static void -fst_tx_timeout(struct net_device *dev) +fst_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct fst_port_info *port; struct fst_card_info *card; diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index ca0f3be2b6bf..308384756e6f 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -1039,7 +1039,7 @@ static const struct dev_pm_ops uhdlc_pm_ops = { #define HDLC_PM_OPS NULL #endif -static void uhdlc_tx_timeout(struct net_device *ndev) +static void uhdlc_tx_timeout(struct net_device *ndev, unsigned int txqueue) { netdev_err(ndev, "%s\n", __func__); } diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c index 0e6a51525d91..a20f467ca48a 100644 --- a/drivers/net/wan/lmc/lmc_main.c +++ b/drivers/net/wan/lmc/lmc_main.c @@ -99,7 +99,7 @@ static int lmc_ifdown(struct net_device * const); static void lmc_watchdog(struct timer_list *t); static void lmc_reset(lmc_softc_t * const sc); static void lmc_dec_reset(lmc_softc_t * const sc); -static void lmc_driver_timeout(struct net_device *dev); +static void lmc_driver_timeout(struct net_device *dev, unsigned int txqueue); /* * linux reserves 16 device specific IOCTLs. We call them @@ -2044,7 +2044,7 @@ static void lmc_initcsrs(lmc_softc_t * const sc, lmc_csrptr_t csr_base, /*fold00 lmc_trace(sc->lmc_device, "lmc_initcsrs out"); } -static void lmc_driver_timeout(struct net_device *dev) +static void lmc_driver_timeout(struct net_device *dev, unsigned int txqueue) { lmc_softc_t *sc = dev_to_sc(dev); u32 csr6; diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index 914be5847386..69773d228ec1 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -276,7 +276,7 @@ static void x25_asy_write_wakeup(struct tty_struct *tty) sl->xhead += actual; } -static void x25_asy_timeout(struct net_device *dev) +static void x25_asy_timeout(struct net_device *dev, unsigned int txqueue) { struct x25_asy *sl = netdev_priv(dev); diff --git a/drivers/net/wimax/i2400m/netdev.c b/drivers/net/wimax/i2400m/netdev.c index a5db3c06b646..a7fcbceb6e6b 100644 --- a/drivers/net/wimax/i2400m/netdev.c +++ b/drivers/net/wimax/i2400m/netdev.c @@ -380,7 +380,7 @@ drop: static -void i2400m_tx_timeout(struct net_device *net_dev) +void i2400m_tx_timeout(struct net_device *net_dev, unsigned int txqueue) { /* * We might want to kick the device diff --git a/drivers/net/wireguard/Makefile b/drivers/net/wireguard/Makefile new file mode 100644 index 000000000000..fc52b2cb500b --- /dev/null +++ b/drivers/net/wireguard/Makefile @@ -0,0 +1,18 @@ +ccflags-y := -O3 +ccflags-y += -D'pr_fmt(fmt)=KBUILD_MODNAME ": " fmt' +ccflags-$(CONFIG_WIREGUARD_DEBUG) += -DDEBUG +wireguard-y := main.o +wireguard-y += noise.o +wireguard-y += device.o +wireguard-y += peer.o +wireguard-y += timers.o +wireguard-y += queueing.o +wireguard-y += send.o +wireguard-y += receive.o +wireguard-y += socket.o +wireguard-y += peerlookup.o +wireguard-y += allowedips.o +wireguard-y += ratelimiter.o +wireguard-y += cookie.o +wireguard-y += netlink.o +obj-$(CONFIG_WIREGUARD) := wireguard.o diff --git a/drivers/net/wireguard/allowedips.c b/drivers/net/wireguard/allowedips.c new file mode 100644 index 000000000000..121d9ea0f135 --- /dev/null +++ b/drivers/net/wireguard/allowedips.c @@ -0,0 +1,376 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include "allowedips.h" +#include "peer.h" + +static void swap_endian(u8 *dst, const u8 *src, u8 bits) +{ + if (bits == 32) { + *(u32 *)dst = be32_to_cpu(*(const __be32 *)src); + } else if (bits == 128) { + ((u64 *)dst)[0] = be64_to_cpu(((const __be64 *)src)[0]); + ((u64 *)dst)[1] = be64_to_cpu(((const __be64 *)src)[1]); + } +} + +static void copy_and_assign_cidr(struct allowedips_node *node, const u8 *src, + u8 cidr, u8 bits) +{ + node->cidr = cidr; + node->bit_at_a = cidr / 8U; +#ifdef __LITTLE_ENDIAN + node->bit_at_a ^= (bits / 8U - 1U) % 8U; +#endif + node->bit_at_b = 7U - (cidr % 8U); + node->bitlen = bits; + memcpy(node->bits, src, bits / 8U); +} +#define CHOOSE_NODE(parent, key) \ + parent->bit[(key[parent->bit_at_a] >> parent->bit_at_b) & 1] + +static void push_rcu(struct allowedips_node **stack, + struct allowedips_node __rcu *p, unsigned int *len) +{ + if (rcu_access_pointer(p)) { + WARN_ON(IS_ENABLED(DEBUG) && *len >= 128); + stack[(*len)++] = rcu_dereference_raw(p); + } +} + +static void root_free_rcu(struct rcu_head *rcu) +{ + struct allowedips_node *node, *stack[128] = { + container_of(rcu, struct allowedips_node, rcu) }; + unsigned int len = 1; + + while (len > 0 && (node = stack[--len])) { + push_rcu(stack, node->bit[0], &len); + push_rcu(stack, node->bit[1], &len); + kfree(node); + } +} + +static void root_remove_peer_lists(struct allowedips_node *root) +{ + struct allowedips_node *node, *stack[128] = { root }; + unsigned int len = 1; + + while (len > 0 && (node = stack[--len])) { + push_rcu(stack, node->bit[0], &len); + push_rcu(stack, node->bit[1], &len); + if (rcu_access_pointer(node->peer)) + list_del(&node->peer_list); + } +} + +static void walk_remove_by_peer(struct allowedips_node __rcu **top, + struct wg_peer *peer, struct mutex *lock) +{ +#define REF(p) rcu_access_pointer(p) +#define DEREF(p) rcu_dereference_protected(*(p), lockdep_is_held(lock)) +#define PUSH(p) ({ \ + WARN_ON(IS_ENABLED(DEBUG) && len >= 128); \ + stack[len++] = p; \ + }) + + struct allowedips_node __rcu **stack[128], **nptr; + struct allowedips_node *node, *prev; + unsigned int len; + + if (unlikely(!peer || !REF(*top))) + return; + + for (prev = NULL, len = 0, PUSH(top); len > 0; prev = node) { + nptr = stack[len - 1]; + node = DEREF(nptr); + if (!node) { + --len; + continue; + } + if (!prev || REF(prev->bit[0]) == node || + REF(prev->bit[1]) == node) { + if (REF(node->bit[0])) + PUSH(&node->bit[0]); + else if (REF(node->bit[1])) + PUSH(&node->bit[1]); + } else if (REF(node->bit[0]) == prev) { + if (REF(node->bit[1])) + PUSH(&node->bit[1]); + } else { + if (rcu_dereference_protected(node->peer, + lockdep_is_held(lock)) == peer) { + RCU_INIT_POINTER(node->peer, NULL); + list_del_init(&node->peer_list); + if (!node->bit[0] || !node->bit[1]) { + rcu_assign_pointer(*nptr, DEREF( + &node->bit[!REF(node->bit[0])])); + kfree_rcu(node, rcu); + node = DEREF(nptr); + } + } + --len; + } + } + +#undef REF +#undef DEREF +#undef PUSH +} + +static unsigned int fls128(u64 a, u64 b) +{ + return a ? fls64(a) + 64U : fls64(b); +} + +static u8 common_bits(const struct allowedips_node *node, const u8 *key, + u8 bits) +{ + if (bits == 32) + return 32U - fls(*(const u32 *)node->bits ^ *(const u32 *)key); + else if (bits == 128) + return 128U - fls128( + *(const u64 *)&node->bits[0] ^ *(const u64 *)&key[0], + *(const u64 *)&node->bits[8] ^ *(const u64 *)&key[8]); + return 0; +} + +static bool prefix_matches(const struct allowedips_node *node, const u8 *key, + u8 bits) +{ + /* This could be much faster if it actually just compared the common + * bits properly, by precomputing a mask bswap(~0 << (32 - cidr)), and + * the rest, but it turns out that common_bits is already super fast on + * modern processors, even taking into account the unfortunate bswap. + * So, we just inline it like this instead. + */ + return common_bits(node, key, bits) >= node->cidr; +} + +static struct allowedips_node *find_node(struct allowedips_node *trie, u8 bits, + const u8 *key) +{ + struct allowedips_node *node = trie, *found = NULL; + + while (node && prefix_matches(node, key, bits)) { + if (rcu_access_pointer(node->peer)) + found = node; + if (node->cidr == bits) + break; + node = rcu_dereference_bh(CHOOSE_NODE(node, key)); + } + return found; +} + +/* Returns a strong reference to a peer */ +static struct wg_peer *lookup(struct allowedips_node __rcu *root, u8 bits, + const void *be_ip) +{ + /* Aligned so it can be passed to fls/fls64 */ + u8 ip[16] __aligned(__alignof(u64)); + struct allowedips_node *node; + struct wg_peer *peer = NULL; + + swap_endian(ip, be_ip, bits); + + rcu_read_lock_bh(); +retry: + node = find_node(rcu_dereference_bh(root), bits, ip); + if (node) { + peer = wg_peer_get_maybe_zero(rcu_dereference_bh(node->peer)); + if (!peer) + goto retry; + } + rcu_read_unlock_bh(); + return peer; +} + +static bool node_placement(struct allowedips_node __rcu *trie, const u8 *key, + u8 cidr, u8 bits, struct allowedips_node **rnode, + struct mutex *lock) +{ + struct allowedips_node *node = rcu_dereference_protected(trie, + lockdep_is_held(lock)); + struct allowedips_node *parent = NULL; + bool exact = false; + + while (node && node->cidr <= cidr && prefix_matches(node, key, bits)) { + parent = node; + if (parent->cidr == cidr) { + exact = true; + break; + } + node = rcu_dereference_protected(CHOOSE_NODE(parent, key), + lockdep_is_held(lock)); + } + *rnode = parent; + return exact; +} + +static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key, + u8 cidr, struct wg_peer *peer, struct mutex *lock) +{ + struct allowedips_node *node, *parent, *down, *newnode; + + if (unlikely(cidr > bits || !peer)) + return -EINVAL; + + if (!rcu_access_pointer(*trie)) { + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (unlikely(!node)) + return -ENOMEM; + RCU_INIT_POINTER(node->peer, peer); + list_add_tail(&node->peer_list, &peer->allowedips_list); + copy_and_assign_cidr(node, key, cidr, bits); + rcu_assign_pointer(*trie, node); + return 0; + } + if (node_placement(*trie, key, cidr, bits, &node, lock)) { + rcu_assign_pointer(node->peer, peer); + list_move_tail(&node->peer_list, &peer->allowedips_list); + return 0; + } + + newnode = kzalloc(sizeof(*newnode), GFP_KERNEL); + if (unlikely(!newnode)) + return -ENOMEM; + RCU_INIT_POINTER(newnode->peer, peer); + list_add_tail(&newnode->peer_list, &peer->allowedips_list); + copy_and_assign_cidr(newnode, key, cidr, bits); + + if (!node) { + down = rcu_dereference_protected(*trie, lockdep_is_held(lock)); + } else { + down = rcu_dereference_protected(CHOOSE_NODE(node, key), + lockdep_is_held(lock)); + if (!down) { + rcu_assign_pointer(CHOOSE_NODE(node, key), newnode); + return 0; + } + } + cidr = min(cidr, common_bits(down, key, bits)); + parent = node; + + if (newnode->cidr == cidr) { + rcu_assign_pointer(CHOOSE_NODE(newnode, down->bits), down); + if (!parent) + rcu_assign_pointer(*trie, newnode); + else + rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits), + newnode); + } else { + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (unlikely(!node)) { + kfree(newnode); + return -ENOMEM; + } + INIT_LIST_HEAD(&node->peer_list); + copy_and_assign_cidr(node, newnode->bits, cidr, bits); + + rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down); + rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode); + if (!parent) + rcu_assign_pointer(*trie, node); + else + rcu_assign_pointer(CHOOSE_NODE(parent, node->bits), + node); + } + return 0; +} + +void wg_allowedips_init(struct allowedips *table) +{ + table->root4 = table->root6 = NULL; + table->seq = 1; +} + +void wg_allowedips_free(struct allowedips *table, struct mutex *lock) +{ + struct allowedips_node __rcu *old4 = table->root4, *old6 = table->root6; + + ++table->seq; + RCU_INIT_POINTER(table->root4, NULL); + RCU_INIT_POINTER(table->root6, NULL); + if (rcu_access_pointer(old4)) { + struct allowedips_node *node = rcu_dereference_protected(old4, + lockdep_is_held(lock)); + + root_remove_peer_lists(node); + call_rcu(&node->rcu, root_free_rcu); + } + if (rcu_access_pointer(old6)) { + struct allowedips_node *node = rcu_dereference_protected(old6, + lockdep_is_held(lock)); + + root_remove_peer_lists(node); + call_rcu(&node->rcu, root_free_rcu); + } +} + +int wg_allowedips_insert_v4(struct allowedips *table, const struct in_addr *ip, + u8 cidr, struct wg_peer *peer, struct mutex *lock) +{ + /* Aligned so it can be passed to fls */ + u8 key[4] __aligned(__alignof(u32)); + + ++table->seq; + swap_endian(key, (const u8 *)ip, 32); + return add(&table->root4, 32, key, cidr, peer, lock); +} + +int wg_allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip, + u8 cidr, struct wg_peer *peer, struct mutex *lock) +{ + /* Aligned so it can be passed to fls64 */ + u8 key[16] __aligned(__alignof(u64)); + + ++table->seq; + swap_endian(key, (const u8 *)ip, 128); + return add(&table->root6, 128, key, cidr, peer, lock); +} + +void wg_allowedips_remove_by_peer(struct allowedips *table, + struct wg_peer *peer, struct mutex *lock) +{ + ++table->seq; + walk_remove_by_peer(&table->root4, peer, lock); + walk_remove_by_peer(&table->root6, peer, lock); +} + +int wg_allowedips_read_node(struct allowedips_node *node, u8 ip[16], u8 *cidr) +{ + const unsigned int cidr_bytes = DIV_ROUND_UP(node->cidr, 8U); + swap_endian(ip, node->bits, node->bitlen); + memset(ip + cidr_bytes, 0, node->bitlen / 8U - cidr_bytes); + if (node->cidr) + ip[cidr_bytes - 1U] &= ~0U << (-node->cidr % 8U); + + *cidr = node->cidr; + return node->bitlen == 32 ? AF_INET : AF_INET6; +} + +/* Returns a strong reference to a peer */ +struct wg_peer *wg_allowedips_lookup_dst(struct allowedips *table, + struct sk_buff *skb) +{ + if (skb->protocol == htons(ETH_P_IP)) + return lookup(table->root4, 32, &ip_hdr(skb)->daddr); + else if (skb->protocol == htons(ETH_P_IPV6)) + return lookup(table->root6, 128, &ipv6_hdr(skb)->daddr); + return NULL; +} + +/* Returns a strong reference to a peer */ +struct wg_peer *wg_allowedips_lookup_src(struct allowedips *table, + struct sk_buff *skb) +{ + if (skb->protocol == htons(ETH_P_IP)) + return lookup(table->root4, 32, &ip_hdr(skb)->saddr); + else if (skb->protocol == htons(ETH_P_IPV6)) + return lookup(table->root6, 128, &ipv6_hdr(skb)->saddr); + return NULL; +} + +#include "selftest/allowedips.c" diff --git a/drivers/net/wireguard/allowedips.h b/drivers/net/wireguard/allowedips.h new file mode 100644 index 000000000000..e5c83cafcef4 --- /dev/null +++ b/drivers/net/wireguard/allowedips.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#ifndef _WG_ALLOWEDIPS_H +#define _WG_ALLOWEDIPS_H + +#include <linux/mutex.h> +#include <linux/ip.h> +#include <linux/ipv6.h> + +struct wg_peer; + +struct allowedips_node { + struct wg_peer __rcu *peer; + struct allowedips_node __rcu *bit[2]; + /* While it may seem scandalous that we waste space for v4, + * we're alloc'ing to the nearest power of 2 anyway, so this + * doesn't actually make a difference. + */ + u8 bits[16] __aligned(__alignof(u64)); + u8 cidr, bit_at_a, bit_at_b, bitlen; + + /* Keep rarely used list at bottom to be beyond cache line. */ + union { + struct list_head peer_list; + struct rcu_head rcu; + }; +}; + +struct allowedips { + struct allowedips_node __rcu *root4; + struct allowedips_node __rcu *root6; + u64 seq; +}; + +void wg_allowedips_init(struct allowedips *table); +void wg_allowedips_free(struct allowedips *table, struct mutex *mutex); +int wg_allowedips_insert_v4(struct allowedips *table, const struct in_addr *ip, + u8 cidr, struct wg_peer *peer, struct mutex *lock); +int wg_allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip, + u8 cidr, struct wg_peer *peer, struct mutex *lock); +void wg_allowedips_remove_by_peer(struct allowedips *table, + struct wg_peer *peer, struct mutex *lock); +/* The ip input pointer should be __aligned(__alignof(u64))) */ +int wg_allowedips_read_node(struct allowedips_node *node, u8 ip[16], u8 *cidr); + +/* These return a strong reference to a peer: */ +struct wg_peer *wg_allowedips_lookup_dst(struct allowedips *table, + struct sk_buff *skb); +struct wg_peer *wg_allowedips_lookup_src(struct allowedips *table, + struct sk_buff *skb); + +#ifdef DEBUG +bool wg_allowedips_selftest(void); +#endif + +#endif /* _WG_ALLOWEDIPS_H */ diff --git a/drivers/net/wireguard/cookie.c b/drivers/net/wireguard/cookie.c new file mode 100644 index 000000000000..4956f0499c19 --- /dev/null +++ b/drivers/net/wireguard/cookie.c @@ -0,0 +1,236 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include "cookie.h" +#include "peer.h" +#include "device.h" +#include "messages.h" +#include "ratelimiter.h" +#include "timers.h" + +#include <crypto/blake2s.h> +#include <crypto/chacha20poly1305.h> + +#include <net/ipv6.h> +#include <crypto/algapi.h> + +void wg_cookie_checker_init(struct cookie_checker *checker, + struct wg_device *wg) +{ + init_rwsem(&checker->secret_lock); + checker->secret_birthdate = ktime_get_coarse_boottime_ns(); + get_random_bytes(checker->secret, NOISE_HASH_LEN); + checker->device = wg; +} + +enum { COOKIE_KEY_LABEL_LEN = 8 }; +static const u8 mac1_key_label[COOKIE_KEY_LABEL_LEN] = "mac1----"; +static const u8 cookie_key_label[COOKIE_KEY_LABEL_LEN] = "cookie--"; + +static void precompute_key(u8 key[NOISE_SYMMETRIC_KEY_LEN], + const u8 pubkey[NOISE_PUBLIC_KEY_LEN], + const u8 label[COOKIE_KEY_LABEL_LEN]) +{ + struct blake2s_state blake; + + blake2s_init(&blake, NOISE_SYMMETRIC_KEY_LEN); + blake2s_update(&blake, label, COOKIE_KEY_LABEL_LEN); + blake2s_update(&blake, pubkey, NOISE_PUBLIC_KEY_LEN); + blake2s_final(&blake, key); +} + +/* Must hold peer->handshake.static_identity->lock */ +void wg_cookie_checker_precompute_device_keys(struct cookie_checker *checker) +{ + if (likely(checker->device->static_identity.has_identity)) { + precompute_key(checker->cookie_encryption_key, + checker->device->static_identity.static_public, + cookie_key_label); + precompute_key(checker->message_mac1_key, + checker->device->static_identity.static_public, + mac1_key_label); + } else { + memset(checker->cookie_encryption_key, 0, + NOISE_SYMMETRIC_KEY_LEN); + memset(checker->message_mac1_key, 0, NOISE_SYMMETRIC_KEY_LEN); + } +} + +void wg_cookie_checker_precompute_peer_keys(struct wg_peer *peer) +{ + precompute_key(peer->latest_cookie.cookie_decryption_key, + peer->handshake.remote_static, cookie_key_label); + precompute_key(peer->latest_cookie.message_mac1_key, + peer->handshake.remote_static, mac1_key_label); +} + +void wg_cookie_init(struct cookie *cookie) +{ + memset(cookie, 0, sizeof(*cookie)); + init_rwsem(&cookie->lock); +} + +static void compute_mac1(u8 mac1[COOKIE_LEN], const void *message, size_t len, + const u8 key[NOISE_SYMMETRIC_KEY_LEN]) +{ + len = len - sizeof(struct message_macs) + + offsetof(struct message_macs, mac1); + blake2s(mac1, message, key, COOKIE_LEN, len, NOISE_SYMMETRIC_KEY_LEN); +} + +static void compute_mac2(u8 mac2[COOKIE_LEN], const void *message, size_t len, + const u8 cookie[COOKIE_LEN]) +{ + len = len - sizeof(struct message_macs) + + offsetof(struct message_macs, mac2); + blake2s(mac2, message, cookie, COOKIE_LEN, len, COOKIE_LEN); +} + +static void make_cookie(u8 cookie[COOKIE_LEN], struct sk_buff *skb, + struct cookie_checker *checker) +{ + struct blake2s_state state; + + if (wg_birthdate_has_expired(checker->secret_birthdate, + COOKIE_SECRET_MAX_AGE)) { + down_write(&checker->secret_lock); + checker->secret_birthdate = ktime_get_coarse_boottime_ns(); + get_random_bytes(checker->secret, NOISE_HASH_LEN); + up_write(&checker->secret_lock); + } + + down_read(&checker->secret_lock); + + blake2s_init_key(&state, COOKIE_LEN, checker->secret, NOISE_HASH_LEN); + if (skb->protocol == htons(ETH_P_IP)) + blake2s_update(&state, (u8 *)&ip_hdr(skb)->saddr, + sizeof(struct in_addr)); + else if (skb->protocol == htons(ETH_P_IPV6)) + blake2s_update(&state, (u8 *)&ipv6_hdr(skb)->saddr, + sizeof(struct in6_addr)); + blake2s_update(&state, (u8 *)&udp_hdr(skb)->source, sizeof(__be16)); + blake2s_final(&state, cookie); + + up_read(&checker->secret_lock); +} + +enum cookie_mac_state wg_cookie_validate_packet(struct cookie_checker *checker, + struct sk_buff *skb, + bool check_cookie) +{ + struct message_macs *macs = (struct message_macs *) + (skb->data + skb->len - sizeof(*macs)); + enum cookie_mac_state ret; + u8 computed_mac[COOKIE_LEN]; + u8 cookie[COOKIE_LEN]; + + ret = INVALID_MAC; + compute_mac1(computed_mac, skb->data, skb->len, + checker->message_mac1_key); + if (crypto_memneq(computed_mac, macs->mac1, COOKIE_LEN)) + goto out; + + ret = VALID_MAC_BUT_NO_COOKIE; + + if (!check_cookie) + goto out; + + make_cookie(cookie, skb, checker); + + compute_mac2(computed_mac, skb->data, skb->len, cookie); + if (crypto_memneq(computed_mac, macs->mac2, COOKIE_LEN)) + goto out; + + ret = VALID_MAC_WITH_COOKIE_BUT_RATELIMITED; + if (!wg_ratelimiter_allow(skb, dev_net(checker->device->dev))) + goto out; + + ret = VALID_MAC_WITH_COOKIE; + +out: + return ret; +} + +void wg_cookie_add_mac_to_packet(void *message, size_t len, + struct wg_peer *peer) +{ + struct message_macs *macs = (struct message_macs *) + ((u8 *)message + len - sizeof(*macs)); + + down_write(&peer->latest_cookie.lock); + compute_mac1(macs->mac1, message, len, + peer->latest_cookie.message_mac1_key); + memcpy(peer->latest_cookie.last_mac1_sent, macs->mac1, COOKIE_LEN); + peer->latest_cookie.have_sent_mac1 = true; + up_write(&peer->latest_cookie.lock); + + down_read(&peer->latest_cookie.lock); + if (peer->latest_cookie.is_valid && + !wg_birthdate_has_expired(peer->latest_cookie.birthdate, + COOKIE_SECRET_MAX_AGE - COOKIE_SECRET_LATENCY)) + compute_mac2(macs->mac2, message, len, + peer->latest_cookie.cookie); + else + memset(macs->mac2, 0, COOKIE_LEN); + up_read(&peer->latest_cookie.lock); +} + +void wg_cookie_message_create(struct message_handshake_cookie *dst, + struct sk_buff *skb, __le32 index, + struct cookie_checker *checker) +{ + struct message_macs *macs = (struct message_macs *) + ((u8 *)skb->data + skb->len - sizeof(*macs)); + u8 cookie[COOKIE_LEN]; + + dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE); + dst->receiver_index = index; + get_random_bytes_wait(dst->nonce, COOKIE_NONCE_LEN); + + make_cookie(cookie, skb, checker); + xchacha20poly1305_encrypt(dst->encrypted_cookie, cookie, COOKIE_LEN, + macs->mac1, COOKIE_LEN, dst->nonce, + checker->cookie_encryption_key); +} + +void wg_cookie_message_consume(struct message_handshake_cookie *src, + struct wg_device *wg) +{ + struct wg_peer *peer = NULL; + u8 cookie[COOKIE_LEN]; + bool ret; + + if (unlikely(!wg_index_hashtable_lookup(wg->index_hashtable, + INDEX_HASHTABLE_HANDSHAKE | + INDEX_HASHTABLE_KEYPAIR, + src->receiver_index, &peer))) + return; + + down_read(&peer->latest_cookie.lock); + if (unlikely(!peer->latest_cookie.have_sent_mac1)) { + up_read(&peer->latest_cookie.lock); + goto out; + } + ret = xchacha20poly1305_decrypt( + cookie, src->encrypted_cookie, sizeof(src->encrypted_cookie), + peer->latest_cookie.last_mac1_sent, COOKIE_LEN, src->nonce, + peer->latest_cookie.cookie_decryption_key); + up_read(&peer->latest_cookie.lock); + + if (ret) { + down_write(&peer->latest_cookie.lock); + memcpy(peer->latest_cookie.cookie, cookie, COOKIE_LEN); + peer->latest_cookie.birthdate = ktime_get_coarse_boottime_ns(); + peer->latest_cookie.is_valid = true; + peer->latest_cookie.have_sent_mac1 = false; + up_write(&peer->latest_cookie.lock); + } else { + net_dbg_ratelimited("%s: Could not decrypt invalid cookie response\n", + wg->dev->name); + } + +out: + wg_peer_put(peer); +} diff --git a/drivers/net/wireguard/cookie.h b/drivers/net/wireguard/cookie.h new file mode 100644 index 000000000000..c4bd61ca03f2 --- /dev/null +++ b/drivers/net/wireguard/cookie.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#ifndef _WG_COOKIE_H +#define _WG_COOKIE_H + +#include "messages.h" +#include <linux/rwsem.h> + +struct wg_peer; + +struct cookie_checker { + u8 secret[NOISE_HASH_LEN]; + u8 cookie_encryption_key[NOISE_SYMMETRIC_KEY_LEN]; + u8 message_mac1_key[NOISE_SYMMETRIC_KEY_LEN]; + u64 secret_birthdate; + struct rw_semaphore secret_lock; + struct wg_device *device; +}; + +struct cookie { + u64 birthdate; + bool is_valid; + u8 cookie[COOKIE_LEN]; + bool have_sent_mac1; + u8 last_mac1_sent[COOKIE_LEN]; + u8 cookie_decryption_key[NOISE_SYMMETRIC_KEY_LEN]; + u8 message_mac1_key[NOISE_SYMMETRIC_KEY_LEN]; + struct rw_semaphore lock; +}; + +enum cookie_mac_state { + INVALID_MAC, + VALID_MAC_BUT_NO_COOKIE, + VALID_MAC_WITH_COOKIE_BUT_RATELIMITED, + VALID_MAC_WITH_COOKIE +}; + +void wg_cookie_checker_init(struct cookie_checker *checker, + struct wg_device *wg); +void wg_cookie_checker_precompute_device_keys(struct cookie_checker *checker); +void wg_cookie_checker_precompute_peer_keys(struct wg_peer *peer); +void wg_cookie_init(struct cookie *cookie); + +enum cookie_mac_state wg_cookie_validate_packet(struct cookie_checker *checker, + struct sk_buff *skb, + bool check_cookie); +void wg_cookie_add_mac_to_packet(void *message, size_t len, + struct wg_peer *peer); + +void wg_cookie_message_create(struct message_handshake_cookie *src, + struct sk_buff *skb, __le32 index, + struct cookie_checker *checker); +void wg_cookie_message_consume(struct message_handshake_cookie *src, + struct wg_device *wg); + +#endif /* _WG_COOKIE_H */ diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c new file mode 100644 index 000000000000..16b19824b9ad --- /dev/null +++ b/drivers/net/wireguard/device.c @@ -0,0 +1,458 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include "queueing.h" +#include "socket.h" +#include "timers.h" +#include "device.h" +#include "ratelimiter.h" +#include "peer.h" +#include "messages.h" + +#include <linux/module.h> +#include <linux/rtnetlink.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/inetdevice.h> +#include <linux/if_arp.h> +#include <linux/icmp.h> +#include <linux/suspend.h> +#include <net/icmp.h> +#include <net/rtnetlink.h> +#include <net/ip_tunnels.h> +#include <net/addrconf.h> + +static LIST_HEAD(device_list); + +static int wg_open(struct net_device *dev) +{ + struct in_device *dev_v4 = __in_dev_get_rtnl(dev); + struct inet6_dev *dev_v6 = __in6_dev_get(dev); + struct wg_device *wg = netdev_priv(dev); + struct wg_peer *peer; + int ret; + + if (dev_v4) { + /* At some point we might put this check near the ip_rt_send_ + * redirect call of ip_forward in net/ipv4/ip_forward.c, similar + * to the current secpath check. + */ + IN_DEV_CONF_SET(dev_v4, SEND_REDIRECTS, false); + IPV4_DEVCONF_ALL(dev_net(dev), SEND_REDIRECTS) = false; + } + if (dev_v6) + dev_v6->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_NONE; + + ret = wg_socket_init(wg, wg->incoming_port); + if (ret < 0) + return ret; + mutex_lock(&wg->device_update_lock); + list_for_each_entry(peer, &wg->peer_list, peer_list) { + wg_packet_send_staged_packets(peer); + if (peer->persistent_keepalive_interval) + wg_packet_send_keepalive(peer); + } + mutex_unlock(&wg->device_update_lock); + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int wg_pm_notification(struct notifier_block *nb, unsigned long action, + void *data) +{ + struct wg_device *wg; + struct wg_peer *peer; + + /* If the machine is constantly suspending and resuming, as part of + * its normal operation rather than as a somewhat rare event, then we + * don't actually want to clear keys. + */ + if (IS_ENABLED(CONFIG_PM_AUTOSLEEP) || IS_ENABLED(CONFIG_ANDROID)) + return 0; + + if (action != PM_HIBERNATION_PREPARE && action != PM_SUSPEND_PREPARE) + return 0; + + rtnl_lock(); + list_for_each_entry(wg, &device_list, device_list) { + mutex_lock(&wg->device_update_lock); + list_for_each_entry(peer, &wg->peer_list, peer_list) { + del_timer(&peer->timer_zero_key_material); + wg_noise_handshake_clear(&peer->handshake); + wg_noise_keypairs_clear(&peer->keypairs); + } + mutex_unlock(&wg->device_update_lock); + } + rtnl_unlock(); + rcu_barrier(); + return 0; +} + +static struct notifier_block pm_notifier = { .notifier_call = wg_pm_notification }; +#endif + +static int wg_stop(struct net_device *dev) +{ + struct wg_device *wg = netdev_priv(dev); + struct wg_peer *peer; + + mutex_lock(&wg->device_update_lock); + list_for_each_entry(peer, &wg->peer_list, peer_list) { + wg_packet_purge_staged_packets(peer); + wg_timers_stop(peer); + wg_noise_handshake_clear(&peer->handshake); + wg_noise_keypairs_clear(&peer->keypairs); + wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake); + } + mutex_unlock(&wg->device_update_lock); + skb_queue_purge(&wg->incoming_handshakes); + wg_socket_reinit(wg, NULL, NULL); + return 0; +} + +static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct wg_device *wg = netdev_priv(dev); + struct sk_buff_head packets; + struct wg_peer *peer; + struct sk_buff *next; + sa_family_t family; + u32 mtu; + int ret; + + if (unlikely(wg_skb_examine_untrusted_ip_hdr(skb) != skb->protocol)) { + ret = -EPROTONOSUPPORT; + net_dbg_ratelimited("%s: Invalid IP packet\n", dev->name); + goto err; + } + + peer = wg_allowedips_lookup_dst(&wg->peer_allowedips, skb); + if (unlikely(!peer)) { + ret = -ENOKEY; + if (skb->protocol == htons(ETH_P_IP)) + net_dbg_ratelimited("%s: No peer has allowed IPs matching %pI4\n", + dev->name, &ip_hdr(skb)->daddr); + else if (skb->protocol == htons(ETH_P_IPV6)) + net_dbg_ratelimited("%s: No peer has allowed IPs matching %pI6\n", + dev->name, &ipv6_hdr(skb)->daddr); + goto err; + } + + family = READ_ONCE(peer->endpoint.addr.sa_family); + if (unlikely(family != AF_INET && family != AF_INET6)) { + ret = -EDESTADDRREQ; + net_dbg_ratelimited("%s: No valid endpoint has been configured or discovered for peer %llu\n", + dev->name, peer->internal_id); + goto err_peer; + } + + mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; + + __skb_queue_head_init(&packets); + if (!skb_is_gso(skb)) { + skb_mark_not_on_list(skb); + } else { + struct sk_buff *segs = skb_gso_segment(skb, 0); + + if (unlikely(IS_ERR(segs))) { + ret = PTR_ERR(segs); + goto err_peer; + } + dev_kfree_skb(skb); + skb = segs; + } + + skb_list_walk_safe(skb, skb, next) { + skb_mark_not_on_list(skb); + + skb = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(!skb)) + continue; + + /* We only need to keep the original dst around for icmp, + * so at this point we're in a position to drop it. + */ + skb_dst_drop(skb); + + PACKET_CB(skb)->mtu = mtu; + + __skb_queue_tail(&packets, skb); + } + + spin_lock_bh(&peer->staged_packet_queue.lock); + /* If the queue is getting too big, we start removing the oldest packets + * until it's small again. We do this before adding the new packet, so + * we don't remove GSO segments that are in excess. + */ + while (skb_queue_len(&peer->staged_packet_queue) > MAX_STAGED_PACKETS) { + dev_kfree_skb(__skb_dequeue(&peer->staged_packet_queue)); + ++dev->stats.tx_dropped; + } + skb_queue_splice_tail(&packets, &peer->staged_packet_queue); + spin_unlock_bh(&peer->staged_packet_queue.lock); + + wg_packet_send_staged_packets(peer); + + wg_peer_put(peer); + return NETDEV_TX_OK; + +err_peer: + wg_peer_put(peer); +err: + ++dev->stats.tx_errors; + if (skb->protocol == htons(ETH_P_IP)) + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); + else if (skb->protocol == htons(ETH_P_IPV6)) + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); + kfree_skb(skb); + return ret; +} + +static const struct net_device_ops netdev_ops = { + .ndo_open = wg_open, + .ndo_stop = wg_stop, + .ndo_start_xmit = wg_xmit, + .ndo_get_stats64 = ip_tunnel_get_stats64 +}; + +static void wg_destruct(struct net_device *dev) +{ + struct wg_device *wg = netdev_priv(dev); + + rtnl_lock(); + list_del(&wg->device_list); + rtnl_unlock(); + mutex_lock(&wg->device_update_lock); + wg->incoming_port = 0; + wg_socket_reinit(wg, NULL, NULL); + /* The final references are cleared in the below calls to destroy_workqueue. */ + wg_peer_remove_all(wg); + destroy_workqueue(wg->handshake_receive_wq); + destroy_workqueue(wg->handshake_send_wq); + destroy_workqueue(wg->packet_crypt_wq); + wg_packet_queue_free(&wg->decrypt_queue, true); + wg_packet_queue_free(&wg->encrypt_queue, true); + rcu_barrier(); /* Wait for all the peers to be actually freed. */ + wg_ratelimiter_uninit(); + memzero_explicit(&wg->static_identity, sizeof(wg->static_identity)); + skb_queue_purge(&wg->incoming_handshakes); + free_percpu(dev->tstats); + free_percpu(wg->incoming_handshakes_worker); + if (wg->have_creating_net_ref) + put_net(wg->creating_net); + kvfree(wg->index_hashtable); + kvfree(wg->peer_hashtable); + mutex_unlock(&wg->device_update_lock); + + pr_debug("%s: Interface deleted\n", dev->name); + free_netdev(dev); +} + +static const struct device_type device_type = { .name = KBUILD_MODNAME }; + +static void wg_setup(struct net_device *dev) +{ + struct wg_device *wg = netdev_priv(dev); + enum { WG_NETDEV_FEATURES = NETIF_F_HW_CSUM | NETIF_F_RXCSUM | + NETIF_F_SG | NETIF_F_GSO | + NETIF_F_GSO_SOFTWARE | NETIF_F_HIGHDMA }; + + dev->netdev_ops = &netdev_ops; + dev->hard_header_len = 0; + dev->addr_len = 0; + dev->needed_headroom = DATA_PACKET_HEAD_ROOM; + dev->needed_tailroom = noise_encrypted_len(MESSAGE_PADDING_MULTIPLE); + dev->type = ARPHRD_NONE; + dev->flags = IFF_POINTOPOINT | IFF_NOARP; + dev->priv_flags |= IFF_NO_QUEUE; + dev->features |= NETIF_F_LLTX; + dev->features |= WG_NETDEV_FEATURES; + dev->hw_features |= WG_NETDEV_FEATURES; + dev->hw_enc_features |= WG_NETDEV_FEATURES; + dev->mtu = ETH_DATA_LEN - MESSAGE_MINIMUM_LENGTH - + sizeof(struct udphdr) - + max(sizeof(struct ipv6hdr), sizeof(struct iphdr)); + + SET_NETDEV_DEVTYPE(dev, &device_type); + + /* We need to keep the dst around in case of icmp replies. */ + netif_keep_dst(dev); + + memset(wg, 0, sizeof(*wg)); + wg->dev = dev; +} + +static int wg_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + struct wg_device *wg = netdev_priv(dev); + int ret = -ENOMEM; + + wg->creating_net = src_net; + init_rwsem(&wg->static_identity.lock); + mutex_init(&wg->socket_update_lock); + mutex_init(&wg->device_update_lock); + skb_queue_head_init(&wg->incoming_handshakes); + wg_allowedips_init(&wg->peer_allowedips); + wg_cookie_checker_init(&wg->cookie_checker, wg); + INIT_LIST_HEAD(&wg->peer_list); + wg->device_update_gen = 1; + + wg->peer_hashtable = wg_pubkey_hashtable_alloc(); + if (!wg->peer_hashtable) + return ret; + + wg->index_hashtable = wg_index_hashtable_alloc(); + if (!wg->index_hashtable) + goto err_free_peer_hashtable; + + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!dev->tstats) + goto err_free_index_hashtable; + + wg->incoming_handshakes_worker = + wg_packet_percpu_multicore_worker_alloc( + wg_packet_handshake_receive_worker, wg); + if (!wg->incoming_handshakes_worker) + goto err_free_tstats; + + wg->handshake_receive_wq = alloc_workqueue("wg-kex-%s", + WQ_CPU_INTENSIVE | WQ_FREEZABLE, 0, dev->name); + if (!wg->handshake_receive_wq) + goto err_free_incoming_handshakes; + + wg->handshake_send_wq = alloc_workqueue("wg-kex-%s", + WQ_UNBOUND | WQ_FREEZABLE, 0, dev->name); + if (!wg->handshake_send_wq) + goto err_destroy_handshake_receive; + + wg->packet_crypt_wq = alloc_workqueue("wg-crypt-%s", + WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 0, dev->name); + if (!wg->packet_crypt_wq) + goto err_destroy_handshake_send; + + ret = wg_packet_queue_init(&wg->encrypt_queue, wg_packet_encrypt_worker, + true, MAX_QUEUED_PACKETS); + if (ret < 0) + goto err_destroy_packet_crypt; + + ret = wg_packet_queue_init(&wg->decrypt_queue, wg_packet_decrypt_worker, + true, MAX_QUEUED_PACKETS); + if (ret < 0) + goto err_free_encrypt_queue; + + ret = wg_ratelimiter_init(); + if (ret < 0) + goto err_free_decrypt_queue; + + ret = register_netdevice(dev); + if (ret < 0) + goto err_uninit_ratelimiter; + + list_add(&wg->device_list, &device_list); + + /* We wait until the end to assign priv_destructor, so that + * register_netdevice doesn't call it for us if it fails. + */ + dev->priv_destructor = wg_destruct; + + pr_debug("%s: Interface created\n", dev->name); + return ret; + +err_uninit_ratelimiter: + wg_ratelimiter_uninit(); +err_free_decrypt_queue: + wg_packet_queue_free(&wg->decrypt_queue, true); +err_free_encrypt_queue: + wg_packet_queue_free(&wg->encrypt_queue, true); +err_destroy_packet_crypt: + destroy_workqueue(wg->packet_crypt_wq); +err_destroy_handshake_send: + destroy_workqueue(wg->handshake_send_wq); +err_destroy_handshake_receive: + destroy_workqueue(wg->handshake_receive_wq); +err_free_incoming_handshakes: + free_percpu(wg->incoming_handshakes_worker); +err_free_tstats: + free_percpu(dev->tstats); +err_free_index_hashtable: + kvfree(wg->index_hashtable); +err_free_peer_hashtable: + kvfree(wg->peer_hashtable); + return ret; +} + +static struct rtnl_link_ops link_ops __read_mostly = { + .kind = KBUILD_MODNAME, + .priv_size = sizeof(struct wg_device), + .setup = wg_setup, + .newlink = wg_newlink, +}; + +static int wg_netdevice_notification(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct net_device *dev = ((struct netdev_notifier_info *)data)->dev; + struct wg_device *wg = netdev_priv(dev); + + ASSERT_RTNL(); + + if (action != NETDEV_REGISTER || dev->netdev_ops != &netdev_ops) + return 0; + + if (dev_net(dev) == wg->creating_net && wg->have_creating_net_ref) { + put_net(wg->creating_net); + wg->have_creating_net_ref = false; + } else if (dev_net(dev) != wg->creating_net && + !wg->have_creating_net_ref) { + wg->have_creating_net_ref = true; + get_net(wg->creating_net); + } + return 0; +} + +static struct notifier_block netdevice_notifier = { + .notifier_call = wg_netdevice_notification +}; + +int __init wg_device_init(void) +{ + int ret; + +#ifdef CONFIG_PM_SLEEP + ret = register_pm_notifier(&pm_notifier); + if (ret) + return ret; +#endif + + ret = register_netdevice_notifier(&netdevice_notifier); + if (ret) + goto error_pm; + + ret = rtnl_link_register(&link_ops); + if (ret) + goto error_netdevice; + + return 0; + +error_netdevice: + unregister_netdevice_notifier(&netdevice_notifier); +error_pm: +#ifdef CONFIG_PM_SLEEP + unregister_pm_notifier(&pm_notifier); +#endif + return ret; +} + +void wg_device_uninit(void) +{ + rtnl_link_unregister(&link_ops); + unregister_netdevice_notifier(&netdevice_notifier); +#ifdef CONFIG_PM_SLEEP + unregister_pm_notifier(&pm_notifier); +#endif + rcu_barrier(); +} diff --git a/drivers/net/wireguard/device.h b/drivers/net/wireguard/device.h new file mode 100644 index 000000000000..c91f3051c5c7 --- /dev/null +++ b/drivers/net/wireguard/device.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#ifndef _WG_DEVICE_H +#define _WG_DEVICE_H + +#include "noise.h" +#include "allowedips.h" +#include "peerlookup.h" +#include "cookie.h" + +#include <linux/types.h> +#include <linux/netdevice.h> +#include <linux/workqueue.h> +#include <linux/mutex.h> +#include <linux/net.h> +#include <linux/ptr_ring.h> + +struct wg_device; + +struct multicore_worker { + void *ptr; + struct work_struct work; +}; + +struct crypt_queue { + struct ptr_ring ring; + union { + struct { + struct multicore_worker __percpu *worker; + int last_cpu; + }; + struct work_struct work; + }; +}; + +struct wg_device { + struct net_device *dev; + struct crypt_queue encrypt_queue, decrypt_queue; + struct sock __rcu *sock4, *sock6; + struct net *creating_net; + struct noise_static_identity static_identity; + struct workqueue_struct *handshake_receive_wq, *handshake_send_wq; + struct workqueue_struct *packet_crypt_wq; + struct sk_buff_head incoming_handshakes; + int incoming_handshake_cpu; + struct multicore_worker __percpu *incoming_handshakes_worker; + struct cookie_checker cookie_checker; + struct pubkey_hashtable *peer_hashtable; + struct index_hashtable *index_hashtable; + struct allowedips peer_allowedips; + struct mutex device_update_lock, socket_update_lock; + struct list_head device_list, peer_list; + unsigned int num_peers, device_update_gen; + u32 fwmark; + u16 incoming_port; + bool have_creating_net_ref; +}; + +int wg_device_init(void); +void wg_device_uninit(void); + +/* Later after the dust settles, this can be moved into include/linux/skbuff.h, + * where virtually all code that deals with GSO segs can benefit, around ~30 + * drivers as of writing. + */ +#define skb_list_walk_safe(first, skb, next) \ + for (skb = first, next = skb->next; skb; \ + skb = next, next = skb ? skb->next : NULL) + +#endif /* _WG_DEVICE_H */ diff --git a/drivers/net/wireguard/main.c b/drivers/net/wireguard/main.c new file mode 100644 index 000000000000..7a7d5f1a80fc --- /dev/null +++ b/drivers/net/wireguard/main.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include "version.h" +#include "device.h" +#include "noise.h" +#include "queueing.h" +#include "ratelimiter.h" +#include "netlink.h" + +#include <uapi/linux/wireguard.h> + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/genetlink.h> +#include <net/rtnetlink.h> + +static int __init mod_init(void) +{ + int ret; + +#ifdef DEBUG + if (!wg_allowedips_selftest() || !wg_packet_counter_selftest() || + !wg_ratelimiter_selftest()) + return -ENOTRECOVERABLE; +#endif + wg_noise_init(); + + ret = wg_device_init(); + if (ret < 0) + goto err_device; + + ret = wg_genetlink_init(); + if (ret < 0) + goto err_netlink; + + pr_info("WireGuard " WIREGUARD_VERSION " loaded. See www.wireguard.com for information.\n"); + pr_info("Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.\n"); + + return 0; + +err_netlink: + wg_device_uninit(); +err_device: + return ret; +} + +static void __exit mod_exit(void) +{ + wg_genetlink_uninit(); + wg_device_uninit(); +} + +module_init(mod_init); +module_exit(mod_exit); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("WireGuard secure network tunnel"); +MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>"); +MODULE_VERSION(WIREGUARD_VERSION); +MODULE_ALIAS_RTNL_LINK(KBUILD_MODNAME); +MODULE_ALIAS_GENL_FAMILY(WG_GENL_NAME); diff --git a/drivers/net/wireguard/messages.h b/drivers/net/wireguard/messages.h new file mode 100644 index 000000000000..b8a7b9ce32ba --- /dev/null +++ b/drivers/net/wireguard/messages.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#ifndef _WG_MESSAGES_H +#define _WG_MESSAGES_H + +#include <crypto/curve25519.h> +#include <crypto/chacha20poly1305.h> +#include <crypto/blake2s.h> + +#include <linux/kernel.h> +#include <linux/param.h> +#include <linux/skbuff.h> + +enum noise_lengths { + NOISE_PUBLIC_KEY_LEN = CURVE25519_KEY_SIZE, + NOISE_SYMMETRIC_KEY_LEN = CHACHA20POLY1305_KEY_SIZE, + NOISE_TIMESTAMP_LEN = sizeof(u64) + sizeof(u32), + NOISE_AUTHTAG_LEN = CHACHA20POLY1305_AUTHTAG_SIZE, + NOISE_HASH_LEN = BLAKE2S_HASH_SIZE +}; + +#define noise_encrypted_len(plain_len) ((plain_len) + NOISE_AUTHTAG_LEN) + +enum cookie_values { + COOKIE_SECRET_MAX_AGE = 2 * 60, + COOKIE_SECRET_LATENCY = 5, + COOKIE_NONCE_LEN = XCHACHA20POLY1305_NONCE_SIZE, + COOKIE_LEN = 16 +}; + +enum counter_values { + COUNTER_BITS_TOTAL = 2048, + COUNTER_REDUNDANT_BITS = BITS_PER_LONG, + COUNTER_WINDOW_SIZE = COUNTER_BITS_TOTAL - COUNTER_REDUNDANT_BITS +}; + +enum limits { + REKEY_AFTER_MESSAGES = 1ULL << 60, + REJECT_AFTER_MESSAGES = U64_MAX - COUNTER_WINDOW_SIZE - 1, + REKEY_TIMEOUT = 5, + REKEY_TIMEOUT_JITTER_MAX_JIFFIES = HZ / 3, + REKEY_AFTER_TIME = 120, + REJECT_AFTER_TIME = 180, + INITIATIONS_PER_SECOND = 50, + MAX_PEERS_PER_DEVICE = 1U << 20, + KEEPALIVE_TIMEOUT = 10, + MAX_TIMER_HANDSHAKES = 90 / REKEY_TIMEOUT, + MAX_QUEUED_INCOMING_HANDSHAKES = 4096, /* TODO: replace this with DQL */ + MAX_STAGED_PACKETS = 128, + MAX_QUEUED_PACKETS = 1024 /* TODO: replace this with DQL */ +}; + +enum message_type { + MESSAGE_INVALID = 0, + MESSAGE_HANDSHAKE_INITIATION = 1, + MESSAGE_HANDSHAKE_RESPONSE = 2, + MESSAGE_HANDSHAKE_COOKIE = 3, + MESSAGE_DATA = 4 +}; + +struct message_header { + /* The actual layout of this that we want is: + * u8 type + * u8 reserved_zero[3] + * + * But it turns out that by encoding this as little endian, + * we achieve the same thing, and it makes checking faster. + */ + __le32 type; +}; + +struct message_macs { + u8 mac1[COOKIE_LEN]; + u8 mac2[COOKIE_LEN]; +}; + +struct message_handshake_initiation { + struct message_header header; + __le32 sender_index; + u8 unencrypted_ephemeral[NOISE_PUBLIC_KEY_LEN]; + u8 encrypted_static[noise_encrypted_len(NOISE_PUBLIC_KEY_LEN)]; + u8 encrypted_timestamp[noise_encrypted_len(NOISE_TIMESTAMP_LEN)]; + struct message_macs macs; +}; + +struct message_handshake_response { + struct message_header header; + __le32 sender_index; + __le32 receiver_index; + u8 unencrypted_ephemeral[NOISE_PUBLIC_KEY_LEN]; + u8 encrypted_nothing[noise_encrypted_len(0)]; + struct message_macs macs; +}; + +struct message_handshake_cookie { + struct message_header header; + __le32 receiver_index; + u8 nonce[COOKIE_NONCE_LEN]; + u8 encrypted_cookie[noise_encrypted_len(COOKIE_LEN)]; +}; + +struct message_data { + struct message_header header; + __le32 key_idx; + __le64 counter; + u8 encrypted_data[]; +}; + +#define message_data_len(plain_len) \ + (noise_encrypted_len(plain_len) + sizeof(struct message_data)) + +enum message_alignments { + MESSAGE_PADDING_MULTIPLE = 16, + MESSAGE_MINIMUM_LENGTH = message_data_len(0) +}; + +#define SKB_HEADER_LEN \ + (max(sizeof(struct iphdr), sizeof(struct ipv6hdr)) + \ + sizeof(struct udphdr) + NET_SKB_PAD) +#define DATA_PACKET_HEAD_ROOM \ + ALIGN(sizeof(struct message_data) + SKB_HEADER_LEN, 4) + +enum { HANDSHAKE_DSCP = 0x88 /* AF41, plus 00 ECN */ }; + +#endif /* _WG_MESSAGES_H */ diff --git a/drivers/net/wireguard/netlink.c b/drivers/net/wireguard/netlink.c new file mode 100644 index 000000000000..0fdbd1c45977 --- /dev/null +++ b/drivers/net/wireguard/netlink.c @@ -0,0 +1,642 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include "netlink.h" +#include "device.h" +#include "peer.h" +#include "socket.h" +#include "queueing.h" +#include "messages.h" + +#include <uapi/linux/wireguard.h> + +#include <linux/if.h> +#include <net/genetlink.h> +#include <net/sock.h> +#include <crypto/algapi.h> + +static struct genl_family genl_family; + +static const struct nla_policy device_policy[WGDEVICE_A_MAX + 1] = { + [WGDEVICE_A_IFINDEX] = { .type = NLA_U32 }, + [WGDEVICE_A_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, + [WGDEVICE_A_PRIVATE_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_PUBLIC_KEY_LEN }, + [WGDEVICE_A_PUBLIC_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_PUBLIC_KEY_LEN }, + [WGDEVICE_A_FLAGS] = { .type = NLA_U32 }, + [WGDEVICE_A_LISTEN_PORT] = { .type = NLA_U16 }, + [WGDEVICE_A_FWMARK] = { .type = NLA_U32 }, + [WGDEVICE_A_PEERS] = { .type = NLA_NESTED } +}; + +static const struct nla_policy peer_policy[WGPEER_A_MAX + 1] = { + [WGPEER_A_PUBLIC_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_PUBLIC_KEY_LEN }, + [WGPEER_A_PRESHARED_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_SYMMETRIC_KEY_LEN }, + [WGPEER_A_FLAGS] = { .type = NLA_U32 }, + [WGPEER_A_ENDPOINT] = { .type = NLA_MIN_LEN, .len = sizeof(struct sockaddr) }, + [WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL] = { .type = NLA_U16 }, + [WGPEER_A_LAST_HANDSHAKE_TIME] = { .type = NLA_EXACT_LEN, .len = sizeof(struct __kernel_timespec) }, + [WGPEER_A_RX_BYTES] = { .type = NLA_U64 }, + [WGPEER_A_TX_BYTES] = { .type = NLA_U64 }, + [WGPEER_A_ALLOWEDIPS] = { .type = NLA_NESTED }, + [WGPEER_A_PROTOCOL_VERSION] = { .type = NLA_U32 } +}; + +static const struct nla_policy allowedip_policy[WGALLOWEDIP_A_MAX + 1] = { + [WGALLOWEDIP_A_FAMILY] = { .type = NLA_U16 }, + [WGALLOWEDIP_A_IPADDR] = { .type = NLA_MIN_LEN, .len = sizeof(struct in_addr) }, + [WGALLOWEDIP_A_CIDR_MASK] = { .type = NLA_U8 } +}; + +static struct wg_device *lookup_interface(struct nlattr **attrs, + struct sk_buff *skb) +{ + struct net_device *dev = NULL; + + if (!attrs[WGDEVICE_A_IFINDEX] == !attrs[WGDEVICE_A_IFNAME]) + return ERR_PTR(-EBADR); + if (attrs[WGDEVICE_A_IFINDEX]) + dev = dev_get_by_index(sock_net(skb->sk), + nla_get_u32(attrs[WGDEVICE_A_IFINDEX])); + else if (attrs[WGDEVICE_A_IFNAME]) + dev = dev_get_by_name(sock_net(skb->sk), + nla_data(attrs[WGDEVICE_A_IFNAME])); + if (!dev) + return ERR_PTR(-ENODEV); + if (!dev->rtnl_link_ops || !dev->rtnl_link_ops->kind || + strcmp(dev->rtnl_link_ops->kind, KBUILD_MODNAME)) { + dev_put(dev); + return ERR_PTR(-EOPNOTSUPP); + } + return netdev_priv(dev); +} + +static int get_allowedips(struct sk_buff *skb, const u8 *ip, u8 cidr, + int family) +{ + struct nlattr *allowedip_nest; + + allowedip_nest = nla_nest_start(skb, 0); + if (!allowedip_nest) + return -EMSGSIZE; + + if (nla_put_u8(skb, WGALLOWEDIP_A_CIDR_MASK, cidr) || + nla_put_u16(skb, WGALLOWEDIP_A_FAMILY, family) || + nla_put(skb, WGALLOWEDIP_A_IPADDR, family == AF_INET6 ? + sizeof(struct in6_addr) : sizeof(struct in_addr), ip)) { + nla_nest_cancel(skb, allowedip_nest); + return -EMSGSIZE; + } + + nla_nest_end(skb, allowedip_nest); + return 0; +} + +struct dump_ctx { + struct wg_device *wg; + struct wg_peer *next_peer; + u64 allowedips_seq; + struct allowedips_node *next_allowedip; +}; + +#define DUMP_CTX(cb) ((struct dump_ctx *)(cb)->args) + +static int +get_peer(struct wg_peer *peer, struct sk_buff *skb, struct dump_ctx *ctx) +{ + + struct nlattr *allowedips_nest, *peer_nest = nla_nest_start(skb, 0); + struct allowedips_node *allowedips_node = ctx->next_allowedip; + bool fail; + + if (!peer_nest) + return -EMSGSIZE; + + down_read(&peer->handshake.lock); + fail = nla_put(skb, WGPEER_A_PUBLIC_KEY, NOISE_PUBLIC_KEY_LEN, + peer->handshake.remote_static); + up_read(&peer->handshake.lock); + if (fail) + goto err; + + if (!allowedips_node) { + const struct __kernel_timespec last_handshake = { + .tv_sec = peer->walltime_last_handshake.tv_sec, + .tv_nsec = peer->walltime_last_handshake.tv_nsec + }; + + down_read(&peer->handshake.lock); + fail = nla_put(skb, WGPEER_A_PRESHARED_KEY, + NOISE_SYMMETRIC_KEY_LEN, + peer->handshake.preshared_key); + up_read(&peer->handshake.lock); + if (fail) + goto err; + + if (nla_put(skb, WGPEER_A_LAST_HANDSHAKE_TIME, + sizeof(last_handshake), &last_handshake) || + nla_put_u16(skb, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, + peer->persistent_keepalive_interval) || + nla_put_u64_64bit(skb, WGPEER_A_TX_BYTES, peer->tx_bytes, + WGPEER_A_UNSPEC) || + nla_put_u64_64bit(skb, WGPEER_A_RX_BYTES, peer->rx_bytes, + WGPEER_A_UNSPEC) || + nla_put_u32(skb, WGPEER_A_PROTOCOL_VERSION, 1)) + goto err; + + read_lock_bh(&peer->endpoint_lock); + if (peer->endpoint.addr.sa_family == AF_INET) + fail = nla_put(skb, WGPEER_A_ENDPOINT, + sizeof(peer->endpoint.addr4), + &peer->endpoint.addr4); + else if (peer->endpoint.addr.sa_family == AF_INET6) + fail = nla_put(skb, WGPEER_A_ENDPOINT, + sizeof(peer->endpoint.addr6), + &peer->endpoint.addr6); + read_unlock_bh(&peer->endpoint_lock); + if (fail) + goto err; + allowedips_node = + list_first_entry_or_null(&peer->allowedips_list, + struct allowedips_node, peer_list); + } + if (!allowedips_node) + goto no_allowedips; + if (!ctx->allowedips_seq) + ctx->allowedips_seq = peer->device->peer_allowedips.seq; + else if (ctx->allowedips_seq != peer->device->peer_allowedips.seq) + goto no_allowedips; + + allowedips_nest = nla_nest_start(skb, WGPEER_A_ALLOWEDIPS); + if (!allowedips_nest) + goto err; + + list_for_each_entry_from(allowedips_node, &peer->allowedips_list, + peer_list) { + u8 cidr, ip[16] __aligned(__alignof(u64)); + int family; + + family = wg_allowedips_read_node(allowedips_node, ip, &cidr); + if (get_allowedips(skb, ip, cidr, family)) { + nla_nest_end(skb, allowedips_nest); + nla_nest_end(skb, peer_nest); + ctx->next_allowedip = allowedips_node; + return -EMSGSIZE; + } + } + nla_nest_end(skb, allowedips_nest); +no_allowedips: + nla_nest_end(skb, peer_nest); + ctx->next_allowedip = NULL; + ctx->allowedips_seq = 0; + return 0; +err: + nla_nest_cancel(skb, peer_nest); + return -EMSGSIZE; +} + +static int wg_get_device_start(struct netlink_callback *cb) +{ + struct wg_device *wg; + + wg = lookup_interface(genl_dumpit_info(cb)->attrs, cb->skb); + if (IS_ERR(wg)) + return PTR_ERR(wg); + DUMP_CTX(cb)->wg = wg; + return 0; +} + +static int wg_get_device_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct wg_peer *peer, *next_peer_cursor; + struct dump_ctx *ctx = DUMP_CTX(cb); + struct wg_device *wg = ctx->wg; + struct nlattr *peers_nest; + int ret = -EMSGSIZE; + bool done = true; + void *hdr; + + rtnl_lock(); + mutex_lock(&wg->device_update_lock); + cb->seq = wg->device_update_gen; + next_peer_cursor = ctx->next_peer; + + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + &genl_family, NLM_F_MULTI, WG_CMD_GET_DEVICE); + if (!hdr) + goto out; + genl_dump_check_consistent(cb, hdr); + + if (!ctx->next_peer) { + if (nla_put_u16(skb, WGDEVICE_A_LISTEN_PORT, + wg->incoming_port) || + nla_put_u32(skb, WGDEVICE_A_FWMARK, wg->fwmark) || + nla_put_u32(skb, WGDEVICE_A_IFINDEX, wg->dev->ifindex) || + nla_put_string(skb, WGDEVICE_A_IFNAME, wg->dev->name)) + goto out; + + down_read(&wg->static_identity.lock); + if (wg->static_identity.has_identity) { + if (nla_put(skb, WGDEVICE_A_PRIVATE_KEY, + NOISE_PUBLIC_KEY_LEN, + wg->static_identity.static_private) || + nla_put(skb, WGDEVICE_A_PUBLIC_KEY, + NOISE_PUBLIC_KEY_LEN, + wg->static_identity.static_public)) { + up_read(&wg->static_identity.lock); + goto out; + } + } + up_read(&wg->static_identity.lock); + } + + peers_nest = nla_nest_start(skb, WGDEVICE_A_PEERS); + if (!peers_nest) + goto out; + ret = 0; + /* If the last cursor was removed via list_del_init in peer_remove, then + * we just treat this the same as there being no more peers left. The + * reason is that seq_nr should indicate to userspace that this isn't a + * coherent dump anyway, so they'll try again. + */ + if (list_empty(&wg->peer_list) || + (ctx->next_peer && list_empty(&ctx->next_peer->peer_list))) { + nla_nest_cancel(skb, peers_nest); + goto out; + } + lockdep_assert_held(&wg->device_update_lock); + peer = list_prepare_entry(ctx->next_peer, &wg->peer_list, peer_list); + list_for_each_entry_continue(peer, &wg->peer_list, peer_list) { + if (get_peer(peer, skb, ctx)) { + done = false; + break; + } + next_peer_cursor = peer; + } + nla_nest_end(skb, peers_nest); + +out: + if (!ret && !done && next_peer_cursor) + wg_peer_get(next_peer_cursor); + wg_peer_put(ctx->next_peer); + mutex_unlock(&wg->device_update_lock); + rtnl_unlock(); + + if (ret) { + genlmsg_cancel(skb, hdr); + return ret; + } + genlmsg_end(skb, hdr); + if (done) { + ctx->next_peer = NULL; + return 0; + } + ctx->next_peer = next_peer_cursor; + return skb->len; + + /* At this point, we can't really deal ourselves with safely zeroing out + * the private key material after usage. This will need an additional API + * in the kernel for marking skbs as zero_on_free. + */ +} + +static int wg_get_device_done(struct netlink_callback *cb) +{ + struct dump_ctx *ctx = DUMP_CTX(cb); + + if (ctx->wg) + dev_put(ctx->wg->dev); + wg_peer_put(ctx->next_peer); + return 0; +} + +static int set_port(struct wg_device *wg, u16 port) +{ + struct wg_peer *peer; + + if (wg->incoming_port == port) + return 0; + list_for_each_entry(peer, &wg->peer_list, peer_list) + wg_socket_clear_peer_endpoint_src(peer); + if (!netif_running(wg->dev)) { + wg->incoming_port = port; + return 0; + } + return wg_socket_init(wg, port); +} + +static int set_allowedip(struct wg_peer *peer, struct nlattr **attrs) +{ + int ret = -EINVAL; + u16 family; + u8 cidr; + + if (!attrs[WGALLOWEDIP_A_FAMILY] || !attrs[WGALLOWEDIP_A_IPADDR] || + !attrs[WGALLOWEDIP_A_CIDR_MASK]) + return ret; + family = nla_get_u16(attrs[WGALLOWEDIP_A_FAMILY]); + cidr = nla_get_u8(attrs[WGALLOWEDIP_A_CIDR_MASK]); + + if (family == AF_INET && cidr <= 32 && + nla_len(attrs[WGALLOWEDIP_A_IPADDR]) == sizeof(struct in_addr)) + ret = wg_allowedips_insert_v4( + &peer->device->peer_allowedips, + nla_data(attrs[WGALLOWEDIP_A_IPADDR]), cidr, peer, + &peer->device->device_update_lock); + else if (family == AF_INET6 && cidr <= 128 && + nla_len(attrs[WGALLOWEDIP_A_IPADDR]) == sizeof(struct in6_addr)) + ret = wg_allowedips_insert_v6( + &peer->device->peer_allowedips, + nla_data(attrs[WGALLOWEDIP_A_IPADDR]), cidr, peer, + &peer->device->device_update_lock); + + return ret; +} + +static int set_peer(struct wg_device *wg, struct nlattr **attrs) +{ + u8 *public_key = NULL, *preshared_key = NULL; + struct wg_peer *peer = NULL; + u32 flags = 0; + int ret; + + ret = -EINVAL; + if (attrs[WGPEER_A_PUBLIC_KEY] && + nla_len(attrs[WGPEER_A_PUBLIC_KEY]) == NOISE_PUBLIC_KEY_LEN) + public_key = nla_data(attrs[WGPEER_A_PUBLIC_KEY]); + else + goto out; + if (attrs[WGPEER_A_PRESHARED_KEY] && + nla_len(attrs[WGPEER_A_PRESHARED_KEY]) == NOISE_SYMMETRIC_KEY_LEN) + preshared_key = nla_data(attrs[WGPEER_A_PRESHARED_KEY]); + + if (attrs[WGPEER_A_FLAGS]) + flags = nla_get_u32(attrs[WGPEER_A_FLAGS]); + ret = -EOPNOTSUPP; + if (flags & ~__WGPEER_F_ALL) + goto out; + + ret = -EPFNOSUPPORT; + if (attrs[WGPEER_A_PROTOCOL_VERSION]) { + if (nla_get_u32(attrs[WGPEER_A_PROTOCOL_VERSION]) != 1) + goto out; + } + + peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable, + nla_data(attrs[WGPEER_A_PUBLIC_KEY])); + ret = 0; + if (!peer) { /* Peer doesn't exist yet. Add a new one. */ + if (flags & (WGPEER_F_REMOVE_ME | WGPEER_F_UPDATE_ONLY)) + goto out; + + /* The peer is new, so there aren't allowed IPs to remove. */ + flags &= ~WGPEER_F_REPLACE_ALLOWEDIPS; + + down_read(&wg->static_identity.lock); + if (wg->static_identity.has_identity && + !memcmp(nla_data(attrs[WGPEER_A_PUBLIC_KEY]), + wg->static_identity.static_public, + NOISE_PUBLIC_KEY_LEN)) { + /* We silently ignore peers that have the same public + * key as the device. The reason we do it silently is + * that we'd like for people to be able to reuse the + * same set of API calls across peers. + */ + up_read(&wg->static_identity.lock); + ret = 0; + goto out; + } + up_read(&wg->static_identity.lock); + + peer = wg_peer_create(wg, public_key, preshared_key); + if (IS_ERR(peer)) { + /* Similar to the above, if the key is invalid, we skip + * it without fanfare, so that services don't need to + * worry about doing key validation themselves. + */ + ret = PTR_ERR(peer) == -EKEYREJECTED ? 0 : PTR_ERR(peer); + peer = NULL; + goto out; + } + /* Take additional reference, as though we've just been + * looked up. + */ + wg_peer_get(peer); + } + + if (flags & WGPEER_F_REMOVE_ME) { + wg_peer_remove(peer); + goto out; + } + + if (preshared_key) { + down_write(&peer->handshake.lock); + memcpy(&peer->handshake.preshared_key, preshared_key, + NOISE_SYMMETRIC_KEY_LEN); + up_write(&peer->handshake.lock); + } + + if (attrs[WGPEER_A_ENDPOINT]) { + struct sockaddr *addr = nla_data(attrs[WGPEER_A_ENDPOINT]); + size_t len = nla_len(attrs[WGPEER_A_ENDPOINT]); + + if ((len == sizeof(struct sockaddr_in) && + addr->sa_family == AF_INET) || + (len == sizeof(struct sockaddr_in6) && + addr->sa_family == AF_INET6)) { + struct endpoint endpoint = { { { 0 } } }; + + memcpy(&endpoint.addr, addr, len); + wg_socket_set_peer_endpoint(peer, &endpoint); + } + } + + if (flags & WGPEER_F_REPLACE_ALLOWEDIPS) + wg_allowedips_remove_by_peer(&wg->peer_allowedips, peer, + &wg->device_update_lock); + + if (attrs[WGPEER_A_ALLOWEDIPS]) { + struct nlattr *attr, *allowedip[WGALLOWEDIP_A_MAX + 1]; + int rem; + + nla_for_each_nested(attr, attrs[WGPEER_A_ALLOWEDIPS], rem) { + ret = nla_parse_nested(allowedip, WGALLOWEDIP_A_MAX, + attr, allowedip_policy, NULL); + if (ret < 0) + goto out; + ret = set_allowedip(peer, allowedip); + if (ret < 0) + goto out; + } + } + + if (attrs[WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL]) { + const u16 persistent_keepalive_interval = nla_get_u16( + attrs[WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL]); + const bool send_keepalive = + !peer->persistent_keepalive_interval && + persistent_keepalive_interval && + netif_running(wg->dev); + + peer->persistent_keepalive_interval = persistent_keepalive_interval; + if (send_keepalive) + wg_packet_send_keepalive(peer); + } + + if (netif_running(wg->dev)) + wg_packet_send_staged_packets(peer); + +out: + wg_peer_put(peer); + if (attrs[WGPEER_A_PRESHARED_KEY]) + memzero_explicit(nla_data(attrs[WGPEER_A_PRESHARED_KEY]), + nla_len(attrs[WGPEER_A_PRESHARED_KEY])); + return ret; +} + +static int wg_set_device(struct sk_buff *skb, struct genl_info *info) +{ + struct wg_device *wg = lookup_interface(info->attrs, skb); + u32 flags = 0; + int ret; + + if (IS_ERR(wg)) { + ret = PTR_ERR(wg); + goto out_nodev; + } + + rtnl_lock(); + mutex_lock(&wg->device_update_lock); + + if (info->attrs[WGDEVICE_A_FLAGS]) + flags = nla_get_u32(info->attrs[WGDEVICE_A_FLAGS]); + ret = -EOPNOTSUPP; + if (flags & ~__WGDEVICE_F_ALL) + goto out; + + ret = -EPERM; + if ((info->attrs[WGDEVICE_A_LISTEN_PORT] || + info->attrs[WGDEVICE_A_FWMARK]) && + !ns_capable(wg->creating_net->user_ns, CAP_NET_ADMIN)) + goto out; + + ++wg->device_update_gen; + + if (info->attrs[WGDEVICE_A_FWMARK]) { + struct wg_peer *peer; + + wg->fwmark = nla_get_u32(info->attrs[WGDEVICE_A_FWMARK]); + list_for_each_entry(peer, &wg->peer_list, peer_list) + wg_socket_clear_peer_endpoint_src(peer); + } + + if (info->attrs[WGDEVICE_A_LISTEN_PORT]) { + ret = set_port(wg, + nla_get_u16(info->attrs[WGDEVICE_A_LISTEN_PORT])); + if (ret) + goto out; + } + + if (flags & WGDEVICE_F_REPLACE_PEERS) + wg_peer_remove_all(wg); + + if (info->attrs[WGDEVICE_A_PRIVATE_KEY] && + nla_len(info->attrs[WGDEVICE_A_PRIVATE_KEY]) == + NOISE_PUBLIC_KEY_LEN) { + u8 *private_key = nla_data(info->attrs[WGDEVICE_A_PRIVATE_KEY]); + u8 public_key[NOISE_PUBLIC_KEY_LEN]; + struct wg_peer *peer, *temp; + + if (!crypto_memneq(wg->static_identity.static_private, + private_key, NOISE_PUBLIC_KEY_LEN)) + goto skip_set_private_key; + + /* We remove before setting, to prevent race, which means doing + * two 25519-genpub ops. + */ + if (curve25519_generate_public(public_key, private_key)) { + peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable, + public_key); + if (peer) { + wg_peer_put(peer); + wg_peer_remove(peer); + } + } + + down_write(&wg->static_identity.lock); + wg_noise_set_static_identity_private_key(&wg->static_identity, + private_key); + list_for_each_entry_safe(peer, temp, &wg->peer_list, + peer_list) { + if (wg_noise_precompute_static_static(peer)) + wg_noise_expire_current_peer_keypairs(peer); + else + wg_peer_remove(peer); + } + wg_cookie_checker_precompute_device_keys(&wg->cookie_checker); + up_write(&wg->static_identity.lock); + } +skip_set_private_key: + + if (info->attrs[WGDEVICE_A_PEERS]) { + struct nlattr *attr, *peer[WGPEER_A_MAX + 1]; + int rem; + + nla_for_each_nested(attr, info->attrs[WGDEVICE_A_PEERS], rem) { + ret = nla_parse_nested(peer, WGPEER_A_MAX, attr, + peer_policy, NULL); + if (ret < 0) + goto out; + ret = set_peer(wg, peer); + if (ret < 0) + goto out; + } + } + ret = 0; + +out: + mutex_unlock(&wg->device_update_lock); + rtnl_unlock(); + dev_put(wg->dev); +out_nodev: + if (info->attrs[WGDEVICE_A_PRIVATE_KEY]) + memzero_explicit(nla_data(info->attrs[WGDEVICE_A_PRIVATE_KEY]), + nla_len(info->attrs[WGDEVICE_A_PRIVATE_KEY])); + return ret; +} + +static const struct genl_ops genl_ops[] = { + { + .cmd = WG_CMD_GET_DEVICE, + .start = wg_get_device_start, + .dumpit = wg_get_device_dump, + .done = wg_get_device_done, + .flags = GENL_UNS_ADMIN_PERM + }, { + .cmd = WG_CMD_SET_DEVICE, + .doit = wg_set_device, + .flags = GENL_UNS_ADMIN_PERM + } +}; + +static struct genl_family genl_family __ro_after_init = { + .ops = genl_ops, + .n_ops = ARRAY_SIZE(genl_ops), + .name = WG_GENL_NAME, + .version = WG_GENL_VERSION, + .maxattr = WGDEVICE_A_MAX, + .module = THIS_MODULE, + .policy = device_policy, + .netnsok = true +}; + +int __init wg_genetlink_init(void) +{ + return genl_register_family(&genl_family); +} + +void __exit wg_genetlink_uninit(void) +{ + genl_unregister_family(&genl_family); +} diff --git a/drivers/net/wireguard/netlink.h b/drivers/net/wireguard/netlink.h new file mode 100644 index 000000000000..15100d92e2e3 --- /dev/null +++ b/drivers/net/wireguard/netlink.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#ifndef _WG_NETLINK_H +#define _WG_NETLINK_H + +int wg_genetlink_init(void); +void wg_genetlink_uninit(void); + +#endif /* _WG_NETLINK_H */ diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c new file mode 100644 index 000000000000..d71c8db68a8c --- /dev/null +++ b/drivers/net/wireguard/noise.c @@ -0,0 +1,828 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include "noise.h" +#include "device.h" +#include "peer.h" +#include "messages.h" +#include "queueing.h" +#include "peerlookup.h" + +#include <linux/rcupdate.h> +#include <linux/slab.h> +#include <linux/bitmap.h> +#include <linux/scatterlist.h> +#include <linux/highmem.h> +#include <crypto/algapi.h> + +/* This implements Noise_IKpsk2: + * + * <- s + * ****** + * -> e, es, s, ss, {t} + * <- e, ee, se, psk, {} + */ + +static const u8 handshake_name[37] = "Noise_IKpsk2_25519_ChaChaPoly_BLAKE2s"; +static const u8 identifier_name[34] = "WireGuard v1 zx2c4 Jason@zx2c4.com"; +static u8 handshake_init_hash[NOISE_HASH_LEN] __ro_after_init; +static u8 handshake_init_chaining_key[NOISE_HASH_LEN] __ro_after_init; +static atomic64_t keypair_counter = ATOMIC64_INIT(0); + +void __init wg_noise_init(void) +{ + struct blake2s_state blake; + + blake2s(handshake_init_chaining_key, handshake_name, NULL, + NOISE_HASH_LEN, sizeof(handshake_name), 0); + blake2s_init(&blake, NOISE_HASH_LEN); + blake2s_update(&blake, handshake_init_chaining_key, NOISE_HASH_LEN); + blake2s_update(&blake, identifier_name, sizeof(identifier_name)); + blake2s_final(&blake, handshake_init_hash); +} + +/* Must hold peer->handshake.static_identity->lock */ +bool wg_noise_precompute_static_static(struct wg_peer *peer) +{ + bool ret = true; + + down_write(&peer->handshake.lock); + if (peer->handshake.static_identity->has_identity) + ret = curve25519( + peer->handshake.precomputed_static_static, + peer->handshake.static_identity->static_private, + peer->handshake.remote_static); + else + memset(peer->handshake.precomputed_static_static, 0, + NOISE_PUBLIC_KEY_LEN); + up_write(&peer->handshake.lock); + return ret; +} + +bool wg_noise_handshake_init(struct noise_handshake *handshake, + struct noise_static_identity *static_identity, + const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN], + const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN], + struct wg_peer *peer) +{ + memset(handshake, 0, sizeof(*handshake)); + init_rwsem(&handshake->lock); + handshake->entry.type = INDEX_HASHTABLE_HANDSHAKE; + handshake->entry.peer = peer; + memcpy(handshake->remote_static, peer_public_key, NOISE_PUBLIC_KEY_LEN); + if (peer_preshared_key) + memcpy(handshake->preshared_key, peer_preshared_key, + NOISE_SYMMETRIC_KEY_LEN); + handshake->static_identity = static_identity; + handshake->state = HANDSHAKE_ZEROED; + return wg_noise_precompute_static_static(peer); +} + +static void handshake_zero(struct noise_handshake *handshake) +{ + memset(&handshake->ephemeral_private, 0, NOISE_PUBLIC_KEY_LEN); + memset(&handshake->remote_ephemeral, 0, NOISE_PUBLIC_KEY_LEN); + memset(&handshake->hash, 0, NOISE_HASH_LEN); + memset(&handshake->chaining_key, 0, NOISE_HASH_LEN); + handshake->remote_index = 0; + handshake->state = HANDSHAKE_ZEROED; +} + +void wg_noise_handshake_clear(struct noise_handshake *handshake) +{ + wg_index_hashtable_remove( + handshake->entry.peer->device->index_hashtable, + &handshake->entry); + down_write(&handshake->lock); + handshake_zero(handshake); + up_write(&handshake->lock); + wg_index_hashtable_remove( + handshake->entry.peer->device->index_hashtable, + &handshake->entry); +} + +static struct noise_keypair *keypair_create(struct wg_peer *peer) +{ + struct noise_keypair *keypair = kzalloc(sizeof(*keypair), GFP_KERNEL); + + if (unlikely(!keypair)) + return NULL; + keypair->internal_id = atomic64_inc_return(&keypair_counter); + keypair->entry.type = INDEX_HASHTABLE_KEYPAIR; + keypair->entry.peer = peer; + kref_init(&keypair->refcount); + return keypair; +} + +static void keypair_free_rcu(struct rcu_head *rcu) +{ + kzfree(container_of(rcu, struct noise_keypair, rcu)); +} + +static void keypair_free_kref(struct kref *kref) +{ + struct noise_keypair *keypair = + container_of(kref, struct noise_keypair, refcount); + + net_dbg_ratelimited("%s: Keypair %llu destroyed for peer %llu\n", + keypair->entry.peer->device->dev->name, + keypair->internal_id, + keypair->entry.peer->internal_id); + wg_index_hashtable_remove(keypair->entry.peer->device->index_hashtable, + &keypair->entry); + call_rcu(&keypair->rcu, keypair_free_rcu); +} + +void wg_noise_keypair_put(struct noise_keypair *keypair, bool unreference_now) +{ + if (unlikely(!keypair)) + return; + if (unlikely(unreference_now)) + wg_index_hashtable_remove( + keypair->entry.peer->device->index_hashtable, + &keypair->entry); + kref_put(&keypair->refcount, keypair_free_kref); +} + +struct noise_keypair *wg_noise_keypair_get(struct noise_keypair *keypair) +{ + RCU_LOCKDEP_WARN(!rcu_read_lock_bh_held(), + "Taking noise keypair reference without holding the RCU BH read lock"); + if (unlikely(!keypair || !kref_get_unless_zero(&keypair->refcount))) + return NULL; + return keypair; +} + +void wg_noise_keypairs_clear(struct noise_keypairs *keypairs) +{ + struct noise_keypair *old; + + spin_lock_bh(&keypairs->keypair_update_lock); + + /* We zero the next_keypair before zeroing the others, so that + * wg_noise_received_with_keypair returns early before subsequent ones + * are zeroed. + */ + old = rcu_dereference_protected(keypairs->next_keypair, + lockdep_is_held(&keypairs->keypair_update_lock)); + RCU_INIT_POINTER(keypairs->next_keypair, NULL); + wg_noise_keypair_put(old, true); + + old = rcu_dereference_protected(keypairs->previous_keypair, + lockdep_is_held(&keypairs->keypair_update_lock)); + RCU_INIT_POINTER(keypairs->previous_keypair, NULL); + wg_noise_keypair_put(old, true); + + old = rcu_dereference_protected(keypairs->current_keypair, + lockdep_is_held(&keypairs->keypair_update_lock)); + RCU_INIT_POINTER(keypairs->current_keypair, NULL); + wg_noise_keypair_put(old, true); + + spin_unlock_bh(&keypairs->keypair_update_lock); +} + +void wg_noise_expire_current_peer_keypairs(struct wg_peer *peer) +{ + struct noise_keypair *keypair; + + wg_noise_handshake_clear(&peer->handshake); + wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake); + + spin_lock_bh(&peer->keypairs.keypair_update_lock); + keypair = rcu_dereference_protected(peer->keypairs.next_keypair, + lockdep_is_held(&peer->keypairs.keypair_update_lock)); + if (keypair) + keypair->sending.is_valid = false; + keypair = rcu_dereference_protected(peer->keypairs.current_keypair, + lockdep_is_held(&peer->keypairs.keypair_update_lock)); + if (keypair) + keypair->sending.is_valid = false; + spin_unlock_bh(&peer->keypairs.keypair_update_lock); +} + +static void add_new_keypair(struct noise_keypairs *keypairs, + struct noise_keypair *new_keypair) +{ + struct noise_keypair *previous_keypair, *next_keypair, *current_keypair; + + spin_lock_bh(&keypairs->keypair_update_lock); + previous_keypair = rcu_dereference_protected(keypairs->previous_keypair, + lockdep_is_held(&keypairs->keypair_update_lock)); + next_keypair = rcu_dereference_protected(keypairs->next_keypair, + lockdep_is_held(&keypairs->keypair_update_lock)); + current_keypair = rcu_dereference_protected(keypairs->current_keypair, + lockdep_is_held(&keypairs->keypair_update_lock)); + if (new_keypair->i_am_the_initiator) { + /* If we're the initiator, it means we've sent a handshake, and + * received a confirmation response, which means this new + * keypair can now be used. + */ + if (next_keypair) { + /* If there already was a next keypair pending, we + * demote it to be the previous keypair, and free the + * existing current. Note that this means KCI can result + * in this transition. It would perhaps be more sound to + * always just get rid of the unused next keypair + * instead of putting it in the previous slot, but this + * might be a bit less robust. Something to think about + * for the future. + */ + RCU_INIT_POINTER(keypairs->next_keypair, NULL); + rcu_assign_pointer(keypairs->previous_keypair, + next_keypair); + wg_noise_keypair_put(current_keypair, true); + } else /* If there wasn't an existing next keypair, we replace + * the previous with the current one. + */ + rcu_assign_pointer(keypairs->previous_keypair, + current_keypair); + /* At this point we can get rid of the old previous keypair, and + * set up the new keypair. + */ + wg_noise_keypair_put(previous_keypair, true); + rcu_assign_pointer(keypairs->current_keypair, new_keypair); + } else { + /* If we're the responder, it means we can't use the new keypair + * until we receive confirmation via the first data packet, so + * we get rid of the existing previous one, the possibly + * existing next one, and slide in the new next one. + */ + rcu_assign_pointer(keypairs->next_keypair, new_keypair); + wg_noise_keypair_put(next_keypair, true); + RCU_INIT_POINTER(keypairs->previous_keypair, NULL); + wg_noise_keypair_put(previous_keypair, true); + } + spin_unlock_bh(&keypairs->keypair_update_lock); +} + +bool wg_noise_received_with_keypair(struct noise_keypairs *keypairs, + struct noise_keypair *received_keypair) +{ + struct noise_keypair *old_keypair; + bool key_is_new; + + /* We first check without taking the spinlock. */ + key_is_new = received_keypair == + rcu_access_pointer(keypairs->next_keypair); + if (likely(!key_is_new)) + return false; + + spin_lock_bh(&keypairs->keypair_update_lock); + /* After locking, we double check that things didn't change from + * beneath us. + */ + if (unlikely(received_keypair != + rcu_dereference_protected(keypairs->next_keypair, + lockdep_is_held(&keypairs->keypair_update_lock)))) { + spin_unlock_bh(&keypairs->keypair_update_lock); + return false; + } + + /* When we've finally received the confirmation, we slide the next + * into the current, the current into the previous, and get rid of + * the old previous. + */ + old_keypair = rcu_dereference_protected(keypairs->previous_keypair, + lockdep_is_held(&keypairs->keypair_update_lock)); + rcu_assign_pointer(keypairs->previous_keypair, + rcu_dereference_protected(keypairs->current_keypair, + lockdep_is_held(&keypairs->keypair_update_lock))); + wg_noise_keypair_put(old_keypair, true); + rcu_assign_pointer(keypairs->current_keypair, received_keypair); + RCU_INIT_POINTER(keypairs->next_keypair, NULL); + + spin_unlock_bh(&keypairs->keypair_update_lock); + return true; +} + +/* Must hold static_identity->lock */ +void wg_noise_set_static_identity_private_key( + struct noise_static_identity *static_identity, + const u8 private_key[NOISE_PUBLIC_KEY_LEN]) +{ + memcpy(static_identity->static_private, private_key, + NOISE_PUBLIC_KEY_LEN); + curve25519_clamp_secret(static_identity->static_private); + static_identity->has_identity = curve25519_generate_public( + static_identity->static_public, private_key); +} + +/* This is Hugo Krawczyk's HKDF: + * - https://eprint.iacr.org/2010/264.pdf + * - https://tools.ietf.org/html/rfc5869 + */ +static void kdf(u8 *first_dst, u8 *second_dst, u8 *third_dst, const u8 *data, + size_t first_len, size_t second_len, size_t third_len, + size_t data_len, const u8 chaining_key[NOISE_HASH_LEN]) +{ + u8 output[BLAKE2S_HASH_SIZE + 1]; + u8 secret[BLAKE2S_HASH_SIZE]; + + WARN_ON(IS_ENABLED(DEBUG) && + (first_len > BLAKE2S_HASH_SIZE || + second_len > BLAKE2S_HASH_SIZE || + third_len > BLAKE2S_HASH_SIZE || + ((second_len || second_dst || third_len || third_dst) && + (!first_len || !first_dst)) || + ((third_len || third_dst) && (!second_len || !second_dst)))); + + /* Extract entropy from data into secret */ + blake2s256_hmac(secret, data, chaining_key, data_len, NOISE_HASH_LEN); + + if (!first_dst || !first_len) + goto out; + + /* Expand first key: key = secret, data = 0x1 */ + output[0] = 1; + blake2s256_hmac(output, output, secret, 1, BLAKE2S_HASH_SIZE); + memcpy(first_dst, output, first_len); + + if (!second_dst || !second_len) + goto out; + + /* Expand second key: key = secret, data = first-key || 0x2 */ + output[BLAKE2S_HASH_SIZE] = 2; + blake2s256_hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1, + BLAKE2S_HASH_SIZE); + memcpy(second_dst, output, second_len); + + if (!third_dst || !third_len) + goto out; + + /* Expand third key: key = secret, data = second-key || 0x3 */ + output[BLAKE2S_HASH_SIZE] = 3; + blake2s256_hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1, + BLAKE2S_HASH_SIZE); + memcpy(third_dst, output, third_len); + +out: + /* Clear sensitive data from stack */ + memzero_explicit(secret, BLAKE2S_HASH_SIZE); + memzero_explicit(output, BLAKE2S_HASH_SIZE + 1); +} + +static void symmetric_key_init(struct noise_symmetric_key *key) +{ + spin_lock_init(&key->counter.receive.lock); + atomic64_set(&key->counter.counter, 0); + memset(key->counter.receive.backtrack, 0, + sizeof(key->counter.receive.backtrack)); + key->birthdate = ktime_get_coarse_boottime_ns(); + key->is_valid = true; +} + +static void derive_keys(struct noise_symmetric_key *first_dst, + struct noise_symmetric_key *second_dst, + const u8 chaining_key[NOISE_HASH_LEN]) +{ + kdf(first_dst->key, second_dst->key, NULL, NULL, + NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, 0, + chaining_key); + symmetric_key_init(first_dst); + symmetric_key_init(second_dst); +} + +static bool __must_check mix_dh(u8 chaining_key[NOISE_HASH_LEN], + u8 key[NOISE_SYMMETRIC_KEY_LEN], + const u8 private[NOISE_PUBLIC_KEY_LEN], + const u8 public[NOISE_PUBLIC_KEY_LEN]) +{ + u8 dh_calculation[NOISE_PUBLIC_KEY_LEN]; + + if (unlikely(!curve25519(dh_calculation, private, public))) + return false; + kdf(chaining_key, key, NULL, dh_calculation, NOISE_HASH_LEN, + NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, chaining_key); + memzero_explicit(dh_calculation, NOISE_PUBLIC_KEY_LEN); + return true; +} + +static void mix_hash(u8 hash[NOISE_HASH_LEN], const u8 *src, size_t src_len) +{ + struct blake2s_state blake; + + blake2s_init(&blake, NOISE_HASH_LEN); + blake2s_update(&blake, hash, NOISE_HASH_LEN); + blake2s_update(&blake, src, src_len); + blake2s_final(&blake, hash); +} + +static void mix_psk(u8 chaining_key[NOISE_HASH_LEN], u8 hash[NOISE_HASH_LEN], + u8 key[NOISE_SYMMETRIC_KEY_LEN], + const u8 psk[NOISE_SYMMETRIC_KEY_LEN]) +{ + u8 temp_hash[NOISE_HASH_LEN]; + + kdf(chaining_key, temp_hash, key, psk, NOISE_HASH_LEN, NOISE_HASH_LEN, + NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, chaining_key); + mix_hash(hash, temp_hash, NOISE_HASH_LEN); + memzero_explicit(temp_hash, NOISE_HASH_LEN); +} + +static void handshake_init(u8 chaining_key[NOISE_HASH_LEN], + u8 hash[NOISE_HASH_LEN], + const u8 remote_static[NOISE_PUBLIC_KEY_LEN]) +{ + memcpy(hash, handshake_init_hash, NOISE_HASH_LEN); + memcpy(chaining_key, handshake_init_chaining_key, NOISE_HASH_LEN); + mix_hash(hash, remote_static, NOISE_PUBLIC_KEY_LEN); +} + +static void message_encrypt(u8 *dst_ciphertext, const u8 *src_plaintext, + size_t src_len, u8 key[NOISE_SYMMETRIC_KEY_LEN], + u8 hash[NOISE_HASH_LEN]) +{ + chacha20poly1305_encrypt(dst_ciphertext, src_plaintext, src_len, hash, + NOISE_HASH_LEN, + 0 /* Always zero for Noise_IK */, key); + mix_hash(hash, dst_ciphertext, noise_encrypted_len(src_len)); +} + +static bool message_decrypt(u8 *dst_plaintext, const u8 *src_ciphertext, + size_t src_len, u8 key[NOISE_SYMMETRIC_KEY_LEN], + u8 hash[NOISE_HASH_LEN]) +{ + if (!chacha20poly1305_decrypt(dst_plaintext, src_ciphertext, src_len, + hash, NOISE_HASH_LEN, + 0 /* Always zero for Noise_IK */, key)) + return false; + mix_hash(hash, src_ciphertext, src_len); + return true; +} + +static void message_ephemeral(u8 ephemeral_dst[NOISE_PUBLIC_KEY_LEN], + const u8 ephemeral_src[NOISE_PUBLIC_KEY_LEN], + u8 chaining_key[NOISE_HASH_LEN], + u8 hash[NOISE_HASH_LEN]) +{ + if (ephemeral_dst != ephemeral_src) + memcpy(ephemeral_dst, ephemeral_src, NOISE_PUBLIC_KEY_LEN); + mix_hash(hash, ephemeral_src, NOISE_PUBLIC_KEY_LEN); + kdf(chaining_key, NULL, NULL, ephemeral_src, NOISE_HASH_LEN, 0, 0, + NOISE_PUBLIC_KEY_LEN, chaining_key); +} + +static void tai64n_now(u8 output[NOISE_TIMESTAMP_LEN]) +{ + struct timespec64 now; + + ktime_get_real_ts64(&now); + + /* In order to prevent some sort of infoleak from precise timers, we + * round down the nanoseconds part to the closest rounded-down power of + * two to the maximum initiations per second allowed anyway by the + * implementation. + */ + now.tv_nsec = ALIGN_DOWN(now.tv_nsec, + rounddown_pow_of_two(NSEC_PER_SEC / INITIATIONS_PER_SECOND)); + + /* https://cr.yp.to/libtai/tai64.html */ + *(__be64 *)output = cpu_to_be64(0x400000000000000aULL + now.tv_sec); + *(__be32 *)(output + sizeof(__be64)) = cpu_to_be32(now.tv_nsec); +} + +bool +wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst, + struct noise_handshake *handshake) +{ + u8 timestamp[NOISE_TIMESTAMP_LEN]; + u8 key[NOISE_SYMMETRIC_KEY_LEN]; + bool ret = false; + + /* We need to wait for crng _before_ taking any locks, since + * curve25519_generate_secret uses get_random_bytes_wait. + */ + wait_for_random_bytes(); + + down_read(&handshake->static_identity->lock); + down_write(&handshake->lock); + + if (unlikely(!handshake->static_identity->has_identity)) + goto out; + + dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION); + + handshake_init(handshake->chaining_key, handshake->hash, + handshake->remote_static); + + /* e */ + curve25519_generate_secret(handshake->ephemeral_private); + if (!curve25519_generate_public(dst->unencrypted_ephemeral, + handshake->ephemeral_private)) + goto out; + message_ephemeral(dst->unencrypted_ephemeral, + dst->unencrypted_ephemeral, handshake->chaining_key, + handshake->hash); + + /* es */ + if (!mix_dh(handshake->chaining_key, key, handshake->ephemeral_private, + handshake->remote_static)) + goto out; + + /* s */ + message_encrypt(dst->encrypted_static, + handshake->static_identity->static_public, + NOISE_PUBLIC_KEY_LEN, key, handshake->hash); + + /* ss */ + kdf(handshake->chaining_key, key, NULL, + handshake->precomputed_static_static, NOISE_HASH_LEN, + NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, + handshake->chaining_key); + + /* {t} */ + tai64n_now(timestamp); + message_encrypt(dst->encrypted_timestamp, timestamp, + NOISE_TIMESTAMP_LEN, key, handshake->hash); + + dst->sender_index = wg_index_hashtable_insert( + handshake->entry.peer->device->index_hashtable, + &handshake->entry); + + handshake->state = HANDSHAKE_CREATED_INITIATION; + ret = true; + +out: + up_write(&handshake->lock); + up_read(&handshake->static_identity->lock); + memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN); + return ret; +} + +struct wg_peer * +wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src, + struct wg_device *wg) +{ + struct wg_peer *peer = NULL, *ret_peer = NULL; + struct noise_handshake *handshake; + bool replay_attack, flood_attack; + u8 key[NOISE_SYMMETRIC_KEY_LEN]; + u8 chaining_key[NOISE_HASH_LEN]; + u8 hash[NOISE_HASH_LEN]; + u8 s[NOISE_PUBLIC_KEY_LEN]; + u8 e[NOISE_PUBLIC_KEY_LEN]; + u8 t[NOISE_TIMESTAMP_LEN]; + u64 initiation_consumption; + + down_read(&wg->static_identity.lock); + if (unlikely(!wg->static_identity.has_identity)) + goto out; + + handshake_init(chaining_key, hash, wg->static_identity.static_public); + + /* e */ + message_ephemeral(e, src->unencrypted_ephemeral, chaining_key, hash); + + /* es */ + if (!mix_dh(chaining_key, key, wg->static_identity.static_private, e)) + goto out; + + /* s */ + if (!message_decrypt(s, src->encrypted_static, + sizeof(src->encrypted_static), key, hash)) + goto out; + + /* Lookup which peer we're actually talking to */ + peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable, s); + if (!peer) + goto out; + handshake = &peer->handshake; + + /* ss */ + kdf(chaining_key, key, NULL, handshake->precomputed_static_static, + NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, + chaining_key); + + /* {t} */ + if (!message_decrypt(t, src->encrypted_timestamp, + sizeof(src->encrypted_timestamp), key, hash)) + goto out; + + down_read(&handshake->lock); + replay_attack = memcmp(t, handshake->latest_timestamp, + NOISE_TIMESTAMP_LEN) <= 0; + flood_attack = (s64)handshake->last_initiation_consumption + + NSEC_PER_SEC / INITIATIONS_PER_SECOND > + (s64)ktime_get_coarse_boottime_ns(); + up_read(&handshake->lock); + if (replay_attack || flood_attack) + goto out; + + /* Success! Copy everything to peer */ + down_write(&handshake->lock); + memcpy(handshake->remote_ephemeral, e, NOISE_PUBLIC_KEY_LEN); + if (memcmp(t, handshake->latest_timestamp, NOISE_TIMESTAMP_LEN) > 0) + memcpy(handshake->latest_timestamp, t, NOISE_TIMESTAMP_LEN); + memcpy(handshake->hash, hash, NOISE_HASH_LEN); + memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN); + handshake->remote_index = src->sender_index; + if ((s64)(handshake->last_initiation_consumption - + (initiation_consumption = ktime_get_coarse_boottime_ns())) < 0) + handshake->last_initiation_consumption = initiation_consumption; + handshake->state = HANDSHAKE_CONSUMED_INITIATION; + up_write(&handshake->lock); + ret_peer = peer; + +out: + memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN); + memzero_explicit(hash, NOISE_HASH_LEN); + memzero_explicit(chaining_key, NOISE_HASH_LEN); + up_read(&wg->static_identity.lock); + if (!ret_peer) + wg_peer_put(peer); + return ret_peer; +} + +bool wg_noise_handshake_create_response(struct message_handshake_response *dst, + struct noise_handshake *handshake) +{ + u8 key[NOISE_SYMMETRIC_KEY_LEN]; + bool ret = false; + + /* We need to wait for crng _before_ taking any locks, since + * curve25519_generate_secret uses get_random_bytes_wait. + */ + wait_for_random_bytes(); + + down_read(&handshake->static_identity->lock); + down_write(&handshake->lock); + + if (handshake->state != HANDSHAKE_CONSUMED_INITIATION) + goto out; + + dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE); + dst->receiver_index = handshake->remote_index; + + /* e */ + curve25519_generate_secret(handshake->ephemeral_private); + if (!curve25519_generate_public(dst->unencrypted_ephemeral, + handshake->ephemeral_private)) + goto out; + message_ephemeral(dst->unencrypted_ephemeral, + dst->unencrypted_ephemeral, handshake->chaining_key, + handshake->hash); + + /* ee */ + if (!mix_dh(handshake->chaining_key, NULL, handshake->ephemeral_private, + handshake->remote_ephemeral)) + goto out; + + /* se */ + if (!mix_dh(handshake->chaining_key, NULL, handshake->ephemeral_private, + handshake->remote_static)) + goto out; + + /* psk */ + mix_psk(handshake->chaining_key, handshake->hash, key, + handshake->preshared_key); + + /* {} */ + message_encrypt(dst->encrypted_nothing, NULL, 0, key, handshake->hash); + + dst->sender_index = wg_index_hashtable_insert( + handshake->entry.peer->device->index_hashtable, + &handshake->entry); + + handshake->state = HANDSHAKE_CREATED_RESPONSE; + ret = true; + +out: + up_write(&handshake->lock); + up_read(&handshake->static_identity->lock); + memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN); + return ret; +} + +struct wg_peer * +wg_noise_handshake_consume_response(struct message_handshake_response *src, + struct wg_device *wg) +{ + enum noise_handshake_state state = HANDSHAKE_ZEROED; + struct wg_peer *peer = NULL, *ret_peer = NULL; + struct noise_handshake *handshake; + u8 key[NOISE_SYMMETRIC_KEY_LEN]; + u8 hash[NOISE_HASH_LEN]; + u8 chaining_key[NOISE_HASH_LEN]; + u8 e[NOISE_PUBLIC_KEY_LEN]; + u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN]; + u8 static_private[NOISE_PUBLIC_KEY_LEN]; + + down_read(&wg->static_identity.lock); + + if (unlikely(!wg->static_identity.has_identity)) + goto out; + + handshake = (struct noise_handshake *)wg_index_hashtable_lookup( + wg->index_hashtable, INDEX_HASHTABLE_HANDSHAKE, + src->receiver_index, &peer); + if (unlikely(!handshake)) + goto out; + + down_read(&handshake->lock); + state = handshake->state; + memcpy(hash, handshake->hash, NOISE_HASH_LEN); + memcpy(chaining_key, handshake->chaining_key, NOISE_HASH_LEN); + memcpy(ephemeral_private, handshake->ephemeral_private, + NOISE_PUBLIC_KEY_LEN); + up_read(&handshake->lock); + + if (state != HANDSHAKE_CREATED_INITIATION) + goto fail; + + /* e */ + message_ephemeral(e, src->unencrypted_ephemeral, chaining_key, hash); + + /* ee */ + if (!mix_dh(chaining_key, NULL, ephemeral_private, e)) + goto fail; + + /* se */ + if (!mix_dh(chaining_key, NULL, wg->static_identity.static_private, e)) + goto fail; + + /* psk */ + mix_psk(chaining_key, hash, key, handshake->preshared_key); + + /* {} */ + if (!message_decrypt(NULL, src->encrypted_nothing, + sizeof(src->encrypted_nothing), key, hash)) + goto fail; + + /* Success! Copy everything to peer */ + down_write(&handshake->lock); + /* It's important to check that the state is still the same, while we + * have an exclusive lock. + */ + if (handshake->state != state) { + up_write(&handshake->lock); + goto fail; + } + memcpy(handshake->remote_ephemeral, e, NOISE_PUBLIC_KEY_LEN); + memcpy(handshake->hash, hash, NOISE_HASH_LEN); + memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN); + handshake->remote_index = src->sender_index; + handshake->state = HANDSHAKE_CONSUMED_RESPONSE; + up_write(&handshake->lock); + ret_peer = peer; + goto out; + +fail: + wg_peer_put(peer); +out: + memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN); + memzero_explicit(hash, NOISE_HASH_LEN); + memzero_explicit(chaining_key, NOISE_HASH_LEN); + memzero_explicit(ephemeral_private, NOISE_PUBLIC_KEY_LEN); + memzero_explicit(static_private, NOISE_PUBLIC_KEY_LEN); + up_read(&wg->static_identity.lock); + return ret_peer; +} + +bool wg_noise_handshake_begin_session(struct noise_handshake *handshake, + struct noise_keypairs *keypairs) +{ + struct noise_keypair *new_keypair; + bool ret = false; + + down_write(&handshake->lock); + if (handshake->state != HANDSHAKE_CREATED_RESPONSE && + handshake->state != HANDSHAKE_CONSUMED_RESPONSE) + goto out; + + new_keypair = keypair_create(handshake->entry.peer); + if (!new_keypair) + goto out; + new_keypair->i_am_the_initiator = handshake->state == + HANDSHAKE_CONSUMED_RESPONSE; + new_keypair->remote_index = handshake->remote_index; + + if (new_keypair->i_am_the_initiator) + derive_keys(&new_keypair->sending, &new_keypair->receiving, + handshake->chaining_key); + else + derive_keys(&new_keypair->receiving, &new_keypair->sending, + handshake->chaining_key); + + handshake_zero(handshake); + rcu_read_lock_bh(); + if (likely(!READ_ONCE(container_of(handshake, struct wg_peer, + handshake)->is_dead))) { + add_new_keypair(keypairs, new_keypair); + net_dbg_ratelimited("%s: Keypair %llu created for peer %llu\n", + handshake->entry.peer->device->dev->name, + new_keypair->internal_id, + handshake->entry.peer->internal_id); + ret = wg_index_hashtable_replace( + handshake->entry.peer->device->index_hashtable, + &handshake->entry, &new_keypair->entry); + } else { + kzfree(new_keypair); + } + rcu_read_unlock_bh(); + +out: + up_write(&handshake->lock); + return ret; +} diff --git a/drivers/net/wireguard/noise.h b/drivers/net/wireguard/noise.h new file mode 100644 index 000000000000..138a07bb817c --- /dev/null +++ b/drivers/net/wireguard/noise.h @@ -0,0 +1,137 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ +#ifndef _WG_NOISE_H +#define _WG_NOISE_H + +#include "messages.h" +#include "peerlookup.h" + +#include <linux/types.h> +#include <linux/spinlock.h> +#include <linux/atomic.h> +#include <linux/rwsem.h> +#include <linux/mutex.h> +#include <linux/kref.h> + +union noise_counter { + struct { + u64 counter; + unsigned long backtrack[COUNTER_BITS_TOTAL / BITS_PER_LONG]; + spinlock_t lock; + } receive; + atomic64_t counter; +}; + +struct noise_symmetric_key { + u8 key[NOISE_SYMMETRIC_KEY_LEN]; + union noise_counter counter; + u64 birthdate; + bool is_valid; +}; + +struct noise_keypair { + struct index_hashtable_entry entry; + struct noise_symmetric_key sending; + struct noise_symmetric_key receiving; + __le32 remote_index; + bool i_am_the_initiator; + struct kref refcount; + struct rcu_head rcu; + u64 internal_id; +}; + +struct noise_keypairs { + struct noise_keypair __rcu *current_keypair; + struct noise_keypair __rcu *previous_keypair; + struct noise_keypair __rcu *next_keypair; + spinlock_t keypair_update_lock; +}; + +struct noise_static_identity { + u8 static_public[NOISE_PUBLIC_KEY_LEN]; + u8 static_private[NOISE_PUBLIC_KEY_LEN]; + struct rw_semaphore lock; + bool has_identity; +}; + +enum noise_handshake_state { + HANDSHAKE_ZEROED, + HANDSHAKE_CREATED_INITIATION, + HANDSHAKE_CONSUMED_INITIATION, + HANDSHAKE_CREATED_RESPONSE, + HANDSHAKE_CONSUMED_RESPONSE +}; + +struct noise_handshake { + struct index_hashtable_entry entry; + + enum noise_handshake_state state; + u64 last_initiation_consumption; + + struct noise_static_identity *static_identity; + + u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN]; + u8 remote_static[NOISE_PUBLIC_KEY_LEN]; + u8 remote_ephemeral[NOISE_PUBLIC_KEY_LEN]; + u8 precomputed_static_static[NOISE_PUBLIC_KEY_LEN]; + + u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]; + + u8 hash[NOISE_HASH_LEN]; + u8 chaining_key[NOISE_HASH_LEN]; + + u8 latest_timestamp[NOISE_TIMESTAMP_LEN]; + __le32 remote_index; + + /* Protects all members except the immutable (after noise_handshake_ + * init): remote_static, precomputed_static_static, static_identity. + */ + struct rw_semaphore lock; +}; + +struct wg_device; + +void wg_noise_init(void); +bool wg_noise_handshake_init(struct noise_handshake *handshake, + struct noise_static_identity *static_identity, + const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN], + const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN], + struct wg_peer *peer); +void wg_noise_handshake_clear(struct noise_handshake *handshake); +static inline void wg_noise_reset_last_sent_handshake(atomic64_t *handshake_ns) +{ + atomic64_set(handshake_ns, ktime_get_coarse_boottime_ns() - + (u64)(REKEY_TIMEOUT + 1) * NSEC_PER_SEC); +} + +void wg_noise_keypair_put(struct noise_keypair *keypair, bool unreference_now); +struct noise_keypair *wg_noise_keypair_get(struct noise_keypair *keypair); +void wg_noise_keypairs_clear(struct noise_keypairs *keypairs); +bool wg_noise_received_with_keypair(struct noise_keypairs *keypairs, + struct noise_keypair *received_keypair); +void wg_noise_expire_current_peer_keypairs(struct wg_peer *peer); + +void wg_noise_set_static_identity_private_key( + struct noise_static_identity *static_identity, + const u8 private_key[NOISE_PUBLIC_KEY_LEN]); +bool wg_noise_precompute_static_static(struct wg_peer *peer); + +bool +wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst, + struct noise_handshake *handshake); +struct wg_peer * +wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src, + struct wg_device *wg); + +bool wg_noise_handshake_create_response(struct message_handshake_response *dst, + struct noise_handshake *handshake); +struct wg_peer * +wg_noise_handshake_consume_response(struct message_handshake_response *src, + struct wg_device *wg); + +bool wg_noise_handshake_begin_session(struct noise_handshake *handshake, + struct noise_keypairs *keypairs); + +#endif /* _WG_NOISE_H */ diff --git a/drivers/net/wireguard/peer.c b/drivers/net/wireguard/peer.c new file mode 100644 index 000000000000..071eedf33f5a --- /dev/null +++ b/drivers/net/wireguard/peer.c @@ -0,0 +1,240 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include "peer.h" +#include "device.h" +#include "queueing.h" +#include "timers.h" +#include "peerlookup.h" +#include "noise.h" + +#include <linux/kref.h> +#include <linux/lockdep.h> +#include <linux/rcupdate.h> +#include <linux/list.h> + +static atomic64_t peer_counter = ATOMIC64_INIT(0); + +struct wg_peer *wg_peer_create(struct wg_device *wg, + const u8 public_key[NOISE_PUBLIC_KEY_LEN], + const u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]) +{ + struct wg_peer *peer; + int ret = -ENOMEM; + + lockdep_assert_held(&wg->device_update_lock); + + if (wg->num_peers >= MAX_PEERS_PER_DEVICE) + return ERR_PTR(ret); + + peer = kzalloc(sizeof(*peer), GFP_KERNEL); + if (unlikely(!peer)) + return ERR_PTR(ret); + peer->device = wg; + + if (!wg_noise_handshake_init(&peer->handshake, &wg->static_identity, + public_key, preshared_key, peer)) { + ret = -EKEYREJECTED; + goto err_1; + } + if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL)) + goto err_1; + if (wg_packet_queue_init(&peer->tx_queue, wg_packet_tx_worker, false, + MAX_QUEUED_PACKETS)) + goto err_2; + if (wg_packet_queue_init(&peer->rx_queue, NULL, false, + MAX_QUEUED_PACKETS)) + goto err_3; + + peer->internal_id = atomic64_inc_return(&peer_counter); + peer->serial_work_cpu = nr_cpumask_bits; + wg_cookie_init(&peer->latest_cookie); + wg_timers_init(peer); + wg_cookie_checker_precompute_peer_keys(peer); + spin_lock_init(&peer->keypairs.keypair_update_lock); + INIT_WORK(&peer->transmit_handshake_work, + wg_packet_handshake_send_worker); + rwlock_init(&peer->endpoint_lock); + kref_init(&peer->refcount); + skb_queue_head_init(&peer->staged_packet_queue); + wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake); + set_bit(NAPI_STATE_NO_BUSY_POLL, &peer->napi.state); + netif_napi_add(wg->dev, &peer->napi, wg_packet_rx_poll, + NAPI_POLL_WEIGHT); + napi_enable(&peer->napi); + list_add_tail(&peer->peer_list, &wg->peer_list); + INIT_LIST_HEAD(&peer->allowedips_list); + wg_pubkey_hashtable_add(wg->peer_hashtable, peer); + ++wg->num_peers; + pr_debug("%s: Peer %llu created\n", wg->dev->name, peer->internal_id); + return peer; + +err_3: + wg_packet_queue_free(&peer->tx_queue, false); +err_2: + dst_cache_destroy(&peer->endpoint_cache); +err_1: + kfree(peer); + return ERR_PTR(ret); +} + +struct wg_peer *wg_peer_get_maybe_zero(struct wg_peer *peer) +{ + RCU_LOCKDEP_WARN(!rcu_read_lock_bh_held(), + "Taking peer reference without holding the RCU read lock"); + if (unlikely(!peer || !kref_get_unless_zero(&peer->refcount))) + return NULL; + return peer; +} + +static void peer_make_dead(struct wg_peer *peer) +{ + /* Remove from configuration-time lookup structures. */ + list_del_init(&peer->peer_list); + wg_allowedips_remove_by_peer(&peer->device->peer_allowedips, peer, + &peer->device->device_update_lock); + wg_pubkey_hashtable_remove(peer->device->peer_hashtable, peer); + + /* Mark as dead, so that we don't allow jumping contexts after. */ + WRITE_ONCE(peer->is_dead, true); + + /* The caller must now synchronize_rcu() for this to take effect. */ +} + +static void peer_remove_after_dead(struct wg_peer *peer) +{ + WARN_ON(!peer->is_dead); + + /* No more keypairs can be created for this peer, since is_dead protects + * add_new_keypair, so we can now destroy existing ones. + */ + wg_noise_keypairs_clear(&peer->keypairs); + + /* Destroy all ongoing timers that were in-flight at the beginning of + * this function. + */ + wg_timers_stop(peer); + + /* The transition between packet encryption/decryption queues isn't + * guarded by is_dead, but each reference's life is strictly bounded by + * two generations: once for parallel crypto and once for serial + * ingestion, so we can simply flush twice, and be sure that we no + * longer have references inside these queues. + */ + + /* a) For encrypt/decrypt. */ + flush_workqueue(peer->device->packet_crypt_wq); + /* b.1) For send (but not receive, since that's napi). */ + flush_workqueue(peer->device->packet_crypt_wq); + /* b.2.1) For receive (but not send, since that's wq). */ + napi_disable(&peer->napi); + /* b.2.1) It's now safe to remove the napi struct, which must be done + * here from process context. + */ + netif_napi_del(&peer->napi); + + /* Ensure any workstructs we own (like transmit_handshake_work or + * clear_peer_work) no longer are in use. + */ + flush_workqueue(peer->device->handshake_send_wq); + + /* After the above flushes, a peer might still be active in a few + * different contexts: 1) from xmit(), before hitting is_dead and + * returning, 2) from wg_packet_consume_data(), before hitting is_dead + * and returning, 3) from wg_receive_handshake_packet() after a point + * where it has processed an incoming handshake packet, but where + * all calls to pass it off to timers fails because of is_dead. We won't + * have new references in (1) eventually, because we're removed from + * allowedips; we won't have new references in (2) eventually, because + * wg_index_hashtable_lookup will always return NULL, since we removed + * all existing keypairs and no more can be created; we won't have new + * references in (3) eventually, because we're removed from the pubkey + * hash table, which allows for a maximum of one handshake response, + * via the still-uncleared index hashtable entry, but not more than one, + * and in wg_cookie_message_consume, the lookup eventually gets a peer + * with a refcount of zero, so no new reference is taken. + */ + + --peer->device->num_peers; + wg_peer_put(peer); +} + +/* We have a separate "remove" function make sure that all active places where + * a peer is currently operating will eventually come to an end and not pass + * their reference onto another context. + */ +void wg_peer_remove(struct wg_peer *peer) +{ + if (unlikely(!peer)) + return; + lockdep_assert_held(&peer->device->device_update_lock); + + peer_make_dead(peer); + synchronize_rcu(); + peer_remove_after_dead(peer); +} + +void wg_peer_remove_all(struct wg_device *wg) +{ + struct wg_peer *peer, *temp; + LIST_HEAD(dead_peers); + + lockdep_assert_held(&wg->device_update_lock); + + /* Avoid having to traverse individually for each one. */ + wg_allowedips_free(&wg->peer_allowedips, &wg->device_update_lock); + + list_for_each_entry_safe(peer, temp, &wg->peer_list, peer_list) { + peer_make_dead(peer); + list_add_tail(&peer->peer_list, &dead_peers); + } + synchronize_rcu(); + list_for_each_entry_safe(peer, temp, &dead_peers, peer_list) + peer_remove_after_dead(peer); +} + +static void rcu_release(struct rcu_head *rcu) +{ + struct wg_peer *peer = container_of(rcu, struct wg_peer, rcu); + + dst_cache_destroy(&peer->endpoint_cache); + wg_packet_queue_free(&peer->rx_queue, false); + wg_packet_queue_free(&peer->tx_queue, false); + + /* The final zeroing takes care of clearing any remaining handshake key + * material and other potentially sensitive information. + */ + kzfree(peer); +} + +static void kref_release(struct kref *refcount) +{ + struct wg_peer *peer = container_of(refcount, struct wg_peer, refcount); + + pr_debug("%s: Peer %llu (%pISpfsc) destroyed\n", + peer->device->dev->name, peer->internal_id, + &peer->endpoint.addr); + + /* Remove ourself from dynamic runtime lookup structures, now that the + * last reference is gone. + */ + wg_index_hashtable_remove(peer->device->index_hashtable, + &peer->handshake.entry); + + /* Remove any lingering packets that didn't have a chance to be + * transmitted. + */ + wg_packet_purge_staged_packets(peer); + + /* Free the memory used. */ + call_rcu(&peer->rcu, rcu_release); +} + +void wg_peer_put(struct wg_peer *peer) +{ + if (unlikely(!peer)) + return; + kref_put(&peer->refcount, kref_release); +} diff --git a/drivers/net/wireguard/peer.h b/drivers/net/wireguard/peer.h new file mode 100644 index 000000000000..23af40922997 --- /dev/null +++ b/drivers/net/wireguard/peer.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#ifndef _WG_PEER_H +#define _WG_PEER_H + +#include "device.h" +#include "noise.h" +#include "cookie.h" + +#include <linux/types.h> +#include <linux/netfilter.h> +#include <linux/spinlock.h> +#include <linux/kref.h> +#include <net/dst_cache.h> + +struct wg_device; + +struct endpoint { + union { + struct sockaddr addr; + struct sockaddr_in addr4; + struct sockaddr_in6 addr6; + }; + union { + struct { + struct in_addr src4; + /* Essentially the same as addr6->scope_id */ + int src_if4; + }; + struct in6_addr src6; + }; +}; + +struct wg_peer { + struct wg_device *device; + struct crypt_queue tx_queue, rx_queue; + struct sk_buff_head staged_packet_queue; + int serial_work_cpu; + struct noise_keypairs keypairs; + struct endpoint endpoint; + struct dst_cache endpoint_cache; + rwlock_t endpoint_lock; + struct noise_handshake handshake; + atomic64_t last_sent_handshake; + struct work_struct transmit_handshake_work, clear_peer_work; + struct cookie latest_cookie; + struct hlist_node pubkey_hash; + u64 rx_bytes, tx_bytes; + struct timer_list timer_retransmit_handshake, timer_send_keepalive; + struct timer_list timer_new_handshake, timer_zero_key_material; + struct timer_list timer_persistent_keepalive; + unsigned int timer_handshake_attempts; + u16 persistent_keepalive_interval; + bool timer_need_another_keepalive; + bool sent_lastminute_handshake; + struct timespec64 walltime_last_handshake; + struct kref refcount; + struct rcu_head rcu; + struct list_head peer_list; + struct list_head allowedips_list; + u64 internal_id; + struct napi_struct napi; + bool is_dead; +}; + +struct wg_peer *wg_peer_create(struct wg_device *wg, + const u8 public_key[NOISE_PUBLIC_KEY_LEN], + const u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]); + +struct wg_peer *__must_check wg_peer_get_maybe_zero(struct wg_peer *peer); +static inline struct wg_peer *wg_peer_get(struct wg_peer *peer) +{ + kref_get(&peer->refcount); + return peer; +} +void wg_peer_put(struct wg_peer *peer); +void wg_peer_remove(struct wg_peer *peer); +void wg_peer_remove_all(struct wg_device *wg); + +#endif /* _WG_PEER_H */ diff --git a/drivers/net/wireguard/peerlookup.c b/drivers/net/wireguard/peerlookup.c new file mode 100644 index 000000000000..e4deb331476b --- /dev/null +++ b/drivers/net/wireguard/peerlookup.c @@ -0,0 +1,221 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include "peerlookup.h" +#include "peer.h" +#include "noise.h" + +static struct hlist_head *pubkey_bucket(struct pubkey_hashtable *table, + const u8 pubkey[NOISE_PUBLIC_KEY_LEN]) +{ + /* siphash gives us a secure 64bit number based on a random key. Since + * the bits are uniformly distributed, we can then mask off to get the + * bits we need. + */ + const u64 hash = siphash(pubkey, NOISE_PUBLIC_KEY_LEN, &table->key); + + return &table->hashtable[hash & (HASH_SIZE(table->hashtable) - 1)]; +} + +struct pubkey_hashtable *wg_pubkey_hashtable_alloc(void) +{ + struct pubkey_hashtable *table = kvmalloc(sizeof(*table), GFP_KERNEL); + + if (!table) + return NULL; + + get_random_bytes(&table->key, sizeof(table->key)); + hash_init(table->hashtable); + mutex_init(&table->lock); + return table; +} + +void wg_pubkey_hashtable_add(struct pubkey_hashtable *table, + struct wg_peer *peer) +{ + mutex_lock(&table->lock); + hlist_add_head_rcu(&peer->pubkey_hash, + pubkey_bucket(table, peer->handshake.remote_static)); + mutex_unlock(&table->lock); +} + +void wg_pubkey_hashtable_remove(struct pubkey_hashtable *table, + struct wg_peer *peer) +{ + mutex_lock(&table->lock); + hlist_del_init_rcu(&peer->pubkey_hash); + mutex_unlock(&table->lock); +} + +/* Returns a strong reference to a peer */ +struct wg_peer * +wg_pubkey_hashtable_lookup(struct pubkey_hashtable *table, + const u8 pubkey[NOISE_PUBLIC_KEY_LEN]) +{ + struct wg_peer *iter_peer, *peer = NULL; + + rcu_read_lock_bh(); + hlist_for_each_entry_rcu_bh(iter_peer, pubkey_bucket(table, pubkey), + pubkey_hash) { + if (!memcmp(pubkey, iter_peer->handshake.remote_static, + NOISE_PUBLIC_KEY_LEN)) { + peer = iter_peer; + break; + } + } + peer = wg_peer_get_maybe_zero(peer); + rcu_read_unlock_bh(); + return peer; +} + +static struct hlist_head *index_bucket(struct index_hashtable *table, + const __le32 index) +{ + /* Since the indices are random and thus all bits are uniformly + * distributed, we can find its bucket simply by masking. + */ + return &table->hashtable[(__force u32)index & + (HASH_SIZE(table->hashtable) - 1)]; +} + +struct index_hashtable *wg_index_hashtable_alloc(void) +{ + struct index_hashtable *table = kvmalloc(sizeof(*table), GFP_KERNEL); + + if (!table) + return NULL; + + hash_init(table->hashtable); + spin_lock_init(&table->lock); + return table; +} + +/* At the moment, we limit ourselves to 2^20 total peers, which generally might + * amount to 2^20*3 items in this hashtable. The algorithm below works by + * picking a random number and testing it. We can see that these limits mean we + * usually succeed pretty quickly: + * + * >>> def calculation(tries, size): + * ... return (size / 2**32)**(tries - 1) * (1 - (size / 2**32)) + * ... + * >>> calculation(1, 2**20 * 3) + * 0.999267578125 + * >>> calculation(2, 2**20 * 3) + * 0.0007318854331970215 + * >>> calculation(3, 2**20 * 3) + * 5.360489012673497e-07 + * >>> calculation(4, 2**20 * 3) + * 3.9261394135792216e-10 + * + * At the moment, we don't do any masking, so this algorithm isn't exactly + * constant time in either the random guessing or in the hash list lookup. We + * could require a minimum of 3 tries, which would successfully mask the + * guessing. this would not, however, help with the growing hash lengths, which + * is another thing to consider moving forward. + */ + +__le32 wg_index_hashtable_insert(struct index_hashtable *table, + struct index_hashtable_entry *entry) +{ + struct index_hashtable_entry *existing_entry; + + spin_lock_bh(&table->lock); + hlist_del_init_rcu(&entry->index_hash); + spin_unlock_bh(&table->lock); + + rcu_read_lock_bh(); + +search_unused_slot: + /* First we try to find an unused slot, randomly, while unlocked. */ + entry->index = (__force __le32)get_random_u32(); + hlist_for_each_entry_rcu_bh(existing_entry, + index_bucket(table, entry->index), + index_hash) { + if (existing_entry->index == entry->index) + /* If it's already in use, we continue searching. */ + goto search_unused_slot; + } + + /* Once we've found an unused slot, we lock it, and then double-check + * that nobody else stole it from us. + */ + spin_lock_bh(&table->lock); + hlist_for_each_entry_rcu_bh(existing_entry, + index_bucket(table, entry->index), + index_hash) { + if (existing_entry->index == entry->index) { + spin_unlock_bh(&table->lock); + /* If it was stolen, we start over. */ + goto search_unused_slot; + } + } + /* Otherwise, we know we have it exclusively (since we're locked), + * so we insert. + */ + hlist_add_head_rcu(&entry->index_hash, + index_bucket(table, entry->index)); + spin_unlock_bh(&table->lock); + + rcu_read_unlock_bh(); + + return entry->index; +} + +bool wg_index_hashtable_replace(struct index_hashtable *table, + struct index_hashtable_entry *old, + struct index_hashtable_entry *new) +{ + if (unlikely(hlist_unhashed(&old->index_hash))) + return false; + spin_lock_bh(&table->lock); + new->index = old->index; + hlist_replace_rcu(&old->index_hash, &new->index_hash); + + /* Calling init here NULLs out index_hash, and in fact after this + * function returns, it's theoretically possible for this to get + * reinserted elsewhere. That means the RCU lookup below might either + * terminate early or jump between buckets, in which case the packet + * simply gets dropped, which isn't terrible. + */ + INIT_HLIST_NODE(&old->index_hash); + spin_unlock_bh(&table->lock); + return true; +} + +void wg_index_hashtable_remove(struct index_hashtable *table, + struct index_hashtable_entry *entry) +{ + spin_lock_bh(&table->lock); + hlist_del_init_rcu(&entry->index_hash); + spin_unlock_bh(&table->lock); +} + +/* Returns a strong reference to a entry->peer */ +struct index_hashtable_entry * +wg_index_hashtable_lookup(struct index_hashtable *table, + const enum index_hashtable_type type_mask, + const __le32 index, struct wg_peer **peer) +{ + struct index_hashtable_entry *iter_entry, *entry = NULL; + + rcu_read_lock_bh(); + hlist_for_each_entry_rcu_bh(iter_entry, index_bucket(table, index), + index_hash) { + if (iter_entry->index == index) { + if (likely(iter_entry->type & type_mask)) + entry = iter_entry; + break; + } + } + if (likely(entry)) { + entry->peer = wg_peer_get_maybe_zero(entry->peer); + if (likely(entry->peer)) + *peer = entry->peer; + else + entry = NULL; + } + rcu_read_unlock_bh(); + return entry; +} diff --git a/drivers/net/wireguard/peerlookup.h b/drivers/net/wireguard/peerlookup.h new file mode 100644 index 000000000000..ced811797680 --- /dev/null +++ b/drivers/net/wireguard/peerlookup.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#ifndef _WG_PEERLOOKUP_H +#define _WG_PEERLOOKUP_H + +#include "messages.h" + +#include <linux/hashtable.h> +#include <linux/mutex.h> +#include <linux/siphash.h> + +struct wg_peer; + +struct pubkey_hashtable { + /* TODO: move to rhashtable */ + DECLARE_HASHTABLE(hashtable, 11); + siphash_key_t key; + struct mutex lock; +}; + +struct pubkey_hashtable *wg_pubkey_hashtable_alloc(void); +void wg_pubkey_hashtable_add(struct pubkey_hashtable *table, + struct wg_peer *peer); +void wg_pubkey_hashtable_remove(struct pubkey_hashtable *table, + struct wg_peer *peer); +struct wg_peer * +wg_pubkey_hashtable_lookup(struct pubkey_hashtable *table, + const u8 pubkey[NOISE_PUBLIC_KEY_LEN]); + +struct index_hashtable { + /* TODO: move to rhashtable */ + DECLARE_HASHTABLE(hashtable, 13); + spinlock_t lock; +}; + +enum index_hashtable_type { + INDEX_HASHTABLE_HANDSHAKE = 1U << 0, + INDEX_HASHTABLE_KEYPAIR = 1U << 1 +}; + +struct index_hashtable_entry { + struct wg_peer *peer; + struct hlist_node index_hash; + enum index_hashtable_type type; + __le32 index; +}; + +struct index_hashtable *wg_index_hashtable_alloc(void); +__le32 wg_index_hashtable_insert(struct index_hashtable *table, + struct index_hashtable_entry *entry); +bool wg_index_hashtable_replace(struct index_hashtable *table, + struct index_hashtable_entry *old, + struct index_hashtable_entry *new); +void wg_index_hashtable_remove(struct index_hashtable *table, + struct index_hashtable_entry *entry); +struct index_hashtable_entry * +wg_index_hashtable_lookup(struct index_hashtable *table, + const enum index_hashtable_type type_mask, + const __le32 index, struct wg_peer **peer); + +#endif /* _WG_PEERLOOKUP_H */ diff --git a/drivers/net/wireguard/queueing.c b/drivers/net/wireguard/queueing.c new file mode 100644 index 000000000000..5c964fcb994e --- /dev/null +++ b/drivers/net/wireguard/queueing.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include "queueing.h" + +struct multicore_worker __percpu * +wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr) +{ + int cpu; + struct multicore_worker __percpu *worker = + alloc_percpu(struct multicore_worker); + + if (!worker) + return NULL; + + for_each_possible_cpu(cpu) { + per_cpu_ptr(worker, cpu)->ptr = ptr; + INIT_WORK(&per_cpu_ptr(worker, cpu)->work, function); + } + return worker; +} + +int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function, + bool multicore, unsigned int len) +{ + int ret; + + memset(queue, 0, sizeof(*queue)); + ret = ptr_ring_init(&queue->ring, len, GFP_KERNEL); + if (ret) + return ret; + if (function) { + if (multicore) { + queue->worker = wg_packet_percpu_multicore_worker_alloc( + function, queue); + if (!queue->worker) + return -ENOMEM; + } else { + INIT_WORK(&queue->work, function); + } + } + return 0; +} + +void wg_packet_queue_free(struct crypt_queue *queue, bool multicore) +{ + if (multicore) + free_percpu(queue->worker); + WARN_ON(!__ptr_ring_empty(&queue->ring)); + ptr_ring_cleanup(&queue->ring, NULL); +} diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h new file mode 100644 index 000000000000..e49a464238fd --- /dev/null +++ b/drivers/net/wireguard/queueing.h @@ -0,0 +1,197 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#ifndef _WG_QUEUEING_H +#define _WG_QUEUEING_H + +#include "peer.h" +#include <linux/types.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/ipv6.h> + +struct wg_device; +struct wg_peer; +struct multicore_worker; +struct crypt_queue; +struct sk_buff; + +/* queueing.c APIs: */ +int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function, + bool multicore, unsigned int len); +void wg_packet_queue_free(struct crypt_queue *queue, bool multicore); +struct multicore_worker __percpu * +wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr); + +/* receive.c APIs: */ +void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb); +void wg_packet_handshake_receive_worker(struct work_struct *work); +/* NAPI poll function: */ +int wg_packet_rx_poll(struct napi_struct *napi, int budget); +/* Workqueue worker: */ +void wg_packet_decrypt_worker(struct work_struct *work); + +/* send.c APIs: */ +void wg_packet_send_queued_handshake_initiation(struct wg_peer *peer, + bool is_retry); +void wg_packet_send_handshake_response(struct wg_peer *peer); +void wg_packet_send_handshake_cookie(struct wg_device *wg, + struct sk_buff *initiating_skb, + __le32 sender_index); +void wg_packet_send_keepalive(struct wg_peer *peer); +void wg_packet_purge_staged_packets(struct wg_peer *peer); +void wg_packet_send_staged_packets(struct wg_peer *peer); +/* Workqueue workers: */ +void wg_packet_handshake_send_worker(struct work_struct *work); +void wg_packet_tx_worker(struct work_struct *work); +void wg_packet_encrypt_worker(struct work_struct *work); + +enum packet_state { + PACKET_STATE_UNCRYPTED, + PACKET_STATE_CRYPTED, + PACKET_STATE_DEAD +}; + +struct packet_cb { + u64 nonce; + struct noise_keypair *keypair; + atomic_t state; + u32 mtu; + u8 ds; +}; + +#define PACKET_CB(skb) ((struct packet_cb *)((skb)->cb)) +#define PACKET_PEER(skb) (PACKET_CB(skb)->keypair->entry.peer) + +/* Returns either the correct skb->protocol value, or 0 if invalid. */ +static inline __be16 wg_skb_examine_untrusted_ip_hdr(struct sk_buff *skb) +{ + if (skb_network_header(skb) >= skb->head && + (skb_network_header(skb) + sizeof(struct iphdr)) <= + skb_tail_pointer(skb) && + ip_hdr(skb)->version == 4) + return htons(ETH_P_IP); + if (skb_network_header(skb) >= skb->head && + (skb_network_header(skb) + sizeof(struct ipv6hdr)) <= + skb_tail_pointer(skb) && + ipv6_hdr(skb)->version == 6) + return htons(ETH_P_IPV6); + return 0; +} + +static inline void wg_reset_packet(struct sk_buff *skb) +{ + const int pfmemalloc = skb->pfmemalloc; + + skb_scrub_packet(skb, true); + memset(&skb->headers_start, 0, + offsetof(struct sk_buff, headers_end) - + offsetof(struct sk_buff, headers_start)); + skb->pfmemalloc = pfmemalloc; + skb->queue_mapping = 0; + skb->nohdr = 0; + skb->peeked = 0; + skb->mac_len = 0; + skb->dev = NULL; +#ifdef CONFIG_NET_SCHED + skb->tc_index = 0; + skb_reset_tc(skb); +#endif + skb->hdr_len = skb_headroom(skb); + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + skb_probe_transport_header(skb); + skb_reset_inner_headers(skb); +} + +static inline int wg_cpumask_choose_online(int *stored_cpu, unsigned int id) +{ + unsigned int cpu = *stored_cpu, cpu_index, i; + + if (unlikely(cpu == nr_cpumask_bits || + !cpumask_test_cpu(cpu, cpu_online_mask))) { + cpu_index = id % cpumask_weight(cpu_online_mask); + cpu = cpumask_first(cpu_online_mask); + for (i = 0; i < cpu_index; ++i) + cpu = cpumask_next(cpu, cpu_online_mask); + *stored_cpu = cpu; + } + return cpu; +} + +/* This function is racy, in the sense that next is unlocked, so it could return + * the same CPU twice. A race-free version of this would be to instead store an + * atomic sequence number, do an increment-and-return, and then iterate through + * every possible CPU until we get to that index -- choose_cpu. However that's + * a bit slower, and it doesn't seem like this potential race actually + * introduces any performance loss, so we live with it. + */ +static inline int wg_cpumask_next_online(int *next) +{ + int cpu = *next; + + while (unlikely(!cpumask_test_cpu(cpu, cpu_online_mask))) + cpu = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits; + *next = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits; + return cpu; +} + +static inline int wg_queue_enqueue_per_device_and_peer( + struct crypt_queue *device_queue, struct crypt_queue *peer_queue, + struct sk_buff *skb, struct workqueue_struct *wq, int *next_cpu) +{ + int cpu; + + atomic_set_release(&PACKET_CB(skb)->state, PACKET_STATE_UNCRYPTED); + /* We first queue this up for the peer ingestion, but the consumer + * will wait for the state to change to CRYPTED or DEAD before. + */ + if (unlikely(ptr_ring_produce_bh(&peer_queue->ring, skb))) + return -ENOSPC; + /* Then we queue it up in the device queue, which consumes the + * packet as soon as it can. + */ + cpu = wg_cpumask_next_online(next_cpu); + if (unlikely(ptr_ring_produce_bh(&device_queue->ring, skb))) + return -EPIPE; + queue_work_on(cpu, wq, &per_cpu_ptr(device_queue->worker, cpu)->work); + return 0; +} + +static inline void wg_queue_enqueue_per_peer(struct crypt_queue *queue, + struct sk_buff *skb, + enum packet_state state) +{ + /* We take a reference, because as soon as we call atomic_set, the + * peer can be freed from below us. + */ + struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb)); + + atomic_set_release(&PACKET_CB(skb)->state, state); + queue_work_on(wg_cpumask_choose_online(&peer->serial_work_cpu, + peer->internal_id), + peer->device->packet_crypt_wq, &queue->work); + wg_peer_put(peer); +} + +static inline void wg_queue_enqueue_per_peer_napi(struct sk_buff *skb, + enum packet_state state) +{ + /* We take a reference, because as soon as we call atomic_set, the + * peer can be freed from below us. + */ + struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb)); + + atomic_set_release(&PACKET_CB(skb)->state, state); + napi_schedule(&peer->napi); + wg_peer_put(peer); +} + +#ifdef DEBUG +bool wg_packet_counter_selftest(void); +#endif + +#endif /* _WG_QUEUEING_H */ diff --git a/drivers/net/wireguard/ratelimiter.c b/drivers/net/wireguard/ratelimiter.c new file mode 100644 index 000000000000..3fedd1d21f5e --- /dev/null +++ b/drivers/net/wireguard/ratelimiter.c @@ -0,0 +1,223 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include "ratelimiter.h" +#include <linux/siphash.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <net/ip.h> + +static struct kmem_cache *entry_cache; +static hsiphash_key_t key; +static spinlock_t table_lock = __SPIN_LOCK_UNLOCKED("ratelimiter_table_lock"); +static DEFINE_MUTEX(init_lock); +static u64 init_refcnt; /* Protected by init_lock, hence not atomic. */ +static atomic_t total_entries = ATOMIC_INIT(0); +static unsigned int max_entries, table_size; +static void wg_ratelimiter_gc_entries(struct work_struct *); +static DECLARE_DEFERRABLE_WORK(gc_work, wg_ratelimiter_gc_entries); +static struct hlist_head *table_v4; +#if IS_ENABLED(CONFIG_IPV6) +static struct hlist_head *table_v6; +#endif + +struct ratelimiter_entry { + u64 last_time_ns, tokens, ip; + void *net; + spinlock_t lock; + struct hlist_node hash; + struct rcu_head rcu; +}; + +enum { + PACKETS_PER_SECOND = 20, + PACKETS_BURSTABLE = 5, + PACKET_COST = NSEC_PER_SEC / PACKETS_PER_SECOND, + TOKEN_MAX = PACKET_COST * PACKETS_BURSTABLE +}; + +static void entry_free(struct rcu_head *rcu) +{ + kmem_cache_free(entry_cache, + container_of(rcu, struct ratelimiter_entry, rcu)); + atomic_dec(&total_entries); +} + +static void entry_uninit(struct ratelimiter_entry *entry) +{ + hlist_del_rcu(&entry->hash); + call_rcu(&entry->rcu, entry_free); +} + +/* Calling this function with a NULL work uninits all entries. */ +static void wg_ratelimiter_gc_entries(struct work_struct *work) +{ + const u64 now = ktime_get_coarse_boottime_ns(); + struct ratelimiter_entry *entry; + struct hlist_node *temp; + unsigned int i; + + for (i = 0; i < table_size; ++i) { + spin_lock(&table_lock); + hlist_for_each_entry_safe(entry, temp, &table_v4[i], hash) { + if (unlikely(!work) || + now - entry->last_time_ns > NSEC_PER_SEC) + entry_uninit(entry); + } +#if IS_ENABLED(CONFIG_IPV6) + hlist_for_each_entry_safe(entry, temp, &table_v6[i], hash) { + if (unlikely(!work) || + now - entry->last_time_ns > NSEC_PER_SEC) + entry_uninit(entry); + } +#endif + spin_unlock(&table_lock); + if (likely(work)) + cond_resched(); + } + if (likely(work)) + queue_delayed_work(system_power_efficient_wq, &gc_work, HZ); +} + +bool wg_ratelimiter_allow(struct sk_buff *skb, struct net *net) +{ + /* We only take the bottom half of the net pointer, so that we can hash + * 3 words in the end. This way, siphash's len param fits into the final + * u32, and we don't incur an extra round. + */ + const u32 net_word = (unsigned long)net; + struct ratelimiter_entry *entry; + struct hlist_head *bucket; + u64 ip; + + if (skb->protocol == htons(ETH_P_IP)) { + ip = (u64 __force)ip_hdr(skb)->saddr; + bucket = &table_v4[hsiphash_2u32(net_word, ip, &key) & + (table_size - 1)]; + } +#if IS_ENABLED(CONFIG_IPV6) + else if (skb->protocol == htons(ETH_P_IPV6)) { + /* Only use 64 bits, so as to ratelimit the whole /64. */ + memcpy(&ip, &ipv6_hdr(skb)->saddr, sizeof(ip)); + bucket = &table_v6[hsiphash_3u32(net_word, ip >> 32, ip, &key) & + (table_size - 1)]; + } +#endif + else + return false; + rcu_read_lock(); + hlist_for_each_entry_rcu(entry, bucket, hash) { + if (entry->net == net && entry->ip == ip) { + u64 now, tokens; + bool ret; + /* Quasi-inspired by nft_limit.c, but this is actually a + * slightly different algorithm. Namely, we incorporate + * the burst as part of the maximum tokens, rather than + * as part of the rate. + */ + spin_lock(&entry->lock); + now = ktime_get_coarse_boottime_ns(); + tokens = min_t(u64, TOKEN_MAX, + entry->tokens + now - + entry->last_time_ns); + entry->last_time_ns = now; + ret = tokens >= PACKET_COST; + entry->tokens = ret ? tokens - PACKET_COST : tokens; + spin_unlock(&entry->lock); + rcu_read_unlock(); + return ret; + } + } + rcu_read_unlock(); + + if (atomic_inc_return(&total_entries) > max_entries) + goto err_oom; + + entry = kmem_cache_alloc(entry_cache, GFP_KERNEL); + if (unlikely(!entry)) + goto err_oom; + + entry->net = net; + entry->ip = ip; + INIT_HLIST_NODE(&entry->hash); + spin_lock_init(&entry->lock); + entry->last_time_ns = ktime_get_coarse_boottime_ns(); + entry->tokens = TOKEN_MAX - PACKET_COST; + spin_lock(&table_lock); + hlist_add_head_rcu(&entry->hash, bucket); + spin_unlock(&table_lock); + return true; + +err_oom: + atomic_dec(&total_entries); + return false; +} + +int wg_ratelimiter_init(void) +{ + mutex_lock(&init_lock); + if (++init_refcnt != 1) + goto out; + + entry_cache = KMEM_CACHE(ratelimiter_entry, 0); + if (!entry_cache) + goto err; + + /* xt_hashlimit.c uses a slightly different algorithm for ratelimiting, + * but what it shares in common is that it uses a massive hashtable. So, + * we borrow their wisdom about good table sizes on different systems + * dependent on RAM. This calculation here comes from there. + */ + table_size = (totalram_pages() > (1U << 30) / PAGE_SIZE) ? 8192 : + max_t(unsigned long, 16, roundup_pow_of_two( + (totalram_pages() << PAGE_SHIFT) / + (1U << 14) / sizeof(struct hlist_head))); + max_entries = table_size * 8; + + table_v4 = kvzalloc(table_size * sizeof(*table_v4), GFP_KERNEL); + if (unlikely(!table_v4)) + goto err_kmemcache; + +#if IS_ENABLED(CONFIG_IPV6) + table_v6 = kvzalloc(table_size * sizeof(*table_v6), GFP_KERNEL); + if (unlikely(!table_v6)) { + kvfree(table_v4); + goto err_kmemcache; + } +#endif + + queue_delayed_work(system_power_efficient_wq, &gc_work, HZ); + get_random_bytes(&key, sizeof(key)); +out: + mutex_unlock(&init_lock); + return 0; + +err_kmemcache: + kmem_cache_destroy(entry_cache); +err: + --init_refcnt; + mutex_unlock(&init_lock); + return -ENOMEM; +} + +void wg_ratelimiter_uninit(void) +{ + mutex_lock(&init_lock); + if (!init_refcnt || --init_refcnt) + goto out; + + cancel_delayed_work_sync(&gc_work); + wg_ratelimiter_gc_entries(NULL); + rcu_barrier(); + kvfree(table_v4); +#if IS_ENABLED(CONFIG_IPV6) + kvfree(table_v6); +#endif + kmem_cache_destroy(entry_cache); +out: + mutex_unlock(&init_lock); +} + +#include "selftest/ratelimiter.c" diff --git a/drivers/net/wireguard/ratelimiter.h b/drivers/net/wireguard/ratelimiter.h new file mode 100644 index 000000000000..83067f71ea99 --- /dev/null +++ b/drivers/net/wireguard/ratelimiter.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#ifndef _WG_RATELIMITER_H +#define _WG_RATELIMITER_H + +#include <linux/skbuff.h> + +int wg_ratelimiter_init(void); +void wg_ratelimiter_uninit(void); +bool wg_ratelimiter_allow(struct sk_buff *skb, struct net *net); + +#ifdef DEBUG +bool wg_ratelimiter_selftest(void); +#endif + +#endif /* _WG_RATELIMITER_H */ diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c new file mode 100644 index 000000000000..9c6bab9c981f --- /dev/null +++ b/drivers/net/wireguard/receive.c @@ -0,0 +1,595 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include "queueing.h" +#include "device.h" +#include "peer.h" +#include "timers.h" +#include "messages.h" +#include "cookie.h" +#include "socket.h" + +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/udp.h> +#include <net/ip_tunnels.h> + +/* Must be called with bh disabled. */ +static void update_rx_stats(struct wg_peer *peer, size_t len) +{ + struct pcpu_sw_netstats *tstats = + get_cpu_ptr(peer->device->dev->tstats); + + u64_stats_update_begin(&tstats->syncp); + ++tstats->rx_packets; + tstats->rx_bytes += len; + peer->rx_bytes += len; + u64_stats_update_end(&tstats->syncp); + put_cpu_ptr(tstats); +} + +#define SKB_TYPE_LE32(skb) (((struct message_header *)(skb)->data)->type) + +static size_t validate_header_len(struct sk_buff *skb) +{ + if (unlikely(skb->len < sizeof(struct message_header))) + return 0; + if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_DATA) && + skb->len >= MESSAGE_MINIMUM_LENGTH) + return sizeof(struct message_data); + if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION) && + skb->len == sizeof(struct message_handshake_initiation)) + return sizeof(struct message_handshake_initiation); + if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE) && + skb->len == sizeof(struct message_handshake_response)) + return sizeof(struct message_handshake_response); + if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE) && + skb->len == sizeof(struct message_handshake_cookie)) + return sizeof(struct message_handshake_cookie); + return 0; +} + +static int prepare_skb_header(struct sk_buff *skb, struct wg_device *wg) +{ + size_t data_offset, data_len, header_len; + struct udphdr *udp; + + if (unlikely(wg_skb_examine_untrusted_ip_hdr(skb) != skb->protocol || + skb_transport_header(skb) < skb->head || + (skb_transport_header(skb) + sizeof(struct udphdr)) > + skb_tail_pointer(skb))) + return -EINVAL; /* Bogus IP header */ + udp = udp_hdr(skb); + data_offset = (u8 *)udp - skb->data; + if (unlikely(data_offset > U16_MAX || + data_offset + sizeof(struct udphdr) > skb->len)) + /* Packet has offset at impossible location or isn't big enough + * to have UDP fields. + */ + return -EINVAL; + data_len = ntohs(udp->len); + if (unlikely(data_len < sizeof(struct udphdr) || + data_len > skb->len - data_offset)) + /* UDP packet is reporting too small of a size or lying about + * its size. + */ + return -EINVAL; + data_len -= sizeof(struct udphdr); + data_offset = (u8 *)udp + sizeof(struct udphdr) - skb->data; + if (unlikely(!pskb_may_pull(skb, + data_offset + sizeof(struct message_header)) || + pskb_trim(skb, data_len + data_offset) < 0)) + return -EINVAL; + skb_pull(skb, data_offset); + if (unlikely(skb->len != data_len)) + /* Final len does not agree with calculated len */ + return -EINVAL; + header_len = validate_header_len(skb); + if (unlikely(!header_len)) + return -EINVAL; + __skb_push(skb, data_offset); + if (unlikely(!pskb_may_pull(skb, data_offset + header_len))) + return -EINVAL; + __skb_pull(skb, data_offset); + return 0; +} + +static void wg_receive_handshake_packet(struct wg_device *wg, + struct sk_buff *skb) +{ + enum cookie_mac_state mac_state; + struct wg_peer *peer = NULL; + /* This is global, so that our load calculation applies to the whole + * system. We don't care about races with it at all. + */ + static u64 last_under_load; + bool packet_needs_cookie; + bool under_load; + + if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE)) { + net_dbg_skb_ratelimited("%s: Receiving cookie response from %pISpfsc\n", + wg->dev->name, skb); + wg_cookie_message_consume( + (struct message_handshake_cookie *)skb->data, wg); + return; + } + + under_load = skb_queue_len(&wg->incoming_handshakes) >= + MAX_QUEUED_INCOMING_HANDSHAKES / 8; + if (under_load) + last_under_load = ktime_get_coarse_boottime_ns(); + else if (last_under_load) + under_load = !wg_birthdate_has_expired(last_under_load, 1); + mac_state = wg_cookie_validate_packet(&wg->cookie_checker, skb, + under_load); + if ((under_load && mac_state == VALID_MAC_WITH_COOKIE) || + (!under_load && mac_state == VALID_MAC_BUT_NO_COOKIE)) { + packet_needs_cookie = false; + } else if (under_load && mac_state == VALID_MAC_BUT_NO_COOKIE) { + packet_needs_cookie = true; + } else { + net_dbg_skb_ratelimited("%s: Invalid MAC of handshake, dropping packet from %pISpfsc\n", + wg->dev->name, skb); + return; + } + + switch (SKB_TYPE_LE32(skb)) { + case cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION): { + struct message_handshake_initiation *message = + (struct message_handshake_initiation *)skb->data; + + if (packet_needs_cookie) { + wg_packet_send_handshake_cookie(wg, skb, + message->sender_index); + return; + } + peer = wg_noise_handshake_consume_initiation(message, wg); + if (unlikely(!peer)) { + net_dbg_skb_ratelimited("%s: Invalid handshake initiation from %pISpfsc\n", + wg->dev->name, skb); + return; + } + wg_socket_set_peer_endpoint_from_skb(peer, skb); + net_dbg_ratelimited("%s: Receiving handshake initiation from peer %llu (%pISpfsc)\n", + wg->dev->name, peer->internal_id, + &peer->endpoint.addr); + wg_packet_send_handshake_response(peer); + break; + } + case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE): { + struct message_handshake_response *message = + (struct message_handshake_response *)skb->data; + + if (packet_needs_cookie) { + wg_packet_send_handshake_cookie(wg, skb, + message->sender_index); + return; + } + peer = wg_noise_handshake_consume_response(message, wg); + if (unlikely(!peer)) { + net_dbg_skb_ratelimited("%s: Invalid handshake response from %pISpfsc\n", + wg->dev->name, skb); + return; + } + wg_socket_set_peer_endpoint_from_skb(peer, skb); + net_dbg_ratelimited("%s: Receiving handshake response from peer %llu (%pISpfsc)\n", + wg->dev->name, peer->internal_id, + &peer->endpoint.addr); + if (wg_noise_handshake_begin_session(&peer->handshake, + &peer->keypairs)) { + wg_timers_session_derived(peer); + wg_timers_handshake_complete(peer); + /* Calling this function will either send any existing + * packets in the queue and not send a keepalive, which + * is the best case, Or, if there's nothing in the + * queue, it will send a keepalive, in order to give + * immediate confirmation of the session. + */ + wg_packet_send_keepalive(peer); + } + break; + } + } + + if (unlikely(!peer)) { + WARN(1, "Somehow a wrong type of packet wound up in the handshake queue!\n"); + return; + } + + local_bh_disable(); + update_rx_stats(peer, skb->len); + local_bh_enable(); + + wg_timers_any_authenticated_packet_received(peer); + wg_timers_any_authenticated_packet_traversal(peer); + wg_peer_put(peer); +} + +void wg_packet_handshake_receive_worker(struct work_struct *work) +{ + struct wg_device *wg = container_of(work, struct multicore_worker, + work)->ptr; + struct sk_buff *skb; + + while ((skb = skb_dequeue(&wg->incoming_handshakes)) != NULL) { + wg_receive_handshake_packet(wg, skb); + dev_kfree_skb(skb); + cond_resched(); + } +} + +static void keep_key_fresh(struct wg_peer *peer) +{ + struct noise_keypair *keypair; + bool send = false; + + if (peer->sent_lastminute_handshake) + return; + + rcu_read_lock_bh(); + keypair = rcu_dereference_bh(peer->keypairs.current_keypair); + if (likely(keypair && READ_ONCE(keypair->sending.is_valid)) && + keypair->i_am_the_initiator && + unlikely(wg_birthdate_has_expired(keypair->sending.birthdate, + REJECT_AFTER_TIME - KEEPALIVE_TIMEOUT - REKEY_TIMEOUT))) + send = true; + rcu_read_unlock_bh(); + + if (send) { + peer->sent_lastminute_handshake = true; + wg_packet_send_queued_handshake_initiation(peer, false); + } +} + +static bool decrypt_packet(struct sk_buff *skb, struct noise_symmetric_key *key) +{ + struct scatterlist sg[MAX_SKB_FRAGS + 8]; + struct sk_buff *trailer; + unsigned int offset; + int num_frags; + + if (unlikely(!key)) + return false; + + if (unlikely(!READ_ONCE(key->is_valid) || + wg_birthdate_has_expired(key->birthdate, REJECT_AFTER_TIME) || + key->counter.receive.counter >= REJECT_AFTER_MESSAGES)) { + WRITE_ONCE(key->is_valid, false); + return false; + } + + PACKET_CB(skb)->nonce = + le64_to_cpu(((struct message_data *)skb->data)->counter); + + /* We ensure that the network header is part of the packet before we + * call skb_cow_data, so that there's no chance that data is removed + * from the skb, so that later we can extract the original endpoint. + */ + offset = skb->data - skb_network_header(skb); + skb_push(skb, offset); + num_frags = skb_cow_data(skb, 0, &trailer); + offset += sizeof(struct message_data); + skb_pull(skb, offset); + if (unlikely(num_frags < 0 || num_frags > ARRAY_SIZE(sg))) + return false; + + sg_init_table(sg, num_frags); + if (skb_to_sgvec(skb, sg, 0, skb->len) <= 0) + return false; + + if (!chacha20poly1305_decrypt_sg_inplace(sg, skb->len, NULL, 0, + PACKET_CB(skb)->nonce, + key->key)) + return false; + + /* Another ugly situation of pushing and pulling the header so as to + * keep endpoint information intact. + */ + skb_push(skb, offset); + if (pskb_trim(skb, skb->len - noise_encrypted_len(0))) + return false; + skb_pull(skb, offset); + + return true; +} + +/* This is RFC6479, a replay detection bitmap algorithm that avoids bitshifts */ +static bool counter_validate(union noise_counter *counter, u64 their_counter) +{ + unsigned long index, index_current, top, i; + bool ret = false; + + spin_lock_bh(&counter->receive.lock); + + if (unlikely(counter->receive.counter >= REJECT_AFTER_MESSAGES + 1 || + their_counter >= REJECT_AFTER_MESSAGES)) + goto out; + + ++their_counter; + + if (unlikely((COUNTER_WINDOW_SIZE + their_counter) < + counter->receive.counter)) + goto out; + + index = their_counter >> ilog2(BITS_PER_LONG); + + if (likely(their_counter > counter->receive.counter)) { + index_current = counter->receive.counter >> ilog2(BITS_PER_LONG); + top = min_t(unsigned long, index - index_current, + COUNTER_BITS_TOTAL / BITS_PER_LONG); + for (i = 1; i <= top; ++i) + counter->receive.backtrack[(i + index_current) & + ((COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1)] = 0; + counter->receive.counter = their_counter; + } + + index &= (COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1; + ret = !test_and_set_bit(their_counter & (BITS_PER_LONG - 1), + &counter->receive.backtrack[index]); + +out: + spin_unlock_bh(&counter->receive.lock); + return ret; +} + +#include "selftest/counter.c" + +static void wg_packet_consume_data_done(struct wg_peer *peer, + struct sk_buff *skb, + struct endpoint *endpoint) +{ + struct net_device *dev = peer->device->dev; + unsigned int len, len_before_trim; + struct wg_peer *routed_peer; + + wg_socket_set_peer_endpoint(peer, endpoint); + + if (unlikely(wg_noise_received_with_keypair(&peer->keypairs, + PACKET_CB(skb)->keypair))) { + wg_timers_handshake_complete(peer); + wg_packet_send_staged_packets(peer); + } + + keep_key_fresh(peer); + + wg_timers_any_authenticated_packet_received(peer); + wg_timers_any_authenticated_packet_traversal(peer); + + /* A packet with length 0 is a keepalive packet */ + if (unlikely(!skb->len)) { + update_rx_stats(peer, message_data_len(0)); + net_dbg_ratelimited("%s: Receiving keepalive packet from peer %llu (%pISpfsc)\n", + dev->name, peer->internal_id, + &peer->endpoint.addr); + goto packet_processed; + } + + wg_timers_data_received(peer); + + if (unlikely(skb_network_header(skb) < skb->head)) + goto dishonest_packet_size; + if (unlikely(!(pskb_network_may_pull(skb, sizeof(struct iphdr)) && + (ip_hdr(skb)->version == 4 || + (ip_hdr(skb)->version == 6 && + pskb_network_may_pull(skb, sizeof(struct ipv6hdr))))))) + goto dishonest_packet_type; + + skb->dev = dev; + /* We've already verified the Poly1305 auth tag, which means this packet + * was not modified in transit. We can therefore tell the networking + * stack that all checksums of every layer of encapsulation have already + * been checked "by the hardware" and therefore is unnecessary to check + * again in software. + */ + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->csum_level = ~0; /* All levels */ + skb->protocol = wg_skb_examine_untrusted_ip_hdr(skb); + if (skb->protocol == htons(ETH_P_IP)) { + len = ntohs(ip_hdr(skb)->tot_len); + if (unlikely(len < sizeof(struct iphdr))) + goto dishonest_packet_size; + if (INET_ECN_is_ce(PACKET_CB(skb)->ds)) + IP_ECN_set_ce(ip_hdr(skb)); + } else if (skb->protocol == htons(ETH_P_IPV6)) { + len = ntohs(ipv6_hdr(skb)->payload_len) + + sizeof(struct ipv6hdr); + if (INET_ECN_is_ce(PACKET_CB(skb)->ds)) + IP6_ECN_set_ce(skb, ipv6_hdr(skb)); + } else { + goto dishonest_packet_type; + } + + if (unlikely(len > skb->len)) + goto dishonest_packet_size; + len_before_trim = skb->len; + if (unlikely(pskb_trim(skb, len))) + goto packet_processed; + + routed_peer = wg_allowedips_lookup_src(&peer->device->peer_allowedips, + skb); + wg_peer_put(routed_peer); /* We don't need the extra reference. */ + + if (unlikely(routed_peer != peer)) + goto dishonest_packet_peer; + + if (unlikely(napi_gro_receive(&peer->napi, skb) == GRO_DROP)) { + ++dev->stats.rx_dropped; + net_dbg_ratelimited("%s: Failed to give packet to userspace from peer %llu (%pISpfsc)\n", + dev->name, peer->internal_id, + &peer->endpoint.addr); + } else { + update_rx_stats(peer, message_data_len(len_before_trim)); + } + return; + +dishonest_packet_peer: + net_dbg_skb_ratelimited("%s: Packet has unallowed src IP (%pISc) from peer %llu (%pISpfsc)\n", + dev->name, skb, peer->internal_id, + &peer->endpoint.addr); + ++dev->stats.rx_errors; + ++dev->stats.rx_frame_errors; + goto packet_processed; +dishonest_packet_type: + net_dbg_ratelimited("%s: Packet is neither ipv4 nor ipv6 from peer %llu (%pISpfsc)\n", + dev->name, peer->internal_id, &peer->endpoint.addr); + ++dev->stats.rx_errors; + ++dev->stats.rx_frame_errors; + goto packet_processed; +dishonest_packet_size: + net_dbg_ratelimited("%s: Packet has incorrect size from peer %llu (%pISpfsc)\n", + dev->name, peer->internal_id, &peer->endpoint.addr); + ++dev->stats.rx_errors; + ++dev->stats.rx_length_errors; + goto packet_processed; +packet_processed: + dev_kfree_skb(skb); +} + +int wg_packet_rx_poll(struct napi_struct *napi, int budget) +{ + struct wg_peer *peer = container_of(napi, struct wg_peer, napi); + struct crypt_queue *queue = &peer->rx_queue; + struct noise_keypair *keypair; + struct endpoint endpoint; + enum packet_state state; + struct sk_buff *skb; + int work_done = 0; + bool free; + + if (unlikely(budget <= 0)) + return 0; + + while ((skb = __ptr_ring_peek(&queue->ring)) != NULL && + (state = atomic_read_acquire(&PACKET_CB(skb)->state)) != + PACKET_STATE_UNCRYPTED) { + __ptr_ring_discard_one(&queue->ring); + peer = PACKET_PEER(skb); + keypair = PACKET_CB(skb)->keypair; + free = true; + + if (unlikely(state != PACKET_STATE_CRYPTED)) + goto next; + + if (unlikely(!counter_validate(&keypair->receiving.counter, + PACKET_CB(skb)->nonce))) { + net_dbg_ratelimited("%s: Packet has invalid nonce %llu (max %llu)\n", + peer->device->dev->name, + PACKET_CB(skb)->nonce, + keypair->receiving.counter.receive.counter); + goto next; + } + + if (unlikely(wg_socket_endpoint_from_skb(&endpoint, skb))) + goto next; + + wg_reset_packet(skb); + wg_packet_consume_data_done(peer, skb, &endpoint); + free = false; + +next: + wg_noise_keypair_put(keypair, false); + wg_peer_put(peer); + if (unlikely(free)) + dev_kfree_skb(skb); + + if (++work_done >= budget) + break; + } + + if (work_done < budget) + napi_complete_done(napi, work_done); + + return work_done; +} + +void wg_packet_decrypt_worker(struct work_struct *work) +{ + struct crypt_queue *queue = container_of(work, struct multicore_worker, + work)->ptr; + struct sk_buff *skb; + + while ((skb = ptr_ring_consume_bh(&queue->ring)) != NULL) { + enum packet_state state = likely(decrypt_packet(skb, + &PACKET_CB(skb)->keypair->receiving)) ? + PACKET_STATE_CRYPTED : PACKET_STATE_DEAD; + wg_queue_enqueue_per_peer_napi(skb, state); + } +} + +static void wg_packet_consume_data(struct wg_device *wg, struct sk_buff *skb) +{ + __le32 idx = ((struct message_data *)skb->data)->key_idx; + struct wg_peer *peer = NULL; + int ret; + + rcu_read_lock_bh(); + PACKET_CB(skb)->keypair = + (struct noise_keypair *)wg_index_hashtable_lookup( + wg->index_hashtable, INDEX_HASHTABLE_KEYPAIR, idx, + &peer); + if (unlikely(!wg_noise_keypair_get(PACKET_CB(skb)->keypair))) + goto err_keypair; + + if (unlikely(READ_ONCE(peer->is_dead))) + goto err; + + ret = wg_queue_enqueue_per_device_and_peer(&wg->decrypt_queue, + &peer->rx_queue, skb, + wg->packet_crypt_wq, + &wg->decrypt_queue.last_cpu); + if (unlikely(ret == -EPIPE)) + wg_queue_enqueue_per_peer_napi(skb, PACKET_STATE_DEAD); + if (likely(!ret || ret == -EPIPE)) { + rcu_read_unlock_bh(); + return; + } +err: + wg_noise_keypair_put(PACKET_CB(skb)->keypair, false); +err_keypair: + rcu_read_unlock_bh(); + wg_peer_put(peer); + dev_kfree_skb(skb); +} + +void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb) +{ + if (unlikely(prepare_skb_header(skb, wg) < 0)) + goto err; + switch (SKB_TYPE_LE32(skb)) { + case cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION): + case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE): + case cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE): { + int cpu; + + if (skb_queue_len(&wg->incoming_handshakes) > + MAX_QUEUED_INCOMING_HANDSHAKES || + unlikely(!rng_is_initialized())) { + net_dbg_skb_ratelimited("%s: Dropping handshake packet from %pISpfsc\n", + wg->dev->name, skb); + goto err; + } + skb_queue_tail(&wg->incoming_handshakes, skb); + /* Queues up a call to packet_process_queued_handshake_ + * packets(skb): + */ + cpu = wg_cpumask_next_online(&wg->incoming_handshake_cpu); + queue_work_on(cpu, wg->handshake_receive_wq, + &per_cpu_ptr(wg->incoming_handshakes_worker, cpu)->work); + break; + } + case cpu_to_le32(MESSAGE_DATA): + PACKET_CB(skb)->ds = ip_tunnel_get_dsfield(ip_hdr(skb), skb); + wg_packet_consume_data(wg, skb); + break; + default: + net_dbg_skb_ratelimited("%s: Invalid packet from %pISpfsc\n", + wg->dev->name, skb); + goto err; + } + return; + +err: + dev_kfree_skb(skb); +} diff --git a/drivers/net/wireguard/selftest/allowedips.c b/drivers/net/wireguard/selftest/allowedips.c new file mode 100644 index 000000000000..846db14cb046 --- /dev/null +++ b/drivers/net/wireguard/selftest/allowedips.c @@ -0,0 +1,683 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + * + * This contains some basic static unit tests for the allowedips data structure. + * It also has two additional modes that are disabled and meant to be used by + * folks directly playing with this file. If you define the macro + * DEBUG_PRINT_TRIE_GRAPHVIZ to be 1, then every time there's a full tree in + * memory, it will be printed out as KERN_DEBUG in a format that can be passed + * to graphviz (the dot command) to visualize it. If you define the macro + * DEBUG_RANDOM_TRIE to be 1, then there will be an extremely costly set of + * randomized tests done against a trivial implementation, which may take + * upwards of a half-hour to complete. There's no set of users who should be + * enabling these, and the only developers that should go anywhere near these + * nobs are the ones who are reading this comment. + */ + +#ifdef DEBUG + +#include <linux/siphash.h> + +static __init void swap_endian_and_apply_cidr(u8 *dst, const u8 *src, u8 bits, + u8 cidr) +{ + swap_endian(dst, src, bits); + memset(dst + (cidr + 7) / 8, 0, bits / 8 - (cidr + 7) / 8); + if (cidr) + dst[(cidr + 7) / 8 - 1] &= ~0U << ((8 - (cidr % 8)) % 8); +} + +static __init void print_node(struct allowedips_node *node, u8 bits) +{ + char *fmt_connection = KERN_DEBUG "\t\"%p/%d\" -> \"%p/%d\";\n"; + char *fmt_declaration = KERN_DEBUG + "\t\"%p/%d\"[style=%s, color=\"#%06x\"];\n"; + char *style = "dotted"; + u8 ip1[16], ip2[16]; + u32 color = 0; + + if (bits == 32) { + fmt_connection = KERN_DEBUG "\t\"%pI4/%d\" -> \"%pI4/%d\";\n"; + fmt_declaration = KERN_DEBUG + "\t\"%pI4/%d\"[style=%s, color=\"#%06x\"];\n"; + } else if (bits == 128) { + fmt_connection = KERN_DEBUG "\t\"%pI6/%d\" -> \"%pI6/%d\";\n"; + fmt_declaration = KERN_DEBUG + "\t\"%pI6/%d\"[style=%s, color=\"#%06x\"];\n"; + } + if (node->peer) { + hsiphash_key_t key = { { 0 } }; + + memcpy(&key, &node->peer, sizeof(node->peer)); + color = hsiphash_1u32(0xdeadbeef, &key) % 200 << 16 | + hsiphash_1u32(0xbabecafe, &key) % 200 << 8 | + hsiphash_1u32(0xabad1dea, &key) % 200; + style = "bold"; + } + swap_endian_and_apply_cidr(ip1, node->bits, bits, node->cidr); + printk(fmt_declaration, ip1, node->cidr, style, color); + if (node->bit[0]) { + swap_endian_and_apply_cidr(ip2, + rcu_dereference_raw(node->bit[0])->bits, bits, + node->cidr); + printk(fmt_connection, ip1, node->cidr, ip2, + rcu_dereference_raw(node->bit[0])->cidr); + print_node(rcu_dereference_raw(node->bit[0]), bits); + } + if (node->bit[1]) { + swap_endian_and_apply_cidr(ip2, + rcu_dereference_raw(node->bit[1])->bits, + bits, node->cidr); + printk(fmt_connection, ip1, node->cidr, ip2, + rcu_dereference_raw(node->bit[1])->cidr); + print_node(rcu_dereference_raw(node->bit[1]), bits); + } +} + +static __init void print_tree(struct allowedips_node __rcu *top, u8 bits) +{ + printk(KERN_DEBUG "digraph trie {\n"); + print_node(rcu_dereference_raw(top), bits); + printk(KERN_DEBUG "}\n"); +} + +enum { + NUM_PEERS = 2000, + NUM_RAND_ROUTES = 400, + NUM_MUTATED_ROUTES = 100, + NUM_QUERIES = NUM_RAND_ROUTES * NUM_MUTATED_ROUTES * 30 +}; + +struct horrible_allowedips { + struct hlist_head head; +}; + +struct horrible_allowedips_node { + struct hlist_node table; + union nf_inet_addr ip; + union nf_inet_addr mask; + u8 ip_version; + void *value; +}; + +static __init void horrible_allowedips_init(struct horrible_allowedips *table) +{ + INIT_HLIST_HEAD(&table->head); +} + +static __init void horrible_allowedips_free(struct horrible_allowedips *table) +{ + struct horrible_allowedips_node *node; + struct hlist_node *h; + + hlist_for_each_entry_safe(node, h, &table->head, table) { + hlist_del(&node->table); + kfree(node); + } +} + +static __init inline union nf_inet_addr horrible_cidr_to_mask(u8 cidr) +{ + union nf_inet_addr mask; + + memset(&mask, 0x00, 128 / 8); + memset(&mask, 0xff, cidr / 8); + if (cidr % 32) + mask.all[cidr / 32] = (__force u32)htonl( + (0xFFFFFFFFUL << (32 - (cidr % 32))) & 0xFFFFFFFFUL); + return mask; +} + +static __init inline u8 horrible_mask_to_cidr(union nf_inet_addr subnet) +{ + return hweight32(subnet.all[0]) + hweight32(subnet.all[1]) + + hweight32(subnet.all[2]) + hweight32(subnet.all[3]); +} + +static __init inline void +horrible_mask_self(struct horrible_allowedips_node *node) +{ + if (node->ip_version == 4) { + node->ip.ip &= node->mask.ip; + } else if (node->ip_version == 6) { + node->ip.ip6[0] &= node->mask.ip6[0]; + node->ip.ip6[1] &= node->mask.ip6[1]; + node->ip.ip6[2] &= node->mask.ip6[2]; + node->ip.ip6[3] &= node->mask.ip6[3]; + } +} + +static __init inline bool +horrible_match_v4(const struct horrible_allowedips_node *node, + struct in_addr *ip) +{ + return (ip->s_addr & node->mask.ip) == node->ip.ip; +} + +static __init inline bool +horrible_match_v6(const struct horrible_allowedips_node *node, + struct in6_addr *ip) +{ + return (ip->in6_u.u6_addr32[0] & node->mask.ip6[0]) == + node->ip.ip6[0] && + (ip->in6_u.u6_addr32[1] & node->mask.ip6[1]) == + node->ip.ip6[1] && + (ip->in6_u.u6_addr32[2] & node->mask.ip6[2]) == + node->ip.ip6[2] && + (ip->in6_u.u6_addr32[3] & node->mask.ip6[3]) == node->ip.ip6[3]; +} + +static __init void +horrible_insert_ordered(struct horrible_allowedips *table, + struct horrible_allowedips_node *node) +{ + struct horrible_allowedips_node *other = NULL, *where = NULL; + u8 my_cidr = horrible_mask_to_cidr(node->mask); + + hlist_for_each_entry(other, &table->head, table) { + if (!memcmp(&other->mask, &node->mask, + sizeof(union nf_inet_addr)) && + !memcmp(&other->ip, &node->ip, + sizeof(union nf_inet_addr)) && + other->ip_version == node->ip_version) { + other->value = node->value; + kfree(node); + return; + } + where = other; + if (horrible_mask_to_cidr(other->mask) <= my_cidr) + break; + } + if (!other && !where) + hlist_add_head(&node->table, &table->head); + else if (!other) + hlist_add_behind(&node->table, &where->table); + else + hlist_add_before(&node->table, &where->table); +} + +static __init int +horrible_allowedips_insert_v4(struct horrible_allowedips *table, + struct in_addr *ip, u8 cidr, void *value) +{ + struct horrible_allowedips_node *node = kzalloc(sizeof(*node), + GFP_KERNEL); + + if (unlikely(!node)) + return -ENOMEM; + node->ip.in = *ip; + node->mask = horrible_cidr_to_mask(cidr); + node->ip_version = 4; + node->value = value; + horrible_mask_self(node); + horrible_insert_ordered(table, node); + return 0; +} + +static __init int +horrible_allowedips_insert_v6(struct horrible_allowedips *table, + struct in6_addr *ip, u8 cidr, void *value) +{ + struct horrible_allowedips_node *node = kzalloc(sizeof(*node), + GFP_KERNEL); + + if (unlikely(!node)) + return -ENOMEM; + node->ip.in6 = *ip; + node->mask = horrible_cidr_to_mask(cidr); + node->ip_version = 6; + node->value = value; + horrible_mask_self(node); + horrible_insert_ordered(table, node); + return 0; +} + +static __init void * +horrible_allowedips_lookup_v4(struct horrible_allowedips *table, + struct in_addr *ip) +{ + struct horrible_allowedips_node *node; + void *ret = NULL; + + hlist_for_each_entry(node, &table->head, table) { + if (node->ip_version != 4) + continue; + if (horrible_match_v4(node, ip)) { + ret = node->value; + break; + } + } + return ret; +} + +static __init void * +horrible_allowedips_lookup_v6(struct horrible_allowedips *table, + struct in6_addr *ip) +{ + struct horrible_allowedips_node *node; + void *ret = NULL; + + hlist_for_each_entry(node, &table->head, table) { + if (node->ip_version != 6) + continue; + if (horrible_match_v6(node, ip)) { + ret = node->value; + break; + } + } + return ret; +} + +static __init bool randomized_test(void) +{ + unsigned int i, j, k, mutate_amount, cidr; + u8 ip[16], mutate_mask[16], mutated[16]; + struct wg_peer **peers, *peer; + struct horrible_allowedips h; + DEFINE_MUTEX(mutex); + struct allowedips t; + bool ret = false; + + mutex_init(&mutex); + + wg_allowedips_init(&t); + horrible_allowedips_init(&h); + + peers = kcalloc(NUM_PEERS, sizeof(*peers), GFP_KERNEL); + if (unlikely(!peers)) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free; + } + for (i = 0; i < NUM_PEERS; ++i) { + peers[i] = kzalloc(sizeof(*peers[i]), GFP_KERNEL); + if (unlikely(!peers[i])) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free; + } + kref_init(&peers[i]->refcount); + } + + mutex_lock(&mutex); + + for (i = 0; i < NUM_RAND_ROUTES; ++i) { + prandom_bytes(ip, 4); + cidr = prandom_u32_max(32) + 1; + peer = peers[prandom_u32_max(NUM_PEERS)]; + if (wg_allowedips_insert_v4(&t, (struct in_addr *)ip, cidr, + peer, &mutex) < 0) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free_locked; + } + if (horrible_allowedips_insert_v4(&h, (struct in_addr *)ip, + cidr, peer) < 0) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free_locked; + } + for (j = 0; j < NUM_MUTATED_ROUTES; ++j) { + memcpy(mutated, ip, 4); + prandom_bytes(mutate_mask, 4); + mutate_amount = prandom_u32_max(32); + for (k = 0; k < mutate_amount / 8; ++k) + mutate_mask[k] = 0xff; + mutate_mask[k] = 0xff + << ((8 - (mutate_amount % 8)) % 8); + for (; k < 4; ++k) + mutate_mask[k] = 0; + for (k = 0; k < 4; ++k) + mutated[k] = (mutated[k] & mutate_mask[k]) | + (~mutate_mask[k] & + prandom_u32_max(256)); + cidr = prandom_u32_max(32) + 1; + peer = peers[prandom_u32_max(NUM_PEERS)]; + if (wg_allowedips_insert_v4(&t, + (struct in_addr *)mutated, + cidr, peer, &mutex) < 0) { + pr_err("allowedips random malloc: FAIL\n"); + goto free_locked; + } + if (horrible_allowedips_insert_v4(&h, + (struct in_addr *)mutated, cidr, peer)) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free_locked; + } + } + } + + for (i = 0; i < NUM_RAND_ROUTES; ++i) { + prandom_bytes(ip, 16); + cidr = prandom_u32_max(128) + 1; + peer = peers[prandom_u32_max(NUM_PEERS)]; + if (wg_allowedips_insert_v6(&t, (struct in6_addr *)ip, cidr, + peer, &mutex) < 0) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free_locked; + } + if (horrible_allowedips_insert_v6(&h, (struct in6_addr *)ip, + cidr, peer) < 0) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free_locked; + } + for (j = 0; j < NUM_MUTATED_ROUTES; ++j) { + memcpy(mutated, ip, 16); + prandom_bytes(mutate_mask, 16); + mutate_amount = prandom_u32_max(128); + for (k = 0; k < mutate_amount / 8; ++k) + mutate_mask[k] = 0xff; + mutate_mask[k] = 0xff + << ((8 - (mutate_amount % 8)) % 8); + for (; k < 4; ++k) + mutate_mask[k] = 0; + for (k = 0; k < 4; ++k) + mutated[k] = (mutated[k] & mutate_mask[k]) | + (~mutate_mask[k] & + prandom_u32_max(256)); + cidr = prandom_u32_max(128) + 1; + peer = peers[prandom_u32_max(NUM_PEERS)]; + if (wg_allowedips_insert_v6(&t, + (struct in6_addr *)mutated, + cidr, peer, &mutex) < 0) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free_locked; + } + if (horrible_allowedips_insert_v6( + &h, (struct in6_addr *)mutated, cidr, + peer)) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free_locked; + } + } + } + + mutex_unlock(&mutex); + + if (IS_ENABLED(DEBUG_PRINT_TRIE_GRAPHVIZ)) { + print_tree(t.root4, 32); + print_tree(t.root6, 128); + } + + for (i = 0; i < NUM_QUERIES; ++i) { + prandom_bytes(ip, 4); + if (lookup(t.root4, 32, ip) != + horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip)) { + pr_err("allowedips random self-test: FAIL\n"); + goto free; + } + } + + for (i = 0; i < NUM_QUERIES; ++i) { + prandom_bytes(ip, 16); + if (lookup(t.root6, 128, ip) != + horrible_allowedips_lookup_v6(&h, (struct in6_addr *)ip)) { + pr_err("allowedips random self-test: FAIL\n"); + goto free; + } + } + ret = true; + +free: + mutex_lock(&mutex); +free_locked: + wg_allowedips_free(&t, &mutex); + mutex_unlock(&mutex); + horrible_allowedips_free(&h); + if (peers) { + for (i = 0; i < NUM_PEERS; ++i) + kfree(peers[i]); + } + kfree(peers); + return ret; +} + +static __init inline struct in_addr *ip4(u8 a, u8 b, u8 c, u8 d) +{ + static struct in_addr ip; + u8 *split = (u8 *)&ip; + + split[0] = a; + split[1] = b; + split[2] = c; + split[3] = d; + return &ip; +} + +static __init inline struct in6_addr *ip6(u32 a, u32 b, u32 c, u32 d) +{ + static struct in6_addr ip; + __be32 *split = (__be32 *)&ip; + + split[0] = cpu_to_be32(a); + split[1] = cpu_to_be32(b); + split[2] = cpu_to_be32(c); + split[3] = cpu_to_be32(d); + return &ip; +} + +static __init struct wg_peer *init_peer(void) +{ + struct wg_peer *peer = kzalloc(sizeof(*peer), GFP_KERNEL); + + if (!peer) + return NULL; + kref_init(&peer->refcount); + INIT_LIST_HEAD(&peer->allowedips_list); + return peer; +} + +#define insert(version, mem, ipa, ipb, ipc, ipd, cidr) \ + wg_allowedips_insert_v##version(&t, ip##version(ipa, ipb, ipc, ipd), \ + cidr, mem, &mutex) + +#define maybe_fail() do { \ + ++i; \ + if (!_s) { \ + pr_info("allowedips self-test %zu: FAIL\n", i); \ + success = false; \ + } \ + } while (0) + +#define test(version, mem, ipa, ipb, ipc, ipd) do { \ + bool _s = lookup(t.root##version, (version) == 4 ? 32 : 128, \ + ip##version(ipa, ipb, ipc, ipd)) == (mem); \ + maybe_fail(); \ + } while (0) + +#define test_negative(version, mem, ipa, ipb, ipc, ipd) do { \ + bool _s = lookup(t.root##version, (version) == 4 ? 32 : 128, \ + ip##version(ipa, ipb, ipc, ipd)) != (mem); \ + maybe_fail(); \ + } while (0) + +#define test_boolean(cond) do { \ + bool _s = (cond); \ + maybe_fail(); \ + } while (0) + +bool __init wg_allowedips_selftest(void) +{ + bool found_a = false, found_b = false, found_c = false, found_d = false, + found_e = false, found_other = false; + struct wg_peer *a = init_peer(), *b = init_peer(), *c = init_peer(), + *d = init_peer(), *e = init_peer(), *f = init_peer(), + *g = init_peer(), *h = init_peer(); + struct allowedips_node *iter_node; + bool success = false; + struct allowedips t; + DEFINE_MUTEX(mutex); + struct in6_addr ip; + size_t i = 0, count = 0; + __be64 part; + + mutex_init(&mutex); + mutex_lock(&mutex); + wg_allowedips_init(&t); + + if (!a || !b || !c || !d || !e || !f || !g || !h) { + pr_err("allowedips self-test malloc: FAIL\n"); + goto free; + } + + insert(4, a, 192, 168, 4, 0, 24); + insert(4, b, 192, 168, 4, 4, 32); + insert(4, c, 192, 168, 0, 0, 16); + insert(4, d, 192, 95, 5, 64, 27); + /* replaces previous entry, and maskself is required */ + insert(4, c, 192, 95, 5, 65, 27); + insert(6, d, 0x26075300, 0x60006b00, 0, 0xc05f0543, 128); + insert(6, c, 0x26075300, 0x60006b00, 0, 0, 64); + insert(4, e, 0, 0, 0, 0, 0); + insert(6, e, 0, 0, 0, 0, 0); + /* replaces previous entry */ + insert(6, f, 0, 0, 0, 0, 0); + insert(6, g, 0x24046800, 0, 0, 0, 32); + /* maskself is required */ + insert(6, h, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef, 64); + insert(6, a, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef, 128); + insert(6, c, 0x24446800, 0x40e40800, 0xdeaebeef, 0xdefbeef, 128); + insert(6, b, 0x24446800, 0xf0e40800, 0xeeaebeef, 0, 98); + insert(4, g, 64, 15, 112, 0, 20); + /* maskself is required */ + insert(4, h, 64, 15, 123, 211, 25); + insert(4, a, 10, 0, 0, 0, 25); + insert(4, b, 10, 0, 0, 128, 25); + insert(4, a, 10, 1, 0, 0, 30); + insert(4, b, 10, 1, 0, 4, 30); + insert(4, c, 10, 1, 0, 8, 29); + insert(4, d, 10, 1, 0, 16, 29); + + if (IS_ENABLED(DEBUG_PRINT_TRIE_GRAPHVIZ)) { + print_tree(t.root4, 32); + print_tree(t.root6, 128); + } + + success = true; + + test(4, a, 192, 168, 4, 20); + test(4, a, 192, 168, 4, 0); + test(4, b, 192, 168, 4, 4); + test(4, c, 192, 168, 200, 182); + test(4, c, 192, 95, 5, 68); + test(4, e, 192, 95, 5, 96); + test(6, d, 0x26075300, 0x60006b00, 0, 0xc05f0543); + test(6, c, 0x26075300, 0x60006b00, 0, 0xc02e01ee); + test(6, f, 0x26075300, 0x60006b01, 0, 0); + test(6, g, 0x24046800, 0x40040806, 0, 0x1006); + test(6, g, 0x24046800, 0x40040806, 0x1234, 0x5678); + test(6, f, 0x240467ff, 0x40040806, 0x1234, 0x5678); + test(6, f, 0x24046801, 0x40040806, 0x1234, 0x5678); + test(6, h, 0x24046800, 0x40040800, 0x1234, 0x5678); + test(6, h, 0x24046800, 0x40040800, 0, 0); + test(6, h, 0x24046800, 0x40040800, 0x10101010, 0x10101010); + test(6, a, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef); + test(4, g, 64, 15, 116, 26); + test(4, g, 64, 15, 127, 3); + test(4, g, 64, 15, 123, 1); + test(4, h, 64, 15, 123, 128); + test(4, h, 64, 15, 123, 129); + test(4, a, 10, 0, 0, 52); + test(4, b, 10, 0, 0, 220); + test(4, a, 10, 1, 0, 2); + test(4, b, 10, 1, 0, 6); + test(4, c, 10, 1, 0, 10); + test(4, d, 10, 1, 0, 20); + + insert(4, a, 1, 0, 0, 0, 32); + insert(4, a, 64, 0, 0, 0, 32); + insert(4, a, 128, 0, 0, 0, 32); + insert(4, a, 192, 0, 0, 0, 32); + insert(4, a, 255, 0, 0, 0, 32); + wg_allowedips_remove_by_peer(&t, a, &mutex); + test_negative(4, a, 1, 0, 0, 0); + test_negative(4, a, 64, 0, 0, 0); + test_negative(4, a, 128, 0, 0, 0); + test_negative(4, a, 192, 0, 0, 0); + test_negative(4, a, 255, 0, 0, 0); + + wg_allowedips_free(&t, &mutex); + wg_allowedips_init(&t); + insert(4, a, 192, 168, 0, 0, 16); + insert(4, a, 192, 168, 0, 0, 24); + wg_allowedips_remove_by_peer(&t, a, &mutex); + test_negative(4, a, 192, 168, 0, 1); + + /* These will hit the WARN_ON(len >= 128) in free_node if something + * goes wrong. + */ + for (i = 0; i < 128; ++i) { + part = cpu_to_be64(~(1LLU << (i % 64))); + memset(&ip, 0xff, 16); + memcpy((u8 *)&ip + (i < 64) * 8, &part, 8); + wg_allowedips_insert_v6(&t, &ip, 128, a, &mutex); + } + + wg_allowedips_free(&t, &mutex); + + wg_allowedips_init(&t); + insert(4, a, 192, 95, 5, 93, 27); + insert(6, a, 0x26075300, 0x60006b00, 0, 0xc05f0543, 128); + insert(4, a, 10, 1, 0, 20, 29); + insert(6, a, 0x26075300, 0x6d8a6bf8, 0xdab1f1df, 0xc05f1523, 83); + insert(6, a, 0x26075300, 0x6d8a6bf8, 0xdab1f1df, 0xc05f1523, 21); + list_for_each_entry(iter_node, &a->allowedips_list, peer_list) { + u8 cidr, ip[16] __aligned(__alignof(u64)); + int family = wg_allowedips_read_node(iter_node, ip, &cidr); + + count++; + + if (cidr == 27 && family == AF_INET && + !memcmp(ip, ip4(192, 95, 5, 64), sizeof(struct in_addr))) + found_a = true; + else if (cidr == 128 && family == AF_INET6 && + !memcmp(ip, ip6(0x26075300, 0x60006b00, 0, 0xc05f0543), + sizeof(struct in6_addr))) + found_b = true; + else if (cidr == 29 && family == AF_INET && + !memcmp(ip, ip4(10, 1, 0, 16), sizeof(struct in_addr))) + found_c = true; + else if (cidr == 83 && family == AF_INET6 && + !memcmp(ip, ip6(0x26075300, 0x6d8a6bf8, 0xdab1e000, 0), + sizeof(struct in6_addr))) + found_d = true; + else if (cidr == 21 && family == AF_INET6 && + !memcmp(ip, ip6(0x26075000, 0, 0, 0), + sizeof(struct in6_addr))) + found_e = true; + else + found_other = true; + } + test_boolean(count == 5); + test_boolean(found_a); + test_boolean(found_b); + test_boolean(found_c); + test_boolean(found_d); + test_boolean(found_e); + test_boolean(!found_other); + + if (IS_ENABLED(DEBUG_RANDOM_TRIE) && success) + success = randomized_test(); + + if (success) + pr_info("allowedips self-tests: pass\n"); + +free: + wg_allowedips_free(&t, &mutex); + kfree(a); + kfree(b); + kfree(c); + kfree(d); + kfree(e); + kfree(f); + kfree(g); + kfree(h); + mutex_unlock(&mutex); + + return success; +} + +#undef test_negative +#undef test +#undef remove +#undef insert +#undef init_peer + +#endif diff --git a/drivers/net/wireguard/selftest/counter.c b/drivers/net/wireguard/selftest/counter.c new file mode 100644 index 000000000000..f4fbb9072ed7 --- /dev/null +++ b/drivers/net/wireguard/selftest/counter.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#ifdef DEBUG +bool __init wg_packet_counter_selftest(void) +{ + unsigned int test_num = 0, i; + union noise_counter counter; + bool success = true; + +#define T_INIT do { \ + memset(&counter, 0, sizeof(union noise_counter)); \ + spin_lock_init(&counter.receive.lock); \ + } while (0) +#define T_LIM (COUNTER_WINDOW_SIZE + 1) +#define T(n, v) do { \ + ++test_num; \ + if (counter_validate(&counter, n) != (v)) { \ + pr_err("nonce counter self-test %u: FAIL\n", \ + test_num); \ + success = false; \ + } \ + } while (0) + + T_INIT; + /* 1 */ T(0, true); + /* 2 */ T(1, true); + /* 3 */ T(1, false); + /* 4 */ T(9, true); + /* 5 */ T(8, true); + /* 6 */ T(7, true); + /* 7 */ T(7, false); + /* 8 */ T(T_LIM, true); + /* 9 */ T(T_LIM - 1, true); + /* 10 */ T(T_LIM - 1, false); + /* 11 */ T(T_LIM - 2, true); + /* 12 */ T(2, true); + /* 13 */ T(2, false); + /* 14 */ T(T_LIM + 16, true); + /* 15 */ T(3, false); + /* 16 */ T(T_LIM + 16, false); + /* 17 */ T(T_LIM * 4, true); + /* 18 */ T(T_LIM * 4 - (T_LIM - 1), true); + /* 19 */ T(10, false); + /* 20 */ T(T_LIM * 4 - T_LIM, false); + /* 21 */ T(T_LIM * 4 - (T_LIM + 1), false); + /* 22 */ T(T_LIM * 4 - (T_LIM - 2), true); + /* 23 */ T(T_LIM * 4 + 1 - T_LIM, false); + /* 24 */ T(0, false); + /* 25 */ T(REJECT_AFTER_MESSAGES, false); + /* 26 */ T(REJECT_AFTER_MESSAGES - 1, true); + /* 27 */ T(REJECT_AFTER_MESSAGES, false); + /* 28 */ T(REJECT_AFTER_MESSAGES - 1, false); + /* 29 */ T(REJECT_AFTER_MESSAGES - 2, true); + /* 30 */ T(REJECT_AFTER_MESSAGES + 1, false); + /* 31 */ T(REJECT_AFTER_MESSAGES + 2, false); + /* 32 */ T(REJECT_AFTER_MESSAGES - 2, false); + /* 33 */ T(REJECT_AFTER_MESSAGES - 3, true); + /* 34 */ T(0, false); + + T_INIT; + for (i = 1; i <= COUNTER_WINDOW_SIZE; ++i) + T(i, true); + T(0, true); + T(0, false); + + T_INIT; + for (i = 2; i <= COUNTER_WINDOW_SIZE + 1; ++i) + T(i, true); + T(1, true); + T(0, false); + + T_INIT; + for (i = COUNTER_WINDOW_SIZE + 1; i-- > 0;) + T(i, true); + + T_INIT; + for (i = COUNTER_WINDOW_SIZE + 2; i-- > 1;) + T(i, true); + T(0, false); + + T_INIT; + for (i = COUNTER_WINDOW_SIZE + 1; i-- > 1;) + T(i, true); + T(COUNTER_WINDOW_SIZE + 1, true); + T(0, false); + + T_INIT; + for (i = COUNTER_WINDOW_SIZE + 1; i-- > 1;) + T(i, true); + T(0, true); + T(COUNTER_WINDOW_SIZE + 1, true); + +#undef T +#undef T_LIM +#undef T_INIT + + if (success) + pr_info("nonce counter self-tests: pass\n"); + return success; +} +#endif diff --git a/drivers/net/wireguard/selftest/ratelimiter.c b/drivers/net/wireguard/selftest/ratelimiter.c new file mode 100644 index 000000000000..bcd6462e4540 --- /dev/null +++ b/drivers/net/wireguard/selftest/ratelimiter.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#ifdef DEBUG + +#include <linux/jiffies.h> + +static const struct { + bool result; + unsigned int msec_to_sleep_before; +} expected_results[] __initconst = { + [0 ... PACKETS_BURSTABLE - 1] = { true, 0 }, + [PACKETS_BURSTABLE] = { false, 0 }, + [PACKETS_BURSTABLE + 1] = { true, MSEC_PER_SEC / PACKETS_PER_SECOND }, + [PACKETS_BURSTABLE + 2] = { false, 0 }, + [PACKETS_BURSTABLE + 3] = { true, (MSEC_PER_SEC / PACKETS_PER_SECOND) * 2 }, + [PACKETS_BURSTABLE + 4] = { true, 0 }, + [PACKETS_BURSTABLE + 5] = { false, 0 } +}; + +static __init unsigned int maximum_jiffies_at_index(int index) +{ + unsigned int total_msecs = 2 * MSEC_PER_SEC / PACKETS_PER_SECOND / 3; + int i; + + for (i = 0; i <= index; ++i) + total_msecs += expected_results[i].msec_to_sleep_before; + return msecs_to_jiffies(total_msecs); +} + +static __init int timings_test(struct sk_buff *skb4, struct iphdr *hdr4, + struct sk_buff *skb6, struct ipv6hdr *hdr6, + int *test) +{ + unsigned long loop_start_time; + int i; + + wg_ratelimiter_gc_entries(NULL); + rcu_barrier(); + loop_start_time = jiffies; + + for (i = 0; i < ARRAY_SIZE(expected_results); ++i) { + if (expected_results[i].msec_to_sleep_before) + msleep(expected_results[i].msec_to_sleep_before); + + if (time_is_before_jiffies(loop_start_time + + maximum_jiffies_at_index(i))) + return -ETIMEDOUT; + if (wg_ratelimiter_allow(skb4, &init_net) != + expected_results[i].result) + return -EXFULL; + ++(*test); + + hdr4->saddr = htonl(ntohl(hdr4->saddr) + i + 1); + if (time_is_before_jiffies(loop_start_time + + maximum_jiffies_at_index(i))) + return -ETIMEDOUT; + if (!wg_ratelimiter_allow(skb4, &init_net)) + return -EXFULL; + ++(*test); + + hdr4->saddr = htonl(ntohl(hdr4->saddr) - i - 1); + +#if IS_ENABLED(CONFIG_IPV6) + hdr6->saddr.in6_u.u6_addr32[2] = htonl(i); + hdr6->saddr.in6_u.u6_addr32[3] = htonl(i); + if (time_is_before_jiffies(loop_start_time + + maximum_jiffies_at_index(i))) + return -ETIMEDOUT; + if (wg_ratelimiter_allow(skb6, &init_net) != + expected_results[i].result) + return -EXFULL; + ++(*test); + + hdr6->saddr.in6_u.u6_addr32[0] = + htonl(ntohl(hdr6->saddr.in6_u.u6_addr32[0]) + i + 1); + if (time_is_before_jiffies(loop_start_time + + maximum_jiffies_at_index(i))) + return -ETIMEDOUT; + if (!wg_ratelimiter_allow(skb6, &init_net)) + return -EXFULL; + ++(*test); + + hdr6->saddr.in6_u.u6_addr32[0] = + htonl(ntohl(hdr6->saddr.in6_u.u6_addr32[0]) - i - 1); + + if (time_is_before_jiffies(loop_start_time + + maximum_jiffies_at_index(i))) + return -ETIMEDOUT; +#endif + } + return 0; +} + +static __init int capacity_test(struct sk_buff *skb4, struct iphdr *hdr4, + int *test) +{ + int i; + + wg_ratelimiter_gc_entries(NULL); + rcu_barrier(); + + if (atomic_read(&total_entries)) + return -EXFULL; + ++(*test); + + for (i = 0; i <= max_entries; ++i) { + hdr4->saddr = htonl(i); + if (wg_ratelimiter_allow(skb4, &init_net) != (i != max_entries)) + return -EXFULL; + ++(*test); + } + return 0; +} + +bool __init wg_ratelimiter_selftest(void) +{ + enum { TRIALS_BEFORE_GIVING_UP = 5000 }; + bool success = false; + int test = 0, trials; + struct sk_buff *skb4, *skb6; + struct iphdr *hdr4; + struct ipv6hdr *hdr6; + + if (IS_ENABLED(CONFIG_KASAN) || IS_ENABLED(CONFIG_UBSAN)) + return true; + + BUILD_BUG_ON(MSEC_PER_SEC % PACKETS_PER_SECOND != 0); + + if (wg_ratelimiter_init()) + goto out; + ++test; + if (wg_ratelimiter_init()) { + wg_ratelimiter_uninit(); + goto out; + } + ++test; + if (wg_ratelimiter_init()) { + wg_ratelimiter_uninit(); + wg_ratelimiter_uninit(); + goto out; + } + ++test; + + skb4 = alloc_skb(sizeof(struct iphdr), GFP_KERNEL); + if (unlikely(!skb4)) + goto err_nofree; + skb4->protocol = htons(ETH_P_IP); + hdr4 = (struct iphdr *)skb_put(skb4, sizeof(*hdr4)); + hdr4->saddr = htonl(8182); + skb_reset_network_header(skb4); + ++test; + +#if IS_ENABLED(CONFIG_IPV6) + skb6 = alloc_skb(sizeof(struct ipv6hdr), GFP_KERNEL); + if (unlikely(!skb6)) { + kfree_skb(skb4); + goto err_nofree; + } + skb6->protocol = htons(ETH_P_IPV6); + hdr6 = (struct ipv6hdr *)skb_put(skb6, sizeof(*hdr6)); + hdr6->saddr.in6_u.u6_addr32[0] = htonl(1212); + hdr6->saddr.in6_u.u6_addr32[1] = htonl(289188); + skb_reset_network_header(skb6); + ++test; +#endif + + for (trials = TRIALS_BEFORE_GIVING_UP;;) { + int test_count = 0, ret; + + ret = timings_test(skb4, hdr4, skb6, hdr6, &test_count); + if (ret == -ETIMEDOUT) { + if (!trials--) { + test += test_count; + goto err; + } + msleep(500); + continue; + } else if (ret < 0) { + test += test_count; + goto err; + } else { + test += test_count; + break; + } + } + + for (trials = TRIALS_BEFORE_GIVING_UP;;) { + int test_count = 0; + + if (capacity_test(skb4, hdr4, &test_count) < 0) { + if (!trials--) { + test += test_count; + goto err; + } + msleep(50); + continue; + } + test += test_count; + break; + } + + success = true; + +err: + kfree_skb(skb4); +#if IS_ENABLED(CONFIG_IPV6) + kfree_skb(skb6); +#endif +err_nofree: + wg_ratelimiter_uninit(); + wg_ratelimiter_uninit(); + wg_ratelimiter_uninit(); + /* Uninit one extra time to check underflow detection. */ + wg_ratelimiter_uninit(); +out: + if (success) + pr_info("ratelimiter self-tests: pass\n"); + else + pr_err("ratelimiter self-test %d: FAIL\n", test); + + return success; +} +#endif diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c new file mode 100644 index 000000000000..c13260563446 --- /dev/null +++ b/drivers/net/wireguard/send.c @@ -0,0 +1,413 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include "queueing.h" +#include "timers.h" +#include "device.h" +#include "peer.h" +#include "socket.h" +#include "messages.h" +#include "cookie.h" + +#include <linux/uio.h> +#include <linux/inetdevice.h> +#include <linux/socket.h> +#include <net/ip_tunnels.h> +#include <net/udp.h> +#include <net/sock.h> + +static void wg_packet_send_handshake_initiation(struct wg_peer *peer) +{ + struct message_handshake_initiation packet; + + if (!wg_birthdate_has_expired(atomic64_read(&peer->last_sent_handshake), + REKEY_TIMEOUT)) + return; /* This function is rate limited. */ + + atomic64_set(&peer->last_sent_handshake, ktime_get_coarse_boottime_ns()); + net_dbg_ratelimited("%s: Sending handshake initiation to peer %llu (%pISpfsc)\n", + peer->device->dev->name, peer->internal_id, + &peer->endpoint.addr); + + if (wg_noise_handshake_create_initiation(&packet, &peer->handshake)) { + wg_cookie_add_mac_to_packet(&packet, sizeof(packet), peer); + wg_timers_any_authenticated_packet_traversal(peer); + wg_timers_any_authenticated_packet_sent(peer); + atomic64_set(&peer->last_sent_handshake, + ktime_get_coarse_boottime_ns()); + wg_socket_send_buffer_to_peer(peer, &packet, sizeof(packet), + HANDSHAKE_DSCP); + wg_timers_handshake_initiated(peer); + } +} + +void wg_packet_handshake_send_worker(struct work_struct *work) +{ + struct wg_peer *peer = container_of(work, struct wg_peer, + transmit_handshake_work); + + wg_packet_send_handshake_initiation(peer); + wg_peer_put(peer); +} + +void wg_packet_send_queued_handshake_initiation(struct wg_peer *peer, + bool is_retry) +{ + if (!is_retry) + peer->timer_handshake_attempts = 0; + + rcu_read_lock_bh(); + /* We check last_sent_handshake here in addition to the actual function + * we're queueing up, so that we don't queue things if not strictly + * necessary: + */ + if (!wg_birthdate_has_expired(atomic64_read(&peer->last_sent_handshake), + REKEY_TIMEOUT) || + unlikely(READ_ONCE(peer->is_dead))) + goto out; + + wg_peer_get(peer); + /* Queues up calling packet_send_queued_handshakes(peer), where we do a + * peer_put(peer) after: + */ + if (!queue_work(peer->device->handshake_send_wq, + &peer->transmit_handshake_work)) + /* If the work was already queued, we want to drop the + * extra reference: + */ + wg_peer_put(peer); +out: + rcu_read_unlock_bh(); +} + +void wg_packet_send_handshake_response(struct wg_peer *peer) +{ + struct message_handshake_response packet; + + atomic64_set(&peer->last_sent_handshake, ktime_get_coarse_boottime_ns()); + net_dbg_ratelimited("%s: Sending handshake response to peer %llu (%pISpfsc)\n", + peer->device->dev->name, peer->internal_id, + &peer->endpoint.addr); + + if (wg_noise_handshake_create_response(&packet, &peer->handshake)) { + wg_cookie_add_mac_to_packet(&packet, sizeof(packet), peer); + if (wg_noise_handshake_begin_session(&peer->handshake, + &peer->keypairs)) { + wg_timers_session_derived(peer); + wg_timers_any_authenticated_packet_traversal(peer); + wg_timers_any_authenticated_packet_sent(peer); + atomic64_set(&peer->last_sent_handshake, + ktime_get_coarse_boottime_ns()); + wg_socket_send_buffer_to_peer(peer, &packet, + sizeof(packet), + HANDSHAKE_DSCP); + } + } +} + +void wg_packet_send_handshake_cookie(struct wg_device *wg, + struct sk_buff *initiating_skb, + __le32 sender_index) +{ + struct message_handshake_cookie packet; + + net_dbg_skb_ratelimited("%s: Sending cookie response for denied handshake message for %pISpfsc\n", + wg->dev->name, initiating_skb); + wg_cookie_message_create(&packet, initiating_skb, sender_index, + &wg->cookie_checker); + wg_socket_send_buffer_as_reply_to_skb(wg, initiating_skb, &packet, + sizeof(packet)); +} + +static void keep_key_fresh(struct wg_peer *peer) +{ + struct noise_keypair *keypair; + bool send = false; + + rcu_read_lock_bh(); + keypair = rcu_dereference_bh(peer->keypairs.current_keypair); + if (likely(keypair && READ_ONCE(keypair->sending.is_valid)) && + (unlikely(atomic64_read(&keypair->sending.counter.counter) > + REKEY_AFTER_MESSAGES) || + (keypair->i_am_the_initiator && + unlikely(wg_birthdate_has_expired(keypair->sending.birthdate, + REKEY_AFTER_TIME))))) + send = true; + rcu_read_unlock_bh(); + + if (send) + wg_packet_send_queued_handshake_initiation(peer, false); +} + +static unsigned int calculate_skb_padding(struct sk_buff *skb) +{ + /* We do this modulo business with the MTU, just in case the networking + * layer gives us a packet that's bigger than the MTU. In that case, we + * wouldn't want the final subtraction to overflow in the case of the + * padded_size being clamped. + */ + unsigned int last_unit = skb->len % PACKET_CB(skb)->mtu; + unsigned int padded_size = ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE); + + if (padded_size > PACKET_CB(skb)->mtu) + padded_size = PACKET_CB(skb)->mtu; + return padded_size - last_unit; +} + +static bool encrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair) +{ + unsigned int padding_len, plaintext_len, trailer_len; + struct scatterlist sg[MAX_SKB_FRAGS + 8]; + struct message_data *header; + struct sk_buff *trailer; + int num_frags; + + /* Calculate lengths. */ + padding_len = calculate_skb_padding(skb); + trailer_len = padding_len + noise_encrypted_len(0); + plaintext_len = skb->len + padding_len; + + /* Expand data section to have room for padding and auth tag. */ + num_frags = skb_cow_data(skb, trailer_len, &trailer); + if (unlikely(num_frags < 0 || num_frags > ARRAY_SIZE(sg))) + return false; + + /* Set the padding to zeros, and make sure it and the auth tag are part + * of the skb. + */ + memset(skb_tail_pointer(trailer), 0, padding_len); + + /* Expand head section to have room for our header and the network + * stack's headers. + */ + if (unlikely(skb_cow_head(skb, DATA_PACKET_HEAD_ROOM) < 0)) + return false; + + /* Finalize checksum calculation for the inner packet, if required. */ + if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL && + skb_checksum_help(skb))) + return false; + + /* Only after checksumming can we safely add on the padding at the end + * and the header. + */ + skb_set_inner_network_header(skb, 0); + header = (struct message_data *)skb_push(skb, sizeof(*header)); + header->header.type = cpu_to_le32(MESSAGE_DATA); + header->key_idx = keypair->remote_index; + header->counter = cpu_to_le64(PACKET_CB(skb)->nonce); + pskb_put(skb, trailer, trailer_len); + + /* Now we can encrypt the scattergather segments */ + sg_init_table(sg, num_frags); + if (skb_to_sgvec(skb, sg, sizeof(struct message_data), + noise_encrypted_len(plaintext_len)) <= 0) + return false; + return chacha20poly1305_encrypt_sg_inplace(sg, plaintext_len, NULL, 0, + PACKET_CB(skb)->nonce, + keypair->sending.key); +} + +void wg_packet_send_keepalive(struct wg_peer *peer) +{ + struct sk_buff *skb; + + if (skb_queue_empty(&peer->staged_packet_queue)) { + skb = alloc_skb(DATA_PACKET_HEAD_ROOM + MESSAGE_MINIMUM_LENGTH, + GFP_ATOMIC); + if (unlikely(!skb)) + return; + skb_reserve(skb, DATA_PACKET_HEAD_ROOM); + skb->dev = peer->device->dev; + PACKET_CB(skb)->mtu = skb->dev->mtu; + skb_queue_tail(&peer->staged_packet_queue, skb); + net_dbg_ratelimited("%s: Sending keepalive packet to peer %llu (%pISpfsc)\n", + peer->device->dev->name, peer->internal_id, + &peer->endpoint.addr); + } + + wg_packet_send_staged_packets(peer); +} + +static void wg_packet_create_data_done(struct sk_buff *first, + struct wg_peer *peer) +{ + struct sk_buff *skb, *next; + bool is_keepalive, data_sent = false; + + wg_timers_any_authenticated_packet_traversal(peer); + wg_timers_any_authenticated_packet_sent(peer); + skb_list_walk_safe(first, skb, next) { + is_keepalive = skb->len == message_data_len(0); + if (likely(!wg_socket_send_skb_to_peer(peer, skb, + PACKET_CB(skb)->ds) && !is_keepalive)) + data_sent = true; + } + + if (likely(data_sent)) + wg_timers_data_sent(peer); + + keep_key_fresh(peer); +} + +void wg_packet_tx_worker(struct work_struct *work) +{ + struct crypt_queue *queue = container_of(work, struct crypt_queue, + work); + struct noise_keypair *keypair; + enum packet_state state; + struct sk_buff *first; + struct wg_peer *peer; + + while ((first = __ptr_ring_peek(&queue->ring)) != NULL && + (state = atomic_read_acquire(&PACKET_CB(first)->state)) != + PACKET_STATE_UNCRYPTED) { + __ptr_ring_discard_one(&queue->ring); + peer = PACKET_PEER(first); + keypair = PACKET_CB(first)->keypair; + + if (likely(state == PACKET_STATE_CRYPTED)) + wg_packet_create_data_done(first, peer); + else + kfree_skb_list(first); + + wg_noise_keypair_put(keypair, false); + wg_peer_put(peer); + } +} + +void wg_packet_encrypt_worker(struct work_struct *work) +{ + struct crypt_queue *queue = container_of(work, struct multicore_worker, + work)->ptr; + struct sk_buff *first, *skb, *next; + + while ((first = ptr_ring_consume_bh(&queue->ring)) != NULL) { + enum packet_state state = PACKET_STATE_CRYPTED; + + skb_list_walk_safe(first, skb, next) { + if (likely(encrypt_packet(skb, + PACKET_CB(first)->keypair))) { + wg_reset_packet(skb); + } else { + state = PACKET_STATE_DEAD; + break; + } + } + wg_queue_enqueue_per_peer(&PACKET_PEER(first)->tx_queue, first, + state); + + } +} + +static void wg_packet_create_data(struct sk_buff *first) +{ + struct wg_peer *peer = PACKET_PEER(first); + struct wg_device *wg = peer->device; + int ret = -EINVAL; + + rcu_read_lock_bh(); + if (unlikely(READ_ONCE(peer->is_dead))) + goto err; + + ret = wg_queue_enqueue_per_device_and_peer(&wg->encrypt_queue, + &peer->tx_queue, first, + wg->packet_crypt_wq, + &wg->encrypt_queue.last_cpu); + if (unlikely(ret == -EPIPE)) + wg_queue_enqueue_per_peer(&peer->tx_queue, first, + PACKET_STATE_DEAD); +err: + rcu_read_unlock_bh(); + if (likely(!ret || ret == -EPIPE)) + return; + wg_noise_keypair_put(PACKET_CB(first)->keypair, false); + wg_peer_put(peer); + kfree_skb_list(first); +} + +void wg_packet_purge_staged_packets(struct wg_peer *peer) +{ + spin_lock_bh(&peer->staged_packet_queue.lock); + peer->device->dev->stats.tx_dropped += peer->staged_packet_queue.qlen; + __skb_queue_purge(&peer->staged_packet_queue); + spin_unlock_bh(&peer->staged_packet_queue.lock); +} + +void wg_packet_send_staged_packets(struct wg_peer *peer) +{ + struct noise_symmetric_key *key; + struct noise_keypair *keypair; + struct sk_buff_head packets; + struct sk_buff *skb; + + /* Steal the current queue into our local one. */ + __skb_queue_head_init(&packets); + spin_lock_bh(&peer->staged_packet_queue.lock); + skb_queue_splice_init(&peer->staged_packet_queue, &packets); + spin_unlock_bh(&peer->staged_packet_queue.lock); + if (unlikely(skb_queue_empty(&packets))) + return; + + /* First we make sure we have a valid reference to a valid key. */ + rcu_read_lock_bh(); + keypair = wg_noise_keypair_get( + rcu_dereference_bh(peer->keypairs.current_keypair)); + rcu_read_unlock_bh(); + if (unlikely(!keypair)) + goto out_nokey; + key = &keypair->sending; + if (unlikely(!READ_ONCE(key->is_valid))) + goto out_nokey; + if (unlikely(wg_birthdate_has_expired(key->birthdate, + REJECT_AFTER_TIME))) + goto out_invalid; + + /* After we know we have a somewhat valid key, we now try to assign + * nonces to all of the packets in the queue. If we can't assign nonces + * for all of them, we just consider it a failure and wait for the next + * handshake. + */ + skb_queue_walk(&packets, skb) { + /* 0 for no outer TOS: no leak. TODO: at some later point, we + * might consider using flowi->tos as outer instead. + */ + PACKET_CB(skb)->ds = ip_tunnel_ecn_encap(0, ip_hdr(skb), skb); + PACKET_CB(skb)->nonce = + atomic64_inc_return(&key->counter.counter) - 1; + if (unlikely(PACKET_CB(skb)->nonce >= REJECT_AFTER_MESSAGES)) + goto out_invalid; + } + + packets.prev->next = NULL; + wg_peer_get(keypair->entry.peer); + PACKET_CB(packets.next)->keypair = keypair; + wg_packet_create_data(packets.next); + return; + +out_invalid: + WRITE_ONCE(key->is_valid, false); +out_nokey: + wg_noise_keypair_put(keypair, false); + + /* We orphan the packets if we're waiting on a handshake, so that they + * don't block a socket's pool. + */ + skb_queue_walk(&packets, skb) + skb_orphan(skb); + /* Then we put them back on the top of the queue. We're not too + * concerned about accidentally getting things a little out of order if + * packets are being added really fast, because this queue is for before + * packets can even be sent and it's small anyway. + */ + spin_lock_bh(&peer->staged_packet_queue.lock); + skb_queue_splice(&packets, &peer->staged_packet_queue); + spin_unlock_bh(&peer->staged_packet_queue.lock); + + /* If we're exiting because there's something wrong with the key, it + * means we should initiate a new handshake. + */ + wg_packet_send_queued_handshake_initiation(peer, false); +} diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c new file mode 100644 index 000000000000..c46256d0d81c --- /dev/null +++ b/drivers/net/wireguard/socket.c @@ -0,0 +1,437 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include "device.h" +#include "peer.h" +#include "socket.h" +#include "queueing.h" +#include "messages.h" + +#include <linux/ctype.h> +#include <linux/net.h> +#include <linux/if_vlan.h> +#include <linux/if_ether.h> +#include <linux/inetdevice.h> +#include <net/udp_tunnel.h> +#include <net/ipv6.h> + +static int send4(struct wg_device *wg, struct sk_buff *skb, + struct endpoint *endpoint, u8 ds, struct dst_cache *cache) +{ + struct flowi4 fl = { + .saddr = endpoint->src4.s_addr, + .daddr = endpoint->addr4.sin_addr.s_addr, + .fl4_dport = endpoint->addr4.sin_port, + .flowi4_mark = wg->fwmark, + .flowi4_proto = IPPROTO_UDP + }; + struct rtable *rt = NULL; + struct sock *sock; + int ret = 0; + + skb_mark_not_on_list(skb); + skb->dev = wg->dev; + skb->mark = wg->fwmark; + + rcu_read_lock_bh(); + sock = rcu_dereference_bh(wg->sock4); + + if (unlikely(!sock)) { + ret = -ENONET; + goto err; + } + + fl.fl4_sport = inet_sk(sock)->inet_sport; + + if (cache) + rt = dst_cache_get_ip4(cache, &fl.saddr); + + if (!rt) { + security_sk_classify_flow(sock, flowi4_to_flowi(&fl)); + if (unlikely(!inet_confirm_addr(sock_net(sock), NULL, 0, + fl.saddr, RT_SCOPE_HOST))) { + endpoint->src4.s_addr = 0; + *(__force __be32 *)&endpoint->src_if4 = 0; + fl.saddr = 0; + if (cache) + dst_cache_reset(cache); + } + rt = ip_route_output_flow(sock_net(sock), &fl, sock); + if (unlikely(endpoint->src_if4 && ((IS_ERR(rt) && + PTR_ERR(rt) == -EINVAL) || (!IS_ERR(rt) && + rt->dst.dev->ifindex != endpoint->src_if4)))) { + endpoint->src4.s_addr = 0; + *(__force __be32 *)&endpoint->src_if4 = 0; + fl.saddr = 0; + if (cache) + dst_cache_reset(cache); + if (!IS_ERR(rt)) + ip_rt_put(rt); + rt = ip_route_output_flow(sock_net(sock), &fl, sock); + } + if (unlikely(IS_ERR(rt))) { + ret = PTR_ERR(rt); + net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n", + wg->dev->name, &endpoint->addr, ret); + goto err; + } else if (unlikely(rt->dst.dev == skb->dev)) { + ip_rt_put(rt); + ret = -ELOOP; + net_dbg_ratelimited("%s: Avoiding routing loop to %pISpfsc\n", + wg->dev->name, &endpoint->addr); + goto err; + } + if (cache) + dst_cache_set_ip4(cache, &rt->dst, fl.saddr); + } + + skb->ignore_df = 1; + udp_tunnel_xmit_skb(rt, sock, skb, fl.saddr, fl.daddr, ds, + ip4_dst_hoplimit(&rt->dst), 0, fl.fl4_sport, + fl.fl4_dport, false, false); + goto out; + +err: + kfree_skb(skb); +out: + rcu_read_unlock_bh(); + return ret; +} + +static int send6(struct wg_device *wg, struct sk_buff *skb, + struct endpoint *endpoint, u8 ds, struct dst_cache *cache) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct flowi6 fl = { + .saddr = endpoint->src6, + .daddr = endpoint->addr6.sin6_addr, + .fl6_dport = endpoint->addr6.sin6_port, + .flowi6_mark = wg->fwmark, + .flowi6_oif = endpoint->addr6.sin6_scope_id, + .flowi6_proto = IPPROTO_UDP + /* TODO: addr->sin6_flowinfo */ + }; + struct dst_entry *dst = NULL; + struct sock *sock; + int ret = 0; + + skb_mark_not_on_list(skb); + skb->dev = wg->dev; + skb->mark = wg->fwmark; + + rcu_read_lock_bh(); + sock = rcu_dereference_bh(wg->sock6); + + if (unlikely(!sock)) { + ret = -ENONET; + goto err; + } + + fl.fl6_sport = inet_sk(sock)->inet_sport; + + if (cache) + dst = dst_cache_get_ip6(cache, &fl.saddr); + + if (!dst) { + security_sk_classify_flow(sock, flowi6_to_flowi(&fl)); + if (unlikely(!ipv6_addr_any(&fl.saddr) && + !ipv6_chk_addr(sock_net(sock), &fl.saddr, NULL, 0))) { + endpoint->src6 = fl.saddr = in6addr_any; + if (cache) + dst_cache_reset(cache); + } + dst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(sock), sock, &fl, + NULL); + if (unlikely(IS_ERR(dst))) { + ret = PTR_ERR(dst); + net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n", + wg->dev->name, &endpoint->addr, ret); + goto err; + } else if (unlikely(dst->dev == skb->dev)) { + dst_release(dst); + ret = -ELOOP; + net_dbg_ratelimited("%s: Avoiding routing loop to %pISpfsc\n", + wg->dev->name, &endpoint->addr); + goto err; + } + if (cache) + dst_cache_set_ip6(cache, dst, &fl.saddr); + } + + skb->ignore_df = 1; + udp_tunnel6_xmit_skb(dst, sock, skb, skb->dev, &fl.saddr, &fl.daddr, ds, + ip6_dst_hoplimit(dst), 0, fl.fl6_sport, + fl.fl6_dport, false); + goto out; + +err: + kfree_skb(skb); +out: + rcu_read_unlock_bh(); + return ret; +#else + return -EAFNOSUPPORT; +#endif +} + +int wg_socket_send_skb_to_peer(struct wg_peer *peer, struct sk_buff *skb, u8 ds) +{ + size_t skb_len = skb->len; + int ret = -EAFNOSUPPORT; + + read_lock_bh(&peer->endpoint_lock); + if (peer->endpoint.addr.sa_family == AF_INET) + ret = send4(peer->device, skb, &peer->endpoint, ds, + &peer->endpoint_cache); + else if (peer->endpoint.addr.sa_family == AF_INET6) + ret = send6(peer->device, skb, &peer->endpoint, ds, + &peer->endpoint_cache); + else + dev_kfree_skb(skb); + if (likely(!ret)) + peer->tx_bytes += skb_len; + read_unlock_bh(&peer->endpoint_lock); + + return ret; +} + +int wg_socket_send_buffer_to_peer(struct wg_peer *peer, void *buffer, + size_t len, u8 ds) +{ + struct sk_buff *skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC); + + if (unlikely(!skb)) + return -ENOMEM; + + skb_reserve(skb, SKB_HEADER_LEN); + skb_set_inner_network_header(skb, 0); + skb_put_data(skb, buffer, len); + return wg_socket_send_skb_to_peer(peer, skb, ds); +} + +int wg_socket_send_buffer_as_reply_to_skb(struct wg_device *wg, + struct sk_buff *in_skb, void *buffer, + size_t len) +{ + int ret = 0; + struct sk_buff *skb; + struct endpoint endpoint; + + if (unlikely(!in_skb)) + return -EINVAL; + ret = wg_socket_endpoint_from_skb(&endpoint, in_skb); + if (unlikely(ret < 0)) + return ret; + + skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC); + if (unlikely(!skb)) + return -ENOMEM; + skb_reserve(skb, SKB_HEADER_LEN); + skb_set_inner_network_header(skb, 0); + skb_put_data(skb, buffer, len); + + if (endpoint.addr.sa_family == AF_INET) + ret = send4(wg, skb, &endpoint, 0, NULL); + else if (endpoint.addr.sa_family == AF_INET6) + ret = send6(wg, skb, &endpoint, 0, NULL); + /* No other possibilities if the endpoint is valid, which it is, + * as we checked above. + */ + + return ret; +} + +int wg_socket_endpoint_from_skb(struct endpoint *endpoint, + const struct sk_buff *skb) +{ + memset(endpoint, 0, sizeof(*endpoint)); + if (skb->protocol == htons(ETH_P_IP)) { + endpoint->addr4.sin_family = AF_INET; + endpoint->addr4.sin_port = udp_hdr(skb)->source; + endpoint->addr4.sin_addr.s_addr = ip_hdr(skb)->saddr; + endpoint->src4.s_addr = ip_hdr(skb)->daddr; + endpoint->src_if4 = skb->skb_iif; + } else if (skb->protocol == htons(ETH_P_IPV6)) { + endpoint->addr6.sin6_family = AF_INET6; + endpoint->addr6.sin6_port = udp_hdr(skb)->source; + endpoint->addr6.sin6_addr = ipv6_hdr(skb)->saddr; + endpoint->addr6.sin6_scope_id = ipv6_iface_scope_id( + &ipv6_hdr(skb)->saddr, skb->skb_iif); + endpoint->src6 = ipv6_hdr(skb)->daddr; + } else { + return -EINVAL; + } + return 0; +} + +static bool endpoint_eq(const struct endpoint *a, const struct endpoint *b) +{ + return (a->addr.sa_family == AF_INET && b->addr.sa_family == AF_INET && + a->addr4.sin_port == b->addr4.sin_port && + a->addr4.sin_addr.s_addr == b->addr4.sin_addr.s_addr && + a->src4.s_addr == b->src4.s_addr && a->src_if4 == b->src_if4) || + (a->addr.sa_family == AF_INET6 && + b->addr.sa_family == AF_INET6 && + a->addr6.sin6_port == b->addr6.sin6_port && + ipv6_addr_equal(&a->addr6.sin6_addr, &b->addr6.sin6_addr) && + a->addr6.sin6_scope_id == b->addr6.sin6_scope_id && + ipv6_addr_equal(&a->src6, &b->src6)) || + unlikely(!a->addr.sa_family && !b->addr.sa_family); +} + +void wg_socket_set_peer_endpoint(struct wg_peer *peer, + const struct endpoint *endpoint) +{ + /* First we check unlocked, in order to optimize, since it's pretty rare + * that an endpoint will change. If we happen to be mid-write, and two + * CPUs wind up writing the same thing or something slightly different, + * it doesn't really matter much either. + */ + if (endpoint_eq(endpoint, &peer->endpoint)) + return; + write_lock_bh(&peer->endpoint_lock); + if (endpoint->addr.sa_family == AF_INET) { + peer->endpoint.addr4 = endpoint->addr4; + peer->endpoint.src4 = endpoint->src4; + peer->endpoint.src_if4 = endpoint->src_if4; + } else if (endpoint->addr.sa_family == AF_INET6) { + peer->endpoint.addr6 = endpoint->addr6; + peer->endpoint.src6 = endpoint->src6; + } else { + goto out; + } + dst_cache_reset(&peer->endpoint_cache); +out: + write_unlock_bh(&peer->endpoint_lock); +} + +void wg_socket_set_peer_endpoint_from_skb(struct wg_peer *peer, + const struct sk_buff *skb) +{ + struct endpoint endpoint; + + if (!wg_socket_endpoint_from_skb(&endpoint, skb)) + wg_socket_set_peer_endpoint(peer, &endpoint); +} + +void wg_socket_clear_peer_endpoint_src(struct wg_peer *peer) +{ + write_lock_bh(&peer->endpoint_lock); + memset(&peer->endpoint.src6, 0, sizeof(peer->endpoint.src6)); + dst_cache_reset(&peer->endpoint_cache); + write_unlock_bh(&peer->endpoint_lock); +} + +static int wg_receive(struct sock *sk, struct sk_buff *skb) +{ + struct wg_device *wg; + + if (unlikely(!sk)) + goto err; + wg = sk->sk_user_data; + if (unlikely(!wg)) + goto err; + wg_packet_receive(wg, skb); + return 0; + +err: + kfree_skb(skb); + return 0; +} + +static void sock_free(struct sock *sock) +{ + if (unlikely(!sock)) + return; + sk_clear_memalloc(sock); + udp_tunnel_sock_release(sock->sk_socket); +} + +static void set_sock_opts(struct socket *sock) +{ + sock->sk->sk_allocation = GFP_ATOMIC; + sock->sk->sk_sndbuf = INT_MAX; + sk_set_memalloc(sock->sk); +} + +int wg_socket_init(struct wg_device *wg, u16 port) +{ + int ret; + struct udp_tunnel_sock_cfg cfg = { + .sk_user_data = wg, + .encap_type = 1, + .encap_rcv = wg_receive + }; + struct socket *new4 = NULL, *new6 = NULL; + struct udp_port_cfg port4 = { + .family = AF_INET, + .local_ip.s_addr = htonl(INADDR_ANY), + .local_udp_port = htons(port), + .use_udp_checksums = true + }; +#if IS_ENABLED(CONFIG_IPV6) + int retries = 0; + struct udp_port_cfg port6 = { + .family = AF_INET6, + .local_ip6 = IN6ADDR_ANY_INIT, + .use_udp6_tx_checksums = true, + .use_udp6_rx_checksums = true, + .ipv6_v6only = true + }; +#endif + +#if IS_ENABLED(CONFIG_IPV6) +retry: +#endif + + ret = udp_sock_create(wg->creating_net, &port4, &new4); + if (ret < 0) { + pr_err("%s: Could not create IPv4 socket\n", wg->dev->name); + return ret; + } + set_sock_opts(new4); + setup_udp_tunnel_sock(wg->creating_net, new4, &cfg); + +#if IS_ENABLED(CONFIG_IPV6) + if (ipv6_mod_enabled()) { + port6.local_udp_port = inet_sk(new4->sk)->inet_sport; + ret = udp_sock_create(wg->creating_net, &port6, &new6); + if (ret < 0) { + udp_tunnel_sock_release(new4); + if (ret == -EADDRINUSE && !port && retries++ < 100) + goto retry; + pr_err("%s: Could not create IPv6 socket\n", + wg->dev->name); + return ret; + } + set_sock_opts(new6); + setup_udp_tunnel_sock(wg->creating_net, new6, &cfg); + } +#endif + + wg_socket_reinit(wg, new4->sk, new6 ? new6->sk : NULL); + return 0; +} + +void wg_socket_reinit(struct wg_device *wg, struct sock *new4, + struct sock *new6) +{ + struct sock *old4, *old6; + + mutex_lock(&wg->socket_update_lock); + old4 = rcu_dereference_protected(wg->sock4, + lockdep_is_held(&wg->socket_update_lock)); + old6 = rcu_dereference_protected(wg->sock6, + lockdep_is_held(&wg->socket_update_lock)); + rcu_assign_pointer(wg->sock4, new4); + rcu_assign_pointer(wg->sock6, new6); + if (new4) + wg->incoming_port = ntohs(inet_sk(new4)->inet_sport); + mutex_unlock(&wg->socket_update_lock); + synchronize_rcu(); + synchronize_net(); + sock_free(old4); + sock_free(old6); +} diff --git a/drivers/net/wireguard/socket.h b/drivers/net/wireguard/socket.h new file mode 100644 index 000000000000..bab5848efbcd --- /dev/null +++ b/drivers/net/wireguard/socket.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#ifndef _WG_SOCKET_H +#define _WG_SOCKET_H + +#include <linux/netdevice.h> +#include <linux/udp.h> +#include <linux/if_vlan.h> +#include <linux/if_ether.h> + +int wg_socket_init(struct wg_device *wg, u16 port); +void wg_socket_reinit(struct wg_device *wg, struct sock *new4, + struct sock *new6); +int wg_socket_send_buffer_to_peer(struct wg_peer *peer, void *data, + size_t len, u8 ds); +int wg_socket_send_skb_to_peer(struct wg_peer *peer, struct sk_buff *skb, + u8 ds); +int wg_socket_send_buffer_as_reply_to_skb(struct wg_device *wg, + struct sk_buff *in_skb, + void *out_buffer, size_t len); + +int wg_socket_endpoint_from_skb(struct endpoint *endpoint, + const struct sk_buff *skb); +void wg_socket_set_peer_endpoint(struct wg_peer *peer, + const struct endpoint *endpoint); +void wg_socket_set_peer_endpoint_from_skb(struct wg_peer *peer, + const struct sk_buff *skb); +void wg_socket_clear_peer_endpoint_src(struct wg_peer *peer); + +#if defined(CONFIG_DYNAMIC_DEBUG) || defined(DEBUG) +#define net_dbg_skb_ratelimited(fmt, dev, skb, ...) do { \ + struct endpoint __endpoint; \ + wg_socket_endpoint_from_skb(&__endpoint, skb); \ + net_dbg_ratelimited(fmt, dev, &__endpoint.addr, \ + ##__VA_ARGS__); \ + } while (0) +#else +#define net_dbg_skb_ratelimited(fmt, skb, ...) +#endif + +#endif /* _WG_SOCKET_H */ diff --git a/drivers/net/wireguard/timers.c b/drivers/net/wireguard/timers.c new file mode 100644 index 000000000000..d54d32ac9bc4 --- /dev/null +++ b/drivers/net/wireguard/timers.c @@ -0,0 +1,243 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include "timers.h" +#include "device.h" +#include "peer.h" +#include "queueing.h" +#include "socket.h" + +/* + * - Timer for retransmitting the handshake if we don't hear back after + * `REKEY_TIMEOUT + jitter` ms. + * + * - Timer for sending empty packet if we have received a packet but after have + * not sent one for `KEEPALIVE_TIMEOUT` ms. + * + * - Timer for initiating new handshake if we have sent a packet but after have + * not received one (even empty) for `(KEEPALIVE_TIMEOUT + REKEY_TIMEOUT) + + * jitter` ms. + * + * - Timer for zeroing out all ephemeral keys after `(REJECT_AFTER_TIME * 3)` ms + * if no new keys have been received. + * + * - Timer for, if enabled, sending an empty authenticated packet every user- + * specified seconds. + */ + +static inline void mod_peer_timer(struct wg_peer *peer, + struct timer_list *timer, + unsigned long expires) +{ + rcu_read_lock_bh(); + if (likely(netif_running(peer->device->dev) && + !READ_ONCE(peer->is_dead))) + mod_timer(timer, expires); + rcu_read_unlock_bh(); +} + +static void wg_expired_retransmit_handshake(struct timer_list *timer) +{ + struct wg_peer *peer = from_timer(peer, timer, + timer_retransmit_handshake); + + if (peer->timer_handshake_attempts > MAX_TIMER_HANDSHAKES) { + pr_debug("%s: Handshake for peer %llu (%pISpfsc) did not complete after %d attempts, giving up\n", + peer->device->dev->name, peer->internal_id, + &peer->endpoint.addr, MAX_TIMER_HANDSHAKES + 2); + + del_timer(&peer->timer_send_keepalive); + /* We drop all packets without a keypair and don't try again, + * if we try unsuccessfully for too long to make a handshake. + */ + wg_packet_purge_staged_packets(peer); + + /* We set a timer for destroying any residue that might be left + * of a partial exchange. + */ + if (!timer_pending(&peer->timer_zero_key_material)) + mod_peer_timer(peer, &peer->timer_zero_key_material, + jiffies + REJECT_AFTER_TIME * 3 * HZ); + } else { + ++peer->timer_handshake_attempts; + pr_debug("%s: Handshake for peer %llu (%pISpfsc) did not complete after %d seconds, retrying (try %d)\n", + peer->device->dev->name, peer->internal_id, + &peer->endpoint.addr, REKEY_TIMEOUT, + peer->timer_handshake_attempts + 1); + + /* We clear the endpoint address src address, in case this is + * the cause of trouble. + */ + wg_socket_clear_peer_endpoint_src(peer); + + wg_packet_send_queued_handshake_initiation(peer, true); + } +} + +static void wg_expired_send_keepalive(struct timer_list *timer) +{ + struct wg_peer *peer = from_timer(peer, timer, timer_send_keepalive); + + wg_packet_send_keepalive(peer); + if (peer->timer_need_another_keepalive) { + peer->timer_need_another_keepalive = false; + mod_peer_timer(peer, &peer->timer_send_keepalive, + jiffies + KEEPALIVE_TIMEOUT * HZ); + } +} + +static void wg_expired_new_handshake(struct timer_list *timer) +{ + struct wg_peer *peer = from_timer(peer, timer, timer_new_handshake); + + pr_debug("%s: Retrying handshake with peer %llu (%pISpfsc) because we stopped hearing back after %d seconds\n", + peer->device->dev->name, peer->internal_id, + &peer->endpoint.addr, KEEPALIVE_TIMEOUT + REKEY_TIMEOUT); + /* We clear the endpoint address src address, in case this is the cause + * of trouble. + */ + wg_socket_clear_peer_endpoint_src(peer); + wg_packet_send_queued_handshake_initiation(peer, false); +} + +static void wg_expired_zero_key_material(struct timer_list *timer) +{ + struct wg_peer *peer = from_timer(peer, timer, timer_zero_key_material); + + rcu_read_lock_bh(); + if (!READ_ONCE(peer->is_dead)) { + wg_peer_get(peer); + if (!queue_work(peer->device->handshake_send_wq, + &peer->clear_peer_work)) + /* If the work was already on the queue, we want to drop + * the extra reference. + */ + wg_peer_put(peer); + } + rcu_read_unlock_bh(); +} + +static void wg_queued_expired_zero_key_material(struct work_struct *work) +{ + struct wg_peer *peer = container_of(work, struct wg_peer, + clear_peer_work); + + pr_debug("%s: Zeroing out all keys for peer %llu (%pISpfsc), since we haven't received a new one in %d seconds\n", + peer->device->dev->name, peer->internal_id, + &peer->endpoint.addr, REJECT_AFTER_TIME * 3); + wg_noise_handshake_clear(&peer->handshake); + wg_noise_keypairs_clear(&peer->keypairs); + wg_peer_put(peer); +} + +static void wg_expired_send_persistent_keepalive(struct timer_list *timer) +{ + struct wg_peer *peer = from_timer(peer, timer, + timer_persistent_keepalive); + + if (likely(peer->persistent_keepalive_interval)) + wg_packet_send_keepalive(peer); +} + +/* Should be called after an authenticated data packet is sent. */ +void wg_timers_data_sent(struct wg_peer *peer) +{ + if (!timer_pending(&peer->timer_new_handshake)) + mod_peer_timer(peer, &peer->timer_new_handshake, + jiffies + (KEEPALIVE_TIMEOUT + REKEY_TIMEOUT) * HZ + + prandom_u32_max(REKEY_TIMEOUT_JITTER_MAX_JIFFIES)); +} + +/* Should be called after an authenticated data packet is received. */ +void wg_timers_data_received(struct wg_peer *peer) +{ + if (likely(netif_running(peer->device->dev))) { + if (!timer_pending(&peer->timer_send_keepalive)) + mod_peer_timer(peer, &peer->timer_send_keepalive, + jiffies + KEEPALIVE_TIMEOUT * HZ); + else + peer->timer_need_another_keepalive = true; + } +} + +/* Should be called after any type of authenticated packet is sent, whether + * keepalive, data, or handshake. + */ +void wg_timers_any_authenticated_packet_sent(struct wg_peer *peer) +{ + del_timer(&peer->timer_send_keepalive); +} + +/* Should be called after any type of authenticated packet is received, whether + * keepalive, data, or handshake. + */ +void wg_timers_any_authenticated_packet_received(struct wg_peer *peer) +{ + del_timer(&peer->timer_new_handshake); +} + +/* Should be called after a handshake initiation message is sent. */ +void wg_timers_handshake_initiated(struct wg_peer *peer) +{ + mod_peer_timer(peer, &peer->timer_retransmit_handshake, + jiffies + REKEY_TIMEOUT * HZ + + prandom_u32_max(REKEY_TIMEOUT_JITTER_MAX_JIFFIES)); +} + +/* Should be called after a handshake response message is received and processed + * or when getting key confirmation via the first data message. + */ +void wg_timers_handshake_complete(struct wg_peer *peer) +{ + del_timer(&peer->timer_retransmit_handshake); + peer->timer_handshake_attempts = 0; + peer->sent_lastminute_handshake = false; + ktime_get_real_ts64(&peer->walltime_last_handshake); +} + +/* Should be called after an ephemeral key is created, which is before sending a + * handshake response or after receiving a handshake response. + */ +void wg_timers_session_derived(struct wg_peer *peer) +{ + mod_peer_timer(peer, &peer->timer_zero_key_material, + jiffies + REJECT_AFTER_TIME * 3 * HZ); +} + +/* Should be called before a packet with authentication, whether + * keepalive, data, or handshakem is sent, or after one is received. + */ +void wg_timers_any_authenticated_packet_traversal(struct wg_peer *peer) +{ + if (peer->persistent_keepalive_interval) + mod_peer_timer(peer, &peer->timer_persistent_keepalive, + jiffies + peer->persistent_keepalive_interval * HZ); +} + +void wg_timers_init(struct wg_peer *peer) +{ + timer_setup(&peer->timer_retransmit_handshake, + wg_expired_retransmit_handshake, 0); + timer_setup(&peer->timer_send_keepalive, wg_expired_send_keepalive, 0); + timer_setup(&peer->timer_new_handshake, wg_expired_new_handshake, 0); + timer_setup(&peer->timer_zero_key_material, + wg_expired_zero_key_material, 0); + timer_setup(&peer->timer_persistent_keepalive, + wg_expired_send_persistent_keepalive, 0); + INIT_WORK(&peer->clear_peer_work, wg_queued_expired_zero_key_material); + peer->timer_handshake_attempts = 0; + peer->sent_lastminute_handshake = false; + peer->timer_need_another_keepalive = false; +} + +void wg_timers_stop(struct wg_peer *peer) +{ + del_timer_sync(&peer->timer_retransmit_handshake); + del_timer_sync(&peer->timer_send_keepalive); + del_timer_sync(&peer->timer_new_handshake); + del_timer_sync(&peer->timer_zero_key_material); + del_timer_sync(&peer->timer_persistent_keepalive); + flush_work(&peer->clear_peer_work); +} diff --git a/drivers/net/wireguard/timers.h b/drivers/net/wireguard/timers.h new file mode 100644 index 000000000000..f0653dcb1326 --- /dev/null +++ b/drivers/net/wireguard/timers.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#ifndef _WG_TIMERS_H +#define _WG_TIMERS_H + +#include <linux/ktime.h> + +struct wg_peer; + +void wg_timers_init(struct wg_peer *peer); +void wg_timers_stop(struct wg_peer *peer); +void wg_timers_data_sent(struct wg_peer *peer); +void wg_timers_data_received(struct wg_peer *peer); +void wg_timers_any_authenticated_packet_sent(struct wg_peer *peer); +void wg_timers_any_authenticated_packet_received(struct wg_peer *peer); +void wg_timers_handshake_initiated(struct wg_peer *peer); +void wg_timers_handshake_complete(struct wg_peer *peer); +void wg_timers_session_derived(struct wg_peer *peer); +void wg_timers_any_authenticated_packet_traversal(struct wg_peer *peer); + +static inline bool wg_birthdate_has_expired(u64 birthday_nanoseconds, + u64 expiration_seconds) +{ + return (s64)(birthday_nanoseconds + expiration_seconds * NSEC_PER_SEC) + <= (s64)ktime_get_coarse_boottime_ns(); +} + +#endif /* _WG_TIMERS_H */ diff --git a/drivers/net/wireguard/version.h b/drivers/net/wireguard/version.h new file mode 100644 index 000000000000..a1a269a11634 --- /dev/null +++ b/drivers/net/wireguard/version.h @@ -0,0 +1 @@ +#define WIREGUARD_VERSION "1.0.0" diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2100.c b/drivers/net/wireless/intel/ipw2x00/ipw2100.c index 0579554ed4b3..3c505636b7cc 100644 --- a/drivers/net/wireless/intel/ipw2x00/ipw2100.c +++ b/drivers/net/wireless/intel/ipw2x00/ipw2100.c @@ -5834,7 +5834,7 @@ static int ipw2100_close(struct net_device *dev) /* * TODO: Fix this function... its just wrong */ -static void ipw2100_tx_timeout(struct net_device *dev) +static void ipw2100_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct ipw2100_priv *priv = libipw_priv(dev); diff --git a/drivers/net/wireless/intersil/hostap/hostap_main.c b/drivers/net/wireless/intersil/hostap/hostap_main.c index 05466281afb6..de97b3304115 100644 --- a/drivers/net/wireless/intersil/hostap/hostap_main.c +++ b/drivers/net/wireless/intersil/hostap/hostap_main.c @@ -761,7 +761,7 @@ static void hostap_set_multicast_list(struct net_device *dev) } -static void prism2_tx_timeout(struct net_device *dev) +static void prism2_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct hostap_interface *iface; local_info_t *local; diff --git a/drivers/net/wireless/intersil/orinoco/main.c b/drivers/net/wireless/intersil/orinoco/main.c index 28dac36d7c4c..00264a14e52c 100644 --- a/drivers/net/wireless/intersil/orinoco/main.c +++ b/drivers/net/wireless/intersil/orinoco/main.c @@ -647,7 +647,7 @@ static void __orinoco_ev_txexc(struct net_device *dev, struct hermes *hw) netif_wake_queue(dev); } -void orinoco_tx_timeout(struct net_device *dev) +void orinoco_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct orinoco_private *priv = ndev_priv(dev); struct net_device_stats *stats = &dev->stats; diff --git a/drivers/net/wireless/intersil/orinoco/orinoco.h b/drivers/net/wireless/intersil/orinoco/orinoco.h index 430862a6a24b..cdd026af100b 100644 --- a/drivers/net/wireless/intersil/orinoco/orinoco.h +++ b/drivers/net/wireless/intersil/orinoco/orinoco.h @@ -207,7 +207,7 @@ int orinoco_open(struct net_device *dev); int orinoco_stop(struct net_device *dev); void orinoco_set_multicast_list(struct net_device *dev); int orinoco_change_mtu(struct net_device *dev, int new_mtu); -void orinoco_tx_timeout(struct net_device *dev); +void orinoco_tx_timeout(struct net_device *dev, unsigned int txqueue); /********************************************************************/ /* Locking and synchronization functions */ diff --git a/drivers/net/wireless/intersil/prism54/islpci_eth.c b/drivers/net/wireless/intersil/prism54/islpci_eth.c index 2b8fb07d07e7..8d680250a281 100644 --- a/drivers/net/wireless/intersil/prism54/islpci_eth.c +++ b/drivers/net/wireless/intersil/prism54/islpci_eth.c @@ -473,7 +473,7 @@ islpci_do_reset_and_wake(struct work_struct *work) } void -islpci_eth_tx_timeout(struct net_device *ndev) +islpci_eth_tx_timeout(struct net_device *ndev, unsigned int txqueue) { islpci_private *priv = netdev_priv(ndev); diff --git a/drivers/net/wireless/intersil/prism54/islpci_eth.h b/drivers/net/wireless/intersil/prism54/islpci_eth.h index 61f4b43c6054..e433ccdc526b 100644 --- a/drivers/net/wireless/intersil/prism54/islpci_eth.h +++ b/drivers/net/wireless/intersil/prism54/islpci_eth.h @@ -53,7 +53,7 @@ struct avs_80211_1_header { void islpci_eth_cleanup_transmit(islpci_private *, isl38xx_control_block *); netdev_tx_t islpci_eth_transmit(struct sk_buff *, struct net_device *); int islpci_eth_receive(islpci_private *); -void islpci_eth_tx_timeout(struct net_device *); +void islpci_eth_tx_timeout(struct net_device *, unsigned int txqueue); void islpci_do_reset_and_wake(struct work_struct *); #endif /* _ISL_GEN_H */ diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c index d14e55e3c9da..7d94695e7961 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.c +++ b/drivers/net/wireless/marvell/mwifiex/main.c @@ -1020,7 +1020,7 @@ static void mwifiex_set_multicast_list(struct net_device *dev) * CFG802.11 network device handler for transmission timeout. */ static void -mwifiex_tx_timeout(struct net_device *dev) +mwifiex_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev); diff --git a/drivers/net/wireless/quantenna/qtnfmac/core.c b/drivers/net/wireless/quantenna/qtnfmac/core.c index 5fb598389487..648dfc38bd70 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/core.c +++ b/drivers/net/wireless/quantenna/qtnfmac/core.c @@ -156,7 +156,7 @@ static void qtnf_netdev_get_stats64(struct net_device *ndev, /* Netdev handler for transmission timeout. */ -static void qtnf_netdev_tx_timeout(struct net_device *ndev) +static void qtnf_netdev_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct qtnf_vif *vif = qtnf_netdev_get_priv(ndev); struct qtnf_wmac *mac; diff --git a/drivers/net/wireless/wl3501_cs.c b/drivers/net/wireless/wl3501_cs.c index 007bf6803293..686161db8706 100644 --- a/drivers/net/wireless/wl3501_cs.c +++ b/drivers/net/wireless/wl3501_cs.c @@ -1285,7 +1285,7 @@ out: return rc; } -static void wl3501_tx_timeout(struct net_device *dev) +static void wl3501_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct net_device_stats *stats = &dev->stats; int rc; diff --git a/drivers/net/wireless/zydas/zd1201.c b/drivers/net/wireless/zydas/zd1201.c index 0db7362bedb4..41641fc2be74 100644 --- a/drivers/net/wireless/zydas/zd1201.c +++ b/drivers/net/wireless/zydas/zd1201.c @@ -830,7 +830,7 @@ static netdev_tx_t zd1201_hard_start_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } -static void zd1201_tx_timeout(struct net_device *dev) +static void zd1201_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct zd1201 *zd = netdev_priv(dev); diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index f533b7372d59..17b4950ec051 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -195,185 +195,6 @@ static void xenvif_debugfs_delif(struct xenvif *vif) } #endif /* CONFIG_DEBUG_FS */ -static int netback_remove(struct xenbus_device *dev) -{ - struct backend_info *be = dev_get_drvdata(&dev->dev); - - set_backend_state(be, XenbusStateClosed); - - unregister_hotplug_status_watch(be); - if (be->vif) { - kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE); - xen_unregister_watchers(be->vif); - xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status"); - xenvif_free(be->vif); - be->vif = NULL; - } - kfree(be->hotplug_script); - kfree(be); - dev_set_drvdata(&dev->dev, NULL); - return 0; -} - - -/** - * Entry point to this code when a new device is created. Allocate the basic - * structures and switch to InitWait. - */ -static int netback_probe(struct xenbus_device *dev, - const struct xenbus_device_id *id) -{ - const char *message; - struct xenbus_transaction xbt; - int err; - int sg; - const char *script; - struct backend_info *be = kzalloc(sizeof(struct backend_info), - GFP_KERNEL); - if (!be) { - xenbus_dev_fatal(dev, -ENOMEM, - "allocating backend structure"); - return -ENOMEM; - } - - be->dev = dev; - dev_set_drvdata(&dev->dev, be); - - be->state = XenbusStateInitialising; - err = xenbus_switch_state(dev, XenbusStateInitialising); - if (err) - goto fail; - - sg = 1; - - do { - err = xenbus_transaction_start(&xbt); - if (err) { - xenbus_dev_fatal(dev, err, "starting transaction"); - goto fail; - } - - err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", sg); - if (err) { - message = "writing feature-sg"; - goto abort_transaction; - } - - err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", - "%d", sg); - if (err) { - message = "writing feature-gso-tcpv4"; - goto abort_transaction; - } - - err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv6", - "%d", sg); - if (err) { - message = "writing feature-gso-tcpv6"; - goto abort_transaction; - } - - /* We support partial checksum setup for IPv6 packets */ - err = xenbus_printf(xbt, dev->nodename, - "feature-ipv6-csum-offload", - "%d", 1); - if (err) { - message = "writing feature-ipv6-csum-offload"; - goto abort_transaction; - } - - /* We support rx-copy path. */ - err = xenbus_printf(xbt, dev->nodename, - "feature-rx-copy", "%d", 1); - if (err) { - message = "writing feature-rx-copy"; - goto abort_transaction; - } - - /* - * We don't support rx-flip path (except old guests who don't - * grok this feature flag). - */ - err = xenbus_printf(xbt, dev->nodename, - "feature-rx-flip", "%d", 0); - if (err) { - message = "writing feature-rx-flip"; - goto abort_transaction; - } - - /* We support dynamic multicast-control. */ - err = xenbus_printf(xbt, dev->nodename, - "feature-multicast-control", "%d", 1); - if (err) { - message = "writing feature-multicast-control"; - goto abort_transaction; - } - - err = xenbus_printf(xbt, dev->nodename, - "feature-dynamic-multicast-control", - "%d", 1); - if (err) { - message = "writing feature-dynamic-multicast-control"; - goto abort_transaction; - } - - err = xenbus_transaction_end(xbt, 0); - } while (err == -EAGAIN); - - if (err) { - xenbus_dev_fatal(dev, err, "completing transaction"); - goto fail; - } - - /* - * Split event channels support, this is optional so it is not - * put inside the above loop. - */ - err = xenbus_printf(XBT_NIL, dev->nodename, - "feature-split-event-channels", - "%u", separate_tx_rx_irq); - if (err) - pr_debug("Error writing feature-split-event-channels\n"); - - /* Multi-queue support: This is an optional feature. */ - err = xenbus_printf(XBT_NIL, dev->nodename, - "multi-queue-max-queues", "%u", xenvif_max_queues); - if (err) - pr_debug("Error writing multi-queue-max-queues\n"); - - err = xenbus_printf(XBT_NIL, dev->nodename, - "feature-ctrl-ring", - "%u", true); - if (err) - pr_debug("Error writing feature-ctrl-ring\n"); - - script = xenbus_read(XBT_NIL, dev->nodename, "script", NULL); - if (IS_ERR(script)) { - err = PTR_ERR(script); - xenbus_dev_fatal(dev, err, "reading script"); - goto fail; - } - - be->hotplug_script = script; - - - /* This kicks hotplug scripts, so do it immediately. */ - err = backend_create_xenvif(be); - if (err) - goto fail; - - return 0; - -abort_transaction: - xenbus_transaction_end(xbt, 1); - xenbus_dev_fatal(dev, err, "%s", message); -fail: - pr_debug("failed\n"); - netback_remove(dev); - return err; -} - - /* * Handle the creation of the hotplug script environment. We add the script * and vif variables to the environment, for the benefit of the vif-* hotplug @@ -827,6 +648,7 @@ static void hotplug_status_changed(struct xenbus_watch *watch, /* Not interested in this watch anymore. */ unregister_hotplug_status_watch(be); + xenbus_rm(XBT_NIL, be->dev->nodename, "hotplug-status"); } kfree(str); } @@ -1128,6 +950,176 @@ static int read_xenbus_vif_flags(struct backend_info *be) return 0; } +static int netback_remove(struct xenbus_device *dev) +{ + struct backend_info *be = dev_get_drvdata(&dev->dev); + + set_backend_state(be, XenbusStateClosed); + + unregister_hotplug_status_watch(be); + if (be->vif) { + kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE); + xen_unregister_watchers(be->vif); + xenvif_free(be->vif); + be->vif = NULL; + } + kfree(be->hotplug_script); + kfree(be); + dev_set_drvdata(&dev->dev, NULL); + return 0; +} + +/** + * Entry point to this code when a new device is created. Allocate the basic + * structures and switch to InitWait. + */ +static int netback_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + const char *message; + struct xenbus_transaction xbt; + int err; + int sg; + const char *script; + struct backend_info *be = kzalloc(sizeof(*be), GFP_KERNEL); + + if (!be) { + xenbus_dev_fatal(dev, -ENOMEM, + "allocating backend structure"); + return -ENOMEM; + } + + be->dev = dev; + dev_set_drvdata(&dev->dev, be); + + sg = 1; + + do { + err = xenbus_transaction_start(&xbt); + if (err) { + xenbus_dev_fatal(dev, err, "starting transaction"); + goto fail; + } + + err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", sg); + if (err) { + message = "writing feature-sg"; + goto abort_transaction; + } + + err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", + "%d", sg); + if (err) { + message = "writing feature-gso-tcpv4"; + goto abort_transaction; + } + + err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv6", + "%d", sg); + if (err) { + message = "writing feature-gso-tcpv6"; + goto abort_transaction; + } + + /* We support partial checksum setup for IPv6 packets */ + err = xenbus_printf(xbt, dev->nodename, + "feature-ipv6-csum-offload", + "%d", 1); + if (err) { + message = "writing feature-ipv6-csum-offload"; + goto abort_transaction; + } + + /* We support rx-copy path. */ + err = xenbus_printf(xbt, dev->nodename, + "feature-rx-copy", "%d", 1); + if (err) { + message = "writing feature-rx-copy"; + goto abort_transaction; + } + + /* We don't support rx-flip path (except old guests who + * don't grok this feature flag). + */ + err = xenbus_printf(xbt, dev->nodename, + "feature-rx-flip", "%d", 0); + if (err) { + message = "writing feature-rx-flip"; + goto abort_transaction; + } + + /* We support dynamic multicast-control. */ + err = xenbus_printf(xbt, dev->nodename, + "feature-multicast-control", "%d", 1); + if (err) { + message = "writing feature-multicast-control"; + goto abort_transaction; + } + + err = xenbus_printf(xbt, dev->nodename, + "feature-dynamic-multicast-control", + "%d", 1); + if (err) { + message = "writing feature-dynamic-multicast-control"; + goto abort_transaction; + } + + err = xenbus_transaction_end(xbt, 0); + } while (err == -EAGAIN); + + if (err) { + xenbus_dev_fatal(dev, err, "completing transaction"); + goto fail; + } + + /* Split event channels support, this is optional so it is not + * put inside the above loop. + */ + err = xenbus_printf(XBT_NIL, dev->nodename, + "feature-split-event-channels", + "%u", separate_tx_rx_irq); + if (err) + pr_debug("Error writing feature-split-event-channels\n"); + + /* Multi-queue support: This is an optional feature. */ + err = xenbus_printf(XBT_NIL, dev->nodename, + "multi-queue-max-queues", "%u", xenvif_max_queues); + if (err) + pr_debug("Error writing multi-queue-max-queues\n"); + + err = xenbus_printf(XBT_NIL, dev->nodename, + "feature-ctrl-ring", + "%u", true); + if (err) + pr_debug("Error writing feature-ctrl-ring\n"); + + backend_switch_state(be, XenbusStateInitWait); + + script = xenbus_read(XBT_NIL, dev->nodename, "script", NULL); + if (IS_ERR(script)) { + err = PTR_ERR(script); + xenbus_dev_fatal(dev, err, "reading script"); + goto fail; + } + + be->hotplug_script = script; + + /* This kicks hotplug scripts, so do it immediately. */ + err = backend_create_xenvif(be); + if (err) + goto fail; + + return 0; + +abort_transaction: + xenbus_transaction_end(xbt, 1); + xenbus_dev_fatal(dev, err, "%s", message); +fail: + pr_debug("failed\n"); + netback_remove(dev); + return err; +} + static const struct xenbus_device_id netback_ids[] = { { "vif" }, { "" } diff --git a/drivers/nfc/pn544/pn544.c b/drivers/nfc/pn544/pn544.c index cda996f6954e..2b83156efe3f 100644 --- a/drivers/nfc/pn544/pn544.c +++ b/drivers/nfc/pn544/pn544.c @@ -693,7 +693,7 @@ static int pn544_hci_check_presence(struct nfc_hci_dev *hdev, target->nfcid1_len != 10) return -EOPNOTSUPP; - return nfc_hci_send_cmd(hdev, NFC_HCI_RF_READER_A_GATE, + return nfc_hci_send_cmd(hdev, NFC_HCI_RF_READER_A_GATE, PN544_RF_READER_CMD_ACTIVATE_NEXT, target->nfcid1, target->nfcid1_len, NULL); } else if (target->supported_protocols & (NFC_PROTO_JEWEL_MASK | diff --git a/drivers/nfc/port100.c b/drivers/nfc/port100.c index 604dba4f18af..8e4d355dc3ae 100644 --- a/drivers/nfc/port100.c +++ b/drivers/nfc/port100.c @@ -565,7 +565,7 @@ static void port100_tx_update_payload_len(void *_frame, int len) { struct port100_frame *frame = _frame; - frame->datalen = cpu_to_le16(le16_to_cpu(frame->datalen) + len); + le16_add_cpu(&frame->datalen, len); } static bool port100_rx_frame_is_valid(void *_frame) diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c index a577218d1ab7..b27c46ebfc8f 100644 --- a/drivers/ptp/ptp_qoriq.c +++ b/drivers/ptp/ptp_qoriq.c @@ -74,14 +74,13 @@ static void set_fipers(struct ptp_qoriq *ptp_qoriq) ptp_qoriq->write(®s->fiper_regs->tmr_fiper2, ptp_qoriq->tmr_fiper2); } -static int extts_clean_up(struct ptp_qoriq *ptp_qoriq, int index, - bool update_event) +int extts_clean_up(struct ptp_qoriq *ptp_qoriq, int index, bool update_event) { struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; struct ptp_clock_event event; void __iomem *reg_etts_l; void __iomem *reg_etts_h; - u32 valid, stat, lo, hi; + u32 valid, lo, hi; switch (index) { case 0: @@ -101,6 +100,10 @@ static int extts_clean_up(struct ptp_qoriq *ptp_qoriq, int index, event.type = PTP_CLOCK_EXTTS; event.index = index; + if (ptp_qoriq->extts_fifo_support) + if (!(ptp_qoriq->read(®s->ctrl_regs->tmr_stat) & valid)) + return 0; + do { lo = ptp_qoriq->read(reg_etts_l); hi = ptp_qoriq->read(reg_etts_h); @@ -111,11 +114,13 @@ static int extts_clean_up(struct ptp_qoriq *ptp_qoriq, int index, ptp_clock_event(ptp_qoriq->clock, &event); } - stat = ptp_qoriq->read(®s->ctrl_regs->tmr_stat); - } while (ptp_qoriq->extts_fifo_support && (stat & valid)); + if (!ptp_qoriq->extts_fifo_support) + break; + } while (ptp_qoriq->read(®s->ctrl_regs->tmr_stat) & valid); return 0; } +EXPORT_SYMBOL_GPL(extts_clean_up); /* * Interrupt service routine diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 871d44746f5c..6e16b19732f6 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -125,12 +125,6 @@ struct qeth_routing_info { enum qeth_routing_types type; }; -/* IPA stuff */ -struct qeth_ipa_info { - __u32 supported_funcs; - __u32 enabled_funcs; -}; - /* SETBRIDGEPORT stuff */ enum qeth_sbp_roles { QETH_SBP_ROLE_NONE = 0, @@ -169,41 +163,6 @@ struct qeth_vnicc_info { bool rx_bcast_enabled; }; -static inline int qeth_is_adp_supported(struct qeth_ipa_info *ipa, - enum qeth_ipa_setadp_cmd func) -{ - return (ipa->supported_funcs & func); -} - -static inline int qeth_is_ipa_supported(struct qeth_ipa_info *ipa, - enum qeth_ipa_funcs func) -{ - return (ipa->supported_funcs & func); -} - -static inline int qeth_is_ipa_enabled(struct qeth_ipa_info *ipa, - enum qeth_ipa_funcs func) -{ - return (ipa->supported_funcs & ipa->enabled_funcs & func); -} - -#define qeth_adp_supported(c, f) \ - qeth_is_adp_supported(&c->options.adp, f) -#define qeth_is_supported(c, f) \ - qeth_is_ipa_supported(&c->options.ipa4, f) -#define qeth_is_enabled(c, f) \ - qeth_is_ipa_enabled(&c->options.ipa4, f) -#define qeth_is_supported6(c, f) \ - qeth_is_ipa_supported(&c->options.ipa6, f) -#define qeth_is_enabled6(c, f) \ - qeth_is_ipa_enabled(&c->options.ipa6, f) -#define qeth_is_ipafunc_supported(c, prot, f) \ - ((prot == QETH_PROT_IPV6) ? \ - qeth_is_supported6(c, f) : qeth_is_supported(c, f)) -#define qeth_is_ipafunc_enabled(c, prot, f) \ - ((prot == QETH_PROT_IPV6) ? \ - qeth_is_enabled6(c, f) : qeth_is_enabled(c, f)) - #define QETH_IDX_FUNC_LEVEL_OSD 0x0101 #define QETH_IDX_FUNC_LEVEL_IQD 0x4108 @@ -735,11 +694,11 @@ enum qeth_discipline_id { }; struct qeth_card_options { + struct qeth_ipa_caps ipa4; + struct qeth_ipa_caps ipa6; struct qeth_routing_info route4; - struct qeth_ipa_info ipa4; - struct qeth_ipa_info adp; /*Adapter parameters*/ struct qeth_routing_info route6; - struct qeth_ipa_info ipa6; + struct qeth_ipa_caps adp; /* Adapter parameters */ struct qeth_sbp_info sbp; /* SETBRIDGEPORT options */ struct qeth_vnicc_info vnicc; /* VNICC options */ int fake_broadcast; @@ -862,6 +821,13 @@ static inline bool qeth_card_hw_is_reachable(struct qeth_card *card) return card->state == CARD_STATE_SOFTSETUP; } +static inline void qeth_unlock_channel(struct qeth_card *card, + struct qeth_channel *channel) +{ + atomic_set(&channel->irq_pending, 0); + wake_up(&card->wait_q); +} + struct qeth_trap_id { __u16 lparnr; char vmname[8]; @@ -1076,7 +1042,7 @@ void qeth_clear_working_pool_list(struct qeth_card *); void qeth_drain_output_queues(struct qeth_card *card); void qeth_setadp_promisc_mode(struct qeth_card *card, bool enable); int qeth_setadpparms_change_macaddr(struct qeth_card *); -void qeth_tx_timeout(struct net_device *); +void qeth_tx_timeout(struct net_device *, unsigned int txqueue); void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob, u16 cmd_length); int qeth_query_switch_attributes(struct qeth_card *card, diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index b9a2349e4b90..78349355c582 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -520,11 +520,10 @@ static int __qeth_issue_next_read(struct qeth_card *card) } else { QETH_DBF_MESSAGE(2, "error %i on device %x when starting next read ccw!\n", rc, CARD_DEVID(card)); - atomic_set(&channel->irq_pending, 0); + qeth_unlock_channel(card, channel); qeth_put_cmd(iob); card->read_or_write_problem = 1; qeth_schedule_recovery(card); - wake_up(&card->wait_q); } return rc; } @@ -972,8 +971,6 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, /* while we hold the ccwdev lock, this stays valid: */ gdev = dev_get_drvdata(&cdev->dev); card = dev_get_drvdata(&gdev->dev); - if (!card) - return; QETH_CARD_TEXT(card, 5, "irq"); @@ -1003,24 +1000,25 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, } channel->active_cmd = NULL; + qeth_unlock_channel(card, channel); rc = qeth_check_irb_error(card, cdev, irb); if (rc) { /* IO was terminated, free its resources. */ if (iob) qeth_cancel_cmd(iob, rc); - atomic_set(&channel->irq_pending, 0); - wake_up(&card->wait_q); return; } - atomic_set(&channel->irq_pending, 0); - - if (irb->scsw.cmd.fctl & (SCSW_FCTL_CLEAR_FUNC)) + if (irb->scsw.cmd.fctl & SCSW_FCTL_CLEAR_FUNC) { channel->state = CH_STATE_STOPPED; + wake_up(&card->wait_q); + } - if (irb->scsw.cmd.fctl & (SCSW_FCTL_HALT_FUNC)) + if (irb->scsw.cmd.fctl & SCSW_FCTL_HALT_FUNC) { channel->state = CH_STATE_HALTED; + wake_up(&card->wait_q); + } if (iob && (irb->scsw.cmd.fctl & (SCSW_FCTL_CLEAR_FUNC | SCSW_FCTL_HALT_FUNC))) { @@ -1054,7 +1052,7 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, qeth_cancel_cmd(iob, rc); qeth_clear_ipacmd_list(card); qeth_schedule_recovery(card); - goto out; + return; } } @@ -1062,16 +1060,12 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, /* sanity check: */ if (irb->scsw.cmd.count > iob->length) { qeth_cancel_cmd(iob, -EIO); - goto out; + return; } if (iob->callback) iob->callback(card, iob, iob->length - irb->scsw.cmd.count); } - -out: - wake_up(&card->wait_q); - return; } static void qeth_notify_skbs(struct qeth_qdio_out_q *q, @@ -1198,31 +1192,6 @@ static void qeth_free_buffer_pool(struct qeth_card *card) } } -static void qeth_clean_channel(struct qeth_channel *channel) -{ - struct ccw_device *cdev = channel->ccwdev; - - QETH_DBF_TEXT(SETUP, 2, "freech"); - - spin_lock_irq(get_ccwdev_lock(cdev)); - cdev->handler = NULL; - spin_unlock_irq(get_ccwdev_lock(cdev)); -} - -static void qeth_setup_channel(struct qeth_channel *channel) -{ - struct ccw_device *cdev = channel->ccwdev; - - QETH_DBF_TEXT(SETUP, 2, "setupch"); - - channel->state = CH_STATE_DOWN; - atomic_set(&channel->irq_pending, 0); - - spin_lock_irq(get_ccwdev_lock(cdev)); - cdev->handler = qeth_irq; - spin_unlock_irq(get_ccwdev_lock(cdev)); -} - static int qeth_osa_set_output_queues(struct qeth_card *card, bool single) { unsigned int count = single ? 1 : card->dev->num_tx_queues; @@ -1395,9 +1364,6 @@ static struct qeth_card *qeth_alloc_card(struct ccwgroup_device *gdev) if (!card->read_cmd) goto out_read_cmd; - qeth_setup_channel(&card->read); - qeth_setup_channel(&card->write); - qeth_setup_channel(&card->data); card->qeth_service_level.seq_print = qeth_core_sl_print; register_service_level(&card->qeth_service_level); return card; @@ -1467,12 +1433,38 @@ int qeth_stop_channel(struct qeth_channel *channel) channel->active_cmd); channel->active_cmd = NULL; } + cdev->handler = NULL; spin_unlock_irq(get_ccwdev_lock(cdev)); return rc; } EXPORT_SYMBOL_GPL(qeth_stop_channel); +static int qeth_start_channel(struct qeth_channel *channel) +{ + struct ccw_device *cdev = channel->ccwdev; + int rc; + + channel->state = CH_STATE_DOWN; + atomic_set(&channel->irq_pending, 0); + + spin_lock_irq(get_ccwdev_lock(cdev)); + cdev->handler = qeth_irq; + spin_unlock_irq(get_ccwdev_lock(cdev)); + + rc = ccw_device_set_online(cdev); + if (rc) + goto err; + + return 0; + +err: + spin_lock_irq(get_ccwdev_lock(cdev)); + cdev->handler = NULL; + spin_unlock_irq(get_ccwdev_lock(cdev)); + return rc; +} + static int qeth_halt_channels(struct qeth_card *card) { int rc1 = 0, rc2 = 0, rc3 = 0; @@ -1784,8 +1776,7 @@ static int qeth_send_control_data(struct qeth_card *card, QETH_CARD_TEXT_(card, 2, " err%d", rc); qeth_dequeue_cmd(card, iob); qeth_put_cmd(iob); - atomic_set(&channel->irq_pending, 0); - wake_up(&card->wait_q); + qeth_unlock_channel(card, channel); goto out; } @@ -2871,7 +2862,7 @@ static int qeth_query_setadapterparms_cb(struct qeth_card *card, cmd->data.setadapterparms.data.query_cmds_supp.lan_type; QETH_CARD_TEXT_(card, 2, "lnk %d", card->info.link_type); } - card->options.adp.supported_funcs = + card->options.adp.supported = cmd->data.setadapterparms.data.query_cmds_supp.supported_cmds; return 0; } @@ -2927,8 +2918,8 @@ static int qeth_query_ipassists_cb(struct qeth_card *card, case IPA_RC_NOTSUPP: case IPA_RC_L2_UNSUPPORTED_CMD: QETH_CARD_TEXT(card, 2, "ipaunsup"); - card->options.ipa4.supported_funcs |= IPA_SETADAPTERPARMS; - card->options.ipa6.supported_funcs |= IPA_SETADAPTERPARMS; + card->options.ipa4.supported |= IPA_SETADAPTERPARMS; + card->options.ipa6.supported |= IPA_SETADAPTERPARMS; return -EOPNOTSUPP; default: QETH_DBF_MESSAGE(1, "IPA_CMD_QIPASSIST on device %x: Unhandled rc=%#x\n", @@ -2936,13 +2927,11 @@ static int qeth_query_ipassists_cb(struct qeth_card *card, return -EIO; } - if (cmd->hdr.prot_version == QETH_PROT_IPV4) { - card->options.ipa4.supported_funcs = cmd->hdr.ipa_supported; - card->options.ipa4.enabled_funcs = cmd->hdr.ipa_enabled; - } else if (cmd->hdr.prot_version == QETH_PROT_IPV6) { - card->options.ipa6.supported_funcs = cmd->hdr.ipa_supported; - card->options.ipa6.enabled_funcs = cmd->hdr.ipa_enabled; - } else + if (cmd->hdr.prot_version == QETH_PROT_IPV4) + card->options.ipa4 = cmd->hdr.assists; + else if (cmd->hdr.prot_version == QETH_PROT_IPV6) + card->options.ipa6 = cmd->hdr.assists; + else QETH_DBF_MESSAGE(1, "IPA_CMD_QIPASSIST on device %x: Flawed LIC detected\n", CARD_DEVID(card)); return 0; @@ -3413,7 +3402,7 @@ static void qeth_qdio_start_poll(struct ccw_device *ccwdev, int queue, struct qeth_card *card = (struct qeth_card *)card_ptr; if (card->dev->flags & IFF_UP) - napi_schedule(&card->napi); + napi_schedule_irqoff(&card->napi); } int qeth_configure_cq(struct qeth_card *card, enum qeth_cq cq) @@ -4325,7 +4314,7 @@ int qeth_set_access_ctrl_online(struct qeth_card *card, int fallback) return rc; } -void qeth_tx_timeout(struct net_device *dev) +void qeth_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct qeth_card *card; @@ -4706,7 +4695,7 @@ static void qeth_determine_capabilities(struct qeth_card *card) QETH_CARD_TEXT(card, 2, "detcapab"); if (!ddev->online) { ddev_offline = 1; - rc = ccw_device_set_online(ddev); + rc = qeth_start_channel(channel); if (rc) { QETH_CARD_TEXT_(card, 2, "3err%d", rc); goto out; @@ -4881,9 +4870,6 @@ out_free_nothing: static void qeth_core_free_card(struct qeth_card *card) { QETH_CARD_TEXT(card, 2, "freecrd"); - qeth_clean_channel(&card->read); - qeth_clean_channel(&card->write); - qeth_clean_channel(&card->data); qeth_put_cmd(card->read_cmd); destroy_workqueue(card->event_wq); unregister_service_level(&card->qeth_service_level); @@ -4946,13 +4932,14 @@ retry: qeth_stop_channel(&card->write); qeth_stop_channel(&card->read); qdio_free(CARD_DDEV(card)); - rc = ccw_device_set_online(CARD_RDEV(card)); + + rc = qeth_start_channel(&card->read); if (rc) goto retriable; - rc = ccw_device_set_online(CARD_WDEV(card)); + rc = qeth_start_channel(&card->write); if (rc) goto retriable; - rc = ccw_device_set_online(CARD_DDEV(card)); + rc = qeth_start_channel(&card->data); if (rc) goto retriable; retriable: @@ -5013,9 +5000,9 @@ retriable: *carrier_ok = true; } - card->options.ipa4.supported_funcs = 0; - card->options.ipa6.supported_funcs = 0; - card->options.adp.supported_funcs = 0; + card->options.ipa4.supported = 0; + card->options.ipa6.supported = 0; + card->options.adp.supported = 0; card->options.sbp.supported_funcs = 0; card->info.diagass_support = 0; rc = qeth_query_ipassists(card, QETH_PROT_IPV4); @@ -5432,9 +5419,9 @@ int qeth_setassparms_cb(struct qeth_card *card, cmd->hdr.return_code = cmd->data.setassparms.hdr.return_code; if (cmd->hdr.prot_version == QETH_PROT_IPV4) - card->options.ipa4.enabled_funcs = cmd->hdr.ipa_enabled; + card->options.ipa4.enabled = cmd->hdr.assists.enabled; if (cmd->hdr.prot_version == QETH_PROT_IPV6) - card->options.ipa6.enabled_funcs = cmd->hdr.ipa_enabled; + card->options.ipa6.enabled = cmd->hdr.assists.enabled; return 0; } EXPORT_SYMBOL_GPL(qeth_setassparms_cb); diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h index 88f4dc140751..f4dc37e28ac7 100644 --- a/drivers/s390/net/qeth_core_mpc.h +++ b/drivers/s390/net/qeth_core_mpc.h @@ -53,6 +53,16 @@ static inline bool qeth_ipa_caps_enabled(struct qeth_ipa_caps *caps, u32 mask) return (caps->enabled & mask) == mask; } +#define qeth_adp_supported(c, f) \ + qeth_ipa_caps_supported(&c->options.adp, f) +#define qeth_is_supported(c, f) \ + qeth_ipa_caps_supported(&c->options.ipa4, f) +#define qeth_is_supported6(c, f) \ + qeth_ipa_caps_supported(&c->options.ipa6, f) +#define qeth_is_ipafunc_supported(c, prot, f) \ + ((prot == QETH_PROT_IPV6) ? qeth_is_supported6(c, f) : \ + qeth_is_supported(c, f)) + enum qeth_card_types { QETH_CARD_TYPE_OSD = 1, QETH_CARD_TYPE_IQD = 5, @@ -338,14 +348,14 @@ enum qeth_card_info_port_speed { /* (SET)DELIP(M) IPA stuff ***************************************************/ struct qeth_ipacmd_setdelip4 { - __u8 ip_addr[4]; - __u8 mask[4]; + __be32 addr; + __be32 mask; __u32 flags; } __attribute__ ((packed)); struct qeth_ipacmd_setdelip6 { - __u8 ip_addr[16]; - __u8 mask[16]; + struct in6_addr addr; + struct in6_addr prefix; __u32 flags; } __attribute__ ((packed)); @@ -766,8 +776,7 @@ struct qeth_ipacmd_hdr { __u8 prim_version_no; __u8 param_count; __u16 prot_version; - __u32 ipa_supported; - __u32 ipa_enabled; + struct qeth_ipa_caps assists; } __attribute__ ((packed)); /* The IPA command itself */ diff --git a/drivers/s390/net/qeth_l3.h b/drivers/s390/net/qeth_l3.h index 5db04fe472c0..6ccfe2121095 100644 --- a/drivers/s390/net/qeth_l3.h +++ b/drivers/s390/net/qeth_l3.h @@ -23,7 +23,6 @@ struct qeth_ipaddr { struct hlist_node hnode; enum qeth_ip_types type; u8 is_multicast:1; - u8 in_progress:1; u8 disp_flag:2; u8 ipato:1; /* ucast only */ @@ -35,7 +34,7 @@ struct qeth_ipaddr { union { struct { __be32 addr; - unsigned int mask; + __be32 mask; } a4; struct { struct in6_addr addr; @@ -102,7 +101,8 @@ struct qeth_ipato_entry { extern const struct attribute_group *qeth_l3_attr_groups[]; -void qeth_l3_ipaddr_to_string(enum qeth_prot_versions, const __u8 *, char *); +int qeth_l3_ipaddr_to_string(enum qeth_prot_versions proto, const u8 *addr, + char *buf); int qeth_l3_create_device_attributes(struct device *); void qeth_l3_remove_device_attributes(struct device *); int qeth_l3_setrouting_v4(struct qeth_card *); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 27126330a4b0..789d3b2ba0de 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -44,23 +44,13 @@ static int qeth_l3_register_addr_entry(struct qeth_card *, static int qeth_l3_deregister_addr_entry(struct qeth_card *, struct qeth_ipaddr *); -static void qeth_l3_ipaddr4_to_string(const __u8 *addr, char *buf) -{ - sprintf(buf, "%pI4", addr); -} - -static void qeth_l3_ipaddr6_to_string(const __u8 *addr, char *buf) -{ - sprintf(buf, "%pI6", addr); -} - -void qeth_l3_ipaddr_to_string(enum qeth_prot_versions proto, const __u8 *addr, - char *buf) +int qeth_l3_ipaddr_to_string(enum qeth_prot_versions proto, const u8 *addr, + char *buf) { if (proto == QETH_PROT_IPV4) - qeth_l3_ipaddr4_to_string(addr, buf); - else if (proto == QETH_PROT_IPV6) - qeth_l3_ipaddr6_to_string(addr, buf); + return sprintf(buf, "%pI4", addr); + else + return sprintf(buf, "%pI6", addr); } static struct qeth_ipaddr *qeth_l3_find_addr_by_ip(struct qeth_card *card, @@ -161,8 +151,6 @@ static int qeth_l3_delete_ip(struct qeth_card *card, addr->ref_counter--; if (addr->type == QETH_IP_TYPE_NORMAL && addr->ref_counter > 0) return rc; - if (addr->in_progress) - return -EINPROGRESS; if (qeth_card_hw_is_reachable(card)) rc = qeth_l3_deregister_addr_entry(card, addr); @@ -223,29 +211,10 @@ static int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr) return 0; } - /* qeth_l3_register_addr_entry can go to sleep - * if we add a IPV4 addr. It is caused by the reason - * that SETIP ipa cmd starts ARP staff for IPV4 addr. - * Thus we should unlock spinlock, and make a protection - * using in_progress variable to indicate that there is - * an hardware operation with this IPV4 address - */ - if (addr->proto == QETH_PROT_IPV4) { - addr->in_progress = 1; - mutex_unlock(&card->ip_lock); - rc = qeth_l3_register_addr_entry(card, addr); - mutex_lock(&card->ip_lock); - addr->in_progress = 0; - } else - rc = qeth_l3_register_addr_entry(card, addr); + rc = qeth_l3_register_addr_entry(card, addr); if (!rc || rc == -EADDRINUSE || rc == -ENETDOWN) { addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING; - if (addr->ref_counter < 1) { - qeth_l3_deregister_addr_entry(card, addr); - hash_del(&addr->hnode); - kfree(addr); - } } else { hash_del(&addr->hnode); kfree(addr); @@ -313,19 +282,10 @@ static void qeth_l3_recover_ip(struct qeth_card *card) hash_for_each_safe(card->ip_htable, i, tmp, addr, hnode) { if (addr->disp_flag == QETH_DISP_ADDR_ADD) { - if (addr->proto == QETH_PROT_IPV4) { - addr->in_progress = 1; - mutex_unlock(&card->ip_lock); - rc = qeth_l3_register_addr_entry(card, addr); - mutex_lock(&card->ip_lock); - addr->in_progress = 0; - } else - rc = qeth_l3_register_addr_entry(card, addr); + rc = qeth_l3_register_addr_entry(card, addr); if (!rc) { addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING; - if (addr->ref_counter < 1) - qeth_l3_delete_ip(card, addr); } else { hash_del(&addr->hnode); kfree(addr); @@ -379,17 +339,16 @@ static int qeth_l3_send_setdelmc(struct qeth_card *card, return qeth_send_ipa_cmd(card, iob, qeth_l3_setdelip_cb, NULL); } -static void qeth_l3_fill_netmask(u8 *netmask, unsigned int len) +static void qeth_l3_set_ipv6_prefix(struct in6_addr *prefix, unsigned int len) { - int i, j; - for (i = 0; i < 16; i++) { - j = (len) - (i * 8); - if (j >= 8) - netmask[i] = 0xff; - else if (j > 0) - netmask[i] = (u8)(0xFF00 >> j); - else - netmask[i] = 0; + unsigned int i = 0; + + while (len && i < 4) { + int mask_len = min_t(int, len, 32); + + prefix->s6_addr32[i] = inet_make_mask(mask_len); + len -= mask_len; + i++; } } @@ -412,7 +371,6 @@ static int qeth_l3_send_setdelip(struct qeth_card *card, { struct qeth_cmd_buffer *iob; struct qeth_ipa_cmd *cmd; - __u8 netmask[16]; u32 flags; QETH_CARD_TEXT(card, 4, "setdelip"); @@ -427,15 +385,13 @@ static int qeth_l3_send_setdelip(struct qeth_card *card, QETH_CARD_TEXT_(card, 4, "flags%02X", flags); if (addr->proto == QETH_PROT_IPV6) { - memcpy(cmd->data.setdelip6.ip_addr, &addr->u.a6.addr, - sizeof(struct in6_addr)); - qeth_l3_fill_netmask(netmask, addr->u.a6.pfxlen); - memcpy(cmd->data.setdelip6.mask, netmask, - sizeof(struct in6_addr)); + cmd->data.setdelip6.addr = addr->u.a6.addr; + qeth_l3_set_ipv6_prefix(&cmd->data.setdelip6.prefix, + addr->u.a6.pfxlen); cmd->data.setdelip6.flags = flags; } else { - memcpy(cmd->data.setdelip4.ip_addr, &addr->u.a4.addr, 4); - memcpy(cmd->data.setdelip4.mask, &addr->u.a4.mask, 4); + cmd->data.setdelip4.addr = addr->u.a4.addr; + cmd->data.setdelip4.mask = addr->u.a4.mask; cmd->data.setdelip4.flags = flags; } @@ -581,6 +537,7 @@ int qeth_l3_add_ipato_entry(struct qeth_card *card, QETH_CARD_TEXT(card, 2, "addipato"); + mutex_lock(&card->conf_mutex); mutex_lock(&card->ip_lock); list_for_each_entry(ipatoe, &card->ipato.entries, entry) { @@ -600,6 +557,7 @@ int qeth_l3_add_ipato_entry(struct qeth_card *card, } mutex_unlock(&card->ip_lock); + mutex_unlock(&card->conf_mutex); return rc; } @@ -613,6 +571,7 @@ int qeth_l3_del_ipato_entry(struct qeth_card *card, QETH_CARD_TEXT(card, 2, "delipato"); + mutex_lock(&card->conf_mutex); mutex_lock(&card->ip_lock); list_for_each_entry_safe(ipatoe, tmp, &card->ipato.entries, entry) { @@ -629,6 +588,8 @@ int qeth_l3_del_ipato_entry(struct qeth_card *card, } mutex_unlock(&card->ip_lock); + mutex_unlock(&card->conf_mutex); + return rc; } @@ -637,6 +598,7 @@ int qeth_l3_modify_rxip_vipa(struct qeth_card *card, bool add, const u8 *ip, enum qeth_prot_versions proto) { struct qeth_ipaddr addr; + int rc; qeth_l3_init_ipaddr(&addr, type, proto); if (proto == QETH_PROT_IPV4) @@ -644,7 +606,11 @@ int qeth_l3_modify_rxip_vipa(struct qeth_card *card, bool add, const u8 *ip, else memcpy(&addr.u.a6.addr, ip, 16); - return qeth_l3_modify_ip(card, &addr, add); + mutex_lock(&card->conf_mutex); + rc = qeth_l3_modify_ip(card, &addr, add); + mutex_unlock(&card->conf_mutex); + + return rc; } int qeth_l3_modify_hsuid(struct qeth_card *card, bool add) @@ -2292,12 +2258,6 @@ static int __qeth_l3_set_offline(struct ccwgroup_device *cgdev, rtnl_unlock(); qeth_l3_stop_card(card); - if (card->options.cq == QETH_CQ_ENABLED) { - rtnl_lock(); - call_netdevice_notifiers(NETDEV_REBOOT, card->dev); - rtnl_unlock(); - } - rc = qeth_stop_channel(&card->data); rc2 = qeth_stop_channel(&card->write); rc3 = qeth_stop_channel(&card->read); @@ -2436,7 +2396,7 @@ static int qeth_l3_ip_event(struct notifier_block *this, qeth_l3_init_ipaddr(&addr, QETH_IP_TYPE_NORMAL, QETH_PROT_IPV4); addr.u.a4.addr = ifa->ifa_address; - addr.u.a4.mask = be32_to_cpu(ifa->ifa_mask); + addr.u.a4.mask = ifa->ifa_mask; return qeth_l3_handle_ip_event(card, &addr, event); } diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c index f9067ed6c7d3..96c73965eb68 100644 --- a/drivers/s390/net/qeth_l3_sys.c +++ b/drivers/s390/net/qeth_l3_sys.c @@ -386,30 +386,35 @@ static ssize_t qeth_l3_dev_ipato_add_show(char *buf, struct qeth_card *card, enum qeth_prot_versions proto) { struct qeth_ipato_entry *ipatoe; - char addr_str[40]; - int entry_len; /* length of 1 entry string, differs between v4 and v6 */ - int i = 0; + int str_len = 0; - entry_len = (proto == QETH_PROT_IPV4)? 12 : 40; - /* add strlen for "/<mask>\n" */ - entry_len += (proto == QETH_PROT_IPV4)? 5 : 6; mutex_lock(&card->ip_lock); list_for_each_entry(ipatoe, &card->ipato.entries, entry) { + char addr_str[40]; + int entry_len; + if (ipatoe->proto != proto) continue; - /* String must not be longer than PAGE_SIZE. So we check if - * string length gets near PAGE_SIZE. Then we can savely display - * the next IPv6 address (worst case, compared to IPv4) */ - if ((PAGE_SIZE - i) <= entry_len) + + entry_len = qeth_l3_ipaddr_to_string(proto, ipatoe->addr, + addr_str); + if (entry_len < 0) + continue; + + /* Append /%mask to the entry: */ + entry_len += 1 + ((proto == QETH_PROT_IPV4) ? 2 : 3); + /* Enough room to format %entry\n into null terminated page? */ + if (entry_len + 1 > PAGE_SIZE - str_len - 1) break; - qeth_l3_ipaddr_to_string(proto, ipatoe->addr, addr_str); - i += snprintf(buf + i, PAGE_SIZE - i, - "%s/%i\n", addr_str, ipatoe->mask_bits); + + entry_len = scnprintf(buf, PAGE_SIZE - str_len, + "%s/%i\n", addr_str, ipatoe->mask_bits); + str_len += entry_len; + buf += entry_len; } mutex_unlock(&card->ip_lock); - i += snprintf(buf + i, PAGE_SIZE - i, "\n"); - return i; + return str_len ? str_len : scnprintf(buf, PAGE_SIZE, "\n"); } static ssize_t qeth_l3_dev_ipato_add4_show(struct device *dev, @@ -455,16 +460,14 @@ static ssize_t qeth_l3_dev_ipato_add_store(const char *buf, size_t count, int mask_bits; int rc = 0; - mutex_lock(&card->conf_mutex); rc = qeth_l3_parse_ipatoe(buf, proto, addr, &mask_bits); if (rc) - goto out; + return rc; ipatoe = kzalloc(sizeof(struct qeth_ipato_entry), GFP_KERNEL); - if (!ipatoe) { - rc = -ENOMEM; - goto out; - } + if (!ipatoe) + return -ENOMEM; + ipatoe->proto = proto; memcpy(ipatoe->addr, addr, (proto == QETH_PROT_IPV4)? 4:16); ipatoe->mask_bits = mask_bits; @@ -472,8 +475,7 @@ static ssize_t qeth_l3_dev_ipato_add_store(const char *buf, size_t count, rc = qeth_l3_add_ipato_entry(card, ipatoe); if (rc) kfree(ipatoe); -out: - mutex_unlock(&card->conf_mutex); + return rc ? rc : count; } @@ -496,11 +498,9 @@ static ssize_t qeth_l3_dev_ipato_del_store(const char *buf, size_t count, int mask_bits; int rc = 0; - mutex_lock(&card->conf_mutex); rc = qeth_l3_parse_ipatoe(buf, proto, addr, &mask_bits); if (!rc) rc = qeth_l3_del_ipato_entry(card, proto, addr, mask_bits); - mutex_unlock(&card->conf_mutex); return rc ? rc : count; } @@ -607,31 +607,34 @@ static ssize_t qeth_l3_dev_ip_add_show(struct device *dev, char *buf, { struct qeth_card *card = dev_get_drvdata(dev); struct qeth_ipaddr *ipaddr; - char addr_str[40]; int str_len = 0; - int entry_len; /* length of 1 entry string, differs between v4 and v6 */ int i; - entry_len = (proto == QETH_PROT_IPV4)? 12 : 40; - entry_len += 2; /* \n + terminator */ mutex_lock(&card->ip_lock); hash_for_each(card->ip_htable, i, ipaddr, hnode) { + char addr_str[40]; + int entry_len; + if (ipaddr->proto != proto || ipaddr->type != type) continue; - /* String must not be longer than PAGE_SIZE. So we check if - * string length gets near PAGE_SIZE. Then we can savely display - * the next IPv6 address (worst case, compared to IPv4) */ - if ((PAGE_SIZE - str_len) <= entry_len) + + entry_len = qeth_l3_ipaddr_to_string(proto, (u8 *)&ipaddr->u, + addr_str); + if (entry_len < 0) + continue; + + /* Enough room to format %addr\n into null terminated page? */ + if (entry_len + 1 > PAGE_SIZE - str_len - 1) break; - qeth_l3_ipaddr_to_string(proto, (const u8 *)&ipaddr->u, - addr_str); - str_len += snprintf(buf + str_len, PAGE_SIZE - str_len, "%s\n", - addr_str); + + entry_len = scnprintf(buf, PAGE_SIZE - str_len, "%s\n", + addr_str); + str_len += entry_len; + buf += entry_len; } mutex_unlock(&card->ip_lock); - str_len += snprintf(buf + str_len, PAGE_SIZE - str_len, "\n"); - return str_len; + return str_len ? str_len : scnprintf(buf, PAGE_SIZE, "\n"); } static ssize_t qeth_l3_dev_vipa_add4_show(struct device *dev, @@ -642,63 +645,34 @@ static ssize_t qeth_l3_dev_vipa_add4_show(struct device *dev, QETH_IP_TYPE_VIPA); } -static int qeth_l3_parse_vipae(const char *buf, enum qeth_prot_versions proto, - u8 *addr) -{ - if (qeth_l3_string_to_ipaddr(buf, proto, addr)) { - return -EINVAL; - } - return 0; -} - -static ssize_t qeth_l3_dev_vipa_add_store(const char *buf, size_t count, - struct qeth_card *card, enum qeth_prot_versions proto) +static ssize_t qeth_l3_vipa_store(struct device *dev, const char *buf, bool add, + size_t count, enum qeth_prot_versions proto) { + struct qeth_card *card = dev_get_drvdata(dev); u8 addr[16] = {0, }; int rc; - mutex_lock(&card->conf_mutex); - rc = qeth_l3_parse_vipae(buf, proto, addr); + rc = qeth_l3_string_to_ipaddr(buf, proto, addr); if (!rc) - rc = qeth_l3_modify_rxip_vipa(card, true, addr, + rc = qeth_l3_modify_rxip_vipa(card, add, addr, QETH_IP_TYPE_VIPA, proto); - mutex_unlock(&card->conf_mutex); return rc ? rc : count; } static ssize_t qeth_l3_dev_vipa_add4_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct qeth_card *card = dev_get_drvdata(dev); - - return qeth_l3_dev_vipa_add_store(buf, count, card, QETH_PROT_IPV4); + return qeth_l3_vipa_store(dev, buf, true, count, QETH_PROT_IPV4); } static QETH_DEVICE_ATTR(vipa_add4, add4, 0644, qeth_l3_dev_vipa_add4_show, qeth_l3_dev_vipa_add4_store); -static ssize_t qeth_l3_dev_vipa_del_store(const char *buf, size_t count, - struct qeth_card *card, enum qeth_prot_versions proto) -{ - u8 addr[16]; - int rc; - - mutex_lock(&card->conf_mutex); - rc = qeth_l3_parse_vipae(buf, proto, addr); - if (!rc) - rc = qeth_l3_modify_rxip_vipa(card, false, addr, - QETH_IP_TYPE_VIPA, proto); - mutex_unlock(&card->conf_mutex); - return rc ? rc : count; -} - static ssize_t qeth_l3_dev_vipa_del4_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct qeth_card *card = dev_get_drvdata(dev); - - return qeth_l3_dev_vipa_del_store(buf, count, card, QETH_PROT_IPV4); + return qeth_l3_vipa_store(dev, buf, true, count, QETH_PROT_IPV4); } static QETH_DEVICE_ATTR(vipa_del4, del4, 0200, NULL, @@ -715,9 +689,7 @@ static ssize_t qeth_l3_dev_vipa_add6_show(struct device *dev, static ssize_t qeth_l3_dev_vipa_add6_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct qeth_card *card = dev_get_drvdata(dev); - - return qeth_l3_dev_vipa_add_store(buf, count, card, QETH_PROT_IPV6); + return qeth_l3_vipa_store(dev, buf, true, count, QETH_PROT_IPV6); } static QETH_DEVICE_ATTR(vipa_add6, add6, 0644, @@ -727,9 +699,7 @@ static QETH_DEVICE_ATTR(vipa_add6, add6, 0644, static ssize_t qeth_l3_dev_vipa_del6_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct qeth_card *card = dev_get_drvdata(dev); - - return qeth_l3_dev_vipa_del_store(buf, count, card, QETH_PROT_IPV6); + return qeth_l3_vipa_store(dev, buf, false, count, QETH_PROT_IPV6); } static QETH_DEVICE_ATTR(vipa_del6, del6, 0200, NULL, @@ -782,54 +752,34 @@ static int qeth_l3_parse_rxipe(const char *buf, enum qeth_prot_versions proto, return 0; } -static ssize_t qeth_l3_dev_rxip_add_store(const char *buf, size_t count, - struct qeth_card *card, enum qeth_prot_versions proto) +static ssize_t qeth_l3_rxip_store(struct device *dev, const char *buf, bool add, + size_t count, enum qeth_prot_versions proto) { + struct qeth_card *card = dev_get_drvdata(dev); u8 addr[16] = {0, }; int rc; - mutex_lock(&card->conf_mutex); rc = qeth_l3_parse_rxipe(buf, proto, addr); if (!rc) - rc = qeth_l3_modify_rxip_vipa(card, true, addr, + rc = qeth_l3_modify_rxip_vipa(card, add, addr, QETH_IP_TYPE_RXIP, proto); - mutex_unlock(&card->conf_mutex); return rc ? rc : count; } static ssize_t qeth_l3_dev_rxip_add4_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct qeth_card *card = dev_get_drvdata(dev); - - return qeth_l3_dev_rxip_add_store(buf, count, card, QETH_PROT_IPV4); + return qeth_l3_rxip_store(dev, buf, true, count, QETH_PROT_IPV4); } static QETH_DEVICE_ATTR(rxip_add4, add4, 0644, qeth_l3_dev_rxip_add4_show, qeth_l3_dev_rxip_add4_store); -static ssize_t qeth_l3_dev_rxip_del_store(const char *buf, size_t count, - struct qeth_card *card, enum qeth_prot_versions proto) -{ - u8 addr[16]; - int rc; - - mutex_lock(&card->conf_mutex); - rc = qeth_l3_parse_rxipe(buf, proto, addr); - if (!rc) - rc = qeth_l3_modify_rxip_vipa(card, false, addr, - QETH_IP_TYPE_RXIP, proto); - mutex_unlock(&card->conf_mutex); - return rc ? rc : count; -} - static ssize_t qeth_l3_dev_rxip_del4_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct qeth_card *card = dev_get_drvdata(dev); - - return qeth_l3_dev_rxip_del_store(buf, count, card, QETH_PROT_IPV4); + return qeth_l3_rxip_store(dev, buf, false, count, QETH_PROT_IPV4); } static QETH_DEVICE_ATTR(rxip_del4, del4, 0200, NULL, @@ -846,9 +796,7 @@ static ssize_t qeth_l3_dev_rxip_add6_show(struct device *dev, static ssize_t qeth_l3_dev_rxip_add6_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct qeth_card *card = dev_get_drvdata(dev); - - return qeth_l3_dev_rxip_add_store(buf, count, card, QETH_PROT_IPV6); + return qeth_l3_rxip_store(dev, buf, true, count, QETH_PROT_IPV6); } static QETH_DEVICE_ATTR(rxip_add6, add6, 0644, @@ -858,9 +806,7 @@ static QETH_DEVICE_ATTR(rxip_add6, add6, 0644, static ssize_t qeth_l3_dev_rxip_del6_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct qeth_card *card = dev_get_drvdata(dev); - - return qeth_l3_dev_rxip_del_store(buf, count, card, QETH_PROT_IPV6); + return qeth_l3_rxip_store(dev, buf, false, count, QETH_PROT_IPV6); } static QETH_DEVICE_ATTR(rxip_del6, del6, 0200, NULL, diff --git a/drivers/staging/ks7010/ks_wlan_net.c b/drivers/staging/ks7010/ks_wlan_net.c index 3cffc8be6656..211dd4a11cac 100644 --- a/drivers/staging/ks7010/ks_wlan_net.c +++ b/drivers/staging/ks7010/ks_wlan_net.c @@ -45,7 +45,7 @@ struct wep_key { * function prototypes */ static int ks_wlan_open(struct net_device *dev); -static void ks_wlan_tx_timeout(struct net_device *dev); +static void ks_wlan_tx_timeout(struct net_device *dev, unsigned int txqueue); static int ks_wlan_start_xmit(struct sk_buff *skb, struct net_device *dev); static int ks_wlan_close(struct net_device *dev); static void ks_wlan_set_rx_mode(struct net_device *dev); @@ -2498,7 +2498,7 @@ int ks_wlan_set_mac_address(struct net_device *dev, void *addr) } static -void ks_wlan_tx_timeout(struct net_device *dev) +void ks_wlan_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct ks_wlan_private *priv = netdev_priv(dev); diff --git a/drivers/staging/qlge/qlge_main.c b/drivers/staging/qlge/qlge_main.c index 6ad4515311f7..24d20f000435 100644 --- a/drivers/staging/qlge/qlge_main.c +++ b/drivers/staging/qlge/qlge_main.c @@ -4274,7 +4274,7 @@ static int qlge_set_mac_address(struct net_device *ndev, void *p) return status; } -static void qlge_tx_timeout(struct net_device *ndev) +static void qlge_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct ql_adapter *qdev = netdev_priv(ndev); ql_queue_asic_error(qdev); diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c index dace81a7d1ba..a51d627284d1 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c @@ -267,7 +267,7 @@ static short _rtl92e_check_nic_enough_desc(struct net_device *dev, int prio) return 0; } -static void _rtl92e_tx_timeout(struct net_device *dev) +static void _rtl92e_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct r8192_priv *priv = rtllib_priv(dev); diff --git a/drivers/staging/rtl8192u/r8192U_core.c b/drivers/staging/rtl8192u/r8192U_core.c index 7e2cabd16e88..482382a887f8 100644 --- a/drivers/staging/rtl8192u/r8192U_core.c +++ b/drivers/staging/rtl8192u/r8192U_core.c @@ -640,7 +640,7 @@ short check_nic_enough_desc(struct net_device *dev, int queue_index) return (used < MAX_TX_URB); } -static void tx_timeout(struct net_device *dev) +static void tx_timeout(struct net_device *dev, unsigned int txqueue) { struct r8192_priv *priv = ieee80211_priv(dev); diff --git a/drivers/staging/unisys/visornic/visornic_main.c b/drivers/staging/unisys/visornic/visornic_main.c index 1d1440d43002..0433536930a9 100644 --- a/drivers/staging/unisys/visornic/visornic_main.c +++ b/drivers/staging/unisys/visornic/visornic_main.c @@ -1078,7 +1078,7 @@ out_save_flags: * Queue the work and return. Make sure we have not already been informed that * the IO Partition is gone; if so, we will have already timed-out the xmits. */ -static void visornic_xmit_timeout(struct net_device *netdev) +static void visornic_xmit_timeout(struct net_device *netdev, unsigned int txqueue) { struct visornic_devdata *devdata = netdev_priv(netdev); unsigned long flags; diff --git a/drivers/staging/wlan-ng/p80211netdev.c b/drivers/staging/wlan-ng/p80211netdev.c index a70fb84f38f1..b809c0015c0c 100644 --- a/drivers/staging/wlan-ng/p80211netdev.c +++ b/drivers/staging/wlan-ng/p80211netdev.c @@ -101,7 +101,7 @@ static void p80211knetdev_set_multicast_list(struct net_device *dev); static int p80211knetdev_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); static int p80211knetdev_set_mac_address(struct net_device *dev, void *addr); -static void p80211knetdev_tx_timeout(struct net_device *netdev); +static void p80211knetdev_tx_timeout(struct net_device *netdev, unsigned int txqueue); static int p80211_rx_typedrop(struct wlandevice *wlandev, u16 fc); int wlan_watchdog = 5000; @@ -1074,7 +1074,7 @@ static int p80211_rx_typedrop(struct wlandevice *wlandev, u16 fc) return drop; } -static void p80211knetdev_tx_timeout(struct net_device *netdev) +static void p80211knetdev_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct wlandevice *wlandev = netdev->ml_priv; diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 36a3eb4ad4c5..f1c90fa2978e 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -2704,7 +2704,7 @@ static netdev_tx_t gsm_mux_net_start_xmit(struct sk_buff *skb, } /* called when a packet did not ack after watchdogtimeout */ -static void gsm_mux_net_tx_timeout(struct net_device *net) +static void gsm_mux_net_tx_timeout(struct net_device *net, unsigned int txqueue) { /* Tell syslog we are hosed. */ dev_dbg(&net->dev, "Tx timed out.\n"); diff --git a/drivers/tty/synclink.c b/drivers/tty/synclink.c index 84f26e43b229..61dc6b4a43d0 100644 --- a/drivers/tty/synclink.c +++ b/drivers/tty/synclink.c @@ -7837,7 +7837,7 @@ static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) * * dev pointer to network device structure */ -static void hdlcdev_tx_timeout(struct net_device *dev) +static void hdlcdev_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct mgsl_struct *info = dev_to_port(dev); unsigned long flags; diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c index e8a9047de451..5d59e2369c8a 100644 --- a/drivers/tty/synclink_gt.c +++ b/drivers/tty/synclink_gt.c @@ -1682,7 +1682,7 @@ static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) * * dev pointer to network device structure */ -static void hdlcdev_tx_timeout(struct net_device *dev) +static void hdlcdev_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct slgt_info *info = dev_to_port(dev); unsigned long flags; diff --git a/drivers/tty/synclinkmp.c b/drivers/tty/synclinkmp.c index fcb91bf7a15b..33181fa6eb18 100644 --- a/drivers/tty/synclinkmp.c +++ b/drivers/tty/synclinkmp.c @@ -1807,7 +1807,7 @@ static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) * * dev pointer to network device structure */ -static void hdlcdev_tx_timeout(struct net_device *dev) +static void hdlcdev_tx_timeout(struct net_device *dev, unsigned int txqueue) { SLMP_INFO *info = dev_to_port(dev); unsigned long flags; |