From 848ef58695d8c013f24633352586279cfb40e9d9 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 1 Apr 2014 17:02:53 +0300 Subject: net: rfkill: gpio: remove unused and obsolete platform parameters After upgrading to descriptor based gpios, the gpio numbers are not used anymore. The power_clk_name and the platform specific setup and close hooks are not used by anybody, and we should not encourage use of such things, so removing them. Acked-by: Alexandre Courbot Reviewed-by: Linus Walleij Signed-off-by: Heikki Krogerus Signed-off-by: Johannes Berg --- include/linux/rfkill-gpio.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rfkill-gpio.h b/include/linux/rfkill-gpio.h index 4d09f6eab359..20bcb55498cd 100644 --- a/include/linux/rfkill-gpio.h +++ b/include/linux/rfkill-gpio.h @@ -27,21 +27,11 @@ * struct rfkill_gpio_platform_data - platform data for rfkill gpio device. * for unused gpio's, the expected value is -1. * @name: name for the gpio rf kill instance - * @reset_gpio: GPIO which is used for reseting rfkill switch - * @shutdown_gpio: GPIO which is used for shutdown of rfkill switch - * @power_clk_name: [optional] name of clk to turn off while blocked - * @gpio_runtime_close: clean up platform specific gpio configuration - * @gpio_runtime_setup: set up platform specific gpio configuration */ struct rfkill_gpio_platform_data { char *name; - int reset_gpio; - int shutdown_gpio; - const char *power_clk_name; enum rfkill_type type; - void (*gpio_runtime_close)(struct platform_device *); - int (*gpio_runtime_setup)(struct platform_device *); }; #endif /* __RFKILL_GPIO_H */ -- cgit v1.2.3-59-g8ed1b From af6b6967d6e17fe070c0fd1be364c34cbd31a523 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Wed, 16 Apr 2014 17:19:12 +0200 Subject: net: phy: export genphy_config_init() This enables other drivers to call this generic implementation, and then only do specific details on top of it. Signed-off-by: Daniel Mack Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 3 ++- include/linux/phy.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 0ce606624296..466ae3e06322 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1067,7 +1067,7 @@ int genphy_soft_reset(struct phy_device *phydev) } EXPORT_SYMBOL(genphy_soft_reset); -static int genphy_config_init(struct phy_device *phydev) +int genphy_config_init(struct phy_device *phydev) { int val; u32 features; @@ -1118,6 +1118,7 @@ static int gen10g_soft_reset(struct phy_device *phydev) /* Do nothing for now */ return 0; } +EXPORT_SYMBOL(genphy_config_init); static int gen10g_config_init(struct phy_device *phydev) { diff --git a/include/linux/phy.h b/include/linux/phy.h index 4d0221fd0688..51d15f684e7e 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -666,6 +666,7 @@ static inline int phy_read_status(struct phy_device *phydev) return phydev->drv->read_status(phydev); } +int genphy_config_init(struct phy_device *phydev); int genphy_setup_forced(struct phy_device *phydev); int genphy_restart_aneg(struct phy_device *phydev); int genphy_config_aneg(struct phy_device *phydev); -- cgit v1.2.3-59-g8ed1b From a0265d28b3a5877b5b8edd14eb12a2ccb60ab1f3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 17 Apr 2014 13:45:03 +0800 Subject: net: Add __dev_forward_skb This patch adds the helper __dev_forward_skb which is identical to dev_forward_skb except that it doesn't actually inject the skb into the stack. This is useful where we wish to have finer control over how the packet is injected, e.g., via netif_rx_ni or netif_receive_skb. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + net/core/dev.c | 42 ++++++++++++++++++++++++------------------ 2 files changed, 25 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7ed3a3aa6604..a803d792df1e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2633,6 +2633,7 @@ int dev_get_phys_port_id(struct net_device *dev, struct netdev_phys_port_id *ppid); int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq); +int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb); diff --git a/net/core/dev.c b/net/core/dev.c index d2c8a06b3a98..11d70e3afefa 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1661,6 +1661,29 @@ bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb) } EXPORT_SYMBOL_GPL(is_skb_forwardable); +int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb) +{ + if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { + if (skb_copy_ubufs(skb, GFP_ATOMIC)) { + atomic_long_inc(&dev->rx_dropped); + kfree_skb(skb); + return NET_RX_DROP; + } + } + + if (unlikely(!is_skb_forwardable(dev, skb))) { + atomic_long_inc(&dev->rx_dropped); + kfree_skb(skb); + return NET_RX_DROP; + } + + skb_scrub_packet(skb, true); + skb->protocol = eth_type_trans(skb, dev); + + return 0; +} +EXPORT_SYMBOL_GPL(__dev_forward_skb); + /** * dev_forward_skb - loopback an skb to another netif * @@ -1681,24 +1704,7 @@ EXPORT_SYMBOL_GPL(is_skb_forwardable); */ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) { - if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { - if (skb_copy_ubufs(skb, GFP_ATOMIC)) { - atomic_long_inc(&dev->rx_dropped); - kfree_skb(skb); - return NET_RX_DROP; - } - } - - if (unlikely(!is_skb_forwardable(dev, skb))) { - atomic_long_inc(&dev->rx_dropped); - kfree_skb(skb); - return NET_RX_DROP; - } - - skb_scrub_packet(skb, true); - skb->protocol = eth_type_trans(skb, dev); - - return netif_rx_internal(skb); + return __dev_forward_skb(dev, skb) ?: netif_rx_internal(skb); } EXPORT_SYMBOL_GPL(dev_forward_skb); -- cgit v1.2.3-59-g8ed1b From 68957303f44a501af5cf37913208a2acaa6bcdf1 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 25 Mar 2014 06:51:47 +0100 Subject: NFC: ST21NFCA: Add driver for STMicroelectronics ST21NFCA NFC Chip Add driver for STMicroelectronics ST21NFCA NFC controller. ST21NFCA is using HCI protocol, shdlc as LLC layer & I2C as communication protocol. Adding support for Reader/Writer mode with Tag type 1/2/3/4 A & B. It is using proprietary gate 15 for ISO14443-3 such as type 1 & type 2 tags. It is using proprietary gate 14 for type F tags. ST21NFCA_DEVICE_MGNT_GATE gives access to proprietary CLF configuration. Standard gate for ISO14443-4 A (13) & B (11) are also used. ST21NFCA specific mecanism: One particular point to notice for the data handling is that frame does not contain any length value. Therefore the i2c part of this driver is managing the reception with a read length sequence until the end of frame (0x7e) is reached. In order to avoid conflict between sof & eof a mecanism called byte stuffing concist of an escape byte (0x7d) insertion before special byte (0x7e, 0x7d). The special byte is then xored with 0x20. In this driver, When data are available in the CLF, the interrupt gpio is driven to active state and triggered an interrupt. Once the i2c_master_recv start, the interrupt gpio is driven to idle state until its complete. If the frame is incomplete or data are still available, interrupts will be triggered again. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- drivers/nfc/Kconfig | 1 + drivers/nfc/Makefile | 1 + drivers/nfc/st21nfca/Kconfig | 23 ++ drivers/nfc/st21nfca/Makefile | 8 + drivers/nfc/st21nfca/i2c.c | 595 +++++++++++++++++++++++++++++++++ drivers/nfc/st21nfca/st21nfca.c | 506 ++++++++++++++++++++++++++++ drivers/nfc/st21nfca/st21nfca.h | 87 +++++ include/linux/platform_data/st21nfca.h | 32 ++ 8 files changed, 1253 insertions(+) create mode 100644 drivers/nfc/st21nfca/Kconfig create mode 100644 drivers/nfc/st21nfca/Makefile create mode 100644 drivers/nfc/st21nfca/i2c.c create mode 100644 drivers/nfc/st21nfca/st21nfca.c create mode 100644 drivers/nfc/st21nfca/st21nfca.h create mode 100644 include/linux/platform_data/st21nfca.h (limited to 'include/linux') diff --git a/drivers/nfc/Kconfig b/drivers/nfc/Kconfig index 65d4ca19d132..26c66a126551 100644 --- a/drivers/nfc/Kconfig +++ b/drivers/nfc/Kconfig @@ -71,5 +71,6 @@ config NFC_PORT100 source "drivers/nfc/pn544/Kconfig" source "drivers/nfc/microread/Kconfig" source "drivers/nfc/nfcmrvl/Kconfig" +source "drivers/nfc/st21nfca/Kconfig" endmenu diff --git a/drivers/nfc/Makefile b/drivers/nfc/Makefile index ae42a3fa60c9..23225b0287fd 100644 --- a/drivers/nfc/Makefile +++ b/drivers/nfc/Makefile @@ -11,5 +11,6 @@ obj-$(CONFIG_NFC_SIM) += nfcsim.o obj-$(CONFIG_NFC_PORT100) += port100.o obj-$(CONFIG_NFC_MRVL) += nfcmrvl/ obj-$(CONFIG_NFC_TRF7970A) += trf7970a.o +obj-$(CONFIG_NFC_ST21NFCA) += st21nfca/ ccflags-$(CONFIG_NFC_DEBUG) := -DDEBUG diff --git a/drivers/nfc/st21nfca/Kconfig b/drivers/nfc/st21nfca/Kconfig new file mode 100644 index 000000000000..ee459f066ade --- /dev/null +++ b/drivers/nfc/st21nfca/Kconfig @@ -0,0 +1,23 @@ +config NFC_ST21NFCA + tristate "STMicroelectronics ST21NFCA NFC driver" + depends on NFC_HCI + select CRC_CCITT + default n + ---help--- + STMicroelectronics ST21NFCA core driver. It implements the chipset + HCI logic and hooks into the NFC kernel APIs. Physical layers will + register against it. + + To compile this driver as a module, choose m here. The module will + be called st21nfca. + Say N if unsure. + +config NFC_ST21NFCA_I2C + tristate "NFC ST21NFCA i2c support" + depends on NFC_ST21NFCA && I2C && NFC_SHDLC + ---help--- + This module adds support for the STMicroelectronics st21nfca i2c interface. + Select this if your platform is using the i2c bus. + + If you choose to build a module, it'll be called st21nfca_i2c. + Say N if unsure. diff --git a/drivers/nfc/st21nfca/Makefile b/drivers/nfc/st21nfca/Makefile new file mode 100644 index 000000000000..038ed093a119 --- /dev/null +++ b/drivers/nfc/st21nfca/Makefile @@ -0,0 +1,8 @@ +# +# Makefile for ST21NFCA HCI based NFC driver +# + +st21nfca_i2c-objs = i2c.o + +obj-$(CONFIG_NFC_ST21NFCA) += st21nfca.o +obj-$(CONFIG_NFC_ST21NFCA_I2C) += st21nfca_i2c.o diff --git a/drivers/nfc/st21nfca/i2c.c b/drivers/nfc/st21nfca/i2c.c new file mode 100644 index 000000000000..3b0fd0f76d1c --- /dev/null +++ b/drivers/nfc/st21nfca/i2c.c @@ -0,0 +1,595 @@ +/* + * I2C Link Layer for ST21NFCA HCI based Driver + * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "st21nfca.h" + +/* + * Every frame starts with ST21NFCA_SOF_EOF and ends with ST21NFCA_SOF_EOF. + * Because ST21NFCA_SOF_EOF is a possible data value, there is a mecanism + * called byte stuffing has been introduced. + * + * if byte == ST21NFCA_SOF_EOF or ST21NFCA_ESCAPE_BYTE_STUFFING + * - insert ST21NFCA_ESCAPE_BYTE_STUFFING (escape byte) + * - xor byte with ST21NFCA_BYTE_STUFFING_MASK + */ +#define ST21NFCA_SOF_EOF 0x7e +#define ST21NFCA_BYTE_STUFFING_MASK 0x20 +#define ST21NFCA_ESCAPE_BYTE_STUFFING 0x7d + +/* SOF + 00 fill size */ +#define ST21NFCA_FRAME_HEADROOM 2 + +/* 4 bytes crc (worst case byte stuffing) + EOF */ +#define ST21NFCA_FRAME_TAILROOM 5 + +#define ST21NFCA_HCI_I2C_DRIVER_NAME "st21nfca_hci_i2c" + +static struct i2c_device_id st21nfca_hci_i2c_id_table[] = { + {ST21NFCA_HCI_DRIVER_NAME, 0}, + {} +}; + +MODULE_DEVICE_TABLE(i2c, st21nfca_hci_i2c_id_table); + +struct st21nfca_i2c_phy { + struct i2c_client *i2c_dev; + struct nfc_hci_dev *hdev; + + unsigned int gpio_ena; + unsigned int gpio_irq; + unsigned int irq_polarity; + + struct sk_buff *pending_skb; + int current_read_len; + /* + * crc might have fail because i2c macro + * is disable due to other interface activity + */ + int crc_trials; + + int powered; + int run_mode; + + /* + * < 0 if hardware error occured (e.g. i2c err) + * and prevents normal operation. + */ + int hard_fault; +}; +static u8 len_seq[] = { 13, 24, 15, 29 }; +static u16 wait_tab[] = { 2, 3, 5, 15, 20, 40}; + +#define I2C_DUMP_SKB(info, skb) \ +do { \ + pr_debug("%s:\n", info); \ + print_hex_dump(KERN_DEBUG, "i2c: ", DUMP_PREFIX_OFFSET, \ + 16, 1, (skb)->data, (skb)->len, 0); \ +} while (0) + +static void st21nfca_hci_platform_init(struct st21nfca_i2c_phy *phy) +{ + u16 wait_tab[] = { 50, 300, 1000 }; + char reboot_cmd[] = { 0x7E, 0x66, 0x48, 0xF6, 0x7E }; + u8 tmp[ST21NFCA_HCI_LLC_MAX_SIZE]; + int i, r = -1; + + for (i = 0; i < ARRAY_SIZE(wait_tab) && r < 0; i++) + r = i2c_master_recv(phy->i2c_dev, tmp, + ST21NFCA_HCI_LLC_MAX_SIZE); + + r = -1; + for (i = 0; i < ARRAY_SIZE(wait_tab) && r < 0; i++) + r = i2c_master_send(phy->i2c_dev, reboot_cmd, + sizeof(reboot_cmd)); + usleep_range(1000, 1500); + +} + +static int st21nfca_hci_i2c_enable(void *phy_id) +{ + struct st21nfca_i2c_phy *phy = phy_id; + + gpio_set_value(phy->gpio_ena, 1); + phy->powered = 1; + phy->run_mode = ST21NFCA_HCI_MODE; + + usleep_range(10000, 15000); + + return 0; +} + +static void st21nfca_hci_i2c_disable(void *phy_id) +{ + struct st21nfca_i2c_phy *phy = phy_id; + + pr_info("\n"); + gpio_set_value(phy->gpio_ena, 0); + + phy->powered = 0; +} + +static int st21nfca_hci_add_len_crc(struct sk_buff *skb) +{ + int ret = 2; + u16 crc; + u8 tmp; + + *skb_push(skb, 1) = 0; + + crc = crc_ccitt(0xffff, skb->data, skb->len); + crc = ~crc; + + tmp = crc & 0x00ff; + *skb_put(skb, 1) = tmp; + + tmp = (crc >> 8) & 0x00ff; + *skb_put(skb, 1) = tmp; + + return ret; +} + +static void st21nfca_hci_remove_len_crc(struct sk_buff *skb, int crc_len) +{ + skb_pull(skb, ST21NFCA_FRAME_HEADROOM); + skb_trim(skb, crc_len); +} + +/* + * Writing a frame must not return the number of written bytes. + * It must return either zero for success, or <0 for error. + * In addition, it must not alter the skb + */ +static int st21nfca_hci_i2c_write(void *phy_id, struct sk_buff *skb) +{ + int r = -1, i, j, len; + struct st21nfca_i2c_phy *phy = phy_id; + struct i2c_client *client = phy->i2c_dev; + u16 wait_tab[] = { 2, 3, 5, 15, 20, 40}; + u8 tmp[ST21NFCA_HCI_LLC_MAX_SIZE * 2]; + + I2C_DUMP_SKB("st21nfca_hci_i2c_write", skb); + + + if (phy->hard_fault != 0) + return phy->hard_fault; + + /* + * Compute CRC before byte stuffing computation on frame + * Note st21nfca_hci_add_len_crc is doing a byte stuffing + * on its own value + */ + len = st21nfca_hci_add_len_crc(skb); + + /* add ST21NFCA_SOF_EOF on tail */ + *skb_put(skb, 1) = ST21NFCA_SOF_EOF; + /* add ST21NFCA_SOF_EOF on head */ + *skb_push(skb, 1) = ST21NFCA_SOF_EOF; + + /* + * Compute byte stuffing + * if byte == ST21NFCA_SOF_EOF or ST21NFCA_ESCAPE_BYTE_STUFFING + * insert ST21NFCA_ESCAPE_BYTE_STUFFING (escape byte) + * xor byte with ST21NFCA_BYTE_STUFFING_MASK + */ + tmp[0] = skb->data[0]; + for (i = 1, j = 1; i < skb->len - 1; i++, j++) { + if (skb->data[i] == ST21NFCA_SOF_EOF + || skb->data[i] == ST21NFCA_ESCAPE_BYTE_STUFFING) { + tmp[j] = ST21NFCA_ESCAPE_BYTE_STUFFING; + j++; + tmp[j] = skb->data[i] ^ ST21NFCA_BYTE_STUFFING_MASK; + } else { + tmp[j] = skb->data[i]; + } + } + tmp[j] = skb->data[i]; + j++; + + /* + * Manage sleep mode + * Try 3 times to send data with delay between each + */ + for (i = 0; i < ARRAY_SIZE(wait_tab) && r < 0; i++) { + r = i2c_master_send(client, tmp, j); + if (r < 0) + msleep(wait_tab[i]); + } + + if (r >= 0) { + if (r != j) + r = -EREMOTEIO; + else + r = 0; + } + + st21nfca_hci_remove_len_crc(skb, len); + + return r; +} + +static int get_frame_size(u8 *buf, int buflen) +{ + int len = 0; + if (buf[len + 1] == ST21NFCA_SOF_EOF) + return 0; + + for (len = 1; len < buflen && buf[len] != ST21NFCA_SOF_EOF; len++) + ; + + return len; +} + +static int check_crc(u8 *buf, int buflen) +{ + u16 crc; + + crc = crc_ccitt(0xffff, buf, buflen - 2); + crc = ~crc; + + if (buf[buflen - 2] != (crc & 0xff) || buf[buflen - 1] != (crc >> 8)) { + pr_err(ST21NFCA_HCI_DRIVER_NAME + ": CRC error 0x%x != 0x%x 0x%x\n", crc, buf[buflen - 1], + buf[buflen - 2]); + + pr_info(DRIVER_DESC ": %s : BAD CRC\n", __func__); + print_hex_dump(KERN_DEBUG, "crc: ", DUMP_PREFIX_NONE, + 16, 2, buf, buflen, false); + return -EPERM; + } + return 0; +} + +/* + * Prepare received data for upper layer. + * Received data include byte stuffing, crc and sof/eof + * which is not usable by hci part. + * returns: + * frame size without sof/eof, header and byte stuffing + * -EBADMSG : frame was incorrect and discarded + */ +static int st21nfca_hci_i2c_repack(struct sk_buff *skb) +{ + int i, j, r, size; + if (skb->len < 1 || (skb->len > 1 && skb->data[1] != 0)) + return -EBADMSG; + + size = get_frame_size(skb->data, skb->len); + if (size > 0) { + skb_trim(skb, size); + /* remove ST21NFCA byte stuffing for upper layer */ + for (i = 1, j = 0; i < skb->len; i++) { + if (skb->data[i] == + (u8) ST21NFCA_ESCAPE_BYTE_STUFFING) { + skb->data[i] = + skb->data[i + + 1] | ST21NFCA_BYTE_STUFFING_MASK; + i++; + j++; + } + skb->data[i] = skb->data[i + j]; + } + /* remove byte stuffing useless byte */ + skb_trim(skb, i - j); + /* remove ST21NFCA_SOF_EOF from head */ + skb_pull(skb, 1); + + r = check_crc(skb->data, skb->len); + if (r != 0) { + i = 0; + return -EBADMSG; + } + + /* remove headbyte */ + skb_pull(skb, 1); + /* remove crc. Byte Stuffing is already removed here */ + skb_trim(skb, skb->len - 2); + return skb->len; + } + return 0; +} + +/* + * Reads an shdlc frame and returns it in a newly allocated sk_buff. Guarantees + * that i2c bus will be flushed and that next read will start on a new frame. + * returned skb contains only LLC header and payload. + * returns: + * frame size : if received frame is complete (find ST21NFCA_SOF_EOF at + * end of read) + * -EAGAIN : if received frame is incomplete (not find ST21NFCA_SOF_EOF + * at end of read) + * -EREMOTEIO : i2c read error (fatal) + * -EBADMSG : frame was incorrect and discarded + * (value returned from st21nfca_hci_i2c_repack) + * -EIO : if no ST21NFCA_SOF_EOF is found after reaching + * the read length end sequence + */ +static int st21nfca_hci_i2c_read(struct st21nfca_i2c_phy *phy, + struct sk_buff *skb) +{ + int r, i; + u8 len; + struct i2c_client *client = phy->i2c_dev; + + if (phy->current_read_len < ARRAY_SIZE(len_seq)) { + len = len_seq[phy->current_read_len]; + + /* + * Add retry mecanism + * Operation on I2C interface may fail in case of operation on + * RF or SWP interface + */ + r = 0; + for (i = 0; i < ARRAY_SIZE(wait_tab) && r <= 0; i++) { + r = i2c_master_recv(client, skb_put(skb, len), len); + if (r < 0) + msleep(wait_tab[i]); + } + + if (r != len) { + phy->current_read_len = 0; + return -EREMOTEIO; + } + + if (memchr(skb->data + 2, ST21NFCA_SOF_EOF, + skb->len - 2) != NULL) { + phy->current_read_len = 0; + return st21nfca_hci_i2c_repack(skb); + } + phy->current_read_len++; + return -EAGAIN; + } + return -EIO; +} + +/* + * Reads an shdlc frame from the chip. This is not as straightforward as it + * seems. The frame format is data-crc, and corruption can occur anywhere + * while transiting on i2c bus, such that we could read an invalid data. + * The tricky case is when we read a corrupted data or crc. We must detect + * this here in order to determine that data can be transmitted to the hci + * core. This is the reason why we check the crc here. + * The CLF will repeat a frame until we send a RR on that frame. + * + * On ST21NFCA, IRQ goes in idle when read starts. As no size information are + * available in the incoming data, other IRQ might come. Every IRQ will trigger + * a read sequence with different length and will fill the current frame. + * The reception is complete once we reach a ST21NFCA_SOF_EOF. + */ +static irqreturn_t st21nfca_hci_irq_thread_fn(int irq, void *phy_id) +{ + struct st21nfca_i2c_phy *phy = phy_id; + struct i2c_client *client; + + int r; + + if (!phy || irq != phy->i2c_dev->irq) { + WARN_ON_ONCE(1); + return IRQ_NONE; + } + + client = phy->i2c_dev; + dev_dbg(&client->dev, "IRQ\n"); + + if (phy->hard_fault != 0) + return IRQ_HANDLED; + + r = st21nfca_hci_i2c_read(phy, phy->pending_skb); + if (r == -EREMOTEIO) { + phy->hard_fault = r; + + nfc_hci_recv_frame(phy->hdev, NULL); + + return IRQ_HANDLED; + } else if (r == -EAGAIN || r == -EIO) { + return IRQ_HANDLED; + } else if (r == -EBADMSG && phy->crc_trials < ARRAY_SIZE(wait_tab)) { + /* + * With ST21NFCA, only one interface (I2C, RF or SWP) + * may be active at a time. + * Having incorrect crc is usually due to i2c macrocell + * deactivation in the middle of a transmission. + * It may generate corrupted data on i2c. + * We give sometime to get i2c back. + * The complete frame will be repeated. + */ + msleep(wait_tab[phy->crc_trials]); + phy->crc_trials++; + phy->current_read_len = 0; + } else if (r > 0) { + /* + * We succeeded to read data from the CLF and + * data is valid. + * Reset counter. + */ + nfc_hci_recv_frame(phy->hdev, phy->pending_skb); + phy->crc_trials = 0; + } + + phy->pending_skb = alloc_skb(ST21NFCA_HCI_LLC_MAX_SIZE * 2, GFP_KERNEL); + if (phy->pending_skb == NULL) { + phy->hard_fault = -ENOMEM; + nfc_hci_recv_frame(phy->hdev, NULL); + } + + return IRQ_HANDLED; +} + +static struct nfc_phy_ops i2c_phy_ops = { + .write = st21nfca_hci_i2c_write, + .enable = st21nfca_hci_i2c_enable, + .disable = st21nfca_hci_i2c_disable, +}; + +static int st21nfca_request_resources(struct st21nfca_i2c_phy *phy, + struct i2c_client *client) +{ + struct st21nfca_nfc_platform_data *pdata; + int r; + + pdata = client->dev.platform_data; + if (pdata == NULL) { + nfc_err(&client->dev, "No platform data\n"); + return -EINVAL; + } + + /* store for later use */ + phy->gpio_irq = pdata->gpio_irq; + phy->gpio_ena = pdata->gpio_ena; + phy->irq_polarity = pdata->irq_polarity; + phy->i2c_dev = client; + + r = devm_gpio_request(&client->dev, phy->gpio_irq, "wake_up"); + if (r) { + pr_err("%s : gpio_request failed\n", __FILE__); + return -ENODEV; + } + + r = gpio_direction_input(phy->gpio_irq); + if (r) { + pr_err("%s : gpio_direction_input failed\n", __FILE__); + return -ENODEV; + } + + if (phy->gpio_ena != 0) { + r = devm_gpio_request(&client->dev, + phy->gpio_ena, "clf_enable"); + if (r) { + pr_err("%s : ena gpio_request failed\n", __FILE__); + return -ENODEV; + } + r = gpio_direction_output(phy->gpio_ena, 1); + + if (r) { + pr_err("%s : ena gpio_direction_output failed\n", + __FILE__); + return -ENODEV; + } + } + + phy->pending_skb = alloc_skb(ST21NFCA_HCI_LLC_MAX_SIZE * 2, GFP_KERNEL); + if (phy->pending_skb == NULL) + return -ENOMEM; + + phy->current_read_len = 0; + phy->crc_trials = 0; + return r; +} + +static int st21nfca_hci_i2c_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct st21nfca_i2c_phy *phy; + struct st21nfca_nfc_platform_data *pdata; + int r = 0; + + dev_dbg(&client->dev, "%s\n", __func__); + dev_dbg(&client->dev, "IRQ: %d\n", client->irq); + + if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { + nfc_err(&client->dev, "Need I2C_FUNC_I2C\n"); + return -ENODEV; + } + + phy = devm_kzalloc(&client->dev, sizeof(struct st21nfca_i2c_phy), + GFP_KERNEL); + if (!phy) { + nfc_err(&client->dev, + "Cannot allocate memory for st21nfca i2c phy.\n"); + return -ENOMEM; + } + + phy->i2c_dev = client; + + i2c_set_clientdata(client, phy); + + pdata = client->dev.platform_data; + if (pdata == NULL) { + nfc_err(&client->dev, "No platform data\n"); + return -EINVAL; + } + + r = st21nfca_request_resources(phy, client); + if (r) { + nfc_err(&client->dev, "Cannot get platform resources\n"); + return r; + } + + st21nfca_hci_platform_init(phy); + r = devm_request_threaded_irq(&client->dev, client->irq, NULL, + st21nfca_hci_irq_thread_fn, + phy->irq_polarity | IRQF_ONESHOT, + ST21NFCA_HCI_DRIVER_NAME, phy); + if (r < 0) { + nfc_err(&client->dev, "Unable to register IRQ handler\n"); + return r; + } + + r = st21nfca_hci_probe(phy, &i2c_phy_ops, LLC_SHDLC_NAME, + ST21NFCA_FRAME_HEADROOM, ST21NFCA_FRAME_TAILROOM, + ST21NFCA_HCI_LLC_MAX_PAYLOAD, &phy->hdev); + + if (r < 0) + return r; + + return 0; +} + +static int st21nfca_hci_i2c_remove(struct i2c_client *client) +{ + struct st21nfca_i2c_phy *phy = i2c_get_clientdata(client); + + dev_dbg(&client->dev, "%s\n", __func__); + + st21nfca_hci_remove(phy->hdev); + + if (phy->powered) + st21nfca_hci_i2c_disable(phy); + + return 0; +} + +static struct i2c_driver st21nfca_hci_i2c_driver = { + .driver = { + .name = ST21NFCA_HCI_I2C_DRIVER_NAME, + }, + .probe = st21nfca_hci_i2c_probe, + .id_table = st21nfca_hci_i2c_id_table, + .remove = st21nfca_hci_i2c_remove, +}; + +module_i2c_driver(st21nfca_hci_i2c_driver); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/drivers/nfc/st21nfca/st21nfca.c b/drivers/nfc/st21nfca/st21nfca.c new file mode 100644 index 000000000000..69213f37b7ba --- /dev/null +++ b/drivers/nfc/st21nfca/st21nfca.c @@ -0,0 +1,506 @@ +/* + * HCI based Driver for STMicroelectronics NFC Chip + * + * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include "st21nfca.h" +#include + +#define DRIVER_DESC "HCI NFC driver for ST21NFCA" + +#define FULL_VERSION_LEN 3 + +/* Proprietary gates, events, commands and registers */ + +/* Commands that apply to all RF readers */ +#define ST21NFCA_RF_READER_CMD_PRESENCE_CHECK 0x30 + +#define ST21NFCA_RF_READER_ISO15693_GATE 0x12 + +/* + * Reader gate for communication with contact-less cards using Type A + * protocol ISO14443-3 but not compliant with ISO14443-4 + */ +#define ST21NFCA_RF_READER_14443_3_A_GATE 0x15 +#define ST21NFCA_RF_READER_14443_3_A_UID 0x02 +#define ST21NFCA_RF_READER_14443_3_A_ATQA 0x03 +#define ST21NFCA_RF_READER_14443_3_A_SAK 0x04 + +#define ST21NFCA_DEVICE_MGNT_GATE 0x01 +#define ST21NFCA_DEVICE_MGNT_PIPE 0x02 +#define ST21NFCA_NFC_MODE 0x03 /* NFC_MODE parameter*/ + + +static DECLARE_BITMAP(dev_mask, ST21NFCA_NUM_DEVICES); + +static struct nfc_hci_gate st21nfca_gates[] = { + {NFC_HCI_ADMIN_GATE, NFC_HCI_INVALID_PIPE}, + {NFC_HCI_LOOPBACK_GATE, NFC_HCI_INVALID_PIPE}, + {NFC_HCI_ID_MGMT_GATE, NFC_HCI_INVALID_PIPE}, + {NFC_HCI_LINK_MGMT_GATE, NFC_HCI_INVALID_PIPE}, + {NFC_HCI_RF_READER_B_GATE, NFC_HCI_INVALID_PIPE}, + {NFC_HCI_RF_READER_A_GATE, NFC_HCI_INVALID_PIPE}, + {ST21NFCA_DEVICE_MGNT_GATE, ST21NFCA_DEVICE_MGNT_PIPE}, + {ST21NFCA_RF_READER_F_GATE, NFC_HCI_INVALID_PIPE}, + {ST21NFCA_RF_READER_14443_3_A_GATE, NFC_HCI_INVALID_PIPE}, +}; +/* Largest headroom needed for outgoing custom commands */ +#define ST21NFCA_CMDS_HEADROOM 7 + +static int st21nfca_hci_open(struct nfc_hci_dev *hdev) +{ + struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev); + int r; + + mutex_lock(&info->info_lock); + + if (info->state != ST21NFCA_ST_COLD) { + r = -EBUSY; + goto out; + } + + r = info->phy_ops->enable(info->phy_id); + + if (r == 0) + info->state = ST21NFCA_ST_READY; + +out: + mutex_unlock(&info->info_lock); + return r; +} + +static void st21nfca_hci_close(struct nfc_hci_dev *hdev) +{ + struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev); + + mutex_lock(&info->info_lock); + + if (info->state == ST21NFCA_ST_COLD) + goto out; + + info->phy_ops->disable(info->phy_id); + info->state = ST21NFCA_ST_COLD; + +out: + mutex_unlock(&info->info_lock); +} + +static int st21nfca_hci_ready(struct nfc_hci_dev *hdev) +{ + struct sk_buff *skb; + + u8 param; + int r; + + param = NFC_HCI_UICC_HOST_ID; + r = nfc_hci_set_param(hdev, NFC_HCI_ADMIN_GATE, + NFC_HCI_ADMIN_WHITELIST, ¶m, 1); + if (r < 0) + return r; + + /* Set NFC_MODE in device management gate to enable */ + r = nfc_hci_get_param(hdev, ST21NFCA_DEVICE_MGNT_GATE, + ST21NFCA_NFC_MODE, &skb); + if (r < 0) + return r; + + if (skb->data[0] == 0) { + kfree_skb(skb); + param = 1; + + r = nfc_hci_set_param(hdev, ST21NFCA_DEVICE_MGNT_GATE, + ST21NFCA_NFC_MODE, ¶m, 1); + if (r < 0) + return r; + } + + r = nfc_hci_send_event(hdev, NFC_HCI_RF_READER_A_GATE, + NFC_HCI_EVT_END_OPERATION, NULL, 0); + if (r < 0) + return r; + + r = nfc_hci_get_param(hdev, NFC_HCI_ID_MGMT_GATE, + NFC_HCI_ID_MGMT_VERSION_SW, &skb); + if (r < 0) + return r; + + if (skb->len != FULL_VERSION_LEN) { + kfree_skb(skb); + return -EINVAL; + } + + print_hex_dump(KERN_DEBUG, "FULL VERSION SOFTWARE INFO: ", + DUMP_PREFIX_NONE, 16, 1, + skb->data, FULL_VERSION_LEN, false); + + kfree_skb(skb); + + return 0; +} + +static int st21nfca_hci_xmit(struct nfc_hci_dev *hdev, struct sk_buff *skb) +{ + struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev); + + return info->phy_ops->write(info->phy_id, skb); +} + +static int st21nfca_hci_start_poll(struct nfc_hci_dev *hdev, + u32 im_protocols, u32 tm_protocols) +{ + int r; + + pr_info(DRIVER_DESC ": %s protocols 0x%x 0x%x\n", + __func__, im_protocols, tm_protocols); + + r = nfc_hci_send_event(hdev, NFC_HCI_RF_READER_A_GATE, + NFC_HCI_EVT_END_OPERATION, NULL, 0); + if (r < 0) + return r; + if (im_protocols) { + /* + * enable polling according to im_protocols & tm_protocols + * - CLOSE pipe according to im_protocols & tm_protocols + */ + if ((NFC_HCI_RF_READER_B_GATE & im_protocols) == 0) { + r = nfc_hci_disconnect_gate(hdev, + NFC_HCI_RF_READER_B_GATE); + if (r < 0) + return r; + } + + if ((NFC_HCI_RF_READER_A_GATE & im_protocols) == 0) { + r = nfc_hci_disconnect_gate(hdev, + NFC_HCI_RF_READER_A_GATE); + if (r < 0) + return r; + } + + if ((ST21NFCA_RF_READER_F_GATE & im_protocols) == 0) { + r = nfc_hci_disconnect_gate(hdev, + ST21NFCA_RF_READER_F_GATE); + if (r < 0) + return r; + } + + if ((ST21NFCA_RF_READER_14443_3_A_GATE & im_protocols) == 0) { + r = nfc_hci_disconnect_gate(hdev, + ST21NFCA_RF_READER_14443_3_A_GATE); + if (r < 0) + return r; + } + + if ((ST21NFCA_RF_READER_ISO15693_GATE & im_protocols) == 0) { + r = nfc_hci_disconnect_gate(hdev, + ST21NFCA_RF_READER_ISO15693_GATE); + if (r < 0) + return r; + } + + r = nfc_hci_send_event(hdev, NFC_HCI_RF_READER_A_GATE, + NFC_HCI_EVT_READER_REQUESTED, NULL, 0); + if (r < 0) + nfc_hci_send_event(hdev, NFC_HCI_RF_READER_A_GATE, + NFC_HCI_EVT_END_OPERATION, NULL, 0); + } + return r; +} + +static int st21nfca_get_iso14443_3_atqa(struct nfc_hci_dev *hdev, u16 *atqa) +{ + int r; + struct sk_buff *atqa_skb = NULL; + + r = nfc_hci_get_param(hdev, ST21NFCA_RF_READER_14443_3_A_GATE, + ST21NFCA_RF_READER_14443_3_A_ATQA, &atqa_skb); + if (r < 0) + goto exit; + + if (atqa_skb->len != 2) { + r = -EPROTO; + goto exit; + } + + *atqa = be16_to_cpu(*(u16 *) atqa_skb->data); + +exit: + kfree_skb(atqa_skb); + return r; +} + +static int st21nfca_get_iso14443_3_sak(struct nfc_hci_dev *hdev, u8 *sak) +{ + int r; + struct sk_buff *sak_skb = NULL; + + r = nfc_hci_get_param(hdev, ST21NFCA_RF_READER_14443_3_A_GATE, + ST21NFCA_RF_READER_14443_3_A_SAK, &sak_skb); + if (r < 0) + goto exit; + + if (sak_skb->len != 1) { + r = -EPROTO; + goto exit; + } + + *sak = sak_skb->data[0]; + +exit: + kfree_skb(sak_skb); + return r; +} + +static int st21nfca_get_iso14443_3_uid(struct nfc_hci_dev *hdev, u8 *gate, + int *len) +{ + int r; + struct sk_buff *uid_skb = NULL; + + r = nfc_hci_get_param(hdev, ST21NFCA_RF_READER_14443_3_A_GATE, + ST21NFCA_RF_READER_14443_3_A_UID, &uid_skb); + if (r < 0) + goto exit; + + if (uid_skb->len == 0 || uid_skb->len > NFC_NFCID1_MAXSIZE) { + r = -EPROTO; + goto exit; + } + + gate = uid_skb->data; + *len = uid_skb->len; +exit: + kfree_skb(uid_skb); + return r; +} + +static int st21nfca_hci_target_from_gate(struct nfc_hci_dev *hdev, u8 gate, + struct nfc_target *target) +{ + int r, len; + u16 atqa; + u8 sak; + u8 uid[NFC_NFCID1_MAXSIZE]; + + switch (gate) { + case ST21NFCA_RF_READER_F_GATE: + target->supported_protocols = NFC_PROTO_FELICA_MASK; + break; + case ST21NFCA_RF_READER_14443_3_A_GATE: + /* ISO14443-3 type 1 or 2 tags */ + r = st21nfca_get_iso14443_3_atqa(hdev, &atqa); + if (r < 0) + return r; + if (atqa == 0x000c) { + target->supported_protocols = NFC_PROTO_JEWEL_MASK; + target->sens_res = 0x0c00; + } else { + r = st21nfca_get_iso14443_3_sak(hdev, &sak); + if (r < 0) + return r; + + r = st21nfca_get_iso14443_3_uid(hdev, uid, &len); + if (r < 0) + return r; + + target->supported_protocols = + nfc_hci_sak_to_protocol(sak); + if (target->supported_protocols == 0xffffffff) + return -EPROTO; + + target->sens_res = atqa; + target->sel_res = sak; + memcpy(target->nfcid1, uid, len); + target->nfcid1_len = len; + } + + break; + default: + return -EPROTO; + } + + return 0; +} + +/* + * Returns: + * <= 0: driver handled the data exchange + * 1: driver doesn't especially handle, please do standard processing + */ +static int st21nfca_hci_im_transceive(struct nfc_hci_dev *hdev, + struct nfc_target *target, + struct sk_buff *skb, + data_exchange_cb_t cb, void *cb_context) +{ + pr_info(DRIVER_DESC ": %s for gate=%d len=%d\n", __func__, + target->hci_reader_gate, skb->len); + + switch (target->hci_reader_gate) { + case ST21NFCA_RF_READER_F_GATE: + *skb_push(skb, 1) = 0x1a; + return nfc_hci_send_cmd_async(hdev, target->hci_reader_gate, + ST21NFCA_WR_XCHG_DATA, skb->data, + skb->len, cb, cb_context); + case ST21NFCA_RF_READER_14443_3_A_GATE: + *skb_push(skb, 1) = 0x1a; /* CTR, see spec:10.2.2.1 */ + + return nfc_hci_send_cmd_async(hdev, target->hci_reader_gate, + ST21NFCA_WR_XCHG_DATA, skb->data, + skb->len, cb, cb_context); + default: + return 1; + } +} + +static int st21nfca_hci_check_presence(struct nfc_hci_dev *hdev, + struct nfc_target *target) +{ + u8 fwi = 0x11; + switch (target->hci_reader_gate) { + case NFC_HCI_RF_READER_A_GATE: + case NFC_HCI_RF_READER_B_GATE: + /* + * PRESENCE_CHECK on those gates is available + * However, the answer to this command is taking 3 * fwi + * if the card is no present. + * Instead, we send an empty I-Frame with a very short + * configurable fwi ~604µs. + */ + return nfc_hci_send_cmd(hdev, target->hci_reader_gate, + ST21NFCA_WR_XCHG_DATA, &fwi, 1, NULL); + case ST21NFCA_RF_READER_14443_3_A_GATE: + return nfc_hci_send_cmd(hdev, target->hci_reader_gate, + ST21NFCA_RF_READER_CMD_PRESENCE_CHECK, + NULL, 0, NULL); + default: + return -EOPNOTSUPP; + } +} + +static struct nfc_hci_ops st21nfca_hci_ops = { + .open = st21nfca_hci_open, + .close = st21nfca_hci_close, + .hci_ready = st21nfca_hci_ready, + .xmit = st21nfca_hci_xmit, + .start_poll = st21nfca_hci_start_poll, + .target_from_gate = st21nfca_hci_target_from_gate, + .im_transceive = st21nfca_hci_im_transceive, + .check_presence = st21nfca_hci_check_presence, +}; + +int st21nfca_hci_probe(void *phy_id, struct nfc_phy_ops *phy_ops, + char *llc_name, int phy_headroom, int phy_tailroom, + int phy_payload, struct nfc_hci_dev **hdev) +{ + struct st21nfca_hci_info *info; + int r = 0; + int dev_num; + u32 protocols; + struct nfc_hci_init_data init_data; + unsigned long quirks = 0; + + info = kzalloc(sizeof(struct st21nfca_hci_info), GFP_KERNEL); + if (!info) { + r = -ENOMEM; + goto err_alloc_hdev; + } + + info->phy_ops = phy_ops; + info->phy_id = phy_id; + info->state = ST21NFCA_ST_COLD; + mutex_init(&info->info_lock); + + init_data.gate_count = ARRAY_SIZE(st21nfca_gates); + + memcpy(init_data.gates, st21nfca_gates, sizeof(st21nfca_gates)); + + /* + * Session id must include the driver name + i2c bus addr + * persistent info to discriminate 2 identical chips + */ + dev_num = find_first_zero_bit(dev_mask, ST21NFCA_NUM_DEVICES); + if (dev_num >= ST21NFCA_NUM_DEVICES) + goto err_alloc_hdev; + + scnprintf(init_data.session_id, sizeof(init_data.session_id), "%s%2x", + "ST21AH", dev_num); + + protocols = NFC_PROTO_JEWEL_MASK | + NFC_PROTO_MIFARE_MASK | + NFC_PROTO_FELICA_MASK | + NFC_PROTO_ISO14443_MASK | + NFC_PROTO_ISO14443_B_MASK; + + set_bit(NFC_HCI_QUIRK_SHORT_CLEAR, &quirks); + + info->hdev = + nfc_hci_allocate_device(&st21nfca_hci_ops, &init_data, quirks, + protocols, llc_name, + phy_headroom + ST21NFCA_CMDS_HEADROOM, + phy_tailroom, phy_payload); + + if (!info->hdev) { + pr_err("Cannot allocate nfc hdev.\n"); + r = -ENOMEM; + goto err_alloc_hdev; + } + + nfc_hci_set_clientdata(info->hdev, info); + + r = nfc_hci_register_device(info->hdev); + if (r) + goto err_regdev; + + *hdev = info->hdev; + + return 0; + +err_regdev: + nfc_hci_free_device(info->hdev); + +err_alloc_hdev: + kfree(info); + + return r; +} +EXPORT_SYMBOL(st21nfca_hci_probe); + +void st21nfca_hci_remove(struct nfc_hci_dev *hdev) +{ + struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev); + + nfc_hci_unregister_device(hdev); + nfc_hci_free_device(hdev); + kfree(info); +} +EXPORT_SYMBOL(st21nfca_hci_remove); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/drivers/nfc/st21nfca/st21nfca.h b/drivers/nfc/st21nfca/st21nfca.h new file mode 100644 index 000000000000..334cd90bcc8c --- /dev/null +++ b/drivers/nfc/st21nfca/st21nfca.h @@ -0,0 +1,87 @@ +/* + * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef __LOCAL_ST21NFCA_H_ +#define __LOCAL_ST21NFCA_H_ + +#include + +#define HCI_MODE 0 + +/* framing in HCI mode */ +#define ST21NFCA_SOF_EOF_LEN 2 + +/* Almost every time value is 0 */ +#define ST21NFCA_HCI_LLC_LEN 1 + +/* Size in worst case : + * In normal case CRC len = 2 but byte stuffing + * may appear in case one CRC byte = ST21NFCA_SOF_EOF + */ +#define ST21NFCA_HCI_LLC_CRC 4 + +#define ST21NFCA_HCI_LLC_LEN_CRC (ST21NFCA_SOF_EOF_LEN + \ + ST21NFCA_HCI_LLC_LEN + \ + ST21NFCA_HCI_LLC_CRC) +#define ST21NFCA_HCI_LLC_MIN_SIZE (1 + ST21NFCA_HCI_LLC_LEN_CRC) + +/* Worst case when adding byte stuffing between each byte */ +#define ST21NFCA_HCI_LLC_MAX_PAYLOAD 29 +#define ST21NFCA_HCI_LLC_MAX_SIZE (ST21NFCA_HCI_LLC_LEN_CRC + 1 + \ + ST21NFCA_HCI_LLC_MAX_PAYLOAD) + +#define DRIVER_DESC "HCI NFC driver for ST21NFCA" + +#define ST21NFCA_HCI_MODE 0 + +#define ST21NFCA_NUM_DEVICES 256 + +int st21nfca_hci_probe(void *phy_id, struct nfc_phy_ops *phy_ops, + char *llc_name, int phy_headroom, int phy_tailroom, + int phy_payload, struct nfc_hci_dev **hdev); +void st21nfca_hci_remove(struct nfc_hci_dev *hdev); + +enum st21nfca_state { + ST21NFCA_ST_COLD, + ST21NFCA_ST_READY, +}; + +struct st21nfca_hci_info { + struct nfc_phy_ops *phy_ops; + void *phy_id; + + struct nfc_hci_dev *hdev; + + enum st21nfca_state state; + + struct mutex info_lock; + + int async_cb_type; + data_exchange_cb_t async_cb; + void *async_cb_context; + +} __packed; + +/* Reader RF commands */ +#define ST21NFCA_WR_XCHG_DATA 0x10 + +#define ST21NFCA_RF_READER_F_GATE 0x14 +#define ST21NFCA_RF_READER_F_DATARATE 0x01 +#define ST21NFCA_RF_READER_F_DATARATE_106 0x01 +#define ST21NFCA_RF_READER_F_DATARATE_212 0x02 +#define ST21NFCA_RF_READER_F_DATARATE_424 0x04 + +#endif /* __LOCAL_ST21NFCA_H_ */ diff --git a/include/linux/platform_data/st21nfca.h b/include/linux/platform_data/st21nfca.h new file mode 100644 index 000000000000..1730312398ff --- /dev/null +++ b/include/linux/platform_data/st21nfca.h @@ -0,0 +1,32 @@ +/* + * Driver include for the ST21NFCA NFC chip. + * + * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _ST21NFCA_HCI_H_ +#define _ST21NFCA_HCI_H_ + +#include + +#define ST21NFCA_HCI_DRIVER_NAME "st21nfca_hci" + +struct st21nfca_nfc_platform_data { + unsigned int gpio_irq; + unsigned int gpio_ena; + unsigned int irq_polarity; +}; + +#endif /* _ST21NFCA_HCI_H_ */ -- cgit v1.2.3-59-g8ed1b From 3de0b592394d17b2c41a261a6a493a521213f299 Mon Sep 17 00:00:00 2001 From: Venkata Duvvuru Date: Mon, 21 Apr 2014 15:37:59 +0530 Subject: ethtool: Support for configurable RSS hash key This ethtool patch primarily copies the ioctl command data structures from/to the User space and invokes the driver hook. Signed-off-by: Venkat Duvvuru Signed-off-by: David S. Miller --- include/linux/ethtool.h | 13 +++ include/uapi/linux/ethtool.h | 32 +++++++ net/core/ethtool.c | 221 ++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 252 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 0a114d05f68d..212f537fc686 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -154,13 +154,23 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings) * @reset: Reset (part of) the device, as specified by a bitmask of * flags from &enum ethtool_reset_flags. Returns a negative * error code or zero. + * @get_rxfh_key_size: Get the size of the RX flow hash key. + * Returns zero if not supported for this specific device. * @get_rxfh_indir_size: Get the size of the RX flow hash indirection table. * Returns zero if not supported for this specific device. * @get_rxfh_indir: Get the contents of the RX flow hash indirection table. * Will not be called if @get_rxfh_indir_size returns zero. + * @get_rxfh: Get the contents of the RX flow hash indirection table and hash + * key. + * Will not be called if @get_rxfh_indir_size and @get_rxfh_key_size + * returns zero. * Returns a negative error code or zero. * @set_rxfh_indir: Set the contents of the RX flow hash indirection table. * Will not be called if @get_rxfh_indir_size returns zero. + * @set_rxfh: Set the contents of the RX flow hash indirection table and + * hash key. + * Will not be called if @get_rxfh_indir_size and @get_rxfh_key_size + * returns zero. * Returns a negative error code or zero. * @get_channels: Get number of channels. * @set_channels: Set number of channels. Returns a negative error code or @@ -232,7 +242,10 @@ struct ethtool_ops { int (*set_rxnfc)(struct net_device *, struct ethtool_rxnfc *); int (*flash_device)(struct net_device *, struct ethtool_flash *); int (*reset)(struct net_device *, u32 *); + u32 (*get_rxfh_key_size)(struct net_device *); u32 (*get_rxfh_indir_size)(struct net_device *); + int (*get_rxfh)(struct net_device *, u32 *, u8 *); + int (*set_rxfh)(struct net_device *, u32 *, u8 *); int (*get_rxfh_indir)(struct net_device *, u32 *); int (*set_rxfh_indir)(struct net_device *, const u32 *); void (*get_channels)(struct net_device *, struct ethtool_channels *); diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index fd161e91b6d7..d47d31d6fa0e 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -846,6 +846,35 @@ struct ethtool_rxfh_indir { __u32 ring_index[0]; }; +/** + * struct ethtool_rxfh - command to get/set RX flow hash indir or/and hash key. + * @cmd: Specific command number - %ETHTOOL_GRSSH or %ETHTOOL_SRSSH + * @rss_context: RSS context identifier. + * @indir_size: On entry, the array size of the user buffer, which may be zero. + * On return from %ETHTOOL_GRSSH, the array size of the hardware + * indirection table. + * @key_size: On entry, the array size of the user buffer in bytes, + * which may be zero. + * On return from %ETHTOOL_GRSSH, the size of the RSS hash key. + * @rsvd: Reserved for future extensions. + * @rss_config: RX ring/queue index for each hash value i.e., indirection table + * of size @indir_size followed by hash key of size @key_size. + * + * For %ETHTOOL_GRSSH, a @indir_size and key_size of zero means that only the + * size should be returned. For %ETHTOOL_SRSSH, a @indir_size of 0xDEADBEEF + * means that indir table setting is not requested and a @indir_size of zero + * means the indir table should be reset to default values. This last feature + * is not supported by the original implementations. + */ +struct ethtool_rxfh { + __u32 cmd; + __u32 rss_context; + __u32 indir_size; + __u32 key_size; + __u32 rsvd[2]; + __u32 rss_config[0]; +}; + /** * struct ethtool_rx_ntuple_flow_spec - specification for RX flow filter * @flow_type: Type of match to perform, e.g. %TCP_V4_FLOW @@ -1118,6 +1147,9 @@ enum ethtool_sfeatures_retval_bits { #define ETHTOOL_GEEE 0x00000044 /* Get EEE settings */ #define ETHTOOL_SEEE 0x00000045 /* Set EEE settings */ +#define ETHTOOL_GRSSH 0x00000046 /* Get RX flow hash configuration */ +#define ETHTOOL_SRSSH 0x00000047 /* Set RX flow hash configuration */ + /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET #define SPARC_ETH_SSET ETHTOOL_SSET diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 640ba0e5831c..1d72786ef866 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -557,6 +557,23 @@ err_out: return ret; } +static int ethtool_copy_validate_indir(u32 *indir, void __user *useraddr, + struct ethtool_rxnfc *rx_rings, + u32 size) +{ + int ret = 0, i; + + if (copy_from_user(indir, useraddr, size * sizeof(indir[0]))) + ret = -EFAULT; + + /* Validate ring indices */ + for (i = 0; i < size; i++) { + if (indir[i] >= rx_rings->data) + ret = -EINVAL; + } + return ret; +} + static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, void __user *useraddr) { @@ -613,6 +630,7 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, u32 *indir; const struct ethtool_ops *ops = dev->ethtool_ops; int ret; + u32 ringidx_offset = offsetof(struct ethtool_rxfh_indir, ring_index[0]); if (!ops->get_rxfh_indir_size || !ops->set_rxfh_indir || !ops->get_rxnfc) @@ -643,28 +661,196 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, for (i = 0; i < dev_size; i++) indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data); } else { - if (copy_from_user(indir, - useraddr + - offsetof(struct ethtool_rxfh_indir, - ring_index[0]), - dev_size * sizeof(indir[0]))) { + ret = ethtool_copy_validate_indir(indir, + useraddr + ringidx_offset, + &rx_rings, + dev_size); + if (ret) + goto out; + } + + ret = ops->set_rxfh_indir(dev, indir); + +out: + kfree(indir); + return ret; +} + +static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, + void __user *useraddr) +{ + int ret; + const struct ethtool_ops *ops = dev->ethtool_ops; + u32 user_indir_size = 0, user_key_size = 0; + u32 dev_indir_size = 0, dev_key_size = 0; + u32 total_size; + u32 indir_offset, indir_bytes; + u32 key_offset; + u32 *indir = NULL; + u8 *hkey = NULL; + u8 *rss_config; + + if (!(dev->ethtool_ops->get_rxfh_indir_size || + dev->ethtool_ops->get_rxfh_key_size) || + !dev->ethtool_ops->get_rxfh) + return -EOPNOTSUPP; + + if (ops->get_rxfh_indir_size) + dev_indir_size = ops->get_rxfh_indir_size(dev); + + indir_offset = offsetof(struct ethtool_rxfh, indir_size); + + if (copy_from_user(&user_indir_size, + useraddr + indir_offset, + sizeof(user_indir_size))) + return -EFAULT; + + if (copy_to_user(useraddr + indir_offset, + &dev_indir_size, sizeof(dev_indir_size))) + return -EFAULT; + + if (ops->get_rxfh_key_size) + dev_key_size = ops->get_rxfh_key_size(dev); + + if ((dev_key_size + dev_indir_size) == 0) + return -EOPNOTSUPP; + + key_offset = offsetof(struct ethtool_rxfh, key_size); + + if (copy_from_user(&user_key_size, + useraddr + key_offset, + sizeof(user_key_size))) + return -EFAULT; + + if (copy_to_user(useraddr + key_offset, + &dev_key_size, sizeof(dev_key_size))) + return -EFAULT; + + /* If the user buffer size is 0, this is just a query for the + * device table size and key size. Otherwise, if the User size is + * not equal to device table size or key size it's an error. + */ + if (!user_indir_size && !user_key_size) + return 0; + + if ((user_indir_size && (user_indir_size != dev_indir_size)) || + (user_key_size && (user_key_size != dev_key_size))) + return -EINVAL; + + indir_bytes = user_indir_size * sizeof(indir[0]); + total_size = indir_bytes + user_key_size; + rss_config = kzalloc(total_size, GFP_USER); + if (!rss_config) + return -ENOMEM; + + if (user_indir_size) + indir = (u32 *)rss_config; + + if (user_key_size) + hkey = rss_config + indir_bytes; + + ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey); + if (!ret) { + if (copy_to_user(useraddr + + offsetof(struct ethtool_rxfh, rss_config[0]), + rss_config, total_size)) ret = -EFAULT; + } + + kfree(rss_config); + + return ret; +} + +static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, + void __user *useraddr) +{ + int ret; + const struct ethtool_ops *ops = dev->ethtool_ops; + struct ethtool_rxnfc rx_rings; + u32 user_indir_size = 0, dev_indir_size = 0, i; + u32 user_key_size = 0, dev_key_size = 0; + u32 *indir = NULL, indir_bytes = 0; + u8 *hkey = NULL; + u8 *rss_config; + u32 indir_offset, key_offset; + u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]); + + if (!(ops->get_rxfh_indir_size || ops->get_rxfh_key_size) || + !ops->get_rxnfc || !ops->set_rxfh) + return -EOPNOTSUPP; + + if (ops->get_rxfh_indir_size) + dev_indir_size = ops->get_rxfh_indir_size(dev); + + indir_offset = offsetof(struct ethtool_rxfh, indir_size); + if (copy_from_user(&user_indir_size, + useraddr + indir_offset, + sizeof(user_indir_size))) + return -EFAULT; + + if (ops->get_rxfh_key_size) + dev_key_size = dev->ethtool_ops->get_rxfh_key_size(dev); + + if ((dev_key_size + dev_indir_size) == 0) + return -EOPNOTSUPP; + + key_offset = offsetof(struct ethtool_rxfh, key_size); + if (copy_from_user(&user_key_size, + useraddr + key_offset, + sizeof(user_key_size))) + return -EFAULT; + + /* If either indir or hash key is valid, proceed further. + */ + if ((user_indir_size && ((user_indir_size != 0xDEADBEEF) && + user_indir_size != dev_indir_size)) || + (user_key_size && (user_key_size != dev_key_size))) + return -EINVAL; + + if (user_indir_size != 0xDEADBEEF) + indir_bytes = dev_indir_size * sizeof(indir[0]); + + rss_config = kzalloc(indir_bytes + user_key_size, GFP_USER); + if (!rss_config) + return -ENOMEM; + + rx_rings.cmd = ETHTOOL_GRXRINGS; + ret = ops->get_rxnfc(dev, &rx_rings, NULL); + if (ret) + goto out; + + /* user_indir_size == 0 means reset the indir table to default. + * user_indir_size == 0xDEADBEEF means indir setting is not requested. + */ + if (user_indir_size && user_indir_size != 0xDEADBEEF) { + indir = (u32 *)rss_config; + ret = ethtool_copy_validate_indir(indir, + useraddr + rss_cfg_offset, + &rx_rings, + user_indir_size); + if (ret) goto out; - } + } else if (user_indir_size == 0) { + indir = (u32 *)rss_config; + for (i = 0; i < dev_indir_size; i++) + indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data); + } - /* Validate ring indices */ - for (i = 0; i < dev_size; i++) { - if (indir[i] >= rx_rings.data) { - ret = -EINVAL; - goto out; - } + if (user_key_size) { + hkey = rss_config + indir_bytes; + if (copy_from_user(hkey, + useraddr + rss_cfg_offset + indir_bytes, + user_key_size)) { + ret = -EFAULT; + goto out; } } - ret = ops->set_rxfh_indir(dev, indir); + ret = ops->set_rxfh(dev, indir, hkey); out: - kfree(indir); + kfree(rss_config); return ret; } @@ -1491,6 +1677,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GRXCLSRULE: case ETHTOOL_GRXCLSRLALL: case ETHTOOL_GRXFHINDIR: + case ETHTOOL_GRSSH: case ETHTOOL_GFEATURES: case ETHTOOL_GCHANNELS: case ETHTOOL_GET_TS_INFO: @@ -1628,6 +1815,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SRXFHINDIR: rc = ethtool_set_rxfh_indir(dev, useraddr); break; + case ETHTOOL_GRSSH: + rc = ethtool_get_rxfh(dev, useraddr); + break; + case ETHTOOL_SRSSH: + rc = ethtool_set_rxfh(dev, useraddr); + break; case ETHTOOL_GFEATURES: rc = ethtool_get_features(dev, useraddr); break; -- cgit v1.2.3-59-g8ed1b From 4cd3675ebf74d7f559038ded6aa8088e4099a83d Mon Sep 17 00:00:00 2001 From: Chema Gonzalez Date: Mon, 21 Apr 2014 09:21:24 -0700 Subject: filter: added BPF random opcode Added a new ancillary load (bpf call in eBPF parlance) that produces a 32-bit random number. We are implementing it as an ancillary load (instead of an ISA opcode) because (a) it is simpler, (b) allows easy JITing, and (c) seems more in line with generic ISAs that do not have "get a random number" as a instruction, but as an OS call. The main use for this ancillary load is to perform random packet sampling. Signed-off-by: Chema Gonzalez Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- Documentation/networking/filter.txt | 13 +++++++++++++ include/linux/filter.h | 1 + include/uapi/linux/filter.h | 3 ++- net/core/filter.c | 12 ++++++++++++ tools/net/bpf_exp.l | 1 + tools/net/bpf_exp.y | 11 ++++++++++- 6 files changed, 39 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index 81f940f4e884..82e1cb0b3da8 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -281,6 +281,7 @@ Possible BPF extensions are shown in the following table: cpu raw_smp_processor_id() vlan_tci vlan_tx_tag_get(skb) vlan_pr vlan_tx_tag_present(skb) + rand prandom_u32() These extensions can also be prefixed with '#'. Examples for low-level BPF: @@ -308,6 +309,18 @@ Examples for low-level BPF: ret #-1 drop: ret #0 +** icmp random packet sampling, 1 in 4 + ldh [12] + jne #0x800, drop + ldb [23] + jneq #1, drop + # get a random uint32 number + ld rand + mod #4 + jneq #1, drop + ret #-1 + drop: ret #0 + ** SECCOMP filter example: ld [4] /* offsetof(struct seccomp_data, arch) */ diff --git a/include/linux/filter.h b/include/linux/filter.h index 024fd03e5d18..759abf78dd61 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -223,6 +223,7 @@ enum { BPF_S_ANC_VLAN_TAG, BPF_S_ANC_VLAN_TAG_PRESENT, BPF_S_ANC_PAY_OFFSET, + BPF_S_ANC_RANDOM, }; #endif /* __LINUX_FILTER_H__ */ diff --git a/include/uapi/linux/filter.h b/include/uapi/linux/filter.h index 8eb9ccaa5b48..253b4d42cf2b 100644 --- a/include/uapi/linux/filter.h +++ b/include/uapi/linux/filter.h @@ -130,7 +130,8 @@ struct sock_fprog { /* Required for SO_ATTACH_FILTER. */ #define SKF_AD_VLAN_TAG 44 #define SKF_AD_VLAN_TAG_PRESENT 48 #define SKF_AD_PAY_OFFSET 52 -#define SKF_AD_MAX 56 +#define SKF_AD_RANDOM 56 +#define SKF_AD_MAX 60 #define SKF_NET_OFF (-0x100000) #define SKF_LL_OFF (-0x200000) diff --git a/net/core/filter.c b/net/core/filter.c index cd58614660cf..78a636e60a0b 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -643,6 +643,12 @@ static u64 __get_raw_cpu_id(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) return raw_smp_processor_id(); } +/* note that this only generates 32-bit random numbers */ +static u64 __get_random_u32(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) +{ + return (u64)prandom_u32(); +} + /* Register mappings for user programs. */ #define A_REG 0 #define X_REG 7 @@ -779,6 +785,7 @@ static bool convert_bpf_extensions(struct sock_filter *fp, case SKF_AD_OFF + SKF_AD_NLATTR: case SKF_AD_OFF + SKF_AD_NLATTR_NEST: case SKF_AD_OFF + SKF_AD_CPU: + case SKF_AD_OFF + SKF_AD_RANDOM: /* arg1 = ctx */ insn->code = BPF_ALU64 | BPF_MOV | BPF_X; insn->a_reg = ARG1_REG; @@ -812,6 +819,9 @@ static bool convert_bpf_extensions(struct sock_filter *fp, case SKF_AD_OFF + SKF_AD_CPU: insn->imm = __get_raw_cpu_id - __bpf_call_base; break; + case SKF_AD_OFF + SKF_AD_RANDOM: + insn->imm = __get_random_u32 - __bpf_call_base; + break; } break; @@ -1362,6 +1372,7 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) ANCILLARY(VLAN_TAG); ANCILLARY(VLAN_TAG_PRESENT); ANCILLARY(PAY_OFFSET); + ANCILLARY(RANDOM); } /* ancillary operation unknown or unsupported */ @@ -1746,6 +1757,7 @@ void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to) [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_PAY_OFFSET] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_RANDOM] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_LD_W_LEN] = BPF_LD|BPF_W|BPF_LEN, [BPF_S_LD_W_IND] = BPF_LD|BPF_W|BPF_IND, [BPF_S_LD_H_IND] = BPF_LD|BPF_H|BPF_IND, diff --git a/tools/net/bpf_exp.l b/tools/net/bpf_exp.l index bf7be77ddd62..833a96611da6 100644 --- a/tools/net/bpf_exp.l +++ b/tools/net/bpf_exp.l @@ -92,6 +92,7 @@ extern void yyerror(const char *str); "#"?("cpu") { return K_CPU; } "#"?("vlan_tci") { return K_VLANT; } "#"?("vlan_pr") { return K_VLANP; } +"#"?("rand") { return K_RAND; } ":" { return ':'; } "," { return ','; } diff --git a/tools/net/bpf_exp.y b/tools/net/bpf_exp.y index d15efc989ef5..e6306c51c26f 100644 --- a/tools/net/bpf_exp.y +++ b/tools/net/bpf_exp.y @@ -56,7 +56,7 @@ static void bpf_set_jmp_label(char *label, enum jmp_type type); %token OP_LDXI %token K_PKT_LEN K_PROTO K_TYPE K_NLATTR K_NLATTR_NEST K_MARK K_QUEUE K_HATYPE -%token K_RXHASH K_CPU K_IFIDX K_VLANT K_VLANP K_POFF +%token K_RXHASH K_CPU K_IFIDX K_VLANT K_VLANP K_POFF K_RAND %token ':' ',' '[' ']' '(' ')' 'x' 'a' '+' 'M' '*' '&' '#' '%' @@ -164,6 +164,9 @@ ldb | OP_LDB K_POFF { bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_PAY_OFFSET); } + | OP_LDB K_RAND { + bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, + SKF_AD_OFF + SKF_AD_RANDOM); } ; ldh @@ -212,6 +215,9 @@ ldh | OP_LDH K_POFF { bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_PAY_OFFSET); } + | OP_LDH K_RAND { + bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, + SKF_AD_OFF + SKF_AD_RANDOM); } ; ldi @@ -265,6 +271,9 @@ ld | OP_LD K_POFF { bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_PAY_OFFSET); } + | OP_LD K_RAND { + bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, + SKF_AD_OFF + SKF_AD_RANDOM); } | OP_LD 'M' '[' number ']' { bpf_set_curr_instr(BPF_LD | BPF_MEM, 0, 0, $4); } | OP_LD '[' 'x' '+' number ']' { -- cgit v1.2.3-59-g8ed1b From 4f520900522fd596e336c07e9aafd5b7a9564235 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Tue, 22 Apr 2014 21:31:54 -0400 Subject: netlink: have netlink per-protocol bind function return an error code. Have the netlink per-protocol optional bind function return an int error code rather than void to signal a failure. This will enable netlink protocols to perform extra checks including capabilities and permissions verifications when updating memberships in multicast groups. In netlink_bind() and netlink_setsockopt() the call to the per-protocol bind function was moved above the multicast group update to prevent any access to the multicast socket groups before checking with the per-protocol bind function. This will enable the per-protocol bind function to be used to check permissions which could be denied before making them available, and to avoid the messy job of undoing the addition should the per-protocol bind function fail. The netfilter subsystem seems to be the only one currently using the per-protocol bind function. Signed-off-by: Richard Guy Briggs Signed-off-by: David S. Miller --- include/linux/netlink.h | 3 ++- net/netfilter/nfnetlink.c | 3 ++- net/netlink/af_netlink.c | 68 +++++++++++++++++++++++++++++++++-------------- net/netlink/af_netlink.h | 6 +++-- 4 files changed, 56 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index aad8eeaf416d..5146ce066498 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -45,7 +45,8 @@ struct netlink_kernel_cfg { unsigned int flags; void (*input)(struct sk_buff *skb); struct mutex *cb_mutex; - void (*bind)(int group); + int (*bind)(int group); + void (*unbind)(int group); bool (*compare)(struct net *net, struct sock *sk); }; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 0df800a454ec..6e42dcfad40a 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -400,7 +400,7 @@ static void nfnetlink_rcv(struct sk_buff *skb) } #ifdef CONFIG_MODULES -static void nfnetlink_bind(int group) +static int nfnetlink_bind(int group) { const struct nfnetlink_subsystem *ss; int type = nfnl_group2type[group]; @@ -410,6 +410,7 @@ static void nfnetlink_bind(int group) rcu_read_unlock(); if (!ss) request_module("nfnetlink-subsys-%d", type); + return 0; } #endif diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 894cda0206bb..7e8d229bc010 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1206,7 +1206,8 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, struct module *module = NULL; struct mutex *cb_mutex; struct netlink_sock *nlk; - void (*bind)(int group); + int (*bind)(int group); + void (*unbind)(int group); int err = 0; sock->state = SS_UNCONNECTED; @@ -1232,6 +1233,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, err = -EPROTONOSUPPORT; cb_mutex = nl_table[protocol].cb_mutex; bind = nl_table[protocol].bind; + unbind = nl_table[protocol].unbind; netlink_unlock_table(); if (err < 0) @@ -1248,6 +1250,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, nlk = nlk_sk(sock->sk); nlk->module = module; nlk->netlink_bind = bind; + nlk->netlink_unbind = unbind; out: return err; @@ -1301,6 +1304,7 @@ static int netlink_release(struct socket *sock) kfree_rcu(old, rcu); nl_table[sk->sk_protocol].module = NULL; nl_table[sk->sk_protocol].bind = NULL; + nl_table[sk->sk_protocol].unbind = NULL; nl_table[sk->sk_protocol].flags = 0; nl_table[sk->sk_protocol].registered = 0; } @@ -1411,6 +1415,19 @@ static int netlink_realloc_groups(struct sock *sk) return err; } +static void netlink_unbind(int group, long unsigned int groups, + struct netlink_sock *nlk) +{ + int undo; + + if (!nlk->netlink_unbind) + return; + + for (undo = 0; undo < group; undo++) + if (test_bit(group, &groups)) + nlk->netlink_unbind(undo); +} + static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { @@ -1419,6 +1436,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, struct netlink_sock *nlk = nlk_sk(sk); struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; int err; + long unsigned int groups = nladdr->nl_groups; if (addr_len < sizeof(struct sockaddr_nl)) return -EINVAL; @@ -1427,7 +1445,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, return -EINVAL; /* Only superuser is allowed to listen multicasts */ - if (nladdr->nl_groups) { + if (groups) { if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV)) return -EPERM; err = netlink_realloc_groups(sk); @@ -1435,37 +1453,45 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, return err; } - if (nlk->portid) { + if (nlk->portid) if (nladdr->nl_pid != nlk->portid) return -EINVAL; - } else { + + if (nlk->netlink_bind && groups) { + int group; + + for (group = 0; group < nlk->ngroups; group++) { + if (!test_bit(group, &groups)) + continue; + err = nlk->netlink_bind(group); + if (!err) + continue; + netlink_unbind(group, groups, nlk); + return err; + } + } + + if (!nlk->portid) { err = nladdr->nl_pid ? netlink_insert(sk, net, nladdr->nl_pid) : netlink_autobind(sock); - if (err) + if (err) { + netlink_unbind(nlk->ngroups - 1, groups, nlk); return err; + } } - if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0])) + if (!groups && (nlk->groups == NULL || !(u32)nlk->groups[0])) return 0; netlink_table_grab(); netlink_update_subscriptions(sk, nlk->subscriptions + - hweight32(nladdr->nl_groups) - + hweight32(groups) - hweight32(nlk->groups[0])); - nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups; + nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | groups; netlink_update_listeners(sk); netlink_table_ungrab(); - if (nlk->netlink_bind && nlk->groups[0]) { - int i; - - for (i = 0; i < nlk->ngroups; i++) { - if (test_bit(i, nlk->groups)) - nlk->netlink_bind(i); - } - } - return 0; } @@ -2103,14 +2129,16 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, return err; if (!val || val - 1 >= nlk->ngroups) return -EINVAL; + if (nlk->netlink_bind) { + err = nlk->netlink_bind(val); + if (err) + return err; + } netlink_table_grab(); netlink_update_socket_mc(nlk, val, optname == NETLINK_ADD_MEMBERSHIP); netlink_table_ungrab(); - if (nlk->netlink_bind) - nlk->netlink_bind(val); - err = 0; break; } diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index ed13a790b00e..0b59d441f5b6 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -38,7 +38,8 @@ struct netlink_sock { struct mutex *cb_mutex; struct mutex cb_def_mutex; void (*netlink_rcv)(struct sk_buff *skb); - void (*netlink_bind)(int group); + int (*netlink_bind)(int group); + void (*netlink_unbind)(int group); struct module *module; #ifdef CONFIG_NETLINK_MMAP struct mutex pg_vec_lock; @@ -74,7 +75,8 @@ struct netlink_table { unsigned int groups; struct mutex *cb_mutex; struct module *module; - void (*bind)(int group); + int (*bind)(int group); + void (*unbind)(int group); bool (*compare)(struct net *net, struct sock *sock); int registered; }; -- cgit v1.2.3-59-g8ed1b From 4af619ae2ccf47a2b3a108e1926736484721370f Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Thu, 24 Apr 2014 19:09:05 +0200 Subject: at86rf230: use irq_get_trigger_type This patch removes the platform data for the irq_type. We use instead the irq_get_trigger_type function to get these flags which should already configured by the interrupt controller. Signed-off-by: Alexander Aring Signed-off-by: David S. Miller --- drivers/net/ieee802154/at86rf230.c | 28 ++++++++-------------------- include/linux/spi/at86rf230.h | 14 -------------- 2 files changed, 8 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index e36f194673a4..17b9c9aea9be 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -970,8 +971,7 @@ static int at86rf230_irq_polarity(struct at86rf230_local *lp, int pol) static int at86rf230_hw_init(struct at86rf230_local *lp) { - struct at86rf230_platform_data *pdata = lp->spi->dev.platform_data; - int rc, irq_pol; + int rc, irq_pol, irq_type; u8 status; u8 csma_seed[2]; @@ -983,8 +983,9 @@ static int at86rf230_hw_init(struct at86rf230_local *lp) if (rc) return rc; + irq_type = irq_get_trigger_type(lp->spi->irq); /* configure irq polarity, defaults to high active */ - if (pdata->irq_type & (IRQF_TRIGGER_FALLING | IRQF_TRIGGER_LOW)) + if (irq_type & (IRQF_TRIGGER_FALLING | IRQF_TRIGGER_LOW)) irq_pol = IRQ_ACTIVE_LOW; else irq_pol = IRQ_ACTIVE_HIGH; @@ -1032,7 +1033,6 @@ static struct at86rf230_platform_data * at86rf230_get_pdata(struct spi_device *spi) { struct at86rf230_platform_data *pdata; - const char *irq_type; if (!IS_ENABLED(CONFIG_OF) || !spi->dev.of_node) return spi->dev.platform_data; @@ -1044,19 +1044,6 @@ at86rf230_get_pdata(struct spi_device *spi) pdata->rstn = of_get_named_gpio(spi->dev.of_node, "reset-gpio", 0); pdata->slp_tr = of_get_named_gpio(spi->dev.of_node, "sleep-gpio", 0); - pdata->irq_type = IRQF_TRIGGER_RISING; - of_property_read_string(spi->dev.of_node, "irq-type", &irq_type); - if (!strcmp(irq_type, "level-high")) - pdata->irq_type = IRQF_TRIGGER_HIGH; - else if (!strcmp(irq_type, "level-low")) - pdata->irq_type = IRQF_TRIGGER_LOW; - else if (!strcmp(irq_type, "edge-rising")) - pdata->irq_type = IRQF_TRIGGER_RISING; - else if (!strcmp(irq_type, "edge-falling")) - pdata->irq_type = IRQF_TRIGGER_FALLING; - else - dev_warn(&spi->dev, "wrong irq-type specified using edge-rising\n"); - spi->dev.platform_data = pdata; done: return pdata; @@ -1071,7 +1058,7 @@ static int at86rf230_probe(struct spi_device *spi) u8 part = 0, version = 0, status; irq_handler_t irq_handler; work_func_t irq_worker; - int rc; + int rc, irq_type; const char *chip; struct ieee802154_ops *ops = NULL; @@ -1176,7 +1163,8 @@ static int at86rf230_probe(struct spi_device *spi) dev->extra_tx_headroom = 0; dev->flags = IEEE802154_HW_OMIT_CKSUM | IEEE802154_HW_AACK; - if (pdata->irq_type & (IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING)) { + irq_type = irq_get_trigger_type(spi->irq); + if (irq_type & (IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING)) { irq_worker = at86rf230_irqwork; irq_handler = at86rf230_isr; } else { @@ -1203,7 +1191,7 @@ static int at86rf230_probe(struct spi_device *spi) goto err_hw_init; rc = request_irq(spi->irq, irq_handler, - IRQF_SHARED | pdata->irq_type, + IRQF_SHARED | irq_type, dev_name(&spi->dev), lp); if (rc) goto err_hw_init; diff --git a/include/linux/spi/at86rf230.h b/include/linux/spi/at86rf230.h index aa327a8105ad..b2b1afbb3202 100644 --- a/include/linux/spi/at86rf230.h +++ b/include/linux/spi/at86rf230.h @@ -26,20 +26,6 @@ struct at86rf230_platform_data { int rstn; int slp_tr; int dig2; - - /* Setting the irq_type will configure the driver to request - * the platform irq trigger type according to the given value - * and configure the interrupt polarity of the device to the - * corresponding polarity. - * - * Allowed values are: IRQF_TRIGGER_RISING, IRQF_TRIGGER_FALLING, - * IRQF_TRIGGER_HIGH and IRQF_TRIGGER_LOW - * - * Setting it to 0, the driver does not touch the trigger type - * configuration of the interrupt and sets the interrupt polarity - * of the device to high active (the default value). - */ - int irq_type; }; #endif -- cgit v1.2.3-59-g8ed1b From 683399eddb9fff742b1a14c5a5d03e12bfc0afff Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Sun, 20 Apr 2014 18:57:36 -0600 Subject: netfilter: nfnetlink_acct: Adding quota support to accounting framework nfacct objects already support accounting at the byte and packet level. As such it is a natural extension to add the possiblity to define a ceiling limit for both metrics. All the support for quotas itself is added to nfnetlink acctounting framework to stay coherent with current accounting object management. Quota limit checks are implemented in xt_nfacct filter where statistic collection is already done. Pablo Neira Ayuso has also contributed to this feature. Signed-off-by: Mathieu Poirier Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink_acct.h | 8 ++- include/uapi/linux/netfilter/nfnetlink.h | 2 + include/uapi/linux/netfilter/nfnetlink_acct.h | 9 +++ net/netfilter/nfnetlink_acct.c | 85 +++++++++++++++++++++++++++ net/netfilter/xt_nfacct.c | 5 +- 5 files changed, 107 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink_acct.h b/include/linux/netfilter/nfnetlink_acct.h index b2e85e59f760..6ec975748742 100644 --- a/include/linux/netfilter/nfnetlink_acct.h +++ b/include/linux/netfilter/nfnetlink_acct.h @@ -3,11 +3,17 @@ #include +enum { + NFACCT_NO_QUOTA = -1, + NFACCT_UNDERQUOTA, + NFACCT_OVERQUOTA, +}; struct nf_acct; struct nf_acct *nfnl_acct_find_get(const char *filter_name); void nfnl_acct_put(struct nf_acct *acct); void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct); - +extern int nfnl_acct_overquota(const struct sk_buff *skb, + struct nf_acct *nfacct); #endif /* _NFNL_ACCT_H */ diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h index 596ddd45253c..354a7e5e50f2 100644 --- a/include/uapi/linux/netfilter/nfnetlink.h +++ b/include/uapi/linux/netfilter/nfnetlink.h @@ -20,6 +20,8 @@ enum nfnetlink_groups { #define NFNLGRP_CONNTRACK_EXP_DESTROY NFNLGRP_CONNTRACK_EXP_DESTROY NFNLGRP_NFTABLES, #define NFNLGRP_NFTABLES NFNLGRP_NFTABLES + NFNLGRP_ACCT_QUOTA, +#define NFNLGRP_ACCT_QUOTA NFNLGRP_ACCT_QUOTA __NFNLGRP_MAX, }; #define NFNLGRP_MAX (__NFNLGRP_MAX - 1) diff --git a/include/uapi/linux/netfilter/nfnetlink_acct.h b/include/uapi/linux/netfilter/nfnetlink_acct.h index c7b6269e760b..51404ec19022 100644 --- a/include/uapi/linux/netfilter/nfnetlink_acct.h +++ b/include/uapi/linux/netfilter/nfnetlink_acct.h @@ -10,15 +10,24 @@ enum nfnl_acct_msg_types { NFNL_MSG_ACCT_GET, NFNL_MSG_ACCT_GET_CTRZERO, NFNL_MSG_ACCT_DEL, + NFNL_MSG_ACCT_OVERQUOTA, NFNL_MSG_ACCT_MAX }; +enum nfnl_acct_flags { + NFACCT_F_QUOTA_PKTS = (1 << 0), + NFACCT_F_QUOTA_BYTES = (1 << 1), + NFACCT_F_OVERQUOTA = (1 << 2), /* can't be set from userspace */ +}; + enum nfnl_acct_type { NFACCT_UNSPEC, NFACCT_NAME, NFACCT_PKTS, NFACCT_BYTES, NFACCT_USE, + NFACCT_FLAGS, + NFACCT_QUOTA, __NFACCT_MAX }; #define NFACCT_MAX (__NFACCT_MAX - 1) diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index c7b6d466a662..70e86bbb3637 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -32,18 +32,24 @@ static LIST_HEAD(nfnl_acct_list); struct nf_acct { atomic64_t pkts; atomic64_t bytes; + unsigned long flags; struct list_head head; atomic_t refcnt; char name[NFACCT_NAME_MAX]; struct rcu_head rcu_head; + char data[0]; }; +#define NFACCT_F_QUOTA (NFACCT_F_QUOTA_PKTS | NFACCT_F_QUOTA_BYTES) + static int nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const tb[]) { struct nf_acct *nfacct, *matching = NULL; char *acct_name; + unsigned int size = 0; + u32 flags = 0; if (!tb[NFACCT_NAME]) return -EINVAL; @@ -68,15 +74,39 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, /* reset counters if you request a replacement. */ atomic64_set(&matching->pkts, 0); atomic64_set(&matching->bytes, 0); + smp_mb__before_clear_bit(); + /* reset overquota flag if quota is enabled. */ + if ((matching->flags & NFACCT_F_QUOTA)) + clear_bit(NFACCT_F_OVERQUOTA, &matching->flags); return 0; } return -EBUSY; } nfacct = kzalloc(sizeof(struct nf_acct), GFP_KERNEL); + if (tb[NFACCT_FLAGS]) { + flags = ntohl(nla_get_be32(tb[NFACCT_FLAGS])); + if (flags & ~NFACCT_F_QUOTA) + return -EOPNOTSUPP; + if ((flags & NFACCT_F_QUOTA) == NFACCT_F_QUOTA) + return -EINVAL; + if (flags & NFACCT_F_OVERQUOTA) + return -EINVAL; + + size += sizeof(u64); + } + + nfacct = kzalloc(sizeof(struct nf_acct) + size, GFP_KERNEL); if (nfacct == NULL) return -ENOMEM; + if (flags & NFACCT_F_QUOTA) { + u64 *quota = (u64 *)nfacct->data; + + *quota = be64_to_cpu(nla_get_be64(tb[NFACCT_QUOTA])); + nfacct->flags = flags; + } + strncpy(nfacct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX); if (tb[NFACCT_BYTES]) { @@ -117,6 +147,9 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, if (type == NFNL_MSG_ACCT_GET_CTRZERO) { pkts = atomic64_xchg(&acct->pkts, 0); bytes = atomic64_xchg(&acct->bytes, 0); + smp_mb__before_clear_bit(); + if (acct->flags & NFACCT_F_QUOTA) + clear_bit(NFACCT_F_OVERQUOTA, &acct->flags); } else { pkts = atomic64_read(&acct->pkts); bytes = atomic64_read(&acct->bytes); @@ -125,7 +158,13 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, nla_put_be64(skb, NFACCT_BYTES, cpu_to_be64(bytes)) || nla_put_be32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt)))) goto nla_put_failure; + if (acct->flags & NFACCT_F_QUOTA) { + u64 *quota = (u64 *)acct->data; + if (nla_put_be32(skb, NFACCT_FLAGS, htonl(acct->flags)) || + nla_put_be64(skb, NFACCT_QUOTA, cpu_to_be64(*quota))) + goto nla_put_failure; + } nlmsg_end(skb, nlh); return skb->len; @@ -270,6 +309,8 @@ static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = { [NFACCT_NAME] = { .type = NLA_NUL_STRING, .len = NFACCT_NAME_MAX-1 }, [NFACCT_BYTES] = { .type = NLA_U64 }, [NFACCT_PKTS] = { .type = NLA_U64 }, + [NFACCT_FLAGS] = { .type = NLA_U32 }, + [NFACCT_QUOTA] = { .type = NLA_U64 }, }; static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = { @@ -336,6 +377,50 @@ void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct) } EXPORT_SYMBOL_GPL(nfnl_acct_update); +static void nfnl_overquota_report(struct nf_acct *nfacct) +{ + int ret; + struct sk_buff *skb; + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); + if (skb == NULL) + return; + + ret = nfnl_acct_fill_info(skb, 0, 0, NFNL_MSG_ACCT_OVERQUOTA, 0, + nfacct); + if (ret <= 0) { + kfree_skb(skb); + return; + } + netlink_broadcast(init_net.nfnl, skb, 0, NFNLGRP_ACCT_QUOTA, + GFP_ATOMIC); +} + +int nfnl_acct_overquota(const struct sk_buff *skb, struct nf_acct *nfacct) +{ + u64 now; + u64 *quota; + int ret = NFACCT_UNDERQUOTA; + + /* no place here if we don't have a quota */ + if (!(nfacct->flags & NFACCT_F_QUOTA)) + return NFACCT_NO_QUOTA; + + quota = (u64 *)nfacct->data; + now = (nfacct->flags & NFACCT_F_QUOTA_PKTS) ? + atomic64_read(&nfacct->pkts) : atomic64_read(&nfacct->bytes); + + ret = now > *quota; + + if (now >= *quota && + !test_and_set_bit(NFACCT_F_OVERQUOTA, &nfacct->flags)) { + nfnl_overquota_report(nfacct); + } + + return ret; +} +EXPORT_SYMBOL_GPL(nfnl_acct_overquota); + static int __init nfnl_acct_init(void) { int ret; diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c index b3be0ef21f19..8c646ed9c921 100644 --- a/net/netfilter/xt_nfacct.c +++ b/net/netfilter/xt_nfacct.c @@ -21,11 +21,14 @@ MODULE_ALIAS("ip6t_nfacct"); static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par) { + int overquota; const struct xt_nfacct_match_info *info = par->targinfo; nfnl_acct_update(skb, info->nfacct); - return true; + overquota = nfnl_acct_overquota(skb, info->nfacct); + + return overquota == NFACCT_UNDERQUOTA ? false : true; } static int -- cgit v1.2.3-59-g8ed1b From 6d48f44b7b2af67b33c1ae5994b8f642685c8bc8 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Wed, 30 Apr 2014 15:23:33 +0300 Subject: mdio_bus: implement devm_mdiobus_alloc/devm_mdiobus_free Add a resource managed devm_mdiobus_alloc[_size]()/devm_mdiobus_free() to automatically clean up MDIO bus alocations made by MDIO drivers, thus leading to simplified MDIO drivers code. Cc: Florian Fainelli Cc: Sergei Shtylyov Acked-and-tested-by: Lad, Prabhakar Signed-off-by: Grygorii Strashko Signed-off-by: David S. Miller --- Documentation/driver-model/devres.txt | 5 +++ drivers/net/phy/mdio_bus.c | 67 +++++++++++++++++++++++++++++++++++ include/linux/phy.h | 7 ++++ 3 files changed, 79 insertions(+) (limited to 'include/linux') diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt index 4f7897e99cba..c74e04494ade 100644 --- a/Documentation/driver-model/devres.txt +++ b/Documentation/driver-model/devres.txt @@ -308,3 +308,8 @@ SLAVE DMA ENGINE SPI devm_spi_register_master() + +MDIO + devm_mdiobus_alloc() + devm_mdiobus_alloc_size() + devm_mdiobus_free() diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 76f54b32a120..68a9a3867c0f 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -69,6 +69,73 @@ struct mii_bus *mdiobus_alloc_size(size_t size) } EXPORT_SYMBOL(mdiobus_alloc_size); +static void _devm_mdiobus_free(struct device *dev, void *res) +{ + mdiobus_free(*(struct mii_bus **)res); +} + +static int devm_mdiobus_match(struct device *dev, void *res, void *data) +{ + struct mii_bus **r = res; + + if (WARN_ON(!r || !*r)) + return 0; + + return *r == data; +} + +/** + * devm_mdiobus_alloc_size - Resource-managed mdiobus_alloc_size() + * @dev: Device to allocate mii_bus for + * @sizeof_priv: Space to allocate for private structure. + * + * Managed mdiobus_alloc_size. mii_bus allocated with this function is + * automatically freed on driver detach. + * + * If an mii_bus allocated with this function needs to be freed separately, + * devm_mdiobus_free() must be used. + * + * RETURNS: + * Pointer to allocated mii_bus on success, NULL on failure. + */ +struct mii_bus *devm_mdiobus_alloc_size(struct device *dev, int sizeof_priv) +{ + struct mii_bus **ptr, *bus; + + ptr = devres_alloc(_devm_mdiobus_free, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return NULL; + + /* use raw alloc_dr for kmalloc caller tracing */ + bus = mdiobus_alloc_size(sizeof_priv); + if (bus) { + *ptr = bus; + devres_add(dev, ptr); + } else { + devres_free(ptr); + } + + return bus; +} +EXPORT_SYMBOL_GPL(devm_mdiobus_alloc); + +/** + * devm_mdiobus_free - Resource-managed mdiobus_free() + * @dev: Device this mii_bus belongs to + * @bus: the mii_bus associated with the device + * + * Free mii_bus allocated with devm_mdiobus_alloc_size(). + */ +void devm_mdiobus_free(struct device *dev, struct mii_bus *bus) +{ + int rc; + + rc = devres_release(dev, _devm_mdiobus_free, + devm_mdiobus_match, bus); + WARN_ON(rc); +} +EXPORT_SYMBOL_GPL(devm_mdiobus_free); + /** * mdiobus_release - mii_bus device release callback * @d: the target struct device that contains the mii_bus diff --git a/include/linux/phy.h b/include/linux/phy.h index 51d15f684e7e..864ddafad8cc 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -198,6 +198,13 @@ static inline struct mii_bus *mdiobus_alloc(void) int mdiobus_register(struct mii_bus *bus); void mdiobus_unregister(struct mii_bus *bus); void mdiobus_free(struct mii_bus *bus); +struct mii_bus *devm_mdiobus_alloc_size(struct device *dev, int sizeof_priv); +static inline struct mii_bus *devm_mdiobus_alloc(struct device *dev) +{ + return devm_mdiobus_alloc_size(dev, 0); +} + +void devm_mdiobus_free(struct device *dev, struct mii_bus *bus); struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr); int mdiobus_read(struct mii_bus *bus, int addr, u32 regnum); int mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val); -- cgit v1.2.3-59-g8ed1b From e114a710aa5058c0ba4aa1dfb105132aefeb5e04 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 30 Apr 2014 11:58:13 -0700 Subject: tcp: fix cwnd limited checking to improve congestion control Yuchung discovered tcp_is_cwnd_limited() was returning false in slow start phase even if the application filled the socket write queue. All congestion modules take into account tcp_is_cwnd_limited() before increasing cwnd, so this behavior limits slow start from probing the bandwidth at full speed. The problem is that even if write queue is full (aka we are _not_ application limited), cwnd can be under utilized if TSO should auto defer or TCP Small queues decided to hold packets. So the in_flight can be kept to smaller value, and we can get to the point tcp_is_cwnd_limited() returns false. With TCP Small Queues and FQ/pacing, this issue is more visible. We fix this by having tcp_cwnd_validate(), which is supposed to track such things, take into account unsent_segs, the number of segs that we are not sending at the moment due to TSO or TSQ, but intend to send real soon. Then when we are cwnd-limited, remember this fact while we are processing the window of ACKs that comes back. For example, suppose we have a brand new connection with cwnd=10; we are in slow start, and we send a flight of 9 packets. By the time we have received ACKs for all 9 packets we want our cwnd to be 18. We implement this by setting tp->lsnd_pending to 9, and considering ourselves to be cwnd-limited while cwnd is less than twice tp->lsnd_pending (2*9 -> 18). This makes tcp_is_cwnd_limited() more understandable, by removing the GSO/TSO kludge, that tried to work around the issue. Note the in_flight parameter can be removed in a followup cleanup patch. Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + include/net/tcp.h | 22 +++++++++++++++++++++- net/ipv4/tcp_cong.c | 20 -------------------- net/ipv4/tcp_output.c | 21 ++++++++++++++------- 4 files changed, 36 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 239946868142..4e37c71ecd74 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -230,6 +230,7 @@ struct tcp_sock { u32 snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */ u32 snd_cwnd_used; u32 snd_cwnd_stamp; + u32 lsnd_pending; /* packets inflight or unsent since last xmit */ u32 prior_cwnd; /* Congestion window at start of Recovery. */ u32 prr_delivered; /* Number of newly delivered packets to * receiver in Recovery. */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 163d2b467d78..a9fe7bc4f4bb 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -974,7 +974,27 @@ static inline u32 tcp_wnd_end(const struct tcp_sock *tp) { return tp->snd_una + tp->snd_wnd; } -bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight); + +/* We follow the spirit of RFC2861 to validate cwnd but implement a more + * flexible approach. The RFC suggests cwnd should not be raised unless + * it was fully used previously. But we allow cwnd to grow as long as the + * application has used half the cwnd. + * Example : + * cwnd is 10 (IW10), but application sends 9 frames. + * We allow cwnd to reach 18 when all frames are ACKed. + * This check is safe because it's as aggressive as slow start which already + * risks 100% overshoot. The advantage is that we discourage application to + * either send more filler packets or data to artificially blow up the cwnd + * usage, and allow application-limited process to probe bw more aggressively. + * + * TODO: remove in_flight once we can fix all callers, and their callers... + */ +static inline bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight) +{ + const struct tcp_sock *tp = tcp_sk(sk); + + return tp->snd_cwnd < 2 * tp->lsnd_pending; +} static inline void tcp_check_probe_timer(struct sock *sk) { diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 2b9464c93b88..a93b41ba05ff 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -276,26 +276,6 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) return err; } -/* RFC2861 Check whether we are limited by application or congestion window - * This is the inverse of cwnd check in tcp_tso_should_defer - */ -bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight) -{ - const struct tcp_sock *tp = tcp_sk(sk); - u32 left; - - if (in_flight >= tp->snd_cwnd) - return true; - - left = tp->snd_cwnd - in_flight; - if (sk_can_gso(sk) && - left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd && - left < tp->xmit_size_goal_segs) - return true; - return left <= tcp_max_tso_deferred_mss(tp); -} -EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited); - /* Slow start is used when congestion window is no greater than the slow start * threshold. We base on RFC2581 and also handle stretch ACKs properly. * We do not implement RFC3465 Appropriate Byte Counting (ABC) per se but diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 20847de991ea..f9181a133462 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1402,12 +1402,13 @@ static void tcp_cwnd_application_limited(struct sock *sk) tp->snd_cwnd_stamp = tcp_time_stamp; } -/* Congestion window validation. (RFC2861) */ -static void tcp_cwnd_validate(struct sock *sk) +static void tcp_cwnd_validate(struct sock *sk, u32 unsent_segs) { struct tcp_sock *tp = tcp_sk(sk); - if (tp->packets_out >= tp->snd_cwnd) { + tp->lsnd_pending = tp->packets_out + unsent_segs; + + if (tcp_is_cwnd_limited(sk, 0)) { /* Network is feed fully. */ tp->snd_cwnd_used = 0; tp->snd_cwnd_stamp = tcp_time_stamp; @@ -1880,7 +1881,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - unsigned int tso_segs, sent_pkts; + unsigned int tso_segs, sent_pkts, unsent_segs = 0; int cwnd_quota; int result; @@ -1924,7 +1925,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, break; } else { if (!push_one && tcp_tso_should_defer(sk, skb)) - break; + goto compute_unsent_segs; } /* TCP Small Queues : @@ -1949,8 +1950,14 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, * there is no smp_mb__after_set_bit() yet */ smp_mb__after_clear_bit(); - if (atomic_read(&sk->sk_wmem_alloc) > limit) + if (atomic_read(&sk->sk_wmem_alloc) > limit) { + u32 unsent_bytes; + +compute_unsent_segs: + unsent_bytes = tp->write_seq - tp->snd_nxt; + unsent_segs = DIV_ROUND_UP(unsent_bytes, mss_now); break; + } } limit = mss_now; @@ -1990,7 +1997,7 @@ repair: /* Send one loss probe per tail loss episode. */ if (push_one != 2) tcp_schedule_loss_probe(sk); - tcp_cwnd_validate(sk); + tcp_cwnd_validate(sk, unsent_segs); return false; } return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk)); -- cgit v1.2.3-59-g8ed1b From 5bcfedf06f7fdf9efcf65dc11198e9012f7530f4 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 1 May 2014 18:34:18 +0200 Subject: net: filter: simplify label names from jump-table This patch simplifies label naming for the BPF jump-table. When we define labels via DL(), we just concatenate/textify the combination of instruction opcode which consists of the class, subclass, word size, target register and so on. Each time we leave BPF_ prefix intact, so that e.g. the preprocessor generates a label BPF_ALU_BPF_ADD_BPF_X for DL(BPF_ALU, BPF_ADD, BPF_X) whereas a label name of ALU_ADD_X is much more easy to grasp. Pure cleanup only. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 3 + net/core/filter.c | 308 ++++++++++++++++++++++++------------------------- 2 files changed, 157 insertions(+), 154 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 759abf78dd61..b042d1db127f 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -37,6 +37,9 @@ #define BPF_CALL 0x80 /* function call */ #define BPF_EXIT 0x90 /* function return */ +/* Placeholder/dummy for 0 */ +#define BPF_0 0 + /* BPF has 10 general purpose 64-bit registers and stack frame. */ #define MAX_BPF_REG 11 diff --git a/net/core/filter.c b/net/core/filter.c index 7c4db3dd3d1e..a1784e95b049 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -156,94 +156,94 @@ unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn) static const void *jumptable[256] = { [0 ... 255] = &&default_label, /* Now overwrite non-defaults ... */ -#define DL(A, B, C) [A|B|C] = &&A##_##B##_##C - DL(BPF_ALU, BPF_ADD, BPF_X), - DL(BPF_ALU, BPF_ADD, BPF_K), - DL(BPF_ALU, BPF_SUB, BPF_X), - DL(BPF_ALU, BPF_SUB, BPF_K), - DL(BPF_ALU, BPF_AND, BPF_X), - DL(BPF_ALU, BPF_AND, BPF_K), - DL(BPF_ALU, BPF_OR, BPF_X), - DL(BPF_ALU, BPF_OR, BPF_K), - DL(BPF_ALU, BPF_LSH, BPF_X), - DL(BPF_ALU, BPF_LSH, BPF_K), - DL(BPF_ALU, BPF_RSH, BPF_X), - DL(BPF_ALU, BPF_RSH, BPF_K), - DL(BPF_ALU, BPF_XOR, BPF_X), - DL(BPF_ALU, BPF_XOR, BPF_K), - DL(BPF_ALU, BPF_MUL, BPF_X), - DL(BPF_ALU, BPF_MUL, BPF_K), - DL(BPF_ALU, BPF_MOV, BPF_X), - DL(BPF_ALU, BPF_MOV, BPF_K), - DL(BPF_ALU, BPF_DIV, BPF_X), - DL(BPF_ALU, BPF_DIV, BPF_K), - DL(BPF_ALU, BPF_MOD, BPF_X), - DL(BPF_ALU, BPF_MOD, BPF_K), - DL(BPF_ALU, BPF_NEG, 0), - DL(BPF_ALU, BPF_END, BPF_TO_BE), - DL(BPF_ALU, BPF_END, BPF_TO_LE), - DL(BPF_ALU64, BPF_ADD, BPF_X), - DL(BPF_ALU64, BPF_ADD, BPF_K), - DL(BPF_ALU64, BPF_SUB, BPF_X), - DL(BPF_ALU64, BPF_SUB, BPF_K), - DL(BPF_ALU64, BPF_AND, BPF_X), - DL(BPF_ALU64, BPF_AND, BPF_K), - DL(BPF_ALU64, BPF_OR, BPF_X), - DL(BPF_ALU64, BPF_OR, BPF_K), - DL(BPF_ALU64, BPF_LSH, BPF_X), - DL(BPF_ALU64, BPF_LSH, BPF_K), - DL(BPF_ALU64, BPF_RSH, BPF_X), - DL(BPF_ALU64, BPF_RSH, BPF_K), - DL(BPF_ALU64, BPF_XOR, BPF_X), - DL(BPF_ALU64, BPF_XOR, BPF_K), - DL(BPF_ALU64, BPF_MUL, BPF_X), - DL(BPF_ALU64, BPF_MUL, BPF_K), - DL(BPF_ALU64, BPF_MOV, BPF_X), - DL(BPF_ALU64, BPF_MOV, BPF_K), - DL(BPF_ALU64, BPF_ARSH, BPF_X), - DL(BPF_ALU64, BPF_ARSH, BPF_K), - DL(BPF_ALU64, BPF_DIV, BPF_X), - DL(BPF_ALU64, BPF_DIV, BPF_K), - DL(BPF_ALU64, BPF_MOD, BPF_X), - DL(BPF_ALU64, BPF_MOD, BPF_K), - DL(BPF_ALU64, BPF_NEG, 0), - DL(BPF_JMP, BPF_CALL, 0), - DL(BPF_JMP, BPF_JA, 0), - DL(BPF_JMP, BPF_JEQ, BPF_X), - DL(BPF_JMP, BPF_JEQ, BPF_K), - DL(BPF_JMP, BPF_JNE, BPF_X), - DL(BPF_JMP, BPF_JNE, BPF_K), - DL(BPF_JMP, BPF_JGT, BPF_X), - DL(BPF_JMP, BPF_JGT, BPF_K), - DL(BPF_JMP, BPF_JGE, BPF_X), - DL(BPF_JMP, BPF_JGE, BPF_K), - DL(BPF_JMP, BPF_JSGT, BPF_X), - DL(BPF_JMP, BPF_JSGT, BPF_K), - DL(BPF_JMP, BPF_JSGE, BPF_X), - DL(BPF_JMP, BPF_JSGE, BPF_K), - DL(BPF_JMP, BPF_JSET, BPF_X), - DL(BPF_JMP, BPF_JSET, BPF_K), - DL(BPF_JMP, BPF_EXIT, 0), - DL(BPF_STX, BPF_MEM, BPF_B), - DL(BPF_STX, BPF_MEM, BPF_H), - DL(BPF_STX, BPF_MEM, BPF_W), - DL(BPF_STX, BPF_MEM, BPF_DW), - DL(BPF_STX, BPF_XADD, BPF_W), - DL(BPF_STX, BPF_XADD, BPF_DW), - DL(BPF_ST, BPF_MEM, BPF_B), - DL(BPF_ST, BPF_MEM, BPF_H), - DL(BPF_ST, BPF_MEM, BPF_W), - DL(BPF_ST, BPF_MEM, BPF_DW), - DL(BPF_LDX, BPF_MEM, BPF_B), - DL(BPF_LDX, BPF_MEM, BPF_H), - DL(BPF_LDX, BPF_MEM, BPF_W), - DL(BPF_LDX, BPF_MEM, BPF_DW), - DL(BPF_LD, BPF_ABS, BPF_W), - DL(BPF_LD, BPF_ABS, BPF_H), - DL(BPF_LD, BPF_ABS, BPF_B), - DL(BPF_LD, BPF_IND, BPF_W), - DL(BPF_LD, BPF_IND, BPF_H), - DL(BPF_LD, BPF_IND, BPF_B), +#define DL(A, B, C) [BPF_##A|BPF_##B|BPF_##C] = &&A##_##B##_##C + DL(ALU, ADD, X), + DL(ALU, ADD, K), + DL(ALU, SUB, X), + DL(ALU, SUB, K), + DL(ALU, AND, X), + DL(ALU, AND, K), + DL(ALU, OR, X), + DL(ALU, OR, K), + DL(ALU, LSH, X), + DL(ALU, LSH, K), + DL(ALU, RSH, X), + DL(ALU, RSH, K), + DL(ALU, XOR, X), + DL(ALU, XOR, K), + DL(ALU, MUL, X), + DL(ALU, MUL, K), + DL(ALU, MOV, X), + DL(ALU, MOV, K), + DL(ALU, DIV, X), + DL(ALU, DIV, K), + DL(ALU, MOD, X), + DL(ALU, MOD, K), + DL(ALU, NEG, 0), + DL(ALU, END, TO_BE), + DL(ALU, END, TO_LE), + DL(ALU64, ADD, X), + DL(ALU64, ADD, K), + DL(ALU64, SUB, X), + DL(ALU64, SUB, K), + DL(ALU64, AND, X), + DL(ALU64, AND, K), + DL(ALU64, OR, X), + DL(ALU64, OR, K), + DL(ALU64, LSH, X), + DL(ALU64, LSH, K), + DL(ALU64, RSH, X), + DL(ALU64, RSH, K), + DL(ALU64, XOR, X), + DL(ALU64, XOR, K), + DL(ALU64, MUL, X), + DL(ALU64, MUL, K), + DL(ALU64, MOV, X), + DL(ALU64, MOV, K), + DL(ALU64, ARSH, X), + DL(ALU64, ARSH, K), + DL(ALU64, DIV, X), + DL(ALU64, DIV, K), + DL(ALU64, MOD, X), + DL(ALU64, MOD, K), + DL(ALU64, NEG, 0), + DL(JMP, CALL, 0), + DL(JMP, JA, 0), + DL(JMP, JEQ, X), + DL(JMP, JEQ, K), + DL(JMP, JNE, X), + DL(JMP, JNE, K), + DL(JMP, JGT, X), + DL(JMP, JGT, K), + DL(JMP, JGE, X), + DL(JMP, JGE, K), + DL(JMP, JSGT, X), + DL(JMP, JSGT, K), + DL(JMP, JSGE, X), + DL(JMP, JSGE, K), + DL(JMP, JSET, X), + DL(JMP, JSET, K), + DL(JMP, EXIT, 0), + DL(STX, MEM, B), + DL(STX, MEM, H), + DL(STX, MEM, W), + DL(STX, MEM, DW), + DL(STX, XADD, W), + DL(STX, XADD, DW), + DL(ST, MEM, B), + DL(ST, MEM, H), + DL(ST, MEM, W), + DL(ST, MEM, DW), + DL(LDX, MEM, B), + DL(LDX, MEM, H), + DL(LDX, MEM, W), + DL(LDX, MEM, DW), + DL(LD, ABS, W), + DL(LD, ABS, H), + DL(LD, ABS, B), + DL(LD, IND, W), + DL(LD, IND, H), + DL(LD, IND, B), #undef DL }; @@ -257,93 +257,93 @@ select_insn: /* ALU */ #define ALU(OPCODE, OP) \ - BPF_ALU64_##OPCODE##_BPF_X: \ + ALU64_##OPCODE##_X: \ A = A OP X; \ CONT; \ - BPF_ALU_##OPCODE##_BPF_X: \ + ALU_##OPCODE##_X: \ A = (u32) A OP (u32) X; \ CONT; \ - BPF_ALU64_##OPCODE##_BPF_K: \ + ALU64_##OPCODE##_K: \ A = A OP K; \ CONT; \ - BPF_ALU_##OPCODE##_BPF_K: \ + ALU_##OPCODE##_K: \ A = (u32) A OP (u32) K; \ CONT; - ALU(BPF_ADD, +) - ALU(BPF_SUB, -) - ALU(BPF_AND, &) - ALU(BPF_OR, |) - ALU(BPF_LSH, <<) - ALU(BPF_RSH, >>) - ALU(BPF_XOR, ^) - ALU(BPF_MUL, *) + ALU(ADD, +) + ALU(SUB, -) + ALU(AND, &) + ALU(OR, |) + ALU(LSH, <<) + ALU(RSH, >>) + ALU(XOR, ^) + ALU(MUL, *) #undef ALU - BPF_ALU_BPF_NEG_0: + ALU_NEG_0: A = (u32) -A; CONT; - BPF_ALU64_BPF_NEG_0: + ALU64_NEG_0: A = -A; CONT; - BPF_ALU_BPF_MOV_BPF_X: + ALU_MOV_X: A = (u32) X; CONT; - BPF_ALU_BPF_MOV_BPF_K: + ALU_MOV_K: A = (u32) K; CONT; - BPF_ALU64_BPF_MOV_BPF_X: + ALU64_MOV_X: A = X; CONT; - BPF_ALU64_BPF_MOV_BPF_K: + ALU64_MOV_K: A = K; CONT; - BPF_ALU64_BPF_ARSH_BPF_X: + ALU64_ARSH_X: (*(s64 *) &A) >>= X; CONT; - BPF_ALU64_BPF_ARSH_BPF_K: + ALU64_ARSH_K: (*(s64 *) &A) >>= K; CONT; - BPF_ALU64_BPF_MOD_BPF_X: + ALU64_MOD_X: if (unlikely(X == 0)) return 0; tmp = A; A = do_div(tmp, X); CONT; - BPF_ALU_BPF_MOD_BPF_X: + ALU_MOD_X: if (unlikely(X == 0)) return 0; tmp = (u32) A; A = do_div(tmp, (u32) X); CONT; - BPF_ALU64_BPF_MOD_BPF_K: + ALU64_MOD_K: tmp = A; A = do_div(tmp, K); CONT; - BPF_ALU_BPF_MOD_BPF_K: + ALU_MOD_K: tmp = (u32) A; A = do_div(tmp, (u32) K); CONT; - BPF_ALU64_BPF_DIV_BPF_X: + ALU64_DIV_X: if (unlikely(X == 0)) return 0; do_div(A, X); CONT; - BPF_ALU_BPF_DIV_BPF_X: + ALU_DIV_X: if (unlikely(X == 0)) return 0; tmp = (u32) A; do_div(tmp, (u32) X); A = (u32) tmp; CONT; - BPF_ALU64_BPF_DIV_BPF_K: + ALU64_DIV_K: do_div(A, K); CONT; - BPF_ALU_BPF_DIV_BPF_K: + ALU_DIV_K: tmp = (u32) A; do_div(tmp, (u32) K); A = (u32) tmp; CONT; - BPF_ALU_BPF_END_BPF_TO_BE: + ALU_END_TO_BE: switch (K) { case 16: A = (__force u16) cpu_to_be16(A); @@ -356,7 +356,7 @@ select_insn: break; } CONT; - BPF_ALU_BPF_END_BPF_TO_LE: + ALU_END_TO_LE: switch (K) { case 16: A = (__force u16) cpu_to_le16(A); @@ -371,7 +371,7 @@ select_insn: CONT; /* CALL */ - BPF_JMP_BPF_CALL_0: + JMP_CALL_0: /* Function call scratches R1-R5 registers, preserves R6-R9, * and stores return value into R0. */ @@ -380,122 +380,122 @@ select_insn: CONT; /* JMP */ - BPF_JMP_BPF_JA_0: + JMP_JA_0: insn += insn->off; CONT; - BPF_JMP_BPF_JEQ_BPF_X: + JMP_JEQ_X: if (A == X) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JEQ_BPF_K: + JMP_JEQ_K: if (A == K) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JNE_BPF_X: + JMP_JNE_X: if (A != X) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JNE_BPF_K: + JMP_JNE_K: if (A != K) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JGT_BPF_X: + JMP_JGT_X: if (A > X) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JGT_BPF_K: + JMP_JGT_K: if (A > K) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JGE_BPF_X: + JMP_JGE_X: if (A >= X) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JGE_BPF_K: + JMP_JGE_K: if (A >= K) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JSGT_BPF_X: - if (((s64)A) > ((s64)X)) { + JMP_JSGT_X: + if (((s64) A) > ((s64) X)) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JSGT_BPF_K: - if (((s64)A) > ((s64)K)) { + JMP_JSGT_K: + if (((s64) A) > ((s64) K)) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JSGE_BPF_X: - if (((s64)A) >= ((s64)X)) { + JMP_JSGE_X: + if (((s64) A) >= ((s64) X)) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JSGE_BPF_K: - if (((s64)A) >= ((s64)K)) { + JMP_JSGE_K: + if (((s64) A) >= ((s64) K)) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JSET_BPF_X: + JMP_JSET_X: if (A & X) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JSET_BPF_K: + JMP_JSET_K: if (A & K) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_EXIT_0: + JMP_EXIT_0: return R0; /* STX and ST and LDX*/ #define LDST(SIZEOP, SIZE) \ - BPF_STX_BPF_MEM_##SIZEOP: \ + STX_MEM_##SIZEOP: \ *(SIZE *)(unsigned long) (A + insn->off) = X; \ CONT; \ - BPF_ST_BPF_MEM_##SIZEOP: \ + ST_MEM_##SIZEOP: \ *(SIZE *)(unsigned long) (A + insn->off) = K; \ CONT; \ - BPF_LDX_BPF_MEM_##SIZEOP: \ + LDX_MEM_##SIZEOP: \ A = *(SIZE *)(unsigned long) (X + insn->off); \ CONT; - LDST(BPF_B, u8) - LDST(BPF_H, u16) - LDST(BPF_W, u32) - LDST(BPF_DW, u64) + LDST(B, u8) + LDST(H, u16) + LDST(W, u32) + LDST(DW, u64) #undef LDST - BPF_STX_BPF_XADD_BPF_W: /* lock xadd *(u32 *)(A + insn->off) += X */ + STX_XADD_W: /* lock xadd *(u32 *)(A + insn->off) += X */ atomic_add((u32) X, (atomic_t *)(unsigned long) (A + insn->off)); CONT; - BPF_STX_BPF_XADD_BPF_DW: /* lock xadd *(u64 *)(A + insn->off) += X */ + STX_XADD_DW: /* lock xadd *(u64 *)(A + insn->off) += X */ atomic64_add((u64) X, (atomic64_t *)(unsigned long) (A + insn->off)); CONT; - BPF_LD_BPF_ABS_BPF_W: /* R0 = ntohl(*(u32 *) (skb->data + K)) */ + LD_ABS_W: /* R0 = ntohl(*(u32 *) (skb->data + K)) */ off = K; load_word: /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are only @@ -524,7 +524,7 @@ load_word: CONT; } return 0; - BPF_LD_BPF_ABS_BPF_H: /* R0 = ntohs(*(u16 *) (skb->data + K)) */ + LD_ABS_H: /* R0 = ntohs(*(u16 *) (skb->data + K)) */ off = K; load_half: ptr = load_pointer((struct sk_buff *) ctx, off, 2, &tmp); @@ -533,7 +533,7 @@ load_half: CONT; } return 0; - BPF_LD_BPF_ABS_BPF_B: /* R0 = *(u8 *) (ctx + K) */ + LD_ABS_B: /* R0 = *(u8 *) (ctx + K) */ off = K; load_byte: ptr = load_pointer((struct sk_buff *) ctx, off, 1, &tmp); @@ -542,13 +542,13 @@ load_byte: CONT; } return 0; - BPF_LD_BPF_IND_BPF_W: /* R0 = ntohl(*(u32 *) (skb->data + X + K)) */ + LD_IND_W: /* R0 = ntohl(*(u32 *) (skb->data + X + K)) */ off = K + X; goto load_word; - BPF_LD_BPF_IND_BPF_H: /* R0 = ntohs(*(u16 *) (skb->data + X + K)) */ + LD_IND_H: /* R0 = ntohs(*(u16 *) (skb->data + X + K)) */ off = K + X; goto load_half; - BPF_LD_BPF_IND_BPF_B: /* R0 = *(u8 *) (skb->data + X + K) */ + LD_IND_B: /* R0 = *(u8 *) (skb->data + X + K) */ off = K + X; goto load_byte; -- cgit v1.2.3-59-g8ed1b From 30743837dd204d2b04fd4e9d3db78cc7b118c81a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 1 May 2014 18:34:19 +0200 Subject: net: filter: make register naming more comprehensible The current code is a bit hard to parse on which registers can be used, how they are mapped and all play together. It makes much more sense to define this a bit more clearly so that the code is a bit more intuitive. This patch cleans this up, and makes naming a bit more consistent among the code. This also allows for moving some of the defines into the header file. Clearing of A and X registers in __sk_run_filter() do not get a particular register name assigned as they have not an 'official' function, but rather just result from the concrete initial mapping of old BPF programs. Since for BPF helper functions for BPF_CALL we already use small letters, so be consistent here as well. No functional changes. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 44 ++++++++-- net/core/filter.c | 215 +++++++++++++++++++++++++------------------------ 2 files changed, 145 insertions(+), 114 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index b042d1db127f..ed1efab10b8f 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -40,16 +40,47 @@ /* Placeholder/dummy for 0 */ #define BPF_0 0 +/* Register numbers */ +enum { + BPF_REG_0 = 0, + BPF_REG_1, + BPF_REG_2, + BPF_REG_3, + BPF_REG_4, + BPF_REG_5, + BPF_REG_6, + BPF_REG_7, + BPF_REG_8, + BPF_REG_9, + BPF_REG_10, + __MAX_BPF_REG, +}; + /* BPF has 10 general purpose 64-bit registers and stack frame. */ -#define MAX_BPF_REG 11 +#define MAX_BPF_REG __MAX_BPF_REG + +/* ArgX, context and stack frame pointer register positions. Note, + * Arg1, Arg2, Arg3, etc are used as argument mappings of function + * calls in BPF_CALL instruction. + */ +#define BPF_REG_ARG1 BPF_REG_1 +#define BPF_REG_ARG2 BPF_REG_2 +#define BPF_REG_ARG3 BPF_REG_3 +#define BPF_REG_ARG4 BPF_REG_4 +#define BPF_REG_ARG5 BPF_REG_5 +#define BPF_REG_CTX BPF_REG_6 +#define BPF_REG_FP BPF_REG_10 + +/* Additional register mappings for converted user programs. */ +#define BPF_REG_A BPF_REG_0 +#define BPF_REG_X BPF_REG_7 +#define BPF_REG_TMP BPF_REG_8 /* BPF program can access up to 512 bytes of stack space. */ #define MAX_BPF_STACK 512 -/* Arg1, context and stack frame pointer register positions. */ -#define ARG1_REG 1 -#define CTX_REG 6 -#define FP_REG 10 +/* Macro to invoke filter function. */ +#define SK_RUN_FILTER(filter, ctx) (*filter->bpf_func)(ctx, filter->insnsi) struct sock_filter_int { __u8 code; /* opcode */ @@ -100,9 +131,6 @@ static inline unsigned int sk_filter_size(unsigned int proglen) #define sk_filter_proglen(fprog) \ (fprog->len * sizeof(fprog->filter[0])) -#define SK_RUN_FILTER(filter, ctx) \ - (*filter->bpf_func)(ctx, filter->insnsi) - int sk_filter(struct sock *sk, struct sk_buff *skb); u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx, diff --git a/net/core/filter.c b/net/core/filter.c index a1784e95b049..c93ca8d66f37 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -45,6 +45,27 @@ #include #include +/* Registers */ +#define R0 regs[BPF_REG_0] +#define R1 regs[BPF_REG_1] +#define R2 regs[BPF_REG_2] +#define R3 regs[BPF_REG_3] +#define R4 regs[BPF_REG_4] +#define R5 regs[BPF_REG_5] +#define R6 regs[BPF_REG_6] +#define R7 regs[BPF_REG_7] +#define R8 regs[BPF_REG_8] +#define R9 regs[BPF_REG_9] +#define R10 regs[BPF_REG_10] + +/* Named registers */ +#define A regs[insn->a_reg] +#define X regs[insn->x_reg] +#define FP regs[BPF_REG_FP] +#define ARG1 regs[BPF_REG_ARG1] +#define CTX regs[BPF_REG_CTX] +#define K insn->imm + /* No hurry in this branch * * Exported for the bpf jit load helper. @@ -122,13 +143,6 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) return 0; } -/* Register mappings for user programs. */ -#define A_REG 0 -#define X_REG 7 -#define TMP_REG 8 -#define ARG2_REG 2 -#define ARG3_REG 3 - /** * __sk_run_filter - run a filter on a given context * @ctx: buffer to run the filter on @@ -142,17 +156,6 @@ unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn) { u64 stack[MAX_BPF_STACK / sizeof(u64)]; u64 regs[MAX_BPF_REG], tmp; - void *ptr; - int off; - -#define K insn->imm -#define A regs[insn->a_reg] -#define X regs[insn->x_reg] -#define R0 regs[0] - -#define CONT ({insn++; goto select_insn; }) -#define CONT_JMP ({insn++; goto select_insn; }) - static const void *jumptable[256] = { [0 ... 255] = &&default_label, /* Now overwrite non-defaults ... */ @@ -246,11 +249,18 @@ unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn) DL(LD, IND, B), #undef DL }; + void *ptr; + int off; - regs[FP_REG] = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; - regs[ARG1_REG] = (u64) (unsigned long) ctx; - regs[A_REG] = 0; - regs[X_REG] = 0; +#define CONT ({ insn++; goto select_insn; }) +#define CONT_JMP ({ insn++; goto select_insn; }) + + FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; + ARG1 = (u64) (unsigned long) ctx; + + /* Register for user BPF programs need to be reset first. */ + regs[BPF_REG_A] = 0; + regs[BPF_REG_X] = 0; select_insn: goto *jumptable[insn->code]; @@ -375,8 +385,7 @@ select_insn: /* Function call scratches R1-R5 registers, preserves R6-R9, * and stores return value into R0. */ - R0 = (__bpf_call_base + insn->imm)(regs[1], regs[2], regs[3], - regs[4], regs[5]); + R0 = (__bpf_call_base + insn->imm)(R1, R2, R3, R4, R5); CONT; /* JMP */ @@ -500,7 +509,7 @@ select_insn: load_word: /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are only * appearing in the programs where ctx == skb. All programs - * keep 'ctx' in regs[CTX_REG] == R6, sk_convert_filter() + * keep 'ctx' in regs[BPF_REG_CTX] == R6, sk_convert_filter() * saves it in R6, internal BPF verifier will check that * R6 == ctx. * @@ -556,13 +565,6 @@ load_byte: /* If we ever reach this, we have a bug somewhere. */ WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code); return 0; -#undef CONT_JMP -#undef CONT - -#undef R0 -#undef X -#undef A -#undef K } u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx, @@ -594,14 +596,14 @@ static unsigned int pkt_type_offset(void) return -1; } -static u64 __skb_get_pay_offset(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) +static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) { struct sk_buff *skb = (struct sk_buff *)(long) ctx; return __skb_get_poff(skb); } -static u64 __skb_get_nlattr(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) +static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) { struct sk_buff *skb = (struct sk_buff *)(long) ctx; struct nlattr *nla; @@ -612,17 +614,17 @@ static u64 __skb_get_nlattr(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) if (skb->len < sizeof(struct nlattr)) return 0; - if (A > skb->len - sizeof(struct nlattr)) + if (a > skb->len - sizeof(struct nlattr)) return 0; - nla = nla_find((struct nlattr *) &skb->data[A], skb->len - A, X); + nla = nla_find((struct nlattr *) &skb->data[a], skb->len - a, x); if (nla) return (void *) nla - (void *) skb->data; return 0; } -static u64 __skb_get_nlattr_nest(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) +static u64 __skb_get_nlattr_nest(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) { struct sk_buff *skb = (struct sk_buff *)(long) ctx; struct nlattr *nla; @@ -633,27 +635,27 @@ static u64 __skb_get_nlattr_nest(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) if (skb->len < sizeof(struct nlattr)) return 0; - if (A > skb->len - sizeof(struct nlattr)) + if (a > skb->len - sizeof(struct nlattr)) return 0; - nla = (struct nlattr *) &skb->data[A]; - if (nla->nla_len > skb->len - A) + nla = (struct nlattr *) &skb->data[a]; + if (nla->nla_len > skb->len - a) return 0; - nla = nla_find_nested(nla, X); + nla = nla_find_nested(nla, x); if (nla) return (void *) nla - (void *) skb->data; return 0; } -static u64 __get_raw_cpu_id(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) +static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) { return raw_smp_processor_id(); } /* note that this only generates 32-bit random numbers */ -static u64 __get_random_u32(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) +static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) { return (u64)prandom_u32(); } @@ -668,28 +670,28 @@ static bool convert_bpf_extensions(struct sock_filter *fp, BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); insn->code = BPF_LDX | BPF_MEM | BPF_H; - insn->a_reg = A_REG; - insn->x_reg = CTX_REG; + insn->a_reg = BPF_REG_A; + insn->x_reg = BPF_REG_CTX; insn->off = offsetof(struct sk_buff, protocol); insn++; /* A = ntohs(A) [emitting a nop or swap16] */ insn->code = BPF_ALU | BPF_END | BPF_FROM_BE; - insn->a_reg = A_REG; + insn->a_reg = BPF_REG_A; insn->imm = 16; break; case SKF_AD_OFF + SKF_AD_PKTTYPE: insn->code = BPF_LDX | BPF_MEM | BPF_B; - insn->a_reg = A_REG; - insn->x_reg = CTX_REG; + insn->a_reg = BPF_REG_A; + insn->x_reg = BPF_REG_CTX; insn->off = pkt_type_offset(); if (insn->off < 0) return false; insn++; insn->code = BPF_ALU | BPF_AND | BPF_K; - insn->a_reg = A_REG; + insn->a_reg = BPF_REG_A; insn->imm = PKT_TYPE_MAX; break; @@ -699,13 +701,13 @@ static bool convert_bpf_extensions(struct sock_filter *fp, insn->code = BPF_LDX | BPF_MEM | BPF_DW; else insn->code = BPF_LDX | BPF_MEM | BPF_W; - insn->a_reg = TMP_REG; - insn->x_reg = CTX_REG; + insn->a_reg = BPF_REG_TMP; + insn->x_reg = BPF_REG_CTX; insn->off = offsetof(struct sk_buff, dev); insn++; insn->code = BPF_JMP | BPF_JNE | BPF_K; - insn->a_reg = TMP_REG; + insn->a_reg = BPF_REG_TMP; insn->imm = 0; insn->off = 1; insn++; @@ -716,8 +718,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2); - insn->a_reg = A_REG; - insn->x_reg = TMP_REG; + insn->a_reg = BPF_REG_A; + insn->x_reg = BPF_REG_TMP; if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX) { insn->code = BPF_LDX | BPF_MEM | BPF_W; @@ -732,8 +734,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); insn->code = BPF_LDX | BPF_MEM | BPF_W; - insn->a_reg = A_REG; - insn->x_reg = CTX_REG; + insn->a_reg = BPF_REG_A; + insn->x_reg = BPF_REG_CTX; insn->off = offsetof(struct sk_buff, mark); break; @@ -741,8 +743,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); insn->code = BPF_LDX | BPF_MEM | BPF_W; - insn->a_reg = A_REG; - insn->x_reg = CTX_REG; + insn->a_reg = BPF_REG_A; + insn->x_reg = BPF_REG_CTX; insn->off = offsetof(struct sk_buff, hash); break; @@ -750,8 +752,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); insn->code = BPF_LDX | BPF_MEM | BPF_H; - insn->a_reg = A_REG; - insn->x_reg = CTX_REG; + insn->a_reg = BPF_REG_A; + insn->x_reg = BPF_REG_CTX; insn->off = offsetof(struct sk_buff, queue_mapping); break; @@ -760,8 +762,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); insn->code = BPF_LDX | BPF_MEM | BPF_H; - insn->a_reg = A_REG; - insn->x_reg = CTX_REG; + insn->a_reg = BPF_REG_A; + insn->x_reg = BPF_REG_CTX; insn->off = offsetof(struct sk_buff, vlan_tci); insn++; @@ -769,16 +771,16 @@ static bool convert_bpf_extensions(struct sock_filter *fp, if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) { insn->code = BPF_ALU | BPF_AND | BPF_K; - insn->a_reg = A_REG; + insn->a_reg = BPF_REG_A; insn->imm = ~VLAN_TAG_PRESENT; } else { insn->code = BPF_ALU | BPF_RSH | BPF_K; - insn->a_reg = A_REG; + insn->a_reg = BPF_REG_A; insn->imm = 12; insn++; insn->code = BPF_ALU | BPF_AND | BPF_K; - insn->a_reg = A_REG; + insn->a_reg = BPF_REG_A; insn->imm = 1; } break; @@ -790,20 +792,20 @@ static bool convert_bpf_extensions(struct sock_filter *fp, case SKF_AD_OFF + SKF_AD_RANDOM: /* arg1 = ctx */ insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = ARG1_REG; - insn->x_reg = CTX_REG; + insn->a_reg = BPF_REG_ARG1; + insn->x_reg = BPF_REG_CTX; insn++; /* arg2 = A */ insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = ARG2_REG; - insn->x_reg = A_REG; + insn->a_reg = BPF_REG_ARG2; + insn->x_reg = BPF_REG_A; insn++; /* arg3 = X */ insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = ARG3_REG; - insn->x_reg = X_REG; + insn->a_reg = BPF_REG_ARG3; + insn->x_reg = BPF_REG_X; insn++; /* Emit call(ctx, arg2=A, arg3=X) */ @@ -829,8 +831,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, case SKF_AD_OFF + SKF_AD_ALU_XOR_X: insn->code = BPF_ALU | BPF_XOR | BPF_X; - insn->a_reg = A_REG; - insn->x_reg = X_REG; + insn->a_reg = BPF_REG_A; + insn->x_reg = BPF_REG_X; break; default: @@ -880,7 +882,7 @@ int sk_convert_filter(struct sock_filter *prog, int len, u8 bpf_src; BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK); - BUILD_BUG_ON(FP_REG + 1 != MAX_BPF_REG); + BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG); if (len <= 0 || len >= BPF_MAXINSNS) return -EINVAL; @@ -897,8 +899,8 @@ do_pass: if (new_insn) { new_insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - new_insn->a_reg = CTX_REG; - new_insn->x_reg = ARG1_REG; + new_insn->a_reg = BPF_REG_CTX; + new_insn->x_reg = BPF_REG_ARG1; } new_insn++; @@ -948,8 +950,8 @@ do_pass: break; insn->code = fp->code; - insn->a_reg = A_REG; - insn->x_reg = X_REG; + insn->a_reg = BPF_REG_A; + insn->x_reg = BPF_REG_X; insn->imm = fp->k; break; @@ -983,16 +985,16 @@ do_pass: * in compare insn. */ insn->code = BPF_ALU | BPF_MOV | BPF_K; - insn->a_reg = TMP_REG; + insn->a_reg = BPF_REG_TMP; insn->imm = fp->k; insn++; - insn->a_reg = A_REG; - insn->x_reg = TMP_REG; + insn->a_reg = BPF_REG_A; + insn->x_reg = BPF_REG_TMP; bpf_src = BPF_X; } else { - insn->a_reg = A_REG; - insn->x_reg = X_REG; + insn->a_reg = BPF_REG_A; + insn->x_reg = BPF_REG_X; insn->imm = fp->k; bpf_src = BPF_SRC(fp->code); } @@ -1027,33 +1029,33 @@ do_pass: /* ldxb 4 * ([14] & 0xf) is remaped into 6 insns. */ case BPF_LDX | BPF_MSH | BPF_B: insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = TMP_REG; - insn->x_reg = A_REG; + insn->a_reg = BPF_REG_TMP; + insn->x_reg = BPF_REG_A; insn++; insn->code = BPF_LD | BPF_ABS | BPF_B; - insn->a_reg = A_REG; + insn->a_reg = BPF_REG_A; insn->imm = fp->k; insn++; insn->code = BPF_ALU | BPF_AND | BPF_K; - insn->a_reg = A_REG; + insn->a_reg = BPF_REG_A; insn->imm = 0xf; insn++; insn->code = BPF_ALU | BPF_LSH | BPF_K; - insn->a_reg = A_REG; + insn->a_reg = BPF_REG_A; insn->imm = 2; insn++; insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = X_REG; - insn->x_reg = A_REG; + insn->a_reg = BPF_REG_X; + insn->x_reg = BPF_REG_A; insn++; insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = A_REG; - insn->x_reg = TMP_REG; + insn->a_reg = BPF_REG_A; + insn->x_reg = BPF_REG_TMP; break; /* RET_K, RET_A are remaped into 2 insns. */ @@ -1063,7 +1065,7 @@ do_pass: (BPF_RVAL(fp->code) == BPF_K ? BPF_K : BPF_X); insn->a_reg = 0; - insn->x_reg = A_REG; + insn->x_reg = BPF_REG_A; insn->imm = fp->k; insn++; @@ -1074,8 +1076,9 @@ do_pass: case BPF_ST: case BPF_STX: insn->code = BPF_STX | BPF_MEM | BPF_W; - insn->a_reg = FP_REG; - insn->x_reg = fp->code == BPF_ST ? A_REG : X_REG; + insn->a_reg = BPF_REG_FP; + insn->x_reg = fp->code == BPF_ST ? + BPF_REG_A : BPF_REG_X; insn->off = -(BPF_MEMWORDS - fp->k) * 4; break; @@ -1084,8 +1087,8 @@ do_pass: case BPF_LDX | BPF_MEM: insn->code = BPF_LDX | BPF_MEM | BPF_W; insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ? - A_REG : X_REG; - insn->x_reg = FP_REG; + BPF_REG_A : BPF_REG_X; + insn->x_reg = BPF_REG_FP; insn->off = -(BPF_MEMWORDS - fp->k) * 4; break; @@ -1094,22 +1097,22 @@ do_pass: case BPF_LDX | BPF_IMM: insn->code = BPF_ALU | BPF_MOV | BPF_K; insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ? - A_REG : X_REG; + BPF_REG_A : BPF_REG_X; insn->imm = fp->k; break; /* X = A */ case BPF_MISC | BPF_TAX: insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = X_REG; - insn->x_reg = A_REG; + insn->a_reg = BPF_REG_X; + insn->x_reg = BPF_REG_A; break; /* A = X */ case BPF_MISC | BPF_TXA: insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = A_REG; - insn->x_reg = X_REG; + insn->a_reg = BPF_REG_A; + insn->x_reg = BPF_REG_X; break; /* A = skb->len or X = skb->len */ @@ -1117,16 +1120,16 @@ do_pass: case BPF_LDX | BPF_W | BPF_LEN: insn->code = BPF_LDX | BPF_MEM | BPF_W; insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ? - A_REG : X_REG; - insn->x_reg = CTX_REG; + BPF_REG_A : BPF_REG_X; + insn->x_reg = BPF_REG_CTX; insn->off = offsetof(struct sk_buff, len); break; /* access seccomp_data fields */ case BPF_LDX | BPF_ABS | BPF_W: insn->code = BPF_LDX | BPF_MEM | BPF_W; - insn->a_reg = A_REG; - insn->x_reg = CTX_REG; + insn->a_reg = BPF_REG_A; + insn->x_reg = BPF_REG_CTX; insn->off = fp->k; break; -- cgit v1.2.3-59-g8ed1b From 76ba0aae673075c77a8b775e9133c8e8b1a44563 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 2 May 2014 16:29:18 -0700 Subject: net: Generalize checksum_init functions Create a general __skb_checksum_validate function (actually a macro) to subsume the various checksum_init functions. This function can either init the checksum, or do the full validation (logically checksum_init+skb_check_complete)-- a flag specifies if full vaidation is performed. Also, there is a flag to the function to indicate that zero checksums are allowed (to support optional UDP checksums). Added several stub functions for calling __skb_checksum_validate. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 08074a810164..3ca0dda5a42e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2741,6 +2741,99 @@ static inline __sum16 skb_checksum_complete(struct sk_buff *skb) 0 : __skb_checksum_complete(skb); } +/* Check if we need to perform checksum complete validation. + * + * Returns true if checksum complete is needed, false otherwise + * (either checksum is unnecessary or zero checksum is allowed). + */ +static inline bool __skb_checksum_validate_needed(struct sk_buff *skb, + bool zero_okay, + __sum16 check) +{ + if (skb_csum_unnecessary(skb)) { + return false; + } else if (zero_okay && !check) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + return false; + } + + return true; +} + +/* For small packets <= CHECKSUM_BREAK peform checksum complete directly + * in checksum_init. + */ +#define CHECKSUM_BREAK 76 + +/* Validate (init) checksum based on checksum complete. + * + * Return values: + * 0: checksum is validated or try to in skb_checksum_complete. In the latter + * case the ip_summed will not be CHECKSUM_UNNECESSARY and the pseudo + * checksum is stored in skb->csum for use in __skb_checksum_complete + * non-zero: value of invalid checksum + * + */ +static inline __sum16 __skb_checksum_validate_complete(struct sk_buff *skb, + bool complete, + __wsum psum) +{ + if (skb->ip_summed == CHECKSUM_COMPLETE) { + if (!csum_fold(csum_add(psum, skb->csum))) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + return 0; + } + } + + skb->csum = psum; + + if (complete || skb->len <= CHECKSUM_BREAK) + return __skb_checksum_complete(skb); + + return 0; +} + +static inline __wsum null_compute_pseudo(struct sk_buff *skb, int proto) +{ + return 0; +} + +/* Perform checksum validate (init). Note that this is a macro since we only + * want to calculate the pseudo header which is an input function if necessary. + * First we try to validate without any computation (checksum unnecessary) and + * then calculate based on checksum complete calling the function to compute + * pseudo header. + * + * Return values: + * 0: checksum is validated or try to in skb_checksum_complete + * non-zero: value of invalid checksum + */ +#define __skb_checksum_validate(skb, proto, complete, \ + zero_okay, check, compute_pseudo) \ +({ \ + __sum16 __ret = 0; \ + if (__skb_checksum_validate_needed(skb, zero_okay, check)) \ + __ret = __skb_checksum_validate_complete(skb, \ + complete, compute_pseudo(skb, proto)); \ + __ret; \ +}) + +#define skb_checksum_init(skb, proto, compute_pseudo) \ + __skb_checksum_validate(skb, proto, false, false, 0, compute_pseudo) + +#define skb_checksum_init_zero_check(skb, proto, check, compute_pseudo) \ + __skb_checksum_validate(skb, proto, false, true, check, compute_pseudo) + +#define skb_checksum_validate(skb, proto, compute_pseudo) \ + __skb_checksum_validate(skb, proto, true, false, 0, compute_pseudo) + +#define skb_checksum_validate_zero_check(skb, proto, check, \ + compute_pseudo) \ + __skb_checksum_validate_(skb, proto, true, true, check, compute_pseudo) + +#define skb_checksum_simple_validate(skb) \ + __skb_checksum_validate(skb, 0, true, false, 0, null_compute_pseudo) + #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) void nf_conntrack_destroy(struct nf_conntrack *nfct); static inline void nf_conntrack_put(struct nf_conntrack *nfct) -- cgit v1.2.3-59-g8ed1b From 552a515707a5caf9fa9f3620d68fc28146c6b1b9 Mon Sep 17 00:00:00 2001 From: Helmut Schaa Date: Wed, 7 May 2014 09:28:31 +0200 Subject: ath9k: Allow platform override without EEPROM override Add a new platform data flag "use_eeprom" that indicates that the eeprom found on the card itself should be used instead of the one present in the platform data. This allows to override the MAC address of a PCI card while preserving the eeprom data from the card itself. The default behavior is preserved. Signed-off-by: Helmut Schaa Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/init.c | 2 +- include/linux/ath9k_platform.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c index a6e273a2c44b..bcc7cfb1866d 100644 --- a/drivers/net/wireless/ath/ath9k/init.c +++ b/drivers/net/wireless/ath/ath9k/init.c @@ -508,7 +508,7 @@ static int ath9k_init_softc(u16 devid, struct ath_softc *sc, sc->tx99_power = MAX_RATE_POWER + 1; init_waitqueue_head(&sc->tx_wait); - if (!pdata) { + if (!pdata || pdata->use_eeprom) { ah->ah_flags |= AH_USE_EEPROM; sc->sc_ah->led_pin = -1; } else { diff --git a/include/linux/ath9k_platform.h b/include/linux/ath9k_platform.h index 8598f8eacb20..a495a959e8a7 100644 --- a/include/linux/ath9k_platform.h +++ b/include/linux/ath9k_platform.h @@ -36,6 +36,8 @@ struct ath9k_platform_data { int (*get_mac_revision)(void); int (*external_reset)(void); + + bool use_eeprom; }; #endif /* _LINUX_ATH9K_PLATFORM_H */ -- cgit v1.2.3-59-g8ed1b From 9739eef13c926645fbf88bcb77e66442fa75d688 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 8 May 2014 14:10:51 -0700 Subject: net: filter: make BPF conversion more readable Introduce BPF helper macros to define instructions (similar to old BPF_STMT/BPF_JUMP macros) Use them while converting classic BPF to internal and in BPF testsuite later. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 51 ++++++++++++++++++ net/core/filter.c | 142 +++++++++++++++++-------------------------------- 2 files changed, 101 insertions(+), 92 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index ed1efab10b8f..4457b383961c 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -79,6 +79,57 @@ enum { /* BPF program can access up to 512 bytes of stack space. */ #define MAX_BPF_STACK 512 +/* bpf_add|sub|...: a += x, bpf_mov: a = x */ +#define BPF_ALU64_REG(op, a, x) \ + ((struct sock_filter_int) {BPF_ALU64|BPF_OP(op)|BPF_X, a, x, 0, 0}) +#define BPF_ALU32_REG(op, a, x) \ + ((struct sock_filter_int) {BPF_ALU|BPF_OP(op)|BPF_X, a, x, 0, 0}) + +/* bpf_add|sub|...: a += imm, bpf_mov: a = imm */ +#define BPF_ALU64_IMM(op, a, imm) \ + ((struct sock_filter_int) {BPF_ALU64|BPF_OP(op)|BPF_K, a, 0, 0, imm}) +#define BPF_ALU32_IMM(op, a, imm) \ + ((struct sock_filter_int) {BPF_ALU|BPF_OP(op)|BPF_K, a, 0, 0, imm}) + +/* R0 = *(uint *) (skb->data + off) */ +#define BPF_LD_ABS(size, off) \ + ((struct sock_filter_int) {BPF_LD|BPF_SIZE(size)|BPF_ABS, 0, 0, 0, off}) + +/* R0 = *(uint *) (skb->data + x + off) */ +#define BPF_LD_IND(size, x, off) \ + ((struct sock_filter_int) {BPF_LD|BPF_SIZE(size)|BPF_IND, 0, x, 0, off}) + +/* a = *(uint *) (x + off) */ +#define BPF_LDX_MEM(sz, a, x, off) \ + ((struct sock_filter_int) {BPF_LDX|BPF_SIZE(sz)|BPF_MEM, a, x, off, 0}) + +/* if (a 'op' x) goto pc+off */ +#define BPF_JMP_REG(op, a, x, off) \ + ((struct sock_filter_int) {BPF_JMP|BPF_OP(op)|BPF_X, a, x, off, 0}) + +/* if (a 'op' imm) goto pc+off */ +#define BPF_JMP_IMM(op, a, imm, off) \ + ((struct sock_filter_int) {BPF_JMP|BPF_OP(op)|BPF_K, a, 0, off, imm}) + +#define BPF_EXIT_INSN() \ + ((struct sock_filter_int) {BPF_JMP|BPF_EXIT, 0, 0, 0, 0}) + +static inline int size_to_bpf(int size) +{ + switch (size) { + case 1: + return BPF_B; + case 2: + return BPF_H; + case 4: + return BPF_W; + case 8: + return BPF_DW; + default: + return -EINVAL; + } +} + /* Macro to invoke filter function. */ #define SK_RUN_FILTER(filter, ctx) (*filter->bpf_func)(ctx, filter->insnsi) diff --git a/net/core/filter.c b/net/core/filter.c index eb020a7d6f55..9aaa05ad8fe3 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -668,10 +668,9 @@ static bool convert_bpf_extensions(struct sock_filter *fp, case SKF_AD_OFF + SKF_AD_PROTOCOL: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); - insn->code = BPF_LDX | BPF_MEM | BPF_H; - insn->a_reg = BPF_REG_A; - insn->x_reg = BPF_REG_CTX; - insn->off = offsetof(struct sk_buff, protocol); + /* A = *(u16 *) (ctx + offsetof(protocol)) */ + *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, + offsetof(struct sk_buff, protocol)); insn++; /* A = ntohs(A) [emitting a nop or swap16] */ @@ -681,37 +680,27 @@ static bool convert_bpf_extensions(struct sock_filter *fp, break; case SKF_AD_OFF + SKF_AD_PKTTYPE: - insn->code = BPF_LDX | BPF_MEM | BPF_B; - insn->a_reg = BPF_REG_A; - insn->x_reg = BPF_REG_CTX; - insn->off = pkt_type_offset(); + *insn = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX, + pkt_type_offset()); if (insn->off < 0) return false; insn++; - insn->code = BPF_ALU | BPF_AND | BPF_K; - insn->a_reg = BPF_REG_A; - insn->imm = PKT_TYPE_MAX; + *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, PKT_TYPE_MAX); break; case SKF_AD_OFF + SKF_AD_IFINDEX: case SKF_AD_OFF + SKF_AD_HATYPE: - if (FIELD_SIZEOF(struct sk_buff, dev) == 8) - insn->code = BPF_LDX | BPF_MEM | BPF_DW; - else - insn->code = BPF_LDX | BPF_MEM | BPF_W; - insn->a_reg = BPF_REG_TMP; - insn->x_reg = BPF_REG_CTX; - insn->off = offsetof(struct sk_buff, dev); + *insn = BPF_LDX_MEM(size_to_bpf(FIELD_SIZEOF(struct sk_buff, dev)), + BPF_REG_TMP, BPF_REG_CTX, + offsetof(struct sk_buff, dev)); insn++; - insn->code = BPF_JMP | BPF_JNE | BPF_K; - insn->a_reg = BPF_REG_TMP; - insn->imm = 0; - insn->off = 1; + /* if (tmp != 0) goto pc+1 */ + *insn = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1); insn++; - insn->code = BPF_JMP | BPF_EXIT; + *insn = BPF_EXIT_INSN(); insn++; BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); @@ -732,55 +721,45 @@ static bool convert_bpf_extensions(struct sock_filter *fp, case SKF_AD_OFF + SKF_AD_MARK: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); - insn->code = BPF_LDX | BPF_MEM | BPF_W; - insn->a_reg = BPF_REG_A; - insn->x_reg = BPF_REG_CTX; - insn->off = offsetof(struct sk_buff, mark); + *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, + offsetof(struct sk_buff, mark)); break; case SKF_AD_OFF + SKF_AD_RXHASH: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); - insn->code = BPF_LDX | BPF_MEM | BPF_W; - insn->a_reg = BPF_REG_A; - insn->x_reg = BPF_REG_CTX; - insn->off = offsetof(struct sk_buff, hash); + *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, + offsetof(struct sk_buff, hash)); break; case SKF_AD_OFF + SKF_AD_QUEUE: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); - insn->code = BPF_LDX | BPF_MEM | BPF_H; - insn->a_reg = BPF_REG_A; - insn->x_reg = BPF_REG_CTX; - insn->off = offsetof(struct sk_buff, queue_mapping); + *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, + offsetof(struct sk_buff, queue_mapping)); break; case SKF_AD_OFF + SKF_AD_VLAN_TAG: case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); - insn->code = BPF_LDX | BPF_MEM | BPF_H; - insn->a_reg = BPF_REG_A; - insn->x_reg = BPF_REG_CTX; - insn->off = offsetof(struct sk_buff, vlan_tci); + /* A = *(u16 *) (ctx + offsetof(vlan_tci)) */ + *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, + offsetof(struct sk_buff, vlan_tci)); insn++; BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) { - insn->code = BPF_ALU | BPF_AND | BPF_K; - insn->a_reg = BPF_REG_A; - insn->imm = ~VLAN_TAG_PRESENT; + *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, + ~VLAN_TAG_PRESENT); } else { - insn->code = BPF_ALU | BPF_RSH | BPF_K; - insn->a_reg = BPF_REG_A; - insn->imm = 12; + /* A >>= 12 */ + *insn = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 12); insn++; - insn->code = BPF_ALU | BPF_AND | BPF_K; - insn->a_reg = BPF_REG_A; - insn->imm = 1; + /* A &= 1 */ + *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 1); } break; @@ -790,21 +769,15 @@ static bool convert_bpf_extensions(struct sock_filter *fp, case SKF_AD_OFF + SKF_AD_CPU: case SKF_AD_OFF + SKF_AD_RANDOM: /* arg1 = ctx */ - insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = BPF_REG_ARG1; - insn->x_reg = BPF_REG_CTX; + *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_ARG1, BPF_REG_CTX); insn++; /* arg2 = A */ - insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = BPF_REG_ARG2; - insn->x_reg = BPF_REG_A; + *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_ARG2, BPF_REG_A); insn++; /* arg3 = X */ - insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = BPF_REG_ARG3; - insn->x_reg = BPF_REG_X; + *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_ARG3, BPF_REG_X); insn++; /* Emit call(ctx, arg2=A, arg3=X) */ @@ -829,9 +802,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, break; case SKF_AD_OFF + SKF_AD_ALU_XOR_X: - insn->code = BPF_ALU | BPF_XOR | BPF_X; - insn->a_reg = BPF_REG_A; - insn->x_reg = BPF_REG_X; + /* A ^= X */ + *insn = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_X); break; default: @@ -897,9 +869,7 @@ do_pass: fp = prog; if (new_insn) { - new_insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - new_insn->a_reg = BPF_REG_CTX; - new_insn->x_reg = BPF_REG_ARG1; + *new_insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_CTX, BPF_REG_ARG1); } new_insn++; @@ -1027,34 +997,28 @@ do_pass: /* ldxb 4 * ([14] & 0xf) is remaped into 6 insns. */ case BPF_LDX | BPF_MSH | BPF_B: - insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = BPF_REG_TMP; - insn->x_reg = BPF_REG_A; + /* tmp = A */ + *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_TMP, BPF_REG_A); insn++; - insn->code = BPF_LD | BPF_ABS | BPF_B; - insn->a_reg = BPF_REG_A; - insn->imm = fp->k; + /* A = R0 = *(u8 *) (skb->data + K) */ + *insn = BPF_LD_ABS(BPF_B, fp->k); insn++; - insn->code = BPF_ALU | BPF_AND | BPF_K; - insn->a_reg = BPF_REG_A; - insn->imm = 0xf; + /* A &= 0xf */ + *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf); insn++; - insn->code = BPF_ALU | BPF_LSH | BPF_K; - insn->a_reg = BPF_REG_A; - insn->imm = 2; + /* A <<= 2 */ + *insn = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2); insn++; - insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = BPF_REG_X; - insn->x_reg = BPF_REG_A; + /* X = A */ + *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_X, BPF_REG_A); insn++; - insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = BPF_REG_A; - insn->x_reg = BPF_REG_TMP; + /* A = tmp */ + *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_A, BPF_REG_TMP); break; /* RET_K, RET_A are remaped into 2 insns. */ @@ -1068,7 +1032,7 @@ do_pass: insn->imm = fp->k; insn++; - insn->code = BPF_JMP | BPF_EXIT; + *insn = BPF_EXIT_INSN(); break; /* Store to stack. */ @@ -1102,16 +1066,12 @@ do_pass: /* X = A */ case BPF_MISC | BPF_TAX: - insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = BPF_REG_X; - insn->x_reg = BPF_REG_A; + *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_X, BPF_REG_A); break; /* A = X */ case BPF_MISC | BPF_TXA: - insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = BPF_REG_A; - insn->x_reg = BPF_REG_X; + *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_A, BPF_REG_X); break; /* A = skb->len or X = skb->len */ @@ -1126,10 +1086,8 @@ do_pass: /* access seccomp_data fields */ case BPF_LDX | BPF_ABS | BPF_W: - insn->code = BPF_LDX | BPF_MEM | BPF_W; - insn->a_reg = BPF_REG_A; - insn->x_reg = BPF_REG_CTX; - insn->off = fp->k; + /* A = *(u32 *) (ctx + K) */ + *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k); break; default: -- cgit v1.2.3-59-g8ed1b From 60ff746739bf805a912484643c720b6124826140 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sun, 4 May 2014 16:39:18 -0700 Subject: net: rename local_df to ignore_df MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As suggested by several people, rename local_df to ignore_df, since it means "ignore df bit if it is set". Cc: Maciej Żenczykowski Cc: Florian Westphal Cc: David S. Miller Cc: Eric Dumazet Signed-off-by: Cong Wang Acked-by: Maciej Żenczykowski Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++-- include/net/ip.h | 6 +++--- include/net/ip6_route.h | 2 +- net/core/skbuff.c | 4 ++-- net/ipv4/ip_forward.c | 2 +- net/ipv4/ip_output.c | 12 ++++++------ net/ipv4/netfilter/nf_defrag_ipv4.c | 2 +- net/ipv4/xfrm4_output.c | 2 +- net/ipv6/ip6_output.c | 12 ++++++------ net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +- net/ipv6/xfrm6_output.c | 6 +++--- net/l2tp/l2tp_core.c | 2 +- net/netfilter/ipvs/ip_vs_xmit.c | 20 ++++++++++---------- net/openvswitch/vport-gre.c | 2 +- net/openvswitch/vport-vxlan.c | 2 +- net/sctp/ipv6.c | 2 +- net/sctp/output.c | 2 +- 17 files changed, 42 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 3ca0dda5a42e..7a9beeb1c458 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -426,7 +426,7 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1, * @csum_start: Offset from skb->head where checksumming should start * @csum_offset: Offset from csum_start where checksum should be stored * @priority: Packet queueing priority - * @local_df: allow local fragmentation + * @ignore_df: allow local fragmentation * @cloned: Head may be cloned (check refcnt to be sure) * @ip_summed: Driver fed us an IP checksum * @nohdr: Payload reference only, must not modify header @@ -514,7 +514,7 @@ struct sk_buff { }; __u32 priority; kmemcheck_bitfield_begin(flags1); - __u8 local_df:1, + __u8 ignore_df:1, cloned:1, ip_summed:2, nohdr:1, diff --git a/include/net/ip.h b/include/net/ip.h index 16146b667ddb..55752985c144 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -269,7 +269,7 @@ static inline bool ip_sk_use_pmtu(const struct sock *sk) return inet_sk(sk)->pmtudisc < IP_PMTUDISC_PROBE; } -static inline bool ip_sk_local_df(const struct sock *sk) +static inline bool ip_sk_ignore_df(const struct sock *sk) { return inet_sk(sk)->pmtudisc < IP_PMTUDISC_DO || inet_sk(sk)->pmtudisc == IP_PMTUDISC_OMIT; @@ -304,7 +304,7 @@ static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, s { struct iphdr *iph = ip_hdr(skb); - if ((iph->frag_off & htons(IP_DF)) && !skb->local_df) { + if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) { /* This is only to work around buggy Windows95/2000 * VJ compression implementations. If the ID field * does not change, they drop every other packet in @@ -320,7 +320,7 @@ static inline void ip_select_ident_more(struct sk_buff *skb, struct dst_entry *d { struct iphdr *iph = ip_hdr(skb); - if ((iph->frag_off & htons(IP_DF)) && !skb->local_df) { + if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) { if (sk && inet_sk(sk)->inet_daddr) { iph->id = htons(inet_sk(sk)->inet_id); inet_sk(sk)->inet_id += 1 + more; diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 6c4f5eac98e7..38e41e4d0998 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -185,7 +185,7 @@ static inline bool ip6_sk_accept_pmtu(const struct sock *sk) inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_OMIT; } -static inline bool ip6_sk_local_df(const struct sock *sk) +static inline bool ip6_sk_ignore_df(const struct sock *sk) { return inet6_sk(sk)->pmtudisc < IPV6_PMTUDISC_DO || inet6_sk(sk)->pmtudisc == IPV6_PMTUDISC_OMIT; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1b62343f5837..3d74530ae82b 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -694,7 +694,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) #endif memcpy(new->cb, old->cb, sizeof(old->cb)); new->csum = old->csum; - new->local_df = old->local_df; + new->ignore_df = old->ignore_df; new->pkt_type = old->pkt_type; new->ip_summed = old->ip_summed; skb_copy_queue_mapping(new, old); @@ -3913,7 +3913,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) skb->tstamp.tv64 = 0; skb->pkt_type = PACKET_HOST; skb->skb_iif = 0; - skb->local_df = 0; + skb->ignore_df = 0; skb_dst_drop(skb); skb->mark = 0; secpath_reset(skb); diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 6f111e48e11c..3a83ce5efa80 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -42,7 +42,7 @@ static bool ip_may_fragment(const struct sk_buff *skb) { return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) || - skb->local_df; + skb->ignore_df; } static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index a52f50187b54..6aa4380fde1a 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -415,7 +415,7 @@ packet_routed: skb_reset_network_header(skb); iph = ip_hdr(skb); *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); - if (ip_dont_fragment(sk, &rt->dst) && !skb->local_df) + if (ip_dont_fragment(sk, &rt->dst) && !skb->ignore_df) iph->frag_off = htons(IP_DF); else iph->frag_off = 0; @@ -501,7 +501,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) iph = ip_hdr(skb); mtu = ip_skb_dst_mtu(skb); - if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->local_df) || + if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) || (IPCB(skb)->frag_max_size && IPCB(skb)->frag_max_size > mtu))) { IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); @@ -866,7 +866,7 @@ static int __ip_append_data(struct sock *sk, fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; - maxnonfragsize = ip_sk_local_df(sk) ? 0xFFFF : mtu; + maxnonfragsize = ip_sk_ignore_df(sk) ? 0xFFFF : mtu; if (cork->length + length > maxnonfragsize - fragheaderlen) { ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, @@ -1189,7 +1189,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; - maxnonfragsize = ip_sk_local_df(sk) ? 0xFFFF : mtu; + maxnonfragsize = ip_sk_ignore_df(sk) ? 0xFFFF : mtu; if (cork->length + size > maxnonfragsize - fragheaderlen) { ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, @@ -1350,10 +1350,10 @@ struct sk_buff *__ip_make_skb(struct sock *sk, * to fragment the frame generated here. No matter, what transforms * how transforms change size of the packet, it will come out. */ - skb->local_df = ip_sk_local_df(sk); + skb->ignore_df = ip_sk_ignore_df(sk); /* DF bit is set when we want to see DF on outgoing frames. - * If local_df is set too, we still allow to fragment this frame + * If ignore_df is set too, we still allow to fragment this frame * locally. */ if (inet->pmtudisc == IP_PMTUDISC_DO || inet->pmtudisc == IP_PMTUDISC_PROBE || diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index f40f321b41fc..b8f6381c7d0b 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -34,7 +34,7 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) if (!err) { ip_send_check(ip_hdr(skb)); - skb->local_df = 1; + skb->ignore_df = 1; } return err; diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 40e701f2e1e0..8e8c018d9d2d 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -25,7 +25,7 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb) if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE) goto out; - if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->local_df) + if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->ignore_df) goto out; mtu = dst_mtu(skb_dst(skb)); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 31a38bde69ef..ab0cc57f779c 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -219,7 +219,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, skb->mark = sk->sk_mark; mtu = dst_mtu(dst); - if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) { + if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUT, skb->len); return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, @@ -347,11 +347,11 @@ static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) if (skb->len <= mtu) return false; - /* ipv6 conntrack defrag sets max_frag_size + local_df */ + /* ipv6 conntrack defrag sets max_frag_size + ignore_df */ if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu) return true; - if (skb->local_df) + if (skb->ignore_df) return false; if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) @@ -559,7 +559,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) /* We must not fragment if the socket is set to force MTU discovery * or if the skb it not generated by a local socket. */ - if (unlikely(!skb->local_df && skb->len > mtu) || + if (unlikely(!skb->ignore_df && skb->len > mtu) || (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) { if (skb->sk && dst_allfrag(skb_dst(skb))) @@ -1234,7 +1234,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, sizeof(struct frag_hdr) : 0) + rt->rt6i_nfheader_len; - if (ip6_sk_local_df(sk)) + if (ip6_sk_ignore_df(sk)) maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN; else maxnonfragsize = mtu; @@ -1544,7 +1544,7 @@ int ip6_push_pending_frames(struct sock *sk) } /* Allow local fragmentation. */ - skb->local_df = ip6_sk_local_df(sk); + skb->ignore_df = ip6_sk_ignore_df(sk); *final_dst = fl6->daddr; __skb_pull(skb, skb_network_header_len(skb)); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 767ab8da8218..0d5279fd852a 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -451,7 +451,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) } sub_frag_mem_limit(&fq->q, head->truesize); - head->local_df = 1; + head->ignore_df = 1; head->next = NULL; head->dev = dev; head->tstamp = fq->q.stamp; diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 19ef329bdbf8..f47c8b153dd3 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -78,7 +78,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - if (!skb->local_df && skb->len > mtu) { + if (!skb->ignore_df && skb->len > mtu) { skb->dev = dst->dev; if (xfrm6_local_dontfrag(skb)) @@ -120,7 +120,7 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) #endif skb->protocol = htons(ETH_P_IPV6); - skb->local_df = 1; + skb->ignore_df = 1; return x->outer_mode->output2(x, skb); } @@ -150,7 +150,7 @@ static int __xfrm6_output(struct sk_buff *skb) if (skb->len > mtu && xfrm6_local_dontfrag(skb)) { xfrm6_local_rxpmtu(skb, mtu); return -EMSGSIZE; - } else if (!skb->local_df && skb->len > mtu && skb->sk) { + } else if (!skb->ignore_df && skb->len > mtu && skb->sk) { xfrm_local_error(skb, mtu); return -EMSGSIZE; } diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index aa1a9d44c107..ed0716a075ba 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1073,7 +1073,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, } /* Queue the packet to IP for output */ - skb->local_df = 1; + skb->ignore_df = 1; #if IS_ENABLED(CONFIG_IPV6) if (tunnel->sock->sk_family == PF_INET6 && !tunnel->v4mapped) error = inet6_csk_xmit(tunnel->sock, skb, NULL); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index c47444e4cf8c..487b55e04337 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -562,7 +562,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ip_send_check(iph); /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); rcu_read_unlock(); @@ -590,7 +590,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error; /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); rcu_read_unlock(); @@ -684,7 +684,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, MTU problem. */ /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); rcu_read_unlock(); @@ -774,7 +774,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, MTU problem. */ /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); rcu_read_unlock(); @@ -886,7 +886,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ip_select_ident(skb, &rt->dst, NULL); /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; ret = ip_vs_tunnel_xmit_prepare(skb, cp); if (ret == NF_ACCEPT) @@ -974,7 +974,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, iph->hop_limit = old_iph->hop_limit; /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; ret = ip_vs_tunnel_xmit_prepare(skb, cp); if (ret == NF_ACCEPT) @@ -1023,7 +1023,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ip_send_check(ip_hdr(skb)); /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); rcu_read_unlock(); @@ -1060,7 +1060,7 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, } /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); rcu_read_unlock(); @@ -1157,7 +1157,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ip_vs_nat_icmp(skb, pp, cp, 0); /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); rcu_read_unlock(); @@ -1249,7 +1249,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, ip_vs_nat_icmp_v6(skb, pp, cp, 0); /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); rcu_read_unlock(); diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index ebb6e2442554..0856f014d7a9 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -172,7 +172,7 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; - skb->local_df = 1; + skb->ignore_df = 1; return iptunnel_xmit(skb->sk, rt, skb, fl.saddr, OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE, diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 21cceb3bdf78..a93efa3f64c3 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -170,7 +170,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; - skb->local_df = 1; + skb->ignore_df = 1; inet_get_local_port_range(net, &port_min, &port_max); src_port = vxlan_src_port(port_min, port_max, skb); diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 2b1738ef9394..4dc5d9e08311 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -216,7 +216,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) IP6_ECN_flow_xmit(sk, fl6->flowlabel); if (!(transport->param_flags & SPP_PMTUD_ENABLE)) - skb->local_df = 1; + skb->ignore_df = 1; SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS); diff --git a/net/sctp/output.c b/net/sctp/output.c index 0f4d15fc2627..01ab8e0723f0 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -591,7 +591,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) pr_debug("***sctp_transmit_packet*** skb->len:%d\n", nskb->len); - nskb->local_df = packet->ipfragok; + nskb->ignore_df = packet->ipfragok; tp->af_specific->sctp_xmit(nskb, tp); out: -- cgit v1.2.3-59-g8ed1b From f06c7f9f92295faf701a9628b383156c4efb6119 Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Fri, 9 May 2014 14:58:05 +0800 Subject: vlan: rename __vlan_find_dev_deep() to __vlan_find_dev_deep_rcu() The __vlan_find_dev_deep should always called in RCU, according David's suggestion, rename to __vlan_find_dev_deep_rcu looks more reasonable. Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c | 2 +- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 +- drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 2 +- drivers/net/usb/cdc_mbim.c | 4 ++-- drivers/s390/net/qeth_l3_main.c | 10 +++++----- include/linux/if_vlan.h | 4 ++-- net/8021q/vlan_core.c | 6 +++--- net/bridge/br_netfilter.c | 2 +- 8 files changed, 16 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c index c0a9dd55f4e5..b0cbb2b7fd48 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c @@ -185,7 +185,7 @@ static struct net_device *get_iff_from_mac(struct adapter *adapter, if (ether_addr_equal(dev->dev_addr, mac)) { rcu_read_lock(); if (vlan && vlan != VLAN_VID_MASK) { - dev = __vlan_find_dev_deep(dev, htons(ETH_P_8021Q), vlan); + dev = __vlan_find_dev_deep_rcu(dev, htons(ETH_P_8021Q), vlan); } else if (netif_is_bond_slave(dev)) { struct net_device *upper_dev; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 8efeed3325b5..0f1e886d89e3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -4068,7 +4068,7 @@ static int update_root_dev_clip(struct net_device *dev) /* Parse all bond and vlan devices layered on top of the physical dev */ for (i = 0; i < VLAN_N_VID; i++) { - root_dev = __vlan_find_dev_deep(dev, htons(ETH_P_8021Q), i); + root_dev = __vlan_find_dev_deep_rcu(dev, htons(ETH_P_8021Q), i); if (!root_dev) continue; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index d2e18b52caba..8a2aeb85e320 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -4146,7 +4146,7 @@ void qlcnic_restore_indev_addr(struct net_device *netdev, unsigned long event) rcu_read_lock(); for_each_set_bit(vid, adapter->vlans, VLAN_N_VID) { - dev = __vlan_find_dev_deep(netdev, htons(ETH_P_8021Q), vid); + dev = __vlan_find_dev_deep_rcu(netdev, htons(ETH_P_8021Q), vid); if (!dev) continue; qlcnic_config_indev_addr(adapter, dev, event); diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index 2e025ddcef21..0ab79fca822c 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -223,8 +223,8 @@ static void do_neigh_solicit(struct usbnet *dev, u8 *buf, u16 tci) /* need to send the NA on the VLAN dev, if any */ rcu_read_lock(); if (tci) { - netdev = __vlan_find_dev_deep(dev->net, htons(ETH_P_8021Q), - tci); + netdev = __vlan_find_dev_deep_rcu(dev->net, htons(ETH_P_8021Q), + tci); if (!netdev) { rcu_read_unlock(); return; diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index c8d91d797ec8..bc2499a24884 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1659,7 +1659,7 @@ static void qeth_l3_add_vlan_mc(struct qeth_card *card) for_each_set_bit(vid, card->active_vlans, VLAN_N_VID) { struct net_device *netdev; - netdev = __vlan_find_dev_deep(card->dev, htons(ETH_P_8021Q), + netdev = __vlan_find_dev_deep_rcu(card->dev, htons(ETH_P_8021Q), vid); if (netdev == NULL || !(netdev->flags & IFF_UP)) @@ -1721,7 +1721,7 @@ static void qeth_l3_add_vlan_mc6(struct qeth_card *card) for_each_set_bit(vid, card->active_vlans, VLAN_N_VID) { struct net_device *netdev; - netdev = __vlan_find_dev_deep(card->dev, htons(ETH_P_8021Q), + netdev = __vlan_find_dev_deep_rcu(card->dev, htons(ETH_P_8021Q), vid); if (netdev == NULL || !(netdev->flags & IFF_UP)) @@ -1766,7 +1766,7 @@ static void qeth_l3_free_vlan_addresses4(struct qeth_card *card, QETH_CARD_TEXT(card, 4, "frvaddr4"); - netdev = __vlan_find_dev_deep(card->dev, htons(ETH_P_8021Q), vid); + netdev = __vlan_find_dev_deep_rcu(card->dev, htons(ETH_P_8021Q), vid); if (!netdev) return; in_dev = in_dev_get(netdev); @@ -1796,7 +1796,7 @@ static void qeth_l3_free_vlan_addresses6(struct qeth_card *card, QETH_CARD_TEXT(card, 4, "frvaddr6"); - netdev = __vlan_find_dev_deep(card->dev, htons(ETH_P_8021Q), vid); + netdev = __vlan_find_dev_deep_rcu(card->dev, htons(ETH_P_8021Q), vid); if (!netdev) return; in6_dev = in6_dev_get(netdev); @@ -2089,7 +2089,7 @@ static int qeth_l3_verify_vlan_dev(struct net_device *dev, struct net_device *netdev; rcu_read_lock(); - netdev = __vlan_find_dev_deep(card->dev, htons(ETH_P_8021Q), + netdev = __vlan_find_dev_deep_rcu(card->dev, htons(ETH_P_8021Q), vid); rcu_read_unlock(); if (netdev == dev) { diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 13bbbde00e68..8c0fb7f3a9a5 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -106,7 +106,7 @@ struct vlan_pcpu_stats { #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) -extern struct net_device *__vlan_find_dev_deep(struct net_device *real_dev, +extern struct net_device *__vlan_find_dev_deep_rcu(struct net_device *real_dev, __be16 vlan_proto, u16 vlan_id); extern struct net_device *vlan_dev_real_dev(const struct net_device *dev); extern u16 vlan_dev_vlan_id(const struct net_device *dev); @@ -199,7 +199,7 @@ extern void vlan_vids_del_by_dev(struct net_device *dev, extern bool vlan_uses_dev(const struct net_device *dev); #else static inline struct net_device * -__vlan_find_dev_deep(struct net_device *real_dev, +__vlan_find_dev_deep_rcu(struct net_device *real_dev, __be16 vlan_proto, u16 vlan_id) { return NULL; diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 3c32bd257b73..9012b1c922b6 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -63,7 +63,7 @@ bool vlan_do_receive(struct sk_buff **skbp) } /* Must be invoked with rcu_read_lock. */ -struct net_device *__vlan_find_dev_deep(struct net_device *dev, +struct net_device *__vlan_find_dev_deep_rcu(struct net_device *dev, __be16 vlan_proto, u16 vlan_id) { struct vlan_info *vlan_info = rcu_dereference(dev->vlan_info); @@ -81,13 +81,13 @@ struct net_device *__vlan_find_dev_deep(struct net_device *dev, upper_dev = netdev_master_upper_dev_get_rcu(dev); if (upper_dev) - return __vlan_find_dev_deep(upper_dev, + return __vlan_find_dev_deep_rcu(upper_dev, vlan_proto, vlan_id); } return NULL; } -EXPORT_SYMBOL(__vlan_find_dev_deep); +EXPORT_SYMBOL(__vlan_find_dev_deep_rcu); struct net_device *vlan_dev_real_dev(const struct net_device *dev) { diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 2acf7fa1fec6..a615264cf01a 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -535,7 +535,7 @@ static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct if (brnf_pass_vlan_indev == 0 || !vlan_tx_tag_present(skb)) return br; - vlan = __vlan_find_dev_deep(br, skb->vlan_proto, + vlan = __vlan_find_dev_deep_rcu(br, skb->vlan_proto, vlan_tx_tag_get(skb) & VLAN_VID_MASK); return vlan ? vlan : br; -- cgit v1.2.3-59-g8ed1b From 7ad24ea4bf620a32631d7b3069c3e30c078b0c3e Mon Sep 17 00:00:00 2001 From: Wilfried Klaebe Date: Sun, 11 May 2014 00:12:32 +0000 Subject: net: get rid of SET_ETHTOOL_OPS net: get rid of SET_ETHTOOL_OPS Dave Miller mentioned he'd like to see SET_ETHTOOL_OPS gone. This does that. Mostly done via coccinelle script: @@ struct ethtool_ops *ops; struct net_device *dev; @@ - SET_ETHTOOL_OPS(dev, ops); + dev->ethtool_ops = ops; Compile tested only, but I'd seriously wonder if this broke anything. Suggested-by: Dave Miller Signed-off-by: Wilfried Klaebe Acked-by: Felipe Balbi Signed-off-by: David S. Miller --- drivers/infiniband/ulp/ipoib/ipoib_ethtool.c | 2 +- drivers/net/ethernet/3com/3c509.c | 2 +- drivers/net/ethernet/3com/3c589_cs.c | 2 +- drivers/net/ethernet/3com/typhoon.c | 2 +- drivers/net/ethernet/adaptec/starfire.c | 2 +- drivers/net/ethernet/alteon/acenic.c | 2 +- drivers/net/ethernet/altera/altera_tse_ethtool.c | 2 +- drivers/net/ethernet/amd/amd8111e.c | 2 +- drivers/net/ethernet/amd/au1000_eth.c | 2 +- drivers/net/ethernet/amd/nmclan_cs.c | 2 +- drivers/net/ethernet/atheros/alx/main.c | 2 +- drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c | 2 +- drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c | 2 +- drivers/net/ethernet/atheros/atlx/atl2.c | 2 +- drivers/net/ethernet/broadcom/b44.c | 2 +- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 4 ++-- drivers/net/ethernet/broadcom/bcmsysport.c | 2 +- drivers/net/ethernet/broadcom/bgmac.c | 2 +- drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 6 ++---- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 2 +- drivers/net/ethernet/brocade/bna/bnad_ethtool.c | 2 +- drivers/net/ethernet/calxeda/xgmac.c | 2 +- drivers/net/ethernet/chelsio/cxgb/cxgb2.c | 2 +- drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c | 2 +- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 +- drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c | 2 +- drivers/net/ethernet/cisco/enic/enic_ethtool.c | 2 +- drivers/net/ethernet/dec/tulip/tulip_core.c | 2 +- drivers/net/ethernet/dlink/dl2k.c | 2 +- drivers/net/ethernet/dlink/sundance.c | 2 +- drivers/net/ethernet/emulex/benet/be_main.c | 2 +- drivers/net/ethernet/faraday/ftgmac100.c | 2 +- drivers/net/ethernet/faraday/ftmac100.c | 2 +- drivers/net/ethernet/freescale/ucc_geth_ethtool.c | 2 +- drivers/net/ethernet/fujitsu/fmvj18x_cs.c | 2 +- drivers/net/ethernet/ibm/ehea/ehea_ethtool.c | 2 +- drivers/net/ethernet/ibm/emac/core.c | 2 +- drivers/net/ethernet/icplus/ipg.c | 2 +- drivers/net/ethernet/intel/e100.c | 2 +- drivers/net/ethernet/intel/e1000/e1000_ethtool.c | 2 +- drivers/net/ethernet/intel/e1000e/ethtool.c | 2 +- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 2 +- drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c | 2 +- drivers/net/ethernet/intel/igb/igb_ethtool.c | 2 +- drivers/net/ethernet/intel/igbvf/ethtool.c | 2 +- drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 2 +- drivers/net/ethernet/intel/ixgbevf/ethtool.c | 2 +- drivers/net/ethernet/marvell/mv643xx_eth.c | 2 +- drivers/net/ethernet/marvell/mvneta.c | 2 +- drivers/net/ethernet/marvell/pxa168_eth.c | 2 +- drivers/net/ethernet/marvell/sky2.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 2 +- drivers/net/ethernet/micrel/ks8695net.c | 6 +++--- drivers/net/ethernet/micrel/ks8851.c | 2 +- drivers/net/ethernet/micrel/ksz884x.c | 2 +- drivers/net/ethernet/microchip/enc28j60.c | 2 +- drivers/net/ethernet/myricom/myri10ge/myri10ge.c | 2 +- drivers/net/ethernet/natsemi/natsemi.c | 2 +- drivers/net/ethernet/natsemi/ns83820.c | 2 +- drivers/net/ethernet/neterion/s2io.c | 2 +- drivers/net/ethernet/neterion/vxge/vxge-ethtool.c | 2 +- drivers/net/ethernet/nvidia/forcedeth.c | 2 +- drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c | 2 +- drivers/net/ethernet/packetengines/hamachi.c | 6 ++---- drivers/net/ethernet/packetengines/yellowfin.c | 2 +- drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c | 2 +- drivers/net/ethernet/qlogic/qla3xxx.c | 2 +- drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 8 +++----- drivers/net/ethernet/qlogic/qlge/qlge_main.c | 2 +- drivers/net/ethernet/realtek/r8169.c | 2 +- drivers/net/ethernet/renesas/sh_eth.c | 2 +- drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c | 2 +- drivers/net/ethernet/sfc/efx.c | 2 +- drivers/net/ethernet/sis/sis190.c | 2 +- drivers/net/ethernet/smsc/smc91c92_cs.c | 2 +- drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 2 +- drivers/net/ethernet/tehuti/tehuti.c | 2 +- drivers/net/ethernet/ti/cpsw.c | 4 ++-- drivers/net/ethernet/ti/davinci_emac.c | 2 +- drivers/net/hyperv/netvsc_drv.c | 2 +- drivers/net/ntb_netdev.c | 2 +- drivers/net/rionet.c | 2 +- drivers/net/usb/catc.c | 2 +- drivers/net/usb/hso.c | 2 +- drivers/net/usb/ipheth.c | 2 +- drivers/net/usb/kaweth.c | 2 +- drivers/net/usb/pegasus.c | 2 +- drivers/net/usb/r8152.c | 2 +- drivers/net/usb/rtl8150.c | 2 +- drivers/net/virtio_net.c | 2 +- drivers/net/vmxnet3/vmxnet3_ethtool.c | 2 +- drivers/net/vxlan.c | 2 +- drivers/net/wireless/hostap/hostap_main.c | 2 +- drivers/net/xen-netback/interface.c | 2 +- drivers/net/xen-netfront.c | 2 +- drivers/s390/net/qeth_l2_main.c | 7 +++---- drivers/s390/net/qeth_l3_main.c | 2 +- drivers/staging/et131x/et131x.c | 2 +- drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c | 2 +- drivers/staging/netlogic/xlr_net.c | 2 +- drivers/staging/octeon/ethernet.c | 2 +- drivers/usb/gadget/u_ether.c | 4 ++-- include/linux/netdevice.h | 3 --- net/batman-adv/soft-interface.c | 2 +- net/bridge/br_device.c | 2 +- net/dsa/slave.c | 2 +- net/openvswitch/vport-internal_dev.c | 2 +- 108 files changed, 118 insertions(+), 128 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index c4b3940845e6..078cadd6c797 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -105,5 +105,5 @@ static const struct ethtool_ops ipoib_ethtool_ops = { void ipoib_set_ethtool_ops(struct net_device *dev) { - SET_ETHTOOL_OPS(dev, &ipoib_ethtool_ops); + dev->ethtool_ops = &ipoib_ethtool_ops; } diff --git a/drivers/net/ethernet/3com/3c509.c b/drivers/net/ethernet/3com/3c509.c index 35df0b9e6848..a968654b631d 100644 --- a/drivers/net/ethernet/3com/3c509.c +++ b/drivers/net/ethernet/3com/3c509.c @@ -534,7 +534,7 @@ static int el3_common_init(struct net_device *dev) /* The EL3-specific entries in the device structure. */ dev->netdev_ops = &netdev_ops; dev->watchdog_timeo = TX_TIMEOUT; - SET_ETHTOOL_OPS(dev, ðtool_ops); + dev->ethtool_ops = ðtool_ops; err = register_netdev(dev); if (err) { diff --git a/drivers/net/ethernet/3com/3c589_cs.c b/drivers/net/ethernet/3com/3c589_cs.c index 063557e037f2..f18647c23559 100644 --- a/drivers/net/ethernet/3com/3c589_cs.c +++ b/drivers/net/ethernet/3com/3c589_cs.c @@ -218,7 +218,7 @@ static int tc589_probe(struct pcmcia_device *link) dev->netdev_ops = &el3_netdev_ops; dev->watchdog_timeo = TX_TIMEOUT; - SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops); + dev->ethtool_ops = &netdev_ethtool_ops; return tc589_config(link); } diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c index 465cc7108d8a..e13b04624ded 100644 --- a/drivers/net/ethernet/3com/typhoon.c +++ b/drivers/net/ethernet/3com/typhoon.c @@ -2435,7 +2435,7 @@ typhoon_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netif_napi_add(dev, &tp->napi, typhoon_poll, 16); dev->watchdog_timeo = TX_TIMEOUT; - SET_ETHTOOL_OPS(dev, &typhoon_ethtool_ops); + dev->ethtool_ops = &typhoon_ethtool_ops; /* We can handle scatter gather, up to 16 entries, and * we can do IP checksumming (only version 4, doh...) diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c index 171d73c1d3c2..40dbbf740331 100644 --- a/drivers/net/ethernet/adaptec/starfire.c +++ b/drivers/net/ethernet/adaptec/starfire.c @@ -784,7 +784,7 @@ static int starfire_init_one(struct pci_dev *pdev, dev->netdev_ops = &netdev_ops; dev->watchdog_timeo = TX_TIMEOUT; - SET_ETHTOOL_OPS(dev, ðtool_ops); + dev->ethtool_ops = ðtool_ops; netif_napi_add(dev, &np->napi, netdev_poll, max_interrupt_work); diff --git a/drivers/net/ethernet/alteon/acenic.c b/drivers/net/ethernet/alteon/acenic.c index 1517e9df5ba1..9a6991be9749 100644 --- a/drivers/net/ethernet/alteon/acenic.c +++ b/drivers/net/ethernet/alteon/acenic.c @@ -476,7 +476,7 @@ static int acenic_probe_one(struct pci_dev *pdev, dev->watchdog_timeo = 5*HZ; dev->netdev_ops = &ace_netdev_ops; - SET_ETHTOOL_OPS(dev, &ace_ethtool_ops); + dev->ethtool_ops = &ace_ethtool_ops; /* we only display this string ONCE */ if (!boards_found) diff --git a/drivers/net/ethernet/altera/altera_tse_ethtool.c b/drivers/net/ethernet/altera/altera_tse_ethtool.c index 76133caffa78..d817e285b266 100644 --- a/drivers/net/ethernet/altera/altera_tse_ethtool.c +++ b/drivers/net/ethernet/altera/altera_tse_ethtool.c @@ -237,5 +237,5 @@ static const struct ethtool_ops tse_ethtool_ops = { void altera_tse_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &tse_ethtool_ops); + netdev->ethtool_ops = &tse_ethtool_ops; } diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c index 26efaaa5e73f..068dc7cad5fa 100644 --- a/drivers/net/ethernet/amd/amd8111e.c +++ b/drivers/net/ethernet/amd/amd8111e.c @@ -1900,7 +1900,7 @@ static int amd8111e_probe_one(struct pci_dev *pdev, /* Initialize driver entry points */ dev->netdev_ops = &amd8111e_netdev_ops; - SET_ETHTOOL_OPS(dev, &ops); + dev->ethtool_ops = &ops; dev->irq =pdev->irq; dev->watchdog_timeo = AMD8111E_TX_TIMEOUT; netif_napi_add(dev, &lp->napi, amd8111e_rx_poll, 32); diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c index a2bd91e3d302..a78e4c136959 100644 --- a/drivers/net/ethernet/amd/au1000_eth.c +++ b/drivers/net/ethernet/amd/au1000_eth.c @@ -1229,7 +1229,7 @@ static int au1000_probe(struct platform_device *pdev) dev->base_addr = base->start; dev->irq = irq; dev->netdev_ops = &au1000_netdev_ops; - SET_ETHTOOL_OPS(dev, &au1000_ethtool_ops); + dev->ethtool_ops = &au1000_ethtool_ops; dev->watchdog_timeo = ETH_TX_TIMEOUT; /* diff --git a/drivers/net/ethernet/amd/nmclan_cs.c b/drivers/net/ethernet/amd/nmclan_cs.c index 08569fe2b182..abf3b1581c82 100644 --- a/drivers/net/ethernet/amd/nmclan_cs.c +++ b/drivers/net/ethernet/amd/nmclan_cs.c @@ -457,7 +457,7 @@ static int nmclan_probe(struct pcmcia_device *link) lp->tx_free_frames=AM2150_MAX_TX_FRAMES; dev->netdev_ops = &mace_netdev_ops; - SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops); + dev->ethtool_ops = &netdev_ethtool_ops; dev->watchdog_timeo = TX_TIMEOUT; return nmclan_config(link); diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index 17bb9ce96260..49faa97a30c3 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -1302,7 +1302,7 @@ static int alx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } netdev->netdev_ops = &alx_netdev_ops; - SET_ETHTOOL_OPS(netdev, &alx_ethtool_ops); + netdev->ethtool_ops = &alx_ethtool_ops; netdev->irq = pdev->irq; netdev->watchdog_timeo = ALX_WATCHDOG_TIME; diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c b/drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c index 859ea844ba0f..ecacaaeb2b92 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c @@ -305,5 +305,5 @@ static const struct ethtool_ops atl1c_ethtool_ops = { void atl1c_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &atl1c_ethtool_ops); + netdev->ethtool_ops = &atl1c_ethtool_ops; } diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c b/drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c index 82b23861bf55..206e9b7be431 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c @@ -388,5 +388,5 @@ static const struct ethtool_ops atl1e_ethtool_ops = { void atl1e_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &atl1e_ethtool_ops); + netdev->ethtool_ops = &atl1e_ethtool_ops; } diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c index 78befb522a52..2587fed7b02c 100644 --- a/drivers/net/ethernet/atheros/atlx/atl2.c +++ b/drivers/net/ethernet/atheros/atlx/atl2.c @@ -1396,7 +1396,7 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent) atl2_setup_pcicmd(pdev); netdev->netdev_ops = &atl2_netdev_ops; - SET_ETHTOOL_OPS(netdev, &atl2_ethtool_ops); + netdev->ethtool_ops = &atl2_ethtool_ops; netdev->watchdog_timeo = 5 * HZ; strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1); diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 05ba62589017..ca5a20a48b14 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -2380,7 +2380,7 @@ static int b44_init_one(struct ssb_device *sdev, netif_napi_add(dev, &bp->napi, b44_poll, 64); dev->watchdog_timeo = B44_TX_TIMEOUT; dev->irq = sdev->irq; - SET_ETHTOOL_OPS(dev, &b44_ethtool_ops); + dev->ethtool_ops = &b44_ethtool_ops; err = ssb_bus_powerup(sdev->bus, 0); if (err) { diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 8db34d389675..3e8d1a88ed3d 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -1897,7 +1897,7 @@ static int bcm_enet_probe(struct platform_device *pdev) dev->netdev_ops = &bcm_enet_ops; netif_napi_add(dev, &priv->napi, bcm_enet_poll, 16); - SET_ETHTOOL_OPS(dev, &bcm_enet_ethtool_ops); + dev->ethtool_ops = &bcm_enet_ethtool_ops; SET_NETDEV_DEV(dev, &pdev->dev); ret = register_netdev(dev); @@ -2783,7 +2783,7 @@ static int bcm_enetsw_probe(struct platform_device *pdev) /* register netdevice */ dev->netdev_ops = &bcm_enetsw_ops; netif_napi_add(dev, &priv->napi, bcm_enet_poll, 16); - SET_ETHTOOL_OPS(dev, &bcm_enetsw_ethtool_ops); + dev->ethtool_ops = &bcm_enetsw_ethtool_ops; SET_NETDEV_DEV(dev, &pdev->dev); spin_lock_init(&priv->enetsw_mdio_lock); diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 4dc8d1e9829b..56b74a495181 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1540,7 +1540,7 @@ static int bcm_sysport_probe(struct platform_device *pdev) SET_NETDEV_DEV(dev, &pdev->dev); dev_set_drvdata(&pdev->dev, dev); - SET_ETHTOOL_OPS(dev, &bcm_sysport_ethtool_ops); + dev->ethtool_ops = &bcm_sysport_ethtool_ops; dev->netdev_ops = &bcm_sysport_netdev_ops; netif_napi_add(dev, &priv->napi, bcm_sysport_poll, 64); diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 0297a79a38e1..05c6af6c418f 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1436,7 +1436,7 @@ static int bgmac_probe(struct bcma_device *core) return -ENOMEM; net_dev->netdev_ops = &bgmac_netdev_ops; net_dev->irq = core->irq; - SET_ETHTOOL_OPS(net_dev, &bgmac_ethtool_ops); + net_dev->ethtool_ops = &bgmac_ethtool_ops; bgmac = netdev_priv(net_dev); bgmac->net_dev = net_dev; bgmac->core = core; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index b6de05e3149b..03224090ecf9 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -3506,8 +3506,6 @@ static const struct ethtool_ops bnx2x_vf_ethtool_ops = { void bnx2x_set_ethtool_ops(struct bnx2x *bp, struct net_device *netdev) { - if (IS_PF(bp)) - SET_ETHTOOL_OPS(netdev, &bnx2x_ethtool_ops); - else /* vf */ - SET_ETHTOOL_OPS(netdev, &bnx2x_vf_ethtool_ops); + netdev->ethtool_ops = (IS_PF(bp)) ? + &bnx2x_ethtool_ops : &bnx2x_vf_ethtool_ops; } diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 0966bd04375f..5ba1cfbd60da 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2481,7 +2481,7 @@ static int bcmgenet_probe(struct platform_device *pdev) dev_set_drvdata(&pdev->dev, dev); ether_addr_copy(dev->dev_addr, macaddr); dev->watchdog_timeo = 2 * HZ; - SET_ETHTOOL_OPS(dev, &bcmgenet_ethtool_ops); + dev->ethtool_ops = &bcmgenet_ethtool_ops; dev->netdev_ops = &bcmgenet_netdev_ops; netif_napi_add(dev, &priv->napi, bcmgenet_poll, 64); diff --git a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c index f9e150825bb5..adca62b72837 100644 --- a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c +++ b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c @@ -1137,5 +1137,5 @@ static const struct ethtool_ops bnad_ethtool_ops = { void bnad_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &bnad_ethtool_ops); + netdev->ethtool_ops = &bnad_ethtool_ops; } diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index 521dfea44b83..25d6b2a10e4e 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -1737,7 +1737,7 @@ static int xgmac_probe(struct platform_device *pdev) platform_set_drvdata(pdev, ndev); ether_setup(ndev); ndev->netdev_ops = &xgmac_netdev_ops; - SET_ETHTOOL_OPS(ndev, &xgmac_ethtool_ops); + ndev->ethtool_ops = &xgmac_ethtool_ops; spin_lock_init(&priv->stats_lock); INIT_WORK(&priv->tx_timeout_work, xgmac_tx_timeout_work); diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c index 0fe7ff750d77..c1b2c1dbf015 100644 --- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c +++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c @@ -1100,7 +1100,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netif_napi_add(netdev, &adapter->napi, t1_poll, 64); - SET_ETHTOOL_OPS(netdev, &t1_ethtool_ops); + netdev->ethtool_ops = &t1_ethtool_ops; } if (t1_init_sw_modules(adapter, bi) < 0) { diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index 07bbb711b7e5..3ed507947248 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -3291,7 +3291,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->features |= NETIF_F_HIGHDMA; netdev->netdev_ops = &cxgb_netdev_ops; - SET_ETHTOOL_OPS(netdev, &cxgb_ethtool_ops); + netdev->ethtool_ops = &cxgb_ethtool_ops; } pci_set_drvdata(pdev, adapter); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 0f1e886d89e3..266a5bc6aedf 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -6083,7 +6083,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->priv_flags |= IFF_UNICAST_FLT; netdev->netdev_ops = &cxgb4_netdev_ops; - SET_ETHTOOL_OPS(netdev, &cxgb_ethtool_ops); + netdev->ethtool_ops = &cxgb_ethtool_ops; } pci_set_drvdata(pdev, adapter); diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index 52859288de7b..ff1cdd1788b5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -2664,7 +2664,7 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev, netdev->priv_flags |= IFF_UNICAST_FLT; netdev->netdev_ops = &cxgb4vf_netdev_ops; - SET_ETHTOOL_OPS(netdev, &cxgb4vf_ethtool_ops); + netdev->ethtool_ops = &cxgb4vf_ethtool_ops; /* * Initialize the hardware/software state for the port. diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c index 47e3562f4866..58a8c67638e3 100644 --- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c +++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c @@ -253,5 +253,5 @@ static const struct ethtool_ops enic_ethtool_ops = { void enic_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &enic_ethtool_ops); + netdev->ethtool_ops = &enic_ethtool_ops; } diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c index 1642de78aac8..861660841ce2 100644 --- a/drivers/net/ethernet/dec/tulip/tulip_core.c +++ b/drivers/net/ethernet/dec/tulip/tulip_core.c @@ -1703,7 +1703,7 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) #ifdef CONFIG_TULIP_NAPI netif_napi_add(dev, &tp->napi, tulip_poll, 16); #endif - SET_ETHTOOL_OPS(dev, &ops); + dev->ethtool_ops = &ops; if (register_netdev(dev)) goto err_out_free_ring; diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index 4fb756d219f7..2324f2ddfd48 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -227,7 +227,7 @@ rio_probe1 (struct pci_dev *pdev, const struct pci_device_id *ent) } dev->netdev_ops = &netdev_ops; dev->watchdog_timeo = TX_TIMEOUT; - SET_ETHTOOL_OPS(dev, ðtool_ops); + dev->ethtool_ops = ðtool_ops; #if 0 dev->features = NETIF_F_IP_CSUM; #endif diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c index d9e5ca0d48c1..433c1e185442 100644 --- a/drivers/net/ethernet/dlink/sundance.c +++ b/drivers/net/ethernet/dlink/sundance.c @@ -577,7 +577,7 @@ static int sundance_probe1(struct pci_dev *pdev, /* The chip-specific entries in the device structure. */ dev->netdev_ops = &netdev_ops; - SET_ETHTOOL_OPS(dev, ðtool_ops); + dev->ethtool_ops = ðtool_ops; dev->watchdog_timeo = TX_TIMEOUT; pci_set_drvdata(pdev, dev); diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 3f04356afa82..dcc5e5c69743 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4304,7 +4304,7 @@ static void be_netdev_init(struct net_device *netdev) netdev->netdev_ops = &be_netdev_ops; - SET_ETHTOOL_OPS(netdev, &be_ethtool_ops); + netdev->ethtool_ops = &be_ethtool_ops; } static void be_unmap_pci_bars(struct be_adapter *adapter) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 68069eabc4f8..c77fa4a69844 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1210,7 +1210,7 @@ static int ftgmac100_probe(struct platform_device *pdev) SET_NETDEV_DEV(netdev, &pdev->dev); - SET_ETHTOOL_OPS(netdev, &ftgmac100_ethtool_ops); + netdev->ethtool_ops = &ftgmac100_ethtool_ops; netdev->netdev_ops = &ftgmac100_netdev_ops; netdev->features = NETIF_F_IP_CSUM | NETIF_F_GRO; diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c index 8be5b40c0a12..4ff1adc6bfca 100644 --- a/drivers/net/ethernet/faraday/ftmac100.c +++ b/drivers/net/ethernet/faraday/ftmac100.c @@ -1085,7 +1085,7 @@ static int ftmac100_probe(struct platform_device *pdev) } SET_NETDEV_DEV(netdev, &pdev->dev); - SET_ETHTOOL_OPS(netdev, &ftmac100_ethtool_ops); + netdev->ethtool_ops = &ftmac100_ethtool_ops; netdev->netdev_ops = &ftmac100_netdev_ops; platform_set_drvdata(pdev, netdev); diff --git a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c index 413329eff2ff..cc83350d56ba 100644 --- a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c +++ b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c @@ -417,5 +417,5 @@ static const struct ethtool_ops uec_ethtool_ops = { void uec_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &uec_ethtool_ops); + netdev->ethtool_ops = &uec_ethtool_ops; } diff --git a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c index 7becab1aa3e4..cfe7a7431730 100644 --- a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c +++ b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c @@ -256,7 +256,7 @@ static int fmvj18x_probe(struct pcmcia_device *link) dev->netdev_ops = &fjn_netdev_ops; dev->watchdog_timeo = TX_TIMEOUT; - SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops); + dev->ethtool_ops = &netdev_ethtool_ops; return fmvj18x_config(link); } /* fmvj18x_attach */ diff --git a/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c b/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c index 95837b99a464..6055e3eaf49c 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c @@ -278,5 +278,5 @@ static const struct ethtool_ops ehea_ethtool_ops = { void ehea_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &ehea_ethtool_ops); + netdev->ethtool_ops = &ehea_ethtool_ops; } diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index ae342fdb42c8..87bd953cc2ee 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -2879,7 +2879,7 @@ static int emac_probe(struct platform_device *ofdev) dev->commac.ops = &emac_commac_sg_ops; } else ndev->netdev_ops = &emac_netdev_ops; - SET_ETHTOOL_OPS(ndev, &emac_ethtool_ops); + ndev->ethtool_ops = &emac_ethtool_ops; netif_carrier_off(ndev); diff --git a/drivers/net/ethernet/icplus/ipg.c b/drivers/net/ethernet/icplus/ipg.c index 25045ae07171..5727779a7df2 100644 --- a/drivers/net/ethernet/icplus/ipg.c +++ b/drivers/net/ethernet/icplus/ipg.c @@ -2245,7 +2245,7 @@ static int ipg_probe(struct pci_dev *pdev, const struct pci_device_id *id) */ dev->netdev_ops = &ipg_netdev_ops; SET_NETDEV_DEV(dev, &pdev->dev); - SET_ETHTOOL_OPS(dev, &ipg_ethtool_ops); + dev->ethtool_ops = &ipg_ethtool_ops; rc = pci_request_regions(pdev, DRV_NAME); if (rc) diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index b56461ce674c..9d979d7debef 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -2854,7 +2854,7 @@ static int e100_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->hw_features |= NETIF_F_RXALL; netdev->netdev_ops = &e100_netdev_ops; - SET_ETHTOOL_OPS(netdev, &e100_ethtool_ops); + netdev->ethtool_ops = &e100_ethtool_ops; netdev->watchdog_timeo = E100_WATCHDOG_PERIOD; strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1); diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c index 73a8aeefb92a..341889a4ef7f 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c +++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c @@ -1905,5 +1905,5 @@ static const struct ethtool_ops e1000_ethtool_ops = { void e1000_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &e1000_ethtool_ops); + netdev->ethtool_ops = &e1000_ethtool_ops; } diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c index 4e5ad7ebe1f2..e9a48bb5caac 100644 --- a/drivers/net/ethernet/intel/e1000e/ethtool.c +++ b/drivers/net/ethernet/intel/e1000e/ethtool.c @@ -2318,5 +2318,5 @@ static const struct ethtool_ops e1000_ethtool_ops = { void e1000e_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &e1000_ethtool_ops); + netdev->ethtool_ops = &e1000_ethtool_ops; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 0cf47c958081..f62929419a09 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -1700,5 +1700,5 @@ static const struct ethtool_ops i40e_ethtool_ops = { void i40e_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &i40e_ethtool_ops); + netdev->ethtool_ops = &i40e_ethtool_ops; } diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c index a46be016039e..77e786d2d0e0 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c @@ -705,5 +705,5 @@ static struct ethtool_ops i40evf_ethtool_ops = { **/ void i40evf_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &i40evf_ethtool_ops); + netdev->ethtool_ops = &i40evf_ethtool_ops; } diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 333a2b0bbada..a84297c85fb1 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -3035,5 +3035,5 @@ static const struct ethtool_ops igb_ethtool_ops = { void igb_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &igb_ethtool_ops); + netdev->ethtool_ops = &igb_ethtool_ops; } diff --git a/drivers/net/ethernet/intel/igbvf/ethtool.c b/drivers/net/ethernet/intel/igbvf/ethtool.c index 90eef07943f4..f58170bae18b 100644 --- a/drivers/net/ethernet/intel/igbvf/ethtool.c +++ b/drivers/net/ethernet/intel/igbvf/ethtool.c @@ -476,5 +476,5 @@ static const struct ethtool_ops igbvf_ethtool_ops = { void igbvf_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &igbvf_ethtool_ops); + netdev->ethtool_ops = &igbvf_ethtool_ops; } diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c b/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c index dbb7dd2f8e36..1da2d987d370 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c @@ -656,5 +656,5 @@ static const struct ethtool_ops ixgb_ethtool_ops = { void ixgb_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &ixgb_ethtool_ops); + netdev->ethtool_ops = &ixgb_ethtool_ops; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 6c55c14d082a..31d7268401e7 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -3099,5 +3099,5 @@ static const struct ethtool_ops ixgbe_ethtool_ops = { void ixgbe_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &ixgbe_ethtool_ops); + netdev->ethtool_ops = &ixgbe_ethtool_ops; } diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c index 1baecb60f065..a757f0734719 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c +++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c @@ -813,5 +813,5 @@ static const struct ethtool_ops ixgbevf_ethtool_ops = { void ixgbevf_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &ixgbevf_ethtool_ops); + netdev->ethtool_ops = &ixgbevf_ethtool_ops; } diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index b7b8d74c22d9..df1d1b97187f 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -2889,7 +2889,7 @@ static int mv643xx_eth_probe(struct platform_device *pdev) if (err) goto out; - SET_ETHTOOL_OPS(dev, &mv643xx_eth_ethtool_ops); + dev->ethtool_ops = &mv643xx_eth_ethtool_ops; init_pscr(mp, pd->speed, pd->duplex); diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 14786c8bf99e..72bc47f2d2e9 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2813,7 +2813,7 @@ static int mvneta_probe(struct platform_device *pdev) dev->watchdog_timeo = 5 * HZ; dev->netdev_ops = &mvneta_netdev_ops; - SET_ETHTOOL_OPS(dev, &mvneta_eth_tool_ops); + dev->ethtool_ops = &mvneta_eth_tool_ops; pp = netdev_priv(dev); diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c index b358c2f6f4bd..8f5aa7c62b18 100644 --- a/drivers/net/ethernet/marvell/pxa168_eth.c +++ b/drivers/net/ethernet/marvell/pxa168_eth.c @@ -1488,7 +1488,7 @@ static int pxa168_eth_probe(struct platform_device *pdev) dev->netdev_ops = &pxa168_eth_netdev_ops; dev->watchdog_timeo = 2 * HZ; dev->base_addr = 0; - SET_ETHTOOL_OPS(dev, &pxa168_ethtool_ops); + dev->ethtool_ops = &pxa168_ethtool_ops; INIT_WORK(&pep->tx_timeout_task, pxa168_eth_tx_timeout_task); diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index b81106451a0a..69693384b58c 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -4760,7 +4760,7 @@ static struct net_device *sky2_init_netdev(struct sky2_hw *hw, unsigned port, SET_NETDEV_DEV(dev, &hw->pdev->dev); dev->irq = hw->pdev->irq; - SET_ETHTOOL_OPS(dev, &sky2_ethtool_ops); + dev->ethtool_ops = &sky2_ethtool_ops; dev->watchdog_timeo = TX_WATCHDOG; dev->netdev_ops = &sky2_netdev_ops[port]; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index fba3c8e77626..79c6f467d17e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2539,7 +2539,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, netif_set_real_num_tx_queues(dev, priv->tx_ring_num); netif_set_real_num_rx_queues(dev, priv->rx_ring_num); - SET_ETHTOOL_OPS(dev, &mlx4_en_ethtool_ops); + dev->ethtool_ops = &mlx4_en_ethtool_ops; /* * Set driver features diff --git a/drivers/net/ethernet/micrel/ks8695net.c b/drivers/net/ethernet/micrel/ks8695net.c index 16435b3cfa9f..6c7c78baedca 100644 --- a/drivers/net/ethernet/micrel/ks8695net.c +++ b/drivers/net/ethernet/micrel/ks8695net.c @@ -1504,15 +1504,15 @@ ks8695_probe(struct platform_device *pdev) if (ksp->phyiface_regs && ksp->link_irq == -1) { ks8695_init_switch(ksp); ksp->dtype = KS8695_DTYPE_LAN; - SET_ETHTOOL_OPS(ndev, &ks8695_ethtool_ops); + ndev->ethtool_ops = &ks8695_ethtool_ops; } else if (ksp->phyiface_regs && ksp->link_irq != -1) { ks8695_init_wan_phy(ksp); ksp->dtype = KS8695_DTYPE_WAN; - SET_ETHTOOL_OPS(ndev, &ks8695_wan_ethtool_ops); + ndev->ethtool_ops = &ks8695_wan_ethtool_ops; } else { /* No initialisation since HPNA does not have a PHY */ ksp->dtype = KS8695_DTYPE_HPNA; - SET_ETHTOOL_OPS(ndev, &ks8695_ethtool_ops); + ndev->ethtool_ops = &ks8695_ethtool_ops; } /* And bring up the net_device with the net core */ diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c index e0c92e0e5e1d..13767eb36a48 100644 --- a/drivers/net/ethernet/micrel/ks8851.c +++ b/drivers/net/ethernet/micrel/ks8851.c @@ -1471,7 +1471,7 @@ static int ks8851_probe(struct spi_device *spi) skb_queue_head_init(&ks->txq); - SET_ETHTOOL_OPS(ndev, &ks8851_ethtool_ops); + ndev->ethtool_ops = &ks8851_ethtool_ops; SET_NETDEV_DEV(ndev, &spi->dev); spi_set_drvdata(spi, ks); diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c index 14ac0e2bc09f..4b9592c1fb40 100644 --- a/drivers/net/ethernet/micrel/ksz884x.c +++ b/drivers/net/ethernet/micrel/ksz884x.c @@ -7106,7 +7106,7 @@ static int pcidev_init(struct pci_dev *pdev, const struct pci_device_id *id) } dev->netdev_ops = &netdev_ops; - SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops); + dev->ethtool_ops = &netdev_ethtool_ops; if (register_netdev(dev)) goto pcidev_init_reg_err; port_set_power_saving(port, true); diff --git a/drivers/net/ethernet/microchip/enc28j60.c b/drivers/net/ethernet/microchip/enc28j60.c index c7b40aa21f22..b1b5f66b8b69 100644 --- a/drivers/net/ethernet/microchip/enc28j60.c +++ b/drivers/net/ethernet/microchip/enc28j60.c @@ -1593,7 +1593,7 @@ static int enc28j60_probe(struct spi_device *spi) dev->irq = spi->irq; dev->netdev_ops = &enc28j60_netdev_ops; dev->watchdog_timeo = TX_TIMEOUT; - SET_ETHTOOL_OPS(dev, &enc28j60_ethtool_ops); + dev->ethtool_ops = &enc28j60_ethtool_ops; enc28j60_lowpower(priv, true); diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index 130f6b204efa..f3d5d79f1cd1 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -4112,7 +4112,7 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent) setup_timer(&mgp->watchdog_timer, myri10ge_watchdog_timer, (unsigned long)mgp); - SET_ETHTOOL_OPS(netdev, &myri10ge_ethtool_ops); + netdev->ethtool_ops = &myri10ge_ethtool_ops; INIT_WORK(&mgp->watchdog_work, myri10ge_watchdog); status = register_netdev(netdev); if (status != 0) { diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c index 64ec2a437f46..291fba8b9f07 100644 --- a/drivers/net/ethernet/natsemi/natsemi.c +++ b/drivers/net/ethernet/natsemi/natsemi.c @@ -927,7 +927,7 @@ static int natsemi_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) dev->netdev_ops = &natsemi_netdev_ops; dev->watchdog_timeo = TX_TIMEOUT; - SET_ETHTOOL_OPS(dev, ðtool_ops); + dev->ethtool_ops = ðtool_ops; if (mtu) dev->mtu = mtu; diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c index dbccf1de49ec..19bb8244b9e3 100644 --- a/drivers/net/ethernet/natsemi/ns83820.c +++ b/drivers/net/ethernet/natsemi/ns83820.c @@ -2030,7 +2030,7 @@ static int ns83820_init_one(struct pci_dev *pci_dev, pci_dev->subsystem_vendor, pci_dev->subsystem_device); ndev->netdev_ops = &netdev_ops; - SET_ETHTOOL_OPS(ndev, &ops); + ndev->ethtool_ops = &ops; ndev->watchdog_timeo = 5 * HZ; pci_set_drvdata(pci_dev, ndev); diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index e900c1abdef7..e3cf38e6ce3c 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -7910,7 +7910,7 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) /* Driver entry points */ dev->netdev_ops = &s2io_netdev_ops; - SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops); + dev->ethtool_ops = &netdev_ethtool_ops; dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_RXCSUM | NETIF_F_LRO; diff --git a/drivers/net/ethernet/neterion/vxge/vxge-ethtool.c b/drivers/net/ethernet/neterion/vxge/vxge-ethtool.c index f8f073880f84..ddcc81ad1ae1 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-ethtool.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-ethtool.c @@ -1128,5 +1128,5 @@ static const struct ethtool_ops vxge_ethtool_ops = { void vxge_initialize_ethtool_ops(struct net_device *ndev) { - SET_ETHTOOL_OPS(ndev, &vxge_ethtool_ops); + ndev->ethtool_ops = &vxge_ethtool_ops; } diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index fddb464aeab3..e8235c5c5e69 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -5766,7 +5766,7 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) dev->netdev_ops = &nv_netdev_ops_optimized; netif_napi_add(dev, &np->napi, nv_napi_poll, RX_WORK_PER_LOOP); - SET_ETHTOOL_OPS(dev, &ops); + dev->ethtool_ops = &ops; dev->watchdog_timeo = NV_WATCHDOG_TIMEO; pci_set_drvdata(pci_dev, dev); diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c index 826f0ccdc23c..114d2fe52cc2 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c @@ -508,5 +508,5 @@ static const struct ethtool_ops pch_gbe_ethtool_ops = { void pch_gbe_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &pch_gbe_ethtool_ops); + netdev->ethtool_ops = &pch_gbe_ethtool_ops; } diff --git a/drivers/net/ethernet/packetengines/hamachi.c b/drivers/net/ethernet/packetengines/hamachi.c index b6bdeb3c1971..9a997e4c3e08 100644 --- a/drivers/net/ethernet/packetengines/hamachi.c +++ b/drivers/net/ethernet/packetengines/hamachi.c @@ -724,10 +724,8 @@ static int hamachi_init_one(struct pci_dev *pdev, /* The Hamachi-specific entries in the device structure. */ dev->netdev_ops = &hamachi_netdev_ops; - if (chip_tbl[hmp->chip_id].flags & CanHaveMII) - SET_ETHTOOL_OPS(dev, ðtool_ops); - else - SET_ETHTOOL_OPS(dev, ðtool_ops_no_mii); + dev->ethtool_ops = (chip_tbl[hmp->chip_id].flags & CanHaveMII) ? + ðtool_ops : ðtool_ops_no_mii; dev->watchdog_timeo = TX_TIMEOUT; if (mtu) dev->mtu = mtu; diff --git a/drivers/net/ethernet/packetengines/yellowfin.c b/drivers/net/ethernet/packetengines/yellowfin.c index 9a6cb482dcd0..69a8dc095072 100644 --- a/drivers/net/ethernet/packetengines/yellowfin.c +++ b/drivers/net/ethernet/packetengines/yellowfin.c @@ -472,7 +472,7 @@ static int yellowfin_init_one(struct pci_dev *pdev, /* The Yellowfin-specific entries in the device structure. */ dev->netdev_ops = &netdev_ops; - SET_ETHTOOL_OPS(dev, ðtool_ops); + dev->ethtool_ops = ðtool_ops; dev->watchdog_timeo = TX_TIMEOUT; if (mtu) diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index f09c35d669b3..5bf05818a12c 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -1373,7 +1373,7 @@ netxen_setup_netdev(struct netxen_adapter *adapter, netxen_nic_change_mtu(netdev, netdev->mtu); - SET_ETHTOOL_OPS(netdev, &netxen_nic_ethtool_ops); + netdev->ethtool_ops = &netxen_nic_ethtool_ops; netdev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO | NETIF_F_RXCSUM; diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index 2eabd44f8914..b5d6bc1a8b00 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -3838,7 +3838,7 @@ static int ql3xxx_probe(struct pci_dev *pdev, /* Set driver entry points */ ndev->netdev_ops = &ql3xxx_netdev_ops; - SET_ETHTOOL_OPS(ndev, &ql3xxx_ethtool_ops); + ndev->ethtool_ops = &ql3xxx_ethtool_ops; ndev->watchdog_timeo = 5 * HZ; netif_napi_add(ndev, &qdev->napi, ql_poll, 64); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 8a2aeb85e320..f0a285359e66 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -2265,10 +2265,8 @@ qlcnic_setup_netdev(struct qlcnic_adapter *adapter, struct net_device *netdev, qlcnic_change_mtu(netdev, netdev->mtu); - if (qlcnic_sriov_vf_check(adapter)) - SET_ETHTOOL_OPS(netdev, &qlcnic_sriov_vf_ethtool_ops); - else - SET_ETHTOOL_OPS(netdev, &qlcnic_ethtool_ops); + netdev->ethtool_ops = (qlcnic_sriov_vf_check(adapter)) ? + &qlcnic_sriov_vf_ethtool_ops : &qlcnic_ethtool_ops; netdev->features |= (NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM | NETIF_F_IPV6_CSUM | NETIF_F_GRO | @@ -2682,7 +2680,7 @@ err_out_disable_pdev: err_out_maintenance_mode: set_bit(__QLCNIC_MAINTENANCE_MODE, &adapter->state); netdev->netdev_ops = &qlcnic_netdev_failed_ops; - SET_ETHTOOL_OPS(netdev, &qlcnic_ethtool_failed_ops); + netdev->ethtool_ops = &qlcnic_ethtool_failed_ops; ahw->port_type = QLCNIC_XGBE; if (qlcnic_83xx_check(adapter)) diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c index 6e36fe14b848..b40050e03a56 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c @@ -4770,7 +4770,7 @@ static int qlge_probe(struct pci_dev *pdev, ndev->irq = pdev->irq; ndev->netdev_ops = &qlge_netdev_ops; - SET_ETHTOOL_OPS(ndev, &qlge_ethtool_ops); + ndev->ethtool_ops = &qlge_ethtool_ops; ndev->watchdog_timeo = 10 * HZ; err = register_netdev(ndev); diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index aa1c079f231d..be425ad5e824 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -7125,7 +7125,7 @@ rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) for (i = 0; i < ETH_ALEN; i++) dev->dev_addr[i] = RTL_R8(MAC0 + i); - SET_ETHTOOL_OPS(dev, &rtl8169_ethtool_ops); + dev->ethtool_ops = &rtl8169_ethtool_ops; dev->watchdog_timeo = RTL8169_TX_TIMEOUT; netif_napi_add(dev, &tp->napi, rtl8169_poll, R8169_NAPI_WEIGHT); diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 6a9509ccd33b..967314cade95 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2843,7 +2843,7 @@ static int sh_eth_drv_probe(struct platform_device *pdev) ndev->netdev_ops = &sh_eth_netdev_ops_tsu; else ndev->netdev_ops = &sh_eth_netdev_ops; - SET_ETHTOOL_OPS(ndev, &sh_eth_ethtool_ops); + ndev->ethtool_ops = &sh_eth_ethtool_ops; ndev->watchdog_timeo = TX_TIMEOUT; /* debug message level */ diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c index 0415fa50eeb7..c0981ae45874 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c @@ -520,5 +520,5 @@ static const struct ethtool_ops sxgbe_ethtool_ops = { void sxgbe_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &sxgbe_ethtool_ops); + netdev->ethtool_ops = &sxgbe_ethtool_ops; } diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 63d595fd3cc5..1e274045970f 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -2248,7 +2248,7 @@ static int efx_register_netdev(struct efx_nic *efx) } else { net_dev->netdev_ops = &efx_farch_netdev_ops; } - SET_ETHTOOL_OPS(net_dev, &efx_ethtool_ops); + net_dev->ethtool_ops = &efx_ethtool_ops; net_dev->gso_max_segs = EFX_TSO_MAX_SEGS; rtnl_lock(); diff --git a/drivers/net/ethernet/sis/sis190.c b/drivers/net/ethernet/sis/sis190.c index acbbe48a519c..a86339903b9b 100644 --- a/drivers/net/ethernet/sis/sis190.c +++ b/drivers/net/ethernet/sis/sis190.c @@ -1877,7 +1877,7 @@ static int sis190_init_one(struct pci_dev *pdev, dev->netdev_ops = &sis190_netdev_ops; - SET_ETHTOOL_OPS(dev, &sis190_ethtool_ops); + dev->ethtool_ops = &sis190_ethtool_ops; dev->watchdog_timeo = SIS190_TX_TIMEOUT; spin_lock_init(&tp->lock); diff --git a/drivers/net/ethernet/smsc/smc91c92_cs.c b/drivers/net/ethernet/smsc/smc91c92_cs.c index c7a4868571f9..6b33127ab352 100644 --- a/drivers/net/ethernet/smsc/smc91c92_cs.c +++ b/drivers/net/ethernet/smsc/smc91c92_cs.c @@ -318,7 +318,7 @@ static int smc91c92_probe(struct pcmcia_device *link) /* The SMC91c92-specific entries in the device structure. */ dev->netdev_ops = &smc_netdev_ops; - SET_ETHTOOL_OPS(dev, ðtool_ops); + dev->ethtool_ops = ðtool_ops; dev->watchdog_timeo = TX_TIMEOUT; smc->mii_if.dev = dev; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index c5f9cb85c8ef..c963394ded6c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -784,5 +784,5 @@ static const struct ethtool_ops stmmac_ethtool_ops = { void stmmac_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &stmmac_ethtool_ops); + netdev->ethtool_ops = &stmmac_ethtool_ops; } diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c index 2ead87759ab4..38da73a2a886 100644 --- a/drivers/net/ethernet/tehuti/tehuti.c +++ b/drivers/net/ethernet/tehuti/tehuti.c @@ -2413,7 +2413,7 @@ static void bdx_set_ethtool_ops(struct net_device *netdev) .get_ethtool_stats = bdx_get_ethtool_stats, }; - SET_ETHTOOL_OPS(netdev, &bdx_ethtool_ops); + netdev->ethtool_ops = &bdx_ethtool_ops; } /** diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 91499be03c6f..e3d871055d63 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1998,7 +1998,7 @@ static int cpsw_probe_dual_emac(struct platform_device *pdev, ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; ndev->netdev_ops = &cpsw_netdev_ops; - SET_ETHTOOL_OPS(ndev, &cpsw_ethtool_ops); + ndev->ethtool_ops = &cpsw_ethtool_ops; netif_napi_add(ndev, &priv_sl2->napi, cpsw_poll, CPSW_POLL_WEIGHT); /* register the network device */ @@ -2227,7 +2227,7 @@ static int cpsw_probe(struct platform_device *pdev) ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; ndev->netdev_ops = &cpsw_netdev_ops; - SET_ETHTOOL_OPS(ndev, &cpsw_ethtool_ops); + ndev->ethtool_ops = &cpsw_ethtool_ops; netif_napi_add(ndev, &priv->napi, cpsw_poll, CPSW_POLL_WEIGHT); /* register the network device */ diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 8f0e69ce07ca..e76eae541151 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -1980,7 +1980,7 @@ static int davinci_emac_probe(struct platform_device *pdev) } ndev->netdev_ops = &emac_netdev_ops; - SET_ETHTOOL_OPS(ndev, ðtool_ops); + ndev->ethtool_ops = ðtool_ops; netif_napi_add(ndev, &priv->napi, emac_poll, EMAC_POLL_WEIGHT); /* register the network device */ diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 1de3ef5dd5d2..2e967a7bdb33 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -810,7 +810,7 @@ static int netvsc_probe(struct hv_device *dev, net->features = NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_TSO; - SET_ETHTOOL_OPS(net, ðtool_ops); + net->ethtool_ops = ðtool_ops; SET_NETDEV_DEV(net, &dev->device); /* Notify the netvsc driver of the new device */ diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c index 63aa9d9e34c5..27536aa89199 100644 --- a/drivers/net/ntb_netdev.c +++ b/drivers/net/ntb_netdev.c @@ -348,7 +348,7 @@ static int ntb_netdev_probe(struct pci_dev *pdev) memcpy(ndev->dev_addr, ndev->perm_addr, ndev->addr_len); ndev->netdev_ops = &ntb_netdev_ops; - SET_ETHTOOL_OPS(ndev, &ntb_ethtool_ops); + ndev->ethtool_ops = &ntb_ethtool_ops; dev->qp = ntb_transport_create_queue(ndev, pdev, &ntb_netdev_handlers); if (!dev->qp) { diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c index a8497183ff8b..dac7a0d9bb46 100644 --- a/drivers/net/rionet.c +++ b/drivers/net/rionet.c @@ -494,7 +494,7 @@ static int rionet_setup_netdev(struct rio_mport *mport, struct net_device *ndev) ndev->mtu = RIO_MAX_MSG_SIZE - 14; ndev->features = NETIF_F_LLTX; SET_NETDEV_DEV(ndev, &mport->dev); - SET_ETHTOOL_OPS(ndev, &rionet_ethtool_ops); + ndev->ethtool_ops = &rionet_ethtool_ops; spin_lock_init(&rnet->lock); spin_lock_init(&rnet->tx_lock); diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c index 630caf48f63a..8cfc3bb0c6a6 100644 --- a/drivers/net/usb/catc.c +++ b/drivers/net/usb/catc.c @@ -793,7 +793,7 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id netdev->netdev_ops = &catc_netdev_ops; netdev->watchdog_timeo = TX_TIMEOUT; - SET_ETHTOOL_OPS(netdev, &ops); + netdev->ethtool_ops = &ops; catc->usbdev = usbdev; catc->netdev = netdev; diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index 660bd5ea9fc0..a3a05869309d 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -2425,7 +2425,7 @@ static void hso_net_init(struct net_device *net) net->type = ARPHRD_NONE; net->mtu = DEFAULT_MTU - 14; net->tx_queue_len = 10; - SET_ETHTOOL_OPS(net, &ops); + net->ethtool_ops = &ops; /* and initialize the semaphore */ spin_lock_init(&hso_net->net_lock); diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c index 421934c83f1c..f72570708edb 100644 --- a/drivers/net/usb/ipheth.c +++ b/drivers/net/usb/ipheth.c @@ -524,7 +524,7 @@ static int ipheth_probe(struct usb_interface *intf, usb_set_intfdata(intf, dev); SET_NETDEV_DEV(netdev, &intf->dev); - SET_ETHTOOL_OPS(netdev, &ops); + netdev->ethtool_ops = &ops; retval = register_netdev(netdev); if (retval) { diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c index a359d3bb7c5b..dcb6d33141e0 100644 --- a/drivers/net/usb/kaweth.c +++ b/drivers/net/usb/kaweth.c @@ -1171,7 +1171,7 @@ err_fw: netdev->netdev_ops = &kaweth_netdev_ops; netdev->watchdog_timeo = KAWETH_TX_TIMEOUT; netdev->mtu = le16_to_cpu(kaweth->configuration.segment_size); - SET_ETHTOOL_OPS(netdev, &ops); + netdev->ethtool_ops = &ops; /* kaweth is zeroed as part of alloc_netdev */ INIT_DELAYED_WORK(&kaweth->lowmem_work, kaweth_resubmit_tl); diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index 03e8a15d7deb..f84080215915 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -1159,7 +1159,7 @@ static int pegasus_probe(struct usb_interface *intf, net->watchdog_timeo = PEGASUS_TX_TIMEOUT; net->netdev_ops = &pegasus_netdev_ops; - SET_ETHTOOL_OPS(net, &ops); + net->ethtool_ops = &ops; pegasus->mii.dev = net; pegasus->mii.mdio_read = mdio_read; pegasus->mii.mdio_write = mdio_write; diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 3fbfb0869030..9f91c7aba4b0 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3452,7 +3452,7 @@ static int rtl8152_probe(struct usb_interface *intf, NETIF_F_TSO | NETIF_F_FRAGLIST | NETIF_F_IPV6_CSUM | NETIF_F_TSO6; - SET_ETHTOOL_OPS(netdev, &ops); + netdev->ethtool_ops = &ops; netif_set_gso_max_size(netdev, RTL_LIMITED_TSO_SIZE); tp->mii.dev = netdev; diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c index da2c4583bd2d..6e87e5710048 100644 --- a/drivers/net/usb/rtl8150.c +++ b/drivers/net/usb/rtl8150.c @@ -878,7 +878,7 @@ static int rtl8150_probe(struct usb_interface *intf, dev->netdev = netdev; netdev->netdev_ops = &rtl8150_netdev_ops; netdev->watchdog_timeo = RTL8150_TX_TIMEOUT; - SET_ETHTOOL_OPS(netdev, &ops); + netdev->ethtool_ops = &ops; dev->intr_interval = 100; /* 100ms */ if (!alloc_all_urbs(dev)) { diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index b852bb368acc..7d9f84a91f37 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1646,7 +1646,7 @@ static int virtnet_probe(struct virtio_device *vdev) dev->netdev_ops = &virtnet_netdev; dev->features = NETIF_F_HIGHDMA; - SET_ETHTOOL_OPS(dev, &virtnet_ethtool_ops); + dev->ethtool_ops = &virtnet_ethtool_ops; SET_NETDEV_DEV(dev, &vdev->dev); /* Do we support "hardware" checksums? */ diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c index 600ab56c0008..00e120296e92 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethtool.c +++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c @@ -635,5 +635,5 @@ static const struct ethtool_ops vmxnet3_ethtool_ops = { void vmxnet3_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &vmxnet3_ethtool_ops); + netdev->ethtool_ops = &vmxnet3_ethtool_ops; } diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 1dfee9a7fbf7..e68c8eb4ea8e 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2716,7 +2716,7 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, return -EEXIST; } - SET_ETHTOOL_OPS(dev, &vxlan_ethtool_ops); + dev->ethtool_ops = &vxlan_ethtool_ops; /* create an fdb entry for a valid default destination */ if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) { diff --git a/drivers/net/wireless/hostap/hostap_main.c b/drivers/net/wireless/hostap/hostap_main.c index 67db34e56d7e..52919ad42726 100644 --- a/drivers/net/wireless/hostap/hostap_main.c +++ b/drivers/net/wireless/hostap/hostap_main.c @@ -882,7 +882,7 @@ void hostap_setup_dev(struct net_device *dev, local_info_t *local, dev->mtu = local->mtu; - SET_ETHTOOL_OPS(dev, &prism2_ethtool_ops); + dev->ethtool_ops = &prism2_ethtool_ops; } diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index ef05c5c49d41..a7557331699f 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -386,7 +386,7 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6; dev->features = dev->hw_features | NETIF_F_RXCSUM; - SET_ETHTOOL_OPS(dev, &xenvif_ethtool_ops); + dev->ethtool_ops = &xenvif_ethtool_ops; dev->tx_queue_len = XENVIF_QUEUE_LENGTH; diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 158b5e639fc7..895355de8ac4 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1332,7 +1332,7 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev) */ netdev->features |= netdev->hw_features; - SET_ETHTOOL_OPS(netdev, &xennet_ethtool_ops); + netdev->ethtool_ops = &xennet_ethtool_ops; SET_NETDEV_DEV(netdev, &dev->dev); netif_set_gso_max_size(netdev, XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER); diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index e232ceca38fe..5ef5b4f45758 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -969,10 +969,9 @@ static int qeth_l2_setup_netdev(struct qeth_card *card) card->dev->watchdog_timeo = QETH_TX_TIMEOUT; card->dev->mtu = card->info.initial_mtu; card->dev->netdev_ops = &qeth_l2_netdev_ops; - if (card->info.type != QETH_CARD_TYPE_OSN) - SET_ETHTOOL_OPS(card->dev, &qeth_l2_ethtool_ops); - else - SET_ETHTOOL_OPS(card->dev, &qeth_l2_osn_ops); + card->dev->ethtool_ops = + (card->info.type != QETH_CARD_TYPE_OSN) ? + &qeth_l2_ethtool_ops : &qeth_l2_osn_ops; card->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; card->info.broadcast_capable = 1; qeth_l2_request_initial_mac(card); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index bc2499a24884..c58f82af3658 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3301,7 +3301,7 @@ static int qeth_l3_setup_netdev(struct qeth_card *card) card->dev->ml_priv = card; card->dev->watchdog_timeo = QETH_TX_TIMEOUT; card->dev->mtu = card->info.initial_mtu; - SET_ETHTOOL_OPS(card->dev, &qeth_l3_ethtool_ops); + card->dev->ethtool_ops = &qeth_l3_ethtool_ops; card->dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; diff --git a/drivers/staging/et131x/et131x.c b/drivers/staging/et131x/et131x.c index d329cf314360..15e0f4da3ce0 100644 --- a/drivers/staging/et131x/et131x.c +++ b/drivers/staging/et131x/et131x.c @@ -4604,7 +4604,7 @@ static int et131x_pci_setup(struct pci_dev *pdev, netdev->netdev_ops = &et131x_netdev_ops; SET_NETDEV_DEV(netdev, &pdev->dev); - SET_ETHTOOL_OPS(netdev, &et131x_ethtool_ops); + netdev->ethtool_ops = &et131x_ethtool_ops; adapter = et131x_adapter_init(netdev, pdev); diff --git a/drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c b/drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c index d6421b9b5981..a6158bef58e5 100644 --- a/drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c +++ b/drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c @@ -2249,7 +2249,7 @@ struct net_device *init_ft1000_card(struct pcmcia_device *link, ft1000InitProc(dev); ft1000_card_present = 1; - SET_ETHTOOL_OPS(dev, &ops); + dev->ethtool_ops = &ops; printk(KERN_INFO "ft1000: %s: addr 0x%04lx irq %d, MAC addr %pM\n", dev->name, dev->base_addr, dev->irq, dev->dev_addr); return dev; diff --git a/drivers/staging/netlogic/xlr_net.c b/drivers/staging/netlogic/xlr_net.c index c83e3375104b..9d957615e32a 100644 --- a/drivers/staging/netlogic/xlr_net.c +++ b/drivers/staging/netlogic/xlr_net.c @@ -1066,7 +1066,7 @@ static int xlr_net_probe(struct platform_device *pdev) xlr_set_rx_mode(ndev); priv->num_rx_desc += MAX_NUM_DESC_SPILL; - SET_ETHTOOL_OPS(ndev, &xlr_ethtool_ops); + ndev->ethtool_ops = &xlr_ethtool_ops; SET_NETDEV_DEV(ndev, &pdev->dev); /* Common registers, do one time initialization */ diff --git a/drivers/staging/octeon/ethernet.c b/drivers/staging/octeon/ethernet.c index ff7214aac9dd..da9dd6bc5660 100644 --- a/drivers/staging/octeon/ethernet.c +++ b/drivers/staging/octeon/ethernet.c @@ -469,7 +469,7 @@ int cvm_oct_common_init(struct net_device *dev) /* We do our own locking, Linux doesn't need to */ dev->features |= NETIF_F_LLTX; - SET_ETHTOOL_OPS(dev, &cvm_oct_ethtool_ops); + dev->ethtool_ops = &cvm_oct_ethtool_ops; cvm_oct_phy_setup_device(dev); cvm_oct_set_mac_filter(dev); diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c index b7d4f82872b7..ce8e28146162 100644 --- a/drivers/usb/gadget/u_ether.c +++ b/drivers/usb/gadget/u_ether.c @@ -793,7 +793,7 @@ struct eth_dev *gether_setup_name(struct usb_gadget *g, net->netdev_ops = ð_netdev_ops; - SET_ETHTOOL_OPS(net, &ops); + net->ethtool_ops = &ops; dev->gadget = g; SET_NETDEV_DEV(net, &g->dev); @@ -850,7 +850,7 @@ struct net_device *gether_setup_name_default(const char *netname) net->netdev_ops = ð_netdev_ops; - SET_ETHTOOL_OPS(net, &ops); + net->ethtool_ops = &ops; SET_NETDEV_DEVTYPE(net, &gadget_type); return net; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index adc4658e9873..2dea98cbbdba 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -56,9 +56,6 @@ struct device; struct phy_device; /* 802.11 specific */ struct wireless_dev; - /* source back-compat hooks */ -#define SET_ETHTOOL_OPS(netdev,ops) \ - ( (netdev)->ethtool_ops = (ops) ) void netdev_set_default_ethtool_ops(struct net_device *dev, const struct ethtool_ops *ops); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 744a59b85e15..e7ee65dc20bf 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -884,7 +884,7 @@ static void batadv_softif_init_early(struct net_device *dev) /* generate random address */ eth_hw_addr_random(dev); - SET_ETHTOOL_OPS(dev, &batadv_ethtool_ops); + dev->ethtool_ops = &batadv_ethtool_ops; memset(priv, 0, sizeof(*priv)); } diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 3e2da2cb72db..9212015abc8f 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -348,7 +348,7 @@ void br_dev_setup(struct net_device *dev) dev->netdev_ops = &br_netdev_ops; dev->destructor = br_dev_free; - SET_ETHTOOL_OPS(dev, &br_ethtool_ops); + dev->ethtool_ops = &br_ethtool_ops; SET_NETDEV_DEVTYPE(dev, &br_type); dev->tx_queue_len = 0; dev->priv_flags = IFF_EBRIDGE; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 02c0e1716f64..64c5af0a10dd 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -346,7 +346,7 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent, return slave_dev; slave_dev->features = master->vlan_features; - SET_ETHTOOL_OPS(slave_dev, &dsa_slave_ethtool_ops); + slave_dev->ethtool_ops = &dsa_slave_ethtool_ops; eth_hw_addr_inherit(slave_dev, master); slave_dev->tx_queue_len = 0; diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 729c68763fe7..789af9280e77 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -130,7 +130,7 @@ static void do_setup(struct net_device *netdev) netdev->priv_flags &= ~IFF_TX_SKB_SHARING; netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE; netdev->destructor = internal_dev_destructor; - SET_ETHTOOL_OPS(netdev, &internal_dev_ethtool_ops); + netdev->ethtool_ops = &internal_dev_ethtool_ops; netdev->tx_queue_len = 0; netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST | -- cgit v1.2.3-59-g8ed1b From 50a0ffaf75e9d2d97200b523f2c600f40e9756b1 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Sun, 11 May 2014 10:47:15 +0200 Subject: net: cdc_ncm/cdc_mbim: rework probing of NCM/MBIM functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The NCM class match in the cdc_mbim driver is confusing and cause unexpected behaviour. The USB core guarantees that a USB interface is in altsetting 0 when probing starts. This means that devices implementing a NCM 1.0 backwards compatible MBIM function (a "NCM/MBIM function") always hit the NCM entry in the cdc_mbim driver match table. Such functions will never match any of the MBIM entries. This causes unexpeced behaviour for cases where the NCM and MBIM entries are differet, which is currently the case for all except Ericsson devices. Improve the probing of NCM/MBIM functions by looking up the device again in the cdc_mbim match table after switching to the MBIM identity. The shared altsetting selection is updated to better accommodate the new probing logic, returning the preferred altsetting for the control interface instead of the data interface. The control interface altsetting update is moved to the cdc_mbim driver. It is never necessary to change the control interface altsetting for NCM. Cc: Greg Suarez Reported by: Yu-an Shih Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_mbim.c | 43 +++++++++++++++++++++++++++++++++++++++++-- drivers/net/usb/cdc_ncm.c | 27 +++++++++++++-------------- include/linux/usb/cdc_ncm.h | 2 +- 3 files changed, 55 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index 80d27719ba38..bc23273d0455 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -107,15 +107,54 @@ static const struct net_device_ops cdc_mbim_netdev_ops = { .ndo_vlan_rx_kill_vid = cdc_mbim_rx_kill_vid, }; +/* Change the control interface altsetting and update the .driver_info + * pointer if the matching entry after changing class codes points to + * a different struct + */ +static int cdc_mbim_set_ctrlalt(struct usbnet *dev, struct usb_interface *intf, u8 alt) +{ + struct usb_driver *driver = to_usb_driver(intf->dev.driver); + const struct usb_device_id *id; + struct driver_info *info; + int ret; + + ret = usb_set_interface(dev->udev, + intf->cur_altsetting->desc.bInterfaceNumber, + alt); + if (ret) + return ret; + + id = usb_match_id(intf, driver->id_table); + if (!id) + return -ENODEV; + + info = (struct driver_info *)id->driver_info; + if (info != dev->driver_info) { + dev_dbg(&intf->dev, "driver_info updated to '%s'\n", + info->description); + dev->driver_info = info; + } + return 0; +} + static int cdc_mbim_bind(struct usbnet *dev, struct usb_interface *intf) { struct cdc_ncm_ctx *ctx; struct usb_driver *subdriver = ERR_PTR(-ENODEV); int ret = -ENODEV; - u8 data_altsetting = cdc_ncm_select_altsetting(dev, intf); + u8 data_altsetting = 1; struct cdc_mbim_state *info = (void *)&dev->data; - /* Probably NCM, defer for cdc_ncm_bind */ + /* should we change control altsetting on a NCM/MBIM function? */ + if (cdc_ncm_select_altsetting(intf) == CDC_NCM_COMM_ALTSETTING_MBIM) { + data_altsetting = CDC_NCM_DATA_ALTSETTING_MBIM; + ret = cdc_mbim_set_ctrlalt(dev, intf, CDC_NCM_COMM_ALTSETTING_MBIM); + if (ret) + goto err; + ret = -ENODEV; + } + + /* we will hit this for NCM/MBIM functions if prefer_mbim is false */ if (!cdc_ncm_comm_intf_is_mbim(intf->cur_altsetting)) goto err; diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 9a2bd11943eb..d23bca57a23f 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -541,10 +541,10 @@ void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf) } EXPORT_SYMBOL_GPL(cdc_ncm_unbind); -/* Select the MBIM altsetting iff it is preferred and available, - * returning the number of the corresponding data interface altsetting +/* Return the number of the MBIM control interface altsetting iff it + * is preferred and available, */ -u8 cdc_ncm_select_altsetting(struct usbnet *dev, struct usb_interface *intf) +u8 cdc_ncm_select_altsetting(struct usb_interface *intf) { struct usb_host_interface *alt; @@ -563,15 +563,15 @@ u8 cdc_ncm_select_altsetting(struct usbnet *dev, struct usb_interface *intf) * the rules given in section 6 (USB Device Model) of this * specification." */ - if (prefer_mbim && intf->num_altsetting == 2) { + if (intf->num_altsetting < 2) + return intf->cur_altsetting->desc.bAlternateSetting; + + if (prefer_mbim) { alt = usb_altnum_to_altsetting(intf, CDC_NCM_COMM_ALTSETTING_MBIM); - if (alt && cdc_ncm_comm_intf_is_mbim(alt) && - !usb_set_interface(dev->udev, - intf->cur_altsetting->desc.bInterfaceNumber, - CDC_NCM_COMM_ALTSETTING_MBIM)) - return CDC_NCM_DATA_ALTSETTING_MBIM; + if (alt && cdc_ncm_comm_intf_is_mbim(alt)) + return CDC_NCM_COMM_ALTSETTING_MBIM; } - return CDC_NCM_DATA_ALTSETTING_NCM; + return CDC_NCM_COMM_ALTSETTING_NCM; } EXPORT_SYMBOL_GPL(cdc_ncm_select_altsetting); @@ -580,12 +580,11 @@ static int cdc_ncm_bind(struct usbnet *dev, struct usb_interface *intf) int ret; /* MBIM backwards compatible function? */ - cdc_ncm_select_altsetting(dev, intf); - if (cdc_ncm_comm_intf_is_mbim(intf->cur_altsetting)) + if (cdc_ncm_select_altsetting(intf) != CDC_NCM_COMM_ALTSETTING_NCM) return -ENODEV; - /* NCM data altsetting is always 1 */ - ret = cdc_ncm_bind_common(dev, intf, 1); + /* The NCM data altsetting is fixed */ + ret = cdc_ncm_bind_common(dev, intf, CDC_NCM_DATA_ALTSETTING_NCM); /* * We should get an event when network connection is "connected" or diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 44b38b92236a..55b6feead93b 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -121,7 +121,7 @@ struct cdc_ncm_ctx { u16 connected; }; -u8 cdc_ncm_select_altsetting(struct usbnet *dev, struct usb_interface *intf); +u8 cdc_ncm_select_altsetting(struct usb_interface *intf); int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting); void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf); struct sk_buff *cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign); -- cgit v1.2.3-59-g8ed1b From 89278c9dc922272df921042aafa18311f3398c6c Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Sun, 11 May 2014 20:22:10 -0700 Subject: tcp: simplify fast open cookie processing Consolidate various cookie checking and generation code to simplify the fast open processing. The main goal is to reduce code duplication in tcp_v4_conn_request() for IPv6 support. Removes two experimental sysctl flags TFO_SERVER_ALWAYS and TFO_SERVER_COOKIE_NOT_CHKD used primarily for developmental debugging purposes. Signed-off-by: Yuchung Cheng Signed-off-by: Daniel Lee Signed-off-by: Jerry Chu Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/linux/tcp.h | 5 ---- include/net/tcp.h | 9 +------ net/ipv4/tcp_fastopen.c | 71 +++++++++++++++++++------------------------------ net/ipv4/tcp_ipv4.c | 10 +++---- net/ipv4/tcp_output.c | 2 +- 5 files changed, 33 insertions(+), 64 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 4e37c71ecd74..bc35e4709e8e 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -366,11 +366,6 @@ static inline bool tcp_passive_fastopen(const struct sock *sk) tcp_sk(sk)->fastopen_rsk != NULL); } -static inline bool fastopen_cookie_present(struct tcp_fastopen_cookie *foc) -{ - return foc->len != -1; -} - extern void tcp_sock_destruct(struct sock *sk); static inline int fastopen_init_queue(struct sock *sk, int backlog) diff --git a/include/net/tcp.h b/include/net/tcp.h index 012236838583..17d7c6a3d037 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -220,8 +220,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TFO_SERVER_ENABLE 2 #define TFO_CLIENT_NO_COOKIE 4 /* Data in SYN w/o cookie option */ -/* Process SYN data but skip cookie validation */ -#define TFO_SERVER_COOKIE_NOT_CHKED 0x100 /* Accept SYN data w/o any cookie option */ #define TFO_SERVER_COOKIE_NOT_REQD 0x200 @@ -230,10 +228,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); */ #define TFO_SERVER_WO_SOCKOPT1 0x400 #define TFO_SERVER_WO_SOCKOPT2 0x800 -/* Always create TFO child sockets on a TFO listener even when - * cookie/data not present. (For testing purpose!) - */ -#define TFO_SERVER_ALWAYS 0x1000 extern struct inet_timewait_death_row tcp_death_row; @@ -1335,8 +1329,7 @@ int tcp_fastopen_create_child(struct sock *sk, struct request_sock *req); bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct tcp_fastopen_cookie *foc, - struct tcp_fastopen_cookie *valid_foc); + struct tcp_fastopen_cookie *foc); void tcp_fastopen_init_key_once(bool publish); #define TCP_FASTOPEN_KEY_LENGTH 16 diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 0606c91d9d0b..5a98277b9a82 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -228,59 +228,44 @@ static bool tcp_fastopen_queue_check(struct sock *sk) return true; } +/* Returns true if we should perform Fast Open on the SYN. The cookie (foc) + * may be updated and return the client in the SYN-ACK later. E.g., Fast Open + * cookie request (foc->len == 0). + */ bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct tcp_fastopen_cookie *foc, - struct tcp_fastopen_cookie *valid_foc) + struct tcp_fastopen_cookie *foc) { - bool skip_cookie = false; - - if (likely(!fastopen_cookie_present(foc))) { - /* See include/net/tcp.h for the meaning of these knobs */ - if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) || - ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) && - (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1))) - skip_cookie = true; /* no cookie to validate */ - else - return false; - } - /* A FO option is present; bump the counter. */ - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE); + struct tcp_fastopen_cookie valid_foc = { .len = -1 }; + bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1; - if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 || - !tcp_fastopen_queue_check(sk)) + if (!((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) && + (syn_data || foc->len >= 0) && + tcp_fastopen_queue_check(sk))) { + foc->len = -1; return false; - - if (skip_cookie) { - tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; - return true; } - if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) { - if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) { - tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, - ip_hdr(skb)->daddr, valid_foc); - if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) || - memcmp(&foc->val[0], &valid_foc->val[0], - TCP_FASTOPEN_COOKIE_SIZE) != 0) - return false; - valid_foc->len = -1; - } - /* Acknowledge the data received from the peer. */ + if (syn_data && (sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD)) + goto fastopen; + + tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, &valid_foc); + + if (foc->len == TCP_FASTOPEN_COOKIE_SIZE && + foc->len == valid_foc.len && + !memcmp(foc->val, valid_foc.val, foc->len)) { +fastopen: tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + foc->len = -1; + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE); return true; - } else if (foc->len == 0) { /* Client requesting a cookie */ - tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, - ip_hdr(skb)->daddr, valid_foc); - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPFASTOPENCOOKIEREQD); - } else { - /* Client sent a cookie with wrong size. Treat it - * the same as invalid and return a valid one. - */ - tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, - ip_hdr(skb)->daddr, valid_foc); } + + NET_INC_STATS_BH(sock_net(sk), foc->len ? + LINUX_MIB_TCPFASTOPENPASSIVEFAIL : + LINUX_MIB_TCPFASTOPENCOOKIEREQD); + *foc = valid_foc; return false; } EXPORT_SYMBOL(tcp_fastopen_check); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 032fcaee164a..5ea0949dadfd 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1273,7 +1273,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) bool want_cookie = false; struct flowi4 fl4; struct tcp_fastopen_cookie foc = { .len = -1 }; - struct tcp_fastopen_cookie valid_foc = { .len = -1 }; struct sk_buff *skb_synack; int do_fastopen; @@ -1381,7 +1380,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (dst == NULL) goto drop_and_free; } - do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc); + do_fastopen = !want_cookie && + tcp_fastopen_check(sk, skb, req, &foc); /* We don't call tcp_v4_send_synack() directly because we need * to make sure a child socket can be created successfully before @@ -1394,8 +1394,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * latter to remove its dependency on the current implementation * of tcp_v4_send_synack()->tcp_select_initial_window(). */ - skb_synack = tcp_make_synack(sk, dst, req, - fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL); + skb_synack = tcp_make_synack(sk, dst, req, &foc); if (skb_synack) { __tcp_v4_send_check(skb_synack, ireq->ir_loc_addr, ireq->ir_rmt_addr); @@ -1415,9 +1414,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_rsk(req)->listener = NULL; /* Add the request_sock to the SYN table */ inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); - if (fastopen_cookie_present(&foc) && foc.len != 0) - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPFASTOPENPASSIVEFAIL); } else if (tcp_fastopen_create_child(sk, skb, skb_synack, req)) goto drop_and_release; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 694711a140d4..b20fc02920f9 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -627,7 +627,7 @@ static unsigned int tcp_synack_options(struct sock *sk, if (unlikely(!ireq->tstamp_ok)) remaining -= TCPOLEN_SACKPERM_ALIGNED; } - if (foc != NULL) { + if (foc != NULL && foc->len >= 0) { u32 need = TCPOLEN_EXP_FASTOPEN_BASE + foc->len; need = (need + 3) & ~3U; /* Align to 32 bits */ if (remaining >= need) { -- cgit v1.2.3-59-g8ed1b From 2eacc23c422b4553030168f315cb49522fa1b1f6 Mon Sep 17 00:00:00 2001 From: Yuval Atias Date: Wed, 14 May 2014 12:15:10 +0300 Subject: net/mlx4_core: Enforce irq affinity changes immediatly During heavy traffic, napi is constatntly polling the complition queue and no interrupt is fired. Because of that, changes to irq affinity are ignored until traffic is stopped and resumed. By registering to the irq notifier mechanism, and forcing interrupt when affinity is changed, irq affinity changes will be immediatly enforced. Signed-off-by: Yuval Atias Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cq.c | 3 ++ drivers/net/ethernet/mellanox/mlx4/en_rx.c | 11 +++++- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 6 +++ drivers/net/ethernet/mellanox/mlx4/eq.c | 62 ++++++++++++++++++++++++++++++ include/linux/mlx4/device.h | 3 ++ 5 files changed, 83 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index 0487121e4a0f..8542030b89cf 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -293,6 +293,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, atomic_set(&cq->refcount, 1); init_completion(&cq->free); + cq->irq = priv->eq_table.eq[cq->vector].irq; + cq->irq_affinity_change = false; + return 0; err_radix: diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index a1512450816d..e8c0d2b832b7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -895,10 +895,17 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget) mlx4_en_cq_unlock_napi(cq); /* If we used up all the quota - we're probably not done yet... */ - if (done == budget) + if (done == budget) { INC_PERF_COUNTER(priv->pstats.napi_quota); - else { + if (unlikely(cq->mcq.irq_affinity_change)) { + cq->mcq.irq_affinity_change = false; + napi_complete(napi); + mlx4_en_arm_cq(priv, cq); + return 0; + } + } else { /* Done for now */ + cq->mcq.irq_affinity_change = false; napi_complete(napi); mlx4_en_arm_cq(priv, cq); } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 89585c6311c3..cb964056d710 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -474,9 +474,15 @@ int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget) /* If we used up all the quota - we're probably not done yet... */ if (done < budget) { /* Done for now */ + cq->mcq.irq_affinity_change = false; napi_complete(napi); mlx4_en_arm_cq(priv, cq); return done; + } else if (unlikely(cq->mcq.irq_affinity_change)) { + cq->mcq.irq_affinity_change = false; + napi_complete(napi); + mlx4_en_arm_cq(priv, cq); + return 0; } return budget; } diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 6c088bc1845b..d954ec1eac17 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -53,6 +53,11 @@ enum { MLX4_EQ_ENTRY_SIZE = 0x20 }; +struct mlx4_irq_notify { + void *arg; + struct irq_affinity_notify notify; +}; + #define MLX4_EQ_STATUS_OK ( 0 << 28) #define MLX4_EQ_STATUS_WRITE_FAIL (10 << 28) #define MLX4_EQ_OWNER_SW ( 0 << 24) @@ -1083,6 +1088,57 @@ static void mlx4_unmap_clr_int(struct mlx4_dev *dev) iounmap(priv->clr_base); } +static void mlx4_irq_notifier_notify(struct irq_affinity_notify *notify, + const cpumask_t *mask) +{ + struct mlx4_irq_notify *n = container_of(notify, + struct mlx4_irq_notify, + notify); + struct mlx4_priv *priv = (struct mlx4_priv *)n->arg; + struct radix_tree_iter iter; + void **slot; + + radix_tree_for_each_slot(slot, &priv->cq_table.tree, &iter, 0) { + struct mlx4_cq *cq = (struct mlx4_cq *)(*slot); + + if (cq->irq == notify->irq) + cq->irq_affinity_change = true; + } +} + +static void mlx4_release_irq_notifier(struct kref *ref) +{ + struct mlx4_irq_notify *n = container_of(ref, struct mlx4_irq_notify, + notify.kref); + kfree(n); +} + +static void mlx4_assign_irq_notifier(struct mlx4_priv *priv, + struct mlx4_dev *dev, int irq) +{ + struct mlx4_irq_notify *irq_notifier = NULL; + int err = 0; + + irq_notifier = kzalloc(sizeof(*irq_notifier), GFP_KERNEL); + if (!irq_notifier) { + mlx4_warn(dev, "Failed to allocate irq notifier. irq %d\n", + irq); + return; + } + + irq_notifier->notify.irq = irq; + irq_notifier->notify.notify = mlx4_irq_notifier_notify; + irq_notifier->notify.release = mlx4_release_irq_notifier; + irq_notifier->arg = priv; + err = irq_set_affinity_notifier(irq, &irq_notifier->notify); + if (err) { + kfree(irq_notifier); + irq_notifier = NULL; + mlx4_warn(dev, "Failed to set irq notifier. irq %d\n", irq); + } +} + + int mlx4_alloc_eq_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -1353,6 +1409,9 @@ int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap, continue; /*we dont want to break here*/ } + mlx4_assign_irq_notifier(priv, dev, + priv->eq_table.eq[vec].irq); + eq_set_ci(&priv->eq_table.eq[vec], 1); } } @@ -1379,6 +1438,9 @@ void mlx4_release_eq(struct mlx4_dev *dev, int vec) Belonging to a legacy EQ*/ mutex_lock(&priv->msix_ctl.pool_lock); if (priv->msix_ctl.pool_bm & 1ULL << i) { + irq_set_affinity_notifier( + priv->eq_table.eq[vec].irq, + NULL); free_irq(priv->eq_table.eq[vec].irq, &priv->eq_table.eq[vec]); priv->msix_ctl.pool_bm &= ~(1ULL << i); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index ba87bd21295a..c0468e6f0442 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -577,6 +577,9 @@ struct mlx4_cq { u32 cons_index; + u16 irq; + bool irq_affinity_change; + __be32 *set_ci_db; __be32 *arm_db; int arm_sn; -- cgit v1.2.3-59-g8ed1b From 622582786c9e041d0bd52bde201787adeab249f8 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 13 May 2014 19:50:46 -0700 Subject: net: filter: x86: internal BPF JIT Maps all internal BPF instructions into x86_64 instructions. This patch replaces original BPF x64 JIT with internal BPF x64 JIT. sysctl net.core.bpf_jit_enable is reused as on/off switch. Performance: 1. old BPF JIT and internal BPF JIT generate equivalent x86_64 code. No performance difference is observed for filters that were JIT-able before Example assembler code for BPF filter "tcpdump port 22" original BPF -> old JIT: original BPF -> internal BPF -> new JIT: 0: push %rbp 0: push %rbp 1: mov %rsp,%rbp 1: mov %rsp,%rbp 4: sub $0x60,%rsp 4: sub $0x228,%rsp 8: mov %rbx,-0x8(%rbp) b: mov %rbx,-0x228(%rbp) // prologue 12: mov %r13,-0x220(%rbp) 19: mov %r14,-0x218(%rbp) 20: mov %r15,-0x210(%rbp) 27: xor %eax,%eax // clear A c: xor %ebx,%ebx 29: xor %r13,%r13 // clear X e: mov 0x68(%rdi),%r9d 2c: mov 0x68(%rdi),%r9d 12: sub 0x6c(%rdi),%r9d 30: sub 0x6c(%rdi),%r9d 16: mov 0xd8(%rdi),%r8 34: mov 0xd8(%rdi),%r10 3b: mov %rdi,%rbx 1d: mov $0xc,%esi 3e: mov $0xc,%esi 22: callq 0xffffffffe1021e15 43: callq 0xffffffffe102bd75 27: cmp $0x86dd,%eax 48: cmp $0x86dd,%rax 2c: jne 0x0000000000000069 4f: jne 0x000000000000009a 2e: mov $0x14,%esi 51: mov $0x14,%esi 33: callq 0xffffffffe1021e31 56: callq 0xffffffffe102bd91 38: cmp $0x84,%eax 5b: cmp $0x84,%rax 3d: je 0x0000000000000049 62: je 0x0000000000000074 3f: cmp $0x6,%eax 64: cmp $0x6,%rax 42: je 0x0000000000000049 68: je 0x0000000000000074 44: cmp $0x11,%eax 6a: cmp $0x11,%rax 47: jne 0x00000000000000c6 6e: jne 0x0000000000000117 49: mov $0x36,%esi 74: mov $0x36,%esi 4e: callq 0xffffffffe1021e15 79: callq 0xffffffffe102bd75 53: cmp $0x16,%eax 7e: cmp $0x16,%rax 56: je 0x00000000000000bf 82: je 0x0000000000000110 58: mov $0x38,%esi 88: mov $0x38,%esi 5d: callq 0xffffffffe1021e15 8d: callq 0xffffffffe102bd75 62: cmp $0x16,%eax 92: cmp $0x16,%rax 65: je 0x00000000000000bf 96: je 0x0000000000000110 67: jmp 0x00000000000000c6 98: jmp 0x0000000000000117 69: cmp $0x800,%eax 9a: cmp $0x800,%rax 6e: jne 0x00000000000000c6 a1: jne 0x0000000000000117 70: mov $0x17,%esi a3: mov $0x17,%esi 75: callq 0xffffffffe1021e31 a8: callq 0xffffffffe102bd91 7a: cmp $0x84,%eax ad: cmp $0x84,%rax 7f: je 0x000000000000008b b4: je 0x00000000000000c2 81: cmp $0x6,%eax b6: cmp $0x6,%rax 84: je 0x000000000000008b ba: je 0x00000000000000c2 86: cmp $0x11,%eax bc: cmp $0x11,%rax 89: jne 0x00000000000000c6 c0: jne 0x0000000000000117 8b: mov $0x14,%esi c2: mov $0x14,%esi 90: callq 0xffffffffe1021e15 c7: callq 0xffffffffe102bd75 95: test $0x1fff,%ax cc: test $0x1fff,%rax 99: jne 0x00000000000000c6 d3: jne 0x0000000000000117 d5: mov %rax,%r14 9b: mov $0xe,%esi d8: mov $0xe,%esi a0: callq 0xffffffffe1021e44 dd: callq 0xffffffffe102bd91 // MSH e2: and $0xf,%eax e5: shl $0x2,%eax e8: mov %rax,%r13 eb: mov %r14,%rax ee: mov %r13,%rsi a5: lea 0xe(%rbx),%esi f1: add $0xe,%esi a8: callq 0xffffffffe1021e0d f4: callq 0xffffffffe102bd6d ad: cmp $0x16,%eax f9: cmp $0x16,%rax b0: je 0x00000000000000bf fd: je 0x0000000000000110 ff: mov %r13,%rsi b2: lea 0x10(%rbx),%esi 102: add $0x10,%esi b5: callq 0xffffffffe1021e0d 105: callq 0xffffffffe102bd6d ba: cmp $0x16,%eax 10a: cmp $0x16,%rax bd: jne 0x00000000000000c6 10e: jne 0x0000000000000117 bf: mov $0xffff,%eax 110: mov $0xffff,%eax c4: jmp 0x00000000000000c8 115: jmp 0x000000000000011c c6: xor %eax,%eax 117: mov $0x0,%eax c8: mov -0x8(%rbp),%rbx 11c: mov -0x228(%rbp),%rbx // epilogue cc: leaveq 123: mov -0x220(%rbp),%r13 cd: retq 12a: mov -0x218(%rbp),%r14 131: mov -0x210(%rbp),%r15 138: leaveq 139: retq On fully cached SKBs both JITed functions take 12 nsec to execute. BPF interpreter executes the program in 30 nsec. The difference in generated assembler is due to the following: Old BPF imlements LDX_MSH instruction via sk_load_byte_msh() helper function inside bpf_jit.S. New JIT removes the helper and does it explicitly, so ldx_msh cost is the same for both JITs, but generated code looks longer. New JIT has 4 registers to save, so prologue/epilogue are larger, but the cost is within noise on x64. Old JIT checks whether first insn clears A and if not emits 'xor %eax,%eax'. New JIT clears %rax unconditionally. 2. old BPF JIT doesn't support ANC_NLATTR, ANC_PAY_OFFSET, ANC_RANDOM extensions. New JIT supports all BPF extensions. Performance of such filters improves 2-4 times depending on a filter. The longer the filter the higher performance gain. Synthetic benchmarks with many ancillary loads see 20x speedup which seems to be the maximum gain from JIT Notes: . net.core.bpf_jit_enable=2 + tools/net/bpf_jit_disasm is still functional and can be used to see generated assembler . there are two jit_compile() functions and code flow for classic filters is: sk_attach_filter() - load classic BPF bpf_jit_compile() - try to JIT from classic BPF sk_convert_filter() - convert classic to internal bpf_int_jit_compile() - JIT from internal BPF seccomp and tracing filters will just call bpf_int_jit_compile() Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- arch/x86/net/bpf_jit.S | 77 +-- arch/x86/net/bpf_jit_comp.c | 1314 +++++++++++++++++++++++-------------------- include/linux/filter.h | 3 + net/core/filter.c | 9 +- 4 files changed, 748 insertions(+), 655 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S index 01495755701b..6440221ced0d 100644 --- a/arch/x86/net/bpf_jit.S +++ b/arch/x86/net/bpf_jit.S @@ -12,13 +12,16 @@ /* * Calling convention : - * rdi : skb pointer + * rbx : skb pointer (callee saved) * esi : offset of byte(s) to fetch in skb (can be scratched) - * r8 : copy of skb->data + * r10 : copy of skb->data * r9d : hlen = skb->len - skb->data_len */ -#define SKBDATA %r8 +#define SKBDATA %r10 #define SKF_MAX_NEG_OFF $(-0x200000) /* SKF_LL_OFF from filter.h */ +#define MAX_BPF_STACK (512 /* from filter.h */ + \ + 32 /* space for rbx,r13,r14,r15 */ + \ + 8 /* space for skb_copy_bits */) sk_load_word: .globl sk_load_word @@ -68,53 +71,31 @@ sk_load_byte_positive_offset: movzbl (SKBDATA,%rsi),%eax ret -/** - * sk_load_byte_msh - BPF_S_LDX_B_MSH helper - * - * Implements BPF_S_LDX_B_MSH : ldxb 4*([offset]&0xf) - * Must preserve A accumulator (%eax) - * Inputs : %esi is the offset value - */ -sk_load_byte_msh: - .globl sk_load_byte_msh - test %esi,%esi - js bpf_slow_path_byte_msh_neg - -sk_load_byte_msh_positive_offset: - .globl sk_load_byte_msh_positive_offset - cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte_msh */ - jle bpf_slow_path_byte_msh - movzbl (SKBDATA,%rsi),%ebx - and $15,%bl - shl $2,%bl - ret - /* rsi contains offset and can be scratched */ #define bpf_slow_path_common(LEN) \ - push %rdi; /* save skb */ \ + mov %rbx, %rdi; /* arg1 == skb */ \ push %r9; \ push SKBDATA; \ /* rsi already has offset */ \ mov $LEN,%ecx; /* len */ \ - lea -12(%rbp),%rdx; \ + lea - MAX_BPF_STACK + 32(%rbp),%rdx; \ call skb_copy_bits; \ test %eax,%eax; \ pop SKBDATA; \ - pop %r9; \ - pop %rdi + pop %r9; bpf_slow_path_word: bpf_slow_path_common(4) js bpf_error - mov -12(%rbp),%eax + mov - MAX_BPF_STACK + 32(%rbp),%eax bswap %eax ret bpf_slow_path_half: bpf_slow_path_common(2) js bpf_error - mov -12(%rbp),%ax + mov - MAX_BPF_STACK + 32(%rbp),%ax rol $8,%ax movzwl %ax,%eax ret @@ -122,21 +103,11 @@ bpf_slow_path_half: bpf_slow_path_byte: bpf_slow_path_common(1) js bpf_error - movzbl -12(%rbp),%eax - ret - -bpf_slow_path_byte_msh: - xchg %eax,%ebx /* dont lose A , X is about to be scratched */ - bpf_slow_path_common(1) - js bpf_error - movzbl -12(%rbp),%eax - and $15,%al - shl $2,%al - xchg %eax,%ebx + movzbl - MAX_BPF_STACK + 32(%rbp),%eax ret #define sk_negative_common(SIZE) \ - push %rdi; /* save skb */ \ + mov %rbx, %rdi; /* arg1 == skb */ \ push %r9; \ push SKBDATA; \ /* rsi already has offset */ \ @@ -145,10 +116,8 @@ bpf_slow_path_byte_msh: test %rax,%rax; \ pop SKBDATA; \ pop %r9; \ - pop %rdi; \ jz bpf_error - bpf_slow_path_word_neg: cmp SKF_MAX_NEG_OFF, %esi /* test range */ jl bpf_error /* offset lower -> error */ @@ -179,22 +148,12 @@ sk_load_byte_negative_offset: movzbl (%rax), %eax ret -bpf_slow_path_byte_msh_neg: - cmp SKF_MAX_NEG_OFF, %esi - jl bpf_error -sk_load_byte_msh_negative_offset: - .globl sk_load_byte_msh_negative_offset - xchg %eax,%ebx /* dont lose A , X is about to be scratched */ - sk_negative_common(1) - movzbl (%rax),%eax - and $15,%al - shl $2,%al - xchg %eax,%ebx - ret - bpf_error: # force a return 0 from jit handler - xor %eax,%eax - mov -8(%rbp),%rbx + xor %eax,%eax + mov - MAX_BPF_STACK(%rbp),%rbx + mov - MAX_BPF_STACK + 8(%rbp),%r13 + mov - MAX_BPF_STACK + 16(%rbp),%r14 + mov - MAX_BPF_STACK + 24(%rbp),%r15 leaveq ret diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index c5fa7c9cb665..92aef8fdac2f 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1,6 +1,7 @@ /* bpf_jit_comp.c : BPF JIT compiler * * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com) + * Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -14,28 +15,16 @@ #include #include -/* - * Conventions : - * EAX : BPF A accumulator - * EBX : BPF X accumulator - * RDI : pointer to skb (first argument given to JIT function) - * RBP : frame pointer (even if CONFIG_FRAME_POINTER=n) - * ECX,EDX,ESI : scratch registers - * r9d : skb->len - skb->data_len (headlen) - * r8 : skb->data - * -8(RBP) : saved RBX value - * -16(RBP)..-80(RBP) : BPF_MEMWORDS values - */ int bpf_jit_enable __read_mostly; /* * assembly code in arch/x86/net/bpf_jit.S */ -extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[]; +extern u8 sk_load_word[], sk_load_half[], sk_load_byte[]; extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[]; -extern u8 sk_load_byte_positive_offset[], sk_load_byte_msh_positive_offset[]; +extern u8 sk_load_byte_positive_offset[]; extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[]; -extern u8 sk_load_byte_negative_offset[], sk_load_byte_msh_negative_offset[]; +extern u8 sk_load_byte_negative_offset[]; static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) { @@ -56,30 +45,44 @@ static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) #define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2) #define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3) #define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4) -#define EMIT1_off32(b1, off) do { EMIT1(b1); EMIT(off, 4);} while (0) - -#define CLEAR_A() EMIT2(0x31, 0xc0) /* xor %eax,%eax */ -#define CLEAR_X() EMIT2(0x31, 0xdb) /* xor %ebx,%ebx */ +#define EMIT1_off32(b1, off) \ + do {EMIT1(b1); EMIT(off, 4); } while (0) +#define EMIT2_off32(b1, b2, off) \ + do {EMIT2(b1, b2); EMIT(off, 4); } while (0) +#define EMIT3_off32(b1, b2, b3, off) \ + do {EMIT3(b1, b2, b3); EMIT(off, 4); } while (0) +#define EMIT4_off32(b1, b2, b3, b4, off) \ + do {EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0) static inline bool is_imm8(int value) { return value <= 127 && value >= -128; } -static inline bool is_near(int offset) +static inline bool is_simm32(s64 value) { - return offset <= 127 && offset >= -128; + return value == (s64) (s32) value; } -#define EMIT_JMP(offset) \ -do { \ - if (offset) { \ - if (is_near(offset)) \ - EMIT2(0xeb, offset); /* jmp .+off8 */ \ - else \ - EMIT1_off32(0xe9, offset); /* jmp .+off32 */ \ - } \ -} while (0) +/* mov A, X */ +#define EMIT_mov(A, X) \ + do {if (A != X) \ + EMIT3(add_2mod(0x48, A, X), 0x89, add_2reg(0xC0, A, X)); \ + } while (0) + +static int bpf_size_to_x86_bytes(int bpf_size) +{ + if (bpf_size == BPF_W) + return 4; + else if (bpf_size == BPF_H) + return 2; + else if (bpf_size == BPF_B) + return 1; + else if (bpf_size == BPF_DW) + return 4; /* imm32 */ + else + return 0; +} /* list of x86 cond jumps opcodes (. + s8) * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32) @@ -90,27 +93,8 @@ do { \ #define X86_JNE 0x75 #define X86_JBE 0x76 #define X86_JA 0x77 - -#define EMIT_COND_JMP(op, offset) \ -do { \ - if (is_near(offset)) \ - EMIT2(op, offset); /* jxx .+off8 */ \ - else { \ - EMIT2(0x0f, op + 0x10); \ - EMIT(offset, 4); /* jxx .+off32 */ \ - } \ -} while (0) - -#define COND_SEL(CODE, TOP, FOP) \ - case CODE: \ - t_op = TOP; \ - f_op = FOP; \ - goto cond_branch - - -#define SEEN_DATAREF 1 /* might call external helpers */ -#define SEEN_XREG 2 /* ebx is used */ -#define SEEN_MEM 4 /* use mem[] for temporary storage */ +#define X86_JGE 0x7D +#define X86_JG 0x7F static inline void bpf_flush_icache(void *start, void *end) { @@ -125,26 +109,6 @@ static inline void bpf_flush_icache(void *start, void *end) #define CHOOSE_LOAD_FUNC(K, func) \ ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) -/* Helper to find the offset of pkt_type in sk_buff - * We want to make sure its still a 3bit field starting at a byte boundary. - */ -#define PKT_TYPE_MAX 7 -static int pkt_type_offset(void) -{ - struct sk_buff skb_probe = { - .pkt_type = ~0, - }; - char *ct = (char *)&skb_probe; - unsigned int off; - - for (off = 0; off < sizeof(struct sk_buff); off++) { - if (ct[off] == PKT_TYPE_MAX) - return off; - } - pr_err_once("Please fix pkt_type_offset(), as pkt_type couldn't be found\n"); - return -1; -} - struct bpf_binary_header { unsigned int pages; /* Note : for security reasons, bpf code will follow a randomly @@ -178,546 +142,715 @@ static struct bpf_binary_header *bpf_alloc_binary(unsigned int proglen, return header; } +/* pick a register outside of BPF range for JIT internal work */ +#define AUX_REG (MAX_BPF_REG + 1) + +/* the following table maps BPF registers to x64 registers. + * x64 register r12 is unused, since if used as base address register + * in load/store instructions, it always needs an extra byte of encoding + */ +static const int reg2hex[] = { + [BPF_REG_0] = 0, /* rax */ + [BPF_REG_1] = 7, /* rdi */ + [BPF_REG_2] = 6, /* rsi */ + [BPF_REG_3] = 2, /* rdx */ + [BPF_REG_4] = 1, /* rcx */ + [BPF_REG_5] = 0, /* r8 */ + [BPF_REG_6] = 3, /* rbx callee saved */ + [BPF_REG_7] = 5, /* r13 callee saved */ + [BPF_REG_8] = 6, /* r14 callee saved */ + [BPF_REG_9] = 7, /* r15 callee saved */ + [BPF_REG_FP] = 5, /* rbp readonly */ + [AUX_REG] = 3, /* r11 temp register */ +}; + +/* is_ereg() == true if BPF register 'reg' maps to x64 r8..r15 + * which need extra byte of encoding. + * rax,rcx,...,rbp have simpler encoding + */ +static inline bool is_ereg(u32 reg) +{ + if (reg == BPF_REG_5 || reg == AUX_REG || + (reg >= BPF_REG_7 && reg <= BPF_REG_9)) + return true; + else + return false; +} + +/* add modifiers if 'reg' maps to x64 registers r8..r15 */ +static inline u8 add_1mod(u8 byte, u32 reg) +{ + if (is_ereg(reg)) + byte |= 1; + return byte; +} + +static inline u8 add_2mod(u8 byte, u32 r1, u32 r2) +{ + if (is_ereg(r1)) + byte |= 1; + if (is_ereg(r2)) + byte |= 4; + return byte; +} + +/* encode dest register 'a_reg' into x64 opcode 'byte' */ +static inline u8 add_1reg(u8 byte, u32 a_reg) +{ + return byte + reg2hex[a_reg]; +} + +/* encode dest 'a_reg' and src 'x_reg' registers into x64 opcode 'byte' */ +static inline u8 add_2reg(u8 byte, u32 a_reg, u32 x_reg) +{ + return byte + reg2hex[a_reg] + (reg2hex[x_reg] << 3); +} + struct jit_context { unsigned int cleanup_addr; /* epilogue code offset */ - int pc_ret0; /* bpf index of first RET #0 instruction (if any) */ - u8 seen; + bool seen_ld_abs; }; static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, int oldproglen, struct jit_context *ctx) { - const struct sock_filter *filter = bpf_prog->insns; - int flen = bpf_prog->len; + struct sock_filter_int *insn = bpf_prog->insnsi; + int insn_cnt = bpf_prog->len; u8 temp[64]; - u8 *prog; - int ilen, i, proglen; - int t_offset, f_offset; - u8 t_op, f_op, seen = 0; - u8 *func; - unsigned int cleanup_addr = ctx->cleanup_addr; - u8 seen_or_pass0 = ctx->seen; - - /* no prologue/epilogue for trivial filters (RET something) */ - proglen = 0; - prog = temp; + int i; + int proglen = 0; + u8 *prog = temp; + int stacksize = MAX_BPF_STACK + + 32 /* space for rbx, r13, r14, r15 */ + + 8 /* space for skb_copy_bits() buffer */; - if (seen_or_pass0) { - EMIT4(0x55, 0x48, 0x89, 0xe5); /* push %rbp; mov %rsp,%rbp */ - EMIT4(0x48, 0x83, 0xec, 96); /* subq $96,%rsp */ - /* note : must save %rbx in case bpf_error is hit */ - if (seen_or_pass0 & (SEEN_XREG | SEEN_DATAREF)) - EMIT4(0x48, 0x89, 0x5d, 0xf8); /* mov %rbx, -8(%rbp) */ - if (seen_or_pass0 & SEEN_XREG) - CLEAR_X(); /* make sure we dont leek kernel memory */ - - /* - * If this filter needs to access skb data, - * loads r9 and r8 with : - * r9 = skb->len - skb->data_len - * r8 = skb->data - */ - if (seen_or_pass0 & SEEN_DATAREF) { - if (offsetof(struct sk_buff, len) <= 127) - /* mov off8(%rdi),%r9d */ - EMIT4(0x44, 0x8b, 0x4f, offsetof(struct sk_buff, len)); - else { - /* mov off32(%rdi),%r9d */ - EMIT3(0x44, 0x8b, 0x8f); - EMIT(offsetof(struct sk_buff, len), 4); - } - if (is_imm8(offsetof(struct sk_buff, data_len))) - /* sub off8(%rdi),%r9d */ - EMIT4(0x44, 0x2b, 0x4f, offsetof(struct sk_buff, data_len)); - else { - EMIT3(0x44, 0x2b, 0x8f); - EMIT(offsetof(struct sk_buff, data_len), 4); - } + EMIT1(0x55); /* push rbp */ + EMIT3(0x48, 0x89, 0xE5); /* mov rbp,rsp */ - if (is_imm8(offsetof(struct sk_buff, data))) - /* mov off8(%rdi),%r8 */ - EMIT4(0x4c, 0x8b, 0x47, offsetof(struct sk_buff, data)); - else { - /* mov off32(%rdi),%r8 */ - EMIT3(0x4c, 0x8b, 0x87); - EMIT(offsetof(struct sk_buff, data), 4); - } + /* sub rsp, stacksize */ + EMIT3_off32(0x48, 0x81, 0xEC, stacksize); + + /* all classic BPF filters use R6(rbx) save it */ + + /* mov qword ptr [rbp-X],rbx */ + EMIT3_off32(0x48, 0x89, 0x9D, -stacksize); + + /* sk_convert_filter() maps classic BPF register X to R7 and uses R8 + * as temporary, so all tcpdump filters need to spill/fill R7(r13) and + * R8(r14). R9(r15) spill could be made conditional, but there is only + * one 'bpf_error' return path out of helper functions inside bpf_jit.S + * The overhead of extra spill is negligible for any filter other + * than synthetic ones. Therefore not worth adding complexity. + */ + + /* mov qword ptr [rbp-X],r13 */ + EMIT3_off32(0x4C, 0x89, 0xAD, -stacksize + 8); + /* mov qword ptr [rbp-X],r14 */ + EMIT3_off32(0x4C, 0x89, 0xB5, -stacksize + 16); + /* mov qword ptr [rbp-X],r15 */ + EMIT3_off32(0x4C, 0x89, 0xBD, -stacksize + 24); + + /* clear A and X registers */ + EMIT2(0x31, 0xc0); /* xor eax, eax */ + EMIT3(0x4D, 0x31, 0xED); /* xor r13, r13 */ + + if (ctx->seen_ld_abs) { + /* r9d : skb->len - skb->data_len (headlen) + * r10 : skb->data + */ + if (is_imm8(offsetof(struct sk_buff, len))) + /* mov %r9d, off8(%rdi) */ + EMIT4(0x44, 0x8b, 0x4f, + offsetof(struct sk_buff, len)); + else + /* mov %r9d, off32(%rdi) */ + EMIT3_off32(0x44, 0x8b, 0x8f, + offsetof(struct sk_buff, len)); + + if (is_imm8(offsetof(struct sk_buff, data_len))) + /* sub %r9d, off8(%rdi) */ + EMIT4(0x44, 0x2b, 0x4f, + offsetof(struct sk_buff, data_len)); + else + EMIT3_off32(0x44, 0x2b, 0x8f, + offsetof(struct sk_buff, data_len)); + + if (is_imm8(offsetof(struct sk_buff, data))) + /* mov %r10, off8(%rdi) */ + EMIT4(0x4c, 0x8b, 0x57, + offsetof(struct sk_buff, data)); + else + /* mov %r10, off32(%rdi) */ + EMIT3_off32(0x4c, 0x8b, 0x97, + offsetof(struct sk_buff, data)); + } + + for (i = 0; i < insn_cnt; i++, insn++) { + const s32 K = insn->imm; + u32 a_reg = insn->a_reg; + u32 x_reg = insn->x_reg; + u8 b1 = 0, b2 = 0, b3 = 0; + s64 jmp_offset; + u8 jmp_cond; + int ilen; + u8 *func; + + switch (insn->code) { + /* ALU */ + case BPF_ALU | BPF_ADD | BPF_X: + case BPF_ALU | BPF_SUB | BPF_X: + case BPF_ALU | BPF_AND | BPF_X: + case BPF_ALU | BPF_OR | BPF_X: + case BPF_ALU | BPF_XOR | BPF_X: + case BPF_ALU64 | BPF_ADD | BPF_X: + case BPF_ALU64 | BPF_SUB | BPF_X: + case BPF_ALU64 | BPF_AND | BPF_X: + case BPF_ALU64 | BPF_OR | BPF_X: + case BPF_ALU64 | BPF_XOR | BPF_X: + switch (BPF_OP(insn->code)) { + case BPF_ADD: b2 = 0x01; break; + case BPF_SUB: b2 = 0x29; break; + case BPF_AND: b2 = 0x21; break; + case BPF_OR: b2 = 0x09; break; + case BPF_XOR: b2 = 0x31; break; } - } + if (BPF_CLASS(insn->code) == BPF_ALU64) + EMIT1(add_2mod(0x48, a_reg, x_reg)); + else if (is_ereg(a_reg) || is_ereg(x_reg)) + EMIT1(add_2mod(0x40, a_reg, x_reg)); + EMIT2(b2, add_2reg(0xC0, a_reg, x_reg)); + break; - switch (filter[0].code) { - case BPF_S_RET_K: - case BPF_S_LD_W_LEN: - case BPF_S_ANC_PROTOCOL: - case BPF_S_ANC_IFINDEX: - case BPF_S_ANC_MARK: - case BPF_S_ANC_RXHASH: - case BPF_S_ANC_CPU: - case BPF_S_ANC_VLAN_TAG: - case BPF_S_ANC_VLAN_TAG_PRESENT: - case BPF_S_ANC_QUEUE: - case BPF_S_ANC_PKTTYPE: - case BPF_S_LD_W_ABS: - case BPF_S_LD_H_ABS: - case BPF_S_LD_B_ABS: - /* first instruction sets A register (or is RET 'constant') */ + /* mov A, X */ + case BPF_ALU64 | BPF_MOV | BPF_X: + EMIT_mov(a_reg, x_reg); break; - default: - /* make sure we dont leak kernel information to user */ - CLEAR_A(); /* A = 0 */ - } - for (i = 0; i < flen; i++) { - unsigned int K = filter[i].k; + /* mov32 A, X */ + case BPF_ALU | BPF_MOV | BPF_X: + if (is_ereg(a_reg) || is_ereg(x_reg)) + EMIT1(add_2mod(0x40, a_reg, x_reg)); + EMIT2(0x89, add_2reg(0xC0, a_reg, x_reg)); + break; - switch (filter[i].code) { - case BPF_S_ALU_ADD_X: /* A += X; */ - seen |= SEEN_XREG; - EMIT2(0x01, 0xd8); /* add %ebx,%eax */ - break; - case BPF_S_ALU_ADD_K: /* A += K; */ - if (!K) - break; - if (is_imm8(K)) - EMIT3(0x83, 0xc0, K); /* add imm8,%eax */ - else - EMIT1_off32(0x05, K); /* add imm32,%eax */ - break; - case BPF_S_ALU_SUB_X: /* A -= X; */ - seen |= SEEN_XREG; - EMIT2(0x29, 0xd8); /* sub %ebx,%eax */ - break; - case BPF_S_ALU_SUB_K: /* A -= K */ - if (!K) - break; - if (is_imm8(K)) - EMIT3(0x83, 0xe8, K); /* sub imm8,%eax */ - else - EMIT1_off32(0x2d, K); /* sub imm32,%eax */ - break; - case BPF_S_ALU_MUL_X: /* A *= X; */ - seen |= SEEN_XREG; - EMIT3(0x0f, 0xaf, 0xc3); /* imul %ebx,%eax */ - break; - case BPF_S_ALU_MUL_K: /* A *= K */ - if (is_imm8(K)) - EMIT3(0x6b, 0xc0, K); /* imul imm8,%eax,%eax */ - else { - EMIT2(0x69, 0xc0); /* imul imm32,%eax */ - EMIT(K, 4); - } - break; - case BPF_S_ALU_DIV_X: /* A /= X; */ - seen |= SEEN_XREG; - EMIT2(0x85, 0xdb); /* test %ebx,%ebx */ - if (ctx->pc_ret0 > 0) { - /* addrs[pc_ret0 - 1] is start address of target - * (addrs[i] - 4) is the address following this jmp - * ("xor %edx,%edx; div %ebx" being 4 bytes long) - */ - EMIT_COND_JMP(X86_JE, addrs[ctx->pc_ret0 - 1] - - (addrs[i] - 4)); - } else { - EMIT_COND_JMP(X86_JNE, 2 + 5); - CLEAR_A(); - EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 4)); /* jmp .+off32 */ - } - EMIT4(0x31, 0xd2, 0xf7, 0xf3); /* xor %edx,%edx; div %ebx */ - break; - case BPF_S_ALU_MOD_X: /* A %= X; */ - seen |= SEEN_XREG; - EMIT2(0x85, 0xdb); /* test %ebx,%ebx */ - if (ctx->pc_ret0 > 0) { - /* addrs[pc_ret0 - 1] is start address of target - * (addrs[i] - 6) is the address following this jmp - * ("xor %edx,%edx; div %ebx;mov %edx,%eax" being 6 bytes long) - */ - EMIT_COND_JMP(X86_JE, addrs[ctx->pc_ret0 - 1] - - (addrs[i] - 6)); - } else { - EMIT_COND_JMP(X86_JNE, 2 + 5); - CLEAR_A(); - EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 6)); /* jmp .+off32 */ - } - EMIT2(0x31, 0xd2); /* xor %edx,%edx */ - EMIT2(0xf7, 0xf3); /* div %ebx */ - EMIT2(0x89, 0xd0); /* mov %edx,%eax */ - break; - case BPF_S_ALU_MOD_K: /* A %= K; */ - if (K == 1) { - CLEAR_A(); - break; - } - EMIT2(0x31, 0xd2); /* xor %edx,%edx */ - EMIT1(0xb9);EMIT(K, 4); /* mov imm32,%ecx */ - EMIT2(0xf7, 0xf1); /* div %ecx */ - EMIT2(0x89, 0xd0); /* mov %edx,%eax */ - break; - case BPF_S_ALU_DIV_K: /* A /= K */ - if (K == 1) - break; - EMIT2(0x31, 0xd2); /* xor %edx,%edx */ - EMIT1(0xb9);EMIT(K, 4); /* mov imm32,%ecx */ - EMIT2(0xf7, 0xf1); /* div %ecx */ - break; - case BPF_S_ALU_AND_X: - seen |= SEEN_XREG; - EMIT2(0x21, 0xd8); /* and %ebx,%eax */ - break; - case BPF_S_ALU_AND_K: - if (K >= 0xFFFFFF00) { - EMIT2(0x24, K & 0xFF); /* and imm8,%al */ - } else if (K >= 0xFFFF0000) { - EMIT2(0x66, 0x25); /* and imm16,%ax */ - EMIT(K, 2); - } else { - EMIT1_off32(0x25, K); /* and imm32,%eax */ - } - break; - case BPF_S_ALU_OR_X: - seen |= SEEN_XREG; - EMIT2(0x09, 0xd8); /* or %ebx,%eax */ - break; - case BPF_S_ALU_OR_K: - if (is_imm8(K)) - EMIT3(0x83, 0xc8, K); /* or imm8,%eax */ - else - EMIT1_off32(0x0d, K); /* or imm32,%eax */ - break; - case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */ - case BPF_S_ALU_XOR_X: - seen |= SEEN_XREG; - EMIT2(0x31, 0xd8); /* xor %ebx,%eax */ - break; - case BPF_S_ALU_XOR_K: /* A ^= K; */ - if (K == 0) - break; - if (is_imm8(K)) - EMIT3(0x83, 0xf0, K); /* xor imm8,%eax */ - else - EMIT1_off32(0x35, K); /* xor imm32,%eax */ - break; - case BPF_S_ALU_LSH_X: /* A <<= X; */ - seen |= SEEN_XREG; - EMIT4(0x89, 0xd9, 0xd3, 0xe0); /* mov %ebx,%ecx; shl %cl,%eax */ - break; - case BPF_S_ALU_LSH_K: - if (K == 0) - break; - else if (K == 1) - EMIT2(0xd1, 0xe0); /* shl %eax */ - else - EMIT3(0xc1, 0xe0, K); - break; - case BPF_S_ALU_RSH_X: /* A >>= X; */ - seen |= SEEN_XREG; - EMIT4(0x89, 0xd9, 0xd3, 0xe8); /* mov %ebx,%ecx; shr %cl,%eax */ - break; - case BPF_S_ALU_RSH_K: /* A >>= K; */ - if (K == 0) - break; - else if (K == 1) - EMIT2(0xd1, 0xe8); /* shr %eax */ - else - EMIT3(0xc1, 0xe8, K); - break; - case BPF_S_ALU_NEG: - EMIT2(0xf7, 0xd8); /* neg %eax */ - break; - case BPF_S_RET_K: - if (!K) { - if (ctx->pc_ret0 == -1) - ctx->pc_ret0 = i; - CLEAR_A(); - } else { - EMIT1_off32(0xb8, K); /* mov $imm32,%eax */ - } - /* fallinto */ - case BPF_S_RET_A: - if (seen_or_pass0) { - if (i != flen - 1) { - EMIT_JMP(cleanup_addr - addrs[i]); - break; - } - if (seen_or_pass0 & SEEN_XREG) - EMIT4(0x48, 0x8b, 0x5d, 0xf8); /* mov -8(%rbp),%rbx */ - EMIT1(0xc9); /* leaveq */ - } - EMIT1(0xc3); /* ret */ - break; - case BPF_S_MISC_TAX: /* X = A */ - seen |= SEEN_XREG; - EMIT2(0x89, 0xc3); /* mov %eax,%ebx */ - break; - case BPF_S_MISC_TXA: /* A = X */ - seen |= SEEN_XREG; - EMIT2(0x89, 0xd8); /* mov %ebx,%eax */ - break; - case BPF_S_LD_IMM: /* A = K */ - if (!K) - CLEAR_A(); - else - EMIT1_off32(0xb8, K); /* mov $imm32,%eax */ + /* neg A */ + case BPF_ALU | BPF_NEG: + case BPF_ALU64 | BPF_NEG: + if (BPF_CLASS(insn->code) == BPF_ALU64) + EMIT1(add_1mod(0x48, a_reg)); + else if (is_ereg(a_reg)) + EMIT1(add_1mod(0x40, a_reg)); + EMIT2(0xF7, add_1reg(0xD8, a_reg)); + break; + + case BPF_ALU | BPF_ADD | BPF_K: + case BPF_ALU | BPF_SUB | BPF_K: + case BPF_ALU | BPF_AND | BPF_K: + case BPF_ALU | BPF_OR | BPF_K: + case BPF_ALU | BPF_XOR | BPF_K: + case BPF_ALU64 | BPF_ADD | BPF_K: + case BPF_ALU64 | BPF_SUB | BPF_K: + case BPF_ALU64 | BPF_AND | BPF_K: + case BPF_ALU64 | BPF_OR | BPF_K: + case BPF_ALU64 | BPF_XOR | BPF_K: + if (BPF_CLASS(insn->code) == BPF_ALU64) + EMIT1(add_1mod(0x48, a_reg)); + else if (is_ereg(a_reg)) + EMIT1(add_1mod(0x40, a_reg)); + + switch (BPF_OP(insn->code)) { + case BPF_ADD: b3 = 0xC0; break; + case BPF_SUB: b3 = 0xE8; break; + case BPF_AND: b3 = 0xE0; break; + case BPF_OR: b3 = 0xC8; break; + case BPF_XOR: b3 = 0xF0; break; + } + + if (is_imm8(K)) + EMIT3(0x83, add_1reg(b3, a_reg), K); + else + EMIT2_off32(0x81, add_1reg(b3, a_reg), K); + break; + + case BPF_ALU64 | BPF_MOV | BPF_K: + /* optimization: if imm32 is positive, + * use 'mov eax, imm32' (which zero-extends imm32) + * to save 2 bytes + */ + if (K < 0) { + /* 'mov rax, imm32' sign extends imm32 */ + b1 = add_1mod(0x48, a_reg); + b2 = 0xC7; + b3 = 0xC0; + EMIT3_off32(b1, b2, add_1reg(b3, a_reg), K); break; - case BPF_S_LDX_IMM: /* X = K */ - seen |= SEEN_XREG; - if (!K) - CLEAR_X(); + } + + case BPF_ALU | BPF_MOV | BPF_K: + /* mov %eax, imm32 */ + if (is_ereg(a_reg)) + EMIT1(add_1mod(0x40, a_reg)); + EMIT1_off32(add_1reg(0xB8, a_reg), K); + break; + + /* A %= X, A /= X, A %= K, A /= K */ + case BPF_ALU | BPF_MOD | BPF_X: + case BPF_ALU | BPF_DIV | BPF_X: + case BPF_ALU | BPF_MOD | BPF_K: + case BPF_ALU | BPF_DIV | BPF_K: + case BPF_ALU64 | BPF_MOD | BPF_X: + case BPF_ALU64 | BPF_DIV | BPF_X: + case BPF_ALU64 | BPF_MOD | BPF_K: + case BPF_ALU64 | BPF_DIV | BPF_K: + EMIT1(0x50); /* push rax */ + EMIT1(0x52); /* push rdx */ + + if (BPF_SRC(insn->code) == BPF_X) + /* mov r11, X */ + EMIT_mov(AUX_REG, x_reg); + else + /* mov r11, K */ + EMIT3_off32(0x49, 0xC7, 0xC3, K); + + /* mov rax, A */ + EMIT_mov(BPF_REG_0, a_reg); + + /* xor edx, edx + * equivalent to 'xor rdx, rdx', but one byte less + */ + EMIT2(0x31, 0xd2); + + if (BPF_SRC(insn->code) == BPF_X) { + /* if (X == 0) return 0 */ + + /* cmp r11, 0 */ + EMIT4(0x49, 0x83, 0xFB, 0x00); + + /* jne .+9 (skip over pop, pop, xor and jmp) */ + EMIT2(X86_JNE, 1 + 1 + 2 + 5); + EMIT1(0x5A); /* pop rdx */ + EMIT1(0x58); /* pop rax */ + EMIT2(0x31, 0xc0); /* xor eax, eax */ + + /* jmp cleanup_addr + * addrs[i] - 11, because there are 11 bytes + * after this insn: div, mov, pop, pop, mov + */ + jmp_offset = ctx->cleanup_addr - (addrs[i] - 11); + EMIT1_off32(0xE9, jmp_offset); + } + + if (BPF_CLASS(insn->code) == BPF_ALU64) + /* div r11 */ + EMIT3(0x49, 0xF7, 0xF3); + else + /* div r11d */ + EMIT3(0x41, 0xF7, 0xF3); + + if (BPF_OP(insn->code) == BPF_MOD) + /* mov r11, rdx */ + EMIT3(0x49, 0x89, 0xD3); + else + /* mov r11, rax */ + EMIT3(0x49, 0x89, 0xC3); + + EMIT1(0x5A); /* pop rdx */ + EMIT1(0x58); /* pop rax */ + + /* mov A, r11 */ + EMIT_mov(a_reg, AUX_REG); + break; + + case BPF_ALU | BPF_MUL | BPF_K: + case BPF_ALU | BPF_MUL | BPF_X: + case BPF_ALU64 | BPF_MUL | BPF_K: + case BPF_ALU64 | BPF_MUL | BPF_X: + EMIT1(0x50); /* push rax */ + EMIT1(0x52); /* push rdx */ + + /* mov r11, A */ + EMIT_mov(AUX_REG, a_reg); + + if (BPF_SRC(insn->code) == BPF_X) + /* mov rax, X */ + EMIT_mov(BPF_REG_0, x_reg); + else + /* mov rax, K */ + EMIT3_off32(0x48, 0xC7, 0xC0, K); + + if (BPF_CLASS(insn->code) == BPF_ALU64) + EMIT1(add_1mod(0x48, AUX_REG)); + else if (is_ereg(AUX_REG)) + EMIT1(add_1mod(0x40, AUX_REG)); + /* mul(q) r11 */ + EMIT2(0xF7, add_1reg(0xE0, AUX_REG)); + + /* mov r11, rax */ + EMIT_mov(AUX_REG, BPF_REG_0); + + EMIT1(0x5A); /* pop rdx */ + EMIT1(0x58); /* pop rax */ + + /* mov A, r11 */ + EMIT_mov(a_reg, AUX_REG); + break; + + /* shifts */ + case BPF_ALU | BPF_LSH | BPF_K: + case BPF_ALU | BPF_RSH | BPF_K: + case BPF_ALU | BPF_ARSH | BPF_K: + case BPF_ALU64 | BPF_LSH | BPF_K: + case BPF_ALU64 | BPF_RSH | BPF_K: + case BPF_ALU64 | BPF_ARSH | BPF_K: + if (BPF_CLASS(insn->code) == BPF_ALU64) + EMIT1(add_1mod(0x48, a_reg)); + else if (is_ereg(a_reg)) + EMIT1(add_1mod(0x40, a_reg)); + + switch (BPF_OP(insn->code)) { + case BPF_LSH: b3 = 0xE0; break; + case BPF_RSH: b3 = 0xE8; break; + case BPF_ARSH: b3 = 0xF8; break; + } + EMIT3(0xC1, add_1reg(b3, a_reg), K); + break; + + case BPF_ALU | BPF_END | BPF_FROM_BE: + switch (K) { + case 16: + /* emit 'ror %ax, 8' to swap lower 2 bytes */ + EMIT1(0x66); + if (is_ereg(a_reg)) + EMIT1(0x41); + EMIT3(0xC1, add_1reg(0xC8, a_reg), 8); + break; + case 32: + /* emit 'bswap eax' to swap lower 4 bytes */ + if (is_ereg(a_reg)) + EMIT2(0x41, 0x0F); else - EMIT1_off32(0xbb, K); /* mov $imm32,%ebx */ - break; - case BPF_S_LD_MEM: /* A = mem[K] : mov off8(%rbp),%eax */ - seen |= SEEN_MEM; - EMIT3(0x8b, 0x45, 0xf0 - K*4); + EMIT1(0x0F); + EMIT1(add_1reg(0xC8, a_reg)); break; - case BPF_S_LDX_MEM: /* X = mem[K] : mov off8(%rbp),%ebx */ - seen |= SEEN_XREG | SEEN_MEM; - EMIT3(0x8b, 0x5d, 0xf0 - K*4); - break; - case BPF_S_ST: /* mem[K] = A : mov %eax,off8(%rbp) */ - seen |= SEEN_MEM; - EMIT3(0x89, 0x45, 0xf0 - K*4); - break; - case BPF_S_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */ - seen |= SEEN_XREG | SEEN_MEM; - EMIT3(0x89, 0x5d, 0xf0 - K*4); - break; - case BPF_S_LD_W_LEN: /* A = skb->len; */ - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); - if (is_imm8(offsetof(struct sk_buff, len))) - /* mov off8(%rdi),%eax */ - EMIT3(0x8b, 0x47, offsetof(struct sk_buff, len)); - else { - EMIT2(0x8b, 0x87); - EMIT(offsetof(struct sk_buff, len), 4); - } - break; - case BPF_S_LDX_W_LEN: /* X = skb->len; */ - seen |= SEEN_XREG; - if (is_imm8(offsetof(struct sk_buff, len))) - /* mov off8(%rdi),%ebx */ - EMIT3(0x8b, 0x5f, offsetof(struct sk_buff, len)); - else { - EMIT2(0x8b, 0x9f); - EMIT(offsetof(struct sk_buff, len), 4); - } - break; - case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */ - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); - if (is_imm8(offsetof(struct sk_buff, protocol))) { - /* movzwl off8(%rdi),%eax */ - EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, protocol)); - } else { - EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */ - EMIT(offsetof(struct sk_buff, protocol), 4); - } - EMIT2(0x86, 0xc4); /* ntohs() : xchg %al,%ah */ - break; - case BPF_S_ANC_IFINDEX: - if (is_imm8(offsetof(struct sk_buff, dev))) { - /* movq off8(%rdi),%rax */ - EMIT4(0x48, 0x8b, 0x47, offsetof(struct sk_buff, dev)); - } else { - EMIT3(0x48, 0x8b, 0x87); /* movq off32(%rdi),%rax */ - EMIT(offsetof(struct sk_buff, dev), 4); - } - EMIT3(0x48, 0x85, 0xc0); /* test %rax,%rax */ - EMIT_COND_JMP(X86_JE, cleanup_addr - (addrs[i] - 6)); - BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); - EMIT2(0x8b, 0x80); /* mov off32(%rax),%eax */ - EMIT(offsetof(struct net_device, ifindex), 4); - break; - case BPF_S_ANC_MARK: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); - if (is_imm8(offsetof(struct sk_buff, mark))) { - /* mov off8(%rdi),%eax */ - EMIT3(0x8b, 0x47, offsetof(struct sk_buff, mark)); - } else { - EMIT2(0x8b, 0x87); - EMIT(offsetof(struct sk_buff, mark), 4); - } - break; - case BPF_S_ANC_RXHASH: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); - if (is_imm8(offsetof(struct sk_buff, hash))) { - /* mov off8(%rdi),%eax */ - EMIT3(0x8b, 0x47, offsetof(struct sk_buff, hash)); - } else { - EMIT2(0x8b, 0x87); - EMIT(offsetof(struct sk_buff, hash), 4); - } - break; - case BPF_S_ANC_QUEUE: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); - if (is_imm8(offsetof(struct sk_buff, queue_mapping))) { - /* movzwl off8(%rdi),%eax */ - EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, queue_mapping)); - } else { - EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */ - EMIT(offsetof(struct sk_buff, queue_mapping), 4); - } - break; - case BPF_S_ANC_CPU: -#ifdef CONFIG_SMP - EMIT4(0x65, 0x8b, 0x04, 0x25); /* mov %gs:off32,%eax */ - EMIT((u32)(unsigned long)&cpu_number, 4); /* A = smp_processor_id(); */ -#else - CLEAR_A(); -#endif - break; - case BPF_S_ANC_VLAN_TAG: - case BPF_S_ANC_VLAN_TAG_PRESENT: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); - if (is_imm8(offsetof(struct sk_buff, vlan_tci))) { - /* movzwl off8(%rdi),%eax */ - EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, vlan_tci)); - } else { - EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */ - EMIT(offsetof(struct sk_buff, vlan_tci), 4); - } - BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); - if (filter[i].code == BPF_S_ANC_VLAN_TAG) { - EMIT3(0x80, 0xe4, 0xef); /* and $0xef,%ah */ - } else { - EMIT3(0xc1, 0xe8, 0x0c); /* shr $0xc,%eax */ - EMIT3(0x83, 0xe0, 0x01); /* and $0x1,%eax */ - } - break; - case BPF_S_ANC_PKTTYPE: - { - int off = pkt_type_offset(); - - if (off < 0) - return -EINVAL; - if (is_imm8(off)) { - /* movzbl off8(%rdi),%eax */ - EMIT4(0x0f, 0xb6, 0x47, off); - } else { - /* movbl off32(%rdi),%eax */ - EMIT3(0x0f, 0xb6, 0x87); - EMIT(off, 4); - } - EMIT3(0x83, 0xe0, PKT_TYPE_MAX); /* and $0x7,%eax */ + case 64: + /* emit 'bswap rax' to swap 8 bytes */ + EMIT3(add_1mod(0x48, a_reg), 0x0F, + add_1reg(0xC8, a_reg)); break; } - case BPF_S_LD_W_ABS: - func = CHOOSE_LOAD_FUNC(K, sk_load_word); -common_load: seen |= SEEN_DATAREF; - t_offset = func - (image + addrs[i]); - EMIT1_off32(0xbe, K); /* mov imm32,%esi */ - EMIT1_off32(0xe8, t_offset); /* call */ - break; - case BPF_S_LD_H_ABS: - func = CHOOSE_LOAD_FUNC(K, sk_load_half); - goto common_load; - case BPF_S_LD_B_ABS: - func = CHOOSE_LOAD_FUNC(K, sk_load_byte); - goto common_load; - case BPF_S_LDX_B_MSH: - func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh); - seen |= SEEN_DATAREF | SEEN_XREG; - t_offset = func - (image + addrs[i]); - EMIT1_off32(0xbe, K); /* mov imm32,%esi */ - EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */ - break; - case BPF_S_LD_W_IND: - func = sk_load_word; -common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG; - t_offset = func - (image + addrs[i]); - if (K) { - if (is_imm8(K)) { - EMIT3(0x8d, 0x73, K); /* lea imm8(%rbx), %esi */ - } else { - EMIT2(0x8d, 0xb3); /* lea imm32(%rbx),%esi */ - EMIT(K, 4); - } - } else { - EMIT2(0x89,0xde); /* mov %ebx,%esi */ - } - EMIT1_off32(0xe8, t_offset); /* call sk_load_xxx_ind */ - break; - case BPF_S_LD_H_IND: - func = sk_load_half; - goto common_load_ind; - case BPF_S_LD_B_IND: - func = sk_load_byte; - goto common_load_ind; - case BPF_S_JMP_JA: - t_offset = addrs[i + K] - addrs[i]; - EMIT_JMP(t_offset); - break; - COND_SEL(BPF_S_JMP_JGT_K, X86_JA, X86_JBE); - COND_SEL(BPF_S_JMP_JGE_K, X86_JAE, X86_JB); - COND_SEL(BPF_S_JMP_JEQ_K, X86_JE, X86_JNE); - COND_SEL(BPF_S_JMP_JSET_K,X86_JNE, X86_JE); - COND_SEL(BPF_S_JMP_JGT_X, X86_JA, X86_JBE); - COND_SEL(BPF_S_JMP_JGE_X, X86_JAE, X86_JB); - COND_SEL(BPF_S_JMP_JEQ_X, X86_JE, X86_JNE); - COND_SEL(BPF_S_JMP_JSET_X,X86_JNE, X86_JE); - -cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i]; - t_offset = addrs[i + filter[i].jt] - addrs[i]; - - /* same targets, can avoid doing the test :) */ - if (filter[i].jt == filter[i].jf) { - EMIT_JMP(t_offset); - break; - } + break; + + case BPF_ALU | BPF_END | BPF_FROM_LE: + break; + + /* ST: *(u8*)(a_reg + off) = imm */ + case BPF_ST | BPF_MEM | BPF_B: + if (is_ereg(a_reg)) + EMIT2(0x41, 0xC6); + else + EMIT1(0xC6); + goto st; + case BPF_ST | BPF_MEM | BPF_H: + if (is_ereg(a_reg)) + EMIT3(0x66, 0x41, 0xC7); + else + EMIT2(0x66, 0xC7); + goto st; + case BPF_ST | BPF_MEM | BPF_W: + if (is_ereg(a_reg)) + EMIT2(0x41, 0xC7); + else + EMIT1(0xC7); + goto st; + case BPF_ST | BPF_MEM | BPF_DW: + EMIT2(add_1mod(0x48, a_reg), 0xC7); + +st: if (is_imm8(insn->off)) + EMIT2(add_1reg(0x40, a_reg), insn->off); + else + EMIT1_off32(add_1reg(0x80, a_reg), insn->off); + + EMIT(K, bpf_size_to_x86_bytes(BPF_SIZE(insn->code))); + break; + + /* STX: *(u8*)(a_reg + off) = x_reg */ + case BPF_STX | BPF_MEM | BPF_B: + /* emit 'mov byte ptr [rax + off], al' */ + if (is_ereg(a_reg) || is_ereg(x_reg) || + /* have to add extra byte for x86 SIL, DIL regs */ + x_reg == BPF_REG_1 || x_reg == BPF_REG_2) + EMIT2(add_2mod(0x40, a_reg, x_reg), 0x88); + else + EMIT1(0x88); + goto stx; + case BPF_STX | BPF_MEM | BPF_H: + if (is_ereg(a_reg) || is_ereg(x_reg)) + EMIT3(0x66, add_2mod(0x40, a_reg, x_reg), 0x89); + else + EMIT2(0x66, 0x89); + goto stx; + case BPF_STX | BPF_MEM | BPF_W: + if (is_ereg(a_reg) || is_ereg(x_reg)) + EMIT2(add_2mod(0x40, a_reg, x_reg), 0x89); + else + EMIT1(0x89); + goto stx; + case BPF_STX | BPF_MEM | BPF_DW: + EMIT2(add_2mod(0x48, a_reg, x_reg), 0x89); +stx: if (is_imm8(insn->off)) + EMIT2(add_2reg(0x40, a_reg, x_reg), insn->off); + else + EMIT1_off32(add_2reg(0x80, a_reg, x_reg), + insn->off); + break; + + /* LDX: a_reg = *(u8*)(x_reg + off) */ + case BPF_LDX | BPF_MEM | BPF_B: + /* emit 'movzx rax, byte ptr [rax + off]' */ + EMIT3(add_2mod(0x48, x_reg, a_reg), 0x0F, 0xB6); + goto ldx; + case BPF_LDX | BPF_MEM | BPF_H: + /* emit 'movzx rax, word ptr [rax + off]' */ + EMIT3(add_2mod(0x48, x_reg, a_reg), 0x0F, 0xB7); + goto ldx; + case BPF_LDX | BPF_MEM | BPF_W: + /* emit 'mov eax, dword ptr [rax+0x14]' */ + if (is_ereg(a_reg) || is_ereg(x_reg)) + EMIT2(add_2mod(0x40, x_reg, a_reg), 0x8B); + else + EMIT1(0x8B); + goto ldx; + case BPF_LDX | BPF_MEM | BPF_DW: + /* emit 'mov rax, qword ptr [rax+0x14]' */ + EMIT2(add_2mod(0x48, x_reg, a_reg), 0x8B); +ldx: /* if insn->off == 0 we can save one extra byte, but + * special case of x86 r13 which always needs an offset + * is not worth the hassle + */ + if (is_imm8(insn->off)) + EMIT2(add_2reg(0x40, x_reg, a_reg), insn->off); + else + EMIT1_off32(add_2reg(0x80, x_reg, a_reg), + insn->off); + break; + + /* STX XADD: lock *(u32*)(a_reg + off) += x_reg */ + case BPF_STX | BPF_XADD | BPF_W: + /* emit 'lock add dword ptr [rax + off], eax' */ + if (is_ereg(a_reg) || is_ereg(x_reg)) + EMIT3(0xF0, add_2mod(0x40, a_reg, x_reg), 0x01); + else + EMIT2(0xF0, 0x01); + goto xadd; + case BPF_STX | BPF_XADD | BPF_DW: + EMIT3(0xF0, add_2mod(0x48, a_reg, x_reg), 0x01); +xadd: if (is_imm8(insn->off)) + EMIT2(add_2reg(0x40, a_reg, x_reg), insn->off); + else + EMIT1_off32(add_2reg(0x80, a_reg, x_reg), + insn->off); + break; + + /* call */ + case BPF_JMP | BPF_CALL: + func = (u8 *) __bpf_call_base + K; + jmp_offset = func - (image + addrs[i]); + if (ctx->seen_ld_abs) { + EMIT2(0x41, 0x52); /* push %r10 */ + EMIT2(0x41, 0x51); /* push %r9 */ + /* need to adjust jmp offset, since + * pop %r9, pop %r10 take 4 bytes after call insn + */ + jmp_offset += 4; + } + if (!K || !is_simm32(jmp_offset)) { + pr_err("unsupported bpf func %d addr %p image %p\n", + K, func, image); + return -EINVAL; + } + EMIT1_off32(0xE8, jmp_offset); + if (ctx->seen_ld_abs) { + EMIT2(0x41, 0x59); /* pop %r9 */ + EMIT2(0x41, 0x5A); /* pop %r10 */ + } + break; + + /* cond jump */ + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JNE | BPF_X: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_X: + /* cmp a_reg, x_reg */ + EMIT3(add_2mod(0x48, a_reg, x_reg), 0x39, + add_2reg(0xC0, a_reg, x_reg)); + goto emit_cond_jmp; + + case BPF_JMP | BPF_JSET | BPF_X: + /* test a_reg, x_reg */ + EMIT3(add_2mod(0x48, a_reg, x_reg), 0x85, + add_2reg(0xC0, a_reg, x_reg)); + goto emit_cond_jmp; + + case BPF_JMP | BPF_JSET | BPF_K: + /* test a_reg, imm32 */ + EMIT1(add_1mod(0x48, a_reg)); + EMIT2_off32(0xF7, add_1reg(0xC0, a_reg), K); + goto emit_cond_jmp; + + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JNE | BPF_K: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP | BPF_JSGE | BPF_K: + /* cmp a_reg, imm8/32 */ + EMIT1(add_1mod(0x48, a_reg)); + + if (is_imm8(K)) + EMIT3(0x83, add_1reg(0xF8, a_reg), K); + else + EMIT2_off32(0x81, add_1reg(0xF8, a_reg), K); + +emit_cond_jmp: /* convert BPF opcode to x86 */ + switch (BPF_OP(insn->code)) { + case BPF_JEQ: + jmp_cond = X86_JE; + break; + case BPF_JSET: + case BPF_JNE: + jmp_cond = X86_JNE; + break; + case BPF_JGT: + /* GT is unsigned '>', JA in x86 */ + jmp_cond = X86_JA; + break; + case BPF_JGE: + /* GE is unsigned '>=', JAE in x86 */ + jmp_cond = X86_JAE; + break; + case BPF_JSGT: + /* signed '>', GT in x86 */ + jmp_cond = X86_JG; + break; + case BPF_JSGE: + /* signed '>=', GE in x86 */ + jmp_cond = X86_JGE; + break; + default: /* to silence gcc warning */ + return -EFAULT; + } + jmp_offset = addrs[i + insn->off] - addrs[i]; + if (is_imm8(jmp_offset)) { + EMIT2(jmp_cond, jmp_offset); + } else if (is_simm32(jmp_offset)) { + EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset); + } else { + pr_err("cond_jmp gen bug %llx\n", jmp_offset); + return -EFAULT; + } + + break; - switch (filter[i].code) { - case BPF_S_JMP_JGT_X: - case BPF_S_JMP_JGE_X: - case BPF_S_JMP_JEQ_X: - seen |= SEEN_XREG; - EMIT2(0x39, 0xd8); /* cmp %ebx,%eax */ - break; - case BPF_S_JMP_JSET_X: - seen |= SEEN_XREG; - EMIT2(0x85, 0xd8); /* test %ebx,%eax */ - break; - case BPF_S_JMP_JEQ_K: - if (K == 0) { - EMIT2(0x85, 0xc0); /* test %eax,%eax */ - break; - } - case BPF_S_JMP_JGT_K: - case BPF_S_JMP_JGE_K: - if (K <= 127) - EMIT3(0x83, 0xf8, K); /* cmp imm8,%eax */ + case BPF_JMP | BPF_JA: + jmp_offset = addrs[i + insn->off] - addrs[i]; + if (!jmp_offset) + /* optimize out nop jumps */ + break; +emit_jmp: + if (is_imm8(jmp_offset)) { + EMIT2(0xEB, jmp_offset); + } else if (is_simm32(jmp_offset)) { + EMIT1_off32(0xE9, jmp_offset); + } else { + pr_err("jmp gen bug %llx\n", jmp_offset); + return -EFAULT; + } + break; + + case BPF_LD | BPF_IND | BPF_W: + func = sk_load_word; + goto common_load; + case BPF_LD | BPF_ABS | BPF_W: + func = CHOOSE_LOAD_FUNC(K, sk_load_word); +common_load: ctx->seen_ld_abs = true; + jmp_offset = func - (image + addrs[i]); + if (!func || !is_simm32(jmp_offset)) { + pr_err("unsupported bpf func %d addr %p image %p\n", + K, func, image); + return -EINVAL; + } + if (BPF_MODE(insn->code) == BPF_ABS) { + /* mov %esi, imm32 */ + EMIT1_off32(0xBE, K); + } else { + /* mov %rsi, x_reg */ + EMIT_mov(BPF_REG_2, x_reg); + if (K) { + if (is_imm8(K)) + /* add %esi, imm8 */ + EMIT3(0x83, 0xC6, K); else - EMIT1_off32(0x3d, K); /* cmp imm32,%eax */ - break; - case BPF_S_JMP_JSET_K: - if (K <= 0xFF) - EMIT2(0xa8, K); /* test imm8,%al */ - else if (!(K & 0xFFFF00FF)) - EMIT3(0xf6, 0xc4, K >> 8); /* test imm8,%ah */ - else if (K <= 0xFFFF) { - EMIT2(0x66, 0xa9); /* test imm16,%ax */ - EMIT(K, 2); - } else { - EMIT1_off32(0xa9, K); /* test imm32,%eax */ - } - break; + /* add %esi, imm32 */ + EMIT2_off32(0x81, 0xC6, K); } - if (filter[i].jt != 0) { - if (filter[i].jf && f_offset) - t_offset += is_near(f_offset) ? 2 : 5; - EMIT_COND_JMP(t_op, t_offset); - if (filter[i].jf) - EMIT_JMP(f_offset); - break; - } - EMIT_COND_JMP(f_op, f_offset); - break; + } + /* skb pointer is in R6 (%rbx), it will be copied into + * %rdi if skb_copy_bits() call is necessary. + * sk_load_* helpers also use %r10 and %r9d. + * See bpf_jit.S + */ + EMIT1_off32(0xE8, jmp_offset); /* call */ + break; + + case BPF_LD | BPF_IND | BPF_H: + func = sk_load_half; + goto common_load; + case BPF_LD | BPF_ABS | BPF_H: + func = CHOOSE_LOAD_FUNC(K, sk_load_half); + goto common_load; + case BPF_LD | BPF_IND | BPF_B: + func = sk_load_byte; + goto common_load; + case BPF_LD | BPF_ABS | BPF_B: + func = CHOOSE_LOAD_FUNC(K, sk_load_byte); + goto common_load; + + case BPF_JMP | BPF_EXIT: + if (i != insn_cnt - 1) { + jmp_offset = ctx->cleanup_addr - addrs[i]; + goto emit_jmp; + } + /* update cleanup_addr */ + ctx->cleanup_addr = proglen; + /* mov rbx, qword ptr [rbp-X] */ + EMIT3_off32(0x48, 0x8B, 0x9D, -stacksize); + /* mov r13, qword ptr [rbp-X] */ + EMIT3_off32(0x4C, 0x8B, 0xAD, -stacksize + 8); + /* mov r14, qword ptr [rbp-X] */ + EMIT3_off32(0x4C, 0x8B, 0xB5, -stacksize + 16); + /* mov r15, qword ptr [rbp-X] */ + EMIT3_off32(0x4C, 0x8B, 0xBD, -stacksize + 24); + + EMIT1(0xC9); /* leave */ + EMIT1(0xC3); /* ret */ + break; + default: - /* hmm, too complex filter, give up with jit compiler */ + /* By design x64 JIT should support all BPF instructions + * This error will be seen if new instruction was added + * to interpreter, but not to JIT + * or if there is junk in sk_filter + */ + pr_err("bpf_jit: unknown opcode %02x\n", insn->code); return -EINVAL; } + ilen = prog - temp; if (image) { if (unlikely(proglen + ilen > oldproglen)) { - pr_err("bpb_jit_compile fatal error\n"); + pr_err("bpf_jit_compile fatal error\n"); return -EFAULT; } memcpy(image + proglen, temp, ilen); @@ -726,21 +859,14 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i]; addrs[i] = proglen; prog = temp; } - /* last bpf instruction is always a RET : - * use it to give the cleanup instruction(s) addr - */ - ctx->cleanup_addr = proglen - 1; /* ret */ - if (seen_or_pass0) - ctx->cleanup_addr -= 1; /* leaveq */ - if (seen_or_pass0 & SEEN_XREG) - ctx->cleanup_addr -= 4; /* mov -8(%rbp),%rbx */ - - ctx->seen = seen; - return proglen; } void bpf_jit_compile(struct sk_filter *prog) +{ +} + +void bpf_int_jit_compile(struct sk_filter *prog) { struct bpf_binary_header *header = NULL; int proglen, oldproglen = 0; @@ -768,8 +894,6 @@ void bpf_jit_compile(struct sk_filter *prog) addrs[i] = proglen; } ctx.cleanup_addr = proglen; - ctx.seen = SEEN_XREG | SEEN_DATAREF | SEEN_MEM; - ctx.pc_ret0 = -1; for (pass = 0; pass < 10; pass++) { proglen = do_jit(prog, addrs, image, oldproglen, &ctx); diff --git a/include/linux/filter.h b/include/linux/filter.h index 4457b383961c..9d5ae0a2c954 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -207,6 +207,9 @@ void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to); void sk_filter_charge(struct sock *sk, struct sk_filter *fp); void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); +u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); +void bpf_int_jit_compile(struct sk_filter *fp); + #ifdef CONFIG_BPF_JIT #include #include diff --git a/net/core/filter.c b/net/core/filter.c index c442a0d7d0f7..32c5b44c537e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1524,6 +1524,10 @@ out_err: return ERR_PTR(err); } +void __weak bpf_int_jit_compile(struct sk_filter *prog) +{ +} + static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp, struct sock *sk) { @@ -1544,9 +1548,12 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp, /* JIT compiler couldn't process this filter, so do the * internal BPF translation for the optimized interpreter. */ - if (!fp->jited) + if (!fp->jited) { fp = __sk_migrate_filter(fp, sk); + /* Probe if internal BPF can be jit-ed */ + bpf_int_jit_compile(fp); + } return fp; } -- cgit v1.2.3-59-g8ed1b From 1836eea209546b870dd83f3f4ef234d6598a560d Mon Sep 17 00:00:00 2001 From: George Spelvin Date: Sat, 10 May 2014 10:32:57 -0400 Subject: lib/crc7: Shift crc7() output left 1 bit This eliminates a 1-bit left shift in every single caller, and makes the inner loop of the CRC computation more efficient. Renamed crc7 to crc7_be (big-endian) since the interface changed. Also purged #include from files that don't use it at all. Signed-off-by: George Spelvin Reviewed-by: Pavel Machek Acked-by: Ulf Hansson Signed-off-by: John W. Linville --- drivers/mmc/host/mmc_spi.c | 2 +- drivers/net/wireless/ti/wl1251/acx.c | 1 - drivers/net/wireless/ti/wl1251/cmd.c | 1 - drivers/net/wireless/ti/wl1251/spi.c | 3 +- drivers/net/wireless/ti/wlcore/spi.c | 3 +- include/linux/crc7.h | 8 ++-- lib/crc7.c | 84 ++++++++++++++++++++---------------- 7 files changed, 53 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c index 0a87e5691341..338e2202eaaa 100644 --- a/drivers/mmc/host/mmc_spi.c +++ b/drivers/mmc/host/mmc_spi.c @@ -472,7 +472,7 @@ mmc_spi_command_send(struct mmc_spi_host *host, *cp++ = (u8)(arg >> 16); *cp++ = (u8)(arg >> 8); *cp++ = (u8)arg; - *cp++ = (crc7(0, &data->status[1], 5) << 1) | 0x01; + *cp++ = crc7_be(0, &data->status[1], 5) | 0x01; /* Then, read up to 13 bytes (while writing all-ones): * - N(CR) (== 1..8) bytes of all-ones diff --git a/drivers/net/wireless/ti/wl1251/acx.c b/drivers/net/wireless/ti/wl1251/acx.c index 5a4ec56c83d0..5695628757ee 100644 --- a/drivers/net/wireless/ti/wl1251/acx.c +++ b/drivers/net/wireless/ti/wl1251/acx.c @@ -2,7 +2,6 @@ #include #include -#include #include "wl1251.h" #include "reg.h" diff --git a/drivers/net/wireless/ti/wl1251/cmd.c b/drivers/net/wireless/ti/wl1251/cmd.c index bf1fa18b9786..ede31f048ef9 100644 --- a/drivers/net/wireless/ti/wl1251/cmd.c +++ b/drivers/net/wireless/ti/wl1251/cmd.c @@ -2,7 +2,6 @@ #include #include -#include #include #include "wl1251.h" diff --git a/drivers/net/wireless/ti/wl1251/spi.c b/drivers/net/wireless/ti/wl1251/spi.c index b06d36d99362..e94b57cd5a22 100644 --- a/drivers/net/wireless/ti/wl1251/spi.c +++ b/drivers/net/wireless/ti/wl1251/spi.c @@ -122,8 +122,7 @@ static void wl1251_spi_wake(struct wl1251 *wl) crc[3] = cmd[6]; crc[4] = cmd[5]; - cmd[4] |= crc7(0, crc, WSPI_INIT_CMD_CRC_LEN) << 1; - cmd[4] |= WSPI_INIT_CMD_END; + cmd[4] = crc7_be(0, crc, WSPI_INIT_CMD_CRC_LEN) | WSPI_INIT_CMD_END; t.tx_buf = cmd; t.len = WSPI_INIT_CMD_LEN; diff --git a/drivers/net/wireless/ti/wlcore/spi.c b/drivers/net/wireless/ti/wlcore/spi.c index 5f3a389dd74c..1d4ddabe6063 100644 --- a/drivers/net/wireless/ti/wlcore/spi.c +++ b/drivers/net/wireless/ti/wlcore/spi.c @@ -151,8 +151,7 @@ static void wl12xx_spi_init(struct device *child) crc[3] = cmd[6]; crc[4] = cmd[5]; - cmd[4] |= crc7(0, crc, WSPI_INIT_CMD_CRC_LEN) << 1; - cmd[4] |= WSPI_INIT_CMD_END; + cmd[4] = crc7_be(0, crc, WSPI_INIT_CMD_CRC_LEN) | WSPI_INIT_CMD_END; t.tx_buf = cmd; t.len = WSPI_INIT_CMD_LEN; diff --git a/include/linux/crc7.h b/include/linux/crc7.h index 1786e772d5c6..d590765106f3 100644 --- a/include/linux/crc7.h +++ b/include/linux/crc7.h @@ -2,13 +2,13 @@ #define _LINUX_CRC7_H #include -extern const u8 crc7_syndrome_table[256]; +extern const u8 crc7_be_syndrome_table[256]; -static inline u8 crc7_byte(u8 crc, u8 data) +static inline u8 crc7_be_byte(u8 crc, u8 data) { - return crc7_syndrome_table[(crc << 1) ^ data]; + return crc7_be_syndrome_table[crc ^ data]; } -extern u8 crc7(u8 crc, const u8 *buffer, size_t len); +extern u8 crc7_be(u8 crc, const u8 *buffer, size_t len); #endif diff --git a/lib/crc7.c b/lib/crc7.c index f1c3a144cec1..bf6255e23919 100644 --- a/lib/crc7.c +++ b/lib/crc7.c @@ -10,42 +10,47 @@ #include -/* Table for CRC-7 (polynomial x^7 + x^3 + 1) */ -const u8 crc7_syndrome_table[256] = { - 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, - 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77, - 0x19, 0x10, 0x0b, 0x02, 0x3d, 0x34, 0x2f, 0x26, - 0x51, 0x58, 0x43, 0x4a, 0x75, 0x7c, 0x67, 0x6e, - 0x32, 0x3b, 0x20, 0x29, 0x16, 0x1f, 0x04, 0x0d, - 0x7a, 0x73, 0x68, 0x61, 0x5e, 0x57, 0x4c, 0x45, - 0x2b, 0x22, 0x39, 0x30, 0x0f, 0x06, 0x1d, 0x14, - 0x63, 0x6a, 0x71, 0x78, 0x47, 0x4e, 0x55, 0x5c, - 0x64, 0x6d, 0x76, 0x7f, 0x40, 0x49, 0x52, 0x5b, - 0x2c, 0x25, 0x3e, 0x37, 0x08, 0x01, 0x1a, 0x13, - 0x7d, 0x74, 0x6f, 0x66, 0x59, 0x50, 0x4b, 0x42, - 0x35, 0x3c, 0x27, 0x2e, 0x11, 0x18, 0x03, 0x0a, - 0x56, 0x5f, 0x44, 0x4d, 0x72, 0x7b, 0x60, 0x69, - 0x1e, 0x17, 0x0c, 0x05, 0x3a, 0x33, 0x28, 0x21, - 0x4f, 0x46, 0x5d, 0x54, 0x6b, 0x62, 0x79, 0x70, - 0x07, 0x0e, 0x15, 0x1c, 0x23, 0x2a, 0x31, 0x38, - 0x41, 0x48, 0x53, 0x5a, 0x65, 0x6c, 0x77, 0x7e, - 0x09, 0x00, 0x1b, 0x12, 0x2d, 0x24, 0x3f, 0x36, - 0x58, 0x51, 0x4a, 0x43, 0x7c, 0x75, 0x6e, 0x67, - 0x10, 0x19, 0x02, 0x0b, 0x34, 0x3d, 0x26, 0x2f, - 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c, - 0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04, - 0x6a, 0x63, 0x78, 0x71, 0x4e, 0x47, 0x5c, 0x55, - 0x22, 0x2b, 0x30, 0x39, 0x06, 0x0f, 0x14, 0x1d, - 0x25, 0x2c, 0x37, 0x3e, 0x01, 0x08, 0x13, 0x1a, - 0x6d, 0x64, 0x7f, 0x76, 0x49, 0x40, 0x5b, 0x52, - 0x3c, 0x35, 0x2e, 0x27, 0x18, 0x11, 0x0a, 0x03, - 0x74, 0x7d, 0x66, 0x6f, 0x50, 0x59, 0x42, 0x4b, - 0x17, 0x1e, 0x05, 0x0c, 0x33, 0x3a, 0x21, 0x28, - 0x5f, 0x56, 0x4d, 0x44, 0x7b, 0x72, 0x69, 0x60, - 0x0e, 0x07, 0x1c, 0x15, 0x2a, 0x23, 0x38, 0x31, - 0x46, 0x4f, 0x54, 0x5d, 0x62, 0x6b, 0x70, 0x79 +/* + * Table for CRC-7 (polynomial x^7 + x^3 + 1). + * This is a big-endian CRC (msbit is highest power of x), + * aligned so the msbit of the byte is the x^6 coefficient + * and the lsbit is not used. + */ +const u8 crc7_be_syndrome_table[256] = { + 0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, + 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee, + 0x32, 0x20, 0x16, 0x04, 0x7a, 0x68, 0x5e, 0x4c, + 0xa2, 0xb0, 0x86, 0x94, 0xea, 0xf8, 0xce, 0xdc, + 0x64, 0x76, 0x40, 0x52, 0x2c, 0x3e, 0x08, 0x1a, + 0xf4, 0xe6, 0xd0, 0xc2, 0xbc, 0xae, 0x98, 0x8a, + 0x56, 0x44, 0x72, 0x60, 0x1e, 0x0c, 0x3a, 0x28, + 0xc6, 0xd4, 0xe2, 0xf0, 0x8e, 0x9c, 0xaa, 0xb8, + 0xc8, 0xda, 0xec, 0xfe, 0x80, 0x92, 0xa4, 0xb6, + 0x58, 0x4a, 0x7c, 0x6e, 0x10, 0x02, 0x34, 0x26, + 0xfa, 0xe8, 0xde, 0xcc, 0xb2, 0xa0, 0x96, 0x84, + 0x6a, 0x78, 0x4e, 0x5c, 0x22, 0x30, 0x06, 0x14, + 0xac, 0xbe, 0x88, 0x9a, 0xe4, 0xf6, 0xc0, 0xd2, + 0x3c, 0x2e, 0x18, 0x0a, 0x74, 0x66, 0x50, 0x42, + 0x9e, 0x8c, 0xba, 0xa8, 0xd6, 0xc4, 0xf2, 0xe0, + 0x0e, 0x1c, 0x2a, 0x38, 0x46, 0x54, 0x62, 0x70, + 0x82, 0x90, 0xa6, 0xb4, 0xca, 0xd8, 0xee, 0xfc, + 0x12, 0x00, 0x36, 0x24, 0x5a, 0x48, 0x7e, 0x6c, + 0xb0, 0xa2, 0x94, 0x86, 0xf8, 0xea, 0xdc, 0xce, + 0x20, 0x32, 0x04, 0x16, 0x68, 0x7a, 0x4c, 0x5e, + 0xe6, 0xf4, 0xc2, 0xd0, 0xae, 0xbc, 0x8a, 0x98, + 0x76, 0x64, 0x52, 0x40, 0x3e, 0x2c, 0x1a, 0x08, + 0xd4, 0xc6, 0xf0, 0xe2, 0x9c, 0x8e, 0xb8, 0xaa, + 0x44, 0x56, 0x60, 0x72, 0x0c, 0x1e, 0x28, 0x3a, + 0x4a, 0x58, 0x6e, 0x7c, 0x02, 0x10, 0x26, 0x34, + 0xda, 0xc8, 0xfe, 0xec, 0x92, 0x80, 0xb6, 0xa4, + 0x78, 0x6a, 0x5c, 0x4e, 0x30, 0x22, 0x14, 0x06, + 0xe8, 0xfa, 0xcc, 0xde, 0xa0, 0xb2, 0x84, 0x96, + 0x2e, 0x3c, 0x0a, 0x18, 0x66, 0x74, 0x42, 0x50, + 0xbe, 0xac, 0x9a, 0x88, 0xf6, 0xe4, 0xd2, 0xc0, + 0x1c, 0x0e, 0x38, 0x2a, 0x54, 0x46, 0x70, 0x62, + 0x8c, 0x9e, 0xa8, 0xba, 0xc4, 0xd6, 0xe0, 0xf2 }; -EXPORT_SYMBOL(crc7_syndrome_table); +EXPORT_SYMBOL(crc7_be_syndrome_table); /** * crc7 - update the CRC7 for the data buffer @@ -55,14 +60,17 @@ EXPORT_SYMBOL(crc7_syndrome_table); * Context: any * * Returns the updated CRC7 value. + * The CRC7 is left-aligned in the byte (the lsbit is always 0), as that + * makes the computation easier, and all callers want it in that form. + * */ -u8 crc7(u8 crc, const u8 *buffer, size_t len) +u8 crc7_be(u8 crc, const u8 *buffer, size_t len) { while (len--) - crc = crc7_byte(crc, *buffer++); + crc = crc7_be_byte(crc, *buffer++); return crc; } -EXPORT_SYMBOL(crc7); +EXPORT_SYMBOL(crc7_be); MODULE_DESCRIPTION("CRC7 calculations"); MODULE_LICENSE("GPL"); -- cgit v1.2.3-59-g8ed1b From a75951217472c522c324adb0a4de3ba69d656ef5 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Fri, 16 May 2014 16:14:04 +0200 Subject: net: phy: extend fixed driver with fixed_phy_register() The existing fixed_phy_add() function has several drawbacks that prevents it from being used as is for OF-based declaration of fixed PHYs: * The address of the PHY on the fake bus needs to be passed, while a dynamic allocation is desired. * Since the phy_device instantiation is post-poned until the next mdiobus scan, there is no way to associate the fixed PHY with its OF node, which later prevents of_phy_connect() from finding this fixed PHY from a given OF node. To solve this, this commit introduces fixed_phy_register(), which will allocate an available PHY address, add the PHY using fixed_phy_add() and instantiate the phy_device structure associated with the provided OF node. Signed-off-by: Thomas Petazzoni Acked-by: Florian Fainelli Acked-by: Grant Likely Tested-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/fixed.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/phy_fixed.h | 11 +++++++++ 2 files changed, 72 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/phy/fixed.c b/drivers/net/phy/fixed.c index e41546da105e..d60d875cb445 100644 --- a/drivers/net/phy/fixed.c +++ b/drivers/net/phy/fixed.c @@ -21,6 +21,7 @@ #include #include #include +#include #define MII_REGS_NUM 29 @@ -203,6 +204,66 @@ err_regs: } EXPORT_SYMBOL_GPL(fixed_phy_add); +void fixed_phy_del(int phy_addr) +{ + struct fixed_mdio_bus *fmb = &platform_fmb; + struct fixed_phy *fp, *tmp; + + list_for_each_entry_safe(fp, tmp, &fmb->phys, node) { + if (fp->addr == phy_addr) { + list_del(&fp->node); + kfree(fp); + return; + } + } +} +EXPORT_SYMBOL_GPL(fixed_phy_del); + +static int phy_fixed_addr; +static DEFINE_SPINLOCK(phy_fixed_addr_lock); + +int fixed_phy_register(unsigned int irq, + struct fixed_phy_status *status, + struct device_node *np) +{ + struct fixed_mdio_bus *fmb = &platform_fmb; + struct phy_device *phy; + int phy_addr; + int ret; + + /* Get the next available PHY address, up to PHY_MAX_ADDR */ + spin_lock(&phy_fixed_addr_lock); + if (phy_fixed_addr == PHY_MAX_ADDR) { + spin_unlock(&phy_fixed_addr_lock); + return -ENOSPC; + } + phy_addr = phy_fixed_addr++; + spin_unlock(&phy_fixed_addr_lock); + + ret = fixed_phy_add(PHY_POLL, phy_addr, status); + if (ret < 0) + return ret; + + phy = get_phy_device(fmb->mii_bus, phy_addr, false); + if (!phy || IS_ERR(phy)) { + fixed_phy_del(phy_addr); + return -EINVAL; + } + + of_node_get(np); + phy->dev.of_node = np; + + ret = phy_device_register(phy); + if (ret) { + phy_device_free(phy); + of_node_put(np); + fixed_phy_del(phy_addr); + return ret; + } + + return 0; +} + static int __init fixed_mdio_bus_init(void) { struct fixed_mdio_bus *fmb = &platform_fmb; diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h index 509d8f5f984e..4f2478b47136 100644 --- a/include/linux/phy_fixed.h +++ b/include/linux/phy_fixed.h @@ -9,15 +9,26 @@ struct fixed_phy_status { int asym_pause; }; +struct device_node; + #ifdef CONFIG_FIXED_PHY extern int fixed_phy_add(unsigned int irq, int phy_id, struct fixed_phy_status *status); +extern int fixed_phy_register(unsigned int irq, + struct fixed_phy_status *status, + struct device_node *np); #else static inline int fixed_phy_add(unsigned int irq, int phy_id, struct fixed_phy_status *status) { return -ENODEV; } +static inline int fixed_phy_register(unsigned int irq, + struct fixed_phy_status *status, + struct device_node *np) +{ + return -ENODEV; +} #endif /* CONFIG_FIXED_PHY */ /* -- cgit v1.2.3-59-g8ed1b From 3be2a49e5c08d268f8af0dd4fe89a24ea8cdc339 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Fri, 16 May 2014 16:14:05 +0200 Subject: of: provide a binding for fixed link PHYs Some Ethernet MACs have a "fixed link", and are not connected to a normal MDIO-managed PHY device. For those situations, a Device Tree binding allows to describe a "fixed link" using a special PHY node. This patch adds: * A documentation for the fixed PHY Device Tree binding. * An of_phy_is_fixed_link() function that an Ethernet driver can call on its PHY phandle to find out whether it's a fixed link PHY or not. It should typically be used to know if of_phy_register_fixed_link() should be called. * An of_phy_register_fixed_link() function that instantiates the fixed PHY into the PHY subsystem, so that when the driver calls of_phy_connect(), the PHY device associated to the OF node will be found. These two additional functions also support the old fixed-link Device Tree binding used on PowerPC platforms, so that ultimately, the network device drivers for those platforms could be converted to use of_phy_is_fixed_link() and of_phy_register_fixed_link() instead of of_phy_connect_fixed_link(), while keeping compatibility with their respective Device Tree bindings. Signed-off-by: Thomas Petazzoni Reviewed-by: Florian Fainelli Tested-by: Florian Fainelli Signed-off-by: David S. Miller --- .../devicetree/bindings/net/fixed-link.txt | 30 ++++++++++ drivers/of/of_mdio.c | 67 ++++++++++++++++++++++ include/linux/of_mdio.h | 15 +++++ 3 files changed, 112 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/fixed-link.txt (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/net/fixed-link.txt b/Documentation/devicetree/bindings/net/fixed-link.txt new file mode 100644 index 000000000000..e956de1be935 --- /dev/null +++ b/Documentation/devicetree/bindings/net/fixed-link.txt @@ -0,0 +1,30 @@ +Fixed link Device Tree binding +------------------------------ + +Some Ethernet MACs have a "fixed link", and are not connected to a +normal MDIO-managed PHY device. For those situations, a Device Tree +binding allows to describe a "fixed link". + +Such a fixed link situation is described by creating a 'fixed-link' +sub-node of the Ethernet MAC device node, with the following +properties: + +* 'speed' (integer, mandatory), to indicate the link speed. Accepted + values are 10, 100 and 1000 +* 'full-duplex' (boolean, optional), to indicate that full duplex is + used. When absent, half duplex is assumed. +* 'pause' (boolean, optional), to indicate that pause should be + enabled. +* 'asym-pause' (boolean, optional), to indicate that asym_pause should + be enabled. + +Example: + +ethernet@0 { + ... + fixed-link { + speed = <1000>; + full-duplex; + }; + ... +}; diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index 9a95831bd065..1def0bb5cb37 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -301,3 +302,69 @@ struct phy_device *of_phy_attach(struct net_device *dev, return phy_attach_direct(dev, phy, flags, iface) ? NULL : phy; } EXPORT_SYMBOL(of_phy_attach); + +#if defined(CONFIG_FIXED_PHY) +/* + * of_phy_is_fixed_link() and of_phy_register_fixed_link() must + * support two DT bindings: + * - the old DT binding, where 'fixed-link' was a property with 5 + * cells encoding various informations about the fixed PHY + * - the new DT binding, where 'fixed-link' is a sub-node of the + * Ethernet device. + */ +bool of_phy_is_fixed_link(struct device_node *np) +{ + struct device_node *dn; + int len; + + /* New binding */ + dn = of_get_child_by_name(np, "fixed-link"); + if (dn) { + of_node_put(dn); + return true; + } + + /* Old binding */ + if (of_get_property(np, "fixed-link", &len) && + len == (5 * sizeof(__be32))) + return true; + + return false; +} +EXPORT_SYMBOL(of_phy_is_fixed_link); + +int of_phy_register_fixed_link(struct device_node *np) +{ + struct fixed_phy_status status = {}; + struct device_node *fixed_link_node; + const __be32 *fixed_link_prop; + int len; + + /* New binding */ + fixed_link_node = of_get_child_by_name(np, "fixed-link"); + if (fixed_link_node) { + status.link = 1; + status.duplex = of_property_read_bool(np, "full-duplex"); + if (of_property_read_u32(fixed_link_node, "speed", &status.speed)) + return -EINVAL; + status.pause = of_property_read_bool(np, "pause"); + status.asym_pause = of_property_read_bool(np, "asym-pause"); + of_node_put(fixed_link_node); + return fixed_phy_register(PHY_POLL, &status, np); + } + + /* Old binding */ + fixed_link_prop = of_get_property(np, "fixed-link", &len); + if (fixed_link_prop && len == (5 * sizeof(__be32))) { + status.link = 1; + status.duplex = be32_to_cpu(fixed_link_prop[1]); + status.speed = be32_to_cpu(fixed_link_prop[2]); + status.pause = be32_to_cpu(fixed_link_prop[3]); + status.asym_pause = be32_to_cpu(fixed_link_prop[4]); + return fixed_phy_register(PHY_POLL, &status, np); + } + + return -ENODEV; +} +EXPORT_SYMBOL(of_phy_register_fixed_link); +#endif diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index 881a7c3571f4..0aa367e316cb 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -72,4 +72,19 @@ static inline struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np) } #endif /* CONFIG_OF */ +#if defined(CONFIG_OF) && defined(CONFIG_FIXED_PHY) +extern int of_phy_register_fixed_link(struct device_node *np); +extern bool of_phy_is_fixed_link(struct device_node *np); +#else +static inline int of_phy_register_fixed_link(struct device_node *np) +{ + return -ENOSYS; +} +static inline bool of_phy_is_fixed_link(struct device_node *np) +{ + return false; +} +#endif + + #endif /* __LINUX_OF_MDIO_H */ -- cgit v1.2.3-59-g8ed1b From 3e9c156e2c210ab67b12b1b692983a6b97c19d3f Mon Sep 17 00:00:00 2001 From: Phoebe Buckheister Date: Fri, 16 May 2014 17:46:44 +0200 Subject: ieee802154: add netlink interfaces for llsec This patch adds user-visible interfaces for the llsec infrastructure. For the added methods, the only major difference between all add/remove implementation lies in how the specific object is parsed, and for dump requests, how objects are written into netlink messages. To save on boilerplate code, table dumps are routed through a helper function that handles netlink dump state, leaving the actual dumping code to care only about iterating over the table to be dumped and filling netlink messages. For add/remove methods, the boilerplate required to work is not quite as large, but still enough to also move into a local helper. Signed-off-by: Phoebe Buckheister Signed-off-by: David S. Miller --- include/linux/nl802154.h | 31 ++ net/ieee802154/ieee802154.h | 19 ++ net/ieee802154/netlink.c | 20 ++ net/ieee802154/nl-mac.c | 807 ++++++++++++++++++++++++++++++++++++++++++++ net/ieee802154/nl_policy.c | 16 + 5 files changed, 893 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl802154.h b/include/linux/nl802154.h index c8d7f3965fff..20163b9a0eae 100644 --- a/include/linux/nl802154.h +++ b/include/linux/nl802154.h @@ -80,6 +80,22 @@ enum { IEEE802154_ATTR_FRAME_RETRIES, + IEEE802154_ATTR_LLSEC_ENABLED, + IEEE802154_ATTR_LLSEC_SECLEVEL, + IEEE802154_ATTR_LLSEC_KEY_MODE, + IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT, + IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED, + IEEE802154_ATTR_LLSEC_KEY_ID, + IEEE802154_ATTR_LLSEC_FRAME_COUNTER, + IEEE802154_ATTR_LLSEC_KEY_BYTES, + IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES, + IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS, + IEEE802154_ATTR_LLSEC_FRAME_TYPE, + IEEE802154_ATTR_LLSEC_CMD_FRAME_ID, + IEEE802154_ATTR_LLSEC_SECLEVELS, + IEEE802154_ATTR_LLSEC_DEV_OVERRIDE, + IEEE802154_ATTR_LLSEC_DEV_KEY_MODE, + __IEEE802154_ATTR_MAX, }; @@ -134,6 +150,21 @@ enum { IEEE802154_SET_MACPARAMS, + IEEE802154_LLSEC_GETPARAMS, + IEEE802154_LLSEC_SETPARAMS, + IEEE802154_LLSEC_LIST_KEY, + IEEE802154_LLSEC_ADD_KEY, + IEEE802154_LLSEC_DEL_KEY, + IEEE802154_LLSEC_LIST_DEV, + IEEE802154_LLSEC_ADD_DEV, + IEEE802154_LLSEC_DEL_DEV, + IEEE802154_LLSEC_LIST_DEVKEY, + IEEE802154_LLSEC_ADD_DEVKEY, + IEEE802154_LLSEC_DEL_DEVKEY, + IEEE802154_LLSEC_LIST_SECLEVEL, + IEEE802154_LLSEC_ADD_SECLEVEL, + IEEE802154_LLSEC_DEL_SECLEVEL, + __IEEE802154_CMD_MAX, }; diff --git a/net/ieee802154/ieee802154.h b/net/ieee802154/ieee802154.h index 6693a5cf01ce..8b83a231299e 100644 --- a/net/ieee802154/ieee802154.h +++ b/net/ieee802154/ieee802154.h @@ -68,4 +68,23 @@ int ieee802154_list_iface(struct sk_buff *skb, struct genl_info *info); int ieee802154_dump_iface(struct sk_buff *skb, struct netlink_callback *cb); int ieee802154_set_macparams(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_getparams(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_setparams(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_add_key(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_del_key(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_dump_keys(struct sk_buff *skb, + struct netlink_callback *cb); +int ieee802154_llsec_add_dev(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_del_dev(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_dump_devs(struct sk_buff *skb, + struct netlink_callback *cb); +int ieee802154_llsec_add_devkey(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_del_devkey(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_dump_devkeys(struct sk_buff *skb, + struct netlink_callback *cb); +int ieee802154_llsec_add_seclevel(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_del_seclevel(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_dump_seclevels(struct sk_buff *skb, + struct netlink_callback *cb); + #endif diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index 04b20589d97a..26efcf4fd2ff 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -124,6 +124,26 @@ static const struct genl_ops ieee8021154_ops[] = { IEEE802154_DUMP(IEEE802154_LIST_IFACE, ieee802154_list_iface, ieee802154_dump_iface), IEEE802154_OP(IEEE802154_SET_MACPARAMS, ieee802154_set_macparams), + IEEE802154_OP(IEEE802154_LLSEC_GETPARAMS, ieee802154_llsec_getparams), + IEEE802154_OP(IEEE802154_LLSEC_SETPARAMS, ieee802154_llsec_setparams), + IEEE802154_DUMP(IEEE802154_LLSEC_LIST_KEY, NULL, + ieee802154_llsec_dump_keys), + IEEE802154_OP(IEEE802154_LLSEC_ADD_KEY, ieee802154_llsec_add_key), + IEEE802154_OP(IEEE802154_LLSEC_DEL_KEY, ieee802154_llsec_del_key), + IEEE802154_DUMP(IEEE802154_LLSEC_LIST_DEV, NULL, + ieee802154_llsec_dump_devs), + IEEE802154_OP(IEEE802154_LLSEC_ADD_DEV, ieee802154_llsec_add_dev), + IEEE802154_OP(IEEE802154_LLSEC_DEL_DEV, ieee802154_llsec_del_dev), + IEEE802154_DUMP(IEEE802154_LLSEC_LIST_DEVKEY, NULL, + ieee802154_llsec_dump_devkeys), + IEEE802154_OP(IEEE802154_LLSEC_ADD_DEVKEY, ieee802154_llsec_add_devkey), + IEEE802154_OP(IEEE802154_LLSEC_DEL_DEVKEY, ieee802154_llsec_del_devkey), + IEEE802154_DUMP(IEEE802154_LLSEC_LIST_SECLEVEL, NULL, + ieee802154_llsec_dump_seclevels), + IEEE802154_OP(IEEE802154_LLSEC_ADD_SECLEVEL, + ieee802154_llsec_add_seclevel), + IEEE802154_OP(IEEE802154_LLSEC_DEL_SECLEVEL, + ieee802154_llsec_del_seclevel), }; static const struct genl_multicast_group ieee802154_mcgrps[] = { diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index 5d285498c0f6..5617b4c6d6d5 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -715,3 +715,810 @@ out: dev_put(dev); return rc; } + + + +static int +ieee802154_llsec_parse_key_id(struct genl_info *info, + struct ieee802154_llsec_key_id *desc) +{ + memset(desc, 0, sizeof(*desc)); + + if (!info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE]) + return -EINVAL; + + desc->mode = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE]); + + if (desc->mode == IEEE802154_SCF_KEY_IMPLICIT) { + if (!info->attrs[IEEE802154_ATTR_PAN_ID] && + !(info->attrs[IEEE802154_ATTR_SHORT_ADDR] || + info->attrs[IEEE802154_ATTR_HW_ADDR])) + return -EINVAL; + + desc->device_addr.pan_id = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_PAN_ID]); + + if (info->attrs[IEEE802154_ATTR_SHORT_ADDR]) { + desc->device_addr.mode = IEEE802154_ADDR_SHORT; + desc->device_addr.short_addr = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_SHORT_ADDR]); + } else { + desc->device_addr.mode = IEEE802154_ADDR_LONG; + desc->device_addr.extended_addr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]); + } + } + + if (desc->mode != IEEE802154_SCF_KEY_IMPLICIT && + !info->attrs[IEEE802154_ATTR_LLSEC_KEY_ID]) + return -EINVAL; + + if (desc->mode == IEEE802154_SCF_KEY_SHORT_INDEX && + !info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT]) + return -EINVAL; + + if (desc->mode == IEEE802154_SCF_KEY_HW_INDEX && + !info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED]) + return -EINVAL; + + if (desc->mode != IEEE802154_SCF_KEY_IMPLICIT) + desc->id = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_KEY_ID]); + + switch (desc->mode) { + case IEEE802154_SCF_KEY_SHORT_INDEX: + { + u32 source = nla_get_u32(info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT]); + desc->short_source = cpu_to_le32(source); + break; + } + case IEEE802154_SCF_KEY_HW_INDEX: + desc->extended_source = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED]); + break; + } + + return 0; +} + +static int +ieee802154_llsec_fill_key_id(struct sk_buff *msg, + const struct ieee802154_llsec_key_id *desc) +{ + if (nla_put_u8(msg, IEEE802154_ATTR_LLSEC_KEY_MODE, desc->mode)) + return -EMSGSIZE; + + if (desc->mode == IEEE802154_SCF_KEY_IMPLICIT) { + if (nla_put_shortaddr(msg, IEEE802154_ATTR_PAN_ID, + desc->device_addr.pan_id)) + return -EMSGSIZE; + + if (desc->device_addr.mode == IEEE802154_ADDR_SHORT && + nla_put_shortaddr(msg, IEEE802154_ATTR_SHORT_ADDR, + desc->device_addr.short_addr)) + return -EMSGSIZE; + + if (desc->device_addr.mode == IEEE802154_ADDR_LONG && + nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, + desc->device_addr.extended_addr)) + return -EMSGSIZE; + } + + if (desc->mode != IEEE802154_SCF_KEY_IMPLICIT && + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_KEY_ID, desc->id)) + return -EMSGSIZE; + + if (desc->mode == IEEE802154_SCF_KEY_SHORT_INDEX && + nla_put_u32(msg, IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT, + le32_to_cpu(desc->short_source))) + return -EMSGSIZE; + + if (desc->mode == IEEE802154_SCF_KEY_HW_INDEX && + nla_put_hwaddr(msg, IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED, + desc->extended_source)) + return -EMSGSIZE; + + return 0; +} + +int ieee802154_llsec_getparams(struct sk_buff *skb, struct genl_info *info) +{ + struct sk_buff *msg; + struct net_device *dev = NULL; + int rc = -ENOBUFS; + struct ieee802154_mlme_ops *ops; + void *hdr; + struct ieee802154_llsec_params params; + + pr_debug("%s\n", __func__); + + dev = ieee802154_nl_get_dev(info); + if (!dev) + return -ENODEV; + + ops = ieee802154_mlme_ops(dev); + if (!ops->llsec) + return -EOPNOTSUPP; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + goto out_dev; + + hdr = genlmsg_put(msg, 0, info->snd_seq, &nl802154_family, 0, + IEEE802154_LLSEC_GETPARAMS); + if (!hdr) + goto out_free; + + rc = ops->llsec->get_params(dev, ¶ms); + if (rc < 0) + goto out_free; + + if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) || + nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) || + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_ENABLED, params.enabled) || + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_SECLEVEL, params.out_level) || + nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER, + be32_to_cpu(params.frame_counter)) || + ieee802154_llsec_fill_key_id(msg, ¶ms.out_key)) + goto out_free; + + dev_put(dev); + + return ieee802154_nl_reply(msg, info); +out_free: + nlmsg_free(msg); +out_dev: + dev_put(dev); + return rc; +} + +int ieee802154_llsec_setparams(struct sk_buff *skb, struct genl_info *info) +{ + struct net_device *dev = NULL; + int rc = -EINVAL; + struct ieee802154_mlme_ops *ops; + struct ieee802154_llsec_params params; + int changed = 0; + + pr_debug("%s\n", __func__); + + dev = ieee802154_nl_get_dev(info); + if (!dev) + return -ENODEV; + + if (!info->attrs[IEEE802154_ATTR_LLSEC_ENABLED] && + !info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE] && + !info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL]) + goto out; + + ops = ieee802154_mlme_ops(dev); + if (!ops->llsec) { + rc = -EOPNOTSUPP; + goto out; + } + + if (info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL] && + nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL]) > 7) + goto out; + + if (info->attrs[IEEE802154_ATTR_LLSEC_ENABLED]) { + params.enabled = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_ENABLED]); + changed |= IEEE802154_LLSEC_PARAM_ENABLED; + } + + if (info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE]) { + if (ieee802154_llsec_parse_key_id(info, ¶ms.out_key)) + goto out; + + changed |= IEEE802154_LLSEC_PARAM_OUT_KEY; + } + + if (info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL]) { + params.out_level = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL]); + changed |= IEEE802154_LLSEC_PARAM_OUT_LEVEL; + } + + if (info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER]) { + u32 fc = nla_get_u32(info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER]); + + params.frame_counter = cpu_to_be32(fc); + changed |= IEEE802154_LLSEC_PARAM_FRAME_COUNTER; + } + + rc = ops->llsec->set_params(dev, ¶ms, changed); + + dev_put(dev); + + return rc; +out: + dev_put(dev); + return rc; +} + + + +struct llsec_dump_data { + struct sk_buff *skb; + int s_idx, s_idx2; + int portid; + int nlmsg_seq; + struct net_device *dev; + struct ieee802154_mlme_ops *ops; + struct ieee802154_llsec_table *table; +}; + +static int +ieee802154_llsec_dump_table(struct sk_buff *skb, struct netlink_callback *cb, + int (*step)(struct llsec_dump_data*)) +{ + struct net *net = sock_net(skb->sk); + struct net_device *dev; + struct llsec_dump_data data; + int idx = 0; + int first_dev = cb->args[0]; + int rc; + + for_each_netdev(net, dev) { + if (idx < first_dev || dev->type != ARPHRD_IEEE802154) + goto skip; + + data.ops = ieee802154_mlme_ops(dev); + if (!data.ops->llsec) + goto skip; + + data.skb = skb; + data.s_idx = cb->args[1]; + data.s_idx2 = cb->args[2]; + data.dev = dev; + data.portid = NETLINK_CB(cb->skb).portid; + data.nlmsg_seq = cb->nlh->nlmsg_seq; + + data.ops->llsec->lock_table(dev); + data.ops->llsec->get_table(data.dev, &data.table); + rc = step(&data); + data.ops->llsec->unlock_table(dev); + + if (rc < 0) + break; + +skip: + idx++; + } + cb->args[0] = idx; + + return skb->len; +} + +static int +ieee802154_nl_llsec_change(struct sk_buff *skb, struct genl_info *info, + int (*fn)(struct net_device*, struct genl_info*)) +{ + struct net_device *dev = NULL; + int rc = -EINVAL; + + dev = ieee802154_nl_get_dev(info); + if (!dev) + return -ENODEV; + + if (!ieee802154_mlme_ops(dev)->llsec) + rc = -EOPNOTSUPP; + else + rc = fn(dev, info); + + dev_put(dev); + return rc; +} + + + +static int +ieee802154_llsec_parse_key(struct genl_info *info, + struct ieee802154_llsec_key *key) +{ + u8 frames; + u32 commands[256 / 32]; + + memset(key, 0, sizeof(*key)); + + if (!info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES] || + !info->attrs[IEEE802154_ATTR_LLSEC_KEY_BYTES]) + return -EINVAL; + + frames = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES]); + if ((frames & BIT(IEEE802154_FC_TYPE_MAC_CMD)) && + !info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS]) + return -EINVAL; + + if (info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS]) { + nla_memcpy(commands, + info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS], + 256 / 8); + + if (commands[0] || commands[1] || commands[2] || commands[3] || + commands[4] || commands[5] || commands[6] || + commands[7] >= BIT(IEEE802154_CMD_GTS_REQ + 1)) + return -EINVAL; + + key->cmd_frame_ids = commands[7]; + } + + key->frame_types = frames; + + nla_memcpy(key->key, info->attrs[IEEE802154_ATTR_LLSEC_KEY_BYTES], + IEEE802154_LLSEC_KEY_SIZE); + + return 0; +} + +static int llsec_add_key(struct net_device *dev, struct genl_info *info) +{ + struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); + struct ieee802154_llsec_key key; + struct ieee802154_llsec_key_id id; + + if (ieee802154_llsec_parse_key(info, &key) || + ieee802154_llsec_parse_key_id(info, &id)) + return -EINVAL; + + return ops->llsec->add_key(dev, &id, &key); +} + +int ieee802154_llsec_add_key(struct sk_buff *skb, struct genl_info *info) +{ + if ((info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) != + (NLM_F_CREATE | NLM_F_EXCL)) + return -EINVAL; + + return ieee802154_nl_llsec_change(skb, info, llsec_add_key); +} + +static int llsec_remove_key(struct net_device *dev, struct genl_info *info) +{ + struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); + struct ieee802154_llsec_key_id id; + + if (ieee802154_llsec_parse_key_id(info, &id)) + return -EINVAL; + + return ops->llsec->del_key(dev, &id); +} + +int ieee802154_llsec_del_key(struct sk_buff *skb, struct genl_info *info) +{ + return ieee802154_nl_llsec_change(skb, info, llsec_remove_key); +} + +static int +ieee802154_nl_fill_key(struct sk_buff *msg, u32 portid, u32 seq, + const struct ieee802154_llsec_key_entry *key, + const struct net_device *dev) +{ + void *hdr; + u32 commands[256 / 32]; + + hdr = genlmsg_put(msg, 0, seq, &nl802154_family, NLM_F_MULTI, + IEEE802154_LLSEC_LIST_KEY); + if (!hdr) + goto out; + + if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) || + nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) || + ieee802154_llsec_fill_key_id(msg, &key->id) || + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES, + key->key->frame_types)) + goto nla_put_failure; + + if (key->key->frame_types & BIT(IEEE802154_FC_TYPE_MAC_CMD)) { + memset(commands, 0, sizeof(commands)); + commands[7] = key->key->cmd_frame_ids; + if (nla_put(msg, IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS, + sizeof(commands), commands)) + goto nla_put_failure; + } + + if (nla_put(msg, IEEE802154_ATTR_LLSEC_KEY_BYTES, + IEEE802154_LLSEC_KEY_SIZE, key->key->key)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +out: + return -EMSGSIZE; +} + +static int llsec_iter_keys(struct llsec_dump_data *data) +{ + struct ieee802154_llsec_key_entry *pos; + int rc = 0, idx = 0; + + list_for_each_entry(pos, &data->table->keys, list) { + if (idx++ < data->s_idx) + continue; + + if (ieee802154_nl_fill_key(data->skb, data->portid, + data->nlmsg_seq, pos, data->dev)) { + rc = -EMSGSIZE; + break; + } + + data->s_idx++; + } + + return rc; +} + +int ieee802154_llsec_dump_keys(struct sk_buff *skb, struct netlink_callback *cb) +{ + return ieee802154_llsec_dump_table(skb, cb, llsec_iter_keys); +} + + + +static int +llsec_parse_dev(struct genl_info *info, + struct ieee802154_llsec_device *dev) +{ + memset(dev, 0, sizeof(*dev)); + + if (!info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER] || + !info->attrs[IEEE802154_ATTR_HW_ADDR] || + !info->attrs[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE] || + !info->attrs[IEEE802154_ATTR_LLSEC_DEV_KEY_MODE] || + (!!info->attrs[IEEE802154_ATTR_PAN_ID] != + !!info->attrs[IEEE802154_ATTR_SHORT_ADDR])) + return -EINVAL; + + if (info->attrs[IEEE802154_ATTR_PAN_ID]) { + dev->pan_id = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_PAN_ID]); + dev->short_addr = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_SHORT_ADDR]); + } else { + dev->short_addr = cpu_to_le16(IEEE802154_ADDR_UNDEF); + } + + dev->hwaddr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]); + dev->frame_counter = nla_get_u32(info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER]); + dev->seclevel_exempt = !!nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE]); + dev->key_mode = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_DEV_KEY_MODE]); + + if (dev->key_mode >= __IEEE802154_LLSEC_DEVKEY_MAX) + return -EINVAL; + + return 0; +} + +static int llsec_add_dev(struct net_device *dev, struct genl_info *info) +{ + struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); + struct ieee802154_llsec_device desc; + + if (llsec_parse_dev(info, &desc)) + return -EINVAL; + + return ops->llsec->add_dev(dev, &desc); +} + +int ieee802154_llsec_add_dev(struct sk_buff *skb, struct genl_info *info) +{ + if ((info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) != + (NLM_F_CREATE | NLM_F_EXCL)) + return -EINVAL; + + return ieee802154_nl_llsec_change(skb, info, llsec_add_dev); +} + +static int llsec_del_dev(struct net_device *dev, struct genl_info *info) +{ + struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); + __le64 devaddr; + + if (!info->attrs[IEEE802154_ATTR_HW_ADDR]) + return -EINVAL; + + devaddr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]); + + return ops->llsec->del_dev(dev, devaddr); +} + +int ieee802154_llsec_del_dev(struct sk_buff *skb, struct genl_info *info) +{ + return ieee802154_nl_llsec_change(skb, info, llsec_del_dev); +} + +static int +ieee802154_nl_fill_dev(struct sk_buff *msg, u32 portid, u32 seq, + const struct ieee802154_llsec_device *desc, + const struct net_device *dev) +{ + void *hdr; + + hdr = genlmsg_put(msg, 0, seq, &nl802154_family, NLM_F_MULTI, + IEEE802154_LLSEC_LIST_DEV); + if (!hdr) + goto out; + + if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) || + nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) || + nla_put_shortaddr(msg, IEEE802154_ATTR_PAN_ID, desc->pan_id) || + nla_put_shortaddr(msg, IEEE802154_ATTR_SHORT_ADDR, + desc->short_addr) || + nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, desc->hwaddr) || + nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER, + desc->frame_counter) || + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_DEV_OVERRIDE, + desc->seclevel_exempt) || + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_DEV_KEY_MODE, desc->key_mode)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +out: + return -EMSGSIZE; +} + +static int llsec_iter_devs(struct llsec_dump_data *data) +{ + struct ieee802154_llsec_device *pos; + int rc = 0, idx = 0; + + list_for_each_entry(pos, &data->table->devices, list) { + if (idx++ < data->s_idx) + continue; + + if (ieee802154_nl_fill_dev(data->skb, data->portid, + data->nlmsg_seq, pos, data->dev)) { + rc = -EMSGSIZE; + break; + } + + data->s_idx++; + } + + return rc; +} + +int ieee802154_llsec_dump_devs(struct sk_buff *skb, struct netlink_callback *cb) +{ + return ieee802154_llsec_dump_table(skb, cb, llsec_iter_devs); +} + + + +static int llsec_add_devkey(struct net_device *dev, struct genl_info *info) +{ + struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); + struct ieee802154_llsec_device_key key; + __le64 devaddr; + + if (!info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER] || + !info->attrs[IEEE802154_ATTR_HW_ADDR] || + ieee802154_llsec_parse_key_id(info, &key.key_id)) + return -EINVAL; + + devaddr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]); + key.frame_counter = nla_get_u32(info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER]); + + return ops->llsec->add_devkey(dev, devaddr, &key); +} + +int ieee802154_llsec_add_devkey(struct sk_buff *skb, struct genl_info *info) +{ + if ((info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) != + (NLM_F_CREATE | NLM_F_EXCL)) + return -EINVAL; + + return ieee802154_nl_llsec_change(skb, info, llsec_add_devkey); +} + +static int llsec_del_devkey(struct net_device *dev, struct genl_info *info) +{ + struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); + struct ieee802154_llsec_device_key key; + __le64 devaddr; + + if (!info->attrs[IEEE802154_ATTR_HW_ADDR] || + ieee802154_llsec_parse_key_id(info, &key.key_id)) + return -EINVAL; + + devaddr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]); + + return ops->llsec->del_devkey(dev, devaddr, &key); +} + +int ieee802154_llsec_del_devkey(struct sk_buff *skb, struct genl_info *info) +{ + return ieee802154_nl_llsec_change(skb, info, llsec_del_devkey); +} + +static int +ieee802154_nl_fill_devkey(struct sk_buff *msg, u32 portid, u32 seq, + __le64 devaddr, + const struct ieee802154_llsec_device_key *devkey, + const struct net_device *dev) +{ + void *hdr; + + hdr = genlmsg_put(msg, 0, seq, &nl802154_family, NLM_F_MULTI, + IEEE802154_LLSEC_LIST_DEVKEY); + if (!hdr) + goto out; + + if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) || + nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) || + nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, devaddr) || + nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER, + devkey->frame_counter) || + ieee802154_llsec_fill_key_id(msg, &devkey->key_id)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +out: + return -EMSGSIZE; +} + +static int llsec_iter_devkeys(struct llsec_dump_data *data) +{ + struct ieee802154_llsec_device *dpos; + struct ieee802154_llsec_device_key *kpos; + int rc = 0, idx = 0, idx2; + + list_for_each_entry(dpos, &data->table->devices, list) { + if (idx++ < data->s_idx) + continue; + + idx2 = 0; + + list_for_each_entry(kpos, &dpos->keys, list) { + if (idx2++ < data->s_idx2) + continue; + + if (ieee802154_nl_fill_devkey(data->skb, data->portid, + data->nlmsg_seq, + dpos->hwaddr, kpos, + data->dev)) { + return rc = -EMSGSIZE; + } + + data->s_idx2++; + } + + data->s_idx++; + } + + return rc; +} + +int ieee802154_llsec_dump_devkeys(struct sk_buff *skb, + struct netlink_callback *cb) +{ + return ieee802154_llsec_dump_table(skb, cb, llsec_iter_devkeys); +} + + + +static int +llsec_parse_seclevel(struct genl_info *info, + struct ieee802154_llsec_seclevel *sl) +{ + memset(sl, 0, sizeof(*sl)); + + if (!info->attrs[IEEE802154_ATTR_LLSEC_FRAME_TYPE] || + !info->attrs[IEEE802154_ATTR_LLSEC_SECLEVELS] || + !info->attrs[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE]) + return -EINVAL; + + sl->frame_type = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_FRAME_TYPE]); + if (sl->frame_type == IEEE802154_FC_TYPE_MAC_CMD) { + if (!info->attrs[IEEE802154_ATTR_LLSEC_CMD_FRAME_ID]) + return -EINVAL; + + sl->cmd_frame_id = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_CMD_FRAME_ID]); + } + + sl->sec_levels = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_SECLEVELS]); + sl->device_override = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE]); + + return 0; +} + +static int llsec_add_seclevel(struct net_device *dev, struct genl_info *info) +{ + struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); + struct ieee802154_llsec_seclevel sl; + + if (llsec_parse_seclevel(info, &sl)) + return -EINVAL; + + return ops->llsec->add_seclevel(dev, &sl); +} + +int ieee802154_llsec_add_seclevel(struct sk_buff *skb, struct genl_info *info) +{ + if ((info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) != + (NLM_F_CREATE | NLM_F_EXCL)) + return -EINVAL; + + return ieee802154_nl_llsec_change(skb, info, llsec_add_seclevel); +} + +static int llsec_del_seclevel(struct net_device *dev, struct genl_info *info) +{ + struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); + struct ieee802154_llsec_seclevel sl; + + if (llsec_parse_seclevel(info, &sl)) + return -EINVAL; + + return ops->llsec->del_seclevel(dev, &sl); +} + +int ieee802154_llsec_del_seclevel(struct sk_buff *skb, struct genl_info *info) +{ + return ieee802154_nl_llsec_change(skb, info, llsec_del_seclevel); +} + +static int +ieee802154_nl_fill_seclevel(struct sk_buff *msg, u32 portid, u32 seq, + const struct ieee802154_llsec_seclevel *sl, + const struct net_device *dev) +{ + void *hdr; + + hdr = genlmsg_put(msg, 0, seq, &nl802154_family, NLM_F_MULTI, + IEEE802154_LLSEC_LIST_SECLEVEL); + if (!hdr) + goto out; + + if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) || + nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) || + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_FRAME_TYPE, sl->frame_type) || + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_SECLEVELS, sl->sec_levels) || + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_DEV_OVERRIDE, + sl->device_override)) + goto nla_put_failure; + + if (sl->frame_type == IEEE802154_FC_TYPE_MAC_CMD && + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_CMD_FRAME_ID, + sl->cmd_frame_id)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +out: + return -EMSGSIZE; +} + +static int llsec_iter_seclevels(struct llsec_dump_data *data) +{ + struct ieee802154_llsec_seclevel *pos; + int rc = 0, idx = 0; + + list_for_each_entry(pos, &data->table->security_levels, list) { + if (idx++ < data->s_idx) + continue; + + if (ieee802154_nl_fill_seclevel(data->skb, data->portid, + data->nlmsg_seq, pos, + data->dev)) { + rc = -EMSGSIZE; + break; + } + + data->s_idx++; + } + + return rc; +} + +int ieee802154_llsec_dump_seclevels(struct sk_buff *skb, + struct netlink_callback *cb) +{ + return ieee802154_llsec_dump_table(skb, cb, llsec_iter_seclevels); +} diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c index fd7be5e45cef..3a703ab88348 100644 --- a/net/ieee802154/nl_policy.c +++ b/net/ieee802154/nl_policy.c @@ -62,5 +62,21 @@ const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = { [IEEE802154_ATTR_CSMA_MAX_BE] = { .type = NLA_U8, }, [IEEE802154_ATTR_FRAME_RETRIES] = { .type = NLA_S8, }, + + [IEEE802154_ATTR_LLSEC_ENABLED] = { .type = NLA_U8, }, + [IEEE802154_ATTR_LLSEC_SECLEVEL] = { .type = NLA_U8, }, + [IEEE802154_ATTR_LLSEC_KEY_MODE] = { .type = NLA_U8, }, + [IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT] = { .type = NLA_U32, }, + [IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED] = { .type = NLA_HW_ADDR, }, + [IEEE802154_ATTR_LLSEC_KEY_ID] = { .type = NLA_U8, }, + [IEEE802154_ATTR_LLSEC_FRAME_COUNTER] = { .type = NLA_U32 }, + [IEEE802154_ATTR_LLSEC_KEY_BYTES] = { .len = 16, }, + [IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES] = { .type = NLA_U8, }, + [IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS] = { .len = 258 / 8 }, + [IEEE802154_ATTR_LLSEC_FRAME_TYPE] = { .type = NLA_U8, }, + [IEEE802154_ATTR_LLSEC_CMD_FRAME_ID] = { .type = NLA_U8, }, + [IEEE802154_ATTR_LLSEC_SECLEVELS] = { .type = NLA_U8, }, + [IEEE802154_ATTR_LLSEC_DEV_OVERRIDE] = { .type = NLA_U8, }, + [IEEE802154_ATTR_LLSEC_DEV_KEY_MODE] = { .type = NLA_U8, }, }; -- cgit v1.2.3-59-g8ed1b From 6c4e548ff36672eeb78f8288a2920d66fa4a6a66 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Fri, 16 May 2014 21:48:22 +0200 Subject: net: cdc_ncm: use ethtool to tune coalescing settings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Datagram coalescing is an integral part of the NCM and MBIM protocols, intended to reduce the interrupt load primarily on the device end of the USB link. As with all coalescing solutions, there is a trade-off between buffering and interrupts. The current defaults are based on the assumption that device side buffers should be the limiting factor. However, many modern high speed LTE modems suffers from buffer-bloat, making this assumption fail. This results in sub-optimal performance due to excessive coalescing. And in cases where such modems are connected to cheap embedded hosts there is often severe buffer allocation issues, giving very noticeable performance degradation . A start on improving this is going from build time hard coded limits to per device user configurable limits. The ethtool coalescing API was selected as user interface because, although the tuned values are buffer sizes, these settings directly control datagram coalescing. Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 71 +++++++++++++++++++++++++++++++++++++++++++-- include/linux/usb/cdc_ncm.h | 6 +++- 2 files changed, 74 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 2ec3790a4db8..141dbec912be 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -65,6 +65,67 @@ static void cdc_ncm_tx_timeout_start(struct cdc_ncm_ctx *ctx); static enum hrtimer_restart cdc_ncm_tx_timer_cb(struct hrtimer *hr_timer); static struct usb_driver cdc_ncm_driver; +static int cdc_ncm_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec) +{ + struct usbnet *dev = netdev_priv(netdev); + struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; + + /* assuming maximum sized dgrams and ignoring NDPs */ + ec->rx_max_coalesced_frames = ctx->rx_max / ctx->max_datagram_size; + ec->tx_max_coalesced_frames = ctx->tx_max / ctx->max_datagram_size; + + /* the timer will fire CDC_NCM_TIMER_PENDING_CNT times in a row */ + ec->tx_coalesce_usecs = (ctx->timer_interval * CDC_NCM_TIMER_PENDING_CNT) / NSEC_PER_USEC; + return 0; +} + +static void cdc_ncm_update_rxtx_max(struct usbnet *dev, u32 new_rx, u32 new_tx); + +static int cdc_ncm_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec) +{ + struct usbnet *dev = netdev_priv(netdev); + struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; + u32 new_rx_max = ctx->rx_max; + u32 new_tx_max = ctx->tx_max; + + /* assuming maximum sized dgrams and a single NDP */ + if (ec->rx_max_coalesced_frames) + new_rx_max = ec->rx_max_coalesced_frames * ctx->max_datagram_size; + if (ec->tx_max_coalesced_frames) + new_tx_max = ec->tx_max_coalesced_frames * ctx->max_datagram_size; + + if (ec->tx_coalesce_usecs && + (ec->tx_coalesce_usecs < CDC_NCM_TIMER_INTERVAL_MIN * CDC_NCM_TIMER_PENDING_CNT || + ec->tx_coalesce_usecs > CDC_NCM_TIMER_INTERVAL_MAX * CDC_NCM_TIMER_PENDING_CNT)) + return -EINVAL; + + spin_lock_bh(&ctx->mtx); + ctx->timer_interval = ec->tx_coalesce_usecs * NSEC_PER_USEC / CDC_NCM_TIMER_PENDING_CNT; + if (!ctx->timer_interval) + ctx->tx_timer_pending = 0; + spin_unlock_bh(&ctx->mtx); + + /* inform device of new values */ + if (new_rx_max != ctx->rx_max || new_tx_max != ctx->tx_max) + cdc_ncm_update_rxtx_max(dev, new_rx_max, new_tx_max); + return 0; +} + +static const struct ethtool_ops cdc_ncm_ethtool_ops = { + .get_settings = usbnet_get_settings, + .set_settings = usbnet_set_settings, + .get_link = usbnet_get_link, + .nway_reset = usbnet_nway_reset, + .get_drvinfo = usbnet_get_drvinfo, + .get_msglevel = usbnet_get_msglevel, + .set_msglevel = usbnet_set_msglevel, + .get_ts_info = ethtool_op_get_ts_info, + .get_coalesce = cdc_ncm_get_coalesce, + .set_coalesce = cdc_ncm_set_coalesce, +}; + /* handle rx_max and tx_max changes */ static void cdc_ncm_update_rxtx_max(struct usbnet *dev, u32 new_rx, u32 new_tx) { @@ -257,6 +318,9 @@ static int cdc_ncm_init(struct usbnet *dev) (ctx->tx_max_datagrams > CDC_NCM_DPT_DATAGRAMS_MAX)) ctx->tx_max_datagrams = CDC_NCM_DPT_DATAGRAMS_MAX; + /* initial coalescing timer interval */ + ctx->timer_interval = CDC_NCM_TIMER_INTERVAL_USEC * NSEC_PER_USEC; + return 0; } @@ -596,6 +660,9 @@ advance: /* finish setting up the device specific data */ cdc_ncm_setup(dev); + /* override ethtool_ops */ + dev->net->ethtool_ops = &cdc_ncm_ethtool_ops; + return 0; error2: @@ -863,7 +930,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) ctx->tx_curr_skb = skb_out; goto exit_no_skb; - } else if ((n < ctx->tx_max_datagrams) && (ready2send == 0)) { + } else if ((n < ctx->tx_max_datagrams) && (ready2send == 0) && (ctx->timer_interval > 0)) { /* wait for more frames */ /* push variables */ ctx->tx_curr_skb = skb_out; @@ -915,7 +982,7 @@ static void cdc_ncm_tx_timeout_start(struct cdc_ncm_ctx *ctx) /* start timer, if not already started */ if (!(hrtimer_active(&ctx->tx_timer) || atomic_read(&ctx->stop))) hrtimer_start(&ctx->tx_timer, - ktime_set(0, CDC_NCM_TIMER_INTERVAL), + ktime_set(0, ctx->timer_interval), HRTIMER_MODE_REL); } diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 55b6feead93b..5c1066b4dc41 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -72,7 +72,9 @@ /* Restart the timer, if amount of datagrams is less than given value */ #define CDC_NCM_RESTART_TIMER_DATAGRAM_CNT 3 #define CDC_NCM_TIMER_PENDING_CNT 2 -#define CDC_NCM_TIMER_INTERVAL (400UL * NSEC_PER_USEC) +#define CDC_NCM_TIMER_INTERVAL_USEC 400UL +#define CDC_NCM_TIMER_INTERVAL_MIN 5UL +#define CDC_NCM_TIMER_INTERVAL_MAX (15UL * USEC_PER_SEC) /* The following macro defines the minimum header space */ #define CDC_NCM_MIN_HDR_SIZE \ @@ -107,6 +109,8 @@ struct cdc_ncm_ctx { spinlock_t mtx; atomic_t stop; + u64 timer_interval; + u32 tx_timer_pending; u32 tx_curr_frame_num; u32 rx_max; -- cgit v1.2.3-59-g8ed1b From 70559b8970e52aa9962dc823fd4498af06809544 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Fri, 16 May 2014 21:48:23 +0200 Subject: net: cdc_ncm: use true max dgram count for header estimates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Many newer NCM and MBIM devices will request a maximum tx datagram count which is much smaller than our hard-coded absolute max. We can reduce the overhead without sacrificing any of the simplicity for these devices, by simply using the true negotiated count in when calculated the maximum NTH and NDP header sizes. Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 9 ++++++--- include/linux/usb/cdc_ncm.h | 10 +--------- 2 files changed, 7 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 141dbec912be..b9b562b9128a 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -174,7 +174,7 @@ static void cdc_ncm_update_rxtx_max(struct usbnet *dev, u32 new_rx, u32 new_tx) } /* clamp new_tx to sane values */ - min = CDC_NCM_MIN_HDR_SIZE + ctx->max_datagram_size; + min = ctx->max_datagram_size + ctx->max_ndp_size + sizeof(struct usb_cdc_ncm_nth16); max = min_t(u32, CDC_NCM_NTB_MAX_SIZE_TX, le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize)); /* some devices set dwNtbOutMaxSize too low for the above default */ @@ -318,6 +318,9 @@ static int cdc_ncm_init(struct usbnet *dev) (ctx->tx_max_datagrams > CDC_NCM_DPT_DATAGRAMS_MAX)) ctx->tx_max_datagrams = CDC_NCM_DPT_DATAGRAMS_MAX; + /* set up maximum NDP size */ + ctx->max_ndp_size = sizeof(struct usb_cdc_ncm_ndp16) + (ctx->tx_max_datagrams + 1) * sizeof(struct usb_cdc_ncm_dpe16); + /* initial coalescing timer interval */ ctx->timer_interval = CDC_NCM_TIMER_INTERVAL_USEC * NSEC_PER_USEC; @@ -800,7 +803,7 @@ static struct usb_cdc_ncm_ndp16 *cdc_ncm_ndp(struct cdc_ncm_ctx *ctx, struct sk_ cdc_ncm_align_tail(skb, ctx->tx_ndp_modulus, 0, ctx->tx_max); /* verify that there is room for the NDP and the datagram (reserve) */ - if ((ctx->tx_max - skb->len - reserve) < CDC_NCM_NDP_SIZE) + if ((ctx->tx_max - skb->len - reserve) < ctx->max_ndp_size) return NULL; /* link to it */ @@ -810,7 +813,7 @@ static struct usb_cdc_ncm_ndp16 *cdc_ncm_ndp(struct cdc_ncm_ctx *ctx, struct sk_ nth16->wNdpIndex = cpu_to_le16(skb->len); /* push a new empty NDP */ - ndp16 = (struct usb_cdc_ncm_ndp16 *)memset(skb_put(skb, CDC_NCM_NDP_SIZE), 0, CDC_NCM_NDP_SIZE); + ndp16 = (struct usb_cdc_ncm_ndp16 *)memset(skb_put(skb, ctx->max_ndp_size), 0, ctx->max_ndp_size); ndp16->dwSignature = sign; ndp16->wLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_ndp16) + sizeof(struct usb_cdc_ncm_dpe16)); return ndp16; diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 5c1066b4dc41..60a44b8a464e 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -76,15 +76,6 @@ #define CDC_NCM_TIMER_INTERVAL_MIN 5UL #define CDC_NCM_TIMER_INTERVAL_MAX (15UL * USEC_PER_SEC) -/* The following macro defines the minimum header space */ -#define CDC_NCM_MIN_HDR_SIZE \ - (sizeof(struct usb_cdc_ncm_nth16) + sizeof(struct usb_cdc_ncm_ndp16) + \ - (CDC_NCM_DPT_DATAGRAMS_MAX + 1) * sizeof(struct usb_cdc_ncm_dpe16)) - -#define CDC_NCM_NDP_SIZE \ - (sizeof(struct usb_cdc_ncm_ndp16) + \ - (CDC_NCM_DPT_DATAGRAMS_MAX + 1) * sizeof(struct usb_cdc_ncm_dpe16)) - #define cdc_ncm_comm_intf_is_mbim(x) ((x)->desc.bInterfaceSubClass == USB_CDC_SUBCLASS_MBIM && \ (x)->desc.bInterfaceProtocol == USB_CDC_PROTO_NONE) #define cdc_ncm_data_intf_is_mbim(x) ((x)->desc.bInterfaceProtocol == USB_CDC_MBIM_PROTO_NTB) @@ -110,6 +101,7 @@ struct cdc_ncm_ctx { atomic_t stop; u64 timer_interval; + u32 max_ndp_size; u32 tx_timer_pending; u32 tx_curr_frame_num; -- cgit v1.2.3-59-g8ed1b From 43e4c6dfc0fd781e68f20caf563a06f5c6ece995 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Fri, 16 May 2014 21:48:24 +0200 Subject: net: cdc_ncm: set reasonable padding limits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We pad frames larger than X to maximum size for devices which don't need a ZLP after maximum sized frames. This allows the device to optimize its transfers for one fixed buffer size. X was arbitrarily set at 512 bytes regardless of real buffer maximum, causing extreme overheads due to excessive padding of larger tx buffers. Limit the padding to at most 3 full USB packets, still allowing the overhead to payload ratio of 3/1. Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 8 ++++++-- include/linux/usb/cdc_ncm.h | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index b9b562b9128a..9592d4669435 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -213,6 +213,10 @@ static void cdc_ncm_update_rxtx_max(struct usbnet *dev, u32 new_rx, u32 new_tx) /* max qlen depend on hard_mtu and rx_urb_size */ usbnet_update_max_qlen(dev); + + /* never pad more than 3 full USB packets per transfer */ + ctx->min_tx_pkt = clamp_t(u16, ctx->tx_max - 3 * usb_maxpacket(dev->udev, dev->out, 1), + CDC_NCM_MIN_TX_PKT, ctx->tx_max); } /* helpers for NCM and MBIM differences */ @@ -947,7 +951,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) /* variables will be reset at next call */ } - /* If collected data size is less or equal CDC_NCM_MIN_TX_PKT + /* If collected data size is less or equal ctx->min_tx_pkt * bytes, we send buffers as it is. If we get more data, it * would be more efficient for USB HS mobile device with DMA * engine to receive a full size NTB, than canceling DMA @@ -957,7 +961,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) * a ZLP after full sized NTBs. */ if (!(dev->driver_info->flags & FLAG_SEND_ZLP) && - skb_out->len > CDC_NCM_MIN_TX_PKT) + skb_out->len > ctx->min_tx_pkt) memset(skb_put(skb_out, ctx->tx_max - skb_out->len), 0, ctx->tx_max - skb_out->len); else if (skb_out->len < ctx->tx_max && (skb_out->len % dev->maxpacket) == 0) diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 60a44b8a464e..79de6724d398 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -115,6 +115,7 @@ struct cdc_ncm_ctx { u16 tx_seq; u16 rx_seq; u16 connected; + u16 min_tx_pkt; }; u8 cdc_ncm_select_altsetting(struct usb_interface *intf); -- cgit v1.2.3-59-g8ed1b From beeecd42c3b41d17d0bf1d839db99274c287f514 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Fri, 16 May 2014 21:48:25 +0200 Subject: net: cdc_ncm/cdc_mbim: adding NCM protocol statistics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To have an idea of the effects of the protocol coalescing it's useful to have some counters showing the different aspects. Due to the asymmetrical usbnet interface the netdev rx_bytes counter has been counting real received payload, while the tx_bytes counter has included the NCM/MBIM framing overhead. This overhead can be many times the payload because of the aggressive padding strategy of this driver, and will vary a lot depending on device and traffic. With very few exceptions, users are only interested in the payload size. Having an somewhat accurate payload byte counter is particularly important for mobile broadband devices, which many NCM devices and of course all MBIM devices are. Users and userspace applications will use this counter to monitor account quotas. Having protocol specific counters for the overhead, we are now able to correct the tx_bytes netdev counter so that it shows the real payload Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_mbim.c | 6 +++ drivers/net/usb/cdc_ncm.c | 91 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/usb/cdc_ncm.h | 11 ++++++ 3 files changed, 108 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index bc23273d0455..5ee7a1dbc023 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -420,6 +420,7 @@ static int cdc_mbim_rx_fixup(struct usbnet *dev, struct sk_buff *skb_in) struct usb_cdc_ncm_dpe16 *dpe16; int ndpoffset; int loopcount = 50; /* arbitrary max preventing infinite loop */ + u32 payload = 0; u8 *c; u16 tci; @@ -482,6 +483,7 @@ next_ndp: if (!skb) goto error; usbnet_skb_return(dev, skb); + payload += len; /* count payload bytes in this NTB */ } } err_ndp: @@ -490,6 +492,10 @@ err_ndp: if (ndpoffset && loopcount--) goto next_ndp; + /* update stats */ + ctx->rx_overhead += skb_in->len - payload; + ctx->rx_ntbs++; + return 1; error: return 0; diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 9592d4669435..f4b439847d04 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -65,6 +65,68 @@ static void cdc_ncm_tx_timeout_start(struct cdc_ncm_ctx *ctx); static enum hrtimer_restart cdc_ncm_tx_timer_cb(struct hrtimer *hr_timer); static struct usb_driver cdc_ncm_driver; +struct cdc_ncm_stats { + char stat_string[ETH_GSTRING_LEN]; + int sizeof_stat; + int stat_offset; +}; + +#define CDC_NCM_STAT(str, m) { \ + .stat_string = str, \ + .sizeof_stat = sizeof(((struct cdc_ncm_ctx *)0)->m), \ + .stat_offset = offsetof(struct cdc_ncm_ctx, m) } +#define CDC_NCM_SIMPLE_STAT(m) CDC_NCM_STAT(__stringify(m), m) + +static const struct cdc_ncm_stats cdc_ncm_gstrings_stats[] = { + CDC_NCM_SIMPLE_STAT(tx_reason_ntb_full), + CDC_NCM_SIMPLE_STAT(tx_reason_ndp_full), + CDC_NCM_SIMPLE_STAT(tx_reason_timeout), + CDC_NCM_SIMPLE_STAT(tx_reason_max_datagram), + CDC_NCM_SIMPLE_STAT(tx_overhead), + CDC_NCM_SIMPLE_STAT(tx_ntbs), + CDC_NCM_SIMPLE_STAT(rx_overhead), + CDC_NCM_SIMPLE_STAT(rx_ntbs), +}; + +static int cdc_ncm_get_sset_count(struct net_device __always_unused *netdev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return ARRAY_SIZE(cdc_ncm_gstrings_stats); + default: + return -EOPNOTSUPP; + } +} + +static void cdc_ncm_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats __always_unused *stats, + u64 *data) +{ + struct usbnet *dev = netdev_priv(netdev); + struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; + int i; + char *p = NULL; + + for (i = 0; i < ARRAY_SIZE(cdc_ncm_gstrings_stats); i++) { + p = (char *)ctx + cdc_ncm_gstrings_stats[i].stat_offset; + data[i] = (cdc_ncm_gstrings_stats[i].sizeof_stat == sizeof(u64)) ? *(u64 *)p : *(u32 *)p; + } +} + +static void cdc_ncm_get_strings(struct net_device __always_unused *netdev, u32 stringset, u8 *data) +{ + u8 *p = data; + int i; + + switch (stringset) { + case ETH_SS_STATS: + for (i = 0; i < ARRAY_SIZE(cdc_ncm_gstrings_stats); i++) { + memcpy(p, cdc_ncm_gstrings_stats[i].stat_string, ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + } +} + static int cdc_ncm_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec) { @@ -122,6 +184,9 @@ static const struct ethtool_ops cdc_ncm_ethtool_ops = { .get_msglevel = usbnet_get_msglevel, .set_msglevel = usbnet_set_msglevel, .get_ts_info = ethtool_op_get_ts_info, + .get_sset_count = cdc_ncm_get_sset_count, + .get_strings = cdc_ncm_get_strings, + .get_ethtool_stats = cdc_ncm_get_ethtool_stats, .get_coalesce = cdc_ncm_get_coalesce, .set_coalesce = cdc_ncm_set_coalesce, }; @@ -862,6 +927,9 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) /* count total number of frames in this NTB */ ctx->tx_curr_frame_num = 0; + + /* recent payload counter for this skb_out */ + ctx->tx_curr_frame_payload = 0; } for (n = ctx->tx_curr_frame_num; n < ctx->tx_max_datagrams; n++) { @@ -899,6 +967,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) ctx->tx_rem_sign = sign; skb = NULL; ready2send = 1; + ctx->tx_reason_ntb_full++; /* count reason for transmitting */ } break; } @@ -912,12 +981,14 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) ndp16->dpe16[index].wDatagramIndex = cpu_to_le16(skb_out->len); ndp16->wLength = cpu_to_le16(ndplen + sizeof(struct usb_cdc_ncm_dpe16)); memcpy(skb_put(skb_out, skb->len), skb->data, skb->len); + ctx->tx_curr_frame_payload += skb->len; /* count real tx payload data */ dev_kfree_skb_any(skb); skb = NULL; /* send now if this NDP is full */ if (index >= CDC_NCM_DPT_DATAGRAMS_MAX) { ready2send = 1; + ctx->tx_reason_ndp_full++; /* count reason for transmitting */ break; } } @@ -947,6 +1018,8 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) goto exit_no_skb; } else { + if (n == ctx->tx_max_datagrams) + ctx->tx_reason_max_datagram++; /* count reason for transmitting */ /* frame goes out */ /* variables will be reset at next call */ } @@ -974,6 +1047,17 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) /* return skb */ ctx->tx_curr_skb = NULL; dev->net->stats.tx_packets += ctx->tx_curr_frame_num; + + /* keep private stats: framing overhead and number of NTBs */ + ctx->tx_overhead += skb_out->len - ctx->tx_curr_frame_payload; + ctx->tx_ntbs++; + + /* usbnet has already counted all the framing overhead. + * Adjust the stats so that the tx_bytes counter show real + * payload data instead. + */ + dev->net->stats.tx_bytes -= skb_out->len - ctx->tx_curr_frame_payload; + return skb_out; exit_no_skb: @@ -1014,6 +1098,7 @@ static void cdc_ncm_txpath_bh(unsigned long param) cdc_ncm_tx_timeout_start(ctx); spin_unlock_bh(&ctx->mtx); } else if (dev->net != NULL) { + ctx->tx_reason_timeout++; /* count reason for transmitting */ spin_unlock_bh(&ctx->mtx); netif_tx_lock_bh(dev->net); usbnet_start_xmit(NULL, dev->net); @@ -1149,6 +1234,7 @@ int cdc_ncm_rx_fixup(struct usbnet *dev, struct sk_buff *skb_in) struct usb_cdc_ncm_dpe16 *dpe16; int ndpoffset; int loopcount = 50; /* arbitrary max preventing infinite loop */ + u32 payload = 0; ndpoffset = cdc_ncm_rx_verify_nth16(ctx, skb_in); if (ndpoffset < 0) @@ -1201,6 +1287,7 @@ next_ndp: skb->data = ((u8 *)skb_in->data) + offset; skb_set_tail_pointer(skb, len); usbnet_skb_return(dev, skb); + payload += len; /* count payload bytes in this NTB */ } } err_ndp: @@ -1209,6 +1296,10 @@ err_ndp: if (ndpoffset && loopcount--) goto next_ndp; + /* update stats */ + ctx->rx_overhead += skb_in->len - payload; + ctx->rx_ntbs++; + return 1; error: return 0; diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 79de6724d398..88d2d7f1820f 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -116,6 +116,17 @@ struct cdc_ncm_ctx { u16 rx_seq; u16 connected; u16 min_tx_pkt; + + /* statistics */ + u32 tx_curr_frame_payload; + u32 tx_reason_ntb_full; + u32 tx_reason_ndp_full; + u32 tx_reason_timeout; + u32 tx_reason_max_datagram; + u64 tx_overhead; + u64 tx_ntbs; + u64 rx_overhead; + u64 rx_ntbs; }; u8 cdc_ncm_select_altsetting(struct usb_interface *intf); -- cgit v1.2.3-59-g8ed1b From 50f1cb1cc8f50fa88dbeaf990ae2bae91b9ff306 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Fri, 16 May 2014 21:48:26 +0200 Subject: net: cdc_ncm: use sane defaults for rx/tx buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lots of devices request much larger buffers than reasonable. This cause real problems for users of hosts with limited resources. Reducing the default buffer size to 16kB for such devices is a reasonable trade-off between allowing them to aggregate traffic and avoiding memory exhaustion on resource restrained hosts. Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 12 ++++++++++-- include/linux/usb/cdc_ncm.h | 4 ++++ 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index f4b439847d04..bb53abe1f3a1 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -492,10 +492,18 @@ static void cdc_ncm_fix_modulus(struct usbnet *dev) static int cdc_ncm_setup(struct usbnet *dev) { struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; + u32 def_rx, def_tx; + + /* be conservative when selecting intial buffer size to + * increase the number of hosts this will work for + */ + def_rx = min_t(u32, CDC_NCM_NTB_DEF_SIZE_RX, + le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize)); + def_tx = min_t(u32, CDC_NCM_NTB_DEF_SIZE_TX, + le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize)); /* clamp rx_max and tx_max and inform device */ - cdc_ncm_update_rxtx_max(dev, le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize), - le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize)); + cdc_ncm_update_rxtx_max(dev, def_rx, def_tx); /* sanitize the modulus and remainder values */ cdc_ncm_fix_modulus(dev); diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 88d2d7f1820f..cde506731c48 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -52,6 +52,10 @@ #define CDC_NCM_NTB_MAX_SIZE_TX 32768 /* bytes */ #define CDC_NCM_NTB_MAX_SIZE_RX 32768 /* bytes */ +/* Initial NTB length */ +#define CDC_NCM_NTB_DEF_SIZE_TX 16384 /* bytes */ +#define CDC_NCM_NTB_DEF_SIZE_RX 16384 /* bytes */ + /* Minimum value for MaxDatagramSize, ch. 6.2.9 */ #define CDC_NCM_MIN_DATAGRAM_SIZE 1514 /* bytes */ -- cgit v1.2.3-59-g8ed1b From fa83dbeee55865678025b6c1637ca08860209f87 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Fri, 16 May 2014 21:48:28 +0200 Subject: net: cdc_ncm: remove redundant "disconnected" flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Calling netif_carrier_{on,off} is sufficient. There is no need to duplicate the carrier state in a driver specific flag. Acked-by: Enrico Mioso Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 19 ++----------------- drivers/net/usb/huawei_cdc_ncm.c | 13 ------------- include/linux/usb/cdc_ncm.h | 1 - 3 files changed, 2 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 1d1ff2fa8ae1..783c4ed96395 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -1364,11 +1364,10 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb) * USB_CDC_NOTIFY_NETWORK_CONNECTION notification shall be * sent by device after USB_CDC_NOTIFY_SPEED_CHANGE. */ - ctx->connected = le16_to_cpu(event->wValue); netif_info(dev, link, dev->net, "network connection: %sconnected\n", - ctx->connected ? "" : "dis"); - usbnet_link_change(dev, ctx->connected, 0); + !!event->wValue ? "" : "dis"); + usbnet_link_change(dev, !!event->wValue, 0); break; case USB_CDC_NOTIFY_SPEED_CHANGE: @@ -1388,23 +1387,11 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb) } } -static int cdc_ncm_check_connect(struct usbnet *dev) -{ - struct cdc_ncm_ctx *ctx; - - ctx = (struct cdc_ncm_ctx *)dev->data[0]; - if (ctx == NULL) - return 1; /* disconnected */ - - return !ctx->connected; -} - static const struct driver_info cdc_ncm_info = { .description = "CDC NCM", .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET, .bind = cdc_ncm_bind, .unbind = cdc_ncm_unbind, - .check_connect = cdc_ncm_check_connect, .manage_power = usbnet_manage_power, .status = cdc_ncm_status, .rx_fixup = cdc_ncm_rx_fixup, @@ -1418,7 +1405,6 @@ static const struct driver_info wwan_info = { | FLAG_WWAN, .bind = cdc_ncm_bind, .unbind = cdc_ncm_unbind, - .check_connect = cdc_ncm_check_connect, .manage_power = usbnet_manage_power, .status = cdc_ncm_status, .rx_fixup = cdc_ncm_rx_fixup, @@ -1432,7 +1418,6 @@ static const struct driver_info wwan_noarp_info = { | FLAG_WWAN | FLAG_NOARP, .bind = cdc_ncm_bind, .unbind = cdc_ncm_unbind, - .check_connect = cdc_ncm_check_connect, .manage_power = usbnet_manage_power, .status = cdc_ncm_status, .rx_fixup = cdc_ncm_rx_fixup, diff --git a/drivers/net/usb/huawei_cdc_ncm.c b/drivers/net/usb/huawei_cdc_ncm.c index 312178d7b698..f9822bc75425 100644 --- a/drivers/net/usb/huawei_cdc_ncm.c +++ b/drivers/net/usb/huawei_cdc_ncm.c @@ -172,24 +172,11 @@ err: return ret; } -static int huawei_cdc_ncm_check_connect(struct usbnet *usbnet_dev) -{ - struct cdc_ncm_ctx *ctx; - - ctx = (struct cdc_ncm_ctx *)usbnet_dev->data[0]; - - if (ctx == NULL) - return 1; /* disconnected */ - - return !ctx->connected; -} - static const struct driver_info huawei_cdc_ncm_info = { .description = "Huawei CDC NCM device", .flags = FLAG_NO_SETINT | FLAG_MULTI_PACKET | FLAG_WWAN, .bind = huawei_cdc_ncm_bind, .unbind = huawei_cdc_ncm_unbind, - .check_connect = huawei_cdc_ncm_check_connect, .manage_power = huawei_cdc_ncm_manage_power, .rx_fixup = cdc_ncm_rx_fixup, .tx_fixup = cdc_ncm_tx_fixup, diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index cde506731c48..8c5e38819828 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -118,7 +118,6 @@ struct cdc_ncm_ctx { u16 tx_ndp_modulus; u16 tx_seq; u16 rx_seq; - u16 connected; u16 min_tx_pkt; /* statistics */ -- cgit v1.2.3-59-g8ed1b From 678e30df2e5664619e06fcfea5490a476826d8fe Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 19 May 2014 01:25:59 +0100 Subject: ethtool: Expand documentation of ethtool_ops::{get,set}_rxfh() Some corner-cases are not explained properly. Signed-off-by: Ben Hutchings --- include/linux/ethtool.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 212f537fc686..886e127d51a6 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -162,15 +162,16 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings) * Will not be called if @get_rxfh_indir_size returns zero. * @get_rxfh: Get the contents of the RX flow hash indirection table and hash * key. - * Will not be called if @get_rxfh_indir_size and @get_rxfh_key_size - * returns zero. + * Will only be called if one or both of @get_rxfh_indir_size and + * @get_rxfh_key_size are implemented and return non-zero. * Returns a negative error code or zero. * @set_rxfh_indir: Set the contents of the RX flow hash indirection table. * Will not be called if @get_rxfh_indir_size returns zero. - * @set_rxfh: Set the contents of the RX flow hash indirection table and - * hash key. - * Will not be called if @get_rxfh_indir_size and @get_rxfh_key_size - * returns zero. + * @set_rxfh: Set the contents of the RX flow hash indirection table and/or + * hash key. Either or both arguments may be %NULL if that attribute + * is not to be changed. + * Will only be called if one or both of @get_rxfh_indir_size and + * @get_rxfh_key_size are implemented and return non-zero. * Returns a negative error code or zero. * @get_channels: Get number of channels. * @set_channels: Set number of channels. Returns a negative error code or @@ -244,8 +245,8 @@ struct ethtool_ops { int (*reset)(struct net_device *, u32 *); u32 (*get_rxfh_key_size)(struct net_device *); u32 (*get_rxfh_indir_size)(struct net_device *); - int (*get_rxfh)(struct net_device *, u32 *, u8 *); - int (*set_rxfh)(struct net_device *, u32 *, u8 *); + int (*get_rxfh)(struct net_device *, u32 *indir, u8 *key); + int (*set_rxfh)(struct net_device *, u32 *indir, u8 *key); int (*get_rxfh_indir)(struct net_device *, u32 *); int (*set_rxfh_indir)(struct net_device *, const u32 *); void (*get_channels)(struct net_device *, struct ethtool_channels *); -- cgit v1.2.3-59-g8ed1b From 61d88c6811f216de4ec26aafe24e650dc1aeb00e Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 19 May 2014 01:29:42 +0100 Subject: ethtool: Disallow ETHTOOL_SRSSH with both indir table and hash key unchanged This would be a no-op, so there is no reason to request it. This also allows conversion of the current implementations of ethtool_ops::{get,set}_rxfh_indir to ethtool_ops::{get,set}_rxfh with no change other than their parameters. Signed-off-by: Ben Hutchings --- include/linux/ethtool.h | 4 ++-- net/core/ethtool.c | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 886e127d51a6..de687a97c6e7 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -168,8 +168,8 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings) * @set_rxfh_indir: Set the contents of the RX flow hash indirection table. * Will not be called if @get_rxfh_indir_size returns zero. * @set_rxfh: Set the contents of the RX flow hash indirection table and/or - * hash key. Either or both arguments may be %NULL if that attribute - * is not to be changed. + * hash key. In case only the indirection table or hash key is to be + * changed, the other argument will be %NULL. * Will only be called if one or both of @get_rxfh_indir_size and * @get_rxfh_key_size are implemented and return non-zero. * Returns a negative error code or zero. diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 7156fe5ca876..b8857348bdf3 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -802,11 +802,14 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, return -EFAULT; /* If either indir or hash key is valid, proceed further. + * It is not valid to request that both be unchanged. */ if ((user_indir_size && user_indir_size != ETH_RXFH_INDIR_NO_CHANGE && user_indir_size != dev_indir_size) || - (user_key_size && (user_key_size != dev_key_size))) + (user_key_size && (user_key_size != dev_key_size)) || + (user_indir_size == ETH_RXFH_INDIR_NO_CHANGE && + user_key_size == 0)) return -EINVAL; if (user_indir_size != ETH_RXFH_INDIR_NO_CHANGE) -- cgit v1.2.3-59-g8ed1b From 33cb0fa7888510b5bd2096352b200cfe29db10fe Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 15 May 2014 02:01:23 +0100 Subject: ethtool, be2net: constify array pointer parameters to ethtool_ops::set_rxfh Signed-off-by: Ben Hutchings --- drivers/net/ethernet/emulex/benet/be_cmds.c | 2 +- drivers/net/ethernet/emulex/benet/be_cmds.h | 2 +- drivers/net/ethernet/emulex/benet/be_ethtool.c | 3 ++- include/linux/ethtool.h | 3 ++- 4 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 476752d0a6a4..7b59da241ccb 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -2033,7 +2033,7 @@ int be_cmd_reset_function(struct be_adapter *adapter) } int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable, - u32 rss_hash_opts, u16 table_size, u8 *rss_hkey) + u32 rss_hash_opts, u16 table_size, const u8 *rss_hkey) { struct be_mcc_wrb *wrb; struct be_cmd_req_rss_config *req; diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index 228d4b611084..451f3138b8fb 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -2068,7 +2068,7 @@ int be_cmd_query_fw_cfg(struct be_adapter *adapter, u32 *port_num, u32 *function_mode, u32 *function_caps, u16 *asic_rev); int be_cmd_reset_function(struct be_adapter *adapter); int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable, - u32 rss_hash_opts, u16 table_size, u8 *rss_hkey); + u32 rss_hash_opts, u16 table_size, const u8 *rss_hkey); int be_process_mcc(struct be_adapter *adapter); int be_cmd_set_beacon_state(struct be_adapter *adapter, u8 port_num, u8 beacon, u8 status, u8 state); diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c index 970ae337daac..e2da4d20dd3d 100644 --- a/drivers/net/ethernet/emulex/benet/be_ethtool.c +++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c @@ -1117,7 +1117,8 @@ static int be_get_rxfh(struct net_device *netdev, u32 *indir, u8 *hkey) return 0; } -static int be_set_rxfh(struct net_device *netdev, u32 *indir, u8 *hkey) +static int be_set_rxfh(struct net_device *netdev, const u32 *indir, + const u8 *hkey) { int rc = 0, i, j; struct be_adapter *adapter = netdev_priv(netdev); diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index de687a97c6e7..874fde01d398 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -246,7 +246,8 @@ struct ethtool_ops { u32 (*get_rxfh_key_size)(struct net_device *); u32 (*get_rxfh_indir_size)(struct net_device *); int (*get_rxfh)(struct net_device *, u32 *indir, u8 *key); - int (*set_rxfh)(struct net_device *, u32 *indir, u8 *key); + int (*set_rxfh)(struct net_device *, const u32 *indir, + const u8 *key); int (*get_rxfh_indir)(struct net_device *, u32 *); int (*set_rxfh_indir)(struct net_device *, const u32 *); void (*get_channels)(struct net_device *, struct ethtool_channels *); -- cgit v1.2.3-59-g8ed1b From fd1159318e55e901cf269de90163b19fd62938cb Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sat, 17 May 2014 00:03:54 +0400 Subject: can: add Renesas R-Car CAN driver Add support for the CAN controller found in Renesas R-Car SoCs. Signed-off-by: Sergei Shtylyov Signed-off-by: Marc Kleine-Budde --- drivers/net/can/Kconfig | 10 + drivers/net/can/Makefile | 1 + drivers/net/can/rcar_can.c | 876 ++++++++++++++++++++++++++++++++++ include/linux/can/platform/rcar_can.h | 17 + 4 files changed, 904 insertions(+) create mode 100644 drivers/net/can/rcar_can.c create mode 100644 include/linux/can/platform/rcar_can.h (limited to 'include/linux') diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig index ac67afa7735c..714b18790caf 100644 --- a/drivers/net/can/Kconfig +++ b/drivers/net/can/Kconfig @@ -119,6 +119,16 @@ config CAN_GRCAN endian syntheses of the cores would need some modifications on the hardware level to work. +config CAN_RCAR + tristate "Renesas R-Car CAN controller" + depends on ARM + ---help--- + Say Y here if you want to use CAN controller found on Renesas R-Car + SoCs. + + To compile this driver as a module, choose M here: the module will + be called rcar_can. + source "drivers/net/can/mscan/Kconfig" source "drivers/net/can/sja1000/Kconfig" diff --git a/drivers/net/can/Makefile b/drivers/net/can/Makefile index c42058868b0f..90f538c73f8c 100644 --- a/drivers/net/can/Makefile +++ b/drivers/net/can/Makefile @@ -25,5 +25,6 @@ obj-$(CONFIG_CAN_JANZ_ICAN3) += janz-ican3.o obj-$(CONFIG_CAN_FLEXCAN) += flexcan.o obj-$(CONFIG_PCH_CAN) += pch_can.o obj-$(CONFIG_CAN_GRCAN) += grcan.o +obj-$(CONFIG_CAN_RCAR) += rcar_can.o ccflags-$(CONFIG_CAN_DEBUG_DEVICES) := -DDEBUG diff --git a/drivers/net/can/rcar_can.c b/drivers/net/can/rcar_can.c new file mode 100644 index 000000000000..5268d216ecfa --- /dev/null +++ b/drivers/net/can/rcar_can.c @@ -0,0 +1,876 @@ +/* Renesas R-Car CAN device driver + * + * Copyright (C) 2013 Cogent Embedded, Inc. + * Copyright (C) 2013 Renesas Solutions Corp. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define RCAR_CAN_DRV_NAME "rcar_can" + +/* Mailbox configuration: + * mailbox 60 - 63 - Rx FIFO mailboxes + * mailbox 56 - 59 - Tx FIFO mailboxes + * non-FIFO mailboxes are not used + */ +#define RCAR_CAN_N_MBX 64 /* Number of mailboxes in non-FIFO mode */ +#define RCAR_CAN_RX_FIFO_MBX 60 /* Mailbox - window to Rx FIFO */ +#define RCAR_CAN_TX_FIFO_MBX 56 /* Mailbox - window to Tx FIFO */ +#define RCAR_CAN_FIFO_DEPTH 4 + +/* Mailbox registers structure */ +struct rcar_can_mbox_regs { + u32 id; /* IDE and RTR bits, SID and EID */ + u8 stub; /* Not used */ + u8 dlc; /* Data Length Code - bits [0..3] */ + u8 data[8]; /* Data Bytes */ + u8 tsh; /* Time Stamp Higher Byte */ + u8 tsl; /* Time Stamp Lower Byte */ +}; + +struct rcar_can_regs { + struct rcar_can_mbox_regs mb[RCAR_CAN_N_MBX]; /* Mailbox registers */ + u32 mkr_2_9[8]; /* Mask Registers 2-9 */ + u32 fidcr[2]; /* FIFO Received ID Compare Register */ + u32 mkivlr1; /* Mask Invalid Register 1 */ + u32 mier1; /* Mailbox Interrupt Enable Register 1 */ + u32 mkr_0_1[2]; /* Mask Registers 0-1 */ + u32 mkivlr0; /* Mask Invalid Register 0*/ + u32 mier0; /* Mailbox Interrupt Enable Register 0 */ + u8 pad_440[0x3c0]; + u8 mctl[64]; /* Message Control Registers */ + u16 ctlr; /* Control Register */ + u16 str; /* Status register */ + u8 bcr[3]; /* Bit Configuration Register */ + u8 clkr; /* Clock Select Register */ + u8 rfcr; /* Receive FIFO Control Register */ + u8 rfpcr; /* Receive FIFO Pointer Control Register */ + u8 tfcr; /* Transmit FIFO Control Register */ + u8 tfpcr; /* Transmit FIFO Pointer Control Register */ + u8 eier; /* Error Interrupt Enable Register */ + u8 eifr; /* Error Interrupt Factor Judge Register */ + u8 recr; /* Receive Error Count Register */ + u8 tecr; /* Transmit Error Count Register */ + u8 ecsr; /* Error Code Store Register */ + u8 cssr; /* Channel Search Support Register */ + u8 mssr; /* Mailbox Search Status Register */ + u8 msmr; /* Mailbox Search Mode Register */ + u16 tsr; /* Time Stamp Register */ + u8 afsr; /* Acceptance Filter Support Register */ + u8 pad_857; + u8 tcr; /* Test Control Register */ + u8 pad_859[7]; + u8 ier; /* Interrupt Enable Register */ + u8 isr; /* Interrupt Status Register */ + u8 pad_862; + u8 mbsmr; /* Mailbox Search Mask Register */ +}; + +struct rcar_can_priv { + struct can_priv can; /* Must be the first member! */ + struct net_device *ndev; + struct napi_struct napi; + struct rcar_can_regs __iomem *regs; + struct clk *clk; + u8 tx_dlc[RCAR_CAN_FIFO_DEPTH]; + u32 tx_head; + u32 tx_tail; + u8 clock_select; + u8 ier; +}; + +static const struct can_bittiming_const rcar_can_bittiming_const = { + .name = RCAR_CAN_DRV_NAME, + .tseg1_min = 4, + .tseg1_max = 16, + .tseg2_min = 2, + .tseg2_max = 8, + .sjw_max = 4, + .brp_min = 1, + .brp_max = 1024, + .brp_inc = 1, +}; + +/* Control Register bits */ +#define RCAR_CAN_CTLR_BOM (3 << 11) /* Bus-Off Recovery Mode Bits */ +#define RCAR_CAN_CTLR_BOM_ENT (1 << 11) /* Entry to halt mode */ + /* at bus-off entry */ +#define RCAR_CAN_CTLR_SLPM (1 << 10) +#define RCAR_CAN_CTLR_CANM (3 << 8) /* Operating Mode Select Bit */ +#define RCAR_CAN_CTLR_CANM_HALT (1 << 9) +#define RCAR_CAN_CTLR_CANM_RESET (1 << 8) +#define RCAR_CAN_CTLR_CANM_FORCE_RESET (3 << 8) +#define RCAR_CAN_CTLR_MLM (1 << 3) /* Message Lost Mode Select */ +#define RCAR_CAN_CTLR_IDFM (3 << 1) /* ID Format Mode Select Bits */ +#define RCAR_CAN_CTLR_IDFM_MIXED (1 << 2) /* Mixed ID mode */ +#define RCAR_CAN_CTLR_MBM (1 << 0) /* Mailbox Mode select */ + +/* Status Register bits */ +#define RCAR_CAN_STR_RSTST (1 << 8) /* Reset Status Bit */ + +/* FIFO Received ID Compare Registers 0 and 1 bits */ +#define RCAR_CAN_FIDCR_IDE (1 << 31) /* ID Extension Bit */ +#define RCAR_CAN_FIDCR_RTR (1 << 30) /* Remote Transmission Request Bit */ + +/* Receive FIFO Control Register bits */ +#define RCAR_CAN_RFCR_RFEST (1 << 7) /* Receive FIFO Empty Status Flag */ +#define RCAR_CAN_RFCR_RFE (1 << 0) /* Receive FIFO Enable */ + +/* Transmit FIFO Control Register bits */ +#define RCAR_CAN_TFCR_TFUST (7 << 1) /* Transmit FIFO Unsent Message */ + /* Number Status Bits */ +#define RCAR_CAN_TFCR_TFUST_SHIFT 1 /* Offset of Transmit FIFO Unsent */ + /* Message Number Status Bits */ +#define RCAR_CAN_TFCR_TFE (1 << 0) /* Transmit FIFO Enable */ + +#define RCAR_CAN_N_RX_MKREGS1 2 /* Number of mask registers */ + /* for Rx mailboxes 0-31 */ +#define RCAR_CAN_N_RX_MKREGS2 8 + +/* Bit Configuration Register settings */ +#define RCAR_CAN_BCR_TSEG1(x) (((x) & 0x0f) << 20) +#define RCAR_CAN_BCR_BPR(x) (((x) & 0x3ff) << 8) +#define RCAR_CAN_BCR_SJW(x) (((x) & 0x3) << 4) +#define RCAR_CAN_BCR_TSEG2(x) ((x) & 0x07) + +/* Mailbox and Mask Registers bits */ +#define RCAR_CAN_IDE (1 << 31) +#define RCAR_CAN_RTR (1 << 30) +#define RCAR_CAN_SID_SHIFT 18 + +/* Mailbox Interrupt Enable Register 1 bits */ +#define RCAR_CAN_MIER1_RXFIE (1 << 28) /* Receive FIFO Interrupt Enable */ +#define RCAR_CAN_MIER1_TXFIE (1 << 24) /* Transmit FIFO Interrupt Enable */ + +/* Interrupt Enable Register bits */ +#define RCAR_CAN_IER_ERSIE (1 << 5) /* Error (ERS) Interrupt Enable Bit */ +#define RCAR_CAN_IER_RXFIE (1 << 4) /* Reception FIFO Interrupt */ + /* Enable Bit */ +#define RCAR_CAN_IER_TXFIE (1 << 3) /* Transmission FIFO Interrupt */ + /* Enable Bit */ +/* Interrupt Status Register bits */ +#define RCAR_CAN_ISR_ERSF (1 << 5) /* Error (ERS) Interrupt Status Bit */ +#define RCAR_CAN_ISR_RXFF (1 << 4) /* Reception FIFO Interrupt */ + /* Status Bit */ +#define RCAR_CAN_ISR_TXFF (1 << 3) /* Transmission FIFO Interrupt */ + /* Status Bit */ + +/* Error Interrupt Enable Register bits */ +#define RCAR_CAN_EIER_BLIE (1 << 7) /* Bus Lock Interrupt Enable */ +#define RCAR_CAN_EIER_OLIE (1 << 6) /* Overload Frame Transmit */ + /* Interrupt Enable */ +#define RCAR_CAN_EIER_ORIE (1 << 5) /* Receive Overrun Interrupt Enable */ +#define RCAR_CAN_EIER_BORIE (1 << 4) /* Bus-Off Recovery Interrupt Enable */ +#define RCAR_CAN_EIER_BOEIE (1 << 3) /* Bus-Off Entry Interrupt Enable */ +#define RCAR_CAN_EIER_EPIE (1 << 2) /* Error Passive Interrupt Enable */ +#define RCAR_CAN_EIER_EWIE (1 << 1) /* Error Warning Interrupt Enable */ +#define RCAR_CAN_EIER_BEIE (1 << 0) /* Bus Error Interrupt Enable */ + +/* Error Interrupt Factor Judge Register bits */ +#define RCAR_CAN_EIFR_BLIF (1 << 7) /* Bus Lock Detect Flag */ +#define RCAR_CAN_EIFR_OLIF (1 << 6) /* Overload Frame Transmission */ + /* Detect Flag */ +#define RCAR_CAN_EIFR_ORIF (1 << 5) /* Receive Overrun Detect Flag */ +#define RCAR_CAN_EIFR_BORIF (1 << 4) /* Bus-Off Recovery Detect Flag */ +#define RCAR_CAN_EIFR_BOEIF (1 << 3) /* Bus-Off Entry Detect Flag */ +#define RCAR_CAN_EIFR_EPIF (1 << 2) /* Error Passive Detect Flag */ +#define RCAR_CAN_EIFR_EWIF (1 << 1) /* Error Warning Detect Flag */ +#define RCAR_CAN_EIFR_BEIF (1 << 0) /* Bus Error Detect Flag */ + +/* Error Code Store Register bits */ +#define RCAR_CAN_ECSR_EDPM (1 << 7) /* Error Display Mode Select Bit */ +#define RCAR_CAN_ECSR_ADEF (1 << 6) /* ACK Delimiter Error Flag */ +#define RCAR_CAN_ECSR_BE0F (1 << 5) /* Bit Error (dominant) Flag */ +#define RCAR_CAN_ECSR_BE1F (1 << 4) /* Bit Error (recessive) Flag */ +#define RCAR_CAN_ECSR_CEF (1 << 3) /* CRC Error Flag */ +#define RCAR_CAN_ECSR_AEF (1 << 2) /* ACK Error Flag */ +#define RCAR_CAN_ECSR_FEF (1 << 1) /* Form Error Flag */ +#define RCAR_CAN_ECSR_SEF (1 << 0) /* Stuff Error Flag */ + +#define RCAR_CAN_NAPI_WEIGHT 4 +#define MAX_STR_READS 0x100 + +static void tx_failure_cleanup(struct net_device *ndev) +{ + int i; + + for (i = 0; i < RCAR_CAN_FIFO_DEPTH; i++) + can_free_echo_skb(ndev, i); +} + +static void rcar_can_error(struct net_device *ndev) +{ + struct rcar_can_priv *priv = netdev_priv(ndev); + struct net_device_stats *stats = &ndev->stats; + struct can_frame *cf; + struct sk_buff *skb; + u8 eifr, txerr = 0, rxerr = 0; + + /* Propagate the error condition to the CAN stack */ + skb = alloc_can_err_skb(ndev, &cf); + + eifr = readb(&priv->regs->eifr); + if (eifr & (RCAR_CAN_EIFR_EWIF | RCAR_CAN_EIFR_EPIF)) { + txerr = readb(&priv->regs->tecr); + rxerr = readb(&priv->regs->recr); + if (skb) { + cf->can_id |= CAN_ERR_CRTL; + cf->data[6] = txerr; + cf->data[7] = rxerr; + } + } + if (eifr & RCAR_CAN_EIFR_BEIF) { + int rx_errors = 0, tx_errors = 0; + u8 ecsr; + + netdev_dbg(priv->ndev, "Bus error interrupt:\n"); + if (skb) { + cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_PROT; + cf->data[2] = CAN_ERR_PROT_UNSPEC; + } + ecsr = readb(&priv->regs->ecsr); + if (ecsr & RCAR_CAN_ECSR_ADEF) { + netdev_dbg(priv->ndev, "ACK Delimiter Error\n"); + tx_errors++; + writeb(~RCAR_CAN_ECSR_ADEF, &priv->regs->ecsr); + if (skb) + cf->data[3] |= CAN_ERR_PROT_LOC_ACK_DEL; + } + if (ecsr & RCAR_CAN_ECSR_BE0F) { + netdev_dbg(priv->ndev, "Bit Error (dominant)\n"); + tx_errors++; + writeb(~RCAR_CAN_ECSR_BE0F, &priv->regs->ecsr); + if (skb) + cf->data[2] |= CAN_ERR_PROT_BIT0; + } + if (ecsr & RCAR_CAN_ECSR_BE1F) { + netdev_dbg(priv->ndev, "Bit Error (recessive)\n"); + tx_errors++; + writeb(~RCAR_CAN_ECSR_BE1F, &priv->regs->ecsr); + if (skb) + cf->data[2] |= CAN_ERR_PROT_BIT1; + } + if (ecsr & RCAR_CAN_ECSR_CEF) { + netdev_dbg(priv->ndev, "CRC Error\n"); + rx_errors++; + writeb(~RCAR_CAN_ECSR_CEF, &priv->regs->ecsr); + if (skb) + cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ; + } + if (ecsr & RCAR_CAN_ECSR_AEF) { + netdev_dbg(priv->ndev, "ACK Error\n"); + tx_errors++; + writeb(~RCAR_CAN_ECSR_AEF, &priv->regs->ecsr); + if (skb) { + cf->can_id |= CAN_ERR_ACK; + cf->data[3] |= CAN_ERR_PROT_LOC_ACK; + } + } + if (ecsr & RCAR_CAN_ECSR_FEF) { + netdev_dbg(priv->ndev, "Form Error\n"); + rx_errors++; + writeb(~RCAR_CAN_ECSR_FEF, &priv->regs->ecsr); + if (skb) + cf->data[2] |= CAN_ERR_PROT_FORM; + } + if (ecsr & RCAR_CAN_ECSR_SEF) { + netdev_dbg(priv->ndev, "Stuff Error\n"); + rx_errors++; + writeb(~RCAR_CAN_ECSR_SEF, &priv->regs->ecsr); + if (skb) + cf->data[2] |= CAN_ERR_PROT_STUFF; + } + + priv->can.can_stats.bus_error++; + ndev->stats.rx_errors += rx_errors; + ndev->stats.tx_errors += tx_errors; + writeb(~RCAR_CAN_EIFR_BEIF, &priv->regs->eifr); + } + if (eifr & RCAR_CAN_EIFR_EWIF) { + netdev_dbg(priv->ndev, "Error warning interrupt\n"); + priv->can.state = CAN_STATE_ERROR_WARNING; + priv->can.can_stats.error_warning++; + /* Clear interrupt condition */ + writeb(~RCAR_CAN_EIFR_EWIF, &priv->regs->eifr); + if (skb) + cf->data[1] = txerr > rxerr ? CAN_ERR_CRTL_TX_WARNING : + CAN_ERR_CRTL_RX_WARNING; + } + if (eifr & RCAR_CAN_EIFR_EPIF) { + netdev_dbg(priv->ndev, "Error passive interrupt\n"); + priv->can.state = CAN_STATE_ERROR_PASSIVE; + priv->can.can_stats.error_passive++; + /* Clear interrupt condition */ + writeb(~RCAR_CAN_EIFR_EPIF, &priv->regs->eifr); + if (skb) + cf->data[1] = txerr > rxerr ? CAN_ERR_CRTL_TX_PASSIVE : + CAN_ERR_CRTL_RX_PASSIVE; + } + if (eifr & RCAR_CAN_EIFR_BOEIF) { + netdev_dbg(priv->ndev, "Bus-off entry interrupt\n"); + tx_failure_cleanup(ndev); + priv->ier = RCAR_CAN_IER_ERSIE; + writeb(priv->ier, &priv->regs->ier); + priv->can.state = CAN_STATE_BUS_OFF; + /* Clear interrupt condition */ + writeb(~RCAR_CAN_EIFR_BOEIF, &priv->regs->eifr); + can_bus_off(ndev); + if (skb) + cf->can_id |= CAN_ERR_BUSOFF; + } + if (eifr & RCAR_CAN_EIFR_ORIF) { + netdev_dbg(priv->ndev, "Receive overrun error interrupt\n"); + ndev->stats.rx_over_errors++; + ndev->stats.rx_errors++; + writeb(~RCAR_CAN_EIFR_ORIF, &priv->regs->eifr); + if (skb) { + cf->can_id |= CAN_ERR_CRTL; + cf->data[1] = CAN_ERR_CRTL_RX_OVERFLOW; + } + } + if (eifr & RCAR_CAN_EIFR_OLIF) { + netdev_dbg(priv->ndev, + "Overload Frame Transmission error interrupt\n"); + ndev->stats.rx_over_errors++; + ndev->stats.rx_errors++; + writeb(~RCAR_CAN_EIFR_OLIF, &priv->regs->eifr); + if (skb) { + cf->can_id |= CAN_ERR_PROT; + cf->data[2] |= CAN_ERR_PROT_OVERLOAD; + } + } + + if (skb) { + stats->rx_packets++; + stats->rx_bytes += cf->can_dlc; + netif_rx(skb); + } +} + +static void rcar_can_tx_done(struct net_device *ndev) +{ + struct rcar_can_priv *priv = netdev_priv(ndev); + struct net_device_stats *stats = &ndev->stats; + u8 isr; + + while (1) { + u8 unsent = readb(&priv->regs->tfcr); + + unsent = (unsent & RCAR_CAN_TFCR_TFUST) >> + RCAR_CAN_TFCR_TFUST_SHIFT; + if (priv->tx_head - priv->tx_tail <= unsent) + break; + stats->tx_packets++; + stats->tx_bytes += priv->tx_dlc[priv->tx_tail % + RCAR_CAN_FIFO_DEPTH]; + priv->tx_dlc[priv->tx_tail % RCAR_CAN_FIFO_DEPTH] = 0; + can_get_echo_skb(ndev, priv->tx_tail % RCAR_CAN_FIFO_DEPTH); + priv->tx_tail++; + netif_wake_queue(ndev); + } + /* Clear interrupt */ + isr = readb(&priv->regs->isr); + writeb(isr & ~RCAR_CAN_ISR_TXFF, &priv->regs->isr); + can_led_event(ndev, CAN_LED_EVENT_TX); +} + +static irqreturn_t rcar_can_interrupt(int irq, void *dev_id) +{ + struct net_device *ndev = dev_id; + struct rcar_can_priv *priv = netdev_priv(ndev); + u8 isr; + + isr = readb(&priv->regs->isr); + if (!(isr & priv->ier)) + return IRQ_NONE; + + if (isr & RCAR_CAN_ISR_ERSF) + rcar_can_error(ndev); + + if (isr & RCAR_CAN_ISR_TXFF) + rcar_can_tx_done(ndev); + + if (isr & RCAR_CAN_ISR_RXFF) { + if (napi_schedule_prep(&priv->napi)) { + /* Disable Rx FIFO interrupts */ + priv->ier &= ~RCAR_CAN_IER_RXFIE; + writeb(priv->ier, &priv->regs->ier); + __napi_schedule(&priv->napi); + } + } + + return IRQ_HANDLED; +} + +static void rcar_can_set_bittiming(struct net_device *dev) +{ + struct rcar_can_priv *priv = netdev_priv(dev); + struct can_bittiming *bt = &priv->can.bittiming; + u32 bcr; + + bcr = RCAR_CAN_BCR_TSEG1(bt->phase_seg1 + bt->prop_seg - 1) | + RCAR_CAN_BCR_BPR(bt->brp - 1) | RCAR_CAN_BCR_SJW(bt->sjw - 1) | + RCAR_CAN_BCR_TSEG2(bt->phase_seg2 - 1); + /* Don't overwrite CLKR with 32-bit BCR access; CLKR has 8-bit access. + * All the registers are big-endian but they get byte-swapped on 32-bit + * read/write (but not on 8-bit, contrary to the manuals)... + */ + writel((bcr << 8) | priv->clock_select, &priv->regs->bcr); +} + +static void rcar_can_start(struct net_device *ndev) +{ + struct rcar_can_priv *priv = netdev_priv(ndev); + u16 ctlr; + int i; + + /* Set controller to known mode: + * - FIFO mailbox mode + * - accept all messages + * - overrun mode + * CAN is in sleep mode after MCU hardware or software reset. + */ + ctlr = readw(&priv->regs->ctlr); + ctlr &= ~RCAR_CAN_CTLR_SLPM; + writew(ctlr, &priv->regs->ctlr); + /* Go to reset mode */ + ctlr |= RCAR_CAN_CTLR_CANM_FORCE_RESET; + writew(ctlr, &priv->regs->ctlr); + for (i = 0; i < MAX_STR_READS; i++) { + if (readw(&priv->regs->str) & RCAR_CAN_STR_RSTST) + break; + } + rcar_can_set_bittiming(ndev); + ctlr |= RCAR_CAN_CTLR_IDFM_MIXED; /* Select mixed ID mode */ + ctlr |= RCAR_CAN_CTLR_BOM_ENT; /* Entry to halt mode automatically */ + /* at bus-off */ + ctlr |= RCAR_CAN_CTLR_MBM; /* Select FIFO mailbox mode */ + ctlr |= RCAR_CAN_CTLR_MLM; /* Overrun mode */ + writew(ctlr, &priv->regs->ctlr); + + /* Accept all SID and EID */ + writel(0, &priv->regs->mkr_2_9[6]); + writel(0, &priv->regs->mkr_2_9[7]); + /* In FIFO mailbox mode, write "0" to bits 24 to 31 */ + writel(0, &priv->regs->mkivlr1); + /* Accept all frames */ + writel(0, &priv->regs->fidcr[0]); + writel(RCAR_CAN_FIDCR_IDE | RCAR_CAN_FIDCR_RTR, &priv->regs->fidcr[1]); + /* Enable and configure FIFO mailbox interrupts */ + writel(RCAR_CAN_MIER1_RXFIE | RCAR_CAN_MIER1_TXFIE, &priv->regs->mier1); + + priv->ier = RCAR_CAN_IER_ERSIE | RCAR_CAN_IER_RXFIE | + RCAR_CAN_IER_TXFIE; + writeb(priv->ier, &priv->regs->ier); + + /* Accumulate error codes */ + writeb(RCAR_CAN_ECSR_EDPM, &priv->regs->ecsr); + /* Enable error interrupts */ + writeb(RCAR_CAN_EIER_EWIE | RCAR_CAN_EIER_EPIE | RCAR_CAN_EIER_BOEIE | + (priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING ? + RCAR_CAN_EIER_BEIE : 0) | RCAR_CAN_EIER_ORIE | + RCAR_CAN_EIER_OLIE, &priv->regs->eier); + priv->can.state = CAN_STATE_ERROR_ACTIVE; + + /* Go to operation mode */ + writew(ctlr & ~RCAR_CAN_CTLR_CANM, &priv->regs->ctlr); + for (i = 0; i < MAX_STR_READS; i++) { + if (!(readw(&priv->regs->str) & RCAR_CAN_STR_RSTST)) + break; + } + /* Enable Rx and Tx FIFO */ + writeb(RCAR_CAN_RFCR_RFE, &priv->regs->rfcr); + writeb(RCAR_CAN_TFCR_TFE, &priv->regs->tfcr); +} + +static int rcar_can_open(struct net_device *ndev) +{ + struct rcar_can_priv *priv = netdev_priv(ndev); + int err; + + err = clk_prepare_enable(priv->clk); + if (err) { + netdev_err(ndev, "clk_prepare_enable() failed, error %d\n", + err); + goto out; + } + err = open_candev(ndev); + if (err) { + netdev_err(ndev, "open_candev() failed, error %d\n", err); + goto out_clock; + } + napi_enable(&priv->napi); + err = request_irq(ndev->irq, rcar_can_interrupt, 0, ndev->name, ndev); + if (err) { + netdev_err(ndev, "error requesting interrupt %x\n", ndev->irq); + goto out_close; + } + can_led_event(ndev, CAN_LED_EVENT_OPEN); + rcar_can_start(ndev); + netif_start_queue(ndev); + return 0; +out_close: + napi_disable(&priv->napi); + close_candev(ndev); +out_clock: + clk_disable_unprepare(priv->clk); +out: + return err; +} + +static void rcar_can_stop(struct net_device *ndev) +{ + struct rcar_can_priv *priv = netdev_priv(ndev); + u16 ctlr; + int i; + + /* Go to (force) reset mode */ + ctlr = readw(&priv->regs->ctlr); + ctlr |= RCAR_CAN_CTLR_CANM_FORCE_RESET; + writew(ctlr, &priv->regs->ctlr); + for (i = 0; i < MAX_STR_READS; i++) { + if (readw(&priv->regs->str) & RCAR_CAN_STR_RSTST) + break; + } + writel(0, &priv->regs->mier0); + writel(0, &priv->regs->mier1); + writeb(0, &priv->regs->ier); + writeb(0, &priv->regs->eier); + /* Go to sleep mode */ + ctlr |= RCAR_CAN_CTLR_SLPM; + writew(ctlr, &priv->regs->ctlr); + priv->can.state = CAN_STATE_STOPPED; +} + +static int rcar_can_close(struct net_device *ndev) +{ + struct rcar_can_priv *priv = netdev_priv(ndev); + + netif_stop_queue(ndev); + rcar_can_stop(ndev); + free_irq(ndev->irq, ndev); + napi_disable(&priv->napi); + clk_disable_unprepare(priv->clk); + close_candev(ndev); + can_led_event(ndev, CAN_LED_EVENT_STOP); + return 0; +} + +static netdev_tx_t rcar_can_start_xmit(struct sk_buff *skb, + struct net_device *ndev) +{ + struct rcar_can_priv *priv = netdev_priv(ndev); + struct can_frame *cf = (struct can_frame *)skb->data; + u32 data, i; + + if (can_dropped_invalid_skb(ndev, skb)) + return NETDEV_TX_OK; + + if (cf->can_id & CAN_EFF_FLAG) /* Extended frame format */ + data = (cf->can_id & CAN_EFF_MASK) | RCAR_CAN_IDE; + else /* Standard frame format */ + data = (cf->can_id & CAN_SFF_MASK) << RCAR_CAN_SID_SHIFT; + + if (cf->can_id & CAN_RTR_FLAG) { /* Remote transmission request */ + data |= RCAR_CAN_RTR; + } else { + for (i = 0; i < cf->can_dlc; i++) + writeb(cf->data[i], + &priv->regs->mb[RCAR_CAN_TX_FIFO_MBX].data[i]); + } + + writel(data, &priv->regs->mb[RCAR_CAN_TX_FIFO_MBX].id); + + writeb(cf->can_dlc, &priv->regs->mb[RCAR_CAN_TX_FIFO_MBX].dlc); + + priv->tx_dlc[priv->tx_head % RCAR_CAN_FIFO_DEPTH] = cf->can_dlc; + can_put_echo_skb(skb, ndev, priv->tx_head % RCAR_CAN_FIFO_DEPTH); + priv->tx_head++; + /* Start Tx: write 0xff to the TFPCR register to increment + * the CPU-side pointer for the transmit FIFO to the next + * mailbox location + */ + writeb(0xff, &priv->regs->tfpcr); + /* Stop the queue if we've filled all FIFO entries */ + if (priv->tx_head - priv->tx_tail >= RCAR_CAN_FIFO_DEPTH) + netif_stop_queue(ndev); + + return NETDEV_TX_OK; +} + +static const struct net_device_ops rcar_can_netdev_ops = { + .ndo_open = rcar_can_open, + .ndo_stop = rcar_can_close, + .ndo_start_xmit = rcar_can_start_xmit, +}; + +static void rcar_can_rx_pkt(struct rcar_can_priv *priv) +{ + struct net_device_stats *stats = &priv->ndev->stats; + struct can_frame *cf; + struct sk_buff *skb; + u32 data; + u8 dlc; + + skb = alloc_can_skb(priv->ndev, &cf); + if (!skb) { + stats->rx_dropped++; + return; + } + + data = readl(&priv->regs->mb[RCAR_CAN_RX_FIFO_MBX].id); + if (data & RCAR_CAN_IDE) + cf->can_id = (data & CAN_EFF_MASK) | CAN_EFF_FLAG; + else + cf->can_id = (data >> RCAR_CAN_SID_SHIFT) & CAN_SFF_MASK; + + dlc = readb(&priv->regs->mb[RCAR_CAN_RX_FIFO_MBX].dlc); + cf->can_dlc = get_can_dlc(dlc); + if (data & RCAR_CAN_RTR) { + cf->can_id |= CAN_RTR_FLAG; + } else { + for (dlc = 0; dlc < cf->can_dlc; dlc++) + cf->data[dlc] = + readb(&priv->regs->mb[RCAR_CAN_RX_FIFO_MBX].data[dlc]); + } + + can_led_event(priv->ndev, CAN_LED_EVENT_RX); + + stats->rx_bytes += cf->can_dlc; + stats->rx_packets++; + netif_receive_skb(skb); +} + +static int rcar_can_rx_poll(struct napi_struct *napi, int quota) +{ + struct rcar_can_priv *priv = container_of(napi, + struct rcar_can_priv, napi); + int num_pkts; + + for (num_pkts = 0; num_pkts < quota; num_pkts++) { + u8 rfcr, isr; + + isr = readb(&priv->regs->isr); + /* Clear interrupt bit */ + if (isr & RCAR_CAN_ISR_RXFF) + writeb(isr & ~RCAR_CAN_ISR_RXFF, &priv->regs->isr); + rfcr = readb(&priv->regs->rfcr); + if (rfcr & RCAR_CAN_RFCR_RFEST) + break; + rcar_can_rx_pkt(priv); + /* Write 0xff to the RFPCR register to increment + * the CPU-side pointer for the receive FIFO + * to the next mailbox location + */ + writeb(0xff, &priv->regs->rfpcr); + } + /* All packets processed */ + if (num_pkts < quota) { + napi_complete(napi); + priv->ier |= RCAR_CAN_IER_RXFIE; + writeb(priv->ier, &priv->regs->ier); + } + return num_pkts; +} + +static int rcar_can_do_set_mode(struct net_device *ndev, enum can_mode mode) +{ + switch (mode) { + case CAN_MODE_START: + rcar_can_start(ndev); + netif_wake_queue(ndev); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int rcar_can_get_berr_counter(const struct net_device *dev, + struct can_berr_counter *bec) +{ + struct rcar_can_priv *priv = netdev_priv(dev); + int err; + + err = clk_prepare_enable(priv->clk); + if (err) + return err; + bec->txerr = readb(&priv->regs->tecr); + bec->rxerr = readb(&priv->regs->recr); + clk_disable_unprepare(priv->clk); + return 0; +} + +static int rcar_can_probe(struct platform_device *pdev) +{ + struct rcar_can_platform_data *pdata; + struct rcar_can_priv *priv; + struct net_device *ndev; + struct resource *mem; + void __iomem *addr; + int err = -ENODEV; + int irq; + + pdata = dev_get_platdata(&pdev->dev); + if (!pdata) { + dev_err(&pdev->dev, "No platform data provided!\n"); + goto fail; + } + + irq = platform_get_irq(pdev, 0); + if (!irq) { + dev_err(&pdev->dev, "No IRQ resource\n"); + goto fail; + } + + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + addr = devm_ioremap_resource(&pdev->dev, mem); + if (IS_ERR(addr)) { + err = PTR_ERR(addr); + goto fail; + } + + ndev = alloc_candev(sizeof(struct rcar_can_priv), RCAR_CAN_FIFO_DEPTH); + if (!ndev) { + dev_err(&pdev->dev, "alloc_candev() failed\n"); + err = -ENOMEM; + goto fail; + } + + priv = netdev_priv(ndev); + + priv->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(priv->clk)) { + err = PTR_ERR(priv->clk); + dev_err(&pdev->dev, "cannot get clock: %d\n", err); + goto fail_clk; + } + + ndev->netdev_ops = &rcar_can_netdev_ops; + ndev->irq = irq; + ndev->flags |= IFF_ECHO; + priv->ndev = ndev; + priv->regs = addr; + priv->clock_select = pdata->clock_select; + priv->can.clock.freq = clk_get_rate(priv->clk); + priv->can.bittiming_const = &rcar_can_bittiming_const; + priv->can.do_set_mode = rcar_can_do_set_mode; + priv->can.do_get_berr_counter = rcar_can_get_berr_counter; + priv->can.ctrlmode_supported = CAN_CTRLMODE_BERR_REPORTING; + platform_set_drvdata(pdev, ndev); + SET_NETDEV_DEV(ndev, &pdev->dev); + + netif_napi_add(ndev, &priv->napi, rcar_can_rx_poll, + RCAR_CAN_NAPI_WEIGHT); + err = register_candev(ndev); + if (err) { + dev_err(&pdev->dev, "register_candev() failed, error %d\n", + err); + goto fail_candev; + } + + devm_can_led_init(ndev); + + dev_info(&pdev->dev, "device registered (reg_base=%p, irq=%u)\n", + priv->regs, ndev->irq); + + return 0; +fail_candev: + netif_napi_del(&priv->napi); +fail_clk: + free_candev(ndev); +fail: + return err; +} + +static int rcar_can_remove(struct platform_device *pdev) +{ + struct net_device *ndev = platform_get_drvdata(pdev); + struct rcar_can_priv *priv = netdev_priv(ndev); + + unregister_candev(ndev); + netif_napi_del(&priv->napi); + free_candev(ndev); + return 0; +} + +static int __maybe_unused rcar_can_suspend(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + struct rcar_can_priv *priv = netdev_priv(ndev); + u16 ctlr; + + if (netif_running(ndev)) { + netif_stop_queue(ndev); + netif_device_detach(ndev); + } + ctlr = readw(&priv->regs->ctlr); + ctlr |= RCAR_CAN_CTLR_CANM_HALT; + writew(ctlr, &priv->regs->ctlr); + ctlr |= RCAR_CAN_CTLR_SLPM; + writew(ctlr, &priv->regs->ctlr); + priv->can.state = CAN_STATE_SLEEPING; + + clk_disable(priv->clk); + return 0; +} + +static int __maybe_unused rcar_can_resume(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + struct rcar_can_priv *priv = netdev_priv(ndev); + u16 ctlr; + int err; + + err = clk_enable(priv->clk); + if (err) { + netdev_err(ndev, "clk_enable() failed, error %d\n", err); + return err; + } + + ctlr = readw(&priv->regs->ctlr); + ctlr &= ~RCAR_CAN_CTLR_SLPM; + writew(ctlr, &priv->regs->ctlr); + ctlr &= ~RCAR_CAN_CTLR_CANM; + writew(ctlr, &priv->regs->ctlr); + priv->can.state = CAN_STATE_ERROR_ACTIVE; + + if (netif_running(ndev)) { + netif_device_attach(ndev); + netif_start_queue(ndev); + } + return 0; +} + +static SIMPLE_DEV_PM_OPS(rcar_can_pm_ops, rcar_can_suspend, rcar_can_resume); + +static struct platform_driver rcar_can_driver = { + .driver = { + .name = RCAR_CAN_DRV_NAME, + .owner = THIS_MODULE, + .pm = &rcar_can_pm_ops, + }, + .probe = rcar_can_probe, + .remove = rcar_can_remove, +}; + +module_platform_driver(rcar_can_driver); + +MODULE_AUTHOR("Cogent Embedded, Inc."); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("CAN driver for Renesas R-Car SoC"); +MODULE_ALIAS("platform:" RCAR_CAN_DRV_NAME); diff --git a/include/linux/can/platform/rcar_can.h b/include/linux/can/platform/rcar_can.h new file mode 100644 index 000000000000..0f4a2f3df504 --- /dev/null +++ b/include/linux/can/platform/rcar_can.h @@ -0,0 +1,17 @@ +#ifndef _CAN_PLATFORM_RCAR_CAN_H_ +#define _CAN_PLATFORM_RCAR_CAN_H_ + +#include + +/* Clock Select Register settings */ +enum CLKR { + CLKR_CLKP1 = 0, /* Peripheral clock (clkp1) */ + CLKR_CLKP2 = 1, /* Peripheral clock (clkp2) */ + CLKR_CLKEXT = 3 /* Externally input clock */ +}; + +struct rcar_can_platform_data { + enum CLKR clock_select; /* Clock source select */ +}; + +#endif /* !_CAN_PLATFORM_RCAR_CAN_H_ */ -- cgit v1.2.3-59-g8ed1b From 42193e3efb632c84d686acacd7b2327f2b1f8c63 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Thu, 15 May 2014 20:31:56 +0200 Subject: can: unify identifiers to ensure unique include processing Armin pointed me to the fact that the identifier which is used to ensure the unique include processing in lunux/include/uapi/linux/can.h is CAN_H. This clashed with his own source as includes from libraries and APIs should use an underscore '_' at the identifier start. This patch fixes the protection identifiers in all CAN relavant includes. Reported-by: Armin Burchardt Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/linux/can/core.h | 6 +++--- include/linux/can/dev.h | 6 +++--- include/linux/can/led.h | 6 +++--- include/linux/can/platform/cc770.h | 6 +++--- include/linux/can/platform/mcp251x.h | 6 +++--- include/linux/can/platform/sja1000.h | 6 +++--- include/linux/can/platform/ti_hecc.h | 6 +++--- include/linux/can/skb.h | 6 +++--- include/uapi/linux/can.h | 6 +++--- include/uapi/linux/can/bcm.h | 6 +++--- include/uapi/linux/can/error.h | 6 +++--- include/uapi/linux/can/gw.h | 6 +++--- include/uapi/linux/can/netlink.h | 6 +++--- include/uapi/linux/can/raw.h | 6 +++--- 14 files changed, 42 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/core.h b/include/linux/can/core.h index 78c6c52073ad..a0875001b13c 100644 --- a/include/linux/can/core.h +++ b/include/linux/can/core.h @@ -10,8 +10,8 @@ * */ -#ifndef CAN_CORE_H -#define CAN_CORE_H +#ifndef _CAN_CORE_H +#define _CAN_CORE_H #include #include @@ -58,4 +58,4 @@ extern void can_rx_unregister(struct net_device *dev, canid_t can_id, extern int can_send(struct sk_buff *skb, int loop); extern int can_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); -#endif /* CAN_CORE_H */ +#endif /* !_CAN_CORE_H */ diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 3ce5e526525f..6992afc6ba7f 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -10,8 +10,8 @@ * */ -#ifndef CAN_DEV_H -#define CAN_DEV_H +#ifndef _CAN_DEV_H +#define _CAN_DEV_H #include #include @@ -132,4 +132,4 @@ struct sk_buff *alloc_canfd_skb(struct net_device *dev, struct sk_buff *alloc_can_err_skb(struct net_device *dev, struct can_frame **cf); -#endif /* CAN_DEV_H */ +#endif /* !_CAN_DEV_H */ diff --git a/include/linux/can/led.h b/include/linux/can/led.h index 9c1167baf273..e0475c5cbb92 100644 --- a/include/linux/can/led.h +++ b/include/linux/can/led.h @@ -6,8 +6,8 @@ * published by the Free Software Foundation. */ -#ifndef CAN_LED_H -#define CAN_LED_H +#ifndef _CAN_LED_H +#define _CAN_LED_H #include #include @@ -48,4 +48,4 @@ static inline void can_led_notifier_exit(void) #endif -#endif +#endif /* !_CAN_LED_H */ diff --git a/include/linux/can/platform/cc770.h b/include/linux/can/platform/cc770.h index 7702641f87ee..78b2d44f04cf 100644 --- a/include/linux/can/platform/cc770.h +++ b/include/linux/can/platform/cc770.h @@ -1,5 +1,5 @@ -#ifndef _CAN_PLATFORM_CC770_H_ -#define _CAN_PLATFORM_CC770_H_ +#ifndef _CAN_PLATFORM_CC770_H +#define _CAN_PLATFORM_CC770_H /* CPU Interface Register (0x02) */ #define CPUIF_CEN 0x01 /* Clock Out Enable */ @@ -30,4 +30,4 @@ struct cc770_platform_data { u8 bcr; /* Bus Configuration Register */ }; -#endif /* !_CAN_PLATFORM_CC770_H_ */ +#endif /* !_CAN_PLATFORM_CC770_H */ diff --git a/include/linux/can/platform/mcp251x.h b/include/linux/can/platform/mcp251x.h index dc029dba7a03..d44fcae274ff 100644 --- a/include/linux/can/platform/mcp251x.h +++ b/include/linux/can/platform/mcp251x.h @@ -1,5 +1,5 @@ -#ifndef __CAN_PLATFORM_MCP251X_H__ -#define __CAN_PLATFORM_MCP251X_H__ +#ifndef _CAN_PLATFORM_MCP251X_H +#define _CAN_PLATFORM_MCP251X_H /* * @@ -18,4 +18,4 @@ struct mcp251x_platform_data { unsigned long oscillator_frequency; }; -#endif /* __CAN_PLATFORM_MCP251X_H__ */ +#endif /* !_CAN_PLATFORM_MCP251X_H */ diff --git a/include/linux/can/platform/sja1000.h b/include/linux/can/platform/sja1000.h index 96f8fcc78d78..93570b61ec6c 100644 --- a/include/linux/can/platform/sja1000.h +++ b/include/linux/can/platform/sja1000.h @@ -1,5 +1,5 @@ -#ifndef _CAN_PLATFORM_SJA1000_H_ -#define _CAN_PLATFORM_SJA1000_H_ +#ifndef _CAN_PLATFORM_SJA1000_H +#define _CAN_PLATFORM_SJA1000_H /* clock divider register */ #define CDR_CLKOUT_MASK 0x07 @@ -32,4 +32,4 @@ struct sja1000_platform_data { u8 cdr; /* clock divider register */ }; -#endif /* !_CAN_PLATFORM_SJA1000_H_ */ +#endif /* !_CAN_PLATFORM_SJA1000_H */ diff --git a/include/linux/can/platform/ti_hecc.h b/include/linux/can/platform/ti_hecc.h index af17cb3f7a84..a52f47ca6c8a 100644 --- a/include/linux/can/platform/ti_hecc.h +++ b/include/linux/can/platform/ti_hecc.h @@ -1,5 +1,5 @@ -#ifndef __CAN_PLATFORM_TI_HECC_H__ -#define __CAN_PLATFORM_TI_HECC_H__ +#ifndef _CAN_PLATFORM_TI_HECC_H +#define _CAN_PLATFORM_TI_HECC_H /* * TI HECC (High End CAN Controller) driver platform header @@ -41,4 +41,4 @@ struct ti_hecc_platform_data { u32 version; void (*transceiver_switch) (int); }; -#endif +#endif /* !_CAN_PLATFORM_TI_HECC_H */ diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index f9bbbb472663..cc00d15c6107 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -7,8 +7,8 @@ * */ -#ifndef CAN_SKB_H -#define CAN_SKB_H +#ifndef _CAN_SKB_H +#define _CAN_SKB_H #include #include @@ -80,4 +80,4 @@ static inline struct sk_buff *can_create_echo_skb(struct sk_buff *skb) return skb; } -#endif /* CAN_SKB_H */ +#endif /* !_CAN_SKB_H */ diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h index 5d9d1d140718..41892f720057 100644 --- a/include/uapi/linux/can.h +++ b/include/uapi/linux/can.h @@ -42,8 +42,8 @@ * DAMAGE. */ -#ifndef CAN_H -#define CAN_H +#ifndef _UAPI_CAN_H +#define _UAPI_CAN_H #include #include @@ -191,4 +191,4 @@ struct can_filter { #define CAN_INV_FILTER 0x20000000U /* to be set in can_filter.can_id */ -#endif /* CAN_H */ +#endif /* !_UAPI_CAN_H */ diff --git a/include/uapi/linux/can/bcm.h b/include/uapi/linux/can/bcm.h index 382251a1d214..89ddb9dc9bdf 100644 --- a/include/uapi/linux/can/bcm.h +++ b/include/uapi/linux/can/bcm.h @@ -41,8 +41,8 @@ * DAMAGE. */ -#ifndef CAN_BCM_H -#define CAN_BCM_H +#ifndef _UAPI_CAN_BCM_H +#define _UAPI_CAN_BCM_H #include #include @@ -95,4 +95,4 @@ enum { #define TX_RESET_MULTI_IDX 0x0200 #define RX_RTR_FRAME 0x0400 -#endif /* CAN_BCM_H */ +#endif /* !_UAPI_CAN_BCM_H */ diff --git a/include/uapi/linux/can/error.h b/include/uapi/linux/can/error.h index b63204545320..c247446ab25a 100644 --- a/include/uapi/linux/can/error.h +++ b/include/uapi/linux/can/error.h @@ -41,8 +41,8 @@ * DAMAGE. */ -#ifndef CAN_ERROR_H -#define CAN_ERROR_H +#ifndef _UAPI_CAN_ERROR_H +#define _UAPI_CAN_ERROR_H #define CAN_ERR_DLC 8 /* dlc for error message frames */ @@ -120,4 +120,4 @@ /* controller specific additional information / data[5..7] */ -#endif /* CAN_ERROR_H */ +#endif /* _UAPI_CAN_ERROR_H */ diff --git a/include/uapi/linux/can/gw.h b/include/uapi/linux/can/gw.h index 844c8964bdfe..3e6184cf2f6d 100644 --- a/include/uapi/linux/can/gw.h +++ b/include/uapi/linux/can/gw.h @@ -41,8 +41,8 @@ * DAMAGE. */ -#ifndef CAN_GW_H -#define CAN_GW_H +#ifndef _UAPI_CAN_GW_H +#define _UAPI_CAN_GW_H #include #include @@ -200,4 +200,4 @@ enum { * Beware of sending unpacked or aligned structs! */ -#endif +#endif /* !_UAPI_CAN_GW_H */ diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h index 7e2e1863db16..813d11f54977 100644 --- a/include/uapi/linux/can/netlink.h +++ b/include/uapi/linux/can/netlink.h @@ -15,8 +15,8 @@ * GNU General Public License for more details. */ -#ifndef CAN_NETLINK_H -#define CAN_NETLINK_H +#ifndef _UAPI_CAN_NETLINK_H +#define _UAPI_CAN_NETLINK_H #include @@ -130,4 +130,4 @@ enum { #define IFLA_CAN_MAX (__IFLA_CAN_MAX - 1) -#endif /* CAN_NETLINK_H */ +#endif /* !_UAPI_CAN_NETLINK_H */ diff --git a/include/uapi/linux/can/raw.h b/include/uapi/linux/can/raw.h index c7d8c334e0ce..78ec76fd89a6 100644 --- a/include/uapi/linux/can/raw.h +++ b/include/uapi/linux/can/raw.h @@ -42,8 +42,8 @@ * DAMAGE. */ -#ifndef CAN_RAW_H -#define CAN_RAW_H +#ifndef _UAPI_CAN_RAW_H +#define _UAPI_CAN_RAW_H #include @@ -59,4 +59,4 @@ enum { CAN_RAW_FD_FRAMES, /* allow CAN FD frames (default:off) */ }; -#endif +#endif /* !_UAPI_CAN_RAW_H */ -- cgit v1.2.3-59-g8ed1b From 4f4aa2ec24dc45881849833a439558d3a378028c Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Sun, 18 May 2014 00:22:38 +0200 Subject: ssb: sprom: add dev_id field for value overriding standard ID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some devices may have different features despite sharing the same ID (e.g. PCI ID). For example 14e4:4331 is usually a dual band, but this can be "limited". Device with "pci/x/y/devid=0x4332" supports 2.4 GHz only. Similarly 0x4333 will mean support for 5 GHz only. Add entry in SPROM so info described above can be extracted and stored. Signed-off-by: Rafał Miłecki Acked-by: Hauke Mehrtens Signed-off-by: John W. Linville --- arch/mips/bcm47xx/sprom.c | 1 + include/linux/ssb/ssb.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/arch/mips/bcm47xx/sprom.c b/arch/mips/bcm47xx/sprom.c index a8b5408dd349..da4cdb16844e 100644 --- a/arch/mips/bcm47xx/sprom.c +++ b/arch/mips/bcm47xx/sprom.c @@ -168,6 +168,7 @@ static void nvram_read_alpha2(const char *prefix, const char *name, static void bcm47xx_fill_sprom_r1234589(struct ssb_sprom *sprom, const char *prefix, bool fallback) { + nvram_read_u16(prefix, NULL, "devid", &sprom->dev_id, 0, fallback); nvram_read_u8(prefix, NULL, "ledbh0", &sprom->gpio0, 0xff, fallback); nvram_read_u8(prefix, NULL, "ledbh1", &sprom->gpio1, 0xff, fallback); nvram_read_u8(prefix, NULL, "ledbh2", &sprom->gpio2, 0xff, fallback); diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index 07ef9b82b66d..4568a5cc9ab8 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -33,6 +33,7 @@ struct ssb_sprom { u8 et1phyaddr; /* MII address for enet1 */ u8 et0mdcport; /* MDIO for enet0 */ u8 et1mdcport; /* MDIO for enet1 */ + u16 dev_id; /* Device ID overriding e.g. PCI ID */ u16 board_rev; /* Board revision number from SPROM. */ u16 board_num; /* Board number from SPROM. */ u16 board_type; /* Board type from SPROM. */ -- cgit v1.2.3-59-g8ed1b From 7d10d2610cc02d432168ca0c5d964cd9e85c1b06 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Mon, 19 May 2014 09:21:09 +0200 Subject: net: cdc_ncm: fix 64bit division build error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The upper timer_interval limit is arbitrary and much higher than anything usable in the real world. Reducing it from 15s to ~4s to make the timer_interval fit in an u32 does not make much difference. The limit is still outside the practical bounds. This eliminates the need for a 64bit timer_interval, fixing a build error related to 64bit division: drivers/built-in.o: In function `cdc_ncm_get_coalesce': ak8975.c:(.text+0x1ac994): undefined reference to `__aeabi_uldivmod' Reported-by: Stephen Rothwell Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 4 ++-- include/linux/usb/cdc_ncm.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 2d0caf1eea25..ad2a386a6e92 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -138,7 +138,7 @@ static int cdc_ncm_get_coalesce(struct net_device *netdev, ec->tx_max_coalesced_frames = ctx->tx_max / ctx->max_datagram_size; /* the timer will fire CDC_NCM_TIMER_PENDING_CNT times in a row */ - ec->tx_coalesce_usecs = (ctx->timer_interval * CDC_NCM_TIMER_PENDING_CNT) / NSEC_PER_USEC; + ec->tx_coalesce_usecs = ctx->timer_interval / (NSEC_PER_USEC / CDC_NCM_TIMER_PENDING_CNT); return 0; } @@ -164,7 +164,7 @@ static int cdc_ncm_set_coalesce(struct net_device *netdev, return -EINVAL; spin_lock_bh(&ctx->mtx); - ctx->timer_interval = ec->tx_coalesce_usecs * NSEC_PER_USEC / CDC_NCM_TIMER_PENDING_CNT; + ctx->timer_interval = ec->tx_coalesce_usecs * (NSEC_PER_USEC / CDC_NCM_TIMER_PENDING_CNT); if (!ctx->timer_interval) ctx->tx_timer_pending = 0; spin_unlock_bh(&ctx->mtx); diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 8c5e38819828..7c9b484735c5 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -78,7 +78,7 @@ #define CDC_NCM_TIMER_PENDING_CNT 2 #define CDC_NCM_TIMER_INTERVAL_USEC 400UL #define CDC_NCM_TIMER_INTERVAL_MIN 5UL -#define CDC_NCM_TIMER_INTERVAL_MAX (15UL * USEC_PER_SEC) +#define CDC_NCM_TIMER_INTERVAL_MAX (U32_MAX / NSEC_PER_USEC) #define cdc_ncm_comm_intf_is_mbim(x) ((x)->desc.bInterfaceSubClass == USB_CDC_SUBCLASS_MBIM && \ (x)->desc.bInterfaceProtocol == USB_CDC_PROTO_NONE) @@ -104,7 +104,7 @@ struct cdc_ncm_ctx { spinlock_t mtx; atomic_t stop; - u64 timer_interval; + u32 timer_interval; u32 max_ndp_size; u32 tx_timer_pending; -- cgit v1.2.3-59-g8ed1b From 5fe821a9dee241fa450703ab7015d970ee0cfb8d Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 19 May 2014 14:56:14 -0700 Subject: net: filter: cleanup invocation of internal BPF Kernel API for classic BPF socket filters is: sk_unattached_filter_create() - validate classic BPF, convert, JIT SK_RUN_FILTER() - run it sk_unattached_filter_destroy() - destroy socket filter Cleanup internal BPF kernel API as following: sk_filter_select_runtime() - final step of internal BPF creation. Try to JIT internal BPF program, if JIT is not available select interpreter SK_RUN_FILTER() - run it sk_filter_free() - free internal BPF program Disallow direct calls to BPF interpreter. Execution of the BPF program should be done with SK_RUN_FILTER() macro. Example of internal BPF create, run, destroy: struct sk_filter *fp; fp = kzalloc(sk_filter_size(prog_len), GFP_KERNEL); memcpy(fp->insni, prog, prog_len * sizeof(fp->insni[0])); fp->len = prog_len; sk_filter_select_runtime(fp); SK_RUN_FILTER(fp, ctx); sk_filter_free(fp); Sockets, seccomp, testsuite, tracing are using different ways to populate sk_filter, so first steps of program creation are not common. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/filter.h | 6 ++---- kernel/seccomp.c | 6 ++---- lib/test_bpf.c | 4 ++-- net/core/filter.c | 44 ++++++++++++++++++++++++++++---------------- 4 files changed, 34 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 9d5ae0a2c954..7977b3958e25 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -184,10 +184,8 @@ static inline unsigned int sk_filter_size(unsigned int proglen) int sk_filter(struct sock *sk, struct sk_buff *skb); -u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx, - const struct sock_filter_int *insni); -u32 sk_run_filter_int_skb(const struct sk_buff *ctx, - const struct sock_filter_int *insni); +void sk_filter_select_runtime(struct sk_filter *fp); +void sk_filter_free(struct sk_filter *fp); int sk_convert_filter(struct sock_filter *prog, int len, struct sock_filter_int *new_prog, int *new_len); diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 7e02d624cc50..1036b6f2fded 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -273,10 +273,8 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) atomic_set(&filter->usage, 1); filter->prog->len = new_len; - filter->prog->bpf_func = (void *)sk_run_filter_int_seccomp; - /* JIT internal BPF into native HW instructions */ - bpf_int_jit_compile(filter->prog); + sk_filter_select_runtime(filter->prog); /* * If there is an existing filter, make it the prev and don't drop its @@ -340,7 +338,7 @@ void put_seccomp_filter(struct task_struct *tsk) while (orig && atomic_dec_and_test(&orig->usage)) { struct seccomp_filter *freeme = orig; orig = orig->prev; - bpf_jit_free(freeme->prog); + sk_filter_free(freeme->prog); kfree(freeme); } } diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 3603ebcd5d65..e160934430eb 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -1489,7 +1489,7 @@ static __init int test_bpf(void) memcpy(fp_ext->insns, tests[i].insns_int, fprog.len * 8); fp->len = fprog.len; - fp->bpf_func = sk_run_filter_int_skb; + sk_filter_select_runtime(fp); } else { err = sk_unattached_filter_create(&fp, &fprog); if (tests[i].data_type == EXPECTED_FAIL) { @@ -1516,7 +1516,7 @@ static __init int test_bpf(void) if (tests[i].data_type != SKB_INT) sk_unattached_filter_destroy(fp); else - kfree(fp); + sk_filter_free(fp); if (err) { pr_cont("FAIL %d\n", err); diff --git a/net/core/filter.c b/net/core/filter.c index 32c5b44c537e..7067cb240d3e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -153,7 +153,7 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) * keep, 0 for none. @ctx is the data we are operating on, @insn is the * array of filter instructions. */ -unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn) +static unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn) { u64 stack[MAX_BPF_STACK / sizeof(u64)]; u64 regs[MAX_BPF_REG], tmp; @@ -571,15 +571,6 @@ load_byte: return 0; } -u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx, - const struct sock_filter_int *insni) - __attribute__ ((alias ("__sk_run_filter"))); - -u32 sk_run_filter_int_skb(const struct sk_buff *ctx, - const struct sock_filter_int *insni) - __attribute__ ((alias ("__sk_run_filter"))); -EXPORT_SYMBOL_GPL(sk_run_filter_int_skb); - /* Helper to find the offset of pkt_type in sk_buff structure. We want * to make sure its still a 3bit field starting at a byte boundary; * taken from arch/x86/net/bpf_jit_comp.c. @@ -1397,7 +1388,7 @@ static void sk_filter_release_rcu(struct rcu_head *rcu) struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); sk_release_orig_filter(fp); - bpf_jit_free(fp); + sk_filter_free(fp); } /** @@ -1497,7 +1488,6 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp, goto out_err_free; } - fp->bpf_func = sk_run_filter_int_skb; fp->len = new_len; /* 2nd pass: remap sock_filter insns into sock_filter_int insns. */ @@ -1510,6 +1500,8 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp, */ goto out_err_free; + sk_filter_select_runtime(fp); + kfree(old_prog); return fp; @@ -1528,6 +1520,29 @@ void __weak bpf_int_jit_compile(struct sk_filter *prog) { } +/** + * sk_filter_select_runtime - select execution runtime for BPF program + * @fp: sk_filter populated with internal BPF program + * + * try to JIT internal BPF program, if JIT is not available select interpreter + * BPF program will be executed via SK_RUN_FILTER() macro + */ +void sk_filter_select_runtime(struct sk_filter *fp) +{ + fp->bpf_func = (void *) __sk_run_filter; + + /* Probe if internal BPF can be JITed */ + bpf_int_jit_compile(fp); +} +EXPORT_SYMBOL_GPL(sk_filter_select_runtime); + +/* free internal BPF program */ +void sk_filter_free(struct sk_filter *fp) +{ + bpf_jit_free(fp); +} +EXPORT_SYMBOL_GPL(sk_filter_free); + static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp, struct sock *sk) { @@ -1548,12 +1563,9 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp, /* JIT compiler couldn't process this filter, so do the * internal BPF translation for the optimized interpreter. */ - if (!fp->jited) { + if (!fp->jited) fp = __sk_migrate_filter(fp, sk); - /* Probe if internal BPF can be jit-ed */ - bpf_int_jit_compile(fp); - } return fp; } -- cgit v1.2.3-59-g8ed1b From ca8a22634381537c92b5a10308652e1c38fd9edf Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Thu, 22 May 2014 10:41:08 -0400 Subject: tcp: make cwnd-limited checks measurement-based, and gentler Experience with the recent e114a710aa50 ("tcp: fix cwnd limited checking to improve congestion control") has shown that there are common cases where that commit can cause cwnd to be much larger than necessary. This leads to TSO autosizing cooking skbs that are too large, among other things. The main problems seemed to be: (1) That commit attempted to predict the future behavior of the connection by looking at the write queue (if TSO or TSQ limit sending). That prediction sometimes overestimated future outstanding packets. (2) That commit always allowed cwnd to grow to twice the number of outstanding packets (even in congestion avoidance, where this is not needed). This commit improves both of these, by: (1) Switching to a measurement-based approach where we explicitly track the largest number of packets in flight during the past window ("max_packets_out"), and remember whether we were cwnd-limited at the moment we finished sending that flight. (2) Only allowing cwnd to grow to twice the number of outstanding packets ("max_packets_out") in slow start. In congestion avoidance mode we now only allow cwnd to grow if it was fully utilized. Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 6 ++++-- include/net/tcp.h | 11 ++++++++--- net/ipv4/tcp_output.c | 37 +++++++++++++++++++++++-------------- 3 files changed, 35 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index bc35e4709e8e..a0513210798f 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -197,7 +197,8 @@ struct tcp_sock { u8 do_early_retrans:1,/* Enable RFC5827 early-retransmit */ syn_data:1, /* SYN includes data */ syn_fastopen:1, /* SYN includes Fast Open option */ - syn_data_acked:1;/* data in SYN is acked by SYN-ACK */ + syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ + is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */ /* RTT measurement */ @@ -209,6 +210,8 @@ struct tcp_sock { u32 packets_out; /* Packets which are "in flight" */ u32 retrans_out; /* Retransmitted packets out */ + u32 max_packets_out; /* max packets_out in last window */ + u32 max_packets_seq; /* right edge of max_packets_out flight */ u16 urg_data; /* Saved octet of OOB data and control flags */ u8 ecn_flags; /* ECN status bits. */ @@ -230,7 +233,6 @@ struct tcp_sock { u32 snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */ u32 snd_cwnd_used; u32 snd_cwnd_stamp; - u32 lsnd_pending; /* packets inflight or unsent since last xmit */ u32 prior_cwnd; /* Congestion window at start of Recovery. */ u32 prr_delivered; /* Number of newly delivered packets to * receiver in Recovery. */ diff --git a/include/net/tcp.h b/include/net/tcp.h index f5d6ca4a9d28..e80abe4486cb 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -971,8 +971,9 @@ static inline u32 tcp_wnd_end(const struct tcp_sock *tp) /* We follow the spirit of RFC2861 to validate cwnd but implement a more * flexible approach. The RFC suggests cwnd should not be raised unless - * it was fully used previously. But we allow cwnd to grow as long as the - * application has used half the cwnd. + * it was fully used previously. And that's exactly what we do in + * congestion avoidance mode. But in slow start we allow cwnd to grow + * as long as the application has used half the cwnd. * Example : * cwnd is 10 (IW10), but application sends 9 frames. * We allow cwnd to reach 18 when all frames are ACKed. @@ -985,7 +986,11 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); - return tp->snd_cwnd < 2 * tp->lsnd_pending; + /* If in slow start, ensure cwnd grows to twice what was ACKed. */ + if (tp->snd_cwnd <= tp->snd_ssthresh) + return tp->snd_cwnd < 2 * tp->max_packets_out; + + return tp->is_cwnd_limited; } static inline void tcp_check_probe_timer(struct sock *sk) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3d61c52bdf79..d463c35db33d 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1402,11 +1402,19 @@ static void tcp_cwnd_application_limited(struct sock *sk) tp->snd_cwnd_stamp = tcp_time_stamp; } -static void tcp_cwnd_validate(struct sock *sk, u32 unsent_segs) +static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) { struct tcp_sock *tp = tcp_sk(sk); - tp->lsnd_pending = tp->packets_out + unsent_segs; + /* Track the maximum number of outstanding packets in each + * window, and remember whether we were cwnd-limited then. + */ + if (!before(tp->snd_una, tp->max_packets_seq) || + tp->packets_out > tp->max_packets_out) { + tp->max_packets_out = tp->packets_out; + tp->max_packets_seq = tp->snd_nxt; + tp->is_cwnd_limited = is_cwnd_limited; + } if (tcp_is_cwnd_limited(sk)) { /* Network is feed fully. */ @@ -1660,7 +1668,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, * * This algorithm is from John Heffner. */ -static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) +static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, + bool *is_cwnd_limited) { struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); @@ -1724,6 +1733,9 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) if (!tp->tso_deferred) tp->tso_deferred = 1 | (jiffies << 1); + if (cong_win < send_win && cong_win < skb->len) + *is_cwnd_limited = true; + return true; send_now: @@ -1881,9 +1893,10 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - unsigned int tso_segs, sent_pkts, unsent_segs = 0; + unsigned int tso_segs, sent_pkts; int cwnd_quota; int result; + bool is_cwnd_limited = false; sent_pkts = 0; @@ -1908,6 +1921,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, cwnd_quota = tcp_cwnd_test(tp, skb); if (!cwnd_quota) { + is_cwnd_limited = true; if (push_one == 2) /* Force out a loss probe pkt. */ cwnd_quota = 1; @@ -1924,8 +1938,9 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, nonagle : TCP_NAGLE_PUSH)))) break; } else { - if (!push_one && tcp_tso_should_defer(sk, skb)) - goto compute_unsent_segs; + if (!push_one && + tcp_tso_should_defer(sk, skb, &is_cwnd_limited)) + break; } /* TCP Small Queues : @@ -1950,14 +1965,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, * there is no smp_mb__after_set_bit() yet */ smp_mb__after_clear_bit(); - if (atomic_read(&sk->sk_wmem_alloc) > limit) { - u32 unsent_bytes; - -compute_unsent_segs: - unsent_bytes = tp->write_seq - tp->snd_nxt; - unsent_segs = DIV_ROUND_UP(unsent_bytes, mss_now); + if (atomic_read(&sk->sk_wmem_alloc) > limit) break; - } } limit = mss_now; @@ -1997,7 +2006,7 @@ repair: /* Send one loss probe per tail loss episode. */ if (push_one != 2) tcp_schedule_loss_probe(sk); - tcp_cwnd_validate(sk, unsent_segs); + tcp_cwnd_validate(sk, is_cwnd_limited); return false; } return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk)); -- cgit v1.2.3-59-g8ed1b From da08143b85203b581f4a6461b149186b0e9592df Mon Sep 17 00:00:00 2001 From: Michal Kubeček Date: Tue, 20 May 2014 08:29:25 +0200 Subject: vlan: more careful checksum features handling When combining real_dev's features and vlan_features, simple bitwise AND is used. This doesn't work well for checksum offloading features as if one set has NETIF_F_HW_CSUM and the other NETIF_F_IP_CSUM and/or NETIF_F_IPV6_CSUM, we end up with no checksum offloading. However, from the logical point of view (how can_checksum_protocol() works), NETIF_F_HW_CSUM contains the functionality of NETIF_F_IP_CSUM and NETIF_F_IPV6_CSUM so that the result should be IP/IPV6. Add helper function netdev_intersect_features() implementing this logic and use it in vlan_dev_fix_features(). Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- include/linux/netdevice.h | 14 ++++++++++++++ net/8021q/vlan_dev.c | 4 ++-- 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2dea98cbbdba..f4ad247fd324 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3153,6 +3153,20 @@ const char *netdev_drivername(const struct net_device *dev); void linkwatch_run_queue(void); +static inline netdev_features_t netdev_intersect_features(netdev_features_t f1, + netdev_features_t f2) +{ + if (f1 & NETIF_F_GEN_CSUM) + f1 |= (NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); + if (f2 & NETIF_F_GEN_CSUM) + f2 |= (NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); + f1 &= f2; + if (f1 & NETIF_F_GEN_CSUM) + f1 &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); + + return f1; +} + static inline netdev_features_t netdev_get_wanted_features( struct net_device *dev) { diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 8f025afa29fd..4181fb71ba77 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -678,9 +678,9 @@ static netdev_features_t vlan_dev_fix_features(struct net_device *dev, struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; netdev_features_t old_features = features; - features &= real_dev->vlan_features; + features = netdev_intersect_features(features, real_dev->vlan_features); features |= NETIF_F_RXCSUM; - features &= real_dev->features; + features = netdev_intersect_features(features, real_dev->features); features |= old_features & NETIF_F_SOFT_FEATURES; features |= NETIF_F_LLTX; -- cgit v1.2.3-59-g8ed1b From ea3429c77d4e34cb2983b90e49a5506fedf70b98 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 22 May 2014 09:47:50 -0700 Subject: of: mdio: remove of_phy_connect_fixed_link All in-tree drivers have been converted to use the new pair of functions: of_is_fixed_phy_link() plus of_phy_register_fixed_link(), we can now safely remove of_phy_connect_fixed_link. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/of/of_mdio.c | 38 -------------------------------------- include/linux/of_mdio.h | 10 ---------- 2 files changed, 48 deletions(-) (limited to 'include/linux') diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index 1def0bb5cb37..4c1e01ed16dc 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -245,44 +245,6 @@ struct phy_device *of_phy_connect(struct net_device *dev, } EXPORT_SYMBOL(of_phy_connect); -/** - * of_phy_connect_fixed_link - Parse fixed-link property and return a dummy phy - * @dev: pointer to net_device claiming the phy - * @hndlr: Link state callback for the network device - * @iface: PHY data interface type - * - * This function is a temporary stop-gap and will be removed soon. It is - * only to support the fs_enet, ucc_geth and gianfar Ethernet drivers. Do - * not call this function from new drivers. - */ -struct phy_device *of_phy_connect_fixed_link(struct net_device *dev, - void (*hndlr)(struct net_device *), - phy_interface_t iface) -{ - struct device_node *net_np; - char bus_id[MII_BUS_ID_SIZE + 3]; - struct phy_device *phy; - const __be32 *phy_id; - int sz; - - if (!dev->dev.parent) - return NULL; - - net_np = dev->dev.parent->of_node; - if (!net_np) - return NULL; - - phy_id = of_get_property(net_np, "fixed-link", &sz); - if (!phy_id || sz < sizeof(*phy_id)) - return NULL; - - sprintf(bus_id, PHY_ID_FMT, "fixed-0", be32_to_cpu(phy_id[0])); - - phy = phy_connect(dev, bus_id, hndlr, iface); - return IS_ERR(phy) ? NULL : phy; -} -EXPORT_SYMBOL(of_phy_connect_fixed_link); - /** * of_phy_attach - Attach to a PHY without starting the state machine * @dev: pointer to net_device claiming the phy diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index 0aa367e316cb..d449018d0726 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -22,9 +22,6 @@ extern struct phy_device *of_phy_connect(struct net_device *dev, struct phy_device *of_phy_attach(struct net_device *dev, struct device_node *phy_np, u32 flags, phy_interface_t iface); -extern struct phy_device *of_phy_connect_fixed_link(struct net_device *dev, - void (*hndlr)(struct net_device *), - phy_interface_t iface); extern struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np); @@ -59,13 +56,6 @@ static inline struct phy_device *of_phy_attach(struct net_device *dev, return NULL; } -static inline struct phy_device *of_phy_connect_fixed_link(struct net_device *dev, - void (*hndlr)(struct net_device *), - phy_interface_t iface) -{ - return NULL; -} - static inline struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np) { return NULL; -- cgit v1.2.3-59-g8ed1b From ecc8fb11cdb37d108d4597ba0f6bdff77c6019af Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Thu, 22 May 2014 15:55:39 +0300 Subject: net/mlx4_core: Deprecate use_prio module parameter use_prio was added as part of an infrastructure for running FCoE in A0 mode. FCoE didn't get into Mellanox Upstream driver, and when it will, it won't be using A0 steering mode. Therefore we can safely deprecate this module parameter without hurting any existing user. CC: Carol Soto Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 8 ++++---- include/linux/mlx4/device.h | 1 - 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index a56f6012258d..08ff5dd9298f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -132,8 +132,7 @@ MODULE_PARM_DESC(log_num_vlan, "Log2 max number of VLANs per ETH port (0-7)"); static bool use_prio; module_param_named(use_prio, use_prio, bool, 0444); -MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports " - "(0/1, default 0)"); +MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports (deprecated)"); int log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG); module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444); @@ -290,7 +289,6 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.log_num_macs = log_num_mac; dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS; - dev->caps.log_num_prios = use_prio ? 3 : 0; for (i = 1; i <= dev->caps.num_ports; ++i) { dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE; @@ -358,7 +356,6 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] = (1 << dev->caps.log_num_macs) * (1 << dev->caps.log_num_vlans) * - (1 << dev->caps.log_num_prios) * dev->caps.num_ports; dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH; @@ -2775,6 +2772,9 @@ static int __init mlx4_verify_params(void) pr_warning("mlx4_core: log_num_vlan - obsolete module param, using %d\n", MLX4_LOG_NUM_VLANS); + if (use_prio != 0) + pr_warn("mlx4_core: use_prio - obsolete module param, ignored\n"); + if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 7)) { pr_warning("mlx4_core: bad log_mtts_per_seg: %d\n", log_mtts_per_seg); return -1; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index c0468e6f0442..ca38871a585c 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -449,7 +449,6 @@ struct mlx4_caps { int reserved_qps_base[MLX4_NUM_QP_REGION]; int log_num_macs; int log_num_vlans; - int log_num_prios; enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1]; u8 supported_type[MLX4_MAX_PORTS + 1]; u8 suggested_type[MLX4_MAX_PORTS + 1]; -- cgit v1.2.3-59-g8ed1b From ed616689a3d95eb6c9bdbb1ef74b0f50cbdf276a Mon Sep 17 00:00:00 2001 From: Sucheta Chakraborty Date: Thu, 22 May 2014 09:59:05 -0400 Subject: net-next:v4: Add support to configure SR-IOV VF minimum and maximum Tx rate through ip tool. o min_tx_rate puts lower limit on the VF bandwidth. VF is guaranteed to have a bandwidth of at least this value. max_tx_rate puts cap on the VF bandwidth. VF can have a bandwidth of up to this value. o A new handler set_vf_rate for attr IFLA_VF_RATE has been introduced which takes 4 arguments: netdev, VF number, min_tx_rate, max_tx_rate o ndo_set_vf_rate replaces ndo_set_vf_tx_rate handler. o Drivers that currently implement ndo_set_vf_tx_rate should now call ndo_set_vf_rate instead and reject attempt to set a minimum bandwidth greater than 0 for IFLA_VF_TX_RATE when IFLA_VF_RATE is not yet implemented by driver. o If user enters only one of either min_tx_rate or max_tx_rate, then, userland should read back the other value from driver and set both for IFLA_VF_RATE. Drivers that have not yet implemented IFLA_VF_RATE should always return min_tx_rate as 0 when read from ip tool. o If both IFLA_VF_TX_RATE and IFLA_VF_RATE options are specified, then IFLA_VF_RATE should override. o Idea is to have consistent display of rate values to user. o Usage example: - ./ip link set p4p1 vf 0 rate 900 ./ip link show p4p1 32: p4p1: mtu 1500 qdisc noop state DOWN mode DEFAULT qlen 1000 link/ether 00:0e:1e:08:b0:f0 brd ff:ff:ff:ff:ff:ff vf 0 MAC 3e:a0:ca:bd:ae:5a, tx rate 900 (Mbps), max_tx_rate 900Mbps vf 1 MAC f6:c6:7c:3f:3d:6c vf 2 MAC 56:32:43:98:d7:71 vf 3 MAC d6:be:c3:b5:85:ff vf 4 MAC ee:a9:9a:1e:19:14 vf 5 MAC 4a:d0:4c:07:52:18 vf 6 MAC 3a:76:44:93:62:f9 vf 7 MAC 82:e9:e7:e3:15:1a ./ip link set p4p1 vf 0 max_tx_rate 300 min_tx_rate 200 ./ip link show p4p1 32: p4p1: mtu 1500 qdisc noop state DOWN mode DEFAULT qlen 1000 link/ether 00:0e:1e:08:b0:f0 brd ff:ff:ff:ff:ff:ff vf 0 MAC 3e:a0:ca:bd:ae:5a, tx rate 300 (Mbps), max_tx_rate 300Mbps, min_tx_rate 200Mbps vf 1 MAC f6:c6:7c:3f:3d:6c vf 2 MAC 56:32:43:98:d7:71 vf 3 MAC d6:be:c3:b5:85:ff vf 4 MAC ee:a9:9a:1e:19:14 vf 5 MAC 4a:d0:4c:07:52:18 vf 6 MAC 3a:76:44:93:62:f9 vf 7 MAC 82:e9:e7:e3:15:1a ./ip link set p4p1 vf 0 max_tx_rate 600 rate 300 ./ip link show p4p1 32: p4p1: mtu 1500 qdisc noop state DOWN mode DEFAULT qlen 1000 link/ether 00:0e:1e:08:b0:f brd ff:ff:ff:ff:ff:ff vf 0 MAC 3e:a0:ca:bd:ae:5, tx rate 600 (Mbps), max_tx_rate 600Mbps, min_tx_rate 200Mbps vf 1 MAC f6:c6:7c:3f:3d:6c vf 2 MAC 56:32:43:98:d7:71 vf 3 MAC d6:be:c3:b5:85:ff vf 4 MAC ee:a9:9a:1e:19:14 vf 5 MAC 4a:d0:4c:07:52:18 vf 6 MAC 3a:76:44:93:62:f9 vf 7 MAC 82:e9:e7:e3:15:1a Signed-off-by: Sucheta Chakraborty Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 3 +- drivers/net/ethernet/emulex/benet/be_main.c | 21 +++++---- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 26 +++++++---- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h | 3 +- drivers/net/ethernet/intel/igb/igb_main.c | 20 ++++++--- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 13 ++++-- drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h | 3 +- drivers/net/ethernet/mellanox/mlx4/cmd.c | 11 ++--- drivers/net/ethernet/qlogic/qlcnic/qlcnic.h | 1 + drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 2 +- drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h | 2 +- .../ethernet/qlogic/qlcnic/qlcnic_sriov_common.c | 1 + .../net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c | 52 +++++++++++++++------- drivers/net/ethernet/sfc/siena_sriov.c | 3 +- include/linux/if_link.h | 3 +- include/linux/netdevice.h | 8 ++-- include/uapi/linux/if_link.h | 9 +++- net/core/rtnetlink.c | 38 +++++++++++++--- 20 files changed, 156 insertions(+), 67 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 81cc2d9831c2..8d0479d5be8e 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -2576,7 +2576,8 @@ int bnx2x_get_vf_config(struct net_device *dev, int vfidx, ivi->vf = vfidx; ivi->qos = 0; - ivi->tx_rate = 10000; /* always 10G. TBA take from link struct */ + ivi->max_tx_rate = 10000; /* always 10G. TBA take from link struct */ + ivi->min_tx_rate = 0; ivi->spoofchk = 1; /*always enabled */ if (vf->state == VF_ENABLED) { /* mac and vlan are in vlan_mac objects */ diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index dcc5e5c69743..4693d004a223 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1302,7 +1302,8 @@ static int be_get_vf_config(struct net_device *netdev, int vf, return -EINVAL; vi->vf = vf; - vi->tx_rate = vf_cfg->tx_rate; + vi->max_tx_rate = vf_cfg->tx_rate; + vi->min_tx_rate = 0; vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK; vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT; memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN); @@ -1342,7 +1343,8 @@ static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos) return status; } -static int be_set_vf_tx_rate(struct net_device *netdev, int vf, int rate) +static int be_set_vf_tx_rate(struct net_device *netdev, int vf, + int min_tx_rate, int max_tx_rate) { struct be_adapter *adapter = netdev_priv(netdev); int status = 0; @@ -1353,18 +1355,21 @@ static int be_set_vf_tx_rate(struct net_device *netdev, int vf, int rate) if (vf >= adapter->num_vfs) return -EINVAL; - if (rate < 100 || rate > 10000) { + if (min_tx_rate) + return -EINVAL; + + if (max_tx_rate < 100 || max_tx_rate > 10000) { dev_err(&adapter->pdev->dev, - "tx rate must be between 100 and 10000 Mbps\n"); + "max tx rate must be between 100 and 10000 Mbps\n"); return -EINVAL; } - status = be_cmd_config_qos(adapter, rate / 10, vf + 1); + status = be_cmd_config_qos(adapter, max_tx_rate / 10, vf + 1); if (status) dev_err(&adapter->pdev->dev, - "tx rate %d on VF %d failed\n", rate, vf); + "max tx rate %d on VF %d failed\n", max_tx_rate, vf); else - adapter->vf_cfg[vf].tx_rate = rate; + adapter->vf_cfg[vf].tx_rate = max_tx_rate; return status; } static int be_set_vf_link_state(struct net_device *netdev, int vf, @@ -4257,7 +4262,7 @@ static const struct net_device_ops be_netdev_ops = { .ndo_vlan_rx_kill_vid = be_vlan_rem_vid, .ndo_set_vf_mac = be_set_vf_mac, .ndo_set_vf_vlan = be_set_vf_vlan, - .ndo_set_vf_tx_rate = be_set_vf_tx_rate, + .ndo_set_vf_rate = be_set_vf_tx_rate, .ndo_get_vf_config = be_get_vf_config, .ndo_set_vf_link_state = be_set_vf_link_state, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index e0e5c6a867b1..96f7fabd8758 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -6724,7 +6724,7 @@ static const struct net_device_ops i40e_netdev_ops = { .ndo_set_features = i40e_set_features, .ndo_set_vf_mac = i40e_ndo_set_vf_mac, .ndo_set_vf_vlan = i40e_ndo_set_vf_port_vlan, - .ndo_set_vf_tx_rate = i40e_ndo_set_vf_bw, + .ndo_set_vf_rate = i40e_ndo_set_vf_bw, .ndo_get_vf_config = i40e_ndo_get_vf_config, .ndo_set_vf_link_state = i40e_ndo_set_vf_link_state, #ifdef CONFIG_I40E_VXLAN diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 4d219566a04d..8564b0939dc4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -2205,7 +2205,8 @@ error_pvid: * * configure vf tx rate **/ -int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int tx_rate) +int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, + int max_tx_rate) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_pf *pf = np->vsi->back; @@ -2221,6 +2222,12 @@ int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int tx_rate) goto error; } + if (min_tx_rate) { + dev_err(&pf->pdev->dev, "Invalid min tx rate (%d) (greater than 0) specified for vf %d.\n", + min_tx_rate, vf_id); + return -EINVAL; + } + vf = &(pf->vf[vf_id]); vsi = pf->vsi[vf->lan_vsi_index]; if (!test_bit(I40E_VF_STAT_INIT, &vf->vf_states)) { @@ -2243,23 +2250,23 @@ int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int tx_rate) break; } - if (tx_rate > speed) { - dev_err(&pf->pdev->dev, "Invalid tx rate %d specified for vf %d.", - tx_rate, vf->vf_id); + if (max_tx_rate > speed) { + dev_err(&pf->pdev->dev, "Invalid max tx rate %d specified for vf %d.", + max_tx_rate, vf->vf_id); ret = -EINVAL; goto error; } /* Tx rate credits are in values of 50Mbps, 0 is disabled*/ - ret = i40e_aq_config_vsi_bw_limit(&pf->hw, vsi->seid, tx_rate / 50, 0, - NULL); + ret = i40e_aq_config_vsi_bw_limit(&pf->hw, vsi->seid, max_tx_rate / 50, + 0, NULL); if (ret) { - dev_err(&pf->pdev->dev, "Unable to set tx rate, error code %d.\n", + dev_err(&pf->pdev->dev, "Unable to set max tx rate, error code %d.\n", ret); ret = -EIO; goto error; } - vf->tx_rate = tx_rate; + vf->tx_rate = max_tx_rate; error: return ret; } @@ -2301,7 +2308,8 @@ int i40e_ndo_get_vf_config(struct net_device *netdev, memcpy(&ivi->mac, vf->default_lan_addr.addr, ETH_ALEN); - ivi->tx_rate = vf->tx_rate; + ivi->max_tx_rate = vf->tx_rate; + ivi->min_tx_rate = 0; ivi->vlan = le16_to_cpu(vsi->info.pvid) & I40E_VLAN_MASK; ivi->qos = (le16_to_cpu(vsi->info.pvid) & I40E_PRIORITY_MASK) >> I40E_VLAN_PRIORITY_SHIFT; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index ba3d1f8414be..5a559be4ba2c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -116,7 +116,8 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf); int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac); int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos); -int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int tx_rate); +int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, + int max_tx_rate); int i40e_ndo_get_vf_config(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi); int i40e_ndo_set_vf_link_state(struct net_device *netdev, int vf_id, int link); diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index bfcda8a455f4..1075b3f8c415 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -169,7 +169,7 @@ static void igb_restore_vf_multicasts(struct igb_adapter *adapter); static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac); static int igb_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos); -static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate); +static int igb_ndo_set_vf_bw(struct net_device *, int, int, int); static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting); static int igb_ndo_get_vf_config(struct net_device *netdev, int vf, @@ -2084,7 +2084,7 @@ static const struct net_device_ops igb_netdev_ops = { .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid, .ndo_set_vf_mac = igb_ndo_set_vf_mac, .ndo_set_vf_vlan = igb_ndo_set_vf_vlan, - .ndo_set_vf_tx_rate = igb_ndo_set_vf_bw, + .ndo_set_vf_rate = igb_ndo_set_vf_bw, .ndo_set_vf_spoofchk = igb_ndo_set_vf_spoofchk, .ndo_get_vf_config = igb_ndo_get_vf_config, #ifdef CONFIG_NET_POLL_CONTROLLER @@ -7879,7 +7879,8 @@ static void igb_check_vf_rate_limit(struct igb_adapter *adapter) } } -static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate) +static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, + int min_tx_rate, int max_tx_rate) { struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; @@ -7888,15 +7889,19 @@ static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate) if (hw->mac.type != e1000_82576) return -EOPNOTSUPP; + if (min_tx_rate) + return -EINVAL; + actual_link_speed = igb_link_mbps(adapter->link_speed); if ((vf >= adapter->vfs_allocated_count) || (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) || - (tx_rate < 0) || (tx_rate > actual_link_speed)) + (max_tx_rate < 0) || + (max_tx_rate > actual_link_speed)) return -EINVAL; adapter->vf_rate_link_speed = actual_link_speed; - adapter->vf_data[vf].tx_rate = (u16)tx_rate; - igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed); + adapter->vf_data[vf].tx_rate = (u16)max_tx_rate; + igb_set_vf_rate_limit(hw, vf, max_tx_rate, actual_link_speed); return 0; } @@ -7936,7 +7941,8 @@ static int igb_ndo_get_vf_config(struct net_device *netdev, return -EINVAL; ivi->vf = vf; memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN); - ivi->tx_rate = adapter->vf_data[vf].tx_rate; + ivi->max_tx_rate = adapter->vf_data[vf].tx_rate; + ivi->min_tx_rate = 0; ivi->vlan = adapter->vf_data[vf].pf_vlan; ivi->qos = adapter->vf_data[vf].pf_qos; ivi->spoofchk = adapter->vf_data[vf].spoofchk_enabled; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 8089ea9f2fba..a5332389620a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7926,7 +7926,7 @@ static const struct net_device_ops ixgbe_netdev_ops = { .ndo_do_ioctl = ixgbe_ioctl, .ndo_set_vf_mac = ixgbe_ndo_set_vf_mac, .ndo_set_vf_vlan = ixgbe_ndo_set_vf_vlan, - .ndo_set_vf_tx_rate = ixgbe_ndo_set_vf_bw, + .ndo_set_vf_rate = ixgbe_ndo_set_vf_bw, .ndo_set_vf_spoofchk = ixgbe_ndo_set_vf_spoofchk, .ndo_get_vf_config = ixgbe_ndo_get_vf_config, .ndo_get_stats64 = ixgbe_get_stats64, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index a01417c06620..3248e208c9dc 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -1222,7 +1222,8 @@ void ixgbe_check_vf_rate_limit(struct ixgbe_adapter *adapter) } } -int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate) +int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int min_tx_rate, + int max_tx_rate) { struct ixgbe_adapter *adapter = netdev_priv(netdev); int link_speed; @@ -1240,13 +1241,16 @@ int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate) if (link_speed != 10000) return -EINVAL; + if (min_tx_rate) + return -EINVAL; + /* rate limit cannot be less than 10Mbs or greater than link speed */ - if (tx_rate && ((tx_rate <= 10) || (tx_rate > link_speed))) + if (max_tx_rate && ((max_tx_rate <= 10) || (max_tx_rate > link_speed))) return -EINVAL; /* store values */ adapter->vf_rate_link_speed = link_speed; - adapter->vfinfo[vf].tx_rate = tx_rate; + adapter->vfinfo[vf].tx_rate = max_tx_rate; /* update hardware configuration */ ixgbe_set_vf_rate_limit(adapter, vf); @@ -1288,7 +1292,8 @@ int ixgbe_ndo_get_vf_config(struct net_device *netdev, return -EINVAL; ivi->vf = vf; memcpy(&ivi->mac, adapter->vfinfo[vf].vf_mac_addresses, ETH_ALEN); - ivi->tx_rate = adapter->vfinfo[vf].tx_rate; + ivi->max_tx_rate = adapter->vfinfo[vf].tx_rate; + ivi->min_tx_rate = 0; ivi->vlan = adapter->vfinfo[vf].pf_vlan; ivi->qos = adapter->vfinfo[vf].pf_qos; ivi->spoofchk = adapter->vfinfo[vf].spoofchk_enabled; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h index cea640147604..32c26d586c01 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h @@ -44,7 +44,8 @@ void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter); int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int queue, u8 *mac); int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int queue, u16 vlan, u8 qos); -int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate); +int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int min_tx_rate, + int max_tx_rate); int ixgbe_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting); int ixgbe_ndo_get_vf_config(struct net_device *netdev, int vf, struct ifla_vf_info *ivi); diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 59c7fd406805..ca8e7cb5a8e4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2486,11 +2486,12 @@ int mlx4_get_vf_config(struct mlx4_dev *dev, int port, int vf, struct ifla_vf_in ivf->mac[4] = ((s_info->mac >> (1*8)) & 0xff); ivf->mac[5] = ((s_info->mac) & 0xff); - ivf->vlan = s_info->default_vlan; - ivf->qos = s_info->default_qos; - ivf->tx_rate = s_info->tx_rate; - ivf->spoofchk = s_info->spoofchk; - ivf->linkstate = s_info->link_state; + ivf->vlan = s_info->default_vlan; + ivf->qos = s_info->default_qos; + ivf->max_tx_rate = s_info->tx_rate; + ivf->min_tx_rate = 0; + ivf->spoofchk = s_info->spoofchk; + ivf->linkstate = s_info->link_state; return 0; } diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h index 6e7527e2b595..41abe6070466 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h @@ -1319,6 +1319,7 @@ struct qlcnic_eswitch { #define QL_STATUS_INVALID_PARAM -1 #define MAX_BW 100 /* % of link speed */ +#define MIN_BW 1 /* % of link speed */ #define MAX_VLAN_ID 4095 #define MIN_VLAN_ID 2 #define DEFAULT_MAC_LEARN 1 diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index f0a285359e66..f06ba90b4282 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -525,7 +525,7 @@ static const struct net_device_ops qlcnic_netdev_ops = { #endif #ifdef CONFIG_QLCNIC_SRIOV .ndo_set_vf_mac = qlcnic_sriov_set_vf_mac, - .ndo_set_vf_tx_rate = qlcnic_sriov_set_vf_tx_rate, + .ndo_set_vf_rate = qlcnic_sriov_set_vf_tx_rate, .ndo_get_vf_config = qlcnic_sriov_get_vf_config, .ndo_set_vf_vlan = qlcnic_sriov_set_vf_vlan, .ndo_set_vf_spoofchk = qlcnic_sriov_set_vf_spoofchk, diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h index 335b50f7bd3e..4677b2edccca 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h @@ -233,7 +233,7 @@ bool qlcnic_sriov_soft_flr_check(struct qlcnic_adapter *, void qlcnic_sriov_pf_reset(struct qlcnic_adapter *); int qlcnic_sriov_pf_reinit(struct qlcnic_adapter *); int qlcnic_sriov_set_vf_mac(struct net_device *, int, u8 *); -int qlcnic_sriov_set_vf_tx_rate(struct net_device *, int, int); +int qlcnic_sriov_set_vf_tx_rate(struct net_device *, int, int, int); int qlcnic_sriov_get_vf_config(struct net_device *, int , struct ifla_vf_info *); int qlcnic_sriov_set_vf_vlan(struct net_device *, int, u16, u8); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c index 498fa6350c8d..2bdd9deffb38 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c @@ -201,6 +201,7 @@ int qlcnic_sriov_init(struct qlcnic_adapter *adapter, int num_vfs) sriov->vf_info[i].vp = vp; vp->vlan_mode = QLC_GUEST_VLAN_MODE; vp->max_tx_bw = MAX_BW; + vp->min_tx_bw = MIN_BW; vp->spoofchk = false; random_ether_addr(vp->mac); dev_info(&adapter->pdev->dev, diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c index 6d2f72f114f2..a29538b86edf 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c @@ -1848,7 +1848,8 @@ int qlcnic_sriov_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) return 0; } -int qlcnic_sriov_set_vf_tx_rate(struct net_device *netdev, int vf, int tx_rate) +int qlcnic_sriov_set_vf_tx_rate(struct net_device *netdev, int vf, + int min_tx_rate, int max_tx_rate) { struct qlcnic_adapter *adapter = netdev_priv(netdev); struct qlcnic_sriov *sriov = adapter->ahw->sriov; @@ -1863,35 +1864,52 @@ int qlcnic_sriov_set_vf_tx_rate(struct net_device *netdev, int vf, int tx_rate) if (vf >= sriov->num_vfs) return -EINVAL; - if (tx_rate >= 10000 || tx_rate < 100) { + vf_info = &sriov->vf_info[vf]; + vp = vf_info->vp; + vpid = vp->handle; + + if (!min_tx_rate) + min_tx_rate = QLC_VF_MIN_TX_RATE; + + if (max_tx_rate && + (max_tx_rate >= 10000 || max_tx_rate < min_tx_rate)) { netdev_err(netdev, - "Invalid Tx rate, allowed range is [%d - %d]", - QLC_VF_MIN_TX_RATE, QLC_VF_MAX_TX_RATE); + "Invalid max Tx rate, allowed range is [%d - %d]", + min_tx_rate, QLC_VF_MAX_TX_RATE); return -EINVAL; } - if (tx_rate == 0) - tx_rate = 10000; + if (!max_tx_rate) + max_tx_rate = 10000; - vf_info = &sriov->vf_info[vf]; - vp = vf_info->vp; - vpid = vp->handle; + if (min_tx_rate && + (min_tx_rate > max_tx_rate || min_tx_rate < QLC_VF_MIN_TX_RATE)) { + netdev_err(netdev, + "Invalid min Tx rate, allowed range is [%d - %d]", + QLC_VF_MIN_TX_RATE, max_tx_rate); + return -EINVAL; + } if (test_bit(QLC_BC_VF_STATE, &vf_info->state)) { if (qlcnic_sriov_get_vf_vport_info(adapter, &nic_info, vpid)) return -EIO; - nic_info.max_tx_bw = tx_rate / 100; + nic_info.max_tx_bw = max_tx_rate / 100; + nic_info.min_tx_bw = min_tx_rate / 100; nic_info.bit_offsets = BIT_0; if (qlcnic_sriov_pf_set_vport_info(adapter, &nic_info, vpid)) return -EIO; } - vp->max_tx_bw = tx_rate / 100; + vp->max_tx_bw = max_tx_rate / 100; + netdev_info(netdev, + "Setting Max Tx rate %d (Mbps), %d %% of PF bandwidth, for VF %d\n", + max_tx_rate, vp->max_tx_bw, vf); + vp->min_tx_bw = min_tx_rate / 100; netdev_info(netdev, - "Setting Tx rate %d (Mbps), %d %% of PF bandwidth, for VF %d\n", - tx_rate, vp->max_tx_bw, vf); + "Setting Min Tx rate %d (Mbps), %d %% of PF bandwidth, for VF %d\n", + min_tx_rate, vp->min_tx_bw, vf); return 0; } @@ -1990,9 +2008,13 @@ int qlcnic_sriov_get_vf_config(struct net_device *netdev, ivi->qos = vp->qos; ivi->spoofchk = vp->spoofchk; if (vp->max_tx_bw == MAX_BW) - ivi->tx_rate = 0; + ivi->max_tx_rate = 0; + else + ivi->max_tx_rate = vp->max_tx_bw * 100; + if (vp->min_tx_bw == MIN_BW) + ivi->min_tx_rate = 0; else - ivi->tx_rate = vp->max_tx_bw * 100; + ivi->min_tx_rate = vp->min_tx_bw * 100; ivi->vf = vf; return 0; diff --git a/drivers/net/ethernet/sfc/siena_sriov.c b/drivers/net/ethernet/sfc/siena_sriov.c index 9a9205e77896..43d2e64546ed 100644 --- a/drivers/net/ethernet/sfc/siena_sriov.c +++ b/drivers/net/ethernet/sfc/siena_sriov.c @@ -1633,7 +1633,8 @@ int efx_sriov_get_vf_config(struct net_device *net_dev, int vf_i, ivi->vf = vf_i; ether_addr_copy(ivi->mac, vf->addr.mac_addr); - ivi->tx_rate = 0; + ivi->max_tx_rate = 0; + ivi->min_tx_rate = 0; tci = ntohs(vf->addr.tci); ivi->vlan = tci & VLAN_VID_MASK; ivi->qos = (tci >> VLAN_PRIO_SHIFT) & 0x7; diff --git a/include/linux/if_link.h b/include/linux/if_link.h index a86784dec3d3..119130e9298b 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -10,8 +10,9 @@ struct ifla_vf_info { __u8 mac[32]; __u32 vlan; __u32 qos; - __u32 tx_rate; __u32 spoofchk; __u32 linkstate; + __u32 min_tx_rate; + __u32 max_tx_rate; }; #endif /* _LINUX_IF_LINK_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f4ad247fd324..9ec3a945caf2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -850,7 +850,8 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * SR-IOV management functions. * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac); * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, u8 qos); - * int (*ndo_set_vf_tx_rate)(struct net_device *dev, int vf, int rate); + * int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate, + * int max_tx_rate); * int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting); * int (*ndo_get_vf_config)(struct net_device *dev, * int vf, struct ifla_vf_info *ivf); @@ -1044,8 +1045,9 @@ struct net_device_ops { int queue, u8 *mac); int (*ndo_set_vf_vlan)(struct net_device *dev, int queue, u16 vlan, u8 qos); - int (*ndo_set_vf_tx_rate)(struct net_device *dev, - int vf, int rate); + int (*ndo_set_vf_rate)(struct net_device *dev, + int vf, int min_tx_rate, + int max_tx_rate); int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting); int (*ndo_get_vf_config)(struct net_device *dev, diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 9a7f7ace6649..622e7910b8cc 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -399,9 +399,10 @@ enum { IFLA_VF_UNSPEC, IFLA_VF_MAC, /* Hardware queue specific attributes */ IFLA_VF_VLAN, - IFLA_VF_TX_RATE, /* TX Bandwidth Allocation */ + IFLA_VF_TX_RATE, /* Max TX Bandwidth Allocation */ IFLA_VF_SPOOFCHK, /* Spoof Checking on/off switch */ IFLA_VF_LINK_STATE, /* link state enable/disable/auto switch */ + IFLA_VF_RATE, /* Min and Max TX Bandwidth Allocation */ __IFLA_VF_MAX, }; @@ -423,6 +424,12 @@ struct ifla_vf_tx_rate { __u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */ }; +struct ifla_vf_rate { + __u32 vf; + __u32 min_tx_rate; /* Min Bandwidth in Mbps */ + __u32 max_tx_rate; /* Max Bandwidth in Mbps */ +}; + struct ifla_vf_spoofchk { __u32 vf; __u32 setting; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9837bebf93ce..d6417464dc66 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -767,8 +767,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev, size += num_vfs * (nla_total_size(sizeof(struct ifla_vf_mac)) + nla_total_size(sizeof(struct ifla_vf_vlan)) + - nla_total_size(sizeof(struct ifla_vf_tx_rate)) + - nla_total_size(sizeof(struct ifla_vf_spoofchk))); + nla_total_size(sizeof(struct ifla_vf_spoofchk)) + + nla_total_size(sizeof(struct ifla_vf_rate))); return size; } else return 0; @@ -1034,6 +1034,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct ifla_vf_info ivi; struct ifla_vf_mac vf_mac; struct ifla_vf_vlan vf_vlan; + struct ifla_vf_rate vf_rate; struct ifla_vf_tx_rate vf_tx_rate; struct ifla_vf_spoofchk vf_spoofchk; struct ifla_vf_link_state vf_linkstate; @@ -1054,6 +1055,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, break; vf_mac.vf = vf_vlan.vf = + vf_rate.vf = vf_tx_rate.vf = vf_spoofchk.vf = vf_linkstate.vf = ivi.vf; @@ -1061,7 +1063,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); vf_vlan.vlan = ivi.vlan; vf_vlan.qos = ivi.qos; - vf_tx_rate.rate = ivi.tx_rate; + vf_tx_rate.rate = ivi.max_tx_rate; + vf_rate.min_tx_rate = ivi.min_tx_rate; + vf_rate.max_tx_rate = ivi.max_tx_rate; vf_spoofchk.setting = ivi.spoofchk; vf_linkstate.link_state = ivi.linkstate; vf = nla_nest_start(skb, IFLA_VF_INFO); @@ -1071,6 +1075,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, } if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) || nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) || + nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate), + &vf_rate) || nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), &vf_tx_rate) || nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk), @@ -1177,6 +1183,8 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { .len = sizeof(struct ifla_vf_tx_rate) }, [IFLA_VF_SPOOFCHK] = { .type = NLA_BINARY, .len = sizeof(struct ifla_vf_spoofchk) }, + [IFLA_VF_RATE] = { .type = NLA_BINARY, + .len = sizeof(struct ifla_vf_rate) }, }; static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = { @@ -1336,11 +1344,29 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) } case IFLA_VF_TX_RATE: { struct ifla_vf_tx_rate *ivt; + struct ifla_vf_info ivf; ivt = nla_data(vf); err = -EOPNOTSUPP; - if (ops->ndo_set_vf_tx_rate) - err = ops->ndo_set_vf_tx_rate(dev, ivt->vf, - ivt->rate); + if (ops->ndo_get_vf_config) + err = ops->ndo_get_vf_config(dev, ivt->vf, + &ivf); + if (err) + break; + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_rate) + err = ops->ndo_set_vf_rate(dev, ivt->vf, + ivf.min_tx_rate, + ivt->rate); + break; + } + case IFLA_VF_RATE: { + struct ifla_vf_rate *ivt; + ivt = nla_data(vf); + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_rate) + err = ops->ndo_set_vf_rate(dev, ivt->vf, + ivt->min_tx_rate, + ivt->max_tx_rate); break; } case IFLA_VF_SPOOFCHK: { -- cgit v1.2.3-59-g8ed1b From 1c19448c9ba6545b80ded18488a64a7f3d8e6998 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 23 May 2014 08:47:32 -0700 Subject: net: Make enabling of zero UDP6 csums more restrictive RFC 6935 permits zero checksums to be used in IPv6 however this is recommended only for certain tunnel protocols, it does not make checksums completely optional like they are in IPv4. This patch restricts the use of IPv6 zero checksums that was previously intoduced. no_check6_tx and no_check6_rx have been added to control the use of checksums in UDP6 RX and TX path. The normal sk_no_check_{rx,tx} settings are not used (this avoids ambiguity when dealing with a dual stack socket). A helper function has been added (udp_set_no_check6) which can be called by tunnel impelmentations to all zero checksums (send on the socket, and accept them as valid). Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/udp.h | 24 +++++++++++++++++++++++- include/uapi/linux/udp.h | 2 ++ net/ipv4/udp.c | 20 +++++++++++++++++++- net/ipv6/udp.c | 8 ++++---- 4 files changed, 48 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index 42278bbf7a88..247cfdcc4b08 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -47,7 +47,9 @@ struct udp_sock { #define udp_portaddr_node inet.sk.__sk_common.skc_portaddr_node int pending; /* Any pending frames ? */ unsigned int corkflag; /* Cork is required */ - __u16 encap_type; /* Is this an Encapsulation socket? */ + __u8 encap_type; /* Is this an Encapsulation socket? */ + unsigned char no_check6_tx:1,/* Send zero UDP6 checksums on TX? */ + no_check6_rx:1;/* Allow zero UDP6 checksums on RX? */ /* * Following member retains the information to create a UDP header * when the socket is uncorked. @@ -76,6 +78,26 @@ static inline struct udp_sock *udp_sk(const struct sock *sk) return (struct udp_sock *)sk; } +static inline void udp_set_no_check6_tx(struct sock *sk, bool val) +{ + udp_sk(sk)->no_check6_tx = val; +} + +static inline void udp_set_no_check6_rx(struct sock *sk, bool val) +{ + udp_sk(sk)->no_check6_rx = val; +} + +static inline bool udp_get_no_check6_tx(struct sock *sk) +{ + return udp_sk(sk)->no_check6_tx; +} + +static inline bool udp_get_no_check6_rx(struct sock *sk) +{ + return udp_sk(sk)->no_check6_rx; +} + #define udp_portaddr_for_each_entry(__sk, node, list) \ hlist_nulls_for_each_entry(__sk, node, list, __sk_common.skc_portaddr_node) diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h index e2bcfd75a30d..16574ea18f0c 100644 --- a/include/uapi/linux/udp.h +++ b/include/uapi/linux/udp.h @@ -29,6 +29,8 @@ struct udphdr { /* UDP socket options */ #define UDP_CORK 1 /* Never send partially complete segments */ #define UDP_ENCAP 100 /* Set the socket to accept encapsulated packets */ +#define UDP_NO_CHECK6_TX 101 /* Disable sending checksum for UDP6X */ +#define UDP_NO_CHECK6_RX 102 /* Disable accpeting checksum for UDP6 */ /* UDP encapsulation types */ #define UDP_ENCAP_ESPINUDP_NON_IKE 1 /* draft-ietf-ipsec-nat-t-ike-00/01 */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 12c6175b29cd..e07d52b8617a 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1968,7 +1968,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, int (*push_pending_frames)(struct sock *)) { struct udp_sock *up = udp_sk(sk); - int val; + int val, valbool; int err = 0; int is_udplite = IS_UDPLITE(sk); @@ -1978,6 +1978,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, if (get_user(val, (int __user *)optval)) return -EFAULT; + valbool = val ? 1 : 0; + switch (optname) { case UDP_CORK: if (val != 0) { @@ -2007,6 +2009,14 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, } break; + case UDP_NO_CHECK6_TX: + up->no_check6_tx = valbool; + break; + + case UDP_NO_CHECK6_RX: + up->no_check6_rx = valbool; + break; + /* * UDP-Lite's partial checksum coverage (RFC 3828). */ @@ -2089,6 +2099,14 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, val = up->encap_type; break; + case UDP_NO_CHECK6_TX: + val = up->no_check6_tx; + break; + + case UDP_NO_CHECK6_RX: + val = up->no_check6_rx; + break; + /* The following two cannot be changed on UDP sockets, the return is * always 0 (which corresponds to the full checksum coverage of UDP). */ case UDPLITE_SEND_CSCOV: diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b8db453133aa..60325236446a 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -794,10 +794,10 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, dif = inet6_iif(skb); sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); while (sk) { - /* If zero checksum and sk_no_check is not on for + /* If zero checksum and no_check is not on for * the socket then skip it. */ - if (uh->check || sk->sk_no_check_rx) + if (uh->check || udp_sk(sk)->no_check6_rx) stack[count++] = sk; sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr, @@ -887,7 +887,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (sk != NULL) { int ret; - if (!uh->check && !sk->sk_no_check_rx) { + if (!uh->check && !udp_sk(sk)->no_check6_rx) { sock_put(sk); udp6_csum_zero_error(skb); goto csum_error; @@ -1037,7 +1037,7 @@ static int udp_v6_push_pending_frames(struct sock *sk) if (is_udplite) csum = udplite_csum_outgoing(sk, skb); - else if (sk->sk_no_check_tx) { /* UDP csum disabled */ + else if (up->no_check6_tx) { /* UDP csum disabled */ skb->ip_summed = CHECKSUM_NONE; goto send; } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ -- cgit v1.2.3-59-g8ed1b From 8556ce79d5986a87fee4c29300b4efee07c0f15e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 23 May 2014 18:43:57 +0200 Subject: net: filter: remove DL macro Lets get rid of this macro. After commit 5bcfedf06f7f ("net: filter: simplify label names from jump-table"), labels have become more readable due to omission of BPF_ prefix but at the same time more generic, so that things like `git grep -n` would not find them. As a middle path, lets get rid of the DL macro as it's not strictly needed and would otherwise just hide the full name. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 3 - net/core/filter.c | 193 +++++++++++++++++++++++++------------------------ 2 files changed, 99 insertions(+), 97 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 7977b3958e25..2b0056afd1f7 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -37,9 +37,6 @@ #define BPF_CALL 0x80 /* function call */ #define BPF_EXIT 0x90 /* function return */ -/* Placeholder/dummy for 0 */ -#define BPF_0 0 - /* Register numbers */ enum { BPF_REG_0 = 0, diff --git a/net/core/filter.c b/net/core/filter.c index 7067cb240d3e..b3b0889fe089 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -160,95 +160,100 @@ static unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *ins static const void *jumptable[256] = { [0 ... 255] = &&default_label, /* Now overwrite non-defaults ... */ -#define DL(A, B, C) [BPF_##A|BPF_##B|BPF_##C] = &&A##_##B##_##C - DL(ALU, ADD, X), - DL(ALU, ADD, K), - DL(ALU, SUB, X), - DL(ALU, SUB, K), - DL(ALU, AND, X), - DL(ALU, AND, K), - DL(ALU, OR, X), - DL(ALU, OR, K), - DL(ALU, LSH, X), - DL(ALU, LSH, K), - DL(ALU, RSH, X), - DL(ALU, RSH, K), - DL(ALU, XOR, X), - DL(ALU, XOR, K), - DL(ALU, MUL, X), - DL(ALU, MUL, K), - DL(ALU, MOV, X), - DL(ALU, MOV, K), - DL(ALU, DIV, X), - DL(ALU, DIV, K), - DL(ALU, MOD, X), - DL(ALU, MOD, K), - DL(ALU, NEG, 0), - DL(ALU, END, TO_BE), - DL(ALU, END, TO_LE), - DL(ALU64, ADD, X), - DL(ALU64, ADD, K), - DL(ALU64, SUB, X), - DL(ALU64, SUB, K), - DL(ALU64, AND, X), - DL(ALU64, AND, K), - DL(ALU64, OR, X), - DL(ALU64, OR, K), - DL(ALU64, LSH, X), - DL(ALU64, LSH, K), - DL(ALU64, RSH, X), - DL(ALU64, RSH, K), - DL(ALU64, XOR, X), - DL(ALU64, XOR, K), - DL(ALU64, MUL, X), - DL(ALU64, MUL, K), - DL(ALU64, MOV, X), - DL(ALU64, MOV, K), - DL(ALU64, ARSH, X), - DL(ALU64, ARSH, K), - DL(ALU64, DIV, X), - DL(ALU64, DIV, K), - DL(ALU64, MOD, X), - DL(ALU64, MOD, K), - DL(ALU64, NEG, 0), - DL(JMP, CALL, 0), - DL(JMP, JA, 0), - DL(JMP, JEQ, X), - DL(JMP, JEQ, K), - DL(JMP, JNE, X), - DL(JMP, JNE, K), - DL(JMP, JGT, X), - DL(JMP, JGT, K), - DL(JMP, JGE, X), - DL(JMP, JGE, K), - DL(JMP, JSGT, X), - DL(JMP, JSGT, K), - DL(JMP, JSGE, X), - DL(JMP, JSGE, K), - DL(JMP, JSET, X), - DL(JMP, JSET, K), - DL(JMP, EXIT, 0), - DL(STX, MEM, B), - DL(STX, MEM, H), - DL(STX, MEM, W), - DL(STX, MEM, DW), - DL(STX, XADD, W), - DL(STX, XADD, DW), - DL(ST, MEM, B), - DL(ST, MEM, H), - DL(ST, MEM, W), - DL(ST, MEM, DW), - DL(LDX, MEM, B), - DL(LDX, MEM, H), - DL(LDX, MEM, W), - DL(LDX, MEM, DW), - DL(LD, ABS, W), - DL(LD, ABS, H), - DL(LD, ABS, B), - DL(LD, IND, W), - DL(LD, IND, H), - DL(LD, IND, B), -#undef DL + /* 32 bit ALU operations */ + [BPF_ALU | BPF_ADD | BPF_X] = &&ALU_ADD_X, + [BPF_ALU | BPF_ADD | BPF_K] = &&ALU_ADD_K, + [BPF_ALU | BPF_SUB | BPF_X] = &&ALU_SUB_X, + [BPF_ALU | BPF_SUB | BPF_K] = &&ALU_SUB_K, + [BPF_ALU | BPF_AND | BPF_X] = &&ALU_AND_X, + [BPF_ALU | BPF_AND | BPF_K] = &&ALU_AND_K, + [BPF_ALU | BPF_OR | BPF_X] = &&ALU_OR_X, + [BPF_ALU | BPF_OR | BPF_K] = &&ALU_OR_K, + [BPF_ALU | BPF_LSH | BPF_X] = &&ALU_LSH_X, + [BPF_ALU | BPF_LSH | BPF_K] = &&ALU_LSH_K, + [BPF_ALU | BPF_RSH | BPF_X] = &&ALU_RSH_X, + [BPF_ALU | BPF_RSH | BPF_K] = &&ALU_RSH_K, + [BPF_ALU | BPF_XOR | BPF_X] = &&ALU_XOR_X, + [BPF_ALU | BPF_XOR | BPF_K] = &&ALU_XOR_K, + [BPF_ALU | BPF_MUL | BPF_X] = &&ALU_MUL_X, + [BPF_ALU | BPF_MUL | BPF_K] = &&ALU_MUL_K, + [BPF_ALU | BPF_MOV | BPF_X] = &&ALU_MOV_X, + [BPF_ALU | BPF_MOV | BPF_K] = &&ALU_MOV_K, + [BPF_ALU | BPF_DIV | BPF_X] = &&ALU_DIV_X, + [BPF_ALU | BPF_DIV | BPF_K] = &&ALU_DIV_K, + [BPF_ALU | BPF_MOD | BPF_X] = &&ALU_MOD_X, + [BPF_ALU | BPF_MOD | BPF_K] = &&ALU_MOD_K, + [BPF_ALU | BPF_NEG] = &&ALU_NEG, + [BPF_ALU | BPF_END | BPF_TO_BE] = &&ALU_END_TO_BE, + [BPF_ALU | BPF_END | BPF_TO_LE] = &&ALU_END_TO_LE, + /* 64 bit ALU operations */ + [BPF_ALU64 | BPF_ADD | BPF_X] = &&ALU64_ADD_X, + [BPF_ALU64 | BPF_ADD | BPF_K] = &&ALU64_ADD_K, + [BPF_ALU64 | BPF_SUB | BPF_X] = &&ALU64_SUB_X, + [BPF_ALU64 | BPF_SUB | BPF_K] = &&ALU64_SUB_K, + [BPF_ALU64 | BPF_AND | BPF_X] = &&ALU64_AND_X, + [BPF_ALU64 | BPF_AND | BPF_K] = &&ALU64_AND_K, + [BPF_ALU64 | BPF_OR | BPF_X] = &&ALU64_OR_X, + [BPF_ALU64 | BPF_OR | BPF_K] = &&ALU64_OR_K, + [BPF_ALU64 | BPF_LSH | BPF_X] = &&ALU64_LSH_X, + [BPF_ALU64 | BPF_LSH | BPF_K] = &&ALU64_LSH_K, + [BPF_ALU64 | BPF_RSH | BPF_X] = &&ALU64_RSH_X, + [BPF_ALU64 | BPF_RSH | BPF_K] = &&ALU64_RSH_K, + [BPF_ALU64 | BPF_XOR | BPF_X] = &&ALU64_XOR_X, + [BPF_ALU64 | BPF_XOR | BPF_K] = &&ALU64_XOR_K, + [BPF_ALU64 | BPF_MUL | BPF_X] = &&ALU64_MUL_X, + [BPF_ALU64 | BPF_MUL | BPF_K] = &&ALU64_MUL_K, + [BPF_ALU64 | BPF_MOV | BPF_X] = &&ALU64_MOV_X, + [BPF_ALU64 | BPF_MOV | BPF_K] = &&ALU64_MOV_K, + [BPF_ALU64 | BPF_ARSH | BPF_X] = &&ALU64_ARSH_X, + [BPF_ALU64 | BPF_ARSH | BPF_K] = &&ALU64_ARSH_K, + [BPF_ALU64 | BPF_DIV | BPF_X] = &&ALU64_DIV_X, + [BPF_ALU64 | BPF_DIV | BPF_K] = &&ALU64_DIV_K, + [BPF_ALU64 | BPF_MOD | BPF_X] = &&ALU64_MOD_X, + [BPF_ALU64 | BPF_MOD | BPF_K] = &&ALU64_MOD_K, + [BPF_ALU64 | BPF_NEG] = &&ALU64_NEG, + /* Call instruction */ + [BPF_JMP | BPF_CALL] = &&JMP_CALL, + /* Jumps */ + [BPF_JMP | BPF_JA] = &&JMP_JA, + [BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X, + [BPF_JMP | BPF_JEQ | BPF_K] = &&JMP_JEQ_K, + [BPF_JMP | BPF_JNE | BPF_X] = &&JMP_JNE_X, + [BPF_JMP | BPF_JNE | BPF_K] = &&JMP_JNE_K, + [BPF_JMP | BPF_JGT | BPF_X] = &&JMP_JGT_X, + [BPF_JMP | BPF_JGT | BPF_K] = &&JMP_JGT_K, + [BPF_JMP | BPF_JGE | BPF_X] = &&JMP_JGE_X, + [BPF_JMP | BPF_JGE | BPF_K] = &&JMP_JGE_K, + [BPF_JMP | BPF_JSGT | BPF_X] = &&JMP_JSGT_X, + [BPF_JMP | BPF_JSGT | BPF_K] = &&JMP_JSGT_K, + [BPF_JMP | BPF_JSGE | BPF_X] = &&JMP_JSGE_X, + [BPF_JMP | BPF_JSGE | BPF_K] = &&JMP_JSGE_K, + [BPF_JMP | BPF_JSET | BPF_X] = &&JMP_JSET_X, + [BPF_JMP | BPF_JSET | BPF_K] = &&JMP_JSET_K, + /* Program return */ + [BPF_JMP | BPF_EXIT] = &&JMP_EXIT, + /* Store instructions */ + [BPF_STX | BPF_MEM | BPF_B] = &&STX_MEM_B, + [BPF_STX | BPF_MEM | BPF_H] = &&STX_MEM_H, + [BPF_STX | BPF_MEM | BPF_W] = &&STX_MEM_W, + [BPF_STX | BPF_MEM | BPF_DW] = &&STX_MEM_DW, + [BPF_STX | BPF_XADD | BPF_W] = &&STX_XADD_W, + [BPF_STX | BPF_XADD | BPF_DW] = &&STX_XADD_DW, + [BPF_ST | BPF_MEM | BPF_B] = &&ST_MEM_B, + [BPF_ST | BPF_MEM | BPF_H] = &&ST_MEM_H, + [BPF_ST | BPF_MEM | BPF_W] = &&ST_MEM_W, + [BPF_ST | BPF_MEM | BPF_DW] = &&ST_MEM_DW, + /* Load instructions */ + [BPF_LDX | BPF_MEM | BPF_B] = &&LDX_MEM_B, + [BPF_LDX | BPF_MEM | BPF_H] = &&LDX_MEM_H, + [BPF_LDX | BPF_MEM | BPF_W] = &&LDX_MEM_W, + [BPF_LDX | BPF_MEM | BPF_DW] = &&LDX_MEM_DW, + [BPF_LD | BPF_ABS | BPF_W] = &&LD_ABS_W, + [BPF_LD | BPF_ABS | BPF_H] = &&LD_ABS_H, + [BPF_LD | BPF_ABS | BPF_B] = &&LD_ABS_B, + [BPF_LD | BPF_IND | BPF_W] = &&LD_IND_W, + [BPF_LD | BPF_IND | BPF_H] = &&LD_IND_H, + [BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B, }; void *ptr; int off; @@ -290,10 +295,10 @@ select_insn: ALU(XOR, ^) ALU(MUL, *) #undef ALU - ALU_NEG_0: + ALU_NEG: A = (u32) -A; CONT; - ALU64_NEG_0: + ALU64_NEG: A = -A; CONT; ALU_MOV_X: @@ -382,7 +387,7 @@ select_insn: CONT; /* CALL */ - JMP_CALL_0: + JMP_CALL: /* Function call scratches BPF_R1-BPF_R5 registers, * preserves BPF_R6-BPF_R9, and stores return value * into BPF_R0. @@ -392,7 +397,7 @@ select_insn: CONT; /* JMP */ - JMP_JA_0: + JMP_JA: insn += insn->off; CONT; JMP_JEQ_X: @@ -479,7 +484,7 @@ select_insn: CONT_JMP; } CONT; - JMP_EXIT_0: + JMP_EXIT: return BPF_R0; /* STX and ST and LDX*/ -- cgit v1.2.3-59-g8ed1b From b1fcd35cf53553a0a3ef949b05106d921446abc3 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 23 May 2014 18:43:58 +0200 Subject: net: filter: let unattached filters use sock_fprog_kern The sk_unattached_filter_create() API is used by BPF filters that are not directly attached or related to sockets, and are used in team, ptp, xt_bpf, cls_bpf, etc. As such all users do their own internal managment of obtaining filter blocks and thus already have them in kernel memory and set up before calling into sk_unattached_filter_create(). As a result, due to __user annotation in sock_fprog, sparse triggers false positives (incorrect type in assignment [different address space]) when filters are set up before passing them to sk_unattached_filter_create(). Therefore, let sk_unattached_filter_create() API use sock_fprog_kern to overcome this issue. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- drivers/isdn/i4l/isdn_ppp.c | 4 ++-- drivers/net/ppp/ppp_generic.c | 4 ++-- drivers/net/team/team_mode_loadbalance.c | 10 +++++----- include/linux/filter.h | 2 +- lib/test_bpf.c | 2 +- net/core/filter.c | 2 +- net/core/ptp_classifier.c | 2 +- net/netfilter/xt_bpf.c | 5 +++-- net/sched/cls_bpf.c | 4 ++-- 9 files changed, 18 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c index a5da511e3c9a..61ac63237446 100644 --- a/drivers/isdn/i4l/isdn_ppp.c +++ b/drivers/isdn/i4l/isdn_ppp.c @@ -634,7 +634,7 @@ isdn_ppp_ioctl(int min, struct file *file, unsigned int cmd, unsigned long arg) #ifdef CONFIG_IPPP_FILTER case PPPIOCSPASS: { - struct sock_fprog fprog; + struct sock_fprog_kern fprog; struct sock_filter *code; int err, len = get_filter(argp, &code); @@ -653,7 +653,7 @@ isdn_ppp_ioctl(int min, struct file *file, unsigned int cmd, unsigned long arg) } case PPPIOCSACTIVE: { - struct sock_fprog fprog; + struct sock_fprog_kern fprog; struct sock_filter *code; int err, len = get_filter(argp, &code); diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index e3923ebb693f..91d6c1272fcf 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -757,7 +757,7 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) err = get_filter(argp, &code); if (err >= 0) { - struct sock_fprog fprog = { + struct sock_fprog_kern fprog = { .len = err, .filter = code, }; @@ -778,7 +778,7 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) err = get_filter(argp, &code); if (err >= 0) { - struct sock_fprog fprog = { + struct sock_fprog_kern fprog = { .len = err, .filter = code, }; diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c index dbde3412ee5e..0a6ee07bf0af 100644 --- a/drivers/net/team/team_mode_loadbalance.c +++ b/drivers/net/team/team_mode_loadbalance.c @@ -49,7 +49,7 @@ struct lb_port_mapping { struct lb_priv_ex { struct team *team; struct lb_port_mapping tx_hash_to_port_mapping[LB_TX_HASHTABLE_SIZE]; - struct sock_fprog *orig_fprog; + struct sock_fprog_kern *orig_fprog; struct { unsigned int refresh_interval; /* in tenths of second */ struct delayed_work refresh_dw; @@ -241,10 +241,10 @@ static int lb_bpf_func_get(struct team *team, struct team_gsetter_ctx *ctx) return 0; } -static int __fprog_create(struct sock_fprog **pfprog, u32 data_len, +static int __fprog_create(struct sock_fprog_kern **pfprog, u32 data_len, const void *data) { - struct sock_fprog *fprog; + struct sock_fprog_kern *fprog; struct sock_filter *filter = (struct sock_filter *) data; if (data_len % sizeof(struct sock_filter)) @@ -262,7 +262,7 @@ static int __fprog_create(struct sock_fprog **pfprog, u32 data_len, return 0; } -static void __fprog_destroy(struct sock_fprog *fprog) +static void __fprog_destroy(struct sock_fprog_kern *fprog) { kfree(fprog->filter); kfree(fprog); @@ -273,7 +273,7 @@ static int lb_bpf_func_set(struct team *team, struct team_gsetter_ctx *ctx) struct lb_priv *lb_priv = get_lb_priv(team); struct sk_filter *fp = NULL; struct sk_filter *orig_fp; - struct sock_fprog *fprog = NULL; + struct sock_fprog_kern *fprog = NULL; int err; if (ctx->data.bin_val.len) { diff --git a/include/linux/filter.h b/include/linux/filter.h index 2b0056afd1f7..625f4de9bdf2 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -188,7 +188,7 @@ int sk_convert_filter(struct sock_filter *prog, int len, struct sock_filter_int *new_prog, int *new_len); int sk_unattached_filter_create(struct sk_filter **pfp, - struct sock_fprog *fprog); + struct sock_fprog_kern *fprog); void sk_unattached_filter_destroy(struct sk_filter *fp); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 3d80adbdb559..e03991ea8cc2 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -1472,7 +1472,7 @@ static int run_one(struct sk_filter *fp, struct bpf_test *t) static __init int test_bpf(void) { struct sk_filter *fp, *fp_ext = NULL; - struct sock_fprog fprog; + struct sock_fprog_kern fprog; int err, i, err_cnt = 0; for (i = 0; i < ARRAY_SIZE(tests); i++) { diff --git a/net/core/filter.c b/net/core/filter.c index b3b0889fe089..2c2d35d9d101 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1585,7 +1585,7 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp, * a negative errno code is returned. On success the return is zero. */ int sk_unattached_filter_create(struct sk_filter **pfp, - struct sock_fprog *fprog) + struct sock_fprog_kern *fprog) { unsigned int fsize = sk_filter_proglen(fprog); struct sk_filter *fp; diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c index 37d86157b76e..d3027a73fd4b 100644 --- a/net/core/ptp_classifier.c +++ b/net/core/ptp_classifier.c @@ -133,7 +133,7 @@ void __init ptp_classifier_init(void) { 0x16, 0, 0, 0x00000000 }, { 0x06, 0, 0, 0x00000000 }, }; - struct sock_fprog ptp_prog = { + struct sock_fprog_kern ptp_prog = { .len = ARRAY_SIZE(ptp_filter), .filter = ptp_filter, }; diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c index 12d4da8e6c77..bbffdbdaf603 100644 --- a/net/netfilter/xt_bpf.c +++ b/net/netfilter/xt_bpf.c @@ -23,10 +23,11 @@ MODULE_ALIAS("ip6t_bpf"); static int bpf_mt_check(const struct xt_mtchk_param *par) { struct xt_bpf_info *info = par->matchinfo; - struct sock_fprog program; + struct sock_fprog_kern program; program.len = info->bpf_program_num_elem; - program.filter = (struct sock_filter __user *) info->bpf_program; + program.filter = info->bpf_program; + if (sk_unattached_filter_create(&info->filter, &program)) { pr_info("bpf: check failed: parse error\n"); return -EINVAL; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 16186965af97..13f64df2c710 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -160,7 +160,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, { struct sock_filter *bpf_ops, *bpf_old; struct tcf_exts exts; - struct sock_fprog tmp; + struct sock_fprog_kern tmp; struct sk_filter *fp, *fp_old; u16 bpf_size, bpf_len; u32 classid; @@ -191,7 +191,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size); tmp.len = bpf_len; - tmp.filter = (struct sock_filter __user *) bpf_ops; + tmp.filter = bpf_ops; ret = sk_unattached_filter_create(&fp, &tmp); if (ret) -- cgit v1.2.3-59-g8ed1b From 494b6590043b4cd73ceb3f58e1c012a2c6c98d85 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Mon, 26 May 2014 18:06:34 +0200 Subject: wireless: add missing WLAN_EID_BSS_INTOLERANT_CHL_REPORT Signed-off-by: Jes Sorensen Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index f194ccb8539c..6bff13f74050 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1711,6 +1711,7 @@ enum ieee80211_eid { WLAN_EID_RRM_ENABLED_CAPABILITIES = 70, WLAN_EID_MULTIPLE_BSSID = 71, WLAN_EID_BSS_COEX_2040 = 72, + WLAN_EID_BSS_INTOLERANT_CHL_REPORT = 73, WLAN_EID_OVERLAP_BSS_SCAN_PARAM = 74, WLAN_EID_RIC_DESCRIPTOR = 75, WLAN_EID_MMIE = 76, -- cgit v1.2.3-59-g8ed1b From 86f6cf41272de9d6ffa05ab46028b15d160a6f3e Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Sat, 24 May 2014 09:34:26 +0200 Subject: net: of_mdio: add of_mdiobus_link_phydev() Add a function to walk the list of subnodes of a mdio bus and look for a node that matches the phy's address with its 'reg' property. If found, set the of_node pointer for the phy. This allows auto-probed pyh devices to be augmented by information passed in via DT. Signed-off-by: Daniel Mack Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/mdio_bus.c | 6 ++++++ drivers/of/of_mdio.c | 33 +++++++++++++++++++++++++++++++++ include/linux/of_mdio.h | 8 ++++++++ 3 files changed, 47 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index a6284964b711..2e58aa54484c 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -300,6 +300,12 @@ struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr) if (IS_ERR(phydev) || phydev == NULL) return phydev; + /* + * For DT, see if the auto-probed phy has a correspoding child + * in the bus node, and set the of_node pointer in this case. + */ + of_mdiobus_link_phydev(bus, phydev); + err = phy_device_register(phydev); if (err) { phy_device_free(phydev); diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index 731d3d9052d7..7c8c142e4eb8 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -183,6 +183,39 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) } EXPORT_SYMBOL(of_mdiobus_register); +/** + * of_mdiobus_link_phydev - Find a device node for a phy + * @mdio: pointer to mii_bus structure + * @phydev: phydev for which the of_node pointer should be set + * + * Walk the list of subnodes of a mdio bus and look for a node that matches the + * phy's address with its 'reg' property. If found, set the of_node pointer for + * the phy. This allows auto-probed pyh devices to be supplied with information + * passed in via DT. + */ +void of_mdiobus_link_phydev(struct mii_bus *mdio, + struct phy_device *phydev) +{ + struct device *dev = &phydev->dev; + struct device_node *child; + + if (dev->of_node || !mdio->dev.of_node) + return; + + for_each_available_child_of_node(mdio->dev.of_node, child) { + int addr; + + addr = of_mdio_parse_addr(&mdio->dev, child); + if (addr < 0) + continue; + + if (addr == phydev->addr) { + dev->of_node = child; + return; + } + } +} + /* Helper function for of_phy_find_device */ static int of_phy_match(struct device *dev, void *phy_np) { diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index d449018d0726..a70c9493d55a 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -25,6 +25,9 @@ struct phy_device *of_phy_attach(struct net_device *dev, extern struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np); +extern void of_mdiobus_link_phydev(struct mii_bus *mdio, + struct phy_device *phydev); + #else /* CONFIG_OF */ static inline int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) { @@ -60,6 +63,11 @@ static inline struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np) { return NULL; } + +static inline void of_mdiobus_link_phydev(struct mii_bus *mdio, + struct phy_device *phydev) +{ +} #endif /* CONFIG_OF */ #if defined(CONFIG_OF) && defined(CONFIG_FIXED_PHY) -- cgit v1.2.3-59-g8ed1b From c8865b64b05b2f4eeefd369373e9c8aeb069e7a1 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Sun, 25 May 2014 17:47:26 +0300 Subject: cpumask: Utility function to set n'th cpu - local cpu first This function sets the n'th cpu - local cpu's first. For example: in a 16 cores server with even cpu's local, will get the following values: cpumask_set_cpu_local_first(0, numa, cpumask) => cpu 0 is set cpumask_set_cpu_local_first(1, numa, cpumask) => cpu 2 is set ... cpumask_set_cpu_local_first(7, numa, cpumask) => cpu 14 is set cpumask_set_cpu_local_first(8, numa, cpumask) => cpu 1 is set cpumask_set_cpu_local_first(9, numa, cpumask) => cpu 3 is set ... cpumask_set_cpu_local_first(15, numa, cpumask) => cpu 15 is set Curently this function will be used by multi queue networking devices to calculate the irq affinity mask, such that as many local cpu's as possible will be utilized to handle the mq device irq's. Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- include/linux/cpumask.h | 2 ++ lib/cpumask.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index d08e4d2a9b92..3551d667ef9f 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -257,6 +257,8 @@ static inline void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) set_bit(cpumask_check(cpu), cpumask_bits(dstp)); } +int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp); + /** * cpumask_clear_cpu - clear a cpu in a cpumask * @cpu: cpu number (< nr_cpu_ids) diff --git a/lib/cpumask.c b/lib/cpumask.c index b810b753c607..14049a96f04a 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -163,4 +163,68 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask) { memblock_free_early(__pa(mask), cpumask_size()); } + +/** + * cpumask_set_cpu_local_first - set i'th cpu with local numa cpu's first + * + * @i: index number + * @numa_node: local numa_node + * @dstp: cpumask with the relevant cpu bit set according to the policy + * + * This function sets the cpumask according to a numa aware policy. + * cpumask could be used as an affinity hint for the IRQ related to a + * queue. When the policy is to spread queues across cores - local cores + * first. + * + * Returns 0 on success, -ENOMEM for no memory, and -EAGAIN when failed to set + * the cpu bit and need to re-call the function. + */ +int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp) +{ + cpumask_var_t mask; + int cpu; + int ret = 0; + + if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + + i %= num_online_cpus(); + + if (!cpumask_of_node(numa_node)) { + /* Use all online cpu's for non numa aware system */ + cpumask_copy(mask, cpu_online_mask); + } else { + int n; + + cpumask_and(mask, + cpumask_of_node(numa_node), cpu_online_mask); + + n = cpumask_weight(mask); + if (i >= n) { + i -= n; + + /* If index > number of local cpu's, mask out local + * cpu's + */ + cpumask_andnot(mask, cpu_online_mask, mask); + } + } + + for_each_cpu(cpu, mask) { + if (--i < 0) + goto out; + } + + ret = -EAGAIN; + +out: + free_cpumask_var(mask); + + if (!ret) + cpumask_set_cpu(cpu, dstp); + + return ret; +} +EXPORT_SYMBOL(cpumask_set_cpu_local_first); + #endif -- cgit v1.2.3-59-g8ed1b From 70a640d0dae3a9b1b222ce673eb5d92c263ddd61 Mon Sep 17 00:00:00 2001 From: Yuval Atias Date: Sun, 25 May 2014 17:47:27 +0300 Subject: net/mlx4_en: Use affinity hint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The “affinity hint” mechanism is used by the user space daemon, irqbalancer, to indicate a preferred CPU mask for irqs. Irqbalancer can use this hint to balance the irqs between the cpus indicated by the mask. We wish the HCA to preferentially map the IRQs it uses to numa cores close to it. To accomplish this, we use cpumask_set_cpu_local_first(), that sets the affinity hint according the following policy: First it maps IRQs to “close” numa cores. If these are exhausted, the remaining IRQs are mapped to “far” numa cores. Signed-off-by: Yuval Atias Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/main.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_cq.c | 6 +++++- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 30 ++++++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/eq.c | 13 ++++++++++- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1 + include/linux/mlx4/device.h | 2 +- 6 files changed, 50 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 199c7896f081..58b1f239ac2b 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1897,7 +1897,7 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) i, j, dev->pdev->bus->name); /* Set IRQ for specific name (per ring) */ if (mlx4_assign_eq(dev, name, NULL, - &ibdev->eq_table[eq])) { + &ibdev->eq_table[eq], NULL)) { /* Use legacy (same as mlx4_en driver) */ pr_warn("Can't allocate EQ %d; reverting to legacy\n", eq); ibdev->eq_table[eq] = diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index 636963db598a..ea2cd72e5368 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -118,11 +118,15 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, if (cq->is_tx == RX) { if (mdev->dev->caps.comp_pool) { if (!cq->vector) { + struct mlx4_en_rx_ring *ring = + priv->rx_ring[cq->ring]; + sprintf(name, "%s-%d", priv->dev->name, cq->ring); /* Set IRQ for specific name (per ring) */ if (mlx4_assign_eq(mdev->dev, name, rmap, - &cq->vector)) { + &cq->vector, + ring->affinity_mask)) { cq->vector = (cq->ring + 1 + priv->port) % mdev->dev->caps.num_comp_vectors; mlx4_warn(mdev, "Failed assigning an EQ to %s, falling back to legacy EQ's\n", diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 58209bd0c94c..05d135572abc 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1526,6 +1526,32 @@ static void mlx4_en_linkstate(struct work_struct *work) mutex_unlock(&mdev->state_lock); } +static void mlx4_en_init_affinity_hint(struct mlx4_en_priv *priv, int ring_idx) +{ + struct mlx4_en_rx_ring *ring = priv->rx_ring[ring_idx]; + int numa_node = priv->mdev->dev->numa_node; + + if (numa_node == -1) + return; + + if (!zalloc_cpumask_var(&ring->affinity_mask, GFP_KERNEL)) { + en_err(priv, "Failed to allocate core mask\n"); + return; + } + + if (cpumask_set_cpu_local_first(ring_idx, numa_node, + ring->affinity_mask)) { + en_err(priv, "Failed setting affinity hint\n"); + free_cpumask_var(ring->affinity_mask); + ring->affinity_mask = NULL; + } +} + +static void mlx4_en_free_affinity_hint(struct mlx4_en_priv *priv, int ring_idx) +{ + free_cpumask_var(priv->rx_ring[ring_idx]->affinity_mask); + priv->rx_ring[ring_idx]->affinity_mask = NULL; +} int mlx4_en_start_port(struct net_device *dev) { @@ -1567,6 +1593,8 @@ int mlx4_en_start_port(struct net_device *dev) mlx4_en_cq_init_lock(cq); + mlx4_en_init_affinity_hint(priv, i); + err = mlx4_en_activate_cq(priv, cq, i); if (err) { en_err(priv, "Failed activating Rx CQ\n"); @@ -1847,6 +1875,8 @@ void mlx4_en_stop_port(struct net_device *dev, int detach) msleep(1); mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]); mlx4_en_deactivate_cq(priv, cq); + + mlx4_en_free_affinity_hint(priv, i); } } diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index d954ec1eac17..f91659e5fa13 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -1376,7 +1376,7 @@ int mlx4_test_interrupts(struct mlx4_dev *dev) EXPORT_SYMBOL(mlx4_test_interrupts); int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap, - int *vector) + int *vector, cpumask_var_t cpu_hint_mask) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -1411,6 +1411,15 @@ int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap, } mlx4_assign_irq_notifier(priv, dev, priv->eq_table.eq[vec].irq); + if (cpu_hint_mask) { + err = irq_set_affinity_hint( + priv->eq_table.eq[vec].irq, + cpu_hint_mask); + if (err) { + mlx4_warn(dev, "Failed setting affinity hint\n"); + /*we dont want to break here*/ + } + } eq_set_ci(&priv->eq_table.eq[vec], 1); } @@ -1441,6 +1450,8 @@ void mlx4_release_eq(struct mlx4_dev *dev, int vec) irq_set_affinity_notifier( priv->eq_table.eq[vec].irq, NULL); + irq_set_affinity_hint(priv->eq_table.eq[vec].irq, + NULL); free_irq(priv->eq_table.eq[vec].irq, &priv->eq_table.eq[vec]); priv->msix_ctl.pool_bm &= ~(1ULL << i); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index b5db1bf361dc..0e15295bedd6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -313,6 +313,7 @@ struct mlx4_en_rx_ring { unsigned long csum_ok; unsigned long csum_none; int hwtstamp_rx_filter; + cpumask_var_t affinity_mask; }; struct mlx4_en_cq { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index ca38871a585c..b9b70e00e3c1 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1161,7 +1161,7 @@ int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr); int mlx4_SYNC_TPT(struct mlx4_dev *dev); int mlx4_test_interrupts(struct mlx4_dev *dev); int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap, - int *vector); + int *vector, cpumask_t *cpu_hint_mask); void mlx4_release_eq(struct mlx4_dev *dev, int vec); int mlx4_get_phys_port_id(struct mlx4_dev *dev); -- cgit v1.2.3-59-g8ed1b From ee39facbf82e73e468c504d2b40e83e2d223c28c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 1 Jun 2014 21:58:02 -0700 Subject: net: Revert mlx4 cpumask changes. This reverts commit 70a640d0dae3a9b1b222ce673eb5d92c263ddd61 ("net/mlx4_en: Use affinity hint") and commit c8865b64b05b2f4eeefd369373e9c8aeb069e7a1 ("cpumask: Utility function to set n'th cpu - local cpu first") because these changes break the build when SMP is disabled amongst other things. Reported-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/ethernet/micrel/ks8851.c | 50 +++++++++++++++------------- include/linux/cpumask.h | 2 -- lib/cpumask.c | 64 ------------------------------------ 3 files changed, 28 insertions(+), 88 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c index 66d4ab703f45..e72918970a58 100644 --- a/drivers/net/ethernet/micrel/ks8851.c +++ b/drivers/net/ethernet/micrel/ks8851.c @@ -1441,30 +1441,32 @@ static int ks8851_probe(struct spi_device *spi) } } - ks->vdd_io = devm_regulator_get(&spi->dev, "vdd-io"); + ks->vdd_io = devm_regulator_get_optional(&spi->dev, "vdd-io"); if (IS_ERR(ks->vdd_io)) { ret = PTR_ERR(ks->vdd_io); - goto err_reg_io; - } - - ret = regulator_enable(ks->vdd_io); - if (ret) { - dev_err(&spi->dev, "regulator vdd_io enable fail: %d\n", - ret); - goto err_reg_io; + if (ret == -EPROBE_DEFER) + goto err_reg_io; + } else { + ret = regulator_enable(ks->vdd_io); + if (ret) { + dev_err(&spi->dev, "regulator vdd_io enable fail: %d\n", + ret); + goto err_reg_io; + } } - ks->vdd_reg = devm_regulator_get(&spi->dev, "vdd"); + ks->vdd_reg = devm_regulator_get_optional(&spi->dev, "vdd"); if (IS_ERR(ks->vdd_reg)) { ret = PTR_ERR(ks->vdd_reg); - goto err_reg; - } - - ret = regulator_enable(ks->vdd_reg); - if (ret) { - dev_err(&spi->dev, "regulator vdd enable fail: %d\n", - ret); - goto err_reg; + if (ret == -EPROBE_DEFER) + goto err_reg; + } else { + ret = regulator_enable(ks->vdd_reg); + if (ret) { + dev_err(&spi->dev, "regulator vdd enable fail: %d\n", + ret); + goto err_reg; + } } if (gpio_is_valid(gpio)) { @@ -1570,9 +1572,11 @@ err_irq: if (gpio_is_valid(gpio)) gpio_set_value(gpio, 0); err_id: - regulator_disable(ks->vdd_reg); + if (!IS_ERR(ks->vdd_reg)) + regulator_disable(ks->vdd_reg); err_reg: - regulator_disable(ks->vdd_io); + if (!IS_ERR(ks->vdd_io)) + regulator_disable(ks->vdd_io); err_reg_io: err_gpio: free_netdev(ndev); @@ -1590,8 +1594,10 @@ static int ks8851_remove(struct spi_device *spi) free_irq(spi->irq, priv); if (gpio_is_valid(priv->gpio)) gpio_set_value(priv->gpio, 0); - regulator_disable(priv->vdd_reg); - regulator_disable(priv->vdd_io); + if (!IS_ERR(priv->vdd_reg)) + regulator_disable(priv->vdd_reg); + if (!IS_ERR(priv->vdd_io)) + regulator_disable(priv->vdd_io); free_netdev(priv->netdev); return 0; diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 3551d667ef9f..d08e4d2a9b92 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -257,8 +257,6 @@ static inline void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) set_bit(cpumask_check(cpu), cpumask_bits(dstp)); } -int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp); - /** * cpumask_clear_cpu - clear a cpu in a cpumask * @cpu: cpu number (< nr_cpu_ids) diff --git a/lib/cpumask.c b/lib/cpumask.c index 14049a96f04a..b810b753c607 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -163,68 +163,4 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask) { memblock_free_early(__pa(mask), cpumask_size()); } - -/** - * cpumask_set_cpu_local_first - set i'th cpu with local numa cpu's first - * - * @i: index number - * @numa_node: local numa_node - * @dstp: cpumask with the relevant cpu bit set according to the policy - * - * This function sets the cpumask according to a numa aware policy. - * cpumask could be used as an affinity hint for the IRQ related to a - * queue. When the policy is to spread queues across cores - local cores - * first. - * - * Returns 0 on success, -ENOMEM for no memory, and -EAGAIN when failed to set - * the cpu bit and need to re-call the function. - */ -int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp) -{ - cpumask_var_t mask; - int cpu; - int ret = 0; - - if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) - return -ENOMEM; - - i %= num_online_cpus(); - - if (!cpumask_of_node(numa_node)) { - /* Use all online cpu's for non numa aware system */ - cpumask_copy(mask, cpu_online_mask); - } else { - int n; - - cpumask_and(mask, - cpumask_of_node(numa_node), cpu_online_mask); - - n = cpumask_weight(mask); - if (i >= n) { - i -= n; - - /* If index > number of local cpu's, mask out local - * cpu's - */ - cpumask_andnot(mask, cpu_online_mask, mask); - } - } - - for_each_cpu(cpu, mask) { - if (--i < 0) - goto out; - } - - ret = -EAGAIN; - -out: - free_cpumask_var(mask); - - if (!ret) - cpumask_set_cpu(cpu, dstp); - - return ret; -} -EXPORT_SYMBOL(cpumask_set_cpu_local_first); - #endif -- cgit v1.2.3-59-g8ed1b From 3480593131e0b781287dae0139bf7ccee7cba7ff Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 29 May 2014 10:22:50 +0200 Subject: net: filter: get rid of BPF_S_* enum This patch finally allows us to get rid of the BPF_S_* enum. Currently, the code performs unnecessary encode and decode workarounds in seccomp and filter migration itself when a filter is being attached in order to overcome BPF_S_* encoding which is not used anymore by the new interpreter resp. JIT compilers. Keeping it around would mean that also in future we would need to extend and maintain this enum and related encoders/decoders. We can get rid of all that and save us these operations during filter attaching. Naturally, also JIT compilers need to be updated by this. Before JIT conversion is being done, each compiler checks if A is being loaded at startup to obtain information if it needs to emit instructions to clear A first. Since BPF extensions are a subset of BPF_LD | BPF_{W,H,B} | BPF_ABS variants, case statements for extensions can be removed at that point. To ease and minimalize code changes in the classic JITs, we have introduced bpf_anc_helper(). Tested with test_bpf on x86_64 (JIT, int), s390x (JIT, int), arm (JIT, int), i368 (int), ppc64 (JIT, int); for sparc we unfortunately didn't have access, but changes are analogous to the rest. Joint work with Alexei Starovoitov. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Cc: Benjamin Herrenschmidt Cc: Martin Schwidefsky Cc: Mircea Gherzan Cc: Kees Cook Acked-by: Chema Gonzalez Signed-off-by: David S. Miller --- arch/arm/net/bpf_jit_32.c | 139 ++++++++-------- arch/powerpc/net/bpf_jit_64.S | 2 +- arch/powerpc/net/bpf_jit_comp.c | 157 +++++++++--------- arch/s390/net/bpf_jit_comp.c | 163 +++++++++---------- arch/sparc/net/bpf_jit_comp.c | 154 +++++++++--------- include/linux/filter.h | 108 +++++-------- kernel/seccomp.c | 83 +++++----- net/core/filter.c | 341 +++++++++++++++------------------------- 8 files changed, 498 insertions(+), 649 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 6f879c319a9d..fb5503ce016f 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -136,7 +136,7 @@ static u16 saved_regs(struct jit_ctx *ctx) u16 ret = 0; if ((ctx->skf->len > 1) || - (ctx->skf->insns[0].code == BPF_S_RET_A)) + (ctx->skf->insns[0].code == (BPF_RET | BPF_A))) ret |= 1 << r_A; #ifdef CONFIG_FRAME_POINTER @@ -164,18 +164,10 @@ static inline int mem_words_used(struct jit_ctx *ctx) static inline bool is_load_to_a(u16 inst) { switch (inst) { - case BPF_S_LD_W_LEN: - case BPF_S_LD_W_ABS: - case BPF_S_LD_H_ABS: - case BPF_S_LD_B_ABS: - case BPF_S_ANC_CPU: - case BPF_S_ANC_IFINDEX: - case BPF_S_ANC_MARK: - case BPF_S_ANC_PROTOCOL: - case BPF_S_ANC_RXHASH: - case BPF_S_ANC_VLAN_TAG: - case BPF_S_ANC_VLAN_TAG_PRESENT: - case BPF_S_ANC_QUEUE: + case BPF_LD | BPF_W | BPF_LEN: + case BPF_LD | BPF_W | BPF_ABS: + case BPF_LD | BPF_H | BPF_ABS: + case BPF_LD | BPF_B | BPF_ABS: return true; default: return false; @@ -215,7 +207,7 @@ static void build_prologue(struct jit_ctx *ctx) emit(ARM_MOV_I(r_X, 0), ctx); /* do not leak kernel data to userspace */ - if ((first_inst != BPF_S_RET_K) && !(is_load_to_a(first_inst))) + if ((first_inst != (BPF_RET | BPF_K)) && !(is_load_to_a(first_inst))) emit(ARM_MOV_I(r_A, 0), ctx); /* stack space for the BPF_MEM words */ @@ -480,36 +472,39 @@ static int build_body(struct jit_ctx *ctx) u32 k; for (i = 0; i < prog->len; i++) { + u16 code; + inst = &(prog->insns[i]); /* K as an immediate value operand */ k = inst->k; + code = bpf_anc_helper(inst); /* compute offsets only in the fake pass */ if (ctx->target == NULL) ctx->offsets[i] = ctx->idx * 4; - switch (inst->code) { - case BPF_S_LD_IMM: + switch (code) { + case BPF_LD | BPF_IMM: emit_mov_i(r_A, k, ctx); break; - case BPF_S_LD_W_LEN: + case BPF_LD | BPF_W | BPF_LEN: ctx->seen |= SEEN_SKB; BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); emit(ARM_LDR_I(r_A, r_skb, offsetof(struct sk_buff, len)), ctx); break; - case BPF_S_LD_MEM: + case BPF_LD | BPF_MEM: /* A = scratch[k] */ ctx->seen |= SEEN_MEM_WORD(k); emit(ARM_LDR_I(r_A, ARM_SP, SCRATCH_OFF(k)), ctx); break; - case BPF_S_LD_W_ABS: + case BPF_LD | BPF_W | BPF_ABS: load_order = 2; goto load; - case BPF_S_LD_H_ABS: + case BPF_LD | BPF_H | BPF_ABS: load_order = 1; goto load; - case BPF_S_LD_B_ABS: + case BPF_LD | BPF_B | BPF_ABS: load_order = 0; load: /* the interpreter will deal with the negative K */ @@ -552,31 +547,31 @@ load_common: emit_err_ret(ARM_COND_NE, ctx); emit(ARM_MOV_R(r_A, ARM_R0), ctx); break; - case BPF_S_LD_W_IND: + case BPF_LD | BPF_W | BPF_IND: load_order = 2; goto load_ind; - case BPF_S_LD_H_IND: + case BPF_LD | BPF_H | BPF_IND: load_order = 1; goto load_ind; - case BPF_S_LD_B_IND: + case BPF_LD | BPF_B | BPF_IND: load_order = 0; load_ind: OP_IMM3(ARM_ADD, r_off, r_X, k, ctx); goto load_common; - case BPF_S_LDX_IMM: + case BPF_LDX | BPF_IMM: ctx->seen |= SEEN_X; emit_mov_i(r_X, k, ctx); break; - case BPF_S_LDX_W_LEN: + case BPF_LDX | BPF_W | BPF_LEN: ctx->seen |= SEEN_X | SEEN_SKB; emit(ARM_LDR_I(r_X, r_skb, offsetof(struct sk_buff, len)), ctx); break; - case BPF_S_LDX_MEM: + case BPF_LDX | BPF_MEM: ctx->seen |= SEEN_X | SEEN_MEM_WORD(k); emit(ARM_LDR_I(r_X, ARM_SP, SCRATCH_OFF(k)), ctx); break; - case BPF_S_LDX_B_MSH: + case BPF_LDX | BPF_B | BPF_MSH: /* x = ((*(frame + k)) & 0xf) << 2; */ ctx->seen |= SEEN_X | SEEN_DATA | SEEN_CALL; /* the interpreter should deal with the negative K */ @@ -606,113 +601,113 @@ load_ind: emit(ARM_AND_I(r_X, ARM_R0, 0x00f), ctx); emit(ARM_LSL_I(r_X, r_X, 2), ctx); break; - case BPF_S_ST: + case BPF_ST: ctx->seen |= SEEN_MEM_WORD(k); emit(ARM_STR_I(r_A, ARM_SP, SCRATCH_OFF(k)), ctx); break; - case BPF_S_STX: + case BPF_STX: update_on_xread(ctx); ctx->seen |= SEEN_MEM_WORD(k); emit(ARM_STR_I(r_X, ARM_SP, SCRATCH_OFF(k)), ctx); break; - case BPF_S_ALU_ADD_K: + case BPF_ALU | BPF_ADD | BPF_K: /* A += K */ OP_IMM3(ARM_ADD, r_A, r_A, k, ctx); break; - case BPF_S_ALU_ADD_X: + case BPF_ALU | BPF_ADD | BPF_X: update_on_xread(ctx); emit(ARM_ADD_R(r_A, r_A, r_X), ctx); break; - case BPF_S_ALU_SUB_K: + case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */ OP_IMM3(ARM_SUB, r_A, r_A, k, ctx); break; - case BPF_S_ALU_SUB_X: + case BPF_ALU | BPF_SUB | BPF_X: update_on_xread(ctx); emit(ARM_SUB_R(r_A, r_A, r_X), ctx); break; - case BPF_S_ALU_MUL_K: + case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */ emit_mov_i(r_scratch, k, ctx); emit(ARM_MUL(r_A, r_A, r_scratch), ctx); break; - case BPF_S_ALU_MUL_X: + case BPF_ALU | BPF_MUL | BPF_X: update_on_xread(ctx); emit(ARM_MUL(r_A, r_A, r_X), ctx); break; - case BPF_S_ALU_DIV_K: + case BPF_ALU | BPF_DIV | BPF_K: if (k == 1) break; emit_mov_i(r_scratch, k, ctx); emit_udiv(r_A, r_A, r_scratch, ctx); break; - case BPF_S_ALU_DIV_X: + case BPF_ALU | BPF_DIV | BPF_X: update_on_xread(ctx); emit(ARM_CMP_I(r_X, 0), ctx); emit_err_ret(ARM_COND_EQ, ctx); emit_udiv(r_A, r_A, r_X, ctx); break; - case BPF_S_ALU_OR_K: + case BPF_ALU | BPF_OR | BPF_K: /* A |= K */ OP_IMM3(ARM_ORR, r_A, r_A, k, ctx); break; - case BPF_S_ALU_OR_X: + case BPF_ALU | BPF_OR | BPF_X: update_on_xread(ctx); emit(ARM_ORR_R(r_A, r_A, r_X), ctx); break; - case BPF_S_ALU_XOR_K: + case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K; */ OP_IMM3(ARM_EOR, r_A, r_A, k, ctx); break; - case BPF_S_ANC_ALU_XOR_X: - case BPF_S_ALU_XOR_X: + case BPF_ANC | SKF_AD_ALU_XOR_X: + case BPF_ALU | BPF_XOR | BPF_X: /* A ^= X */ update_on_xread(ctx); emit(ARM_EOR_R(r_A, r_A, r_X), ctx); break; - case BPF_S_ALU_AND_K: + case BPF_ALU | BPF_AND | BPF_K: /* A &= K */ OP_IMM3(ARM_AND, r_A, r_A, k, ctx); break; - case BPF_S_ALU_AND_X: + case BPF_ALU | BPF_AND | BPF_X: update_on_xread(ctx); emit(ARM_AND_R(r_A, r_A, r_X), ctx); break; - case BPF_S_ALU_LSH_K: + case BPF_ALU | BPF_LSH | BPF_K: if (unlikely(k > 31)) return -1; emit(ARM_LSL_I(r_A, r_A, k), ctx); break; - case BPF_S_ALU_LSH_X: + case BPF_ALU | BPF_LSH | BPF_X: update_on_xread(ctx); emit(ARM_LSL_R(r_A, r_A, r_X), ctx); break; - case BPF_S_ALU_RSH_K: + case BPF_ALU | BPF_RSH | BPF_K: if (unlikely(k > 31)) return -1; emit(ARM_LSR_I(r_A, r_A, k), ctx); break; - case BPF_S_ALU_RSH_X: + case BPF_ALU | BPF_RSH | BPF_X: update_on_xread(ctx); emit(ARM_LSR_R(r_A, r_A, r_X), ctx); break; - case BPF_S_ALU_NEG: + case BPF_ALU | BPF_NEG: /* A = -A */ emit(ARM_RSB_I(r_A, r_A, 0), ctx); break; - case BPF_S_JMP_JA: + case BPF_JMP | BPF_JA: /* pc += K */ emit(ARM_B(b_imm(i + k + 1, ctx)), ctx); break; - case BPF_S_JMP_JEQ_K: + case BPF_JMP | BPF_JEQ | BPF_K: /* pc += (A == K) ? pc->jt : pc->jf */ condt = ARM_COND_EQ; goto cmp_imm; - case BPF_S_JMP_JGT_K: + case BPF_JMP | BPF_JGT | BPF_K: /* pc += (A > K) ? pc->jt : pc->jf */ condt = ARM_COND_HI; goto cmp_imm; - case BPF_S_JMP_JGE_K: + case BPF_JMP | BPF_JGE | BPF_K: /* pc += (A >= K) ? pc->jt : pc->jf */ condt = ARM_COND_HS; cmp_imm: @@ -731,22 +726,22 @@ cond_jump: _emit(condt ^ 1, ARM_B(b_imm(i + inst->jf + 1, ctx)), ctx); break; - case BPF_S_JMP_JEQ_X: + case BPF_JMP | BPF_JEQ | BPF_X: /* pc += (A == X) ? pc->jt : pc->jf */ condt = ARM_COND_EQ; goto cmp_x; - case BPF_S_JMP_JGT_X: + case BPF_JMP | BPF_JGT | BPF_X: /* pc += (A > X) ? pc->jt : pc->jf */ condt = ARM_COND_HI; goto cmp_x; - case BPF_S_JMP_JGE_X: + case BPF_JMP | BPF_JGE | BPF_X: /* pc += (A >= X) ? pc->jt : pc->jf */ condt = ARM_COND_CS; cmp_x: update_on_xread(ctx); emit(ARM_CMP_R(r_A, r_X), ctx); goto cond_jump; - case BPF_S_JMP_JSET_K: + case BPF_JMP | BPF_JSET | BPF_K: /* pc += (A & K) ? pc->jt : pc->jf */ condt = ARM_COND_NE; /* not set iff all zeroes iff Z==1 iff EQ */ @@ -759,16 +754,16 @@ cmp_x: emit(ARM_TST_I(r_A, imm12), ctx); } goto cond_jump; - case BPF_S_JMP_JSET_X: + case BPF_JMP | BPF_JSET | BPF_X: /* pc += (A & X) ? pc->jt : pc->jf */ update_on_xread(ctx); condt = ARM_COND_NE; emit(ARM_TST_R(r_A, r_X), ctx); goto cond_jump; - case BPF_S_RET_A: + case BPF_RET | BPF_A: emit(ARM_MOV_R(ARM_R0, r_A), ctx); goto b_epilogue; - case BPF_S_RET_K: + case BPF_RET | BPF_K: if ((k == 0) && (ctx->ret0_fp_idx < 0)) ctx->ret0_fp_idx = i; emit_mov_i(ARM_R0, k, ctx); @@ -776,17 +771,17 @@ b_epilogue: if (i != ctx->skf->len - 1) emit(ARM_B(b_imm(prog->len, ctx)), ctx); break; - case BPF_S_MISC_TAX: + case BPF_MISC | BPF_TAX: /* X = A */ ctx->seen |= SEEN_X; emit(ARM_MOV_R(r_X, r_A), ctx); break; - case BPF_S_MISC_TXA: + case BPF_MISC | BPF_TXA: /* A = X */ update_on_xread(ctx); emit(ARM_MOV_R(r_A, r_X), ctx); break; - case BPF_S_ANC_PROTOCOL: + case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol) */ ctx->seen |= SEEN_SKB; BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, @@ -795,7 +790,7 @@ b_epilogue: emit(ARM_LDRH_I(r_scratch, r_skb, off), ctx); emit_swap16(r_A, r_scratch, ctx); break; - case BPF_S_ANC_CPU: + case BPF_ANC | SKF_AD_CPU: /* r_scratch = current_thread_info() */ OP_IMM3(ARM_BIC, r_scratch, ARM_SP, THREAD_SIZE - 1, ctx); /* A = current_thread_info()->cpu */ @@ -803,7 +798,7 @@ b_epilogue: off = offsetof(struct thread_info, cpu); emit(ARM_LDR_I(r_A, r_scratch, off), ctx); break; - case BPF_S_ANC_IFINDEX: + case BPF_ANC | SKF_AD_IFINDEX: /* A = skb->dev->ifindex */ ctx->seen |= SEEN_SKB; off = offsetof(struct sk_buff, dev); @@ -817,30 +812,30 @@ b_epilogue: off = offsetof(struct net_device, ifindex); emit(ARM_LDR_I(r_A, r_scratch, off), ctx); break; - case BPF_S_ANC_MARK: + case BPF_ANC | SKF_AD_MARK: ctx->seen |= SEEN_SKB; BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); off = offsetof(struct sk_buff, mark); emit(ARM_LDR_I(r_A, r_skb, off), ctx); break; - case BPF_S_ANC_RXHASH: + case BPF_ANC | SKF_AD_RXHASH: ctx->seen |= SEEN_SKB; BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); off = offsetof(struct sk_buff, hash); emit(ARM_LDR_I(r_A, r_skb, off), ctx); break; - case BPF_S_ANC_VLAN_TAG: - case BPF_S_ANC_VLAN_TAG_PRESENT: + case BPF_ANC | SKF_AD_VLAN_TAG: + case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: ctx->seen |= SEEN_SKB; BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); off = offsetof(struct sk_buff, vlan_tci); emit(ARM_LDRH_I(r_A, r_skb, off), ctx); - if (inst->code == BPF_S_ANC_VLAN_TAG) + if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) OP_IMM3(ARM_AND, r_A, r_A, VLAN_VID_MASK, ctx); else OP_IMM3(ARM_AND, r_A, r_A, VLAN_TAG_PRESENT, ctx); break; - case BPF_S_ANC_QUEUE: + case BPF_ANC | SKF_AD_QUEUE: ctx->seen |= SEEN_SKB; BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); diff --git a/arch/powerpc/net/bpf_jit_64.S b/arch/powerpc/net/bpf_jit_64.S index e76eba74d9da..8f87d9217122 100644 --- a/arch/powerpc/net/bpf_jit_64.S +++ b/arch/powerpc/net/bpf_jit_64.S @@ -78,7 +78,7 @@ sk_load_byte_positive_offset: blr /* - * BPF_S_LDX_B_MSH: ldxb 4*([offset]&0xf) + * BPF_LDX | BPF_B | BPF_MSH: ldxb 4*([offset]&0xf) * r_addr is the offset value */ .globl sk_load_byte_msh diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 808ce1cae21a..6dcdadefd8d0 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -79,19 +79,11 @@ static void bpf_jit_build_prologue(struct sk_filter *fp, u32 *image, } switch (filter[0].code) { - case BPF_S_RET_K: - case BPF_S_LD_W_LEN: - case BPF_S_ANC_PROTOCOL: - case BPF_S_ANC_IFINDEX: - case BPF_S_ANC_MARK: - case BPF_S_ANC_RXHASH: - case BPF_S_ANC_VLAN_TAG: - case BPF_S_ANC_VLAN_TAG_PRESENT: - case BPF_S_ANC_CPU: - case BPF_S_ANC_QUEUE: - case BPF_S_LD_W_ABS: - case BPF_S_LD_H_ABS: - case BPF_S_LD_B_ABS: + case BPF_RET | BPF_K: + case BPF_LD | BPF_W | BPF_LEN: + case BPF_LD | BPF_W | BPF_ABS: + case BPF_LD | BPF_H | BPF_ABS: + case BPF_LD | BPF_B | BPF_ABS: /* first instruction sets A register (or is RET 'constant') */ break; default: @@ -144,6 +136,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, for (i = 0; i < flen; i++) { unsigned int K = filter[i].k; + u16 code = bpf_anc_helper(&filter[i]); /* * addrs[] maps a BPF bytecode address into a real offset from @@ -151,35 +144,35 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, */ addrs[i] = ctx->idx * 4; - switch (filter[i].code) { + switch (code) { /*** ALU ops ***/ - case BPF_S_ALU_ADD_X: /* A += X; */ + case BPF_ALU | BPF_ADD | BPF_X: /* A += X; */ ctx->seen |= SEEN_XREG; PPC_ADD(r_A, r_A, r_X); break; - case BPF_S_ALU_ADD_K: /* A += K; */ + case BPF_ALU | BPF_ADD | BPF_K: /* A += K; */ if (!K) break; PPC_ADDI(r_A, r_A, IMM_L(K)); if (K >= 32768) PPC_ADDIS(r_A, r_A, IMM_HA(K)); break; - case BPF_S_ALU_SUB_X: /* A -= X; */ + case BPF_ALU | BPF_SUB | BPF_X: /* A -= X; */ ctx->seen |= SEEN_XREG; PPC_SUB(r_A, r_A, r_X); break; - case BPF_S_ALU_SUB_K: /* A -= K */ + case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */ if (!K) break; PPC_ADDI(r_A, r_A, IMM_L(-K)); if (K >= 32768) PPC_ADDIS(r_A, r_A, IMM_HA(-K)); break; - case BPF_S_ALU_MUL_X: /* A *= X; */ + case BPF_ALU | BPF_MUL | BPF_X: /* A *= X; */ ctx->seen |= SEEN_XREG; PPC_MUL(r_A, r_A, r_X); break; - case BPF_S_ALU_MUL_K: /* A *= K */ + case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */ if (K < 32768) PPC_MULI(r_A, r_A, K); else { @@ -187,7 +180,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, PPC_MUL(r_A, r_A, r_scratch1); } break; - case BPF_S_ALU_MOD_X: /* A %= X; */ + case BPF_ALU | BPF_MOD | BPF_X: /* A %= X; */ ctx->seen |= SEEN_XREG; PPC_CMPWI(r_X, 0); if (ctx->pc_ret0 != -1) { @@ -201,13 +194,13 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, PPC_MUL(r_scratch1, r_X, r_scratch1); PPC_SUB(r_A, r_A, r_scratch1); break; - case BPF_S_ALU_MOD_K: /* A %= K; */ + case BPF_ALU | BPF_MOD | BPF_K: /* A %= K; */ PPC_LI32(r_scratch2, K); PPC_DIVWU(r_scratch1, r_A, r_scratch2); PPC_MUL(r_scratch1, r_scratch2, r_scratch1); PPC_SUB(r_A, r_A, r_scratch1); break; - case BPF_S_ALU_DIV_X: /* A /= X; */ + case BPF_ALU | BPF_DIV | BPF_X: /* A /= X; */ ctx->seen |= SEEN_XREG; PPC_CMPWI(r_X, 0); if (ctx->pc_ret0 != -1) { @@ -223,17 +216,17 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, } PPC_DIVWU(r_A, r_A, r_X); break; - case BPF_S_ALU_DIV_K: /* A /= K */ + case BPF_ALU | BPF_DIV | BPF_K: /* A /= K */ if (K == 1) break; PPC_LI32(r_scratch1, K); PPC_DIVWU(r_A, r_A, r_scratch1); break; - case BPF_S_ALU_AND_X: + case BPF_ALU | BPF_AND | BPF_X: ctx->seen |= SEEN_XREG; PPC_AND(r_A, r_A, r_X); break; - case BPF_S_ALU_AND_K: + case BPF_ALU | BPF_AND | BPF_K: if (!IMM_H(K)) PPC_ANDI(r_A, r_A, K); else { @@ -241,51 +234,51 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, PPC_AND(r_A, r_A, r_scratch1); } break; - case BPF_S_ALU_OR_X: + case BPF_ALU | BPF_OR | BPF_X: ctx->seen |= SEEN_XREG; PPC_OR(r_A, r_A, r_X); break; - case BPF_S_ALU_OR_K: + case BPF_ALU | BPF_OR | BPF_K: if (IMM_L(K)) PPC_ORI(r_A, r_A, IMM_L(K)); if (K >= 65536) PPC_ORIS(r_A, r_A, IMM_H(K)); break; - case BPF_S_ANC_ALU_XOR_X: - case BPF_S_ALU_XOR_X: /* A ^= X */ + case BPF_ANC | SKF_AD_ALU_XOR_X: + case BPF_ALU | BPF_XOR | BPF_X: /* A ^= X */ ctx->seen |= SEEN_XREG; PPC_XOR(r_A, r_A, r_X); break; - case BPF_S_ALU_XOR_K: /* A ^= K */ + case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */ if (IMM_L(K)) PPC_XORI(r_A, r_A, IMM_L(K)); if (K >= 65536) PPC_XORIS(r_A, r_A, IMM_H(K)); break; - case BPF_S_ALU_LSH_X: /* A <<= X; */ + case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X; */ ctx->seen |= SEEN_XREG; PPC_SLW(r_A, r_A, r_X); break; - case BPF_S_ALU_LSH_K: + case BPF_ALU | BPF_LSH | BPF_K: if (K == 0) break; else PPC_SLWI(r_A, r_A, K); break; - case BPF_S_ALU_RSH_X: /* A >>= X; */ + case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X; */ ctx->seen |= SEEN_XREG; PPC_SRW(r_A, r_A, r_X); break; - case BPF_S_ALU_RSH_K: /* A >>= K; */ + case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K; */ if (K == 0) break; else PPC_SRWI(r_A, r_A, K); break; - case BPF_S_ALU_NEG: + case BPF_ALU | BPF_NEG: PPC_NEG(r_A, r_A); break; - case BPF_S_RET_K: + case BPF_RET | BPF_K: PPC_LI32(r_ret, K); if (!K) { if (ctx->pc_ret0 == -1) @@ -312,7 +305,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, PPC_BLR(); } break; - case BPF_S_RET_A: + case BPF_RET | BPF_A: PPC_MR(r_ret, r_A); if (i != flen - 1) { if (ctx->seen) @@ -321,53 +314,53 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, PPC_BLR(); } break; - case BPF_S_MISC_TAX: /* X = A */ + case BPF_MISC | BPF_TAX: /* X = A */ PPC_MR(r_X, r_A); break; - case BPF_S_MISC_TXA: /* A = X */ + case BPF_MISC | BPF_TXA: /* A = X */ ctx->seen |= SEEN_XREG; PPC_MR(r_A, r_X); break; /*** Constant loads/M[] access ***/ - case BPF_S_LD_IMM: /* A = K */ + case BPF_LD | BPF_IMM: /* A = K */ PPC_LI32(r_A, K); break; - case BPF_S_LDX_IMM: /* X = K */ + case BPF_LDX | BPF_IMM: /* X = K */ PPC_LI32(r_X, K); break; - case BPF_S_LD_MEM: /* A = mem[K] */ + case BPF_LD | BPF_MEM: /* A = mem[K] */ PPC_MR(r_A, r_M + (K & 0xf)); ctx->seen |= SEEN_MEM | (1<<(K & 0xf)); break; - case BPF_S_LDX_MEM: /* X = mem[K] */ + case BPF_LDX | BPF_MEM: /* X = mem[K] */ PPC_MR(r_X, r_M + (K & 0xf)); ctx->seen |= SEEN_MEM | (1<<(K & 0xf)); break; - case BPF_S_ST: /* mem[K] = A */ + case BPF_ST: /* mem[K] = A */ PPC_MR(r_M + (K & 0xf), r_A); ctx->seen |= SEEN_MEM | (1<<(K & 0xf)); break; - case BPF_S_STX: /* mem[K] = X */ + case BPF_STX: /* mem[K] = X */ PPC_MR(r_M + (K & 0xf), r_X); ctx->seen |= SEEN_XREG | SEEN_MEM | (1<<(K & 0xf)); break; - case BPF_S_LD_W_LEN: /* A = skb->len; */ + case BPF_LD | BPF_W | BPF_LEN: /* A = skb->len; */ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len)); break; - case BPF_S_LDX_W_LEN: /* X = skb->len; */ + case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */ PPC_LWZ_OFFS(r_X, r_skb, offsetof(struct sk_buff, len)); break; /*** Ancillary info loads ***/ - case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */ + case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol); */ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); PPC_NTOHS_OFFS(r_A, r_skb, offsetof(struct sk_buff, protocol)); break; - case BPF_S_ANC_IFINDEX: + case BPF_ANC | SKF_AD_IFINDEX: PPC_LD_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff, dev)); PPC_CMPDI(r_scratch1, 0); @@ -384,33 +377,33 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, PPC_LWZ_OFFS(r_A, r_scratch1, offsetof(struct net_device, ifindex)); break; - case BPF_S_ANC_MARK: + case BPF_ANC | SKF_AD_MARK: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, mark)); break; - case BPF_S_ANC_RXHASH: + case BPF_ANC | SKF_AD_RXHASH: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, hash)); break; - case BPF_S_ANC_VLAN_TAG: - case BPF_S_ANC_VLAN_TAG_PRESENT: + case BPF_ANC | SKF_AD_VLAN_TAG: + case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, vlan_tci)); - if (filter[i].code == BPF_S_ANC_VLAN_TAG) + if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) PPC_ANDI(r_A, r_A, VLAN_VID_MASK); else PPC_ANDI(r_A, r_A, VLAN_TAG_PRESENT); break; - case BPF_S_ANC_QUEUE: + case BPF_ANC | SKF_AD_QUEUE: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, queue_mapping)); break; - case BPF_S_ANC_CPU: + case BPF_ANC | SKF_AD_CPU: #ifdef CONFIG_SMP /* * PACA ptr is r13: @@ -426,13 +419,13 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, break; /*** Absolute loads from packet header/data ***/ - case BPF_S_LD_W_ABS: + case BPF_LD | BPF_W | BPF_ABS: func = CHOOSE_LOAD_FUNC(K, sk_load_word); goto common_load; - case BPF_S_LD_H_ABS: + case BPF_LD | BPF_H | BPF_ABS: func = CHOOSE_LOAD_FUNC(K, sk_load_half); goto common_load; - case BPF_S_LD_B_ABS: + case BPF_LD | BPF_B | BPF_ABS: func = CHOOSE_LOAD_FUNC(K, sk_load_byte); common_load: /* Load from [K]. */ @@ -449,13 +442,13 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, break; /*** Indirect loads from packet header/data ***/ - case BPF_S_LD_W_IND: + case BPF_LD | BPF_W | BPF_IND: func = sk_load_word; goto common_load_ind; - case BPF_S_LD_H_IND: + case BPF_LD | BPF_H | BPF_IND: func = sk_load_half; goto common_load_ind; - case BPF_S_LD_B_IND: + case BPF_LD | BPF_B | BPF_IND: func = sk_load_byte; common_load_ind: /* @@ -473,31 +466,31 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, PPC_BCC(COND_LT, exit_addr); break; - case BPF_S_LDX_B_MSH: + case BPF_LDX | BPF_B | BPF_MSH: func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh); goto common_load; break; /*** Jump and branches ***/ - case BPF_S_JMP_JA: + case BPF_JMP | BPF_JA: if (K != 0) PPC_JMP(addrs[i + 1 + K]); break; - case BPF_S_JMP_JGT_K: - case BPF_S_JMP_JGT_X: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGT | BPF_X: true_cond = COND_GT; goto cond_branch; - case BPF_S_JMP_JGE_K: - case BPF_S_JMP_JGE_X: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JGE | BPF_X: true_cond = COND_GE; goto cond_branch; - case BPF_S_JMP_JEQ_K: - case BPF_S_JMP_JEQ_X: + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JEQ | BPF_X: true_cond = COND_EQ; goto cond_branch; - case BPF_S_JMP_JSET_K: - case BPF_S_JMP_JSET_X: + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP | BPF_JSET | BPF_X: true_cond = COND_NE; /* Fall through */ cond_branch: @@ -508,20 +501,20 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, break; } - switch (filter[i].code) { - case BPF_S_JMP_JGT_X: - case BPF_S_JMP_JGE_X: - case BPF_S_JMP_JEQ_X: + switch (code) { + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JEQ | BPF_X: ctx->seen |= SEEN_XREG; PPC_CMPLW(r_A, r_X); break; - case BPF_S_JMP_JSET_X: + case BPF_JMP | BPF_JSET | BPF_X: ctx->seen |= SEEN_XREG; PPC_AND_DOT(r_scratch1, r_A, r_X); break; - case BPF_S_JMP_JEQ_K: - case BPF_S_JMP_JGT_K: - case BPF_S_JMP_JGE_K: + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGE | BPF_K: if (K < 32768) PPC_CMPLWI(r_A, K); else { @@ -529,7 +522,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, PPC_CMPLW(r_A, r_scratch1); } break; - case BPF_S_JMP_JSET_K: + case BPF_JMP | BPF_JSET | BPF_K: if (K < 32768) /* PPC_ANDI is /only/ dot-form */ PPC_ANDI(r_scratch1, r_A, K); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index e9f8fa9337fe..a2cbd875543a 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -269,27 +269,17 @@ static void bpf_jit_noleaks(struct bpf_jit *jit, struct sock_filter *filter) EMIT4(0xa7c80000); /* Clear A if the first register does not set it. */ switch (filter[0].code) { - case BPF_S_LD_W_ABS: - case BPF_S_LD_H_ABS: - case BPF_S_LD_B_ABS: - case BPF_S_LD_W_LEN: - case BPF_S_LD_W_IND: - case BPF_S_LD_H_IND: - case BPF_S_LD_B_IND: - case BPF_S_LD_IMM: - case BPF_S_LD_MEM: - case BPF_S_MISC_TXA: - case BPF_S_ANC_PROTOCOL: - case BPF_S_ANC_PKTTYPE: - case BPF_S_ANC_IFINDEX: - case BPF_S_ANC_MARK: - case BPF_S_ANC_QUEUE: - case BPF_S_ANC_HATYPE: - case BPF_S_ANC_RXHASH: - case BPF_S_ANC_CPU: - case BPF_S_ANC_VLAN_TAG: - case BPF_S_ANC_VLAN_TAG_PRESENT: - case BPF_S_RET_K: + case BPF_LD | BPF_W | BPF_ABS: + case BPF_LD | BPF_H | BPF_ABS: + case BPF_LD | BPF_B | BPF_ABS: + case BPF_LD | BPF_W | BPF_LEN: + case BPF_LD | BPF_W | BPF_IND: + case BPF_LD | BPF_H | BPF_IND: + case BPF_LD | BPF_B | BPF_IND: + case BPF_LD | BPF_IMM: + case BPF_LD | BPF_MEM: + case BPF_MISC | BPF_TXA: + case BPF_RET | BPF_K: /* first instruction sets A register */ break; default: /* A = 0 */ @@ -304,15 +294,18 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, unsigned int K; int offset; unsigned int mask; + u16 code; K = filter->k; - switch (filter->code) { - case BPF_S_ALU_ADD_X: /* A += X */ + code = bpf_anc_helper(filter); + + switch (code) { + case BPF_ALU | BPF_ADD | BPF_X: /* A += X */ jit->seen |= SEEN_XREG; /* ar %r5,%r12 */ EMIT2(0x1a5c); break; - case BPF_S_ALU_ADD_K: /* A += K */ + case BPF_ALU | BPF_ADD | BPF_K: /* A += K */ if (!K) break; if (K <= 16383) @@ -325,12 +318,12 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, /* a %r5,(%r13) */ EMIT4_DISP(0x5a50d000, EMIT_CONST(K)); break; - case BPF_S_ALU_SUB_X: /* A -= X */ + case BPF_ALU | BPF_SUB | BPF_X: /* A -= X */ jit->seen |= SEEN_XREG; /* sr %r5,%r12 */ EMIT2(0x1b5c); break; - case BPF_S_ALU_SUB_K: /* A -= K */ + case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */ if (!K) break; if (K <= 16384) @@ -343,12 +336,12 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, /* s %r5,(%r13) */ EMIT4_DISP(0x5b50d000, EMIT_CONST(K)); break; - case BPF_S_ALU_MUL_X: /* A *= X */ + case BPF_ALU | BPF_MUL | BPF_X: /* A *= X */ jit->seen |= SEEN_XREG; /* msr %r5,%r12 */ EMIT4(0xb252005c); break; - case BPF_S_ALU_MUL_K: /* A *= K */ + case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */ if (K <= 16383) /* mhi %r5,K */ EMIT4_IMM(0xa75c0000, K); @@ -359,7 +352,7 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, /* ms %r5,(%r13) */ EMIT4_DISP(0x7150d000, EMIT_CONST(K)); break; - case BPF_S_ALU_DIV_X: /* A /= X */ + case BPF_ALU | BPF_DIV | BPF_X: /* A /= X */ jit->seen |= SEEN_XREG | SEEN_RET0; /* ltr %r12,%r12 */ EMIT2(0x12cc); @@ -370,7 +363,7 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, /* dlr %r4,%r12 */ EMIT4(0xb997004c); break; - case BPF_S_ALU_DIV_K: /* A /= K */ + case BPF_ALU | BPF_DIV | BPF_K: /* A /= K */ if (K == 1) break; /* lhi %r4,0 */ @@ -378,7 +371,7 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, /* dl %r4,(%r13) */ EMIT6_DISP(0xe340d000, 0x0097, EMIT_CONST(K)); break; - case BPF_S_ALU_MOD_X: /* A %= X */ + case BPF_ALU | BPF_MOD | BPF_X: /* A %= X */ jit->seen |= SEEN_XREG | SEEN_RET0; /* ltr %r12,%r12 */ EMIT2(0x12cc); @@ -391,7 +384,7 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, /* lr %r5,%r4 */ EMIT2(0x1854); break; - case BPF_S_ALU_MOD_K: /* A %= K */ + case BPF_ALU | BPF_MOD | BPF_K: /* A %= K */ if (K == 1) { /* lhi %r5,0 */ EMIT4(0xa7580000); @@ -404,12 +397,12 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, /* lr %r5,%r4 */ EMIT2(0x1854); break; - case BPF_S_ALU_AND_X: /* A &= X */ + case BPF_ALU | BPF_AND | BPF_X: /* A &= X */ jit->seen |= SEEN_XREG; /* nr %r5,%r12 */ EMIT2(0x145c); break; - case BPF_S_ALU_AND_K: /* A &= K */ + case BPF_ALU | BPF_AND | BPF_K: /* A &= K */ if (test_facility(21)) /* nilf %r5, */ EMIT6_IMM(0xc05b0000, K); @@ -417,12 +410,12 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, /* n %r5,(%r13) */ EMIT4_DISP(0x5450d000, EMIT_CONST(K)); break; - case BPF_S_ALU_OR_X: /* A |= X */ + case BPF_ALU | BPF_OR | BPF_X: /* A |= X */ jit->seen |= SEEN_XREG; /* or %r5,%r12 */ EMIT2(0x165c); break; - case BPF_S_ALU_OR_K: /* A |= K */ + case BPF_ALU | BPF_OR | BPF_K: /* A |= K */ if (test_facility(21)) /* oilf %r5, */ EMIT6_IMM(0xc05d0000, K); @@ -430,55 +423,55 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, /* o %r5,(%r13) */ EMIT4_DISP(0x5650d000, EMIT_CONST(K)); break; - case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */ - case BPF_S_ALU_XOR_X: + case BPF_ANC | SKF_AD_ALU_XOR_X: /* A ^= X; */ + case BPF_ALU | BPF_XOR | BPF_X: jit->seen |= SEEN_XREG; /* xr %r5,%r12 */ EMIT2(0x175c); break; - case BPF_S_ALU_XOR_K: /* A ^= K */ + case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */ if (!K) break; /* x %r5,(%r13) */ EMIT4_DISP(0x5750d000, EMIT_CONST(K)); break; - case BPF_S_ALU_LSH_X: /* A <<= X; */ + case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X; */ jit->seen |= SEEN_XREG; /* sll %r5,0(%r12) */ EMIT4(0x8950c000); break; - case BPF_S_ALU_LSH_K: /* A <<= K */ + case BPF_ALU | BPF_LSH | BPF_K: /* A <<= K */ if (K == 0) break; /* sll %r5,K */ EMIT4_DISP(0x89500000, K); break; - case BPF_S_ALU_RSH_X: /* A >>= X; */ + case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X; */ jit->seen |= SEEN_XREG; /* srl %r5,0(%r12) */ EMIT4(0x8850c000); break; - case BPF_S_ALU_RSH_K: /* A >>= K; */ + case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K; */ if (K == 0) break; /* srl %r5,K */ EMIT4_DISP(0x88500000, K); break; - case BPF_S_ALU_NEG: /* A = -A */ + case BPF_ALU | BPF_NEG: /* A = -A */ /* lnr %r5,%r5 */ EMIT2(0x1155); break; - case BPF_S_JMP_JA: /* ip += K */ + case BPF_JMP | BPF_JA: /* ip += K */ offset = addrs[i + K] + jit->start - jit->prg; EMIT4_PCREL(0xa7f40000, offset); break; - case BPF_S_JMP_JGT_K: /* ip += (A > K) ? jt : jf */ + case BPF_JMP | BPF_JGT | BPF_K: /* ip += (A > K) ? jt : jf */ mask = 0x200000; /* jh */ goto kbranch; - case BPF_S_JMP_JGE_K: /* ip += (A >= K) ? jt : jf */ + case BPF_JMP | BPF_JGE | BPF_K: /* ip += (A >= K) ? jt : jf */ mask = 0xa00000; /* jhe */ goto kbranch; - case BPF_S_JMP_JEQ_K: /* ip += (A == K) ? jt : jf */ + case BPF_JMP | BPF_JEQ | BPF_K: /* ip += (A == K) ? jt : jf */ mask = 0x800000; /* je */ kbranch: /* Emit compare if the branch targets are different */ if (filter->jt != filter->jf) { @@ -511,7 +504,7 @@ branch: if (filter->jt == filter->jf) { EMIT4_PCREL(0xa7040000 | (mask ^ 0xf00000), offset); } break; - case BPF_S_JMP_JSET_K: /* ip += (A & K) ? jt : jf */ + case BPF_JMP | BPF_JSET | BPF_K: /* ip += (A & K) ? jt : jf */ mask = 0x700000; /* jnz */ /* Emit test if the branch targets are different */ if (filter->jt != filter->jf) { @@ -525,13 +518,13 @@ branch: if (filter->jt == filter->jf) { EMIT4_IMM(0xa7510000, K); } goto branch; - case BPF_S_JMP_JGT_X: /* ip += (A > X) ? jt : jf */ + case BPF_JMP | BPF_JGT | BPF_X: /* ip += (A > X) ? jt : jf */ mask = 0x200000; /* jh */ goto xbranch; - case BPF_S_JMP_JGE_X: /* ip += (A >= X) ? jt : jf */ + case BPF_JMP | BPF_JGE | BPF_X: /* ip += (A >= X) ? jt : jf */ mask = 0xa00000; /* jhe */ goto xbranch; - case BPF_S_JMP_JEQ_X: /* ip += (A == X) ? jt : jf */ + case BPF_JMP | BPF_JEQ | BPF_X: /* ip += (A == X) ? jt : jf */ mask = 0x800000; /* je */ xbranch: /* Emit compare if the branch targets are different */ if (filter->jt != filter->jf) { @@ -540,7 +533,7 @@ xbranch: /* Emit compare if the branch targets are different */ EMIT2(0x195c); } goto branch; - case BPF_S_JMP_JSET_X: /* ip += (A & X) ? jt : jf */ + case BPF_JMP | BPF_JSET | BPF_X: /* ip += (A & X) ? jt : jf */ mask = 0x700000; /* jnz */ /* Emit test if the branch targets are different */ if (filter->jt != filter->jf) { @@ -551,15 +544,15 @@ xbranch: /* Emit compare if the branch targets are different */ EMIT2(0x144c); } goto branch; - case BPF_S_LD_W_ABS: /* A = *(u32 *) (skb->data+K) */ + case BPF_LD | BPF_W | BPF_ABS: /* A = *(u32 *) (skb->data+K) */ jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_WORD; offset = jit->off_load_word; goto load_abs; - case BPF_S_LD_H_ABS: /* A = *(u16 *) (skb->data+K) */ + case BPF_LD | BPF_H | BPF_ABS: /* A = *(u16 *) (skb->data+K) */ jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_HALF; offset = jit->off_load_half; goto load_abs; - case BPF_S_LD_B_ABS: /* A = *(u8 *) (skb->data+K) */ + case BPF_LD | BPF_B | BPF_ABS: /* A = *(u8 *) (skb->data+K) */ jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_BYTE; offset = jit->off_load_byte; load_abs: if ((int) K < 0) @@ -573,19 +566,19 @@ call_fn: /* lg %r1,(%r13) */ /* jnz */ EMIT4_PCREL(0xa7740000, (jit->ret0_ip - jit->prg)); break; - case BPF_S_LD_W_IND: /* A = *(u32 *) (skb->data+K+X) */ + case BPF_LD | BPF_W | BPF_IND: /* A = *(u32 *) (skb->data+K+X) */ jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IWORD; offset = jit->off_load_iword; goto call_fn; - case BPF_S_LD_H_IND: /* A = *(u16 *) (skb->data+K+X) */ + case BPF_LD | BPF_H | BPF_IND: /* A = *(u16 *) (skb->data+K+X) */ jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IHALF; offset = jit->off_load_ihalf; goto call_fn; - case BPF_S_LD_B_IND: /* A = *(u8 *) (skb->data+K+X) */ + case BPF_LD | BPF_B | BPF_IND: /* A = *(u8 *) (skb->data+K+X) */ jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IBYTE; offset = jit->off_load_ibyte; goto call_fn; - case BPF_S_LDX_B_MSH: + case BPF_LDX | BPF_B | BPF_MSH: /* X = (*(u8 *)(skb->data+K) & 0xf) << 2 */ jit->seen |= SEEN_RET0; if ((int) K < 0) { @@ -596,17 +589,17 @@ call_fn: /* lg %r1,(%r13) */ jit->seen |= SEEN_DATAREF | SEEN_LOAD_BMSH; offset = jit->off_load_bmsh; goto call_fn; - case BPF_S_LD_W_LEN: /* A = skb->len; */ + case BPF_LD | BPF_W | BPF_LEN: /* A = skb->len; */ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); /* l %r5,(%r2) */ EMIT4_DISP(0x58502000, offsetof(struct sk_buff, len)); break; - case BPF_S_LDX_W_LEN: /* X = skb->len; */ + case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */ jit->seen |= SEEN_XREG; /* l %r12,(%r2) */ EMIT4_DISP(0x58c02000, offsetof(struct sk_buff, len)); break; - case BPF_S_LD_IMM: /* A = K */ + case BPF_LD | BPF_IMM: /* A = K */ if (K <= 16383) /* lhi %r5,K */ EMIT4_IMM(0xa7580000, K); @@ -617,7 +610,7 @@ call_fn: /* lg %r1,(%r13) */ /* l %r5,(%r13) */ EMIT4_DISP(0x5850d000, EMIT_CONST(K)); break; - case BPF_S_LDX_IMM: /* X = K */ + case BPF_LDX | BPF_IMM: /* X = K */ jit->seen |= SEEN_XREG; if (K <= 16383) /* lhi %r12, */ @@ -629,29 +622,29 @@ call_fn: /* lg %r1,(%r13) */ /* l %r12,(%r13) */ EMIT4_DISP(0x58c0d000, EMIT_CONST(K)); break; - case BPF_S_LD_MEM: /* A = mem[K] */ + case BPF_LD | BPF_MEM: /* A = mem[K] */ jit->seen |= SEEN_MEM; /* l %r5,(%r15) */ EMIT4_DISP(0x5850f000, (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4); break; - case BPF_S_LDX_MEM: /* X = mem[K] */ + case BPF_LDX | BPF_MEM: /* X = mem[K] */ jit->seen |= SEEN_XREG | SEEN_MEM; /* l %r12,(%r15) */ EMIT4_DISP(0x58c0f000, (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4); break; - case BPF_S_MISC_TAX: /* X = A */ + case BPF_MISC | BPF_TAX: /* X = A */ jit->seen |= SEEN_XREG; /* lr %r12,%r5 */ EMIT2(0x18c5); break; - case BPF_S_MISC_TXA: /* A = X */ + case BPF_MISC | BPF_TXA: /* A = X */ jit->seen |= SEEN_XREG; /* lr %r5,%r12 */ EMIT2(0x185c); break; - case BPF_S_RET_K: + case BPF_RET | BPF_K: if (K == 0) { jit->seen |= SEEN_RET0; if (last) @@ -671,33 +664,33 @@ call_fn: /* lg %r1,(%r13) */ EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg); } break; - case BPF_S_RET_A: + case BPF_RET | BPF_A: /* llgfr %r2,%r5 */ EMIT4(0xb9160025); /* j */ EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg); break; - case BPF_S_ST: /* mem[K] = A */ + case BPF_ST: /* mem[K] = A */ jit->seen |= SEEN_MEM; /* st %r5,(%r15) */ EMIT4_DISP(0x5050f000, (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4); break; - case BPF_S_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */ + case BPF_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */ jit->seen |= SEEN_XREG | SEEN_MEM; /* st %r12,(%r15) */ EMIT4_DISP(0x50c0f000, (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4); break; - case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */ + case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol); */ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); /* lhi %r5,0 */ EMIT4(0xa7580000); /* icm %r5,3,(%r2) */ EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, protocol)); break; - case BPF_S_ANC_IFINDEX: /* if (!skb->dev) return 0; - * A = skb->dev->ifindex */ + case BPF_ANC | SKF_AD_IFINDEX: /* if (!skb->dev) return 0; + * A = skb->dev->ifindex */ BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); jit->seen |= SEEN_RET0; /* lg %r1,(%r2) */ @@ -709,20 +702,20 @@ call_fn: /* lg %r1,(%r13) */ /* l %r5,(%r1) */ EMIT4_DISP(0x58501000, offsetof(struct net_device, ifindex)); break; - case BPF_S_ANC_MARK: /* A = skb->mark */ + case BPF_ANC | SKF_AD_MARK: /* A = skb->mark */ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); /* l %r5,(%r2) */ EMIT4_DISP(0x58502000, offsetof(struct sk_buff, mark)); break; - case BPF_S_ANC_QUEUE: /* A = skb->queue_mapping */ + case BPF_ANC | SKF_AD_QUEUE: /* A = skb->queue_mapping */ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); /* lhi %r5,0 */ EMIT4(0xa7580000); /* icm %r5,3,(%r2) */ EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, queue_mapping)); break; - case BPF_S_ANC_HATYPE: /* if (!skb->dev) return 0; - * A = skb->dev->type */ + case BPF_ANC | SKF_AD_HATYPE: /* if (!skb->dev) return 0; + * A = skb->dev->type */ BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2); jit->seen |= SEEN_RET0; /* lg %r1,(%r2) */ @@ -736,20 +729,20 @@ call_fn: /* lg %r1,(%r13) */ /* icm %r5,3,(%r1) */ EMIT4_DISP(0xbf531000, offsetof(struct net_device, type)); break; - case BPF_S_ANC_RXHASH: /* A = skb->hash */ + case BPF_ANC | SKF_AD_RXHASH: /* A = skb->hash */ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); /* l %r5,(%r2) */ EMIT4_DISP(0x58502000, offsetof(struct sk_buff, hash)); break; - case BPF_S_ANC_VLAN_TAG: - case BPF_S_ANC_VLAN_TAG_PRESENT: + case BPF_ANC | SKF_AD_VLAN_TAG: + case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); /* lhi %r5,0 */ EMIT4(0xa7580000); /* icm %r5,3,(%r2) */ EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, vlan_tci)); - if (filter->code == BPF_S_ANC_VLAN_TAG) { + if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) { /* nill %r5,0xefff */ EMIT4_IMM(0xa5570000, ~VLAN_TAG_PRESENT); } else { @@ -759,7 +752,7 @@ call_fn: /* lg %r1,(%r13) */ EMIT4_DISP(0x88500000, 12); } break; - case BPF_S_ANC_PKTTYPE: + case BPF_ANC | SKF_AD_PKTTYPE: if (pkt_type_offset < 0) goto out; /* lhi %r5,0 */ @@ -769,7 +762,7 @@ call_fn: /* lg %r1,(%r13) */ /* srl %r5,5 */ EMIT4_DISP(0x88500000, 5); break; - case BPF_S_ANC_CPU: /* A = smp_processor_id() */ + case BPF_ANC | SKF_AD_CPU: /* A = smp_processor_id() */ #ifdef CONFIG_SMP /* l %r5, */ EMIT4_DISP(0x58500000, offsetof(struct _lowcore, cpu_nr)); diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index a82c6b2a9780..c88cf147deed 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -415,20 +415,11 @@ void bpf_jit_compile(struct sk_filter *fp) emit_reg_move(O7, r_saved_O7); switch (filter[0].code) { - case BPF_S_RET_K: - case BPF_S_LD_W_LEN: - case BPF_S_ANC_PROTOCOL: - case BPF_S_ANC_PKTTYPE: - case BPF_S_ANC_IFINDEX: - case BPF_S_ANC_MARK: - case BPF_S_ANC_RXHASH: - case BPF_S_ANC_VLAN_TAG: - case BPF_S_ANC_VLAN_TAG_PRESENT: - case BPF_S_ANC_CPU: - case BPF_S_ANC_QUEUE: - case BPF_S_LD_W_ABS: - case BPF_S_LD_H_ABS: - case BPF_S_LD_B_ABS: + case BPF_RET | BPF_K: + case BPF_LD | BPF_W | BPF_LEN: + case BPF_LD | BPF_W | BPF_ABS: + case BPF_LD | BPF_H | BPF_ABS: + case BPF_LD | BPF_B | BPF_ABS: /* The first instruction sets the A register (or is * a "RET 'constant'") */ @@ -445,59 +436,60 @@ void bpf_jit_compile(struct sk_filter *fp) unsigned int t_offset; unsigned int f_offset; u32 t_op, f_op; + u16 code = bpf_anc_helper(&filter[i]); int ilen; - switch (filter[i].code) { - case BPF_S_ALU_ADD_X: /* A += X; */ + switch (code) { + case BPF_ALU | BPF_ADD | BPF_X: /* A += X; */ emit_alu_X(ADD); break; - case BPF_S_ALU_ADD_K: /* A += K; */ + case BPF_ALU | BPF_ADD | BPF_K: /* A += K; */ emit_alu_K(ADD, K); break; - case BPF_S_ALU_SUB_X: /* A -= X; */ + case BPF_ALU | BPF_SUB | BPF_X: /* A -= X; */ emit_alu_X(SUB); break; - case BPF_S_ALU_SUB_K: /* A -= K */ + case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */ emit_alu_K(SUB, K); break; - case BPF_S_ALU_AND_X: /* A &= X */ + case BPF_ALU | BPF_AND | BPF_X: /* A &= X */ emit_alu_X(AND); break; - case BPF_S_ALU_AND_K: /* A &= K */ + case BPF_ALU | BPF_AND | BPF_K: /* A &= K */ emit_alu_K(AND, K); break; - case BPF_S_ALU_OR_X: /* A |= X */ + case BPF_ALU | BPF_OR | BPF_X: /* A |= X */ emit_alu_X(OR); break; - case BPF_S_ALU_OR_K: /* A |= K */ + case BPF_ALU | BPF_OR | BPF_K: /* A |= K */ emit_alu_K(OR, K); break; - case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */ - case BPF_S_ALU_XOR_X: + case BPF_ANC | SKF_AD_ALU_XOR_X: /* A ^= X; */ + case BPF_ALU | BPF_XOR | BPF_X: emit_alu_X(XOR); break; - case BPF_S_ALU_XOR_K: /* A ^= K */ + case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */ emit_alu_K(XOR, K); break; - case BPF_S_ALU_LSH_X: /* A <<= X */ + case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X */ emit_alu_X(SLL); break; - case BPF_S_ALU_LSH_K: /* A <<= K */ + case BPF_ALU | BPF_LSH | BPF_K: /* A <<= K */ emit_alu_K(SLL, K); break; - case BPF_S_ALU_RSH_X: /* A >>= X */ + case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X */ emit_alu_X(SRL); break; - case BPF_S_ALU_RSH_K: /* A >>= K */ + case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K */ emit_alu_K(SRL, K); break; - case BPF_S_ALU_MUL_X: /* A *= X; */ + case BPF_ALU | BPF_MUL | BPF_X: /* A *= X; */ emit_alu_X(MUL); break; - case BPF_S_ALU_MUL_K: /* A *= K */ + case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */ emit_alu_K(MUL, K); break; - case BPF_S_ALU_DIV_K: /* A /= K with K != 0*/ + case BPF_ALU | BPF_DIV | BPF_K: /* A /= K with K != 0*/ if (K == 1) break; emit_write_y(G0); @@ -512,7 +504,7 @@ void bpf_jit_compile(struct sk_filter *fp) #endif emit_alu_K(DIV, K); break; - case BPF_S_ALU_DIV_X: /* A /= X; */ + case BPF_ALU | BPF_DIV | BPF_X: /* A /= X; */ emit_cmpi(r_X, 0); if (pc_ret0 > 0) { t_offset = addrs[pc_ret0 - 1]; @@ -544,10 +536,10 @@ void bpf_jit_compile(struct sk_filter *fp) #endif emit_alu_X(DIV); break; - case BPF_S_ALU_NEG: + case BPF_ALU | BPF_NEG: emit_neg(); break; - case BPF_S_RET_K: + case BPF_RET | BPF_K: if (!K) { if (pc_ret0 == -1) pc_ret0 = i; @@ -556,7 +548,7 @@ void bpf_jit_compile(struct sk_filter *fp) emit_loadimm(K, r_A); } /* Fallthrough */ - case BPF_S_RET_A: + case BPF_RET | BPF_A: if (seen_or_pass0) { if (i != flen - 1) { emit_jump(cleanup_addr); @@ -573,18 +565,18 @@ void bpf_jit_compile(struct sk_filter *fp) emit_jmpl(r_saved_O7, 8, G0); emit_reg_move(r_A, O0); /* delay slot */ break; - case BPF_S_MISC_TAX: + case BPF_MISC | BPF_TAX: seen |= SEEN_XREG; emit_reg_move(r_A, r_X); break; - case BPF_S_MISC_TXA: + case BPF_MISC | BPF_TXA: seen |= SEEN_XREG; emit_reg_move(r_X, r_A); break; - case BPF_S_ANC_CPU: + case BPF_ANC | SKF_AD_CPU: emit_load_cpu(r_A); break; - case BPF_S_ANC_PROTOCOL: + case BPF_ANC | SKF_AD_PROTOCOL: emit_skb_load16(protocol, r_A); break; #if 0 @@ -592,38 +584,38 @@ void bpf_jit_compile(struct sk_filter *fp) * a bit field even though we very much * know what we are doing here. */ - case BPF_S_ANC_PKTTYPE: + case BPF_ANC | SKF_AD_PKTTYPE: __emit_skb_load8(pkt_type, r_A); emit_alu_K(SRL, 5); break; #endif - case BPF_S_ANC_IFINDEX: + case BPF_ANC | SKF_AD_IFINDEX: emit_skb_loadptr(dev, r_A); emit_cmpi(r_A, 0); emit_branch(BNE_PTR, cleanup_addr + 4); emit_nop(); emit_load32(r_A, struct net_device, ifindex, r_A); break; - case BPF_S_ANC_MARK: + case BPF_ANC | SKF_AD_MARK: emit_skb_load32(mark, r_A); break; - case BPF_S_ANC_QUEUE: + case BPF_ANC | SKF_AD_QUEUE: emit_skb_load16(queue_mapping, r_A); break; - case BPF_S_ANC_HATYPE: + case BPF_ANC | SKF_AD_HATYPE: emit_skb_loadptr(dev, r_A); emit_cmpi(r_A, 0); emit_branch(BNE_PTR, cleanup_addr + 4); emit_nop(); emit_load16(r_A, struct net_device, type, r_A); break; - case BPF_S_ANC_RXHASH: + case BPF_ANC | SKF_AD_RXHASH: emit_skb_load32(hash, r_A); break; - case BPF_S_ANC_VLAN_TAG: - case BPF_S_ANC_VLAN_TAG_PRESENT: + case BPF_ANC | SKF_AD_VLAN_TAG: + case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: emit_skb_load16(vlan_tci, r_A); - if (filter[i].code == BPF_S_ANC_VLAN_TAG) { + if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) { emit_andi(r_A, VLAN_VID_MASK, r_A); } else { emit_loadimm(VLAN_TAG_PRESENT, r_TMP); @@ -631,44 +623,44 @@ void bpf_jit_compile(struct sk_filter *fp) } break; - case BPF_S_LD_IMM: + case BPF_LD | BPF_IMM: emit_loadimm(K, r_A); break; - case BPF_S_LDX_IMM: + case BPF_LDX | BPF_IMM: emit_loadimm(K, r_X); break; - case BPF_S_LD_MEM: + case BPF_LD | BPF_MEM: emit_ldmem(K * 4, r_A); break; - case BPF_S_LDX_MEM: + case BPF_LDX | BPF_MEM: emit_ldmem(K * 4, r_X); break; - case BPF_S_ST: + case BPF_ST: emit_stmem(K * 4, r_A); break; - case BPF_S_STX: + case BPF_STX: emit_stmem(K * 4, r_X); break; #define CHOOSE_LOAD_FUNC(K, func) \ ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) - case BPF_S_LD_W_ABS: + case BPF_LD | BPF_W | BPF_ABS: func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_word); common_load: seen |= SEEN_DATAREF; emit_loadimm(K, r_OFF); emit_call(func); break; - case BPF_S_LD_H_ABS: + case BPF_LD | BPF_H | BPF_ABS: func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_half); goto common_load; - case BPF_S_LD_B_ABS: + case BPF_LD | BPF_B | BPF_ABS: func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_byte); goto common_load; - case BPF_S_LDX_B_MSH: + case BPF_LDX | BPF_B | BPF_MSH: func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_byte_msh); goto common_load; - case BPF_S_LD_W_IND: + case BPF_LD | BPF_W | BPF_IND: func = bpf_jit_load_word; common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG; if (K) { @@ -683,13 +675,13 @@ common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG; } emit_call(func); break; - case BPF_S_LD_H_IND: + case BPF_LD | BPF_H | BPF_IND: func = bpf_jit_load_half; goto common_load_ind; - case BPF_S_LD_B_IND: + case BPF_LD | BPF_B | BPF_IND: func = bpf_jit_load_byte; goto common_load_ind; - case BPF_S_JMP_JA: + case BPF_JMP | BPF_JA: emit_jump(addrs[i + K]); emit_nop(); break; @@ -700,14 +692,14 @@ common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG; f_op = FOP; \ goto cond_branch - COND_SEL(BPF_S_JMP_JGT_K, BGU, BLEU); - COND_SEL(BPF_S_JMP_JGE_K, BGEU, BLU); - COND_SEL(BPF_S_JMP_JEQ_K, BE, BNE); - COND_SEL(BPF_S_JMP_JSET_K, BNE, BE); - COND_SEL(BPF_S_JMP_JGT_X, BGU, BLEU); - COND_SEL(BPF_S_JMP_JGE_X, BGEU, BLU); - COND_SEL(BPF_S_JMP_JEQ_X, BE, BNE); - COND_SEL(BPF_S_JMP_JSET_X, BNE, BE); + COND_SEL(BPF_JMP | BPF_JGT | BPF_K, BGU, BLEU); + COND_SEL(BPF_JMP | BPF_JGE | BPF_K, BGEU, BLU); + COND_SEL(BPF_JMP | BPF_JEQ | BPF_K, BE, BNE); + COND_SEL(BPF_JMP | BPF_JSET | BPF_K, BNE, BE); + COND_SEL(BPF_JMP | BPF_JGT | BPF_X, BGU, BLEU); + COND_SEL(BPF_JMP | BPF_JGE | BPF_X, BGEU, BLU); + COND_SEL(BPF_JMP | BPF_JEQ | BPF_X, BE, BNE); + COND_SEL(BPF_JMP | BPF_JSET | BPF_X, BNE, BE); cond_branch: f_offset = addrs[i + filter[i].jf]; t_offset = addrs[i + filter[i].jt]; @@ -719,20 +711,20 @@ cond_branch: f_offset = addrs[i + filter[i].jf]; break; } - switch (filter[i].code) { - case BPF_S_JMP_JGT_X: - case BPF_S_JMP_JGE_X: - case BPF_S_JMP_JEQ_X: + switch (code) { + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JEQ | BPF_X: seen |= SEEN_XREG; emit_cmp(r_A, r_X); break; - case BPF_S_JMP_JSET_X: + case BPF_JMP | BPF_JSET | BPF_X: seen |= SEEN_XREG; emit_btst(r_A, r_X); break; - case BPF_S_JMP_JEQ_K: - case BPF_S_JMP_JGT_K: - case BPF_S_JMP_JGE_K: + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGE | BPF_K: if (is_simm13(K)) { emit_cmpi(r_A, K); } else { @@ -740,7 +732,7 @@ cond_branch: f_offset = addrs[i + filter[i].jf]; emit_cmp(r_A, r_TMP); } break; - case BPF_S_JMP_JSET_K: + case BPF_JMP | BPF_JSET | BPF_K: if (is_simm13(K)) { emit_btsti(r_A, K); } else { diff --git a/include/linux/filter.h b/include/linux/filter.h index 625f4de9bdf2..49ef7a298c92 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -197,7 +197,6 @@ int sk_detach_filter(struct sock *sk); int sk_chk_filter(struct sock_filter *filter, unsigned int flen); int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned int len); -void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to); void sk_filter_charge(struct sock *sk, struct sk_filter *fp); void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); @@ -205,6 +204,41 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); void bpf_int_jit_compile(struct sk_filter *fp); +#define BPF_ANC BIT(15) + +static inline u16 bpf_anc_helper(const struct sock_filter *ftest) +{ + BUG_ON(ftest->code & BPF_ANC); + + switch (ftest->code) { + case BPF_LD | BPF_W | BPF_ABS: + case BPF_LD | BPF_H | BPF_ABS: + case BPF_LD | BPF_B | BPF_ABS: +#define BPF_ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE: \ + return BPF_ANC | SKF_AD_##CODE + switch (ftest->k) { + BPF_ANCILLARY(PROTOCOL); + BPF_ANCILLARY(PKTTYPE); + BPF_ANCILLARY(IFINDEX); + BPF_ANCILLARY(NLATTR); + BPF_ANCILLARY(NLATTR_NEST); + BPF_ANCILLARY(MARK); + BPF_ANCILLARY(QUEUE); + BPF_ANCILLARY(HATYPE); + BPF_ANCILLARY(RXHASH); + BPF_ANCILLARY(CPU); + BPF_ANCILLARY(ALU_XOR_X); + BPF_ANCILLARY(VLAN_TAG); + BPF_ANCILLARY(VLAN_TAG_PRESENT); + BPF_ANCILLARY(PAY_OFFSET); + BPF_ANCILLARY(RANDOM); + } + /* Fallthrough. */ + default: + return ftest->code; + } +} + #ifdef CONFIG_BPF_JIT #include #include @@ -224,86 +258,20 @@ static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, } #else #include + static inline void bpf_jit_compile(struct sk_filter *fp) { } + static inline void bpf_jit_free(struct sk_filter *fp) { kfree(fp); } -#endif +#endif /* CONFIG_BPF_JIT */ static inline int bpf_tell_extensions(void) { return SKF_AD_MAX; } -enum { - BPF_S_RET_K = 1, - BPF_S_RET_A, - BPF_S_ALU_ADD_K, - BPF_S_ALU_ADD_X, - BPF_S_ALU_SUB_K, - BPF_S_ALU_SUB_X, - BPF_S_ALU_MUL_K, - BPF_S_ALU_MUL_X, - BPF_S_ALU_DIV_X, - BPF_S_ALU_MOD_K, - BPF_S_ALU_MOD_X, - BPF_S_ALU_AND_K, - BPF_S_ALU_AND_X, - BPF_S_ALU_OR_K, - BPF_S_ALU_OR_X, - BPF_S_ALU_XOR_K, - BPF_S_ALU_XOR_X, - BPF_S_ALU_LSH_K, - BPF_S_ALU_LSH_X, - BPF_S_ALU_RSH_K, - BPF_S_ALU_RSH_X, - BPF_S_ALU_NEG, - BPF_S_LD_W_ABS, - BPF_S_LD_H_ABS, - BPF_S_LD_B_ABS, - BPF_S_LD_W_LEN, - BPF_S_LD_W_IND, - BPF_S_LD_H_IND, - BPF_S_LD_B_IND, - BPF_S_LD_IMM, - BPF_S_LDX_W_LEN, - BPF_S_LDX_B_MSH, - BPF_S_LDX_IMM, - BPF_S_MISC_TAX, - BPF_S_MISC_TXA, - BPF_S_ALU_DIV_K, - BPF_S_LD_MEM, - BPF_S_LDX_MEM, - BPF_S_ST, - BPF_S_STX, - BPF_S_JMP_JA, - BPF_S_JMP_JEQ_K, - BPF_S_JMP_JEQ_X, - BPF_S_JMP_JGE_K, - BPF_S_JMP_JGE_X, - BPF_S_JMP_JGT_K, - BPF_S_JMP_JGT_X, - BPF_S_JMP_JSET_K, - BPF_S_JMP_JSET_X, - /* Ancillary data */ - BPF_S_ANC_PROTOCOL, - BPF_S_ANC_PKTTYPE, - BPF_S_ANC_IFINDEX, - BPF_S_ANC_NLATTR, - BPF_S_ANC_NLATTR_NEST, - BPF_S_ANC_MARK, - BPF_S_ANC_QUEUE, - BPF_S_ANC_HATYPE, - BPF_S_ANC_RXHASH, - BPF_S_ANC_CPU, - BPF_S_ANC_ALU_XOR_X, - BPF_S_ANC_VLAN_TAG, - BPF_S_ANC_VLAN_TAG_PRESENT, - BPF_S_ANC_PAY_OFFSET, - BPF_S_ANC_RANDOM, -}; - #endif /* __LINUX_FILTER_H__ */ diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 1036b6f2fded..44e69483b383 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -103,60 +103,59 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen) u32 k = ftest->k; switch (code) { - case BPF_S_LD_W_ABS: + case BPF_LD | BPF_W | BPF_ABS: ftest->code = BPF_LDX | BPF_W | BPF_ABS; /* 32-bit aligned and not out of bounds. */ if (k >= sizeof(struct seccomp_data) || k & 3) return -EINVAL; continue; - case BPF_S_LD_W_LEN: + case BPF_LD | BPF_W | BPF_LEN: ftest->code = BPF_LD | BPF_IMM; ftest->k = sizeof(struct seccomp_data); continue; - case BPF_S_LDX_W_LEN: + case BPF_LDX | BPF_W | BPF_LEN: ftest->code = BPF_LDX | BPF_IMM; ftest->k = sizeof(struct seccomp_data); continue; /* Explicitly include allowed calls. */ - case BPF_S_RET_K: - case BPF_S_RET_A: - case BPF_S_ALU_ADD_K: - case BPF_S_ALU_ADD_X: - case BPF_S_ALU_SUB_K: - case BPF_S_ALU_SUB_X: - case BPF_S_ALU_MUL_K: - case BPF_S_ALU_MUL_X: - case BPF_S_ALU_DIV_X: - case BPF_S_ALU_AND_K: - case BPF_S_ALU_AND_X: - case BPF_S_ALU_OR_K: - case BPF_S_ALU_OR_X: - case BPF_S_ALU_XOR_K: - case BPF_S_ALU_XOR_X: - case BPF_S_ALU_LSH_K: - case BPF_S_ALU_LSH_X: - case BPF_S_ALU_RSH_K: - case BPF_S_ALU_RSH_X: - case BPF_S_ALU_NEG: - case BPF_S_LD_IMM: - case BPF_S_LDX_IMM: - case BPF_S_MISC_TAX: - case BPF_S_MISC_TXA: - case BPF_S_ALU_DIV_K: - case BPF_S_LD_MEM: - case BPF_S_LDX_MEM: - case BPF_S_ST: - case BPF_S_STX: - case BPF_S_JMP_JA: - case BPF_S_JMP_JEQ_K: - case BPF_S_JMP_JEQ_X: - case BPF_S_JMP_JGE_K: - case BPF_S_JMP_JGE_X: - case BPF_S_JMP_JGT_K: - case BPF_S_JMP_JGT_X: - case BPF_S_JMP_JSET_K: - case BPF_S_JMP_JSET_X: - sk_decode_filter(ftest, ftest); + case BPF_RET | BPF_K: + case BPF_RET | BPF_A: + case BPF_ALU | BPF_ADD | BPF_K: + case BPF_ALU | BPF_ADD | BPF_X: + case BPF_ALU | BPF_SUB | BPF_K: + case BPF_ALU | BPF_SUB | BPF_X: + case BPF_ALU | BPF_MUL | BPF_K: + case BPF_ALU | BPF_MUL | BPF_X: + case BPF_ALU | BPF_DIV | BPF_K: + case BPF_ALU | BPF_DIV | BPF_X: + case BPF_ALU | BPF_AND | BPF_K: + case BPF_ALU | BPF_AND | BPF_X: + case BPF_ALU | BPF_OR | BPF_K: + case BPF_ALU | BPF_OR | BPF_X: + case BPF_ALU | BPF_XOR | BPF_K: + case BPF_ALU | BPF_XOR | BPF_X: + case BPF_ALU | BPF_LSH | BPF_K: + case BPF_ALU | BPF_LSH | BPF_X: + case BPF_ALU | BPF_RSH | BPF_K: + case BPF_ALU | BPF_RSH | BPF_X: + case BPF_ALU | BPF_NEG: + case BPF_LD | BPF_IMM: + case BPF_LDX | BPF_IMM: + case BPF_MISC | BPF_TAX: + case BPF_MISC | BPF_TXA: + case BPF_LD | BPF_MEM: + case BPF_LDX | BPF_MEM: + case BPF_ST: + case BPF_STX: + case BPF_JMP | BPF_JA: + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP | BPF_JSET | BPF_X: continue; default: return -EINVAL; diff --git a/net/core/filter.c b/net/core/filter.c index 2c2d35d9d101..328aaf6ff4d1 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -536,11 +536,13 @@ load_word: * Output: * BPF_R0 - 8/16/32-bit skb data converted to cpu endianness */ + ptr = load_pointer((struct sk_buff *) ctx, off, 4, &tmp); if (likely(ptr != NULL)) { BPF_R0 = get_unaligned_be32(ptr); CONT; } + return 0; LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + K)) */ off = K; @@ -550,6 +552,7 @@ load_half: BPF_R0 = get_unaligned_be16(ptr); CONT; } + return 0; LD_ABS_B: /* BPF_R0 = *(u8 *) (ctx + K) */ off = K; @@ -559,6 +562,7 @@ load_byte: BPF_R0 = *(u8 *)ptr; CONT; } + return 0; LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + X + K)) */ off = K + X; @@ -1136,44 +1140,46 @@ err: */ static int check_load_and_stores(struct sock_filter *filter, int flen) { - u16 *masks, memvalid = 0; /* one bit per cell, 16 cells */ + u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */ int pc, ret = 0; BUILD_BUG_ON(BPF_MEMWORDS > 16); + masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL); if (!masks) return -ENOMEM; + memset(masks, 0xff, flen * sizeof(*masks)); for (pc = 0; pc < flen; pc++) { memvalid &= masks[pc]; switch (filter[pc].code) { - case BPF_S_ST: - case BPF_S_STX: + case BPF_ST: + case BPF_STX: memvalid |= (1 << filter[pc].k); break; - case BPF_S_LD_MEM: - case BPF_S_LDX_MEM: + case BPF_LD | BPF_MEM: + case BPF_LDX | BPF_MEM: if (!(memvalid & (1 << filter[pc].k))) { ret = -EINVAL; goto error; } break; - case BPF_S_JMP_JA: - /* a jump must set masks on target */ + case BPF_JMP | BPF_JA: + /* A jump must set masks on target */ masks[pc + 1 + filter[pc].k] &= memvalid; memvalid = ~0; break; - case BPF_S_JMP_JEQ_K: - case BPF_S_JMP_JEQ_X: - case BPF_S_JMP_JGE_K: - case BPF_S_JMP_JGE_X: - case BPF_S_JMP_JGT_K: - case BPF_S_JMP_JGT_X: - case BPF_S_JMP_JSET_X: - case BPF_S_JMP_JSET_K: - /* a jump must set masks on targets */ + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP | BPF_JSET | BPF_X: + /* A jump must set masks on targets */ masks[pc + 1 + filter[pc].jt] &= memvalid; masks[pc + 1 + filter[pc].jf] &= memvalid; memvalid = ~0; @@ -1185,6 +1191,72 @@ error: return ret; } +static bool chk_code_allowed(u16 code_to_probe) +{ + static const bool codes[] = { + /* 32 bit ALU operations */ + [BPF_ALU | BPF_ADD | BPF_K] = true, + [BPF_ALU | BPF_ADD | BPF_X] = true, + [BPF_ALU | BPF_SUB | BPF_K] = true, + [BPF_ALU | BPF_SUB | BPF_X] = true, + [BPF_ALU | BPF_MUL | BPF_K] = true, + [BPF_ALU | BPF_MUL | BPF_X] = true, + [BPF_ALU | BPF_DIV | BPF_K] = true, + [BPF_ALU | BPF_DIV | BPF_X] = true, + [BPF_ALU | BPF_MOD | BPF_K] = true, + [BPF_ALU | BPF_MOD | BPF_X] = true, + [BPF_ALU | BPF_AND | BPF_K] = true, + [BPF_ALU | BPF_AND | BPF_X] = true, + [BPF_ALU | BPF_OR | BPF_K] = true, + [BPF_ALU | BPF_OR | BPF_X] = true, + [BPF_ALU | BPF_XOR | BPF_K] = true, + [BPF_ALU | BPF_XOR | BPF_X] = true, + [BPF_ALU | BPF_LSH | BPF_K] = true, + [BPF_ALU | BPF_LSH | BPF_X] = true, + [BPF_ALU | BPF_RSH | BPF_K] = true, + [BPF_ALU | BPF_RSH | BPF_X] = true, + [BPF_ALU | BPF_NEG] = true, + /* Load instructions */ + [BPF_LD | BPF_W | BPF_ABS] = true, + [BPF_LD | BPF_H | BPF_ABS] = true, + [BPF_LD | BPF_B | BPF_ABS] = true, + [BPF_LD | BPF_W | BPF_LEN] = true, + [BPF_LD | BPF_W | BPF_IND] = true, + [BPF_LD | BPF_H | BPF_IND] = true, + [BPF_LD | BPF_B | BPF_IND] = true, + [BPF_LD | BPF_IMM] = true, + [BPF_LD | BPF_MEM] = true, + [BPF_LDX | BPF_W | BPF_LEN] = true, + [BPF_LDX | BPF_B | BPF_MSH] = true, + [BPF_LDX | BPF_IMM] = true, + [BPF_LDX | BPF_MEM] = true, + /* Store instructions */ + [BPF_ST] = true, + [BPF_STX] = true, + /* Misc instructions */ + [BPF_MISC | BPF_TAX] = true, + [BPF_MISC | BPF_TXA] = true, + /* Return instructions */ + [BPF_RET | BPF_K] = true, + [BPF_RET | BPF_A] = true, + /* Jump instructions */ + [BPF_JMP | BPF_JA] = true, + [BPF_JMP | BPF_JEQ | BPF_K] = true, + [BPF_JMP | BPF_JEQ | BPF_X] = true, + [BPF_JMP | BPF_JGE | BPF_K] = true, + [BPF_JMP | BPF_JGE | BPF_X] = true, + [BPF_JMP | BPF_JGT | BPF_K] = true, + [BPF_JMP | BPF_JGT | BPF_X] = true, + [BPF_JMP | BPF_JSET | BPF_K] = true, + [BPF_JMP | BPF_JSET | BPF_X] = true, + }; + + if (code_to_probe >= ARRAY_SIZE(codes)) + return false; + + return codes[code_to_probe]; +} + /** * sk_chk_filter - verify socket filter code * @filter: filter to verify @@ -1201,154 +1273,76 @@ error: */ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) { - /* - * Valid instructions are initialized to non-0. - * Invalid instructions are initialized to 0. - */ - static const u8 codes[] = { - [BPF_ALU|BPF_ADD|BPF_K] = BPF_S_ALU_ADD_K, - [BPF_ALU|BPF_ADD|BPF_X] = BPF_S_ALU_ADD_X, - [BPF_ALU|BPF_SUB|BPF_K] = BPF_S_ALU_SUB_K, - [BPF_ALU|BPF_SUB|BPF_X] = BPF_S_ALU_SUB_X, - [BPF_ALU|BPF_MUL|BPF_K] = BPF_S_ALU_MUL_K, - [BPF_ALU|BPF_MUL|BPF_X] = BPF_S_ALU_MUL_X, - [BPF_ALU|BPF_DIV|BPF_X] = BPF_S_ALU_DIV_X, - [BPF_ALU|BPF_MOD|BPF_K] = BPF_S_ALU_MOD_K, - [BPF_ALU|BPF_MOD|BPF_X] = BPF_S_ALU_MOD_X, - [BPF_ALU|BPF_AND|BPF_K] = BPF_S_ALU_AND_K, - [BPF_ALU|BPF_AND|BPF_X] = BPF_S_ALU_AND_X, - [BPF_ALU|BPF_OR|BPF_K] = BPF_S_ALU_OR_K, - [BPF_ALU|BPF_OR|BPF_X] = BPF_S_ALU_OR_X, - [BPF_ALU|BPF_XOR|BPF_K] = BPF_S_ALU_XOR_K, - [BPF_ALU|BPF_XOR|BPF_X] = BPF_S_ALU_XOR_X, - [BPF_ALU|BPF_LSH|BPF_K] = BPF_S_ALU_LSH_K, - [BPF_ALU|BPF_LSH|BPF_X] = BPF_S_ALU_LSH_X, - [BPF_ALU|BPF_RSH|BPF_K] = BPF_S_ALU_RSH_K, - [BPF_ALU|BPF_RSH|BPF_X] = BPF_S_ALU_RSH_X, - [BPF_ALU|BPF_NEG] = BPF_S_ALU_NEG, - [BPF_LD|BPF_W|BPF_ABS] = BPF_S_LD_W_ABS, - [BPF_LD|BPF_H|BPF_ABS] = BPF_S_LD_H_ABS, - [BPF_LD|BPF_B|BPF_ABS] = BPF_S_LD_B_ABS, - [BPF_LD|BPF_W|BPF_LEN] = BPF_S_LD_W_LEN, - [BPF_LD|BPF_W|BPF_IND] = BPF_S_LD_W_IND, - [BPF_LD|BPF_H|BPF_IND] = BPF_S_LD_H_IND, - [BPF_LD|BPF_B|BPF_IND] = BPF_S_LD_B_IND, - [BPF_LD|BPF_IMM] = BPF_S_LD_IMM, - [BPF_LDX|BPF_W|BPF_LEN] = BPF_S_LDX_W_LEN, - [BPF_LDX|BPF_B|BPF_MSH] = BPF_S_LDX_B_MSH, - [BPF_LDX|BPF_IMM] = BPF_S_LDX_IMM, - [BPF_MISC|BPF_TAX] = BPF_S_MISC_TAX, - [BPF_MISC|BPF_TXA] = BPF_S_MISC_TXA, - [BPF_RET|BPF_K] = BPF_S_RET_K, - [BPF_RET|BPF_A] = BPF_S_RET_A, - [BPF_ALU|BPF_DIV|BPF_K] = BPF_S_ALU_DIV_K, - [BPF_LD|BPF_MEM] = BPF_S_LD_MEM, - [BPF_LDX|BPF_MEM] = BPF_S_LDX_MEM, - [BPF_ST] = BPF_S_ST, - [BPF_STX] = BPF_S_STX, - [BPF_JMP|BPF_JA] = BPF_S_JMP_JA, - [BPF_JMP|BPF_JEQ|BPF_K] = BPF_S_JMP_JEQ_K, - [BPF_JMP|BPF_JEQ|BPF_X] = BPF_S_JMP_JEQ_X, - [BPF_JMP|BPF_JGE|BPF_K] = BPF_S_JMP_JGE_K, - [BPF_JMP|BPF_JGE|BPF_X] = BPF_S_JMP_JGE_X, - [BPF_JMP|BPF_JGT|BPF_K] = BPF_S_JMP_JGT_K, - [BPF_JMP|BPF_JGT|BPF_X] = BPF_S_JMP_JGT_X, - [BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K, - [BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X, - }; - int pc; bool anc_found; + int pc; if (flen == 0 || flen > BPF_MAXINSNS) return -EINVAL; - /* check the filter code now */ + /* Check the filter code now */ for (pc = 0; pc < flen; pc++) { struct sock_filter *ftest = &filter[pc]; - u16 code = ftest->code; - if (code >= ARRAY_SIZE(codes)) - return -EINVAL; - code = codes[code]; - if (!code) + /* May we actually operate on this code? */ + if (!chk_code_allowed(ftest->code)) return -EINVAL; + /* Some instructions need special checks */ - switch (code) { - case BPF_S_ALU_DIV_K: - case BPF_S_ALU_MOD_K: - /* check for division by zero */ + switch (ftest->code) { + case BPF_ALU | BPF_DIV | BPF_K: + case BPF_ALU | BPF_MOD | BPF_K: + /* Check for division by zero */ if (ftest->k == 0) return -EINVAL; break; - case BPF_S_LD_MEM: - case BPF_S_LDX_MEM: - case BPF_S_ST: - case BPF_S_STX: - /* check for invalid memory addresses */ + case BPF_LD | BPF_MEM: + case BPF_LDX | BPF_MEM: + case BPF_ST: + case BPF_STX: + /* Check for invalid memory addresses */ if (ftest->k >= BPF_MEMWORDS) return -EINVAL; break; - case BPF_S_JMP_JA: - /* - * Note, the large ftest->k might cause loops. + case BPF_JMP | BPF_JA: + /* Note, the large ftest->k might cause loops. * Compare this with conditional jumps below, * where offsets are limited. --ANK (981016) */ - if (ftest->k >= (unsigned int)(flen-pc-1)) + if (ftest->k >= (unsigned int)(flen - pc - 1)) return -EINVAL; break; - case BPF_S_JMP_JEQ_K: - case BPF_S_JMP_JEQ_X: - case BPF_S_JMP_JGE_K: - case BPF_S_JMP_JGE_X: - case BPF_S_JMP_JGT_K: - case BPF_S_JMP_JGT_X: - case BPF_S_JMP_JSET_X: - case BPF_S_JMP_JSET_K: - /* for conditionals both must be safe */ + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP | BPF_JSET | BPF_X: + /* Both conditionals must be safe */ if (pc + ftest->jt + 1 >= flen || pc + ftest->jf + 1 >= flen) return -EINVAL; break; - case BPF_S_LD_W_ABS: - case BPF_S_LD_H_ABS: - case BPF_S_LD_B_ABS: + case BPF_LD | BPF_W | BPF_ABS: + case BPF_LD | BPF_H | BPF_ABS: + case BPF_LD | BPF_B | BPF_ABS: anc_found = false; -#define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE: \ - code = BPF_S_ANC_##CODE; \ - anc_found = true; \ - break - switch (ftest->k) { - ANCILLARY(PROTOCOL); - ANCILLARY(PKTTYPE); - ANCILLARY(IFINDEX); - ANCILLARY(NLATTR); - ANCILLARY(NLATTR_NEST); - ANCILLARY(MARK); - ANCILLARY(QUEUE); - ANCILLARY(HATYPE); - ANCILLARY(RXHASH); - ANCILLARY(CPU); - ANCILLARY(ALU_XOR_X); - ANCILLARY(VLAN_TAG); - ANCILLARY(VLAN_TAG_PRESENT); - ANCILLARY(PAY_OFFSET); - ANCILLARY(RANDOM); - } - - /* ancillary operation unknown or unsupported */ + if (bpf_anc_helper(ftest) & BPF_ANC) + anc_found = true; + /* Ancillary operation unknown or unsupported */ if (anc_found == false && ftest->k >= SKF_AD_OFF) return -EINVAL; } - ftest->code = code; } - /* last instruction must be a RET code */ + /* Last instruction must be a RET code */ switch (filter[flen - 1].code) { - case BPF_S_RET_K: - case BPF_S_RET_A: + case BPF_RET | BPF_K: + case BPF_RET | BPF_A: return check_load_and_stores(filter, flen); } + return -EINVAL; } EXPORT_SYMBOL(sk_chk_filter); @@ -1448,7 +1442,7 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp, { struct sock_filter *old_prog; struct sk_filter *old_fp; - int i, err, new_len, old_len = fp->len; + int err, new_len, old_len = fp->len; /* We are free to overwrite insns et al right here as it * won't be used at this point in time anymore internally @@ -1458,13 +1452,6 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp, BUILD_BUG_ON(sizeof(struct sock_filter) != sizeof(struct sock_filter_int)); - /* For now, we need to unfiddle BPF_S_* identifiers in place. - * This can sooner or later on be subject to removal, e.g. when - * JITs have been converted. - */ - for (i = 0; i < fp->len; i++) - sk_decode_filter(&fp->insns[i], &fp->insns[i]); - /* Conversion cannot happen on overlapping memory areas, * so we need to keep the user BPF around until the 2nd * pass. At this time, the user BPF is stored in fp->insns. @@ -1706,84 +1693,6 @@ int sk_detach_filter(struct sock *sk) } EXPORT_SYMBOL_GPL(sk_detach_filter); -void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to) -{ - static const u16 decodes[] = { - [BPF_S_ALU_ADD_K] = BPF_ALU|BPF_ADD|BPF_K, - [BPF_S_ALU_ADD_X] = BPF_ALU|BPF_ADD|BPF_X, - [BPF_S_ALU_SUB_K] = BPF_ALU|BPF_SUB|BPF_K, - [BPF_S_ALU_SUB_X] = BPF_ALU|BPF_SUB|BPF_X, - [BPF_S_ALU_MUL_K] = BPF_ALU|BPF_MUL|BPF_K, - [BPF_S_ALU_MUL_X] = BPF_ALU|BPF_MUL|BPF_X, - [BPF_S_ALU_DIV_X] = BPF_ALU|BPF_DIV|BPF_X, - [BPF_S_ALU_MOD_K] = BPF_ALU|BPF_MOD|BPF_K, - [BPF_S_ALU_MOD_X] = BPF_ALU|BPF_MOD|BPF_X, - [BPF_S_ALU_AND_K] = BPF_ALU|BPF_AND|BPF_K, - [BPF_S_ALU_AND_X] = BPF_ALU|BPF_AND|BPF_X, - [BPF_S_ALU_OR_K] = BPF_ALU|BPF_OR|BPF_K, - [BPF_S_ALU_OR_X] = BPF_ALU|BPF_OR|BPF_X, - [BPF_S_ALU_XOR_K] = BPF_ALU|BPF_XOR|BPF_K, - [BPF_S_ALU_XOR_X] = BPF_ALU|BPF_XOR|BPF_X, - [BPF_S_ALU_LSH_K] = BPF_ALU|BPF_LSH|BPF_K, - [BPF_S_ALU_LSH_X] = BPF_ALU|BPF_LSH|BPF_X, - [BPF_S_ALU_RSH_K] = BPF_ALU|BPF_RSH|BPF_K, - [BPF_S_ALU_RSH_X] = BPF_ALU|BPF_RSH|BPF_X, - [BPF_S_ALU_NEG] = BPF_ALU|BPF_NEG, - [BPF_S_LD_W_ABS] = BPF_LD|BPF_W|BPF_ABS, - [BPF_S_LD_H_ABS] = BPF_LD|BPF_H|BPF_ABS, - [BPF_S_LD_B_ABS] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_PROTOCOL] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_PKTTYPE] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_IFINDEX] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_NLATTR] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_NLATTR_NEST] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_MARK] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_QUEUE] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_HATYPE] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_RXHASH] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_CPU] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_ALU_XOR_X] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_PAY_OFFSET] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_RANDOM] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_LD_W_LEN] = BPF_LD|BPF_W|BPF_LEN, - [BPF_S_LD_W_IND] = BPF_LD|BPF_W|BPF_IND, - [BPF_S_LD_H_IND] = BPF_LD|BPF_H|BPF_IND, - [BPF_S_LD_B_IND] = BPF_LD|BPF_B|BPF_IND, - [BPF_S_LD_IMM] = BPF_LD|BPF_IMM, - [BPF_S_LDX_W_LEN] = BPF_LDX|BPF_W|BPF_LEN, - [BPF_S_LDX_B_MSH] = BPF_LDX|BPF_B|BPF_MSH, - [BPF_S_LDX_IMM] = BPF_LDX|BPF_IMM, - [BPF_S_MISC_TAX] = BPF_MISC|BPF_TAX, - [BPF_S_MISC_TXA] = BPF_MISC|BPF_TXA, - [BPF_S_RET_K] = BPF_RET|BPF_K, - [BPF_S_RET_A] = BPF_RET|BPF_A, - [BPF_S_ALU_DIV_K] = BPF_ALU|BPF_DIV|BPF_K, - [BPF_S_LD_MEM] = BPF_LD|BPF_MEM, - [BPF_S_LDX_MEM] = BPF_LDX|BPF_MEM, - [BPF_S_ST] = BPF_ST, - [BPF_S_STX] = BPF_STX, - [BPF_S_JMP_JA] = BPF_JMP|BPF_JA, - [BPF_S_JMP_JEQ_K] = BPF_JMP|BPF_JEQ|BPF_K, - [BPF_S_JMP_JEQ_X] = BPF_JMP|BPF_JEQ|BPF_X, - [BPF_S_JMP_JGE_K] = BPF_JMP|BPF_JGE|BPF_K, - [BPF_S_JMP_JGE_X] = BPF_JMP|BPF_JGE|BPF_X, - [BPF_S_JMP_JGT_K] = BPF_JMP|BPF_JGT|BPF_K, - [BPF_S_JMP_JGT_X] = BPF_JMP|BPF_JGT|BPF_X, - [BPF_S_JMP_JSET_K] = BPF_JMP|BPF_JSET|BPF_K, - [BPF_S_JMP_JSET_X] = BPF_JMP|BPF_JSET|BPF_X, - }; - u16 code; - - code = filt->code; - - to->code = decodes[code]; - to->jt = filt->jt; - to->jf = filt->jf; - to->k = filt->k; -} - int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len) { -- cgit v1.2.3-59-g8ed1b From f8f6d679aaa78b989d9aee8d2935066fbdca2a30 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 29 May 2014 10:22:51 +0200 Subject: net: filter: improve filter block macros Commit 9739eef13c92 ("net: filter: make BPF conversion more readable") started to introduce helper macros similar to BPF_STMT()/BPF_JUMP() macros from classic BPF. However, quite some statements in the filter conversion functions remained in the old style which gives a mixture of block macros and non block macros in the code. This patch makes the block macros itself more readable by using explicit member initialization, and converts the remaining ones where possible to remain in a more consistent state. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 255 +++++++++++++++++++++++++++++++++++++++---------- net/core/filter.c | 196 ++++++++++++++----------------------- 2 files changed, 277 insertions(+), 174 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 49ef7a298c92..f0c2ad43b4af 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -76,56 +76,211 @@ enum { /* BPF program can access up to 512 bytes of stack space. */ #define MAX_BPF_STACK 512 -/* bpf_add|sub|...: a += x, bpf_mov: a = x */ -#define BPF_ALU64_REG(op, a, x) \ - ((struct sock_filter_int) {BPF_ALU64|BPF_OP(op)|BPF_X, a, x, 0, 0}) -#define BPF_ALU32_REG(op, a, x) \ - ((struct sock_filter_int) {BPF_ALU|BPF_OP(op)|BPF_X, a, x, 0, 0}) - -/* bpf_add|sub|...: a += imm, bpf_mov: a = imm */ -#define BPF_ALU64_IMM(op, a, imm) \ - ((struct sock_filter_int) {BPF_ALU64|BPF_OP(op)|BPF_K, a, 0, 0, imm}) -#define BPF_ALU32_IMM(op, a, imm) \ - ((struct sock_filter_int) {BPF_ALU|BPF_OP(op)|BPF_K, a, 0, 0, imm}) - -/* R0 = *(uint *) (skb->data + off) */ -#define BPF_LD_ABS(size, off) \ - ((struct sock_filter_int) {BPF_LD|BPF_SIZE(size)|BPF_ABS, 0, 0, 0, off}) - -/* R0 = *(uint *) (skb->data + x + off) */ -#define BPF_LD_IND(size, x, off) \ - ((struct sock_filter_int) {BPF_LD|BPF_SIZE(size)|BPF_IND, 0, x, 0, off}) - -/* a = *(uint *) (x + off) */ -#define BPF_LDX_MEM(sz, a, x, off) \ - ((struct sock_filter_int) {BPF_LDX|BPF_SIZE(sz)|BPF_MEM, a, x, off, 0}) - -/* if (a 'op' x) goto pc+off */ -#define BPF_JMP_REG(op, a, x, off) \ - ((struct sock_filter_int) {BPF_JMP|BPF_OP(op)|BPF_X, a, x, off, 0}) - -/* if (a 'op' imm) goto pc+off */ -#define BPF_JMP_IMM(op, a, imm, off) \ - ((struct sock_filter_int) {BPF_JMP|BPF_OP(op)|BPF_K, a, 0, off, imm}) - -#define BPF_EXIT_INSN() \ - ((struct sock_filter_int) {BPF_JMP|BPF_EXIT, 0, 0, 0, 0}) - -static inline int size_to_bpf(int size) -{ - switch (size) { - case 1: - return BPF_B; - case 2: - return BPF_H; - case 4: - return BPF_W; - case 8: - return BPF_DW; - default: - return -EINVAL; - } -} +/* Helper macros for filter block array initializers. */ + +/* ALU ops on registers, bpf_add|sub|...: A += X */ + +#define BPF_ALU64_REG(OP, A, X) \ + ((struct sock_filter_int) { \ + .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \ + .a_reg = A, \ + .x_reg = X, \ + .off = 0, \ + .imm = 0 }) + +#define BPF_ALU32_REG(OP, A, X) \ + ((struct sock_filter_int) { \ + .code = BPF_ALU | BPF_OP(OP) | BPF_X, \ + .a_reg = A, \ + .x_reg = X, \ + .off = 0, \ + .imm = 0 }) + +/* ALU ops on immediates, bpf_add|sub|...: A += IMM */ + +#define BPF_ALU64_IMM(OP, A, IMM) \ + ((struct sock_filter_int) { \ + .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \ + .a_reg = A, \ + .x_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +#define BPF_ALU32_IMM(OP, A, IMM) \ + ((struct sock_filter_int) { \ + .code = BPF_ALU | BPF_OP(OP) | BPF_K, \ + .a_reg = A, \ + .x_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +/* Endianess conversion, cpu_to_{l,b}e(), {l,b}e_to_cpu() */ + +#define BPF_ENDIAN(TYPE, A, LEN) \ + ((struct sock_filter_int) { \ + .code = BPF_ALU | BPF_END | BPF_SRC(TYPE), \ + .a_reg = A, \ + .x_reg = 0, \ + .off = 0, \ + .imm = LEN }) + +/* Short form of mov, A = X */ + +#define BPF_MOV64_REG(A, X) \ + ((struct sock_filter_int) { \ + .code = BPF_ALU64 | BPF_MOV | BPF_X, \ + .a_reg = A, \ + .x_reg = X, \ + .off = 0, \ + .imm = 0 }) + +#define BPF_MOV32_REG(A, X) \ + ((struct sock_filter_int) { \ + .code = BPF_ALU | BPF_MOV | BPF_X, \ + .a_reg = A, \ + .x_reg = X, \ + .off = 0, \ + .imm = 0 }) + +/* Short form of mov, A = IMM */ + +#define BPF_MOV64_IMM(A, IMM) \ + ((struct sock_filter_int) { \ + .code = BPF_ALU64 | BPF_MOV | BPF_K, \ + .a_reg = A, \ + .x_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +#define BPF_MOV32_IMM(A, IMM) \ + ((struct sock_filter_int) { \ + .code = BPF_ALU | BPF_MOV | BPF_K, \ + .a_reg = A, \ + .x_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +/* Short form of mov based on type, BPF_X: A = X, BPF_K: A = IMM */ + +#define BPF_MOV64_RAW(TYPE, A, X, IMM) \ + ((struct sock_filter_int) { \ + .code = BPF_ALU64 | BPF_MOV | BPF_SRC(TYPE), \ + .a_reg = A, \ + .x_reg = X, \ + .off = 0, \ + .imm = IMM }) + +#define BPF_MOV32_RAW(TYPE, A, X, IMM) \ + ((struct sock_filter_int) { \ + .code = BPF_ALU | BPF_MOV | BPF_SRC(TYPE), \ + .a_reg = A, \ + .x_reg = X, \ + .off = 0, \ + .imm = IMM }) + +/* Direct packet access, R0 = *(uint *) (skb->data + OFF) */ + +#define BPF_LD_ABS(SIZE, OFF) \ + ((struct sock_filter_int) { \ + .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \ + .a_reg = 0, \ + .x_reg = 0, \ + .off = 0, \ + .imm = OFF }) + +/* Indirect packet access, R0 = *(uint *) (skb->data + X + OFF) */ + +#define BPF_LD_IND(SIZE, X, OFF) \ + ((struct sock_filter_int) { \ + .code = BPF_LD | BPF_SIZE(SIZE) | BPF_IND, \ + .a_reg = 0, \ + .x_reg = X, \ + .off = 0, \ + .imm = OFF }) + +/* Memory store, A = *(uint *) (X + OFF), and vice versa */ + +#define BPF_LDX_MEM(SIZE, A, X, OFF) \ + ((struct sock_filter_int) { \ + .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \ + .a_reg = A, \ + .x_reg = X, \ + .off = OFF, \ + .imm = 0 }) + +#define BPF_STX_MEM(SIZE, A, X, OFF) \ + ((struct sock_filter_int) { \ + .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \ + .a_reg = A, \ + .x_reg = X, \ + .off = OFF, \ + .imm = 0 }) + +/* Conditional jumps against registers, if (A 'op' X) goto pc + OFF */ + +#define BPF_JMP_REG(OP, A, X, OFF) \ + ((struct sock_filter_int) { \ + .code = BPF_JMP | BPF_OP(OP) | BPF_X, \ + .a_reg = A, \ + .x_reg = X, \ + .off = OFF, \ + .imm = 0 }) + +/* Conditional jumps against immediates, if (A 'op' IMM) goto pc + OFF */ + +#define BPF_JMP_IMM(OP, A, IMM, OFF) \ + ((struct sock_filter_int) { \ + .code = BPF_JMP | BPF_OP(OP) | BPF_K, \ + .a_reg = A, \ + .x_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + +/* Function call */ + +#define BPF_EMIT_CALL(FUNC) \ + ((struct sock_filter_int) { \ + .code = BPF_JMP | BPF_CALL, \ + .a_reg = 0, \ + .x_reg = 0, \ + .off = 0, \ + .imm = ((FUNC) - __bpf_call_base) }) + +/* Raw code statement block */ + +#define BPF_RAW_INSN(CODE, A, X, OFF, IMM) \ + ((struct sock_filter_int) { \ + .code = CODE, \ + .a_reg = A, \ + .x_reg = X, \ + .off = OFF, \ + .imm = IMM }) + +/* Program exit */ + +#define BPF_EXIT_INSN() \ + ((struct sock_filter_int) { \ + .code = BPF_JMP | BPF_EXIT, \ + .a_reg = 0, \ + .x_reg = 0, \ + .off = 0, \ + .imm = 0 }) + +#define bytes_to_bpf_size(bytes) \ +({ \ + int bpf_size = -EINVAL; \ + \ + if (bytes == sizeof(u8)) \ + bpf_size = BPF_B; \ + else if (bytes == sizeof(u16)) \ + bpf_size = BPF_H; \ + else if (bytes == sizeof(u32)) \ + bpf_size = BPF_W; \ + else if (bytes == sizeof(u64)) \ + bpf_size = BPF_DW; \ + \ + bpf_size; \ +}) /* Macro to invoke filter function. */ #define SK_RUN_FILTER(filter, ctx) (*filter->bpf_func)(ctx, filter->insnsi) diff --git a/net/core/filter.c b/net/core/filter.c index 328aaf6ff4d1..842f8393121d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -672,14 +672,10 @@ static bool convert_bpf_extensions(struct sock_filter *fp, BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); /* A = *(u16 *) (ctx + offsetof(protocol)) */ - *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, - offsetof(struct sk_buff, protocol)); - insn++; - + *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, + offsetof(struct sk_buff, protocol)); /* A = ntohs(A) [emitting a nop or swap16] */ - insn->code = BPF_ALU | BPF_END | BPF_FROM_BE; - insn->a_reg = BPF_REG_A; - insn->imm = 16; + *insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16); break; case SKF_AD_OFF + SKF_AD_PKTTYPE: @@ -688,37 +684,27 @@ static bool convert_bpf_extensions(struct sock_filter *fp, if (insn->off < 0) return false; insn++; - *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, PKT_TYPE_MAX); break; case SKF_AD_OFF + SKF_AD_IFINDEX: case SKF_AD_OFF + SKF_AD_HATYPE: - *insn = BPF_LDX_MEM(size_to_bpf(FIELD_SIZEOF(struct sk_buff, dev)), - BPF_REG_TMP, BPF_REG_CTX, - offsetof(struct sk_buff, dev)); - insn++; - - /* if (tmp != 0) goto pc+1 */ - *insn = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1); - insn++; - - *insn = BPF_EXIT_INSN(); - insn++; - BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2); - - insn->a_reg = BPF_REG_A; - insn->x_reg = BPF_REG_TMP; - - if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX) { - insn->code = BPF_LDX | BPF_MEM | BPF_W; - insn->off = offsetof(struct net_device, ifindex); - } else { - insn->code = BPF_LDX | BPF_MEM | BPF_H; - insn->off = offsetof(struct net_device, type); - } + BUILD_BUG_ON(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)) < 0); + + *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)), + BPF_REG_TMP, BPF_REG_CTX, + offsetof(struct sk_buff, dev)); + /* if (tmp != 0) goto pc + 1 */ + *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1); + *insn++ = BPF_EXIT_INSN(); + if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX) + *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_TMP, + offsetof(struct net_device, ifindex)); + else + *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_TMP, + offsetof(struct net_device, type)); break; case SKF_AD_OFF + SKF_AD_MARK: @@ -745,22 +731,17 @@ static bool convert_bpf_extensions(struct sock_filter *fp, case SKF_AD_OFF + SKF_AD_VLAN_TAG: case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); - - /* A = *(u16 *) (ctx + offsetof(vlan_tci)) */ - *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, - offsetof(struct sk_buff, vlan_tci)); - insn++; - BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); + /* A = *(u16 *) (ctx + offsetof(vlan_tci)) */ + *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, + offsetof(struct sk_buff, vlan_tci)); if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) { *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, ~VLAN_TAG_PRESENT); } else { /* A >>= 12 */ - *insn = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 12); - insn++; - + *insn++ = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 12); /* A &= 1 */ *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 1); } @@ -772,34 +753,27 @@ static bool convert_bpf_extensions(struct sock_filter *fp, case SKF_AD_OFF + SKF_AD_CPU: case SKF_AD_OFF + SKF_AD_RANDOM: /* arg1 = ctx */ - *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_ARG1, BPF_REG_CTX); - insn++; - + *insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX); /* arg2 = A */ - *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_ARG2, BPF_REG_A); - insn++; - + *insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A); /* arg3 = X */ - *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_ARG3, BPF_REG_X); - insn++; - + *insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X); /* Emit call(ctx, arg2=A, arg3=X) */ - insn->code = BPF_JMP | BPF_CALL; switch (fp->k) { case SKF_AD_OFF + SKF_AD_PAY_OFFSET: - insn->imm = __skb_get_pay_offset - __bpf_call_base; + *insn = BPF_EMIT_CALL(__skb_get_pay_offset); break; case SKF_AD_OFF + SKF_AD_NLATTR: - insn->imm = __skb_get_nlattr - __bpf_call_base; + *insn = BPF_EMIT_CALL(__skb_get_nlattr); break; case SKF_AD_OFF + SKF_AD_NLATTR_NEST: - insn->imm = __skb_get_nlattr_nest - __bpf_call_base; + *insn = BPF_EMIT_CALL(__skb_get_nlattr_nest); break; case SKF_AD_OFF + SKF_AD_CPU: - insn->imm = __get_raw_cpu_id - __bpf_call_base; + *insn = BPF_EMIT_CALL(__get_raw_cpu_id); break; case SKF_AD_OFF + SKF_AD_RANDOM: - insn->imm = __get_random_u32 - __bpf_call_base; + *insn = BPF_EMIT_CALL(__get_random_u32); break; } break; @@ -871,9 +845,8 @@ do_pass: new_insn = new_prog; fp = prog; - if (new_insn) { - *new_insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_CTX, BPF_REG_ARG1); - } + if (new_insn) + *new_insn = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1); new_insn++; for (i = 0; i < len; fp++, i++) { @@ -921,17 +894,16 @@ do_pass: convert_bpf_extensions(fp, &insn)) break; - insn->code = fp->code; - insn->a_reg = BPF_REG_A; - insn->x_reg = BPF_REG_X; - insn->imm = fp->k; + *insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k); break; - /* Jump opcodes map as-is, but offsets need adjustment. */ - case BPF_JMP | BPF_JA: - target = i + fp->k + 1; - insn->code = fp->code; -#define EMIT_JMP \ + /* Jump transformation cannot use BPF block macros + * everywhere as offset calculation and target updates + * require a bit more work than the rest, i.e. jump + * opcodes map as-is, but offsets need adjustment. + */ + +#define BPF_EMIT_JMP \ do { \ if (target >= len || target < 0) \ goto err; \ @@ -940,7 +912,10 @@ do_pass: insn->off -= insn - tmp_insns; \ } while (0) - EMIT_JMP; + case BPF_JMP | BPF_JA: + target = i + fp->k + 1; + insn->code = fp->code; + BPF_EMIT_JMP; break; case BPF_JMP | BPF_JEQ | BPF_K: @@ -956,10 +931,7 @@ do_pass: * immediate into tmp register and use it * in compare insn. */ - insn->code = BPF_ALU | BPF_MOV | BPF_K; - insn->a_reg = BPF_REG_TMP; - insn->imm = fp->k; - insn++; + *insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k); insn->a_reg = BPF_REG_A; insn->x_reg = BPF_REG_TMP; @@ -975,7 +947,7 @@ do_pass: if (fp->jf == 0) { insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src; target = i + fp->jt + 1; - EMIT_JMP; + BPF_EMIT_JMP; break; } @@ -983,116 +955,94 @@ do_pass: if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) { insn->code = BPF_JMP | BPF_JNE | bpf_src; target = i + fp->jf + 1; - EMIT_JMP; + BPF_EMIT_JMP; break; } /* Other jumps are mapped into two insns: Jxx and JA. */ target = i + fp->jt + 1; insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src; - EMIT_JMP; + BPF_EMIT_JMP; insn++; insn->code = BPF_JMP | BPF_JA; target = i + fp->jf + 1; - EMIT_JMP; + BPF_EMIT_JMP; break; /* ldxb 4 * ([14] & 0xf) is remaped into 6 insns. */ case BPF_LDX | BPF_MSH | BPF_B: /* tmp = A */ - *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_TMP, BPF_REG_A); - insn++; - + *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_A); /* A = BPF_R0 = *(u8 *) (skb->data + K) */ - *insn = BPF_LD_ABS(BPF_B, fp->k); - insn++; - + *insn++ = BPF_LD_ABS(BPF_B, fp->k); /* A &= 0xf */ - *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf); - insn++; - + *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf); /* A <<= 2 */ - *insn = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2); - insn++; - + *insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2); /* X = A */ - *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_X, BPF_REG_A); - insn++; - + *insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A); /* A = tmp */ - *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_A, BPF_REG_TMP); + *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP); break; /* RET_K, RET_A are remaped into 2 insns. */ case BPF_RET | BPF_A: case BPF_RET | BPF_K: - insn->code = BPF_ALU | BPF_MOV | - (BPF_RVAL(fp->code) == BPF_K ? - BPF_K : BPF_X); - insn->a_reg = 0; - insn->x_reg = BPF_REG_A; - insn->imm = fp->k; - insn++; - + *insn++ = BPF_MOV32_RAW(BPF_RVAL(fp->code) == BPF_K ? + BPF_K : BPF_X, BPF_REG_0, + BPF_REG_A, fp->k); *insn = BPF_EXIT_INSN(); break; /* Store to stack. */ case BPF_ST: case BPF_STX: - insn->code = BPF_STX | BPF_MEM | BPF_W; - insn->a_reg = BPF_REG_FP; - insn->x_reg = fp->code == BPF_ST ? - BPF_REG_A : BPF_REG_X; - insn->off = -(BPF_MEMWORDS - fp->k) * 4; + *insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) == + BPF_ST ? BPF_REG_A : BPF_REG_X, + -(BPF_MEMWORDS - fp->k) * 4); break; /* Load from stack. */ case BPF_LD | BPF_MEM: case BPF_LDX | BPF_MEM: - insn->code = BPF_LDX | BPF_MEM | BPF_W; - insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ? - BPF_REG_A : BPF_REG_X; - insn->x_reg = BPF_REG_FP; - insn->off = -(BPF_MEMWORDS - fp->k) * 4; + *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ? + BPF_REG_A : BPF_REG_X, BPF_REG_FP, + -(BPF_MEMWORDS - fp->k) * 4); break; /* A = K or X = K */ case BPF_LD | BPF_IMM: case BPF_LDX | BPF_IMM: - insn->code = BPF_ALU | BPF_MOV | BPF_K; - insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ? - BPF_REG_A : BPF_REG_X; - insn->imm = fp->k; + *insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ? + BPF_REG_A : BPF_REG_X, fp->k); break; /* X = A */ case BPF_MISC | BPF_TAX: - *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_X, BPF_REG_A); + *insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A); break; /* A = X */ case BPF_MISC | BPF_TXA: - *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_A, BPF_REG_X); + *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X); break; /* A = skb->len or X = skb->len */ case BPF_LD | BPF_W | BPF_LEN: case BPF_LDX | BPF_W | BPF_LEN: - insn->code = BPF_LDX | BPF_MEM | BPF_W; - insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ? - BPF_REG_A : BPF_REG_X; - insn->x_reg = BPF_REG_CTX; - insn->off = offsetof(struct sk_buff, len); + *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ? + BPF_REG_A : BPF_REG_X, BPF_REG_CTX, + offsetof(struct sk_buff, len)); break; - /* access seccomp_data fields */ + /* Access seccomp_data fields. */ case BPF_LDX | BPF_ABS | BPF_W: /* A = *(u32 *) (ctx + K) */ *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k); break; + /* Unkown instruction. */ default: goto err; } @@ -1101,7 +1051,6 @@ do_pass: if (new_prog) memcpy(new_insn, tmp_insns, sizeof(*insn) * (insn - tmp_insns)); - new_insn += insn - tmp_insns; } @@ -1116,7 +1065,6 @@ do_pass: new_flen = new_insn - new_prog; if (pass > 2) goto err; - goto do_pass; } -- cgit v1.2.3-59-g8ed1b From 96b2e73c5471542cb9c622c4360716684f8797ed Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 2 Jun 2014 00:18:48 -0700 Subject: Revert "net/mlx4_en: Use affinity hint" This reverts commit 70a640d0dae3a9b1b222ce673eb5d92c263ddd61. Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/main.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_cq.c | 6 +----- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 30 -------------------------- drivers/net/ethernet/mellanox/mlx4/eq.c | 13 +---------- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1 - include/linux/mlx4/device.h | 2 +- 6 files changed, 4 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 58b1f239ac2b..199c7896f081 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1897,7 +1897,7 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) i, j, dev->pdev->bus->name); /* Set IRQ for specific name (per ring) */ if (mlx4_assign_eq(dev, name, NULL, - &ibdev->eq_table[eq], NULL)) { + &ibdev->eq_table[eq])) { /* Use legacy (same as mlx4_en driver) */ pr_warn("Can't allocate EQ %d; reverting to legacy\n", eq); ibdev->eq_table[eq] = diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index ea2cd72e5368..636963db598a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -118,15 +118,11 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, if (cq->is_tx == RX) { if (mdev->dev->caps.comp_pool) { if (!cq->vector) { - struct mlx4_en_rx_ring *ring = - priv->rx_ring[cq->ring]; - sprintf(name, "%s-%d", priv->dev->name, cq->ring); /* Set IRQ for specific name (per ring) */ if (mlx4_assign_eq(mdev->dev, name, rmap, - &cq->vector, - ring->affinity_mask)) { + &cq->vector)) { cq->vector = (cq->ring + 1 + priv->port) % mdev->dev->caps.num_comp_vectors; mlx4_warn(mdev, "Failed assigning an EQ to %s, falling back to legacy EQ's\n", diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 05d135572abc..58209bd0c94c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1526,32 +1526,6 @@ static void mlx4_en_linkstate(struct work_struct *work) mutex_unlock(&mdev->state_lock); } -static void mlx4_en_init_affinity_hint(struct mlx4_en_priv *priv, int ring_idx) -{ - struct mlx4_en_rx_ring *ring = priv->rx_ring[ring_idx]; - int numa_node = priv->mdev->dev->numa_node; - - if (numa_node == -1) - return; - - if (!zalloc_cpumask_var(&ring->affinity_mask, GFP_KERNEL)) { - en_err(priv, "Failed to allocate core mask\n"); - return; - } - - if (cpumask_set_cpu_local_first(ring_idx, numa_node, - ring->affinity_mask)) { - en_err(priv, "Failed setting affinity hint\n"); - free_cpumask_var(ring->affinity_mask); - ring->affinity_mask = NULL; - } -} - -static void mlx4_en_free_affinity_hint(struct mlx4_en_priv *priv, int ring_idx) -{ - free_cpumask_var(priv->rx_ring[ring_idx]->affinity_mask); - priv->rx_ring[ring_idx]->affinity_mask = NULL; -} int mlx4_en_start_port(struct net_device *dev) { @@ -1593,8 +1567,6 @@ int mlx4_en_start_port(struct net_device *dev) mlx4_en_cq_init_lock(cq); - mlx4_en_init_affinity_hint(priv, i); - err = mlx4_en_activate_cq(priv, cq, i); if (err) { en_err(priv, "Failed activating Rx CQ\n"); @@ -1875,8 +1847,6 @@ void mlx4_en_stop_port(struct net_device *dev, int detach) msleep(1); mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]); mlx4_en_deactivate_cq(priv, cq); - - mlx4_en_free_affinity_hint(priv, i); } } diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index f91659e5fa13..d954ec1eac17 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -1376,7 +1376,7 @@ int mlx4_test_interrupts(struct mlx4_dev *dev) EXPORT_SYMBOL(mlx4_test_interrupts); int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap, - int *vector, cpumask_var_t cpu_hint_mask) + int *vector) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -1411,15 +1411,6 @@ int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap, } mlx4_assign_irq_notifier(priv, dev, priv->eq_table.eq[vec].irq); - if (cpu_hint_mask) { - err = irq_set_affinity_hint( - priv->eq_table.eq[vec].irq, - cpu_hint_mask); - if (err) { - mlx4_warn(dev, "Failed setting affinity hint\n"); - /*we dont want to break here*/ - } - } eq_set_ci(&priv->eq_table.eq[vec], 1); } @@ -1450,8 +1441,6 @@ void mlx4_release_eq(struct mlx4_dev *dev, int vec) irq_set_affinity_notifier( priv->eq_table.eq[vec].irq, NULL); - irq_set_affinity_hint(priv->eq_table.eq[vec].irq, - NULL); free_irq(priv->eq_table.eq[vec].irq, &priv->eq_table.eq[vec]); priv->msix_ctl.pool_bm &= ~(1ULL << i); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 0e15295bedd6..b5db1bf361dc 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -313,7 +313,6 @@ struct mlx4_en_rx_ring { unsigned long csum_ok; unsigned long csum_none; int hwtstamp_rx_filter; - cpumask_var_t affinity_mask; }; struct mlx4_en_cq { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index b9b70e00e3c1..ca38871a585c 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1161,7 +1161,7 @@ int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr); int mlx4_SYNC_TPT(struct mlx4_dev *dev); int mlx4_test_interrupts(struct mlx4_dev *dev); int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap, - int *vector, cpumask_t *cpu_hint_mask); + int *vector); void mlx4_release_eq(struct mlx4_dev *dev, int vec); int mlx4_get_phys_port_id(struct mlx4_dev *dev); -- cgit v1.2.3-59-g8ed1b From 670e5b8eaf85704742bc3cb1df51fdd3ce08fc15 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 28 May 2014 18:44:46 -0700 Subject: net: Add support for device specific address syncing This change provides a function to be used in order to break the ndo_set_rx_mode call into a set of address add and remove calls. The code is based on the implementation of dev_uc_sync/dev_mc_sync. Since they essentially do the same thing but with only one dev I simply named my functions __dev_uc_sync/__dev_mc_sync. I also implemented an unsync version of the functions as well to allow for cleanup on close. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/netdevice.h | 73 ++++++++++++++++++++++++++++++++++++++++ net/core/dev_addr_lists.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 158 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2db1610bf109..774e5391eb8e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3003,6 +3003,15 @@ int __hw_addr_sync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, int addr_len); void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, int addr_len); +int __hw_addr_sync_dev(struct netdev_hw_addr_list *list, + struct net_device *dev, + int (*sync)(struct net_device *, const unsigned char *), + int (*unsync)(struct net_device *, + const unsigned char *)); +void __hw_addr_unsync_dev(struct netdev_hw_addr_list *list, + struct net_device *dev, + int (*unsync)(struct net_device *, + const unsigned char *)); void __hw_addr_init(struct netdev_hw_addr_list *list); /* Functions used for device addresses handling */ @@ -3023,6 +3032,38 @@ void dev_uc_unsync(struct net_device *to, struct net_device *from); void dev_uc_flush(struct net_device *dev); void dev_uc_init(struct net_device *dev); +/** + * __dev_uc_sync - Synchonize device's unicast list + * @dev: device to sync + * @sync: function to call if address should be added + * @unsync: function to call if address should be removed + * + * Add newly added addresses to the interface, and release + * addresses that have been deleted. + **/ +static inline int __dev_uc_sync(struct net_device *dev, + int (*sync)(struct net_device *, + const unsigned char *), + int (*unsync)(struct net_device *, + const unsigned char *)) +{ + return __hw_addr_sync_dev(&dev->uc, dev, sync, unsync); +} + +/** + * __dev_uc_unsync - Remove synchonized addresses from device + * @dev: device to sync + * @unsync: function to call if address should be removed + * + * Remove all addresses that were added to the device by dev_uc_sync(). + **/ +static inline void __dev_uc_unsync(struct net_device *dev, + int (*unsync)(struct net_device *, + const unsigned char *)) +{ + __hw_addr_unsync_dev(&dev->uc, dev, unsync); +} + /* Functions used for multicast addresses handling */ int dev_mc_add(struct net_device *dev, const unsigned char *addr); int dev_mc_add_global(struct net_device *dev, const unsigned char *addr); @@ -3035,6 +3076,38 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from); void dev_mc_flush(struct net_device *dev); void dev_mc_init(struct net_device *dev); +/** + * __dev_mc_sync - Synchonize device's multicast list + * @dev: device to sync + * @sync: function to call if address should be added + * @unsync: function to call if address should be removed + * + * Add newly added addresses to the interface, and release + * addresses that have been deleted. + **/ +static inline int __dev_mc_sync(struct net_device *dev, + int (*sync)(struct net_device *, + const unsigned char *), + int (*unsync)(struct net_device *, + const unsigned char *)) +{ + return __hw_addr_sync_dev(&dev->mc, dev, sync, unsync); +} + +/** + * __dev_mc_unsync - Remove synchonized addresses from device + * @dev: device to sync + * @unsync: function to call if address should be removed + * + * Remove all addresses that were added to the device by dev_mc_sync(). + **/ +static inline void __dev_mc_unsync(struct net_device *dev, + int (*unsync)(struct net_device *, + const unsigned char *)) +{ + __hw_addr_unsync_dev(&dev->mc, dev, unsync); +} + /* Functions used for secondary unicast and multicast support */ void dev_set_rx_mode(struct net_device *dev); void __dev_set_rx_mode(struct net_device *dev); diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index 329d5794e7dc..b6b230600b97 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -225,6 +225,91 @@ void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, } EXPORT_SYMBOL(__hw_addr_unsync); +/** + * __hw_addr_sync_dev - Synchonize device's multicast list + * @list: address list to syncronize + * @dev: device to sync + * @sync: function to call if address should be added + * @unsync: function to call if address should be removed + * + * This funciton is intended to be called from the ndo_set_rx_mode + * function of devices that require explicit address add/remove + * notifications. The unsync function may be NULL in which case + * the addresses requiring removal will simply be removed without + * any notification to the device. + **/ +int __hw_addr_sync_dev(struct netdev_hw_addr_list *list, + struct net_device *dev, + int (*sync)(struct net_device *, const unsigned char *), + int (*unsync)(struct net_device *, + const unsigned char *)) +{ + struct netdev_hw_addr *ha, *tmp; + int err; + + /* first go through and flush out any stale entries */ + list_for_each_entry_safe(ha, tmp, &list->list, list) { + if (!ha->sync_cnt || ha->refcount != 1) + continue; + + /* if unsync is defined and fails defer unsyncing address */ + if (unsync && unsync(dev, ha->addr)) + continue; + + ha->sync_cnt--; + __hw_addr_del_entry(list, ha, false, false); + } + + /* go through and sync new entries to the list */ + list_for_each_entry_safe(ha, tmp, &list->list, list) { + if (ha->sync_cnt) + continue; + + err = sync(dev, ha->addr); + if (err) + return err; + + ha->sync_cnt++; + ha->refcount++; + } + + return 0; +} +EXPORT_SYMBOL(__hw_addr_sync_dev); + +/** + * __hw_addr_unsync_dev - Remove synchonized addresses from device + * @list: address list to remove syncronized addresses from + * @dev: device to sync + * @unsync: function to call if address should be removed + * + * Remove all addresses that were added to the device by __hw_addr_sync_dev(). + * This function is intended to be called from the ndo_stop or ndo_open + * functions on devices that require explicit address add/remove + * notifications. If the unsync function pointer is NULL then this function + * can be used to just reset the sync_cnt for the addresses in the list. + **/ +void __hw_addr_unsync_dev(struct netdev_hw_addr_list *list, + struct net_device *dev, + int (*unsync)(struct net_device *, + const unsigned char *)) +{ + struct netdev_hw_addr *ha, *tmp; + + list_for_each_entry_safe(ha, tmp, &list->list, list) { + if (!ha->sync_cnt) + continue; + + /* if unsync is defined and fails defer unsyncing address */ + if (unsync && unsync(dev, ha->addr)) + continue; + + ha->sync_cnt--; + __hw_addr_del_entry(list, ha, false, false); + } +} +EXPORT_SYMBOL(__hw_addr_unsync_dev); + static void __hw_addr_flush(struct netdev_hw_addr_list *list) { struct netdev_hw_addr *ha, *tmp; -- cgit v1.2.3-59-g8ed1b From 688cea83f4396fa98b77a126ed278b89daccccdc Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Fri, 30 May 2014 16:00:56 +0800 Subject: macvlan: add netpoll support Add netpoll support to macvlan devices. Based on the netpoll support in the 802.1q vlan code. Tested and macvlan could work well with netconsole. Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 66 +++++++++++++++++++++++++++++++++++++++++++++- include/linux/if_macvlan.h | 3 +++ 2 files changed, 68 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index d2dbcfc68ee4..eee9106d1da1 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -33,6 +33,7 @@ #include #include #include +#include #define MACVLAN_HASH_SIZE (1 << BITS_PER_BYTE) @@ -357,12 +358,26 @@ xmit_world: return dev_queue_xmit(skb); } +static inline netdev_tx_t macvlan_netpoll_send_skb(struct macvlan_dev *vlan, struct sk_buff *skb) +{ +#ifdef CONFIG_NET_POLL_CONTROLLER + if (vlan->netpoll) + netpoll_send_skb(vlan->netpoll, skb); +#else + BUG(); +#endif + return NETDEV_TX_OK; +} + static netdev_tx_t macvlan_start_xmit(struct sk_buff *skb, struct net_device *dev) { unsigned int len = skb->len; int ret; - const struct macvlan_dev *vlan = netdev_priv(dev); + struct macvlan_dev *vlan = netdev_priv(dev); + + if (unlikely(netpoll_tx_running(dev))) + return macvlan_netpoll_send_skb(vlan, skb); if (vlan->fwd_priv) { skb->dev = vlan->lowerdev; @@ -788,6 +803,50 @@ static netdev_features_t macvlan_fix_features(struct net_device *dev, return features; } +#ifdef CONFIG_NET_POLL_CONTROLLER +static void macvlan_dev_poll_controller(struct net_device *dev) +{ + return; +} + +static int macvlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + struct net_device *real_dev = vlan->lowerdev; + struct netpoll *netpoll; + int err = 0; + + netpoll = kzalloc(sizeof(*netpoll), GFP_KERNEL); + err = -ENOMEM; + if (!netpoll) + goto out; + + err = __netpoll_setup(netpoll, real_dev); + if (err) { + kfree(netpoll); + goto out; + } + + vlan->netpoll = netpoll; + +out: + return err; +} + +static void macvlan_dev_netpoll_cleanup(struct net_device *dev) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + struct netpoll *netpoll = vlan->netpoll; + + if (!netpoll) + return; + + vlan->netpoll = NULL; + + __netpoll_free_async(netpoll); +} +#endif /* CONFIG_NET_POLL_CONTROLLER */ + static const struct ethtool_ops macvlan_ethtool_ops = { .get_link = ethtool_op_get_link, .get_settings = macvlan_ethtool_get_settings, @@ -813,6 +872,11 @@ static const struct net_device_ops macvlan_netdev_ops = { .ndo_fdb_del = macvlan_fdb_del, .ndo_fdb_dump = ndo_dflt_fdb_dump, .ndo_get_lock_subclass = macvlan_get_nest_level, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = macvlan_dev_poll_controller, + .ndo_netpoll_setup = macvlan_dev_netpoll_setup, + .ndo_netpoll_cleanup = macvlan_dev_netpoll_cleanup, +#endif }; void macvlan_common_setup(struct net_device *dev) diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index a9a53b12397b..6b2c7cf352a5 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -57,6 +57,9 @@ struct macvlan_dev { netdev_features_t tap_features; int minor; int nest_level; +#ifdef CONFIG_NET_POLL_CONTROLLER + struct netpoll *netpoll; +#endif }; static inline void macvlan_count_rx(const struct macvlan_dev *vlan, -- cgit v1.2.3-59-g8ed1b From fe62d001372388abb15a324148c913f9b43722a8 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 15 May 2014 01:25:27 +0100 Subject: ethtool: Replace ethtool_ops::{get,set}_rxfh_indir() with {get,set}_rxfh() ETHTOOL_{G,S}RXFHINDIR and ETHTOOL_{G,S}RSSH should work for drivers regardless of whether they expose the hash key, unless you try to set a hash key for a driver that doesn't expose it. Signed-off-by: Ben Hutchings Acked-by: Jeff Kirsher --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 15 ++++++++------- drivers/net/ethernet/broadcom/tg3.c | 8 ++++---- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 8 ++++---- drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c | 15 +++++++++------ drivers/net/ethernet/intel/igb/igb_ethtool.c | 9 +++++---- drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 10 +++++----- drivers/net/ethernet/sfc/ethtool.c | 10 +++++----- drivers/net/vmxnet3/vmxnet3_ethtool.c | 8 ++++---- include/linux/ethtool.h | 6 ------ net/core/ethtool.c | 8 ++++---- 10 files changed, 48 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index 03224090ecf9..af138f8aa361 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -3316,7 +3316,7 @@ static u32 bnx2x_get_rxfh_indir_size(struct net_device *dev) return T_ETH_INDIRECTION_TABLE_SIZE; } -static int bnx2x_get_rxfh_indir(struct net_device *dev, u32 *indir) +static int bnx2x_get_rxfh(struct net_device *dev, u32 *indir, u8 *key) { struct bnx2x *bp = netdev_priv(dev); u8 ind_table[T_ETH_INDIRECTION_TABLE_SIZE] = {0}; @@ -3340,14 +3340,15 @@ static int bnx2x_get_rxfh_indir(struct net_device *dev, u32 *indir) return 0; } -static int bnx2x_set_rxfh_indir(struct net_device *dev, const u32 *indir) +static int bnx2x_set_rxfh(struct net_device *dev, const u32 *indir, + const u8 *key) { struct bnx2x *bp = netdev_priv(dev); size_t i; for (i = 0; i < T_ETH_INDIRECTION_TABLE_SIZE; i++) { /* - * The same as in bnx2x_get_rxfh_indir: we can't use a memcpy() + * The same as in bnx2x_get_rxfh: we can't use a memcpy() * as an internal storage of an indirection table is a u8 array * while indir->ring_index points to an array of u32. * @@ -3471,8 +3472,8 @@ static const struct ethtool_ops bnx2x_ethtool_ops = { .get_rxnfc = bnx2x_get_rxnfc, .set_rxnfc = bnx2x_set_rxnfc, .get_rxfh_indir_size = bnx2x_get_rxfh_indir_size, - .get_rxfh_indir = bnx2x_get_rxfh_indir, - .set_rxfh_indir = bnx2x_set_rxfh_indir, + .get_rxfh = bnx2x_get_rxfh, + .set_rxfh = bnx2x_set_rxfh, .get_channels = bnx2x_get_channels, .set_channels = bnx2x_set_channels, .get_module_info = bnx2x_get_module_info, @@ -3498,8 +3499,8 @@ static const struct ethtool_ops bnx2x_vf_ethtool_ops = { .get_rxnfc = bnx2x_get_rxnfc, .set_rxnfc = bnx2x_set_rxnfc, .get_rxfh_indir_size = bnx2x_get_rxfh_indir_size, - .get_rxfh_indir = bnx2x_get_rxfh_indir, - .set_rxfh_indir = bnx2x_set_rxfh_indir, + .get_rxfh = bnx2x_get_rxfh, + .set_rxfh = bnx2x_set_rxfh, .get_channels = bnx2x_get_channels, .set_channels = bnx2x_set_channels, }; diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index ccd90156aebc..8c2314ed260c 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -12501,7 +12501,7 @@ static u32 tg3_get_rxfh_indir_size(struct net_device *dev) return size; } -static int tg3_get_rxfh_indir(struct net_device *dev, u32 *indir) +static int tg3_get_rxfh(struct net_device *dev, u32 *indir, u8 *key) { struct tg3 *tp = netdev_priv(dev); int i; @@ -12512,7 +12512,7 @@ static int tg3_get_rxfh_indir(struct net_device *dev, u32 *indir) return 0; } -static int tg3_set_rxfh_indir(struct net_device *dev, const u32 *indir) +static int tg3_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key) { struct tg3 *tp = netdev_priv(dev); size_t i; @@ -14044,8 +14044,8 @@ static const struct ethtool_ops tg3_ethtool_ops = { .get_sset_count = tg3_get_sset_count, .get_rxnfc = tg3_get_rxnfc, .get_rxfh_indir_size = tg3_get_rxfh_indir_size, - .get_rxfh_indir = tg3_get_rxfh_indir, - .set_rxfh_indir = tg3_set_rxfh_indir, + .get_rxfh = tg3_get_rxfh, + .set_rxfh = tg3_set_rxfh, .get_channels = tg3_get_channels, .set_channels = tg3_set_channels, .get_ts_info = tg3_get_ts_info, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 266a5bc6aedf..8cf6be93f491 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -2739,7 +2739,7 @@ static u32 get_rss_table_size(struct net_device *dev) return pi->rss_size; } -static int get_rss_table(struct net_device *dev, u32 *p) +static int get_rss_table(struct net_device *dev, u32 *p, u8 *key) { const struct port_info *pi = netdev_priv(dev); unsigned int n = pi->rss_size; @@ -2749,7 +2749,7 @@ static int get_rss_table(struct net_device *dev, u32 *p) return 0; } -static int set_rss_table(struct net_device *dev, const u32 *p) +static int set_rss_table(struct net_device *dev, const u32 *p, const u8 *key) { unsigned int i; struct port_info *pi = netdev_priv(dev); @@ -2851,8 +2851,8 @@ static const struct ethtool_ops cxgb_ethtool_ops = { .set_wol = set_wol, .get_rxnfc = get_rxnfc, .get_rxfh_indir_size = get_rss_table_size, - .get_rxfh_indir = get_rss_table, - .set_rxfh_indir = set_rss_table, + .get_rxfh = get_rss_table, + .set_rxfh = set_rss_table, .flash_device = set_flash, }; diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c index 77e786d2d0e0..dbc8986c2dae 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c @@ -626,13 +626,14 @@ static u32 i40evf_get_rxfh_indir_size(struct net_device *netdev) } /** - * i40evf_get_rxfh_indir - get the rx flow hash indirection table + * i40evf_get_rxfh - get the rx flow hash indirection table * @netdev: network interface device structure * @indir: indirection table + * @key: hash key (will be %NULL until get_rxfh_key_size is implemented) * * Reads the indirection table directly from the hardware. Always returns 0. **/ -static int i40evf_get_rxfh_indir(struct net_device *netdev, u32 *indir) +static int i40evf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key) { struct i40evf_adapter *adapter = netdev_priv(netdev); struct i40e_hw *hw = &adapter->hw; @@ -650,14 +651,16 @@ static int i40evf_get_rxfh_indir(struct net_device *netdev, u32 *indir) } /** - * i40evf_set_rxfh_indir - set the rx flow hash indirection table + * i40evf_set_rxfh - set the rx flow hash indirection table * @netdev: network interface device structure * @indir: indirection table + * @key: hash key (will be %NULL until get_rxfh_key_size is implemented) * * Returns -EINVAL if the table specifies an inavlid queue id, otherwise * returns 0 after programming the table. **/ -static int i40evf_set_rxfh_indir(struct net_device *netdev, const u32 *indir) +static int i40evf_set_rxfh(struct net_device *netdev, const u32 *indir, + const u8 *key) { struct i40evf_adapter *adapter = netdev_priv(netdev); struct i40e_hw *hw = &adapter->hw; @@ -691,8 +694,8 @@ static struct ethtool_ops i40evf_ethtool_ops = { .get_rxnfc = i40evf_get_rxnfc, .set_rxnfc = i40evf_set_rxnfc, .get_rxfh_indir_size = i40evf_get_rxfh_indir_size, - .get_rxfh_indir = i40evf_get_rxfh_indir, - .set_rxfh_indir = i40evf_set_rxfh_indir, + .get_rxfh = i40evf_get_rxfh, + .set_rxfh = i40evf_set_rxfh, .get_channels = i40evf_get_channels, }; diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index a84297c85fb1..d8bbcf1873ca 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2830,7 +2830,7 @@ static u32 igb_get_rxfh_indir_size(struct net_device *netdev) return IGB_RETA_SIZE; } -static int igb_get_rxfh_indir(struct net_device *netdev, u32 *indir) +static int igb_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key) { struct igb_adapter *adapter = netdev_priv(netdev); int i; @@ -2876,7 +2876,8 @@ void igb_write_rss_indir_tbl(struct igb_adapter *adapter) } } -static int igb_set_rxfh_indir(struct net_device *netdev, const u32 *indir) +static int igb_set_rxfh(struct net_device *netdev, const u32 *indir, + const u8 *key) { struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; @@ -3025,8 +3026,8 @@ static const struct ethtool_ops igb_ethtool_ops = { .get_module_info = igb_get_module_info, .get_module_eeprom = igb_get_module_eeprom, .get_rxfh_indir_size = igb_get_rxfh_indir_size, - .get_rxfh_indir = igb_get_rxfh_indir, - .set_rxfh_indir = igb_set_rxfh_indir, + .get_rxfh = igb_get_rxfh, + .set_rxfh = igb_set_rxfh, .get_channels = igb_get_channels, .set_channels = igb_set_channels, .begin = igb_ethtool_begin, diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index a72d99fd7a2d..263a1c7a3370 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -564,7 +564,7 @@ static u32 mlx4_en_get_rxfh_indir_size(struct net_device *dev) return priv->rx_ring_num; } -static int mlx4_en_get_rxfh_indir(struct net_device *dev, u32 *ring_index) +static int mlx4_en_get_rxfh(struct net_device *dev, u32 *ring_index, u8 *key) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_rss_map *rss_map = &priv->rss_map; @@ -582,8 +582,8 @@ static int mlx4_en_get_rxfh_indir(struct net_device *dev, u32 *ring_index) return err; } -static int mlx4_en_set_rxfh_indir(struct net_device *dev, - const u32 *ring_index) +static int mlx4_en_set_rxfh(struct net_device *dev, const u32 *ring_index, + const u8 *key) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; @@ -1224,8 +1224,8 @@ const struct ethtool_ops mlx4_en_ethtool_ops = { .get_rxnfc = mlx4_en_get_rxnfc, .set_rxnfc = mlx4_en_set_rxnfc, .get_rxfh_indir_size = mlx4_en_get_rxfh_indir_size, - .get_rxfh_indir = mlx4_en_get_rxfh_indir, - .set_rxfh_indir = mlx4_en_set_rxfh_indir, + .get_rxfh = mlx4_en_get_rxfh, + .set_rxfh = mlx4_en_set_rxfh, .get_channels = mlx4_en_get_channels, .set_channels = mlx4_en_set_channels, .get_ts_info = mlx4_en_get_ts_info, diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 0de8b07c24c2..74739c4b9997 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -1033,7 +1033,7 @@ static u32 efx_ethtool_get_rxfh_indir_size(struct net_device *net_dev) 0 : ARRAY_SIZE(efx->rx_indir_table)); } -static int efx_ethtool_get_rxfh_indir(struct net_device *net_dev, u32 *indir) +static int efx_ethtool_get_rxfh(struct net_device *net_dev, u32 *indir, u8 *key) { struct efx_nic *efx = netdev_priv(net_dev); @@ -1041,8 +1041,8 @@ static int efx_ethtool_get_rxfh_indir(struct net_device *net_dev, u32 *indir) return 0; } -static int efx_ethtool_set_rxfh_indir(struct net_device *net_dev, - const u32 *indir) +static int efx_ethtool_set_rxfh(struct net_device *net_dev, + const u32 *indir, const u8 *key) { struct efx_nic *efx = netdev_priv(net_dev); @@ -1125,8 +1125,8 @@ const struct ethtool_ops efx_ethtool_ops = { .get_rxnfc = efx_ethtool_get_rxnfc, .set_rxnfc = efx_ethtool_set_rxnfc, .get_rxfh_indir_size = efx_ethtool_get_rxfh_indir_size, - .get_rxfh_indir = efx_ethtool_get_rxfh_indir, - .set_rxfh_indir = efx_ethtool_set_rxfh_indir, + .get_rxfh = efx_ethtool_get_rxfh, + .set_rxfh = efx_ethtool_set_rxfh, .get_ts_info = efx_ethtool_get_ts_info, .get_module_info = efx_ethtool_get_module_info, .get_module_eeprom = efx_ethtool_get_module_eeprom, diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c index 00e120296e92..9396cca93b09 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethtool.c +++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c @@ -579,7 +579,7 @@ vmxnet3_get_rss_indir_size(struct net_device *netdev) } static int -vmxnet3_get_rss_indir(struct net_device *netdev, u32 *p) +vmxnet3_get_rss(struct net_device *netdev, u32 *p, u8 *key) { struct vmxnet3_adapter *adapter = netdev_priv(netdev); struct UPT1_RSSConf *rssConf = adapter->rss_conf; @@ -592,7 +592,7 @@ vmxnet3_get_rss_indir(struct net_device *netdev, u32 *p) } static int -vmxnet3_set_rss_indir(struct net_device *netdev, const u32 *p) +vmxnet3_set_rss(struct net_device *netdev, const u32 *p, const u8 *key) { unsigned int i; unsigned long flags; @@ -628,8 +628,8 @@ static const struct ethtool_ops vmxnet3_ethtool_ops = { .get_rxnfc = vmxnet3_get_rxnfc, #ifdef VMXNET3_RSS .get_rxfh_indir_size = vmxnet3_get_rss_indir_size, - .get_rxfh_indir = vmxnet3_get_rss_indir, - .set_rxfh_indir = vmxnet3_set_rss_indir, + .get_rxfh = vmxnet3_get_rss, + .set_rxfh = vmxnet3_set_rss, #endif }; diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 874fde01d398..e658229fee39 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -158,15 +158,11 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings) * Returns zero if not supported for this specific device. * @get_rxfh_indir_size: Get the size of the RX flow hash indirection table. * Returns zero if not supported for this specific device. - * @get_rxfh_indir: Get the contents of the RX flow hash indirection table. - * Will not be called if @get_rxfh_indir_size returns zero. * @get_rxfh: Get the contents of the RX flow hash indirection table and hash * key. * Will only be called if one or both of @get_rxfh_indir_size and * @get_rxfh_key_size are implemented and return non-zero. * Returns a negative error code or zero. - * @set_rxfh_indir: Set the contents of the RX flow hash indirection table. - * Will not be called if @get_rxfh_indir_size returns zero. * @set_rxfh: Set the contents of the RX flow hash indirection table and/or * hash key. In case only the indirection table or hash key is to be * changed, the other argument will be %NULL. @@ -248,8 +244,6 @@ struct ethtool_ops { int (*get_rxfh)(struct net_device *, u32 *indir, u8 *key); int (*set_rxfh)(struct net_device *, const u32 *indir, const u8 *key); - int (*get_rxfh_indir)(struct net_device *, u32 *); - int (*set_rxfh_indir)(struct net_device *, const u32 *); void (*get_channels)(struct net_device *, struct ethtool_channels *); int (*set_channels)(struct net_device *, struct ethtool_channels *); int (*get_dump_flag)(struct net_device *, struct ethtool_dump *); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index b8857348bdf3..8ae452afb545 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -582,7 +582,7 @@ static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, int ret; if (!dev->ethtool_ops->get_rxfh_indir_size || - !dev->ethtool_ops->get_rxfh_indir) + !dev->ethtool_ops->get_rxfh) return -EOPNOTSUPP; dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev); if (dev_size == 0) @@ -608,7 +608,7 @@ static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, if (!indir) return -ENOMEM; - ret = dev->ethtool_ops->get_rxfh_indir(dev, indir); + ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL); if (ret) goto out; @@ -632,7 +632,7 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, int ret; u32 ringidx_offset = offsetof(struct ethtool_rxfh_indir, ring_index[0]); - if (!ops->get_rxfh_indir_size || !ops->set_rxfh_indir || + if (!ops->get_rxfh_indir_size || !ops->set_rxfh || !ops->get_rxnfc) return -EOPNOTSUPP; @@ -669,7 +669,7 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, goto out; } - ret = ops->set_rxfh_indir(dev, indir); + ret = ops->set_rxfh(dev, indir, NULL); out: kfree(indir); -- cgit v1.2.3-59-g8ed1b From 7e2b10c1e52ca37fb522be49f4be367f9311d0cd Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 4 Jun 2014 17:20:02 -0700 Subject: net: Support for multiple checksums with gso When creating a GSO packet segment we may need to set more than one checksum in the packet (for instance a TCP checksum and UDP checksum for VXLAN encapsulation). To be efficient, we want to do checksum calculation for any part of the packet at most once. This patch adds csum_start offset to skb_gso_cb. This tracks the starting offset for skb->csum which is initially set in skb_segment. When a protocol needs to compute a transport checksum it calls gso_make_checksum which computes the checksum value from the start of transport header to csum_start and then adds in skb->csum to get the full checksum. skb->csum and csum_start are then updated to reflect the checksum of the resultant packet starting from the transport header. This patch also adds a flag to skbuff, encap_hdr_csum, which is set in *gso_segment fucntions to indicate that a tunnel protocol needs checksum calculation Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 26 +++++++++++++++++++++++++- net/core/skbuff.c | 8 +++++++- net/ipv4/ip_tunnel_core.c | 8 ++++++++ 3 files changed, 40 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7a9beeb1c458..d8d397acb52c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -567,7 +567,8 @@ struct sk_buff { * headers if needed */ __u8 encapsulation:1; - /* 6/8 bit hole (depending on ndisc_nodetype presence) */ + __u8 encap_hdr_csum:1; + /* 5/7 bit hole (depending on ndisc_nodetype presence) */ kmemcheck_bitfield_end(flags2); #if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL @@ -2988,6 +2989,7 @@ static inline struct sec_path *skb_sec_path(struct sk_buff *skb) struct skb_gso_cb { int mac_offset; int encap_level; + __u16 csum_start; }; #define SKB_GSO_CB(skb) ((struct skb_gso_cb *)(skb)->cb) @@ -3012,6 +3014,28 @@ static inline int gso_pskb_expand_head(struct sk_buff *skb, int extra) return 0; } +/* Compute the checksum for a gso segment. First compute the checksum value + * from the start of transport header to SKB_GSO_CB(skb)->csum_start, and + * then add in skb->csum (checksum from csum_start to end of packet). + * skb->csum and csum_start are then updated to reflect the checksum of the + * resultant packet starting from the transport header-- the resultant checksum + * is in the res argument (i.e. normally zero or ~ of checksum of a pseudo + * header. + */ +static inline __sum16 gso_make_checksum(struct sk_buff *skb, __wsum res) +{ + int plen = SKB_GSO_CB(skb)->csum_start - skb_headroom(skb) - + skb_transport_offset(skb); + __u16 csum; + + csum = csum_fold(csum_partial(skb_transport_header(skb), + plen, skb->csum)); + skb->csum = res; + SKB_GSO_CB(skb)->csum_start -= plen; + + return csum; +} + static inline bool skb_is_gso(const struct sk_buff *skb) { return skb_shinfo(skb)->gso_size; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3f6c7e8be8a4..05f4bef2ce12 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2885,7 +2885,9 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, if (unlikely(!proto)) return ERR_PTR(-EINVAL); - csum = !!can_checksum_protocol(features, proto); + csum = !head_skb->encap_hdr_csum && + !!can_checksum_protocol(features, proto); + __skb_push(head_skb, doffset); headroom = skb_headroom(head_skb); pos = skb_headlen(head_skb); @@ -2983,6 +2985,8 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, nskb->csum = skb_copy_and_csum_bits(head_skb, offset, skb_put(nskb, len), len, 0); + SKB_GSO_CB(nskb)->csum_start = + skb_headroom(nskb) + offset; continue; } @@ -3052,6 +3056,8 @@ perform_csum_check: nskb->csum = skb_checksum(nskb, doffset, nskb->len - doffset, 0); nskb->ip_summed = CHECKSUM_NONE; + SKB_GSO_CB(nskb)->csum_start = + skb_headroom(nskb) + doffset; } } while ((offset += len) < head_skb->len); diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 847e69cbff7e..f4c987bb7e94 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -135,6 +135,14 @@ struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, return skb; } + /* If packet is not gso and we are resolving any partial checksum, + * clear encapsulation flag. This allows setting CHECKSUM_PARTIAL + * on the outer header without confusing devices that implement + * NETIF_F_IP_CSUM with encapsulation. + */ + if (csum_help) + skb->encapsulation = 0; + if (skb->ip_summed == CHECKSUM_PARTIAL && csum_help) { err = skb_checksum_help(skb); if (unlikely(err)) -- cgit v1.2.3-59-g8ed1b From 0f4f4ffa7b7c3d29d0537a126145c9f8d8ed5dbc Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 4 Jun 2014 17:20:16 -0700 Subject: net: Add GSO support for UDP tunnels with checksum Added a new netif feature for GSO_UDP_TUNNEL_CSUM. This indicates that a device is capable of computing the UDP checksum in the encapsulating header of a UDP tunnel. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 1 + include/linux/skbuff.h | 2 ++ net/ipv4/af_inet.c | 1 + net/ipv4/tcp_offload.c | 1 + net/ipv4/udp.c | 40 +++++++++++++++++++--------------------- net/ipv4/udp_offload.c | 4 +++- net/ipv6/ip6_offload.c | 1 + net/ipv6/udp_offload.c | 4 +++- 8 files changed, 31 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index c26d0ec2ef3a..f1338e0f9866 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -45,6 +45,7 @@ enum { NETIF_F_GSO_IPIP_BIT, /* ... IPIP tunnel with TSO */ NETIF_F_GSO_SIT_BIT, /* ... SIT tunnel with TSO */ NETIF_F_GSO_UDP_TUNNEL_BIT, /* ... UDP TUNNEL with TSO */ + NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT,/* ... UDP TUNNEL with TSO & CSUM */ NETIF_F_GSO_MPLS_BIT, /* ... MPLS segmentation */ /**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */ NETIF_F_GSO_MPLS_BIT, diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d8d397acb52c..5a6d10a538f5 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -345,6 +345,8 @@ enum { SKB_GSO_UDP_TUNNEL = 1 << 9, SKB_GSO_MPLS = 1 << 10, + + SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11, }; #if BITS_PER_LONG > 32 diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 0e9bb08a91e4..0070ab87109b 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1258,6 +1258,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_SIT | SKB_GSO_TCPV6 | SKB_GSO_UDP_TUNNEL | + SKB_GSO_UDP_TUNNEL_CSUM | SKB_GSO_MPLS | 0))) goto out; diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index d8de7b9e0720..c02f2d2e7bab 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -61,6 +61,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, SKB_GSO_SIT | SKB_GSO_MPLS | SKB_GSO_UDP_TUNNEL | + SKB_GSO_UDP_TUNNEL_CSUM | 0) || !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) goto out; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index a84f6762ea9e..8d8c33d84c9a 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2528,7 +2528,11 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); __be16 protocol = skb->protocol; netdev_features_t enc_features; - int outer_hlen; + int udp_offset, outer_hlen; + unsigned int oldlen; + bool need_csum; + + oldlen = (u16)~skb->len; if (unlikely(!pskb_may_pull(skb, tnl_hlen))) goto out; @@ -2540,6 +2544,10 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, skb->mac_len = skb_inner_network_offset(skb); skb->protocol = htons(ETH_P_TEB); + need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM); + if (need_csum) + skb->encap_hdr_csum = 1; + /* segment inner packet. */ enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); segs = skb_mac_gso_segment(skb, enc_features); @@ -2550,10 +2558,11 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, } outer_hlen = skb_tnl_header_len(skb); + udp_offset = outer_hlen - tnl_hlen; skb = segs; do { struct udphdr *uh; - int udp_offset = outer_hlen - tnl_hlen; + int len; skb_reset_inner_headers(skb); skb->encapsulation = 1; @@ -2564,31 +2573,20 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, skb_reset_mac_header(skb); skb_set_network_header(skb, mac_len); skb_set_transport_header(skb, udp_offset); + len = skb->len - udp_offset; uh = udp_hdr(skb); - uh->len = htons(skb->len - udp_offset); - - /* csum segment if tunnel sets skb with csum. */ - if (protocol == htons(ETH_P_IP) && unlikely(uh->check)) { - struct iphdr *iph = ip_hdr(skb); + uh->len = htons(len); - uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, - skb->len - udp_offset, - IPPROTO_UDP, 0); - uh->check = csum_fold(skb_checksum(skb, udp_offset, - skb->len - udp_offset, 0)); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; + if (need_csum) { + __be32 delta = htonl(oldlen + len); - } else if (protocol == htons(ETH_P_IPV6)) { - struct ipv6hdr *ipv6h = ipv6_hdr(skb); - u32 len = skb->len - udp_offset; + uh->check = ~csum_fold((__force __wsum) + ((__force u32)uh->check + + (__force u32)delta)); + uh->check = gso_make_checksum(skb, ~uh->check); - uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, - len, IPPROTO_UDP, 0); - uh->check = csum_fold(skb_checksum(skb, udp_offset, len, 0)); if (uh->check == 0) uh->check = CSUM_MANGLED_0; - skb->ip_summed = CHECKSUM_NONE; } skb->protocol = protocol; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 88b4023ecfcf..5c23f4765af9 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -56,7 +56,8 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, __wsum csum; if (skb->encapsulation && - skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) { + (skb_shinfo(skb)->gso_type & + (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) { segs = skb_udp_tunnel_segment(skb, features); goto out; } @@ -71,6 +72,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_UDP_TUNNEL | + SKB_GSO_UDP_TUNNEL_CSUM | SKB_GSO_IPIP | SKB_GSO_GRE | SKB_GSO_MPLS) || !(type & (SKB_GSO_UDP)))) diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index b2f091566f88..d54c5744e3db 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -100,6 +100,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, SKB_GSO_IPIP | SKB_GSO_SIT | SKB_GSO_UDP_TUNNEL | + SKB_GSO_UDP_TUNNEL_CSUM | SKB_GSO_MPLS | SKB_GSO_TCPV6 | 0))) diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index b261ee8b83fc..79da8b305ced 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -63,6 +63,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_UDP_TUNNEL | + SKB_GSO_UDP_TUNNEL_CSUM | SKB_GSO_GRE | SKB_GSO_IPIP | SKB_GSO_SIT | @@ -76,7 +77,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, goto out; } - if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) + if (skb->encapsulation && skb_shinfo(skb)->gso_type & + (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM)) segs = skb_udp_tunnel_segment(skb, features); else { /* Do software UFO. Complete and fill in the UDP checksum as HW cannot -- cgit v1.2.3-59-g8ed1b From 4749c09c37030ccdc44aecebe0f71b02a377fc14 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 4 Jun 2014 17:20:23 -0700 Subject: gre: Call gso_make_checksum Call gso_make_checksum. This should have the benefit of using a checksum that may have been previously computed for the packet. This also adds NETIF_F_GSO_GRE_CSUM to differentiate devices that offload GRE GSO with and without the GRE checksum offloaed. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 2 ++ include/linux/skbuff.h | 2 ++ include/net/gre.h | 5 +++-- net/ipv4/af_inet.c | 1 + net/ipv4/gre_demux.c | 3 ++- net/ipv4/gre_offload.c | 10 ++++++++-- net/ipv4/tcp_offload.c | 1 + net/ipv4/udp_offload.c | 3 ++- net/ipv6/ip6_offload.c | 1 + net/ipv6/udp_offload.c | 1 + net/mpls/mpls_gso.c | 1 + 11 files changed, 24 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index f1338e0f9866..e5a589435e2b 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -42,6 +42,7 @@ enum { NETIF_F_TSO6_BIT, /* ... TCPv6 segmentation */ NETIF_F_FSO_BIT, /* ... FCoE segmentation */ NETIF_F_GSO_GRE_BIT, /* ... GRE with TSO */ + NETIF_F_GSO_GRE_CSUM_BIT, /* ... GRE with csum with TSO */ NETIF_F_GSO_IPIP_BIT, /* ... IPIP tunnel with TSO */ NETIF_F_GSO_SIT_BIT, /* ... SIT tunnel with TSO */ NETIF_F_GSO_UDP_TUNNEL_BIT, /* ... UDP TUNNEL with TSO */ @@ -112,6 +113,7 @@ enum { #define NETIF_F_RXFCS __NETIF_F(RXFCS) #define NETIF_F_RXALL __NETIF_F(RXALL) #define NETIF_F_GSO_GRE __NETIF_F(GSO_GRE) +#define NETIF_F_GSO_GRE_CSUM __NETIF_F(GSO_GRE_CSUM) #define NETIF_F_GSO_IPIP __NETIF_F(GSO_IPIP) #define NETIF_F_GSO_SIT __NETIF_F(GSO_SIT) #define NETIF_F_GSO_UDP_TUNNEL __NETIF_F(GSO_UDP_TUNNEL) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5a6d10a538f5..c705808bef9c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -347,6 +347,8 @@ enum { SKB_GSO_MPLS = 1 << 10, SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11, + + SKB_GSO_GRE_CSUM = 1 << 12, }; #if BITS_PER_LONG > 32 diff --git a/include/net/gre.h b/include/net/gre.h index 70046a0b0b89..b53182018743 100644 --- a/include/net/gre.h +++ b/include/net/gre.h @@ -37,9 +37,10 @@ void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, int hdr_len); static inline struct sk_buff *gre_handle_offloads(struct sk_buff *skb, - bool gre_csum) + bool csum) { - return iptunnel_handle_offloads(skb, gre_csum, SKB_GSO_GRE); + return iptunnel_handle_offloads(skb, csum, + csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE); } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 0070ab87109b..d5e6836cf772 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1254,6 +1254,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | + SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | SKB_GSO_SIT | SKB_GSO_TCPV6 | diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index fbfd829f4049..4e9619bca732 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -84,7 +84,8 @@ void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, ptr--; } if (tpi->flags&TUNNEL_CSUM && - !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) { + !(skb_shinfo(skb)->gso_type & + (SKB_GSO_GRE|SKB_GSO_GRE_CSUM))) { *ptr = 0; *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0, skb->len, 0)); diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index f1d32280cb54..24deb3928b9e 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -42,6 +42,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | + SKB_GSO_GRE_CSUM | SKB_GSO_IPIP))) goto out; @@ -55,6 +56,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, goto out; csum = !!(greh->flags & GRE_CSUM); + if (csum) + skb->encap_hdr_csum = 1; if (unlikely(!pskb_may_pull(skb, ghl))) goto out; @@ -94,10 +97,13 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, } } - greh = (struct gre_base_hdr *)(skb->data); + skb_reset_transport_header(skb); + + greh = (struct gre_base_hdr *) + skb_transport_header(skb); pcsum = (__be32 *)(greh + 1); *pcsum = 0; - *(__sum16 *)pcsum = csum_fold(skb_checksum(skb, 0, skb->len, 0)); + *(__sum16 *)pcsum = gso_make_checksum(skb, 0); } __skb_push(skb, tnl_hlen - ghl); diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index c02f2d2e7bab..4e86c59ec7f7 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -57,6 +57,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, SKB_GSO_TCP_ECN | SKB_GSO_TCPV6 | SKB_GSO_GRE | + SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | SKB_GSO_SIT | SKB_GSO_MPLS | diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 5c23f4765af9..7b1840110173 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -74,7 +74,8 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM | SKB_GSO_IPIP | - SKB_GSO_GRE | SKB_GSO_MPLS) || + SKB_GSO_GRE | SKB_GSO_GRE_CSUM | + SKB_GSO_MPLS) || !(type & (SKB_GSO_UDP)))) goto out; diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index d54c5744e3db..65eda2a8af48 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -97,6 +97,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | + SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | SKB_GSO_SIT | SKB_GSO_UDP_TUNNEL | diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 79da8b305ced..0ae3d98f83e0 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -65,6 +65,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM | SKB_GSO_GRE | + SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | SKB_GSO_SIT | SKB_GSO_MPLS) || diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index 851cd880b0c0..6b38d083e1c9 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -33,6 +33,7 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | + SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | SKB_GSO_MPLS))) goto out; -- cgit v1.2.3-59-g8ed1b From ca05e3a7551b71891d42d637d3a1e443c6bbd781 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Sun, 1 Jun 2014 23:47:24 +0200 Subject: isdn/capi: move capi_info2str to capidrv.c capi_info2str() is apparently meant to be of general utility. It is actually only used in capidrv.c. So move it from capiutil.c to capidrv.c and (obviously) stop exporting it. And, since we're touching this, merge the two versions of this function. Signed-off-by: Paul Bolle Signed-off-by: Tilman Schmidt Signed-off-by: David S. Miller --- drivers/isdn/capi/capidrv.c | 195 ++++++++++++++++++++++++++++++++++++++++ drivers/isdn/capi/capiutil.c | 200 ------------------------------------------ include/linux/isdn/capiutil.h | 5 -- 3 files changed, 195 insertions(+), 205 deletions(-) (limited to 'include/linux') diff --git a/drivers/isdn/capi/capidrv.c b/drivers/isdn/capi/capidrv.c index cc9f1927a322..70e67f6a22ff 100644 --- a/drivers/isdn/capi/capidrv.c +++ b/drivers/isdn/capi/capidrv.c @@ -763,6 +763,201 @@ static inline int new_bchan(capidrv_contr *card) } /* ------------------------------------------------------------------- */ +static char *capi_info2str(u16 reason) +{ +#ifndef CONFIG_ISDN_DRV_AVMB1_VERBOSE_REASON + return ".."; +#else + switch (reason) { + +/*-- informative values (corresponding message was processed) -----*/ + case 0x0001: + return "NCPI not supported by current protocol, NCPI ignored"; + case 0x0002: + return "Flags not supported by current protocol, flags ignored"; + case 0x0003: + return "Alert already sent by another application"; + +/*-- error information concerning CAPI_REGISTER -----*/ + case 0x1001: + return "Too many applications"; + case 0x1002: + return "Logical block size too small, must be at least 128 Bytes"; + case 0x1003: + return "Buffer exceeds 64 kByte"; + case 0x1004: + return "Message buffer size too small, must be at least 1024 Bytes"; + case 0x1005: + return "Max. number of logical connections not supported"; + case 0x1006: + return "Reserved"; + case 0x1007: + return "The message could not be accepted because of an internal busy condition"; + case 0x1008: + return "OS resource error (no memory ?)"; + case 0x1009: + return "CAPI not installed"; + case 0x100A: + return "Controller does not support external equipment"; + case 0x100B: + return "Controller does only support external equipment"; + +/*-- error information concerning message exchange functions -----*/ + case 0x1101: + return "Illegal application number"; + case 0x1102: + return "Illegal command or subcommand or message length less than 12 bytes"; + case 0x1103: + return "The message could not be accepted because of a queue full condition !! The error code does not imply that CAPI cannot receive messages directed to another controller, PLCI or NCCI"; + case 0x1104: + return "Queue is empty"; + case 0x1105: + return "Queue overflow, a message was lost !! This indicates a configuration error. The only recovery from this error is to perform a CAPI_RELEASE"; + case 0x1106: + return "Unknown notification parameter"; + case 0x1107: + return "The Message could not be accepted because of an internal busy condition"; + case 0x1108: + return "OS Resource error (no memory ?)"; + case 0x1109: + return "CAPI not installed"; + case 0x110A: + return "Controller does not support external equipment"; + case 0x110B: + return "Controller does only support external equipment"; + +/*-- error information concerning resource / coding problems -----*/ + case 0x2001: + return "Message not supported in current state"; + case 0x2002: + return "Illegal Controller / PLCI / NCCI"; + case 0x2003: + return "Out of PLCI"; + case 0x2004: + return "Out of NCCI"; + case 0x2005: + return "Out of LISTEN"; + case 0x2006: + return "Out of FAX resources (protocol T.30)"; + case 0x2007: + return "Illegal message parameter coding"; + +/*-- error information concerning requested services -----*/ + case 0x3001: + return "B1 protocol not supported"; + case 0x3002: + return "B2 protocol not supported"; + case 0x3003: + return "B3 protocol not supported"; + case 0x3004: + return "B1 protocol parameter not supported"; + case 0x3005: + return "B2 protocol parameter not supported"; + case 0x3006: + return "B3 protocol parameter not supported"; + case 0x3007: + return "B protocol combination not supported"; + case 0x3008: + return "NCPI not supported"; + case 0x3009: + return "CIP Value unknown"; + case 0x300A: + return "Flags not supported (reserved bits)"; + case 0x300B: + return "Facility not supported"; + case 0x300C: + return "Data length not supported by current protocol"; + case 0x300D: + return "Reset procedure not supported by current protocol"; + +/*-- informations about the clearing of a physical connection -----*/ + case 0x3301: + return "Protocol error layer 1 (broken line or B-channel removed by signalling protocol)"; + case 0x3302: + return "Protocol error layer 2"; + case 0x3303: + return "Protocol error layer 3"; + case 0x3304: + return "Another application got that call"; +/*-- T.30 specific reasons -----*/ + case 0x3311: + return "Connecting not successful (remote station is no FAX G3 machine)"; + case 0x3312: + return "Connecting not successful (training error)"; + case 0x3313: + return "Disconnected before transfer (remote station does not support transfer mode, e.g. resolution)"; + case 0x3314: + return "Disconnected during transfer (remote abort)"; + case 0x3315: + return "Disconnected during transfer (remote procedure error, e.g. unsuccessful repetition of T.30 commands)"; + case 0x3316: + return "Disconnected during transfer (local tx data underrun)"; + case 0x3317: + return "Disconnected during transfer (local rx data overflow)"; + case 0x3318: + return "Disconnected during transfer (local abort)"; + case 0x3319: + return "Illegal parameter coding (e.g. SFF coding error)"; + +/*-- disconnect causes from the network according to ETS 300 102-1/Q.931 -----*/ + case 0x3481: return "Unallocated (unassigned) number"; + case 0x3482: return "No route to specified transit network"; + case 0x3483: return "No route to destination"; + case 0x3486: return "Channel unacceptable"; + case 0x3487: + return "Call awarded and being delivered in an established channel"; + case 0x3490: return "Normal call clearing"; + case 0x3491: return "User busy"; + case 0x3492: return "No user responding"; + case 0x3493: return "No answer from user (user alerted)"; + case 0x3495: return "Call rejected"; + case 0x3496: return "Number changed"; + case 0x349A: return "Non-selected user clearing"; + case 0x349B: return "Destination out of order"; + case 0x349C: return "Invalid number format"; + case 0x349D: return "Facility rejected"; + case 0x349E: return "Response to STATUS ENQUIRY"; + case 0x349F: return "Normal, unspecified"; + case 0x34A2: return "No circuit / channel available"; + case 0x34A6: return "Network out of order"; + case 0x34A9: return "Temporary failure"; + case 0x34AA: return "Switching equipment congestion"; + case 0x34AB: return "Access information discarded"; + case 0x34AC: return "Requested circuit / channel not available"; + case 0x34AF: return "Resources unavailable, unspecified"; + case 0x34B1: return "Quality of service unavailable"; + case 0x34B2: return "Requested facility not subscribed"; + case 0x34B9: return "Bearer capability not authorized"; + case 0x34BA: return "Bearer capability not presently available"; + case 0x34BF: return "Service or option not available, unspecified"; + case 0x34C1: return "Bearer capability not implemented"; + case 0x34C2: return "Channel type not implemented"; + case 0x34C5: return "Requested facility not implemented"; + case 0x34C6: return "Only restricted digital information bearer capability is available"; + case 0x34CF: return "Service or option not implemented, unspecified"; + case 0x34D1: return "Invalid call reference value"; + case 0x34D2: return "Identified channel does not exist"; + case 0x34D3: return "A suspended call exists, but this call identity does not"; + case 0x34D4: return "Call identity in use"; + case 0x34D5: return "No call suspended"; + case 0x34D6: return "Call having the requested call identity has been cleared"; + case 0x34D8: return "Incompatible destination"; + case 0x34DB: return "Invalid transit network selection"; + case 0x34DF: return "Invalid message, unspecified"; + case 0x34E0: return "Mandatory information element is missing"; + case 0x34E1: return "Message type non-existent or not implemented"; + case 0x34E2: return "Message not compatible with call state or message type non-existent or not implemented"; + case 0x34E3: return "Information element non-existent or not implemented"; + case 0x34E4: return "Invalid information element contents"; + case 0x34E5: return "Message not compatible with call state"; + case 0x34E6: return "Recovery on timer expiry"; + case 0x34EF: return "Protocol error, unspecified"; + case 0x34FF: return "Interworking, unspecified"; + + default: return "No additional information"; + } +#endif +} static void handle_controller(_cmsg *cmsg) { diff --git a/drivers/isdn/capi/capiutil.c b/drivers/isdn/capi/capiutil.c index d26f17033b68..6e797e502cfa 100644 --- a/drivers/isdn/capi/capiutil.c +++ b/drivers/isdn/capi/capiutil.c @@ -22,205 +22,6 @@ /* from CAPI2.0 DDK AVM Berlin GmbH */ -#ifndef CONFIG_ISDN_DRV_AVMB1_VERBOSE_REASON -char *capi_info2str(u16 reason) -{ - return ".."; -} -#else -char *capi_info2str(u16 reason) -{ - switch (reason) { - -/*-- informative values (corresponding message was processed) -----*/ - case 0x0001: - return "NCPI not supported by current protocol, NCPI ignored"; - case 0x0002: - return "Flags not supported by current protocol, flags ignored"; - case 0x0003: - return "Alert already sent by another application"; - -/*-- error information concerning CAPI_REGISTER -----*/ - case 0x1001: - return "Too many applications"; - case 0x1002: - return "Logical block size too small, must be at least 128 Bytes"; - case 0x1003: - return "Buffer exceeds 64 kByte"; - case 0x1004: - return "Message buffer size too small, must be at least 1024 Bytes"; - case 0x1005: - return "Max. number of logical connections not supported"; - case 0x1006: - return "Reserved"; - case 0x1007: - return "The message could not be accepted because of an internal busy condition"; - case 0x1008: - return "OS resource error (no memory ?)"; - case 0x1009: - return "CAPI not installed"; - case 0x100A: - return "Controller does not support external equipment"; - case 0x100B: - return "Controller does only support external equipment"; - -/*-- error information concerning message exchange functions -----*/ - case 0x1101: - return "Illegal application number"; - case 0x1102: - return "Illegal command or subcommand or message length less than 12 bytes"; - case 0x1103: - return "The message could not be accepted because of a queue full condition !! The error code does not imply that CAPI cannot receive messages directed to another controller, PLCI or NCCI"; - case 0x1104: - return "Queue is empty"; - case 0x1105: - return "Queue overflow, a message was lost !! This indicates a configuration error. The only recovery from this error is to perform a CAPI_RELEASE"; - case 0x1106: - return "Unknown notification parameter"; - case 0x1107: - return "The Message could not be accepted because of an internal busy condition"; - case 0x1108: - return "OS Resource error (no memory ?)"; - case 0x1109: - return "CAPI not installed"; - case 0x110A: - return "Controller does not support external equipment"; - case 0x110B: - return "Controller does only support external equipment"; - -/*-- error information concerning resource / coding problems -----*/ - case 0x2001: - return "Message not supported in current state"; - case 0x2002: - return "Illegal Controller / PLCI / NCCI"; - case 0x2003: - return "Out of PLCI"; - case 0x2004: - return "Out of NCCI"; - case 0x2005: - return "Out of LISTEN"; - case 0x2006: - return "Out of FAX resources (protocol T.30)"; - case 0x2007: - return "Illegal message parameter coding"; - -/*-- error information concerning requested services -----*/ - case 0x3001: - return "B1 protocol not supported"; - case 0x3002: - return "B2 protocol not supported"; - case 0x3003: - return "B3 protocol not supported"; - case 0x3004: - return "B1 protocol parameter not supported"; - case 0x3005: - return "B2 protocol parameter not supported"; - case 0x3006: - return "B3 protocol parameter not supported"; - case 0x3007: - return "B protocol combination not supported"; - case 0x3008: - return "NCPI not supported"; - case 0x3009: - return "CIP Value unknown"; - case 0x300A: - return "Flags not supported (reserved bits)"; - case 0x300B: - return "Facility not supported"; - case 0x300C: - return "Data length not supported by current protocol"; - case 0x300D: - return "Reset procedure not supported by current protocol"; - -/*-- informations about the clearing of a physical connection -----*/ - case 0x3301: - return "Protocol error layer 1 (broken line or B-channel removed by signalling protocol)"; - case 0x3302: - return "Protocol error layer 2"; - case 0x3303: - return "Protocol error layer 3"; - case 0x3304: - return "Another application got that call"; -/*-- T.30 specific reasons -----*/ - case 0x3311: - return "Connecting not successful (remote station is no FAX G3 machine)"; - case 0x3312: - return "Connecting not successful (training error)"; - case 0x3313: - return "Disconnected before transfer (remote station does not support transfer mode, e.g. resolution)"; - case 0x3314: - return "Disconnected during transfer (remote abort)"; - case 0x3315: - return "Disconnected during transfer (remote procedure error, e.g. unsuccessful repetition of T.30 commands)"; - case 0x3316: - return "Disconnected during transfer (local tx data underrun)"; - case 0x3317: - return "Disconnected during transfer (local rx data overflow)"; - case 0x3318: - return "Disconnected during transfer (local abort)"; - case 0x3319: - return "Illegal parameter coding (e.g. SFF coding error)"; - -/*-- disconnect causes from the network according to ETS 300 102-1/Q.931 -----*/ - case 0x3481: return "Unallocated (unassigned) number"; - case 0x3482: return "No route to specified transit network"; - case 0x3483: return "No route to destination"; - case 0x3486: return "Channel unacceptable"; - case 0x3487: - return "Call awarded and being delivered in an established channel"; - case 0x3490: return "Normal call clearing"; - case 0x3491: return "User busy"; - case 0x3492: return "No user responding"; - case 0x3493: return "No answer from user (user alerted)"; - case 0x3495: return "Call rejected"; - case 0x3496: return "Number changed"; - case 0x349A: return "Non-selected user clearing"; - case 0x349B: return "Destination out of order"; - case 0x349C: return "Invalid number format"; - case 0x349D: return "Facility rejected"; - case 0x349E: return "Response to STATUS ENQUIRY"; - case 0x349F: return "Normal, unspecified"; - case 0x34A2: return "No circuit / channel available"; - case 0x34A6: return "Network out of order"; - case 0x34A9: return "Temporary failure"; - case 0x34AA: return "Switching equipment congestion"; - case 0x34AB: return "Access information discarded"; - case 0x34AC: return "Requested circuit / channel not available"; - case 0x34AF: return "Resources unavailable, unspecified"; - case 0x34B1: return "Quality of service unavailable"; - case 0x34B2: return "Requested facility not subscribed"; - case 0x34B9: return "Bearer capability not authorized"; - case 0x34BA: return "Bearer capability not presently available"; - case 0x34BF: return "Service or option not available, unspecified"; - case 0x34C1: return "Bearer capability not implemented"; - case 0x34C2: return "Channel type not implemented"; - case 0x34C5: return "Requested facility not implemented"; - case 0x34C6: return "Only restricted digital information bearer capability is available"; - case 0x34CF: return "Service or option not implemented, unspecified"; - case 0x34D1: return "Invalid call reference value"; - case 0x34D2: return "Identified channel does not exist"; - case 0x34D3: return "A suspended call exists, but this call identity does not"; - case 0x34D4: return "Call identity in use"; - case 0x34D5: return "No call suspended"; - case 0x34D6: return "Call having the requested call identity has been cleared"; - case 0x34D8: return "Incompatible destination"; - case 0x34DB: return "Invalid transit network selection"; - case 0x34DF: return "Invalid message, unspecified"; - case 0x34E0: return "Mandatory information element is missing"; - case 0x34E1: return "Message type non-existent or not implemented"; - case 0x34E2: return "Message not compatible with call state or message type non-existent or not implemented"; - case 0x34E3: return "Information element non-existent or not implemented"; - case 0x34E4: return "Invalid information element contents"; - case 0x34E5: return "Message not compatible with call state"; - case 0x34E6: return "Recovery on timer expiry"; - case 0x34EF: return "Protocol error, unspecified"; - case 0x34FF: return "Interworking, unspecified"; - - default: return "No additional information"; - } -} -#endif - typedef struct { int typ; size_t off; @@ -1073,4 +874,3 @@ EXPORT_SYMBOL(capi_cmsg_header); EXPORT_SYMBOL(capi_cmd2str); EXPORT_SYMBOL(capi_cmsg2str); EXPORT_SYMBOL(capi_message2str); -EXPORT_SYMBOL(capi_info2str); diff --git a/include/linux/isdn/capiutil.h b/include/linux/isdn/capiutil.h index 5a52f2c94f3f..44bd6046e6e2 100644 --- a/include/linux/isdn/capiutil.h +++ b/include/linux/isdn/capiutil.h @@ -164,11 +164,6 @@ unsigned capi_cmsg_header(_cmsg * cmsg, __u16 _ApplId, __u8 _Command, __u8 _Subcommand, __u16 _Messagenumber, __u32 _Controller); -/* - * capi_info2str generated a readable string for Capi2.0 reasons. - */ -char *capi_info2str(__u16 reason); - /*-----------------------------------------------------------------------*/ /* -- cgit v1.2.3-59-g8ed1b From 46cfd6ea23b0a207c87269d86457727dc4485708 Mon Sep 17 00:00:00 2001 From: Konrad Zapalowicz Date: Thu, 5 Jun 2014 20:27:42 +0200 Subject: net: phy: fix sparse warning in fixed.c This commit fixes the following sparse warning: drivers/net/phy/fixed.c:207 - warning: symbol 'fixed_phy_del' was not declared. Should it be static? by adding symbol definition to the phy_fixed.h API file. It is ok to do because the function in question is an exported symbol. Signed-off-by: Konrad Zapalowicz Signed-off-by: David S. Miller --- include/linux/phy_fixed.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h index 4f2478b47136..ae612acebb53 100644 --- a/include/linux/phy_fixed.h +++ b/include/linux/phy_fixed.h @@ -17,6 +17,7 @@ extern int fixed_phy_add(unsigned int irq, int phy_id, extern int fixed_phy_register(unsigned int irq, struct fixed_phy_status *status, struct device_node *np); +extern void fixed_phy_del(int phy_addr); #else static inline int fixed_phy_add(unsigned int irq, int phy_id, struct fixed_phy_status *status) @@ -29,6 +30,10 @@ static inline int fixed_phy_register(unsigned int irq, { return -ENODEV; } +static inline int fixed_phy_del(int phy_addr) +{ + return -ENODEV; +} #endif /* CONFIG_FIXED_PHY */ /* -- cgit v1.2.3-59-g8ed1b From 07f8ac4a1e26e8283542cdaf658a6e2a12fd6980 Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Sat, 7 Jun 2014 18:26:28 +0200 Subject: bridge: add export of multicast database adjacent to net_dev MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With this new, exported function br_multicast_list_adjacent(net_dev) a list of IPv4/6 addresses is returned. This list contains all multicast addresses sensed by the bridge multicast snooping feature on all bridge ports of the bridge interface of net_dev, excluding addresses from the specified net_device itself. Adding bridge support to the batman-adv multicast optimization requires batman-adv knowing about the existence of bridged-in multicast listeners to be able to reliably serve them with multicast packets. Signed-off-by: Linus Lüssing Signed-off-by: David S. Miller --- include/linux/if_bridge.h | 18 +++++++++++++++ net/bridge/br_multicast.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++ net/bridge/br_private.h | 12 ---------- 3 files changed, 76 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 1085ffeef956..44d6eb0eb852 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -16,9 +16,27 @@ #include #include +struct br_ip { + union { + __be32 ip4; +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr ip6; +#endif + } u; + __be16 proto; + __u16 vid; +}; + +struct br_ip_list { + struct list_head list; + struct br_ip addr; +}; + extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *)); typedef int br_should_route_hook_t(struct sk_buff *skb); extern br_should_route_hook_t __rcu *br_should_route_hook; +int br_multicast_list_adjacent(struct net_device *dev, + struct list_head *br_ip_list); #endif diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index b3f17c9b4d06..772476b7c4b7 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -11,6 +11,7 @@ */ #include +#include #include #include #include @@ -2141,3 +2142,60 @@ unlock: return err; } + +/** + * br_multicast_list_adjacent - Returns snooped multicast addresses + * @dev: The bridge port adjacent to which to retrieve addresses + * @br_ip_list: The list to store found, snooped multicast IP addresses in + * + * Creates a list of IP addresses (struct br_ip_list) sensed by the multicast + * snooping feature on all bridge ports of dev's bridge device, excluding + * the addresses from dev itself. + * + * Returns the number of items added to br_ip_list. + * + * Notes: + * - br_ip_list needs to be initialized by caller + * - br_ip_list might contain duplicates in the end + * (needs to be taken care of by caller) + * - br_ip_list needs to be freed by caller + */ +int br_multicast_list_adjacent(struct net_device *dev, + struct list_head *br_ip_list) +{ + struct net_bridge *br; + struct net_bridge_port *port; + struct net_bridge_port_group *group; + struct br_ip_list *entry; + int count = 0; + + rcu_read_lock(); + if (!br_ip_list || !br_port_exists(dev)) + goto unlock; + + port = br_port_get_rcu(dev); + if (!port || !port->br) + goto unlock; + + br = port->br; + + list_for_each_entry_rcu(port, &br->port_list, list) { + if (!port->dev || port->dev == dev) + continue; + + hlist_for_each_entry_rcu(group, &port->mglist, mglist) { + entry = kmalloc(sizeof(*entry), GFP_ATOMIC); + if (!entry) + goto unlock; + + entry->addr = group->addr; + list_add(&entry->list, br_ip_list); + count++; + } + } + +unlock: + rcu_read_unlock(); + return count; +} +EXPORT_SYMBOL_GPL(br_multicast_list_adjacent); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 97c5e46dde72..50e2ab021484 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -54,18 +54,6 @@ struct mac_addr unsigned char addr[ETH_ALEN]; }; -struct br_ip -{ - union { - __be32 ip4; -#if IS_ENABLED(CONFIG_IPV6) - struct in6_addr ip6; -#endif - } u; - __be16 proto; - __u16 vid; -}; - #ifdef CONFIG_BRIDGE_IGMP_SNOOPING /* our own querier */ struct bridge_mcast_own_query { -- cgit v1.2.3-59-g8ed1b From 2cd4143192e8c60f66cb32c3a30c76d0470a372d Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Sat, 7 Jun 2014 18:26:29 +0200 Subject: bridge: memorize and export selected IGMP/MLD querier port MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding bridge support to the batman-adv multicast optimization requires batman-adv knowing about the existence of bridged-in IGMP/MLD queriers to be able to reliably serve any multicast listener behind this same bridge. Signed-off-by: Linus Lüssing Signed-off-by: David S. Miller --- include/linux/if_bridge.h | 1 + net/bridge/br_multicast.c | 72 +++++++++++++++++++++++++++++++++++++++++++---- net/bridge/br_private.h | 1 + 3 files changed, 68 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 44d6eb0eb852..fd22789d7b2e 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -38,5 +38,6 @@ typedef int br_should_route_hook_t(struct sk_buff *skb); extern br_should_route_hook_t __rcu *br_should_route_hook; int br_multicast_list_adjacent(struct net_device *dev, struct list_head *br_ip_list); +bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto); #endif diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 772476b7c4b7..cd3cf394c477 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1081,6 +1081,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, #endif static bool br_ip4_multicast_select_querier(struct net_bridge *br, + struct net_bridge_port *port, __be32 saddr) { if (!timer_pending(&br->ip4_own_query.timer) && @@ -1098,11 +1099,15 @@ static bool br_ip4_multicast_select_querier(struct net_bridge *br, update: br->ip4_querier.addr.u.ip4 = saddr; + /* update protected by general multicast_lock by caller */ + rcu_assign_pointer(br->ip4_querier.port, port); + return true; } #if IS_ENABLED(CONFIG_IPV6) static bool br_ip6_multicast_select_querier(struct net_bridge *br, + struct net_bridge_port *port, struct in6_addr *saddr) { if (!timer_pending(&br->ip6_own_query.timer) && @@ -1117,19 +1122,23 @@ static bool br_ip6_multicast_select_querier(struct net_bridge *br, update: br->ip6_querier.addr.u.ip6 = *saddr; + /* update protected by general multicast_lock by caller */ + rcu_assign_pointer(br->ip6_querier.port, port); + return true; } #endif static bool br_multicast_select_querier(struct net_bridge *br, + struct net_bridge_port *port, struct br_ip *saddr) { switch (saddr->proto) { case htons(ETH_P_IP): - return br_ip4_multicast_select_querier(br, saddr->u.ip4); + return br_ip4_multicast_select_querier(br, port, saddr->u.ip4); #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): - return br_ip6_multicast_select_querier(br, &saddr->u.ip6); + return br_ip6_multicast_select_querier(br, port, &saddr->u.ip6); #endif } @@ -1201,7 +1210,7 @@ static void br_multicast_query_received(struct net_bridge *br, struct br_ip *saddr, unsigned long max_delay) { - if (!br_multicast_select_querier(br, saddr)) + if (!br_multicast_select_querier(br, port, saddr)) return; br_multicast_update_query_timer(br, query, max_delay); @@ -1804,12 +1813,14 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, } static void br_multicast_query_expired(struct net_bridge *br, - struct bridge_mcast_own_query *query) + struct bridge_mcast_own_query *query, + struct bridge_mcast_querier *querier) { spin_lock(&br->multicast_lock); if (query->startup_sent < br->multicast_startup_query_count) query->startup_sent++; + rcu_assign_pointer(querier, NULL); br_multicast_send_query(br, NULL, query); spin_unlock(&br->multicast_lock); } @@ -1818,7 +1829,7 @@ static void br_ip4_multicast_query_expired(unsigned long data) { struct net_bridge *br = (void *)data; - br_multicast_query_expired(br, &br->ip4_own_query); + br_multicast_query_expired(br, &br->ip4_own_query, &br->ip4_querier); } #if IS_ENABLED(CONFIG_IPV6) @@ -1826,7 +1837,7 @@ static void br_ip6_multicast_query_expired(unsigned long data) { struct net_bridge *br = (void *)data; - br_multicast_query_expired(br, &br->ip6_own_query); + br_multicast_query_expired(br, &br->ip6_own_query, &br->ip6_querier); } #endif @@ -1849,8 +1860,10 @@ void br_multicast_init(struct net_bridge *br) br->multicast_membership_interval = 260 * HZ; br->ip4_other_query.delay_time = 0; + br->ip4_querier.port = NULL; #if IS_ENABLED(CONFIG_IPV6) br->ip6_other_query.delay_time = 0; + br->ip6_querier.port = NULL; #endif spin_lock_init(&br->multicast_lock); @@ -2199,3 +2212,50 @@ unlock: return count; } EXPORT_SYMBOL_GPL(br_multicast_list_adjacent); + +/** + * br_multicast_has_querier_adjacent - Checks for a querier behind a bridge port + * @dev: The bridge port adjacent to which to check for a querier + * @proto: The protocol family to check for: IGMP -> ETH_P_IP, MLD -> ETH_P_IPV6 + * + * Checks whether the given interface has a bridge on top and if so returns + * true if a selected querier is behind one of the other ports of this + * bridge. Otherwise returns false. + */ +bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto) +{ + struct net_bridge *br; + struct net_bridge_port *port; + bool ret = false; + + rcu_read_lock(); + if (!br_port_exists(dev)) + goto unlock; + + port = br_port_get_rcu(dev); + if (!port || !port->br) + goto unlock; + + br = port->br; + + switch (proto) { + case ETH_P_IP: + if (!timer_pending(&br->ip4_other_query.timer) || + rcu_dereference(br->ip4_querier.port) == port) + goto unlock; + break; + case ETH_P_IPV6: + if (!timer_pending(&br->ip6_other_query.timer) || + rcu_dereference(br->ip6_querier.port) == port) + goto unlock; + break; + default: + goto unlock; + } + + ret = true; +unlock: + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL_GPL(br_multicast_has_querier_adjacent); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 50e2ab021484..8346e9504cdb 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -70,6 +70,7 @@ struct bridge_mcast_other_query { /* selected querier */ struct bridge_mcast_querier { struct br_ip addr; + struct net_bridge_port __rcu *port; }; #endif -- cgit v1.2.3-59-g8ed1b From e430f34ee5192c84bcabd3c79ab7e2388b5eec74 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 6 Jun 2014 14:46:06 -0700 Subject: net: filter: cleanup A/X name usage The macro 'A' used in internal BPF interpreter: #define A regs[insn->a_reg] was easily confused with the name of classic BPF register 'A', since 'A' would mean two different things depending on context. This patch is trying to clean up the naming and clarify its usage in the following way: - A and X are names of two classic BPF registers - BPF_REG_A denotes internal BPF register R0 used to map classic register A in internal BPF programs generated from classic - BPF_REG_X denotes internal BPF register R7 used to map classic register X in internal BPF programs generated from classic - internal BPF instruction format: struct sock_filter_int { __u8 code; /* opcode */ __u8 dst_reg:4; /* dest register */ __u8 src_reg:4; /* source register */ __s16 off; /* signed offset */ __s32 imm; /* signed immediate constant */ }; - BPF_X/BPF_K is 1 bit used to encode source operand of instruction In classic: BPF_X - means use register X as source operand BPF_K - means use 32-bit immediate as source operand In internal: BPF_X - means use 'src_reg' register as source operand BPF_K - means use 32-bit immediate as source operand Suggested-by: Chema Gonzalez Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Chema Gonzalez Signed-off-by: David S. Miller --- Documentation/networking/filter.txt | 2 +- arch/x86/net/bpf_jit_comp.c | 260 ++++++++++++++++++------------------ include/linux/filter.h | 156 ++++++++++++---------- net/core/filter.c | 198 +++++++++++++-------------- 4 files changed, 314 insertions(+), 302 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index 58c443926647..9f49b8690500 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -805,7 +805,7 @@ to seccomp_data, for converted BPF filters R1 points to a skb. A program, that is translated internally consists of the following elements: - op:16, jt:8, jf:8, k:32 ==> op:8, a_reg:4, x_reg:4, off:16, imm:32 + op:16, jt:8, jf:8, k:32 ==> op:8, dst_reg:4, src_reg:4, off:16, imm:32 So far 87 internal BPF instructions were implemented. 8-bit 'op' opcode field has room for new instructions. Some of them may use 16/24/32 byte encoding. New diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 080f3f071bb0..99bef86ed6df 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -64,10 +64,10 @@ static inline bool is_simm32(s64 value) return value == (s64) (s32) value; } -/* mov A, X */ -#define EMIT_mov(A, X) \ - do {if (A != X) \ - EMIT3(add_2mod(0x48, A, X), 0x89, add_2reg(0xC0, A, X)); \ +/* mov dst, src */ +#define EMIT_mov(DST, SRC) \ + do {if (DST != SRC) \ + EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \ } while (0) static int bpf_size_to_x86_bytes(int bpf_size) @@ -194,16 +194,16 @@ static inline u8 add_2mod(u8 byte, u32 r1, u32 r2) return byte; } -/* encode dest register 'a_reg' into x64 opcode 'byte' */ -static inline u8 add_1reg(u8 byte, u32 a_reg) +/* encode 'dst_reg' register into x64 opcode 'byte' */ +static inline u8 add_1reg(u8 byte, u32 dst_reg) { - return byte + reg2hex[a_reg]; + return byte + reg2hex[dst_reg]; } -/* encode dest 'a_reg' and src 'x_reg' registers into x64 opcode 'byte' */ -static inline u8 add_2reg(u8 byte, u32 a_reg, u32 x_reg) +/* encode 'dst_reg' and 'src_reg' registers into x64 opcode 'byte' */ +static inline u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg) { - return byte + reg2hex[a_reg] + (reg2hex[x_reg] << 3); + return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3); } struct jit_context { @@ -286,9 +286,9 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, } for (i = 0; i < insn_cnt; i++, insn++) { - const s32 K = insn->imm; - u32 a_reg = insn->a_reg; - u32 x_reg = insn->x_reg; + const s32 imm32 = insn->imm; + u32 dst_reg = insn->dst_reg; + u32 src_reg = insn->src_reg; u8 b1 = 0, b2 = 0, b3 = 0; s64 jmp_offset; u8 jmp_cond; @@ -315,32 +315,32 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, case BPF_XOR: b2 = 0x31; break; } if (BPF_CLASS(insn->code) == BPF_ALU64) - EMIT1(add_2mod(0x48, a_reg, x_reg)); - else if (is_ereg(a_reg) || is_ereg(x_reg)) - EMIT1(add_2mod(0x40, a_reg, x_reg)); - EMIT2(b2, add_2reg(0xC0, a_reg, x_reg)); + EMIT1(add_2mod(0x48, dst_reg, src_reg)); + else if (is_ereg(dst_reg) || is_ereg(src_reg)) + EMIT1(add_2mod(0x40, dst_reg, src_reg)); + EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg)); break; - /* mov A, X */ + /* mov dst, src */ case BPF_ALU64 | BPF_MOV | BPF_X: - EMIT_mov(a_reg, x_reg); + EMIT_mov(dst_reg, src_reg); break; - /* mov32 A, X */ + /* mov32 dst, src */ case BPF_ALU | BPF_MOV | BPF_X: - if (is_ereg(a_reg) || is_ereg(x_reg)) - EMIT1(add_2mod(0x40, a_reg, x_reg)); - EMIT2(0x89, add_2reg(0xC0, a_reg, x_reg)); + if (is_ereg(dst_reg) || is_ereg(src_reg)) + EMIT1(add_2mod(0x40, dst_reg, src_reg)); + EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg)); break; - /* neg A */ + /* neg dst */ case BPF_ALU | BPF_NEG: case BPF_ALU64 | BPF_NEG: if (BPF_CLASS(insn->code) == BPF_ALU64) - EMIT1(add_1mod(0x48, a_reg)); - else if (is_ereg(a_reg)) - EMIT1(add_1mod(0x40, a_reg)); - EMIT2(0xF7, add_1reg(0xD8, a_reg)); + EMIT1(add_1mod(0x48, dst_reg)); + else if (is_ereg(dst_reg)) + EMIT1(add_1mod(0x40, dst_reg)); + EMIT2(0xF7, add_1reg(0xD8, dst_reg)); break; case BPF_ALU | BPF_ADD | BPF_K: @@ -354,9 +354,9 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, case BPF_ALU64 | BPF_OR | BPF_K: case BPF_ALU64 | BPF_XOR | BPF_K: if (BPF_CLASS(insn->code) == BPF_ALU64) - EMIT1(add_1mod(0x48, a_reg)); - else if (is_ereg(a_reg)) - EMIT1(add_1mod(0x40, a_reg)); + EMIT1(add_1mod(0x48, dst_reg)); + else if (is_ereg(dst_reg)) + EMIT1(add_1mod(0x40, dst_reg)); switch (BPF_OP(insn->code)) { case BPF_ADD: b3 = 0xC0; break; @@ -366,10 +366,10 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, case BPF_XOR: b3 = 0xF0; break; } - if (is_imm8(K)) - EMIT3(0x83, add_1reg(b3, a_reg), K); + if (is_imm8(imm32)) + EMIT3(0x83, add_1reg(b3, dst_reg), imm32); else - EMIT2_off32(0x81, add_1reg(b3, a_reg), K); + EMIT2_off32(0x81, add_1reg(b3, dst_reg), imm32); break; case BPF_ALU64 | BPF_MOV | BPF_K: @@ -377,23 +377,23 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, * use 'mov eax, imm32' (which zero-extends imm32) * to save 2 bytes */ - if (K < 0) { + if (imm32 < 0) { /* 'mov rax, imm32' sign extends imm32 */ - b1 = add_1mod(0x48, a_reg); + b1 = add_1mod(0x48, dst_reg); b2 = 0xC7; b3 = 0xC0; - EMIT3_off32(b1, b2, add_1reg(b3, a_reg), K); + EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32); break; } case BPF_ALU | BPF_MOV | BPF_K: /* mov %eax, imm32 */ - if (is_ereg(a_reg)) - EMIT1(add_1mod(0x40, a_reg)); - EMIT1_off32(add_1reg(0xB8, a_reg), K); + if (is_ereg(dst_reg)) + EMIT1(add_1mod(0x40, dst_reg)); + EMIT1_off32(add_1reg(0xB8, dst_reg), imm32); break; - /* A %= X, A /= X, A %= K, A /= K */ + /* dst %= src, dst /= src, dst %= imm32, dst /= imm32 */ case BPF_ALU | BPF_MOD | BPF_X: case BPF_ALU | BPF_DIV | BPF_X: case BPF_ALU | BPF_MOD | BPF_K: @@ -406,14 +406,14 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, EMIT1(0x52); /* push rdx */ if (BPF_SRC(insn->code) == BPF_X) - /* mov r11, X */ - EMIT_mov(AUX_REG, x_reg); + /* mov r11, src_reg */ + EMIT_mov(AUX_REG, src_reg); else - /* mov r11, K */ - EMIT3_off32(0x49, 0xC7, 0xC3, K); + /* mov r11, imm32 */ + EMIT3_off32(0x49, 0xC7, 0xC3, imm32); - /* mov rax, A */ - EMIT_mov(BPF_REG_0, a_reg); + /* mov rax, dst_reg */ + EMIT_mov(BPF_REG_0, dst_reg); /* xor edx, edx * equivalent to 'xor rdx, rdx', but one byte less @@ -421,7 +421,7 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, EMIT2(0x31, 0xd2); if (BPF_SRC(insn->code) == BPF_X) { - /* if (X == 0) return 0 */ + /* if (src_reg == 0) return 0 */ /* cmp r11, 0 */ EMIT4(0x49, 0x83, 0xFB, 0x00); @@ -457,8 +457,8 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, EMIT1(0x5A); /* pop rdx */ EMIT1(0x58); /* pop rax */ - /* mov A, r11 */ - EMIT_mov(a_reg, AUX_REG); + /* mov dst_reg, r11 */ + EMIT_mov(dst_reg, AUX_REG); break; case BPF_ALU | BPF_MUL | BPF_K: @@ -468,15 +468,15 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, EMIT1(0x50); /* push rax */ EMIT1(0x52); /* push rdx */ - /* mov r11, A */ - EMIT_mov(AUX_REG, a_reg); + /* mov r11, dst_reg */ + EMIT_mov(AUX_REG, dst_reg); if (BPF_SRC(insn->code) == BPF_X) - /* mov rax, X */ - EMIT_mov(BPF_REG_0, x_reg); + /* mov rax, src_reg */ + EMIT_mov(BPF_REG_0, src_reg); else - /* mov rax, K */ - EMIT3_off32(0x48, 0xC7, 0xC0, K); + /* mov rax, imm32 */ + EMIT3_off32(0x48, 0xC7, 0xC0, imm32); if (BPF_CLASS(insn->code) == BPF_ALU64) EMIT1(add_1mod(0x48, AUX_REG)); @@ -491,8 +491,8 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, EMIT1(0x5A); /* pop rdx */ EMIT1(0x58); /* pop rax */ - /* mov A, r11 */ - EMIT_mov(a_reg, AUX_REG); + /* mov dst_reg, r11 */ + EMIT_mov(dst_reg, AUX_REG); break; /* shifts */ @@ -503,39 +503,39 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, case BPF_ALU64 | BPF_RSH | BPF_K: case BPF_ALU64 | BPF_ARSH | BPF_K: if (BPF_CLASS(insn->code) == BPF_ALU64) - EMIT1(add_1mod(0x48, a_reg)); - else if (is_ereg(a_reg)) - EMIT1(add_1mod(0x40, a_reg)); + EMIT1(add_1mod(0x48, dst_reg)); + else if (is_ereg(dst_reg)) + EMIT1(add_1mod(0x40, dst_reg)); switch (BPF_OP(insn->code)) { case BPF_LSH: b3 = 0xE0; break; case BPF_RSH: b3 = 0xE8; break; case BPF_ARSH: b3 = 0xF8; break; } - EMIT3(0xC1, add_1reg(b3, a_reg), K); + EMIT3(0xC1, add_1reg(b3, dst_reg), imm32); break; case BPF_ALU | BPF_END | BPF_FROM_BE: - switch (K) { + switch (imm32) { case 16: /* emit 'ror %ax, 8' to swap lower 2 bytes */ EMIT1(0x66); - if (is_ereg(a_reg)) + if (is_ereg(dst_reg)) EMIT1(0x41); - EMIT3(0xC1, add_1reg(0xC8, a_reg), 8); + EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8); break; case 32: /* emit 'bswap eax' to swap lower 4 bytes */ - if (is_ereg(a_reg)) + if (is_ereg(dst_reg)) EMIT2(0x41, 0x0F); else EMIT1(0x0F); - EMIT1(add_1reg(0xC8, a_reg)); + EMIT1(add_1reg(0xC8, dst_reg)); break; case 64: /* emit 'bswap rax' to swap 8 bytes */ - EMIT3(add_1mod(0x48, a_reg), 0x0F, - add_1reg(0xC8, a_reg)); + EMIT3(add_1mod(0x48, dst_reg), 0x0F, + add_1reg(0xC8, dst_reg)); break; } break; @@ -543,117 +543,117 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, case BPF_ALU | BPF_END | BPF_FROM_LE: break; - /* ST: *(u8*)(a_reg + off) = imm */ + /* ST: *(u8*)(dst_reg + off) = imm */ case BPF_ST | BPF_MEM | BPF_B: - if (is_ereg(a_reg)) + if (is_ereg(dst_reg)) EMIT2(0x41, 0xC6); else EMIT1(0xC6); goto st; case BPF_ST | BPF_MEM | BPF_H: - if (is_ereg(a_reg)) + if (is_ereg(dst_reg)) EMIT3(0x66, 0x41, 0xC7); else EMIT2(0x66, 0xC7); goto st; case BPF_ST | BPF_MEM | BPF_W: - if (is_ereg(a_reg)) + if (is_ereg(dst_reg)) EMIT2(0x41, 0xC7); else EMIT1(0xC7); goto st; case BPF_ST | BPF_MEM | BPF_DW: - EMIT2(add_1mod(0x48, a_reg), 0xC7); + EMIT2(add_1mod(0x48, dst_reg), 0xC7); st: if (is_imm8(insn->off)) - EMIT2(add_1reg(0x40, a_reg), insn->off); + EMIT2(add_1reg(0x40, dst_reg), insn->off); else - EMIT1_off32(add_1reg(0x80, a_reg), insn->off); + EMIT1_off32(add_1reg(0x80, dst_reg), insn->off); - EMIT(K, bpf_size_to_x86_bytes(BPF_SIZE(insn->code))); + EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(insn->code))); break; - /* STX: *(u8*)(a_reg + off) = x_reg */ + /* STX: *(u8*)(dst_reg + off) = src_reg */ case BPF_STX | BPF_MEM | BPF_B: /* emit 'mov byte ptr [rax + off], al' */ - if (is_ereg(a_reg) || is_ereg(x_reg) || + if (is_ereg(dst_reg) || is_ereg(src_reg) || /* have to add extra byte for x86 SIL, DIL regs */ - x_reg == BPF_REG_1 || x_reg == BPF_REG_2) - EMIT2(add_2mod(0x40, a_reg, x_reg), 0x88); + src_reg == BPF_REG_1 || src_reg == BPF_REG_2) + EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88); else EMIT1(0x88); goto stx; case BPF_STX | BPF_MEM | BPF_H: - if (is_ereg(a_reg) || is_ereg(x_reg)) - EMIT3(0x66, add_2mod(0x40, a_reg, x_reg), 0x89); + if (is_ereg(dst_reg) || is_ereg(src_reg)) + EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89); else EMIT2(0x66, 0x89); goto stx; case BPF_STX | BPF_MEM | BPF_W: - if (is_ereg(a_reg) || is_ereg(x_reg)) - EMIT2(add_2mod(0x40, a_reg, x_reg), 0x89); + if (is_ereg(dst_reg) || is_ereg(src_reg)) + EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89); else EMIT1(0x89); goto stx; case BPF_STX | BPF_MEM | BPF_DW: - EMIT2(add_2mod(0x48, a_reg, x_reg), 0x89); + EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89); stx: if (is_imm8(insn->off)) - EMIT2(add_2reg(0x40, a_reg, x_reg), insn->off); + EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off); else - EMIT1_off32(add_2reg(0x80, a_reg, x_reg), + EMIT1_off32(add_2reg(0x80, dst_reg, src_reg), insn->off); break; - /* LDX: a_reg = *(u8*)(x_reg + off) */ + /* LDX: dst_reg = *(u8*)(src_reg + off) */ case BPF_LDX | BPF_MEM | BPF_B: /* emit 'movzx rax, byte ptr [rax + off]' */ - EMIT3(add_2mod(0x48, x_reg, a_reg), 0x0F, 0xB6); + EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6); goto ldx; case BPF_LDX | BPF_MEM | BPF_H: /* emit 'movzx rax, word ptr [rax + off]' */ - EMIT3(add_2mod(0x48, x_reg, a_reg), 0x0F, 0xB7); + EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7); goto ldx; case BPF_LDX | BPF_MEM | BPF_W: /* emit 'mov eax, dword ptr [rax+0x14]' */ - if (is_ereg(a_reg) || is_ereg(x_reg)) - EMIT2(add_2mod(0x40, x_reg, a_reg), 0x8B); + if (is_ereg(dst_reg) || is_ereg(src_reg)) + EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B); else EMIT1(0x8B); goto ldx; case BPF_LDX | BPF_MEM | BPF_DW: /* emit 'mov rax, qword ptr [rax+0x14]' */ - EMIT2(add_2mod(0x48, x_reg, a_reg), 0x8B); + EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B); ldx: /* if insn->off == 0 we can save one extra byte, but * special case of x86 r13 which always needs an offset * is not worth the hassle */ if (is_imm8(insn->off)) - EMIT2(add_2reg(0x40, x_reg, a_reg), insn->off); + EMIT2(add_2reg(0x40, src_reg, dst_reg), insn->off); else - EMIT1_off32(add_2reg(0x80, x_reg, a_reg), + EMIT1_off32(add_2reg(0x80, src_reg, dst_reg), insn->off); break; - /* STX XADD: lock *(u32*)(a_reg + off) += x_reg */ + /* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */ case BPF_STX | BPF_XADD | BPF_W: /* emit 'lock add dword ptr [rax + off], eax' */ - if (is_ereg(a_reg) || is_ereg(x_reg)) - EMIT3(0xF0, add_2mod(0x40, a_reg, x_reg), 0x01); + if (is_ereg(dst_reg) || is_ereg(src_reg)) + EMIT3(0xF0, add_2mod(0x40, dst_reg, src_reg), 0x01); else EMIT2(0xF0, 0x01); goto xadd; case BPF_STX | BPF_XADD | BPF_DW: - EMIT3(0xF0, add_2mod(0x48, a_reg, x_reg), 0x01); + EMIT3(0xF0, add_2mod(0x48, dst_reg, src_reg), 0x01); xadd: if (is_imm8(insn->off)) - EMIT2(add_2reg(0x40, a_reg, x_reg), insn->off); + EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off); else - EMIT1_off32(add_2reg(0x80, a_reg, x_reg), + EMIT1_off32(add_2reg(0x80, dst_reg, src_reg), insn->off); break; /* call */ case BPF_JMP | BPF_CALL: - func = (u8 *) __bpf_call_base + K; + func = (u8 *) __bpf_call_base + imm32; jmp_offset = func - (image + addrs[i]); if (ctx->seen_ld_abs) { EMIT2(0x41, 0x52); /* push %r10 */ @@ -663,9 +663,9 @@ xadd: if (is_imm8(insn->off)) */ jmp_offset += 4; } - if (!K || !is_simm32(jmp_offset)) { + if (!imm32 || !is_simm32(jmp_offset)) { pr_err("unsupported bpf func %d addr %p image %p\n", - K, func, image); + imm32, func, image); return -EINVAL; } EMIT1_off32(0xE8, jmp_offset); @@ -682,21 +682,21 @@ xadd: if (is_imm8(insn->off)) case BPF_JMP | BPF_JGE | BPF_X: case BPF_JMP | BPF_JSGT | BPF_X: case BPF_JMP | BPF_JSGE | BPF_X: - /* cmp a_reg, x_reg */ - EMIT3(add_2mod(0x48, a_reg, x_reg), 0x39, - add_2reg(0xC0, a_reg, x_reg)); + /* cmp dst_reg, src_reg */ + EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x39, + add_2reg(0xC0, dst_reg, src_reg)); goto emit_cond_jmp; case BPF_JMP | BPF_JSET | BPF_X: - /* test a_reg, x_reg */ - EMIT3(add_2mod(0x48, a_reg, x_reg), 0x85, - add_2reg(0xC0, a_reg, x_reg)); + /* test dst_reg, src_reg */ + EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x85, + add_2reg(0xC0, dst_reg, src_reg)); goto emit_cond_jmp; case BPF_JMP | BPF_JSET | BPF_K: - /* test a_reg, imm32 */ - EMIT1(add_1mod(0x48, a_reg)); - EMIT2_off32(0xF7, add_1reg(0xC0, a_reg), K); + /* test dst_reg, imm32 */ + EMIT1(add_1mod(0x48, dst_reg)); + EMIT2_off32(0xF7, add_1reg(0xC0, dst_reg), imm32); goto emit_cond_jmp; case BPF_JMP | BPF_JEQ | BPF_K: @@ -705,13 +705,13 @@ xadd: if (is_imm8(insn->off)) case BPF_JMP | BPF_JGE | BPF_K: case BPF_JMP | BPF_JSGT | BPF_K: case BPF_JMP | BPF_JSGE | BPF_K: - /* cmp a_reg, imm8/32 */ - EMIT1(add_1mod(0x48, a_reg)); + /* cmp dst_reg, imm8/32 */ + EMIT1(add_1mod(0x48, dst_reg)); - if (is_imm8(K)) - EMIT3(0x83, add_1reg(0xF8, a_reg), K); + if (is_imm8(imm32)) + EMIT3(0x83, add_1reg(0xF8, dst_reg), imm32); else - EMIT2_off32(0x81, add_1reg(0xF8, a_reg), K); + EMIT2_off32(0x81, add_1reg(0xF8, dst_reg), imm32); emit_cond_jmp: /* convert BPF opcode to x86 */ switch (BPF_OP(insn->code)) { @@ -773,27 +773,27 @@ emit_jmp: func = sk_load_word; goto common_load; case BPF_LD | BPF_ABS | BPF_W: - func = CHOOSE_LOAD_FUNC(K, sk_load_word); + func = CHOOSE_LOAD_FUNC(imm32, sk_load_word); common_load: ctx->seen_ld_abs = true; jmp_offset = func - (image + addrs[i]); if (!func || !is_simm32(jmp_offset)) { pr_err("unsupported bpf func %d addr %p image %p\n", - K, func, image); + imm32, func, image); return -EINVAL; } if (BPF_MODE(insn->code) == BPF_ABS) { /* mov %esi, imm32 */ - EMIT1_off32(0xBE, K); + EMIT1_off32(0xBE, imm32); } else { - /* mov %rsi, x_reg */ - EMIT_mov(BPF_REG_2, x_reg); - if (K) { - if (is_imm8(K)) + /* mov %rsi, src_reg */ + EMIT_mov(BPF_REG_2, src_reg); + if (imm32) { + if (is_imm8(imm32)) /* add %esi, imm8 */ - EMIT3(0x83, 0xC6, K); + EMIT3(0x83, 0xC6, imm32); else /* add %esi, imm32 */ - EMIT2_off32(0x81, 0xC6, K); + EMIT2_off32(0x81, 0xC6, imm32); } } /* skb pointer is in R6 (%rbx), it will be copied into @@ -808,13 +808,13 @@ common_load: ctx->seen_ld_abs = true; func = sk_load_half; goto common_load; case BPF_LD | BPF_ABS | BPF_H: - func = CHOOSE_LOAD_FUNC(K, sk_load_half); + func = CHOOSE_LOAD_FUNC(imm32, sk_load_half); goto common_load; case BPF_LD | BPF_IND | BPF_B: func = sk_load_byte; goto common_load; case BPF_LD | BPF_ABS | BPF_B: - func = CHOOSE_LOAD_FUNC(K, sk_load_byte); + func = CHOOSE_LOAD_FUNC(imm32, sk_load_byte); goto common_load; case BPF_JMP | BPF_EXIT: diff --git a/include/linux/filter.h b/include/linux/filter.h index f0c2ad43b4af..a7e3c48d73a7 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -78,161 +78,173 @@ enum { /* Helper macros for filter block array initializers. */ -/* ALU ops on registers, bpf_add|sub|...: A += X */ +/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ -#define BPF_ALU64_REG(OP, A, X) \ +#define BPF_ALU64_REG(OP, DST, SRC) \ ((struct sock_filter_int) { \ .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \ - .a_reg = A, \ - .x_reg = X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ .off = 0, \ .imm = 0 }) -#define BPF_ALU32_REG(OP, A, X) \ +#define BPF_ALU32_REG(OP, DST, SRC) \ ((struct sock_filter_int) { \ .code = BPF_ALU | BPF_OP(OP) | BPF_X, \ - .a_reg = A, \ - .x_reg = X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ .off = 0, \ .imm = 0 }) -/* ALU ops on immediates, bpf_add|sub|...: A += IMM */ +/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */ -#define BPF_ALU64_IMM(OP, A, IMM) \ +#define BPF_ALU64_IMM(OP, DST, IMM) \ ((struct sock_filter_int) { \ .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \ - .a_reg = A, \ - .x_reg = 0, \ + .dst_reg = DST, \ + .src_reg = 0, \ .off = 0, \ .imm = IMM }) -#define BPF_ALU32_IMM(OP, A, IMM) \ +#define BPF_ALU32_IMM(OP, DST, IMM) \ ((struct sock_filter_int) { \ .code = BPF_ALU | BPF_OP(OP) | BPF_K, \ - .a_reg = A, \ - .x_reg = 0, \ + .dst_reg = DST, \ + .src_reg = 0, \ .off = 0, \ .imm = IMM }) /* Endianess conversion, cpu_to_{l,b}e(), {l,b}e_to_cpu() */ -#define BPF_ENDIAN(TYPE, A, LEN) \ +#define BPF_ENDIAN(TYPE, DST, LEN) \ ((struct sock_filter_int) { \ .code = BPF_ALU | BPF_END | BPF_SRC(TYPE), \ - .a_reg = A, \ - .x_reg = 0, \ + .dst_reg = DST, \ + .src_reg = 0, \ .off = 0, \ .imm = LEN }) -/* Short form of mov, A = X */ +/* Short form of mov, dst_reg = src_reg */ -#define BPF_MOV64_REG(A, X) \ +#define BPF_MOV64_REG(DST, SRC) \ ((struct sock_filter_int) { \ .code = BPF_ALU64 | BPF_MOV | BPF_X, \ - .a_reg = A, \ - .x_reg = X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ .off = 0, \ .imm = 0 }) -#define BPF_MOV32_REG(A, X) \ +#define BPF_MOV32_REG(DST, SRC) \ ((struct sock_filter_int) { \ .code = BPF_ALU | BPF_MOV | BPF_X, \ - .a_reg = A, \ - .x_reg = X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ .off = 0, \ .imm = 0 }) -/* Short form of mov, A = IMM */ +/* Short form of mov, dst_reg = imm32 */ -#define BPF_MOV64_IMM(A, IMM) \ +#define BPF_MOV64_IMM(DST, IMM) \ ((struct sock_filter_int) { \ .code = BPF_ALU64 | BPF_MOV | BPF_K, \ - .a_reg = A, \ - .x_reg = 0, \ + .dst_reg = DST, \ + .src_reg = 0, \ .off = 0, \ .imm = IMM }) -#define BPF_MOV32_IMM(A, IMM) \ +#define BPF_MOV32_IMM(DST, IMM) \ ((struct sock_filter_int) { \ .code = BPF_ALU | BPF_MOV | BPF_K, \ - .a_reg = A, \ - .x_reg = 0, \ + .dst_reg = DST, \ + .src_reg = 0, \ .off = 0, \ .imm = IMM }) -/* Short form of mov based on type, BPF_X: A = X, BPF_K: A = IMM */ +/* Short form of mov based on type, BPF_X: dst_reg = src_reg, BPF_K: dst_reg = imm32 */ -#define BPF_MOV64_RAW(TYPE, A, X, IMM) \ +#define BPF_MOV64_RAW(TYPE, DST, SRC, IMM) \ ((struct sock_filter_int) { \ .code = BPF_ALU64 | BPF_MOV | BPF_SRC(TYPE), \ - .a_reg = A, \ - .x_reg = X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ .off = 0, \ .imm = IMM }) -#define BPF_MOV32_RAW(TYPE, A, X, IMM) \ +#define BPF_MOV32_RAW(TYPE, DST, SRC, IMM) \ ((struct sock_filter_int) { \ .code = BPF_ALU | BPF_MOV | BPF_SRC(TYPE), \ - .a_reg = A, \ - .x_reg = X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ .off = 0, \ .imm = IMM }) -/* Direct packet access, R0 = *(uint *) (skb->data + OFF) */ +/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */ -#define BPF_LD_ABS(SIZE, OFF) \ +#define BPF_LD_ABS(SIZE, IMM) \ ((struct sock_filter_int) { \ .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \ - .a_reg = 0, \ - .x_reg = 0, \ + .dst_reg = 0, \ + .src_reg = 0, \ .off = 0, \ - .imm = OFF }) + .imm = IMM }) -/* Indirect packet access, R0 = *(uint *) (skb->data + X + OFF) */ +/* Indirect packet access, R0 = *(uint *) (skb->data + src_reg + imm32) */ -#define BPF_LD_IND(SIZE, X, OFF) \ +#define BPF_LD_IND(SIZE, SRC, IMM) \ ((struct sock_filter_int) { \ .code = BPF_LD | BPF_SIZE(SIZE) | BPF_IND, \ - .a_reg = 0, \ - .x_reg = X, \ + .dst_reg = 0, \ + .src_reg = SRC, \ .off = 0, \ - .imm = OFF }) + .imm = IMM }) -/* Memory store, A = *(uint *) (X + OFF), and vice versa */ +/* Memory load, dst_reg = *(uint *) (src_reg + off16) */ -#define BPF_LDX_MEM(SIZE, A, X, OFF) \ +#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \ ((struct sock_filter_int) { \ .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \ - .a_reg = A, \ - .x_reg = X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ .off = OFF, \ .imm = 0 }) -#define BPF_STX_MEM(SIZE, A, X, OFF) \ +/* Memory store, *(uint *) (dst_reg + off16) = src_reg */ + +#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \ ((struct sock_filter_int) { \ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \ - .a_reg = A, \ - .x_reg = X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ .off = OFF, \ .imm = 0 }) -/* Conditional jumps against registers, if (A 'op' X) goto pc + OFF */ +/* Memory store, *(uint *) (dst_reg + off16) = imm32 */ + +#define BPF_ST_MEM(SIZE, DST, OFF, IMM) \ + ((struct sock_filter_int) { \ + .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + +/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */ -#define BPF_JMP_REG(OP, A, X, OFF) \ +#define BPF_JMP_REG(OP, DST, SRC, OFF) \ ((struct sock_filter_int) { \ .code = BPF_JMP | BPF_OP(OP) | BPF_X, \ - .a_reg = A, \ - .x_reg = X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ .off = OFF, \ .imm = 0 }) -/* Conditional jumps against immediates, if (A 'op' IMM) goto pc + OFF */ +/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */ -#define BPF_JMP_IMM(OP, A, IMM, OFF) \ +#define BPF_JMP_IMM(OP, DST, IMM, OFF) \ ((struct sock_filter_int) { \ .code = BPF_JMP | BPF_OP(OP) | BPF_K, \ - .a_reg = A, \ - .x_reg = 0, \ + .dst_reg = DST, \ + .src_reg = 0, \ .off = OFF, \ .imm = IMM }) @@ -241,18 +253,18 @@ enum { #define BPF_EMIT_CALL(FUNC) \ ((struct sock_filter_int) { \ .code = BPF_JMP | BPF_CALL, \ - .a_reg = 0, \ - .x_reg = 0, \ + .dst_reg = 0, \ + .src_reg = 0, \ .off = 0, \ .imm = ((FUNC) - __bpf_call_base) }) /* Raw code statement block */ -#define BPF_RAW_INSN(CODE, A, X, OFF, IMM) \ +#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \ ((struct sock_filter_int) { \ .code = CODE, \ - .a_reg = A, \ - .x_reg = X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ .off = OFF, \ .imm = IMM }) @@ -261,8 +273,8 @@ enum { #define BPF_EXIT_INSN() \ ((struct sock_filter_int) { \ .code = BPF_JMP | BPF_EXIT, \ - .a_reg = 0, \ - .x_reg = 0, \ + .dst_reg = 0, \ + .src_reg = 0, \ .off = 0, \ .imm = 0 }) @@ -287,8 +299,8 @@ enum { struct sock_filter_int { __u8 code; /* opcode */ - __u8 a_reg:4; /* dest register */ - __u8 x_reg:4; /* source register */ + __u8 dst_reg:4; /* dest register */ + __u8 src_reg:4; /* source register */ __s16 off; /* signed offset */ __s32 imm; /* signed immediate constant */ }; diff --git a/net/core/filter.c b/net/core/filter.c index 6bd2e350e751..b3f21751b238 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -59,12 +59,12 @@ #define BPF_R10 regs[BPF_REG_10] /* Named registers */ -#define A regs[insn->a_reg] -#define X regs[insn->x_reg] +#define DST regs[insn->dst_reg] +#define SRC regs[insn->src_reg] #define FP regs[BPF_REG_FP] #define ARG1 regs[BPF_REG_ARG1] #define CTX regs[BPF_REG_CTX] -#define K insn->imm +#define IMM insn->imm /* No hurry in this branch * @@ -264,7 +264,7 @@ static unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *ins FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; ARG1 = (u64) (unsigned long) ctx; - /* Register for user BPF programs need to be reset first. */ + /* Registers used in classic BPF programs need to be reset first. */ regs[BPF_REG_A] = 0; regs[BPF_REG_X] = 0; @@ -274,16 +274,16 @@ select_insn: /* ALU */ #define ALU(OPCODE, OP) \ ALU64_##OPCODE##_X: \ - A = A OP X; \ + DST = DST OP SRC; \ CONT; \ ALU_##OPCODE##_X: \ - A = (u32) A OP (u32) X; \ + DST = (u32) DST OP (u32) SRC; \ CONT; \ ALU64_##OPCODE##_K: \ - A = A OP K; \ + DST = DST OP IMM; \ CONT; \ ALU_##OPCODE##_K: \ - A = (u32) A OP (u32) K; \ + DST = (u32) DST OP (u32) IMM; \ CONT; ALU(ADD, +) @@ -296,92 +296,92 @@ select_insn: ALU(MUL, *) #undef ALU ALU_NEG: - A = (u32) -A; + DST = (u32) -DST; CONT; ALU64_NEG: - A = -A; + DST = -DST; CONT; ALU_MOV_X: - A = (u32) X; + DST = (u32) SRC; CONT; ALU_MOV_K: - A = (u32) K; + DST = (u32) IMM; CONT; ALU64_MOV_X: - A = X; + DST = SRC; CONT; ALU64_MOV_K: - A = K; + DST = IMM; CONT; ALU64_ARSH_X: - (*(s64 *) &A) >>= X; + (*(s64 *) &DST) >>= SRC; CONT; ALU64_ARSH_K: - (*(s64 *) &A) >>= K; + (*(s64 *) &DST) >>= IMM; CONT; ALU64_MOD_X: - if (unlikely(X == 0)) + if (unlikely(SRC == 0)) return 0; - tmp = A; - A = do_div(tmp, X); + tmp = DST; + DST = do_div(tmp, SRC); CONT; ALU_MOD_X: - if (unlikely(X == 0)) + if (unlikely(SRC == 0)) return 0; - tmp = (u32) A; - A = do_div(tmp, (u32) X); + tmp = (u32) DST; + DST = do_div(tmp, (u32) SRC); CONT; ALU64_MOD_K: - tmp = A; - A = do_div(tmp, K); + tmp = DST; + DST = do_div(tmp, IMM); CONT; ALU_MOD_K: - tmp = (u32) A; - A = do_div(tmp, (u32) K); + tmp = (u32) DST; + DST = do_div(tmp, (u32) IMM); CONT; ALU64_DIV_X: - if (unlikely(X == 0)) + if (unlikely(SRC == 0)) return 0; - do_div(A, X); + do_div(DST, SRC); CONT; ALU_DIV_X: - if (unlikely(X == 0)) + if (unlikely(SRC == 0)) return 0; - tmp = (u32) A; - do_div(tmp, (u32) X); - A = (u32) tmp; + tmp = (u32) DST; + do_div(tmp, (u32) SRC); + DST = (u32) tmp; CONT; ALU64_DIV_K: - do_div(A, K); + do_div(DST, IMM); CONT; ALU_DIV_K: - tmp = (u32) A; - do_div(tmp, (u32) K); - A = (u32) tmp; + tmp = (u32) DST; + do_div(tmp, (u32) IMM); + DST = (u32) tmp; CONT; ALU_END_TO_BE: - switch (K) { + switch (IMM) { case 16: - A = (__force u16) cpu_to_be16(A); + DST = (__force u16) cpu_to_be16(DST); break; case 32: - A = (__force u32) cpu_to_be32(A); + DST = (__force u32) cpu_to_be32(DST); break; case 64: - A = (__force u64) cpu_to_be64(A); + DST = (__force u64) cpu_to_be64(DST); break; } CONT; ALU_END_TO_LE: - switch (K) { + switch (IMM) { case 16: - A = (__force u16) cpu_to_le16(A); + DST = (__force u16) cpu_to_le16(DST); break; case 32: - A = (__force u32) cpu_to_le32(A); + DST = (__force u32) cpu_to_le32(DST); break; case 64: - A = (__force u64) cpu_to_le64(A); + DST = (__force u64) cpu_to_le64(DST); break; } CONT; @@ -401,85 +401,85 @@ select_insn: insn += insn->off; CONT; JMP_JEQ_X: - if (A == X) { + if (DST == SRC) { insn += insn->off; CONT_JMP; } CONT; JMP_JEQ_K: - if (A == K) { + if (DST == IMM) { insn += insn->off; CONT_JMP; } CONT; JMP_JNE_X: - if (A != X) { + if (DST != SRC) { insn += insn->off; CONT_JMP; } CONT; JMP_JNE_K: - if (A != K) { + if (DST != IMM) { insn += insn->off; CONT_JMP; } CONT; JMP_JGT_X: - if (A > X) { + if (DST > SRC) { insn += insn->off; CONT_JMP; } CONT; JMP_JGT_K: - if (A > K) { + if (DST > IMM) { insn += insn->off; CONT_JMP; } CONT; JMP_JGE_X: - if (A >= X) { + if (DST >= SRC) { insn += insn->off; CONT_JMP; } CONT; JMP_JGE_K: - if (A >= K) { + if (DST >= IMM) { insn += insn->off; CONT_JMP; } CONT; JMP_JSGT_X: - if (((s64) A) > ((s64) X)) { + if (((s64) DST) > ((s64) SRC)) { insn += insn->off; CONT_JMP; } CONT; JMP_JSGT_K: - if (((s64) A) > ((s64) K)) { + if (((s64) DST) > ((s64) IMM)) { insn += insn->off; CONT_JMP; } CONT; JMP_JSGE_X: - if (((s64) A) >= ((s64) X)) { + if (((s64) DST) >= ((s64) SRC)) { insn += insn->off; CONT_JMP; } CONT; JMP_JSGE_K: - if (((s64) A) >= ((s64) K)) { + if (((s64) DST) >= ((s64) IMM)) { insn += insn->off; CONT_JMP; } CONT; JMP_JSET_X: - if (A & X) { + if (DST & SRC) { insn += insn->off; CONT_JMP; } CONT; JMP_JSET_K: - if (A & K) { + if (DST & IMM) { insn += insn->off; CONT_JMP; } @@ -488,15 +488,15 @@ select_insn: return BPF_R0; /* STX and ST and LDX*/ -#define LDST(SIZEOP, SIZE) \ - STX_MEM_##SIZEOP: \ - *(SIZE *)(unsigned long) (A + insn->off) = X; \ - CONT; \ - ST_MEM_##SIZEOP: \ - *(SIZE *)(unsigned long) (A + insn->off) = K; \ - CONT; \ - LDX_MEM_##SIZEOP: \ - A = *(SIZE *)(unsigned long) (X + insn->off); \ +#define LDST(SIZEOP, SIZE) \ + STX_MEM_##SIZEOP: \ + *(SIZE *)(unsigned long) (DST + insn->off) = SRC; \ + CONT; \ + ST_MEM_##SIZEOP: \ + *(SIZE *)(unsigned long) (DST + insn->off) = IMM; \ + CONT; \ + LDX_MEM_##SIZEOP: \ + DST = *(SIZE *)(unsigned long) (SRC + insn->off); \ CONT; LDST(B, u8) @@ -504,16 +504,16 @@ select_insn: LDST(W, u32) LDST(DW, u64) #undef LDST - STX_XADD_W: /* lock xadd *(u32 *)(A + insn->off) += X */ - atomic_add((u32) X, (atomic_t *)(unsigned long) - (A + insn->off)); + STX_XADD_W: /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */ + atomic_add((u32) SRC, (atomic_t *)(unsigned long) + (DST + insn->off)); CONT; - STX_XADD_DW: /* lock xadd *(u64 *)(A + insn->off) += X */ - atomic64_add((u64) X, (atomic64_t *)(unsigned long) - (A + insn->off)); + STX_XADD_DW: /* lock xadd *(u64 *)(dst_reg + off16) += src_reg */ + atomic64_add((u64) SRC, (atomic64_t *)(unsigned long) + (DST + insn->off)); CONT; - LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + K)) */ - off = K; + LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + imm32)) */ + off = IMM; load_word: /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are * only appearing in the programs where ctx == @@ -527,51 +527,51 @@ load_word: * BPF_R6-BPF_R9, and store return value into BPF_R0. * * Implicit input: - * ctx + * ctx == skb == BPF_R6 == CTX * * Explicit input: - * X == any register - * K == 32-bit immediate + * SRC == any register + * IMM == 32-bit immediate * * Output: * BPF_R0 - 8/16/32-bit skb data converted to cpu endianness */ - ptr = load_pointer((struct sk_buff *) ctx, off, 4, &tmp); + ptr = load_pointer((struct sk_buff *) CTX, off, 4, &tmp); if (likely(ptr != NULL)) { BPF_R0 = get_unaligned_be32(ptr); CONT; } return 0; - LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + K)) */ - off = K; + LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + imm32)) */ + off = IMM; load_half: - ptr = load_pointer((struct sk_buff *) ctx, off, 2, &tmp); + ptr = load_pointer((struct sk_buff *) CTX, off, 2, &tmp); if (likely(ptr != NULL)) { BPF_R0 = get_unaligned_be16(ptr); CONT; } return 0; - LD_ABS_B: /* BPF_R0 = *(u8 *) (ctx + K) */ - off = K; + LD_ABS_B: /* BPF_R0 = *(u8 *) (skb->data + imm32) */ + off = IMM; load_byte: - ptr = load_pointer((struct sk_buff *) ctx, off, 1, &tmp); + ptr = load_pointer((struct sk_buff *) CTX, off, 1, &tmp); if (likely(ptr != NULL)) { BPF_R0 = *(u8 *)ptr; CONT; } return 0; - LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + X + K)) */ - off = K + X; + LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + src_reg + imm32)) */ + off = IMM + SRC; goto load_word; - LD_IND_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + X + K)) */ - off = K + X; + LD_IND_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + src_reg + imm32)) */ + off = IMM + SRC; goto load_half; - LD_IND_B: /* BPF_R0 = *(u8 *) (skb->data + X + K) */ - off = K + X; + LD_IND_B: /* BPF_R0 = *(u8 *) (skb->data + src_reg + imm32) */ + off = IMM + SRC; goto load_byte; default_label: @@ -675,7 +675,7 @@ static bool convert_bpf_extensions(struct sock_filter *fp, case SKF_AD_OFF + SKF_AD_PROTOCOL: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); - /* A = *(u16 *) (ctx + offsetof(protocol)) */ + /* A = *(u16 *) (CTX + offsetof(protocol)) */ *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, offsetof(struct sk_buff, protocol)); /* A = ntohs(A) [emitting a nop or swap16] */ @@ -741,7 +741,7 @@ static bool convert_bpf_extensions(struct sock_filter *fp, BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); - /* A = *(u16 *) (ctx + offsetof(vlan_tci)) */ + /* A = *(u16 *) (CTX + offsetof(vlan_tci)) */ *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, offsetof(struct sk_buff, vlan_tci)); if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) { @@ -760,13 +760,13 @@ static bool convert_bpf_extensions(struct sock_filter *fp, case SKF_AD_OFF + SKF_AD_NLATTR_NEST: case SKF_AD_OFF + SKF_AD_CPU: case SKF_AD_OFF + SKF_AD_RANDOM: - /* arg1 = ctx */ + /* arg1 = CTX */ *insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX); /* arg2 = A */ *insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A); /* arg3 = X */ *insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X); - /* Emit call(ctx, arg2=A, arg3=X) */ + /* Emit call(arg1=CTX, arg2=A, arg3=X) */ switch (fp->k) { case SKF_AD_OFF + SKF_AD_PAY_OFFSET: *insn = BPF_EMIT_CALL(__skb_get_pay_offset); @@ -941,12 +941,12 @@ do_pass: */ *insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k); - insn->a_reg = BPF_REG_A; - insn->x_reg = BPF_REG_TMP; + insn->dst_reg = BPF_REG_A; + insn->src_reg = BPF_REG_TMP; bpf_src = BPF_X; } else { - insn->a_reg = BPF_REG_A; - insn->x_reg = BPF_REG_X; + insn->dst_reg = BPF_REG_A; + insn->src_reg = BPF_REG_X; insn->imm = fp->k; bpf_src = BPF_SRC(fp->code); } -- cgit v1.2.3-59-g8ed1b From 67cb9366ff5f99868100198efba5ca88aaa6ad25 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 11 Jun 2014 18:19:28 +0200 Subject: ktime: add ktime_after and ktime_before helper Add two minimal helper functions analogous to time_before() and time_after() that will later on both be needed by SCTP code. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/ktime.h | 24 ++++++++++++++++++++++++ net/sctp/sm_make_chunk.c | 2 +- 2 files changed, 25 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 31c0cd1c941a..de9e46e6bcc9 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -304,6 +304,30 @@ static inline int ktime_compare(const ktime_t cmp1, const ktime_t cmp2) return 0; } +/** + * ktime_after - Compare if a ktime_t value is bigger than another one. + * @cmp1: comparable1 + * @cmp2: comparable2 + * + * Return: true if cmp1 happened after cmp2. + */ +static inline bool ktime_after(const ktime_t cmp1, const ktime_t cmp2) +{ + return ktime_compare(cmp1, cmp2) > 0; +} + +/** + * ktime_before - Compare if a ktime_t value is smaller than another one. + * @cmp1: comparable1 + * @cmp2: comparable2 + * + * Return: true if cmp1 happened before cmp2. + */ +static inline bool ktime_before(const ktime_t cmp1, const ktime_t cmp2) +{ + return ktime_compare(cmp1, cmp2) < 0; +} + static inline s64 ktime_to_us(const ktime_t kt) { struct timeval tv = ktime_to_timeval(kt); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index fee5552ddf92..ae0e616a7ca5 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1782,7 +1782,7 @@ no_hmac: else kt = ktime_get(); - if (!asoc && ktime_compare(bear_cookie->expiration, kt) < 0) { + if (!asoc && ktime_before(bear_cookie->expiration, kt)) { /* * Section 3.3.10.3 Stale Cookie Error (3) * -- cgit v1.2.3-59-g8ed1b From da91309e0a7e8966d916a74cce42ed170fde06bf Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Mon, 9 Jun 2014 10:24:38 +0300 Subject: cpumask: Utility function to set n'th cpu - local cpu first This function sets the n'th cpu - local cpu's first. For example: in a 16 cores server with even cpu's local, will get the following values: cpumask_set_cpu_local_first(0, numa, cpumask) => cpu 0 is set cpumask_set_cpu_local_first(1, numa, cpumask) => cpu 2 is set ... cpumask_set_cpu_local_first(7, numa, cpumask) => cpu 14 is set cpumask_set_cpu_local_first(8, numa, cpumask) => cpu 1 is set cpumask_set_cpu_local_first(9, numa, cpumask) => cpu 3 is set ... cpumask_set_cpu_local_first(15, numa, cpumask) => cpu 15 is set Curently this function will be used by multi queue networking devices to calculate the irq affinity mask, such that as many local cpu's as possible will be utilized to handle the mq device irq's. Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- include/linux/cpumask.h | 8 +++++++ lib/cpumask.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index d08e4d2a9b92..d5ef249735d2 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -142,6 +142,13 @@ static inline unsigned int cpumask_any_but(const struct cpumask *mask, return 1; } +static inline int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp) +{ + set_bit(0, cpumask_bits(dstp)); + + return 0; +} + #define for_each_cpu(cpu, mask) \ for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) #define for_each_cpu_not(cpu, mask) \ @@ -192,6 +199,7 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *); int cpumask_any_but(const struct cpumask *mask, unsigned int cpu); +int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp); /** * for_each_cpu - iterate over every cpu in a mask diff --git a/lib/cpumask.c b/lib/cpumask.c index b810b753c607..c101230658eb 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -164,3 +164,66 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask) memblock_free_early(__pa(mask), cpumask_size()); } #endif + +/** + * cpumask_set_cpu_local_first - set i'th cpu with local numa cpu's first + * + * @i: index number + * @numa_node: local numa_node + * @dstp: cpumask with the relevant cpu bit set according to the policy + * + * This function sets the cpumask according to a numa aware policy. + * cpumask could be used as an affinity hint for the IRQ related to a + * queue. When the policy is to spread queues across cores - local cores + * first. + * + * Returns 0 on success, -ENOMEM for no memory, and -EAGAIN when failed to set + * the cpu bit and need to re-call the function. + */ +int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp) +{ + cpumask_var_t mask; + int cpu; + int ret = 0; + + if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + + i %= num_online_cpus(); + + if (!cpumask_of_node(numa_node)) { + /* Use all online cpu's for non numa aware system */ + cpumask_copy(mask, cpu_online_mask); + } else { + int n; + + cpumask_and(mask, + cpumask_of_node(numa_node), cpu_online_mask); + + n = cpumask_weight(mask); + if (i >= n) { + i -= n; + + /* If index > number of local cpu's, mask out local + * cpu's + */ + cpumask_andnot(mask, cpu_online_mask, mask); + } + } + + for_each_cpu(cpu, mask) { + if (--i < 0) + goto out; + } + + ret = -EAGAIN; + +out: + free_cpumask_var(mask); + + if (!ret) + cpumask_set_cpu(cpu, dstp); + + return ret; +} +EXPORT_SYMBOL(cpumask_set_cpu_local_first); -- cgit v1.2.3-59-g8ed1b From bad93e9d4eeb0d2d6b79204d6cedc7f2e7b256f1 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Thu, 12 Jun 2014 01:36:26 +0300 Subject: net: add __pskb_copy_fclone and pskb_copy_for_clone There are several instances where a pskb_copy or __pskb_copy is immediately followed by an skb_clone. Add a couple of new functions to allow the copy skb to be allocated from the fclone cache and thus speed up subsequent skb_clone calls. Cc: Alexander Smirnov Cc: Dmitry Eremin-Solenikov Cc: Marek Lindner Cc: Simon Wunderlich Cc: Antonio Quartulli Cc: Marcel Holtmann Cc: Gustavo Padovan Cc: Johan Hedberg Cc: Arvid Brodin Cc: Patrick McHardy Cc: Pablo Neira Ayuso Cc: Jozsef Kadlecsik Cc: Lauro Ramos Venancio Cc: Aloisio Almeida Jr Cc: Samuel Ortiz Cc: Jon Maloy Cc: Allan Stephens Cc: Andrew Hendry Cc: Eric Dumazet Reviewed-by: Christoph Paasch Signed-off-by: Octavian Purdila Signed-off-by: David S. Miller --- include/linux/skbuff.h | 16 +++++++++++++++- net/batman-adv/distributed-arp-table.c | 2 +- net/batman-adv/network-coding.c | 2 +- net/bluetooth/hci_sock.c | 6 +++--- net/core/skbuff.c | 14 +++++++++----- net/nfc/llcp_core.c | 4 ++-- net/nfc/rawsock.c | 4 ++-- net/tipc/bcast.c | 2 +- 8 files changed, 34 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c705808bef9c..1f50bfe2243d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -744,7 +744,13 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask); struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority); struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t priority); -struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask); +struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom, + gfp_t gfp_mask, bool fclone); +static inline struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, + gfp_t gfp_mask) +{ + return __pskb_copy_fclone(skb, headroom, gfp_mask, false); +} int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, gfp_t gfp_mask); struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, @@ -2238,6 +2244,14 @@ static inline struct sk_buff *pskb_copy(struct sk_buff *skb, return __pskb_copy(skb, skb_headroom(skb), gfp_mask); } + +static inline struct sk_buff *pskb_copy_for_clone(struct sk_buff *skb, + gfp_t gfp_mask) +{ + return __pskb_copy_fclone(skb, skb_headroom(skb), gfp_mask, true); +} + + /** * skb_clone_writable - is the header of a clone writable * @skb: buffer to check diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index dcd99b2bea3c..f2c066b21716 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -594,7 +594,7 @@ static bool batadv_dat_send_data(struct batadv_priv *bat_priv, if (!neigh_node) goto free_orig; - tmp_skb = pskb_copy(skb, GFP_ATOMIC); + tmp_skb = pskb_copy_for_clone(skb, GFP_ATOMIC); if (!batadv_send_skb_prepare_unicast_4addr(bat_priv, tmp_skb, cand[i].orig_node, packet_subtype)) { diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 40a2fc4bcf4c..8d04d174669e 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -1344,7 +1344,7 @@ static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv, struct ethhdr *ethhdr; /* Copy skb header to change the mac header */ - skb = pskb_copy(skb, GFP_ATOMIC); + skb = pskb_copy_for_clone(skb, GFP_ATOMIC); if (!skb) return; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index f608bffdb8b9..80d25c150a65 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -143,7 +143,7 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) if (!skb_copy) { /* Create a private copy with headroom */ - skb_copy = __pskb_copy(skb, 1, GFP_ATOMIC); + skb_copy = __pskb_copy_fclone(skb, 1, GFP_ATOMIC, true); if (!skb_copy) continue; @@ -247,8 +247,8 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) struct hci_mon_hdr *hdr; /* Create a private copy with headroom */ - skb_copy = __pskb_copy(skb, HCI_MON_HDR_SIZE, - GFP_ATOMIC); + skb_copy = __pskb_copy_fclone(skb, HCI_MON_HDR_SIZE, + GFP_ATOMIC, true); if (!skb_copy) continue; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 05f4bef2ce12..b9e85e6cb26a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -951,10 +951,13 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) EXPORT_SYMBOL(skb_copy); /** - * __pskb_copy - create copy of an sk_buff with private head. + * __pskb_copy_fclone - create copy of an sk_buff with private head. * @skb: buffer to copy * @headroom: headroom of new skb * @gfp_mask: allocation priority + * @fclone: if true allocate the copy of the skb from the fclone + * cache instead of the head cache; it is recommended to set this + * to true for the cases where the copy will likely be cloned * * Make a copy of both an &sk_buff and part of its data, located * in header. Fragmented data remain shared. This is used when @@ -964,11 +967,12 @@ EXPORT_SYMBOL(skb_copy); * The returned buffer has a reference count of 1. */ -struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask) +struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom, + gfp_t gfp_mask, bool fclone) { unsigned int size = skb_headlen(skb) + headroom; - struct sk_buff *n = __alloc_skb(size, gfp_mask, - skb_alloc_rx_flag(skb), NUMA_NO_NODE); + int flags = skb_alloc_rx_flag(skb) | (fclone ? SKB_ALLOC_FCLONE : 0); + struct sk_buff *n = __alloc_skb(size, gfp_mask, flags, NUMA_NO_NODE); if (!n) goto out; @@ -1008,7 +1012,7 @@ struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask) out: return n; } -EXPORT_SYMBOL(__pskb_copy); +EXPORT_SYMBOL(__pskb_copy_fclone); /** * pskb_expand_head - reallocate header of &sk_buff diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index f6278da68763..51e788797317 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -680,8 +680,8 @@ void nfc_llcp_send_to_raw_sock(struct nfc_llcp_local *local, continue; if (skb_copy == NULL) { - skb_copy = __pskb_copy(skb, NFC_RAW_HEADER_SIZE, - GFP_ATOMIC); + skb_copy = __pskb_copy_fclone(skb, NFC_RAW_HEADER_SIZE, + GFP_ATOMIC, true); if (skb_copy == NULL) continue; diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index 55eefee311eb..11c3544ea546 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -378,8 +378,8 @@ void nfc_send_to_raw_sock(struct nfc_dev *dev, struct sk_buff *skb, sk_for_each(sk, &raw_sk_list.head) { if (!skb_copy) { - skb_copy = __pskb_copy(skb, NFC_RAW_HEADER_SIZE, - GFP_ATOMIC); + skb_copy = __pskb_copy_fclone(skb, NFC_RAW_HEADER_SIZE, + GFP_ATOMIC, true); if (!skb_copy) continue; diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 671f9817b4f4..26631679a1fa 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -653,7 +653,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, tipc_bearer_send(b->identity, buf, &b->bcast_addr); } else { /* Avoid concurrent buffer access */ - tbuf = pskb_copy(buf, GFP_ATOMIC); + tbuf = pskb_copy_for_clone(buf, GFP_ATOMIC); if (!tbuf) break; tipc_bearer_send(b->identity, tbuf, &b->bcast_addr); -- cgit v1.2.3-59-g8ed1b From 5d0c2b95bc57cf8fdc0e7b3e9d7e751eb65ad052 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 10 Jun 2014 18:54:13 -0700 Subject: net: Preserve CHECKSUM_COMPLETE at validation Currently when the first checksum in a packet is validated using CHECKSUM_COMPLETE, ip_summed is overwritten to be CHECKSUM_UNNECESSARY so that any subsequent checksums in the packet are not correctly validated. This patch adds csum_valid flag in sk_buff and uses that to indicate validated checksum instead of setting CHECKSUM_UNNECESSARY. The bit is set accordingly in the skb_checksum_validate_* functions. The flag is checked in skb_checksum_complete, so that validation is communicated between checksum_init and checksum_complete sequence in TCP and UDP. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 1f50bfe2243d..72a53805858a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -572,7 +572,8 @@ struct sk_buff { */ __u8 encapsulation:1; __u8 encap_hdr_csum:1; - /* 5/7 bit hole (depending on ndisc_nodetype presence) */ + __u8 csum_valid:1; + /* 4/6 bit hole (depending on ndisc_nodetype presence) */ kmemcheck_bitfield_end(flags2); #if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL @@ -2735,7 +2736,7 @@ __sum16 __skb_checksum_complete(struct sk_buff *skb); static inline int skb_csum_unnecessary(const struct sk_buff *skb) { - return skb->ip_summed & CHECKSUM_UNNECESSARY; + return ((skb->ip_summed & CHECKSUM_UNNECESSARY) || skb->csum_valid); } /** @@ -2769,10 +2770,8 @@ static inline bool __skb_checksum_validate_needed(struct sk_buff *skb, bool zero_okay, __sum16 check) { - if (skb_csum_unnecessary(skb)) { - return false; - } else if (zero_okay && !check) { - skb->ip_summed = CHECKSUM_UNNECESSARY; + if (skb_csum_unnecessary(skb) || (zero_okay && !check)) { + skb->csum_valid = 1; return false; } @@ -2799,15 +2798,20 @@ static inline __sum16 __skb_checksum_validate_complete(struct sk_buff *skb, { if (skb->ip_summed == CHECKSUM_COMPLETE) { if (!csum_fold(csum_add(psum, skb->csum))) { - skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->csum_valid = 1; return 0; } } skb->csum = psum; - if (complete || skb->len <= CHECKSUM_BREAK) - return __skb_checksum_complete(skb); + if (complete || skb->len <= CHECKSUM_BREAK) { + __sum16 csum; + + csum = __skb_checksum_complete(skb); + skb->csum_valid = !csum; + return csum; + } return 0; } @@ -2831,6 +2835,7 @@ static inline __wsum null_compute_pseudo(struct sk_buff *skb, int proto) zero_okay, check, compute_pseudo) \ ({ \ __sum16 __ret = 0; \ + skb->csum_valid = 0; \ if (__skb_checksum_validate_needed(skb, zero_okay, check)) \ __ret = __skb_checksum_validate_complete(skb, \ complete, compute_pseudo(skb, proto)); \ -- cgit v1.2.3-59-g8ed1b From 7e3cead5172927732f51fde77fef6f521e22f209 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 10 Jun 2014 18:54:19 -0700 Subject: net: Save software checksum complete In skb_checksum complete, if we need to compute the checksum for the packet (via skb_checksum) save the result as CHECKSUM_COMPLETE. Subsequent checksum verification can use this. Also, added csum_complete_sw flag to distinguish between software and hardware generated checksum complete, we should always be able to trust the software computation. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 ++- net/core/datagram.c | 14 +++++++++----- net/ipv4/gre_offload.c | 6 ++++-- net/sunrpc/socklib.c | 3 ++- 4 files changed, 17 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 72a53805858a..5b5cd3189c98 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -573,7 +573,8 @@ struct sk_buff { __u8 encapsulation:1; __u8 encap_hdr_csum:1; __u8 csum_valid:1; - /* 4/6 bit hole (depending on ndisc_nodetype presence) */ + __u8 csum_complete_sw:1; + /* 3/5 bit hole (depending on ndisc_nodetype presence) */ kmemcheck_bitfield_end(flags2); #if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL diff --git a/net/core/datagram.c b/net/core/datagram.c index a16ed7bbe376..6b1c04ca1d50 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -739,11 +739,15 @@ __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len) __sum16 sum; sum = csum_fold(skb_checksum(skb, 0, len, skb->csum)); - if (likely(!sum)) { - if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) - netdev_rx_csum_fault(skb->dev); - skb->ip_summed = CHECKSUM_UNNECESSARY; - } + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && !sum && + !skb->csum_complete_sw) + netdev_rx_csum_fault(skb->dev); + + /* Save checksum complete for later use */ + skb->csum = sum; + skb->ip_summed = CHECKSUM_COMPLETE; + skb->csum_complete_sw = 1; + return sum; } EXPORT_SYMBOL(__skb_checksum_complete_head); diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 24deb3928b9e..eb92deb12666 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -131,10 +131,12 @@ static __sum16 gro_skb_checksum(struct sk_buff *skb) csum_partial(skb->data, skb_gro_offset(skb), 0)); sum = csum_fold(NAPI_GRO_CB(skb)->csum); if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) { - if (unlikely(!sum)) + if (unlikely(!sum) && !skb->csum_complete_sw) netdev_rx_csum_fault(skb->dev); - } else + } else { skb->ip_summed = CHECKSUM_COMPLETE; + skb->csum_complete_sw = 1; + } return sum; } diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c index 0a648c502fc3..2df87f78e518 100644 --- a/net/sunrpc/socklib.c +++ b/net/sunrpc/socklib.c @@ -173,7 +173,8 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) return -1; if (csum_fold(desc.csum)) return -1; - if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && + !skb->csum_complete_sw) netdev_rx_csum_fault(skb->dev); return 0; no_checksum: -- cgit v1.2.3-59-g8ed1b