aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/dsa/dsa.txt3
-rw-r--r--MAINTAINERS1
-rw-r--r--drivers/net/can/sja1000/plx_pci.c18
-rw-r--r--drivers/net/ethernet/apm/xgene/xgene_enet_hw.c12
-rw-r--r--drivers/net/ethernet/apm/xgene/xgene_enet_hw.h2
-rw-r--r--drivers/net/ethernet/apm/xgene/xgene_enet_main.c3
-rw-r--r--drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c12
-rw-r--r--drivers/net/ethernet/broadcom/bgmac.c9
-rw-r--r--drivers/net/ethernet/broadcom/bnx2.c48
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c11
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c4
-rw-r--r--drivers/net/ethernet/brocade/bna/bnad.c4
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h2
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hnae.c8
-rw-r--r--drivers/net/ethernet/ibm/ehea/ehea_main.c2
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.c10
-rw-r--r--drivers/net/ethernet/marvell/mv643xx_eth.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_netdev.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c31
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c134
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c14
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_hsi.h3
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_ll2.c1
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_main.c17
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_ethtool.c25
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_main.c2
-rw-r--r--drivers/net/ethernet/qualcomm/emac/emac-mac.c15
-rw-r--r--drivers/net/ethernet/qualcomm/emac/emac-sgmii.c2
-rw-r--r--drivers/net/ethernet/sfc/efx.c3
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c7
-rw-r--r--drivers/net/ethernet/ti/cpsw-phy-sel.c3
-rw-r--r--drivers/net/ethernet/ti/davinci_emac.c10
-rw-r--r--drivers/net/ethernet/toshiba/ps3_gelic_wireless.c2
-rw-r--r--drivers/net/ethernet/xscale/ixp4xx_eth.c3
-rw-r--r--drivers/net/macvlan.c31
-rw-r--r--drivers/net/phy/phy_device.c2
-rw-r--r--drivers/net/usb/ax88179_178a.c17
-rw-r--r--drivers/net/usb/r8152.c21
-rw-r--r--drivers/net/virtio_net.c30
-rw-r--r--drivers/net/vxlan.c4
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/d3.c49
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c4
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c3
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mvm.h1
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/ops.c1
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c3
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/scan.c33
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/drv.c79
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/tx.c8
-rw-r--r--drivers/net/xen-netfront.c4
-rw-r--r--include/linux/ipv6.h2
-rw-r--r--include/linux/netdevice.h15
-rw-r--r--include/net/ip.h3
-rw-r--r--include/net/ip6_tunnel.h1
-rw-r--r--include/net/netfilter/nf_conntrack_labels.h3
-rw-r--r--include/net/netfilter/nf_tables.h8
-rw-r--r--include/net/sctp/sctp.h2
-rw-r--r--include/net/sock.h4
-rw-r--r--include/net/tcp.h3
-rw-r--r--include/uapi/linux/atm_zatm.h1
-rw-r--r--include/uapi/linux/bpqether.h2
-rw-r--r--kernel/bpf/hashtab.c3
-rw-r--r--kernel/bpf/syscall.c4
-rw-r--r--kernel/taskstats.c6
-rw-r--r--net/can/bcm.c32
-rw-r--r--net/core/dev.c19
-rw-r--r--net/core/filter.c68
-rw-r--r--net/core/flow_dissector.c11
-rw-r--r--net/core/rtnetlink.c1
-rw-r--r--net/core/sock.c6
-rw-r--r--net/dccp/ipv4.c16
-rw-r--r--net/dccp/ipv6.c19
-rw-r--r--net/dccp/proto.c4
-rw-r--r--net/ipv4/af_inet.c9
-rw-r--r--net/ipv4/fib_trie.c21
-rw-r--r--net/ipv4/icmp.c4
-rw-r--r--net/ipv4/ip_forward.c2
-rw-r--r--net/ipv4/ip_output.c25
-rw-r--r--net/ipv4/ip_tunnel_core.c11
-rw-r--r--net/ipv4/ipmr.c2
-rw-r--r--net/ipv4/netfilter/nft_dup_ipv4.c6
-rw-r--r--net/ipv4/route.c4
-rw-r--r--net/ipv4/tcp.c4
-rw-r--r--net/ipv4/tcp_dctcp.c13
-rw-r--r--net/ipv4/tcp_ipv4.c19
-rw-r--r--net/ipv6/icmp.c2
-rw-r--r--net/ipv6/ip6_output.c2
-rw-r--r--net/ipv6/ip6_udp_tunnel.c3
-rw-r--r--net/ipv6/netfilter/nft_dup_ipv6.c6
-rw-r--r--net/ipv6/route.c4
-rw-r--r--net/ipv6/tcp_ipv6.c14
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c7
-rw-r--r--net/netfilter/nf_conntrack_core.c49
-rw-r--r--net/netfilter/nf_conntrack_helper.c11
-rw-r--r--net/netfilter/nf_conntrack_sip.c5
-rw-r--r--net/netfilter/nf_tables_api.c18
-rw-r--r--net/netfilter/nft_dynset.c19
-rw-r--r--net/netfilter/nft_set_hash.c19
-rw-r--r--net/netfilter/nft_set_rbtree.c2
-rw-r--r--net/netfilter/xt_connmark.c4
-rw-r--r--net/netlink/diag.c5
-rw-r--r--net/netlink/genetlink.c4
-rw-r--r--net/sctp/input.c35
-rw-r--r--net/sctp/ipv6.c2
-rw-r--r--net/sctp/socket.c27
-rw-r--r--net/socket.c2
-rw-r--r--net/unix/af_unix.c3
-rw-r--r--samples/bpf/Makefile4
-rwxr-xr-xsamples/bpf/tc_l2_redirect.sh173
-rw-r--r--samples/bpf/tc_l2_redirect_kern.c236
-rw-r--r--samples/bpf/tc_l2_redirect_user.c73
120 files changed, 1358 insertions, 465 deletions
diff --git a/Documentation/networking/dsa/dsa.txt b/Documentation/networking/dsa/dsa.txt
index 6d6c07cf1a9a..63912ef34606 100644
--- a/Documentation/networking/dsa/dsa.txt
+++ b/Documentation/networking/dsa/dsa.txt
@@ -67,13 +67,14 @@ Note that DSA does not currently create network interfaces for the "cpu" and
Switch tagging protocols
------------------------
-DSA currently supports 4 different tagging protocols, and a tag-less mode as
+DSA currently supports 5 different tagging protocols, and a tag-less mode as
well. The different protocols are implemented in:
net/dsa/tag_trailer.c: Marvell's 4 trailer tag mode (legacy)
net/dsa/tag_dsa.c: Marvell's original DSA tag
net/dsa/tag_edsa.c: Marvell's enhanced DSA tag
net/dsa/tag_brcm.c: Broadcom's 4 bytes tag
+net/dsa/tag_qca.c: Qualcomm's 2 bytes tag
The exact format of the tag protocol is vendor specific, but in general, they
all contain something which:
diff --git a/MAINTAINERS b/MAINTAINERS
index 851b89b9edcb..2a58eeac9452 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8057,6 +8057,7 @@ F: drivers/infiniband/hw/mlx4/
F: include/linux/mlx4/
MELLANOX MLX5 core VPI driver
+M: Saeed Mahameed <saeedm@mellanox.com>
M: Matan Barak <matanb@mellanox.com>
M: Leon Romanovsky <leonro@mellanox.com>
L: netdev@vger.kernel.org
diff --git a/drivers/net/can/sja1000/plx_pci.c b/drivers/net/can/sja1000/plx_pci.c
index 3eb7430dffbf..f8ff25c8ee2e 100644
--- a/drivers/net/can/sja1000/plx_pci.c
+++ b/drivers/net/can/sja1000/plx_pci.c
@@ -142,6 +142,9 @@ struct plx_pci_card {
#define CTI_PCI_VENDOR_ID 0x12c4
#define CTI_PCI_DEVICE_ID_CRG001 0x0900
+#define MOXA_PCI_VENDOR_ID 0x1393
+#define MOXA_PCI_DEVICE_ID 0x0100
+
static void plx_pci_reset_common(struct pci_dev *pdev);
static void plx9056_pci_reset_common(struct pci_dev *pdev);
static void plx_pci_reset_marathon_pci(struct pci_dev *pdev);
@@ -258,6 +261,14 @@ static struct plx_pci_card_info plx_pci_card_info_elcus = {
/* based on PLX9030 */
};
+static struct plx_pci_card_info plx_pci_card_info_moxa = {
+ "MOXA", 2,
+ PLX_PCI_CAN_CLOCK, PLX_PCI_OCR, PLX_PCI_CDR,
+ {0, 0x00, 0x00}, { {0, 0x00, 0x80}, {1, 0x00, 0x80} },
+ &plx_pci_reset_common
+ /* based on PLX9052 */
+};
+
static const struct pci_device_id plx_pci_tbl[] = {
{
/* Adlink PCI-7841/cPCI-7841 */
@@ -357,6 +368,13 @@ static const struct pci_device_id plx_pci_tbl[] = {
0, 0,
(kernel_ulong_t)&plx_pci_card_info_elcus
},
+ {
+ /* moxa */
+ MOXA_PCI_VENDOR_ID, MOXA_PCI_DEVICE_ID,
+ PCI_ANY_ID, PCI_ANY_ID,
+ 0, 0,
+ (kernel_ulong_t)&plx_pci_card_info_moxa
+ },
{ 0,}
};
MODULE_DEVICE_TABLE(pci, plx_pci_tbl);
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
index c481f104a8fe..5390ae89136c 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
@@ -204,17 +204,6 @@ static u32 xgene_enet_ring_len(struct xgene_enet_desc_ring *ring)
return num_msgs;
}
-static void xgene_enet_setup_coalescing(struct xgene_enet_desc_ring *ring)
-{
- u32 data = 0x7777;
-
- xgene_enet_ring_wr32(ring, CSR_PBM_COAL, 0x8e);
- xgene_enet_ring_wr32(ring, CSR_PBM_CTICK1, data);
- xgene_enet_ring_wr32(ring, CSR_PBM_CTICK2, data << 16);
- xgene_enet_ring_wr32(ring, CSR_THRESHOLD0_SET1, 0x40);
- xgene_enet_ring_wr32(ring, CSR_THRESHOLD1_SET1, 0x80);
-}
-
void xgene_enet_parse_error(struct xgene_enet_desc_ring *ring,
struct xgene_enet_pdata *pdata,
enum xgene_enet_err_code status)
@@ -929,5 +918,4 @@ struct xgene_ring_ops xgene_ring1_ops = {
.clear = xgene_enet_clear_ring,
.wr_cmd = xgene_enet_wr_cmd,
.len = xgene_enet_ring_len,
- .coalesce = xgene_enet_setup_coalescing,
};
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h
index 8456337a237d..06e598c8bc16 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h
@@ -55,8 +55,10 @@ enum xgene_enet_rm {
#define PREFETCH_BUF_EN BIT(21)
#define CSR_RING_ID_BUF 0x000c
#define CSR_PBM_COAL 0x0014
+#define CSR_PBM_CTICK0 0x0018
#define CSR_PBM_CTICK1 0x001c
#define CSR_PBM_CTICK2 0x0020
+#define CSR_PBM_CTICK3 0x0024
#define CSR_THRESHOLD0_SET1 0x0030
#define CSR_THRESHOLD1_SET1 0x0034
#define CSR_RING_NE_INT_MODE 0x017c
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
index 429f18fc5503..8158d4698734 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
@@ -1188,7 +1188,8 @@ static int xgene_enet_create_desc_rings(struct net_device *ndev)
tx_ring->dst_ring_num = xgene_enet_dst_ring_num(cp_ring);
}
- pdata->ring_ops->coalesce(pdata->tx_ring[0]);
+ if (pdata->ring_ops->coalesce)
+ pdata->ring_ops->coalesce(pdata->tx_ring[0]);
pdata->tx_qcnt_hi = pdata->tx_ring[0]->slots - 128;
return 0;
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c b/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c
index 2b76732add5d..af51dd5844ce 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c
@@ -30,7 +30,7 @@ static void xgene_enet_ring_init(struct xgene_enet_desc_ring *ring)
ring_cfg[0] |= SET_VAL(X2_INTLINE, ring->id & RING_BUFNUM_MASK);
ring_cfg[3] |= SET_BIT(X2_DEQINTEN);
}
- ring_cfg[0] |= SET_VAL(X2_CFGCRID, 1);
+ ring_cfg[0] |= SET_VAL(X2_CFGCRID, 2);
addr >>= 8;
ring_cfg[2] |= QCOHERENT | SET_VAL(RINGADDRL, addr);
@@ -192,13 +192,15 @@ static u32 xgene_enet_ring_len(struct xgene_enet_desc_ring *ring)
static void xgene_enet_setup_coalescing(struct xgene_enet_desc_ring *ring)
{
- u32 data = 0x7777;
+ u32 data = 0x77777777;
xgene_enet_ring_wr32(ring, CSR_PBM_COAL, 0x8e);
+ xgene_enet_ring_wr32(ring, CSR_PBM_CTICK0, data);
xgene_enet_ring_wr32(ring, CSR_PBM_CTICK1, data);
- xgene_enet_ring_wr32(ring, CSR_PBM_CTICK2, data << 16);
- xgene_enet_ring_wr32(ring, CSR_THRESHOLD0_SET1, 0x40);
- xgene_enet_ring_wr32(ring, CSR_THRESHOLD1_SET1, 0x80);
+ xgene_enet_ring_wr32(ring, CSR_PBM_CTICK2, data);
+ xgene_enet_ring_wr32(ring, CSR_PBM_CTICK3, data);
+ xgene_enet_ring_wr32(ring, CSR_THRESHOLD0_SET1, 0x08);
+ xgene_enet_ring_wr32(ring, CSR_THRESHOLD1_SET1, 0x10);
}
struct xgene_ring_ops xgene_ring2_ops = {
diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index 31ca204b38d2..49f4cafe5438 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -307,6 +307,10 @@ static void bgmac_dma_rx_enable(struct bgmac *bgmac,
u32 ctl;
ctl = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_RX_CTL);
+
+ /* preserve ONLY bits 16-17 from current hardware value */
+ ctl &= BGMAC_DMA_RX_ADDREXT_MASK;
+
if (bgmac->feature_flags & BGMAC_FEAT_RX_MASK_SETUP) {
ctl &= ~BGMAC_DMA_RX_BL_MASK;
ctl |= BGMAC_DMA_RX_BL_128 << BGMAC_DMA_RX_BL_SHIFT;
@@ -317,7 +321,6 @@ static void bgmac_dma_rx_enable(struct bgmac *bgmac,
ctl &= ~BGMAC_DMA_RX_PT_MASK;
ctl |= BGMAC_DMA_RX_PT_1 << BGMAC_DMA_RX_PT_SHIFT;
}
- ctl &= BGMAC_DMA_RX_ADDREXT_MASK;
ctl |= BGMAC_DMA_RX_ENABLE;
ctl |= BGMAC_DMA_RX_PARITY_DISABLE;
ctl |= BGMAC_DMA_RX_OVERFLOW_CONT;
@@ -1046,9 +1049,9 @@ static void bgmac_enable(struct bgmac *bgmac)
mode = (bgmac_read(bgmac, BGMAC_DEV_STATUS) & BGMAC_DS_MM_MASK) >>
BGMAC_DS_MM_SHIFT;
- if (!(bgmac->feature_flags & BGMAC_FEAT_CLKCTLST) || mode != 0)
+ if (bgmac->feature_flags & BGMAC_FEAT_CLKCTLST || mode != 0)
bgmac_set(bgmac, BCMA_CLKCTLST, BCMA_CLKCTLST_FORCEHT);
- if (bgmac->feature_flags & BGMAC_FEAT_CLKCTLST && mode == 2)
+ if (!(bgmac->feature_flags & BGMAC_FEAT_CLKCTLST) && mode == 2)
bgmac_cco_ctl_maskset(bgmac, 1, ~0,
BGMAC_CHIPCTL_1_RXC_DLL_BYPASS);
diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index b3791b394715..1f7034d739b0 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -49,6 +49,7 @@
#include <linux/firmware.h>
#include <linux/log2.h>
#include <linux/aer.h>
+#include <linux/crash_dump.h>
#if IS_ENABLED(CONFIG_CNIC)
#define BCM_CNIC 1
@@ -4764,15 +4765,16 @@ bnx2_setup_msix_tbl(struct bnx2 *bp)
BNX2_WR(bp, BNX2_PCI_GRC_WINDOW3_ADDR, BNX2_MSIX_PBA_ADDR);
}
-static int
-bnx2_reset_chip(struct bnx2 *bp, u32 reset_code)
+static void
+bnx2_wait_dma_complete(struct bnx2 *bp)
{
u32 val;
- int i, rc = 0;
- u8 old_port;
+ int i;
- /* Wait for the current PCI transaction to complete before
- * issuing a reset. */
+ /*
+ * Wait for the current PCI transaction to complete before
+ * issuing a reset.
+ */
if ((BNX2_CHIP(bp) == BNX2_CHIP_5706) ||
(BNX2_CHIP(bp) == BNX2_CHIP_5708)) {
BNX2_WR(bp, BNX2_MISC_ENABLE_CLR_BITS,
@@ -4796,6 +4798,21 @@ bnx2_reset_chip(struct bnx2 *bp, u32 reset_code)
}
}
+ return;
+}
+
+
+static int
+bnx2_reset_chip(struct bnx2 *bp, u32 reset_code)
+{
+ u32 val;
+ int i, rc = 0;
+ u8 old_port;
+
+ /* Wait for the current PCI transaction to complete before
+ * issuing a reset. */
+ bnx2_wait_dma_complete(bp);
+
/* Wait for the firmware to tell us it is ok to issue a reset. */
bnx2_fw_sync(bp, BNX2_DRV_MSG_DATA_WAIT0 | reset_code, 1, 1);
@@ -6361,6 +6378,10 @@ bnx2_open(struct net_device *dev)
struct bnx2 *bp = netdev_priv(dev);
int rc;
+ rc = bnx2_request_firmware(bp);
+ if (rc < 0)
+ goto out;
+
netif_carrier_off(dev);
bnx2_disable_int(bp);
@@ -6429,6 +6450,7 @@ open_err:
bnx2_free_irq(bp);
bnx2_free_mem(bp);
bnx2_del_napi(bp);
+ bnx2_release_firmware(bp);
goto out;
}
@@ -8575,12 +8597,15 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
pci_set_drvdata(pdev, dev);
- rc = bnx2_request_firmware(bp);
- if (rc < 0)
- goto error;
-
+ /*
+ * In-flight DMA from 1st kernel could continue going in kdump kernel.
+ * New io-page table has been created before bnx2 does reset at open stage.
+ * We have to wait for the in-flight DMA to complete to avoid it look up
+ * into the newly created io-page table.
+ */
+ if (is_kdump_kernel())
+ bnx2_wait_dma_complete(bp);
- bnx2_reset_chip(bp, BNX2_DRV_MSG_CODE_RESET);
memcpy(dev->dev_addr, bp->mac_addr, ETH_ALEN);
dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG |
@@ -8613,7 +8638,6 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
return 0;
error:
- bnx2_release_firmware(bp);
pci_iounmap(pdev, bp->regview);
pci_release_regions(pdev);
pci_disable_device(pdev);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index a9f9f3738022..c6909660e097 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -6309,6 +6309,7 @@ static int bnxt_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
struct tc_to_netdev *ntc)
{
struct bnxt *bp = netdev_priv(dev);
+ bool sh = false;
u8 tc;
if (ntc->type != TC_SETUP_MQPRIO)
@@ -6325,12 +6326,11 @@ static int bnxt_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
if (netdev_get_num_tc(dev) == tc)
return 0;
+ if (bp->flags & BNXT_FLAG_SHARED_RINGS)
+ sh = true;
+
if (tc) {
int max_rx_rings, max_tx_rings, rc;
- bool sh = false;
-
- if (bp->flags & BNXT_FLAG_SHARED_RINGS)
- sh = true;
rc = bnxt_get_max_rings(bp, &max_rx_rings, &max_tx_rings, sh);
if (rc || bp->tx_nr_rings_per_tc * tc > max_tx_rings)
@@ -6348,7 +6348,8 @@ static int bnxt_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
bp->tx_nr_rings = bp->tx_nr_rings_per_tc;
netdev_reset_tc(dev);
}
- bp->cp_nr_rings = max_t(int, bp->tx_nr_rings, bp->rx_nr_rings);
+ bp->cp_nr_rings = sh ? max_t(int, bp->tx_nr_rings, bp->rx_nr_rings) :
+ bp->tx_nr_rings + bp->rx_nr_rings;
bp->num_stat_ctxs = bp->cp_nr_rings;
if (netif_running(bp->dev))
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index ec6cd18842c3..60e2af8678bd 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -774,8 +774,8 @@ static int bnxt_vf_set_link(struct bnxt *bp, struct bnxt_vf_info *vf)
if (vf->flags & BNXT_VF_LINK_UP) {
/* if physical link is down, force link up on VF */
- if (phy_qcfg_resp.link ==
- PORT_PHY_QCFG_RESP_LINK_NO_LINK) {
+ if (phy_qcfg_resp.link !=
+ PORT_PHY_QCFG_RESP_LINK_LINK) {
phy_qcfg_resp.link =
PORT_PHY_QCFG_RESP_LINK_LINK;
phy_qcfg_resp.link_speed = cpu_to_le16(
diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c
index f9df4b5ae90e..f42f672b0e7e 100644
--- a/drivers/net/ethernet/brocade/bna/bnad.c
+++ b/drivers/net/ethernet/brocade/bna/bnad.c
@@ -177,6 +177,7 @@ bnad_txcmpl_process(struct bnad *bnad, struct bna_tcb *tcb)
return 0;
hw_cons = *(tcb->hw_consumer_index);
+ rmb();
cons = tcb->consumer_index;
q_depth = tcb->q_depth;
@@ -3094,7 +3095,7 @@ bnad_start_xmit(struct sk_buff *skb, struct net_device *netdev)
BNA_QE_INDX_INC(prod, q_depth);
tcb->producer_index = prod;
- smp_mb();
+ wmb();
if (unlikely(!test_bit(BNAD_TXQ_TX_STARTED, &tcb->flags)))
return NETDEV_TX_OK;
@@ -3102,7 +3103,6 @@ bnad_start_xmit(struct sk_buff *skb, struct net_device *netdev)
skb_tx_timestamp(skb);
bna_txq_prod_indx_doorbell(tcb);
- smp_mb();
return NETDEV_TX_OK;
}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
index 50812a1d67bd..df1573c4a659 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
@@ -178,9 +178,9 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN
CH_PCI_ID_TABLE_FENTRY(0x6005),
CH_PCI_ID_TABLE_FENTRY(0x6006),
CH_PCI_ID_TABLE_FENTRY(0x6007),
+ CH_PCI_ID_TABLE_FENTRY(0x6008),
CH_PCI_ID_TABLE_FENTRY(0x6009),
CH_PCI_ID_TABLE_FENTRY(0x600d),
- CH_PCI_ID_TABLE_FENTRY(0x6010),
CH_PCI_ID_TABLE_FENTRY(0x6011),
CH_PCI_ID_TABLE_FENTRY(0x6014),
CH_PCI_ID_TABLE_FENTRY(0x6015),
diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.c b/drivers/net/ethernet/hisilicon/hns/hnae.c
index c54c6fac0d1d..b6ed818f78ff 100644
--- a/drivers/net/ethernet/hisilicon/hns/hnae.c
+++ b/drivers/net/ethernet/hisilicon/hns/hnae.c
@@ -332,8 +332,10 @@ struct hnae_handle *hnae_get_handle(struct device *owner_dev,
return ERR_PTR(-ENODEV);
handle = dev->ops->get_handle(dev, port_id);
- if (IS_ERR(handle))
+ if (IS_ERR(handle)) {
+ put_device(&dev->cls_dev);
return handle;
+ }
handle->dev = dev;
handle->owner_dev = owner_dev;
@@ -356,6 +358,8 @@ out_when_init_queue:
for (j = i - 1; j >= 0; j--)
hnae_fini_queue(handle->qs[j]);
+ put_device(&dev->cls_dev);
+
return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL(hnae_get_handle);
@@ -377,6 +381,8 @@ void hnae_put_handle(struct hnae_handle *h)
dev->ops->put_handle(h);
module_put(dev->owner);
+
+ put_device(&dev->cls_dev);
}
EXPORT_SYMBOL(hnae_put_handle);
diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c
index 54efa9a5167b..bd719e25dd76 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea_main.c
+++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c
@@ -2446,6 +2446,8 @@ static int ehea_open(struct net_device *dev)
netif_info(port, ifup, dev, "enabling port\n");
+ netif_carrier_off(dev);
+
ret = ehea_up(dev);
if (!ret) {
port_napi_enable(port);
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 5f44c5520fbc..4f3281a03e7e 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1505,9 +1505,8 @@ static void init_sub_crqs(struct ibmvnic_adapter *adapter, int retry)
adapter->max_rx_add_entries_per_subcrq > entries_page ?
entries_page : adapter->max_rx_add_entries_per_subcrq;
- /* Choosing the maximum number of queues supported by firmware*/
- adapter->req_tx_queues = adapter->max_tx_queues;
- adapter->req_rx_queues = adapter->max_rx_queues;
+ adapter->req_tx_queues = adapter->opt_tx_comp_sub_queues;
+ adapter->req_rx_queues = adapter->opt_rx_comp_queues;
adapter->req_rx_add_queues = adapter->max_rx_add_queues;
adapter->req_mtu = adapter->max_mtu;
@@ -3706,7 +3705,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
struct net_device *netdev;
unsigned char *mac_addr_p;
struct dentry *ent;
- char buf[16]; /* debugfs name buf */
+ char buf[17]; /* debugfs name buf */
int rc;
dev_dbg(&dev->dev, "entering ibmvnic_probe for UA 0x%x\n",
@@ -3845,6 +3844,9 @@ static int ibmvnic_remove(struct vio_dev *dev)
if (adapter->debugfs_dir && !IS_ERR(adapter->debugfs_dir))
debugfs_remove_recursive(adapter->debugfs_dir);
+ dma_unmap_single(&dev->dev, adapter->stats_token,
+ sizeof(struct ibmvnic_statistics), DMA_FROM_DEVICE);
+
if (adapter->ras_comps)
dma_free_coherent(&dev->dev,
adapter->ras_comp_num *
diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index bf5cc55ba24c..5b12022adf1f 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -1381,6 +1381,7 @@ static unsigned int get_rx_coal(struct mv643xx_eth_private *mp)
temp = (val & 0x003fff00) >> 8;
temp *= 64000000;
+ temp += mp->t_clk / 2;
do_div(temp, mp->t_clk);
return (unsigned int)temp;
@@ -1417,6 +1418,7 @@ static unsigned int get_tx_coal(struct mv643xx_eth_private *mp)
temp = (rdlp(mp, TX_FIFO_URGENT_THRESHOLD) & 0x3fff0) >> 4;
temp *= 64000000;
+ temp += mp->t_clk / 2;
do_div(temp, mp->t_clk);
return (unsigned int)temp;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 12c99a2655f2..3a47e83d3e07 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2202,7 +2202,6 @@ void mlx4_en_destroy_netdev(struct net_device *dev)
if (!shutdown)
free_netdev(dev);
- dev->ethtool_ops = NULL;
}
static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index f4c687ce4c59..84e8b250e2af 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1445,6 +1445,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
c->netdev = priv->netdev;
c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
c->num_tc = priv->params.num_tc;
+ c->xdp = !!priv->xdp_prog;
if (priv->params.rx_am_enabled)
rx_cq_profile = mlx5e_am_get_def_profile(priv->params.rx_cq_period_mode);
@@ -1468,6 +1469,12 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
if (err)
goto err_close_tx_cqs;
+ /* XDP SQ CQ params are same as normal TXQ sq CQ params */
+ err = c->xdp ? mlx5e_open_cq(c, &cparam->tx_cq, &c->xdp_sq.cq,
+ priv->params.tx_cq_moderation) : 0;
+ if (err)
+ goto err_close_rx_cq;
+
napi_enable(&c->napi);
err = mlx5e_open_sq(c, 0, &cparam->icosq, &c->icosq);
@@ -1488,21 +1495,10 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
}
}
- if (priv->xdp_prog) {
- /* XDP SQ CQ params are same as normal TXQ sq CQ params */
- err = mlx5e_open_cq(c, &cparam->tx_cq, &c->xdp_sq.cq,
- priv->params.tx_cq_moderation);
- if (err)
- goto err_close_sqs;
-
- err = mlx5e_open_sq(c, 0, &cparam->xdp_sq, &c->xdp_sq);
- if (err) {
- mlx5e_close_cq(&c->xdp_sq.cq);
- goto err_close_sqs;
- }
- }
+ err = c->xdp ? mlx5e_open_sq(c, 0, &cparam->xdp_sq, &c->xdp_sq) : 0;
+ if (err)
+ goto err_close_sqs;
- c->xdp = !!priv->xdp_prog;
err = mlx5e_open_rq(c, &cparam->rq, &c->rq);
if (err)
goto err_close_xdp_sq;
@@ -1512,7 +1508,8 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
return 0;
err_close_xdp_sq:
- mlx5e_close_sq(&c->xdp_sq);
+ if (c->xdp)
+ mlx5e_close_sq(&c->xdp_sq);
err_close_sqs:
mlx5e_close_sqs(c);
@@ -1522,6 +1519,10 @@ err_close_icosq:
err_disable_napi:
napi_disable(&c->napi);
+ if (c->xdp)
+ mlx5e_close_cq(&c->xdp_sq.cq);
+
+err_close_rx_cq:
mlx5e_close_cq(&c->rq.cq);
err_close_tx_cqs:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 7fe6559e4ab3..bf1c09ca73c0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -308,7 +308,7 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev)
netdev->switchdev_ops = &mlx5e_rep_switchdev_ops;
#endif
- netdev->features |= NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_TC;
+ netdev->features |= NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_TC | NETIF_F_NETNS_LOCAL;
netdev->hw_features |= NETIF_F_HW_TC;
eth_hw_addr_random(netdev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index ce8c54d18906..6bb21b31cfeb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -237,12 +237,15 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec
skb_flow_dissector_target(f->dissector,
FLOW_DISSECTOR_KEY_VLAN,
f->mask);
- if (mask->vlan_id) {
+ if (mask->vlan_id || mask->vlan_priority) {
MLX5_SET(fte_match_set_lyr_2_4, headers_c, vlan_tag, 1);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, vlan_tag, 1);
MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, mask->vlan_priority);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, key->vlan_priority);
}
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index c55ad8d00c05..d239f5d0ea36 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -57,7 +57,8 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
if (esw->mode != SRIOV_OFFLOADS)
return ERR_PTR(-EOPNOTSUPP);
- action = attr->action;
+ /* per flow vlan pop/push is emulated, don't set that into the firmware */
+ action = attr->action & ~(MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH | MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 89696048b045..914e5466f729 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1690,7 +1690,7 @@ static int init_root_ns(struct mlx5_flow_steering *steering)
{
steering->root_ns = create_root_ns(steering, FS_FT_NIC_RX);
- if (IS_ERR_OR_NULL(steering->root_ns))
+ if (!steering->root_ns)
goto cleanup;
if (init_root_tree(steering, &root_fs, &steering->root_ns->ns.node))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index d5433c49b2b0..3eb931585b3e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1226,6 +1226,9 @@ static int init_one(struct pci_dev *pdev,
pci_set_drvdata(pdev, dev);
+ dev->pdev = pdev;
+ dev->event = mlx5_core_event;
+
if (prof_sel < 0 || prof_sel >= ARRAY_SIZE(profile)) {
mlx5_core_warn(dev,
"selected profile out of range, selecting default (%d)\n",
@@ -1233,8 +1236,6 @@ static int init_one(struct pci_dev *pdev,
prof_sel = MLX5_DEFAULT_PROF;
}
dev->profile = &profile[prof_sel];
- dev->pdev = pdev;
- dev->event = mlx5_core_event;
INIT_LIST_HEAD(&priv->ctx_list);
spin_lock_init(&priv->ctx_lock);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 1ec0a4ce3c46..dda5761e91bc 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -231,7 +231,7 @@ mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port)
span_entry->used = true;
span_entry->id = index;
- span_entry->ref_count = 0;
+ span_entry->ref_count = 1;
span_entry->local_port = local_port;
return span_entry;
}
@@ -270,6 +270,7 @@ static struct mlxsw_sp_span_entry
span_entry = mlxsw_sp_span_entry_find(port);
if (span_entry) {
+ /* Already exists, just take a reference */
span_entry->ref_count++;
return span_entry;
}
@@ -280,6 +281,7 @@ static struct mlxsw_sp_span_entry
static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_span_entry *span_entry)
{
+ WARN_ON(!span_entry->ref_count);
if (--span_entry->ref_count == 0)
mlxsw_sp_span_entry_destroy(mlxsw_sp, span_entry);
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 9b22863a924b..97bbc1d21df8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -115,7 +115,7 @@ struct mlxsw_sp_rif {
struct mlxsw_sp_mid {
struct list_head list;
unsigned char addr[ETH_ALEN];
- u16 vid;
+ u16 fid;
u16 mid;
unsigned int ref_count;
};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 4573da2c5560..e83072da6272 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -594,21 +594,22 @@ static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
return 0;
}
+static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
+
static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
{
+ mlxsw_sp_router_fib_flush(mlxsw_sp);
kfree(mlxsw_sp->router.vrs);
}
struct mlxsw_sp_neigh_key {
- unsigned char addr[sizeof(struct in6_addr)];
- struct net_device *dev;
+ struct neighbour *n;
};
struct mlxsw_sp_neigh_entry {
struct rhash_head ht_node;
struct mlxsw_sp_neigh_key key;
u16 rif;
- struct neighbour *n;
bool offloaded;
struct delayed_work dw;
struct mlxsw_sp_port *mlxsw_sp_port;
@@ -646,19 +647,15 @@ mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work);
static struct mlxsw_sp_neigh_entry *
-mlxsw_sp_neigh_entry_create(const void *addr, size_t addr_len,
- struct net_device *dev, u16 rif,
- struct neighbour *n)
+mlxsw_sp_neigh_entry_create(struct neighbour *n, u16 rif)
{
struct mlxsw_sp_neigh_entry *neigh_entry;
neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_ATOMIC);
if (!neigh_entry)
return NULL;
- memcpy(neigh_entry->key.addr, addr, addr_len);
- neigh_entry->key.dev = dev;
+ neigh_entry->key.n = n;
neigh_entry->rif = rif;
- neigh_entry->n = n;
INIT_DELAYED_WORK(&neigh_entry->dw, mlxsw_sp_router_neigh_update_hw);
INIT_LIST_HEAD(&neigh_entry->nexthop_list);
return neigh_entry;
@@ -671,13 +668,11 @@ mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp_neigh_entry *neigh_entry)
}
static struct mlxsw_sp_neigh_entry *
-mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, const void *addr,
- size_t addr_len, struct net_device *dev)
+mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
{
- struct mlxsw_sp_neigh_key key = {{ 0 } };
+ struct mlxsw_sp_neigh_key key;
- memcpy(key.addr, addr, addr_len);
- key.dev = dev;
+ key.n = n;
return rhashtable_lookup_fast(&mlxsw_sp->router.neigh_ht,
&key, mlxsw_sp_neigh_ht_params);
}
@@ -689,26 +684,20 @@ int mlxsw_sp_router_neigh_construct(struct net_device *dev,
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
struct mlxsw_sp_neigh_entry *neigh_entry;
struct mlxsw_sp_rif *r;
- u32 dip;
int err;
if (n->tbl != &arp_tbl)
return 0;
- dip = ntohl(*((__be32 *) n->primary_key));
- neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &dip, sizeof(dip),
- n->dev);
- if (neigh_entry) {
- WARN_ON(neigh_entry->n != n);
+ neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
+ if (neigh_entry)
return 0;
- }
r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
if (WARN_ON(!r))
return -EINVAL;
- neigh_entry = mlxsw_sp_neigh_entry_create(&dip, sizeof(dip), n->dev,
- r->rif, n);
+ neigh_entry = mlxsw_sp_neigh_entry_create(n, r->rif);
if (!neigh_entry)
return -ENOMEM;
err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
@@ -727,14 +716,11 @@ void mlxsw_sp_router_neigh_destroy(struct net_device *dev,
struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
struct mlxsw_sp_neigh_entry *neigh_entry;
- u32 dip;
if (n->tbl != &arp_tbl)
return;
- dip = ntohl(*((__be32 *) n->primary_key));
- neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &dip, sizeof(dip),
- n->dev);
+ neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
if (!neigh_entry)
return;
mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
@@ -817,6 +803,26 @@ static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
}
}
+static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
+{
+ u8 num_rec, last_rec_index, num_entries;
+
+ num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
+ last_rec_index = num_rec - 1;
+
+ if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
+ return false;
+ if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
+ MLXSW_REG_RAUHTD_TYPE_IPV6)
+ return true;
+
+ num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
+ last_rec_index);
+ if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
+ return true;
+ return false;
+}
+
static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
{
char *rauhtd_pl;
@@ -843,7 +849,7 @@ static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
for (i = 0; i < num_rec; i++)
mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
i);
- } while (num_rec);
+ } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
rtnl_unlock();
kfree(rauhtd_pl);
@@ -862,7 +868,7 @@ static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
* is active regardless of the traffic.
*/
if (!list_empty(&neigh_entry->nexthop_list))
- neigh_event_send(neigh_entry->n, NULL);
+ neigh_event_send(neigh_entry->key.n, NULL);
}
rtnl_unlock();
}
@@ -908,9 +914,9 @@ static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
rtnl_lock();
list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
nexthop_neighs_list_node) {
- if (!(neigh_entry->n->nud_state & NUD_VALID) &&
+ if (!(neigh_entry->key.n->nud_state & NUD_VALID) &&
!list_empty(&neigh_entry->nexthop_list))
- neigh_event_send(neigh_entry->n, NULL);
+ neigh_event_send(neigh_entry->key.n, NULL);
}
rtnl_unlock();
@@ -927,7 +933,7 @@ static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work)
{
struct mlxsw_sp_neigh_entry *neigh_entry =
container_of(work, struct mlxsw_sp_neigh_entry, dw.work);
- struct neighbour *n = neigh_entry->n;
+ struct neighbour *n = neigh_entry->key.n;
struct mlxsw_sp_port *mlxsw_sp_port = neigh_entry->mlxsw_sp_port;
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
char rauht_pl[MLXSW_REG_RAUHT_LEN];
@@ -1030,11 +1036,8 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
dip = ntohl(*((__be32 *) n->primary_key));
- neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp,
- &dip,
- sizeof(__be32),
- dev);
- if (WARN_ON(!neigh_entry) || WARN_ON(neigh_entry->n != n)) {
+ neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
+ if (WARN_ON(!neigh_entry)) {
mlxsw_sp_port_dev_put(mlxsw_sp_port);
return NOTIFY_DONE;
}
@@ -1343,33 +1346,26 @@ static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
struct fib_nh *fib_nh)
{
struct mlxsw_sp_neigh_entry *neigh_entry;
- u32 gwip = ntohl(fib_nh->nh_gw);
struct net_device *dev = fib_nh->nh_dev;
struct neighbour *n;
u8 nud_state;
- neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &gwip,
- sizeof(gwip), dev);
- if (!neigh_entry) {
- __be32 gwipn = htonl(gwip);
-
- n = neigh_create(&arp_tbl, &gwipn, dev);
+ /* Take a reference of neigh here ensuring that neigh would
+ * not be detructed before the nexthop entry is finished.
+ * The reference is taken either in neigh_lookup() or
+ * in neith_create() in case n is not found.
+ */
+ n = neigh_lookup(&arp_tbl, &fib_nh->nh_gw, dev);
+ if (!n) {
+ n = neigh_create(&arp_tbl, &fib_nh->nh_gw, dev);
if (IS_ERR(n))
return PTR_ERR(n);
neigh_event_send(n, NULL);
- neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &gwip,
- sizeof(gwip), dev);
- if (!neigh_entry) {
- neigh_release(n);
- return -EINVAL;
- }
- } else {
- /* Take a reference of neigh here ensuring that neigh would
- * not be detructed before the nexthop entry is finished.
- * The second branch takes the reference in neith_create()
- */
- n = neigh_entry->n;
- neigh_clone(n);
+ }
+ neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
+ if (!neigh_entry) {
+ neigh_release(n);
+ return -EINVAL;
}
/* If that is the first nexthop connected to that neigh, add to
@@ -1403,7 +1399,7 @@ static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp,
if (list_empty(&nh->neigh_entry->nexthop_list))
list_del(&nh->neigh_entry->nexthop_neighs_list_node);
- neigh_release(neigh_entry->n);
+ neigh_release(neigh_entry->key.n);
}
static struct mlxsw_sp_nexthop_group *
@@ -1463,11 +1459,11 @@ static bool mlxsw_sp_nexthop_match(struct mlxsw_sp_nexthop *nh,
for (i = 0; i < fi->fib_nhs; i++) {
struct fib_nh *fib_nh = &fi->fib_nh[i];
- u32 gwip = ntohl(fib_nh->nh_gw);
+ struct neighbour *n = nh->neigh_entry->key.n;
- if (memcmp(nh->neigh_entry->key.addr,
- &gwip, sizeof(u32)) == 0 &&
- nh->neigh_entry->key.dev == fib_nh->nh_dev)
+ if (memcmp(n->primary_key, &fib_nh->nh_gw,
+ sizeof(fib_nh->nh_gw)) == 0 &&
+ n->dev == fib_nh->nh_dev)
return true;
}
return false;
@@ -1874,18 +1870,18 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
-static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp)
+static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
{
struct mlxsw_resources *resources;
struct mlxsw_sp_fib_entry *fib_entry;
struct mlxsw_sp_fib_entry *tmp;
struct mlxsw_sp_vr *vr;
int i;
- int err;
resources = mlxsw_core_resources_get(mlxsw_sp->core);
for (i = 0; i < resources->max_virtual_routers; i++) {
vr = &mlxsw_sp->router.vrs[i];
+
if (!vr->used)
continue;
@@ -1901,6 +1897,13 @@ static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp)
break;
}
}
+}
+
+static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp)
+{
+ int err;
+
+ mlxsw_sp_router_fib_flush(mlxsw_sp);
mlxsw_sp->router.aborted = true;
err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
if (err)
@@ -1958,6 +1961,9 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
struct fib_entry_notifier_info *fen_info = ptr;
int err;
+ if (!net_eq(fen_info->info.net, &init_net))
+ return NOTIFY_DONE;
+
switch (event) {
case FIB_EVENT_ENTRY_ADD:
err = mlxsw_sp_router_fib4_add(mlxsw_sp, fen_info);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 5e00c79e8133..1e2c8eca3af1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -929,12 +929,12 @@ static int mlxsw_sp_port_smid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mid,
static struct mlxsw_sp_mid *__mlxsw_sp_mc_get(struct mlxsw_sp *mlxsw_sp,
const unsigned char *addr,
- u16 vid)
+ u16 fid)
{
struct mlxsw_sp_mid *mid;
list_for_each_entry(mid, &mlxsw_sp->br_mids.list, list) {
- if (ether_addr_equal(mid->addr, addr) && mid->vid == vid)
+ if (ether_addr_equal(mid->addr, addr) && mid->fid == fid)
return mid;
}
return NULL;
@@ -942,7 +942,7 @@ static struct mlxsw_sp_mid *__mlxsw_sp_mc_get(struct mlxsw_sp *mlxsw_sp,
static struct mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp,
const unsigned char *addr,
- u16 vid)
+ u16 fid)
{
struct mlxsw_sp_mid *mid;
u16 mid_idx;
@@ -958,7 +958,7 @@ static struct mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp,
set_bit(mid_idx, mlxsw_sp->br_mids.mapped);
ether_addr_copy(mid->addr, addr);
- mid->vid = vid;
+ mid->fid = fid;
mid->mid = mid_idx;
mid->ref_count = 0;
list_add_tail(&mid->list, &mlxsw_sp->br_mids.list);
@@ -991,9 +991,9 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port,
if (switchdev_trans_ph_prepare(trans))
return 0;
- mid = __mlxsw_sp_mc_get(mlxsw_sp, mdb->addr, mdb->vid);
+ mid = __mlxsw_sp_mc_get(mlxsw_sp, mdb->addr, fid);
if (!mid) {
- mid = __mlxsw_sp_mc_alloc(mlxsw_sp, mdb->addr, mdb->vid);
+ mid = __mlxsw_sp_mc_alloc(mlxsw_sp, mdb->addr, fid);
if (!mid) {
netdev_err(dev, "Unable to allocate MC group\n");
return -ENOMEM;
@@ -1137,7 +1137,7 @@ static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port,
u16 mid_idx;
int err = 0;
- mid = __mlxsw_sp_mc_get(mlxsw_sp, mdb->addr, mdb->vid);
+ mid = __mlxsw_sp_mc_get(mlxsw_sp, mdb->addr, fid);
if (!mid) {
netdev_err(dev, "Unable to remove port from MC DB\n");
return -EINVAL;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index 72eee29c677f..2777d5bb4380 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -727,9 +727,6 @@ struct core_tx_bd_flags {
#define CORE_TX_BD_FLAGS_L4_PROTOCOL_SHIFT 6
#define CORE_TX_BD_FLAGS_L4_PSEUDO_CSUM_MODE_MASK 0x1
#define CORE_TX_BD_FLAGS_L4_PSEUDO_CSUM_MODE_SHIFT 7
-#define CORE_TX_BD_FLAGS_ROCE_FLAV_MASK 0x1
-#define CORE_TX_BD_FLAGS_ROCE_FLAV_SHIFT 12
-
};
struct core_tx_bd {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index 63e1a1b0ef8e..f95385cbbd40 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -1119,6 +1119,7 @@ static void qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn,
start_bd->bd_flags.as_bitfield |= CORE_TX_BD_FLAGS_START_BD_MASK <<
CORE_TX_BD_FLAGS_START_BD_SHIFT;
SET_FIELD(start_bd->bitfield0, CORE_TX_BD_NBDS, num_of_bds);
+ SET_FIELD(start_bd->bitfield0, CORE_TX_BD_ROCE_FLAV, type);
DMA_REGPAIR_LE(start_bd->addr, first_frag);
start_bd->nbytes = cpu_to_le16(first_frag_len);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index c418360ba02a..333c7442e48a 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -839,20 +839,19 @@ static void qed_update_pf_params(struct qed_dev *cdev,
{
int i;
+ if (IS_ENABLED(CONFIG_QED_RDMA)) {
+ params->rdma_pf_params.num_qps = QED_ROCE_QPS;
+ params->rdma_pf_params.min_dpis = QED_ROCE_DPIS;
+ /* divide by 3 the MRs to avoid MF ILT overflow */
+ params->rdma_pf_params.num_mrs = RDMA_MAX_TIDS;
+ params->rdma_pf_params.gl_pi = QED_ROCE_PROTOCOL_INDEX;
+ }
+
for (i = 0; i < cdev->num_hwfns; i++) {
struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
p_hwfn->pf_params = *params;
}
-
- if (!IS_ENABLED(CONFIG_QED_RDMA))
- return;
-
- params->rdma_pf_params.num_qps = QED_ROCE_QPS;
- params->rdma_pf_params.min_dpis = QED_ROCE_DPIS;
- /* divide by 3 the MRs to avoid MF ILT overflow */
- params->rdma_pf_params.num_mrs = RDMA_MAX_TIDS;
- params->rdma_pf_params.gl_pi = QED_ROCE_PROTOCOL_INDEX;
}
static int qed_slowpath_start(struct qed_dev *cdev,
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
index 12251a1032d1..7567cc464b88 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
@@ -175,16 +175,23 @@ static void qede_get_strings_stats(struct qede_dev *edev, u8 *buf)
for (i = 0, k = 0; i < QEDE_QUEUE_CNT(edev); i++) {
int tc;
- for (j = 0; j < QEDE_NUM_RQSTATS; j++)
- sprintf(buf + (k + j) * ETH_GSTRING_LEN,
- "%d: %s", i, qede_rqstats_arr[j].string);
- k += QEDE_NUM_RQSTATS;
- for (tc = 0; tc < edev->num_tc; tc++) {
- for (j = 0; j < QEDE_NUM_TQSTATS; j++)
+ if (edev->fp_array[i].type & QEDE_FASTPATH_RX) {
+ for (j = 0; j < QEDE_NUM_RQSTATS; j++)
sprintf(buf + (k + j) * ETH_GSTRING_LEN,
- "%d.%d: %s", i, tc,
- qede_tqstats_arr[j].string);
- k += QEDE_NUM_TQSTATS;
+ "%d: %s", i,
+ qede_rqstats_arr[j].string);
+ k += QEDE_NUM_RQSTATS;
+ }
+
+ if (edev->fp_array[i].type & QEDE_FASTPATH_TX) {
+ for (tc = 0; tc < edev->num_tc; tc++) {
+ for (j = 0; j < QEDE_NUM_TQSTATS; j++)
+ sprintf(buf + (k + j) *
+ ETH_GSTRING_LEN,
+ "%d.%d: %s", i, tc,
+ qede_tqstats_arr[j].string);
+ k += QEDE_NUM_TQSTATS;
+ }
}
}
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 7def29aaf65c..85f46dbecd5b 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -2839,7 +2839,7 @@ static int qede_alloc_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq)
}
mapping = dma_map_page(&edev->pdev->dev, replace_buf->data, 0,
- rxq->rx_buf_size, DMA_FROM_DEVICE);
+ PAGE_SIZE, DMA_FROM_DEVICE);
if (unlikely(dma_mapping_error(&edev->pdev->dev, mapping))) {
DP_NOTICE(edev,
"Failed to map TPA replacement buffer\n");
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
index 6fb3bee904d3..0b4deb31e742 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
@@ -575,10 +575,11 @@ void emac_mac_start(struct emac_adapter *adpt)
mac |= TXEN | RXEN; /* enable RX/TX */
- /* We don't have ethtool support yet, so force flow-control mode
- * to 'full' always.
- */
- mac |= TXFC | RXFC;
+ /* Configure MAC flow control to match the PHY's settings. */
+ if (phydev->pause)
+ mac |= RXFC;
+ if (phydev->pause != phydev->asym_pause)
+ mac |= TXFC;
/* setup link speed */
mac &= ~SPEED_MASK;
@@ -1003,6 +1004,12 @@ int emac_mac_up(struct emac_adapter *adpt)
writel((u32)~DIS_INT, adpt->base + EMAC_INT_STATUS);
writel(adpt->irq.mask, adpt->base + EMAC_INT_MASK);
+ /* Enable pause frames. Without this feature, the EMAC has been shown
+ * to receive (and drop) frames with FCS errors at gigabit connections.
+ */
+ adpt->phydev->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+ adpt->phydev->advertising |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+
adpt->phydev->irq = PHY_IGNORE_INTERRUPT;
phy_start(adpt->phydev);
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c
index 75c1b530e39e..72fe343c7a36 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c
@@ -421,7 +421,7 @@ static const struct emac_reg_write sgmii_v2_laned[] = {
/* CDR Settings */
{EMAC_SGMII_LN_UCDR_FO_GAIN_MODE0,
UCDR_STEP_BY_TWO_MODE0 | UCDR_xO_GAIN_MODE(10)},
- {EMAC_SGMII_LN_UCDR_SO_GAIN_MODE0, UCDR_xO_GAIN_MODE(6)},
+ {EMAC_SGMII_LN_UCDR_SO_GAIN_MODE0, UCDR_xO_GAIN_MODE(0)},
{EMAC_SGMII_LN_UCDR_SO_CONFIG, UCDR_ENABLE | UCDR_SO_SATURATION(12)},
/* TX/RX Settings */
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 3cf3557106c2..6b89e4a7b164 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -485,6 +485,9 @@ efx_copy_channel(const struct efx_channel *old_channel)
*channel = *old_channel;
channel->napi_dev = NULL;
+ INIT_HLIST_NODE(&channel->napi_str.napi_hash_node);
+ channel->napi_str.napi_id = 0;
+ channel->napi_str.state = 0;
memset(&channel->eventq, 0, sizeof(channel->eventq));
for (j = 0; j < EFX_TXQ_TYPES; j++) {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 48e71fad4210..e2c94ec4edd0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -880,6 +880,13 @@ static int stmmac_init_phy(struct net_device *dev)
return -ENODEV;
}
+ /* stmmac_adjust_link will change this to PHY_IGNORE_INTERRUPT to avoid
+ * subsequent PHY polling, make sure we force a link transition if
+ * we have a UP/DOWN/UP transition
+ */
+ if (phydev->is_pseudo_fixed_link)
+ phydev->irq = PHY_POLL;
+
pr_debug("stmmac_init_phy: %s: attached to PHY (UID 0x%x)"
" Link = %d\n", dev->name, phydev->phy_id, phydev->link);
diff --git a/drivers/net/ethernet/ti/cpsw-phy-sel.c b/drivers/net/ethernet/ti/cpsw-phy-sel.c
index 054a8dd23dae..ba1e45ff6aae 100644
--- a/drivers/net/ethernet/ti/cpsw-phy-sel.c
+++ b/drivers/net/ethernet/ti/cpsw-phy-sel.c
@@ -176,9 +176,12 @@ void cpsw_phy_sel(struct device *dev, phy_interface_t phy_mode, int slave)
}
dev = bus_find_device(&platform_bus_type, NULL, node, match);
+ of_node_put(node);
priv = dev_get_drvdata(dev);
priv->cpsw_phy_sel(priv, phy_mode, slave);
+
+ put_device(dev);
}
EXPORT_SYMBOL_GPL(cpsw_phy_sel);
diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
index 2fd94a5bc1f3..84fbe5714f8b 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -1410,6 +1410,7 @@ static int emac_dev_open(struct net_device *ndev)
int i = 0;
struct emac_priv *priv = netdev_priv(ndev);
struct phy_device *phydev = NULL;
+ struct device *phy = NULL;
ret = pm_runtime_get_sync(&priv->pdev->dev);
if (ret < 0) {
@@ -1488,19 +1489,20 @@ static int emac_dev_open(struct net_device *ndev)
/* use the first phy on the bus if pdata did not give us a phy id */
if (!phydev && !priv->phy_id) {
- struct device *phy;
-
phy = bus_find_device(&mdio_bus_type, NULL, NULL,
match_first_device);
- if (phy)
+ if (phy) {
priv->phy_id = dev_name(phy);
+ if (!priv->phy_id || !*priv->phy_id)
+ put_device(phy);
+ }
}
if (!phydev && priv->phy_id && *priv->phy_id) {
phydev = phy_connect(ndev, priv->phy_id,
&emac_adjust_link,
PHY_INTERFACE_MODE_MII);
-
+ put_device(phy); /* reference taken by bus_find_device */
if (IS_ERR(phydev)) {
dev_err(emac_dev, "could not connect to phy %s\n",
priv->phy_id);
diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c b/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c
index 446ea580ad42..928c1dca2673 100644
--- a/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c
+++ b/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c
@@ -1694,7 +1694,7 @@ struct gelic_wl_scan_info *gelic_wl_find_best_bss(struct gelic_wl_info *wl)
pr_debug("%s: bssid matched\n", __func__);
break;
} else {
- pr_debug("%s: bssid unmached\n", __func__);
+ pr_debug("%s: bssid unmatched\n", __func__);
continue;
}
}
diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c
index 7f127dc1b7ba..fa32391720fe 100644
--- a/drivers/net/ethernet/xscale/ixp4xx_eth.c
+++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c
@@ -708,8 +708,7 @@ static int eth_poll(struct napi_struct *napi, int budget)
if (!qmgr_stat_below_low_watermark(rxq) &&
napi_reschedule(napi)) { /* not empty again */
#if DEBUG_RX
- printk(KERN_DEBUG "%s: eth_poll"
- " napi_reschedule successed\n",
+ printk(KERN_DEBUG "%s: eth_poll napi_reschedule succeeded\n",
dev->name);
#endif
qmgr_disable_irq(rxq);
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 3234fcdea317..d2d6f12a112f 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -1278,6 +1278,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
struct net_device *lowerdev;
int err;
int macmode;
+ bool create = false;
if (!tb[IFLA_LINK])
return -EINVAL;
@@ -1304,12 +1305,18 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
err = macvlan_port_create(lowerdev);
if (err < 0)
return err;
+ create = true;
}
port = macvlan_port_get_rtnl(lowerdev);
/* Only 1 macvlan device can be created in passthru mode */
- if (port->passthru)
- return -EINVAL;
+ if (port->passthru) {
+ /* The macvlan port must be not created this time,
+ * still goto destroy_macvlan_port for readability.
+ */
+ err = -EINVAL;
+ goto destroy_macvlan_port;
+ }
vlan->lowerdev = lowerdev;
vlan->dev = dev;
@@ -1325,24 +1332,28 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
vlan->flags = nla_get_u16(data[IFLA_MACVLAN_FLAGS]);
if (vlan->mode == MACVLAN_MODE_PASSTHRU) {
- if (port->count)
- return -EINVAL;
+ if (port->count) {
+ err = -EINVAL;
+ goto destroy_macvlan_port;
+ }
port->passthru = true;
eth_hw_addr_inherit(dev, lowerdev);
}
if (data && data[IFLA_MACVLAN_MACADDR_MODE]) {
- if (vlan->mode != MACVLAN_MODE_SOURCE)
- return -EINVAL;
+ if (vlan->mode != MACVLAN_MODE_SOURCE) {
+ err = -EINVAL;
+ goto destroy_macvlan_port;
+ }
macmode = nla_get_u32(data[IFLA_MACVLAN_MACADDR_MODE]);
err = macvlan_changelink_sources(vlan, macmode, data);
if (err)
- return err;
+ goto destroy_macvlan_port;
}
err = register_netdevice(dev);
if (err < 0)
- return err;
+ goto destroy_macvlan_port;
dev->priv_flags |= IFF_MACVLAN;
err = netdev_upper_dev_link(lowerdev, dev);
@@ -1357,7 +1368,9 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
unregister_netdev:
unregister_netdevice(dev);
-
+destroy_macvlan_port:
+ if (create)
+ macvlan_port_destroy(port->dev);
return err;
}
EXPORT_SYMBOL_GPL(macvlan_common_newlink);
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index e977ba931878..1a4bf8acad78 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -723,6 +723,7 @@ struct phy_device *phy_connect(struct net_device *dev, const char *bus_id,
phydev = to_phy_device(d);
rc = phy_connect_direct(dev, phydev, handler, interface);
+ put_device(d);
if (rc)
return ERR_PTR(rc);
@@ -953,6 +954,7 @@ struct phy_device *phy_attach(struct net_device *dev, const char *bus_id,
phydev = to_phy_device(d);
rc = phy_attach_direct(dev, phydev, phydev->dev_flags, interface);
+ put_device(d);
if (rc)
return ERR_PTR(rc);
diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index e6338c16081a..8a6675d92b98 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -1656,6 +1656,19 @@ static const struct driver_info ax88178a_info = {
.tx_fixup = ax88179_tx_fixup,
};
+static const struct driver_info cypress_GX3_info = {
+ .description = "Cypress GX3 SuperSpeed to Gigabit Ethernet Controller",
+ .bind = ax88179_bind,
+ .unbind = ax88179_unbind,
+ .status = ax88179_status,
+ .link_reset = ax88179_link_reset,
+ .reset = ax88179_reset,
+ .stop = ax88179_stop,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .rx_fixup = ax88179_rx_fixup,
+ .tx_fixup = ax88179_tx_fixup,
+};
+
static const struct driver_info dlink_dub1312_info = {
.description = "D-Link DUB-1312 USB 3.0 to Gigabit Ethernet Adapter",
.bind = ax88179_bind,
@@ -1718,6 +1731,10 @@ static const struct usb_device_id products[] = {
USB_DEVICE(0x0b95, 0x178a),
.driver_info = (unsigned long)&ax88178a_info,
}, {
+ /* Cypress GX3 SuperSpeed to Gigabit Ethernet Bridge Controller */
+ USB_DEVICE(0x04b4, 0x3610),
+ .driver_info = (unsigned long)&cypress_GX3_info,
+}, {
/* D-Link DUB-1312 USB 3.0 to Gigabit Ethernet Adapter */
USB_DEVICE(0x2001, 0x4a00),
.driver_info = (unsigned long)&dlink_dub1312_info,
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 44d439f50961..efb84f092492 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -1730,7 +1730,7 @@ static u8 r8152_rx_csum(struct r8152 *tp, struct rx_desc *rx_desc)
u8 checksum = CHECKSUM_NONE;
u32 opts2, opts3;
- if (tp->version == RTL_VER_01)
+ if (tp->version == RTL_VER_01 || tp->version == RTL_VER_02)
goto return_result;
opts2 = le32_to_cpu(rx_desc->opts2);
@@ -1745,7 +1745,7 @@ static u8 r8152_rx_csum(struct r8152 *tp, struct rx_desc *rx_desc)
checksum = CHECKSUM_NONE;
else
checksum = CHECKSUM_UNNECESSARY;
- } else if (RD_IPV6_CS) {
+ } else if (opts2 & RD_IPV6_CS) {
if ((opts2 & RD_UDP_CS) && !(opts3 & UDPF))
checksum = CHECKSUM_UNNECESSARY;
else if ((opts2 & RD_TCP_CS) && !(opts3 & TCPF))
@@ -3266,10 +3266,8 @@ static int rtl8152_open(struct net_device *netdev)
goto out;
res = usb_autopm_get_interface(tp->intf);
- if (res < 0) {
- free_all_mem(tp);
- goto out;
- }
+ if (res < 0)
+ goto out_free;
mutex_lock(&tp->control);
@@ -3285,10 +3283,9 @@ static int rtl8152_open(struct net_device *netdev)
netif_device_detach(tp->netdev);
netif_warn(tp, ifup, netdev, "intr_urb submit failed: %d\n",
res);
- free_all_mem(tp);
- } else {
- napi_enable(&tp->napi);
+ goto out_unlock;
}
+ napi_enable(&tp->napi);
mutex_unlock(&tp->control);
@@ -3297,7 +3294,13 @@ static int rtl8152_open(struct net_device *netdev)
tp->pm_notifier.notifier_call = rtl_notifier;
register_pm_notifier(&tp->pm_notifier);
#endif
+ return 0;
+out_unlock:
+ mutex_unlock(&tp->control);
+ usb_autopm_put_interface(tp->intf);
+out_free:
+ free_all_mem(tp);
out:
return res;
}
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index fad84f3f4109..fd8b1e62301f 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2038,23 +2038,33 @@ static struct virtio_device_id id_table[] = {
{ 0 },
};
+#define VIRTNET_FEATURES \
+ VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \
+ VIRTIO_NET_F_MAC, \
+ VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \
+ VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \
+ VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \
+ VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \
+ VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \
+ VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \
+ VIRTIO_NET_F_CTRL_MAC_ADDR, \
+ VIRTIO_NET_F_MTU
+
static unsigned int features[] = {
- VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM,
- VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC,
- VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6,
- VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6,
- VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO,
- VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ,
- VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN,
- VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ,
- VIRTIO_NET_F_CTRL_MAC_ADDR,
+ VIRTNET_FEATURES,
+};
+
+static unsigned int features_legacy[] = {
+ VIRTNET_FEATURES,
+ VIRTIO_NET_F_GSO,
VIRTIO_F_ANY_LAYOUT,
- VIRTIO_NET_F_MTU,
};
static struct virtio_driver virtio_net_driver = {
.feature_table = features,
.feature_table_size = ARRAY_SIZE(features),
+ .feature_table_legacy = features_legacy,
+ .feature_table_size_legacy = ARRAY_SIZE(features_legacy),
.driver.name = KBUILD_MODNAME,
.driver.owner = THIS_MODULE,
.id_table = id_table,
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index f3c2fa3ab0d5..24532cdebb00 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -944,7 +944,9 @@ static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev)
{
struct vxlan_dev *vxlan;
struct vxlan_sock *sock4;
- struct vxlan_sock *sock6 = NULL;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct vxlan_sock *sock6;
+#endif
unsigned short family = dev->default_dst.remote_ip.sa.sa_family;
sock4 = rtnl_dereference(dev->vn4_sock);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index b777e1b2f87a..78d9966a3957 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -4516,7 +4516,7 @@ brcmf_cfg80211_start_ap(struct wiphy *wiphy, struct net_device *ndev,
/* store current 11d setting */
if (brcmf_fil_cmd_int_get(ifp, BRCMF_C_GET_REGULATORY,
&ifp->vif->is_11d)) {
- supports_11d = false;
+ is_11d = supports_11d = false;
} else {
country_ie = brcmf_parse_tlvs((u8 *)settings->beacon.tail,
settings->beacon.tail_len,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
index 4fdc3dad3e85..b88e2048ae0b 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
@@ -1087,6 +1087,15 @@ iwl_mvm_netdetect_config(struct iwl_mvm *mvm,
ret = iwl_mvm_switch_to_d3(mvm);
if (ret)
return ret;
+ } else {
+ /* In theory, we wouldn't have to stop a running sched
+ * scan in order to start another one (for
+ * net-detect). But in practice this doesn't seem to
+ * work properly, so stop any running sched_scan now.
+ */
+ ret = iwl_mvm_scan_stop(mvm, IWL_MVM_SCAN_SCHED, true);
+ if (ret)
+ return ret;
}
/* rfkill release can be either for wowlan or netdetect */
@@ -1254,7 +1263,10 @@ static int __iwl_mvm_suspend(struct ieee80211_hw *hw,
out:
if (ret < 0) {
iwl_mvm_ref(mvm, IWL_MVM_REF_UCODE_DOWN);
- ieee80211_restart_hw(mvm->hw);
+ if (mvm->restart_fw > 0) {
+ mvm->restart_fw--;
+ ieee80211_restart_hw(mvm->hw);
+ }
iwl_mvm_free_nd(mvm);
}
out_noreset:
@@ -2088,6 +2100,16 @@ static int __iwl_mvm_resume(struct iwl_mvm *mvm, bool test)
iwl_mvm_update_changed_regdom(mvm);
if (mvm->net_detect) {
+ /* If this is a non-unified image, we restart the FW,
+ * so no need to stop the netdetect scan. If that
+ * fails, continue and try to get the wake-up reasons,
+ * but trigger a HW restart by keeping a failure code
+ * in ret.
+ */
+ if (unified_image)
+ ret = iwl_mvm_scan_stop(mvm, IWL_MVM_SCAN_NETDETECT,
+ false);
+
iwl_mvm_query_netdetect_reasons(mvm, vif);
/* has unlocked the mutex, so skip that */
goto out;
@@ -2271,7 +2293,8 @@ static void iwl_mvm_d3_test_disconn_work_iter(void *_data, u8 *mac,
static int iwl_mvm_d3_test_release(struct inode *inode, struct file *file)
{
struct iwl_mvm *mvm = inode->i_private;
- int remaining_time = 10;
+ bool unified_image = fw_has_capa(&mvm->fw->ucode_capa,
+ IWL_UCODE_TLV_CAPA_CNSLDTD_D3_D0_IMG);
mvm->d3_test_active = false;
@@ -2282,17 +2305,21 @@ static int iwl_mvm_d3_test_release(struct inode *inode, struct file *file)
mvm->trans->system_pm_mode = IWL_PLAT_PM_MODE_DISABLED;
iwl_abort_notification_waits(&mvm->notif_wait);
- ieee80211_restart_hw(mvm->hw);
+ if (!unified_image) {
+ int remaining_time = 10;
- /* wait for restart and disconnect all interfaces */
- while (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status) &&
- remaining_time > 0) {
- remaining_time--;
- msleep(1000);
- }
+ ieee80211_restart_hw(mvm->hw);
+
+ /* wait for restart and disconnect all interfaces */
+ while (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status) &&
+ remaining_time > 0) {
+ remaining_time--;
+ msleep(1000);
+ }
- if (remaining_time == 0)
- IWL_ERR(mvm, "Timed out waiting for HW restart to finish!\n");
+ if (remaining_time == 0)
+ IWL_ERR(mvm, "Timed out waiting for HW restart!\n");
+ }
ieee80211_iterate_active_interfaces_atomic(
mvm->hw, IEEE80211_IFACE_ITER_NORMAL,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
index 07da4efe8458..7b7d2a146e30 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
@@ -1529,8 +1529,8 @@ static ssize_t iwl_dbgfs_mem_read(struct file *file, char __user *user_buf,
.data = { &cmd, },
.len = { sizeof(cmd) },
};
- size_t delta, len;
- ssize_t ret;
+ size_t delta;
+ ssize_t ret, len;
hcmd.id = iwl_cmd_id(*ppos >> 24 ? UMAC_RD_WR : LMAC_RD_WR,
DEBUG_GROUP, 0);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index 318efd814037..1db1dc13e988 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -4121,7 +4121,6 @@ void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm,
struct iwl_mvm_internal_rxq_notif *notif,
u32 size)
{
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(notif_waitq);
u32 qmask = BIT(mvm->trans->num_rx_queues) - 1;
int ret;
@@ -4143,7 +4142,7 @@ void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm,
}
if (notif->sync)
- ret = wait_event_timeout(notif_waitq,
+ ret = wait_event_timeout(mvm->rx_sync_waitq,
atomic_read(&mvm->queue_sync_counter) == 0,
HZ);
WARN_ON_ONCE(!ret);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
index d17cbf603f7c..c60703e0c246 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -937,6 +937,7 @@ struct iwl_mvm {
/* sync d0i3_tx queue and IWL_MVM_STATUS_IN_D0I3 status flag */
spinlock_t d0i3_tx_lock;
wait_queue_head_t d0i3_exit_waitq;
+ wait_queue_head_t rx_sync_waitq;
/* BT-Coex */
struct iwl_bt_coex_profile_notif last_bt_notif;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
index 05fe6dd1a2c8..4d35deb628bc 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
@@ -619,6 +619,7 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg,
spin_lock_init(&mvm->refs_lock);
skb_queue_head_init(&mvm->d0i3_tx);
init_waitqueue_head(&mvm->d0i3_exit_waitq);
+ init_waitqueue_head(&mvm->rx_sync_waitq);
atomic_set(&mvm->queue_sync_counter, 0);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
index a57c6ef5bc14..6c802cee900c 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
@@ -547,7 +547,8 @@ void iwl_mvm_rx_queue_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb,
"Received expired RX queue sync message\n");
return;
}
- atomic_dec(&mvm->queue_sync_counter);
+ if (!atomic_dec_return(&mvm->queue_sync_counter))
+ wake_up(&mvm->rx_sync_waitq);
}
switch (internal_notif->type) {
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
index f279fdd6eb44..fa9743205491 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
@@ -1199,6 +1199,9 @@ static int iwl_mvm_num_scans(struct iwl_mvm *mvm)
static int iwl_mvm_check_running_scans(struct iwl_mvm *mvm, int type)
{
+ bool unified_image = fw_has_capa(&mvm->fw->ucode_capa,
+ IWL_UCODE_TLV_CAPA_CNSLDTD_D3_D0_IMG);
+
/* This looks a bit arbitrary, but the idea is that if we run
* out of possible simultaneous scans and the userspace is
* trying to run a scan type that is already running, we
@@ -1225,12 +1228,30 @@ static int iwl_mvm_check_running_scans(struct iwl_mvm *mvm, int type)
return -EBUSY;
return iwl_mvm_scan_stop(mvm, IWL_MVM_SCAN_REGULAR, true);
case IWL_MVM_SCAN_NETDETECT:
- /* No need to stop anything for net-detect since the
- * firmware is restarted anyway. This way, any sched
- * scans that were running will be restarted when we
- * resume.
- */
- return 0;
+ /* For non-unified images, there's no need to stop
+ * anything for net-detect since the firmware is
+ * restarted anyway. This way, any sched scans that
+ * were running will be restarted when we resume.
+ */
+ if (!unified_image)
+ return 0;
+
+ /* If this is a unified image and we ran out of scans,
+ * we need to stop something. Prefer stopping regular
+ * scans, because the results are useless at this
+ * point, and we should be able to keep running
+ * another scheduled scan while suspended.
+ */
+ if (mvm->scan_status & IWL_MVM_SCAN_REGULAR_MASK)
+ return iwl_mvm_scan_stop(mvm, IWL_MVM_SCAN_REGULAR,
+ true);
+ if (mvm->scan_status & IWL_MVM_SCAN_SCHED_MASK)
+ return iwl_mvm_scan_stop(mvm, IWL_MVM_SCAN_SCHED,
+ true);
+
+ /* fall through, something is wrong if no scan was
+ * running but we ran out of scans.
+ */
default:
WARN_ON(1);
break;
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index 001be406a3d3..2f8134b2a504 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -541,48 +541,64 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
MODULE_DEVICE_TABLE(pci, iwl_hw_card_ids);
#ifdef CONFIG_ACPI
-#define SPL_METHOD "SPLC"
-#define SPL_DOMAINTYPE_MODULE BIT(0)
-#define SPL_DOMAINTYPE_WIFI BIT(1)
-#define SPL_DOMAINTYPE_WIGIG BIT(2)
-#define SPL_DOMAINTYPE_RFEM BIT(3)
+#define ACPI_SPLC_METHOD "SPLC"
+#define ACPI_SPLC_DOMAIN_WIFI (0x07)
-static u64 splx_get_pwr_limit(struct iwl_trans *trans, union acpi_object *splx)
+static u64 splc_get_pwr_limit(struct iwl_trans *trans, union acpi_object *splc)
{
- union acpi_object *limits, *domain_type, *power_limit;
-
- if (splx->type != ACPI_TYPE_PACKAGE ||
- splx->package.count != 2 ||
- splx->package.elements[0].type != ACPI_TYPE_INTEGER ||
- splx->package.elements[0].integer.value != 0) {
- IWL_ERR(trans, "Unsupported splx structure\n");
+ union acpi_object *data_pkg, *dflt_pwr_limit;
+ int i;
+
+ /* We need at least two elements, one for the revision and one
+ * for the data itself. Also check that the revision is
+ * supported (currently only revision 0).
+ */
+ if (splc->type != ACPI_TYPE_PACKAGE ||
+ splc->package.count < 2 ||
+ splc->package.elements[0].type != ACPI_TYPE_INTEGER ||
+ splc->package.elements[0].integer.value != 0) {
+ IWL_DEBUG_INFO(trans,
+ "Unsupported structure returned by the SPLC method. Ignoring.\n");
return 0;
}
- limits = &splx->package.elements[1];
- if (limits->type != ACPI_TYPE_PACKAGE ||
- limits->package.count < 2 ||
- limits->package.elements[0].type != ACPI_TYPE_INTEGER ||
- limits->package.elements[1].type != ACPI_TYPE_INTEGER) {
- IWL_ERR(trans, "Invalid limits element\n");
- return 0;
+ /* loop through all the packages to find the one for WiFi */
+ for (i = 1; i < splc->package.count; i++) {
+ union acpi_object *domain;
+
+ data_pkg = &splc->package.elements[i];
+
+ /* Skip anything that is not a package with the right
+ * amount of elements (i.e. at least 2 integers).
+ */
+ if (data_pkg->type != ACPI_TYPE_PACKAGE ||
+ data_pkg->package.count < 2 ||
+ data_pkg->package.elements[0].type != ACPI_TYPE_INTEGER ||
+ data_pkg->package.elements[1].type != ACPI_TYPE_INTEGER)
+ continue;
+
+ domain = &data_pkg->package.elements[0];
+ if (domain->integer.value == ACPI_SPLC_DOMAIN_WIFI)
+ break;
+
+ data_pkg = NULL;
}
- domain_type = &limits->package.elements[0];
- power_limit = &limits->package.elements[1];
- if (!(domain_type->integer.value & SPL_DOMAINTYPE_WIFI)) {
- IWL_DEBUG_INFO(trans, "WiFi power is not limited\n");
+ if (!data_pkg) {
+ IWL_DEBUG_INFO(trans,
+ "No element for the WiFi domain returned by the SPLC method.\n");
return 0;
}
- return power_limit->integer.value;
+ dflt_pwr_limit = &data_pkg->package.elements[1];
+ return dflt_pwr_limit->integer.value;
}
static void set_dflt_pwr_limit(struct iwl_trans *trans, struct pci_dev *pdev)
{
acpi_handle pxsx_handle;
acpi_handle handle;
- struct acpi_buffer splx = {ACPI_ALLOCATE_BUFFER, NULL};
+ struct acpi_buffer splc = {ACPI_ALLOCATE_BUFFER, NULL};
acpi_status status;
pxsx_handle = ACPI_HANDLE(&pdev->dev);
@@ -593,23 +609,24 @@ static void set_dflt_pwr_limit(struct iwl_trans *trans, struct pci_dev *pdev)
}
/* Get the method's handle */
- status = acpi_get_handle(pxsx_handle, (acpi_string)SPL_METHOD, &handle);
+ status = acpi_get_handle(pxsx_handle, (acpi_string)ACPI_SPLC_METHOD,
+ &handle);
if (ACPI_FAILURE(status)) {
- IWL_DEBUG_INFO(trans, "SPL method not found\n");
+ IWL_DEBUG_INFO(trans, "SPLC method not found\n");
return;
}
/* Call SPLC with no arguments */
- status = acpi_evaluate_object(handle, NULL, NULL, &splx);
+ status = acpi_evaluate_object(handle, NULL, NULL, &splc);
if (ACPI_FAILURE(status)) {
IWL_ERR(trans, "SPLC invocation failed (0x%x)\n", status);
return;
}
- trans->dflt_pwr_limit = splx_get_pwr_limit(trans, splx.pointer);
+ trans->dflt_pwr_limit = splc_get_pwr_limit(trans, splc.pointer);
IWL_DEBUG_INFO(trans, "Default power limit set to %lld\n",
trans->dflt_pwr_limit);
- kfree(splx.pointer);
+ kfree(splc.pointer);
}
#else /* CONFIG_ACPI */
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
index e9a278b60dfd..5f840f16f40b 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
@@ -592,6 +592,7 @@ error:
static int iwl_pcie_txq_init(struct iwl_trans *trans, struct iwl_txq *txq,
int slots_num, u32 txq_id)
{
+ struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
int ret;
txq->need_update = false;
@@ -606,6 +607,13 @@ static int iwl_pcie_txq_init(struct iwl_trans *trans, struct iwl_txq *txq,
return ret;
spin_lock_init(&txq->lock);
+
+ if (txq_id == trans_pcie->cmd_queue) {
+ static struct lock_class_key iwl_pcie_cmd_queue_lock_class;
+
+ lockdep_set_class(&txq->lock, &iwl_pcie_cmd_queue_lock_class);
+ }
+
__skb_queue_head_init(&txq->overflow_q);
/*
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index e17879dd5d5a..bf2744e1e3db 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -304,7 +304,7 @@ static void xennet_alloc_rx_buffers(struct netfront_queue *queue)
queue->rx_skbs[id] = skb;
ref = gnttab_claim_grant_reference(&queue->gref_rx_head);
- BUG_ON((signed short)ref < 0);
+ WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)(int)ref));
queue->grant_rx_ref[id] = ref;
page = skb_frag_page(&skb_shinfo(skb)->frags[0]);
@@ -428,7 +428,7 @@ static void xennet_tx_setup_grant(unsigned long gfn, unsigned int offset,
id = get_id_from_freelist(&queue->tx_skb_freelist, queue->tx_skbs);
tx = RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++);
ref = gnttab_claim_grant_reference(&queue->gref_tx_head);
- BUG_ON((signed short)ref < 0);
+ WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)(int)ref));
gnttab_grant_foreign_access_ref(ref, queue->info->xbdev->otherend_id,
gfn, GNTMAP_readonly);
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index ca1ad9ebbc92..a0649973ee5b 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -149,7 +149,7 @@ static inline bool inet6_exact_dif_match(struct net *net, struct sk_buff *skb)
{
#if defined(CONFIG_NET_L3_MASTER_DEV)
if (!net->ipv4.sysctl_tcp_l3mdev_accept &&
- ipv6_l3mdev_skb(IP6CB(skb)->flags))
+ skb && ipv6_l3mdev_skb(IP6CB(skb)->flags))
return true;
#endif
return false;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 91ee3643ccc8..bf04a46f6d5b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3354,6 +3354,21 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
bool is_skb_forwardable(const struct net_device *dev,
const struct sk_buff *skb);
+static __always_inline int ____dev_forward_skb(struct net_device *dev,
+ struct sk_buff *skb)
+{
+ if (skb_orphan_frags(skb, GFP_ATOMIC) ||
+ unlikely(!is_skb_forwardable(dev, skb))) {
+ atomic_long_inc(&dev->rx_dropped);
+ kfree_skb(skb);
+ return NET_RX_DROP;
+ }
+
+ skb_scrub_packet(skb, true);
+ skb->priority = 0;
+ return 0;
+}
+
void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev);
extern int netdev_budget;
diff --git a/include/net/ip.h b/include/net/ip.h
index 5413883ac47f..d3a107850a41 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -47,8 +47,7 @@ struct inet_skb_parm {
#define IPSKB_REROUTED BIT(4)
#define IPSKB_DOREDIRECT BIT(5)
#define IPSKB_FRAG_PMTU BIT(6)
-#define IPSKB_FRAG_SEGS BIT(7)
-#define IPSKB_L3SLAVE BIT(8)
+#define IPSKB_L3SLAVE BIT(7)
u16 frag_max_size;
};
diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index 20ed9699fcd4..1b1cf33cbfb0 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -146,6 +146,7 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb,
{
int pkt_len, err;
+ memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
pkt_len = skb->len - skb_inner_network_offset(skb);
err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb);
if (unlikely(net_xmit_eval(err)))
diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h
index 498814626e28..1723a67c0b0a 100644
--- a/include/net/netfilter/nf_conntrack_labels.h
+++ b/include/net/netfilter/nf_conntrack_labels.h
@@ -30,8 +30,7 @@ static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct)
if (net->ct.labels_used == 0)
return NULL;
- return nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS,
- sizeof(struct nf_conn_labels), GFP_ATOMIC);
+ return nf_ct_ext_add(ct, NF_CT_EXT_LABELS, GFP_ATOMIC);
#else
return NULL;
#endif
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 5031e072567b..d79d1e9b9546 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -145,7 +145,7 @@ static inline enum nft_registers nft_type_to_reg(enum nft_data_types type)
return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE;
}
-unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest);
+int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest);
unsigned int nft_parse_register(const struct nlattr *attr);
int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg);
@@ -542,7 +542,8 @@ void *nft_set_elem_init(const struct nft_set *set,
const struct nft_set_ext_tmpl *tmpl,
const u32 *key, const u32 *data,
u64 timeout, gfp_t gfp);
-void nft_set_elem_destroy(const struct nft_set *set, void *elem);
+void nft_set_elem_destroy(const struct nft_set *set, void *elem,
+ bool destroy_expr);
/**
* struct nft_set_gc_batch_head - nf_tables set garbage collection batch
@@ -693,7 +694,6 @@ static inline int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src)
{
int err;
- __module_get(src->ops->type->owner);
if (src->ops->clone) {
dst->ops = src->ops;
err = src->ops->clone(dst, src);
@@ -702,6 +702,8 @@ static inline int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src)
} else {
memcpy(dst, src, src->ops->size);
}
+
+ __module_get(src->ops->type->owner);
return 0;
}
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 87a7f42e7639..31acc3f4f132 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -152,7 +152,7 @@ void sctp_unhash_endpoint(struct sctp_endpoint *);
struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *,
struct sctphdr *, struct sctp_association **,
struct sctp_transport **);
-void sctp_err_finish(struct sock *, struct sctp_association *);
+void sctp_err_finish(struct sock *, struct sctp_transport *);
void sctp_icmp_frag_needed(struct sock *, struct sctp_association *,
struct sctp_transport *t, __u32 pmtu);
void sctp_icmp_redirect(struct sock *, struct sctp_transport *,
diff --git a/include/net/sock.h b/include/net/sock.h
index 73c6b008f1b7..92b269709b9a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1596,11 +1596,11 @@ static inline void sock_put(struct sock *sk)
void sock_gen_put(struct sock *sk);
int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested,
- unsigned int trim_cap);
+ unsigned int trim_cap, bool refcounted);
static inline int sk_receive_skb(struct sock *sk, struct sk_buff *skb,
const int nested)
{
- return __sk_receive_skb(sk, skb, nested, 1);
+ return __sk_receive_skb(sk, skb, nested, 1, true);
}
static inline void sk_tx_queue_set(struct sock *sk, int tx_queue)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 5b82d4d94834..123979fe12bf 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -805,7 +805,7 @@ static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
if (!net->ipv4.sysctl_tcp_l3mdev_accept &&
- ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags))
+ skb && ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags))
return true;
#endif
return false;
@@ -1220,6 +1220,7 @@ static inline void tcp_prequeue_init(struct tcp_sock *tp)
bool tcp_prequeue(struct sock *sk, struct sk_buff *skb);
bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb);
+int tcp_filter(struct sock *sk, struct sk_buff *skb);
#undef STATE_TRACE
diff --git a/include/uapi/linux/atm_zatm.h b/include/uapi/linux/atm_zatm.h
index 5cd4d4d2dd1d..9c9c6ad55f14 100644
--- a/include/uapi/linux/atm_zatm.h
+++ b/include/uapi/linux/atm_zatm.h
@@ -14,7 +14,6 @@
#include <linux/atmapi.h>
#include <linux/atmioc.h>
-#include <linux/time.h>
#define ZATM_GETPOOL _IOW('a',ATMIOC_SARPRV+1,struct atmif_sioc)
/* get pool statistics */
diff --git a/include/uapi/linux/bpqether.h b/include/uapi/linux/bpqether.h
index a6c35e1a89ad..05865edaefda 100644
--- a/include/uapi/linux/bpqether.h
+++ b/include/uapi/linux/bpqether.h
@@ -5,9 +5,7 @@
* Defines for the BPQETHER pseudo device driver
*/
-#ifndef __LINUX_IF_ETHER_H
#include <linux/if_ether.h>
-#endif
#define SIOCSBPQETHOPT (SIOCDEVPRIVATE+0) /* reserved */
#define SIOCSBPQETHADDR (SIOCDEVPRIVATE+1)
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 570eeca7bdfa..ad1bc67aff1b 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -687,7 +687,8 @@ static void delete_all_elements(struct bpf_htab *htab)
hlist_for_each_entry_safe(l, n, head, hash_node) {
hlist_del_rcu(&l->hash_node);
- htab_elem_free(htab, l);
+ if (l->state != HTAB_EXTRA_ELEM_USED)
+ htab_elem_free(htab, l);
}
}
}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 228f962447a5..237f3d6a7ddc 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -194,7 +194,7 @@ static int map_create(union bpf_attr *attr)
err = bpf_map_charge_memlock(map);
if (err)
- goto free_map;
+ goto free_map_nouncharge;
err = bpf_map_new_fd(map);
if (err < 0)
@@ -204,6 +204,8 @@ static int map_create(union bpf_attr *attr)
return err;
free_map:
+ bpf_map_uncharge_memlock(map);
+free_map_nouncharge:
map->ops->map_free(map);
return err;
}
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index b3f05ee20d18..cbb387a265db 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -54,7 +54,11 @@ static const struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1
[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING },
[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },};
-static const struct nla_policy cgroupstats_cmd_get_policy[CGROUPSTATS_CMD_ATTR_MAX+1] = {
+/*
+ * We have to use TASKSTATS_CMD_ATTR_MAX here, it is the maxattr in the family.
+ * Make sure they are always aligned.
+ */
+static const struct nla_policy cgroupstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1] = {
[CGROUPSTATS_CMD_ATTR_FD] = { .type = NLA_U32 },
};
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 8e999ffdf28b..8af9d25ff988 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1549,24 +1549,31 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len,
struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
struct sock *sk = sock->sk;
struct bcm_sock *bo = bcm_sk(sk);
+ int ret = 0;
if (len < sizeof(*addr))
return -EINVAL;
- if (bo->bound)
- return -EISCONN;
+ lock_sock(sk);
+
+ if (bo->bound) {
+ ret = -EISCONN;
+ goto fail;
+ }
/* bind a device to this socket */
if (addr->can_ifindex) {
struct net_device *dev;
dev = dev_get_by_index(&init_net, addr->can_ifindex);
- if (!dev)
- return -ENODEV;
-
+ if (!dev) {
+ ret = -ENODEV;
+ goto fail;
+ }
if (dev->type != ARPHRD_CAN) {
dev_put(dev);
- return -ENODEV;
+ ret = -ENODEV;
+ goto fail;
}
bo->ifindex = dev->ifindex;
@@ -1577,17 +1584,24 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len,
bo->ifindex = 0;
}
- bo->bound = 1;
-
if (proc_dir) {
/* unique socket address as filename */
sprintf(bo->procname, "%lu", sock_i_ino(sk));
bo->bcm_proc_read = proc_create_data(bo->procname, 0644,
proc_dir,
&bcm_proc_fops, sk);
+ if (!bo->bcm_proc_read) {
+ ret = -ENOMEM;
+ goto fail;
+ }
}
- return 0;
+ bo->bound = 1;
+
+fail:
+ release_sock(sk);
+
+ return ret;
}
static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
diff --git a/net/core/dev.c b/net/core/dev.c
index 820bac239738..6666b28b6815 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1766,19 +1766,14 @@ EXPORT_SYMBOL_GPL(is_skb_forwardable);
int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
{
- if (skb_orphan_frags(skb, GFP_ATOMIC) ||
- unlikely(!is_skb_forwardable(dev, skb))) {
- atomic_long_inc(&dev->rx_dropped);
- kfree_skb(skb);
- return NET_RX_DROP;
- }
+ int ret = ____dev_forward_skb(dev, skb);
- skb_scrub_packet(skb, true);
- skb->priority = 0;
- skb->protocol = eth_type_trans(skb, dev);
- skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+ if (likely(!ret)) {
+ skb->protocol = eth_type_trans(skb, dev);
+ skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+ }
- return 0;
+ return ret;
}
EXPORT_SYMBOL_GPL(__dev_forward_skb);
@@ -2484,7 +2479,7 @@ int skb_checksum_help(struct sk_buff *skb)
goto out;
}
- *(__sum16 *)(skb->data + offset) = csum_fold(csum);
+ *(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0;
out_set_summed:
skb->ip_summed = CHECKSUM_NONE;
out:
diff --git a/net/core/filter.c b/net/core/filter.c
index 00351cdf7d0c..b391209838ef 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1628,6 +1628,19 @@ static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
return dev_forward_skb(dev, skb);
}
+static inline int __bpf_rx_skb_no_mac(struct net_device *dev,
+ struct sk_buff *skb)
+{
+ int ret = ____dev_forward_skb(dev, skb);
+
+ if (likely(!ret)) {
+ skb->dev = dev;
+ ret = netif_rx(skb);
+ }
+
+ return ret;
+}
+
static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
{
int ret;
@@ -1647,6 +1660,51 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
return ret;
}
+static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev,
+ u32 flags)
+{
+ /* skb->mac_len is not set on normal egress */
+ unsigned int mlen = skb->network_header - skb->mac_header;
+
+ __skb_pull(skb, mlen);
+
+ /* At ingress, the mac header has already been pulled once.
+ * At egress, skb_pospull_rcsum has to be done in case that
+ * the skb is originated from ingress (i.e. a forwarded skb)
+ * to ensure that rcsum starts at net header.
+ */
+ if (!skb_at_tc_ingress(skb))
+ skb_postpull_rcsum(skb, skb_mac_header(skb), mlen);
+ skb_pop_mac_header(skb);
+ skb_reset_mac_len(skb);
+ return flags & BPF_F_INGRESS ?
+ __bpf_rx_skb_no_mac(dev, skb) : __bpf_tx_skb(dev, skb);
+}
+
+static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev,
+ u32 flags)
+{
+ bpf_push_mac_rcsum(skb);
+ return flags & BPF_F_INGRESS ?
+ __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
+}
+
+static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
+ u32 flags)
+{
+ switch (dev->type) {
+ case ARPHRD_TUNNEL:
+ case ARPHRD_TUNNEL6:
+ case ARPHRD_SIT:
+ case ARPHRD_IPGRE:
+ case ARPHRD_VOID:
+ case ARPHRD_NONE:
+ return __bpf_redirect_no_mac(skb, dev, flags);
+ default:
+ return __bpf_redirect_common(skb, dev, flags);
+ }
+}
+
BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
{
struct net_device *dev;
@@ -1675,10 +1733,7 @@ BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
return -ENOMEM;
}
- bpf_push_mac_rcsum(clone);
-
- return flags & BPF_F_INGRESS ?
- __bpf_rx_skb(dev, clone) : __bpf_tx_skb(dev, clone);
+ return __bpf_redirect(clone, dev, flags);
}
static const struct bpf_func_proto bpf_clone_redirect_proto = {
@@ -1722,10 +1777,7 @@ int skb_do_redirect(struct sk_buff *skb)
return -EINVAL;
}
- bpf_push_mac_rcsum(skb);
-
- return ri->flags & BPF_F_INGRESS ?
- __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
+ return __bpf_redirect(skb, dev, ri->flags);
}
static const struct bpf_func_proto bpf_redirect_proto = {
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index ab193e5def07..69e4463a4b1b 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -122,7 +122,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
struct flow_dissector_key_keyid *key_keyid;
bool skip_vlan = false;
u8 ip_proto = 0;
- bool ret = false;
+ bool ret;
if (!data) {
data = skb->data;
@@ -549,12 +549,17 @@ ip_proto_again:
out_good:
ret = true;
-out_bad:
+ key_control->thoff = (u16)nhoff;
+out:
key_basic->n_proto = proto;
key_basic->ip_proto = ip_proto;
- key_control->thoff = (u16)nhoff;
return ret;
+
+out_bad:
+ ret = false;
+ key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen);
+ goto out;
}
EXPORT_SYMBOL(__skb_flow_dissect);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index fb7348f13501..db313ec7af32 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -275,6 +275,7 @@ int rtnl_unregister(int protocol, int msgtype)
rtnl_msg_handlers[protocol][msgindex].doit = NULL;
rtnl_msg_handlers[protocol][msgindex].dumpit = NULL;
+ rtnl_msg_handlers[protocol][msgindex].calcit = NULL;
return 0;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index c73e28fc9c2a..5e3ca414357e 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -453,7 +453,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
EXPORT_SYMBOL(sock_queue_rcv_skb);
int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
- const int nested, unsigned int trim_cap)
+ const int nested, unsigned int trim_cap, bool refcounted)
{
int rc = NET_RX_SUCCESS;
@@ -487,7 +487,8 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
bh_unlock_sock(sk);
out:
- sock_put(sk);
+ if (refcounted)
+ sock_put(sk);
return rc;
discard_and_relse:
kfree_skb(skb);
@@ -1543,6 +1544,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
newsk->sk_err = 0;
+ newsk->sk_err_soft = 0;
newsk->sk_priority = 0;
newsk->sk_incoming_cpu = raw_smp_processor_id();
atomic64_set(&newsk->sk_cookie, 0);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 345a3aeb8c7e..b567c8725aea 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -235,7 +235,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
{
const struct iphdr *iph = (struct iphdr *)skb->data;
const u8 offset = iph->ihl << 2;
- const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset);
+ const struct dccp_hdr *dh;
struct dccp_sock *dp;
struct inet_sock *inet;
const int type = icmp_hdr(skb)->type;
@@ -245,11 +245,13 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
int err;
struct net *net = dev_net(skb->dev);
- if (skb->len < offset + sizeof(*dh) ||
- skb->len < offset + __dccp_basic_hdr_len(dh)) {
- __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
- return;
- }
+ /* Only need dccph_dport & dccph_sport which are the first
+ * 4 bytes in dccp header.
+ * Our caller (icmp_socket_deliver()) already pulled 8 bytes for us.
+ */
+ BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8);
+ BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
+ dh = (struct dccp_hdr *)(skb->data + offset);
sk = __inet_lookup_established(net, &dccp_hashinfo,
iph->daddr, dh->dccph_dport,
@@ -868,7 +870,7 @@ lookup:
goto discard_and_relse;
nf_reset(skb);
- return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4);
+ return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4, refcounted);
no_dccp_socket:
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 3828f94b234c..715e5d1dc107 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -70,7 +70,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
- const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset);
+ const struct dccp_hdr *dh;
struct dccp_sock *dp;
struct ipv6_pinfo *np;
struct sock *sk;
@@ -78,12 +78,13 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
__u64 seq;
struct net *net = dev_net(skb->dev);
- if (skb->len < offset + sizeof(*dh) ||
- skb->len < offset + __dccp_basic_hdr_len(dh)) {
- __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
- ICMP6_MIB_INERRORS);
- return;
- }
+ /* Only need dccph_dport & dccph_sport which are the first
+ * 4 bytes in dccp header.
+ * Our caller (icmpv6_notify()) already pulled 8 bytes for us.
+ */
+ BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8);
+ BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
+ dh = (struct dccp_hdr *)(skb->data + offset);
sk = __inet6_lookup_established(net, &dccp_hashinfo,
&hdr->daddr, dh->dccph_dport,
@@ -738,7 +739,8 @@ lookup:
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;
- return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4) ? -1 : 0;
+ return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4,
+ refcounted) ? -1 : 0;
no_dccp_socket:
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
@@ -956,6 +958,7 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = {
.getsockopt = ipv6_getsockopt,
.addr2sockaddr = inet6_csk_addr2sockaddr,
.sockaddr_len = sizeof(struct sockaddr_in6),
+ .bind_conflict = inet6_csk_bind_conflict,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_ipv6_setsockopt,
.compat_getsockopt = compat_ipv6_getsockopt,
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 41e65804ddf5..9fe25bf63296 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1009,6 +1009,10 @@ void dccp_close(struct sock *sk, long timeout)
__kfree_skb(skb);
}
+ /* If socket has been already reset kill it. */
+ if (sk->sk_state == DCCP_CLOSED)
+ goto adjudge_to_death;
+
if (data_was_unread) {
/* Unread data was tossed, send an appropriate Reset Code */
DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 9648c97e541f..5ddf5cda07f4 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -533,9 +533,9 @@ EXPORT_SYMBOL(inet_dgram_connect);
static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)
{
- DEFINE_WAIT(wait);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+ add_wait_queue(sk_sleep(sk), &wait);
sk->sk_write_pending += writebias;
/* Basic assumption: if someone sets sk->sk_err, he _must_
@@ -545,13 +545,12 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)
*/
while ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
release_sock(sk);
- timeo = schedule_timeout(timeo);
+ timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
lock_sock(sk);
if (signal_pending(current) || !timeo)
break;
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
}
- finish_wait(sk_sleep(sk), &wait);
+ remove_wait_queue(sk_sleep(sk), &wait);
sk->sk_write_pending -= writebias;
return timeo;
}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 31cef3602585..4cff74d4133f 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2413,22 +2413,19 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
struct key_vector *l, **tp = &iter->tnode;
t_key key;
- /* use cache location of next-to-find key */
+ /* use cached location of previously found key */
if (iter->pos > 0 && pos >= iter->pos) {
- pos -= iter->pos;
key = iter->key;
} else {
- iter->pos = 0;
+ iter->pos = 1;
key = 0;
}
- while ((l = leaf_walk_rcu(tp, key)) != NULL) {
+ pos -= iter->pos;
+
+ while ((l = leaf_walk_rcu(tp, key)) && (pos-- > 0)) {
key = l->key + 1;
iter->pos++;
-
- if (--pos <= 0)
- break;
-
l = NULL;
/* handle unlikely case of a key wrap */
@@ -2437,7 +2434,7 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
}
if (l)
- iter->key = key; /* remember it */
+ iter->key = l->key; /* remember it */
else
iter->pos = 0; /* forget it */
@@ -2465,7 +2462,7 @@ static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
return fib_route_get_idx(iter, *pos);
iter->pos = 0;
- iter->key = 0;
+ iter->key = KEY_MAX;
return SEQ_START_TOKEN;
}
@@ -2474,7 +2471,7 @@ static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct fib_route_iter *iter = seq->private;
struct key_vector *l = NULL;
- t_key key = iter->key;
+ t_key key = iter->key + 1;
++*pos;
@@ -2483,7 +2480,7 @@ static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
l = leaf_walk_rcu(&iter->tnode, key);
if (l) {
- iter->key = l->key + 1;
+ iter->key = l->key;
iter->pos++;
} else {
iter->pos = 0;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 38abe70e595f..48734ee6293f 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -477,7 +477,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
fl4->flowi4_proto = IPPROTO_ICMP;
fl4->fl4_icmp_type = type;
fl4->fl4_icmp_code = code;
- fl4->flowi4_oif = l3mdev_master_ifindex(skb_in->dev);
+ fl4->flowi4_oif = l3mdev_master_ifindex(skb_dst(skb_in)->dev);
security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
rt = __ip_route_output_key_hash(net, fl4,
@@ -502,7 +502,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
if (err)
goto relookup_failed;
- if (inet_addr_type_dev_table(net, skb_in->dev,
+ if (inet_addr_type_dev_table(net, skb_dst(skb_in)->dev,
fl4_dec.saddr) == RTN_LOCAL) {
rt2 = __ip_route_output_key(net, &fl4_dec);
if (IS_ERR(rt2))
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 8b4ffd216839..9f0a7b96646f 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -117,7 +117,7 @@ int ip_forward(struct sk_buff *skb)
if (opt->is_strictroute && rt->rt_uses_gateway)
goto sr_failed;
- IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS;
+ IPCB(skb)->flags |= IPSKB_FORWARDED;
mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
if (ip_exceeds_mtu(skb, mtu)) {
IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 03e7f7310423..105908d841a3 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -239,19 +239,23 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk,
struct sk_buff *segs;
int ret = 0;
- /* common case: fragmentation of segments is not allowed,
- * or seglen is <= mtu
+ /* common case: seglen is <= mtu
*/
- if (((IPCB(skb)->flags & IPSKB_FRAG_SEGS) == 0) ||
- skb_gso_validate_mtu(skb, mtu))
+ if (skb_gso_validate_mtu(skb, mtu))
return ip_finish_output2(net, sk, skb);
- /* Slowpath - GSO segment length is exceeding the dst MTU.
+ /* Slowpath - GSO segment length exceeds the egress MTU.
*
- * This can happen in two cases:
- * 1) TCP GRO packet, DF bit not set
- * 2) skb arrived via virtio-net, we thus get TSO/GSO skbs directly
- * from host network stack.
+ * This can happen in several cases:
+ * - Forwarding of a TCP GRO skb, when DF flag is not set.
+ * - Forwarding of an skb that arrived on a virtualization interface
+ * (virtio-net/vhost/tap) with TSO/GSO size set by other network
+ * stack.
+ * - Local GSO skb transmitted on an NETIF_F_TSO tunnel stacked over an
+ * interface with a smaller MTU.
+ * - Arriving GRO skb (or GSO skb in a virtualized environment) that is
+ * bridged to a NETIF_F_TSO tunnel stacked over an interface with an
+ * insufficent MTU.
*/
features = netif_skb_features(skb);
BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET);
@@ -1579,7 +1583,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
}
oif = arg->bound_dev_if;
- oif = oif ? : skb->skb_iif;
+ if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
+ oif = skb->skb_iif;
flowi4_init_output(&fl4, oif,
IP4_REPLY_MARK(net, skb->mark),
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 777bc1883870..fed3d29f9eb3 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -63,7 +63,6 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
int pkt_len = skb->len - skb_inner_network_offset(skb);
struct net *net = dev_net(rt->dst.dev);
struct net_device *dev = skb->dev;
- int skb_iif = skb->skb_iif;
struct iphdr *iph;
int err;
@@ -73,16 +72,6 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
skb_dst_set(skb, &rt->dst);
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
- if (skb_iif && !(df & htons(IP_DF))) {
- /* Arrived from an ingress interface, got encapsulated, with
- * fragmentation of encapulating frames allowed.
- * If skb is gso, the resulting encapsulated network segments
- * may exceed dst mtu.
- * Allow IP Fragmentation of segments.
- */
- IPCB(skb)->flags |= IPSKB_FRAG_SEGS;
- }
-
/* Push down and install the IP header. */
skb_push(skb, sizeof(struct iphdr));
skb_reset_network_header(skb);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 5f006e13de56..27089f5ebbb1 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1749,7 +1749,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
vif->dev->stats.tx_bytes += skb->len;
}
- IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS;
+ IPCB(skb)->flags |= IPSKB_FORWARDED;
/* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
* not only before forwarding, but after forwarding on all output
diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c
index bf855e64fc45..0c01a270bf9f 100644
--- a/net/ipv4/netfilter/nft_dup_ipv4.c
+++ b/net/ipv4/netfilter/nft_dup_ipv4.c
@@ -28,7 +28,7 @@ static void nft_dup_ipv4_eval(const struct nft_expr *expr,
struct in_addr gw = {
.s_addr = (__force __be32)regs->data[priv->sreg_addr],
};
- int oif = regs->data[priv->sreg_dev];
+ int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1;
nf_dup_ipv4(pkt->net, pkt->skb, pkt->hook, &gw, oif);
}
@@ -59,7 +59,9 @@ static int nft_dup_ipv4_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
struct nft_dup_ipv4 *priv = nft_expr_priv(expr);
- if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr) ||
+ if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr))
+ goto nla_put_failure;
+ if (priv->sreg_dev &&
nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev))
goto nla_put_failure;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 62d4d90c1389..2a57566e6e91 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -753,7 +753,9 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
goto reject_redirect;
}
- n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
+ n = __ipv4_neigh_lookup(rt->dst.dev, new_gw);
+ if (!n)
+ n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
if (!IS_ERR(n)) {
if (!(n->nud_state & NUD_VALID)) {
neigh_event_send(n, NULL);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3251fe71f39f..814af89c1bd3 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1164,7 +1164,7 @@ restart:
err = -EPIPE;
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
- goto out_err;
+ goto do_error;
sg = !!(sk->sk_route_caps & NETIF_F_SG);
@@ -1241,7 +1241,7 @@ new_segment:
if (!skb_can_coalesce(skb, i, pfrag->page,
pfrag->offset)) {
- if (i == sysctl_max_skb_frags || !sg) {
+ if (i >= sysctl_max_skb_frags || !sg) {
tcp_mark_push(tp, skb);
goto new_segment;
}
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 10d728b6804c..ab37c6775630 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -56,6 +56,7 @@ struct dctcp {
u32 next_seq;
u32 ce_state;
u32 delayed_ack_reserved;
+ u32 loss_cwnd;
};
static unsigned int dctcp_shift_g __read_mostly = 4; /* g = 1/2^4 */
@@ -96,6 +97,7 @@ static void dctcp_init(struct sock *sk)
ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
ca->delayed_ack_reserved = 0;
+ ca->loss_cwnd = 0;
ca->ce_state = 0;
dctcp_reset(tp, ca);
@@ -111,9 +113,10 @@ static void dctcp_init(struct sock *sk)
static u32 dctcp_ssthresh(struct sock *sk)
{
- const struct dctcp *ca = inet_csk_ca(sk);
+ struct dctcp *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
+ ca->loss_cwnd = tp->snd_cwnd;
return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U);
}
@@ -308,12 +311,20 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr,
return 0;
}
+static u32 dctcp_cwnd_undo(struct sock *sk)
+{
+ const struct dctcp *ca = inet_csk_ca(sk);
+
+ return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
+}
+
static struct tcp_congestion_ops dctcp __read_mostly = {
.init = dctcp_init,
.in_ack_event = dctcp_update_alpha,
.cwnd_event = dctcp_cwnd_event,
.ssthresh = dctcp_ssthresh,
.cong_avoid = tcp_reno_cong_avoid,
+ .undo_cwnd = dctcp_cwnd_undo,
.set_state = dctcp_state,
.get_info = dctcp_get_info,
.flags = TCP_CONG_NEEDS_ECN,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 61b7be303eec..2259114c7242 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1564,6 +1564,21 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(tcp_add_backlog);
+int tcp_filter(struct sock *sk, struct sk_buff *skb)
+{
+ struct tcphdr *th = (struct tcphdr *)skb->data;
+ unsigned int eaten = skb->len;
+ int err;
+
+ err = sk_filter_trim_cap(sk, skb, th->doff * 4);
+ if (!err) {
+ eaten -= skb->len;
+ TCP_SKB_CB(skb)->end_seq -= eaten;
+ }
+ return err;
+}
+EXPORT_SYMBOL(tcp_filter);
+
/*
* From tcp_input.c
*/
@@ -1676,8 +1691,10 @@ process:
nf_reset(skb);
- if (sk_filter(sk, skb))
+ if (tcp_filter(sk, skb))
goto discard_and_relse;
+ th = (const struct tcphdr *)skb->data;
+ iph = ip_hdr(skb);
skb->dev = NULL;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index bd59c343d35f..7370ad2e693a 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -448,7 +448,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
if (__ipv6_addr_needs_scope_id(addr_type))
iif = skb->dev->ifindex;
else
- iif = l3mdev_master_ifindex(skb->dev);
+ iif = l3mdev_master_ifindex(skb_dst(skb)->dev);
/*
* Must not send error if the source does not uniquely
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 6001e781164e..59eb4ed99ce8 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1366,7 +1366,7 @@ emsgsize:
if (((length > mtu) ||
(skb && skb_is_gso(skb))) &&
(sk->sk_protocol == IPPROTO_UDP) &&
- (rt->dst.dev->features & NETIF_F_UFO) &&
+ (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
(sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) {
err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
hh_len, fragheaderlen, exthdrlen,
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
index a7520528ecd2..b283f293ee4a 100644
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -88,9 +88,6 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
uh->len = htons(skb->len);
- memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
- IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED
- | IPSKB_REROUTED);
skb_dst_set(skb, dst);
udp6_set_csum(nocheck, skb, saddr, daddr, skb->len);
diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c
index 8bfd470cbe72..831f86e1ec08 100644
--- a/net/ipv6/netfilter/nft_dup_ipv6.c
+++ b/net/ipv6/netfilter/nft_dup_ipv6.c
@@ -26,7 +26,7 @@ static void nft_dup_ipv6_eval(const struct nft_expr *expr,
{
struct nft_dup_ipv6 *priv = nft_expr_priv(expr);
struct in6_addr *gw = (struct in6_addr *)&regs->data[priv->sreg_addr];
- int oif = regs->data[priv->sreg_dev];
+ int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1;
nf_dup_ipv6(pkt->net, pkt->skb, pkt->hook, gw, oif);
}
@@ -57,7 +57,9 @@ static int nft_dup_ipv6_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
struct nft_dup_ipv6 *priv = nft_expr_priv(expr);
- if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr) ||
+ if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr))
+ goto nla_put_failure;
+ if (priv->sreg_dev &&
nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev))
goto nla_put_failure;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 947ed1ded026..1b57e11e6e0d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1364,6 +1364,9 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
if (rt6->rt6i_flags & RTF_LOCAL)
return;
+ if (dst_metric_locked(dst, RTAX_MTU))
+ return;
+
dst_confirm(dst);
mtu = max_t(u32, mtu, IPV6_MIN_MTU);
if (mtu >= dst_mtu(dst))
@@ -2758,6 +2761,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
PMTU discouvery.
*/
if (rt->dst.dev == arg->dev &&
+ dst_metric_raw(&rt->dst, RTAX_MTU) &&
!dst_metric_locked(&rt->dst, RTAX_MTU)) {
if (rt->rt6i_flags & RTF_CACHE) {
/* For RTF_CACHE with rt6i_pmtu == 0
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5a27ab4eab39..b9f1fee9a886 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -818,8 +818,12 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
fl6.flowi6_proto = IPPROTO_TCP;
if (rt6_need_strict(&fl6.daddr) && !oif)
fl6.flowi6_oif = tcp_v6_iif(skb);
- else
- fl6.flowi6_oif = oif ? : skb->skb_iif;
+ else {
+ if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
+ oif = skb->skb_iif;
+
+ fl6.flowi6_oif = oif;
+ }
fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
fl6.fl6_dport = t1->dest;
@@ -1225,7 +1229,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
if (skb->protocol == htons(ETH_P_IP))
return tcp_v4_do_rcv(sk, skb);
- if (sk_filter(sk, skb))
+ if (tcp_filter(sk, skb))
goto discard;
/*
@@ -1453,8 +1457,10 @@ process:
if (tcp_v6_inbound_md5_hash(sk, skb))
goto discard_and_relse;
- if (sk_filter(sk, skb))
+ if (tcp_filter(sk, skb))
goto discard_and_relse;
+ th = (const struct tcphdr *)skb->data;
+ hdr = ipv6_hdr(skb);
skb->dev = NULL;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index c3c809b2e712..a6e44ef2ec9a 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2845,7 +2845,7 @@ static struct genl_family ip_vs_genl_family = {
.hdrsize = 0,
.name = IPVS_GENL_NAME,
.version = IPVS_GENL_VERSION,
- .maxattr = IPVS_CMD_MAX,
+ .maxattr = IPVS_CMD_ATTR_MAX,
.netnsok = true, /* Make ipvsadm to work on netns */
};
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 1b07578bedf3..9350530c16c1 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -283,6 +283,7 @@ struct ip_vs_sync_buff {
*/
static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho)
{
+ memset(ho, 0, sizeof(*ho));
ho->init_seq = get_unaligned_be32(&no->init_seq);
ho->delta = get_unaligned_be32(&no->delta);
ho->previous_delta = get_unaligned_be32(&no->previous_delta);
@@ -917,8 +918,10 @@ static void ip_vs_proc_conn(struct netns_ipvs *ipvs, struct ip_vs_conn_param *pa
kfree(param->pe_data);
}
- if (opt)
- memcpy(&cp->in_seq, opt, sizeof(*opt));
+ if (opt) {
+ cp->in_seq = opt->in_seq;
+ cp->out_seq = opt->out_seq;
+ }
atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs));
cp->state = state;
cp->old_state = cp->state;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index df2f5a3901df..0f87e5d21be7 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -76,6 +76,7 @@ struct conntrack_gc_work {
struct delayed_work dwork;
u32 last_bucket;
bool exiting;
+ long next_gc_run;
};
static __read_mostly struct kmem_cache *nf_conntrack_cachep;
@@ -83,9 +84,11 @@ static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
static __read_mostly bool nf_conntrack_locks_all;
+/* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */
#define GC_MAX_BUCKETS_DIV 64u
-#define GC_MAX_BUCKETS 8192u
-#define GC_INTERVAL (5 * HZ)
+/* upper bound of scan intervals */
+#define GC_INTERVAL_MAX (2 * HZ)
+/* maximum conntracks to evict per gc run */
#define GC_MAX_EVICTS 256u
static struct conntrack_gc_work conntrack_gc_work;
@@ -936,13 +939,13 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
static void gc_worker(struct work_struct *work)
{
unsigned int i, goal, buckets = 0, expired_count = 0;
- unsigned long next_run = GC_INTERVAL;
- unsigned int ratio, scanned = 0;
struct conntrack_gc_work *gc_work;
+ unsigned int ratio, scanned = 0;
+ unsigned long next_run;
gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
- goal = min(nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV, GC_MAX_BUCKETS);
+ goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV;
i = gc_work->last_bucket;
do {
@@ -982,17 +985,47 @@ static void gc_worker(struct work_struct *work)
if (gc_work->exiting)
return;
+ /*
+ * Eviction will normally happen from the packet path, and not
+ * from this gc worker.
+ *
+ * This worker is only here to reap expired entries when system went
+ * idle after a busy period.
+ *
+ * The heuristics below are supposed to balance conflicting goals:
+ *
+ * 1. Minimize time until we notice a stale entry
+ * 2. Maximize scan intervals to not waste cycles
+ *
+ * Normally, expired_count will be 0, this increases the next_run time
+ * to priorize 2) above.
+ *
+ * As soon as a timed-out entry is found, move towards 1) and increase
+ * the scan frequency.
+ * In case we have lots of evictions next scan is done immediately.
+ */
ratio = scanned ? expired_count * 100 / scanned : 0;
- if (ratio >= 90 || expired_count == GC_MAX_EVICTS)
+ if (ratio >= 90 || expired_count == GC_MAX_EVICTS) {
+ gc_work->next_gc_run = 0;
next_run = 0;
+ } else if (expired_count) {
+ gc_work->next_gc_run /= 2U;
+ next_run = msecs_to_jiffies(1);
+ } else {
+ if (gc_work->next_gc_run < GC_INTERVAL_MAX)
+ gc_work->next_gc_run += msecs_to_jiffies(1);
+
+ next_run = gc_work->next_gc_run;
+ }
gc_work->last_bucket = i;
- schedule_delayed_work(&gc_work->dwork, next_run);
+ queue_delayed_work(system_long_wq, &gc_work->dwork, next_run);
}
static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
{
INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
+ gc_work->next_gc_run = GC_INTERVAL_MAX;
gc_work->exiting = false;
}
@@ -1885,7 +1918,7 @@ int nf_conntrack_init_start(void)
nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
conntrack_gc_work_init(&conntrack_gc_work);
- schedule_delayed_work(&conntrack_gc_work.dwork, GC_INTERVAL);
+ queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, GC_INTERVAL_MAX);
return 0;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 336e21559e01..7341adf7059d 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -138,9 +138,14 @@ __nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum)
for (i = 0; i < nf_ct_helper_hsize; i++) {
hlist_for_each_entry_rcu(h, &nf_ct_helper_hash[i], hnode) {
- if (!strcmp(h->name, name) &&
- h->tuple.src.l3num == l3num &&
- h->tuple.dst.protonum == protonum)
+ if (strcmp(h->name, name))
+ continue;
+
+ if (h->tuple.src.l3num != NFPROTO_UNSPEC &&
+ h->tuple.src.l3num != l3num)
+ continue;
+
+ if (h->tuple.dst.protonum == protonum)
return h;
}
}
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 621b81c7bddc..c3fc14e021ec 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -1436,9 +1436,12 @@ static int process_sip_request(struct sk_buff *skb, unsigned int protoff,
handler = &sip_handlers[i];
if (handler->request == NULL)
continue;
- if (*datalen < handler->len ||
+ if (*datalen < handler->len + 2 ||
strncasecmp(*dptr, handler->method, handler->len))
continue;
+ if ((*dptr)[handler->len] != ' ' ||
+ !isalpha((*dptr)[handler->len+1]))
+ continue;
if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CSEQ,
&matchoff, &matchlen) <= 0) {
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 24db22257586..026581b04ea8 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2956,12 +2956,14 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
if (err < 0)
- goto err2;
+ goto err3;
list_add_tail_rcu(&set->list, &table->sets);
table->use++;
return 0;
+err3:
+ ops->destroy(set);
err2:
kfree(set);
err1:
@@ -3452,14 +3454,15 @@ void *nft_set_elem_init(const struct nft_set *set,
return elem;
}
-void nft_set_elem_destroy(const struct nft_set *set, void *elem)
+void nft_set_elem_destroy(const struct nft_set *set, void *elem,
+ bool destroy_expr)
{
struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
nft_data_uninit(nft_set_ext_key(ext), NFT_DATA_VALUE);
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
nft_data_uninit(nft_set_ext_data(ext), set->dtype);
- if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
+ if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext));
kfree(elem);
@@ -3565,6 +3568,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
dreg = nft_type_to_reg(set->dtype);
list_for_each_entry(binding, &set->bindings, list) {
struct nft_ctx bind_ctx = {
+ .net = ctx->net,
.afi = ctx->afi,
.table = ctx->table,
.chain = (struct nft_chain *)binding->chain,
@@ -3812,7 +3816,7 @@ void nft_set_gc_batch_release(struct rcu_head *rcu)
gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu);
for (i = 0; i < gcb->head.cnt; i++)
- nft_set_elem_destroy(gcb->head.set, gcb->elems[i]);
+ nft_set_elem_destroy(gcb->head.set, gcb->elems[i], true);
kfree(gcb);
}
EXPORT_SYMBOL_GPL(nft_set_gc_batch_release);
@@ -4030,7 +4034,7 @@ static void nf_tables_commit_release(struct nft_trans *trans)
break;
case NFT_MSG_DELSETELEM:
nft_set_elem_destroy(nft_trans_elem_set(trans),
- nft_trans_elem(trans).priv);
+ nft_trans_elem(trans).priv, true);
break;
}
kfree(trans);
@@ -4171,7 +4175,7 @@ static void nf_tables_abort_release(struct nft_trans *trans)
break;
case NFT_MSG_NEWSETELEM:
nft_set_elem_destroy(nft_trans_elem_set(trans),
- nft_trans_elem(trans).priv);
+ nft_trans_elem(trans).priv, true);
break;
}
kfree(trans);
@@ -4421,7 +4425,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
* Otherwise a 0 is returned and the attribute value is stored in the
* destination variable.
*/
-unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest)
+int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest)
{
u32 val;
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 517f08767a3c..31ca94793aa9 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -44,18 +44,22 @@ static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr,
&regs->data[priv->sreg_key],
&regs->data[priv->sreg_data],
timeout, GFP_ATOMIC);
- if (elem == NULL) {
- if (set->size)
- atomic_dec(&set->nelems);
- return NULL;
- }
+ if (elem == NULL)
+ goto err1;
ext = nft_set_elem_ext(set, elem);
if (priv->expr != NULL &&
nft_expr_clone(nft_set_ext_expr(ext), priv->expr) < 0)
- return NULL;
+ goto err2;
return elem;
+
+err2:
+ nft_set_elem_destroy(set, elem, false);
+err1:
+ if (set->size)
+ atomic_dec(&set->nelems);
+ return NULL;
}
static void nft_dynset_eval(const struct nft_expr *expr,
@@ -139,6 +143,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
return PTR_ERR(set);
}
+ if (set->ops->update == NULL)
+ return -EOPNOTSUPP;
+
if (set->flags & NFT_SET_CONSTANT)
return -EBUSY;
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 3794cb2fc788..a3dface3e6e6 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -98,7 +98,7 @@ static bool nft_hash_update(struct nft_set *set, const u32 *key,
const struct nft_set_ext **ext)
{
struct nft_hash *priv = nft_set_priv(set);
- struct nft_hash_elem *he;
+ struct nft_hash_elem *he, *prev;
struct nft_hash_cmp_arg arg = {
.genmask = NFT_GENMASK_ANY,
.set = set,
@@ -112,15 +112,24 @@ static bool nft_hash_update(struct nft_set *set, const u32 *key,
he = new(set, expr, regs);
if (he == NULL)
goto err1;
- if (rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node,
- nft_hash_params))
+
+ prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node,
+ nft_hash_params);
+ if (IS_ERR(prev))
goto err2;
+
+ /* Another cpu may race to insert the element with the same key */
+ if (prev) {
+ nft_set_elem_destroy(set, he, true);
+ he = prev;
+ }
+
out:
*ext = &he->ext;
return true;
err2:
- nft_set_elem_destroy(set, he);
+ nft_set_elem_destroy(set, he, true);
err1:
return false;
}
@@ -332,7 +341,7 @@ static int nft_hash_init(const struct nft_set *set,
static void nft_hash_elem_destroy(void *ptr, void *arg)
{
- nft_set_elem_destroy((const struct nft_set *)arg, ptr);
+ nft_set_elem_destroy((const struct nft_set *)arg, ptr, true);
}
static void nft_hash_destroy(const struct nft_set *set)
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 38b5bda242f8..36493a7cae88 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -266,7 +266,7 @@ static void nft_rbtree_destroy(const struct nft_set *set)
while ((node = priv->root.rb_node) != NULL) {
rb_erase(node, &priv->root);
rbe = rb_entry(node, struct nft_rbtree_elem, node);
- nft_set_elem_destroy(set, rbe);
+ nft_set_elem_destroy(set, rbe, true);
}
}
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 69f78e96fdb4..b83e158e116a 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -44,7 +44,7 @@ connmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
u_int32_t newmark;
ct = nf_ct_get(skb, &ctinfo);
- if (ct == NULL)
+ if (ct == NULL || nf_ct_is_untracked(ct))
return XT_CONTINUE;
switch (info->mode) {
@@ -97,7 +97,7 @@ connmark_mt(const struct sk_buff *skb, struct xt_action_param *par)
const struct nf_conn *ct;
ct = nf_ct_get(skb, &ctinfo);
- if (ct == NULL)
+ if (ct == NULL || nf_ct_is_untracked(ct))
return false;
return ((ct->mark & info->mask) == info->mark) ^ info->invert;
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
index b2f0e986a6f4..a5546249fb10 100644
--- a/net/netlink/diag.c
+++ b/net/netlink/diag.c
@@ -178,11 +178,8 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
}
cb->args[1] = i;
} else {
- if (req->sdiag_protocol >= MAX_LINKS) {
- read_unlock(&nl_table_lock);
- rcu_read_unlock();
+ if (req->sdiag_protocol >= MAX_LINKS)
return -ENOENT;
- }
err = __netlink_diag_dump(skb, cb, req->sdiag_protocol, s_num);
}
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 23cc12639ba7..49c28e8ef01b 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -404,7 +404,7 @@ int __genl_register_family(struct genl_family *family)
err = genl_validate_assign_mc_groups(family);
if (err)
- goto errout_locked;
+ goto errout_free;
list_add_tail(&family->family_list, genl_family_chain(family->id));
genl_unlock_all();
@@ -417,6 +417,8 @@ int __genl_register_family(struct genl_family *family)
return 0;
+errout_free:
+ kfree(family->attrbuf);
errout_locked:
genl_unlock_all();
errout:
diff --git a/net/sctp/input.c b/net/sctp/input.c
index a2ea1d1cc06a..a01a56ec8b8c 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -181,9 +181,10 @@ int sctp_rcv(struct sk_buff *skb)
* bound to another interface, via SO_BINDTODEVICE, treat it as OOTB
*/
if (sk->sk_bound_dev_if && (sk->sk_bound_dev_if != af->skb_iif(skb))) {
- if (asoc) {
- sctp_association_put(asoc);
+ if (transport) {
+ sctp_transport_put(transport);
asoc = NULL;
+ transport = NULL;
} else {
sctp_endpoint_put(ep);
ep = NULL;
@@ -269,8 +270,8 @@ int sctp_rcv(struct sk_buff *skb)
bh_unlock_sock(sk);
/* Release the asoc/ep ref we took in the lookup calls. */
- if (asoc)
- sctp_association_put(asoc);
+ if (transport)
+ sctp_transport_put(transport);
else
sctp_endpoint_put(ep);
@@ -283,8 +284,8 @@ discard_it:
discard_release:
/* Release the asoc/ep ref we took in the lookup calls. */
- if (asoc)
- sctp_association_put(asoc);
+ if (transport)
+ sctp_transport_put(transport);
else
sctp_endpoint_put(ep);
@@ -300,6 +301,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk;
struct sctp_inq *inqueue = &chunk->rcvr->inqueue;
+ struct sctp_transport *t = chunk->transport;
struct sctp_ep_common *rcvr = NULL;
int backloged = 0;
@@ -351,7 +353,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb)
done:
/* Release the refs we took in sctp_add_backlog */
if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type)
- sctp_association_put(sctp_assoc(rcvr));
+ sctp_transport_put(t);
else if (SCTP_EP_TYPE_SOCKET == rcvr->type)
sctp_endpoint_put(sctp_ep(rcvr));
else
@@ -363,6 +365,7 @@ done:
static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb)
{
struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk;
+ struct sctp_transport *t = chunk->transport;
struct sctp_ep_common *rcvr = chunk->rcvr;
int ret;
@@ -373,7 +376,7 @@ static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb)
* from us
*/
if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type)
- sctp_association_hold(sctp_assoc(rcvr));
+ sctp_transport_hold(t);
else if (SCTP_EP_TYPE_SOCKET == rcvr->type)
sctp_endpoint_hold(sctp_ep(rcvr));
else
@@ -537,15 +540,15 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb,
return sk;
out:
- sctp_association_put(asoc);
+ sctp_transport_put(transport);
return NULL;
}
/* Common cleanup code for icmp/icmpv6 error handler. */
-void sctp_err_finish(struct sock *sk, struct sctp_association *asoc)
+void sctp_err_finish(struct sock *sk, struct sctp_transport *t)
{
bh_unlock_sock(sk);
- sctp_association_put(asoc);
+ sctp_transport_put(t);
}
/*
@@ -641,7 +644,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
}
out_unlock:
- sctp_err_finish(sk, asoc);
+ sctp_err_finish(sk, transport);
}
/*
@@ -952,11 +955,8 @@ static struct sctp_association *__sctp_lookup_association(
goto out;
asoc = t->asoc;
- sctp_association_hold(asoc);
*pt = t;
- sctp_transport_put(t);
-
out:
return asoc;
}
@@ -986,7 +986,7 @@ int sctp_has_association(struct net *net,
struct sctp_transport *transport;
if ((asoc = sctp_lookup_association(net, laddr, paddr, &transport))) {
- sctp_association_put(asoc);
+ sctp_transport_put(transport);
return 1;
}
@@ -1021,7 +1021,6 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net,
struct sctphdr *sh = sctp_hdr(skb);
union sctp_params params;
sctp_init_chunk_t *init;
- struct sctp_transport *transport;
struct sctp_af *af;
/*
@@ -1052,7 +1051,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net,
af->from_addr_param(paddr, params.addr, sh->source, 0);
- asoc = __sctp_lookup_association(net, laddr, paddr, &transport);
+ asoc = __sctp_lookup_association(net, laddr, paddr, transportp);
if (asoc)
return asoc;
}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index f473779e8b1c..176af3080a2b 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -198,7 +198,7 @@ static void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
}
out_unlock:
- sctp_err_finish(sk, asoc);
+ sctp_err_finish(sk, transport);
out:
if (likely(idev != NULL))
in6_dev_put(idev);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 9fbb6feb8c27..f23ad913dc7a 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1214,9 +1214,12 @@ static int __sctp_connect(struct sock *sk,
timeo = sock_sndtimeo(sk, f_flags & O_NONBLOCK);
- err = sctp_wait_for_connect(asoc, &timeo);
- if ((err == 0 || err == -EINPROGRESS) && assoc_id)
+ if (assoc_id)
*assoc_id = asoc->assoc_id;
+ err = sctp_wait_for_connect(asoc, &timeo);
+ /* Note: the asoc may be freed after the return of
+ * sctp_wait_for_connect.
+ */
/* Don't free association on exit. */
asoc = NULL;
@@ -4282,19 +4285,18 @@ static void sctp_shutdown(struct sock *sk, int how)
{
struct net *net = sock_net(sk);
struct sctp_endpoint *ep;
- struct sctp_association *asoc;
if (!sctp_style(sk, TCP))
return;
- if (how & SEND_SHUTDOWN) {
+ ep = sctp_sk(sk)->ep;
+ if (how & SEND_SHUTDOWN && !list_empty(&ep->asocs)) {
+ struct sctp_association *asoc;
+
sk->sk_state = SCTP_SS_CLOSING;
- ep = sctp_sk(sk)->ep;
- if (!list_empty(&ep->asocs)) {
- asoc = list_entry(ep->asocs.next,
- struct sctp_association, asocs);
- sctp_primitive_SHUTDOWN(net, asoc, NULL);
- }
+ asoc = list_entry(ep->asocs.next,
+ struct sctp_association, asocs);
+ sctp_primitive_SHUTDOWN(net, asoc, NULL);
}
}
@@ -4480,12 +4482,9 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *),
if (!transport || !sctp_transport_hold(transport))
goto out;
- sctp_association_hold(transport->asoc);
- sctp_transport_put(transport);
-
rcu_read_unlock();
err = cb(transport, p);
- sctp_association_put(transport->asoc);
+ sctp_transport_put(transport);
out:
return err;
diff --git a/net/socket.c b/net/socket.c
index 5a9bf5ee2464..272518b087c8 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2038,6 +2038,8 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
if (err)
break;
++datagrams;
+ if (msg_data_left(&msg_sys))
+ break;
cond_resched();
}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 145082e2ba36..5d1c14a2f268 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2812,7 +2812,8 @@ static int unix_seq_show(struct seq_file *seq, void *v)
i++;
}
for ( ; i < len; i++)
- seq_putc(seq, u->addr->name->sun_path[i]);
+ seq_putc(seq, u->addr->name->sun_path[i] ?:
+ '@');
}
unix_state_unlock(s);
seq_putc(seq, '\n');
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 12b7304d55dc..72c58675973e 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -27,6 +27,7 @@ hostprogs-y += xdp2
hostprogs-y += test_current_task_under_cgroup
hostprogs-y += trace_event
hostprogs-y += sampleip
+hostprogs-y += tc_l2_redirect
test_verifier-objs := test_verifier.o libbpf.o
test_maps-objs := test_maps.o libbpf.o
@@ -56,6 +57,7 @@ test_current_task_under_cgroup-objs := bpf_load.o libbpf.o \
test_current_task_under_cgroup_user.o
trace_event-objs := bpf_load.o libbpf.o trace_event_user.o
sampleip-objs := bpf_load.o libbpf.o sampleip_user.o
+tc_l2_redirect-objs := bpf_load.o libbpf.o tc_l2_redirect_user.o
# Tell kbuild to always build the programs
always := $(hostprogs-y)
@@ -72,6 +74,7 @@ always += test_probe_write_user_kern.o
always += trace_output_kern.o
always += tcbpf1_kern.o
always += tcbpf2_kern.o
+always += tc_l2_redirect_kern.o
always += lathist_kern.o
always += offwaketime_kern.o
always += spintest_kern.o
@@ -111,6 +114,7 @@ HOSTLOADLIBES_xdp2 += -lelf
HOSTLOADLIBES_test_current_task_under_cgroup += -lelf
HOSTLOADLIBES_trace_event += -lelf
HOSTLOADLIBES_sampleip += -lelf
+HOSTLOADLIBES_tc_l2_redirect += -l elf
# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
diff --git a/samples/bpf/tc_l2_redirect.sh b/samples/bpf/tc_l2_redirect.sh
new file mode 100755
index 000000000000..80a05591a140
--- /dev/null
+++ b/samples/bpf/tc_l2_redirect.sh
@@ -0,0 +1,173 @@
+#!/bin/bash
+
+[[ -z $TC ]] && TC='tc'
+[[ -z $IP ]] && IP='ip'
+
+REDIRECT_USER='./tc_l2_redirect'
+REDIRECT_BPF='./tc_l2_redirect_kern.o'
+
+RP_FILTER=$(< /proc/sys/net/ipv4/conf/all/rp_filter)
+IPV6_FORWARDING=$(< /proc/sys/net/ipv6/conf/all/forwarding)
+
+function config_common {
+ local tun_type=$1
+
+ $IP netns add ns1
+ $IP netns add ns2
+ $IP link add ve1 type veth peer name vens1
+ $IP link add ve2 type veth peer name vens2
+ $IP link set dev ve1 up
+ $IP link set dev ve2 up
+ $IP link set dev ve1 mtu 1500
+ $IP link set dev ve2 mtu 1500
+ $IP link set dev vens1 netns ns1
+ $IP link set dev vens2 netns ns2
+
+ $IP -n ns1 link set dev lo up
+ $IP -n ns1 link set dev vens1 up
+ $IP -n ns1 addr add 10.1.1.101/24 dev vens1
+ $IP -n ns1 addr add 2401:db01::65/64 dev vens1 nodad
+ $IP -n ns1 route add default via 10.1.1.1 dev vens1
+ $IP -n ns1 route add default via 2401:db01::1 dev vens1
+
+ $IP -n ns2 link set dev lo up
+ $IP -n ns2 link set dev vens2 up
+ $IP -n ns2 addr add 10.2.1.102/24 dev vens2
+ $IP -n ns2 addr add 2401:db02::66/64 dev vens2 nodad
+ $IP -n ns2 addr add 10.10.1.102 dev lo
+ $IP -n ns2 addr add 2401:face::66/64 dev lo nodad
+ $IP -n ns2 link add ipt2 type ipip local 10.2.1.102 remote 10.2.1.1
+ $IP -n ns2 link add ip6t2 type ip6tnl mode any local 2401:db02::66 remote 2401:db02::1
+ $IP -n ns2 link set dev ipt2 up
+ $IP -n ns2 link set dev ip6t2 up
+ $IP netns exec ns2 $TC qdisc add dev vens2 clsact
+ $IP netns exec ns2 $TC filter add dev vens2 ingress bpf da obj $REDIRECT_BPF sec drop_non_tun_vip
+ if [[ $tun_type == "ipip" ]]; then
+ $IP -n ns2 route add 10.1.1.0/24 dev ipt2
+ $IP netns exec ns2 sysctl -q -w net.ipv4.conf.all.rp_filter=0
+ $IP netns exec ns2 sysctl -q -w net.ipv4.conf.ipt2.rp_filter=0
+ else
+ $IP -n ns2 route add 10.1.1.0/24 dev ip6t2
+ $IP -n ns2 route add 2401:db01::/64 dev ip6t2
+ $IP netns exec ns2 sysctl -q -w net.ipv4.conf.all.rp_filter=0
+ $IP netns exec ns2 sysctl -q -w net.ipv4.conf.ip6t2.rp_filter=0
+ fi
+
+ $IP addr add 10.1.1.1/24 dev ve1
+ $IP addr add 2401:db01::1/64 dev ve1 nodad
+ $IP addr add 10.2.1.1/24 dev ve2
+ $IP addr add 2401:db02::1/64 dev ve2 nodad
+
+ $TC qdisc add dev ve2 clsact
+ $TC filter add dev ve2 ingress bpf da obj $REDIRECT_BPF sec l2_to_iptun_ingress_forward
+
+ sysctl -q -w net.ipv4.conf.all.rp_filter=0
+ sysctl -q -w net.ipv6.conf.all.forwarding=1
+}
+
+function cleanup {
+ set +e
+ [[ -z $DEBUG ]] || set +x
+ $IP netns delete ns1 >& /dev/null
+ $IP netns delete ns2 >& /dev/null
+ $IP link del ve1 >& /dev/null
+ $IP link del ve2 >& /dev/null
+ $IP link del ipt >& /dev/null
+ $IP link del ip6t >& /dev/null
+ sysctl -q -w net.ipv4.conf.all.rp_filter=$RP_FILTER
+ sysctl -q -w net.ipv6.conf.all.forwarding=$IPV6_FORWARDING
+ rm -f /sys/fs/bpf/tc/globals/tun_iface
+ [[ -z $DEBUG ]] || set -x
+ set -e
+}
+
+function l2_to_ipip {
+ echo -n "l2_to_ipip $1: "
+
+ local dir=$1
+
+ config_common ipip
+
+ $IP link add ipt type ipip external
+ $IP link set dev ipt up
+ sysctl -q -w net.ipv4.conf.ipt.rp_filter=0
+ sysctl -q -w net.ipv4.conf.ipt.forwarding=1
+
+ if [[ $dir == "egress" ]]; then
+ $IP route add 10.10.1.0/24 via 10.2.1.102 dev ve2
+ $TC filter add dev ve2 egress bpf da obj $REDIRECT_BPF sec l2_to_iptun_ingress_redirect
+ sysctl -q -w net.ipv4.conf.ve1.forwarding=1
+ else
+ $TC qdisc add dev ve1 clsact
+ $TC filter add dev ve1 ingress bpf da obj $REDIRECT_BPF sec l2_to_iptun_ingress_redirect
+ fi
+
+ $REDIRECT_USER -U /sys/fs/bpf/tc/globals/tun_iface -i $(< /sys/class/net/ipt/ifindex)
+
+ $IP netns exec ns1 ping -c1 10.10.1.102 >& /dev/null
+
+ if [[ $dir == "egress" ]]; then
+ # test direct egress to ve2 (i.e. not forwarding from
+ # ve1 to ve2).
+ ping -c1 10.10.1.102 >& /dev/null
+ fi
+
+ cleanup
+
+ echo "OK"
+}
+
+function l2_to_ip6tnl {
+ echo -n "l2_to_ip6tnl $1: "
+
+ local dir=$1
+
+ config_common ip6tnl
+
+ $IP link add ip6t type ip6tnl mode any external
+ $IP link set dev ip6t up
+ sysctl -q -w net.ipv4.conf.ip6t.rp_filter=0
+ sysctl -q -w net.ipv4.conf.ip6t.forwarding=1
+
+ if [[ $dir == "egress" ]]; then
+ $IP route add 10.10.1.0/24 via 10.2.1.102 dev ve2
+ $IP route add 2401:face::/64 via 2401:db02::66 dev ve2
+ $TC filter add dev ve2 egress bpf da obj $REDIRECT_BPF sec l2_to_ip6tun_ingress_redirect
+ sysctl -q -w net.ipv4.conf.ve1.forwarding=1
+ else
+ $TC qdisc add dev ve1 clsact
+ $TC filter add dev ve1 ingress bpf da obj $REDIRECT_BPF sec l2_to_ip6tun_ingress_redirect
+ fi
+
+ $REDIRECT_USER -U /sys/fs/bpf/tc/globals/tun_iface -i $(< /sys/class/net/ip6t/ifindex)
+
+ $IP netns exec ns1 ping -c1 10.10.1.102 >& /dev/null
+ $IP netns exec ns1 ping -6 -c1 2401:face::66 >& /dev/null
+
+ if [[ $dir == "egress" ]]; then
+ # test direct egress to ve2 (i.e. not forwarding from
+ # ve1 to ve2).
+ ping -c1 10.10.1.102 >& /dev/null
+ ping -6 -c1 2401:face::66 >& /dev/null
+ fi
+
+ cleanup
+
+ echo "OK"
+}
+
+cleanup
+test_names="l2_to_ipip l2_to_ip6tnl"
+test_dirs="ingress egress"
+if [[ $# -ge 2 ]]; then
+ test_names=$1
+ test_dirs=$2
+elif [[ $# -ge 1 ]]; then
+ test_names=$1
+fi
+
+for t in $test_names; do
+ for d in $test_dirs; do
+ $t $d
+ done
+done
diff --git a/samples/bpf/tc_l2_redirect_kern.c b/samples/bpf/tc_l2_redirect_kern.c
new file mode 100644
index 000000000000..92a44729dbe4
--- /dev/null
+++ b/samples/bpf/tc_l2_redirect_kern.c
@@ -0,0 +1,236 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <uapi/linux/ipv6.h>
+#include <uapi/linux/in.h>
+#include <uapi/linux/tcp.h>
+#include <uapi/linux/filter.h>
+#include <uapi/linux/pkt_cls.h>
+#include <net/ipv6.h>
+#include "bpf_helpers.h"
+
+#define _htonl __builtin_bswap32
+
+#define PIN_GLOBAL_NS 2
+struct bpf_elf_map {
+ __u32 type;
+ __u32 size_key;
+ __u32 size_value;
+ __u32 max_elem;
+ __u32 flags;
+ __u32 id;
+ __u32 pinning;
+};
+
+/* copy of 'struct ethhdr' without __packed */
+struct eth_hdr {
+ unsigned char h_dest[ETH_ALEN];
+ unsigned char h_source[ETH_ALEN];
+ unsigned short h_proto;
+};
+
+struct bpf_elf_map SEC("maps") tun_iface = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .size_key = sizeof(int),
+ .size_value = sizeof(int),
+ .pinning = PIN_GLOBAL_NS,
+ .max_elem = 1,
+};
+
+static __always_inline bool is_vip_addr(__be16 eth_proto, __be32 daddr)
+{
+ if (eth_proto == htons(ETH_P_IP))
+ return (_htonl(0xffffff00) & daddr) == _htonl(0x0a0a0100);
+ else if (eth_proto == htons(ETH_P_IPV6))
+ return (daddr == _htonl(0x2401face));
+
+ return false;
+}
+
+SEC("l2_to_iptun_ingress_forward")
+int _l2_to_iptun_ingress_forward(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key tkey = {};
+ void *data = (void *)(long)skb->data;
+ struct eth_hdr *eth = data;
+ void *data_end = (void *)(long)skb->data_end;
+ int key = 0, *ifindex;
+
+ int ret;
+
+ if (data + sizeof(*eth) > data_end)
+ return TC_ACT_OK;
+
+ ifindex = bpf_map_lookup_elem(&tun_iface, &key);
+ if (!ifindex)
+ return TC_ACT_OK;
+
+ if (eth->h_proto == htons(ETH_P_IP)) {
+ char fmt4[] = "ingress forward to ifindex:%d daddr4:%x\n";
+ struct iphdr *iph = data + sizeof(*eth);
+
+ if (data + sizeof(*eth) + sizeof(*iph) > data_end)
+ return TC_ACT_OK;
+
+ if (iph->protocol != IPPROTO_IPIP)
+ return TC_ACT_OK;
+
+ bpf_trace_printk(fmt4, sizeof(fmt4), *ifindex,
+ _htonl(iph->daddr));
+ return bpf_redirect(*ifindex, BPF_F_INGRESS);
+ } else if (eth->h_proto == htons(ETH_P_IPV6)) {
+ char fmt6[] = "ingress forward to ifindex:%d daddr6:%x::%x\n";
+ struct ipv6hdr *ip6h = data + sizeof(*eth);
+
+ if (data + sizeof(*eth) + sizeof(*ip6h) > data_end)
+ return TC_ACT_OK;
+
+ if (ip6h->nexthdr != IPPROTO_IPIP &&
+ ip6h->nexthdr != IPPROTO_IPV6)
+ return TC_ACT_OK;
+
+ bpf_trace_printk(fmt6, sizeof(fmt6), *ifindex,
+ _htonl(ip6h->daddr.s6_addr32[0]),
+ _htonl(ip6h->daddr.s6_addr32[3]));
+ return bpf_redirect(*ifindex, BPF_F_INGRESS);
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("l2_to_iptun_ingress_redirect")
+int _l2_to_iptun_ingress_redirect(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key tkey = {};
+ void *data = (void *)(long)skb->data;
+ struct eth_hdr *eth = data;
+ void *data_end = (void *)(long)skb->data_end;
+ int key = 0, *ifindex;
+
+ int ret;
+
+ if (data + sizeof(*eth) > data_end)
+ return TC_ACT_OK;
+
+ ifindex = bpf_map_lookup_elem(&tun_iface, &key);
+ if (!ifindex)
+ return TC_ACT_OK;
+
+ if (eth->h_proto == htons(ETH_P_IP)) {
+ char fmt4[] = "e/ingress redirect daddr4:%x to ifindex:%d\n";
+ struct iphdr *iph = data + sizeof(*eth);
+ __be32 daddr = iph->daddr;
+
+ if (data + sizeof(*eth) + sizeof(*iph) > data_end)
+ return TC_ACT_OK;
+
+ if (!is_vip_addr(eth->h_proto, daddr))
+ return TC_ACT_OK;
+
+ bpf_trace_printk(fmt4, sizeof(fmt4), _htonl(daddr), *ifindex);
+ } else {
+ return TC_ACT_OK;
+ }
+
+ tkey.tunnel_id = 10000;
+ tkey.tunnel_ttl = 64;
+ tkey.remote_ipv4 = 0x0a020166; /* 10.2.1.102 */
+ bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), 0);
+ return bpf_redirect(*ifindex, 0);
+}
+
+SEC("l2_to_ip6tun_ingress_redirect")
+int _l2_to_ip6tun_ingress_redirect(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key tkey = {};
+ void *data = (void *)(long)skb->data;
+ struct eth_hdr *eth = data;
+ void *data_end = (void *)(long)skb->data_end;
+ int key = 0, *ifindex;
+
+ if (data + sizeof(*eth) > data_end)
+ return TC_ACT_OK;
+
+ ifindex = bpf_map_lookup_elem(&tun_iface, &key);
+ if (!ifindex)
+ return TC_ACT_OK;
+
+ if (eth->h_proto == htons(ETH_P_IP)) {
+ char fmt4[] = "e/ingress redirect daddr4:%x to ifindex:%d\n";
+ struct iphdr *iph = data + sizeof(*eth);
+
+ if (data + sizeof(*eth) + sizeof(*iph) > data_end)
+ return TC_ACT_OK;
+
+ if (!is_vip_addr(eth->h_proto, iph->daddr))
+ return TC_ACT_OK;
+
+ bpf_trace_printk(fmt4, sizeof(fmt4), _htonl(iph->daddr),
+ *ifindex);
+ } else if (eth->h_proto == htons(ETH_P_IPV6)) {
+ char fmt6[] = "e/ingress redirect daddr6:%x to ifindex:%d\n";
+ struct ipv6hdr *ip6h = data + sizeof(*eth);
+
+ if (data + sizeof(*eth) + sizeof(*ip6h) > data_end)
+ return TC_ACT_OK;
+
+ if (!is_vip_addr(eth->h_proto, ip6h->daddr.s6_addr32[0]))
+ return TC_ACT_OK;
+
+ bpf_trace_printk(fmt6, sizeof(fmt6),
+ _htonl(ip6h->daddr.s6_addr32[0]), *ifindex);
+ } else {
+ return TC_ACT_OK;
+ }
+
+ tkey.tunnel_id = 10000;
+ tkey.tunnel_ttl = 64;
+ /* 2401:db02:0:0:0:0:0:66 */
+ tkey.remote_ipv6[0] = _htonl(0x2401db02);
+ tkey.remote_ipv6[1] = 0;
+ tkey.remote_ipv6[2] = 0;
+ tkey.remote_ipv6[3] = _htonl(0x00000066);
+ bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), BPF_F_TUNINFO_IPV6);
+ return bpf_redirect(*ifindex, 0);
+}
+
+SEC("drop_non_tun_vip")
+int _drop_non_tun_vip(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key tkey = {};
+ void *data = (void *)(long)skb->data;
+ struct eth_hdr *eth = data;
+ void *data_end = (void *)(long)skb->data_end;
+
+ if (data + sizeof(*eth) > data_end)
+ return TC_ACT_OK;
+
+ if (eth->h_proto == htons(ETH_P_IP)) {
+ struct iphdr *iph = data + sizeof(*eth);
+
+ if (data + sizeof(*eth) + sizeof(*iph) > data_end)
+ return TC_ACT_OK;
+
+ if (is_vip_addr(eth->h_proto, iph->daddr))
+ return TC_ACT_SHOT;
+ } else if (eth->h_proto == htons(ETH_P_IPV6)) {
+ struct ipv6hdr *ip6h = data + sizeof(*eth);
+
+ if (data + sizeof(*eth) + sizeof(*ip6h) > data_end)
+ return TC_ACT_OK;
+
+ if (is_vip_addr(eth->h_proto, ip6h->daddr.s6_addr32[0]))
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/tc_l2_redirect_user.c b/samples/bpf/tc_l2_redirect_user.c
new file mode 100644
index 000000000000..4013c5337b91
--- /dev/null
+++ b/samples/bpf/tc_l2_redirect_user.c
@@ -0,0 +1,73 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/unistd.h>
+#include <linux/bpf.h>
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+
+#include "libbpf.h"
+
+static void usage(void)
+{
+ printf("Usage: tc_l2_ipip_redirect [...]\n");
+ printf(" -U <file> Update an already pinned BPF array\n");
+ printf(" -i <ifindex> Interface index\n");
+ printf(" -h Display this help\n");
+}
+
+int main(int argc, char **argv)
+{
+ const char *pinned_file = NULL;
+ int ifindex = -1;
+ int array_key = 0;
+ int array_fd = -1;
+ int ret = -1;
+ int opt;
+
+ while ((opt = getopt(argc, argv, "F:U:i:")) != -1) {
+ switch (opt) {
+ /* General args */
+ case 'U':
+ pinned_file = optarg;
+ break;
+ case 'i':
+ ifindex = atoi(optarg);
+ break;
+ default:
+ usage();
+ goto out;
+ }
+ }
+
+ if (ifindex < 0 || !pinned_file) {
+ usage();
+ goto out;
+ }
+
+ array_fd = bpf_obj_get(pinned_file);
+ if (array_fd < 0) {
+ fprintf(stderr, "bpf_obj_get(%s): %s(%d)\n",
+ pinned_file, strerror(errno), errno);
+ goto out;
+ }
+
+ /* bpf_tunnel_key.remote_ipv4 expects host byte orders */
+ ret = bpf_update_elem(array_fd, &array_key, &ifindex, 0);
+ if (ret) {
+ perror("bpf_update_elem");
+ goto out;
+ }
+
+out:
+ if (array_fd != -1)
+ close(array_fd);
+ return ret;
+}