aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/net/bonding/bond_main.c55
-rw-r--r--drivers/net/bonding/bond_options.c2
-rw-r--r--drivers/net/dsa/b53/b53_common.c3
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c5
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_filters.c2
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_hw.h2
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_nic.c8
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c13
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_ring.c10
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_ring.h3
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c22
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c19
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c12
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c12
-rw-r--r--drivers/net/ethernet/broadcom/cnic_defs.h4
-rw-r--r--drivers/net/ethernet/cadence/macb.h1
-rw-r--r--drivers/net/ethernet/cadence/macb_main.c66
-rw-r--r--drivers/net/ethernet/cavium/thunder/thunder_bgx.c62
-rw-r--r--drivers/net/ethernet/cavium/thunder/thunder_bgx.h9
-rw-r--r--drivers/net/ethernet/davicom/dm9000.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool.c56
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.h2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c134
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/health.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c20
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/wq.c39
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/wq.h2
-rw-r--r--drivers/net/ethernet/micrel/ks8851_mll.c53
-rw-r--r--drivers/net/ethernet/mscc/ocelot_board.c8
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_dev.c11
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_if.h1
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede.h2
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_rdma.c29
-rw-r--r--drivers/net/phy/broadcom.c4
-rw-r--r--drivers/net/phy/mdio-bcm-iproc.c20
-rw-r--r--drivers/net/wireguard/device.c7
-rw-r--r--drivers/net/wireguard/receive.c7
-rw-r--r--drivers/net/wireguard/send.c16
-rw-r--r--drivers/net/wireguard/socket.c1
-rw-r--r--drivers/nfc/pn544/i2c.c1
-rw-r--r--drivers/nfc/pn544/pn544.c2
-rw-r--r--drivers/s390/net/qeth_core_main.c3
-rw-r--r--drivers/s390/net/qeth_l2_main.c29
-rw-r--r--include/linux/mlx5/mlx5_ifc.h5
-rw-r--r--include/linux/netdevice.h7
-rw-r--r--include/linux/rculist_nulls.h7
-rw-r--r--include/linux/skbuff.h30
-rw-r--r--include/net/flow_dissector.h9
-rw-r--r--include/net/sock.h38
-rw-r--r--include/uapi/linux/bpf.h16
-rw-r--r--include/uapi/linux/netfilter/nf_conntrack_common.h12
-rw-r--r--kernel/bpf/btf.c6
-rw-r--r--kernel/bpf/hashtab.c58
-rw-r--r--kernel/bpf/offload.c2
-rw-r--r--net/Kconfig1
-rw-r--r--net/bridge/br_stp.c3
-rw-r--r--net/core/dev.c34
-rw-r--r--net/core/fib_rules.c2
-rw-r--r--net/core/rtnetlink.c26
-rw-r--r--net/core/skbuff.c6
-rw-r--r--net/ethtool/bitset.c3
-rw-r--r--net/hsr/hsr_framereg.c3
-rw-r--r--net/ipv4/udp.c6
-rw-r--r--net/ipv6/ip6_fib.c7
-rw-r--r--net/ipv6/ip6_gre.c8
-rw-r--r--net/ipv6/ip6_tunnel.c13
-rw-r--r--net/ipv6/route.c1
-rw-r--r--net/mptcp/Kconfig1
-rw-r--r--net/mptcp/protocol.c48
-rw-r--r--net/mptcp/protocol.h4
-rw-r--r--net/netfilter/nf_conntrack_core.c192
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c20
-rw-r--r--net/netfilter/nf_flow_table_offload.c6
-rw-r--r--net/netfilter/nft_set_pipapo.c6
-rw-r--r--net/netfilter/xt_hashlimit.c22
-rw-r--r--net/netlabel/netlabel_domainhash.c3
-rw-r--r--net/netlabel/netlabel_unlabeled.c3
-rw-r--r--net/netlink/af_netlink.c5
-rw-r--r--net/openvswitch/datapath.c9
-rw-r--r--net/openvswitch/flow_netlink.c18
-rw-r--r--net/openvswitch/flow_table.c6
-rw-r--r--net/openvswitch/meter.c3
-rw-r--r--net/openvswitch/vport.c3
-rw-r--r--net/rds/rdma.c24
-rw-r--r--net/sched/cls_flower.c1
-rw-r--r--net/sctp/sm_statefuns.c29
-rw-r--r--net/tls/tls_device.c20
-rw-r--r--net/xdp/xsk.c2
-rw-r--r--net/xdp/xsk_queue.h3
-rw-r--r--tools/include/uapi/linux/bpf.h16
-rw-r--r--tools/lib/bpf/libbpf.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/select_reuseport.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_basic.c5
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh6
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre.sh25
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh6
-rw-r--r--tools/testing/selftests/wireguard/qemu/Makefile38
104 files changed, 1132 insertions, 506 deletions
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 48d5ec770b94..d10805e5e623 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3526,6 +3526,47 @@ static void bond_fold_stats(struct rtnl_link_stats64 *_res,
}
}
+#ifdef CONFIG_LOCKDEP
+static int bond_get_lowest_level_rcu(struct net_device *dev)
+{
+ struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
+ struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
+ int cur = 0, max = 0;
+
+ now = dev;
+ iter = &dev->adj_list.lower;
+
+ while (1) {
+ next = NULL;
+ while (1) {
+ ldev = netdev_next_lower_dev_rcu(now, &iter);
+ if (!ldev)
+ break;
+
+ next = ldev;
+ niter = &ldev->adj_list.lower;
+ dev_stack[cur] = now;
+ iter_stack[cur++] = iter;
+ if (max <= cur)
+ max = cur;
+ break;
+ }
+
+ if (!next) {
+ if (!cur)
+ return max;
+ next = dev_stack[--cur];
+ niter = iter_stack[cur];
+ }
+
+ now = next;
+ iter = niter;
+ }
+
+ return max;
+}
+#endif
+
static void bond_get_stats(struct net_device *bond_dev,
struct rtnl_link_stats64 *stats)
{
@@ -3533,11 +3574,17 @@ static void bond_get_stats(struct net_device *bond_dev,
struct rtnl_link_stats64 temp;
struct list_head *iter;
struct slave *slave;
+ int nest_level = 0;
- spin_lock(&bond->stats_lock);
- memcpy(stats, &bond->bond_stats, sizeof(*stats));
rcu_read_lock();
+#ifdef CONFIG_LOCKDEP
+ nest_level = bond_get_lowest_level_rcu(bond_dev);
+#endif
+
+ spin_lock_nested(&bond->stats_lock, nest_level);
+ memcpy(stats, &bond->bond_stats, sizeof(*stats));
+
bond_for_each_slave_rcu(bond, slave, iter) {
const struct rtnl_link_stats64 *new =
dev_get_stats(slave->dev, &temp);
@@ -3547,10 +3594,10 @@ static void bond_get_stats(struct net_device *bond_dev,
/* save off the slave stats for the next run */
memcpy(&slave->slave_stats, new, sizeof(*new));
}
- rcu_read_unlock();
memcpy(&bond->bond_stats, stats, sizeof(*stats));
spin_unlock(&bond->stats_lock);
+ rcu_read_unlock();
}
static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd)
@@ -3640,6 +3687,8 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd
case BOND_RELEASE_OLD:
case SIOCBONDRELEASE:
res = bond_release(bond_dev, slave_dev);
+ if (!res)
+ netdev_update_lockdep_key(slave_dev);
break;
case BOND_SETHWADDR_OLD:
case SIOCBONDSETHWADDR:
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index ddb3916d3506..215c10923289 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -1398,6 +1398,8 @@ static int bond_option_slaves_set(struct bonding *bond,
case '-':
slave_dbg(bond->dev, dev, "Releasing interface\n");
ret = bond_release(bond->dev, dev);
+ if (!ret)
+ netdev_update_lockdep_key(dev);
break;
default:
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 449a22172e07..1a69286daa8d 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1366,6 +1366,9 @@ void b53_vlan_add(struct dsa_switch *ds, int port,
b53_get_vlan_entry(dev, vid, vl);
+ if (vid == 0 && vid == b53_default_pvid(dev))
+ untagged = true;
+
vl->members |= BIT(port);
if (untagged && !dsa_is_cpu_port(ds, port))
vl->untag |= BIT(port);
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
index a1f99bef4a68..7b55633d2cb9 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
@@ -722,6 +722,11 @@ static int aq_ethtool_set_priv_flags(struct net_device *ndev, u32 flags)
if (flags & ~AQ_PRIV_FLAGS_MASK)
return -EOPNOTSUPP;
+ if (hweight32((flags | priv_flags) & AQ_HW_LOOPBACK_MASK) > 1) {
+ netdev_info(ndev, "Can't enable more than one loopback simultaneously\n");
+ return -EINVAL;
+ }
+
cfg->priv_flags = flags;
if ((priv_flags ^ flags) & BIT(AQ_HW_LOOPBACK_DMA_NET)) {
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c
index 6102251bb909..03ff92bc4a7f 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c
@@ -163,7 +163,7 @@ aq_check_approve_fvlan(struct aq_nic_s *aq_nic,
}
if ((aq_nic->ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER) &&
- (!test_bit(be16_to_cpu(fsp->h_ext.vlan_tci),
+ (!test_bit(be16_to_cpu(fsp->h_ext.vlan_tci) & VLAN_VID_MASK,
aq_nic->active_vlans))) {
netdev_err(aq_nic->ndev,
"ethtool: unknown vlan-id specified");
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
index cc70c606b6ef..251767c31f7e 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
@@ -337,6 +337,8 @@ struct aq_fw_ops {
void (*enable_ptp)(struct aq_hw_s *self, int enable);
+ void (*adjust_ptp)(struct aq_hw_s *self, uint64_t adj);
+
int (*set_eee_rate)(struct aq_hw_s *self, u32 speed);
int (*get_eee_rate)(struct aq_hw_s *self, u32 *rate,
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index c85e3e29012c..e95f6a6bef73 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -533,8 +533,10 @@ unsigned int aq_nic_map_skb(struct aq_nic_s *self, struct sk_buff *skb,
dx_buff->len,
DMA_TO_DEVICE);
- if (unlikely(dma_mapping_error(aq_nic_get_dev(self), dx_buff->pa)))
+ if (unlikely(dma_mapping_error(aq_nic_get_dev(self), dx_buff->pa))) {
+ ret = 0;
goto exit;
+ }
first = dx_buff;
dx_buff->len_pkt = skb->len;
@@ -655,10 +657,6 @@ int aq_nic_xmit(struct aq_nic_s *self, struct sk_buff *skb)
if (likely(frags)) {
err = self->aq_hw_ops->hw_ring_tx_xmit(self->aq_hw,
ring, frags);
- if (err >= 0) {
- ++ring->stats.tx.packets;
- ring->stats.tx.bytes += skb->len;
- }
} else {
err = NETDEV_TX_BUSY;
}
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
index 6b27af0db499..78b6f3248756 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
@@ -359,7 +359,8 @@ static int aq_suspend_common(struct device *dev, bool deep)
netif_device_detach(nic->ndev);
netif_tx_stop_all_queues(nic->ndev);
- aq_nic_stop(nic);
+ if (netif_running(nic->ndev))
+ aq_nic_stop(nic);
if (deep) {
aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol);
@@ -375,7 +376,7 @@ static int atl_resume_common(struct device *dev, bool deep)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct aq_nic_s *nic;
- int ret;
+ int ret = 0;
nic = pci_get_drvdata(pdev);
@@ -390,9 +391,11 @@ static int atl_resume_common(struct device *dev, bool deep)
goto err_exit;
}
- ret = aq_nic_start(nic);
- if (ret)
- goto err_exit;
+ if (netif_running(nic->ndev)) {
+ ret = aq_nic_start(nic);
+ if (ret)
+ goto err_exit;
+ }
netif_device_attach(nic->ndev);
netif_tx_start_all_queues(nic->ndev);
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
index 951d86f8b66e..bae95a618560 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
@@ -272,9 +272,12 @@ bool aq_ring_tx_clean(struct aq_ring_s *self)
}
}
- if (unlikely(buff->is_eop))
- dev_kfree_skb_any(buff->skb);
+ if (unlikely(buff->is_eop)) {
+ ++self->stats.rx.packets;
+ self->stats.tx.bytes += buff->skb->len;
+ dev_kfree_skb_any(buff->skb);
+ }
buff->pa = 0U;
buff->eop_index = 0xffffU;
self->sw_head = aq_ring_next_dx(self, self->sw_head);
@@ -351,7 +354,8 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
err = 0;
goto err_exit;
}
- if (buff->is_error || buff->is_cso_err) {
+ if (buff->is_error ||
+ (buff->is_lro && buff->is_cso_err)) {
buff_ = buff;
do {
next_ = buff_->next,
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
index 991e4d31b094..2c96f20f6289 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
@@ -78,7 +78,8 @@ struct __packed aq_ring_buff_s {
u32 is_cleaned:1;
u32 is_error:1;
u32 is_vlan:1;
- u32 rsvd3:4;
+ u32 is_lro:1;
+ u32 rsvd3:3;
u16 eop_index;
u16 rsvd4;
};
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index ec041f78d063..d20d91cdece8 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -823,6 +823,8 @@ static int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self,
}
}
+ buff->is_lro = !!(HW_ATL_B0_RXD_WB_STAT2_RSCCNT &
+ rxd_wb->status);
if (HW_ATL_B0_RXD_WB_STAT2_EOP & rxd_wb->status) {
buff->len = rxd_wb->pkt_len %
AQ_CFG_RX_FRAME_MAX;
@@ -835,8 +837,7 @@ static int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self,
rxd_wb->pkt_len > AQ_CFG_RX_FRAME_MAX ?
AQ_CFG_RX_FRAME_MAX : rxd_wb->pkt_len;
- if (HW_ATL_B0_RXD_WB_STAT2_RSCCNT &
- rxd_wb->status) {
+ if (buff->is_lro) {
/* LRO */
buff->next = rxd_wb->next_desc_ptr;
++ring->stats.rx.lro_packets;
@@ -884,13 +885,16 @@ static int hw_atl_b0_hw_packet_filter_set(struct aq_hw_s *self,
{
struct aq_nic_cfg_s *cfg = self->aq_nic_cfg;
unsigned int i = 0U;
+ u32 vlan_promisc;
+ u32 l2_promisc;
- hw_atl_rpfl2promiscuous_mode_en_set(self,
- IS_FILTER_ENABLED(IFF_PROMISC));
+ l2_promisc = IS_FILTER_ENABLED(IFF_PROMISC) ||
+ !!(cfg->priv_flags & BIT(AQ_HW_LOOPBACK_DMA_NET));
+ vlan_promisc = l2_promisc || cfg->is_vlan_force_promisc;
- hw_atl_rpf_vlan_prom_mode_en_set(self,
- IS_FILTER_ENABLED(IFF_PROMISC) ||
- cfg->is_vlan_force_promisc);
+ hw_atl_rpfl2promiscuous_mode_en_set(self, l2_promisc);
+
+ hw_atl_rpf_vlan_prom_mode_en_set(self, vlan_promisc);
hw_atl_rpfl2multicast_flr_en_set(self,
IS_FILTER_ENABLED(IFF_ALLMULTI) &&
@@ -1161,6 +1165,8 @@ static int hw_atl_b0_adj_sys_clock(struct aq_hw_s *self, s64 delta)
{
self->ptp_clk_offset += delta;
+ self->aq_fw_ops->adjust_ptp(self, self->ptp_clk_offset);
+
return 0;
}
@@ -1211,7 +1217,7 @@ static int hw_atl_b0_gpio_pulse(struct aq_hw_s *self, u32 index,
fwreq.ptp_gpio_ctrl.index = index;
fwreq.ptp_gpio_ctrl.period = period;
/* Apply time offset */
- fwreq.ptp_gpio_ctrl.start = start - self->ptp_clk_offset;
+ fwreq.ptp_gpio_ctrl.start = start;
size = sizeof(fwreq.msg_id) + sizeof(fwreq.ptp_gpio_ctrl);
return self->aq_fw_ops->send_fw_request(self, &fwreq, size);
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
index f547baa6c954..354705f9bc49 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
@@ -22,6 +22,7 @@
#define HW_ATL_MIF_ADDR 0x0208U
#define HW_ATL_MIF_VAL 0x020CU
+#define HW_ATL_MPI_RPC_ADDR 0x0334U
#define HW_ATL_RPC_CONTROL_ADR 0x0338U
#define HW_ATL_RPC_STATE_ADR 0x033CU
@@ -53,15 +54,14 @@ enum mcp_area {
};
static int hw_atl_utils_ver_match(u32 ver_expected, u32 ver_actual);
-
static int hw_atl_utils_mpi_set_state(struct aq_hw_s *self,
enum hal_atl_utils_fw_state_e state);
-
static u32 hw_atl_utils_get_mpi_mbox_tid(struct aq_hw_s *self);
static u32 hw_atl_utils_mpi_get_state(struct aq_hw_s *self);
static u32 hw_atl_utils_mif_cmd_get(struct aq_hw_s *self);
static u32 hw_atl_utils_mif_addr_get(struct aq_hw_s *self);
static u32 hw_atl_utils_rpc_state_get(struct aq_hw_s *self);
+static u32 aq_fw1x_rpc_get(struct aq_hw_s *self);
int hw_atl_utils_initfw(struct aq_hw_s *self, const struct aq_fw_ops **fw_ops)
{
@@ -476,6 +476,10 @@ static int hw_atl_utils_init_ucp(struct aq_hw_s *self,
self, self->mbox_addr,
self->mbox_addr != 0U,
1000U, 10000U);
+ err = readx_poll_timeout_atomic(aq_fw1x_rpc_get, self,
+ self->rpc_addr,
+ self->rpc_addr != 0U,
+ 1000U, 100000U);
return err;
}
@@ -531,6 +535,12 @@ int hw_atl_utils_fw_rpc_wait(struct aq_hw_s *self,
self, fw.val,
sw.tid == fw.tid,
1000U, 100000U);
+ if (err < 0)
+ goto err_exit;
+
+ err = aq_hw_err_from_flags(self);
+ if (err < 0)
+ goto err_exit;
if (fw.len == 0xFFFFU) {
err = hw_atl_utils_fw_rpc_call(self, sw.len);
@@ -1025,6 +1035,11 @@ static u32 hw_atl_utils_rpc_state_get(struct aq_hw_s *self)
return aq_hw_read_reg(self, HW_ATL_RPC_STATE_ADR);
}
+static u32 aq_fw1x_rpc_get(struct aq_hw_s *self)
+{
+ return aq_hw_read_reg(self, HW_ATL_MPI_RPC_ADDR);
+}
+
const struct aq_fw_ops aq_fw_1x_ops = {
.init = hw_atl_utils_mpi_create,
.deinit = hw_atl_fw1x_deinit,
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
index 97ebf849695f..77a4ed64830f 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
@@ -30,6 +30,9 @@
#define HW_ATL_FW3X_EXT_CONTROL_ADDR 0x378
#define HW_ATL_FW3X_EXT_STATE_ADDR 0x37c
+#define HW_ATL_FW3X_PTP_ADJ_LSW_ADDR 0x50a0
+#define HW_ATL_FW3X_PTP_ADJ_MSW_ADDR 0x50a4
+
#define HW_ATL_FW2X_CAP_PAUSE BIT(CAPS_HI_PAUSE)
#define HW_ATL_FW2X_CAP_ASYM_PAUSE BIT(CAPS_HI_ASYMMETRIC_PAUSE)
#define HW_ATL_FW2X_CAP_SLEEP_PROXY BIT(CAPS_HI_SLEEP_PROXY)
@@ -475,6 +478,14 @@ static void aq_fw3x_enable_ptp(struct aq_hw_s *self, int enable)
aq_hw_write_reg(self, HW_ATL_FW3X_EXT_CONTROL_ADDR, ptp_opts);
}
+static void aq_fw3x_adjust_ptp(struct aq_hw_s *self, uint64_t adj)
+{
+ aq_hw_write_reg(self, HW_ATL_FW3X_PTP_ADJ_LSW_ADDR,
+ (adj >> 0) & 0xffffffff);
+ aq_hw_write_reg(self, HW_ATL_FW3X_PTP_ADJ_MSW_ADDR,
+ (adj >> 32) & 0xffffffff);
+}
+
static int aq_fw2x_led_control(struct aq_hw_s *self, u32 mode)
{
if (self->fw_ver_actual < HW_ATL_FW_VER_LED)
@@ -633,4 +644,5 @@ const struct aq_fw_ops aq_fw_2x_ops = {
.enable_ptp = aq_fw3x_enable_ptp,
.led_control = aq_fw2x_led_control,
.set_phyloopback = aq_fw2x_set_phyloopback,
+ .adjust_ptp = aq_fw3x_adjust_ptp,
};
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 597e6fd5bfea..fd6e0e48cd51 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -11786,6 +11786,14 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (version_printed++ == 0)
pr_info("%s", version);
+ /* Clear any pending DMA transactions from crash kernel
+ * while loading driver in capture kernel.
+ */
+ if (is_kdump_kernel()) {
+ pci_clear_master(pdev);
+ pcie_flr(pdev);
+ }
+
max_irqs = bnxt_get_max_irq(pdev);
dev = alloc_etherdev_mq(sizeof(*bp), max_irqs);
if (!dev)
@@ -11983,10 +11991,10 @@ static void bnxt_shutdown(struct pci_dev *pdev)
dev_close(dev);
bnxt_ulp_shutdown(bp);
+ bnxt_clear_int_mode(bp);
+ pci_disable_device(pdev);
if (system_state == SYSTEM_POWER_OFF) {
- bnxt_clear_int_mode(bp);
- pci_disable_device(pdev);
pci_wake_from_d3(pdev, bp->wol);
pci_set_power_state(pdev, PCI_D3hot);
}
diff --git a/drivers/net/ethernet/broadcom/cnic_defs.h b/drivers/net/ethernet/broadcom/cnic_defs.h
index b38499774071..99e2c6d4d8c3 100644
--- a/drivers/net/ethernet/broadcom/cnic_defs.h
+++ b/drivers/net/ethernet/broadcom/cnic_defs.h
@@ -543,13 +543,13 @@ struct l4_kwq_update_pg {
#define L4_KWQ_UPDATE_PG_RESERVERD2_SHIFT 2
#endif
#if defined(__BIG_ENDIAN)
- u16 reserverd3;
+ u16 reserved3;
u8 da0;
u8 da1;
#elif defined(__LITTLE_ENDIAN)
u8 da1;
u8 da0;
- u16 reserverd3;
+ u16 reserved3;
#endif
#if defined(__BIG_ENDIAN)
u8 da2;
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index dbf7070fcdba..a3f0f27fc79a 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -652,6 +652,7 @@
#define MACB_CAPS_GEM_HAS_PTP 0x00000040
#define MACB_CAPS_BD_RD_PREFETCH 0x00000080
#define MACB_CAPS_NEEDS_RSTONUBR 0x00000100
+#define MACB_CAPS_MACB_IS_EMAC 0x08000000
#define MACB_CAPS_FIFO_MODE 0x10000000
#define MACB_CAPS_GIGABIT_MODE_AVAILABLE 0x20000000
#define MACB_CAPS_SG_DISABLED 0x40000000
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 4508f0d150da..2c28da1737fe 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -572,8 +572,21 @@ static void macb_mac_config(struct phylink_config *config, unsigned int mode,
old_ctrl = ctrl = macb_or_gem_readl(bp, NCFGR);
/* Clear all the bits we might set later */
- ctrl &= ~(GEM_BIT(GBE) | MACB_BIT(SPD) | MACB_BIT(FD) | MACB_BIT(PAE) |
- GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL));
+ ctrl &= ~(MACB_BIT(SPD) | MACB_BIT(FD) | MACB_BIT(PAE));
+
+ if (bp->caps & MACB_CAPS_MACB_IS_EMAC) {
+ if (state->interface == PHY_INTERFACE_MODE_RMII)
+ ctrl |= MACB_BIT(RM9200_RMII);
+ } else {
+ ctrl &= ~(GEM_BIT(GBE) | GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL));
+
+ /* We do not support MLO_PAUSE_RX yet */
+ if (state->pause & MLO_PAUSE_TX)
+ ctrl |= MACB_BIT(PAE);
+
+ if (state->interface == PHY_INTERFACE_MODE_SGMII)
+ ctrl |= GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL);
+ }
if (state->speed == SPEED_1000)
ctrl |= GEM_BIT(GBE);
@@ -583,13 +596,6 @@ static void macb_mac_config(struct phylink_config *config, unsigned int mode,
if (state->duplex)
ctrl |= MACB_BIT(FD);
- /* We do not support MLO_PAUSE_RX yet */
- if (state->pause & MLO_PAUSE_TX)
- ctrl |= MACB_BIT(PAE);
-
- if (state->interface == PHY_INTERFACE_MODE_SGMII)
- ctrl |= GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL);
-
/* Apply the new configuration, if any */
if (old_ctrl ^ ctrl)
macb_or_gem_writel(bp, NCFGR, ctrl);
@@ -608,9 +614,10 @@ static void macb_mac_link_down(struct phylink_config *config, unsigned int mode,
unsigned int q;
u32 ctrl;
- for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue)
- queue_writel(queue, IDR,
- bp->rx_intr_mask | MACB_TX_INT_FLAGS | MACB_BIT(HRESP));
+ if (!(bp->caps & MACB_CAPS_MACB_IS_EMAC))
+ for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue)
+ queue_writel(queue, IDR,
+ bp->rx_intr_mask | MACB_TX_INT_FLAGS | MACB_BIT(HRESP));
/* Disable Rx and Tx */
ctrl = macb_readl(bp, NCR) & ~(MACB_BIT(RE) | MACB_BIT(TE));
@@ -627,17 +634,19 @@ static void macb_mac_link_up(struct phylink_config *config, unsigned int mode,
struct macb_queue *queue;
unsigned int q;
- macb_set_tx_clk(bp->tx_clk, bp->speed, ndev);
+ if (!(bp->caps & MACB_CAPS_MACB_IS_EMAC)) {
+ macb_set_tx_clk(bp->tx_clk, bp->speed, ndev);
- /* Initialize rings & buffers as clearing MACB_BIT(TE) in link down
- * cleared the pipeline and control registers.
- */
- bp->macbgem_ops.mog_init_rings(bp);
- macb_init_buffers(bp);
+ /* Initialize rings & buffers as clearing MACB_BIT(TE) in link down
+ * cleared the pipeline and control registers.
+ */
+ bp->macbgem_ops.mog_init_rings(bp);
+ macb_init_buffers(bp);
- for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue)
- queue_writel(queue, IER,
- bp->rx_intr_mask | MACB_TX_INT_FLAGS | MACB_BIT(HRESP));
+ for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue)
+ queue_writel(queue, IER,
+ bp->rx_intr_mask | MACB_TX_INT_FLAGS | MACB_BIT(HRESP));
+ }
/* Enable Rx and Tx */
macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(RE) | MACB_BIT(TE));
@@ -3790,6 +3799,10 @@ static int at91ether_open(struct net_device *dev)
u32 ctl;
int ret;
+ ret = pm_runtime_get_sync(&lp->pdev->dev);
+ if (ret < 0)
+ return ret;
+
/* Clear internal statistics */
ctl = macb_readl(lp, NCR);
macb_writel(lp, NCR, ctl | MACB_BIT(CLRSTAT));
@@ -3854,7 +3867,7 @@ static int at91ether_close(struct net_device *dev)
q->rx_buffers, q->rx_buffers_dma);
q->rx_buffers = NULL;
- return 0;
+ return pm_runtime_put(&lp->pdev->dev);
}
/* Transmit packet */
@@ -4037,7 +4050,6 @@ static int at91ether_init(struct platform_device *pdev)
struct net_device *dev = platform_get_drvdata(pdev);
struct macb *bp = netdev_priv(dev);
int err;
- u32 reg;
bp->queues[0].bp = bp;
@@ -4051,11 +4063,7 @@ static int at91ether_init(struct platform_device *pdev)
macb_writel(bp, NCR, 0);
- reg = MACB_BF(CLK, MACB_CLK_DIV32) | MACB_BIT(BIG);
- if (bp->phy_interface == PHY_INTERFACE_MODE_RMII)
- reg |= MACB_BIT(RM9200_RMII);
-
- macb_writel(bp, NCFGR, reg);
+ macb_writel(bp, NCFGR, MACB_BF(CLK, MACB_CLK_DIV32) | MACB_BIT(BIG));
return 0;
}
@@ -4214,7 +4222,7 @@ static const struct macb_config sama5d4_config = {
};
static const struct macb_config emac_config = {
- .caps = MACB_CAPS_NEEDS_RSTONUBR,
+ .caps = MACB_CAPS_NEEDS_RSTONUBR | MACB_CAPS_MACB_IS_EMAC,
.clk_init = at91ether_clk_init,
.init = at91ether_init,
};
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 17a4110c2e49..8ff28ed04b7f 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -410,10 +410,19 @@ void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable)
lmac = &bgx->lmac[lmacid];
cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG);
- if (enable)
+ if (enable) {
cfg |= CMR_PKT_RX_EN | CMR_PKT_TX_EN;
- else
+
+ /* enable TX FIFO Underflow interrupt */
+ bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_TXX_INT_ENA_W1S,
+ GMI_TXX_INT_UNDFLW);
+ } else {
cfg &= ~(CMR_PKT_RX_EN | CMR_PKT_TX_EN);
+
+ /* Disable TX FIFO Underflow interrupt */
+ bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_TXX_INT_ENA_W1C,
+ GMI_TXX_INT_UNDFLW);
+ }
bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg);
if (bgx->is_rgx)
@@ -1535,6 +1544,48 @@ static int bgx_init_phy(struct bgx *bgx)
return bgx_init_of_phy(bgx);
}
+static irqreturn_t bgx_intr_handler(int irq, void *data)
+{
+ struct bgx *bgx = (struct bgx *)data;
+ u64 status, val;
+ int lmac;
+
+ for (lmac = 0; lmac < bgx->lmac_count; lmac++) {
+ status = bgx_reg_read(bgx, lmac, BGX_GMP_GMI_TXX_INT);
+ if (status & GMI_TXX_INT_UNDFLW) {
+ pci_err(bgx->pdev, "BGX%d lmac%d UNDFLW\n",
+ bgx->bgx_id, lmac);
+ val = bgx_reg_read(bgx, lmac, BGX_CMRX_CFG);
+ val &= ~CMR_EN;
+ bgx_reg_write(bgx, lmac, BGX_CMRX_CFG, val);
+ val |= CMR_EN;
+ bgx_reg_write(bgx, lmac, BGX_CMRX_CFG, val);
+ }
+ /* clear interrupts */
+ bgx_reg_write(bgx, lmac, BGX_GMP_GMI_TXX_INT, status);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static void bgx_register_intr(struct pci_dev *pdev)
+{
+ struct bgx *bgx = pci_get_drvdata(pdev);
+ int ret;
+
+ ret = pci_alloc_irq_vectors(pdev, BGX_LMAC_VEC_OFFSET,
+ BGX_LMAC_VEC_OFFSET, PCI_IRQ_ALL_TYPES);
+ if (ret < 0) {
+ pci_err(pdev, "Req for #%d msix vectors failed\n",
+ BGX_LMAC_VEC_OFFSET);
+ return;
+ }
+ ret = pci_request_irq(pdev, GMPX_GMI_TX_INT, bgx_intr_handler, NULL,
+ bgx, "BGX%d", bgx->bgx_id);
+ if (ret)
+ pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx);
+}
+
static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
int err;
@@ -1550,7 +1601,7 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
pci_set_drvdata(pdev, bgx);
- err = pci_enable_device(pdev);
+ err = pcim_enable_device(pdev);
if (err) {
dev_err(dev, "Failed to enable PCI device\n");
pci_set_drvdata(pdev, NULL);
@@ -1604,6 +1655,8 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
bgx_init_hw(bgx);
+ bgx_register_intr(pdev);
+
/* Enable all LMACs */
for (lmac = 0; lmac < bgx->lmac_count; lmac++) {
err = bgx_lmac_enable(bgx, lmac);
@@ -1620,6 +1673,7 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
err_enable:
bgx_vnic[bgx->bgx_id] = NULL;
+ pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx);
err_release_regions:
pci_release_regions(pdev);
err_disable_device:
@@ -1637,6 +1691,8 @@ static void bgx_remove(struct pci_dev *pdev)
for (lmac = 0; lmac < bgx->lmac_count; lmac++)
bgx_lmac_disable(bgx, lmac);
+ pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx);
+
bgx_vnic[bgx->bgx_id] = NULL;
pci_release_regions(pdev);
pci_disable_device(pdev);
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
index 25888706bdcd..cdea49392185 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
@@ -180,6 +180,15 @@
#define BGX_GMP_GMI_TXX_BURST 0x38228
#define BGX_GMP_GMI_TXX_MIN_PKT 0x38240
#define BGX_GMP_GMI_TXX_SGMII_CTL 0x38300
+#define BGX_GMP_GMI_TXX_INT 0x38500
+#define BGX_GMP_GMI_TXX_INT_W1S 0x38508
+#define BGX_GMP_GMI_TXX_INT_ENA_W1C 0x38510
+#define BGX_GMP_GMI_TXX_INT_ENA_W1S 0x38518
+#define GMI_TXX_INT_PTP_LOST BIT_ULL(4)
+#define GMI_TXX_INT_LATE_COL BIT_ULL(3)
+#define GMI_TXX_INT_XSDEF BIT_ULL(2)
+#define GMI_TXX_INT_XSCOL BIT_ULL(1)
+#define GMI_TXX_INT_UNDFLW BIT_ULL(0)
#define BGX_MSIX_VEC_0_29_ADDR 0x400000 /* +(0..29) << 4 */
#define BGX_MSIX_VEC_0_29_CTL 0x400008
diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c
index 1ea3372775e6..e94ae9b94dbf 100644
--- a/drivers/net/ethernet/davicom/dm9000.c
+++ b/drivers/net/ethernet/davicom/dm9000.c
@@ -1405,6 +1405,8 @@ static struct dm9000_plat_data *dm9000_parse_dt(struct device *dev)
mac_addr = of_get_mac_address(np);
if (!IS_ERR(mac_addr))
ether_addr_copy(pdata->dev_addr, mac_addr);
+ else if (PTR_ERR(mac_addr) == -EPROBE_DEFER)
+ return ERR_CAST(mac_addr);
return pdata;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index b002ab4e5838..77c412a7e7a4 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -2936,13 +2936,6 @@ ice_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
else
return -EINVAL;
- /* Tell the OS link is going down, the link will go back up when fw
- * says it is ready asynchronously
- */
- ice_print_link_msg(vsi, false);
- netif_carrier_off(netdev);
- netif_tx_stop_all_queues(netdev);
-
/* Set the FC mode and only restart AN if link is up */
status = ice_set_fc(pi, &aq_failures, link_up);
@@ -3489,21 +3482,13 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec,
return -EINVAL;
}
- /* hardware only supports an ITR granularity of 2us */
- if (coalesce_usecs % 2 != 0) {
- netdev_info(vsi->netdev, "Invalid value, %s-usecs must be even\n",
- c_type_str);
- return -EINVAL;
- }
-
if (use_adaptive_coalesce) {
rc->itr_setting |= ICE_ITR_DYNAMIC;
} else {
- /* store user facing value how it was set */
+ /* save the user set usecs */
rc->itr_setting = coalesce_usecs;
- /* set to static and convert to value HW understands */
- rc->target_itr =
- ITR_TO_REG(ITR_REG_ALIGN(rc->itr_setting));
+ /* device ITR granularity is in 2 usec increments */
+ rc->target_itr = ITR_REG_ALIGN(rc->itr_setting);
}
return 0;
@@ -3597,6 +3582,30 @@ ice_is_coalesce_param_invalid(struct net_device *netdev,
}
/**
+ * ice_print_if_odd_usecs - print message if user tries to set odd [tx|rx]-usecs
+ * @netdev: netdev used for print
+ * @itr_setting: previous user setting
+ * @use_adaptive_coalesce: if adaptive coalesce is enabled or being enabled
+ * @coalesce_usecs: requested value of [tx|rx]-usecs
+ * @c_type_str: either "rx" or "tx" to match user set field of [tx|rx]-usecs
+ */
+static void
+ice_print_if_odd_usecs(struct net_device *netdev, u16 itr_setting,
+ u32 use_adaptive_coalesce, u32 coalesce_usecs,
+ const char *c_type_str)
+{
+ if (use_adaptive_coalesce)
+ return;
+
+ itr_setting = ITR_TO_REG(itr_setting);
+
+ if (itr_setting != coalesce_usecs && (coalesce_usecs % 2))
+ netdev_info(netdev, "User set %s-usecs to %d, device only supports even values. Rounding down and attempting to set %s-usecs to %d\n",
+ c_type_str, coalesce_usecs, c_type_str,
+ ITR_REG_ALIGN(coalesce_usecs));
+}
+
+/**
* __ice_set_coalesce - set ITR/INTRL values for the device
* @netdev: pointer to the netdev associated with this query
* @ec: ethtool structure to fill with driver's coalesce settings
@@ -3616,8 +3625,19 @@ __ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec,
return -EINVAL;
if (q_num < 0) {
+ struct ice_q_vector *q_vector = vsi->q_vectors[0];
int v_idx;
+ if (q_vector) {
+ ice_print_if_odd_usecs(netdev, q_vector->rx.itr_setting,
+ ec->use_adaptive_rx_coalesce,
+ ec->rx_coalesce_usecs, "rx");
+
+ ice_print_if_odd_usecs(netdev, q_vector->tx.itr_setting,
+ ec->use_adaptive_tx_coalesce,
+ ec->tx_coalesce_usecs, "tx");
+ }
+
ice_for_each_q_vector(vsi, v_idx) {
/* In some cases if DCB is configured the num_[rx|tx]q
* can be less than vsi->num_q_vectors. This check
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index 14a1bf445889..7ee00a128663 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -222,7 +222,7 @@ enum ice_rx_dtype {
#define ICE_ITR_GRAN_S 1 /* ITR granularity is always 2us */
#define ICE_ITR_GRAN_US BIT(ICE_ITR_GRAN_S)
#define ICE_ITR_MASK 0x1FFE /* ITR register value alignment mask */
-#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~ICE_ITR_MASK)
+#define ITR_REG_ALIGN(setting) ((setting) & ICE_ITR_MASK)
#define ICE_ITR_ADAPTIVE_MIN_INC 0x0002
#define ICE_ITR_ADAPTIVE_MIN_USECS 0x0002
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 262714d5f54a..75c70d432c72 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -1874,6 +1874,48 @@ error_param:
}
/**
+ * ice_wait_on_vf_reset - poll to make sure a given VF is ready after reset
+ * @vf: The VF being resseting
+ *
+ * The max poll time is about ~800ms, which is about the maximum time it takes
+ * for a VF to be reset and/or a VF driver to be removed.
+ */
+static void ice_wait_on_vf_reset(struct ice_vf *vf)
+{
+ int i;
+
+ for (i = 0; i < ICE_MAX_VF_RESET_TRIES; i++) {
+ if (test_bit(ICE_VF_STATE_INIT, vf->vf_states))
+ break;
+ msleep(ICE_MAX_VF_RESET_SLEEP_MS);
+ }
+}
+
+/**
+ * ice_check_vf_ready_for_cfg - check if VF is ready to be configured/queried
+ * @vf: VF to check if it's ready to be configured/queried
+ *
+ * The purpose of this function is to make sure the VF is not in reset, not
+ * disabled, and initialized so it can be configured and/or queried by a host
+ * administrator.
+ */
+static int ice_check_vf_ready_for_cfg(struct ice_vf *vf)
+{
+ struct ice_pf *pf;
+
+ ice_wait_on_vf_reset(vf);
+
+ if (ice_is_vf_disabled(vf))
+ return -EINVAL;
+
+ pf = vf->pf;
+ if (ice_check_vf_init(pf, vf))
+ return -EBUSY;
+
+ return 0;
+}
+
+/**
* ice_set_vf_spoofchk
* @netdev: network interface device structure
* @vf_id: VF identifier
@@ -1890,16 +1932,16 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena)
enum ice_status status;
struct device *dev;
struct ice_vf *vf;
- int ret = 0;
+ int ret;
dev = ice_pf_to_dev(pf);
if (ice_validate_vf_id(pf, vf_id))
return -EINVAL;
vf = &pf->vf[vf_id];
-
- if (ice_check_vf_init(pf, vf))
- return -EBUSY;
+ ret = ice_check_vf_ready_for_cfg(vf);
+ if (ret)
+ return ret;
vf_vsi = pf->vsi[vf->lan_vsi_idx];
if (!vf_vsi) {
@@ -2696,7 +2738,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
struct ice_vsi *vsi;
struct device *dev;
struct ice_vf *vf;
- int ret = 0;
+ int ret;
dev = ice_pf_to_dev(pf);
if (ice_validate_vf_id(pf, vf_id))
@@ -2714,13 +2756,15 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
vf = &pf->vf[vf_id];
vsi = pf->vsi[vf->lan_vsi_idx];
- if (ice_check_vf_init(pf, vf))
- return -EBUSY;
+
+ ret = ice_check_vf_ready_for_cfg(vf);
+ if (ret)
+ return ret;
if (le16_to_cpu(vsi->info.pvid) == vlanprio) {
/* duplicate request, so just return success */
dev_dbg(dev, "Duplicate pvid %d request\n", vlanprio);
- return ret;
+ return 0;
}
/* If PVID, then remove all filters on the old VLAN */
@@ -2731,7 +2775,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
if (vlan_id || qos) {
ret = ice_vsi_manage_pvid(vsi, vlanprio, true);
if (ret)
- goto error_set_pvid;
+ return ret;
} else {
ice_vsi_manage_pvid(vsi, 0, false);
vsi->info.pvid = 0;
@@ -2744,7 +2788,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
/* add new VLAN filter for each MAC */
ret = ice_vsi_add_vlan(vsi, vlan_id);
if (ret)
- goto error_set_pvid;
+ return ret;
}
/* The Port VLAN needs to be saved across resets the same as the
@@ -2752,8 +2796,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
*/
vf->port_vlan_id = le16_to_cpu(vsi->info.pvid);
-error_set_pvid:
- return ret;
+ return 0;
}
/**
@@ -3237,23 +3280,6 @@ ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi)
}
/**
- * ice_wait_on_vf_reset
- * @vf: The VF being resseting
- *
- * Poll to make sure a given VF is ready after reset
- */
-static void ice_wait_on_vf_reset(struct ice_vf *vf)
-{
- int i;
-
- for (i = 0; i < ICE_MAX_VF_RESET_WAIT; i++) {
- if (test_bit(ICE_VF_STATE_INIT, vf->vf_states))
- break;
- msleep(20);
- }
-}
-
-/**
* ice_set_vf_mac
* @netdev: network interface device structure
* @vf_id: VF identifier
@@ -3265,29 +3291,21 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
{
struct ice_pf *pf = ice_netdev_to_pf(netdev);
struct ice_vf *vf;
- int ret = 0;
+ int ret;
if (ice_validate_vf_id(pf, vf_id))
return -EINVAL;
- vf = &pf->vf[vf_id];
- /* Don't set MAC on disabled VF */
- if (ice_is_vf_disabled(vf))
- return -EINVAL;
-
- /* In case VF is in reset mode, wait until it is completed. Depending
- * on factors like queue disabling routine, this could take ~250ms
- */
- ice_wait_on_vf_reset(vf);
-
- if (ice_check_vf_init(pf, vf))
- return -EBUSY;
-
if (is_zero_ether_addr(mac) || is_multicast_ether_addr(mac)) {
netdev_err(netdev, "%pM not a valid unicast address\n", mac);
return -EINVAL;
}
+ vf = &pf->vf[vf_id];
+ ret = ice_check_vf_ready_for_cfg(vf);
+ if (ret)
+ return ret;
+
/* copy MAC into dflt_lan_addr and trigger a VF reset. The reset
* flow will use the updated dflt_lan_addr and add a MAC filter
* using ice_add_mac. Also set pf_set_mac to indicate that the PF has
@@ -3299,7 +3317,7 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
vf_id, mac);
ice_vc_reset_vf(vf);
- return ret;
+ return 0;
}
/**
@@ -3314,22 +3332,15 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted)
{
struct ice_pf *pf = ice_netdev_to_pf(netdev);
struct ice_vf *vf;
+ int ret;
if (ice_validate_vf_id(pf, vf_id))
return -EINVAL;
vf = &pf->vf[vf_id];
- /* Don't set Trusted Mode on disabled VF */
- if (ice_is_vf_disabled(vf))
- return -EINVAL;
-
- /* In case VF is in reset mode, wait until it is completed. Depending
- * on factors like queue disabling routine, this could take ~250ms
- */
- ice_wait_on_vf_reset(vf);
-
- if (ice_check_vf_init(pf, vf))
- return -EBUSY;
+ ret = ice_check_vf_ready_for_cfg(vf);
+ if (ret)
+ return ret;
/* Check if already trusted */
if (trusted == vf->trusted)
@@ -3355,13 +3366,15 @@ int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state)
{
struct ice_pf *pf = ice_netdev_to_pf(netdev);
struct ice_vf *vf;
+ int ret;
if (ice_validate_vf_id(pf, vf_id))
return -EINVAL;
vf = &pf->vf[vf_id];
- if (ice_check_vf_init(pf, vf))
- return -EBUSY;
+ ret = ice_check_vf_ready_for_cfg(vf);
+ if (ret)
+ return ret;
switch (link_state) {
case IFLA_VF_LINK_STATE_AUTO:
@@ -3397,14 +3410,15 @@ int ice_get_vf_stats(struct net_device *netdev, int vf_id,
struct ice_eth_stats *stats;
struct ice_vsi *vsi;
struct ice_vf *vf;
+ int ret;
if (ice_validate_vf_id(pf, vf_id))
return -EINVAL;
vf = &pf->vf[vf_id];
-
- if (ice_check_vf_init(pf, vf))
- return -EBUSY;
+ ret = ice_check_vf_ready_for_cfg(vf);
+ if (ret)
+ return ret;
vsi = pf->vsi[vf->lan_vsi_idx];
if (!vsi)
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
index 4647d636ed36..ac67982751df 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
@@ -38,7 +38,8 @@
#define ICE_MAX_POLICY_INTR_PER_VF 33
#define ICE_MIN_INTR_PER_VF (ICE_MIN_QS_PER_VF + 1)
#define ICE_DFLT_INTR_PER_VF (ICE_DFLT_QS_PER_VF + 1)
-#define ICE_MAX_VF_RESET_WAIT 15
+#define ICE_MAX_VF_RESET_TRIES 40
+#define ICE_MAX_VF_RESET_SLEEP_MS 20
#define ice_for_each_vf(pf, i) \
for ((i) = 0; (i) < (pf)->num_alloc_vfs; (i)++)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
index 3a975641f902..20b907dc1e29 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
@@ -200,7 +200,7 @@ int mlx5e_health_report(struct mlx5e_priv *priv,
netdev_err(priv->netdev, err_str);
if (!reporter)
- return err_ctx->recover(&err_ctx->ctx);
+ return err_ctx->recover(err_ctx->ctx);
return devlink_health_report(reporter, err_str, err_ctx);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index 7c8796d9743f..a226277b0980 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -179,6 +179,14 @@ mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma)
}
}
+static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq)
+{
+ if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
+ mlx5_wq_ll_reset(&rq->mpwqe.wq);
+ else
+ mlx5_wq_cyc_reset(&rq->wqe.wq);
+}
+
/* SW parser related functions */
struct mlx5e_swp_spec {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 454d3459bd8b..966983674663 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -712,6 +712,9 @@ int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state)
if (!in)
return -ENOMEM;
+ if (curr_state == MLX5_RQC_STATE_RST && next_state == MLX5_RQC_STATE_RDY)
+ mlx5e_rqwq_reset(rq);
+
rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
MLX5_SET(modify_rq_in, in, rq_state, curr_state);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 5acf60b1bbfe..e49acd0c5da5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -459,12 +459,16 @@ static void esw_destroy_legacy_table(struct mlx5_eswitch *esw)
static int esw_legacy_enable(struct mlx5_eswitch *esw)
{
- int ret;
+ struct mlx5_vport *vport;
+ int ret, i;
ret = esw_create_legacy_table(esw);
if (ret)
return ret;
+ mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs)
+ vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO;
+
ret = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_LEGACY_SRIOV_VPORT_EVENTS);
if (ret)
esw_destroy_legacy_table(esw);
@@ -2452,25 +2456,17 @@ out:
int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting)
{
- int err = 0;
-
if (!esw)
return -EOPNOTSUPP;
if (!ESW_ALLOWED(esw))
return -EPERM;
- mutex_lock(&esw->state_lock);
- if (esw->mode != MLX5_ESWITCH_LEGACY) {
- err = -EOPNOTSUPP;
- goto out;
- }
+ if (esw->mode != MLX5_ESWITCH_LEGACY)
+ return -EOPNOTSUPP;
*setting = esw->fdb_table.legacy.vepa_uplink_rule ? 1 : 0;
-
-out:
- mutex_unlock(&esw->state_lock);
- return err;
+ return 0;
}
int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 979f13bdc203..1a57b2bd74b8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -1172,7 +1172,7 @@ static int esw_offloads_start(struct mlx5_eswitch *esw,
return -EINVAL;
}
- mlx5_eswitch_disable(esw, true);
+ mlx5_eswitch_disable(esw, false);
mlx5_eswitch_update_num_of_vfs(esw, esw->dev->priv.sriov.num_vfs);
err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS);
if (err) {
@@ -2065,7 +2065,7 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw,
{
int err, err1;
- mlx5_eswitch_disable(esw, true);
+ mlx5_eswitch_disable(esw, false);
err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY);
if (err) {
NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c
index c5a446e295aa..4276194b633f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c
@@ -35,7 +35,7 @@
static const unsigned int ESW_POOLS[] = { 4 * 1024 * 1024,
1 * 1024 * 1024,
64 * 1024,
- 4 * 1024, };
+ 128 };
struct mlx5_esw_chains_priv {
struct rhashtable chains_ht;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
index c6c7d1defbd7..aade62a9ee5c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
@@ -2307,7 +2307,9 @@ static int dr_ste_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value,
struct mlx5dr_cmd_vport_cap *vport_cap;
struct mlx5dr_domain *dmn = sb->dmn;
struct mlx5dr_cmd_caps *caps;
+ u8 *bit_mask = sb->bit_mask;
u8 *tag = hw_ste->tag;
+ bool source_gvmi_set;
DR_STE_SET_TAG(src_gvmi_qp, tag, source_qp, misc, source_sqn);
@@ -2328,7 +2330,8 @@ static int dr_ste_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value,
if (!vport_cap)
return -EINVAL;
- if (vport_cap->vport_gvmi)
+ source_gvmi_set = MLX5_GET(ste_src_gvmi_qp, bit_mask, source_gvmi);
+ if (vport_cap->vport_gvmi && source_gvmi_set)
MLX5_SET(ste_src_gvmi_qp, tag, source_gvmi, vport_cap->vport_gvmi);
misc->source_eswitch_owner_vhca_id = 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
index 3abfc8125926..c2027192e21e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
@@ -66,15 +66,20 @@ static int mlx5_cmd_dr_create_flow_table(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_table *next_ft)
{
struct mlx5dr_table *tbl;
+ u32 flags;
int err;
if (mlx5_dr_is_fw_table(ft->flags))
return mlx5_fs_cmd_get_fw_cmds()->create_flow_table(ns, ft,
log_size,
next_ft);
+ flags = ft->flags;
+ /* turn off encap/decap if not supported for sw-str by fw */
+ if (!MLX5_CAP_FLOWTABLE(ns->dev, sw_owner_reformat_supported))
+ flags = ft->flags & ~(MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT |
+ MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
- tbl = mlx5dr_table_create(ns->fs_dr_domain.dr_domain,
- ft->level, ft->flags);
+ tbl = mlx5dr_table_create(ns->fs_dr_domain.dr_domain, ft->level, flags);
if (!tbl) {
mlx5_core_err(ns->dev, "Failed creating dr flow_table\n");
return -EINVAL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index 02f7e4a39578..01f075fac276 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -94,6 +94,13 @@ void mlx5_wq_cyc_wqe_dump(struct mlx5_wq_cyc *wq, u16 ix, u8 nstrides)
print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1, wqe, len, false);
}
+void mlx5_wq_cyc_reset(struct mlx5_wq_cyc *wq)
+{
+ wq->wqe_ctr = 0;
+ wq->cur_sz = 0;
+ mlx5_wq_cyc_update_db_record(wq);
+}
+
int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
void *qpc, struct mlx5_wq_qp *wq,
struct mlx5_wq_ctrl *wq_ctrl)
@@ -192,6 +199,19 @@ err_db_free:
return err;
}
+static void mlx5_wq_ll_init_list(struct mlx5_wq_ll *wq)
+{
+ struct mlx5_wqe_srq_next_seg *next_seg;
+ int i;
+
+ for (i = 0; i < wq->fbc.sz_m1; i++) {
+ next_seg = mlx5_wq_ll_get_wqe(wq, i);
+ next_seg->next_wqe_index = cpu_to_be16(i + 1);
+ }
+ next_seg = mlx5_wq_ll_get_wqe(wq, i);
+ wq->tail_next = &next_seg->next_wqe_index;
+}
+
int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
void *wqc, struct mlx5_wq_ll *wq,
struct mlx5_wq_ctrl *wq_ctrl)
@@ -199,9 +219,7 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
u8 log_wq_stride = MLX5_GET(wq, wqc, log_wq_stride);
u8 log_wq_sz = MLX5_GET(wq, wqc, log_wq_sz);
struct mlx5_frag_buf_ctrl *fbc = &wq->fbc;
- struct mlx5_wqe_srq_next_seg *next_seg;
int err;
- int i;
err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
if (err) {
@@ -220,13 +238,7 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, fbc);
- for (i = 0; i < fbc->sz_m1; i++) {
- next_seg = mlx5_wq_ll_get_wqe(wq, i);
- next_seg->next_wqe_index = cpu_to_be16(i + 1);
- }
- next_seg = mlx5_wq_ll_get_wqe(wq, i);
- wq->tail_next = &next_seg->next_wqe_index;
-
+ mlx5_wq_ll_init_list(wq);
wq_ctrl->mdev = mdev;
return 0;
@@ -237,6 +249,15 @@ err_db_free:
return err;
}
+void mlx5_wq_ll_reset(struct mlx5_wq_ll *wq)
+{
+ wq->head = 0;
+ wq->wqe_ctr = 0;
+ wq->cur_sz = 0;
+ mlx5_wq_ll_init_list(wq);
+ mlx5_wq_ll_update_db_record(wq);
+}
+
void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl)
{
mlx5_frag_buf_free(wq_ctrl->mdev, &wq_ctrl->buf);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
index d9a94bc223c0..4cadc336593f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
@@ -80,6 +80,7 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
void *wqc, struct mlx5_wq_cyc *wq,
struct mlx5_wq_ctrl *wq_ctrl);
void mlx5_wq_cyc_wqe_dump(struct mlx5_wq_cyc *wq, u16 ix, u8 nstrides);
+void mlx5_wq_cyc_reset(struct mlx5_wq_cyc *wq);
int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
void *qpc, struct mlx5_wq_qp *wq,
@@ -92,6 +93,7 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
void *wqc, struct mlx5_wq_ll *wq,
struct mlx5_wq_ctrl *wq_ctrl);
+void mlx5_wq_ll_reset(struct mlx5_wq_ll *wq);
void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl);
diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c
index a41a90c589db..1c9e70c8cc30 100644
--- a/drivers/net/ethernet/micrel/ks8851_mll.c
+++ b/drivers/net/ethernet/micrel/ks8851_mll.c
@@ -157,24 +157,6 @@ static int msg_enable;
*/
/**
- * ks_rdreg8 - read 8 bit register from device
- * @ks : The chip information
- * @offset: The register address
- *
- * Read a 8bit register from the chip, returning the result
- */
-static u8 ks_rdreg8(struct ks_net *ks, int offset)
-{
- u16 data;
- u8 shift_bit = offset & 0x03;
- u8 shift_data = (offset & 1) << 3;
- ks->cmd_reg_cache = (u16) offset | (u16)(BE0 << shift_bit);
- iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd);
- data = ioread16(ks->hw_addr);
- return (u8)(data >> shift_data);
-}
-
-/**
* ks_rdreg16 - read 16 bit register from device
* @ks : The chip information
* @offset: The register address
@@ -184,28 +166,12 @@ static u8 ks_rdreg8(struct ks_net *ks, int offset)
static u16 ks_rdreg16(struct ks_net *ks, int offset)
{
- ks->cmd_reg_cache = (u16)offset | ((BE1 | BE0) << (offset & 0x02));
+ ks->cmd_reg_cache = (u16)offset | ((BE3 | BE2) >> (offset & 0x02));
iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd);
return ioread16(ks->hw_addr);
}
/**
- * ks_wrreg8 - write 8bit register value to chip
- * @ks: The chip information
- * @offset: The register address
- * @value: The value to write
- *
- */
-static void ks_wrreg8(struct ks_net *ks, int offset, u8 value)
-{
- u8 shift_bit = (offset & 0x03);
- u16 value_write = (u16)(value << ((offset & 1) << 3));
- ks->cmd_reg_cache = (u16)offset | (BE0 << shift_bit);
- iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd);
- iowrite16(value_write, ks->hw_addr);
-}
-
-/**
* ks_wrreg16 - write 16bit register value to chip
* @ks: The chip information
* @offset: The register address
@@ -215,7 +181,7 @@ static void ks_wrreg8(struct ks_net *ks, int offset, u8 value)
static void ks_wrreg16(struct ks_net *ks, int offset, u16 value)
{
- ks->cmd_reg_cache = (u16)offset | ((BE1 | BE0) << (offset & 0x02));
+ ks->cmd_reg_cache = (u16)offset | ((BE3 | BE2) >> (offset & 0x02));
iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd);
iowrite16(value, ks->hw_addr);
}
@@ -231,7 +197,7 @@ static inline void ks_inblk(struct ks_net *ks, u16 *wptr, u32 len)
{
len >>= 1;
while (len--)
- *wptr++ = (u16)ioread16(ks->hw_addr);
+ *wptr++ = be16_to_cpu(ioread16(ks->hw_addr));
}
/**
@@ -245,7 +211,7 @@ static inline void ks_outblk(struct ks_net *ks, u16 *wptr, u32 len)
{
len >>= 1;
while (len--)
- iowrite16(*wptr++, ks->hw_addr);
+ iowrite16(cpu_to_be16(*wptr++), ks->hw_addr);
}
static void ks_disable_int(struct ks_net *ks)
@@ -324,8 +290,7 @@ static void ks_read_config(struct ks_net *ks)
u16 reg_data = 0;
/* Regardless of bus width, 8 bit read should always work.*/
- reg_data = ks_rdreg8(ks, KS_CCR) & 0x00FF;
- reg_data |= ks_rdreg8(ks, KS_CCR+1) << 8;
+ reg_data = ks_rdreg16(ks, KS_CCR);
/* addr/data bus are multiplexed */
ks->sharedbus = (reg_data & CCR_SHARED) == CCR_SHARED;
@@ -429,7 +394,7 @@ static inline void ks_read_qmu(struct ks_net *ks, u16 *buf, u32 len)
/* 1. set sudo DMA mode */
ks_wrreg16(ks, KS_RXFDPR, RXFDPR_RXFPAI);
- ks_wrreg8(ks, KS_RXQCR, (ks->rc_rxqcr | RXQCR_SDA) & 0xff);
+ ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA);
/* 2. read prepend data */
/**
@@ -446,7 +411,7 @@ static inline void ks_read_qmu(struct ks_net *ks, u16 *buf, u32 len)
ks_inblk(ks, buf, ALIGN(len, 4));
/* 4. reset sudo DMA Mode */
- ks_wrreg8(ks, KS_RXQCR, ks->rc_rxqcr);
+ ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr);
}
/**
@@ -679,13 +644,13 @@ static void ks_write_qmu(struct ks_net *ks, u8 *pdata, u16 len)
ks->txh.txw[1] = cpu_to_le16(len);
/* 1. set sudo-DMA mode */
- ks_wrreg8(ks, KS_RXQCR, (ks->rc_rxqcr | RXQCR_SDA) & 0xff);
+ ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA);
/* 2. write status/lenth info */
ks_outblk(ks, ks->txh.txw, 4);
/* 3. write pkt data */
ks_outblk(ks, (u16 *)pdata, ALIGN(len, 4));
/* 4. reset sudo-DMA mode */
- ks_wrreg8(ks, KS_RXQCR, ks->rc_rxqcr);
+ ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr);
/* 5. Enqueue Tx(move the pkt from TX buffer into TXQ) */
ks_wrreg16(ks, KS_TXQCR, TXQCR_METFE);
/* 6. wait until TXQCR_METFE is auto-cleared */
diff --git a/drivers/net/ethernet/mscc/ocelot_board.c b/drivers/net/ethernet/mscc/ocelot_board.c
index b38820849faa..1135a18019c7 100644
--- a/drivers/net/ethernet/mscc/ocelot_board.c
+++ b/drivers/net/ethernet/mscc/ocelot_board.c
@@ -114,6 +114,14 @@ static irqreturn_t ocelot_xtr_irq_handler(int irq, void *arg)
if (err != 4)
break;
+ /* At this point the IFH was read correctly, so it is safe to
+ * presume that there is no error. The err needs to be reset
+ * otherwise a frame could come in CPU queue between the while
+ * condition and the check for error later on. And in that case
+ * the new frame is just removed and not processed.
+ */
+ err = 0;
+
ocelot_parse_ifh(ifh, &info);
ocelot_port = ocelot->ports[info.port];
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
index 87f82f36812f..46107de5e6c3 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
@@ -103,7 +103,7 @@ int ionic_heartbeat_check(struct ionic *ionic)
{
struct ionic_dev *idev = &ionic->idev;
unsigned long hb_time;
- u32 fw_status;
+ u8 fw_status;
u32 hb;
/* wait a little more than one second before testing again */
@@ -111,9 +111,12 @@ int ionic_heartbeat_check(struct ionic *ionic)
if (time_before(hb_time, (idev->last_hb_time + ionic->watchdog_period)))
return 0;
- /* firmware is useful only if fw_status is non-zero */
- fw_status = ioread32(&idev->dev_info_regs->fw_status);
- if (!fw_status)
+ /* firmware is useful only if the running bit is set and
+ * fw_status != 0xff (bad PCI read)
+ */
+ fw_status = ioread8(&idev->dev_info_regs->fw_status);
+ if (fw_status == 0xff ||
+ !(fw_status & IONIC_FW_STS_F_RUNNING))
return -ENXIO;
/* early FW has no heartbeat, else FW will return non-zero */
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h
index ce07c2931a72..54547d53b0f2 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_if.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h
@@ -2445,6 +2445,7 @@ union ionic_dev_info_regs {
u8 version;
u8 asic_type;
u8 asic_rev;
+#define IONIC_FW_STS_F_RUNNING 0x1
u8 fw_status;
u32 fw_heartbeat;
char fw_version[IONIC_DEVINFO_FWVERS_BUFLEN];
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index e8a1b27db84d..234c6f30effb 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -163,6 +163,8 @@ struct qede_rdma_dev {
struct list_head entry;
struct list_head rdma_event_list;
struct workqueue_struct *rdma_wq;
+ struct kref refcnt;
+ struct completion event_comp;
bool exp_recovery;
};
diff --git a/drivers/net/ethernet/qlogic/qede/qede_rdma.c b/drivers/net/ethernet/qlogic/qede/qede_rdma.c
index ffabc2d2f082..2d873ae8a234 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_rdma.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_rdma.c
@@ -59,6 +59,9 @@ static void _qede_rdma_dev_add(struct qede_dev *edev)
static int qede_rdma_create_wq(struct qede_dev *edev)
{
INIT_LIST_HEAD(&edev->rdma_info.rdma_event_list);
+ kref_init(&edev->rdma_info.refcnt);
+ init_completion(&edev->rdma_info.event_comp);
+
edev->rdma_info.rdma_wq = create_singlethread_workqueue("rdma_wq");
if (!edev->rdma_info.rdma_wq) {
DP_NOTICE(edev, "qedr: Could not create workqueue\n");
@@ -83,8 +86,23 @@ static void qede_rdma_cleanup_event(struct qede_dev *edev)
}
}
+static void qede_rdma_complete_event(struct kref *ref)
+{
+ struct qede_rdma_dev *rdma_dev =
+ container_of(ref, struct qede_rdma_dev, refcnt);
+
+ /* no more events will be added after this */
+ complete(&rdma_dev->event_comp);
+}
+
static void qede_rdma_destroy_wq(struct qede_dev *edev)
{
+ /* Avoid race with add_event flow, make sure it finishes before
+ * we start accessing the list and cleaning up the work
+ */
+ kref_put(&edev->rdma_info.refcnt, qede_rdma_complete_event);
+ wait_for_completion(&edev->rdma_info.event_comp);
+
qede_rdma_cleanup_event(edev);
destroy_workqueue(edev->rdma_info.rdma_wq);
}
@@ -310,15 +328,24 @@ static void qede_rdma_add_event(struct qede_dev *edev,
if (!edev->rdma_info.qedr_dev)
return;
+ /* We don't want the cleanup flow to start while we're allocating and
+ * scheduling the work
+ */
+ if (!kref_get_unless_zero(&edev->rdma_info.refcnt))
+ return; /* already being destroyed */
+
event_node = qede_rdma_get_free_event_node(edev);
if (!event_node)
- return;
+ goto out;
event_node->event = event;
event_node->ptr = edev;
INIT_WORK(&event_node->work, qede_rdma_handle_event);
queue_work(edev->rdma_info.rdma_wq, &event_node->work);
+
+out:
+ kref_put(&edev->rdma_info.refcnt, qede_rdma_complete_event);
}
void qede_rdma_dev_event_open(struct qede_dev *edev)
diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index 7d68b28bb893..a62229a8b1a4 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -410,7 +410,7 @@ static int bcm5481_config_aneg(struct phy_device *phydev)
struct device_node *np = phydev->mdio.dev.of_node;
int ret;
- /* Aneg firsly. */
+ /* Aneg firstly. */
ret = genphy_config_aneg(phydev);
/* Then we can set up the delay. */
@@ -463,7 +463,7 @@ static int bcm54616s_config_aneg(struct phy_device *phydev)
{
int ret;
- /* Aneg firsly. */
+ /* Aneg firstly. */
if (phydev->dev_flags & PHY_BCM_FLAGS_MODE_1000BX)
ret = genphy_c37_config_aneg(phydev);
else
diff --git a/drivers/net/phy/mdio-bcm-iproc.c b/drivers/net/phy/mdio-bcm-iproc.c
index 7e9975d25066..f1ded03f0229 100644
--- a/drivers/net/phy/mdio-bcm-iproc.c
+++ b/drivers/net/phy/mdio-bcm-iproc.c
@@ -178,6 +178,23 @@ static int iproc_mdio_remove(struct platform_device *pdev)
return 0;
}
+#ifdef CONFIG_PM_SLEEP
+int iproc_mdio_resume(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct iproc_mdio_priv *priv = platform_get_drvdata(pdev);
+
+ /* restore the mii clock configuration */
+ iproc_mdio_config_clk(priv->base);
+
+ return 0;
+}
+
+static const struct dev_pm_ops iproc_mdio_pm_ops = {
+ .resume = iproc_mdio_resume
+};
+#endif /* CONFIG_PM_SLEEP */
+
static const struct of_device_id iproc_mdio_of_match[] = {
{ .compatible = "brcm,iproc-mdio", },
{ /* sentinel */ },
@@ -188,6 +205,9 @@ static struct platform_driver iproc_mdio_driver = {
.driver = {
.name = "iproc-mdio",
.of_match_table = iproc_mdio_of_match,
+#ifdef CONFIG_PM_SLEEP
+ .pm = &iproc_mdio_pm_ops,
+#endif
},
.probe = iproc_mdio_probe,
.remove = iproc_mdio_remove,
diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c
index 43db442b1373..cdc96968b0f4 100644
--- a/drivers/net/wireguard/device.c
+++ b/drivers/net/wireguard/device.c
@@ -258,6 +258,8 @@ static void wg_setup(struct net_device *dev)
enum { WG_NETDEV_FEATURES = NETIF_F_HW_CSUM | NETIF_F_RXCSUM |
NETIF_F_SG | NETIF_F_GSO |
NETIF_F_GSO_SOFTWARE | NETIF_F_HIGHDMA };
+ const int overhead = MESSAGE_MINIMUM_LENGTH + sizeof(struct udphdr) +
+ max(sizeof(struct ipv6hdr), sizeof(struct iphdr));
dev->netdev_ops = &netdev_ops;
dev->hard_header_len = 0;
@@ -271,9 +273,8 @@ static void wg_setup(struct net_device *dev)
dev->features |= WG_NETDEV_FEATURES;
dev->hw_features |= WG_NETDEV_FEATURES;
dev->hw_enc_features |= WG_NETDEV_FEATURES;
- dev->mtu = ETH_DATA_LEN - MESSAGE_MINIMUM_LENGTH -
- sizeof(struct udphdr) -
- max(sizeof(struct ipv6hdr), sizeof(struct iphdr));
+ dev->mtu = ETH_DATA_LEN - overhead;
+ dev->max_mtu = round_down(INT_MAX, MESSAGE_PADDING_MULTIPLE) - overhead;
SET_NETDEV_DEVTYPE(dev, &device_type);
diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c
index 9c6bab9c981f..4a153894cee2 100644
--- a/drivers/net/wireguard/receive.c
+++ b/drivers/net/wireguard/receive.c
@@ -118,10 +118,13 @@ static void wg_receive_handshake_packet(struct wg_device *wg,
under_load = skb_queue_len(&wg->incoming_handshakes) >=
MAX_QUEUED_INCOMING_HANDSHAKES / 8;
- if (under_load)
+ if (under_load) {
last_under_load = ktime_get_coarse_boottime_ns();
- else if (last_under_load)
+ } else if (last_under_load) {
under_load = !wg_birthdate_has_expired(last_under_load, 1);
+ if (!under_load)
+ last_under_load = 0;
+ }
mac_state = wg_cookie_validate_packet(&wg->cookie_checker, skb,
under_load);
if ((under_load && mac_state == VALID_MAC_WITH_COOKIE) ||
diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c
index c13260563446..7348c10cbae3 100644
--- a/drivers/net/wireguard/send.c
+++ b/drivers/net/wireguard/send.c
@@ -143,16 +143,22 @@ static void keep_key_fresh(struct wg_peer *peer)
static unsigned int calculate_skb_padding(struct sk_buff *skb)
{
+ unsigned int padded_size, last_unit = skb->len;
+
+ if (unlikely(!PACKET_CB(skb)->mtu))
+ return ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE) - last_unit;
+
/* We do this modulo business with the MTU, just in case the networking
* layer gives us a packet that's bigger than the MTU. In that case, we
* wouldn't want the final subtraction to overflow in the case of the
- * padded_size being clamped.
+ * padded_size being clamped. Fortunately, that's very rarely the case,
+ * so we optimize for that not happening.
*/
- unsigned int last_unit = skb->len % PACKET_CB(skb)->mtu;
- unsigned int padded_size = ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE);
+ if (unlikely(last_unit > PACKET_CB(skb)->mtu))
+ last_unit %= PACKET_CB(skb)->mtu;
- if (padded_size > PACKET_CB(skb)->mtu)
- padded_size = PACKET_CB(skb)->mtu;
+ padded_size = min(PACKET_CB(skb)->mtu,
+ ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE));
return padded_size - last_unit;
}
diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c
index 262f3b5c819d..b0d6541582d3 100644
--- a/drivers/net/wireguard/socket.c
+++ b/drivers/net/wireguard/socket.c
@@ -432,7 +432,6 @@ void wg_socket_reinit(struct wg_device *wg, struct sock *new4,
wg->incoming_port = ntohs(inet_sk(new4)->inet_sport);
mutex_unlock(&wg->socket_update_lock);
synchronize_rcu();
- synchronize_net();
sock_free(old4);
sock_free(old6);
}
diff --git a/drivers/nfc/pn544/i2c.c b/drivers/nfc/pn544/i2c.c
index 720c89d6066e..4ac8cb262559 100644
--- a/drivers/nfc/pn544/i2c.c
+++ b/drivers/nfc/pn544/i2c.c
@@ -225,6 +225,7 @@ static void pn544_hci_i2c_platform_init(struct pn544_i2c_phy *phy)
out:
gpiod_set_value_cansleep(phy->gpiod_en, !phy->en_polarity);
+ usleep_range(10000, 15000);
}
static void pn544_hci_i2c_enable_mode(struct pn544_i2c_phy *phy, int run_mode)
diff --git a/drivers/nfc/pn544/pn544.c b/drivers/nfc/pn544/pn544.c
index 2b83156efe3f..b788870473e8 100644
--- a/drivers/nfc/pn544/pn544.c
+++ b/drivers/nfc/pn544/pn544.c
@@ -682,7 +682,7 @@ static int pn544_hci_tm_send(struct nfc_hci_dev *hdev, struct sk_buff *skb)
static int pn544_hci_check_presence(struct nfc_hci_dev *hdev,
struct nfc_target *target)
{
- pr_debug("supported protocol %d\b", target->supported_protocols);
+ pr_debug("supported protocol %d\n", target->supported_protocols);
if (target->supported_protocols & (NFC_PROTO_ISO14443_MASK |
NFC_PROTO_ISO14443_B_MASK)) {
return nfc_hci_send_cmd(hdev, target->hci_reader_gate,
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 9639938581f5..5efcaa43615b 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -5344,7 +5344,7 @@ next_packet:
}
use_rx_sg = (card->options.cq == QETH_CQ_ENABLED) ||
- ((skb_len >= card->options.rx_sg_cb) &&
+ (skb_len > card->options.rx_sg_cb &&
!atomic_read(&card->force_alloc_skb) &&
!IS_OSN(card));
@@ -5447,7 +5447,6 @@ static int qeth_extract_skbs(struct qeth_card *card, int budget,
{
int work_done = 0;
- WARN_ON_ONCE(!budget);
*done = false;
while (budget) {
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 692bd2623401..9972d96820f3 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -1707,15 +1707,14 @@ int qeth_l2_vnicc_set_state(struct qeth_card *card, u32 vnicc, bool state)
QETH_CARD_TEXT(card, 2, "vniccsch");
- /* do not change anything if BridgePort is enabled */
- if (qeth_bridgeport_is_in_use(card))
- return -EBUSY;
-
/* check if characteristic and enable/disable are supported */
if (!(card->options.vnicc.sup_chars & vnicc) ||
!(card->options.vnicc.set_char_sup & vnicc))
return -EOPNOTSUPP;
+ if (qeth_bridgeport_is_in_use(card))
+ return -EBUSY;
+
/* set enable/disable command and store wanted characteristic */
if (state) {
cmd = IPA_VNICC_ENABLE;
@@ -1761,14 +1760,13 @@ int qeth_l2_vnicc_get_state(struct qeth_card *card, u32 vnicc, bool *state)
QETH_CARD_TEXT(card, 2, "vniccgch");
- /* do not get anything if BridgePort is enabled */
- if (qeth_bridgeport_is_in_use(card))
- return -EBUSY;
-
/* check if characteristic is supported */
if (!(card->options.vnicc.sup_chars & vnicc))
return -EOPNOTSUPP;
+ if (qeth_bridgeport_is_in_use(card))
+ return -EBUSY;
+
/* if card is ready, query current VNICC state */
if (qeth_card_hw_is_reachable(card))
rc = qeth_l2_vnicc_query_chars(card);
@@ -1786,15 +1784,14 @@ int qeth_l2_vnicc_set_timeout(struct qeth_card *card, u32 timeout)
QETH_CARD_TEXT(card, 2, "vniccsto");
- /* do not change anything if BridgePort is enabled */
- if (qeth_bridgeport_is_in_use(card))
- return -EBUSY;
-
/* check if characteristic and set_timeout are supported */
if (!(card->options.vnicc.sup_chars & QETH_VNICC_LEARNING) ||
!(card->options.vnicc.getset_timeout_sup & QETH_VNICC_LEARNING))
return -EOPNOTSUPP;
+ if (qeth_bridgeport_is_in_use(card))
+ return -EBUSY;
+
/* do we need to do anything? */
if (card->options.vnicc.learning_timeout == timeout)
return rc;
@@ -1823,14 +1820,14 @@ int qeth_l2_vnicc_get_timeout(struct qeth_card *card, u32 *timeout)
QETH_CARD_TEXT(card, 2, "vniccgto");
- /* do not get anything if BridgePort is enabled */
- if (qeth_bridgeport_is_in_use(card))
- return -EBUSY;
-
/* check if characteristic and get_timeout are supported */
if (!(card->options.vnicc.sup_chars & QETH_VNICC_LEARNING) ||
!(card->options.vnicc.getset_timeout_sup & QETH_VNICC_LEARNING))
return -EOPNOTSUPP;
+
+ if (qeth_bridgeport_is_in_use(card))
+ return -EBUSY;
+
/* if card is ready, get timeout. Otherwise, just return stored value */
*timeout = card->options.vnicc.learning_timeout;
if (qeth_card_hw_is_reachable(card))
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index ff8c9d527bb4..bfdf41537cf1 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -688,7 +688,10 @@ struct mlx5_ifc_flow_table_nic_cap_bits {
u8 nic_rx_multi_path_tirs[0x1];
u8 nic_rx_multi_path_tirs_fts[0x1];
u8 allow_sniffer_and_nic_rx_shared_tir[0x1];
- u8 reserved_at_3[0x1d];
+ u8 reserved_at_3[0x4];
+ u8 sw_owner_reformat_supported[0x1];
+ u8 reserved_at_8[0x18];
+
u8 encap_general_header[0x1];
u8 reserved_at_21[0xa];
u8 log_max_packet_reformat_context[0x5];
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9f1f633235f6..6c3f7032e8d9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -72,6 +72,8 @@ void netdev_set_default_ethtool_ops(struct net_device *dev,
#define NET_RX_SUCCESS 0 /* keep 'em coming, baby */
#define NET_RX_DROP 1 /* packet dropped */
+#define MAX_NEST_DEV 8
+
/*
* Transmit return codes: transmit return codes originate from three different
* namespaces:
@@ -4389,11 +4391,8 @@ void *netdev_lower_get_next(struct net_device *dev,
ldev; \
ldev = netdev_lower_get_next(dev, &(iter)))
-struct net_device *netdev_all_lower_get_next(struct net_device *dev,
+struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
struct list_head **iter);
-struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev,
- struct list_head **iter);
-
int netdev_walk_all_lower_dev(struct net_device *dev,
int (*fn)(struct net_device *lower_dev,
void *data),
diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h
index e5b752027a03..9670b54b484a 100644
--- a/include/linux/rculist_nulls.h
+++ b/include/linux/rculist_nulls.h
@@ -145,6 +145,13 @@ static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
}
}
+/* after that hlist_nulls_del will work */
+static inline void hlist_nulls_add_fake(struct hlist_nulls_node *n)
+{
+ n->pprev = &n->next;
+ n->next = (struct hlist_nulls_node *)NULLS_MARKER(NULL);
+}
+
/**
* hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type
* @tpos: the type * to use as a loop cursor.
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ca8806b69388..5b50278c4bc8 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -611,9 +611,15 @@ typedef unsigned char *sk_buff_data_t;
* @next: Next buffer in list
* @prev: Previous buffer in list
* @tstamp: Time we arrived/left
+ * @skb_mstamp_ns: (aka @tstamp) earliest departure time; start point
+ * for retransmit timer
* @rbnode: RB tree node, alternative to next/prev for netem/tcp
+ * @list: queue head
* @sk: Socket we are owned by
+ * @ip_defrag_offset: (aka @sk) alternate use of @sk, used in
+ * fragmentation management
* @dev: Device we arrived on/are leaving by
+ * @dev_scratch: (aka @dev) alternate use of @dev when @dev would be %NULL
* @cb: Control buffer. Free for use by every layer. Put private vars here
* @_skb_refdst: destination entry (with norefcount bit)
* @sp: the security path, used for xfrm
@@ -632,6 +638,9 @@ typedef unsigned char *sk_buff_data_t;
* @pkt_type: Packet class
* @fclone: skbuff clone status
* @ipvs_property: skbuff is owned by ipvs
+ * @inner_protocol_type: whether the inner protocol is
+ * ENCAP_TYPE_ETHER or ENCAP_TYPE_IPPROTO
+ * @remcsum_offload: remote checksum offload is enabled
* @offload_fwd_mark: Packet was L2-forwarded in hardware
* @offload_l3_fwd_mark: Packet was L3-forwarded in hardware
* @tc_skip_classify: do not classify packet. set by IFB device
@@ -650,6 +659,8 @@ typedef unsigned char *sk_buff_data_t;
* @tc_index: Traffic control index
* @hash: the packet hash
* @queue_mapping: Queue mapping for multiqueue devices
+ * @head_frag: skb was allocated from page fragments,
+ * not allocated by kmalloc() or vmalloc().
* @pfmemalloc: skbuff was allocated from PFMEMALLOC reserves
* @active_extensions: active extensions (skb_ext_id types)
* @ndisc_nodetype: router type (from link layer)
@@ -660,15 +671,28 @@ typedef unsigned char *sk_buff_data_t;
* @wifi_acked_valid: wifi_acked was set
* @wifi_acked: whether frame was acked on wifi or not
* @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS
+ * @encapsulation: indicates the inner headers in the skbuff are valid
+ * @encap_hdr_csum: software checksum is needed
+ * @csum_valid: checksum is already valid
* @csum_not_inet: use CRC32c to resolve CHECKSUM_PARTIAL
+ * @csum_complete_sw: checksum was completed by software
+ * @csum_level: indicates the number of consecutive checksums found in
+ * the packet minus one that have been verified as
+ * CHECKSUM_UNNECESSARY (max 3)
* @dst_pending_confirm: need to confirm neighbour
* @decrypted: Decrypted SKB
* @napi_id: id of the NAPI struct this skb came from
+ * @sender_cpu: (aka @napi_id) source CPU in XPS
* @secmark: security marking
* @mark: Generic packet mark
+ * @reserved_tailroom: (aka @mark) number of bytes of free space available
+ * at the tail of an sk_buff
+ * @vlan_present: VLAN tag is present
* @vlan_proto: vlan encapsulation protocol
* @vlan_tci: vlan tag control information
* @inner_protocol: Protocol (encapsulation)
+ * @inner_ipproto: (aka @inner_protocol) stores ipproto when
+ * skb->inner_protocol_type == ENCAP_TYPE_IPPROTO;
* @inner_transport_header: Inner transport layer header (encapsulation)
* @inner_network_header: Network layer header (encapsulation)
* @inner_mac_header: Link layer header (encapsulation)
@@ -750,7 +774,9 @@ struct sk_buff {
#endif
#define CLONED_OFFSET() offsetof(struct sk_buff, __cloned_offset)
+ /* private: */
__u8 __cloned_offset[0];
+ /* public: */
__u8 cloned:1,
nohdr:1,
fclone:2,
@@ -775,7 +801,9 @@ struct sk_buff {
#endif
#define PKT_TYPE_OFFSET() offsetof(struct sk_buff, __pkt_type_offset)
+ /* private: */
__u8 __pkt_type_offset[0];
+ /* public: */
__u8 pkt_type:3;
__u8 ignore_df:1;
__u8 nf_trace:1;
@@ -798,7 +826,9 @@ struct sk_buff {
#define PKT_VLAN_PRESENT_BIT 0
#endif
#define PKT_VLAN_PRESENT_OFFSET() offsetof(struct sk_buff, __pkt_vlan_present_offset)
+ /* private: */
__u8 __pkt_vlan_present_offset[0];
+ /* public: */
__u8 vlan_present:1;
__u8 csum_complete_sw:1;
__u8 csum_level:2;
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index e9391e877f9a..628383915827 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -5,6 +5,7 @@
#include <linux/types.h>
#include <linux/in6.h>
#include <linux/siphash.h>
+#include <linux/string.h>
#include <uapi/linux/if_ether.h>
struct sk_buff;
@@ -348,4 +349,12 @@ struct bpf_flow_dissector {
void *data_end;
};
+static inline void
+flow_dissector_init_keys(struct flow_dissector_key_control *key_control,
+ struct flow_dissector_key_basic *key_basic)
+{
+ memset(key_control, 0, sizeof(*key_control));
+ memset(key_basic, 0, sizeof(*key_basic));
+}
+
#endif
diff --git a/include/net/sock.h b/include/net/sock.h
index 02162b0378f7..328564525526 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -117,19 +117,26 @@ typedef __u64 __bitwise __addrpair;
* struct sock_common - minimal network layer representation of sockets
* @skc_daddr: Foreign IPv4 addr
* @skc_rcv_saddr: Bound local IPv4 addr
+ * @skc_addrpair: 8-byte-aligned __u64 union of @skc_daddr & @skc_rcv_saddr
* @skc_hash: hash value used with various protocol lookup tables
* @skc_u16hashes: two u16 hash values used by UDP lookup tables
* @skc_dport: placeholder for inet_dport/tw_dport
* @skc_num: placeholder for inet_num/tw_num
+ * @skc_portpair: __u32 union of @skc_dport & @skc_num
* @skc_family: network address family
* @skc_state: Connection state
* @skc_reuse: %SO_REUSEADDR setting
* @skc_reuseport: %SO_REUSEPORT setting
+ * @skc_ipv6only: socket is IPV6 only
+ * @skc_net_refcnt: socket is using net ref counting
* @skc_bound_dev_if: bound device index if != 0
* @skc_bind_node: bind hash linkage for various protocol lookup tables
* @skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol
* @skc_prot: protocol handlers inside a network family
* @skc_net: reference to the network namespace of this socket
+ * @skc_v6_daddr: IPV6 destination address
+ * @skc_v6_rcv_saddr: IPV6 source address
+ * @skc_cookie: socket's cookie value
* @skc_node: main hash linkage for various protocol lookup tables
* @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol
* @skc_tx_queue_mapping: tx queue number for this connection
@@ -137,7 +144,15 @@ typedef __u64 __bitwise __addrpair;
* @skc_flags: place holder for sk_flags
* %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
* %SO_OOBINLINE settings, %SO_TIMESTAMPING settings
+ * @skc_listener: connection request listener socket (aka rsk_listener)
+ * [union with @skc_flags]
+ * @skc_tw_dr: (aka tw_dr) ptr to &struct inet_timewait_death_row
+ * [union with @skc_flags]
* @skc_incoming_cpu: record/match cpu processing incoming packets
+ * @skc_rcv_wnd: (aka rsk_rcv_wnd) TCP receive window size (possibly scaled)
+ * [union with @skc_incoming_cpu]
+ * @skc_tw_rcv_nxt: (aka tw_rcv_nxt) TCP window next expected seq number
+ * [union with @skc_incoming_cpu]
* @skc_refcnt: reference count
*
* This is the minimal network layer representation of sockets, the header
@@ -245,6 +260,7 @@ struct bpf_sk_storage;
* @sk_dst_cache: destination cache
* @sk_dst_pending_confirm: need to confirm neighbour
* @sk_policy: flow policy
+ * @sk_rx_skb_cache: cache copy of recently accessed RX skb
* @sk_receive_queue: incoming packets
* @sk_wmem_alloc: transmit queue bytes committed
* @sk_tsq_flags: TCP Small Queues flags
@@ -265,6 +281,8 @@ struct bpf_sk_storage;
* @sk_no_check_rx: allow zero checksum in RX packets
* @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
* @sk_route_nocaps: forbidden route capabilities (e.g NETIF_F_GSO_MASK)
+ * @sk_route_forced_caps: static, forced route capabilities
+ * (set in tcp_init_sock())
* @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4)
* @sk_gso_max_size: Maximum GSO segment size to build
* @sk_gso_max_segs: Maximum number of GSO segments
@@ -303,6 +321,8 @@ struct bpf_sk_storage;
* @sk_frag: cached page frag
* @sk_peek_off: current peek_offset value
* @sk_send_head: front of stuff to transmit
+ * @tcp_rtx_queue: TCP re-transmit queue [union with @sk_send_head]
+ * @sk_tx_skb_cache: cache copy of recently accessed TX skb
* @sk_security: used by security modules
* @sk_mark: generic packet mark
* @sk_cgrp_data: cgroup data for this cgroup
@@ -313,11 +333,14 @@ struct bpf_sk_storage;
* @sk_write_space: callback to indicate there is bf sending space available
* @sk_error_report: callback to indicate errors (e.g. %MSG_ERRQUEUE)
* @sk_backlog_rcv: callback to process the backlog
+ * @sk_validate_xmit_skb: ptr to an optional validate function
* @sk_destruct: called at sock freeing time, i.e. when all refcnt == 0
* @sk_reuseport_cb: reuseport group container
+ * @sk_bpf_storage: ptr to cache and control for bpf_sk_storage
* @sk_rcu: used during RCU grace period
* @sk_clockid: clockid used by time-based scheduling (SO_TXTIME)
* @sk_txtime_deadline_mode: set deadline mode for SO_TXTIME
+ * @sk_txtime_report_errors: set report errors mode for SO_TXTIME
* @sk_txtime_unused: unused txtime flags
*/
struct sock {
@@ -393,7 +416,9 @@ struct sock {
struct sk_filter __rcu *sk_filter;
union {
struct socket_wq __rcu *sk_wq;
+ /* private: */
struct socket_wq *sk_wq_raw;
+ /* public: */
};
#ifdef CONFIG_XFRM
struct xfrm_policy __rcu *sk_policy[2];
@@ -2017,7 +2042,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro
* sk_wmem_alloc_get - returns write allocations
* @sk: socket
*
- * Returns sk_wmem_alloc minus initial offset of one
+ * Return: sk_wmem_alloc minus initial offset of one
*/
static inline int sk_wmem_alloc_get(const struct sock *sk)
{
@@ -2028,7 +2053,7 @@ static inline int sk_wmem_alloc_get(const struct sock *sk)
* sk_rmem_alloc_get - returns read allocations
* @sk: socket
*
- * Returns sk_rmem_alloc
+ * Return: sk_rmem_alloc
*/
static inline int sk_rmem_alloc_get(const struct sock *sk)
{
@@ -2039,7 +2064,7 @@ static inline int sk_rmem_alloc_get(const struct sock *sk)
* sk_has_allocations - check if allocations are outstanding
* @sk: socket
*
- * Returns true if socket has write or read allocations
+ * Return: true if socket has write or read allocations
*/
static inline bool sk_has_allocations(const struct sock *sk)
{
@@ -2050,7 +2075,7 @@ static inline bool sk_has_allocations(const struct sock *sk)
* skwq_has_sleeper - check if there are any waiting processes
* @wq: struct socket_wq
*
- * Returns true if socket_wq has waiting processes
+ * Return: true if socket_wq has waiting processes
*
* The purpose of the skwq_has_sleeper and sock_poll_wait is to wrap the memory
* barrier call. They were added due to the race found within the tcp code.
@@ -2238,6 +2263,9 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
* gfpflags_allow_blocking() isn't enough here as direct reclaim may nest
* inside other socket operations and end up recursing into sk_page_frag()
* while it's already in use.
+ *
+ * Return: a per task page_frag if context allows that,
+ * otherwise a per socket one.
*/
static inline struct page_frag *sk_page_frag(struct sock *sk)
{
@@ -2432,6 +2460,7 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags)
&skb_shinfo(skb)->tskey);
}
+DECLARE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key);
/**
* sk_eat_skb - Release a skb if it is no longer needed
* @sk: socket to eat this skb from
@@ -2440,7 +2469,6 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags)
* This routine must be called with interrupts disabled or with the socket
* locked so that the sk_buff queue operation is ok.
*/
-DECLARE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key);
static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb)
{
__skb_unlink(skb, &sk->sk_receive_queue);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f1d74a2bd234..22f235260a3a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1045,9 +1045,9 @@ union bpf_attr {
* supports redirection to the egress interface, and accepts no
* flag at all.
*
- * The same effect can be attained with the more generic
- * **bpf_redirect_map**\ (), which requires specific maps to be
- * used but offers better performance.
+ * The same effect can also be attained with the more generic
+ * **bpf_redirect_map**\ (), which uses a BPF map to store the
+ * redirect target instead of providing it directly to the helper.
* Return
* For XDP, the helper returns **XDP_REDIRECT** on success or
* **XDP_ABORTED** on error. For other program types, the values
@@ -1611,13 +1611,11 @@ union bpf_attr {
* the caller. Any higher bits in the *flags* argument must be
* unset.
*
- * When used to redirect packets to net devices, this helper
- * provides a high performance increase over **bpf_redirect**\ ().
- * This is due to various implementation details of the underlying
- * mechanisms, one of which is the fact that **bpf_redirect_map**\
- * () tries to send packet as a "bulk" to the device.
+ * See also bpf_redirect(), which only supports redirecting to an
+ * ifindex, but doesn't require a map to do so.
* Return
- * **XDP_REDIRECT** on success, or **XDP_ABORTED** on error.
+ * **XDP_REDIRECT** on success, or the value of the two lower bits
+ * of the **flags* argument on error.
*
* int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags)
* Description
diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h
index 336014bf8868..b6f0bb1dc799 100644
--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
@@ -97,6 +97,15 @@ enum ip_conntrack_status {
IPS_UNTRACKED_BIT = 12,
IPS_UNTRACKED = (1 << IPS_UNTRACKED_BIT),
+#ifdef __KERNEL__
+ /* Re-purposed for in-kernel use:
+ * Tags a conntrack entry that clashed with an existing entry
+ * on insert.
+ */
+ IPS_NAT_CLASH_BIT = IPS_UNTRACKED_BIT,
+ IPS_NAT_CLASH = IPS_UNTRACKED,
+#endif
+
/* Conntrack got a helper explicitly attached via CT target. */
IPS_HELPER_BIT = 13,
IPS_HELPER = (1 << IPS_HELPER_BIT),
@@ -110,7 +119,8 @@ enum ip_conntrack_status {
*/
IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK |
IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING |
- IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_OFFLOAD),
+ IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_UNTRACKED |
+ IPS_OFFLOAD),
__IPS_MAX_BIT = 15,
};
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 805c43b083e9..787140095e58 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -4142,9 +4142,9 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
* EFAULT - verifier bug
* 0 - 99% match. The last 1% is validated by the verifier.
*/
-int btf_check_func_type_match(struct bpf_verifier_log *log,
- struct btf *btf1, const struct btf_type *t1,
- struct btf *btf2, const struct btf_type *t2)
+static int btf_check_func_type_match(struct bpf_verifier_log *log,
+ struct btf *btf1, const struct btf_type *t1,
+ struct btf *btf2, const struct btf_type *t2)
{
const struct btf_param *args1, *args2;
const char *fn1, *fn2, *s1, *s2;
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 2d182c4ee9d9..a1468e3f5af2 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -56,6 +56,7 @@ struct htab_elem {
union {
struct bpf_htab *htab;
struct pcpu_freelist_node fnode;
+ struct htab_elem *batch_flink;
};
};
};
@@ -126,6 +127,17 @@ free_elems:
bpf_map_area_free(htab->elems);
}
+/* The LRU list has a lock (lru_lock). Each htab bucket has a lock
+ * (bucket_lock). If both locks need to be acquired together, the lock
+ * order is always lru_lock -> bucket_lock and this only happens in
+ * bpf_lru_list.c logic. For example, certain code path of
+ * bpf_lru_pop_free(), which is called by function prealloc_lru_pop(),
+ * will acquire lru_lock first followed by acquiring bucket_lock.
+ *
+ * In hashtab.c, to avoid deadlock, lock acquisition of
+ * bucket_lock followed by lru_lock is not allowed. In such cases,
+ * bucket_lock needs to be released first before acquiring lru_lock.
+ */
static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
u32 hash)
{
@@ -1256,10 +1268,12 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
void __user *ukeys = u64_to_user_ptr(attr->batch.keys);
void *ubatch = u64_to_user_ptr(attr->batch.in_batch);
u32 batch, max_count, size, bucket_size;
+ struct htab_elem *node_to_free = NULL;
u64 elem_map_flags, map_flags;
struct hlist_nulls_head *head;
struct hlist_nulls_node *n;
- unsigned long flags;
+ unsigned long flags = 0;
+ bool locked = false;
struct htab_elem *l;
struct bucket *b;
int ret = 0;
@@ -1319,15 +1333,25 @@ again_nocopy:
dst_val = values;
b = &htab->buckets[batch];
head = &b->head;
- raw_spin_lock_irqsave(&b->lock, flags);
+ /* do not grab the lock unless need it (bucket_cnt > 0). */
+ if (locked)
+ raw_spin_lock_irqsave(&b->lock, flags);
bucket_cnt = 0;
hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
bucket_cnt++;
+ if (bucket_cnt && !locked) {
+ locked = true;
+ goto again_nocopy;
+ }
+
if (bucket_cnt > (max_count - total)) {
if (total == 0)
ret = -ENOSPC;
+ /* Note that since bucket_cnt > 0 here, it is implicit
+ * that the locked was grabbed, so release it.
+ */
raw_spin_unlock_irqrestore(&b->lock, flags);
rcu_read_unlock();
this_cpu_dec(bpf_prog_active);
@@ -1337,6 +1361,9 @@ again_nocopy:
if (bucket_cnt > bucket_size) {
bucket_size = bucket_cnt;
+ /* Note that since bucket_cnt > 0 here, it is implicit
+ * that the locked was grabbed, so release it.
+ */
raw_spin_unlock_irqrestore(&b->lock, flags);
rcu_read_unlock();
this_cpu_dec(bpf_prog_active);
@@ -1346,6 +1373,10 @@ again_nocopy:
goto alloc;
}
+ /* Next block is only safe to run if you have grabbed the lock */
+ if (!locked)
+ goto next_batch;
+
hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
memcpy(dst_key, l->key, key_size);
@@ -1370,16 +1401,33 @@ again_nocopy:
}
if (do_delete) {
hlist_nulls_del_rcu(&l->hash_node);
- if (is_lru_map)
- bpf_lru_push_free(&htab->lru, &l->lru_node);
- else
+
+ /* bpf_lru_push_free() will acquire lru_lock, which
+ * may cause deadlock. See comments in function
+ * prealloc_lru_pop(). Let us do bpf_lru_push_free()
+ * after releasing the bucket lock.
+ */
+ if (is_lru_map) {
+ l->batch_flink = node_to_free;
+ node_to_free = l;
+ } else {
free_htab_elem(htab, l);
+ }
}
dst_key += key_size;
dst_val += value_size;
}
raw_spin_unlock_irqrestore(&b->lock, flags);
+ locked = false;
+
+ while (node_to_free) {
+ l = node_to_free;
+ node_to_free = node_to_free->batch_flink;
+ bpf_lru_push_free(&htab->lru, &l->lru_node);
+ }
+
+next_batch:
/* If we are not copying data, we can go to next bucket and avoid
* unlocking the rcu.
*/
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 2c5dc6541ece..bd09290e3648 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -321,7 +321,7 @@ int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
ulen = info->jited_prog_len;
info->jited_prog_len = aux->offload->jited_len;
- if (info->jited_prog_len & ulen) {
+ if (info->jited_prog_len && ulen) {
uinsns = u64_to_user_ptr(info->jited_prog_insns);
ulen = min_t(u32, info->jited_prog_len, ulen);
if (copy_to_user(uinsns, aux->offload->jited_image, ulen)) {
diff --git a/net/Kconfig b/net/Kconfig
index b0937a700f01..2eeb0e55f7c9 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -189,7 +189,6 @@ config BRIDGE_NETFILTER
depends on NETFILTER_ADVANCED
select NETFILTER_FAMILY_BRIDGE
select SKB_EXTENSIONS
- default m
---help---
Enabling this option will let arptables resp. iptables see bridged
ARP resp. IP traffic. If you want a bridging firewall, you probably
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 6856a6d9282b..1f14b8455345 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -63,7 +63,8 @@ struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no)
{
struct net_bridge_port *p;
- list_for_each_entry_rcu(p, &br->port_list, list) {
+ list_for_each_entry_rcu(p, &br->port_list, list,
+ lockdep_is_held(&br->lock)) {
if (p->port_no == port_no)
return p;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index a6316b336128..e10bd680dc03 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -146,7 +146,6 @@
#include "net-sysfs.h"
#define MAX_GRO_SKBS 8
-#define MAX_NEST_DEV 8
/* This should be increased if a protocol with a bigger head is added. */
#define GRO_MAX_HEAD (MAX_HEADER + 128)
@@ -331,6 +330,12 @@ int netdev_name_node_alt_destroy(struct net_device *dev, const char *name)
name_node = netdev_name_node_lookup(net, name);
if (!name_node)
return -ENOENT;
+ /* lookup might have found our primary name or a name belonging
+ * to another device.
+ */
+ if (name_node == dev->name_node || name_node->dev != dev)
+ return -EINVAL;
+
__netdev_name_node_alt_destroy(name_node);
return 0;
@@ -3657,26 +3662,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
qdisc_calculate_pkt_len(skb, q);
if (q->flags & TCQ_F_NOLOCK) {
- if ((q->flags & TCQ_F_CAN_BYPASS) && READ_ONCE(q->empty) &&
- qdisc_run_begin(q)) {
- if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED,
- &q->state))) {
- __qdisc_drop(skb, &to_free);
- rc = NET_XMIT_DROP;
- goto end_run;
- }
- qdisc_bstats_cpu_update(q, skb);
-
- rc = NET_XMIT_SUCCESS;
- if (sch_direct_xmit(skb, q, dev, txq, NULL, true))
- __qdisc_run(q);
-
-end_run:
- qdisc_run_end(q);
- } else {
- rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
- qdisc_run(q);
- }
+ rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
+ qdisc_run(q);
if (unlikely(to_free))
kfree_skb_list(to_free);
@@ -7201,8 +7188,8 @@ static int __netdev_walk_all_lower_dev(struct net_device *dev,
return 0;
}
-static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
- struct list_head **iter)
+struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
+ struct list_head **iter)
{
struct netdev_adjacent *lower;
@@ -7214,6 +7201,7 @@ static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
return lower->dev;
}
+EXPORT_SYMBOL(netdev_next_lower_dev_rcu);
static u8 __netdev_upper_depth(struct net_device *dev)
{
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 3e7e15278c46..bd7eba9066f8 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -974,7 +974,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
frh = nlmsg_data(nlh);
frh->family = ops->family;
- frh->table = rule->table;
+ frh->table = rule->table < 256 ? rule->table : RT_TABLE_COMPAT;
if (nla_put_u32(skb, FRA_TABLE, rule->table))
goto nla_put_failure;
if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefixlen))
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 09c44bf2e1d2..e1152f4ffe33 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3504,27 +3504,25 @@ static int rtnl_alt_ifname(int cmd, struct net_device *dev, struct nlattr *attr,
if (err)
return err;
- alt_ifname = nla_data(attr);
+ alt_ifname = nla_strdup(attr, GFP_KERNEL);
+ if (!alt_ifname)
+ return -ENOMEM;
+
if (cmd == RTM_NEWLINKPROP) {
- alt_ifname = kstrdup(alt_ifname, GFP_KERNEL);
- if (!alt_ifname)
- return -ENOMEM;
err = netdev_name_node_alt_create(dev, alt_ifname);
- if (err) {
- kfree(alt_ifname);
- return err;
- }
+ if (!err)
+ alt_ifname = NULL;
} else if (cmd == RTM_DELLINKPROP) {
err = netdev_name_node_alt_destroy(dev, alt_ifname);
- if (err)
- return err;
} else {
- WARN_ON(1);
- return 0;
+ WARN_ON_ONCE(1);
+ err = -EINVAL;
}
- *changed = true;
- return 0;
+ kfree(alt_ifname);
+ if (!err)
+ *changed = true;
+ return err;
}
static int rtnl_linkprop(int cmd, struct sk_buff *skb, struct nlmsghdr *nlh,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 864cb9e9622f..e1101a4f90a6 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -467,7 +467,6 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
return NULL;
}
- /* use OR instead of assignment to avoid clearing of bits in mask */
if (pfmemalloc)
skb->pfmemalloc = 1;
skb->head_frag = 1;
@@ -527,7 +526,6 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
return NULL;
}
- /* use OR instead of assignment to avoid clearing of bits in mask */
if (nc->page.pfmemalloc)
skb->pfmemalloc = 1;
skb->head_frag = 1;
@@ -4805,9 +4803,9 @@ static __sum16 *skb_checksum_setup_ip(struct sk_buff *skb,
typeof(IPPROTO_IP) proto,
unsigned int off)
{
- switch (proto) {
- int err;
+ int err;
+ switch (proto) {
case IPPROTO_TCP:
err = skb_maybe_pull_tail(skb, off + sizeof(struct tcphdr),
off + MAX_TCP_HDR_LEN);
diff --git a/net/ethtool/bitset.c b/net/ethtool/bitset.c
index fce45dac4205..8977fe1f3946 100644
--- a/net/ethtool/bitset.c
+++ b/net/ethtool/bitset.c
@@ -447,7 +447,10 @@ ethnl_update_bitset32_verbose(u32 *bitmap, unsigned int nbits,
"mask only allowed in compact bitset");
return -EINVAL;
}
+
no_mask = tb[ETHTOOL_A_BITSET_NOMASK];
+ if (no_mask)
+ ethnl_bitmap32_clear(bitmap, 0, nbits, mod);
nla_for_each_nested(bit_attr, tb[ETHTOOL_A_BITSET_BITS], rem) {
bool old_val, new_val;
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 364ea2cc028e..3ba7f61be107 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -155,7 +155,8 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr,
new_node->seq_out[i] = seq_out;
spin_lock_bh(&hsr->list_lock);
- list_for_each_entry_rcu(node, node_db, mac_list) {
+ list_for_each_entry_rcu(node, node_db, mac_list,
+ lockdep_is_held(&hsr->list_lock)) {
if (ether_addr_equal(node->macaddress_A, addr))
goto out;
if (ether_addr_equal(node->macaddress_B, addr))
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index db76b9609299..08a41f1e1cd2 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1857,8 +1857,12 @@ int __udp_disconnect(struct sock *sk, int flags)
inet->inet_dport = 0;
sock_rps_reset_rxhash(sk);
sk->sk_bound_dev_if = 0;
- if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
+ if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) {
inet_reset_saddr(sk);
+ if (sk->sk_prot->rehash &&
+ (sk->sk_userlocks & SOCK_BINDPORT_LOCK))
+ sk->sk_prot->rehash(sk);
+ }
if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) {
sk->sk_prot->unhash(sk);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 58fbde244381..72abf892302f 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1102,8 +1102,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
found++;
break;
}
- if (rt_can_ecmp)
- fallback_ins = fallback_ins ?: ins;
+ fallback_ins = fallback_ins ?: ins;
goto next_iter;
}
@@ -1146,7 +1145,9 @@ next_iter:
}
if (fallback_ins && !found) {
- /* No ECMP-able route found, replace first non-ECMP one */
+ /* No matching route with same ecmp-able-ness found, replace
+ * first matching route
+ */
ins = fallback_ins;
iter = rcu_dereference_protected(*ins,
lockdep_is_held(&rt->fib6_table->tb6_lock));
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 55bfc5149d0c..781ca8c07a0d 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -437,8 +437,6 @@ static int ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return -ENOENT;
switch (type) {
- struct ipv6_tlv_tnl_enc_lim *tel;
- __u32 teli;
case ICMPV6_DEST_UNREACH:
net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
t->parms.name);
@@ -452,7 +450,10 @@ static int ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
break;
}
return 0;
- case ICMPV6_PARAMPROB:
+ case ICMPV6_PARAMPROB: {
+ struct ipv6_tlv_tnl_enc_lim *tel;
+ __u32 teli;
+
teli = 0;
if (code == ICMPV6_HDR_FIELD)
teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);
@@ -468,6 +469,7 @@ static int ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
t->parms.name);
}
return 0;
+ }
case ICMPV6_PKT_TOOBIG:
ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
return 0;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 5d65436ad5ad..4703b09808d0 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -517,8 +517,6 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
err = 0;
switch (*type) {
- struct ipv6_tlv_tnl_enc_lim *tel;
- __u32 mtu, teli;
case ICMPV6_DEST_UNREACH:
net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
t->parms.name);
@@ -531,7 +529,10 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
rel_msg = 1;
}
break;
- case ICMPV6_PARAMPROB:
+ case ICMPV6_PARAMPROB: {
+ struct ipv6_tlv_tnl_enc_lim *tel;
+ __u32 teli;
+
teli = 0;
if ((*code) == ICMPV6_HDR_FIELD)
teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);
@@ -548,7 +549,10 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
t->parms.name);
}
break;
- case ICMPV6_PKT_TOOBIG:
+ }
+ case ICMPV6_PKT_TOOBIG: {
+ __u32 mtu;
+
ip6_update_pmtu(skb, net, htonl(*info), 0, 0,
sock_net_uid(net, NULL));
mtu = *info - offset;
@@ -562,6 +566,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
rel_msg = 1;
}
break;
+ }
case NDISC_REDIRECT:
ip6_redirect(skb, net, skb->dev->ifindex, 0,
sock_net_uid(net, NULL));
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 4fbdc60b4e07..2931224b674e 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -5198,6 +5198,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
*/
cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
NLM_F_REPLACE);
+ cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_CREATE;
nhn++;
}
diff --git a/net/mptcp/Kconfig b/net/mptcp/Kconfig
index 49f6054e7f4e..a9ed3bf1d93f 100644
--- a/net/mptcp/Kconfig
+++ b/net/mptcp/Kconfig
@@ -4,6 +4,7 @@ config MPTCP
depends on INET
select SKB_EXTENSIONS
select CRYPTO_LIB_SHA256
+ select CRYPTO
help
Multipath TCP (MPTCP) connections send and receive data over multiple
subflows in order to utilize multiple network paths. Each subflow
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 030dee668e0a..e9aa6807b5be 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -755,60 +755,50 @@ static int mptcp_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, unsigned int optlen)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- int ret = -EOPNOTSUPP;
struct socket *ssock;
- struct sock *ssk;
pr_debug("msk=%p", msk);
/* @@ the meaning of setsockopt() when the socket is connected and
- * there are multiple subflows is not defined.
+ * there are multiple subflows is not yet defined. It is up to the
+ * MPTCP-level socket to configure the subflows until the subflow
+ * is in TCP fallback, when TCP socket options are passed through
+ * to the one remaining subflow.
*/
lock_sock(sk);
- ssock = __mptcp_socket_create(msk, MPTCP_SAME_STATE);
- if (IS_ERR(ssock)) {
- release_sock(sk);
- return ret;
- }
+ ssock = __mptcp_tcp_fallback(msk);
+ if (ssock)
+ return tcp_setsockopt(ssock->sk, level, optname, optval,
+ optlen);
- ssk = ssock->sk;
- sock_hold(ssk);
release_sock(sk);
- ret = tcp_setsockopt(ssk, level, optname, optval, optlen);
- sock_put(ssk);
-
- return ret;
+ return -EOPNOTSUPP;
}
static int mptcp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *option)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- int ret = -EOPNOTSUPP;
struct socket *ssock;
- struct sock *ssk;
pr_debug("msk=%p", msk);
- /* @@ the meaning of getsockopt() when the socket is connected and
- * there are multiple subflows is not defined.
+ /* @@ the meaning of setsockopt() when the socket is connected and
+ * there are multiple subflows is not yet defined. It is up to the
+ * MPTCP-level socket to configure the subflows until the subflow
+ * is in TCP fallback, when socket options are passed through
+ * to the one remaining subflow.
*/
lock_sock(sk);
- ssock = __mptcp_socket_create(msk, MPTCP_SAME_STATE);
- if (IS_ERR(ssock)) {
- release_sock(sk);
- return ret;
- }
+ ssock = __mptcp_tcp_fallback(msk);
+ if (ssock)
+ return tcp_getsockopt(ssock->sk, level, optname, optval,
+ option);
- ssk = ssock->sk;
- sock_hold(ssk);
release_sock(sk);
- ret = tcp_getsockopt(ssk, level, optname, optval, option);
- sock_put(ssk);
-
- return ret;
+ return -EOPNOTSUPP;
}
static int mptcp_get_port(struct sock *sk, unsigned short snum)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 8a99a2930284..9f8663b30456 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -56,8 +56,8 @@
#define MPTCP_DSS_FLAG_MASK (0x1F)
/* MPTCP socket flags */
-#define MPTCP_DATA_READY BIT(0)
-#define MPTCP_SEND_SPACE BIT(1)
+#define MPTCP_DATA_READY 0
+#define MPTCP_SEND_SPACE 1
/* MPTCP connection sock */
struct mptcp_sock {
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index d1305423640f..1927fc296f95 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -894,32 +894,175 @@ static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
}
}
-/* Resolve race on insertion if this protocol allows this. */
+static void __nf_conntrack_insert_prepare(struct nf_conn *ct)
+{
+ struct nf_conn_tstamp *tstamp;
+
+ atomic_inc(&ct->ct_general.use);
+ ct->status |= IPS_CONFIRMED;
+
+ /* set conntrack timestamp, if enabled. */
+ tstamp = nf_conn_tstamp_find(ct);
+ if (tstamp)
+ tstamp->start = ktime_get_real_ns();
+}
+
+static int __nf_ct_resolve_clash(struct sk_buff *skb,
+ struct nf_conntrack_tuple_hash *h)
+{
+ /* This is the conntrack entry already in hashes that won race. */
+ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *loser_ct;
+
+ loser_ct = nf_ct_get(skb, &ctinfo);
+
+ if (nf_ct_is_dying(ct))
+ return NF_DROP;
+
+ if (!atomic_inc_not_zero(&ct->ct_general.use))
+ return NF_DROP;
+
+ if (((ct->status & IPS_NAT_DONE_MASK) == 0) ||
+ nf_ct_match(ct, loser_ct)) {
+ struct net *net = nf_ct_net(ct);
+
+ nf_ct_acct_merge(ct, ctinfo, loser_ct);
+ nf_ct_add_to_dying_list(loser_ct);
+ nf_conntrack_put(&loser_ct->ct_general);
+ nf_ct_set(skb, ct, ctinfo);
+
+ NF_CT_STAT_INC(net, insert_failed);
+ return NF_ACCEPT;
+ }
+
+ nf_ct_put(ct);
+ return NF_DROP;
+}
+
+/**
+ * nf_ct_resolve_clash_harder - attempt to insert clashing conntrack entry
+ *
+ * @skb: skb that causes the collision
+ * @repl_idx: hash slot for reply direction
+ *
+ * Called when origin or reply direction had a clash.
+ * The skb can be handled without packet drop provided the reply direction
+ * is unique or there the existing entry has the identical tuple in both
+ * directions.
+ *
+ * Caller must hold conntrack table locks to prevent concurrent updates.
+ *
+ * Returns NF_DROP if the clash could not be handled.
+ */
+static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx)
+{
+ struct nf_conn *loser_ct = (struct nf_conn *)skb_nfct(skb);
+ const struct nf_conntrack_zone *zone;
+ struct nf_conntrack_tuple_hash *h;
+ struct hlist_nulls_node *n;
+ struct net *net;
+
+ zone = nf_ct_zone(loser_ct);
+ net = nf_ct_net(loser_ct);
+
+ /* Reply direction must never result in a clash, unless both origin
+ * and reply tuples are identical.
+ */
+ hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[repl_idx], hnnode) {
+ if (nf_ct_key_equal(h,
+ &loser_ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ zone, net))
+ return __nf_ct_resolve_clash(skb, h);
+ }
+
+ /* We want the clashing entry to go away real soon: 1 second timeout. */
+ loser_ct->timeout = nfct_time_stamp + HZ;
+
+ /* IPS_NAT_CLASH removes the entry automatically on the first
+ * reply. Also prevents UDP tracker from moving the entry to
+ * ASSURED state, i.e. the entry can always be evicted under
+ * pressure.
+ */
+ loser_ct->status |= IPS_FIXED_TIMEOUT | IPS_NAT_CLASH;
+
+ __nf_conntrack_insert_prepare(loser_ct);
+
+ /* fake add for ORIGINAL dir: we want lookups to only find the entry
+ * already in the table. This also hides the clashing entry from
+ * ctnetlink iteration, i.e. conntrack -L won't show them.
+ */
+ hlist_nulls_add_fake(&loser_ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
+
+ hlist_nulls_add_head_rcu(&loser_ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
+ &nf_conntrack_hash[repl_idx]);
+ return NF_ACCEPT;
+}
+
+/**
+ * nf_ct_resolve_clash - attempt to handle clash without packet drop
+ *
+ * @skb: skb that causes the clash
+ * @h: tuplehash of the clashing entry already in table
+ * @hash_reply: hash slot for reply direction
+ *
+ * A conntrack entry can be inserted to the connection tracking table
+ * if there is no existing entry with an identical tuple.
+ *
+ * If there is one, @skb (and the assocated, unconfirmed conntrack) has
+ * to be dropped. In case @skb is retransmitted, next conntrack lookup
+ * will find the already-existing entry.
+ *
+ * The major problem with such packet drop is the extra delay added by
+ * the packet loss -- it will take some time for a retransmit to occur
+ * (or the sender to time out when waiting for a reply).
+ *
+ * This function attempts to handle the situation without packet drop.
+ *
+ * If @skb has no NAT transformation or if the colliding entries are
+ * exactly the same, only the to-be-confirmed conntrack entry is discarded
+ * and @skb is associated with the conntrack entry already in the table.
+ *
+ * Failing that, the new, unconfirmed conntrack is still added to the table
+ * provided that the collision only occurs in the ORIGINAL direction.
+ * The new entry will be added after the existing one in the hash list,
+ * so packets in the ORIGINAL direction will continue to match the existing
+ * entry. The new entry will also have a fixed timeout so it expires --
+ * due to the collision, it will not see bidirectional traffic.
+ *
+ * Returns NF_DROP if the clash could not be resolved.
+ */
static __cold noinline int
-nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
- enum ip_conntrack_info ctinfo,
- struct nf_conntrack_tuple_hash *h)
+nf_ct_resolve_clash(struct sk_buff *skb, struct nf_conntrack_tuple_hash *h,
+ u32 reply_hash)
{
/* This is the conntrack entry already in hashes that won race. */
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
const struct nf_conntrack_l4proto *l4proto;
- enum ip_conntrack_info oldinfo;
- struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo);
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *loser_ct;
+ struct net *net;
+ int ret;
+
+ loser_ct = nf_ct_get(skb, &ctinfo);
+ net = nf_ct_net(loser_ct);
l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
- if (l4proto->allow_clash &&
- !nf_ct_is_dying(ct) &&
- atomic_inc_not_zero(&ct->ct_general.use)) {
- if (((ct->status & IPS_NAT_DONE_MASK) == 0) ||
- nf_ct_match(ct, loser_ct)) {
- nf_ct_acct_merge(ct, ctinfo, loser_ct);
- nf_conntrack_put(&loser_ct->ct_general);
- nf_ct_set(skb, ct, oldinfo);
- return NF_ACCEPT;
- }
- nf_ct_put(ct);
- }
+ if (!l4proto->allow_clash)
+ goto drop;
+
+ ret = __nf_ct_resolve_clash(skb, h);
+ if (ret == NF_ACCEPT)
+ return ret;
+
+ ret = nf_ct_resolve_clash_harder(skb, reply_hash);
+ if (ret == NF_ACCEPT)
+ return ret;
+
+drop:
+ nf_ct_add_to_dying_list(loser_ct);
NF_CT_STAT_INC(net, drop);
+ NF_CT_STAT_INC(net, insert_failed);
return NF_DROP;
}
@@ -932,7 +1075,6 @@ __nf_conntrack_confirm(struct sk_buff *skb)
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
struct nf_conn_help *help;
- struct nf_conn_tstamp *tstamp;
struct hlist_nulls_node *n;
enum ip_conntrack_info ctinfo;
struct net *net;
@@ -989,6 +1131,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
if (unlikely(nf_ct_is_dying(ct))) {
nf_ct_add_to_dying_list(ct);
+ NF_CT_STAT_INC(net, insert_failed);
goto dying;
}
@@ -1009,13 +1152,8 @@ __nf_conntrack_confirm(struct sk_buff *skb)
setting time, otherwise we'd get timer wrap in
weird delay cases. */
ct->timeout += nfct_time_stamp;
- atomic_inc(&ct->ct_general.use);
- ct->status |= IPS_CONFIRMED;
- /* set conntrack timestamp, if enabled. */
- tstamp = nf_conn_tstamp_find(ct);
- if (tstamp)
- tstamp->start = ktime_get_real_ns();
+ __nf_conntrack_insert_prepare(ct);
/* Since the lookup is lockless, hash insertion must be done after
* starting the timer and setting the CONFIRMED bit. The RCU barriers
@@ -1035,11 +1173,9 @@ __nf_conntrack_confirm(struct sk_buff *skb)
return NF_ACCEPT;
out:
- nf_ct_add_to_dying_list(ct);
- ret = nf_ct_resolve_clash(net, skb, ctinfo, h);
+ ret = nf_ct_resolve_clash(skb, h, reply_hash);
dying:
nf_conntrack_double_unlock(hash, reply_hash);
- NF_CT_STAT_INC(net, insert_failed);
local_bh_enable();
return ret;
}
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 7365b43f8f98..760ca2422816 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -81,6 +81,18 @@ static bool udp_error(struct sk_buff *skb,
return false;
}
+static void nf_conntrack_udp_refresh_unreplied(struct nf_conn *ct,
+ struct sk_buff *skb,
+ enum ip_conntrack_info ctinfo,
+ u32 extra_jiffies)
+{
+ if (unlikely(ctinfo == IP_CT_ESTABLISHED_REPLY &&
+ ct->status & IPS_NAT_CLASH))
+ nf_ct_kill(ct);
+ else
+ nf_ct_refresh_acct(ct, ctinfo, skb, extra_jiffies);
+}
+
/* Returns verdict for packet, and may modify conntracktype */
int nf_conntrack_udp_packet(struct nf_conn *ct,
struct sk_buff *skb,
@@ -116,8 +128,8 @@ int nf_conntrack_udp_packet(struct nf_conn *ct,
if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
nf_conntrack_event_cache(IPCT_ASSURED, ct);
} else {
- nf_ct_refresh_acct(ct, ctinfo, skb,
- timeouts[UDP_CT_UNREPLIED]);
+ nf_conntrack_udp_refresh_unreplied(ct, skb, ctinfo,
+ timeouts[UDP_CT_UNREPLIED]);
}
return NF_ACCEPT;
}
@@ -198,8 +210,8 @@ int nf_conntrack_udplite_packet(struct nf_conn *ct,
if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
nf_conntrack_event_cache(IPCT_ASSURED, ct);
} else {
- nf_ct_refresh_acct(ct, ctinfo, skb,
- timeouts[UDP_CT_UNREPLIED]);
+ nf_conntrack_udp_refresh_unreplied(ct, skb, ctinfo,
+ timeouts[UDP_CT_UNREPLIED]);
}
return NF_ACCEPT;
}
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 83e1db37c3b0..06f00cdc3891 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -847,9 +847,6 @@ static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
{
int err;
- if (!nf_flowtable_hw_offload(flowtable))
- return 0;
-
if (!dev->netdev_ops->ndo_setup_tc)
return -EOPNOTSUPP;
@@ -876,6 +873,9 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
struct flow_block_offload bo;
int err;
+ if (!nf_flowtable_hw_offload(flowtable))
+ return 0;
+
err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd, &extack);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index f0cb1e13af50..feac8553f6d9 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -203,7 +203,7 @@
* ::
*
* rule indices in last field: 0 1
- * map to elements: 0x42 0x66
+ * map to elements: 0x66 0x42
*
*
* Matching
@@ -298,7 +298,7 @@
* ::
*
* rule indices in last field: 0 1
- * map to elements: 0x42 0x66
+ * map to elements: 0x66 0x42
*
* the matching element is at 0x42.
*
@@ -503,7 +503,7 @@ static int pipapo_refill(unsigned long *map, int len, int rules,
return -1;
}
- if (unlikely(match_only)) {
+ if (match_only) {
bitmap_clear(map, i, 1);
return i;
}
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index bccd47cd7190..7a2c4b8408c4 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -36,6 +36,7 @@
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <linux/mutex.h>
#include <linux/kernel.h>
+#include <linux/refcount.h>
#include <uapi/linux/netfilter/xt_hashlimit.h>
#define XT_HASHLIMIT_ALL (XT_HASHLIMIT_HASH_DIP | XT_HASHLIMIT_HASH_DPT | \
@@ -114,7 +115,7 @@ struct dsthash_ent {
struct xt_hashlimit_htable {
struct hlist_node node; /* global list of all htables */
- int use;
+ refcount_t use;
u_int8_t family;
bool rnd_initialized;
@@ -315,7 +316,7 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg,
for (i = 0; i < hinfo->cfg.size; i++)
INIT_HLIST_HEAD(&hinfo->hash[i]);
- hinfo->use = 1;
+ refcount_set(&hinfo->use, 1);
hinfo->count = 0;
hinfo->family = family;
hinfo->rnd_initialized = false;
@@ -420,7 +421,7 @@ static struct xt_hashlimit_htable *htable_find_get(struct net *net,
hlist_for_each_entry(hinfo, &hashlimit_net->htables, node) {
if (!strcmp(name, hinfo->name) &&
hinfo->family == family) {
- hinfo->use++;
+ refcount_inc(&hinfo->use);
return hinfo;
}
}
@@ -429,12 +430,11 @@ static struct xt_hashlimit_htable *htable_find_get(struct net *net,
static void htable_put(struct xt_hashlimit_htable *hinfo)
{
- mutex_lock(&hashlimit_mutex);
- if (--hinfo->use == 0) {
+ if (refcount_dec_and_mutex_lock(&hinfo->use, &hashlimit_mutex)) {
hlist_del(&hinfo->node);
+ mutex_unlock(&hashlimit_mutex);
htable_destroy(hinfo);
}
- mutex_unlock(&hashlimit_mutex);
}
/* The algorithm used is the Simple Token Bucket Filter (TBF)
@@ -837,6 +837,8 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 3);
}
+#define HASHLIMIT_MAX_SIZE 1048576
+
static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
struct xt_hashlimit_htable **hinfo,
struct hashlimit_cfg3 *cfg,
@@ -847,6 +849,14 @@ static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
if (cfg->gc_interval == 0 || cfg->expire == 0)
return -EINVAL;
+ if (cfg->size > HASHLIMIT_MAX_SIZE) {
+ cfg->size = HASHLIMIT_MAX_SIZE;
+ pr_info_ratelimited("size too large, truncated to %u\n", cfg->size);
+ }
+ if (cfg->max > HASHLIMIT_MAX_SIZE) {
+ cfg->max = HASHLIMIT_MAX_SIZE;
+ pr_info_ratelimited("max too large, truncated to %u\n", cfg->max);
+ }
if (par->family == NFPROTO_IPV4) {
if (cfg->srcmask > 32 || cfg->dstmask > 32)
return -EINVAL;
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index f5d34da0646e..a1f2320ecc16 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -143,7 +143,8 @@ static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain,
if (domain != NULL) {
bkt = netlbl_domhsh_hash(domain);
bkt_list = &netlbl_domhsh_rcu_deref(netlbl_domhsh)->tbl[bkt];
- list_for_each_entry_rcu(iter, bkt_list, list)
+ list_for_each_entry_rcu(iter, bkt_list, list,
+ lockdep_is_held(&netlbl_domhsh_lock))
if (iter->valid &&
netlbl_family_match(iter->family, family) &&
strcmp(iter->domain, domain) == 0)
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index d2e4ab8d1cb1..77bb1bb22c3b 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -207,7 +207,8 @@ static struct netlbl_unlhsh_iface *netlbl_unlhsh_search_iface(int ifindex)
bkt = netlbl_unlhsh_hash(ifindex);
bkt_list = &netlbl_unlhsh_rcu_deref(netlbl_unlhsh)->tbl[bkt];
- list_for_each_entry_rcu(iter, bkt_list, list)
+ list_for_each_entry_rcu(iter, bkt_list, list,
+ lockdep_is_held(&netlbl_unlhsh_lock))
if (iter->valid && iter->ifindex == ifindex)
return iter;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 4e31721e7293..edf3e285e242 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1014,7 +1014,8 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
if (nlk->netlink_bind && groups) {
int group;
- for (group = 0; group < nlk->ngroups; group++) {
+ /* nl_groups is a u32, so cap the maximum groups we can bind */
+ for (group = 0; group < BITS_PER_TYPE(u32); group++) {
if (!test_bit(group, &groups))
continue;
err = nlk->netlink_bind(net, group + 1);
@@ -1033,7 +1034,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
netlink_insert(sk, nladdr->nl_pid) :
netlink_autobind(sock);
if (err) {
- netlink_undo_bind(nlk->ngroups, groups, sk);
+ netlink_undo_bind(BITS_PER_TYPE(u32), groups, sk);
goto unlock;
}
}
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 659c2a790fe7..c047afd12116 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -179,7 +179,8 @@ struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
struct hlist_head *head;
head = vport_hash_bucket(dp, port_no);
- hlist_for_each_entry_rcu(vport, head, dp_hash_node) {
+ hlist_for_each_entry_rcu(vport, head, dp_hash_node,
+ lockdep_ovsl_is_held()) {
if (vport->port_no == port_no)
return vport;
}
@@ -2042,7 +2043,8 @@ static unsigned int ovs_get_max_headroom(struct datapath *dp)
int i;
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
- hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
+ hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
+ lockdep_ovsl_is_held()) {
dev = vport->dev;
dev_headroom = netdev_get_fwd_headroom(dev);
if (dev_headroom > max_headroom)
@@ -2061,7 +2063,8 @@ static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom)
dp->max_headroom = new_headroom;
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
- hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node)
+ hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
+ lockdep_ovsl_is_held())
netdev_set_rx_headroom(vport->dev, new_headroom);
}
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 7da4230627f5..288122eec7c8 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -2708,10 +2708,6 @@ static int validate_set(const struct nlattr *a,
return -EINVAL;
switch (key_type) {
- const struct ovs_key_ipv4 *ipv4_key;
- const struct ovs_key_ipv6 *ipv6_key;
- int err;
-
case OVS_KEY_ATTR_PRIORITY:
case OVS_KEY_ATTR_SKB_MARK:
case OVS_KEY_ATTR_CT_MARK:
@@ -2723,7 +2719,9 @@ static int validate_set(const struct nlattr *a,
return -EINVAL;
break;
- case OVS_KEY_ATTR_TUNNEL:
+ case OVS_KEY_ATTR_TUNNEL: {
+ int err;
+
if (masked)
return -EINVAL; /* Masked tunnel set not supported. */
@@ -2732,8 +2730,10 @@ static int validate_set(const struct nlattr *a,
if (err)
return err;
break;
+ }
+ case OVS_KEY_ATTR_IPV4: {
+ const struct ovs_key_ipv4 *ipv4_key;
- case OVS_KEY_ATTR_IPV4:
if (eth_type != htons(ETH_P_IP))
return -EINVAL;
@@ -2753,8 +2753,10 @@ static int validate_set(const struct nlattr *a,
return -EINVAL;
}
break;
+ }
+ case OVS_KEY_ATTR_IPV6: {
+ const struct ovs_key_ipv6 *ipv6_key;
- case OVS_KEY_ATTR_IPV6:
if (eth_type != htons(ETH_P_IPV6))
return -EINVAL;
@@ -2781,7 +2783,7 @@ static int validate_set(const struct nlattr *a,
return -EINVAL;
break;
-
+ }
case OVS_KEY_ATTR_TCP:
if ((eth_type != htons(ETH_P_IP) &&
eth_type != htons(ETH_P_IPV6)) ||
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 5904e93e5765..fd8a01ca7a2d 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -585,7 +585,8 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
head = find_bucket(ti, hash);
(*n_mask_hit)++;
- hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) {
+ hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver],
+ lockdep_ovsl_is_held()) {
if (flow->mask == mask && flow->flow_table.hash == hash &&
flow_cmp_masked_key(flow, &masked_key, &mask->range))
return flow;
@@ -769,7 +770,8 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,
hash = ufid_hash(ufid);
head = find_bucket(ti, hash);
- hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver]) {
+ hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver],
+ lockdep_ovsl_is_held()) {
if (flow->ufid_table.hash == hash &&
ovs_flow_cmp_ufid(flow, ufid))
return flow;
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 3323b79ff548..5010d1ddd4bd 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -61,7 +61,8 @@ static struct dp_meter *lookup_meter(const struct datapath *dp,
struct hlist_head *head;
head = meter_hash_bucket(dp, meter_id);
- hlist_for_each_entry_rcu(meter, head, dp_hash_node) {
+ hlist_for_each_entry_rcu(meter, head, dp_hash_node,
+ lockdep_ovsl_is_held()) {
if (meter->id == meter_id)
return meter;
}
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 5da9392b03d6..47febb4504f0 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -96,7 +96,8 @@ struct vport *ovs_vport_locate(const struct net *net, const char *name)
struct hlist_head *bucket = hash_bucket(net, name);
struct vport *vport;
- hlist_for_each_entry_rcu(vport, bucket, hash_node)
+ hlist_for_each_entry_rcu(vport, bucket, hash_node,
+ lockdep_ovsl_is_held())
if (!strcmp(name, ovs_vport_name(vport)) &&
net_eq(ovs_dp_get_net(vport->dp), net))
return vport;
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 3341eee87bf9..585e6b3b69ce 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -162,10 +162,9 @@ static int rds_pin_pages(unsigned long user_addr, unsigned int nr_pages,
if (write)
gup_flags |= FOLL_WRITE;
- ret = get_user_pages_fast(user_addr, nr_pages, gup_flags, pages);
+ ret = pin_user_pages_fast(user_addr, nr_pages, gup_flags, pages);
if (ret >= 0 && ret < nr_pages) {
- while (ret--)
- put_page(pages[ret]);
+ unpin_user_pages(pages, ret);
ret = -EFAULT;
}
@@ -300,8 +299,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
* to release anything.
*/
if (!need_odp) {
- for (i = 0 ; i < nents; i++)
- put_page(sg_page(&sg[i]));
+ unpin_user_pages(pages, nr_pages);
kfree(sg);
}
ret = PTR_ERR(trans_private);
@@ -325,7 +323,12 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
if (cookie_ret)
*cookie_ret = cookie;
- if (args->cookie_addr && put_user(cookie, (u64 __user *)(unsigned long) args->cookie_addr)) {
+ if (args->cookie_addr &&
+ put_user(cookie, (u64 __user *)(unsigned long)args->cookie_addr)) {
+ if (!need_odp) {
+ unpin_user_pages(pages, nr_pages);
+ kfree(sg);
+ }
ret = -EFAULT;
goto out;
}
@@ -496,9 +499,7 @@ void rds_rdma_free_op(struct rm_rdma_op *ro)
* is the case for a RDMA_READ which copies from remote
* to local memory
*/
- if (!ro->op_write)
- set_page_dirty(page);
- put_page(page);
+ unpin_user_pages_dirty_lock(&page, 1, !ro->op_write);
}
}
@@ -515,8 +516,7 @@ void rds_atomic_free_op(struct rm_atomic_op *ao)
/* Mark page dirty if it was possibly modified, which
* is the case for a RDMA_READ which copies from remote
* to local memory */
- set_page_dirty(page);
- put_page(page);
+ unpin_user_pages_dirty_lock(&page, 1, true);
kfree(ao->op_notifier);
ao->op_notifier = NULL;
@@ -944,7 +944,7 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
return ret;
err:
if (page)
- put_page(page);
+ unpin_user_page(page);
rm->atomic.op_active = 0;
kfree(rm->atomic.op_notifier);
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 7e54d2ab5254..d32d4233d337 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -305,6 +305,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct cls_fl_filter *f;
list_for_each_entry_rcu(mask, &head->masks, list) {
+ flow_dissector_init_keys(&skb_key.control, &skb_key.basic);
fl_clear_masked_range(&skb_key, mask);
skb_flow_dissect_meta(skb, &mask->dissector, &skb_key);
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 748e3b19ec1d..6a16af4b1ef6 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -170,6 +170,16 @@ static inline bool sctp_chunk_length_valid(struct sctp_chunk *chunk,
return true;
}
+/* Check for format error in an ABORT chunk */
+static inline bool sctp_err_chunk_valid(struct sctp_chunk *chunk)
+{
+ struct sctp_errhdr *err;
+
+ sctp_walk_errors(err, chunk->chunk_hdr);
+
+ return (void *)err == (void *)chunk->chunk_end;
+}
+
/**********************************************************
* These are the state functions for handling chunk events.
**********************************************************/
@@ -2255,6 +2265,9 @@ enum sctp_disposition sctp_sf_shutdown_pending_abort(
sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
+ if (!sctp_err_chunk_valid(chunk))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands);
}
@@ -2298,6 +2311,9 @@ enum sctp_disposition sctp_sf_shutdown_sent_abort(
sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
+ if (!sctp_err_chunk_valid(chunk))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
/* Stop the T2-shutdown timer. */
sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN));
@@ -2565,6 +2581,9 @@ enum sctp_disposition sctp_sf_do_9_1_abort(
sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
+ if (!sctp_err_chunk_valid(chunk))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands);
}
@@ -2582,16 +2601,8 @@ static enum sctp_disposition __sctp_sf_do_9_1_abort(
/* See if we have an error cause code in the chunk. */
len = ntohs(chunk->chunk_hdr->length);
- if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) {
- struct sctp_errhdr *err;
-
- sctp_walk_errors(err, chunk->chunk_hdr);
- if ((void *)err != (void *)chunk->chunk_end)
- return sctp_sf_pdiscard(net, ep, asoc, type, arg,
- commands);
-
+ if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr))
error = ((struct sctp_errhdr *)chunk->skb->data)->cause;
- }
sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNRESET));
/* ASSOC_FAILED will DELETE_TCB. */
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 1ba5a92832bb..1c5574e2e058 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -593,7 +593,7 @@ struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context,
u32 seq, u64 *p_record_sn)
{
u64 record_sn = context->hint_record_sn;
- struct tls_record_info *info;
+ struct tls_record_info *info, *last;
info = context->retransmit_hint;
if (!info ||
@@ -605,6 +605,24 @@ struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context,
struct tls_record_info, list);
if (!info)
return NULL;
+ /* send the start_marker record if seq number is before the
+ * tls offload start marker sequence number. This record is
+ * required to handle TCP packets which are before TLS offload
+ * started.
+ * And if it's not start marker, look if this seq number
+ * belongs to the list.
+ */
+ if (likely(!tls_record_is_start_marker(info))) {
+ /* we have the first record, get the last record to see
+ * if this seq number belongs to the list.
+ */
+ last = list_last_entry(&context->records_list,
+ struct tls_record_info, list);
+
+ if (!between(seq, tls_record_start_seq(info),
+ last->end_seq))
+ return NULL;
+ }
record_sn = context->unacked_record_sn;
}
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index df600487a68d..356f90e4522b 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -217,6 +217,7 @@ static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
static void xsk_flush(struct xdp_sock *xs)
{
xskq_prod_submit(xs->rx);
+ __xskq_cons_release(xs->umem->fq);
sock_def_readable(&xs->sk);
}
@@ -304,6 +305,7 @@ void xsk_umem_consume_tx_done(struct xdp_umem *umem)
rcu_read_lock();
list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+ __xskq_cons_release(xs->tx);
xs->sk.sk_write_space(&xs->sk);
}
rcu_read_unlock();
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index bec2af11853a..89a01ac4e079 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -271,7 +271,8 @@ static inline void xskq_cons_release(struct xsk_queue *q)
{
/* To improve performance, only update local state here.
* Reflect this to global state when we get new entries
- * from the ring in xskq_cons_get_entries().
+ * from the ring in xskq_cons_get_entries() and whenever
+ * Rx or Tx processing are completed in the NAPI loop.
*/
q->cached_cons++;
}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f1d74a2bd234..22f235260a3a 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1045,9 +1045,9 @@ union bpf_attr {
* supports redirection to the egress interface, and accepts no
* flag at all.
*
- * The same effect can be attained with the more generic
- * **bpf_redirect_map**\ (), which requires specific maps to be
- * used but offers better performance.
+ * The same effect can also be attained with the more generic
+ * **bpf_redirect_map**\ (), which uses a BPF map to store the
+ * redirect target instead of providing it directly to the helper.
* Return
* For XDP, the helper returns **XDP_REDIRECT** on success or
* **XDP_ABORTED** on error. For other program types, the values
@@ -1611,13 +1611,11 @@ union bpf_attr {
* the caller. Any higher bits in the *flags* argument must be
* unset.
*
- * When used to redirect packets to net devices, this helper
- * provides a high performance increase over **bpf_redirect**\ ().
- * This is due to various implementation details of the underlying
- * mechanisms, one of which is the fact that **bpf_redirect_map**\
- * () tries to send packet as a "bulk" to the device.
+ * See also bpf_redirect(), which only supports redirecting to an
+ * ifindex, but doesn't require a map to do so.
* Return
- * **XDP_REDIRECT** on success, or **XDP_ABORTED** on error.
+ * **XDP_REDIRECT** on success, or the value of the two lower bits
+ * of the **flags* argument on error.
*
* int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags)
* Description
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 514b1a524abb..7469c7dcc15e 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -24,6 +24,7 @@
#include <endian.h>
#include <fcntl.h>
#include <errno.h>
+#include <ctype.h>
#include <asm/unistd.h>
#include <linux/err.h>
#include <linux/kernel.h>
@@ -1283,7 +1284,7 @@ static size_t bpf_map_mmap_sz(const struct bpf_map *map)
static char *internal_map_name(struct bpf_object *obj,
enum libbpf_map_type type)
{
- char map_name[BPF_OBJ_NAME_LEN];
+ char map_name[BPF_OBJ_NAME_LEN], *p;
const char *sfx = libbpf_type_to_btf_name[type];
int sfx_len = max((size_t)7, strlen(sfx));
int pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1,
@@ -1292,6 +1293,11 @@ static char *internal_map_name(struct bpf_object *obj,
snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
sfx_len, libbpf_type_to_btf_name[type]);
+ /* sanitise map name to characters allowed by kernel */
+ for (p = map_name; *p && p < map_name + sizeof(map_name); p++)
+ if (!isalnum(*p) && *p != '_' && *p != '.')
+ *p = '_';
+
return strdup(map_name);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
index 098bcae5f827..0800036ed654 100644
--- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
+++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
@@ -506,8 +506,10 @@ static void test_syncookie(int type, sa_family_t family)
.pass_on_failure = 0,
};
- if (type != SOCK_STREAM)
+ if (type != SOCK_STREAM) {
+ test__skip();
return;
+ }
/*
* +1 for TCP-SYN and
@@ -822,8 +824,10 @@ void test_select_reuseport(void)
goto out;
saved_tcp_fo = read_int_sysctl(TCP_FO_SYSCTL);
+ if (saved_tcp_fo < 0)
+ goto out;
saved_tcp_syncookie = read_int_sysctl(TCP_SYNCOOKIE_SYSCTL);
- if (saved_tcp_syncookie < 0 || saved_tcp_syncookie < 0)
+ if (saved_tcp_syncookie < 0)
goto out;
if (enable_fastopen())
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index 07f5b462c2ef..aa43e0bd210c 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -3,6 +3,11 @@
#include "test_progs.h"
+#define TCP_REPAIR 19 /* TCP sock is under repair right now */
+
+#define TCP_REPAIR_ON 1
+#define TCP_REPAIR_OFF_NO_WP -1 /* Turn off without window probes */
+
static int connected_socket_v4(void)
{
struct sockaddr_in addr = {
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 6dd403103800..60273f1bc7d9 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -910,6 +910,12 @@ ipv6_rt_replace_mpath()
check_route6 "2001:db8:104::/64 via 2001:db8:101::3 dev veth1 metric 1024"
log_test $? 0 "Multipath with single path via multipath attribute"
+ # multipath with dev-only
+ add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+ run_cmd "$IP -6 ro replace 2001:db8:104::/64 dev veth1"
+ check_route6 "2001:db8:104::/64 dev veth1 metric 1024"
+ log_test $? 0 "Multipath with dev-only"
+
# route replace fails - invalid nexthop 1
add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:111::3 nexthop via 2001:db8:103::3"
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre.sh b/tools/testing/selftests/net/forwarding/mirror_gre.sh
index e6fd7a18c655..0266443601bc 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre.sh
@@ -63,22 +63,23 @@ test_span_gre_mac()
{
local tundev=$1; shift
local direction=$1; shift
- local prot=$1; shift
local what=$1; shift
- local swp3mac=$(mac_get $swp3)
- local h3mac=$(mac_get $h3)
+ case "$direction" in
+ ingress) local src_mac=$(mac_get $h1); local dst_mac=$(mac_get $h2)
+ ;;
+ egress) local src_mac=$(mac_get $h2); local dst_mac=$(mac_get $h1)
+ ;;
+ esac
RET=0
mirror_install $swp1 $direction $tundev "matchall $tcflags"
- tc filter add dev $h3 ingress pref 77 prot $prot \
- flower ip_proto 0x2f src_mac $swp3mac dst_mac $h3mac \
- action pass
+ icmp_capture_install h3-${tundev} "src_mac $src_mac dst_mac $dst_mac"
- mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 10
+ mirror_test v$h1 192.0.2.1 192.0.2.2 h3-${tundev} 100 10
- tc filter del dev $h3 ingress pref 77
+ icmp_capture_uninstall h3-${tundev}
mirror_uninstall $swp1 $direction
log_test "$direction $what: envelope MAC ($tcflags)"
@@ -120,14 +121,14 @@ test_ip6gretap()
test_gretap_mac()
{
- test_span_gre_mac gt4 ingress ip "mirror to gretap"
- test_span_gre_mac gt4 egress ip "mirror to gretap"
+ test_span_gre_mac gt4 ingress "mirror to gretap"
+ test_span_gre_mac gt4 egress "mirror to gretap"
}
test_ip6gretap_mac()
{
- test_span_gre_mac gt6 ingress ipv6 "mirror to ip6gretap"
- test_span_gre_mac gt6 egress ipv6 "mirror to ip6gretap"
+ test_span_gre_mac gt6 ingress "mirror to ip6gretap"
+ test_span_gre_mac gt6 egress "mirror to ip6gretap"
}
test_all()
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
index bb10e33690b2..ce6bea9675c0 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
@@ -516,9 +516,9 @@ test_tos()
RET=0
tc filter add dev v1 egress pref 77 prot ip \
- flower ip_tos 0x40 action pass
- vxlan_ping_test $h1 192.0.2.3 "-Q 0x40" v1 egress 77 10
- vxlan_ping_test $h1 192.0.2.3 "-Q 0x30" v1 egress 77 0
+ flower ip_tos 0x14 action pass
+ vxlan_ping_test $h1 192.0.2.3 "-Q 0x14" v1 egress 77 10
+ vxlan_ping_test $h1 192.0.2.3 "-Q 0x18" v1 egress 77 0
tc filter del dev v1 egress pref 77 prot ip
log_test "VXLAN: envelope TOS inheritance"
diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile
index f10aa3590adc..28d477683e8a 100644
--- a/tools/testing/selftests/wireguard/qemu/Makefile
+++ b/tools/testing/selftests/wireguard/qemu/Makefile
@@ -38,19 +38,17 @@ endef
define file_download =
$(DISTFILES_PATH)/$(1):
mkdir -p $(DISTFILES_PATH)
- flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp'
- if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi
+ flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp; [ -f $$@.tmp ] || exit 1; if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi'
endef
$(eval $(call tar_download,MUSL,musl,1.1.24,.tar.gz,https://www.musl-libc.org/releases/,1370c9a812b2cf2a7d92802510cca0058cc37e66a7bedd70051f0a34015022a3))
-$(eval $(call tar_download,LIBMNL,libmnl,1.0.4,.tar.bz2,https://www.netfilter.org/projects/libmnl/files/,171f89699f286a5854b72b91d06e8f8e3683064c5901fb09d954a9ab6f551f81))
$(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c))
$(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d))
$(eval $(call tar_download,IPROUTE2,iproute2,5.4.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,fe97aa60a0d4c5ac830be18937e18dc3400ca713a33a89ad896ff1e3d46086ae))
$(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,993a3a5490a544c2cbf2ef15cf7e7ed21af1845baf228318d5c36ef8827e157c))
$(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a0e42099e12e4bf7a68ebe6fde05553383a682e816a7ec9256ab4773faa))
$(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a))
-$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20191226,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,aa8af0fdc9872d369d8c890a84dbc2a2466b55795dccd5b47721b2d97644b04f))
+$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20200206,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,f5207248c6a3c3e3bfc9ab30b91c1897b00802ed861e1f9faaed873366078c64))
KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug)
rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d))
@@ -295,21 +293,13 @@ $(IPERF_PATH)/src/iperf3: | $(IPERF_PATH)/.installed $(USERSPACE_DEPS)
$(MAKE) -C $(IPERF_PATH)
$(STRIP) -s $@
-$(LIBMNL_PATH)/.installed: $(LIBMNL_TAR)
- flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
- touch $@
-
-$(LIBMNL_PATH)/src/.libs/libmnl.a: | $(LIBMNL_PATH)/.installed $(USERSPACE_DEPS)
- cd $(LIBMNL_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared
- $(MAKE) -C $(LIBMNL_PATH)
- sed -i 's:prefix=.*:prefix=$(LIBMNL_PATH):' $(LIBMNL_PATH)/libmnl.pc
-
$(WIREGUARD_TOOLS_PATH)/.installed: $(WIREGUARD_TOOLS_TAR)
+ mkdir -p $(BUILD_PATH)
flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
touch $@
-$(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
- LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src LIBMNL_CFLAGS="-I$(LIBMNL_PATH)/include" LIBMNL_LDLIBS="-lmnl" wg
+$(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(USERSPACE_DEPS)
+ $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src wg
$(STRIP) -s $@
$(BUILD_PATH)/init: init.c | $(USERSPACE_DEPS)
@@ -340,17 +330,17 @@ $(BASH_PATH)/bash: | $(BASH_PATH)/.installed $(USERSPACE_DEPS)
$(IPROUTE2_PATH)/.installed: $(IPROUTE2_TAR)
mkdir -p $(BUILD_PATH)
flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
- printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=y\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS -DHAVE_LIBMNL -I$(LIBMNL_PATH)/include\nLDLIBS+=-lmnl' > $(IPROUTE2_PATH)/config.mk
+ printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=n\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS\n' > $(IPROUTE2_PATH)/config.mk
printf 'lib: snapshot\n\t$$(MAKE) -C lib\nip/ip: lib\n\t$$(MAKE) -C ip ip\nmisc/ss: lib\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile
touch $@
-$(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
- LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ ip/ip
- $(STRIP) -s $(IPROUTE2_PATH)/ip/ip
+$(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(USERSPACE_DEPS)
+ $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ ip/ip
+ $(STRIP) -s $@
-$(IPROUTE2_PATH)/misc/ss: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
- LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ misc/ss
- $(STRIP) -s $(IPROUTE2_PATH)/misc/ss
+$(IPROUTE2_PATH)/misc/ss: | $(IPROUTE2_PATH)/.installed $(USERSPACE_DEPS)
+ $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ misc/ss
+ $(STRIP) -s $@
$(IPTABLES_PATH)/.installed: $(IPTABLES_TAR)
mkdir -p $(BUILD_PATH)
@@ -358,8 +348,8 @@ $(IPTABLES_PATH)/.installed: $(IPTABLES_TAR)
sed -i -e "/nfnetlink=[01]/s:=[01]:=0:" -e "/nfconntrack=[01]/s:=[01]:=0:" $(IPTABLES_PATH)/configure
touch $@
-$(IPTABLES_PATH)/iptables/xtables-legacy-multi: | $(IPTABLES_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
- cd $(IPTABLES_PATH) && PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --disable-nftables --disable-bpf-compiler --disable-nfsynproxy --disable-libipq --with-kernel=$(BUILD_PATH)/include
+$(IPTABLES_PATH)/iptables/xtables-legacy-multi: | $(IPTABLES_PATH)/.installed $(USERSPACE_DEPS)
+ cd $(IPTABLES_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --disable-nftables --disable-bpf-compiler --disable-nfsynproxy --disable-libipq --disable-connlabel --with-kernel=$(BUILD_PATH)/include
$(MAKE) -C $(IPTABLES_PATH)
$(STRIP) -s $@