aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/Kconfig1
-rw-r--r--net/bridge/br_device.c6
-rw-r--r--net/bridge/br_stp.c3
-rw-r--r--net/core/dev.c40
-rw-r--r--net/core/devlink.c71
-rw-r--r--net/core/fib_rules.c2
-rw-r--r--net/core/netclassid_cgroup.c47
-rw-r--r--net/core/page_pool.c22
-rw-r--r--net/core/rtnetlink.c26
-rw-r--r--net/core/skbuff.c6
-rw-r--r--net/dsa/dsa_priv.h2
-rw-r--r--net/dsa/port.c32
-rw-r--r--net/dsa/slave.c8
-rw-r--r--net/dsa/tag_ar9331.c2
-rw-r--r--net/dsa/tag_qca.c2
-rw-r--r--net/ethtool/bitset.c6
-rw-r--r--net/ethtool/bitset.h2
-rw-r--r--net/hsr/hsr_framereg.c3
-rw-r--r--net/ieee802154/nl_policy.c6
-rw-r--r--net/ipv4/cipso_ipv4.c7
-rw-r--r--net/ipv4/gre_demux.c12
-rw-r--r--net/ipv4/icmp.c33
-rw-r--r--net/ipv4/inet_diag.c44
-rw-r--r--net/ipv4/raw_diag.c5
-rw-r--r--net/ipv4/tcp_input.c6
-rw-r--r--net/ipv4/udp.c6
-rw-r--r--net/ipv4/udp_diag.c5
-rw-r--r--net/ipv6/addrconf.c47
-rw-r--r--net/ipv6/ip6_fib.c7
-rw-r--r--net/ipv6/ip6_gre.c8
-rw-r--r--net/ipv6/ip6_icmp.c34
-rw-r--r--net/ipv6/ip6_tunnel.c81
-rw-r--r--net/ipv6/ipv6_sockglue.c10
-rw-r--r--net/ipv6/route.c1
-rw-r--r--net/mac80211/cfg.c2
-rw-r--r--net/mac80211/mlme.c14
-rw-r--r--net/mac80211/rx.c2
-rw-r--r--net/mac80211/tx.c2
-rw-r--r--net/mac80211/util.c34
-rw-r--r--net/mptcp/Kconfig1
-rw-r--r--net/mptcp/options.c19
-rw-r--r--net/mptcp/protocol.c56
-rw-r--r--net/mptcp/protocol.h4
-rw-r--r--net/netfilter/ipset/ip_set_core.c34
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h635
-rw-r--r--net/netfilter/nf_conntrack_core.c192
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c20
-rw-r--r--net/netfilter/nf_conntrack_standalone.c2
-rw-r--r--net/netfilter/nf_flow_table_offload.c6
-rw-r--r--net/netfilter/nf_synproxy_core.c2
-rw-r--r--net/netfilter/nf_tables_api.c22
-rw-r--r--net/netfilter/nfnetlink_cthelper.c2
-rw-r--r--net/netfilter/nft_chain_nat.c1
-rw-r--r--net/netfilter/nft_payload.c1
-rw-r--r--net/netfilter/nft_set_pipapo.c12
-rw-r--r--net/netfilter/nft_tunnel.c2
-rw-r--r--net/netfilter/x_tables.c6
-rw-r--r--net/netfilter/xt_hashlimit.c38
-rw-r--r--net/netfilter/xt_recent.c2
-rw-r--r--net/netlabel/netlabel_domainhash.c3
-rw-r--r--net/netlabel/netlabel_unlabeled.c3
-rw-r--r--net/netlink/af_netlink.c7
-rw-r--r--net/netlink/genetlink.c5
-rw-r--r--net/nfc/hci/core.c19
-rw-r--r--net/nfc/netlink.c4
-rw-r--r--net/openvswitch/datapath.c10
-rw-r--r--net/openvswitch/flow_netlink.c18
-rw-r--r--net/openvswitch/flow_table.c6
-rw-r--r--net/openvswitch/meter.c3
-rw-r--r--net/openvswitch/vport.c3
-rw-r--r--net/rds/rdma.c24
-rw-r--r--net/sched/act_api.c1
-rw-r--r--net/sched/cls_flower.c2
-rw-r--r--net/sched/cls_matchall.c1
-rw-r--r--net/sched/sch_fq.c1
-rw-r--r--net/sched/sch_taprio.c1
-rw-r--r--net/sctp/diag.c8
-rw-r--r--net/sctp/sm_statefuns.c29
-rw-r--r--net/smc/af_smc.c27
-rw-r--r--net/smc/smc_clc.c4
-rw-r--r--net/smc/smc_core.c12
-rw-r--r--net/smc/smc_core.h2
-rw-r--r--net/smc/smc_diag.c5
-rw-r--r--net/smc/smc_ib.c2
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c13
-rw-r--r--net/tipc/netlink.c1
-rw-r--r--net/tipc/node.c7
-rw-r--r--net/tipc/socket.c2
-rw-r--r--net/tls/tls_device.c20
-rw-r--r--net/unix/af_unix.c4
-rw-r--r--net/vmw_vsock/af_vsock.c20
-rw-r--r--net/vmw_vsock/hyperv_transport.c3
-rw-r--r--net/vmw_vsock/virtio_transport_common.c2
-rw-r--r--net/wireless/ethtool.c8
-rw-r--r--net/wireless/nl80211.c6
-rw-r--r--net/wireless/reg.c2
-rw-r--r--net/xdp/xsk.c2
-rw-r--r--net/xdp/xsk_queue.h3
-rw-r--r--net/xfrm/xfrm_interface.c6
99 files changed, 1381 insertions, 622 deletions
diff --git a/net/Kconfig b/net/Kconfig
index b0937a700f01..2eeb0e55f7c9 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -189,7 +189,6 @@ config BRIDGE_NETFILTER
depends on NETFILTER_ADVANCED
select NETFILTER_FAMILY_BRIDGE
select SKB_EXTENSIONS
- default m
---help---
Enabling this option will let arptables resp. iptables see bridged
ARP resp. IP traffic. If you want a bridging firewall, you probably
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index dc3d2c1dd9d5..0e3dbc5f3c34 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -34,7 +34,6 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
const struct nf_br_ops *nf_ops;
u8 state = BR_STATE_FORWARDING;
const unsigned char *dest;
- struct ethhdr *eth;
u16 vid = 0;
rcu_read_lock();
@@ -54,15 +53,14 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
BR_INPUT_SKB_CB(skb)->frag_max_size = 0;
skb_reset_mac_header(skb);
- eth = eth_hdr(skb);
skb_pull(skb, ETH_HLEN);
if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid, &state))
goto out;
if (IS_ENABLED(CONFIG_INET) &&
- (eth->h_proto == htons(ETH_P_ARP) ||
- eth->h_proto == htons(ETH_P_RARP)) &&
+ (eth_hdr(skb)->h_proto == htons(ETH_P_ARP) ||
+ eth_hdr(skb)->h_proto == htons(ETH_P_RARP)) &&
br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) {
br_do_proxy_suppress_arp(skb, br, vid, NULL);
} else if (IS_ENABLED(CONFIG_IPV6) &&
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 6856a6d9282b..1f14b8455345 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -63,7 +63,8 @@ struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no)
{
struct net_bridge_port *p;
- list_for_each_entry_rcu(p, &br->port_list, list) {
+ list_for_each_entry_rcu(p, &br->port_list, list,
+ lockdep_is_held(&br->lock)) {
if (p->port_no == port_no)
return p;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index a69e8bd7ed74..c6c985fe7b1b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -146,7 +146,6 @@
#include "net-sysfs.h"
#define MAX_GRO_SKBS 8
-#define MAX_NEST_DEV 8
/* This should be increased if a protocol with a bigger head is added. */
#define GRO_MAX_HEAD (MAX_HEADER + 128)
@@ -331,6 +330,12 @@ int netdev_name_node_alt_destroy(struct net_device *dev, const char *name)
name_node = netdev_name_node_lookup(net, name);
if (!name_node)
return -ENOENT;
+ /* lookup might have found our primary name or a name belonging
+ * to another device.
+ */
+ if (name_node == dev->name_node || name_node->dev != dev)
+ return -EINVAL;
+
__netdev_name_node_alt_destroy(name_node);
return 0;
@@ -3071,6 +3076,8 @@ static u16 skb_tx_hash(const struct net_device *dev,
if (skb_rx_queue_recorded(skb)) {
hash = skb_get_rx_queue(skb);
+ if (hash >= qoffset)
+ hash -= qoffset;
while (unlikely(hash >= qcount))
hash -= qcount;
return hash + qoffset;
@@ -3657,26 +3664,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
qdisc_calculate_pkt_len(skb, q);
if (q->flags & TCQ_F_NOLOCK) {
- if ((q->flags & TCQ_F_CAN_BYPASS) && READ_ONCE(q->empty) &&
- qdisc_run_begin(q)) {
- if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED,
- &q->state))) {
- __qdisc_drop(skb, &to_free);
- rc = NET_XMIT_DROP;
- goto end_run;
- }
- qdisc_bstats_cpu_update(q, skb);
-
- rc = NET_XMIT_SUCCESS;
- if (sch_direct_xmit(skb, q, dev, txq, NULL, true))
- __qdisc_run(q);
-
-end_run:
- qdisc_run_end(q);
- } else {
- rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
- qdisc_run(q);
- }
+ rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
+ qdisc_run(q);
if (unlikely(to_free))
kfree_skb_list(to_free);
@@ -4527,14 +4516,14 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
/* Reinjected packets coming from act_mirred or similar should
* not get XDP generic processing.
*/
- if (skb_cloned(skb) || skb_is_tc_redirected(skb))
+ if (skb_is_tc_redirected(skb))
return XDP_PASS;
/* XDP packets must be linear and must have sufficient headroom
* of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
* native XDP provides, thus we need to do it here as well.
*/
- if (skb_is_nonlinear(skb) ||
+ if (skb_cloned(skb) || skb_is_nonlinear(skb) ||
skb_headroom(skb) < XDP_PACKET_HEADROOM) {
int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
int troom = skb->tail + skb->data_len - skb->end;
@@ -7201,8 +7190,8 @@ static int __netdev_walk_all_lower_dev(struct net_device *dev,
return 0;
}
-static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
- struct list_head **iter)
+struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
+ struct list_head **iter)
{
struct netdev_adjacent *lower;
@@ -7214,6 +7203,7 @@ static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
return lower->dev;
}
+EXPORT_SYMBOL(netdev_next_lower_dev_rcu);
static u8 __netdev_upper_depth(struct net_device *dev)
{
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 549ee56b7a21..b831c5545d6a 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2103,11 +2103,11 @@ err_action_values_put:
static struct devlink_dpipe_table *
devlink_dpipe_table_find(struct list_head *dpipe_tables,
- const char *table_name)
+ const char *table_name, struct devlink *devlink)
{
struct devlink_dpipe_table *table;
-
- list_for_each_entry_rcu(table, dpipe_tables, list) {
+ list_for_each_entry_rcu(table, dpipe_tables, list,
+ lockdep_is_held(&devlink->lock)) {
if (!strcmp(table->name, table_name))
return table;
}
@@ -2226,7 +2226,7 @@ static int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb,
table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
- table_name);
+ table_name, devlink);
if (!table)
return -EINVAL;
@@ -2382,7 +2382,7 @@ static int devlink_dpipe_table_counters_set(struct devlink *devlink,
struct devlink_dpipe_table *table;
table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
- table_name);
+ table_name, devlink);
if (!table)
return -EINVAL;
@@ -3352,34 +3352,41 @@ devlink_param_value_get_from_info(const struct devlink_param *param,
struct genl_info *info,
union devlink_param_value *value)
{
+ struct nlattr *param_data;
int len;
- if (param->type != DEVLINK_PARAM_TYPE_BOOL &&
- !info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA])
+ param_data = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA];
+
+ if (param->type != DEVLINK_PARAM_TYPE_BOOL && !param_data)
return -EINVAL;
switch (param->type) {
case DEVLINK_PARAM_TYPE_U8:
- value->vu8 = nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]);
+ if (nla_len(param_data) != sizeof(u8))
+ return -EINVAL;
+ value->vu8 = nla_get_u8(param_data);
break;
case DEVLINK_PARAM_TYPE_U16:
- value->vu16 = nla_get_u16(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]);
+ if (nla_len(param_data) != sizeof(u16))
+ return -EINVAL;
+ value->vu16 = nla_get_u16(param_data);
break;
case DEVLINK_PARAM_TYPE_U32:
- value->vu32 = nla_get_u32(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]);
+ if (nla_len(param_data) != sizeof(u32))
+ return -EINVAL;
+ value->vu32 = nla_get_u32(param_data);
break;
case DEVLINK_PARAM_TYPE_STRING:
- len = strnlen(nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]),
- nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]));
- if (len == nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]) ||
+ len = strnlen(nla_data(param_data), nla_len(param_data));
+ if (len == nla_len(param_data) ||
len >= __DEVLINK_PARAM_MAX_STRING_VALUE)
return -EINVAL;
- strcpy(value->vstr,
- nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]));
+ strcpy(value->vstr, nla_data(param_data));
break;
case DEVLINK_PARAM_TYPE_BOOL:
- value->vbool = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA] ?
- true : false;
+ if (param_data && nla_len(param_data))
+ return -EINVAL;
+ value->vbool = nla_get_flag(param_data);
break;
}
return 0;
@@ -5951,6 +5958,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 },
[DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_REGION_CHUNK_ADDR] = { .type = NLA_U64 },
+ [DEVLINK_ATTR_REGION_CHUNK_LEN] = { .type = NLA_U64 },
[DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64 },
[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 },
@@ -6854,7 +6863,7 @@ bool devlink_dpipe_table_counter_enabled(struct devlink *devlink,
rcu_read_lock();
table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
- table_name);
+ table_name, devlink);
enabled = false;
if (table)
enabled = table->counters_enabled;
@@ -6878,26 +6887,34 @@ int devlink_dpipe_table_register(struct devlink *devlink,
void *priv, bool counter_control_extern)
{
struct devlink_dpipe_table *table;
-
- if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name))
- return -EEXIST;
+ int err = 0;
if (WARN_ON(!table_ops->size_get))
return -EINVAL;
+ mutex_lock(&devlink->lock);
+
+ if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name,
+ devlink)) {
+ err = -EEXIST;
+ goto unlock;
+ }
+
table = kzalloc(sizeof(*table), GFP_KERNEL);
- if (!table)
- return -ENOMEM;
+ if (!table) {
+ err = -ENOMEM;
+ goto unlock;
+ }
table->name = table_name;
table->table_ops = table_ops;
table->priv = priv;
table->counter_control_extern = counter_control_extern;
- mutex_lock(&devlink->lock);
list_add_tail_rcu(&table->list, &devlink->dpipe_table_list);
+unlock:
mutex_unlock(&devlink->lock);
- return 0;
+ return err;
}
EXPORT_SYMBOL_GPL(devlink_dpipe_table_register);
@@ -6914,7 +6931,7 @@ void devlink_dpipe_table_unregister(struct devlink *devlink,
mutex_lock(&devlink->lock);
table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
- table_name);
+ table_name, devlink);
if (!table)
goto unlock;
list_del_rcu(&table->list);
@@ -7071,7 +7088,7 @@ int devlink_dpipe_table_resource_set(struct devlink *devlink,
mutex_lock(&devlink->lock);
table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
- table_name);
+ table_name, devlink);
if (!table) {
err = -EINVAL;
goto out;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 3e7e15278c46..bd7eba9066f8 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -974,7 +974,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
frh = nlmsg_data(nlh);
frh->family = ops->family;
- frh->table = rule->table;
+ frh->table = rule->table < 256 ? rule->table : RT_TABLE_COMPAT;
if (nla_put_u32(skb, FRA_TABLE, rule->table))
goto nla_put_failure;
if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefixlen))
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 0642f91c4038..b4c87fe31be2 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -53,30 +53,60 @@ static void cgrp_css_free(struct cgroup_subsys_state *css)
kfree(css_cls_state(css));
}
+/*
+ * To avoid freezing of sockets creation for tasks with big number of threads
+ * and opened sockets lets release file_lock every 1000 iterated descriptors.
+ * New sockets will already have been created with new classid.
+ */
+
+struct update_classid_context {
+ u32 classid;
+ unsigned int batch;
+};
+
+#define UPDATE_CLASSID_BATCH 1000
+
static int update_classid_sock(const void *v, struct file *file, unsigned n)
{
int err;
+ struct update_classid_context *ctx = (void *)v;
struct socket *sock = sock_from_file(file, &err);
if (sock) {
spin_lock(&cgroup_sk_update_lock);
- sock_cgroup_set_classid(&sock->sk->sk_cgrp_data,
- (unsigned long)v);
+ sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, ctx->classid);
spin_unlock(&cgroup_sk_update_lock);
}
+ if (--ctx->batch == 0) {
+ ctx->batch = UPDATE_CLASSID_BATCH;
+ return n + 1;
+ }
return 0;
}
+static void update_classid_task(struct task_struct *p, u32 classid)
+{
+ struct update_classid_context ctx = {
+ .classid = classid,
+ .batch = UPDATE_CLASSID_BATCH
+ };
+ unsigned int fd = 0;
+
+ do {
+ task_lock(p);
+ fd = iterate_fd(p->files, fd, update_classid_sock, &ctx);
+ task_unlock(p);
+ cond_resched();
+ } while (fd);
+}
+
static void cgrp_attach(struct cgroup_taskset *tset)
{
struct cgroup_subsys_state *css;
struct task_struct *p;
cgroup_taskset_for_each(p, css, tset) {
- task_lock(p);
- iterate_fd(p->files, 0, update_classid_sock,
- (void *)(unsigned long)css_cls_state(css)->classid);
- task_unlock(p);
+ update_classid_task(p, css_cls_state(css)->classid);
}
}
@@ -98,10 +128,7 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
css_task_iter_start(css, 0, &it);
while ((p = css_task_iter_next(&it))) {
- task_lock(p);
- iterate_fd(p->files, 0, update_classid_sock,
- (void *)(unsigned long)cs->classid);
- task_unlock(p);
+ update_classid_task(p, cs->classid);
cond_resched();
}
css_task_iter_end(&it);
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 9b7cbe35df37..10d2b255df5e 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -99,8 +99,7 @@ EXPORT_SYMBOL(page_pool_create);
static void __page_pool_return_page(struct page_pool *pool, struct page *page);
noinline
-static struct page *page_pool_refill_alloc_cache(struct page_pool *pool,
- bool refill)
+static struct page *page_pool_refill_alloc_cache(struct page_pool *pool)
{
struct ptr_ring *r = &pool->ring;
struct page *page;
@@ -141,8 +140,7 @@ static struct page *page_pool_refill_alloc_cache(struct page_pool *pool,
page = NULL;
break;
}
- } while (pool->alloc.count < PP_ALLOC_CACHE_REFILL &&
- refill);
+ } while (pool->alloc.count < PP_ALLOC_CACHE_REFILL);
/* Return last page */
if (likely(pool->alloc.count > 0))
@@ -155,20 +153,16 @@ static struct page *page_pool_refill_alloc_cache(struct page_pool *pool,
/* fast path */
static struct page *__page_pool_get_cached(struct page_pool *pool)
{
- bool refill = false;
struct page *page;
- /* Test for safe-context, caller should provide this guarantee */
- if (likely(in_serving_softirq())) {
- if (likely(pool->alloc.count)) {
- /* Fast-path */
- page = pool->alloc.cache[--pool->alloc.count];
- return page;
- }
- refill = true;
+ /* Caller MUST guarantee safe non-concurrent access, e.g. softirq */
+ if (likely(pool->alloc.count)) {
+ /* Fast-path */
+ page = pool->alloc.cache[--pool->alloc.count];
+ } else {
+ page = page_pool_refill_alloc_cache(pool);
}
- page = page_pool_refill_alloc_cache(pool, refill);
return page;
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 09c44bf2e1d2..e1152f4ffe33 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3504,27 +3504,25 @@ static int rtnl_alt_ifname(int cmd, struct net_device *dev, struct nlattr *attr,
if (err)
return err;
- alt_ifname = nla_data(attr);
+ alt_ifname = nla_strdup(attr, GFP_KERNEL);
+ if (!alt_ifname)
+ return -ENOMEM;
+
if (cmd == RTM_NEWLINKPROP) {
- alt_ifname = kstrdup(alt_ifname, GFP_KERNEL);
- if (!alt_ifname)
- return -ENOMEM;
err = netdev_name_node_alt_create(dev, alt_ifname);
- if (err) {
- kfree(alt_ifname);
- return err;
- }
+ if (!err)
+ alt_ifname = NULL;
} else if (cmd == RTM_DELLINKPROP) {
err = netdev_name_node_alt_destroy(dev, alt_ifname);
- if (err)
- return err;
} else {
- WARN_ON(1);
- return 0;
+ WARN_ON_ONCE(1);
+ err = -EINVAL;
}
- *changed = true;
- return 0;
+ kfree(alt_ifname);
+ if (!err)
+ *changed = true;
+ return err;
}
static int rtnl_linkprop(int cmd, struct sk_buff *skb, struct nlmsghdr *nlh,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 864cb9e9622f..e1101a4f90a6 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -467,7 +467,6 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
return NULL;
}
- /* use OR instead of assignment to avoid clearing of bits in mask */
if (pfmemalloc)
skb->pfmemalloc = 1;
skb->head_frag = 1;
@@ -527,7 +526,6 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
return NULL;
}
- /* use OR instead of assignment to avoid clearing of bits in mask */
if (nc->page.pfmemalloc)
skb->pfmemalloc = 1;
skb->head_frag = 1;
@@ -4805,9 +4803,9 @@ static __sum16 *skb_checksum_setup_ip(struct sk_buff *skb,
typeof(IPPROTO_IP) proto,
unsigned int off)
{
- switch (proto) {
- int err;
+ int err;
+ switch (proto) {
case IPPROTO_TCP:
err = skb_maybe_pull_tail(skb, off + sizeof(struct tcphdr),
off + MAX_TCP_HDR_LEN);
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index a7662e7a691d..760e6ea3178a 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -117,7 +117,9 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
/* port.c */
int dsa_port_set_state(struct dsa_port *dp, u8 state,
struct switchdev_trans *trans);
+int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy);
int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy);
+void dsa_port_disable_rt(struct dsa_port *dp);
void dsa_port_disable(struct dsa_port *dp);
int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br);
void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br);
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 774facb8d547..ed7dabb57985 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -63,7 +63,7 @@ static void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
pr_err("DSA: failed to set STP state %u (%d)\n", state, err);
}
-int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy)
+int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy)
{
struct dsa_switch *ds = dp->ds;
int port = dp->index;
@@ -78,14 +78,31 @@ int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy)
if (!dp->bridge_dev)
dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
+ if (dp->pl)
+ phylink_start(dp->pl);
+
return 0;
}
-void dsa_port_disable(struct dsa_port *dp)
+int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy)
+{
+ int err;
+
+ rtnl_lock();
+ err = dsa_port_enable_rt(dp, phy);
+ rtnl_unlock();
+
+ return err;
+}
+
+void dsa_port_disable_rt(struct dsa_port *dp)
{
struct dsa_switch *ds = dp->ds;
int port = dp->index;
+ if (dp->pl)
+ phylink_stop(dp->pl);
+
if (!dp->bridge_dev)
dsa_port_set_state_now(dp, BR_STATE_DISABLED);
@@ -93,6 +110,13 @@ void dsa_port_disable(struct dsa_port *dp)
ds->ops->port_disable(ds, port);
}
+void dsa_port_disable(struct dsa_port *dp)
+{
+ rtnl_lock();
+ dsa_port_disable_rt(dp);
+ rtnl_unlock();
+}
+
int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br)
{
struct dsa_notifier_bridge_info info = {
@@ -614,10 +638,6 @@ static int dsa_port_phylink_register(struct dsa_port *dp)
goto err_phy_connect;
}
- rtnl_lock();
- phylink_start(dp->pl);
- rtnl_unlock();
-
return 0;
err_phy_connect:
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 088c886e609e..ddc0f9236928 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -88,12 +88,10 @@ static int dsa_slave_open(struct net_device *dev)
goto clear_allmulti;
}
- err = dsa_port_enable(dp, dev->phydev);
+ err = dsa_port_enable_rt(dp, dev->phydev);
if (err)
goto clear_promisc;
- phylink_start(dp->pl);
-
return 0;
clear_promisc:
@@ -114,9 +112,7 @@ static int dsa_slave_close(struct net_device *dev)
struct net_device *master = dsa_slave_to_master(dev);
struct dsa_port *dp = dsa_slave_to_port(dev);
- phylink_stop(dp->pl);
-
- dsa_port_disable(dp);
+ dsa_port_disable_rt(dp);
dev_mc_unsync(master, dev);
dev_uc_unsync(master, dev);
diff --git a/net/dsa/tag_ar9331.c b/net/dsa/tag_ar9331.c
index 466ffa92a474..55b00694cdba 100644
--- a/net/dsa/tag_ar9331.c
+++ b/net/dsa/tag_ar9331.c
@@ -31,7 +31,7 @@ static struct sk_buff *ar9331_tag_xmit(struct sk_buff *skb,
__le16 *phdr;
u16 hdr;
- if (skb_cow_head(skb, 0) < 0)
+ if (skb_cow_head(skb, AR9331_HDR_LEN) < 0)
return NULL;
phdr = skb_push(skb, AR9331_HDR_LEN);
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
index c8a128c9e5e0..70db7c909f74 100644
--- a/net/dsa/tag_qca.c
+++ b/net/dsa/tag_qca.c
@@ -33,7 +33,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
struct dsa_port *dp = dsa_slave_to_port(dev);
u16 *phdr, hdr;
- if (skb_cow_head(skb, 0) < 0)
+ if (skb_cow_head(skb, QCA_HDR_LEN) < 0)
return NULL;
skb_push(skb, QCA_HDR_LEN);
diff --git a/net/ethtool/bitset.c b/net/ethtool/bitset.c
index fce45dac4205..ef9197541cb3 100644
--- a/net/ethtool/bitset.c
+++ b/net/ethtool/bitset.c
@@ -305,7 +305,8 @@ nla_put_failure:
static const struct nla_policy bitset_policy[ETHTOOL_A_BITSET_MAX + 1] = {
[ETHTOOL_A_BITSET_UNSPEC] = { .type = NLA_REJECT },
[ETHTOOL_A_BITSET_NOMASK] = { .type = NLA_FLAG },
- [ETHTOOL_A_BITSET_SIZE] = { .type = NLA_U32 },
+ [ETHTOOL_A_BITSET_SIZE] = NLA_POLICY_MAX(NLA_U32,
+ ETHNL_MAX_BITSET_SIZE),
[ETHTOOL_A_BITSET_BITS] = { .type = NLA_NESTED },
[ETHTOOL_A_BITSET_VALUE] = { .type = NLA_BINARY },
[ETHTOOL_A_BITSET_MASK] = { .type = NLA_BINARY },
@@ -447,7 +448,10 @@ ethnl_update_bitset32_verbose(u32 *bitmap, unsigned int nbits,
"mask only allowed in compact bitset");
return -EINVAL;
}
+
no_mask = tb[ETHTOOL_A_BITSET_NOMASK];
+ if (no_mask)
+ ethnl_bitmap32_clear(bitmap, 0, nbits, mod);
nla_for_each_nested(bit_attr, tb[ETHTOOL_A_BITSET_BITS], rem) {
bool old_val, new_val;
diff --git a/net/ethtool/bitset.h b/net/ethtool/bitset.h
index b8247e34109d..b849f9d19676 100644
--- a/net/ethtool/bitset.h
+++ b/net/ethtool/bitset.h
@@ -3,6 +3,8 @@
#ifndef _NET_ETHTOOL_BITSET_H
#define _NET_ETHTOOL_BITSET_H
+#define ETHNL_MAX_BITSET_SIZE S16_MAX
+
typedef const char (*const ethnl_string_array_t)[ETH_GSTRING_LEN];
int ethnl_bitset_is_compact(const struct nlattr *bitset, bool *compact);
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 364ea2cc028e..3ba7f61be107 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -155,7 +155,8 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr,
new_node->seq_out[i] = seq_out;
spin_lock_bh(&hsr->list_lock);
- list_for_each_entry_rcu(node, node_db, mac_list) {
+ list_for_each_entry_rcu(node, node_db, mac_list,
+ lockdep_is_held(&hsr->list_lock)) {
if (ether_addr_equal(node->macaddress_A, addr))
goto out;
if (ether_addr_equal(node->macaddress_B, addr))
diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c
index 2c7a38d76a3a..0672b2f01586 100644
--- a/net/ieee802154/nl_policy.c
+++ b/net/ieee802154/nl_policy.c
@@ -21,7 +21,13 @@ const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = {
[IEEE802154_ATTR_HW_ADDR] = { .type = NLA_HW_ADDR, },
[IEEE802154_ATTR_PAN_ID] = { .type = NLA_U16, },
[IEEE802154_ATTR_CHANNEL] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_BCN_ORD] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_SF_ORD] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_PAN_COORD] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_BAT_EXT] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_COORD_REALIGN] = { .type = NLA_U8, },
[IEEE802154_ATTR_PAGE] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_DEV_TYPE] = { .type = NLA_U8, },
[IEEE802154_ATTR_COORD_SHORT_ADDR] = { .type = NLA_U16, },
[IEEE802154_ATTR_COORD_HW_ADDR] = { .type = NLA_HW_ADDR, },
[IEEE802154_ATTR_COORD_PAN_ID] = { .type = NLA_U16, },
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 376882215919..0bd10a1f477f 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1724,6 +1724,7 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
{
unsigned char optbuf[sizeof(struct ip_options) + 40];
struct ip_options *opt = (struct ip_options *)optbuf;
+ int res;
if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES)
return;
@@ -1735,7 +1736,11 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
memset(opt, 0, sizeof(struct ip_options));
opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr);
- if (__ip_options_compile(dev_net(skb->dev), opt, skb, NULL))
+ rcu_read_lock();
+ res = __ip_options_compile(dev_net(skb->dev), opt, skb, NULL);
+ rcu_read_unlock();
+
+ if (res)
return;
if (gateway)
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index 5fd6e8ed02b5..66fdbfe5447c 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -56,7 +56,9 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version)
}
EXPORT_SYMBOL_GPL(gre_del_protocol);
-/* Fills in tpi and returns header length to be pulled. */
+/* Fills in tpi and returns header length to be pulled.
+ * Note that caller must use pskb_may_pull() before pulling GRE header.
+ */
int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
bool *csum_err, __be16 proto, int nhs)
{
@@ -110,8 +112,14 @@ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
* - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
*/
if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
+ u8 _val, *val;
+
+ val = skb_header_pointer(skb, nhs + hdr_len,
+ sizeof(_val), &_val);
+ if (!val)
+ return -EINVAL;
tpi->proto = proto;
- if ((*(u8 *)options & 0xF0) != 0x40)
+ if ((*val & 0xF0) != 0x40)
hdr_len += 4;
}
tpi->hdr_len = hdr_len;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 18068ed42f25..f369e7ce685b 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -748,6 +748,39 @@ out:;
}
EXPORT_SYMBOL(__icmp_send);
+#if IS_ENABLED(CONFIG_NF_NAT)
+#include <net/netfilter/nf_conntrack.h>
+void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info)
+{
+ struct sk_buff *cloned_skb = NULL;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+ __be32 orig_ip;
+
+ ct = nf_ct_get(skb_in, &ctinfo);
+ if (!ct || !(ct->status & IPS_SRC_NAT)) {
+ icmp_send(skb_in, type, code, info);
+ return;
+ }
+
+ if (skb_shared(skb_in))
+ skb_in = cloned_skb = skb_clone(skb_in, GFP_ATOMIC);
+
+ if (unlikely(!skb_in || skb_network_header(skb_in) < skb_in->head ||
+ (skb_network_header(skb_in) + sizeof(struct iphdr)) >
+ skb_tail_pointer(skb_in) || skb_ensure_writable(skb_in,
+ skb_network_offset(skb_in) + sizeof(struct iphdr))))
+ goto out;
+
+ orig_ip = ip_hdr(skb_in)->saddr;
+ ip_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.ip;
+ icmp_send(skb_in, type, code, info);
+ ip_hdr(skb_in)->saddr = orig_ip;
+out:
+ consume_skb(cloned_skb);
+}
+EXPORT_SYMBOL(icmp_ndo_send);
+#endif
static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
{
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index f11e997e517b..8c8377568a78 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -100,13 +100,9 @@ static size_t inet_sk_attr_size(struct sock *sk,
aux = handler->idiag_get_aux_size(sk, net_admin);
return nla_total_size(sizeof(struct tcp_info))
- + nla_total_size(1) /* INET_DIAG_SHUTDOWN */
- + nla_total_size(1) /* INET_DIAG_TOS */
- + nla_total_size(1) /* INET_DIAG_TCLASS */
- + nla_total_size(4) /* INET_DIAG_MARK */
- + nla_total_size(4) /* INET_DIAG_CLASS_ID */
- + nla_total_size(sizeof(struct inet_diag_meminfo))
+ nla_total_size(sizeof(struct inet_diag_msg))
+ + inet_diag_msg_attrs_size()
+ + nla_total_size(sizeof(struct inet_diag_meminfo))
+ nla_total_size(SK_MEMINFO_VARS * sizeof(u32))
+ nla_total_size(TCP_CA_NAME_MAX)
+ nla_total_size(sizeof(struct tcpvegas_info))
@@ -147,6 +143,24 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, sk->sk_mark))
goto errout;
+ if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) ||
+ ext & (1 << (INET_DIAG_TCLASS - 1))) {
+ u32 classid = 0;
+
+#ifdef CONFIG_SOCK_CGROUP_DATA
+ classid = sock_cgroup_classid(&sk->sk_cgrp_data);
+#endif
+ /* Fallback to socket priority if class id isn't set.
+ * Classful qdiscs use it as direct reference to class.
+ * For cgroup2 classid is always zero.
+ */
+ if (!classid)
+ classid = sk->sk_priority;
+
+ if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid))
+ goto errout;
+ }
+
r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk));
r->idiag_inode = sock_i_ino(sk);
@@ -284,24 +298,6 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
goto errout;
}
- if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) ||
- ext & (1 << (INET_DIAG_TCLASS - 1))) {
- u32 classid = 0;
-
-#ifdef CONFIG_SOCK_CGROUP_DATA
- classid = sock_cgroup_classid(&sk->sk_cgrp_data);
-#endif
- /* Fallback to socket priority if class id isn't set.
- * Classful qdiscs use it as direct reference to class.
- * For cgroup2 classid is always zero.
- */
- if (!classid)
- classid = sk->sk_priority;
-
- if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid))
- goto errout;
- }
-
out:
nlmsg_end(skb, nlh);
return 0;
diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c
index e35736b99300..a93e7d1e1251 100644
--- a/net/ipv4/raw_diag.c
+++ b/net/ipv4/raw_diag.c
@@ -100,8 +100,9 @@ static int raw_diag_dump_one(struct sk_buff *in_skb,
if (IS_ERR(sk))
return PTR_ERR(sk);
- rep = nlmsg_new(sizeof(struct inet_diag_msg) +
- sizeof(struct inet_diag_meminfo) + 64,
+ rep = nlmsg_new(nla_total_size(sizeof(struct inet_diag_msg)) +
+ inet_diag_msg_attrs_size() +
+ nla_total_size(sizeof(struct inet_diag_meminfo)) + 64,
GFP_KERNEL);
if (!rep) {
sock_put(sk);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 316ebdf8151d..6b6b57000dad 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6124,7 +6124,11 @@ static void tcp_rcv_synrecv_state_fastopen(struct sock *sk)
{
struct request_sock *req;
- tcp_try_undo_loss(sk, false);
+ /* If we are still handling the SYNACK RTO, see if timestamp ECR allows
+ * undo. If peer SACKs triggered fast recovery, we can't undo here.
+ */
+ if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
+ tcp_try_undo_loss(sk, false);
/* Reset rtx states to prevent spurious retransmits_timed_out() */
tcp_sk(sk)->retrans_stamp = 0;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index db76b9609299..08a41f1e1cd2 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1857,8 +1857,12 @@ int __udp_disconnect(struct sock *sk, int flags)
inet->inet_dport = 0;
sock_rps_reset_rxhash(sk);
sk->sk_bound_dev_if = 0;
- if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
+ if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) {
inet_reset_saddr(sk);
+ if (sk->sk_prot->rehash &&
+ (sk->sk_userlocks & SOCK_BINDPORT_LOCK))
+ sk->sk_prot->rehash(sk);
+ }
if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) {
sk->sk_prot->unhash(sk);
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 910555a4d9fe..dccd2286bc28 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -64,8 +64,9 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
goto out;
err = -ENOMEM;
- rep = nlmsg_new(sizeof(struct inet_diag_msg) +
- sizeof(struct inet_diag_meminfo) + 64,
+ rep = nlmsg_new(nla_total_size(sizeof(struct inet_diag_msg)) +
+ inet_diag_msg_attrs_size() +
+ nla_total_size(sizeof(struct inet_diag_meminfo)) + 64,
GFP_KERNEL);
if (!rep)
goto out;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index cb493e15959c..e6e1290ea06f 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1226,11 +1226,13 @@ check_cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long *expires)
}
static void
-cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_rt)
+cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires,
+ bool del_rt, bool del_peer)
{
struct fib6_info *f6i;
- f6i = addrconf_get_prefix_route(&ifp->addr, ifp->prefix_len,
+ f6i = addrconf_get_prefix_route(del_peer ? &ifp->peer_addr : &ifp->addr,
+ ifp->prefix_len,
ifp->idev->dev, 0, RTF_DEFAULT, true);
if (f6i) {
if (del_rt)
@@ -1293,7 +1295,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
if (action != CLEANUP_PREFIX_RT_NOP) {
cleanup_prefix_route(ifp, expires,
- action == CLEANUP_PREFIX_RT_DEL);
+ action == CLEANUP_PREFIX_RT_DEL, false);
}
/* clean up prefsrc entries */
@@ -4586,12 +4588,14 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
}
static int modify_prefix_route(struct inet6_ifaddr *ifp,
- unsigned long expires, u32 flags)
+ unsigned long expires, u32 flags,
+ bool modify_peer)
{
struct fib6_info *f6i;
u32 prio;
- f6i = addrconf_get_prefix_route(&ifp->addr, ifp->prefix_len,
+ f6i = addrconf_get_prefix_route(modify_peer ? &ifp->peer_addr : &ifp->addr,
+ ifp->prefix_len,
ifp->idev->dev, 0, RTF_DEFAULT, true);
if (!f6i)
return -ENOENT;
@@ -4602,7 +4606,8 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
ip6_del_rt(dev_net(ifp->idev->dev), f6i);
/* add new one */
- addrconf_prefix_route(&ifp->addr, ifp->prefix_len,
+ addrconf_prefix_route(modify_peer ? &ifp->peer_addr : &ifp->addr,
+ ifp->prefix_len,
ifp->rt_priority, ifp->idev->dev,
expires, flags, GFP_KERNEL);
} else {
@@ -4624,6 +4629,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
unsigned long timeout;
bool was_managetempaddr;
bool had_prefixroute;
+ bool new_peer = false;
ASSERT_RTNL();
@@ -4655,6 +4661,13 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
cfg->preferred_lft = timeout;
}
+ if (cfg->peer_pfx &&
+ memcmp(&ifp->peer_addr, cfg->peer_pfx, sizeof(struct in6_addr))) {
+ if (!ipv6_addr_any(&ifp->peer_addr))
+ cleanup_prefix_route(ifp, expires, true, true);
+ new_peer = true;
+ }
+
spin_lock_bh(&ifp->lock);
was_managetempaddr = ifp->flags & IFA_F_MANAGETEMPADDR;
had_prefixroute = ifp->flags & IFA_F_PERMANENT &&
@@ -4670,6 +4683,9 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
if (cfg->rt_priority && cfg->rt_priority != ifp->rt_priority)
ifp->rt_priority = cfg->rt_priority;
+ if (new_peer)
+ ifp->peer_addr = *cfg->peer_pfx;
+
spin_unlock_bh(&ifp->lock);
if (!(ifp->flags&IFA_F_TENTATIVE))
ipv6_ifa_notify(0, ifp);
@@ -4678,7 +4694,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
int rc = -ENOENT;
if (had_prefixroute)
- rc = modify_prefix_route(ifp, expires, flags);
+ rc = modify_prefix_route(ifp, expires, flags, false);
/* prefix route could have been deleted; if so restore it */
if (rc == -ENOENT) {
@@ -4686,6 +4702,15 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
ifp->rt_priority, ifp->idev->dev,
expires, flags, GFP_KERNEL);
}
+
+ if (had_prefixroute && !ipv6_addr_any(&ifp->peer_addr))
+ rc = modify_prefix_route(ifp, expires, flags, true);
+
+ if (rc == -ENOENT && !ipv6_addr_any(&ifp->peer_addr)) {
+ addrconf_prefix_route(&ifp->peer_addr, ifp->prefix_len,
+ ifp->rt_priority, ifp->idev->dev,
+ expires, flags, GFP_KERNEL);
+ }
} else if (had_prefixroute) {
enum cleanup_prefix_rt_t action;
unsigned long rt_expires;
@@ -4696,7 +4721,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
if (action != CLEANUP_PREFIX_RT_NOP) {
cleanup_prefix_route(ifp, rt_expires,
- action == CLEANUP_PREFIX_RT_DEL);
+ action == CLEANUP_PREFIX_RT_DEL, false);
}
}
@@ -5983,9 +6008,9 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
if (ifp->idev->cnf.forwarding)
addrconf_join_anycast(ifp);
if (!ipv6_addr_any(&ifp->peer_addr))
- addrconf_prefix_route(&ifp->peer_addr, 128, 0,
- ifp->idev->dev, 0, 0,
- GFP_ATOMIC);
+ addrconf_prefix_route(&ifp->peer_addr, 128,
+ ifp->rt_priority, ifp->idev->dev,
+ 0, 0, GFP_ATOMIC);
break;
case RTM_DELADDR:
if (ifp->idev->cnf.forwarding)
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 58fbde244381..72abf892302f 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1102,8 +1102,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
found++;
break;
}
- if (rt_can_ecmp)
- fallback_ins = fallback_ins ?: ins;
+ fallback_ins = fallback_ins ?: ins;
goto next_iter;
}
@@ -1146,7 +1145,9 @@ next_iter:
}
if (fallback_ins && !found) {
- /* No ECMP-able route found, replace first non-ECMP one */
+ /* No matching route with same ecmp-able-ness found, replace
+ * first matching route
+ */
ins = fallback_ins;
iter = rcu_dereference_protected(*ins,
lockdep_is_held(&rt->fib6_table->tb6_lock));
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 55bfc5149d0c..781ca8c07a0d 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -437,8 +437,6 @@ static int ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return -ENOENT;
switch (type) {
- struct ipv6_tlv_tnl_enc_lim *tel;
- __u32 teli;
case ICMPV6_DEST_UNREACH:
net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
t->parms.name);
@@ -452,7 +450,10 @@ static int ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
break;
}
return 0;
- case ICMPV6_PARAMPROB:
+ case ICMPV6_PARAMPROB: {
+ struct ipv6_tlv_tnl_enc_lim *tel;
+ __u32 teli;
+
teli = 0;
if (code == ICMPV6_HDR_FIELD)
teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);
@@ -468,6 +469,7 @@ static int ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
t->parms.name);
}
return 0;
+ }
case ICMPV6_PKT_TOOBIG:
ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
return 0;
diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c
index 02045494c24c..e0086758b6ee 100644
--- a/net/ipv6/ip6_icmp.c
+++ b/net/ipv6/ip6_icmp.c
@@ -45,4 +45,38 @@ out:
rcu_read_unlock();
}
EXPORT_SYMBOL(icmpv6_send);
+
+#if IS_ENABLED(CONFIG_NF_NAT)
+#include <net/netfilter/nf_conntrack.h>
+void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info)
+{
+ struct sk_buff *cloned_skb = NULL;
+ enum ip_conntrack_info ctinfo;
+ struct in6_addr orig_ip;
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(skb_in, &ctinfo);
+ if (!ct || !(ct->status & IPS_SRC_NAT)) {
+ icmpv6_send(skb_in, type, code, info);
+ return;
+ }
+
+ if (skb_shared(skb_in))
+ skb_in = cloned_skb = skb_clone(skb_in, GFP_ATOMIC);
+
+ if (unlikely(!skb_in || skb_network_header(skb_in) < skb_in->head ||
+ (skb_network_header(skb_in) + sizeof(struct ipv6hdr)) >
+ skb_tail_pointer(skb_in) || skb_ensure_writable(skb_in,
+ skb_network_offset(skb_in) + sizeof(struct ipv6hdr))))
+ goto out;
+
+ orig_ip = ipv6_hdr(skb_in)->saddr;
+ ipv6_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.in6;
+ icmpv6_send(skb_in, type, code, info);
+ ipv6_hdr(skb_in)->saddr = orig_ip;
+out:
+ consume_skb(cloned_skb);
+}
+EXPORT_SYMBOL(icmpv6_ndo_send);
+#endif
#endif
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index b5dd20c4599b..4703b09808d0 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -121,6 +121,7 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev)
/**
* ip6_tnl_lookup - fetch tunnel matching the end-point addresses
+ * @link: ifindex of underlying interface
* @remote: the address of the tunnel exit-point
* @local: the address of the tunnel entry-point
*
@@ -134,37 +135,56 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev)
for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
static struct ip6_tnl *
-ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local)
+ip6_tnl_lookup(struct net *net, int link,
+ const struct in6_addr *remote, const struct in6_addr *local)
{
unsigned int hash = HASH(remote, local);
- struct ip6_tnl *t;
+ struct ip6_tnl *t, *cand = NULL;
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
struct in6_addr any;
for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
- if (ipv6_addr_equal(local, &t->parms.laddr) &&
- ipv6_addr_equal(remote, &t->parms.raddr) &&
- (t->dev->flags & IFF_UP))
+ if (!ipv6_addr_equal(local, &t->parms.laddr) ||
+ !ipv6_addr_equal(remote, &t->parms.raddr) ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (link == t->parms.link)
return t;
+ else
+ cand = t;
}
memset(&any, 0, sizeof(any));
hash = HASH(&any, local);
for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
- if (ipv6_addr_equal(local, &t->parms.laddr) &&
- ipv6_addr_any(&t->parms.raddr) &&
- (t->dev->flags & IFF_UP))
+ if (!ipv6_addr_equal(local, &t->parms.laddr) ||
+ !ipv6_addr_any(&t->parms.raddr) ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (link == t->parms.link)
return t;
+ else if (!cand)
+ cand = t;
}
hash = HASH(remote, &any);
for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
- if (ipv6_addr_equal(remote, &t->parms.raddr) &&
- ipv6_addr_any(&t->parms.laddr) &&
- (t->dev->flags & IFF_UP))
+ if (!ipv6_addr_equal(remote, &t->parms.raddr) ||
+ !ipv6_addr_any(&t->parms.laddr) ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (link == t->parms.link)
return t;
+ else if (!cand)
+ cand = t;
}
+ if (cand)
+ return cand;
+
t = rcu_dereference(ip6n->collect_md_tun);
if (t && t->dev->flags & IFF_UP)
return t;
@@ -351,7 +371,8 @@ static struct ip6_tnl *ip6_tnl_locate(struct net *net,
(t = rtnl_dereference(*tp)) != NULL;
tp = &t->next) {
if (ipv6_addr_equal(local, &t->parms.laddr) &&
- ipv6_addr_equal(remote, &t->parms.raddr)) {
+ ipv6_addr_equal(remote, &t->parms.raddr) &&
+ p->link == t->parms.link) {
if (create)
return ERR_PTR(-EEXIST);
@@ -485,7 +506,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
processing of the error. */
rcu_read_lock();
- t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr, &ipv6h->saddr);
+ t = ip6_tnl_lookup(dev_net(skb->dev), skb->dev->ifindex, &ipv6h->daddr, &ipv6h->saddr);
if (!t)
goto out;
@@ -496,8 +517,6 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
err = 0;
switch (*type) {
- struct ipv6_tlv_tnl_enc_lim *tel;
- __u32 mtu, teli;
case ICMPV6_DEST_UNREACH:
net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
t->parms.name);
@@ -510,7 +529,10 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
rel_msg = 1;
}
break;
- case ICMPV6_PARAMPROB:
+ case ICMPV6_PARAMPROB: {
+ struct ipv6_tlv_tnl_enc_lim *tel;
+ __u32 teli;
+
teli = 0;
if ((*code) == ICMPV6_HDR_FIELD)
teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);
@@ -527,7 +549,10 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
t->parms.name);
}
break;
- case ICMPV6_PKT_TOOBIG:
+ }
+ case ICMPV6_PKT_TOOBIG: {
+ __u32 mtu;
+
ip6_update_pmtu(skb, net, htonl(*info), 0, 0,
sock_net_uid(net, NULL));
mtu = *info - offset;
@@ -541,6 +566,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
rel_msg = 1;
}
break;
+ }
case NDISC_REDIRECT:
ip6_redirect(skb, net, skb->dev->ifindex, 0,
sock_net_uid(net, NULL));
@@ -887,7 +913,7 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
int ret = -1;
rcu_read_lock();
- t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr);
+ t = ip6_tnl_lookup(dev_net(skb->dev), skb->dev->ifindex, &ipv6h->saddr, &ipv6h->daddr);
if (t) {
u8 tproto = READ_ONCE(t->parms.proto);
@@ -1420,8 +1446,10 @@ tx_err:
static void ip6_tnl_link_config(struct ip6_tnl *t)
{
struct net_device *dev = t->dev;
+ struct net_device *tdev = NULL;
struct __ip6_tnl_parm *p = &t->parms;
struct flowi6 *fl6 = &t->fl.u.ip6;
+ unsigned int mtu;
int t_hlen;
memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
@@ -1457,22 +1485,25 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr,
p->link, NULL, strict);
+ if (rt) {
+ tdev = rt->dst.dev;
+ ip6_rt_put(rt);
+ }
- if (!rt)
- return;
+ if (!tdev && p->link)
+ tdev = __dev_get_by_index(t->net, p->link);
- if (rt->dst.dev) {
- dev->hard_header_len = rt->dst.dev->hard_header_len +
- t_hlen;
+ if (tdev) {
+ dev->hard_header_len = tdev->hard_header_len + t_hlen;
+ mtu = min_t(unsigned int, tdev->mtu, IP6_MAX_MTU);
- dev->mtu = rt->dst.dev->mtu - t_hlen;
+ dev->mtu = mtu - t_hlen;
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
dev->mtu -= 8;
if (dev->mtu < IPV6_MIN_MTU)
dev->mtu = IPV6_MIN_MTU;
}
- ip6_rt_put(rt);
}
}
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 79fc012dd2ca..debdaeba5d8c 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -183,9 +183,15 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
retv = -EBUSY;
break;
}
- } else if (sk->sk_protocol != IPPROTO_TCP)
+ } else if (sk->sk_protocol == IPPROTO_TCP) {
+ if (sk->sk_prot != &tcpv6_prot) {
+ retv = -EBUSY;
+ break;
+ }
break;
-
+ } else {
+ break;
+ }
if (sk->sk_state != TCP_ESTABLISHED) {
retv = -ENOTCONN;
break;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 4fbdc60b4e07..2931224b674e 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -5198,6 +5198,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
*/
cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
NLM_F_REPLACE);
+ cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_CREATE;
nhn++;
}
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 000c742d0527..6aee699deb28 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -3450,7 +3450,7 @@ int ieee80211_attach_ack_skb(struct ieee80211_local *local, struct sk_buff *skb,
spin_lock_irqsave(&local->ack_status_lock, spin_flags);
id = idr_alloc(&local->ack_status_frames, ack_skb,
- 1, 0x40, GFP_ATOMIC);
+ 1, 0x2000, GFP_ATOMIC);
spin_unlock_irqrestore(&local->ack_status_lock, spin_flags);
if (id < 0) {
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 5fa13176036f..88d7a692a965 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -8,7 +8,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2019 Intel Corporation
+ * Copyright (C) 2018 - 2020 Intel Corporation
*/
#include <linux/delay.h>
@@ -1311,7 +1311,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
if (!res) {
ch_switch.timestamp = timestamp;
ch_switch.device_timestamp = device_timestamp;
- ch_switch.block_tx = beacon ? csa_ie.mode : 0;
+ ch_switch.block_tx = csa_ie.mode;
ch_switch.chandef = csa_ie.chandef;
ch_switch.count = csa_ie.count;
ch_switch.delay = csa_ie.max_switch_time;
@@ -1404,7 +1404,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
sdata->vif.csa_active = true;
sdata->csa_chandef = csa_ie.chandef;
- sdata->csa_block_tx = ch_switch.block_tx;
+ sdata->csa_block_tx = csa_ie.mode;
ifmgd->csa_ignored_same_chan = false;
if (sdata->csa_block_tx)
@@ -1438,7 +1438,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
* reset when the disconnection worker runs.
*/
sdata->vif.csa_active = true;
- sdata->csa_block_tx = ch_switch.block_tx;
+ sdata->csa_block_tx = csa_ie.mode;
ieee80211_queue_work(&local->hw, &ifmgd->csa_connection_drop_work);
mutex_unlock(&local->chanctx_mtx);
@@ -2959,7 +2959,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
(auth_transaction == 2 &&
ifmgd->auth_data->expected_transaction == 2)) {
if (!ieee80211_mark_sta_auth(sdata, bssid))
- goto out_err;
+ return; /* ignore frame -- wait for timeout */
} else if (ifmgd->auth_data->algorithm == WLAN_AUTH_SAE &&
auth_transaction == 2) {
sdata_info(sdata, "SAE peer confirmed\n");
@@ -2967,10 +2967,6 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
}
cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len);
- return;
- out_err:
- mutex_unlock(&sdata->local->sta_mtx);
- /* ignore frame -- wait for timeout */
}
#define case_WLAN(type) \
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 0e05ff037672..0ba98ad9bc85 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -4114,7 +4114,7 @@ void __ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata)
lockdep_assert_held(&local->sta_mtx);
- list_for_each_entry_rcu(sta, &local->sta_list, list) {
+ list_for_each_entry(sta, &local->sta_list, list) {
if (sdata != sta->sdata &&
(!sta->sdata->bss || sta->sdata->bss != sdata->bss))
continue;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 4bd1faf4f779..87def9cb91ff 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2442,7 +2442,7 @@ static int ieee80211_store_ack_skb(struct ieee80211_local *local,
spin_lock_irqsave(&local->ack_status_lock, flags);
id = idr_alloc(&local->ack_status_frames, ack_skb,
- 1, 0x40, GFP_ATOMIC);
+ 1, 0x2000, GFP_ATOMIC);
spin_unlock_irqrestore(&local->ack_status_lock, flags);
if (id >= 0) {
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 32a7a53833c0..decd46b38393 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1063,16 +1063,22 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
elem_parse_failed = true;
break;
case WLAN_EID_VHT_OPERATION:
- if (elen >= sizeof(struct ieee80211_vht_operation))
+ if (elen >= sizeof(struct ieee80211_vht_operation)) {
elems->vht_operation = (void *)pos;
- else
- elem_parse_failed = true;
+ if (calc_crc)
+ crc = crc32_be(crc, pos - 2, elen + 2);
+ break;
+ }
+ elem_parse_failed = true;
break;
case WLAN_EID_OPMODE_NOTIF:
- if (elen > 0)
+ if (elen > 0) {
elems->opmode_notif = pos;
- else
- elem_parse_failed = true;
+ if (calc_crc)
+ crc = crc32_be(crc, pos - 2, elen + 2);
+ break;
+ }
+ elem_parse_failed = true;
break;
case WLAN_EID_MESH_ID:
elems->mesh_id = pos;
@@ -2987,10 +2993,22 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw,
int cf0, cf1;
int ccfs0, ccfs1, ccfs2;
int ccf0, ccf1;
+ u32 vht_cap;
+ bool support_80_80 = false;
+ bool support_160 = false;
if (!oper || !htop)
return false;
+ vht_cap = hw->wiphy->bands[chandef->chan->band]->vht_cap.cap;
+ support_160 = (vht_cap & (IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK |
+ IEEE80211_VHT_CAP_EXT_NSS_BW_MASK));
+ support_80_80 = ((vht_cap &
+ IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) ||
+ (vht_cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ &&
+ vht_cap & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK) ||
+ ((vht_cap & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK) >>
+ IEEE80211_VHT_CAP_EXT_NSS_BW_SHIFT > 1));
ccfs0 = oper->center_freq_seg0_idx;
ccfs1 = oper->center_freq_seg1_idx;
ccfs2 = (le16_to_cpu(htop->operation_mode) &
@@ -3018,10 +3036,10 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw,
unsigned int diff;
diff = abs(ccf1 - ccf0);
- if (diff == 8) {
+ if ((diff == 8) && support_160) {
new.width = NL80211_CHAN_WIDTH_160;
new.center_freq1 = cf1;
- } else if (diff > 8) {
+ } else if ((diff > 8) && support_80_80) {
new.width = NL80211_CHAN_WIDTH_80P80;
new.center_freq2 = cf1;
}
diff --git a/net/mptcp/Kconfig b/net/mptcp/Kconfig
index 49f6054e7f4e..a9ed3bf1d93f 100644
--- a/net/mptcp/Kconfig
+++ b/net/mptcp/Kconfig
@@ -4,6 +4,7 @@ config MPTCP
depends on INET
select SKB_EXTENSIONS
select CRYPTO_LIB_SHA256
+ select CRYPTO
help
Multipath TCP (MPTCP) connections send and receive data over multiple
subflows in order to utilize multiple network paths. Each subflow
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 45acd877bef3..fd2c3150e591 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -334,6 +334,8 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
struct mptcp_sock *msk;
unsigned int ack_size;
bool ret = false;
+ bool can_ack;
+ u64 ack_seq;
u8 tcp_fin;
if (skb) {
@@ -360,9 +362,22 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
ret = true;
}
+ /* passive sockets msk will set the 'can_ack' after accept(), even
+ * if the first subflow may have the already the remote key handy
+ */
+ can_ack = true;
opts->ext_copy.use_ack = 0;
msk = mptcp_sk(subflow->conn);
- if (!msk || !READ_ONCE(msk->can_ack)) {
+ if (likely(msk && READ_ONCE(msk->can_ack))) {
+ ack_seq = msk->ack_seq;
+ } else if (subflow->can_ack) {
+ mptcp_crypto_key_sha(subflow->remote_key, NULL, &ack_seq);
+ ack_seq++;
+ } else {
+ can_ack = false;
+ }
+
+ if (unlikely(!can_ack)) {
*size = ALIGN(dss_size, 4);
return ret;
}
@@ -375,7 +390,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
dss_size += ack_size;
- opts->ext_copy.data_ack = msk->ack_seq;
+ opts->ext_copy.data_ack = ack_seq;
opts->ext_copy.ack64 = 1;
opts->ext_copy.use_ack = 1;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 73780b4cb108..3c19a8efdcea 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -543,6 +543,11 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
}
}
+static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu)
+{
+ return 0;
+}
+
static int __mptcp_init_sock(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -551,6 +556,7 @@ static int __mptcp_init_sock(struct sock *sk)
__set_bit(MPTCP_SEND_SPACE, &msk->flags);
msk->first = NULL;
+ inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
return 0;
}
@@ -643,7 +649,7 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk)
}
#endif
-struct sock *mptcp_sk_clone_lock(const struct sock *sk)
+static struct sock *mptcp_sk_clone_lock(const struct sock *sk)
{
struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC);
@@ -755,60 +761,50 @@ static int mptcp_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, unsigned int optlen)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- int ret = -EOPNOTSUPP;
struct socket *ssock;
- struct sock *ssk;
pr_debug("msk=%p", msk);
/* @@ the meaning of setsockopt() when the socket is connected and
- * there are multiple subflows is not defined.
+ * there are multiple subflows is not yet defined. It is up to the
+ * MPTCP-level socket to configure the subflows until the subflow
+ * is in TCP fallback, when TCP socket options are passed through
+ * to the one remaining subflow.
*/
lock_sock(sk);
- ssock = __mptcp_socket_create(msk, MPTCP_SAME_STATE);
- if (IS_ERR(ssock)) {
- release_sock(sk);
- return ret;
- }
+ ssock = __mptcp_tcp_fallback(msk);
+ if (ssock)
+ return tcp_setsockopt(ssock->sk, level, optname, optval,
+ optlen);
- ssk = ssock->sk;
- sock_hold(ssk);
release_sock(sk);
- ret = tcp_setsockopt(ssk, level, optname, optval, optlen);
- sock_put(ssk);
-
- return ret;
+ return -EOPNOTSUPP;
}
static int mptcp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *option)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- int ret = -EOPNOTSUPP;
struct socket *ssock;
- struct sock *ssk;
pr_debug("msk=%p", msk);
- /* @@ the meaning of getsockopt() when the socket is connected and
- * there are multiple subflows is not defined.
+ /* @@ the meaning of setsockopt() when the socket is connected and
+ * there are multiple subflows is not yet defined. It is up to the
+ * MPTCP-level socket to configure the subflows until the subflow
+ * is in TCP fallback, when socket options are passed through
+ * to the one remaining subflow.
*/
lock_sock(sk);
- ssock = __mptcp_socket_create(msk, MPTCP_SAME_STATE);
- if (IS_ERR(ssock)) {
- release_sock(sk);
- return ret;
- }
+ ssock = __mptcp_tcp_fallback(msk);
+ if (ssock)
+ return tcp_getsockopt(ssock->sk, level, optname, optval,
+ option);
- ssk = ssock->sk;
- sock_hold(ssk);
release_sock(sk);
- ret = tcp_getsockopt(ssk, level, optname, optval, option);
- sock_put(ssk);
-
- return ret;
+ return -EOPNOTSUPP;
}
static int mptcp_get_port(struct sock *sk, unsigned short snum)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 8a99a2930284..9f8663b30456 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -56,8 +56,8 @@
#define MPTCP_DSS_FLAG_MASK (0x1F)
/* MPTCP socket flags */
-#define MPTCP_DATA_READY BIT(0)
-#define MPTCP_SEND_SPACE BIT(1)
+#define MPTCP_DATA_READY 0
+#define MPTCP_SEND_SPACE 1
/* MPTCP connection sock */
struct mptcp_sock {
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 69c107f9ba8d..8dd17589217d 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -723,6 +723,20 @@ ip_set_rcu_get(struct net *net, ip_set_id_t index)
return set;
}
+static inline void
+ip_set_lock(struct ip_set *set)
+{
+ if (!set->variant->region_lock)
+ spin_lock_bh(&set->lock);
+}
+
+static inline void
+ip_set_unlock(struct ip_set *set)
+{
+ if (!set->variant->region_lock)
+ spin_unlock_bh(&set->lock);
+}
+
int
ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
const struct xt_action_param *par, struct ip_set_adt_opt *opt)
@@ -744,9 +758,9 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
if (ret == -EAGAIN) {
/* Type requests element to be completed */
pr_debug("element must be completed, ADD is triggered\n");
- spin_lock_bh(&set->lock);
+ ip_set_lock(set);
set->variant->kadt(set, skb, par, IPSET_ADD, opt);
- spin_unlock_bh(&set->lock);
+ ip_set_unlock(set);
ret = 1;
} else {
/* --return-nomatch: invert matched element */
@@ -775,9 +789,9 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
!(opt->family == set->family || set->family == NFPROTO_UNSPEC))
return -IPSET_ERR_TYPE_MISMATCH;
- spin_lock_bh(&set->lock);
+ ip_set_lock(set);
ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt);
- spin_unlock_bh(&set->lock);
+ ip_set_unlock(set);
return ret;
}
@@ -797,9 +811,9 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
!(opt->family == set->family || set->family == NFPROTO_UNSPEC))
return -IPSET_ERR_TYPE_MISMATCH;
- spin_lock_bh(&set->lock);
+ ip_set_lock(set);
ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt);
- spin_unlock_bh(&set->lock);
+ ip_set_unlock(set);
return ret;
}
@@ -1264,9 +1278,9 @@ ip_set_flush_set(struct ip_set *set)
{
pr_debug("set: %s\n", set->name);
- spin_lock_bh(&set->lock);
+ ip_set_lock(set);
set->variant->flush(set);
- spin_unlock_bh(&set->lock);
+ ip_set_unlock(set);
}
static int ip_set_flush(struct net *net, struct sock *ctnl, struct sk_buff *skb,
@@ -1713,9 +1727,9 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
bool eexist = flags & IPSET_FLAG_EXIST, retried = false;
do {
- spin_lock_bh(&set->lock);
+ ip_set_lock(set);
ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
- spin_unlock_bh(&set->lock);
+ ip_set_unlock(set);
retried = true;
} while (ret == -EAGAIN &&
set->variant->resize &&
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 7480ce55b5c8..e52d7b7597a0 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -7,13 +7,21 @@
#include <linux/rcupdate.h>
#include <linux/jhash.h>
#include <linux/types.h>
+#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/ipset/ip_set.h>
-#define __ipset_dereference_protected(p, c) rcu_dereference_protected(p, c)
-#define ipset_dereference_protected(p, set) \
- __ipset_dereference_protected(p, lockdep_is_held(&(set)->lock))
-
-#define rcu_dereference_bh_nfnl(p) rcu_dereference_bh_check(p, 1)
+#define __ipset_dereference(p) \
+ rcu_dereference_protected(p, 1)
+#define ipset_dereference_nfnl(p) \
+ rcu_dereference_protected(p, \
+ lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
+#define ipset_dereference_set(p, set) \
+ rcu_dereference_protected(p, \
+ lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET) || \
+ lockdep_is_held(&(set)->lock))
+#define ipset_dereference_bh_nfnl(p) \
+ rcu_dereference_bh_check(p, \
+ lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
/* Hashing which uses arrays to resolve clashing. The hash table is resized
* (doubled) when searching becomes too long.
@@ -72,11 +80,35 @@ struct hbucket {
__aligned(__alignof__(u64));
};
+/* Region size for locking == 2^HTABLE_REGION_BITS */
+#define HTABLE_REGION_BITS 10
+#define ahash_numof_locks(htable_bits) \
+ ((htable_bits) < HTABLE_REGION_BITS ? 1 \
+ : jhash_size((htable_bits) - HTABLE_REGION_BITS))
+#define ahash_sizeof_regions(htable_bits) \
+ (ahash_numof_locks(htable_bits) * sizeof(struct ip_set_region))
+#define ahash_region(n, htable_bits) \
+ ((n) % ahash_numof_locks(htable_bits))
+#define ahash_bucket_start(h, htable_bits) \
+ ((htable_bits) < HTABLE_REGION_BITS ? 0 \
+ : (h) * jhash_size(HTABLE_REGION_BITS))
+#define ahash_bucket_end(h, htable_bits) \
+ ((htable_bits) < HTABLE_REGION_BITS ? jhash_size(htable_bits) \
+ : ((h) + 1) * jhash_size(HTABLE_REGION_BITS))
+
+struct htable_gc {
+ struct delayed_work dwork;
+ struct ip_set *set; /* Set the gc belongs to */
+ u32 region; /* Last gc run position */
+};
+
/* The hash table: the table size stored here in order to make resizing easy */
struct htable {
atomic_t ref; /* References for resizing */
- atomic_t uref; /* References for dumping */
+ atomic_t uref; /* References for dumping and gc */
u8 htable_bits; /* size of hash table == 2^htable_bits */
+ u32 maxelem; /* Maxelem per region */
+ struct ip_set_region *hregion; /* Region locks and ext sizes */
struct hbucket __rcu *bucket[0]; /* hashtable buckets */
};
@@ -162,6 +194,10 @@ htable_bits(u32 hashsize)
#define NLEN 0
#endif /* IP_SET_HASH_WITH_NETS */
+#define SET_ELEM_EXPIRED(set, d) \
+ (SET_WITH_TIMEOUT(set) && \
+ ip_set_timeout_expired(ext_timeout(d, set)))
+
#endif /* _IP_SET_HASH_GEN_H */
#ifndef MTYPE
@@ -205,10 +241,12 @@ htable_bits(u32 hashsize)
#undef mtype_test_cidrs
#undef mtype_test
#undef mtype_uref
-#undef mtype_expire
#undef mtype_resize
+#undef mtype_ext_size
+#undef mtype_resize_ad
#undef mtype_head
#undef mtype_list
+#undef mtype_gc_do
#undef mtype_gc
#undef mtype_gc_init
#undef mtype_variant
@@ -247,10 +285,12 @@ htable_bits(u32 hashsize)
#define mtype_test_cidrs IPSET_TOKEN(MTYPE, _test_cidrs)
#define mtype_test IPSET_TOKEN(MTYPE, _test)
#define mtype_uref IPSET_TOKEN(MTYPE, _uref)
-#define mtype_expire IPSET_TOKEN(MTYPE, _expire)
#define mtype_resize IPSET_TOKEN(MTYPE, _resize)
+#define mtype_ext_size IPSET_TOKEN(MTYPE, _ext_size)
+#define mtype_resize_ad IPSET_TOKEN(MTYPE, _resize_ad)
#define mtype_head IPSET_TOKEN(MTYPE, _head)
#define mtype_list IPSET_TOKEN(MTYPE, _list)
+#define mtype_gc_do IPSET_TOKEN(MTYPE, _gc_do)
#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
#define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init)
#define mtype_variant IPSET_TOKEN(MTYPE, _variant)
@@ -275,8 +315,7 @@ htable_bits(u32 hashsize)
/* The generic hash structure */
struct htype {
struct htable __rcu *table; /* the hash table */
- struct timer_list gc; /* garbage collection when timeout enabled */
- struct ip_set *set; /* attached to this ip_set */
+ struct htable_gc gc; /* gc workqueue */
u32 maxelem; /* max elements in the hash */
u32 initval; /* random jhash init value */
#ifdef IP_SET_HASH_WITH_MARKMASK
@@ -288,21 +327,33 @@ struct htype {
#ifdef IP_SET_HASH_WITH_NETMASK
u8 netmask; /* netmask value for subnets to store */
#endif
+ struct list_head ad; /* Resize add|del backlist */
struct mtype_elem next; /* temporary storage for uadd */
#ifdef IP_SET_HASH_WITH_NETS
struct net_prefixes nets[NLEN]; /* book-keeping of prefixes */
#endif
};
+/* ADD|DEL entries saved during resize */
+struct mtype_resize_ad {
+ struct list_head list;
+ enum ipset_adt ad; /* ADD|DEL element */
+ struct mtype_elem d; /* Element value */
+ struct ip_set_ext ext; /* Extensions for ADD */
+ struct ip_set_ext mext; /* Target extensions for ADD */
+ u32 flags; /* Flags for ADD */
+};
+
#ifdef IP_SET_HASH_WITH_NETS
/* Network cidr size book keeping when the hash stores different
* sized networks. cidr == real cidr + 1 to support /0.
*/
static void
-mtype_add_cidr(struct htype *h, u8 cidr, u8 n)
+mtype_add_cidr(struct ip_set *set, struct htype *h, u8 cidr, u8 n)
{
int i, j;
+ spin_lock_bh(&set->lock);
/* Add in increasing prefix order, so larger cidr first */
for (i = 0, j = -1; i < NLEN && h->nets[i].cidr[n]; i++) {
if (j != -1) {
@@ -311,7 +362,7 @@ mtype_add_cidr(struct htype *h, u8 cidr, u8 n)
j = i;
} else if (h->nets[i].cidr[n] == cidr) {
h->nets[CIDR_POS(cidr)].nets[n]++;
- return;
+ goto unlock;
}
}
if (j != -1) {
@@ -320,24 +371,29 @@ mtype_add_cidr(struct htype *h, u8 cidr, u8 n)
}
h->nets[i].cidr[n] = cidr;
h->nets[CIDR_POS(cidr)].nets[n] = 1;
+unlock:
+ spin_unlock_bh(&set->lock);
}
static void
-mtype_del_cidr(struct htype *h, u8 cidr, u8 n)
+mtype_del_cidr(struct ip_set *set, struct htype *h, u8 cidr, u8 n)
{
u8 i, j, net_end = NLEN - 1;
+ spin_lock_bh(&set->lock);
for (i = 0; i < NLEN; i++) {
if (h->nets[i].cidr[n] != cidr)
continue;
h->nets[CIDR_POS(cidr)].nets[n]--;
if (h->nets[CIDR_POS(cidr)].nets[n] > 0)
- return;
+ goto unlock;
for (j = i; j < net_end && h->nets[j].cidr[n]; j++)
h->nets[j].cidr[n] = h->nets[j + 1].cidr[n];
h->nets[j].cidr[n] = 0;
- return;
+ goto unlock;
}
+unlock:
+ spin_unlock_bh(&set->lock);
}
#endif
@@ -345,7 +401,7 @@ mtype_del_cidr(struct htype *h, u8 cidr, u8 n)
static size_t
mtype_ahash_memsize(const struct htype *h, const struct htable *t)
{
- return sizeof(*h) + sizeof(*t);
+ return sizeof(*h) + sizeof(*t) + ahash_sizeof_regions(t->htable_bits);
}
/* Get the ith element from the array block n */
@@ -369,24 +425,29 @@ mtype_flush(struct ip_set *set)
struct htype *h = set->data;
struct htable *t;
struct hbucket *n;
- u32 i;
-
- t = ipset_dereference_protected(h->table, set);
- for (i = 0; i < jhash_size(t->htable_bits); i++) {
- n = __ipset_dereference_protected(hbucket(t, i), 1);
- if (!n)
- continue;
- if (set->extensions & IPSET_EXT_DESTROY)
- mtype_ext_cleanup(set, n);
- /* FIXME: use slab cache */
- rcu_assign_pointer(hbucket(t, i), NULL);
- kfree_rcu(n, rcu);
+ u32 r, i;
+
+ t = ipset_dereference_nfnl(h->table);
+ for (r = 0; r < ahash_numof_locks(t->htable_bits); r++) {
+ spin_lock_bh(&t->hregion[r].lock);
+ for (i = ahash_bucket_start(r, t->htable_bits);
+ i < ahash_bucket_end(r, t->htable_bits); i++) {
+ n = __ipset_dereference(hbucket(t, i));
+ if (!n)
+ continue;
+ if (set->extensions & IPSET_EXT_DESTROY)
+ mtype_ext_cleanup(set, n);
+ /* FIXME: use slab cache */
+ rcu_assign_pointer(hbucket(t, i), NULL);
+ kfree_rcu(n, rcu);
+ }
+ t->hregion[r].ext_size = 0;
+ t->hregion[r].elements = 0;
+ spin_unlock_bh(&t->hregion[r].lock);
}
#ifdef IP_SET_HASH_WITH_NETS
memset(h->nets, 0, sizeof(h->nets));
#endif
- set->elements = 0;
- set->ext_size = 0;
}
/* Destroy the hashtable part of the set */
@@ -397,7 +458,7 @@ mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy)
u32 i;
for (i = 0; i < jhash_size(t->htable_bits); i++) {
- n = __ipset_dereference_protected(hbucket(t, i), 1);
+ n = __ipset_dereference(hbucket(t, i));
if (!n)
continue;
if (set->extensions & IPSET_EXT_DESTROY && ext_destroy)
@@ -406,6 +467,7 @@ mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy)
kfree(n);
}
+ ip_set_free(t->hregion);
ip_set_free(t);
}
@@ -414,28 +476,21 @@ static void
mtype_destroy(struct ip_set *set)
{
struct htype *h = set->data;
+ struct list_head *l, *lt;
if (SET_WITH_TIMEOUT(set))
- del_timer_sync(&h->gc);
+ cancel_delayed_work_sync(&h->gc.dwork);
- mtype_ahash_destroy(set,
- __ipset_dereference_protected(h->table, 1), true);
+ mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true);
+ list_for_each_safe(l, lt, &h->ad) {
+ list_del(l);
+ kfree(l);
+ }
kfree(h);
set->data = NULL;
}
-static void
-mtype_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t))
-{
- struct htype *h = set->data;
-
- timer_setup(&h->gc, gc, 0);
- mod_timer(&h->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);
- pr_debug("gc initialized, run in every %u\n",
- IPSET_GC_PERIOD(set->timeout));
-}
-
static bool
mtype_same_set(const struct ip_set *a, const struct ip_set *b)
{
@@ -454,11 +509,9 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b)
a->extensions == b->extensions;
}
-/* Delete expired elements from the hashtable */
static void
-mtype_expire(struct ip_set *set, struct htype *h)
+mtype_gc_do(struct ip_set *set, struct htype *h, struct htable *t, u32 r)
{
- struct htable *t;
struct hbucket *n, *tmp;
struct mtype_elem *data;
u32 i, j, d;
@@ -466,10 +519,12 @@ mtype_expire(struct ip_set *set, struct htype *h)
#ifdef IP_SET_HASH_WITH_NETS
u8 k;
#endif
+ u8 htable_bits = t->htable_bits;
- t = ipset_dereference_protected(h->table, set);
- for (i = 0; i < jhash_size(t->htable_bits); i++) {
- n = __ipset_dereference_protected(hbucket(t, i), 1);
+ spin_lock_bh(&t->hregion[r].lock);
+ for (i = ahash_bucket_start(r, htable_bits);
+ i < ahash_bucket_end(r, htable_bits); i++) {
+ n = __ipset_dereference(hbucket(t, i));
if (!n)
continue;
for (j = 0, d = 0; j < n->pos; j++) {
@@ -485,58 +540,100 @@ mtype_expire(struct ip_set *set, struct htype *h)
smp_mb__after_atomic();
#ifdef IP_SET_HASH_WITH_NETS
for (k = 0; k < IPSET_NET_COUNT; k++)
- mtype_del_cidr(h,
+ mtype_del_cidr(set, h,
NCIDR_PUT(DCIDR_GET(data->cidr, k)),
k);
#endif
+ t->hregion[r].elements--;
ip_set_ext_destroy(set, data);
- set->elements--;
d++;
}
if (d >= AHASH_INIT_SIZE) {
if (d >= n->size) {
+ t->hregion[r].ext_size -=
+ ext_size(n->size, dsize);
rcu_assign_pointer(hbucket(t, i), NULL);
kfree_rcu(n, rcu);
continue;
}
tmp = kzalloc(sizeof(*tmp) +
- (n->size - AHASH_INIT_SIZE) * dsize,
- GFP_ATOMIC);
+ (n->size - AHASH_INIT_SIZE) * dsize,
+ GFP_ATOMIC);
if (!tmp)
- /* Still try to delete expired elements */
+ /* Still try to delete expired elements. */
continue;
tmp->size = n->size - AHASH_INIT_SIZE;
for (j = 0, d = 0; j < n->pos; j++) {
if (!test_bit(j, n->used))
continue;
data = ahash_data(n, j, dsize);
- memcpy(tmp->value + d * dsize, data, dsize);
+ memcpy(tmp->value + d * dsize,
+ data, dsize);
set_bit(d, tmp->used);
d++;
}
tmp->pos = d;
- set->ext_size -= ext_size(AHASH_INIT_SIZE, dsize);
+ t->hregion[r].ext_size -=
+ ext_size(AHASH_INIT_SIZE, dsize);
rcu_assign_pointer(hbucket(t, i), tmp);
kfree_rcu(n, rcu);
}
}
+ spin_unlock_bh(&t->hregion[r].lock);
}
static void
-mtype_gc(struct timer_list *t)
+mtype_gc(struct work_struct *work)
{
- struct htype *h = from_timer(h, t, gc);
- struct ip_set *set = h->set;
+ struct htable_gc *gc;
+ struct ip_set *set;
+ struct htype *h;
+ struct htable *t;
+ u32 r, numof_locks;
+ unsigned int next_run;
+
+ gc = container_of(work, struct htable_gc, dwork.work);
+ set = gc->set;
+ h = set->data;
- pr_debug("called\n");
spin_lock_bh(&set->lock);
- mtype_expire(set, h);
+ t = ipset_dereference_set(h->table, set);
+ atomic_inc(&t->uref);
+ numof_locks = ahash_numof_locks(t->htable_bits);
+ r = gc->region++;
+ if (r >= numof_locks) {
+ r = gc->region = 0;
+ }
+ next_run = (IPSET_GC_PERIOD(set->timeout) * HZ) / numof_locks;
+ if (next_run < HZ/10)
+ next_run = HZ/10;
spin_unlock_bh(&set->lock);
- h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
- add_timer(&h->gc);
+ mtype_gc_do(set, h, t, r);
+
+ if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
+ pr_debug("Table destroy after resize by expire: %p\n", t);
+ mtype_ahash_destroy(set, t, false);
+ }
+
+ queue_delayed_work(system_power_efficient_wq, &gc->dwork, next_run);
+
+}
+
+static void
+mtype_gc_init(struct htable_gc *gc)
+{
+ INIT_DEFERRABLE_WORK(&gc->dwork, mtype_gc);
+ queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ);
}
+static int
+mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags);
+static int
+mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags);
+
/* Resize a hash: create a new hash table with doubling the hashsize
* and inserting the elements to it. Repeat until we succeed or
* fail due to memory pressures.
@@ -547,7 +644,7 @@ mtype_resize(struct ip_set *set, bool retried)
struct htype *h = set->data;
struct htable *t, *orig;
u8 htable_bits;
- size_t extsize, dsize = set->dsize;
+ size_t dsize = set->dsize;
#ifdef IP_SET_HASH_WITH_NETS
u8 flags;
struct mtype_elem *tmp;
@@ -555,7 +652,9 @@ mtype_resize(struct ip_set *set, bool retried)
struct mtype_elem *data;
struct mtype_elem *d;
struct hbucket *n, *m;
- u32 i, j, key;
+ struct list_head *l, *lt;
+ struct mtype_resize_ad *x;
+ u32 i, j, r, nr, key;
int ret;
#ifdef IP_SET_HASH_WITH_NETS
@@ -563,10 +662,8 @@ mtype_resize(struct ip_set *set, bool retried)
if (!tmp)
return -ENOMEM;
#endif
- rcu_read_lock_bh();
- orig = rcu_dereference_bh_nfnl(h->table);
+ orig = ipset_dereference_bh_nfnl(h->table);
htable_bits = orig->htable_bits;
- rcu_read_unlock_bh();
retry:
ret = 0;
@@ -583,88 +680,124 @@ retry:
ret = -ENOMEM;
goto out;
}
+ t->hregion = ip_set_alloc(ahash_sizeof_regions(htable_bits));
+ if (!t->hregion) {
+ kfree(t);
+ ret = -ENOMEM;
+ goto out;
+ }
t->htable_bits = htable_bits;
+ t->maxelem = h->maxelem / ahash_numof_locks(htable_bits);
+ for (i = 0; i < ahash_numof_locks(htable_bits); i++)
+ spin_lock_init(&t->hregion[i].lock);
- spin_lock_bh(&set->lock);
- orig = __ipset_dereference_protected(h->table, 1);
- /* There can't be another parallel resizing, but dumping is possible */
+ /* There can't be another parallel resizing,
+ * but dumping, gc, kernel side add/del are possible
+ */
+ orig = ipset_dereference_bh_nfnl(h->table);
atomic_set(&orig->ref, 1);
atomic_inc(&orig->uref);
- extsize = 0;
pr_debug("attempt to resize set %s from %u to %u, t %p\n",
set->name, orig->htable_bits, htable_bits, orig);
- for (i = 0; i < jhash_size(orig->htable_bits); i++) {
- n = __ipset_dereference_protected(hbucket(orig, i), 1);
- if (!n)
- continue;
- for (j = 0; j < n->pos; j++) {
- if (!test_bit(j, n->used))
+ for (r = 0; r < ahash_numof_locks(orig->htable_bits); r++) {
+ /* Expire may replace a hbucket with another one */
+ rcu_read_lock_bh();
+ for (i = ahash_bucket_start(r, orig->htable_bits);
+ i < ahash_bucket_end(r, orig->htable_bits); i++) {
+ n = __ipset_dereference(hbucket(orig, i));
+ if (!n)
continue;
- data = ahash_data(n, j, dsize);
+ for (j = 0; j < n->pos; j++) {
+ if (!test_bit(j, n->used))
+ continue;
+ data = ahash_data(n, j, dsize);
+ if (SET_ELEM_EXPIRED(set, data))
+ continue;
#ifdef IP_SET_HASH_WITH_NETS
- /* We have readers running parallel with us,
- * so the live data cannot be modified.
- */
- flags = 0;
- memcpy(tmp, data, dsize);
- data = tmp;
- mtype_data_reset_flags(data, &flags);
+ /* We have readers running parallel with us,
+ * so the live data cannot be modified.
+ */
+ flags = 0;
+ memcpy(tmp, data, dsize);
+ data = tmp;
+ mtype_data_reset_flags(data, &flags);
#endif
- key = HKEY(data, h->initval, htable_bits);
- m = __ipset_dereference_protected(hbucket(t, key), 1);
- if (!m) {
- m = kzalloc(sizeof(*m) +
+ key = HKEY(data, h->initval, htable_bits);
+ m = __ipset_dereference(hbucket(t, key));
+ nr = ahash_region(key, htable_bits);
+ if (!m) {
+ m = kzalloc(sizeof(*m) +
AHASH_INIT_SIZE * dsize,
GFP_ATOMIC);
- if (!m) {
- ret = -ENOMEM;
- goto cleanup;
- }
- m->size = AHASH_INIT_SIZE;
- extsize += ext_size(AHASH_INIT_SIZE, dsize);
- RCU_INIT_POINTER(hbucket(t, key), m);
- } else if (m->pos >= m->size) {
- struct hbucket *ht;
-
- if (m->size >= AHASH_MAX(h)) {
- ret = -EAGAIN;
- } else {
- ht = kzalloc(sizeof(*ht) +
+ if (!m) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+ m->size = AHASH_INIT_SIZE;
+ t->hregion[nr].ext_size +=
+ ext_size(AHASH_INIT_SIZE,
+ dsize);
+ RCU_INIT_POINTER(hbucket(t, key), m);
+ } else if (m->pos >= m->size) {
+ struct hbucket *ht;
+
+ if (m->size >= AHASH_MAX(h)) {
+ ret = -EAGAIN;
+ } else {
+ ht = kzalloc(sizeof(*ht) +
(m->size + AHASH_INIT_SIZE)
* dsize,
GFP_ATOMIC);
- if (!ht)
- ret = -ENOMEM;
+ if (!ht)
+ ret = -ENOMEM;
+ }
+ if (ret < 0)
+ goto cleanup;
+ memcpy(ht, m, sizeof(struct hbucket) +
+ m->size * dsize);
+ ht->size = m->size + AHASH_INIT_SIZE;
+ t->hregion[nr].ext_size +=
+ ext_size(AHASH_INIT_SIZE,
+ dsize);
+ kfree(m);
+ m = ht;
+ RCU_INIT_POINTER(hbucket(t, key), ht);
}
- if (ret < 0)
- goto cleanup;
- memcpy(ht, m, sizeof(struct hbucket) +
- m->size * dsize);
- ht->size = m->size + AHASH_INIT_SIZE;
- extsize += ext_size(AHASH_INIT_SIZE, dsize);
- kfree(m);
- m = ht;
- RCU_INIT_POINTER(hbucket(t, key), ht);
- }
- d = ahash_data(m, m->pos, dsize);
- memcpy(d, data, dsize);
- set_bit(m->pos++, m->used);
+ d = ahash_data(m, m->pos, dsize);
+ memcpy(d, data, dsize);
+ set_bit(m->pos++, m->used);
+ t->hregion[nr].elements++;
#ifdef IP_SET_HASH_WITH_NETS
- mtype_data_reset_flags(d, &flags);
+ mtype_data_reset_flags(d, &flags);
#endif
+ }
}
+ rcu_read_unlock_bh();
}
- rcu_assign_pointer(h->table, t);
- set->ext_size = extsize;
- spin_unlock_bh(&set->lock);
+ /* There can't be any other writer. */
+ rcu_assign_pointer(h->table, t);
/* Give time to other readers of the set */
synchronize_rcu();
pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
orig->htable_bits, orig, t->htable_bits, t);
- /* If there's nobody else dumping the table, destroy it */
+ /* Add/delete elements processed by the SET target during resize.
+ * Kernel-side add cannot trigger a resize and userspace actions
+ * are serialized by the mutex.
+ */
+ list_for_each_safe(l, lt, &h->ad) {
+ x = list_entry(l, struct mtype_resize_ad, list);
+ if (x->ad == IPSET_ADD) {
+ mtype_add(set, &x->d, &x->ext, &x->mext, x->flags);
+ } else {
+ mtype_del(set, &x->d, NULL, NULL, 0);
+ }
+ list_del(l);
+ kfree(l);
+ }
+ /* If there's nobody else using the table, destroy it */
if (atomic_dec_and_test(&orig->uref)) {
pr_debug("Table destroy by resize %p\n", orig);
mtype_ahash_destroy(set, orig, false);
@@ -677,15 +810,44 @@ out:
return ret;
cleanup:
+ rcu_read_unlock_bh();
atomic_set(&orig->ref, 0);
atomic_dec(&orig->uref);
- spin_unlock_bh(&set->lock);
mtype_ahash_destroy(set, t, false);
if (ret == -EAGAIN)
goto retry;
goto out;
}
+/* Get the current number of elements and ext_size in the set */
+static void
+mtype_ext_size(struct ip_set *set, u32 *elements, size_t *ext_size)
+{
+ struct htype *h = set->data;
+ const struct htable *t;
+ u32 i, j, r;
+ struct hbucket *n;
+ struct mtype_elem *data;
+
+ t = rcu_dereference_bh(h->table);
+ for (r = 0; r < ahash_numof_locks(t->htable_bits); r++) {
+ for (i = ahash_bucket_start(r, t->htable_bits);
+ i < ahash_bucket_end(r, t->htable_bits); i++) {
+ n = rcu_dereference_bh(hbucket(t, i));
+ if (!n)
+ continue;
+ for (j = 0; j < n->pos; j++) {
+ if (!test_bit(j, n->used))
+ continue;
+ data = ahash_data(n, j, set->dsize);
+ if (!SET_ELEM_EXPIRED(set, data))
+ (*elements)++;
+ }
+ }
+ *ext_size += t->hregion[r].ext_size;
+ }
+}
+
/* Add an element to a hash and update the internal counters when succeeded,
* otherwise report the proper error code.
*/
@@ -698,32 +860,49 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
const struct mtype_elem *d = value;
struct mtype_elem *data;
struct hbucket *n, *old = ERR_PTR(-ENOENT);
- int i, j = -1;
+ int i, j = -1, ret;
bool flag_exist = flags & IPSET_FLAG_EXIST;
bool deleted = false, forceadd = false, reuse = false;
- u32 key, multi = 0;
+ u32 r, key, multi = 0, elements, maxelem;
- if (set->elements >= h->maxelem) {
- if (SET_WITH_TIMEOUT(set))
- /* FIXME: when set is full, we slow down here */
- mtype_expire(set, h);
- if (set->elements >= h->maxelem && SET_WITH_FORCEADD(set))
+ rcu_read_lock_bh();
+ t = rcu_dereference_bh(h->table);
+ key = HKEY(value, h->initval, t->htable_bits);
+ r = ahash_region(key, t->htable_bits);
+ atomic_inc(&t->uref);
+ elements = t->hregion[r].elements;
+ maxelem = t->maxelem;
+ if (elements >= maxelem) {
+ u32 e;
+ if (SET_WITH_TIMEOUT(set)) {
+ rcu_read_unlock_bh();
+ mtype_gc_do(set, h, t, r);
+ rcu_read_lock_bh();
+ }
+ maxelem = h->maxelem;
+ elements = 0;
+ for (e = 0; e < ahash_numof_locks(t->htable_bits); e++)
+ elements += t->hregion[e].elements;
+ if (elements >= maxelem && SET_WITH_FORCEADD(set))
forceadd = true;
}
+ rcu_read_unlock_bh();
- t = ipset_dereference_protected(h->table, set);
- key = HKEY(value, h->initval, t->htable_bits);
- n = __ipset_dereference_protected(hbucket(t, key), 1);
+ spin_lock_bh(&t->hregion[r].lock);
+ n = rcu_dereference_bh(hbucket(t, key));
if (!n) {
- if (forceadd || set->elements >= h->maxelem)
+ if (forceadd || elements >= maxelem)
goto set_full;
old = NULL;
n = kzalloc(sizeof(*n) + AHASH_INIT_SIZE * set->dsize,
GFP_ATOMIC);
- if (!n)
- return -ENOMEM;
+ if (!n) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
n->size = AHASH_INIT_SIZE;
- set->ext_size += ext_size(AHASH_INIT_SIZE, set->dsize);
+ t->hregion[r].ext_size +=
+ ext_size(AHASH_INIT_SIZE, set->dsize);
goto copy_elem;
}
for (i = 0; i < n->pos; i++) {
@@ -737,38 +916,37 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
}
data = ahash_data(n, i, set->dsize);
if (mtype_data_equal(data, d, &multi)) {
- if (flag_exist ||
- (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(data, set)))) {
+ if (flag_exist || SET_ELEM_EXPIRED(set, data)) {
/* Just the extensions could be overwritten */
j = i;
goto overwrite_extensions;
}
- return -IPSET_ERR_EXIST;
+ ret = -IPSET_ERR_EXIST;
+ goto unlock;
}
/* Reuse first timed out entry */
- if (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(data, set)) &&
- j == -1) {
+ if (SET_ELEM_EXPIRED(set, data) && j == -1) {
j = i;
reuse = true;
}
}
if (reuse || forceadd) {
+ if (j == -1)
+ j = 0;
data = ahash_data(n, j, set->dsize);
if (!deleted) {
#ifdef IP_SET_HASH_WITH_NETS
for (i = 0; i < IPSET_NET_COUNT; i++)
- mtype_del_cidr(h,
+ mtype_del_cidr(set, h,
NCIDR_PUT(DCIDR_GET(data->cidr, i)),
i);
#endif
ip_set_ext_destroy(set, data);
- set->elements--;
+ t->hregion[r].elements--;
}
goto copy_data;
}
- if (set->elements >= h->maxelem)
+ if (elements >= maxelem)
goto set_full;
/* Create a new slot */
if (n->pos >= n->size) {
@@ -776,28 +954,32 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
if (n->size >= AHASH_MAX(h)) {
/* Trigger rehashing */
mtype_data_next(&h->next, d);
- return -EAGAIN;
+ ret = -EAGAIN;
+ goto resize;
}
old = n;
n = kzalloc(sizeof(*n) +
(old->size + AHASH_INIT_SIZE) * set->dsize,
GFP_ATOMIC);
- if (!n)
- return -ENOMEM;
+ if (!n) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
memcpy(n, old, sizeof(struct hbucket) +
old->size * set->dsize);
n->size = old->size + AHASH_INIT_SIZE;
- set->ext_size += ext_size(AHASH_INIT_SIZE, set->dsize);
+ t->hregion[r].ext_size +=
+ ext_size(AHASH_INIT_SIZE, set->dsize);
}
copy_elem:
j = n->pos++;
data = ahash_data(n, j, set->dsize);
copy_data:
- set->elements++;
+ t->hregion[r].elements++;
#ifdef IP_SET_HASH_WITH_NETS
for (i = 0; i < IPSET_NET_COUNT; i++)
- mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), i);
+ mtype_add_cidr(set, h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), i);
#endif
memcpy(data, d, sizeof(struct mtype_elem));
overwrite_extensions:
@@ -820,13 +1002,41 @@ overwrite_extensions:
if (old)
kfree_rcu(old, rcu);
}
+ ret = 0;
+resize:
+ spin_unlock_bh(&t->hregion[r].lock);
+ if (atomic_read(&t->ref) && ext->target) {
+ /* Resize is in process and kernel side add, save values */
+ struct mtype_resize_ad *x;
+
+ x = kzalloc(sizeof(struct mtype_resize_ad), GFP_ATOMIC);
+ if (!x)
+ /* Don't bother */
+ goto out;
+ x->ad = IPSET_ADD;
+ memcpy(&x->d, value, sizeof(struct mtype_elem));
+ memcpy(&x->ext, ext, sizeof(struct ip_set_ext));
+ memcpy(&x->mext, mext, sizeof(struct ip_set_ext));
+ x->flags = flags;
+ spin_lock_bh(&set->lock);
+ list_add_tail(&x->list, &h->ad);
+ spin_unlock_bh(&set->lock);
+ }
+ goto out;
- return 0;
set_full:
if (net_ratelimit())
pr_warn("Set %s is full, maxelem %u reached\n",
- set->name, h->maxelem);
- return -IPSET_ERR_HASH_FULL;
+ set->name, maxelem);
+ ret = -IPSET_ERR_HASH_FULL;
+unlock:
+ spin_unlock_bh(&t->hregion[r].lock);
+out:
+ if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
+ pr_debug("Table destroy after resize by add: %p\n", t);
+ mtype_ahash_destroy(set, t, false);
+ }
+ return ret;
}
/* Delete an element from the hash and free up space if possible.
@@ -840,13 +1050,23 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
const struct mtype_elem *d = value;
struct mtype_elem *data;
struct hbucket *n;
- int i, j, k, ret = -IPSET_ERR_EXIST;
+ struct mtype_resize_ad *x = NULL;
+ int i, j, k, r, ret = -IPSET_ERR_EXIST;
u32 key, multi = 0;
size_t dsize = set->dsize;
- t = ipset_dereference_protected(h->table, set);
+ /* Userspace add and resize is excluded by the mutex.
+ * Kernespace add does not trigger resize.
+ */
+ rcu_read_lock_bh();
+ t = rcu_dereference_bh(h->table);
key = HKEY(value, h->initval, t->htable_bits);
- n = __ipset_dereference_protected(hbucket(t, key), 1);
+ r = ahash_region(key, t->htable_bits);
+ atomic_inc(&t->uref);
+ rcu_read_unlock_bh();
+
+ spin_lock_bh(&t->hregion[r].lock);
+ n = rcu_dereference_bh(hbucket(t, key));
if (!n)
goto out;
for (i = 0, k = 0; i < n->pos; i++) {
@@ -857,8 +1077,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
data = ahash_data(n, i, dsize);
if (!mtype_data_equal(data, d, &multi))
continue;
- if (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(data, set)))
+ if (SET_ELEM_EXPIRED(set, data))
goto out;
ret = 0;
@@ -866,20 +1085,33 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
smp_mb__after_atomic();
if (i + 1 == n->pos)
n->pos--;
- set->elements--;
+ t->hregion[r].elements--;
#ifdef IP_SET_HASH_WITH_NETS
for (j = 0; j < IPSET_NET_COUNT; j++)
- mtype_del_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, j)),
- j);
+ mtype_del_cidr(set, h,
+ NCIDR_PUT(DCIDR_GET(d->cidr, j)), j);
#endif
ip_set_ext_destroy(set, data);
+ if (atomic_read(&t->ref) && ext->target) {
+ /* Resize is in process and kernel side del,
+ * save values
+ */
+ x = kzalloc(sizeof(struct mtype_resize_ad),
+ GFP_ATOMIC);
+ if (x) {
+ x->ad = IPSET_DEL;
+ memcpy(&x->d, value,
+ sizeof(struct mtype_elem));
+ x->flags = flags;
+ }
+ }
for (; i < n->pos; i++) {
if (!test_bit(i, n->used))
k++;
}
if (n->pos == 0 && k == 0) {
- set->ext_size -= ext_size(n->size, dsize);
+ t->hregion[r].ext_size -= ext_size(n->size, dsize);
rcu_assign_pointer(hbucket(t, key), NULL);
kfree_rcu(n, rcu);
} else if (k >= AHASH_INIT_SIZE) {
@@ -898,7 +1130,8 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
k++;
}
tmp->pos = k;
- set->ext_size -= ext_size(AHASH_INIT_SIZE, dsize);
+ t->hregion[r].ext_size -=
+ ext_size(AHASH_INIT_SIZE, dsize);
rcu_assign_pointer(hbucket(t, key), tmp);
kfree_rcu(n, rcu);
}
@@ -906,6 +1139,16 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
}
out:
+ spin_unlock_bh(&t->hregion[r].lock);
+ if (x) {
+ spin_lock_bh(&set->lock);
+ list_add(&x->list, &h->ad);
+ spin_unlock_bh(&set->lock);
+ }
+ if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
+ pr_debug("Table destroy after resize by del: %p\n", t);
+ mtype_ahash_destroy(set, t, false);
+ }
return ret;
}
@@ -991,6 +1234,7 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
int i, ret = 0;
u32 key, multi = 0;
+ rcu_read_lock_bh();
t = rcu_dereference_bh(h->table);
#ifdef IP_SET_HASH_WITH_NETS
/* If we test an IP address and not a network address,
@@ -1022,6 +1266,7 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
goto out;
}
out:
+ rcu_read_unlock_bh();
return ret;
}
@@ -1033,23 +1278,14 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
const struct htable *t;
struct nlattr *nested;
size_t memsize;
+ u32 elements = 0;
+ size_t ext_size = 0;
u8 htable_bits;
- /* If any members have expired, set->elements will be wrong
- * mytype_expire function will update it with the right count.
- * we do not hold set->lock here, so grab it first.
- * set->elements can still be incorrect in the case of a huge set,
- * because elements might time out during the listing.
- */
- if (SET_WITH_TIMEOUT(set)) {
- spin_lock_bh(&set->lock);
- mtype_expire(set, h);
- spin_unlock_bh(&set->lock);
- }
-
rcu_read_lock_bh();
- t = rcu_dereference_bh_nfnl(h->table);
- memsize = mtype_ahash_memsize(h, t) + set->ext_size;
+ t = rcu_dereference_bh(h->table);
+ mtype_ext_size(set, &elements, &ext_size);
+ memsize = mtype_ahash_memsize(h, t) + ext_size + set->ext_size;
htable_bits = t->htable_bits;
rcu_read_unlock_bh();
@@ -1071,7 +1307,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
#endif
if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
- nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements)))
+ nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(elements)))
goto nla_put_failure;
if (unlikely(ip_set_put_flags(skb, set)))
goto nla_put_failure;
@@ -1091,15 +1327,15 @@ mtype_uref(struct ip_set *set, struct netlink_callback *cb, bool start)
if (start) {
rcu_read_lock_bh();
- t = rcu_dereference_bh_nfnl(h->table);
+ t = ipset_dereference_bh_nfnl(h->table);
atomic_inc(&t->uref);
cb->args[IPSET_CB_PRIVATE] = (unsigned long)t;
rcu_read_unlock_bh();
} else if (cb->args[IPSET_CB_PRIVATE]) {
t = (struct htable *)cb->args[IPSET_CB_PRIVATE];
if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
- /* Resizing didn't destroy the hash table */
- pr_debug("Table destroy by dump: %p\n", t);
+ pr_debug("Table destroy after resize "
+ " by dump: %p\n", t);
mtype_ahash_destroy(set, t, false);
}
cb->args[IPSET_CB_PRIVATE] = 0;
@@ -1141,8 +1377,7 @@ mtype_list(const struct ip_set *set,
if (!test_bit(i, n->used))
continue;
e = ahash_data(n, i, set->dsize);
- if (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(e, set)))
+ if (SET_ELEM_EXPIRED(set, e))
continue;
pr_debug("list hash %lu hbucket %p i %u, data %p\n",
cb->args[IPSET_CB_ARG0], n, i, e);
@@ -1208,6 +1443,7 @@ static const struct ip_set_type_variant mtype_variant = {
.uref = mtype_uref,
.resize = mtype_resize,
.same_set = mtype_same_set,
+ .region_lock = true,
};
#ifdef IP_SET_EMIT_CREATE
@@ -1226,6 +1462,7 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
size_t hsize;
struct htype *h;
struct htable *t;
+ u32 i;
pr_debug("Create set %s with family %s\n",
set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6");
@@ -1294,6 +1531,15 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
kfree(h);
return -ENOMEM;
}
+ t->hregion = ip_set_alloc(ahash_sizeof_regions(hbits));
+ if (!t->hregion) {
+ kfree(t);
+ kfree(h);
+ return -ENOMEM;
+ }
+ h->gc.set = set;
+ for (i = 0; i < ahash_numof_locks(hbits); i++)
+ spin_lock_init(&t->hregion[i].lock);
h->maxelem = maxelem;
#ifdef IP_SET_HASH_WITH_NETMASK
h->netmask = netmask;
@@ -1304,9 +1550,10 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
get_random_bytes(&h->initval, sizeof(h->initval));
t->htable_bits = hbits;
+ t->maxelem = h->maxelem / ahash_numof_locks(hbits);
RCU_INIT_POINTER(h->table, t);
- h->set = set;
+ INIT_LIST_HEAD(&h->ad);
set->data = h;
#ifndef IP_SET_PROTO_UNDEF
if (set->family == NFPROTO_IPV4) {
@@ -1329,12 +1576,10 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
#ifndef IP_SET_PROTO_UNDEF
if (set->family == NFPROTO_IPV4)
#endif
- IPSET_TOKEN(HTYPE, 4_gc_init)(set,
- IPSET_TOKEN(HTYPE, 4_gc));
+ IPSET_TOKEN(HTYPE, 4_gc_init)(&h->gc);
#ifndef IP_SET_PROTO_UNDEF
else
- IPSET_TOKEN(HTYPE, 6_gc_init)(set,
- IPSET_TOKEN(HTYPE, 6_gc));
+ IPSET_TOKEN(HTYPE, 6_gc_init)(&h->gc);
#endif
}
pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index d1305423640f..1927fc296f95 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -894,32 +894,175 @@ static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
}
}
-/* Resolve race on insertion if this protocol allows this. */
+static void __nf_conntrack_insert_prepare(struct nf_conn *ct)
+{
+ struct nf_conn_tstamp *tstamp;
+
+ atomic_inc(&ct->ct_general.use);
+ ct->status |= IPS_CONFIRMED;
+
+ /* set conntrack timestamp, if enabled. */
+ tstamp = nf_conn_tstamp_find(ct);
+ if (tstamp)
+ tstamp->start = ktime_get_real_ns();
+}
+
+static int __nf_ct_resolve_clash(struct sk_buff *skb,
+ struct nf_conntrack_tuple_hash *h)
+{
+ /* This is the conntrack entry already in hashes that won race. */
+ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *loser_ct;
+
+ loser_ct = nf_ct_get(skb, &ctinfo);
+
+ if (nf_ct_is_dying(ct))
+ return NF_DROP;
+
+ if (!atomic_inc_not_zero(&ct->ct_general.use))
+ return NF_DROP;
+
+ if (((ct->status & IPS_NAT_DONE_MASK) == 0) ||
+ nf_ct_match(ct, loser_ct)) {
+ struct net *net = nf_ct_net(ct);
+
+ nf_ct_acct_merge(ct, ctinfo, loser_ct);
+ nf_ct_add_to_dying_list(loser_ct);
+ nf_conntrack_put(&loser_ct->ct_general);
+ nf_ct_set(skb, ct, ctinfo);
+
+ NF_CT_STAT_INC(net, insert_failed);
+ return NF_ACCEPT;
+ }
+
+ nf_ct_put(ct);
+ return NF_DROP;
+}
+
+/**
+ * nf_ct_resolve_clash_harder - attempt to insert clashing conntrack entry
+ *
+ * @skb: skb that causes the collision
+ * @repl_idx: hash slot for reply direction
+ *
+ * Called when origin or reply direction had a clash.
+ * The skb can be handled without packet drop provided the reply direction
+ * is unique or there the existing entry has the identical tuple in both
+ * directions.
+ *
+ * Caller must hold conntrack table locks to prevent concurrent updates.
+ *
+ * Returns NF_DROP if the clash could not be handled.
+ */
+static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx)
+{
+ struct nf_conn *loser_ct = (struct nf_conn *)skb_nfct(skb);
+ const struct nf_conntrack_zone *zone;
+ struct nf_conntrack_tuple_hash *h;
+ struct hlist_nulls_node *n;
+ struct net *net;
+
+ zone = nf_ct_zone(loser_ct);
+ net = nf_ct_net(loser_ct);
+
+ /* Reply direction must never result in a clash, unless both origin
+ * and reply tuples are identical.
+ */
+ hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[repl_idx], hnnode) {
+ if (nf_ct_key_equal(h,
+ &loser_ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ zone, net))
+ return __nf_ct_resolve_clash(skb, h);
+ }
+
+ /* We want the clashing entry to go away real soon: 1 second timeout. */
+ loser_ct->timeout = nfct_time_stamp + HZ;
+
+ /* IPS_NAT_CLASH removes the entry automatically on the first
+ * reply. Also prevents UDP tracker from moving the entry to
+ * ASSURED state, i.e. the entry can always be evicted under
+ * pressure.
+ */
+ loser_ct->status |= IPS_FIXED_TIMEOUT | IPS_NAT_CLASH;
+
+ __nf_conntrack_insert_prepare(loser_ct);
+
+ /* fake add for ORIGINAL dir: we want lookups to only find the entry
+ * already in the table. This also hides the clashing entry from
+ * ctnetlink iteration, i.e. conntrack -L won't show them.
+ */
+ hlist_nulls_add_fake(&loser_ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
+
+ hlist_nulls_add_head_rcu(&loser_ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
+ &nf_conntrack_hash[repl_idx]);
+ return NF_ACCEPT;
+}
+
+/**
+ * nf_ct_resolve_clash - attempt to handle clash without packet drop
+ *
+ * @skb: skb that causes the clash
+ * @h: tuplehash of the clashing entry already in table
+ * @hash_reply: hash slot for reply direction
+ *
+ * A conntrack entry can be inserted to the connection tracking table
+ * if there is no existing entry with an identical tuple.
+ *
+ * If there is one, @skb (and the assocated, unconfirmed conntrack) has
+ * to be dropped. In case @skb is retransmitted, next conntrack lookup
+ * will find the already-existing entry.
+ *
+ * The major problem with such packet drop is the extra delay added by
+ * the packet loss -- it will take some time for a retransmit to occur
+ * (or the sender to time out when waiting for a reply).
+ *
+ * This function attempts to handle the situation without packet drop.
+ *
+ * If @skb has no NAT transformation or if the colliding entries are
+ * exactly the same, only the to-be-confirmed conntrack entry is discarded
+ * and @skb is associated with the conntrack entry already in the table.
+ *
+ * Failing that, the new, unconfirmed conntrack is still added to the table
+ * provided that the collision only occurs in the ORIGINAL direction.
+ * The new entry will be added after the existing one in the hash list,
+ * so packets in the ORIGINAL direction will continue to match the existing
+ * entry. The new entry will also have a fixed timeout so it expires --
+ * due to the collision, it will not see bidirectional traffic.
+ *
+ * Returns NF_DROP if the clash could not be resolved.
+ */
static __cold noinline int
-nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
- enum ip_conntrack_info ctinfo,
- struct nf_conntrack_tuple_hash *h)
+nf_ct_resolve_clash(struct sk_buff *skb, struct nf_conntrack_tuple_hash *h,
+ u32 reply_hash)
{
/* This is the conntrack entry already in hashes that won race. */
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
const struct nf_conntrack_l4proto *l4proto;
- enum ip_conntrack_info oldinfo;
- struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo);
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *loser_ct;
+ struct net *net;
+ int ret;
+
+ loser_ct = nf_ct_get(skb, &ctinfo);
+ net = nf_ct_net(loser_ct);
l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
- if (l4proto->allow_clash &&
- !nf_ct_is_dying(ct) &&
- atomic_inc_not_zero(&ct->ct_general.use)) {
- if (((ct->status & IPS_NAT_DONE_MASK) == 0) ||
- nf_ct_match(ct, loser_ct)) {
- nf_ct_acct_merge(ct, ctinfo, loser_ct);
- nf_conntrack_put(&loser_ct->ct_general);
- nf_ct_set(skb, ct, oldinfo);
- return NF_ACCEPT;
- }
- nf_ct_put(ct);
- }
+ if (!l4proto->allow_clash)
+ goto drop;
+
+ ret = __nf_ct_resolve_clash(skb, h);
+ if (ret == NF_ACCEPT)
+ return ret;
+
+ ret = nf_ct_resolve_clash_harder(skb, reply_hash);
+ if (ret == NF_ACCEPT)
+ return ret;
+
+drop:
+ nf_ct_add_to_dying_list(loser_ct);
NF_CT_STAT_INC(net, drop);
+ NF_CT_STAT_INC(net, insert_failed);
return NF_DROP;
}
@@ -932,7 +1075,6 @@ __nf_conntrack_confirm(struct sk_buff *skb)
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
struct nf_conn_help *help;
- struct nf_conn_tstamp *tstamp;
struct hlist_nulls_node *n;
enum ip_conntrack_info ctinfo;
struct net *net;
@@ -989,6 +1131,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
if (unlikely(nf_ct_is_dying(ct))) {
nf_ct_add_to_dying_list(ct);
+ NF_CT_STAT_INC(net, insert_failed);
goto dying;
}
@@ -1009,13 +1152,8 @@ __nf_conntrack_confirm(struct sk_buff *skb)
setting time, otherwise we'd get timer wrap in
weird delay cases. */
ct->timeout += nfct_time_stamp;
- atomic_inc(&ct->ct_general.use);
- ct->status |= IPS_CONFIRMED;
- /* set conntrack timestamp, if enabled. */
- tstamp = nf_conn_tstamp_find(ct);
- if (tstamp)
- tstamp->start = ktime_get_real_ns();
+ __nf_conntrack_insert_prepare(ct);
/* Since the lookup is lockless, hash insertion must be done after
* starting the timer and setting the CONFIRMED bit. The RCU barriers
@@ -1035,11 +1173,9 @@ __nf_conntrack_confirm(struct sk_buff *skb)
return NF_ACCEPT;
out:
- nf_ct_add_to_dying_list(ct);
- ret = nf_ct_resolve_clash(net, skb, ctinfo, h);
+ ret = nf_ct_resolve_clash(skb, h, reply_hash);
dying:
nf_conntrack_double_unlock(hash, reply_hash);
- NF_CT_STAT_INC(net, insert_failed);
local_bh_enable();
return ret;
}
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 7365b43f8f98..760ca2422816 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -81,6 +81,18 @@ static bool udp_error(struct sk_buff *skb,
return false;
}
+static void nf_conntrack_udp_refresh_unreplied(struct nf_conn *ct,
+ struct sk_buff *skb,
+ enum ip_conntrack_info ctinfo,
+ u32 extra_jiffies)
+{
+ if (unlikely(ctinfo == IP_CT_ESTABLISHED_REPLY &&
+ ct->status & IPS_NAT_CLASH))
+ nf_ct_kill(ct);
+ else
+ nf_ct_refresh_acct(ct, ctinfo, skb, extra_jiffies);
+}
+
/* Returns verdict for packet, and may modify conntracktype */
int nf_conntrack_udp_packet(struct nf_conn *ct,
struct sk_buff *skb,
@@ -116,8 +128,8 @@ int nf_conntrack_udp_packet(struct nf_conn *ct,
if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
nf_conntrack_event_cache(IPCT_ASSURED, ct);
} else {
- nf_ct_refresh_acct(ct, ctinfo, skb,
- timeouts[UDP_CT_UNREPLIED]);
+ nf_conntrack_udp_refresh_unreplied(ct, skb, ctinfo,
+ timeouts[UDP_CT_UNREPLIED]);
}
return NF_ACCEPT;
}
@@ -198,8 +210,8 @@ int nf_conntrack_udplite_packet(struct nf_conn *ct,
if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
nf_conntrack_event_cache(IPCT_ASSURED, ct);
} else {
- nf_ct_refresh_acct(ct, ctinfo, skb,
- timeouts[UDP_CT_UNREPLIED]);
+ nf_conntrack_udp_refresh_unreplied(ct, skb, ctinfo,
+ timeouts[UDP_CT_UNREPLIED]);
}
return NF_ACCEPT;
}
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 410809c669e1..4912069627b6 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -411,7 +411,7 @@ static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
*pos = cpu + 1;
return per_cpu_ptr(net->ct.stat, cpu);
}
-
+ (*pos)++;
return NULL;
}
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 83e1db37c3b0..06f00cdc3891 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -847,9 +847,6 @@ static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
{
int err;
- if (!nf_flowtable_hw_offload(flowtable))
- return 0;
-
if (!dev->netdev_ops->ndo_setup_tc)
return -EOPNOTSUPP;
@@ -876,6 +873,9 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
struct flow_block_offload bo;
int err;
+ if (!nf_flowtable_hw_offload(flowtable))
+ return 0;
+
err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd, &extack);
if (err < 0)
return err;
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index b0930d4aba22..b9cbe1e2453e 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -267,7 +267,7 @@ static void *synproxy_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
*pos = cpu + 1;
return per_cpu_ptr(snet->stats, cpu);
}
-
+ (*pos)++;
return NULL;
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index d1318bdf49ca..38c680f28f15 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1405,6 +1405,11 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
lockdep_commit_lock_is_held(net));
if (nft_dump_stats(skb, stats))
goto nla_put_failure;
+
+ if ((chain->flags & NFT_CHAIN_HW_OFFLOAD) &&
+ nla_put_be32(skb, NFTA_CHAIN_FLAGS,
+ htonl(NFT_CHAIN_HW_OFFLOAD)))
+ goto nla_put_failure;
}
if (nla_put_be32(skb, NFTA_CHAIN_USE, htonl(chain->use)))
@@ -6300,8 +6305,13 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
goto err4;
err = nft_register_flowtable_net_hooks(ctx.net, table, flowtable);
- if (err < 0)
+ if (err < 0) {
+ list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
+ list_del_rcu(&hook->list);
+ kfree_rcu(hook, rcu);
+ }
goto err4;
+ }
err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable);
if (err < 0)
@@ -7378,13 +7388,8 @@ static void nf_tables_module_autoload(struct net *net)
list_splice_init(&net->nft.module_list, &module_list);
mutex_unlock(&net->nft.commit_mutex);
list_for_each_entry_safe(req, next, &module_list, list) {
- if (req->done) {
- list_del(&req->list);
- kfree(req);
- } else {
- request_module("%s", req->module);
- req->done = true;
- }
+ request_module("%s", req->module);
+ req->done = true;
}
mutex_lock(&net->nft.commit_mutex);
list_splice(&module_list, &net->nft.module_list);
@@ -8167,6 +8172,7 @@ static void __net_exit nf_tables_exit_net(struct net *net)
__nft_release_tables(net);
mutex_unlock(&net->nft.commit_mutex);
WARN_ON_ONCE(!list_empty(&net->nft.tables));
+ WARN_ON_ONCE(!list_empty(&net->nft.module_list));
}
static struct pernet_operations nf_tables_net_ops = {
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index de3a9596b7f1..a5f294aa8e4c 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -742,6 +742,8 @@ static const struct nla_policy nfnl_cthelper_policy[NFCTH_MAX+1] = {
[NFCTH_NAME] = { .type = NLA_NUL_STRING,
.len = NF_CT_HELPER_NAME_LEN-1 },
[NFCTH_QUEUE_NUM] = { .type = NLA_U32, },
+ [NFCTH_PRIV_DATA_LEN] = { .type = NLA_U32, },
+ [NFCTH_STATUS] = { .type = NLA_U32, },
};
static const struct nfnl_callback nfnl_cthelper_cb[NFNL_MSG_CTHELPER_MAX] = {
diff --git a/net/netfilter/nft_chain_nat.c b/net/netfilter/nft_chain_nat.c
index ff9ac8ae0031..eac4a901233f 100644
--- a/net/netfilter/nft_chain_nat.c
+++ b/net/netfilter/nft_chain_nat.c
@@ -89,6 +89,7 @@ static const struct nft_chain_type nft_chain_nat_inet = {
.name = "nat",
.type = NFT_CHAIN_T_NAT,
.family = NFPROTO_INET,
+ .owner = THIS_MODULE,
.hook_mask = (1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_LOCAL_IN) |
(1 << NF_INET_LOCAL_OUT) |
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 1993af3a2979..a7de3a58f553 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -129,6 +129,7 @@ static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = {
[NFTA_PAYLOAD_LEN] = { .type = NLA_U32 },
[NFTA_PAYLOAD_CSUM_TYPE] = { .type = NLA_U32 },
[NFTA_PAYLOAD_CSUM_OFFSET] = { .type = NLA_U32 },
+ [NFTA_PAYLOAD_CSUM_FLAGS] = { .type = NLA_U32 },
};
static int nft_payload_init(const struct nft_ctx *ctx,
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index f0cb1e13af50..4fc0c924ed5d 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -203,7 +203,7 @@
* ::
*
* rule indices in last field: 0 1
- * map to elements: 0x42 0x66
+ * map to elements: 0x66 0x42
*
*
* Matching
@@ -298,7 +298,7 @@
* ::
*
* rule indices in last field: 0 1
- * map to elements: 0x42 0x66
+ * map to elements: 0x66 0x42
*
* the matching element is at 0x42.
*
@@ -503,7 +503,7 @@ static int pipapo_refill(unsigned long *map, int len, int rules,
return -1;
}
- if (unlikely(match_only)) {
+ if (match_only) {
bitmap_clear(map, i, 1);
return i;
}
@@ -1766,11 +1766,13 @@ static bool pipapo_match_field(struct nft_pipapo_field *f,
static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem)
{
- const u8 *data = (const u8 *)elem->key.val.data;
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *m = priv->clone;
+ struct nft_pipapo_elem *e = elem->priv;
int rules_f0, first_rule = 0;
- struct nft_pipapo_elem *e;
+ const u8 *data;
+
+ data = (const u8 *)nft_set_ext_key(&e->ext);
e = pipapo_get(net, set, data, 0);
if (IS_ERR(e))
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
index 4c3f2e24c7cb..764e88682a81 100644
--- a/net/netfilter/nft_tunnel.c
+++ b/net/netfilter/nft_tunnel.c
@@ -339,6 +339,8 @@ static const struct nla_policy nft_tunnel_key_policy[NFTA_TUNNEL_KEY_MAX + 1] =
[NFTA_TUNNEL_KEY_FLAGS] = { .type = NLA_U32, },
[NFTA_TUNNEL_KEY_TOS] = { .type = NLA_U8, },
[NFTA_TUNNEL_KEY_TTL] = { .type = NLA_U8, },
+ [NFTA_TUNNEL_KEY_SPORT] = { .type = NLA_U16, },
+ [NFTA_TUNNEL_KEY_DPORT] = { .type = NLA_U16, },
[NFTA_TUNNEL_KEY_OPTS] = { .type = NLA_NESTED, },
};
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index e27c6c5ba9df..cd2b034eef59 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1551,6 +1551,9 @@ static void *xt_mttg_seq_next(struct seq_file *seq, void *v, loff_t *ppos,
uint8_t nfproto = (unsigned long)PDE_DATA(file_inode(seq->file));
struct nf_mttg_trav *trav = seq->private;
+ if (ppos != NULL)
+ ++(*ppos);
+
switch (trav->class) {
case MTTG_TRAV_INIT:
trav->class = MTTG_TRAV_NFP_UNSPEC;
@@ -1576,9 +1579,6 @@ static void *xt_mttg_seq_next(struct seq_file *seq, void *v, loff_t *ppos,
default:
return NULL;
}
-
- if (ppos != NULL)
- ++*ppos;
return trav;
}
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index bccd47cd7190..8c835ad63729 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -36,6 +36,7 @@
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <linux/mutex.h>
#include <linux/kernel.h>
+#include <linux/refcount.h>
#include <uapi/linux/netfilter/xt_hashlimit.h>
#define XT_HASHLIMIT_ALL (XT_HASHLIMIT_HASH_DIP | XT_HASHLIMIT_HASH_DPT | \
@@ -114,7 +115,7 @@ struct dsthash_ent {
struct xt_hashlimit_htable {
struct hlist_node node; /* global list of all htables */
- int use;
+ refcount_t use;
u_int8_t family;
bool rnd_initialized;
@@ -315,7 +316,7 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg,
for (i = 0; i < hinfo->cfg.size; i++)
INIT_HLIST_HEAD(&hinfo->hash[i]);
- hinfo->use = 1;
+ refcount_set(&hinfo->use, 1);
hinfo->count = 0;
hinfo->family = family;
hinfo->rnd_initialized = false;
@@ -401,15 +402,6 @@ static void htable_remove_proc_entry(struct xt_hashlimit_htable *hinfo)
remove_proc_entry(hinfo->name, parent);
}
-static void htable_destroy(struct xt_hashlimit_htable *hinfo)
-{
- cancel_delayed_work_sync(&hinfo->gc_work);
- htable_remove_proc_entry(hinfo);
- htable_selective_cleanup(hinfo, true);
- kfree(hinfo->name);
- vfree(hinfo);
-}
-
static struct xt_hashlimit_htable *htable_find_get(struct net *net,
const char *name,
u_int8_t family)
@@ -420,7 +412,7 @@ static struct xt_hashlimit_htable *htable_find_get(struct net *net,
hlist_for_each_entry(hinfo, &hashlimit_net->htables, node) {
if (!strcmp(name, hinfo->name) &&
hinfo->family == family) {
- hinfo->use++;
+ refcount_inc(&hinfo->use);
return hinfo;
}
}
@@ -429,12 +421,16 @@ static struct xt_hashlimit_htable *htable_find_get(struct net *net,
static void htable_put(struct xt_hashlimit_htable *hinfo)
{
- mutex_lock(&hashlimit_mutex);
- if (--hinfo->use == 0) {
+ if (refcount_dec_and_mutex_lock(&hinfo->use, &hashlimit_mutex)) {
hlist_del(&hinfo->node);
- htable_destroy(hinfo);
+ htable_remove_proc_entry(hinfo);
+ mutex_unlock(&hashlimit_mutex);
+
+ cancel_delayed_work_sync(&hinfo->gc_work);
+ htable_selective_cleanup(hinfo, true);
+ kfree(hinfo->name);
+ vfree(hinfo);
}
- mutex_unlock(&hashlimit_mutex);
}
/* The algorithm used is the Simple Token Bucket Filter (TBF)
@@ -837,6 +833,8 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 3);
}
+#define HASHLIMIT_MAX_SIZE 1048576
+
static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
struct xt_hashlimit_htable **hinfo,
struct hashlimit_cfg3 *cfg,
@@ -847,6 +845,14 @@ static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
if (cfg->gc_interval == 0 || cfg->expire == 0)
return -EINVAL;
+ if (cfg->size > HASHLIMIT_MAX_SIZE) {
+ cfg->size = HASHLIMIT_MAX_SIZE;
+ pr_info_ratelimited("size too large, truncated to %u\n", cfg->size);
+ }
+ if (cfg->max > HASHLIMIT_MAX_SIZE) {
+ cfg->max = HASHLIMIT_MAX_SIZE;
+ pr_info_ratelimited("max too large, truncated to %u\n", cfg->max);
+ }
if (par->family == NFPROTO_IPV4) {
if (cfg->srcmask > 32 || cfg->dstmask > 32)
return -EINVAL;
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 0a9708004e20..225a7ab6d79a 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -492,12 +492,12 @@ static void *recent_seq_next(struct seq_file *seq, void *v, loff_t *pos)
const struct recent_entry *e = v;
const struct list_head *head = e->list.next;
+ (*pos)++;
while (head == &t->iphash[st->bucket]) {
if (++st->bucket >= ip_list_hash_size)
return NULL;
head = t->iphash[st->bucket].next;
}
- (*pos)++;
return list_entry(head, struct recent_entry, list);
}
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index f5d34da0646e..a1f2320ecc16 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -143,7 +143,8 @@ static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain,
if (domain != NULL) {
bkt = netlbl_domhsh_hash(domain);
bkt_list = &netlbl_domhsh_rcu_deref(netlbl_domhsh)->tbl[bkt];
- list_for_each_entry_rcu(iter, bkt_list, list)
+ list_for_each_entry_rcu(iter, bkt_list, list,
+ lockdep_is_held(&netlbl_domhsh_lock))
if (iter->valid &&
netlbl_family_match(iter->family, family) &&
strcmp(iter->domain, domain) == 0)
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index d2e4ab8d1cb1..77bb1bb22c3b 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -207,7 +207,8 @@ static struct netlbl_unlhsh_iface *netlbl_unlhsh_search_iface(int ifindex)
bkt = netlbl_unlhsh_hash(ifindex);
bkt_list = &netlbl_unlhsh_rcu_deref(netlbl_unlhsh)->tbl[bkt];
- list_for_each_entry_rcu(iter, bkt_list, list)
+ list_for_each_entry_rcu(iter, bkt_list, list,
+ lockdep_is_held(&netlbl_unlhsh_lock))
if (iter->valid && iter->ifindex == ifindex)
return iter;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 4e31721e7293..5313f1cec170 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1014,7 +1014,8 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
if (nlk->netlink_bind && groups) {
int group;
- for (group = 0; group < nlk->ngroups; group++) {
+ /* nl_groups is a u32, so cap the maximum groups we can bind */
+ for (group = 0; group < BITS_PER_TYPE(u32); group++) {
if (!test_bit(group, &groups))
continue;
err = nlk->netlink_bind(net, group + 1);
@@ -1033,7 +1034,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
netlink_insert(sk, nladdr->nl_pid) :
netlink_autobind(sock);
if (err) {
- netlink_undo_bind(nlk->ngroups, groups, sk);
+ netlink_undo_bind(BITS_PER_TYPE(u32), groups, sk);
goto unlock;
}
}
@@ -2433,7 +2434,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
in_skb->len))
WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS,
(u8 *)extack->bad_attr -
- in_skb->data));
+ (u8 *)nlh));
} else {
if (extack->cookie_len)
WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE,
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 0522b2b1fd95..9f357aa22b94 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -497,8 +497,9 @@ genl_family_rcv_msg_attrs_parse(const struct genl_family *family,
err = __nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr,
family->policy, validate, extack);
- if (err && parallel) {
- kfree(attrbuf);
+ if (err) {
+ if (parallel)
+ kfree(attrbuf);
return ERR_PTR(err);
}
return attrbuf;
diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index 6f1b096e601c..43811b5219b5 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -181,13 +181,20 @@ exit:
void nfc_hci_cmd_received(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd,
struct sk_buff *skb)
{
- u8 gate = hdev->pipes[pipe].gate;
u8 status = NFC_HCI_ANY_OK;
struct hci_create_pipe_resp *create_info;
struct hci_delete_pipe_noti *delete_info;
struct hci_all_pipe_cleared_noti *cleared_info;
+ u8 gate;
- pr_debug("from gate %x pipe %x cmd %x\n", gate, pipe, cmd);
+ pr_debug("from pipe %x cmd %x\n", pipe, cmd);
+
+ if (pipe >= NFC_HCI_MAX_PIPES) {
+ status = NFC_HCI_ANY_E_NOK;
+ goto exit;
+ }
+
+ gate = hdev->pipes[pipe].gate;
switch (cmd) {
case NFC_HCI_ADM_NOTIFY_PIPE_CREATED:
@@ -375,8 +382,14 @@ void nfc_hci_event_received(struct nfc_hci_dev *hdev, u8 pipe, u8 event,
struct sk_buff *skb)
{
int r = 0;
- u8 gate = hdev->pipes[pipe].gate;
+ u8 gate;
+
+ if (pipe >= NFC_HCI_MAX_PIPES) {
+ pr_err("Discarded event %x to invalid pipe %x\n", event, pipe);
+ goto exit;
+ }
+ gate = hdev->pipes[pipe].gate;
if (gate == NFC_HCI_INVALID_GATE) {
pr_err("Discarded event %x to unopened pipe %x\n", event, pipe);
goto exit;
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index eee0dddb7749..e894254c17d4 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -32,6 +32,7 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = {
[NFC_ATTR_DEVICE_NAME] = { .type = NLA_STRING,
.len = NFC_DEVICE_NAME_MAXSIZE },
[NFC_ATTR_PROTOCOLS] = { .type = NLA_U32 },
+ [NFC_ATTR_TARGET_INDEX] = { .type = NLA_U32 },
[NFC_ATTR_COMM_MODE] = { .type = NLA_U8 },
[NFC_ATTR_RF_MODE] = { .type = NLA_U8 },
[NFC_ATTR_DEVICE_POWERED] = { .type = NLA_U8 },
@@ -43,7 +44,10 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = {
[NFC_ATTR_LLC_SDP] = { .type = NLA_NESTED },
[NFC_ATTR_FIRMWARE_NAME] = { .type = NLA_STRING,
.len = NFC_FIRMWARE_NAME_MAXSIZE },
+ [NFC_ATTR_SE_INDEX] = { .type = NLA_U32 },
[NFC_ATTR_SE_APDU] = { .type = NLA_BINARY },
+ [NFC_ATTR_VENDOR_ID] = { .type = NLA_U32 },
+ [NFC_ATTR_VENDOR_SUBCMD] = { .type = NLA_U32 },
[NFC_ATTR_VENDOR_DATA] = { .type = NLA_BINARY },
};
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 659c2a790fe7..07a7dd185995 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -179,7 +179,8 @@ struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
struct hlist_head *head;
head = vport_hash_bucket(dp, port_no);
- hlist_for_each_entry_rcu(vport, head, dp_hash_node) {
+ hlist_for_each_entry_rcu(vport, head, dp_hash_node,
+ lockdep_ovsl_is_held()) {
if (vport->port_no == port_no)
return vport;
}
@@ -644,6 +645,7 @@ static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
[OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
[OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
+ [OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 },
};
static const struct genl_ops dp_packet_genl_ops[] = {
@@ -2042,7 +2044,8 @@ static unsigned int ovs_get_max_headroom(struct datapath *dp)
int i;
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
- hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
+ hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
+ lockdep_ovsl_is_held()) {
dev = vport->dev;
dev_headroom = netdev_get_fwd_headroom(dev);
if (dev_headroom > max_headroom)
@@ -2061,7 +2064,8 @@ static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom)
dp->max_headroom = new_headroom;
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
- hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node)
+ hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
+ lockdep_ovsl_is_held())
netdev_set_rx_headroom(vport->dev, new_headroom);
}
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 7da4230627f5..288122eec7c8 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -2708,10 +2708,6 @@ static int validate_set(const struct nlattr *a,
return -EINVAL;
switch (key_type) {
- const struct ovs_key_ipv4 *ipv4_key;
- const struct ovs_key_ipv6 *ipv6_key;
- int err;
-
case OVS_KEY_ATTR_PRIORITY:
case OVS_KEY_ATTR_SKB_MARK:
case OVS_KEY_ATTR_CT_MARK:
@@ -2723,7 +2719,9 @@ static int validate_set(const struct nlattr *a,
return -EINVAL;
break;
- case OVS_KEY_ATTR_TUNNEL:
+ case OVS_KEY_ATTR_TUNNEL: {
+ int err;
+
if (masked)
return -EINVAL; /* Masked tunnel set not supported. */
@@ -2732,8 +2730,10 @@ static int validate_set(const struct nlattr *a,
if (err)
return err;
break;
+ }
+ case OVS_KEY_ATTR_IPV4: {
+ const struct ovs_key_ipv4 *ipv4_key;
- case OVS_KEY_ATTR_IPV4:
if (eth_type != htons(ETH_P_IP))
return -EINVAL;
@@ -2753,8 +2753,10 @@ static int validate_set(const struct nlattr *a,
return -EINVAL;
}
break;
+ }
+ case OVS_KEY_ATTR_IPV6: {
+ const struct ovs_key_ipv6 *ipv6_key;
- case OVS_KEY_ATTR_IPV6:
if (eth_type != htons(ETH_P_IPV6))
return -EINVAL;
@@ -2781,7 +2783,7 @@ static int validate_set(const struct nlattr *a,
return -EINVAL;
break;
-
+ }
case OVS_KEY_ATTR_TCP:
if ((eth_type != htons(ETH_P_IP) &&
eth_type != htons(ETH_P_IPV6)) ||
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 5904e93e5765..fd8a01ca7a2d 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -585,7 +585,8 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
head = find_bucket(ti, hash);
(*n_mask_hit)++;
- hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) {
+ hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver],
+ lockdep_ovsl_is_held()) {
if (flow->mask == mask && flow->flow_table.hash == hash &&
flow_cmp_masked_key(flow, &masked_key, &mask->range))
return flow;
@@ -769,7 +770,8 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,
hash = ufid_hash(ufid);
head = find_bucket(ti, hash);
- hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver]) {
+ hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver],
+ lockdep_ovsl_is_held()) {
if (flow->ufid_table.hash == hash &&
ovs_flow_cmp_ufid(flow, ufid))
return flow;
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 3323b79ff548..5010d1ddd4bd 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -61,7 +61,8 @@ static struct dp_meter *lookup_meter(const struct datapath *dp,
struct hlist_head *head;
head = meter_hash_bucket(dp, meter_id);
- hlist_for_each_entry_rcu(meter, head, dp_hash_node) {
+ hlist_for_each_entry_rcu(meter, head, dp_hash_node,
+ lockdep_ovsl_is_held()) {
if (meter->id == meter_id)
return meter;
}
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 5da9392b03d6..47febb4504f0 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -96,7 +96,8 @@ struct vport *ovs_vport_locate(const struct net *net, const char *name)
struct hlist_head *bucket = hash_bucket(net, name);
struct vport *vport;
- hlist_for_each_entry_rcu(vport, bucket, hash_node)
+ hlist_for_each_entry_rcu(vport, bucket, hash_node,
+ lockdep_ovsl_is_held())
if (!strcmp(name, ovs_vport_name(vport)) &&
net_eq(ovs_dp_get_net(vport->dp), net))
return vport;
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 3341eee87bf9..585e6b3b69ce 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -162,10 +162,9 @@ static int rds_pin_pages(unsigned long user_addr, unsigned int nr_pages,
if (write)
gup_flags |= FOLL_WRITE;
- ret = get_user_pages_fast(user_addr, nr_pages, gup_flags, pages);
+ ret = pin_user_pages_fast(user_addr, nr_pages, gup_flags, pages);
if (ret >= 0 && ret < nr_pages) {
- while (ret--)
- put_page(pages[ret]);
+ unpin_user_pages(pages, ret);
ret = -EFAULT;
}
@@ -300,8 +299,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
* to release anything.
*/
if (!need_odp) {
- for (i = 0 ; i < nents; i++)
- put_page(sg_page(&sg[i]));
+ unpin_user_pages(pages, nr_pages);
kfree(sg);
}
ret = PTR_ERR(trans_private);
@@ -325,7 +323,12 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
if (cookie_ret)
*cookie_ret = cookie;
- if (args->cookie_addr && put_user(cookie, (u64 __user *)(unsigned long) args->cookie_addr)) {
+ if (args->cookie_addr &&
+ put_user(cookie, (u64 __user *)(unsigned long)args->cookie_addr)) {
+ if (!need_odp) {
+ unpin_user_pages(pages, nr_pages);
+ kfree(sg);
+ }
ret = -EFAULT;
goto out;
}
@@ -496,9 +499,7 @@ void rds_rdma_free_op(struct rm_rdma_op *ro)
* is the case for a RDMA_READ which copies from remote
* to local memory
*/
- if (!ro->op_write)
- set_page_dirty(page);
- put_page(page);
+ unpin_user_pages_dirty_lock(&page, 1, !ro->op_write);
}
}
@@ -515,8 +516,7 @@ void rds_atomic_free_op(struct rm_atomic_op *ao)
/* Mark page dirty if it was possibly modified, which
* is the case for a RDMA_READ which copies from remote
* to local memory */
- set_page_dirty(page);
- put_page(page);
+ unpin_user_pages_dirty_lock(&page, 1, true);
kfree(ao->op_notifier);
ao->op_notifier = NULL;
@@ -944,7 +944,7 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
return ret;
err:
if (page)
- put_page(page);
+ unpin_user_page(page);
rm->atomic.op_active = 0;
kfree(rm->atomic.op_notifier);
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 90a31b15585f..8c466a712cda 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -186,6 +186,7 @@ static size_t tcf_action_shared_attrs_size(const struct tc_action *act)
+ nla_total_size(IFNAMSIZ) /* TCA_ACT_KIND */
+ cookie_len /* TCA_ACT_COOKIE */
+ nla_total_size(0) /* TCA_ACT_STATS nested */
+ + nla_total_size(sizeof(struct nla_bitfield32)) /* TCA_ACT_FLAGS */
/* TCA_STATS_BASIC */
+ nla_total_size_64bit(sizeof(struct gnet_stats_basic))
/* TCA_STATS_PKT64 */
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index f9c0d1e8d380..d32d4233d337 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -305,6 +305,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct cls_fl_filter *f;
list_for_each_entry_rcu(mask, &head->masks, list) {
+ flow_dissector_init_keys(&skb_key.control, &skb_key.basic);
fl_clear_masked_range(&skb_key, mask);
skb_flow_dissect_meta(skb, &mask->dissector, &skb_key);
@@ -691,6 +692,7 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
.len = 128 / BITS_PER_BYTE },
[TCA_FLOWER_KEY_CT_LABELS_MASK] = { .type = NLA_BINARY,
.len = 128 / BITS_PER_BYTE },
+ [TCA_FLOWER_FLAGS] = { .type = NLA_U32 },
};
static const struct nla_policy
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 039cc86974f4..610a0b728161 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -157,6 +157,7 @@ static void *mall_get(struct tcf_proto *tp, u32 handle)
static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = {
[TCA_MATCHALL_UNSPEC] = { .type = NLA_UNSPEC },
[TCA_MATCHALL_CLASSID] = { .type = NLA_U32 },
+ [TCA_MATCHALL_FLAGS] = { .type = NLA_U32 },
};
static int mall_set_parms(struct net *net, struct tcf_proto *tp,
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index a5a295477ecc..371ad84def3b 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -744,6 +744,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
[TCA_FQ_FLOW_MAX_RATE] = { .type = NLA_U32 },
[TCA_FQ_BUCKETS_LOG] = { .type = NLA_U32 },
[TCA_FQ_FLOW_REFILL_DELAY] = { .type = NLA_U32 },
+ [TCA_FQ_ORPHAN_MASK] = { .type = NLA_U32 },
[TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NLA_U32 },
[TCA_FQ_CE_THRESHOLD] = { .type = NLA_U32 },
};
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 660fc45ee40f..ee717e05372b 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -768,6 +768,7 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] = { .type = NLA_S64 },
[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 },
[TCA_TAPRIO_ATTR_FLAGS] = { .type = NLA_U32 },
+ [TCA_TAPRIO_ATTR_TXTIME_DELAY] = { .type = NLA_U32 },
};
static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry,
diff --git a/net/sctp/diag.c b/net/sctp/diag.c
index 8a15146faaeb..1069d7af3672 100644
--- a/net/sctp/diag.c
+++ b/net/sctp/diag.c
@@ -237,15 +237,11 @@ static size_t inet_assoc_attr_size(struct sctp_association *asoc)
addrcnt++;
return nla_total_size(sizeof(struct sctp_info))
- + nla_total_size(1) /* INET_DIAG_SHUTDOWN */
- + nla_total_size(1) /* INET_DIAG_TOS */
- + nla_total_size(1) /* INET_DIAG_TCLASS */
- + nla_total_size(4) /* INET_DIAG_MARK */
- + nla_total_size(4) /* INET_DIAG_CLASS_ID */
+ nla_total_size(addrlen * asoc->peer.transport_count)
+ nla_total_size(addrlen * addrcnt)
- + nla_total_size(sizeof(struct inet_diag_meminfo))
+ nla_total_size(sizeof(struct inet_diag_msg))
+ + inet_diag_msg_attrs_size()
+ + nla_total_size(sizeof(struct inet_diag_meminfo))
+ 64;
}
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 748e3b19ec1d..6a16af4b1ef6 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -170,6 +170,16 @@ static inline bool sctp_chunk_length_valid(struct sctp_chunk *chunk,
return true;
}
+/* Check for format error in an ABORT chunk */
+static inline bool sctp_err_chunk_valid(struct sctp_chunk *chunk)
+{
+ struct sctp_errhdr *err;
+
+ sctp_walk_errors(err, chunk->chunk_hdr);
+
+ return (void *)err == (void *)chunk->chunk_end;
+}
+
/**********************************************************
* These are the state functions for handling chunk events.
**********************************************************/
@@ -2255,6 +2265,9 @@ enum sctp_disposition sctp_sf_shutdown_pending_abort(
sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
+ if (!sctp_err_chunk_valid(chunk))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands);
}
@@ -2298,6 +2311,9 @@ enum sctp_disposition sctp_sf_shutdown_sent_abort(
sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
+ if (!sctp_err_chunk_valid(chunk))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
/* Stop the T2-shutdown timer. */
sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN));
@@ -2565,6 +2581,9 @@ enum sctp_disposition sctp_sf_do_9_1_abort(
sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
+ if (!sctp_err_chunk_valid(chunk))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands);
}
@@ -2582,16 +2601,8 @@ static enum sctp_disposition __sctp_sf_do_9_1_abort(
/* See if we have an error cause code in the chunk. */
len = ntohs(chunk->chunk_hdr->length);
- if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) {
- struct sctp_errhdr *err;
-
- sctp_walk_errors(err, chunk->chunk_hdr);
- if ((void *)err != (void *)chunk->chunk_end)
- return sctp_sf_pdiscard(net, ep, asoc, type, arg,
- commands);
-
+ if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr))
error = ((struct sctp_errhdr *)chunk->skb->data)->cause;
- }
sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNRESET));
/* ASSOC_FAILED will DELETE_TCB. */
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index cee5bf4a9bb9..6fd44bdb0fc3 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -470,6 +470,8 @@ static void smc_switch_to_fallback(struct smc_sock *smc)
if (smc->sk.sk_socket && smc->sk.sk_socket->file) {
smc->clcsock->file = smc->sk.sk_socket->file;
smc->clcsock->file->private_data = smc->clcsock;
+ smc->clcsock->wq.fasync_list =
+ smc->sk.sk_socket->wq.fasync_list;
}
}
@@ -510,15 +512,18 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
static int smc_connect_abort(struct smc_sock *smc, int reason_code,
int local_contact)
{
+ bool is_smcd = smc->conn.lgr->is_smcd;
+
if (local_contact == SMC_FIRST_CONTACT)
- smc_lgr_forget(smc->conn.lgr);
- if (smc->conn.lgr->is_smcd)
+ smc_lgr_cleanup_early(&smc->conn);
+ else
+ smc_conn_free(&smc->conn);
+ if (is_smcd)
/* there is only one lgr role for SMC-D; use server lock */
mutex_unlock(&smc_server_lgr_pending);
else
mutex_unlock(&smc_client_lgr_pending);
- smc_conn_free(&smc->conn);
smc->connect_nonblock = 0;
return reason_code;
}
@@ -1089,7 +1094,6 @@ static void smc_listen_out_err(struct smc_sock *new_smc)
if (newsmcsk->sk_state == SMC_INIT)
sock_put(&new_smc->sk); /* passive closing */
newsmcsk->sk_state = SMC_CLOSED;
- smc_conn_free(&new_smc->conn);
smc_listen_out(new_smc);
}
@@ -1100,12 +1104,13 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
{
/* RDMA setup failed, switch back to TCP */
if (local_contact == SMC_FIRST_CONTACT)
- smc_lgr_forget(new_smc->conn.lgr);
+ smc_lgr_cleanup_early(&new_smc->conn);
+ else
+ smc_conn_free(&new_smc->conn);
if (reason_code < 0) { /* error, no fallback possible */
smc_listen_out_err(new_smc);
return;
}
- smc_conn_free(&new_smc->conn);
smc_switch_to_fallback(new_smc);
new_smc->fallback_rsn = reason_code;
if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
@@ -1168,16 +1173,18 @@ static int smc_listen_ism_init(struct smc_sock *new_smc,
new_smc->conn.lgr->vlan_id,
new_smc->conn.lgr->smcd)) {
if (ini->cln_first_contact == SMC_FIRST_CONTACT)
- smc_lgr_forget(new_smc->conn.lgr);
- smc_conn_free(&new_smc->conn);
+ smc_lgr_cleanup_early(&new_smc->conn);
+ else
+ smc_conn_free(&new_smc->conn);
return SMC_CLC_DECL_SMCDNOTALK;
}
/* Create send and receive buffers */
if (smc_buf_create(new_smc, true)) {
if (ini->cln_first_contact == SMC_FIRST_CONTACT)
- smc_lgr_forget(new_smc->conn.lgr);
- smc_conn_free(&new_smc->conn);
+ smc_lgr_cleanup_early(&new_smc->conn);
+ else
+ smc_conn_free(&new_smc->conn);
return SMC_CLC_DECL_MEM;
}
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 0879f7bed967..86cccc24e52e 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -372,7 +372,9 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline));
dclc.hdr.version = SMC_CLC_V1;
dclc.hdr.flag = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? 1 : 0;
- memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid));
+ if (smc->conn.lgr && !smc->conn.lgr->is_smcd)
+ memcpy(dclc.id_for_peer, local_systemid,
+ sizeof(local_systemid));
dclc.peer_diagnosis = htonl(peer_diag_info);
memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 2249de5379ee..5b085efa3bce 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -162,6 +162,18 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
conn->lgr = NULL;
}
+void smc_lgr_cleanup_early(struct smc_connection *conn)
+{
+ struct smc_link_group *lgr = conn->lgr;
+
+ if (!lgr)
+ return;
+
+ smc_conn_free(conn);
+ smc_lgr_forget(lgr);
+ smc_lgr_schedule_free_work_fast(lgr);
+}
+
/* Send delete link, either as client to request the initiation
* of the DELETE LINK sequence from server; or as server to
* initiate the delete processing. See smc_llc_rx_delete_link().
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index c472e12951d1..234ae25f0025 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -296,6 +296,7 @@ struct smc_clc_msg_accept_confirm;
struct smc_clc_msg_local;
void smc_lgr_forget(struct smc_link_group *lgr);
+void smc_lgr_cleanup_early(struct smc_connection *conn);
void smc_lgr_terminate(struct smc_link_group *lgr, bool soft);
void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport);
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid,
@@ -316,7 +317,6 @@ int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini);
void smc_conn_free(struct smc_connection *conn);
int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini);
-void smcd_conn_free(struct smc_connection *conn);
void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr);
int smc_core_init(void);
void smc_core_exit(void);
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index f38727ecf8b2..e1f64f4ba236 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -39,16 +39,15 @@ static void smc_diag_msg_common_fill(struct smc_diag_msg *r, struct sock *sk)
{
struct smc_sock *smc = smc_sk(sk);
+ memset(r, 0, sizeof(*r));
r->diag_family = sk->sk_family;
+ sock_diag_save_cookie(sk, r->id.idiag_cookie);
if (!smc->clcsock)
return;
r->id.idiag_sport = htons(smc->clcsock->sk->sk_num);
r->id.idiag_dport = smc->clcsock->sk->sk_dport;
r->id.idiag_if = smc->clcsock->sk->sk_bound_dev_if;
- sock_diag_save_cookie(sk, r->id.idiag_cookie);
if (sk->sk_protocol == SMCPROTO_SMC) {
- memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
- memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));
r->id.idiag_src[0] = smc->clcsock->sk->sk_rcv_saddr;
r->id.idiag_dst[0] = smc->clcsock->sk->sk_daddr;
#if IS_ENABLED(CONFIG_IPV6)
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 548632621f4b..d6ba186f67e2 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -573,6 +573,8 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data)
struct smc_ib_device *smcibdev;
smcibdev = ib_get_client_data(ibdev, &smc_ib_client);
+ if (!smcibdev || smcibdev->ibdev != ibdev)
+ return;
ib_set_client_data(ibdev, &smc_ib_client, NULL);
spin_lock(&smc_ib_devices.lock);
list_del_init(&smcibdev->list); /* remove from smc_ib_devices */
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 095be887753e..125297c9aa3e 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -288,8 +288,8 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct ib_reg_wr *reg_wr;
+ int i, n, dma_nents;
struct ib_mr *ibmr;
- int i, n;
u8 key;
if (nsegs > ia->ri_max_frwr_depth)
@@ -313,15 +313,16 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
break;
}
mr->mr_dir = rpcrdma_data_dir(writing);
+ mr->mr_nents = i;
- mr->mr_nents =
- ib_dma_map_sg(ia->ri_id->device, mr->mr_sg, i, mr->mr_dir);
- if (!mr->mr_nents)
+ dma_nents = ib_dma_map_sg(ia->ri_id->device, mr->mr_sg, mr->mr_nents,
+ mr->mr_dir);
+ if (!dma_nents)
goto out_dmamap_err;
ibmr = mr->frwr.fr_mr;
- n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
- if (unlikely(n != mr->mr_nents))
+ n = ib_map_mr_sg(ibmr, mr->mr_sg, dma_nents, NULL, PAGE_SIZE);
+ if (n != dma_nents)
goto out_mapmr_err;
ibmr->iova &= 0x00000000ffffffff;
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 7c35094c20b8..bb9862410e68 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -116,6 +116,7 @@ const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = {
[TIPC_NLA_PROP_PRIO] = { .type = NLA_U32 },
[TIPC_NLA_PROP_TOL] = { .type = NLA_U32 },
[TIPC_NLA_PROP_WIN] = { .type = NLA_U32 },
+ [TIPC_NLA_PROP_MTU] = { .type = NLA_U32 },
[TIPC_NLA_PROP_BROADCAST] = { .type = NLA_U32 },
[TIPC_NLA_PROP_BROADCAST_RATIO] = { .type = NLA_U32 }
};
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 99b28b69fc17..0c88778c88b5 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -278,7 +278,7 @@ struct tipc_crypto *tipc_node_crypto_rx_by_list(struct list_head *pos)
}
#endif
-void tipc_node_free(struct rcu_head *rp)
+static void tipc_node_free(struct rcu_head *rp)
{
struct tipc_node *n = container_of(rp, struct tipc_node, rcu);
@@ -2798,7 +2798,7 @@ static int tipc_nl_retrieve_nodeid(struct nlattr **attrs, u8 **node_id)
return 0;
}
-int __tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info)
+static int __tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *attrs[TIPC_NLA_NODE_MAX + 1];
struct net *net = sock_net(skb->sk);
@@ -2875,7 +2875,8 @@ int tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info)
return err;
}
-int __tipc_nl_node_flush_key(struct sk_buff *skb, struct genl_info *info)
+static int __tipc_nl_node_flush_key(struct sk_buff *skb,
+ struct genl_info *info)
{
struct net *net = sock_net(skb->sk);
struct tipc_net *tn = tipc_net(net);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index f9b4fb92c0b1..693e8902161e 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2441,6 +2441,8 @@ static int tipc_wait_for_connect(struct socket *sock, long *timeo_p)
return -ETIMEDOUT;
if (signal_pending(current))
return sock_intr_errno(*timeo_p);
+ if (sk->sk_state == TIPC_DISCONNECTING)
+ break;
add_wait_queue(sk_sleep(sk), &wait);
done = sk_wait_event(sk, timeo_p, tipc_sk_connected(sk),
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 1ba5a92832bb..1c5574e2e058 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -593,7 +593,7 @@ struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context,
u32 seq, u64 *p_record_sn)
{
u64 record_sn = context->hint_record_sn;
- struct tls_record_info *info;
+ struct tls_record_info *info, *last;
info = context->retransmit_hint;
if (!info ||
@@ -605,6 +605,24 @@ struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context,
struct tls_record_info, list);
if (!info)
return NULL;
+ /* send the start_marker record if seq number is before the
+ * tls offload start marker sequence number. This record is
+ * required to handle TCP packets which are before TLS offload
+ * started.
+ * And if it's not start marker, look if this seq number
+ * belongs to the list.
+ */
+ if (likely(!tls_record_is_start_marker(info))) {
+ /* we have the first record, get the last record to see
+ * if this seq number belongs to the list.
+ */
+ last = list_last_entry(&context->records_list,
+ struct tls_record_info, list);
+
+ if (!between(seq, tls_record_start_seq(info),
+ last->end_seq))
+ return NULL;
+ }
record_sn = context->unacked_record_sn;
}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 62c12cb5763e..68debcb28fa4 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -682,6 +682,7 @@ static int unix_set_peek_off(struct sock *sk, int val)
return 0;
}
+#ifdef CONFIG_PROC_FS
static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
{
struct sock *sk = sock->sk;
@@ -692,6 +693,9 @@ static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
seq_printf(m, "scm_fds: %u\n", READ_ONCE(u->scm_stat.nr_fds));
}
}
+#else
+#define unix_show_fdinfo NULL
+#endif
static const struct proto_ops unix_stream_ops = {
.family = PF_UNIX,
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 9c5b2a91baad..a5f28708e0e7 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -451,6 +451,12 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
if (vsk->transport == new_transport)
return 0;
+ /* transport->release() must be called with sock lock acquired.
+ * This path can only be taken during vsock_stream_connect(),
+ * where we have already held the sock lock.
+ * In the other cases, this function is called on a new socket
+ * which is not assigned to any transport.
+ */
vsk->transport->release(vsk);
vsock_deassign_transport(vsk);
}
@@ -753,20 +759,18 @@ static void __vsock_release(struct sock *sk, int level)
vsk = vsock_sk(sk);
pending = NULL; /* Compiler warning. */
- /* The release call is supposed to use lock_sock_nested()
- * rather than lock_sock(), if a sock lock should be acquired.
- */
- if (vsk->transport)
- vsk->transport->release(vsk);
- else if (sk->sk_type == SOCK_STREAM)
- vsock_remove_sock(vsk);
-
/* When "level" is SINGLE_DEPTH_NESTING, use the nested
* version to avoid the warning "possible recursive locking
* detected". When "level" is 0, lock_sock_nested(sk, level)
* is the same as lock_sock(sk).
*/
lock_sock_nested(sk, level);
+
+ if (vsk->transport)
+ vsk->transport->release(vsk);
+ else if (sk->sk_type == SOCK_STREAM)
+ vsock_remove_sock(vsk);
+
sock_orphan(sk);
sk->sk_shutdown = SHUTDOWN_MASK;
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index 3492c021925f..630b851f8150 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -526,12 +526,9 @@ static bool hvs_close_lock_held(struct vsock_sock *vsk)
static void hvs_release(struct vsock_sock *vsk)
{
- struct sock *sk = sk_vsock(vsk);
bool remove_sock;
- lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
remove_sock = hvs_close_lock_held(vsk);
- release_sock(sk);
if (remove_sock)
vsock_remove_sock(vsk);
}
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index d9f0c9c5425a..f3c4bab2f737 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -829,7 +829,6 @@ void virtio_transport_release(struct vsock_sock *vsk)
struct sock *sk = &vsk->sk;
bool remove_sock = true;
- lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
if (sk->sk_type == SOCK_STREAM)
remove_sock = virtio_transport_close(vsk);
@@ -837,7 +836,6 @@ void virtio_transport_release(struct vsock_sock *vsk)
list_del(&pkt->list);
virtio_transport_free_pkt(pkt);
}
- release_sock(sk);
if (remove_sock)
vsock_remove_sock(vsk);
diff --git a/net/wireless/ethtool.c b/net/wireless/ethtool.c
index a9c0f368db5d..24e18405cdb4 100644
--- a/net/wireless/ethtool.c
+++ b/net/wireless/ethtool.c
@@ -7,9 +7,13 @@
void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct device *pdev = wiphy_dev(wdev->wiphy);
- strlcpy(info->driver, wiphy_dev(wdev->wiphy)->driver->name,
- sizeof(info->driver));
+ if (pdev->driver)
+ strlcpy(info->driver, pdev->driver->name,
+ sizeof(info->driver));
+ else
+ strlcpy(info->driver, "N/A", sizeof(info->driver));
strlcpy(info->version, init_utsname()->release, sizeof(info->version));
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 123b8d720a59..5b19e9fac4aa 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -20,6 +20,7 @@
#include <linux/netlink.h>
#include <linux/nospec.h>
#include <linux/etherdevice.h>
+#include <linux/if_vlan.h>
#include <net/net_namespace.h>
#include <net/genetlink.h>
#include <net/cfg80211.h>
@@ -437,6 +438,7 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT] = { .type = NLA_FLAG },
[NL80211_ATTR_CONTROL_PORT_OVER_NL80211] = { .type = NLA_FLAG },
[NL80211_ATTR_PRIVACY] = { .type = NLA_FLAG },
+ [NL80211_ATTR_STATUS_CODE] = { .type = NLA_U16 },
[NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 },
[NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 },
[NL80211_ATTR_PID] = { .type = NLA_U32 },
@@ -4799,8 +4801,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
err = nl80211_parse_he_obss_pd(
info->attrs[NL80211_ATTR_HE_OBSS_PD],
&params.he_obss_pd);
- if (err)
- return err;
+ goto out;
}
nl80211_calculate_ap_params(&params);
@@ -4822,6 +4823,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
}
wdev_unlock(wdev);
+out:
kfree(params.acl);
return err;
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index fff9a74891fc..1a8218f1bbe0 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -2276,7 +2276,7 @@ static void handle_channel_custom(struct wiphy *wiphy,
break;
}
- if (IS_ERR(reg_rule)) {
+ if (IS_ERR_OR_NULL(reg_rule)) {
pr_debug("Disabling freq %d MHz as custom regd has no rule that fits it\n",
chan->center_freq);
if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) {
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index df600487a68d..356f90e4522b 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -217,6 +217,7 @@ static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
static void xsk_flush(struct xdp_sock *xs)
{
xskq_prod_submit(xs->rx);
+ __xskq_cons_release(xs->umem->fq);
sock_def_readable(&xs->sk);
}
@@ -304,6 +305,7 @@ void xsk_umem_consume_tx_done(struct xdp_umem *umem)
rcu_read_lock();
list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+ __xskq_cons_release(xs->tx);
xs->sk.sk_write_space(&xs->sk);
}
rcu_read_unlock();
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index bec2af11853a..89a01ac4e079 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -271,7 +271,8 @@ static inline void xskq_cons_release(struct xsk_queue *q)
{
/* To improve performance, only update local state here.
* Reflect this to global state when we get new entries
- * from the ring in xskq_cons_get_entries().
+ * from the ring in xskq_cons_get_entries() and whenever
+ * Rx or Tx processing are completed in the NAPI loop.
*/
q->cached_cons++;
}
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index dc651a628dcf..3361e3ac5714 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -300,10 +300,10 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
} else {
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
- htonl(mtu));
+ icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ htonl(mtu));
}
dst_release(dst);