aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/Kconfig1
-rw-r--r--net/atm/mpoa_proc.c17
-rw-r--r--net/atm/proc.c8
-rw-r--r--net/bpfilter/Makefile2
-rw-r--r--net/bridge/br_device.c6
-rw-r--r--net/bridge/br_stp.c3
-rw-r--r--net/ceph/Makefile2
-rw-r--r--net/ceph/ceph_common.c41
-rw-r--r--net/ceph/ceph_fs.c104
-rw-r--r--net/ceph/osd_client.c18
-rw-r--r--net/core/bpf_sk_storage.c5
-rw-r--r--net/core/dev.c40
-rw-r--r--net/core/devlink.c27
-rw-r--r--net/core/drop_monitor.c4
-rw-r--r--net/core/fib_rules.c2
-rw-r--r--net/core/filter.c2
-rw-r--r--net/core/page_pool.c22
-rw-r--r--net/core/pktgen.c44
-rw-r--r--net/core/rtnetlink.c26
-rw-r--r--net/core/skbuff.c6
-rw-r--r--net/core/sock_map.c28
-rw-r--r--net/dsa/tag_ar9331.c2
-rw-r--r--net/dsa/tag_qca.c2
-rw-r--r--net/ethtool/bitset.c3
-rw-r--r--net/hsr/hsr_framereg.c3
-rw-r--r--net/hsr/hsr_slave.c2
-rw-r--r--net/ipv4/cipso_ipv4.c7
-rw-r--r--net/ipv4/icmp.c33
-rw-r--r--net/ipv4/ipconfig.c10
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c16
-rw-r--r--net/ipv4/route.c24
-rw-r--r--net/ipv4/tcp_input.c6
-rw-r--r--net/ipv4/udp.c6
-rw-r--r--net/ipv6/addrconf.c3
-rw-r--r--net/ipv6/ip6_fib.c7
-rw-r--r--net/ipv6/ip6_gre.c8
-rw-r--r--net/ipv6/ip6_icmp.c34
-rw-r--r--net/ipv6/ip6_tunnel.c81
-rw-r--r--net/ipv6/route.c1
-rw-r--r--net/l2tp/l2tp_core.c7
-rw-r--r--net/mac80211/cfg.c2
-rw-r--r--net/mac80211/mlme.c14
-rw-r--r--net/mac80211/rx.c2
-rw-r--r--net/mac80211/tx.c2
-rw-r--r--net/mac80211/util.c34
-rw-r--r--net/mptcp/Kconfig1
-rw-r--r--net/mptcp/protocol.c154
-rw-r--r--net/mptcp/protocol.h4
-rw-r--r--net/netfilter/nf_conntrack_core.c192
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c20
-rw-r--r--net/netfilter/nf_flow_table_offload.c6
-rw-r--r--net/netfilter/nft_set_pipapo.c6
-rw-r--r--net/netfilter/xt_hashlimit.c22
-rw-r--r--net/netfilter/xt_recent.c17
-rw-r--r--net/netlabel/netlabel_domainhash.c3
-rw-r--r--net/netlabel/netlabel_unlabeled.c3
-rw-r--r--net/netlink/af_netlink.c5
-rw-r--r--net/netlink/genetlink.c5
-rw-r--r--net/openvswitch/datapath.c9
-rw-r--r--net/openvswitch/flow_netlink.c18
-rw-r--r--net/openvswitch/flow_table.c6
-rw-r--r--net/openvswitch/meter.c3
-rw-r--r--net/openvswitch/vport.c3
-rw-r--r--net/rds/rdma.c24
-rw-r--r--net/rxrpc/af_rxrpc.c2
-rw-r--r--net/rxrpc/ar-internal.h11
-rw-r--r--net/rxrpc/call_object.c26
-rw-r--r--net/rxrpc/conn_client.c3
-rw-r--r--net/rxrpc/conn_event.c30
-rw-r--r--net/rxrpc/conn_object.c3
-rw-r--r--net/rxrpc/input.c6
-rw-r--r--net/rxrpc/local_object.c23
-rw-r--r--net/rxrpc/output.c27
-rw-r--r--net/rxrpc/peer_event.c42
-rw-r--r--net/sched/cls_flower.c2
-rw-r--r--net/sched/cls_matchall.c1
-rw-r--r--net/sched/cls_tcindex.c43
-rw-r--r--net/sched/sch_fq_pie.c2
-rw-r--r--net/sched/sch_taprio.c92
-rw-r--r--net/sctp/sm_statefuns.c29
-rw-r--r--net/smc/af_smc.c2
-rw-r--r--net/smc/smc_clc.c4
-rw-r--r--net/smc/smc_diag.c5
-rw-r--r--net/sunrpc/addr.c2
-rw-r--r--net/sunrpc/auth.c49
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c1
-rw-r--r--net/sunrpc/auth_gss/gss_mech_switch.c29
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c14
-rw-r--r--net/sunrpc/cache.c93
-rw-r--r--net/sunrpc/clnt.c1
-rw-r--r--net/sunrpc/sched.c4
-rw-r--r--net/sunrpc/stats.c21
-rw-r--r--net/sunrpc/xdr.c2
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c4
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c117
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c20
-rw-r--r--net/sunrpc/xprtrdma/transport.c17
-rw-r--r--net/sunrpc/xprtrdma/verbs.c213
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h14
-rw-r--r--net/tipc/node.c7
-rw-r--r--net/tipc/socket.c2
-rw-r--r--net/tls/tls_device.c20
-rw-r--r--net/unix/af_unix.c11
-rw-r--r--net/wireless/ethtool.c8
-rw-r--r--net/wireless/nl80211.c6
-rw-r--r--net/wireless/reg.c2
-rw-r--r--net/xdp/xdp_umem.c4
-rw-r--r--net/xdp/xsk.c2
-rw-r--r--net/xdp/xsk_queue.h3
-rw-r--r--net/xfrm/xfrm_interface.c6
-rw-r--r--net/xfrm/xfrm_policy.c2
111 files changed, 1230 insertions, 1015 deletions
diff --git a/net/Kconfig b/net/Kconfig
index b0937a700f01..2eeb0e55f7c9 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -189,7 +189,6 @@ config BRIDGE_NETFILTER
depends on NETFILTER_ADVANCED
select NETFILTER_FAMILY_BRIDGE
select SKB_EXTENSIONS
- default m
---help---
Enabling this option will let arptables resp. iptables see bridged
ARP resp. IP traffic. If you want a bridging firewall, you probably
diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c
index 46d6cd9a36ae..829db9eba0cb 100644
--- a/net/atm/mpoa_proc.c
+++ b/net/atm/mpoa_proc.c
@@ -53,15 +53,12 @@ static ssize_t proc_mpc_write(struct file *file, const char __user *buff,
static int parse_qos(const char *buff);
-/*
- * Define allowed FILE OPERATIONS
- */
-static const struct file_operations mpc_file_operations = {
- .open = proc_mpc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .write = proc_mpc_write,
- .release = seq_release,
+static const struct proc_ops mpc_proc_ops = {
+ .proc_open = proc_mpc_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_write = proc_mpc_write,
+ .proc_release = seq_release,
};
/*
@@ -290,7 +287,7 @@ int mpc_proc_init(void)
{
struct proc_dir_entry *p;
- p = proc_create(STAT_FILE_NAME, 0, atm_proc_root, &mpc_file_operations);
+ p = proc_create(STAT_FILE_NAME, 0, atm_proc_root, &mpc_proc_ops);
if (!p) {
pr_err("Unable to initialize /proc/atm/%s\n", STAT_FILE_NAME);
return -ENOMEM;
diff --git a/net/atm/proc.c b/net/atm/proc.c
index c31896707313..4369ffa3302a 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -36,9 +36,9 @@
static ssize_t proc_dev_atm_read(struct file *file, char __user *buf,
size_t count, loff_t *pos);
-static const struct file_operations proc_atm_dev_ops = {
- .read = proc_dev_atm_read,
- .llseek = noop_llseek,
+static const struct proc_ops atm_dev_proc_ops = {
+ .proc_read = proc_dev_atm_read,
+ .proc_lseek = noop_llseek,
};
static void add_stats(struct seq_file *seq, const char *aal,
@@ -359,7 +359,7 @@ int atm_proc_dev_register(struct atm_dev *dev)
goto err_out;
dev->proc_entry = proc_create_data(dev->proc_name, 0, atm_proc_root,
- &proc_atm_dev_ops, dev);
+ &atm_dev_proc_ops, dev);
if (!dev->proc_entry)
goto err_free_name;
return 0;
diff --git a/net/bpfilter/Makefile b/net/bpfilter/Makefile
index aa945ab5b655..36580301da70 100644
--- a/net/bpfilter/Makefile
+++ b/net/bpfilter/Makefile
@@ -3,7 +3,7 @@
# Makefile for the Linux BPFILTER layer.
#
-hostprogs-y := bpfilter_umh
+hostprogs := bpfilter_umh
bpfilter_umh-objs := main.o
KBUILD_HOSTCFLAGS += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi
HOSTCC := $(CC)
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index dc3d2c1dd9d5..0e3dbc5f3c34 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -34,7 +34,6 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
const struct nf_br_ops *nf_ops;
u8 state = BR_STATE_FORWARDING;
const unsigned char *dest;
- struct ethhdr *eth;
u16 vid = 0;
rcu_read_lock();
@@ -54,15 +53,14 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
BR_INPUT_SKB_CB(skb)->frag_max_size = 0;
skb_reset_mac_header(skb);
- eth = eth_hdr(skb);
skb_pull(skb, ETH_HLEN);
if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid, &state))
goto out;
if (IS_ENABLED(CONFIG_INET) &&
- (eth->h_proto == htons(ETH_P_ARP) ||
- eth->h_proto == htons(ETH_P_RARP)) &&
+ (eth_hdr(skb)->h_proto == htons(ETH_P_ARP) ||
+ eth_hdr(skb)->h_proto == htons(ETH_P_RARP)) &&
br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) {
br_do_proxy_suppress_arp(skb, br, vid, NULL);
} else if (IS_ENABLED(CONFIG_IPV6) &&
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 6856a6d9282b..1f14b8455345 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -63,7 +63,8 @@ struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no)
{
struct net_bridge_port *p;
- list_for_each_entry_rcu(p, &br->port_list, list) {
+ list_for_each_entry_rcu(p, &br->port_list, list,
+ lockdep_is_held(&br->lock)) {
if (p->port_no == port_no)
return p;
}
diff --git a/net/ceph/Makefile b/net/ceph/Makefile
index 59d0ba2072de..ce09bb4fb249 100644
--- a/net/ceph/Makefile
+++ b/net/ceph/Makefile
@@ -13,5 +13,5 @@ libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
auth.o auth_none.o \
crypto.o armor.o \
auth_x.o \
- ceph_fs.o ceph_strings.o ceph_hash.o \
+ ceph_strings.o ceph_hash.o \
pagevec.o snapshot.o string_table.o
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index a9d6c97b5b0d..a0e97f6c1072 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -269,7 +269,7 @@ enum {
Opt_abort_on_full,
};
-static const struct fs_parameter_spec ceph_param_specs[] = {
+static const struct fs_parameter_spec ceph_parameters[] = {
fsparam_flag ("abort_on_full", Opt_abort_on_full),
fsparam_flag_no ("cephx_require_signatures", Opt_cephx_require_signatures),
fsparam_flag_no ("cephx_sign_messages", Opt_cephx_sign_messages),
@@ -283,18 +283,13 @@ static const struct fs_parameter_spec ceph_param_specs[] = {
fsparam_u32 ("osd_request_timeout", Opt_osd_request_timeout),
fsparam_u32 ("osdkeepalive", Opt_osdkeepalivetimeout),
__fsparam (fs_param_is_s32, "osdtimeout", Opt_osdtimeout,
- fs_param_deprecated),
+ fs_param_deprecated, NULL),
fsparam_string ("secret", Opt_secret),
fsparam_flag_no ("share", Opt_share),
fsparam_flag_no ("tcp_nodelay", Opt_tcp_nodelay),
{}
};
-static const struct fs_parameter_description ceph_parameters = {
- .name = "libceph",
- .specs = ceph_param_specs,
-};
-
struct ceph_options *ceph_alloc_options(void)
{
struct ceph_options *opt;
@@ -337,7 +332,7 @@ EXPORT_SYMBOL(ceph_destroy_options);
/* get secret from key store */
static int get_secret(struct ceph_crypto_key *dst, const char *name,
- struct fs_context *fc)
+ struct p_log *log)
{
struct key *ukey;
int key_err;
@@ -351,19 +346,19 @@ static int get_secret(struct ceph_crypto_key *dst, const char *name,
key_err = PTR_ERR(ukey);
switch (key_err) {
case -ENOKEY:
- errorf(fc, "libceph: Failed due to key not found: %s",
+ error_plog(log, "Failed due to key not found: %s",
name);
break;
case -EKEYEXPIRED:
- errorf(fc, "libceph: Failed due to expired key: %s",
+ error_plog(log, "Failed due to expired key: %s",
name);
break;
case -EKEYREVOKED:
- errorf(fc, "libceph: Failed due to revoked key: %s",
+ error_plog(log, "Failed due to revoked key: %s",
name);
break;
default:
- errorf(fc, "libceph: Failed due to key error %d: %s",
+ error_plog(log, "Failed due to key error %d: %s",
key_err, name);
}
err = -EPERM;
@@ -383,15 +378,16 @@ out:
}
int ceph_parse_mon_ips(const char *buf, size_t len, struct ceph_options *opt,
- struct fs_context *fc)
+ struct fc_log *l)
{
+ struct p_log log = {.prefix = "libceph", .log = l};
int ret;
/* ip1[:port1][,ip2[:port2]...] */
ret = ceph_parse_ips(buf, buf + len, opt->mon_addr, CEPH_MAX_MON,
&opt->num_mon);
if (ret) {
- errorf(fc, "libceph: Failed to parse monitor IPs: %d", ret);
+ error_plog(&log, "Failed to parse monitor IPs: %d", ret);
return ret;
}
@@ -400,12 +396,13 @@ int ceph_parse_mon_ips(const char *buf, size_t len, struct ceph_options *opt,
EXPORT_SYMBOL(ceph_parse_mon_ips);
int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
- struct fs_context *fc)
+ struct fc_log *l)
{
struct fs_parse_result result;
int token, err;
+ struct p_log log = {.prefix = "libceph", .log = l};
- token = fs_parse(fc, &ceph_parameters, param, &result);
+ token = __fs_parse(&log, ceph_parameters, param, &result);
dout("%s fs_parse '%s' token %d\n", __func__, param->key, token);
if (token < 0)
return token;
@@ -417,7 +414,7 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
&opt->my_addr,
1, NULL);
if (err) {
- errorf(fc, "libceph: Failed to parse ip: %d", err);
+ error_plog(&log, "Failed to parse ip: %d", err);
return err;
}
opt->flags |= CEPH_OPT_MYIP;
@@ -426,7 +423,7 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
case Opt_fsid:
err = parse_fsid(param->string, &opt->fsid);
if (err) {
- errorf(fc, "libceph: Failed to parse fsid: %d", err);
+ error_plog(&log, "Failed to parse fsid: %d", err);
return err;
}
opt->flags |= CEPH_OPT_FSID;
@@ -445,7 +442,7 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
return -ENOMEM;
err = ceph_crypto_key_unarmor(opt->key, param->string);
if (err) {
- errorf(fc, "libceph: Failed to parse secret: %d", err);
+ error_plog(&log, "Failed to parse secret: %d", err);
return err;
}
break;
@@ -456,10 +453,10 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
opt->key = kzalloc(sizeof(*opt->key), GFP_KERNEL);
if (!opt->key)
return -ENOMEM;
- return get_secret(opt->key, param->string, fc);
+ return get_secret(opt->key, param->string, &log);
case Opt_osdtimeout:
- warnf(fc, "libceph: Ignoring osdtimeout");
+ warn_plog(&log, "Ignoring osdtimeout");
break;
case Opt_osdkeepalivetimeout:
/* 0 isn't well defined right now, reject it */
@@ -530,7 +527,7 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
return 0;
out_of_range:
- return invalf(fc, "libceph: %s out of range", param->key);
+ return inval_plog(&log, "%s out of range", param->key);
}
EXPORT_SYMBOL(ceph_parse_param);
diff --git a/net/ceph/ceph_fs.c b/net/ceph/ceph_fs.c
deleted file mode 100644
index 756a2dc10d27..000000000000
--- a/net/ceph/ceph_fs.c
+++ /dev/null
@@ -1,104 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Some non-inline ceph helpers
- */
-#include <linux/module.h>
-#include <linux/ceph/types.h>
-
-/*
- * return true if @layout appears to be valid
- */
-int ceph_file_layout_is_valid(const struct ceph_file_layout *layout)
-{
- __u32 su = layout->stripe_unit;
- __u32 sc = layout->stripe_count;
- __u32 os = layout->object_size;
-
- /* stripe unit, object size must be non-zero, 64k increment */
- if (!su || (su & (CEPH_MIN_STRIPE_UNIT-1)))
- return 0;
- if (!os || (os & (CEPH_MIN_STRIPE_UNIT-1)))
- return 0;
- /* object size must be a multiple of stripe unit */
- if (os < su || os % su)
- return 0;
- /* stripe count must be non-zero */
- if (!sc)
- return 0;
- return 1;
-}
-
-void ceph_file_layout_from_legacy(struct ceph_file_layout *fl,
- struct ceph_file_layout_legacy *legacy)
-{
- fl->stripe_unit = le32_to_cpu(legacy->fl_stripe_unit);
- fl->stripe_count = le32_to_cpu(legacy->fl_stripe_count);
- fl->object_size = le32_to_cpu(legacy->fl_object_size);
- fl->pool_id = le32_to_cpu(legacy->fl_pg_pool);
- if (fl->pool_id == 0 && fl->stripe_unit == 0 &&
- fl->stripe_count == 0 && fl->object_size == 0)
- fl->pool_id = -1;
-}
-EXPORT_SYMBOL(ceph_file_layout_from_legacy);
-
-void ceph_file_layout_to_legacy(struct ceph_file_layout *fl,
- struct ceph_file_layout_legacy *legacy)
-{
- legacy->fl_stripe_unit = cpu_to_le32(fl->stripe_unit);
- legacy->fl_stripe_count = cpu_to_le32(fl->stripe_count);
- legacy->fl_object_size = cpu_to_le32(fl->object_size);
- if (fl->pool_id >= 0)
- legacy->fl_pg_pool = cpu_to_le32(fl->pool_id);
- else
- legacy->fl_pg_pool = 0;
-}
-EXPORT_SYMBOL(ceph_file_layout_to_legacy);
-
-int ceph_flags_to_mode(int flags)
-{
- int mode;
-
-#ifdef O_DIRECTORY /* fixme */
- if ((flags & O_DIRECTORY) == O_DIRECTORY)
- return CEPH_FILE_MODE_PIN;
-#endif
-
- switch (flags & O_ACCMODE) {
- case O_WRONLY:
- mode = CEPH_FILE_MODE_WR;
- break;
- case O_RDONLY:
- mode = CEPH_FILE_MODE_RD;
- break;
- case O_RDWR:
- case O_ACCMODE: /* this is what the VFS does */
- mode = CEPH_FILE_MODE_RDWR;
- break;
- }
-#ifdef O_LAZY
- if (flags & O_LAZY)
- mode |= CEPH_FILE_MODE_LAZY;
-#endif
-
- return mode;
-}
-EXPORT_SYMBOL(ceph_flags_to_mode);
-
-int ceph_caps_for_mode(int mode)
-{
- int caps = CEPH_CAP_PIN;
-
- if (mode & CEPH_FILE_MODE_RD)
- caps |= CEPH_CAP_FILE_SHARED |
- CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE;
- if (mode & CEPH_FILE_MODE_WR)
- caps |= CEPH_CAP_FILE_EXCL |
- CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER |
- CEPH_CAP_AUTH_SHARED | CEPH_CAP_AUTH_EXCL |
- CEPH_CAP_XATTR_SHARED | CEPH_CAP_XATTR_EXCL;
- if (mode & CEPH_FILE_MODE_LAZY)
- caps |= CEPH_CAP_FILE_LAZYIO;
-
- return caps;
-}
-EXPORT_SYMBOL(ceph_caps_for_mode);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index ba45b074a362..b68b376d8c2f 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -402,7 +402,7 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req,
case CEPH_OSD_OP_LIST_WATCHERS:
ceph_osd_data_release(&op->list_watchers.response_data);
break;
- case CEPH_OSD_OP_COPY_FROM:
+ case CEPH_OSD_OP_COPY_FROM2:
ceph_osd_data_release(&op->copy_from.osd_data);
break;
default:
@@ -697,7 +697,7 @@ static void get_num_data_items(struct ceph_osd_request *req,
case CEPH_OSD_OP_SETXATTR:
case CEPH_OSD_OP_CMPXATTR:
case CEPH_OSD_OP_NOTIFY_ACK:
- case CEPH_OSD_OP_COPY_FROM:
+ case CEPH_OSD_OP_COPY_FROM2:
*num_request_data_items += 1;
break;
@@ -1029,7 +1029,7 @@ static u32 osd_req_encode_op(struct ceph_osd_op *dst,
case CEPH_OSD_OP_CREATE:
case CEPH_OSD_OP_DELETE:
break;
- case CEPH_OSD_OP_COPY_FROM:
+ case CEPH_OSD_OP_COPY_FROM2:
dst->copy_from.snapid = cpu_to_le64(src->copy_from.snapid);
dst->copy_from.src_version =
cpu_to_le64(src->copy_from.src_version);
@@ -1966,7 +1966,7 @@ static void setup_request_data(struct ceph_osd_request *req)
ceph_osdc_msg_data_add(request_msg,
&op->notify_ack.request_data);
break;
- case CEPH_OSD_OP_COPY_FROM:
+ case CEPH_OSD_OP_COPY_FROM2:
ceph_osdc_msg_data_add(request_msg,
&op->copy_from.osd_data);
break;
@@ -5315,6 +5315,7 @@ static int osd_req_op_copy_from_init(struct ceph_osd_request *req,
struct ceph_object_locator *src_oloc,
u32 src_fadvise_flags,
u32 dst_fadvise_flags,
+ u32 truncate_seq, u64 truncate_size,
u8 copy_from_flags)
{
struct ceph_osd_req_op *op;
@@ -5325,7 +5326,8 @@ static int osd_req_op_copy_from_init(struct ceph_osd_request *req,
if (IS_ERR(pages))
return PTR_ERR(pages);
- op = _osd_req_op_init(req, 0, CEPH_OSD_OP_COPY_FROM, dst_fadvise_flags);
+ op = _osd_req_op_init(req, 0, CEPH_OSD_OP_COPY_FROM2,
+ dst_fadvise_flags);
op->copy_from.snapid = src_snapid;
op->copy_from.src_version = src_version;
op->copy_from.flags = copy_from_flags;
@@ -5335,6 +5337,8 @@ static int osd_req_op_copy_from_init(struct ceph_osd_request *req,
end = p + PAGE_SIZE;
ceph_encode_string(&p, end, src_oid->name, src_oid->name_len);
encode_oloc(&p, end, src_oloc);
+ ceph_encode_32(&p, truncate_seq);
+ ceph_encode_64(&p, truncate_size);
op->indata_len = PAGE_SIZE - (end - p);
ceph_osd_data_pages_init(&op->copy_from.osd_data, pages,
@@ -5350,6 +5354,7 @@ int ceph_osdc_copy_from(struct ceph_osd_client *osdc,
struct ceph_object_id *dst_oid,
struct ceph_object_locator *dst_oloc,
u32 dst_fadvise_flags,
+ u32 truncate_seq, u64 truncate_size,
u8 copy_from_flags)
{
struct ceph_osd_request *req;
@@ -5366,7 +5371,8 @@ int ceph_osdc_copy_from(struct ceph_osd_client *osdc,
ret = osd_req_op_copy_from_init(req, src_snapid, src_version, src_oid,
src_oloc, src_fadvise_flags,
- dst_fadvise_flags, copy_from_flags);
+ dst_fadvise_flags, truncate_seq,
+ truncate_size, copy_from_flags);
if (ret)
goto out;
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index 458be6b3eda9..3ab23f698221 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -643,9 +643,10 @@ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
return ERR_PTR(-ENOMEM);
bpf_map_init_from_attr(&smap->map, attr);
+ nbuckets = roundup_pow_of_two(num_possible_cpus());
/* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */
- smap->bucket_log = max_t(u32, 1, ilog2(roundup_pow_of_two(num_possible_cpus())));
- nbuckets = 1U << smap->bucket_log;
+ nbuckets = max_t(u32, 2, nbuckets);
+ smap->bucket_log = ilog2(nbuckets);
cost = sizeof(*smap->buckets) * nbuckets + sizeof(*smap);
ret = bpf_map_charge_init(&smap->map.memory, cost);
diff --git a/net/core/dev.c b/net/core/dev.c
index 17529d49faec..e10bd680dc03 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -146,7 +146,6 @@
#include "net-sysfs.h"
#define MAX_GRO_SKBS 8
-#define MAX_NEST_DEV 8
/* This should be increased if a protocol with a bigger head is added. */
#define GRO_MAX_HEAD (MAX_HEADER + 128)
@@ -331,6 +330,12 @@ int netdev_name_node_alt_destroy(struct net_device *dev, const char *name)
name_node = netdev_name_node_lookup(net, name);
if (!name_node)
return -ENOENT;
+ /* lookup might have found our primary name or a name belonging
+ * to another device.
+ */
+ if (name_node == dev->name_node || name_node->dev != dev)
+ return -EINVAL;
+
__netdev_name_node_alt_destroy(name_node);
return 0;
@@ -3657,26 +3662,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
qdisc_calculate_pkt_len(skb, q);
if (q->flags & TCQ_F_NOLOCK) {
- if ((q->flags & TCQ_F_CAN_BYPASS) && READ_ONCE(q->empty) &&
- qdisc_run_begin(q)) {
- if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED,
- &q->state))) {
- __qdisc_drop(skb, &to_free);
- rc = NET_XMIT_DROP;
- goto end_run;
- }
- qdisc_bstats_cpu_update(q, skb);
-
- rc = NET_XMIT_SUCCESS;
- if (sch_direct_xmit(skb, q, dev, txq, NULL, true))
- __qdisc_run(q);
-
-end_run:
- qdisc_run_end(q);
- } else {
- rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
- qdisc_run(q);
- }
+ rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
+ qdisc_run(q);
if (unlikely(to_free))
kfree_skb_list(to_free);
@@ -4527,14 +4514,14 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
/* Reinjected packets coming from act_mirred or similar should
* not get XDP generic processing.
*/
- if (skb_cloned(skb) || skb_is_tc_redirected(skb))
+ if (skb_is_tc_redirected(skb))
return XDP_PASS;
/* XDP packets must be linear and must have sufficient headroom
* of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
* native XDP provides, thus we need to do it here as well.
*/
- if (skb_is_nonlinear(skb) ||
+ if (skb_cloned(skb) || skb_is_nonlinear(skb) ||
skb_headroom(skb) < XDP_PACKET_HEADROOM) {
int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
int troom = skb->tail + skb->data_len - skb->end;
@@ -5792,7 +5779,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
if (&ptype->list == head)
goto normal;
- if (IS_ERR(pp) && PTR_ERR(pp) == -EINPROGRESS) {
+ if (PTR_ERR(pp) == -EINPROGRESS) {
ret = GRO_CONSUMED;
goto ok;
}
@@ -7201,8 +7188,8 @@ static int __netdev_walk_all_lower_dev(struct net_device *dev,
return 0;
}
-static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
- struct list_head **iter)
+struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
+ struct list_head **iter)
{
struct netdev_adjacent *lower;
@@ -7214,6 +7201,7 @@ static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
return lower->dev;
}
+EXPORT_SYMBOL(netdev_next_lower_dev_rcu);
static u8 __netdev_upper_depth(struct net_device *dev)
{
diff --git a/net/core/devlink.c b/net/core/devlink.c
index ca1df0ec3c97..8d0b558be942 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -3986,6 +3986,12 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
goto out_unlock;
}
+ /* return 0 if there is no further data to read */
+ if (start_offset >= region->size) {
+ err = 0;
+ goto out_unlock;
+ }
+
hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
&devlink_nl_family, NLM_F_ACK | NLM_F_MULTI,
DEVLINK_CMD_REGION_READ);
@@ -6872,26 +6878,33 @@ int devlink_dpipe_table_register(struct devlink *devlink,
void *priv, bool counter_control_extern)
{
struct devlink_dpipe_table *table;
-
- if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name))
- return -EEXIST;
+ int err = 0;
if (WARN_ON(!table_ops->size_get))
return -EINVAL;
+ mutex_lock(&devlink->lock);
+
+ if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name)) {
+ err = -EEXIST;
+ goto unlock;
+ }
+
table = kzalloc(sizeof(*table), GFP_KERNEL);
- if (!table)
- return -ENOMEM;
+ if (!table) {
+ err = -ENOMEM;
+ goto unlock;
+ }
table->name = table_name;
table->table_ops = table_ops;
table->priv = priv;
table->counter_control_extern = counter_control_extern;
- mutex_lock(&devlink->lock);
list_add_tail_rcu(&table->list, &devlink->dpipe_table_list);
+unlock:
mutex_unlock(&devlink->lock);
- return 0;
+ return err;
}
EXPORT_SYMBOL_GPL(devlink_dpipe_table_register);
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index ea46fc6aa883..31700e0c3928 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -1000,8 +1000,10 @@ static void net_dm_hw_monitor_stop(struct netlink_ext_ack *extack)
{
int cpu;
- if (!monitor_hw)
+ if (!monitor_hw) {
NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already disabled");
+ return;
+ }
monitor_hw = false;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 3e7e15278c46..bd7eba9066f8 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -974,7 +974,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
frh = nlmsg_data(nlh);
frh->family = ops->family;
- frh->table = rule->table;
+ frh->table = rule->table < 256 ? rule->table : RT_TABLE_COMPAT;
if (nla_put_u32(skb, FRA_TABLE, rule->table))
goto nla_put_failure;
if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefixlen))
diff --git a/net/core/filter.c b/net/core/filter.c
index 792e3744b915..c180871e606d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1573,7 +1573,7 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
return -EPERM;
prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
- if (IS_ERR(prog) && PTR_ERR(prog) == -EINVAL)
+ if (PTR_ERR(prog) == -EINVAL)
prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SK_REUSEPORT);
if (IS_ERR(prog))
return PTR_ERR(prog);
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 9b7cbe35df37..10d2b255df5e 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -99,8 +99,7 @@ EXPORT_SYMBOL(page_pool_create);
static void __page_pool_return_page(struct page_pool *pool, struct page *page);
noinline
-static struct page *page_pool_refill_alloc_cache(struct page_pool *pool,
- bool refill)
+static struct page *page_pool_refill_alloc_cache(struct page_pool *pool)
{
struct ptr_ring *r = &pool->ring;
struct page *page;
@@ -141,8 +140,7 @@ static struct page *page_pool_refill_alloc_cache(struct page_pool *pool,
page = NULL;
break;
}
- } while (pool->alloc.count < PP_ALLOC_CACHE_REFILL &&
- refill);
+ } while (pool->alloc.count < PP_ALLOC_CACHE_REFILL);
/* Return last page */
if (likely(pool->alloc.count > 0))
@@ -155,20 +153,16 @@ static struct page *page_pool_refill_alloc_cache(struct page_pool *pool,
/* fast path */
static struct page *__page_pool_get_cached(struct page_pool *pool)
{
- bool refill = false;
struct page *page;
- /* Test for safe-context, caller should provide this guarantee */
- if (likely(in_serving_softirq())) {
- if (likely(pool->alloc.count)) {
- /* Fast-path */
- page = pool->alloc.cache[--pool->alloc.count];
- return page;
- }
- refill = true;
+ /* Caller MUST guarantee safe non-concurrent access, e.g. softirq */
+ if (likely(pool->alloc.count)) {
+ /* Fast-path */
+ page = pool->alloc.cache[--pool->alloc.count];
+ } else {
+ page = page_pool_refill_alloc_cache(pool);
}
- page = page_pool_refill_alloc_cache(pool, refill);
return page;
}
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 294bfcf0ce0e..acc849df60b5 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -535,12 +535,12 @@ static int pgctrl_open(struct inode *inode, struct file *file)
return single_open(file, pgctrl_show, PDE_DATA(inode));
}
-static const struct file_operations pktgen_fops = {
- .open = pgctrl_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .write = pgctrl_write,
- .release = single_release,
+static const struct proc_ops pktgen_proc_ops = {
+ .proc_open = pgctrl_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_write = pgctrl_write,
+ .proc_release = single_release,
};
static int pktgen_if_show(struct seq_file *seq, void *v)
@@ -1707,12 +1707,12 @@ static int pktgen_if_open(struct inode *inode, struct file *file)
return single_open(file, pktgen_if_show, PDE_DATA(inode));
}
-static const struct file_operations pktgen_if_fops = {
- .open = pktgen_if_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .write = pktgen_if_write,
- .release = single_release,
+static const struct proc_ops pktgen_if_proc_ops = {
+ .proc_open = pktgen_if_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_write = pktgen_if_write,
+ .proc_release = single_release,
};
static int pktgen_thread_show(struct seq_file *seq, void *v)
@@ -1844,12 +1844,12 @@ static int pktgen_thread_open(struct inode *inode, struct file *file)
return single_open(file, pktgen_thread_show, PDE_DATA(inode));
}
-static const struct file_operations pktgen_thread_fops = {
- .open = pktgen_thread_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .write = pktgen_thread_write,
- .release = single_release,
+static const struct proc_ops pktgen_thread_proc_ops = {
+ .proc_open = pktgen_thread_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_write = pktgen_thread_write,
+ .proc_release = single_release,
};
/* Think find or remove for NN */
@@ -1926,7 +1926,7 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d
pkt_dev->entry = proc_create_data(dev->name, 0600,
pn->proc_dir,
- &pktgen_if_fops,
+ &pktgen_if_proc_ops,
pkt_dev);
if (!pkt_dev->entry)
pr_err("can't move proc entry for '%s'\n",
@@ -3638,7 +3638,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
pkt_dev->clone_skb = pg_clone_skb_d;
pkt_dev->entry = proc_create_data(ifname, 0600, t->net->proc_dir,
- &pktgen_if_fops, pkt_dev);
+ &pktgen_if_proc_ops, pkt_dev);
if (!pkt_dev->entry) {
pr_err("cannot create %s/%s procfs entry\n",
PG_PROC_DIR, ifname);
@@ -3708,7 +3708,7 @@ static int __net_init pktgen_create_thread(int cpu, struct pktgen_net *pn)
t->tsk = p;
pe = proc_create_data(t->tsk->comm, 0600, pn->proc_dir,
- &pktgen_thread_fops, t);
+ &pktgen_thread_proc_ops, t);
if (!pe) {
pr_err("cannot create %s/%s procfs entry\n",
PG_PROC_DIR, t->tsk->comm);
@@ -3793,7 +3793,7 @@ static int __net_init pg_net_init(struct net *net)
pr_warn("cannot create /proc/net/%s\n", PG_PROC_DIR);
return -ENODEV;
}
- pe = proc_create(PGCTRL, 0600, pn->proc_dir, &pktgen_fops);
+ pe = proc_create(PGCTRL, 0600, pn->proc_dir, &pktgen_proc_ops);
if (pe == NULL) {
pr_err("cannot create %s procfs entry\n", PGCTRL);
ret = -EINVAL;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 09c44bf2e1d2..e1152f4ffe33 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3504,27 +3504,25 @@ static int rtnl_alt_ifname(int cmd, struct net_device *dev, struct nlattr *attr,
if (err)
return err;
- alt_ifname = nla_data(attr);
+ alt_ifname = nla_strdup(attr, GFP_KERNEL);
+ if (!alt_ifname)
+ return -ENOMEM;
+
if (cmd == RTM_NEWLINKPROP) {
- alt_ifname = kstrdup(alt_ifname, GFP_KERNEL);
- if (!alt_ifname)
- return -ENOMEM;
err = netdev_name_node_alt_create(dev, alt_ifname);
- if (err) {
- kfree(alt_ifname);
- return err;
- }
+ if (!err)
+ alt_ifname = NULL;
} else if (cmd == RTM_DELLINKPROP) {
err = netdev_name_node_alt_destroy(dev, alt_ifname);
- if (err)
- return err;
} else {
- WARN_ON(1);
- return 0;
+ WARN_ON_ONCE(1);
+ err = -EINVAL;
}
- *changed = true;
- return 0;
+ kfree(alt_ifname);
+ if (!err)
+ *changed = true;
+ return err;
}
static int rtnl_linkprop(int cmd, struct sk_buff *skb, struct nlmsghdr *nlh,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 864cb9e9622f..e1101a4f90a6 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -467,7 +467,6 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
return NULL;
}
- /* use OR instead of assignment to avoid clearing of bits in mask */
if (pfmemalloc)
skb->pfmemalloc = 1;
skb->head_frag = 1;
@@ -527,7 +526,6 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
return NULL;
}
- /* use OR instead of assignment to avoid clearing of bits in mask */
if (nc->page.pfmemalloc)
skb->pfmemalloc = 1;
skb->head_frag = 1;
@@ -4805,9 +4803,9 @@ static __sum16 *skb_checksum_setup_ip(struct sk_buff *skb,
typeof(IPPROTO_IP) proto,
unsigned int off)
{
- switch (proto) {
- int err;
+ int err;
+ switch (proto) {
case IPPROTO_TCP:
err = skb_maybe_pull_tail(skb, off + sizeof(struct tcphdr),
off + MAX_TCP_HDR_LEN);
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 8998e356f423..085cef5857bb 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -234,7 +234,6 @@ static void sock_map_free(struct bpf_map *map)
int i;
synchronize_rcu();
- rcu_read_lock();
raw_spin_lock_bh(&stab->lock);
for (i = 0; i < stab->map.max_entries; i++) {
struct sock **psk = &stab->sks[i];
@@ -243,13 +242,15 @@ static void sock_map_free(struct bpf_map *map)
sk = xchg(psk, NULL);
if (sk) {
lock_sock(sk);
+ rcu_read_lock();
sock_map_unref(sk, psk);
+ rcu_read_unlock();
release_sock(sk);
}
}
raw_spin_unlock_bh(&stab->lock);
- rcu_read_unlock();
+ /* wait for psock readers accessing its map link */
synchronize_rcu();
bpf_map_area_free(stab->sks);
@@ -416,14 +417,16 @@ static int sock_map_update_elem(struct bpf_map *map, void *key,
ret = -EINVAL;
goto out;
}
- if (!sock_map_sk_is_suitable(sk) ||
- sk->sk_state != TCP_ESTABLISHED) {
+ if (!sock_map_sk_is_suitable(sk)) {
ret = -EOPNOTSUPP;
goto out;
}
sock_map_sk_acquire(sk);
- ret = sock_map_update_common(map, idx, sk, flags);
+ if (sk->sk_state != TCP_ESTABLISHED)
+ ret = -EOPNOTSUPP;
+ else
+ ret = sock_map_update_common(map, idx, sk, flags);
sock_map_sk_release(sk);
out:
fput(sock->file);
@@ -739,14 +742,16 @@ static int sock_hash_update_elem(struct bpf_map *map, void *key,
ret = -EINVAL;
goto out;
}
- if (!sock_map_sk_is_suitable(sk) ||
- sk->sk_state != TCP_ESTABLISHED) {
+ if (!sock_map_sk_is_suitable(sk)) {
ret = -EOPNOTSUPP;
goto out;
}
sock_map_sk_acquire(sk);
- ret = sock_hash_update_common(map, key, sk, flags);
+ if (sk->sk_state != TCP_ESTABLISHED)
+ ret = -EOPNOTSUPP;
+ else
+ ret = sock_hash_update_common(map, key, sk, flags);
sock_map_sk_release(sk);
out:
fput(sock->file);
@@ -859,19 +864,22 @@ static void sock_hash_free(struct bpf_map *map)
int i;
synchronize_rcu();
- rcu_read_lock();
for (i = 0; i < htab->buckets_num; i++) {
bucket = sock_hash_select_bucket(htab, i);
raw_spin_lock_bh(&bucket->lock);
hlist_for_each_entry_safe(elem, node, &bucket->head, node) {
hlist_del_rcu(&elem->node);
lock_sock(elem->sk);
+ rcu_read_lock();
sock_map_unref(elem->sk, elem);
+ rcu_read_unlock();
release_sock(elem->sk);
}
raw_spin_unlock_bh(&bucket->lock);
}
- rcu_read_unlock();
+
+ /* wait for psock readers accessing its map link */
+ synchronize_rcu();
bpf_map_area_free(htab->buckets);
kfree(htab);
diff --git a/net/dsa/tag_ar9331.c b/net/dsa/tag_ar9331.c
index 466ffa92a474..55b00694cdba 100644
--- a/net/dsa/tag_ar9331.c
+++ b/net/dsa/tag_ar9331.c
@@ -31,7 +31,7 @@ static struct sk_buff *ar9331_tag_xmit(struct sk_buff *skb,
__le16 *phdr;
u16 hdr;
- if (skb_cow_head(skb, 0) < 0)
+ if (skb_cow_head(skb, AR9331_HDR_LEN) < 0)
return NULL;
phdr = skb_push(skb, AR9331_HDR_LEN);
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
index c8a128c9e5e0..70db7c909f74 100644
--- a/net/dsa/tag_qca.c
+++ b/net/dsa/tag_qca.c
@@ -33,7 +33,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
struct dsa_port *dp = dsa_slave_to_port(dev);
u16 *phdr, hdr;
- if (skb_cow_head(skb, 0) < 0)
+ if (skb_cow_head(skb, QCA_HDR_LEN) < 0)
return NULL;
skb_push(skb, QCA_HDR_LEN);
diff --git a/net/ethtool/bitset.c b/net/ethtool/bitset.c
index fce45dac4205..8977fe1f3946 100644
--- a/net/ethtool/bitset.c
+++ b/net/ethtool/bitset.c
@@ -447,7 +447,10 @@ ethnl_update_bitset32_verbose(u32 *bitmap, unsigned int nbits,
"mask only allowed in compact bitset");
return -EINVAL;
}
+
no_mask = tb[ETHTOOL_A_BITSET_NOMASK];
+ if (no_mask)
+ ethnl_bitmap32_clear(bitmap, 0, nbits, mod);
nla_for_each_nested(bit_attr, tb[ETHTOOL_A_BITSET_BITS], rem) {
bool old_val, new_val;
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 364ea2cc028e..3ba7f61be107 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -155,7 +155,8 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr,
new_node->seq_out[i] = seq_out;
spin_lock_bh(&hsr->list_lock);
- list_for_each_entry_rcu(node, node_db, mac_list) {
+ list_for_each_entry_rcu(node, node_db, mac_list,
+ lockdep_is_held(&hsr->list_lock)) {
if (ether_addr_equal(node->macaddress_A, addr))
goto out;
if (ether_addr_equal(node->macaddress_B, addr))
diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c
index ee561297d8a7..fbfd0db182b7 100644
--- a/net/hsr/hsr_slave.c
+++ b/net/hsr/hsr_slave.c
@@ -27,6 +27,8 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb)
rcu_read_lock(); /* hsr->node_db, hsr->ports */
port = hsr_port_get_rcu(skb->dev);
+ if (!port)
+ goto finish_pass;
if (hsr_addr_is_self(port->hsr, eth_hdr(skb)->h_source)) {
/* Directly kill frames sent by ourselves */
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 376882215919..0bd10a1f477f 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1724,6 +1724,7 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
{
unsigned char optbuf[sizeof(struct ip_options) + 40];
struct ip_options *opt = (struct ip_options *)optbuf;
+ int res;
if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES)
return;
@@ -1735,7 +1736,11 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
memset(opt, 0, sizeof(struct ip_options));
opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr);
- if (__ip_options_compile(dev_net(skb->dev), opt, skb, NULL))
+ rcu_read_lock();
+ res = __ip_options_compile(dev_net(skb->dev), opt, skb, NULL);
+ rcu_read_unlock();
+
+ if (res)
return;
if (gateway)
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 18068ed42f25..f369e7ce685b 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -748,6 +748,39 @@ out:;
}
EXPORT_SYMBOL(__icmp_send);
+#if IS_ENABLED(CONFIG_NF_NAT)
+#include <net/netfilter/nf_conntrack.h>
+void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info)
+{
+ struct sk_buff *cloned_skb = NULL;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+ __be32 orig_ip;
+
+ ct = nf_ct_get(skb_in, &ctinfo);
+ if (!ct || !(ct->status & IPS_SRC_NAT)) {
+ icmp_send(skb_in, type, code, info);
+ return;
+ }
+
+ if (skb_shared(skb_in))
+ skb_in = cloned_skb = skb_clone(skb_in, GFP_ATOMIC);
+
+ if (unlikely(!skb_in || skb_network_header(skb_in) < skb_in->head ||
+ (skb_network_header(skb_in) + sizeof(struct iphdr)) >
+ skb_tail_pointer(skb_in) || skb_ensure_writable(skb_in,
+ skb_network_offset(skb_in) + sizeof(struct iphdr))))
+ goto out;
+
+ orig_ip = ip_hdr(skb_in)->saddr;
+ ip_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.ip;
+ icmp_send(skb_in, type, code, info);
+ ip_hdr(skb_in)->saddr = orig_ip;
+out:
+ consume_skb(cloned_skb);
+}
+EXPORT_SYMBOL(icmp_ndo_send);
+#endif
static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
{
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index f35308ff84c3..4438f6b12335 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1334,7 +1334,7 @@ static int __init ipconfig_proc_net_init(void)
/* Create a new file under /proc/net/ipconfig */
static int ipconfig_proc_net_create(const char *name,
- const struct file_operations *fops)
+ const struct proc_ops *proc_ops)
{
char *pname;
struct proc_dir_entry *p;
@@ -1346,7 +1346,7 @@ static int ipconfig_proc_net_create(const char *name,
if (!pname)
return -ENOMEM;
- p = proc_create(pname, 0444, init_net.proc_net, fops);
+ p = proc_create(pname, 0444, init_net.proc_net, proc_ops);
kfree(pname);
if (!p)
return -ENOMEM;
@@ -1355,7 +1355,7 @@ static int ipconfig_proc_net_create(const char *name,
}
/* Write NTP server IP addresses to /proc/net/ipconfig/ntp_servers */
-static int ntp_servers_seq_show(struct seq_file *seq, void *v)
+static int ntp_servers_show(struct seq_file *seq, void *v)
{
int i;
@@ -1365,7 +1365,7 @@ static int ntp_servers_seq_show(struct seq_file *seq, void *v)
}
return 0;
}
-DEFINE_SHOW_ATTRIBUTE(ntp_servers_seq);
+DEFINE_PROC_SHOW_ATTRIBUTE(ntp_servers);
#endif /* CONFIG_PROC_FS */
/*
@@ -1456,7 +1456,7 @@ static int __init ip_auto_config(void)
proc_create_single("pnp", 0444, init_net.proc_net, pnp_seq_show);
if (ipconfig_proc_net_init() == 0)
- ipconfig_proc_net_create("ntp_servers", &ntp_servers_seq_fops);
+ ipconfig_proc_net_create("ntp_servers", &ntp_servers_proc_ops);
#endif /* CONFIG_PROC_FS */
if (!ic_enable)
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 6bdb1ab8af61..f8755a4ae9d4 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -58,7 +58,7 @@ struct clusterip_config {
};
#ifdef CONFIG_PROC_FS
-static const struct file_operations clusterip_proc_fops;
+static const struct proc_ops clusterip_proc_ops;
#endif
struct clusterip_net {
@@ -280,7 +280,7 @@ clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
mutex_lock(&cn->mutex);
c->pde = proc_create_data(buffer, 0600,
cn->procdir,
- &clusterip_proc_fops, c);
+ &clusterip_proc_ops, c);
mutex_unlock(&cn->mutex);
if (!c->pde) {
err = -ENOMEM;
@@ -804,12 +804,12 @@ static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
return size;
}
-static const struct file_operations clusterip_proc_fops = {
- .open = clusterip_proc_open,
- .read = seq_read,
- .write = clusterip_proc_write,
- .llseek = seq_lseek,
- .release = clusterip_proc_release,
+static const struct proc_ops clusterip_proc_ops = {
+ .proc_open = clusterip_proc_open,
+ .proc_read = seq_read,
+ .proc_write = clusterip_proc_write,
+ .proc_lseek = seq_lseek,
+ .proc_release = clusterip_proc_release,
};
#endif /* CONFIG_PROC_FS */
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d5c57b3f77d5..ebe7060d0fc9 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -237,11 +237,11 @@ static int rt_cache_seq_open(struct inode *inode, struct file *file)
return seq_open(file, &rt_cache_seq_ops);
}
-static const struct file_operations rt_cache_seq_fops = {
- .open = rt_cache_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
+static const struct proc_ops rt_cache_proc_ops = {
+ .proc_open = rt_cache_seq_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_release = seq_release,
};
@@ -328,11 +328,11 @@ static int rt_cpu_seq_open(struct inode *inode, struct file *file)
return seq_open(file, &rt_cpu_seq_ops);
}
-static const struct file_operations rt_cpu_seq_fops = {
- .open = rt_cpu_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
+static const struct proc_ops rt_cpu_proc_ops = {
+ .proc_open = rt_cpu_seq_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_release = seq_release,
};
#ifdef CONFIG_IP_ROUTE_CLASSID
@@ -366,12 +366,12 @@ static int __net_init ip_rt_do_proc_init(struct net *net)
struct proc_dir_entry *pde;
pde = proc_create("rt_cache", 0444, net->proc_net,
- &rt_cache_seq_fops);
+ &rt_cache_proc_ops);
if (!pde)
goto err1;
pde = proc_create("rt_cache", 0444,
- net->proc_net_stat, &rt_cpu_seq_fops);
+ net->proc_net_stat, &rt_cpu_proc_ops);
if (!pde)
goto err2;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 316ebdf8151d..6b6b57000dad 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6124,7 +6124,11 @@ static void tcp_rcv_synrecv_state_fastopen(struct sock *sk)
{
struct request_sock *req;
- tcp_try_undo_loss(sk, false);
+ /* If we are still handling the SYNACK RTO, see if timestamp ECR allows
+ * undo. If peer SACKs triggered fast recovery, we can't undo here.
+ */
+ if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
+ tcp_try_undo_loss(sk, false);
/* Reset rtx states to prevent spurious retransmits_timed_out() */
tcp_sk(sk)->retrans_stamp = 0;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index db76b9609299..08a41f1e1cd2 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1857,8 +1857,12 @@ int __udp_disconnect(struct sock *sk, int flags)
inet->inet_dport = 0;
sock_rps_reset_rxhash(sk);
sk->sk_bound_dev_if = 0;
- if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
+ if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) {
inet_reset_saddr(sk);
+ if (sk->sk_prot->rehash &&
+ (sk->sk_userlocks & SOCK_BINDPORT_LOCK))
+ sk->sk_prot->rehash(sk);
+ }
if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) {
sk->sk_prot->unhash(sk);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 39d861d00377..cb493e15959c 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -5718,6 +5718,9 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla)
struct nlattr *tb[IFLA_INET6_MAX + 1];
int err;
+ if (!idev)
+ return -EAFNOSUPPORT;
+
if (nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0)
BUG();
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 58fbde244381..72abf892302f 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1102,8 +1102,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
found++;
break;
}
- if (rt_can_ecmp)
- fallback_ins = fallback_ins ?: ins;
+ fallback_ins = fallback_ins ?: ins;
goto next_iter;
}
@@ -1146,7 +1145,9 @@ next_iter:
}
if (fallback_ins && !found) {
- /* No ECMP-able route found, replace first non-ECMP one */
+ /* No matching route with same ecmp-able-ness found, replace
+ * first matching route
+ */
ins = fallback_ins;
iter = rcu_dereference_protected(*ins,
lockdep_is_held(&rt->fib6_table->tb6_lock));
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 55bfc5149d0c..781ca8c07a0d 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -437,8 +437,6 @@ static int ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return -ENOENT;
switch (type) {
- struct ipv6_tlv_tnl_enc_lim *tel;
- __u32 teli;
case ICMPV6_DEST_UNREACH:
net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
t->parms.name);
@@ -452,7 +450,10 @@ static int ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
break;
}
return 0;
- case ICMPV6_PARAMPROB:
+ case ICMPV6_PARAMPROB: {
+ struct ipv6_tlv_tnl_enc_lim *tel;
+ __u32 teli;
+
teli = 0;
if (code == ICMPV6_HDR_FIELD)
teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);
@@ -468,6 +469,7 @@ static int ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
t->parms.name);
}
return 0;
+ }
case ICMPV6_PKT_TOOBIG:
ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
return 0;
diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c
index 02045494c24c..e0086758b6ee 100644
--- a/net/ipv6/ip6_icmp.c
+++ b/net/ipv6/ip6_icmp.c
@@ -45,4 +45,38 @@ out:
rcu_read_unlock();
}
EXPORT_SYMBOL(icmpv6_send);
+
+#if IS_ENABLED(CONFIG_NF_NAT)
+#include <net/netfilter/nf_conntrack.h>
+void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info)
+{
+ struct sk_buff *cloned_skb = NULL;
+ enum ip_conntrack_info ctinfo;
+ struct in6_addr orig_ip;
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(skb_in, &ctinfo);
+ if (!ct || !(ct->status & IPS_SRC_NAT)) {
+ icmpv6_send(skb_in, type, code, info);
+ return;
+ }
+
+ if (skb_shared(skb_in))
+ skb_in = cloned_skb = skb_clone(skb_in, GFP_ATOMIC);
+
+ if (unlikely(!skb_in || skb_network_header(skb_in) < skb_in->head ||
+ (skb_network_header(skb_in) + sizeof(struct ipv6hdr)) >
+ skb_tail_pointer(skb_in) || skb_ensure_writable(skb_in,
+ skb_network_offset(skb_in) + sizeof(struct ipv6hdr))))
+ goto out;
+
+ orig_ip = ipv6_hdr(skb_in)->saddr;
+ ipv6_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.in6;
+ icmpv6_send(skb_in, type, code, info);
+ ipv6_hdr(skb_in)->saddr = orig_ip;
+out:
+ consume_skb(cloned_skb);
+}
+EXPORT_SYMBOL(icmpv6_ndo_send);
+#endif
#endif
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index b5dd20c4599b..4703b09808d0 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -121,6 +121,7 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev)
/**
* ip6_tnl_lookup - fetch tunnel matching the end-point addresses
+ * @link: ifindex of underlying interface
* @remote: the address of the tunnel exit-point
* @local: the address of the tunnel entry-point
*
@@ -134,37 +135,56 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev)
for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
static struct ip6_tnl *
-ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local)
+ip6_tnl_lookup(struct net *net, int link,
+ const struct in6_addr *remote, const struct in6_addr *local)
{
unsigned int hash = HASH(remote, local);
- struct ip6_tnl *t;
+ struct ip6_tnl *t, *cand = NULL;
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
struct in6_addr any;
for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
- if (ipv6_addr_equal(local, &t->parms.laddr) &&
- ipv6_addr_equal(remote, &t->parms.raddr) &&
- (t->dev->flags & IFF_UP))
+ if (!ipv6_addr_equal(local, &t->parms.laddr) ||
+ !ipv6_addr_equal(remote, &t->parms.raddr) ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (link == t->parms.link)
return t;
+ else
+ cand = t;
}
memset(&any, 0, sizeof(any));
hash = HASH(&any, local);
for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
- if (ipv6_addr_equal(local, &t->parms.laddr) &&
- ipv6_addr_any(&t->parms.raddr) &&
- (t->dev->flags & IFF_UP))
+ if (!ipv6_addr_equal(local, &t->parms.laddr) ||
+ !ipv6_addr_any(&t->parms.raddr) ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (link == t->parms.link)
return t;
+ else if (!cand)
+ cand = t;
}
hash = HASH(remote, &any);
for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
- if (ipv6_addr_equal(remote, &t->parms.raddr) &&
- ipv6_addr_any(&t->parms.laddr) &&
- (t->dev->flags & IFF_UP))
+ if (!ipv6_addr_equal(remote, &t->parms.raddr) ||
+ !ipv6_addr_any(&t->parms.laddr) ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (link == t->parms.link)
return t;
+ else if (!cand)
+ cand = t;
}
+ if (cand)
+ return cand;
+
t = rcu_dereference(ip6n->collect_md_tun);
if (t && t->dev->flags & IFF_UP)
return t;
@@ -351,7 +371,8 @@ static struct ip6_tnl *ip6_tnl_locate(struct net *net,
(t = rtnl_dereference(*tp)) != NULL;
tp = &t->next) {
if (ipv6_addr_equal(local, &t->parms.laddr) &&
- ipv6_addr_equal(remote, &t->parms.raddr)) {
+ ipv6_addr_equal(remote, &t->parms.raddr) &&
+ p->link == t->parms.link) {
if (create)
return ERR_PTR(-EEXIST);
@@ -485,7 +506,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
processing of the error. */
rcu_read_lock();
- t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr, &ipv6h->saddr);
+ t = ip6_tnl_lookup(dev_net(skb->dev), skb->dev->ifindex, &ipv6h->daddr, &ipv6h->saddr);
if (!t)
goto out;
@@ -496,8 +517,6 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
err = 0;
switch (*type) {
- struct ipv6_tlv_tnl_enc_lim *tel;
- __u32 mtu, teli;
case ICMPV6_DEST_UNREACH:
net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
t->parms.name);
@@ -510,7 +529,10 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
rel_msg = 1;
}
break;
- case ICMPV6_PARAMPROB:
+ case ICMPV6_PARAMPROB: {
+ struct ipv6_tlv_tnl_enc_lim *tel;
+ __u32 teli;
+
teli = 0;
if ((*code) == ICMPV6_HDR_FIELD)
teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);
@@ -527,7 +549,10 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
t->parms.name);
}
break;
- case ICMPV6_PKT_TOOBIG:
+ }
+ case ICMPV6_PKT_TOOBIG: {
+ __u32 mtu;
+
ip6_update_pmtu(skb, net, htonl(*info), 0, 0,
sock_net_uid(net, NULL));
mtu = *info - offset;
@@ -541,6 +566,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
rel_msg = 1;
}
break;
+ }
case NDISC_REDIRECT:
ip6_redirect(skb, net, skb->dev->ifindex, 0,
sock_net_uid(net, NULL));
@@ -887,7 +913,7 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
int ret = -1;
rcu_read_lock();
- t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr);
+ t = ip6_tnl_lookup(dev_net(skb->dev), skb->dev->ifindex, &ipv6h->saddr, &ipv6h->daddr);
if (t) {
u8 tproto = READ_ONCE(t->parms.proto);
@@ -1420,8 +1446,10 @@ tx_err:
static void ip6_tnl_link_config(struct ip6_tnl *t)
{
struct net_device *dev = t->dev;
+ struct net_device *tdev = NULL;
struct __ip6_tnl_parm *p = &t->parms;
struct flowi6 *fl6 = &t->fl.u.ip6;
+ unsigned int mtu;
int t_hlen;
memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
@@ -1457,22 +1485,25 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr,
p->link, NULL, strict);
+ if (rt) {
+ tdev = rt->dst.dev;
+ ip6_rt_put(rt);
+ }
- if (!rt)
- return;
+ if (!tdev && p->link)
+ tdev = __dev_get_by_index(t->net, p->link);
- if (rt->dst.dev) {
- dev->hard_header_len = rt->dst.dev->hard_header_len +
- t_hlen;
+ if (tdev) {
+ dev->hard_header_len = tdev->hard_header_len + t_hlen;
+ mtu = min_t(unsigned int, tdev->mtu, IP6_MAX_MTU);
- dev->mtu = rt->dst.dev->mtu - t_hlen;
+ dev->mtu = mtu - t_hlen;
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
dev->mtu -= 8;
if (dev->mtu < IPV6_MIN_MTU)
dev->mtu = IPV6_MIN_MTU;
}
- ip6_rt_put(rt);
}
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 4fbdc60b4e07..2931224b674e 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -5198,6 +5198,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
*/
cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
NLM_F_REPLACE);
+ cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_CREATE;
nhn++;
}
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index c99223cb3338..fcb53ed1c4fb 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -320,8 +320,13 @@ int l2tp_session_register(struct l2tp_session *session,
spin_lock_bh(&pn->l2tp_session_hlist_lock);
+ /* IP encap expects session IDs to be globally unique, while
+ * UDP encap doesn't.
+ */
hlist_for_each_entry(session_walk, g_head, global_hlist)
- if (session_walk->session_id == session->session_id) {
+ if (session_walk->session_id == session->session_id &&
+ (session_walk->tunnel->encap == L2TP_ENCAPTYPE_IP ||
+ tunnel->encap == L2TP_ENCAPTYPE_IP)) {
err = -EEXIST;
goto err_tlock_pnlock;
}
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 000c742d0527..6aee699deb28 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -3450,7 +3450,7 @@ int ieee80211_attach_ack_skb(struct ieee80211_local *local, struct sk_buff *skb,
spin_lock_irqsave(&local->ack_status_lock, spin_flags);
id = idr_alloc(&local->ack_status_frames, ack_skb,
- 1, 0x40, GFP_ATOMIC);
+ 1, 0x2000, GFP_ATOMIC);
spin_unlock_irqrestore(&local->ack_status_lock, spin_flags);
if (id < 0) {
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 5fa13176036f..88d7a692a965 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -8,7 +8,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2019 Intel Corporation
+ * Copyright (C) 2018 - 2020 Intel Corporation
*/
#include <linux/delay.h>
@@ -1311,7 +1311,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
if (!res) {
ch_switch.timestamp = timestamp;
ch_switch.device_timestamp = device_timestamp;
- ch_switch.block_tx = beacon ? csa_ie.mode : 0;
+ ch_switch.block_tx = csa_ie.mode;
ch_switch.chandef = csa_ie.chandef;
ch_switch.count = csa_ie.count;
ch_switch.delay = csa_ie.max_switch_time;
@@ -1404,7 +1404,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
sdata->vif.csa_active = true;
sdata->csa_chandef = csa_ie.chandef;
- sdata->csa_block_tx = ch_switch.block_tx;
+ sdata->csa_block_tx = csa_ie.mode;
ifmgd->csa_ignored_same_chan = false;
if (sdata->csa_block_tx)
@@ -1438,7 +1438,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
* reset when the disconnection worker runs.
*/
sdata->vif.csa_active = true;
- sdata->csa_block_tx = ch_switch.block_tx;
+ sdata->csa_block_tx = csa_ie.mode;
ieee80211_queue_work(&local->hw, &ifmgd->csa_connection_drop_work);
mutex_unlock(&local->chanctx_mtx);
@@ -2959,7 +2959,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
(auth_transaction == 2 &&
ifmgd->auth_data->expected_transaction == 2)) {
if (!ieee80211_mark_sta_auth(sdata, bssid))
- goto out_err;
+ return; /* ignore frame -- wait for timeout */
} else if (ifmgd->auth_data->algorithm == WLAN_AUTH_SAE &&
auth_transaction == 2) {
sdata_info(sdata, "SAE peer confirmed\n");
@@ -2967,10 +2967,6 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
}
cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len);
- return;
- out_err:
- mutex_unlock(&sdata->local->sta_mtx);
- /* ignore frame -- wait for timeout */
}
#define case_WLAN(type) \
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 0e05ff037672..0ba98ad9bc85 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -4114,7 +4114,7 @@ void __ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata)
lockdep_assert_held(&local->sta_mtx);
- list_for_each_entry_rcu(sta, &local->sta_list, list) {
+ list_for_each_entry(sta, &local->sta_list, list) {
if (sdata != sta->sdata &&
(!sta->sdata->bss || sta->sdata->bss != sdata->bss))
continue;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 4bd1faf4f779..87def9cb91ff 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2442,7 +2442,7 @@ static int ieee80211_store_ack_skb(struct ieee80211_local *local,
spin_lock_irqsave(&local->ack_status_lock, flags);
id = idr_alloc(&local->ack_status_frames, ack_skb,
- 1, 0x40, GFP_ATOMIC);
+ 1, 0x2000, GFP_ATOMIC);
spin_unlock_irqrestore(&local->ack_status_lock, flags);
if (id >= 0) {
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 32a7a53833c0..decd46b38393 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1063,16 +1063,22 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
elem_parse_failed = true;
break;
case WLAN_EID_VHT_OPERATION:
- if (elen >= sizeof(struct ieee80211_vht_operation))
+ if (elen >= sizeof(struct ieee80211_vht_operation)) {
elems->vht_operation = (void *)pos;
- else
- elem_parse_failed = true;
+ if (calc_crc)
+ crc = crc32_be(crc, pos - 2, elen + 2);
+ break;
+ }
+ elem_parse_failed = true;
break;
case WLAN_EID_OPMODE_NOTIF:
- if (elen > 0)
+ if (elen > 0) {
elems->opmode_notif = pos;
- else
- elem_parse_failed = true;
+ if (calc_crc)
+ crc = crc32_be(crc, pos - 2, elen + 2);
+ break;
+ }
+ elem_parse_failed = true;
break;
case WLAN_EID_MESH_ID:
elems->mesh_id = pos;
@@ -2987,10 +2993,22 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw,
int cf0, cf1;
int ccfs0, ccfs1, ccfs2;
int ccf0, ccf1;
+ u32 vht_cap;
+ bool support_80_80 = false;
+ bool support_160 = false;
if (!oper || !htop)
return false;
+ vht_cap = hw->wiphy->bands[chandef->chan->band]->vht_cap.cap;
+ support_160 = (vht_cap & (IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK |
+ IEEE80211_VHT_CAP_EXT_NSS_BW_MASK));
+ support_80_80 = ((vht_cap &
+ IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) ||
+ (vht_cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ &&
+ vht_cap & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK) ||
+ ((vht_cap & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK) >>
+ IEEE80211_VHT_CAP_EXT_NSS_BW_SHIFT > 1));
ccfs0 = oper->center_freq_seg0_idx;
ccfs1 = oper->center_freq_seg1_idx;
ccfs2 = (le16_to_cpu(htop->operation_mode) &
@@ -3018,10 +3036,10 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw,
unsigned int diff;
diff = abs(ccf1 - ccf0);
- if (diff == 8) {
+ if ((diff == 8) && support_160) {
new.width = NL80211_CHAN_WIDTH_160;
new.center_freq1 = cf1;
- } else if (diff > 8) {
+ } else if ((diff > 8) && support_80_80) {
new.width = NL80211_CHAN_WIDTH_80P80;
new.center_freq2 = cf1;
}
diff --git a/net/mptcp/Kconfig b/net/mptcp/Kconfig
index 49f6054e7f4e..a9ed3bf1d93f 100644
--- a/net/mptcp/Kconfig
+++ b/net/mptcp/Kconfig
@@ -4,6 +4,7 @@ config MPTCP
depends on INET
select SKB_EXTENSIONS
select CRYPTO_LIB_SHA256
+ select CRYPTO
help
Multipath TCP (MPTCP) connections send and receive data over multiple
subflows in order to utilize multiple network paths. Each subflow
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 3bccee455688..e9aa6807b5be 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -24,57 +24,12 @@
#define MPTCP_SAME_STATE TCP_MAX_STATES
-static void __mptcp_close(struct sock *sk, long timeout);
-
-static const struct proto_ops *tcp_proto_ops(struct sock *sk)
-{
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- if (sk->sk_family == AF_INET6)
- return &inet6_stream_ops;
+struct mptcp6_sock {
+ struct mptcp_sock msk;
+ struct ipv6_pinfo np;
+};
#endif
- return &inet_stream_ops;
-}
-
-/* MP_CAPABLE handshake failed, convert msk to plain tcp, replacing
- * socket->sk and stream ops and destroying msk
- * return the msk socket, as we can't access msk anymore after this function
- * completes
- * Called with msk lock held, releases such lock before returning
- */
-static struct socket *__mptcp_fallback_to_tcp(struct mptcp_sock *msk,
- struct sock *ssk)
-{
- struct mptcp_subflow_context *subflow;
- struct socket *sock;
- struct sock *sk;
-
- sk = (struct sock *)msk;
- sock = sk->sk_socket;
- subflow = mptcp_subflow_ctx(ssk);
-
- /* detach the msk socket */
- list_del_init(&subflow->node);
- sock_orphan(sk);
- sock->sk = NULL;
-
- /* socket is now TCP */
- lock_sock(ssk);
- sock_graft(ssk, sock);
- if (subflow->conn) {
- /* We can't release the ULP data on a live socket,
- * restore the tcp callback
- */
- mptcp_subflow_tcp_fallback(ssk, subflow);
- sock_put(subflow->conn);
- subflow->conn = NULL;
- }
- release_sock(ssk);
- sock->ops = tcp_proto_ops(ssk);
-
- /* destroy the left-over msk sock */
- __mptcp_close(sk, 0);
- return sock;
-}
/* If msk has an initial subflow socket, and the MP_CAPABLE handshake has not
* completed yet or has failed, return the subflow socket.
@@ -93,10 +48,6 @@ static bool __mptcp_needs_tcp_fallback(const struct mptcp_sock *msk)
return msk->first && !sk_is_mptcp(msk->first);
}
-/* if the mp_capable handshake has failed, it fallbacks msk to plain TCP,
- * releases the socket lock and returns a reference to the now TCP socket.
- * Otherwise returns NULL
- */
static struct socket *__mptcp_tcp_fallback(struct mptcp_sock *msk)
{
sock_owned_by_me((const struct sock *)msk);
@@ -105,15 +56,11 @@ static struct socket *__mptcp_tcp_fallback(struct mptcp_sock *msk)
return NULL;
if (msk->subflow) {
- /* the first subflow is an active connection, discart the
- * paired socket
- */
- msk->subflow->sk = NULL;
- sock_release(msk->subflow);
- msk->subflow = NULL;
+ release_sock((struct sock *)msk);
+ return msk->subflow;
}
- return __mptcp_fallback_to_tcp(msk, msk->first);
+ return NULL;
}
static bool __mptcp_can_create_subflow(const struct mptcp_sock *msk)
@@ -640,12 +587,14 @@ static void mptcp_subflow_shutdown(struct sock *ssk, int how)
}
/* Called with msk lock held, releases such lock before returning */
-static void __mptcp_close(struct sock *sk, long timeout)
+static void mptcp_close(struct sock *sk, long timeout)
{
struct mptcp_subflow_context *subflow, *tmp;
struct mptcp_sock *msk = mptcp_sk(sk);
LIST_HEAD(conn_list);
+ lock_sock(sk);
+
mptcp_token_destroy(msk->token);
inet_sk_state_store(sk, TCP_CLOSE);
@@ -662,12 +611,6 @@ static void __mptcp_close(struct sock *sk, long timeout)
sk_common_release(sk);
}
-static void mptcp_close(struct sock *sk, long timeout)
-{
- lock_sock(sk);
- __mptcp_close(sk, timeout);
-}
-
static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk)
{
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
@@ -691,6 +634,30 @@ static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk)
inet_sk(msk)->inet_rcv_saddr = inet_sk(ssk)->inet_rcv_saddr;
}
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk)
+{
+ unsigned int offset = sizeof(struct mptcp6_sock) - sizeof(struct ipv6_pinfo);
+
+ return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
+}
+#endif
+
+static struct sock *mptcp_sk_clone_lock(const struct sock *sk)
+{
+ struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC);
+
+ if (!nsk)
+ return NULL;
+
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ if (nsk->sk_family == AF_INET6)
+ inet_sk(nsk)->pinet6 = mptcp_inet6_sk(nsk);
+#endif
+
+ return nsk;
+}
+
static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
bool kern)
{
@@ -721,7 +688,7 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
lock_sock(sk);
local_bh_disable();
- new_mptcp_sock = sk_clone_lock(sk, GFP_ATOMIC);
+ new_mptcp_sock = mptcp_sk_clone_lock(sk);
if (!new_mptcp_sock) {
*err = -ENOBUFS;
local_bh_enable();
@@ -788,60 +755,50 @@ static int mptcp_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, unsigned int optlen)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- int ret = -EOPNOTSUPP;
struct socket *ssock;
- struct sock *ssk;
pr_debug("msk=%p", msk);
/* @@ the meaning of setsockopt() when the socket is connected and
- * there are multiple subflows is not defined.
+ * there are multiple subflows is not yet defined. It is up to the
+ * MPTCP-level socket to configure the subflows until the subflow
+ * is in TCP fallback, when TCP socket options are passed through
+ * to the one remaining subflow.
*/
lock_sock(sk);
- ssock = __mptcp_socket_create(msk, MPTCP_SAME_STATE);
- if (IS_ERR(ssock)) {
- release_sock(sk);
- return ret;
- }
+ ssock = __mptcp_tcp_fallback(msk);
+ if (ssock)
+ return tcp_setsockopt(ssock->sk, level, optname, optval,
+ optlen);
- ssk = ssock->sk;
- sock_hold(ssk);
release_sock(sk);
- ret = tcp_setsockopt(ssk, level, optname, optval, optlen);
- sock_put(ssk);
-
- return ret;
+ return -EOPNOTSUPP;
}
static int mptcp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *option)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- int ret = -EOPNOTSUPP;
struct socket *ssock;
- struct sock *ssk;
pr_debug("msk=%p", msk);
- /* @@ the meaning of getsockopt() when the socket is connected and
- * there are multiple subflows is not defined.
+ /* @@ the meaning of setsockopt() when the socket is connected and
+ * there are multiple subflows is not yet defined. It is up to the
+ * MPTCP-level socket to configure the subflows until the subflow
+ * is in TCP fallback, when socket options are passed through
+ * to the one remaining subflow.
*/
lock_sock(sk);
- ssock = __mptcp_socket_create(msk, MPTCP_SAME_STATE);
- if (IS_ERR(ssock)) {
- release_sock(sk);
- return ret;
- }
+ ssock = __mptcp_tcp_fallback(msk);
+ if (ssock)
+ return tcp_getsockopt(ssock->sk, level, optname, optval,
+ option);
- ssk = ssock->sk;
- sock_hold(ssk);
release_sock(sk);
- ret = tcp_getsockopt(ssk, level, optname, optval, option);
- sock_put(ssk);
-
- return ret;
+ return -EOPNOTSUPP;
}
static int mptcp_get_port(struct sock *sk, unsigned short snum)
@@ -1270,8 +1227,7 @@ int mptcp_proto_v6_init(void)
strcpy(mptcp_v6_prot.name, "MPTCPv6");
mptcp_v6_prot.slab = NULL;
mptcp_v6_prot.destroy = mptcp_v6_destroy;
- mptcp_v6_prot.obj_size = sizeof(struct mptcp_sock) +
- sizeof(struct ipv6_pinfo);
+ mptcp_v6_prot.obj_size = sizeof(struct mptcp6_sock);
err = proto_register(&mptcp_v6_prot, 1);
if (err)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 8a99a2930284..9f8663b30456 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -56,8 +56,8 @@
#define MPTCP_DSS_FLAG_MASK (0x1F)
/* MPTCP socket flags */
-#define MPTCP_DATA_READY BIT(0)
-#define MPTCP_SEND_SPACE BIT(1)
+#define MPTCP_DATA_READY 0
+#define MPTCP_SEND_SPACE 1
/* MPTCP connection sock */
struct mptcp_sock {
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index d1305423640f..1927fc296f95 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -894,32 +894,175 @@ static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
}
}
-/* Resolve race on insertion if this protocol allows this. */
+static void __nf_conntrack_insert_prepare(struct nf_conn *ct)
+{
+ struct nf_conn_tstamp *tstamp;
+
+ atomic_inc(&ct->ct_general.use);
+ ct->status |= IPS_CONFIRMED;
+
+ /* set conntrack timestamp, if enabled. */
+ tstamp = nf_conn_tstamp_find(ct);
+ if (tstamp)
+ tstamp->start = ktime_get_real_ns();
+}
+
+static int __nf_ct_resolve_clash(struct sk_buff *skb,
+ struct nf_conntrack_tuple_hash *h)
+{
+ /* This is the conntrack entry already in hashes that won race. */
+ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *loser_ct;
+
+ loser_ct = nf_ct_get(skb, &ctinfo);
+
+ if (nf_ct_is_dying(ct))
+ return NF_DROP;
+
+ if (!atomic_inc_not_zero(&ct->ct_general.use))
+ return NF_DROP;
+
+ if (((ct->status & IPS_NAT_DONE_MASK) == 0) ||
+ nf_ct_match(ct, loser_ct)) {
+ struct net *net = nf_ct_net(ct);
+
+ nf_ct_acct_merge(ct, ctinfo, loser_ct);
+ nf_ct_add_to_dying_list(loser_ct);
+ nf_conntrack_put(&loser_ct->ct_general);
+ nf_ct_set(skb, ct, ctinfo);
+
+ NF_CT_STAT_INC(net, insert_failed);
+ return NF_ACCEPT;
+ }
+
+ nf_ct_put(ct);
+ return NF_DROP;
+}
+
+/**
+ * nf_ct_resolve_clash_harder - attempt to insert clashing conntrack entry
+ *
+ * @skb: skb that causes the collision
+ * @repl_idx: hash slot for reply direction
+ *
+ * Called when origin or reply direction had a clash.
+ * The skb can be handled without packet drop provided the reply direction
+ * is unique or there the existing entry has the identical tuple in both
+ * directions.
+ *
+ * Caller must hold conntrack table locks to prevent concurrent updates.
+ *
+ * Returns NF_DROP if the clash could not be handled.
+ */
+static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx)
+{
+ struct nf_conn *loser_ct = (struct nf_conn *)skb_nfct(skb);
+ const struct nf_conntrack_zone *zone;
+ struct nf_conntrack_tuple_hash *h;
+ struct hlist_nulls_node *n;
+ struct net *net;
+
+ zone = nf_ct_zone(loser_ct);
+ net = nf_ct_net(loser_ct);
+
+ /* Reply direction must never result in a clash, unless both origin
+ * and reply tuples are identical.
+ */
+ hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[repl_idx], hnnode) {
+ if (nf_ct_key_equal(h,
+ &loser_ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ zone, net))
+ return __nf_ct_resolve_clash(skb, h);
+ }
+
+ /* We want the clashing entry to go away real soon: 1 second timeout. */
+ loser_ct->timeout = nfct_time_stamp + HZ;
+
+ /* IPS_NAT_CLASH removes the entry automatically on the first
+ * reply. Also prevents UDP tracker from moving the entry to
+ * ASSURED state, i.e. the entry can always be evicted under
+ * pressure.
+ */
+ loser_ct->status |= IPS_FIXED_TIMEOUT | IPS_NAT_CLASH;
+
+ __nf_conntrack_insert_prepare(loser_ct);
+
+ /* fake add for ORIGINAL dir: we want lookups to only find the entry
+ * already in the table. This also hides the clashing entry from
+ * ctnetlink iteration, i.e. conntrack -L won't show them.
+ */
+ hlist_nulls_add_fake(&loser_ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
+
+ hlist_nulls_add_head_rcu(&loser_ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
+ &nf_conntrack_hash[repl_idx]);
+ return NF_ACCEPT;
+}
+
+/**
+ * nf_ct_resolve_clash - attempt to handle clash without packet drop
+ *
+ * @skb: skb that causes the clash
+ * @h: tuplehash of the clashing entry already in table
+ * @hash_reply: hash slot for reply direction
+ *
+ * A conntrack entry can be inserted to the connection tracking table
+ * if there is no existing entry with an identical tuple.
+ *
+ * If there is one, @skb (and the assocated, unconfirmed conntrack) has
+ * to be dropped. In case @skb is retransmitted, next conntrack lookup
+ * will find the already-existing entry.
+ *
+ * The major problem with such packet drop is the extra delay added by
+ * the packet loss -- it will take some time for a retransmit to occur
+ * (or the sender to time out when waiting for a reply).
+ *
+ * This function attempts to handle the situation without packet drop.
+ *
+ * If @skb has no NAT transformation or if the colliding entries are
+ * exactly the same, only the to-be-confirmed conntrack entry is discarded
+ * and @skb is associated with the conntrack entry already in the table.
+ *
+ * Failing that, the new, unconfirmed conntrack is still added to the table
+ * provided that the collision only occurs in the ORIGINAL direction.
+ * The new entry will be added after the existing one in the hash list,
+ * so packets in the ORIGINAL direction will continue to match the existing
+ * entry. The new entry will also have a fixed timeout so it expires --
+ * due to the collision, it will not see bidirectional traffic.
+ *
+ * Returns NF_DROP if the clash could not be resolved.
+ */
static __cold noinline int
-nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
- enum ip_conntrack_info ctinfo,
- struct nf_conntrack_tuple_hash *h)
+nf_ct_resolve_clash(struct sk_buff *skb, struct nf_conntrack_tuple_hash *h,
+ u32 reply_hash)
{
/* This is the conntrack entry already in hashes that won race. */
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
const struct nf_conntrack_l4proto *l4proto;
- enum ip_conntrack_info oldinfo;
- struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo);
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *loser_ct;
+ struct net *net;
+ int ret;
+
+ loser_ct = nf_ct_get(skb, &ctinfo);
+ net = nf_ct_net(loser_ct);
l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
- if (l4proto->allow_clash &&
- !nf_ct_is_dying(ct) &&
- atomic_inc_not_zero(&ct->ct_general.use)) {
- if (((ct->status & IPS_NAT_DONE_MASK) == 0) ||
- nf_ct_match(ct, loser_ct)) {
- nf_ct_acct_merge(ct, ctinfo, loser_ct);
- nf_conntrack_put(&loser_ct->ct_general);
- nf_ct_set(skb, ct, oldinfo);
- return NF_ACCEPT;
- }
- nf_ct_put(ct);
- }
+ if (!l4proto->allow_clash)
+ goto drop;
+
+ ret = __nf_ct_resolve_clash(skb, h);
+ if (ret == NF_ACCEPT)
+ return ret;
+
+ ret = nf_ct_resolve_clash_harder(skb, reply_hash);
+ if (ret == NF_ACCEPT)
+ return ret;
+
+drop:
+ nf_ct_add_to_dying_list(loser_ct);
NF_CT_STAT_INC(net, drop);
+ NF_CT_STAT_INC(net, insert_failed);
return NF_DROP;
}
@@ -932,7 +1075,6 @@ __nf_conntrack_confirm(struct sk_buff *skb)
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
struct nf_conn_help *help;
- struct nf_conn_tstamp *tstamp;
struct hlist_nulls_node *n;
enum ip_conntrack_info ctinfo;
struct net *net;
@@ -989,6 +1131,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
if (unlikely(nf_ct_is_dying(ct))) {
nf_ct_add_to_dying_list(ct);
+ NF_CT_STAT_INC(net, insert_failed);
goto dying;
}
@@ -1009,13 +1152,8 @@ __nf_conntrack_confirm(struct sk_buff *skb)
setting time, otherwise we'd get timer wrap in
weird delay cases. */
ct->timeout += nfct_time_stamp;
- atomic_inc(&ct->ct_general.use);
- ct->status |= IPS_CONFIRMED;
- /* set conntrack timestamp, if enabled. */
- tstamp = nf_conn_tstamp_find(ct);
- if (tstamp)
- tstamp->start = ktime_get_real_ns();
+ __nf_conntrack_insert_prepare(ct);
/* Since the lookup is lockless, hash insertion must be done after
* starting the timer and setting the CONFIRMED bit. The RCU barriers
@@ -1035,11 +1173,9 @@ __nf_conntrack_confirm(struct sk_buff *skb)
return NF_ACCEPT;
out:
- nf_ct_add_to_dying_list(ct);
- ret = nf_ct_resolve_clash(net, skb, ctinfo, h);
+ ret = nf_ct_resolve_clash(skb, h, reply_hash);
dying:
nf_conntrack_double_unlock(hash, reply_hash);
- NF_CT_STAT_INC(net, insert_failed);
local_bh_enable();
return ret;
}
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 7365b43f8f98..760ca2422816 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -81,6 +81,18 @@ static bool udp_error(struct sk_buff *skb,
return false;
}
+static void nf_conntrack_udp_refresh_unreplied(struct nf_conn *ct,
+ struct sk_buff *skb,
+ enum ip_conntrack_info ctinfo,
+ u32 extra_jiffies)
+{
+ if (unlikely(ctinfo == IP_CT_ESTABLISHED_REPLY &&
+ ct->status & IPS_NAT_CLASH))
+ nf_ct_kill(ct);
+ else
+ nf_ct_refresh_acct(ct, ctinfo, skb, extra_jiffies);
+}
+
/* Returns verdict for packet, and may modify conntracktype */
int nf_conntrack_udp_packet(struct nf_conn *ct,
struct sk_buff *skb,
@@ -116,8 +128,8 @@ int nf_conntrack_udp_packet(struct nf_conn *ct,
if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
nf_conntrack_event_cache(IPCT_ASSURED, ct);
} else {
- nf_ct_refresh_acct(ct, ctinfo, skb,
- timeouts[UDP_CT_UNREPLIED]);
+ nf_conntrack_udp_refresh_unreplied(ct, skb, ctinfo,
+ timeouts[UDP_CT_UNREPLIED]);
}
return NF_ACCEPT;
}
@@ -198,8 +210,8 @@ int nf_conntrack_udplite_packet(struct nf_conn *ct,
if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
nf_conntrack_event_cache(IPCT_ASSURED, ct);
} else {
- nf_ct_refresh_acct(ct, ctinfo, skb,
- timeouts[UDP_CT_UNREPLIED]);
+ nf_conntrack_udp_refresh_unreplied(ct, skb, ctinfo,
+ timeouts[UDP_CT_UNREPLIED]);
}
return NF_ACCEPT;
}
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 83e1db37c3b0..06f00cdc3891 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -847,9 +847,6 @@ static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
{
int err;
- if (!nf_flowtable_hw_offload(flowtable))
- return 0;
-
if (!dev->netdev_ops->ndo_setup_tc)
return -EOPNOTSUPP;
@@ -876,6 +873,9 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
struct flow_block_offload bo;
int err;
+ if (!nf_flowtable_hw_offload(flowtable))
+ return 0;
+
err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd, &extack);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index f0cb1e13af50..feac8553f6d9 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -203,7 +203,7 @@
* ::
*
* rule indices in last field: 0 1
- * map to elements: 0x42 0x66
+ * map to elements: 0x66 0x42
*
*
* Matching
@@ -298,7 +298,7 @@
* ::
*
* rule indices in last field: 0 1
- * map to elements: 0x42 0x66
+ * map to elements: 0x66 0x42
*
* the matching element is at 0x42.
*
@@ -503,7 +503,7 @@ static int pipapo_refill(unsigned long *map, int len, int rules,
return -1;
}
- if (unlikely(match_only)) {
+ if (match_only) {
bitmap_clear(map, i, 1);
return i;
}
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index bccd47cd7190..7a2c4b8408c4 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -36,6 +36,7 @@
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <linux/mutex.h>
#include <linux/kernel.h>
+#include <linux/refcount.h>
#include <uapi/linux/netfilter/xt_hashlimit.h>
#define XT_HASHLIMIT_ALL (XT_HASHLIMIT_HASH_DIP | XT_HASHLIMIT_HASH_DPT | \
@@ -114,7 +115,7 @@ struct dsthash_ent {
struct xt_hashlimit_htable {
struct hlist_node node; /* global list of all htables */
- int use;
+ refcount_t use;
u_int8_t family;
bool rnd_initialized;
@@ -315,7 +316,7 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg,
for (i = 0; i < hinfo->cfg.size; i++)
INIT_HLIST_HEAD(&hinfo->hash[i]);
- hinfo->use = 1;
+ refcount_set(&hinfo->use, 1);
hinfo->count = 0;
hinfo->family = family;
hinfo->rnd_initialized = false;
@@ -420,7 +421,7 @@ static struct xt_hashlimit_htable *htable_find_get(struct net *net,
hlist_for_each_entry(hinfo, &hashlimit_net->htables, node) {
if (!strcmp(name, hinfo->name) &&
hinfo->family == family) {
- hinfo->use++;
+ refcount_inc(&hinfo->use);
return hinfo;
}
}
@@ -429,12 +430,11 @@ static struct xt_hashlimit_htable *htable_find_get(struct net *net,
static void htable_put(struct xt_hashlimit_htable *hinfo)
{
- mutex_lock(&hashlimit_mutex);
- if (--hinfo->use == 0) {
+ if (refcount_dec_and_mutex_lock(&hinfo->use, &hashlimit_mutex)) {
hlist_del(&hinfo->node);
+ mutex_unlock(&hashlimit_mutex);
htable_destroy(hinfo);
}
- mutex_unlock(&hashlimit_mutex);
}
/* The algorithm used is the Simple Token Bucket Filter (TBF)
@@ -837,6 +837,8 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 3);
}
+#define HASHLIMIT_MAX_SIZE 1048576
+
static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
struct xt_hashlimit_htable **hinfo,
struct hashlimit_cfg3 *cfg,
@@ -847,6 +849,14 @@ static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
if (cfg->gc_interval == 0 || cfg->expire == 0)
return -EINVAL;
+ if (cfg->size > HASHLIMIT_MAX_SIZE) {
+ cfg->size = HASHLIMIT_MAX_SIZE;
+ pr_info_ratelimited("size too large, truncated to %u\n", cfg->size);
+ }
+ if (cfg->max > HASHLIMIT_MAX_SIZE) {
+ cfg->max = HASHLIMIT_MAX_SIZE;
+ pr_info_ratelimited("max too large, truncated to %u\n", cfg->max);
+ }
if (par->family == NFPROTO_IPV4) {
if (cfg->srcmask > 32 || cfg->dstmask > 32)
return -EINVAL;
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 781e0b482189..0a9708004e20 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -103,7 +103,7 @@ static DEFINE_SPINLOCK(recent_lock);
static DEFINE_MUTEX(recent_mutex);
#ifdef CONFIG_PROC_FS
-static const struct file_operations recent_mt_fops;
+static const struct proc_ops recent_mt_proc_ops;
#endif
static u_int32_t hash_rnd __read_mostly;
@@ -405,7 +405,7 @@ static int recent_mt_check(const struct xt_mtchk_param *par,
goto out;
}
pde = proc_create_data(t->name, ip_list_perms, recent_net->xt_recent,
- &recent_mt_fops, t);
+ &recent_mt_proc_ops, t);
if (pde == NULL) {
recent_table_free(t);
ret = -ENOMEM;
@@ -616,13 +616,12 @@ recent_mt_proc_write(struct file *file, const char __user *input,
return size + 1;
}
-static const struct file_operations recent_mt_fops = {
- .open = recent_seq_open,
- .read = seq_read,
- .write = recent_mt_proc_write,
- .release = seq_release_private,
- .owner = THIS_MODULE,
- .llseek = seq_lseek,
+static const struct proc_ops recent_mt_proc_ops = {
+ .proc_open = recent_seq_open,
+ .proc_read = seq_read,
+ .proc_write = recent_mt_proc_write,
+ .proc_release = seq_release_private,
+ .proc_lseek = seq_lseek,
};
static int __net_init recent_proc_net_init(struct net *net)
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index f5d34da0646e..a1f2320ecc16 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -143,7 +143,8 @@ static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain,
if (domain != NULL) {
bkt = netlbl_domhsh_hash(domain);
bkt_list = &netlbl_domhsh_rcu_deref(netlbl_domhsh)->tbl[bkt];
- list_for_each_entry_rcu(iter, bkt_list, list)
+ list_for_each_entry_rcu(iter, bkt_list, list,
+ lockdep_is_held(&netlbl_domhsh_lock))
if (iter->valid &&
netlbl_family_match(iter->family, family) &&
strcmp(iter->domain, domain) == 0)
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index d2e4ab8d1cb1..77bb1bb22c3b 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -207,7 +207,8 @@ static struct netlbl_unlhsh_iface *netlbl_unlhsh_search_iface(int ifindex)
bkt = netlbl_unlhsh_hash(ifindex);
bkt_list = &netlbl_unlhsh_rcu_deref(netlbl_unlhsh)->tbl[bkt];
- list_for_each_entry_rcu(iter, bkt_list, list)
+ list_for_each_entry_rcu(iter, bkt_list, list,
+ lockdep_is_held(&netlbl_unlhsh_lock))
if (iter->valid && iter->ifindex == ifindex)
return iter;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 4e31721e7293..edf3e285e242 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1014,7 +1014,8 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
if (nlk->netlink_bind && groups) {
int group;
- for (group = 0; group < nlk->ngroups; group++) {
+ /* nl_groups is a u32, so cap the maximum groups we can bind */
+ for (group = 0; group < BITS_PER_TYPE(u32); group++) {
if (!test_bit(group, &groups))
continue;
err = nlk->netlink_bind(net, group + 1);
@@ -1033,7 +1034,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
netlink_insert(sk, nladdr->nl_pid) :
netlink_autobind(sock);
if (err) {
- netlink_undo_bind(nlk->ngroups, groups, sk);
+ netlink_undo_bind(BITS_PER_TYPE(u32), groups, sk);
goto unlock;
}
}
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 0522b2b1fd95..9f357aa22b94 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -497,8 +497,9 @@ genl_family_rcv_msg_attrs_parse(const struct genl_family *family,
err = __nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr,
family->policy, validate, extack);
- if (err && parallel) {
- kfree(attrbuf);
+ if (err) {
+ if (parallel)
+ kfree(attrbuf);
return ERR_PTR(err);
}
return attrbuf;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 659c2a790fe7..c047afd12116 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -179,7 +179,8 @@ struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
struct hlist_head *head;
head = vport_hash_bucket(dp, port_no);
- hlist_for_each_entry_rcu(vport, head, dp_hash_node) {
+ hlist_for_each_entry_rcu(vport, head, dp_hash_node,
+ lockdep_ovsl_is_held()) {
if (vport->port_no == port_no)
return vport;
}
@@ -2042,7 +2043,8 @@ static unsigned int ovs_get_max_headroom(struct datapath *dp)
int i;
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
- hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
+ hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
+ lockdep_ovsl_is_held()) {
dev = vport->dev;
dev_headroom = netdev_get_fwd_headroom(dev);
if (dev_headroom > max_headroom)
@@ -2061,7 +2063,8 @@ static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom)
dp->max_headroom = new_headroom;
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
- hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node)
+ hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
+ lockdep_ovsl_is_held())
netdev_set_rx_headroom(vport->dev, new_headroom);
}
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 7da4230627f5..288122eec7c8 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -2708,10 +2708,6 @@ static int validate_set(const struct nlattr *a,
return -EINVAL;
switch (key_type) {
- const struct ovs_key_ipv4 *ipv4_key;
- const struct ovs_key_ipv6 *ipv6_key;
- int err;
-
case OVS_KEY_ATTR_PRIORITY:
case OVS_KEY_ATTR_SKB_MARK:
case OVS_KEY_ATTR_CT_MARK:
@@ -2723,7 +2719,9 @@ static int validate_set(const struct nlattr *a,
return -EINVAL;
break;
- case OVS_KEY_ATTR_TUNNEL:
+ case OVS_KEY_ATTR_TUNNEL: {
+ int err;
+
if (masked)
return -EINVAL; /* Masked tunnel set not supported. */
@@ -2732,8 +2730,10 @@ static int validate_set(const struct nlattr *a,
if (err)
return err;
break;
+ }
+ case OVS_KEY_ATTR_IPV4: {
+ const struct ovs_key_ipv4 *ipv4_key;
- case OVS_KEY_ATTR_IPV4:
if (eth_type != htons(ETH_P_IP))
return -EINVAL;
@@ -2753,8 +2753,10 @@ static int validate_set(const struct nlattr *a,
return -EINVAL;
}
break;
+ }
+ case OVS_KEY_ATTR_IPV6: {
+ const struct ovs_key_ipv6 *ipv6_key;
- case OVS_KEY_ATTR_IPV6:
if (eth_type != htons(ETH_P_IPV6))
return -EINVAL;
@@ -2781,7 +2783,7 @@ static int validate_set(const struct nlattr *a,
return -EINVAL;
break;
-
+ }
case OVS_KEY_ATTR_TCP:
if ((eth_type != htons(ETH_P_IP) &&
eth_type != htons(ETH_P_IPV6)) ||
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 5904e93e5765..fd8a01ca7a2d 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -585,7 +585,8 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
head = find_bucket(ti, hash);
(*n_mask_hit)++;
- hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) {
+ hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver],
+ lockdep_ovsl_is_held()) {
if (flow->mask == mask && flow->flow_table.hash == hash &&
flow_cmp_masked_key(flow, &masked_key, &mask->range))
return flow;
@@ -769,7 +770,8 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,
hash = ufid_hash(ufid);
head = find_bucket(ti, hash);
- hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver]) {
+ hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver],
+ lockdep_ovsl_is_held()) {
if (flow->ufid_table.hash == hash &&
ovs_flow_cmp_ufid(flow, ufid))
return flow;
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 3323b79ff548..5010d1ddd4bd 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -61,7 +61,8 @@ static struct dp_meter *lookup_meter(const struct datapath *dp,
struct hlist_head *head;
head = meter_hash_bucket(dp, meter_id);
- hlist_for_each_entry_rcu(meter, head, dp_hash_node) {
+ hlist_for_each_entry_rcu(meter, head, dp_hash_node,
+ lockdep_ovsl_is_held()) {
if (meter->id == meter_id)
return meter;
}
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 5da9392b03d6..47febb4504f0 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -96,7 +96,8 @@ struct vport *ovs_vport_locate(const struct net *net, const char *name)
struct hlist_head *bucket = hash_bucket(net, name);
struct vport *vport;
- hlist_for_each_entry_rcu(vport, bucket, hash_node)
+ hlist_for_each_entry_rcu(vport, bucket, hash_node,
+ lockdep_ovsl_is_held())
if (!strcmp(name, ovs_vport_name(vport)) &&
net_eq(ovs_dp_get_net(vport->dp), net))
return vport;
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 3341eee87bf9..585e6b3b69ce 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -162,10 +162,9 @@ static int rds_pin_pages(unsigned long user_addr, unsigned int nr_pages,
if (write)
gup_flags |= FOLL_WRITE;
- ret = get_user_pages_fast(user_addr, nr_pages, gup_flags, pages);
+ ret = pin_user_pages_fast(user_addr, nr_pages, gup_flags, pages);
if (ret >= 0 && ret < nr_pages) {
- while (ret--)
- put_page(pages[ret]);
+ unpin_user_pages(pages, ret);
ret = -EFAULT;
}
@@ -300,8 +299,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
* to release anything.
*/
if (!need_odp) {
- for (i = 0 ; i < nents; i++)
- put_page(sg_page(&sg[i]));
+ unpin_user_pages(pages, nr_pages);
kfree(sg);
}
ret = PTR_ERR(trans_private);
@@ -325,7 +323,12 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
if (cookie_ret)
*cookie_ret = cookie;
- if (args->cookie_addr && put_user(cookie, (u64 __user *)(unsigned long) args->cookie_addr)) {
+ if (args->cookie_addr &&
+ put_user(cookie, (u64 __user *)(unsigned long)args->cookie_addr)) {
+ if (!need_odp) {
+ unpin_user_pages(pages, nr_pages);
+ kfree(sg);
+ }
ret = -EFAULT;
goto out;
}
@@ -496,9 +499,7 @@ void rds_rdma_free_op(struct rm_rdma_op *ro)
* is the case for a RDMA_READ which copies from remote
* to local memory
*/
- if (!ro->op_write)
- set_page_dirty(page);
- put_page(page);
+ unpin_user_pages_dirty_lock(&page, 1, !ro->op_write);
}
}
@@ -515,8 +516,7 @@ void rds_atomic_free_op(struct rm_atomic_op *ao)
/* Mark page dirty if it was possibly modified, which
* is the case for a RDMA_READ which copies from remote
* to local memory */
- set_page_dirty(page);
- put_page(page);
+ unpin_user_pages_dirty_lock(&page, 1, true);
kfree(ao->op_notifier);
ao->op_notifier = NULL;
@@ -944,7 +944,7 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
return ret;
err:
if (page)
- put_page(page);
+ unpin_user_page(page);
rm->atomic.op_active = 0;
kfree(rm->atomic.op_notifier);
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 9d3c4d2d893a..fe42f986cd94 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -194,6 +194,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
service_in_use:
write_unlock(&local->services_lock);
rxrpc_unuse_local(local);
+ rxrpc_put_local(local);
ret = -EADDRINUSE;
error_unlock:
release_sock(&rx->sk);
@@ -899,6 +900,7 @@ static int rxrpc_release_sock(struct sock *sk)
rxrpc_purge_queue(&sk->sk_receive_queue);
rxrpc_unuse_local(rx->local);
+ rxrpc_put_local(rx->local);
rx->local = NULL;
key_put(rx->key);
rx->key = NULL;
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 5e99df80e80a..7d730c438404 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -490,6 +490,7 @@ enum rxrpc_call_flag {
RXRPC_CALL_RX_HEARD, /* The peer responded at least once to this call */
RXRPC_CALL_RX_UNDERRUN, /* Got data underrun */
RXRPC_CALL_IS_INTR, /* The call is interruptible */
+ RXRPC_CALL_DISCONNECTED, /* The call has been disconnected */
};
/*
@@ -1021,6 +1022,16 @@ void rxrpc_unuse_local(struct rxrpc_local *);
void rxrpc_queue_local(struct rxrpc_local *);
void rxrpc_destroy_all_locals(struct rxrpc_net *);
+static inline bool __rxrpc_unuse_local(struct rxrpc_local *local)
+{
+ return atomic_dec_return(&local->active_users) == 0;
+}
+
+static inline bool __rxrpc_use_local(struct rxrpc_local *local)
+{
+ return atomic_fetch_add_unless(&local->active_users, 1, 0) != 0;
+}
+
/*
* misc.c
*/
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index a31c18c09894..c9f34b0a11df 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -493,7 +493,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
_debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn);
- if (conn)
+ if (conn && !test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
rxrpc_disconnect_call(call);
if (call->security)
call->security->free_call_crypto(call);
@@ -562,13 +562,14 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
}
/*
- * Final call destruction under RCU.
+ * Final call destruction - but must be done in process context.
*/
-static void rxrpc_rcu_destroy_call(struct rcu_head *rcu)
+static void rxrpc_destroy_call(struct work_struct *work)
{
- struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu);
+ struct rxrpc_call *call = container_of(work, struct rxrpc_call, processor);
struct rxrpc_net *rxnet = call->rxnet;
+ rxrpc_put_connection(call->conn);
rxrpc_put_peer(call->peer);
kfree(call->rxtx_buffer);
kfree(call->rxtx_annotations);
@@ -578,6 +579,22 @@ static void rxrpc_rcu_destroy_call(struct rcu_head *rcu)
}
/*
+ * Final call destruction under RCU.
+ */
+static void rxrpc_rcu_destroy_call(struct rcu_head *rcu)
+{
+ struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu);
+
+ if (in_softirq()) {
+ INIT_WORK(&call->processor, rxrpc_destroy_call);
+ if (!rxrpc_queue_work(&call->processor))
+ BUG();
+ } else {
+ rxrpc_destroy_call(&call->processor);
+ }
+}
+
+/*
* clean up a call
*/
void rxrpc_cleanup_call(struct rxrpc_call *call)
@@ -590,7 +607,6 @@ void rxrpc_cleanup_call(struct rxrpc_call *call)
ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags));
- ASSERTCMP(call->conn, ==, NULL);
rxrpc_cleanup_ring(call);
rxrpc_free_skb(call->tx_pending, rxrpc_skb_cleaned);
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 376370cd9285..ea7d4c21f889 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -785,6 +785,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call)
u32 cid;
spin_lock(&conn->channel_lock);
+ set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
cid = call->cid;
if (cid) {
@@ -792,7 +793,6 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call)
chan = &conn->channels[channel];
}
trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect);
- call->conn = NULL;
/* Calls that have never actually been assigned a channel can simply be
* discarded. If the conn didn't get used either, it will follow
@@ -908,7 +908,6 @@ out:
spin_unlock(&rxnet->client_conn_cache_lock);
out_2:
spin_unlock(&conn->channel_lock);
- rxrpc_put_connection(conn);
_leave("");
return;
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 808a4723f868..06fcff2ebbba 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -438,16 +438,12 @@ again:
/*
* connection-level event processor
*/
-void rxrpc_process_connection(struct work_struct *work)
+static void rxrpc_do_process_connection(struct rxrpc_connection *conn)
{
- struct rxrpc_connection *conn =
- container_of(work, struct rxrpc_connection, processor);
struct sk_buff *skb;
u32 abort_code = RX_PROTOCOL_ERROR;
int ret;
- rxrpc_see_connection(conn);
-
if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events))
rxrpc_secure_connection(conn);
@@ -475,18 +471,32 @@ void rxrpc_process_connection(struct work_struct *work)
}
}
-out:
- rxrpc_put_connection(conn);
- _leave("");
return;
requeue_and_leave:
skb_queue_head(&conn->rx_queue, skb);
- goto out;
+ return;
protocol_error:
if (rxrpc_abort_connection(conn, ret, abort_code) < 0)
goto requeue_and_leave;
rxrpc_free_skb(skb, rxrpc_skb_freed);
- goto out;
+ return;
+}
+
+void rxrpc_process_connection(struct work_struct *work)
+{
+ struct rxrpc_connection *conn =
+ container_of(work, struct rxrpc_connection, processor);
+
+ rxrpc_see_connection(conn);
+
+ if (__rxrpc_use_local(conn->params.local)) {
+ rxrpc_do_process_connection(conn);
+ rxrpc_unuse_local(conn->params.local);
+ }
+
+ rxrpc_put_connection(conn);
+ _leave("");
+ return;
}
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 38d718e90dc6..19e141eeed17 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -223,9 +223,8 @@ void rxrpc_disconnect_call(struct rxrpc_call *call)
__rxrpc_disconnect_call(conn, call);
spin_unlock(&conn->channel_lock);
- call->conn = NULL;
+ set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
conn->idle_timestamp = jiffies;
- rxrpc_put_connection(conn);
}
/*
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 96d54e5bf7bc..ef10fbf71b15 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -599,10 +599,8 @@ ack:
false, true,
rxrpc_propose_ack_input_data);
- if (seq0 == READ_ONCE(call->rx_hard_ack) + 1) {
- trace_rxrpc_notify_socket(call->debug_id, serial);
- rxrpc_notify_socket(call);
- }
+ trace_rxrpc_notify_socket(call->debug_id, serial);
+ rxrpc_notify_socket(call);
unlock:
spin_unlock(&call->input_lock);
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 36587260cabd..a6c1349e965d 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -364,11 +364,14 @@ void rxrpc_queue_local(struct rxrpc_local *local)
void rxrpc_put_local(struct rxrpc_local *local)
{
const void *here = __builtin_return_address(0);
+ unsigned int debug_id;
int n;
if (local) {
+ debug_id = local->debug_id;
+
n = atomic_dec_return(&local->usage);
- trace_rxrpc_local(local->debug_id, rxrpc_local_put, n, here);
+ trace_rxrpc_local(debug_id, rxrpc_local_put, n, here);
if (n == 0)
call_rcu(&local->rcu, rxrpc_local_rcu);
@@ -380,14 +383,11 @@ void rxrpc_put_local(struct rxrpc_local *local)
*/
struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local)
{
- unsigned int au;
-
local = rxrpc_get_local_maybe(local);
if (!local)
return NULL;
- au = atomic_fetch_add_unless(&local->active_users, 1, 0);
- if (au == 0) {
+ if (!__rxrpc_use_local(local)) {
rxrpc_put_local(local);
return NULL;
}
@@ -401,14 +401,11 @@ struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local)
*/
void rxrpc_unuse_local(struct rxrpc_local *local)
{
- unsigned int au;
-
if (local) {
- au = atomic_dec_return(&local->active_users);
- if (au == 0)
+ if (__rxrpc_unuse_local(local)) {
+ rxrpc_get_local(local);
rxrpc_queue_local(local);
- else
- rxrpc_put_local(local);
+ }
}
}
@@ -465,7 +462,7 @@ static void rxrpc_local_processor(struct work_struct *work)
do {
again = false;
- if (atomic_read(&local->active_users) == 0) {
+ if (!__rxrpc_use_local(local)) {
rxrpc_local_destroyer(local);
break;
}
@@ -479,6 +476,8 @@ static void rxrpc_local_processor(struct work_struct *work)
rxrpc_process_local_events(local);
again = true;
}
+
+ __rxrpc_unuse_local(local);
} while (again);
rxrpc_put_local(local);
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 935bb60fff56..bad3d2420344 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -129,7 +129,7 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
rxrpc_serial_t *_serial)
{
- struct rxrpc_connection *conn = NULL;
+ struct rxrpc_connection *conn;
struct rxrpc_ack_buffer *pkt;
struct msghdr msg;
struct kvec iov[2];
@@ -139,18 +139,14 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
int ret;
u8 reason;
- spin_lock_bh(&call->lock);
- if (call->conn)
- conn = rxrpc_get_connection_maybe(call->conn);
- spin_unlock_bh(&call->lock);
- if (!conn)
+ if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
return -ECONNRESET;
pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
- if (!pkt) {
- rxrpc_put_connection(conn);
+ if (!pkt)
return -ENOMEM;
- }
+
+ conn = call->conn;
msg.msg_name = &call->peer->srx.transport;
msg.msg_namelen = call->peer->srx.transport_len;
@@ -244,7 +240,6 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
}
out:
- rxrpc_put_connection(conn);
kfree(pkt);
return ret;
}
@@ -254,7 +249,7 @@ out:
*/
int rxrpc_send_abort_packet(struct rxrpc_call *call)
{
- struct rxrpc_connection *conn = NULL;
+ struct rxrpc_connection *conn;
struct rxrpc_abort_buffer pkt;
struct msghdr msg;
struct kvec iov[1];
@@ -271,13 +266,11 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
test_bit(RXRPC_CALL_TX_LAST, &call->flags))
return 0;
- spin_lock_bh(&call->lock);
- if (call->conn)
- conn = rxrpc_get_connection_maybe(call->conn);
- spin_unlock_bh(&call->lock);
- if (!conn)
+ if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
return -ECONNRESET;
+ conn = call->conn;
+
msg.msg_name = &call->peer->srx.transport;
msg.msg_namelen = call->peer->srx.transport_len;
msg.msg_control = NULL;
@@ -312,8 +305,6 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
trace_rxrpc_tx_packet(call->debug_id, &pkt.whdr,
rxrpc_tx_point_call_abort);
rxrpc_tx_backoff(call, ret);
-
- rxrpc_put_connection(conn);
return ret;
}
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index 48f67a9b1037..923b263c401b 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -364,27 +364,31 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet,
if (!rxrpc_get_peer_maybe(peer))
continue;
- spin_unlock_bh(&rxnet->peer_hash_lock);
-
- keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME;
- slot = keepalive_at - base;
- _debug("%02x peer %u t=%d {%pISp}",
- cursor, peer->debug_id, slot, &peer->srx.transport);
+ if (__rxrpc_use_local(peer->local)) {
+ spin_unlock_bh(&rxnet->peer_hash_lock);
+
+ keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME;
+ slot = keepalive_at - base;
+ _debug("%02x peer %u t=%d {%pISp}",
+ cursor, peer->debug_id, slot, &peer->srx.transport);
+
+ if (keepalive_at <= base ||
+ keepalive_at > base + RXRPC_KEEPALIVE_TIME) {
+ rxrpc_send_keepalive(peer);
+ slot = RXRPC_KEEPALIVE_TIME;
+ }
- if (keepalive_at <= base ||
- keepalive_at > base + RXRPC_KEEPALIVE_TIME) {
- rxrpc_send_keepalive(peer);
- slot = RXRPC_KEEPALIVE_TIME;
+ /* A transmission to this peer occurred since last we
+ * examined it so put it into the appropriate future
+ * bucket.
+ */
+ slot += cursor;
+ slot &= mask;
+ spin_lock_bh(&rxnet->peer_hash_lock);
+ list_add_tail(&peer->keepalive_link,
+ &rxnet->peer_keepalive[slot & mask]);
+ rxrpc_unuse_local(peer->local);
}
-
- /* A transmission to this peer occurred since last we examined
- * it so put it into the appropriate future bucket.
- */
- slot += cursor;
- slot &= mask;
- spin_lock_bh(&rxnet->peer_hash_lock);
- list_add_tail(&peer->keepalive_link,
- &rxnet->peer_keepalive[slot & mask]);
rxrpc_put_peer_locked(peer);
}
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index f9c0d1e8d380..d32d4233d337 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -305,6 +305,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct cls_fl_filter *f;
list_for_each_entry_rcu(mask, &head->masks, list) {
+ flow_dissector_init_keys(&skb_key.control, &skb_key.basic);
fl_clear_masked_range(&skb_key, mask);
skb_flow_dissect_meta(skb, &mask->dissector, &skb_key);
@@ -691,6 +692,7 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
.len = 128 / BITS_PER_BYTE },
[TCA_FLOWER_KEY_CT_LABELS_MASK] = { .type = NLA_BINARY,
.len = 128 / BITS_PER_BYTE },
+ [TCA_FLOWER_FLAGS] = { .type = NLA_U32 },
};
static const struct nla_policy
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 039cc86974f4..610a0b728161 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -157,6 +157,7 @@ static void *mall_get(struct tcf_proto *tp, u32 handle)
static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = {
[TCA_MATCHALL_UNSPEC] = { .type = NLA_UNSPEC },
[TCA_MATCHALL_CLASSID] = { .type = NLA_U32 },
+ [TCA_MATCHALL_FLAGS] = { .type = NLA_U32 },
};
static int mall_set_parms(struct net *net, struct tcf_proto *tp,
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 3d4a1280352f..09b7dc5fe7e0 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -333,12 +333,31 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
cp->fall_through = p->fall_through;
cp->tp = tp;
+ if (tb[TCA_TCINDEX_HASH])
+ cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
+
+ if (tb[TCA_TCINDEX_MASK])
+ cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
+
+ if (tb[TCA_TCINDEX_SHIFT])
+ cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
+
+ if (!cp->hash) {
+ /* Hash not specified, use perfect hash if the upper limit
+ * of the hashing index is below the threshold.
+ */
+ if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD)
+ cp->hash = (cp->mask >> cp->shift) + 1;
+ else
+ cp->hash = DEFAULT_HASH_SIZE;
+ }
+
if (p->perfect) {
int i;
if (tcindex_alloc_perfect_hash(net, cp) < 0)
goto errout;
- for (i = 0; i < cp->hash; i++)
+ for (i = 0; i < min(cp->hash, p->hash); i++)
cp->perfect[i].res = p->perfect[i].res;
balloc = 1;
}
@@ -346,19 +365,10 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
err = tcindex_filter_result_init(&new_filter_result, net);
if (err < 0)
- goto errout1;
+ goto errout_alloc;
if (old_r)
cr = r->res;
- if (tb[TCA_TCINDEX_HASH])
- cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
-
- if (tb[TCA_TCINDEX_MASK])
- cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
-
- if (tb[TCA_TCINDEX_SHIFT])
- cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
-
err = -EBUSY;
/* Hash already allocated, make sure that we still meet the
@@ -376,16 +386,6 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
if (tb[TCA_TCINDEX_FALL_THROUGH])
cp->fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]);
- if (!cp->hash) {
- /* Hash not specified, use perfect hash if the upper limit
- * of the hashing index is below the threshold.
- */
- if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD)
- cp->hash = (cp->mask >> cp->shift) + 1;
- else
- cp->hash = DEFAULT_HASH_SIZE;
- }
-
if (!cp->perfect && !cp->h)
cp->alloc_hash = cp->hash;
@@ -484,7 +484,6 @@ errout_alloc:
tcindex_free_perfect_hash(cp);
else if (balloc == 2)
kfree(cp->h);
-errout1:
tcf_exts_destroy(&new_filter_result.exts);
errout:
kfree(cp);
diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
index bbd0dea6b6b9..214657eb3dfd 100644
--- a/net/sched/sch_fq_pie.c
+++ b/net/sched/sch_fq_pie.c
@@ -349,9 +349,9 @@ static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt,
while (sch->q.qlen > sch->limit) {
struct sk_buff *skb = fq_pie_qdisc_dequeue(sch);
- kfree_skb(skb);
len_dropped += qdisc_pkt_len(skb);
num_dropped += 1;
+ rtnl_kfree_skbs(skb, skb);
}
qdisc_tree_reduce_backlog(sch, num_dropped, len_dropped);
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index c609373c8661..660fc45ee40f 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -31,6 +31,7 @@ static DEFINE_SPINLOCK(taprio_list_lock);
#define TXTIME_ASSIST_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST)
#define FULL_OFFLOAD_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)
+#define TAPRIO_FLAGS_INVALID U32_MAX
struct sched_entry {
struct list_head list;
@@ -766,6 +767,7 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
[TCA_TAPRIO_ATTR_SCHED_CLOCKID] = { .type = NLA_S32 },
[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] = { .type = NLA_S64 },
[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 },
+ [TCA_TAPRIO_ATTR_FLAGS] = { .type = NLA_U32 },
};
static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry,
@@ -1367,6 +1369,33 @@ static int taprio_mqprio_cmp(const struct net_device *dev,
return 0;
}
+/* The semantics of the 'flags' argument in relation to 'change()'
+ * requests, are interpreted following two rules (which are applied in
+ * this order): (1) an omitted 'flags' argument is interpreted as
+ * zero; (2) the 'flags' of a "running" taprio instance cannot be
+ * changed.
+ */
+static int taprio_new_flags(const struct nlattr *attr, u32 old,
+ struct netlink_ext_ack *extack)
+{
+ u32 new = 0;
+
+ if (attr)
+ new = nla_get_u32(attr);
+
+ if (old != TAPRIO_FLAGS_INVALID && old != new) {
+ NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (!taprio_flags_valid(new)) {
+ NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid");
+ return -EINVAL;
+ }
+
+ return new;
+}
+
static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
@@ -1375,7 +1404,6 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
struct taprio_sched *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
struct tc_mqprio_qopt *mqprio = NULL;
- u32 taprio_flags = 0;
unsigned long flags;
ktime_t start;
int i, err;
@@ -1388,21 +1416,14 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
if (tb[TCA_TAPRIO_ATTR_PRIOMAP])
mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]);
- if (tb[TCA_TAPRIO_ATTR_FLAGS]) {
- taprio_flags = nla_get_u32(tb[TCA_TAPRIO_ATTR_FLAGS]);
-
- if (q->flags != 0 && q->flags != taprio_flags) {
- NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported");
- return -EOPNOTSUPP;
- } else if (!taprio_flags_valid(taprio_flags)) {
- NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid");
- return -EINVAL;
- }
+ err = taprio_new_flags(tb[TCA_TAPRIO_ATTR_FLAGS],
+ q->flags, extack);
+ if (err < 0)
+ return err;
- q->flags = taprio_flags;
- }
+ q->flags = err;
- err = taprio_parse_mqprio_opt(dev, mqprio, extack, taprio_flags);
+ err = taprio_parse_mqprio_opt(dev, mqprio, extack, q->flags);
if (err < 0)
return err;
@@ -1444,7 +1465,20 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
taprio_set_picos_per_byte(dev, q);
- if (FULL_OFFLOAD_IS_ENABLED(taprio_flags))
+ if (mqprio) {
+ netdev_set_num_tc(dev, mqprio->num_tc);
+ for (i = 0; i < mqprio->num_tc; i++)
+ netdev_set_tc_queue(dev, i,
+ mqprio->count[i],
+ mqprio->offset[i]);
+
+ /* Always use supplied priority mappings */
+ for (i = 0; i <= TC_BITMASK; i++)
+ netdev_set_prio_tc_map(dev, i,
+ mqprio->prio_tc_map[i]);
+ }
+
+ if (FULL_OFFLOAD_IS_ENABLED(q->flags))
err = taprio_enable_offload(dev, mqprio, q, new_admin, extack);
else
err = taprio_disable_offload(dev, q, extack);
@@ -1464,27 +1498,14 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
q->txtime_delay = nla_get_u32(tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]);
}
- if (!TXTIME_ASSIST_IS_ENABLED(taprio_flags) &&
- !FULL_OFFLOAD_IS_ENABLED(taprio_flags) &&
+ if (!TXTIME_ASSIST_IS_ENABLED(q->flags) &&
+ !FULL_OFFLOAD_IS_ENABLED(q->flags) &&
!hrtimer_active(&q->advance_timer)) {
hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS);
q->advance_timer.function = advance_sched;
}
- if (mqprio) {
- netdev_set_num_tc(dev, mqprio->num_tc);
- for (i = 0; i < mqprio->num_tc; i++)
- netdev_set_tc_queue(dev, i,
- mqprio->count[i],
- mqprio->offset[i]);
-
- /* Always use supplied priority mappings */
- for (i = 0; i <= TC_BITMASK; i++)
- netdev_set_prio_tc_map(dev, i,
- mqprio->prio_tc_map[i]);
- }
-
- if (FULL_OFFLOAD_IS_ENABLED(taprio_flags)) {
+ if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
q->dequeue = taprio_dequeue_offload;
q->peek = taprio_peek_offload;
} else {
@@ -1501,9 +1522,9 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
goto unlock;
}
- if (TXTIME_ASSIST_IS_ENABLED(taprio_flags)) {
- setup_txtime(q, new_admin, start);
+ setup_txtime(q, new_admin, start);
+ if (TXTIME_ASSIST_IS_ENABLED(q->flags)) {
if (!oper) {
rcu_assign_pointer(q->oper_sched, new_admin);
err = 0;
@@ -1528,7 +1549,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
spin_unlock_irqrestore(&q->current_entry_lock, flags);
- if (FULL_OFFLOAD_IS_ENABLED(taprio_flags))
+ if (FULL_OFFLOAD_IS_ENABLED(q->flags))
taprio_offload_config_changed(q);
}
@@ -1567,7 +1588,7 @@ static void taprio_destroy(struct Qdisc *sch)
}
q->qdiscs = NULL;
- netdev_set_num_tc(dev, 0);
+ netdev_reset_tc(dev);
if (q->oper_sched)
call_rcu(&q->oper_sched->rcu, taprio_free_sched_cb);
@@ -1597,6 +1618,7 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
* and get the valid one on taprio_change().
*/
q->clockid = -1;
+ q->flags = TAPRIO_FLAGS_INVALID;
spin_lock(&taprio_list_lock);
list_add(&q->taprio_list, &taprio_list);
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 748e3b19ec1d..6a16af4b1ef6 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -170,6 +170,16 @@ static inline bool sctp_chunk_length_valid(struct sctp_chunk *chunk,
return true;
}
+/* Check for format error in an ABORT chunk */
+static inline bool sctp_err_chunk_valid(struct sctp_chunk *chunk)
+{
+ struct sctp_errhdr *err;
+
+ sctp_walk_errors(err, chunk->chunk_hdr);
+
+ return (void *)err == (void *)chunk->chunk_end;
+}
+
/**********************************************************
* These are the state functions for handling chunk events.
**********************************************************/
@@ -2255,6 +2265,9 @@ enum sctp_disposition sctp_sf_shutdown_pending_abort(
sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
+ if (!sctp_err_chunk_valid(chunk))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands);
}
@@ -2298,6 +2311,9 @@ enum sctp_disposition sctp_sf_shutdown_sent_abort(
sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
+ if (!sctp_err_chunk_valid(chunk))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
/* Stop the T2-shutdown timer. */
sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN));
@@ -2565,6 +2581,9 @@ enum sctp_disposition sctp_sf_do_9_1_abort(
sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
+ if (!sctp_err_chunk_valid(chunk))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands);
}
@@ -2582,16 +2601,8 @@ static enum sctp_disposition __sctp_sf_do_9_1_abort(
/* See if we have an error cause code in the chunk. */
len = ntohs(chunk->chunk_hdr->length);
- if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) {
- struct sctp_errhdr *err;
-
- sctp_walk_errors(err, chunk->chunk_hdr);
- if ((void *)err != (void *)chunk->chunk_end)
- return sctp_sf_pdiscard(net, ep, asoc, type, arg,
- commands);
-
+ if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr))
error = ((struct sctp_errhdr *)chunk->skb->data)->cause;
- }
sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNRESET));
/* ASSOC_FAILED will DELETE_TCB. */
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index cee5bf4a9bb9..90988a511cd5 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -470,6 +470,8 @@ static void smc_switch_to_fallback(struct smc_sock *smc)
if (smc->sk.sk_socket && smc->sk.sk_socket->file) {
smc->clcsock->file = smc->sk.sk_socket->file;
smc->clcsock->file->private_data = smc->clcsock;
+ smc->clcsock->wq.fasync_list =
+ smc->sk.sk_socket->wq.fasync_list;
}
}
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 0879f7bed967..86cccc24e52e 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -372,7 +372,9 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline));
dclc.hdr.version = SMC_CLC_V1;
dclc.hdr.flag = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? 1 : 0;
- memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid));
+ if (smc->conn.lgr && !smc->conn.lgr->is_smcd)
+ memcpy(dclc.id_for_peer, local_systemid,
+ sizeof(local_systemid));
dclc.peer_diagnosis = htonl(peer_diag_info);
memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index f38727ecf8b2..e1f64f4ba236 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -39,16 +39,15 @@ static void smc_diag_msg_common_fill(struct smc_diag_msg *r, struct sock *sk)
{
struct smc_sock *smc = smc_sk(sk);
+ memset(r, 0, sizeof(*r));
r->diag_family = sk->sk_family;
+ sock_diag_save_cookie(sk, r->id.idiag_cookie);
if (!smc->clcsock)
return;
r->id.idiag_sport = htons(smc->clcsock->sk->sk_num);
r->id.idiag_dport = smc->clcsock->sk->sk_dport;
r->id.idiag_if = smc->clcsock->sk->sk_bound_dev_if;
- sock_diag_save_cookie(sk, r->id.idiag_cookie);
if (sk->sk_protocol == SMCPROTO_SMC) {
- memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
- memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));
r->id.idiag_src[0] = smc->clcsock->sk->sk_rcv_saddr;
r->id.idiag_dst[0] = smc->clcsock->sk->sk_daddr;
#if IS_ENABLED(CONFIG_IPV6)
diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c
index d024af4be85e..8b4d72b1a066 100644
--- a/net/sunrpc/addr.c
+++ b/net/sunrpc/addr.c
@@ -175,7 +175,7 @@ static int rpc_parse_scope_id(struct net *net, const char *buf,
return 0;
len = (buf + buflen) - delim - 1;
- p = kstrndup(delim + 1, len, GFP_KERNEL);
+ p = kmemdup_nul(delim + 1, len, GFP_KERNEL);
if (p) {
u32 scope_id = 0;
struct net_device *dev;
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index cdb05b48de44..5748ad0ba1bd 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -221,55 +221,6 @@ rpcauth_get_gssinfo(rpc_authflavor_t pseudoflavor, struct rpcsec_gss_info *info)
}
EXPORT_SYMBOL_GPL(rpcauth_get_gssinfo);
-/**
- * rpcauth_list_flavors - discover registered flavors and pseudoflavors
- * @array: array to fill in
- * @size: size of "array"
- *
- * Returns the number of array items filled in, or a negative errno.
- *
- * The returned array is not sorted by any policy. Callers should not
- * rely on the order of the items in the returned array.
- */
-int
-rpcauth_list_flavors(rpc_authflavor_t *array, int size)
-{
- const struct rpc_authops *ops;
- rpc_authflavor_t flavor, pseudos[4];
- int i, len, result = 0;
-
- rcu_read_lock();
- for (flavor = 0; flavor < RPC_AUTH_MAXFLAVOR; flavor++) {
- ops = rcu_dereference(auth_flavors[flavor]);
- if (result >= size) {
- result = -ENOMEM;
- break;
- }
-
- if (ops == NULL)
- continue;
- if (ops->list_pseudoflavors == NULL) {
- array[result++] = ops->au_flavor;
- continue;
- }
- len = ops->list_pseudoflavors(pseudos, ARRAY_SIZE(pseudos));
- if (len < 0) {
- result = len;
- break;
- }
- for (i = 0; i < len; i++) {
- if (result >= size) {
- result = -ENOMEM;
- break;
- }
- array[result++] = pseudos[i];
- }
- }
- rcu_read_unlock();
- return result;
-}
-EXPORT_SYMBOL_GPL(rpcauth_list_flavors);
-
struct rpc_auth *
rpcauth_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
{
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index d75fddca44c9..24ca861815b1 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -2118,7 +2118,6 @@ static const struct rpc_authops authgss_ops = {
.hash_cred = gss_hash_cred,
.lookup_cred = gss_lookup_cred,
.crcreate = gss_create_cred,
- .list_pseudoflavors = gss_mech_list_pseudoflavors,
.info2flavor = gss_mech_info2flavor,
.flavor2info = gss_mech_flavor2info,
};
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index d3685d4ed9e0..db550bfc2642 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -220,35 +220,6 @@ gss_mech_get_by_pseudoflavor(u32 pseudoflavor)
}
/**
- * gss_mech_list_pseudoflavors - Discover registered GSS pseudoflavors
- * @array_ptr: array to fill in
- * @size: size of "array"
- *
- * Returns the number of array items filled in, or a negative errno.
- *
- * The returned array is not sorted by any policy. Callers should not
- * rely on the order of the items in the returned array.
- */
-int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr, int size)
-{
- struct gss_api_mech *pos = NULL;
- int j, i = 0;
-
- rcu_read_lock();
- list_for_each_entry_rcu(pos, &registered_mechs, gm_list) {
- for (j = 0; j < pos->gm_pf_num; j++) {
- if (i >= size) {
- spin_unlock(&registered_mechs_lock);
- return -ENOMEM;
- }
- array_ptr[i++] = pos->gm_pfs[j].pseudoflavor;
- }
- }
- rcu_read_unlock();
- return i;
-}
-
-/**
* gss_svc_to_pseudoflavor - map a GSS service number to a pseudoflavor
* @gm: GSS mechanism handle
* @qop: GSS quality-of-protection value
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 311181720d79..65b67b257302 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1248,6 +1248,7 @@ static int gss_proxy_save_rsc(struct cache_detail *cd,
dprintk("RPC: No creds found!\n");
goto out;
} else {
+ struct timespec64 boot;
/* steal creds */
rsci.cred = ud->creds;
@@ -1268,6 +1269,9 @@ static int gss_proxy_save_rsc(struct cache_detail *cd,
&expiry, GFP_KERNEL);
if (status)
goto out;
+
+ getboottime64(&boot);
+ expiry -= boot.tv_sec;
}
rsci.h.expiry_time = expiry;
@@ -1428,10 +1432,10 @@ static ssize_t read_gssp(struct file *file, char __user *buf,
return len;
}
-static const struct file_operations use_gss_proxy_ops = {
- .open = nonseekable_open,
- .write = write_gssp,
- .read = read_gssp,
+static const struct proc_ops use_gss_proxy_proc_ops = {
+ .proc_open = nonseekable_open,
+ .proc_write = write_gssp,
+ .proc_read = read_gssp,
};
static int create_use_gss_proxy_proc_entry(struct net *net)
@@ -1442,7 +1446,7 @@ static int create_use_gss_proxy_proc_entry(struct net *net)
sn->use_gss_proxy = -1;
*p = proc_create_data("use-gss-proxy", S_IFREG | 0600,
sn->proc_net_rpc,
- &use_gss_proxy_ops, net);
+ &use_gss_proxy_proc_ops, net);
if (!*p)
return -ENOMEM;
init_gssp_clnt(sn);
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index d996bf872a7c..bd843a81afa0 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -77,6 +77,22 @@ static struct cache_head *sunrpc_cache_find_rcu(struct cache_detail *detail,
return NULL;
}
+static void sunrpc_begin_cache_remove_entry(struct cache_head *ch,
+ struct cache_detail *cd)
+{
+ /* Must be called under cd->hash_lock */
+ hlist_del_init_rcu(&ch->cache_list);
+ set_bit(CACHE_CLEANED, &ch->flags);
+ cd->entries --;
+}
+
+static void sunrpc_end_cache_remove_entry(struct cache_head *ch,
+ struct cache_detail *cd)
+{
+ cache_fresh_unlocked(ch, cd);
+ cache_put(ch, cd);
+}
+
static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail,
struct cache_head *key,
int hash)
@@ -100,8 +116,7 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail,
hlist_for_each_entry_rcu(tmp, head, cache_list) {
if (detail->match(tmp, key)) {
if (cache_is_expired(detail, tmp)) {
- hlist_del_init_rcu(&tmp->cache_list);
- detail->entries --;
+ sunrpc_begin_cache_remove_entry(tmp, detail);
freeme = tmp;
break;
}
@@ -117,10 +132,8 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail,
cache_get(new);
spin_unlock(&detail->hash_lock);
- if (freeme) {
- cache_fresh_unlocked(freeme, detail);
- cache_put(freeme, detail);
- }
+ if (freeme)
+ sunrpc_end_cache_remove_entry(freeme, detail);
return new;
}
@@ -454,8 +467,7 @@ static int cache_clean(void)
if (!cache_is_expired(current_detail, ch))
continue;
- hlist_del_init_rcu(&ch->cache_list);
- current_detail->entries--;
+ sunrpc_begin_cache_remove_entry(ch, current_detail);
rv = 1;
break;
}
@@ -465,11 +477,8 @@ static int cache_clean(void)
if (!ch)
current_index ++;
spin_unlock(&cache_list_lock);
- if (ch) {
- set_bit(CACHE_CLEANED, &ch->flags);
- cache_fresh_unlocked(ch, d);
- cache_put(ch, d);
- }
+ if (ch)
+ sunrpc_end_cache_remove_entry(ch, d);
} else
spin_unlock(&cache_list_lock);
@@ -525,13 +534,9 @@ void cache_purge(struct cache_detail *detail)
for (i = 0; i < detail->hash_size; i++) {
head = &detail->hash_table[i];
hlist_for_each_entry_safe(ch, tmp, head, cache_list) {
- hlist_del_init_rcu(&ch->cache_list);
- detail->entries--;
-
- set_bit(CACHE_CLEANED, &ch->flags);
+ sunrpc_begin_cache_remove_entry(ch, detail);
spin_unlock(&detail->hash_lock);
- cache_fresh_unlocked(ch, detail);
- cache_put(ch, detail);
+ sunrpc_end_cache_remove_entry(ch, detail);
spin_lock(&detail->hash_lock);
}
}
@@ -1571,15 +1576,14 @@ static int cache_release_procfs(struct inode *inode, struct file *filp)
return cache_release(inode, filp, cd);
}
-static const struct file_operations cache_file_operations_procfs = {
- .owner = THIS_MODULE,
- .llseek = no_llseek,
- .read = cache_read_procfs,
- .write = cache_write_procfs,
- .poll = cache_poll_procfs,
- .unlocked_ioctl = cache_ioctl_procfs, /* for FIONREAD */
- .open = cache_open_procfs,
- .release = cache_release_procfs,
+static const struct proc_ops cache_channel_proc_ops = {
+ .proc_lseek = no_llseek,
+ .proc_read = cache_read_procfs,
+ .proc_write = cache_write_procfs,
+ .proc_poll = cache_poll_procfs,
+ .proc_ioctl = cache_ioctl_procfs, /* for FIONREAD */
+ .proc_open = cache_open_procfs,
+ .proc_release = cache_release_procfs,
};
static int content_open_procfs(struct inode *inode, struct file *filp)
@@ -1596,11 +1600,11 @@ static int content_release_procfs(struct inode *inode, struct file *filp)
return content_release(inode, filp, cd);
}
-static const struct file_operations content_file_operations_procfs = {
- .open = content_open_procfs,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = content_release_procfs,
+static const struct proc_ops content_proc_ops = {
+ .proc_open = content_open_procfs,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_release = content_release_procfs,
};
static int open_flush_procfs(struct inode *inode, struct file *filp)
@@ -1634,12 +1638,12 @@ static ssize_t write_flush_procfs(struct file *filp,
return write_flush(filp, buf, count, ppos, cd);
}
-static const struct file_operations cache_flush_operations_procfs = {
- .open = open_flush_procfs,
- .read = read_flush_procfs,
- .write = write_flush_procfs,
- .release = release_flush_procfs,
- .llseek = no_llseek,
+static const struct proc_ops cache_flush_proc_ops = {
+ .proc_open = open_flush_procfs,
+ .proc_read = read_flush_procfs,
+ .proc_write = write_flush_procfs,
+ .proc_release = release_flush_procfs,
+ .proc_lseek = no_llseek,
};
static void remove_cache_proc_entries(struct cache_detail *cd)
@@ -1662,19 +1666,19 @@ static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
goto out_nomem;
p = proc_create_data("flush", S_IFREG | 0600,
- cd->procfs, &cache_flush_operations_procfs, cd);
+ cd->procfs, &cache_flush_proc_ops, cd);
if (p == NULL)
goto out_nomem;
if (cd->cache_request || cd->cache_parse) {
p = proc_create_data("channel", S_IFREG | 0600, cd->procfs,
- &cache_file_operations_procfs, cd);
+ &cache_channel_proc_ops, cd);
if (p == NULL)
goto out_nomem;
}
if (cd->cache_show) {
p = proc_create_data("content", S_IFREG | 0400, cd->procfs,
- &content_file_operations_procfs, cd);
+ &content_proc_ops, cd);
if (p == NULL)
goto out_nomem;
}
@@ -1886,10 +1890,9 @@ void sunrpc_cache_unhash(struct cache_detail *cd, struct cache_head *h)
{
spin_lock(&cd->hash_lock);
if (!hlist_unhashed(&h->cache_list)){
- hlist_del_init_rcu(&h->cache_list);
- cd->entries--;
+ sunrpc_begin_cache_remove_entry(h, cd);
spin_unlock(&cd->hash_lock);
- cache_put(h, cd);
+ sunrpc_end_cache_remove_entry(h, cd);
} else
spin_unlock(&cd->hash_lock);
}
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index a3379765605d..7324b21f923e 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -2130,6 +2130,7 @@ call_connect_status(struct rpc_task *task)
case -ENETUNREACH:
case -EHOSTUNREACH:
case -EPIPE:
+ case -EPROTO:
xprt_conditional_disconnect(task->tk_rqstp->rq_xprt,
task->tk_rqstp->rq_connect_cookie);
if (RPC_IS_SOFTCONN(task))
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 9c79548c6847..55e900255b0c 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -846,6 +846,8 @@ void rpc_signal_task(struct rpc_task *task)
if (!RPC_IS_ACTIVATED(task))
return;
+
+ trace_rpc_task_signalled(task, task->tk_action);
set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
smp_mb__after_atomic();
queue = READ_ONCE(task->tk_waitqueue);
@@ -949,7 +951,7 @@ static void __rpc_execute(struct rpc_task *task)
* clean up after sleeping on some queue, we don't
* break the loop here, but go around once more.
*/
- dprintk("RPC: %5u got signal\n", task->tk_pid);
+ trace_rpc_task_signalled(task, task->tk_action);
set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
task->tk_rpc_status = -ERESTARTSYS;
rpc_exit(task, -ERESTARTSYS);
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 7c74197c2ecf..c964b48eaaba 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -69,12 +69,11 @@ static int rpc_proc_open(struct inode *inode, struct file *file)
return single_open(file, rpc_proc_show, PDE_DATA(inode));
}
-static const struct file_operations rpc_proc_fops = {
- .owner = THIS_MODULE,
- .open = rpc_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
+static const struct proc_ops rpc_proc_ops = {
+ .proc_open = rpc_proc_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_release = single_release,
};
/*
@@ -281,19 +280,19 @@ EXPORT_SYMBOL_GPL(rpc_clnt_show_stats);
*/
static inline struct proc_dir_entry *
do_register(struct net *net, const char *name, void *data,
- const struct file_operations *fops)
+ const struct proc_ops *proc_ops)
{
struct sunrpc_net *sn;
dprintk("RPC: registering /proc/net/rpc/%s\n", name);
sn = net_generic(net, sunrpc_net_id);
- return proc_create_data(name, 0, sn->proc_net_rpc, fops, data);
+ return proc_create_data(name, 0, sn->proc_net_rpc, proc_ops, data);
}
struct proc_dir_entry *
rpc_proc_register(struct net *net, struct rpc_stat *statp)
{
- return do_register(net, statp->program->name, statp, &rpc_proc_fops);
+ return do_register(net, statp->program->name, statp, &rpc_proc_ops);
}
EXPORT_SYMBOL_GPL(rpc_proc_register);
@@ -308,9 +307,9 @@ rpc_proc_unregister(struct net *net, const char *name)
EXPORT_SYMBOL_GPL(rpc_proc_unregister);
struct proc_dir_entry *
-svc_proc_register(struct net *net, struct svc_stat *statp, const struct file_operations *fops)
+svc_proc_register(struct net *net, struct svc_stat *statp, const struct proc_ops *proc_ops)
{
- return do_register(net, statp->program->pg_name, statp, fops);
+ return do_register(net, statp->program->pg_name, statp, proc_ops);
}
EXPORT_SYMBOL_GPL(svc_proc_register);
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index f3104be8ff5d..e5497dc2475b 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -1079,7 +1079,7 @@ void xdr_enter_page(struct xdr_stream *xdr, unsigned int len)
}
EXPORT_SYMBOL_GPL(xdr_enter_page);
-static struct kvec empty_iov = {.iov_base = NULL, .iov_len = 0};
+static const struct kvec empty_iov = {.iov_base = NULL, .iov_len = 0};
void
xdr_buf_from_iov(struct kvec *iov, struct xdr_buf *buf)
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 9d02eae353c6..1a0ae0c61353 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -194,6 +194,10 @@ create_req:
req = rpcrdma_req_create(r_xprt, size, GFP_KERNEL);
if (!req)
return NULL;
+ if (rpcrdma_req_setup(r_xprt, req)) {
+ rpcrdma_req_destroy(req);
+ return NULL;
+ }
xprt->bc_alloc_count++;
rqst = &req->rl_slot;
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 523722be6a16..125297c9aa3e 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -51,28 +51,6 @@
#endif
/**
- * frwr_is_supported - Check if device supports FRWR
- * @device: interface adapter to check
- *
- * Returns true if device supports FRWR, otherwise false
- */
-bool frwr_is_supported(struct ib_device *device)
-{
- struct ib_device_attr *attrs = &device->attrs;
-
- if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
- goto out_not_supported;
- if (attrs->max_fast_reg_page_list_len == 0)
- goto out_not_supported;
- return true;
-
-out_not_supported:
- pr_info("rpcrdma: 'frwr' mode is not supported by device %s\n",
- device->name);
- return false;
-}
-
-/**
* frwr_release_mr - Destroy one MR
* @mr: MR allocated by frwr_init_mr
*
@@ -170,26 +148,48 @@ out_list_err:
}
/**
- * frwr_open - Prepare an endpoint for use with FRWR
- * @ia: interface adapter this endpoint will use
- * @ep: endpoint to prepare
+ * frwr_query_device - Prepare a transport for use with FRWR
+ * @r_xprt: controlling transport instance
+ * @device: RDMA device to query
*
* On success, sets:
- * ep->rep_attr.cap.max_send_wr
- * ep->rep_attr.cap.max_recv_wr
+ * ep->rep_attr
* ep->rep_max_requests
- * ia->ri_max_segs
+ * ia->ri_max_rdma_segs
*
* And these FRWR-related fields:
* ia->ri_max_frwr_depth
* ia->ri_mrtype
*
- * On failure, a negative errno is returned.
+ * Return values:
+ * On success, returns zero.
+ * %-EINVAL - the device does not support FRWR memory registration
+ * %-ENOMEM - the device is not sufficiently capable for NFS/RDMA
*/
-int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep)
+int frwr_query_device(struct rpcrdma_xprt *r_xprt,
+ const struct ib_device *device)
{
- struct ib_device_attr *attrs = &ia->ri_id->device->attrs;
+ const struct ib_device_attr *attrs = &device->attrs;
+ struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+ struct rpcrdma_ep *ep = &r_xprt->rx_ep;
int max_qp_wr, depth, delta;
+ unsigned int max_sge;
+
+ if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) ||
+ attrs->max_fast_reg_page_list_len == 0) {
+ pr_err("rpcrdma: 'frwr' mode is not supported by device %s\n",
+ device->name);
+ return -EINVAL;
+ }
+
+ max_sge = min_t(unsigned int, attrs->max_send_sge,
+ RPCRDMA_MAX_SEND_SGES);
+ if (max_sge < RPCRDMA_MIN_SEND_SGES) {
+ pr_err("rpcrdma: HCA provides only %u send SGEs\n", max_sge);
+ return -ENOMEM;
+ }
+ ep->rep_attr.cap.max_send_sge = max_sge;
+ ep->rep_attr.cap.max_recv_sge = 1;
ia->ri_mrtype = IB_MR_TYPE_MEM_REG;
if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
@@ -199,14 +199,12 @@ int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep)
* capability, but perform optimally when the MRs are not larger
* than a page.
*/
- if (attrs->max_sge_rd > 1)
+ if (attrs->max_sge_rd > RPCRDMA_MAX_HDR_SEGS)
ia->ri_max_frwr_depth = attrs->max_sge_rd;
else
ia->ri_max_frwr_depth = attrs->max_fast_reg_page_list_len;
if (ia->ri_max_frwr_depth > RPCRDMA_MAX_DATA_SEGS)
ia->ri_max_frwr_depth = RPCRDMA_MAX_DATA_SEGS;
- dprintk("RPC: %s: max FR page list depth = %u\n",
- __func__, ia->ri_max_frwr_depth);
/* Add room for frwr register and invalidate WRs.
* 1. FRWR reg WR for head
@@ -230,7 +228,7 @@ int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep)
} while (delta > 0);
}
- max_qp_wr = ia->ri_id->device->attrs.max_qp_wr;
+ max_qp_wr = attrs->max_qp_wr;
max_qp_wr -= RPCRDMA_BACKWARD_WRS;
max_qp_wr -= 1;
if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE)
@@ -241,7 +239,7 @@ int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep)
if (ep->rep_attr.cap.max_send_wr > max_qp_wr) {
ep->rep_max_requests = max_qp_wr / depth;
if (!ep->rep_max_requests)
- return -EINVAL;
+ return -ENOMEM;
ep->rep_attr.cap.max_send_wr = ep->rep_max_requests * depth;
}
ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
@@ -250,30 +248,22 @@ int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep)
ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */
- ia->ri_max_segs =
+ ia->ri_max_rdma_segs =
DIV_ROUND_UP(RPCRDMA_MAX_DATA_SEGS, ia->ri_max_frwr_depth);
/* Reply chunks require segments for head and tail buffers */
- ia->ri_max_segs += 2;
- if (ia->ri_max_segs > RPCRDMA_MAX_HDR_SEGS)
- ia->ri_max_segs = RPCRDMA_MAX_HDR_SEGS;
- return 0;
-}
-
-/**
- * frwr_maxpages - Compute size of largest payload
- * @r_xprt: transport
- *
- * Returns maximum size of an RPC message, in pages.
- *
- * FRWR mode conveys a list of pages per chunk segment. The
- * maximum length of that list is the FRWR page list depth.
- */
-size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt)
-{
- struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+ ia->ri_max_rdma_segs += 2;
+ if (ia->ri_max_rdma_segs > RPCRDMA_MAX_HDR_SEGS)
+ ia->ri_max_rdma_segs = RPCRDMA_MAX_HDR_SEGS;
+
+ /* Ensure the underlying device is capable of conveying the
+ * largest r/wsize NFS will ask for. This guarantees that
+ * failing over from one RDMA device to another will not
+ * break NFS I/O.
+ */
+ if ((ia->ri_max_rdma_segs * ia->ri_max_frwr_depth) < RPCRDMA_MAX_SEGS)
+ return -ENOMEM;
- return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
- (ia->ri_max_segs - 2) * ia->ri_max_frwr_depth);
+ return 0;
}
/**
@@ -298,8 +288,8 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct ib_reg_wr *reg_wr;
+ int i, n, dma_nents;
struct ib_mr *ibmr;
- int i, n;
u8 key;
if (nsegs > ia->ri_max_frwr_depth)
@@ -323,15 +313,16 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
break;
}
mr->mr_dir = rpcrdma_data_dir(writing);
+ mr->mr_nents = i;
- mr->mr_nents =
- ib_dma_map_sg(ia->ri_id->device, mr->mr_sg, i, mr->mr_dir);
- if (!mr->mr_nents)
+ dma_nents = ib_dma_map_sg(ia->ri_id->device, mr->mr_sg, mr->mr_nents,
+ mr->mr_dir);
+ if (!dma_nents)
goto out_dmamap_err;
ibmr = mr->frwr.fr_mr;
- n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
- if (unlikely(n != mr->mr_nents))
+ n = ib_map_mr_sg(ibmr, mr->mr_sg, dma_nents, NULL, PAGE_SIZE);
+ if (n != dma_nents)
goto out_mapmr_err;
ibmr->iova &= 0x00000000ffffffff;
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index aec3beb93b25..28020ec104d4 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -111,7 +111,7 @@ static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs)
*/
void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt)
{
- unsigned int maxsegs = r_xprt->rx_ia.ri_max_segs;
+ unsigned int maxsegs = r_xprt->rx_ia.ri_max_rdma_segs;
struct rpcrdma_ep *ep = &r_xprt->rx_ep;
ep->rep_max_inline_send =
@@ -145,7 +145,7 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
remaining -= min_t(unsigned int,
PAGE_SIZE - offset, remaining);
offset = 0;
- if (++count > r_xprt->rx_ia.ri_max_send_sges)
+ if (++count > r_xprt->rx_ep.rep_attr.cap.max_send_sge)
return false;
}
}
@@ -580,22 +580,19 @@ void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc)
/* Prepare an SGE for the RPC-over-RDMA transport header.
*/
-static bool rpcrdma_prepare_hdr_sge(struct rpcrdma_xprt *r_xprt,
+static void rpcrdma_prepare_hdr_sge(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_req *req, u32 len)
{
struct rpcrdma_sendctx *sc = req->rl_sendctx;
struct rpcrdma_regbuf *rb = req->rl_rdmabuf;
struct ib_sge *sge = &sc->sc_sges[req->rl_wr.num_sge++];
- if (!rpcrdma_regbuf_dma_map(r_xprt, rb))
- return false;
sge->addr = rdmab_addr(rb);
sge->length = len;
sge->lkey = rdmab_lkey(rb);
ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, sge->length,
DMA_TO_DEVICE);
- return true;
}
/* The head iovec is straightforward, as it is usually already
@@ -836,10 +833,9 @@ inline int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
req->rl_wr.num_sge = 0;
req->rl_wr.opcode = IB_WR_SEND;
- ret = -EIO;
- if (!rpcrdma_prepare_hdr_sge(r_xprt, req, hdrlen))
- goto out_unmap;
+ rpcrdma_prepare_hdr_sge(r_xprt, req, hdrlen);
+ ret = -EIO;
switch (rtype) {
case rpcrdma_noch_pullup:
if (!rpcrdma_prepare_noch_pullup(r_xprt, req, xdr))
@@ -909,7 +905,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
goto out_err;
*p++ = rqst->rq_xid;
*p++ = rpcrdma_version;
- *p++ = cpu_to_be32(r_xprt->rx_buf.rb_max_requests);
+ *p++ = r_xprt->rx_buf.rb_max_requests;
/* When the ULP employs a GSS flavor that guarantees integrity
* or privacy, direct data placement of individual data items
@@ -1480,8 +1476,8 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
if (credits == 0)
credits = 1; /* don't deadlock */
- else if (credits > buf->rb_max_requests)
- credits = buf->rb_max_requests;
+ else if (credits > r_xprt->rx_ep.rep_max_requests)
+ credits = r_xprt->rx_ep.rep_max_requests;
if (buf->rb_credits != credits)
rpcrdma_update_cwnd(r_xprt, credits);
rpcrdma_post_recvs(r_xprt, false);
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 7395eb2cfdeb..3cfeba68ee9a 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -316,7 +316,8 @@ xprt_setup_rdma(struct xprt_create *args)
if (args->addrlen > sizeof(xprt->addr))
return ERR_PTR(-EBADF);
- xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 0, 0);
+ xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 0,
+ xprt_rdma_slot_table_entries);
if (!xprt)
return ERR_PTR(-ENOMEM);
@@ -358,19 +359,13 @@ xprt_setup_rdma(struct xprt_create *args)
if (rc)
goto out3;
- INIT_DELAYED_WORK(&new_xprt->rx_connect_worker,
- xprt_rdma_connect_worker);
-
- xprt->max_payload = frwr_maxpages(new_xprt);
- if (xprt->max_payload == 0)
- goto out4;
- xprt->max_payload <<= PAGE_SHIFT;
- dprintk("RPC: %s: transport data payload maximum: %zu bytes\n",
- __func__, xprt->max_payload);
-
if (!try_module_get(THIS_MODULE))
goto out4;
+ INIT_DELAYED_WORK(&new_xprt->rx_connect_worker,
+ xprt_rdma_connect_worker);
+ xprt->max_payload = RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT;
+
dprintk("RPC: %s: %s:%s\n", __func__,
xprt->address_strings[RPC_DISPLAY_ADDR],
xprt->address_strings[RPC_DISPLAY_PORT]);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index fda3889993cb..353f61ac8d51 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -74,9 +74,13 @@
/*
* internal functions
*/
+static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt);
+static void rpcrdma_sendctxs_destroy(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_sendctx *sc);
+static int rpcrdma_reqs_setup(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt);
+static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep);
static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt);
@@ -174,7 +178,7 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
return;
out_flushed:
- rpcrdma_recv_buffer_put(rep);
+ rpcrdma_rep_destroy(rep);
}
static void rpcrdma_update_cm_private(struct rpcrdma_xprt *r_xprt,
@@ -366,18 +370,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt)
goto out_err;
}
- switch (xprt_rdma_memreg_strategy) {
- case RPCRDMA_FRWR:
- if (frwr_is_supported(ia->ri_id->device))
- break;
- /*FALLTHROUGH*/
- default:
- pr_err("rpcrdma: Device %s does not support memreg mode %d\n",
- ia->ri_id->device->name, xprt_rdma_memreg_strategy);
- rc = -EINVAL;
- goto out_err;
- }
-
return 0;
out_err:
@@ -391,6 +383,8 @@ out_err:
*
* Divest transport H/W resources associated with this adapter,
* but allow it to be restored later.
+ *
+ * Caller must hold the transport send lock.
*/
void
rpcrdma_ia_remove(struct rpcrdma_ia *ia)
@@ -398,8 +392,6 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
rx_ia);
struct rpcrdma_ep *ep = &r_xprt->rx_ep;
- struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
- struct rpcrdma_req *req;
/* This is similar to rpcrdma_ep_destroy, but:
* - Don't cancel the connect worker.
@@ -422,12 +414,9 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
* mappings and MRs are gone.
*/
rpcrdma_reps_unmap(r_xprt);
- list_for_each_entry(req, &buf->rb_allreqs, rl_all) {
- rpcrdma_regbuf_dma_unmap(req->rl_rdmabuf);
- rpcrdma_regbuf_dma_unmap(req->rl_sendbuf);
- rpcrdma_regbuf_dma_unmap(req->rl_recvbuf);
- }
+ rpcrdma_reqs_reset(r_xprt);
rpcrdma_mrs_destroy(r_xprt);
+ rpcrdma_sendctxs_destroy(r_xprt);
ib_dealloc_pd(ia->ri_pd);
ia->ri_pd = NULL;
@@ -470,30 +459,20 @@ int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private;
struct ib_cq *sendcq, *recvcq;
- unsigned int max_sge;
int rc;
- ep->rep_max_requests = xprt_rdma_slot_table_entries;
+ ep->rep_max_requests = r_xprt->rx_xprt.max_reqs;
ep->rep_inline_send = xprt_rdma_max_inline_write;
ep->rep_inline_recv = xprt_rdma_max_inline_read;
- max_sge = min_t(unsigned int, ia->ri_id->device->attrs.max_send_sge,
- RPCRDMA_MAX_SEND_SGES);
- if (max_sge < RPCRDMA_MIN_SEND_SGES) {
- pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge);
- return -ENOMEM;
- }
- ia->ri_max_send_sges = max_sge;
-
- rc = frwr_open(ia, ep);
+ rc = frwr_query_device(r_xprt, ia->ri_id->device);
if (rc)
return rc;
+ r_xprt->rx_buf.rb_max_requests = cpu_to_be32(ep->rep_max_requests);
ep->rep_attr.event_handler = rpcrdma_qp_event_handler;
ep->rep_attr.qp_context = ep;
ep->rep_attr.srq = NULL;
- ep->rep_attr.cap.max_send_sge = max_sge;
- ep->rep_attr.cap.max_recv_sge = 1;
ep->rep_attr.cap.max_inline_data = 0;
ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
ep->rep_attr.qp_type = IB_QPT_RC;
@@ -716,6 +695,10 @@ retry:
rpcrdma_reset_cwnd(r_xprt);
rpcrdma_post_recvs(r_xprt, true);
+ rc = rpcrdma_sendctxs_create(r_xprt);
+ if (rc)
+ goto out;
+
rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
if (rc)
goto out;
@@ -730,6 +713,11 @@ retry:
goto out;
}
+ rc = rpcrdma_reqs_setup(r_xprt);
+ if (rc) {
+ rpcrdma_ep_disconnect(ep, ia);
+ goto out;
+ }
rpcrdma_mrs_create(r_xprt);
out:
@@ -768,6 +756,7 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
rpcrdma_xprt_drain(r_xprt);
rpcrdma_reqs_reset(r_xprt);
rpcrdma_mrs_destroy(r_xprt);
+ rpcrdma_sendctxs_destroy(r_xprt);
}
/* Fixed-size circular FIFO queue. This implementation is wait-free and
@@ -787,20 +776,24 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
* queue activity, and rpcrdma_xprt_drain has flushed all remaining
* Send requests.
*/
-static void rpcrdma_sendctxs_destroy(struct rpcrdma_buffer *buf)
+static void rpcrdma_sendctxs_destroy(struct rpcrdma_xprt *r_xprt)
{
+ struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
unsigned long i;
+ if (!buf->rb_sc_ctxs)
+ return;
for (i = 0; i <= buf->rb_sc_last; i++)
kfree(buf->rb_sc_ctxs[i]);
kfree(buf->rb_sc_ctxs);
+ buf->rb_sc_ctxs = NULL;
}
-static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ia *ia)
+static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ep *ep)
{
struct rpcrdma_sendctx *sc;
- sc = kzalloc(struct_size(sc, sc_sges, ia->ri_max_send_sges),
+ sc = kzalloc(struct_size(sc, sc_sges, ep->rep_attr.cap.max_send_sge),
GFP_KERNEL);
if (!sc)
return NULL;
@@ -820,21 +813,22 @@ static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt)
* the ->send_request call to fail temporarily before too many
* Sends are posted.
*/
- i = buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS;
- dprintk("RPC: %s: allocating %lu send_ctxs\n", __func__, i);
+ i = r_xprt->rx_ep.rep_max_requests + RPCRDMA_MAX_BC_REQUESTS;
buf->rb_sc_ctxs = kcalloc(i, sizeof(sc), GFP_KERNEL);
if (!buf->rb_sc_ctxs)
return -ENOMEM;
buf->rb_sc_last = i - 1;
for (i = 0; i <= buf->rb_sc_last; i++) {
- sc = rpcrdma_sendctx_create(&r_xprt->rx_ia);
+ sc = rpcrdma_sendctx_create(&r_xprt->rx_ep);
if (!sc)
return -ENOMEM;
buf->rb_sc_ctxs[i] = sc;
}
+ buf->rb_sc_head = 0;
+ buf->rb_sc_tail = 0;
return 0;
}
@@ -933,7 +927,7 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
unsigned int count;
- for (count = 0; count < ia->ri_max_segs; count++) {
+ for (count = 0; count < ia->ri_max_rdma_segs; count++) {
struct rpcrdma_mr *mr;
int rc;
@@ -1005,32 +999,19 @@ struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size,
gfp_t flags)
{
struct rpcrdma_buffer *buffer = &r_xprt->rx_buf;
- struct rpcrdma_regbuf *rb;
struct rpcrdma_req *req;
- size_t maxhdrsize;
req = kzalloc(sizeof(*req), flags);
if (req == NULL)
goto out1;
- /* Compute maximum header buffer size in bytes */
- maxhdrsize = rpcrdma_fixed_maxsz + 3 +
- r_xprt->rx_ia.ri_max_segs * rpcrdma_readchunk_maxsz;
- maxhdrsize *= sizeof(__be32);
- rb = rpcrdma_regbuf_alloc(__roundup_pow_of_two(maxhdrsize),
- DMA_TO_DEVICE, flags);
- if (!rb)
- goto out2;
- req->rl_rdmabuf = rb;
- xdr_buf_init(&req->rl_hdrbuf, rdmab_data(rb), rdmab_length(rb));
-
req->rl_sendbuf = rpcrdma_regbuf_alloc(size, DMA_TO_DEVICE, flags);
if (!req->rl_sendbuf)
- goto out3;
+ goto out2;
req->rl_recvbuf = rpcrdma_regbuf_alloc(size, DMA_NONE, flags);
if (!req->rl_recvbuf)
- goto out4;
+ goto out3;
INIT_LIST_HEAD(&req->rl_free_mrs);
INIT_LIST_HEAD(&req->rl_registered);
@@ -1039,10 +1020,8 @@ struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size,
spin_unlock(&buffer->rb_lock);
return req;
-out4:
- kfree(req->rl_sendbuf);
out3:
- kfree(req->rl_rdmabuf);
+ kfree(req->rl_sendbuf);
out2:
kfree(req);
out1:
@@ -1050,27 +1029,90 @@ out1:
}
/**
- * rpcrdma_reqs_reset - Reset all reqs owned by a transport
+ * rpcrdma_req_setup - Per-connection instance setup of an rpcrdma_req object
* @r_xprt: controlling transport instance
+ * @req: rpcrdma_req object to set up
*
- * ASSUMPTION: the rb_allreqs list is stable for the duration,
+ * Returns zero on success, and a negative errno on failure.
+ */
+int rpcrdma_req_setup(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
+{
+ struct rpcrdma_regbuf *rb;
+ size_t maxhdrsize;
+
+ /* Compute maximum header buffer size in bytes */
+ maxhdrsize = rpcrdma_fixed_maxsz + 3 +
+ r_xprt->rx_ia.ri_max_rdma_segs * rpcrdma_readchunk_maxsz;
+ maxhdrsize *= sizeof(__be32);
+ rb = rpcrdma_regbuf_alloc(__roundup_pow_of_two(maxhdrsize),
+ DMA_TO_DEVICE, GFP_KERNEL);
+ if (!rb)
+ goto out;
+
+ if (!__rpcrdma_regbuf_dma_map(r_xprt, rb))
+ goto out_free;
+
+ req->rl_rdmabuf = rb;
+ xdr_buf_init(&req->rl_hdrbuf, rdmab_data(rb), rdmab_length(rb));
+ return 0;
+
+out_free:
+ rpcrdma_regbuf_free(rb);
+out:
+ return -ENOMEM;
+}
+
+/* ASSUMPTION: the rb_allreqs list is stable for the duration,
* and thus can be walked without holding rb_lock. Eg. the
* caller is holding the transport send lock to exclude
* device removal or disconnection.
*/
-static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt)
+static int rpcrdma_reqs_setup(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_req *req;
+ int rc;
list_for_each_entry(req, &buf->rb_allreqs, rl_all) {
- /* Credits are valid only for one connection */
- req->rl_slot.rq_cong = 0;
+ rc = rpcrdma_req_setup(r_xprt, req);
+ if (rc)
+ return rc;
}
+ return 0;
}
-static struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
- bool temp)
+static void rpcrdma_req_reset(struct rpcrdma_req *req)
+{
+ /* Credits are valid for only one connection */
+ req->rl_slot.rq_cong = 0;
+
+ rpcrdma_regbuf_free(req->rl_rdmabuf);
+ req->rl_rdmabuf = NULL;
+
+ rpcrdma_regbuf_dma_unmap(req->rl_sendbuf);
+ rpcrdma_regbuf_dma_unmap(req->rl_recvbuf);
+}
+
+/* ASSUMPTION: the rb_allreqs list is stable for the duration,
+ * and thus can be walked without holding rb_lock. Eg. the
+ * caller is holding the transport send lock to exclude
+ * device removal or disconnection.
+ */
+static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt)
+{
+ struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+ struct rpcrdma_req *req;
+
+ list_for_each_entry(req, &buf->rb_allreqs, rl_all)
+ rpcrdma_req_reset(req);
+}
+
+/* No locking needed here. This function is called only by the
+ * Receive completion handler.
+ */
+static noinline
+struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
+ bool temp)
{
struct rpcrdma_rep *rep;
@@ -1083,6 +1125,9 @@ static struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
if (!rep->rr_rdmabuf)
goto out_free;
+ if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf))
+ goto out_free_regbuf;
+
xdr_buf_init(&rep->rr_hdrbuf, rdmab_data(rep->rr_rdmabuf),
rdmab_length(rep->rr_rdmabuf));
rep->rr_cqe.done = rpcrdma_wc_receive;
@@ -1095,12 +1140,17 @@ static struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
list_add(&rep->rr_all, &r_xprt->rx_buf.rb_all_reps);
return rep;
+out_free_regbuf:
+ rpcrdma_regbuf_free(rep->rr_rdmabuf);
out_free:
kfree(rep);
out:
return NULL;
}
+/* No locking needed here. This function is invoked only by the
+ * Receive completion handler, or during transport shutdown.
+ */
static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
{
list_del(&rep->rr_all);
@@ -1130,8 +1180,10 @@ static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt)
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_rep *rep;
- list_for_each_entry(rep, &buf->rb_all_reps, rr_all)
+ list_for_each_entry(rep, &buf->rb_all_reps, rr_all) {
rpcrdma_regbuf_dma_unmap(rep->rr_rdmabuf);
+ rep->rr_temp = true;
+ }
}
static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf)
@@ -1153,7 +1205,6 @@ int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
int i, rc;
- buf->rb_max_requests = r_xprt->rx_ep.rep_max_requests;
buf->rb_bc_srv_max_requests = 0;
spin_lock_init(&buf->rb_lock);
INIT_LIST_HEAD(&buf->rb_mrs);
@@ -1165,7 +1216,7 @@ int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
INIT_LIST_HEAD(&buf->rb_all_reps);
rc = -ENOMEM;
- for (i = 0; i < buf->rb_max_requests; i++) {
+ for (i = 0; i < r_xprt->rx_xprt.max_reqs; i++) {
struct rpcrdma_req *req;
req = rpcrdma_req_create(r_xprt, RPCRDMA_V1_DEF_INLINE_SIZE * 2,
@@ -1177,10 +1228,6 @@ int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
init_llist_head(&buf->rb_free_reps);
- rc = rpcrdma_sendctxs_create(r_xprt);
- if (rc)
- goto out;
-
return 0;
out:
rpcrdma_buffer_destroy(buf);
@@ -1256,7 +1303,6 @@ static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt)
void
rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{
- rpcrdma_sendctxs_destroy(buf);
rpcrdma_reps_destroy(buf);
while (!list_empty(&buf->rb_send_bufs)) {
@@ -1497,7 +1543,7 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_ep *ep = &r_xprt->rx_ep;
- struct ib_recv_wr *i, *wr, *bad_wr;
+ struct ib_recv_wr *wr, *bad_wr;
struct rpcrdma_rep *rep;
int needed, count, rc;
@@ -1524,23 +1570,15 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
if (!rep)
break;
+ trace_xprtrdma_post_recv(rep);
rep->rr_recv_wr.next = wr;
wr = &rep->rr_recv_wr;
--needed;
+ ++count;
}
if (!wr)
goto out;
- for (i = wr; i; i = i->next) {
- rep = container_of(i, struct rpcrdma_rep, rr_recv_wr);
-
- if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf))
- goto release_wrs;
-
- trace_xprtrdma_post_recv(rep);
- ++count;
- }
-
rc = ib_post_recv(r_xprt->rx_ia.ri_id->qp, wr,
(const struct ib_recv_wr **)&bad_wr);
out:
@@ -1557,11 +1595,4 @@ out:
}
ep->rep_receive_count += count;
return;
-
-release_wrs:
- for (i = wr; i;) {
- rep = container_of(i, struct rpcrdma_rep, rr_recv_wr);
- i = i->next;
- rpcrdma_recv_buffer_put(rep);
- }
}
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index d796d68609ed..37d5080c250b 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -71,9 +71,8 @@ struct rpcrdma_ia {
struct rdma_cm_id *ri_id;
struct ib_pd *ri_pd;
int ri_async_rc;
- unsigned int ri_max_segs;
+ unsigned int ri_max_rdma_segs;
unsigned int ri_max_frwr_depth;
- unsigned int ri_max_send_sges;
bool ri_implicit_roundup;
enum ib_mr_type ri_mrtype;
unsigned long ri_flags;
@@ -99,7 +98,7 @@ struct rpcrdma_ep {
wait_queue_head_t rep_connect_wait;
struct rpcrdma_connect_private rep_cm_private;
struct rdma_conn_param rep_remote_cma;
- unsigned int rep_max_requests; /* set by /proc */
+ unsigned int rep_max_requests; /* depends on device */
unsigned int rep_inline_send; /* negotiated */
unsigned int rep_inline_recv; /* negotiated */
int rep_receive_count;
@@ -373,7 +372,7 @@ struct rpcrdma_buffer {
struct llist_head rb_free_reps;
- u32 rb_max_requests;
+ __be32 rb_max_requests;
u32 rb_credits; /* most recent credit grant */
u32 rb_bc_srv_max_requests;
@@ -479,6 +478,7 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp);
*/
struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size,
gfp_t flags);
+int rpcrdma_req_setup(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
void rpcrdma_req_destroy(struct rpcrdma_req *req);
int rpcrdma_buffer_create(struct rpcrdma_xprt *);
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
@@ -535,12 +535,11 @@ rpcrdma_data_dir(bool writing)
/* Memory registration calls xprtrdma/frwr_ops.c
*/
-bool frwr_is_supported(struct ib_device *device);
void frwr_reset(struct rpcrdma_req *req);
-int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep);
+int frwr_query_device(struct rpcrdma_xprt *r_xprt,
+ const struct ib_device *device);
int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr);
void frwr_release_mr(struct rpcrdma_mr *mr);
-size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt);
struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_mr_seg *seg,
int nsegs, bool writing, __be32 xid,
@@ -583,7 +582,6 @@ static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
/* RPC/RDMA module init - xprtrdma/transport.c
*/
-extern unsigned int xprt_rdma_slot_table_entries;
extern unsigned int xprt_rdma_max_inline_read;
extern unsigned int xprt_rdma_max_inline_write;
void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 99b28b69fc17..0c88778c88b5 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -278,7 +278,7 @@ struct tipc_crypto *tipc_node_crypto_rx_by_list(struct list_head *pos)
}
#endif
-void tipc_node_free(struct rcu_head *rp)
+static void tipc_node_free(struct rcu_head *rp)
{
struct tipc_node *n = container_of(rp, struct tipc_node, rcu);
@@ -2798,7 +2798,7 @@ static int tipc_nl_retrieve_nodeid(struct nlattr **attrs, u8 **node_id)
return 0;
}
-int __tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info)
+static int __tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *attrs[TIPC_NLA_NODE_MAX + 1];
struct net *net = sock_net(skb->sk);
@@ -2875,7 +2875,8 @@ int tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info)
return err;
}
-int __tipc_nl_node_flush_key(struct sk_buff *skb, struct genl_info *info)
+static int __tipc_nl_node_flush_key(struct sk_buff *skb,
+ struct genl_info *info)
{
struct net *net = sock_net(skb->sk);
struct tipc_net *tn = tipc_net(net);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index f9b4fb92c0b1..693e8902161e 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2441,6 +2441,8 @@ static int tipc_wait_for_connect(struct socket *sock, long *timeo_p)
return -ETIMEDOUT;
if (signal_pending(current))
return sock_intr_errno(*timeo_p);
+ if (sk->sk_state == TIPC_DISCONNECTING)
+ break;
add_wait_queue(sk_sleep(sk), &wait);
done = sk_wait_event(sk, timeo_p, tipc_sk_connected(sk),
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 1ba5a92832bb..1c5574e2e058 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -593,7 +593,7 @@ struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context,
u32 seq, u64 *p_record_sn)
{
u64 record_sn = context->hint_record_sn;
- struct tls_record_info *info;
+ struct tls_record_info *info, *last;
info = context->retransmit_hint;
if (!info ||
@@ -605,6 +605,24 @@ struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context,
struct tls_record_info, list);
if (!info)
return NULL;
+ /* send the start_marker record if seq number is before the
+ * tls offload start marker sequence number. This record is
+ * required to handle TCP packets which are before TLS offload
+ * started.
+ * And if it's not start marker, look if this seq number
+ * belongs to the list.
+ */
+ if (likely(!tls_record_is_start_marker(info))) {
+ /* we have the first record, get the last record to see
+ * if this seq number belongs to the list.
+ */
+ last = list_last_entry(&context->records_list,
+ struct tls_record_info, list);
+
+ if (!between(seq, tls_record_start_seq(info),
+ last->end_seq))
+ return NULL;
+ }
record_sn = context->unacked_record_sn;
}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 321af97c7bbe..62c12cb5763e 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -189,11 +189,17 @@ static inline int unix_may_send(struct sock *sk, struct sock *osk)
return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}
-static inline int unix_recvq_full(struct sock const *sk)
+static inline int unix_recvq_full(const struct sock *sk)
{
return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}
+static inline int unix_recvq_full_lockless(const struct sock *sk)
+{
+ return skb_queue_len_lockless(&sk->sk_receive_queue) >
+ READ_ONCE(sk->sk_max_ack_backlog);
+}
+
struct sock *unix_peer_get(struct sock *s)
{
struct sock *peer;
@@ -1758,7 +1764,8 @@ restart_locked:
* - unix_peer(sk) == sk by time of get but disconnected before lock
*/
if (other != sk &&
- unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
+ unlikely(unix_peer(other) != sk &&
+ unix_recvq_full_lockless(other))) {
if (timeo) {
timeo = unix_wait_for_peer(other, timeo);
diff --git a/net/wireless/ethtool.c b/net/wireless/ethtool.c
index a9c0f368db5d..24e18405cdb4 100644
--- a/net/wireless/ethtool.c
+++ b/net/wireless/ethtool.c
@@ -7,9 +7,13 @@
void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct device *pdev = wiphy_dev(wdev->wiphy);
- strlcpy(info->driver, wiphy_dev(wdev->wiphy)->driver->name,
- sizeof(info->driver));
+ if (pdev->driver)
+ strlcpy(info->driver, pdev->driver->name,
+ sizeof(info->driver));
+ else
+ strlcpy(info->driver, "N/A", sizeof(info->driver));
strlcpy(info->version, init_utsname()->release, sizeof(info->version));
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 123b8d720a59..5b19e9fac4aa 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -20,6 +20,7 @@
#include <linux/netlink.h>
#include <linux/nospec.h>
#include <linux/etherdevice.h>
+#include <linux/if_vlan.h>
#include <net/net_namespace.h>
#include <net/genetlink.h>
#include <net/cfg80211.h>
@@ -437,6 +438,7 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT] = { .type = NLA_FLAG },
[NL80211_ATTR_CONTROL_PORT_OVER_NL80211] = { .type = NLA_FLAG },
[NL80211_ATTR_PRIVACY] = { .type = NLA_FLAG },
+ [NL80211_ATTR_STATUS_CODE] = { .type = NLA_U16 },
[NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 },
[NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 },
[NL80211_ATTR_PID] = { .type = NLA_U32 },
@@ -4799,8 +4801,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
err = nl80211_parse_he_obss_pd(
info->attrs[NL80211_ATTR_HE_OBSS_PD],
&params.he_obss_pd);
- if (err)
- return err;
+ goto out;
}
nl80211_calculate_ap_params(&params);
@@ -4822,6 +4823,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
}
wdev_unlock(wdev);
+out:
kfree(params.acl);
return err;
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index fff9a74891fc..1a8218f1bbe0 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -2276,7 +2276,7 @@ static void handle_channel_custom(struct wiphy *wiphy,
break;
}
- if (IS_ERR(reg_rule)) {
+ if (IS_ERR_OR_NULL(reg_rule)) {
pr_debug("Disabling freq %d MHz as custom regd has no rule that fits it\n",
chan->center_freq);
if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) {
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index f93e917e0929..fa7bb5e060d0 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -212,7 +212,7 @@ static int xdp_umem_map_pages(struct xdp_umem *umem)
static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
- put_user_pages_dirty_lock(umem->pgs, umem->npgs, true);
+ unpin_user_pages_dirty_lock(umem->pgs, umem->npgs, true);
kfree(umem->pgs);
umem->pgs = NULL;
@@ -291,7 +291,7 @@ static int xdp_umem_pin_pages(struct xdp_umem *umem)
return -ENOMEM;
down_read(&current->mm->mmap_sem);
- npgs = get_user_pages(umem->address, umem->npgs,
+ npgs = pin_user_pages(umem->address, umem->npgs,
gup_flags | FOLL_LONGTERM, &umem->pgs[0], NULL);
up_read(&current->mm->mmap_sem);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index df600487a68d..356f90e4522b 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -217,6 +217,7 @@ static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
static void xsk_flush(struct xdp_sock *xs)
{
xskq_prod_submit(xs->rx);
+ __xskq_cons_release(xs->umem->fq);
sock_def_readable(&xs->sk);
}
@@ -304,6 +305,7 @@ void xsk_umem_consume_tx_done(struct xdp_umem *umem)
rcu_read_lock();
list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+ __xskq_cons_release(xs->tx);
xs->sk.sk_write_space(&xs->sk);
}
rcu_read_unlock();
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index bec2af11853a..89a01ac4e079 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -271,7 +271,8 @@ static inline void xskq_cons_release(struct xsk_queue *q)
{
/* To improve performance, only update local state here.
* Reflect this to global state when we get new entries
- * from the ring in xskq_cons_get_entries().
+ * from the ring in xskq_cons_get_entries() and whenever
+ * Rx or Tx processing are completed in the NAPI loop.
*/
q->cached_cons++;
}
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index dc651a628dcf..3361e3ac5714 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -300,10 +300,10 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
} else {
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
- htonl(mtu));
+ icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ htonl(mtu));
}
dst_release(dst);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 297d1eb79e5c..dbda08ec566e 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -3189,7 +3189,7 @@ struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
flags | XFRM_LOOKUP_QUEUE |
XFRM_LOOKUP_KEEP_DST_REF);
- if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE)
+ if (PTR_ERR(dst) == -EREMOTE)
return make_blackhole(net, dst_orig->ops->family, dst_orig);
if (IS_ERR(dst))