aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorJ. Bruce Fields <bfields@redhat.com>2020-05-21 10:58:15 -0400
committerJ. Bruce Fields <bfields@redhat.com>2020-05-21 10:58:15 -0400
commit6670ee2ef219ac9e1c836a277dda0c949ad8b1ff (patch)
tree0eee3d97941caab0bc85f93a7653b0cfd4abf51b /net
parentsunrpc: add missing newline when printing parameter 'pool_mode' by sysfs (diff)
parentNFSD: Fix improperly-formatted Doxygen comments (diff)
downloadlinux-dev-6670ee2ef219ac9e1c836a277dda0c949ad8b1ff.tar.xz
linux-dev-6670ee2ef219ac9e1c836a277dda0c949ad8b1ff.zip
Merge branch 'nfsd-5.8' of git://linux-nfs.org/~cel/cel-2.6 into for-5.8-incoming
Highlights of this series: * Remove serialization of sending RPC/RDMA Replies * Convert the TCP socket send path to use xdr_buf::bvecs (pre-requisite for RPC-on-TLS) * Fix svcrdma backchannel sendto return code * Convert a number of dprintk call sites to use tracepoints * Fix the "suggest braces around empty body in an 'else' statement" warning
Diffstat (limited to 'net')
-rw-r--r--net/atm/common.c22
-rw-r--r--net/atm/lec.c6
-rw-r--r--net/batman-adv/bat_v_ogm.c2
-rw-r--r--net/batman-adv/network-coding.c9
-rw-r--r--net/batman-adv/sysfs.c3
-rw-r--r--net/bridge/br_netlink.c1
-rw-r--r--net/core/dev.c4
-rw-r--r--net/core/devlink.c12
-rw-r--r--net/core/drop_monitor.c11
-rw-r--r--net/core/filter.c2
-rw-r--r--net/core/neighbour.c6
-rw-r--r--net/core/netprio_cgroup.c2
-rw-r--r--net/core/sock.c1
-rw-r--r--net/dsa/dsa2.c2
-rw-r--r--net/dsa/master.c3
-rw-r--r--net/dsa/slave.c8
-rw-r--r--net/hsr/hsr_slave.c2
-rw-r--r--net/ipv4/cipso_ipv4.c6
-rw-r--r--net/ipv4/ipmr.c6
-rw-r--r--net/ipv4/route.c2
-rw-r--r--net/ipv4/tcp.c27
-rw-r--r--net/ipv4/tcp_bpf.c10
-rw-r--r--net/ipv4/tcp_input.c10
-rw-r--r--net/ipv6/calipso.c3
-rw-r--r--net/ipv6/route.c31
-rw-r--r--net/ipv6/seg6.c10
-rw-r--r--net/mptcp/options.c95
-rw-r--r--net/mptcp/protocol.c19
-rw-r--r--net/mptcp/protocol.h43
-rw-r--r--net/mptcp/subflow.c96
-rw-r--r--net/netfilter/nf_conntrack_core.c17
-rw-r--r--net/netfilter/nf_flow_table_core.c8
-rw-r--r--net/netfilter/nf_flow_table_offload.c10
-rw-r--r--net/netfilter/nf_nat_proto.c4
-rw-r--r--net/netfilter/nfnetlink_osf.c12
-rw-r--r--net/netfilter/nft_set_rbtree.c11
-rw-r--r--net/netlabel/netlabel_kapi.c6
-rw-r--r--net/sched/cls_api.c22
-rw-r--r--net/sched/sch_choke.c3
-rw-r--r--net/sched/sch_fq_codel.c2
-rw-r--r--net/sched/sch_sfq.c9
-rw-r--r--net/sched/sch_skbprio.c3
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c12
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_crypto.c8
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_wrap.c44
-rw-r--r--net/sunrpc/auth_gss/gss_mech_switch.c3
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c10
-rw-r--r--net/sunrpc/clnt.c9
-rw-r--r--net/sunrpc/svc.c19
-rw-r--r--net/sunrpc/svc_xprt.c53
-rw-r--r--net/sunrpc/svcsock.c407
-rw-r--r--net/sunrpc/xdr.c41
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c121
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c21
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_rw.c92
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c10
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c55
-rw-r--r--net/sunrpc/xprtsock.c12
-rw-r--r--net/tipc/socket.c42
-rw-r--r--net/tipc/subscr.h10
-rw-r--r--net/tipc/topsrv.c18
-rw-r--r--net/tls/tls_sw.c7
-rw-r--r--net/vmw_vsock/virtio_transport_common.c4
-rw-r--r--net/x25/x25_subr.c6
64 files changed, 886 insertions, 679 deletions
diff --git a/net/atm/common.c b/net/atm/common.c
index 0ce530af534d..8575f5d52087 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -177,18 +177,18 @@ static void vcc_destroy_socket(struct sock *sk)
set_bit(ATM_VF_CLOSE, &vcc->flags);
clear_bit(ATM_VF_READY, &vcc->flags);
- if (vcc->dev) {
- if (vcc->dev->ops->close)
- vcc->dev->ops->close(vcc);
- if (vcc->push)
- vcc->push(vcc, NULL); /* atmarpd has no push */
- module_put(vcc->owner);
-
- while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
- atm_return(vcc, skb->truesize);
- kfree_skb(skb);
- }
+ if (vcc->dev && vcc->dev->ops->close)
+ vcc->dev->ops->close(vcc);
+ if (vcc->push)
+ vcc->push(vcc, NULL); /* atmarpd has no push */
+ module_put(vcc->owner);
+
+ while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+ atm_return(vcc, skb->truesize);
+ kfree_skb(skb);
+ }
+ if (vcc->dev && vcc->dev->ops->owner) {
module_put(vcc->dev->ops->owner);
atm_dev_put(vcc->dev);
}
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 25fa3a7b72bd..ca37f5a71f5e 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -1264,6 +1264,12 @@ static void lec_arp_clear_vccs(struct lec_arp_table *entry)
entry->vcc = NULL;
}
if (entry->recv_vcc) {
+ struct atm_vcc *vcc = entry->recv_vcc;
+ struct lec_vcc_priv *vpriv = LEC_VCC_PRIV(vcc);
+
+ kfree(vpriv);
+ vcc->user_back = NULL;
+
entry->recv_vcc->push = entry->old_recv_push;
vcc_release_async(entry->recv_vcc, -EPIPE);
entry->recv_vcc = NULL;
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index 969466218999..80b87b1f4e3a 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -893,7 +893,7 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset,
orig_node = batadv_v_ogm_orig_get(bat_priv, ogm_packet->orig);
if (!orig_node)
- return;
+ goto out;
neigh_node = batadv_neigh_node_get_or_create(orig_node, if_incoming,
ethhdr->h_source);
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 8f0717c3f7b5..b0469d15da0e 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -1009,15 +1009,8 @@ static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv,
*/
static u8 batadv_nc_random_weight_tq(u8 tq)
{
- u8 rand_val, rand_tq;
-
- get_random_bytes(&rand_val, sizeof(rand_val));
-
/* randomize the estimated packet loss (max TQ - estimated TQ) */
- rand_tq = rand_val * (BATADV_TQ_MAX_VALUE - tq);
-
- /* normalize the randomized packet loss */
- rand_tq /= BATADV_TQ_MAX_VALUE;
+ u8 rand_tq = prandom_u32_max(BATADV_TQ_MAX_VALUE + 1 - tq);
/* convert to (randomized) estimated tq again */
return BATADV_TQ_MAX_VALUE - rand_tq;
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index c45962d8527b..0f962dcd239e 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -1150,7 +1150,7 @@ static ssize_t batadv_store_throughput_override(struct kobject *kobj,
ret = batadv_parse_throughput(net_dev, buff, "throughput_override",
&tp_override);
if (!ret)
- return count;
+ goto out;
old_tp_override = atomic_read(&hard_iface->bat_v.throughput_override);
if (old_tp_override == tp_override)
@@ -1190,6 +1190,7 @@ static ssize_t batadv_show_throughput_override(struct kobject *kobj,
tp_override = atomic_read(&hard_iface->bat_v.throughput_override);
+ batadv_hardif_put(hard_iface);
return sprintf(buff, "%u.%u MBit\n", tp_override / 10,
tp_override % 10);
}
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 43dab4066f91..a0f5dbee8f9c 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -612,6 +612,7 @@ int br_process_vlan_info(struct net_bridge *br,
v - 1, rtm_cmd);
v_change_start = 0;
}
+ cond_resched();
}
/* v_change_start is set only if the last/whole range changed */
if (v_change_start)
diff --git a/net/core/dev.c b/net/core/dev.c
index 522288177bbd..6d327b7aa813 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -8907,11 +8907,13 @@ static void netdev_sync_lower_features(struct net_device *upper,
netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n",
&feature, lower->name);
lower->wanted_features &= ~feature;
- netdev_update_features(lower);
+ __netdev_update_features(lower);
if (unlikely(lower->features & feature))
netdev_WARN(upper, "failed to disable %pNF on %s!\n",
&feature, lower->name);
+ else
+ netdev_features_change(lower);
}
}
}
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 80f97722f31f..899edcee7dab 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4283,6 +4283,11 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
end_offset = nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR]);
end_offset += nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]);
dump = false;
+
+ if (start_offset == end_offset) {
+ err = 0;
+ goto nla_put_failure;
+ }
}
err = devlink_nl_region_read_snapshot_fill(skb, devlink,
@@ -5363,6 +5368,7 @@ int devlink_health_report(struct devlink_health_reporter *reporter,
{
enum devlink_health_reporter_state prev_health_state;
struct devlink *devlink = reporter->devlink;
+ unsigned long recover_ts_threshold;
/* write a log message of the current error */
WARN_ON(!msg);
@@ -5373,10 +5379,12 @@ int devlink_health_report(struct devlink_health_reporter *reporter,
devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
/* abort if the previous error wasn't recovered */
+ recover_ts_threshold = reporter->last_recovery_ts +
+ msecs_to_jiffies(reporter->graceful_period);
if (reporter->auto_recover &&
(prev_health_state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY ||
- jiffies - reporter->last_recovery_ts <
- msecs_to_jiffies(reporter->graceful_period))) {
+ (reporter->last_recovery_ts && reporter->recovery_count &&
+ time_is_after_jiffies(recover_ts_threshold)))) {
trace_devlink_health_recover_aborted(devlink,
reporter->ops->name,
reporter->health_state,
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 8e33cec9fc4e..2ee7bc4c9e03 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -213,6 +213,7 @@ static void sched_send_work(struct timer_list *t)
static void trace_drop_common(struct sk_buff *skb, void *location)
{
struct net_dm_alert_msg *msg;
+ struct net_dm_drop_point *point;
struct nlmsghdr *nlh;
struct nlattr *nla;
int i;
@@ -231,11 +232,13 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
nlh = (struct nlmsghdr *)dskb->data;
nla = genlmsg_data(nlmsg_data(nlh));
msg = nla_data(nla);
+ point = msg->points;
for (i = 0; i < msg->entries; i++) {
- if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) {
- msg->points[i].count++;
+ if (!memcmp(&location, &point->pc, sizeof(void *))) {
+ point->count++;
goto out;
}
+ point++;
}
if (msg->entries == dm_hit_limit)
goto out;
@@ -244,8 +247,8 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
*/
__nla_reserve_nohdr(dskb, sizeof(struct net_dm_drop_point));
nla->nla_len += NLA_ALIGN(sizeof(struct net_dm_drop_point));
- memcpy(msg->points[msg->entries].pc, &location, sizeof(void *));
- msg->points[msg->entries].count = 1;
+ memcpy(point->pc, &location, sizeof(void *));
+ point->count = 1;
msg->entries++;
if (!timer_pending(&data->send_timer)) {
diff --git a/net/core/filter.c b/net/core/filter.c
index 7d6ceaa54d21..5cc9276f1023 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2590,8 +2590,8 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
}
pop = 0;
} else if (pop >= sge->length - a) {
- sge->length = a;
pop -= (sge->length - a);
+ sge->length = a;
}
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 39d37d0ef575..116139233d57 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1956,6 +1956,9 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
}
+ if (protocol)
+ neigh->protocol = protocol;
+
if (ndm->ndm_flags & NTF_EXT_LEARNED)
flags |= NEIGH_UPDATE_F_EXT_LEARNED;
@@ -1969,9 +1972,6 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
NETLINK_CB(skb).portid, extack);
- if (protocol)
- neigh->protocol = protocol;
-
neigh_release(neigh);
out:
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 8881dd943dd0..9bd4cab7d510 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -236,6 +236,8 @@ static void net_prio_attach(struct cgroup_taskset *tset)
struct task_struct *p;
struct cgroup_subsys_state *css;
+ cgroup_sk_alloc_disable();
+
cgroup_taskset_for_each(p, css, tset) {
void *v = (void *)(unsigned long)css->id;
diff --git a/net/core/sock.c b/net/core/sock.c
index 90509c37d291..b714162213ae 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2364,7 +2364,6 @@ static void sk_leave_memory_pressure(struct sock *sk)
}
}
-/* On 32bit arches, an skb frag is limited to 2^15 */
#define SKB_FRAG_PAGE_ORDER get_order(32768)
DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 9a271a58a41d..d90665b465b8 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -459,7 +459,7 @@ static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
list_for_each_entry(dp, &dst->ports, list) {
err = dsa_port_setup(dp);
if (err)
- goto teardown;
+ continue;
}
return 0;
diff --git a/net/dsa/master.c b/net/dsa/master.c
index b5c535af63a3..a621367c6e8c 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -289,7 +289,8 @@ static void dsa_master_ndo_teardown(struct net_device *dev)
{
struct dsa_port *cpu_dp = dev->dsa_ptr;
- dev->netdev_ops = cpu_dp->orig_ndo_ops;
+ if (cpu_dp->orig_ndo_ops)
+ dev->netdev_ops = cpu_dp->orig_ndo_ops;
cpu_dp->orig_ndo_ops = NULL;
}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index d1068803cd11..62f4ee3da172 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -856,20 +856,18 @@ dsa_slave_add_cls_matchall_mirred(struct net_device *dev,
struct dsa_port *to_dp;
int err;
- act = &cls->rule->action.entries[0];
-
if (!ds->ops->port_mirror_add)
return -EOPNOTSUPP;
- if (!act->dev)
- return -EINVAL;
-
if (!flow_action_basic_hw_stats_check(&cls->rule->action,
cls->common.extack))
return -EOPNOTSUPP;
act = &cls->rule->action.entries[0];
+ if (!act->dev)
+ return -EINVAL;
+
if (!dsa_slave_dev_check(act->dev))
return -EOPNOTSUPP;
diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c
index f4b9f7a3ce51..25b6ffba26cd 100644
--- a/net/hsr/hsr_slave.c
+++ b/net/hsr/hsr_slave.c
@@ -18,7 +18,7 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb)
{
struct sk_buff *skb = *pskb;
struct hsr_port *port;
- u16 protocol;
+ __be16 protocol;
if (!skb_mac_header_was_set(skb)) {
WARN_ONCE(1, "%s: skb invalid", __func__);
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 0bd10a1f477f..a23094b050f8 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1258,7 +1258,8 @@ static int cipso_v4_parsetag_rbm(const struct cipso_v4_doi *doi_def,
return ret_val;
}
- secattr->flags |= NETLBL_SECATTR_MLS_CAT;
+ if (secattr->attr.mls.cat)
+ secattr->flags |= NETLBL_SECATTR_MLS_CAT;
}
return 0;
@@ -1439,7 +1440,8 @@ static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def,
return ret_val;
}
- secattr->flags |= NETLBL_SECATTR_MLS_CAT;
+ if (secattr->attr.mls.cat)
+ secattr->flags |= NETLBL_SECATTR_MLS_CAT;
}
return 0;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 9cf83cc85e4a..5c218db2dede 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -109,8 +109,10 @@ static void mroute_clean_tables(struct mr_table *mrt, int flags);
static void ipmr_expire_process(struct timer_list *t);
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
-#define ipmr_for_each_table(mrt, net) \
- list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
+#define ipmr_for_each_table(mrt, net) \
+ list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list, \
+ lockdep_rtnl_is_held() || \
+ list_empty(&net->ipv4.mr_tables))
static struct mr_table *ipmr_mr_table_iter(struct net *net,
struct mr_table *mrt)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 788c69d9bfe0..fa829f31a3f5 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -915,7 +915,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
/* Check for load limit; set rate_last to the latest sent
* redirect.
*/
- if (peer->rate_tokens == 0 ||
+ if (peer->n_redirects == 0 ||
time_after(jiffies,
(peer->rate_last +
(ip_rt_redirect_load << peer->n_redirects)))) {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 6d87de434377..dd401757eea1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -476,9 +476,17 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
static inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
int target, struct sock *sk)
{
- return (READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq) >= target) ||
- (sk->sk_prot->stream_memory_read ?
- sk->sk_prot->stream_memory_read(sk) : false);
+ int avail = READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq);
+
+ if (avail > 0) {
+ if (avail >= target)
+ return true;
+ if (tcp_rmem_pressure(sk))
+ return true;
+ }
+ if (sk->sk_prot->stream_memory_read)
+ return sk->sk_prot->stream_memory_read(sk);
+ return false;
}
/*
@@ -1756,10 +1764,11 @@ static int tcp_zerocopy_receive(struct sock *sk,
down_read(&current->mm->mmap_sem);
- ret = -EINVAL;
vma = find_vma(current->mm, address);
- if (!vma || vma->vm_start > address || vma->vm_ops != &tcp_vm_ops)
- goto out;
+ if (!vma || vma->vm_start > address || vma->vm_ops != &tcp_vm_ops) {
+ up_read(&current->mm->mmap_sem);
+ return -EINVAL;
+ }
zc->length = min_t(unsigned long, zc->length, vma->vm_end - address);
tp = tcp_sk(sk);
@@ -2154,13 +2163,15 @@ skip_copy:
tp->urg_data = 0;
tcp_fast_path_check(sk);
}
- if (used + offset < skb->len)
- continue;
if (TCP_SKB_CB(skb)->has_rxtstamp) {
tcp_update_recv_tstamps(skb, &tss);
cmsg_flags |= 2;
}
+
+ if (used + offset < skb->len)
+ continue;
+
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
goto found_fin_ok;
if (!(flags & MSG_PEEK))
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 5a05327f97c1..629aaa9a1eb9 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -125,7 +125,6 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
if (!ret) {
msg->sg.start = i;
- msg->sg.size -= apply_bytes;
sk_psock_queue_msg(psock, tmp);
sk_psock_data_ready(sk, psock);
} else {
@@ -262,14 +261,17 @@ static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
struct sk_psock *psock;
int copied, ret;
+ if (unlikely(flags & MSG_ERRQUEUE))
+ return inet_recv_error(sk, msg, len, addr_len);
+
psock = sk_psock_get(sk);
if (unlikely(!psock))
return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
- if (unlikely(flags & MSG_ERRQUEUE))
- return inet_recv_error(sk, msg, len, addr_len);
if (!skb_queue_empty(&sk->sk_receive_queue) &&
- sk_psock_queue_empty(psock))
+ sk_psock_queue_empty(psock)) {
+ sk_psock_put(sk, psock);
return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ }
lock_sock(sk);
msg_bytes_ready:
copied = __tcp_bpf_recvmsg(sk, psock, msg, len, flags);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index bf4ced9273e8..29c6fc8c7716 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3926,10 +3926,6 @@ void tcp_parse_options(const struct net *net,
*/
break;
#endif
- case TCPOPT_MPTCP:
- mptcp_parse_option(skb, ptr, opsize, opt_rx);
- break;
-
case TCPOPT_FASTOPEN:
tcp_parse_fastopen_option(
opsize - TCPOLEN_FASTOPEN_BASE,
@@ -4761,7 +4757,8 @@ void tcp_data_ready(struct sock *sk)
const struct tcp_sock *tp = tcp_sk(sk);
int avail = tp->rcv_nxt - tp->copied_seq;
- if (avail < sk->sk_rcvlowat && !sock_flag(sk, SOCK_DONE))
+ if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) &&
+ !sock_flag(sk, SOCK_DONE))
return;
sk->sk_data_ready(sk);
@@ -5990,9 +5987,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
tcp_initialize_rcv_mss(sk);
- if (sk_is_mptcp(sk))
- mptcp_rcv_synsent(sk);
-
/* Remember, tcp_poll() does not lock socket!
* Change state from SYN-SENT only after copied_seq
* is initialized. */
diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c
index 221c81f85cbf..8d3f66c310db 100644
--- a/net/ipv6/calipso.c
+++ b/net/ipv6/calipso.c
@@ -1047,7 +1047,8 @@ static int calipso_opt_getattr(const unsigned char *calipso,
goto getattr_return;
}
- secattr->flags |= NETLBL_SECATTR_MLS_CAT;
+ if (secattr->attr.mls.cat)
+ secattr->flags |= NETLBL_SECATTR_MLS_CAT;
}
secattr->type = NETLBL_NLTYPE_CALIPSO;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 310cbddaa533..ff847a324220 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1385,9 +1385,18 @@ static struct rt6_info *ip6_rt_pcpu_alloc(const struct fib6_result *res)
}
ip6_rt_copy_init(pcpu_rt, res);
pcpu_rt->rt6i_flags |= RTF_PCPU;
+
+ if (f6i->nh)
+ pcpu_rt->sernum = rt_genid_ipv6(dev_net(dev));
+
return pcpu_rt;
}
+static bool rt6_is_valid(const struct rt6_info *rt6)
+{
+ return rt6->sernum == rt_genid_ipv6(dev_net(rt6->dst.dev));
+}
+
/* It should be called with rcu_read_lock() acquired */
static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
{
@@ -1395,6 +1404,19 @@ static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
pcpu_rt = this_cpu_read(*res->nh->rt6i_pcpu);
+ if (pcpu_rt && pcpu_rt->sernum && !rt6_is_valid(pcpu_rt)) {
+ struct rt6_info *prev, **p;
+
+ p = this_cpu_ptr(res->nh->rt6i_pcpu);
+ prev = xchg(p, NULL);
+ if (prev) {
+ dst_dev_put(&prev->dst);
+ dst_release(&prev->dst);
+ }
+
+ pcpu_rt = NULL;
+ }
+
return pcpu_rt;
}
@@ -2593,6 +2615,9 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
rt = container_of(dst, struct rt6_info, dst);
+ if (rt->sernum)
+ return rt6_is_valid(rt) ? dst : NULL;
+
rcu_read_lock();
/* All IPV6 dsts are created with ->obsolete set to the value
@@ -2697,8 +2722,10 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
const struct in6_addr *daddr, *saddr;
struct rt6_info *rt6 = (struct rt6_info *)dst;
- if (dst_metric_locked(dst, RTAX_MTU))
- return;
+ /* Note: do *NOT* check dst_metric_locked(dst, RTAX_MTU)
+ * IPv6 pmtu discovery isn't optional, so 'mtu lock' cannot disable it.
+ * [see also comment in rt6_mtu_change_route()]
+ */
if (iph) {
daddr = &iph->daddr;
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index 4c7e0a27fa9c..37b434293bda 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -27,8 +27,9 @@
bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len)
{
- int trailing;
unsigned int tlv_offset;
+ int max_last_entry;
+ int trailing;
if (srh->type != IPV6_SRCRT_TYPE_4)
return false;
@@ -36,7 +37,12 @@ bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len)
if (((srh->hdrlen + 1) << 3) != len)
return false;
- if (srh->segments_left > srh->first_segment)
+ max_last_entry = (srh->hdrlen / 2) - 1;
+
+ if (srh->first_segment > max_last_entry)
+ return false;
+
+ if (srh->segments_left > srh->first_segment + 1)
return false;
tlv_offset = sizeof(*srh) + ((srh->first_segment + 1) << 4);
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 4a7c467b99db..45497af23906 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -16,10 +16,10 @@ static bool mptcp_cap_flag_sha256(u8 flags)
return (flags & MPTCP_CAP_FLAG_MASK) == MPTCP_CAP_HMAC_SHA256;
}
-void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr,
- int opsize, struct tcp_options_received *opt_rx)
+static void mptcp_parse_option(const struct sk_buff *skb,
+ const unsigned char *ptr, int opsize,
+ struct mptcp_options_received *mp_opt)
{
- struct mptcp_options_received *mp_opt = &opt_rx->mptcp;
u8 subtype = *ptr >> 4;
int expected_opsize;
u8 version;
@@ -283,12 +283,20 @@ void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr,
}
void mptcp_get_options(const struct sk_buff *skb,
- struct tcp_options_received *opt_rx)
+ struct mptcp_options_received *mp_opt)
{
- const unsigned char *ptr;
const struct tcphdr *th = tcp_hdr(skb);
- int length = (th->doff * 4) - sizeof(struct tcphdr);
+ const unsigned char *ptr;
+ int length;
+
+ /* initialize option status */
+ mp_opt->mp_capable = 0;
+ mp_opt->mp_join = 0;
+ mp_opt->add_addr = 0;
+ mp_opt->rm_addr = 0;
+ mp_opt->dss = 0;
+ length = (th->doff * 4) - sizeof(struct tcphdr);
ptr = (const unsigned char *)(th + 1);
while (length > 0) {
@@ -308,7 +316,7 @@ void mptcp_get_options(const struct sk_buff *skb,
if (opsize > length)
return; /* don't parse partial options */
if (opcode == TCPOPT_MPTCP)
- mptcp_parse_option(skb, ptr, opsize, opt_rx);
+ mptcp_parse_option(skb, ptr, opsize, mp_opt);
ptr += opsize - 2;
length -= opsize;
}
@@ -344,28 +352,6 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
return false;
}
-void mptcp_rcv_synsent(struct sock *sk)
-{
- struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
- struct tcp_sock *tp = tcp_sk(sk);
-
- if (subflow->request_mptcp && tp->rx_opt.mptcp.mp_capable) {
- subflow->mp_capable = 1;
- subflow->can_ack = 1;
- subflow->remote_key = tp->rx_opt.mptcp.sndr_key;
- pr_debug("subflow=%p, remote_key=%llu", subflow,
- subflow->remote_key);
- } else if (subflow->request_join && tp->rx_opt.mptcp.mp_join) {
- subflow->mp_join = 1;
- subflow->thmac = tp->rx_opt.mptcp.thmac;
- subflow->remote_nonce = tp->rx_opt.mptcp.nonce;
- pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
- subflow->thmac, subflow->remote_nonce);
- } else if (subflow->request_mptcp) {
- tcp_sk(sk)->is_mptcp = 0;
- }
-}
-
/* MP_JOIN client subflow must wait for 4th ack before sending any data:
* TCP can't schedule delack timer before the subflow is fully established.
* MPTCP uses the delack timer to do 3rd ack retransmissions
@@ -709,7 +695,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
if (TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1)
return subflow->mp_capable;
- if (mp_opt->use_ack) {
+ if (mp_opt->dss && mp_opt->use_ack) {
/* subflows are fully established as soon as we get any
* additional ack.
*/
@@ -717,8 +703,6 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
goto fully_established;
}
- WARN_ON_ONCE(subflow->can_ack);
-
/* If the first established packet does not contain MP_CAPABLE + data
* then fallback to TCP
*/
@@ -728,6 +712,8 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
return false;
}
+ if (unlikely(!READ_ONCE(msk->pm.server_side)))
+ pr_warn_once("bogus mpc option on established client sk");
subflow->fully_established = 1;
subflow->remote_key = mp_opt->sndr_key;
subflow->can_ack = 1;
@@ -819,41 +805,41 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
- struct mptcp_options_received *mp_opt;
+ struct mptcp_options_received mp_opt;
struct mptcp_ext *mpext;
- mp_opt = &opt_rx->mptcp;
- if (!check_fully_established(msk, sk, subflow, skb, mp_opt))
+ mptcp_get_options(skb, &mp_opt);
+ if (!check_fully_established(msk, sk, subflow, skb, &mp_opt))
return;
- if (mp_opt->add_addr && add_addr_hmac_valid(msk, mp_opt)) {
+ if (mp_opt.add_addr && add_addr_hmac_valid(msk, &mp_opt)) {
struct mptcp_addr_info addr;
- addr.port = htons(mp_opt->port);
- addr.id = mp_opt->addr_id;
- if (mp_opt->family == MPTCP_ADDR_IPVERSION_4) {
+ addr.port = htons(mp_opt.port);
+ addr.id = mp_opt.addr_id;
+ if (mp_opt.family == MPTCP_ADDR_IPVERSION_4) {
addr.family = AF_INET;
- addr.addr = mp_opt->addr;
+ addr.addr = mp_opt.addr;
}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- else if (mp_opt->family == MPTCP_ADDR_IPVERSION_6) {
+ else if (mp_opt.family == MPTCP_ADDR_IPVERSION_6) {
addr.family = AF_INET6;
- addr.addr6 = mp_opt->addr6;
+ addr.addr6 = mp_opt.addr6;
}
#endif
- if (!mp_opt->echo)
+ if (!mp_opt.echo)
mptcp_pm_add_addr_received(msk, &addr);
- mp_opt->add_addr = 0;
+ mp_opt.add_addr = 0;
}
- if (!mp_opt->dss)
+ if (!mp_opt.dss)
return;
/* we can't wait for recvmsg() to update the ack_seq, otherwise
* monodirectional flows will stuck
*/
- if (mp_opt->use_ack)
- update_una(msk, mp_opt);
+ if (mp_opt.use_ack)
+ update_una(msk, &mp_opt);
mpext = skb_ext_add(skb, SKB_EXT_MPTCP);
if (!mpext)
@@ -861,8 +847,8 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
memset(mpext, 0, sizeof(*mpext));
- if (mp_opt->use_map) {
- if (mp_opt->mpc_map) {
+ if (mp_opt.use_map) {
+ if (mp_opt.mpc_map) {
/* this is an MP_CAPABLE carrying MPTCP data
* we know this map the first chunk of data
*/
@@ -872,13 +858,14 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
mpext->subflow_seq = 1;
mpext->dsn64 = 1;
mpext->mpc_map = 1;
+ mpext->data_fin = 0;
} else {
- mpext->data_seq = mp_opt->data_seq;
- mpext->subflow_seq = mp_opt->subflow_seq;
- mpext->dsn64 = mp_opt->dsn64;
- mpext->data_fin = mp_opt->data_fin;
+ mpext->data_seq = mp_opt.data_seq;
+ mpext->subflow_seq = mp_opt.subflow_seq;
+ mpext->dsn64 = mp_opt.dsn64;
+ mpext->data_fin = mp_opt.data_fin;
}
- mpext->data_len = mp_opt->data_len;
+ mpext->data_len = mp_opt.data_len;
mpext->use_map = 1;
}
}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index b22a63ba2348..32ea8d35489a 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1316,11 +1316,12 @@ static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk)
static int mptcp_disconnect(struct sock *sk, int flags)
{
- lock_sock(sk);
- __mptcp_clear_xmit(sk);
- release_sock(sk);
- mptcp_cancel_work(sk);
- return tcp_disconnect(sk, flags);
+ /* Should never be called.
+ * inet_stream_connect() calls ->disconnect, but that
+ * refers to the subflow socket, not the mptcp one.
+ */
+ WARN_ON_ONCE(1);
+ return 0;
}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
@@ -1333,7 +1334,7 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk)
#endif
struct sock *mptcp_sk_clone(const struct sock *sk,
- const struct tcp_options_received *opt_rx,
+ const struct mptcp_options_received *mp_opt,
struct request_sock *req)
{
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
@@ -1372,9 +1373,9 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
msk->write_seq = subflow_req->idsn + 1;
atomic64_set(&msk->snd_una, msk->write_seq);
- if (opt_rx->mptcp.mp_capable) {
+ if (mp_opt->mp_capable) {
msk->can_ack = true;
- msk->remote_key = opt_rx->mptcp.sndr_key;
+ msk->remote_key = mp_opt->sndr_key;
mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
ack_seq++;
msk->ack_seq = ack_seq;
@@ -1628,6 +1629,8 @@ bool mptcp_finish_join(struct sock *sk)
ret = mptcp_pm_allow_new_subflow(msk);
if (ret) {
+ subflow->map_seq = msk->ack_seq;
+
/* active connections are already on conn_list */
spin_lock_bh(&msk->join_list_lock);
if (!WARN_ON_ONCE(!list_empty(&subflow->node)))
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index a2b3048037d0..e4ca6320ce76 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -91,6 +91,45 @@
#define MPTCP_WORK_RTX 2
#define MPTCP_WORK_EOF 3
+struct mptcp_options_received {
+ u64 sndr_key;
+ u64 rcvr_key;
+ u64 data_ack;
+ u64 data_seq;
+ u32 subflow_seq;
+ u16 data_len;
+ u16 mp_capable : 1,
+ mp_join : 1,
+ dss : 1,
+ add_addr : 1,
+ rm_addr : 1,
+ family : 4,
+ echo : 1,
+ backup : 1;
+ u32 token;
+ u32 nonce;
+ u64 thmac;
+ u8 hmac[20];
+ u8 join_id;
+ u8 use_map:1,
+ dsn64:1,
+ data_fin:1,
+ use_ack:1,
+ ack64:1,
+ mpc_map:1,
+ __unused:2;
+ u8 addr_id;
+ u8 rm_id;
+ union {
+ struct in_addr addr;
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ struct in6_addr addr6;
+#endif
+ };
+ u64 ahmac;
+ u16 port;
+};
+
static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
{
return htonl((TCPOPT_MPTCP << 24) | (len << 16) | (subopt << 12) |
@@ -331,10 +370,10 @@ int mptcp_proto_v6_init(void);
#endif
struct sock *mptcp_sk_clone(const struct sock *sk,
- const struct tcp_options_received *opt_rx,
+ const struct mptcp_options_received *mp_opt,
struct request_sock *req);
void mptcp_get_options(const struct sk_buff *skb,
- struct tcp_options_received *opt_rx);
+ struct mptcp_options_received *mp_opt);
void mptcp_finish_connect(struct sock *sk);
void mptcp_data_ready(struct sock *sk, struct sock *ssk);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index fabd06f2ff45..4931a29a6f08 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -124,12 +124,11 @@ static void subflow_init_req(struct request_sock *req,
{
struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
- struct tcp_options_received rx_opt;
+ struct mptcp_options_received mp_opt;
pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);
- memset(&rx_opt.mptcp, 0, sizeof(rx_opt.mptcp));
- mptcp_get_options(skb, &rx_opt);
+ mptcp_get_options(skb, &mp_opt);
subflow_req->mp_capable = 0;
subflow_req->mp_join = 0;
@@ -142,16 +141,16 @@ static void subflow_init_req(struct request_sock *req,
return;
#endif
- if (rx_opt.mptcp.mp_capable) {
+ if (mp_opt.mp_capable) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);
- if (rx_opt.mptcp.mp_join)
+ if (mp_opt.mp_join)
return;
- } else if (rx_opt.mptcp.mp_join) {
+ } else if (mp_opt.mp_join) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX);
}
- if (rx_opt.mptcp.mp_capable && listener->request_mptcp) {
+ if (mp_opt.mp_capable && listener->request_mptcp) {
int err;
err = mptcp_token_new_request(req);
@@ -159,13 +158,13 @@ static void subflow_init_req(struct request_sock *req,
subflow_req->mp_capable = 1;
subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
- } else if (rx_opt.mptcp.mp_join && listener->request_mptcp) {
+ } else if (mp_opt.mp_join && listener->request_mptcp) {
subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
subflow_req->mp_join = 1;
- subflow_req->backup = rx_opt.mptcp.backup;
- subflow_req->remote_id = rx_opt.mptcp.join_id;
- subflow_req->token = rx_opt.mptcp.token;
- subflow_req->remote_nonce = rx_opt.mptcp.nonce;
+ subflow_req->backup = mp_opt.backup;
+ subflow_req->remote_id = mp_opt.join_id;
+ subflow_req->token = mp_opt.token;
+ subflow_req->remote_nonce = mp_opt.nonce;
pr_debug("token=%u, remote_nonce=%u", subflow_req->token,
subflow_req->remote_nonce);
if (!subflow_token_join_request(req, skb)) {
@@ -221,23 +220,47 @@ static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow)
static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_options_received mp_opt;
struct sock *parent = subflow->conn;
+ struct tcp_sock *tp = tcp_sk(sk);
subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);
- if (inet_sk_state_load(parent) != TCP_ESTABLISHED) {
+ if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
inet_sk_state_store(parent, TCP_ESTABLISHED);
parent->sk_state_change(parent);
}
- if (subflow->conn_finished || !tcp_sk(sk)->is_mptcp)
+ /* be sure no special action on any packet other than syn-ack */
+ if (subflow->conn_finished)
+ return;
+
+ subflow->conn_finished = 1;
+
+ mptcp_get_options(skb, &mp_opt);
+ if (subflow->request_mptcp && mp_opt.mp_capable) {
+ subflow->mp_capable = 1;
+ subflow->can_ack = 1;
+ subflow->remote_key = mp_opt.sndr_key;
+ pr_debug("subflow=%p, remote_key=%llu", subflow,
+ subflow->remote_key);
+ } else if (subflow->request_join && mp_opt.mp_join) {
+ subflow->mp_join = 1;
+ subflow->thmac = mp_opt.thmac;
+ subflow->remote_nonce = mp_opt.nonce;
+ pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
+ subflow->thmac, subflow->remote_nonce);
+ } else if (subflow->request_mptcp) {
+ tp->is_mptcp = 0;
+ }
+
+ if (!tp->is_mptcp)
return;
if (subflow->mp_capable) {
pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk),
subflow->remote_key);
mptcp_finish_connect(sk);
- subflow->conn_finished = 1;
if (skb) {
pr_debug("synack seq=%u", TCP_SKB_CB(skb)->seq);
@@ -264,7 +287,6 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
if (!mptcp_finish_join(sk))
goto do_reset;
- subflow->conn_finished = 1;
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
} else {
do_reset:
@@ -322,7 +344,7 @@ drop:
/* validate hmac received in third ACK */
static bool subflow_hmac_valid(const struct request_sock *req,
- const struct tcp_options_received *rx_opt)
+ const struct mptcp_options_received *mp_opt)
{
const struct mptcp_subflow_request_sock *subflow_req;
u8 hmac[MPTCPOPT_HMAC_LEN];
@@ -339,7 +361,7 @@ static bool subflow_hmac_valid(const struct request_sock *req,
subflow_req->local_nonce, hmac);
ret = true;
- if (crypto_memneq(hmac, rx_opt->mptcp.hmac, sizeof(hmac)))
+ if (crypto_memneq(hmac, mp_opt->hmac, sizeof(hmac)))
ret = false;
sock_put((struct sock *)msk);
@@ -395,7 +417,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
{
struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk);
struct mptcp_subflow_request_sock *subflow_req;
- struct tcp_options_received opt_rx;
+ struct mptcp_options_received mp_opt;
bool fallback_is_fatal = false;
struct sock *new_msk = NULL;
bool fallback = false;
@@ -403,7 +425,10 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
- opt_rx.mptcp.mp_capable = 0;
+ /* we need later a valid 'mp_capable' value even when options are not
+ * parsed
+ */
+ mp_opt.mp_capable = 0;
if (tcp_rsk(req)->is_mptcp == 0)
goto create_child;
@@ -418,22 +443,21 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
goto create_msk;
}
- mptcp_get_options(skb, &opt_rx);
- if (!opt_rx.mptcp.mp_capable) {
+ mptcp_get_options(skb, &mp_opt);
+ if (!mp_opt.mp_capable) {
fallback = true;
goto create_child;
}
create_msk:
- new_msk = mptcp_sk_clone(listener->conn, &opt_rx, req);
+ new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req);
if (!new_msk)
fallback = true;
} else if (subflow_req->mp_join) {
fallback_is_fatal = true;
- opt_rx.mptcp.mp_join = 0;
- mptcp_get_options(skb, &opt_rx);
- if (!opt_rx.mptcp.mp_join ||
- !subflow_hmac_valid(req, &opt_rx)) {
+ mptcp_get_options(skb, &mp_opt);
+ if (!mp_opt.mp_join ||
+ !subflow_hmac_valid(req, &mp_opt)) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
return NULL;
}
@@ -473,9 +497,9 @@ create_child:
/* with OoO packets we can reach here without ingress
* mpc option
*/
- ctx->remote_key = opt_rx.mptcp.sndr_key;
- ctx->fully_established = opt_rx.mptcp.mp_capable;
- ctx->can_ack = opt_rx.mptcp.mp_capable;
+ ctx->remote_key = mp_opt.sndr_key;
+ ctx->fully_established = mp_opt.mp_capable;
+ ctx->can_ack = mp_opt.mp_capable;
} else if (ctx->mp_join) {
struct mptcp_sock *owner;
@@ -499,7 +523,7 @@ out:
/* check for expected invariant - should never trigger, just help
* catching eariler subtle bugs
*/
- WARN_ON_ONCE(*own_req && child && tcp_sk(child)->is_mptcp &&
+ WARN_ON_ONCE(child && *own_req && tcp_sk(child)->is_mptcp &&
(!mptcp_subflow_ctx(child) ||
!mptcp_subflow_ctx(child)->conn));
return child;
@@ -988,6 +1012,16 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
if (err)
return err;
+ /* the newly created socket really belongs to the owning MPTCP master
+ * socket, even if for additional subflows the allocation is performed
+ * by a kernel workqueue. Adjust inode references, so that the
+ * procfs/diag interaces really show this one belonging to the correct
+ * user.
+ */
+ SOCK_INODE(sf)->i_ino = SOCK_INODE(sk->sk_socket)->i_ino;
+ SOCK_INODE(sf)->i_uid = SOCK_INODE(sk->sk_socket)->i_uid;
+ SOCK_INODE(sf)->i_gid = SOCK_INODE(sk->sk_socket)->i_gid;
+
subflow = mptcp_subflow_ctx(sf->sk);
pr_debug("subflow=%p", subflow);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index c4582eb71766..1d57b95d3481 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1519,9 +1519,9 @@ __nf_conntrack_alloc(struct net *net,
ct->status = 0;
ct->timeout = 0;
write_pnet(&ct->ct_net, net);
- memset(&ct->__nfct_init_offset[0], 0,
+ memset(&ct->__nfct_init_offset, 0,
offsetof(struct nf_conn, proto) -
- offsetof(struct nf_conn, __nfct_init_offset[0]));
+ offsetof(struct nf_conn, __nfct_init_offset));
nf_ct_zone_add(ct, zone);
@@ -2139,8 +2139,19 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
nf_conntrack_lock(lockp);
if (*bucket < nf_conntrack_htable_size) {
hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnnode) {
- if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
+ if (NF_CT_DIRECTION(h) != IP_CT_DIR_REPLY)
continue;
+ /* All nf_conn objects are added to hash table twice, one
+ * for original direction tuple, once for the reply tuple.
+ *
+ * Exception: In the IPS_NAT_CLASH case, only the reply
+ * tuple is added (the original tuple already existed for
+ * a different object).
+ *
+ * We only need to call the iterator once for each
+ * conntrack, so we just use the 'reply' direction
+ * tuple while iterating.
+ */
ct = nf_ct_tuplehash_to_ctrack(h);
if (iter(ct, data))
goto found;
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 4344e572b7f9..42da6e337276 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -284,7 +284,7 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
if (nf_flow_has_expired(flow))
flow_offload_fixup_ct(flow->ct);
- else if (test_bit(NF_FLOW_TEARDOWN, &flow->flags))
+ else
flow_offload_fixup_ct_timeout(flow->ct);
flow_offload_free(flow);
@@ -361,8 +361,10 @@ static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
{
struct nf_flowtable *flow_table = data;
- if (nf_flow_has_expired(flow) || nf_ct_is_dying(flow->ct) ||
- test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
+ if (nf_flow_has_expired(flow) || nf_ct_is_dying(flow->ct))
+ set_bit(NF_FLOW_TEARDOWN, &flow->flags);
+
+ if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
if (test_bit(NF_FLOW_HW, &flow->flags)) {
if (!test_bit(NF_FLOW_HW_DYING, &flow->flags))
nf_flow_offload_del(flow_table, flow);
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index e3b099c14eff..2276a73ccba2 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -817,6 +817,7 @@ static void flow_offload_work_handler(struct work_struct *work)
WARN_ON_ONCE(1);
}
+ clear_bit(NF_FLOW_HW_PENDING, &offload->flow->flags);
kfree(offload);
}
@@ -831,9 +832,14 @@ nf_flow_offload_work_alloc(struct nf_flowtable *flowtable,
{
struct flow_offload_work *offload;
+ if (test_and_set_bit(NF_FLOW_HW_PENDING, &flow->flags))
+ return NULL;
+
offload = kmalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
- if (!offload)
+ if (!offload) {
+ clear_bit(NF_FLOW_HW_PENDING, &flow->flags);
return NULL;
+ }
offload->cmd = cmd;
offload->flow = flow;
@@ -1056,7 +1062,7 @@ static struct flow_indr_block_entry block_ing_entry = {
int nf_flow_table_offload_init(void)
{
nf_flow_offload_wq = alloc_workqueue("nf_flow_table_offload",
- WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
+ WQ_UNBOUND, 0);
if (!nf_flow_offload_wq)
return -ENOMEM;
diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c
index 3d816a1e5442..59151dc07fdc 100644
--- a/net/netfilter/nf_nat_proto.c
+++ b/net/netfilter/nf_nat_proto.c
@@ -68,15 +68,13 @@ static bool udp_manip_pkt(struct sk_buff *skb,
enum nf_nat_manip_type maniptype)
{
struct udphdr *hdr;
- bool do_csum;
if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
return false;
hdr = (struct udphdr *)(skb->data + hdroff);
- do_csum = hdr->check || skb->ip_summed == CHECKSUM_PARTIAL;
+ __udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, !!hdr->check);
- __udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, do_csum);
return true;
}
diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c
index 9f5dea0064ea..916a3c7f9eaf 100644
--- a/net/netfilter/nfnetlink_osf.c
+++ b/net/netfilter/nfnetlink_osf.c
@@ -165,12 +165,12 @@ static bool nf_osf_match_one(const struct sk_buff *skb,
static const struct tcphdr *nf_osf_hdr_ctx_init(struct nf_osf_hdr_ctx *ctx,
const struct sk_buff *skb,
const struct iphdr *ip,
- unsigned char *opts)
+ unsigned char *opts,
+ struct tcphdr *_tcph)
{
const struct tcphdr *tcp;
- struct tcphdr _tcph;
- tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph);
+ tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), _tcph);
if (!tcp)
return NULL;
@@ -205,10 +205,11 @@ nf_osf_match(const struct sk_buff *skb, u_int8_t family,
int fmatch = FMATCH_WRONG;
struct nf_osf_hdr_ctx ctx;
const struct tcphdr *tcp;
+ struct tcphdr _tcph;
memset(&ctx, 0, sizeof(ctx));
- tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts);
+ tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts, &_tcph);
if (!tcp)
return false;
@@ -265,10 +266,11 @@ bool nf_osf_find(const struct sk_buff *skb,
const struct nf_osf_finger *kf;
struct nf_osf_hdr_ctx ctx;
const struct tcphdr *tcp;
+ struct tcphdr _tcph;
memset(&ctx, 0, sizeof(ctx));
- tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts);
+ tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts, &_tcph);
if (!tcp)
return false;
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 3ffef454d469..62f416bc0579 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -79,6 +79,10 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
parent = rcu_dereference_raw(parent->rb_left);
continue;
}
+
+ if (nft_set_elem_expired(&rbe->ext))
+ return false;
+
if (nft_rbtree_interval_end(rbe)) {
if (nft_set_is_anonymous(set))
return false;
@@ -94,6 +98,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
nft_set_elem_active(&interval->ext, genmask) &&
+ !nft_set_elem_expired(&interval->ext) &&
nft_rbtree_interval_start(interval)) {
*ext = &interval->ext;
return true;
@@ -154,6 +159,9 @@ static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set,
continue;
}
+ if (nft_set_elem_expired(&rbe->ext))
+ return false;
+
if (!nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) ||
(*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END) ==
(flags & NFT_SET_ELEM_INTERVAL_END)) {
@@ -170,6 +178,7 @@ static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set,
if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
nft_set_elem_active(&interval->ext, genmask) &&
+ !nft_set_elem_expired(&interval->ext) &&
((!nft_rbtree_interval_end(interval) &&
!(flags & NFT_SET_ELEM_INTERVAL_END)) ||
(nft_rbtree_interval_end(interval) &&
@@ -418,6 +427,8 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
if (iter->count < iter->skip)
goto cont;
+ if (nft_set_elem_expired(&rbe->ext))
+ goto cont;
if (!nft_set_elem_active(&rbe->ext, iter->genmask))
goto cont;
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 409a3ae47ce2..5e1239cef000 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -734,6 +734,12 @@ int netlbl_catmap_getlong(struct netlbl_lsm_catmap *catmap,
if ((off & (BITS_PER_LONG - 1)) != 0)
return -EINVAL;
+ /* a null catmap is equivalent to an empty one */
+ if (!catmap) {
+ *offset = (u32)-1;
+ return 0;
+ }
+
if (off < catmap->startbit) {
off = catmap->startbit;
*offset = off;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 55bd1429678f..0a7ecc292bd3 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -2070,6 +2070,7 @@ replay:
err = PTR_ERR(block);
goto errout;
}
+ block->classid = parent;
chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
if (chain_index > TC_ACT_EXT_VAL_MASK) {
@@ -2612,12 +2613,10 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
parent = tcm->tcm_parent;
- if (!parent) {
+ if (!parent)
q = dev->qdisc;
- parent = q->handle;
- } else {
+ else
q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
- }
if (!q)
goto out;
cops = q->ops->cl_ops;
@@ -2633,6 +2632,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
block = cops->tcf_block(q, cl, NULL);
if (!block)
goto out;
+ parent = block->classid;
if (tcf_block_shared(block))
q = NULL;
}
@@ -3523,6 +3523,16 @@ static void tcf_sample_get_group(struct flow_action_entry *entry,
#endif
}
+static enum flow_action_hw_stats tc_act_hw_stats(u8 hw_stats)
+{
+ if (WARN_ON_ONCE(hw_stats > TCA_ACT_HW_STATS_ANY))
+ return FLOW_ACTION_HW_STATS_DONT_CARE;
+ else if (!hw_stats)
+ return FLOW_ACTION_HW_STATS_DISABLED;
+
+ return hw_stats;
+}
+
int tc_setup_flow_action(struct flow_action *flow_action,
const struct tcf_exts *exts)
{
@@ -3546,7 +3556,7 @@ int tc_setup_flow_action(struct flow_action *flow_action,
if (err)
goto err_out_locked;
- entry->hw_stats = act->hw_stats;
+ entry->hw_stats = tc_act_hw_stats(act->hw_stats);
if (is_tcf_gact_ok(act)) {
entry->id = FLOW_ACTION_ACCEPT;
@@ -3614,7 +3624,7 @@ int tc_setup_flow_action(struct flow_action *flow_action,
entry->mangle.mask = tcf_pedit_mask(act, k);
entry->mangle.val = tcf_pedit_val(act, k);
entry->mangle.offset = tcf_pedit_offset(act, k);
- entry->hw_stats = act->hw_stats;
+ entry->hw_stats = tc_act_hw_stats(act->hw_stats);
entry = &flow_action->entries[++j];
}
} else if (is_tcf_csum(act)) {
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index a36974e9c601..1bcf8fbfd40e 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -323,7 +323,8 @@ static void choke_reset(struct Qdisc *sch)
sch->q.qlen = 0;
sch->qstats.backlog = 0;
- memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
+ if (q->tab)
+ memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
q->head = q->tail = 0;
red_restart(&q->vars);
}
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 968519ff36e9..436160be9c18 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -416,7 +416,7 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt,
q->quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM]));
if (tb[TCA_FQ_CODEL_DROP_BATCH_SIZE])
- q->drop_batch_size = min(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE]));
+ q->drop_batch_size = max(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE]));
if (tb[TCA_FQ_CODEL_MEMORY_LIMIT])
q->memory_limit = min(1U << 31, nla_get_u32(tb[TCA_FQ_CODEL_MEMORY_LIMIT]));
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index c787d4d46017..5a6def5e4e6d 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -637,6 +637,15 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
if (ctl->divisor &&
(!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
return -EINVAL;
+
+ /* slot->allot is a short, make sure quantum is not too big. */
+ if (ctl->quantum) {
+ unsigned int scaled = SFQ_ALLOT_SIZE(ctl->quantum);
+
+ if (scaled <= 0 || scaled > SHRT_MAX)
+ return -EINVAL;
+ }
+
if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
ctl_v1->Wlog))
return -EINVAL;
diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c
index 0fb10abf7579..7a5e4c454715 100644
--- a/net/sched/sch_skbprio.c
+++ b/net/sched/sch_skbprio.c
@@ -169,6 +169,9 @@ static int skbprio_change(struct Qdisc *sch, struct nlattr *opt,
{
struct tc_skbprio_qopt *ctl = nla_data(opt);
+ if (opt->nla_len != nla_attr_size(sizeof(*ctl)))
+ return -EINVAL;
+
sch->limit = ctl->limit;
return 0;
}
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 25fbd8d9de74..ac5cac0dd24b 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -2032,7 +2032,6 @@ gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred,
struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf;
struct kvec *head = rqstp->rq_rcv_buf.head;
struct rpc_auth *auth = cred->cr_auth;
- unsigned int savedlen = rcv_buf->len;
u32 offset, opaque_len, maj_stat;
__be32 *p;
@@ -2043,9 +2042,9 @@ gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred,
offset = (u8 *)(p) - (u8 *)head->iov_base;
if (offset + opaque_len > rcv_buf->len)
goto unwrap_failed;
- rcv_buf->len = offset + opaque_len;
- maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset, rcv_buf);
+ maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset,
+ offset + opaque_len, rcv_buf);
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
if (maj_stat != GSS_S_COMPLETE)
@@ -2059,10 +2058,9 @@ gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred,
*/
xdr_init_decode(xdr, rcv_buf, p, rqstp);
- auth->au_rslack = auth->au_verfsize + 2 +
- XDR_QUADLEN(savedlen - rcv_buf->len);
- auth->au_ralign = auth->au_verfsize + 2 +
- XDR_QUADLEN(savedlen - rcv_buf->len);
+ auth->au_rslack = auth->au_verfsize + 2 + ctx->gc_gss_ctx->slack;
+ auth->au_ralign = auth->au_verfsize + 2 + ctx->gc_gss_ctx->align;
+
return 0;
unwrap_failed:
trace_rpcgss_unwrap_failed(task);
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 6f2d30d7b766..e7180da1fc6a 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -851,8 +851,8 @@ out_err:
}
u32
-gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
- u32 *headskip, u32 *tailskip)
+gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, u32 len,
+ struct xdr_buf *buf, u32 *headskip, u32 *tailskip)
{
struct xdr_buf subbuf;
u32 ret = 0;
@@ -881,7 +881,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
/* create a segment skipping the header and leaving out the checksum */
xdr_buf_subsegment(buf, &subbuf, offset + GSS_KRB5_TOK_HDR_LEN,
- (buf->len - offset - GSS_KRB5_TOK_HDR_LEN -
+ (len - offset - GSS_KRB5_TOK_HDR_LEN -
kctx->gk5e->cksumlength));
nblocks = (subbuf.len + blocksize - 1) / blocksize;
@@ -926,7 +926,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
goto out_err;
/* Get the packet's hmac value */
- ret = read_bytes_from_xdr_buf(buf, buf->len - kctx->gk5e->cksumlength,
+ ret = read_bytes_from_xdr_buf(buf, len - kctx->gk5e->cksumlength,
pkt_hmac, kctx->gk5e->cksumlength);
if (ret)
goto out_err;
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 6c1920eed771..cf0fd170ac18 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -261,7 +261,9 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
}
static u32
-gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
+gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, int len,
+ struct xdr_buf *buf, unsigned int *slack,
+ unsigned int *align)
{
int signalg;
int sealalg;
@@ -279,12 +281,13 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
u32 conflen = kctx->gk5e->conflen;
int crypt_offset;
u8 *cksumkey;
+ unsigned int saved_len = buf->len;
dprintk("RPC: gss_unwrap_kerberos\n");
ptr = (u8 *)buf->head[0].iov_base + offset;
if (g_verify_token_header(&kctx->mech_used, &bodysize, &ptr,
- buf->len - offset))
+ len - offset))
return GSS_S_DEFECTIVE_TOKEN;
if ((ptr[0] != ((KG_TOK_WRAP_MSG >> 8) & 0xff)) ||
@@ -324,6 +327,7 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
(!kctx->initiate && direction != 0))
return GSS_S_BAD_SIG;
+ buf->len = len;
if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) {
struct crypto_sync_skcipher *cipher;
int err;
@@ -376,11 +380,15 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start;
memmove(orig_start, data_start, data_len);
buf->head[0].iov_len -= (data_start - orig_start);
- buf->len -= (data_start - orig_start);
+ buf->len = len - (data_start - orig_start);
if (gss_krb5_remove_padding(buf, blocksize))
return GSS_S_DEFECTIVE_TOKEN;
+ /* slack must include room for krb5 padding */
+ *slack = XDR_QUADLEN(saved_len - buf->len);
+ /* The GSS blob always precedes the RPC message payload */
+ *align = *slack;
return GSS_S_COMPLETE;
}
@@ -486,7 +494,9 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
}
static u32
-gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
+gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, int len,
+ struct xdr_buf *buf, unsigned int *slack,
+ unsigned int *align)
{
time64_t now;
u8 *ptr;
@@ -532,7 +542,7 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
if (rrc != 0)
rotate_left(offset + 16, buf, rrc);
- err = (*kctx->gk5e->decrypt_v2)(kctx, offset, buf,
+ err = (*kctx->gk5e->decrypt_v2)(kctx, offset, len, buf,
&headskip, &tailskip);
if (err)
return GSS_S_FAILURE;
@@ -542,7 +552,7 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
* it against the original
*/
err = read_bytes_from_xdr_buf(buf,
- buf->len - GSS_KRB5_TOK_HDR_LEN - tailskip,
+ len - GSS_KRB5_TOK_HDR_LEN - tailskip,
decrypted_hdr, GSS_KRB5_TOK_HDR_LEN);
if (err) {
dprintk("%s: error %u getting decrypted_hdr\n", __func__, err);
@@ -568,18 +578,19 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
* Note that buf->head[0].iov_len may indicate the available
* head buffer space rather than that actually occupied.
*/
- movelen = min_t(unsigned int, buf->head[0].iov_len, buf->len);
+ movelen = min_t(unsigned int, buf->head[0].iov_len, len);
movelen -= offset + GSS_KRB5_TOK_HDR_LEN + headskip;
- if (offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen >
- buf->head[0].iov_len)
- return GSS_S_FAILURE;
+ BUG_ON(offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen >
+ buf->head[0].iov_len);
memmove(ptr, ptr + GSS_KRB5_TOK_HDR_LEN + headskip, movelen);
buf->head[0].iov_len -= GSS_KRB5_TOK_HDR_LEN + headskip;
- buf->len -= GSS_KRB5_TOK_HDR_LEN + headskip;
+ buf->len = len - GSS_KRB5_TOK_HDR_LEN + headskip;
/* Trim off the trailing "extra count" and checksum blob */
- buf->len -= ec + GSS_KRB5_TOK_HDR_LEN + tailskip;
+ xdr_buf_trim(buf, ec + GSS_KRB5_TOK_HDR_LEN + tailskip);
+ *align = XDR_QUADLEN(GSS_KRB5_TOK_HDR_LEN + headskip);
+ *slack = *align + XDR_QUADLEN(ec + GSS_KRB5_TOK_HDR_LEN + tailskip);
return GSS_S_COMPLETE;
}
@@ -603,7 +614,8 @@ gss_wrap_kerberos(struct gss_ctx *gctx, int offset,
}
u32
-gss_unwrap_kerberos(struct gss_ctx *gctx, int offset, struct xdr_buf *buf)
+gss_unwrap_kerberos(struct gss_ctx *gctx, int offset,
+ int len, struct xdr_buf *buf)
{
struct krb5_ctx *kctx = gctx->internal_ctx_id;
@@ -613,9 +625,11 @@ gss_unwrap_kerberos(struct gss_ctx *gctx, int offset, struct xdr_buf *buf)
case ENCTYPE_DES_CBC_RAW:
case ENCTYPE_DES3_CBC_RAW:
case ENCTYPE_ARCFOUR_HMAC:
- return gss_unwrap_kerberos_v1(kctx, offset, buf);
+ return gss_unwrap_kerberos_v1(kctx, offset, len, buf,
+ &gctx->slack, &gctx->align);
case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
- return gss_unwrap_kerberos_v2(kctx, offset, buf);
+ return gss_unwrap_kerberos_v2(kctx, offset, len, buf,
+ &gctx->slack, &gctx->align);
}
}
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index db550bfc2642..69316ab1b9fa 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -411,10 +411,11 @@ gss_wrap(struct gss_ctx *ctx_id,
u32
gss_unwrap(struct gss_ctx *ctx_id,
int offset,
+ int len,
struct xdr_buf *buf)
{
return ctx_id->mech_type->gm_ops
- ->gss_unwrap(ctx_id, offset, buf);
+ ->gss_unwrap(ctx_id, offset, len, buf);
}
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 54ae5be62f6a..50d93c49ef1a 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -906,7 +906,7 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g
if (svc_getnl(&buf->head[0]) != seq)
goto out;
/* trim off the mic and padding at the end before returning */
- buf->len -= 4 + round_up_to_quad(mic.len);
+ xdr_buf_trim(buf, round_up_to_quad(mic.len) + 4);
stat = 0;
out:
kfree(mic.data);
@@ -934,7 +934,7 @@ static int
unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx)
{
u32 priv_len, maj_stat;
- int pad, saved_len, remaining_len, offset;
+ int pad, remaining_len, offset;
clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
@@ -954,12 +954,8 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs
buf->len -= pad;
fix_priv_head(buf, pad);
- /* Maybe it would be better to give gss_unwrap a length parameter: */
- saved_len = buf->len;
- buf->len = priv_len;
- maj_stat = gss_unwrap(ctx, 0, buf);
+ maj_stat = gss_unwrap(ctx, 0, priv_len, buf);
pad = priv_len - buf->len;
- buf->len = saved_len;
buf->len -= pad;
/* The upper layers assume the buffer is aligned on 4-byte boundaries.
* In the krb5p case, at least, the data ends up offset, so we need to
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 8350d3a2e9a7..61b21dafd7c0 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -889,7 +889,9 @@ static void rpc_free_client_work(struct work_struct *work)
* here.
*/
rpc_clnt_debugfs_unregister(clnt);
+ rpc_free_clid(clnt);
rpc_clnt_remove_pipedir(clnt);
+ xprt_put(rcu_dereference_raw(clnt->cl_xprt));
kfree(clnt);
rpciod_down();
@@ -907,10 +909,8 @@ rpc_free_client(struct rpc_clnt *clnt)
rpc_unregister_client(clnt);
rpc_free_iostats(clnt->cl_metrics);
clnt->cl_metrics = NULL;
- xprt_put(rcu_dereference_raw(clnt->cl_xprt));
xprt_iter_destroy(&clnt->cl_xpi);
put_cred(clnt->cl_cred);
- rpc_free_clid(clnt);
INIT_WORK(&clnt->cl_work, rpc_free_client_work);
schedule_work(&clnt->cl_work);
@@ -2433,6 +2433,11 @@ rpc_check_timeout(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
+ if (RPC_SIGNALLED(task)) {
+ rpc_call_rpcerror(task, -ERESTARTSYS);
+ return;
+ }
+
if (xprt_adjust_timeout(task->tk_rqstp) == 0)
return;
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index e534f8f082e7..c211b607239e 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -991,6 +991,7 @@ static int __svc_register(struct net *net, const char *progname,
#endif
}
+ trace_svc_register(progname, version, protocol, port, family, error);
return error;
}
@@ -1000,11 +1001,6 @@ int svc_rpcbind_set_version(struct net *net,
unsigned short proto,
unsigned short port)
{
- dprintk("svc: svc_register(%sv%d, %s, %u, %u)\n",
- progp->pg_name, version,
- proto == IPPROTO_UDP? "udp" : "tcp",
- port, family);
-
return __svc_register(net, progp->pg_name, progp->pg_prog,
version, family, proto, port);
@@ -1024,11 +1020,8 @@ int svc_generic_rpcbind_set(struct net *net,
return 0;
if (vers->vs_hidden) {
- dprintk("svc: svc_register(%sv%d, %s, %u, %u)"
- " (but not telling portmap)\n",
- progp->pg_name, version,
- proto == IPPROTO_UDP? "udp" : "tcp",
- port, family);
+ trace_svc_noregister(progp->pg_name, version, proto,
+ port, family, 0);
return 0;
}
@@ -1106,8 +1099,7 @@ static void __svc_unregister(struct net *net, const u32 program, const u32 versi
if (error == -EPROTONOSUPPORT)
error = rpcb_register(net, program, version, 0, 0);
- dprintk("svc: %s(%sv%u), error %d\n",
- __func__, progname, version, error);
+ trace_svc_unregister(progname, version, error);
}
/*
@@ -1132,9 +1124,6 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net)
continue;
if (progp->pg_vers[i]->vs_hidden)
continue;
-
- dprintk("svc: attempting to unregister %sv%u\n",
- progp->pg_name, i);
__svc_unregister(net, progp->pg_prog, i, progp->pg_name);
}
}
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 2284ff038dad..c1ff8cdb5b2b 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -153,6 +153,7 @@ static void svc_xprt_free(struct kref *kref)
xprt_put(xprt->xpt_bc_xprt);
if (xprt->xpt_bc_xps)
xprt_switch_put(xprt->xpt_bc_xps);
+ trace_svc_xprt_free(xprt);
xprt->xpt_ops->xpo_free(xprt);
module_put(owner);
}
@@ -206,6 +207,7 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
.sin6_port = htons(port),
};
#endif
+ struct svc_xprt *xprt;
struct sockaddr *sap;
size_t len;
@@ -224,7 +226,11 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
return ERR_PTR(-EAFNOSUPPORT);
}
- return xcl->xcl_ops->xpo_create(serv, net, sap, len, flags);
+ xprt = xcl->xcl_ops->xpo_create(serv, net, sap, len, flags);
+ if (IS_ERR(xprt))
+ trace_svc_xprt_create_err(serv->sv_program->pg_name,
+ xcl->xcl_name, sap, xprt);
+ return xprt;
}
/*
@@ -304,15 +310,11 @@ int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
{
int err;
- dprintk("svc: creating transport %s[%d]\n", xprt_name, port);
err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred);
if (err == -EPROTONOSUPPORT) {
request_module("svc%s", xprt_name);
err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred);
}
- if (err < 0)
- dprintk("svc: transport %s not found, err %d\n",
- xprt_name, -err);
return err;
}
EXPORT_SYMBOL_GPL(svc_create_xprt);
@@ -780,7 +782,6 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
int len = 0;
if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) {
- dprintk("svc_recv: found XPT_CLOSE\n");
if (test_and_clear_bit(XPT_KILL_TEMP, &xprt->xpt_flags))
xprt->xpt_ops->xpo_kill_temp_xprt(xprt);
svc_delete_xprt(xprt);
@@ -799,6 +800,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
if (newxpt) {
newxpt->xpt_cred = get_cred(xprt->xpt_cred);
svc_add_new_temp_xprt(serv, newxpt);
+ trace_svc_xprt_accept(newxpt, serv->sv_name);
} else
module_put(xprt->xpt_class->xcl_owner);
} else if (svc_xprt_reserve_slot(rqstp, xprt)) {
@@ -835,14 +837,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
struct svc_serv *serv = rqstp->rq_server;
int len, err;
- dprintk("svc: server %p waiting for data (to = %ld)\n",
- rqstp, timeout);
-
- if (rqstp->rq_xprt)
- printk(KERN_ERR
- "svc_recv: service %p, transport not NULL!\n",
- rqstp);
-
err = svc_alloc_arg(rqstp);
if (err)
goto out;
@@ -890,7 +884,6 @@ EXPORT_SYMBOL_GPL(svc_recv);
void svc_drop(struct svc_rqst *rqstp)
{
trace_svc_drop(rqstp);
- dprintk("svc: xprt %p dropped request\n", rqstp->rq_xprt);
svc_xprt_release(rqstp);
}
EXPORT_SYMBOL_GPL(svc_drop);
@@ -914,16 +907,10 @@ int svc_send(struct svc_rqst *rqstp)
xb->page_len +
xb->tail[0].iov_len;
trace_svc_sendto(xb);
-
- /* Grab mutex to serialize outgoing data. */
- mutex_lock(&xprt->xpt_mutex);
trace_svc_stats_latency(rqstp);
- if (test_bit(XPT_DEAD, &xprt->xpt_flags)
- || test_bit(XPT_CLOSE, &xprt->xpt_flags))
- len = -ENOTCONN;
- else
- len = xprt->xpt_ops->xpo_sendto(rqstp);
- mutex_unlock(&xprt->xpt_mutex);
+
+ len = xprt->xpt_ops->xpo_sendto(rqstp);
+
trace_svc_send(rqstp, len);
svc_xprt_release(rqstp);
@@ -1031,11 +1018,10 @@ static void svc_delete_xprt(struct svc_xprt *xprt)
struct svc_serv *serv = xprt->xpt_server;
struct svc_deferred_req *dr;
- /* Only do this once */
if (test_and_set_bit(XPT_DEAD, &xprt->xpt_flags))
- BUG();
+ return;
- dprintk("svc: svc_delete_xprt(%p)\n", xprt);
+ trace_svc_xprt_detach(xprt);
xprt->xpt_ops->xpo_detach(xprt);
if (xprt->xpt_bc_xprt)
xprt->xpt_bc_xprt->ops->close(xprt->xpt_bc_xprt);
@@ -1056,6 +1042,7 @@ static void svc_delete_xprt(struct svc_xprt *xprt)
void svc_close_xprt(struct svc_xprt *xprt)
{
+ trace_svc_xprt_close(xprt);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags))
/* someone else will have to effect the close */
@@ -1158,16 +1145,15 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
set_bit(XPT_DEFERRED, &xprt->xpt_flags);
if (too_many || test_bit(XPT_DEAD, &xprt->xpt_flags)) {
spin_unlock(&xprt->xpt_lock);
- dprintk("revisit canceled\n");
+ trace_svc_defer_drop(dr);
svc_xprt_put(xprt);
- trace_svc_drop_deferred(dr);
kfree(dr);
return;
}
- dprintk("revisit queued\n");
dr->xprt = NULL;
list_add(&dr->handle.recent, &xprt->xpt_deferred);
spin_unlock(&xprt->xpt_lock);
+ trace_svc_defer_queue(dr);
svc_xprt_enqueue(xprt);
svc_xprt_put(xprt);
}
@@ -1213,22 +1199,24 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
memcpy(dr->args, rqstp->rq_arg.head[0].iov_base - skip,
dr->argslen << 2);
}
+ trace_svc_defer(rqstp);
svc_xprt_get(rqstp->rq_xprt);
dr->xprt = rqstp->rq_xprt;
set_bit(RQ_DROPME, &rqstp->rq_flags);
dr->handle.revisit = svc_revisit;
- trace_svc_defer(rqstp);
return &dr->handle;
}
/*
* recv data from a deferred request into an active one
*/
-static int svc_deferred_recv(struct svc_rqst *rqstp)
+static noinline int svc_deferred_recv(struct svc_rqst *rqstp)
{
struct svc_deferred_req *dr = rqstp->rq_deferred;
+ trace_svc_defer_recv(dr);
+
/* setup iov_base past transport header */
rqstp->rq_arg.head[0].iov_base = dr->args + (dr->xprt_hlen>>2);
/* The iov_len does not include the transport header bytes */
@@ -1259,7 +1247,6 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
struct svc_deferred_req,
handle.recent);
list_del_init(&dr->handle.recent);
- trace_svc_revisit_deferred(dr);
} else
clear_bit(XPT_DEFERRED, &xprt->xpt_flags);
spin_unlock(&xprt->xpt_lock);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 023514e392b3..97d2b6f8c791 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -45,7 +45,6 @@
#include <net/tcp_states.h>
#include <linux/uaccess.h>
#include <asm/ioctls.h>
-#include <trace/events/skb.h>
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/clnt.h>
@@ -55,6 +54,8 @@
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/xprt.h>
+#include <trace/events/sunrpc.h>
+
#include "socklib.h"
#include "sunrpc.h"
@@ -108,31 +109,35 @@ static void svc_reclassify_socket(struct socket *sock)
}
#endif
-/*
- * Release an skbuff after use
+/**
+ * svc_tcp_release_rqst - Release transport-related resources
+ * @rqstp: request structure with resources to be released
+ *
*/
-static void svc_release_skb(struct svc_rqst *rqstp)
+static void svc_tcp_release_rqst(struct svc_rqst *rqstp)
{
struct sk_buff *skb = rqstp->rq_xprt_ctxt;
if (skb) {
struct svc_sock *svsk =
container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
- rqstp->rq_xprt_ctxt = NULL;
- dprintk("svc: service %p, releasing skb %p\n", rqstp, skb);
+ rqstp->rq_xprt_ctxt = NULL;
skb_free_datagram_locked(svsk->sk_sk, skb);
}
}
-static void svc_release_udp_skb(struct svc_rqst *rqstp)
+/**
+ * svc_udp_release_rqst - Release transport-related resources
+ * @rqstp: request structure with resources to be released
+ *
+ */
+static void svc_udp_release_rqst(struct svc_rqst *rqstp)
{
struct sk_buff *skb = rqstp->rq_xprt_ctxt;
if (skb) {
rqstp->rq_xprt_ctxt = NULL;
-
- dprintk("svc: service %p, releasing skb %p\n", rqstp, skb);
consume_skb(skb);
}
}
@@ -218,34 +223,68 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining)
return len;
}
+#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
+static void svc_flush_bvec(const struct bio_vec *bvec, size_t size, size_t seek)
+{
+ struct bvec_iter bi = {
+ .bi_size = size,
+ };
+ struct bio_vec bv;
+
+ bvec_iter_advance(bvec, &bi, seek & PAGE_MASK);
+ for_each_bvec(bv, bvec, bi, bi)
+ flush_dcache_page(bv.bv_page);
+}
+#else
+static inline void svc_flush_bvec(const struct bio_vec *bvec, size_t size,
+ size_t seek)
+{
+}
+#endif
+
/*
- * Generic recvfrom routine.
+ * Read from @rqstp's transport socket. The incoming message fills whole
+ * pages in @rqstp's rq_pages array until the last page of the message
+ * has been received into a partial page.
*/
-static ssize_t svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov,
- unsigned int nr, size_t buflen, unsigned int base)
+static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen,
+ size_t seek)
{
struct svc_sock *svsk =
container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
+ struct bio_vec *bvec = rqstp->rq_bvec;
struct msghdr msg = { NULL };
+ unsigned int i;
ssize_t len;
+ size_t t;
rqstp->rq_xprt_hlen = 0;
clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
- iov_iter_kvec(&msg.msg_iter, READ, iov, nr, buflen);
- if (base != 0) {
- iov_iter_advance(&msg.msg_iter, base);
- buflen -= base;
+
+ for (i = 0, t = 0; t < buflen; i++, t += PAGE_SIZE) {
+ bvec[i].bv_page = rqstp->rq_pages[i];
+ bvec[i].bv_len = PAGE_SIZE;
+ bvec[i].bv_offset = 0;
+ }
+ rqstp->rq_respages = &rqstp->rq_pages[i];
+ rqstp->rq_next_page = rqstp->rq_respages + 1;
+
+ iov_iter_bvec(&msg.msg_iter, READ, bvec, i, buflen);
+ if (seek) {
+ iov_iter_advance(&msg.msg_iter, seek);
+ buflen -= seek;
}
len = sock_recvmsg(svsk->sk_sock, &msg, MSG_DONTWAIT);
+ if (len > 0)
+ svc_flush_bvec(bvec, len, seek);
+
/* If we read a full record, then assume there may be more
* data to read (stream based sockets only!)
*/
if (len == buflen)
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
- dprintk("svc: socket %p recvfrom(%p, %zu) = %zd\n",
- svsk, iov[0].iov_base, iov[0].iov_len, len);
return len;
}
@@ -282,13 +321,10 @@ static void svc_data_ready(struct sock *sk)
struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
if (svsk) {
- dprintk("svc: socket %p(inet %p), busy=%d\n",
- svsk, sk,
- test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
-
/* Refer to svc_setup_socket() for details. */
rmb();
svsk->sk_odata(sk);
+ trace_svcsock_data_ready(&svsk->sk_xprt, 0);
if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags))
svc_xprt_enqueue(&svsk->sk_xprt);
}
@@ -302,11 +338,9 @@ static void svc_write_space(struct sock *sk)
struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data);
if (svsk) {
- dprintk("svc: socket %p(inet %p), write_space busy=%d\n",
- svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
-
/* Refer to svc_setup_socket() for details. */
rmb();
+ trace_svcsock_write_space(&svsk->sk_xprt, 0);
svsk->sk_owspace(sk);
svc_xprt_enqueue(&svsk->sk_xprt);
}
@@ -391,8 +425,15 @@ static int svc_udp_get_dest_address(struct svc_rqst *rqstp,
return 0;
}
-/*
- * Receive a datagram from a UDP socket.
+/**
+ * svc_udp_recvfrom - Receive a datagram from a UDP socket.
+ * @rqstp: request structure into which to receive an RPC Call
+ *
+ * Called in a loop when XPT_DATA has been set.
+ *
+ * Returns:
+ * On success, the number of bytes in a received RPC Call, or
+ * %0 if a complete RPC Call message was not ready to return
*/
static int svc_udp_recvfrom(struct svc_rqst *rqstp)
{
@@ -426,20 +467,14 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
svc_sock_setbufsize(svsk, serv->sv_nrthreads + 3);
clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
- skb = NULL;
err = kernel_recvmsg(svsk->sk_sock, &msg, NULL,
0, 0, MSG_PEEK | MSG_DONTWAIT);
- if (err >= 0)
- skb = skb_recv_udp(svsk->sk_sk, 0, 1, &err);
-
- if (skb == NULL) {
- if (err != -EAGAIN) {
- /* possibly an icmp error */
- dprintk("svc: recvfrom returned error %d\n", -err);
- set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
- }
- return 0;
- }
+ if (err < 0)
+ goto out_recv_err;
+ skb = skb_recv_udp(svsk->sk_sk, 0, 1, &err);
+ if (!skb)
+ goto out_recv_err;
+
len = svc_addr_len(svc_addr(rqstp));
rqstp->rq_addrlen = len;
if (skb->tstamp == 0) {
@@ -450,26 +485,21 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
sock_write_timestamp(svsk->sk_sk, skb->tstamp);
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */
- len = skb->len;
+ len = skb->len;
rqstp->rq_arg.len = len;
+ trace_svcsock_udp_recv(&svsk->sk_xprt, len);
rqstp->rq_prot = IPPROTO_UDP;
- if (!svc_udp_get_dest_address(rqstp, cmh)) {
- net_warn_ratelimited("svc: received unknown control message %d/%d; dropping RPC reply datagram\n",
- cmh->cmsg_level, cmh->cmsg_type);
- goto out_free;
- }
+ if (!svc_udp_get_dest_address(rqstp, cmh))
+ goto out_cmsg_err;
rqstp->rq_daddrlen = svc_addr_len(svc_daddr(rqstp));
if (skb_is_nonlinear(skb)) {
/* we have to copy */
local_bh_disable();
- if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb)) {
- local_bh_enable();
- /* checksum error */
- goto out_free;
- }
+ if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb))
+ goto out_bh_enable;
local_bh_enable();
consume_skb(skb);
} else {
@@ -497,6 +527,20 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
serv->sv_stats->netudpcnt++;
return len;
+
+out_recv_err:
+ if (err != -EAGAIN) {
+ /* possibly an icmp error */
+ set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
+ }
+ trace_svcsock_udp_recv_err(&svsk->sk_xprt, err);
+ return 0;
+out_cmsg_err:
+ net_warn_ratelimited("svc: received unknown control message %d/%d; dropping RPC reply datagram\n",
+ cmh->cmsg_level, cmh->cmsg_type);
+ goto out_free;
+out_bh_enable:
+ local_bh_enable();
out_free:
kfree_skb(skb);
return 0;
@@ -506,6 +550,9 @@ out_free:
* svc_udp_sendto - Send out a reply on a UDP socket
* @rqstp: completed svc_rqst
*
+ * xpt_mutex ensures @rqstp's whole message is written to the socket
+ * without interruption.
+ *
* Returns the number of bytes sent, or a negative errno.
*/
static int svc_udp_sendto(struct svc_rqst *rqstp)
@@ -527,10 +574,15 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
unsigned int uninitialized_var(sent);
int err;
- svc_release_udp_skb(rqstp);
+ svc_udp_release_rqst(rqstp);
svc_set_cmsg_data(rqstp, cmh);
+ mutex_lock(&xprt->xpt_mutex);
+
+ if (svc_xprt_is_dead(xprt))
+ goto out_notconn;
+
err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
xdr_free_bvec(xdr);
if (err == -ECONNREFUSED) {
@@ -538,9 +590,16 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
xdr_free_bvec(xdr);
}
+ trace_svcsock_udp_send(xprt, err);
+
+ mutex_unlock(&xprt->xpt_mutex);
if (err < 0)
return err;
return sent;
+
+out_notconn:
+ mutex_unlock(&xprt->xpt_mutex);
+ return -ENOTCONN;
}
static int svc_udp_has_wspace(struct svc_xprt *xprt)
@@ -584,7 +643,7 @@ static const struct svc_xprt_ops svc_udp_ops = {
.xpo_recvfrom = svc_udp_recvfrom,
.xpo_sendto = svc_udp_sendto,
.xpo_read_payload = svc_sock_read_payload,
- .xpo_release_rqst = svc_release_udp_skb,
+ .xpo_release_rqst = svc_udp_release_rqst,
.xpo_detach = svc_sock_detach,
.xpo_free = svc_sock_free,
.xpo_has_wspace = svc_udp_has_wspace,
@@ -603,7 +662,7 @@ static struct svc_xprt_class svc_udp_class = {
static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
{
- int err, level, optname, one = 1;
+ int level, optname, one = 1;
svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_udp_class,
&svsk->sk_xprt, serv);
@@ -634,9 +693,8 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
default:
BUG();
}
- err = kernel_setsockopt(svsk->sk_sock, level, optname,
- (char *)&one, sizeof(one));
- dprintk("svc: kernel_setsockopt returned %d\n", err);
+ kernel_setsockopt(svsk->sk_sock, level, optname, (char *)&one,
+ sizeof(one));
}
/*
@@ -647,9 +705,6 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
{
struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
- dprintk("svc: socket %p TCP (listen) state change %d\n",
- sk, sk->sk_state);
-
if (svsk) {
/* Refer to svc_setup_socket() for details. */
rmb();
@@ -670,8 +725,7 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
if (svsk) {
set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
svc_xprt_enqueue(&svsk->sk_xprt);
- } else
- printk("svc: socket %p: no user data\n", sk);
+ }
}
}
@@ -682,15 +736,11 @@ static void svc_tcp_state_change(struct sock *sk)
{
struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
- dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n",
- sk, sk->sk_state, sk->sk_user_data);
-
- if (!svsk)
- printk("svc: socket %p: no user data\n", sk);
- else {
+ if (svsk) {
/* Refer to svc_setup_socket() for details. */
rmb();
svsk->sk_ostate(sk);
+ trace_svcsock_tcp_state(&svsk->sk_xprt, svsk->sk_sock);
if (sk->sk_state != TCP_ESTABLISHED) {
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
svc_xprt_enqueue(&svsk->sk_xprt);
@@ -711,9 +761,7 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
struct socket *newsock;
struct svc_sock *newsvsk;
int err, slen;
- RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
- dprintk("svc: tcp_accept %p sock %p\n", svsk, sock);
if (!sock)
return NULL;
@@ -726,30 +774,18 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
else if (err != -EAGAIN)
net_warn_ratelimited("%s: accept failed (err %d)!\n",
serv->sv_name, -err);
+ trace_svcsock_accept_err(xprt, serv->sv_name, err);
return NULL;
}
set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
err = kernel_getpeername(newsock, sin);
if (err < 0) {
- net_warn_ratelimited("%s: peername failed (err %d)!\n",
- serv->sv_name, -err);
+ trace_svcsock_getpeername_err(xprt, serv->sv_name, err);
goto failed; /* aborted connection or whatever */
}
slen = err;
- /* Ideally, we would want to reject connections from unauthorized
- * hosts here, but when we get encryption, the IP of the host won't
- * tell us anything. For now just warn about unpriv connections.
- */
- if (!svc_port_is_privileged(sin)) {
- dprintk("%s: connect from unprivileged port: %s\n",
- serv->sv_name,
- __svc_print_addr(sin, buf, sizeof(buf)));
- }
- dprintk("%s: connect from %s\n", serv->sv_name,
- __svc_print_addr(sin, buf, sizeof(buf)));
-
/* Reset the inherited callbacks before calling svc_setup_socket */
newsock->sk->sk_state_change = svsk->sk_ostate;
newsock->sk->sk_data_ready = svsk->sk_odata;
@@ -767,10 +803,8 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen);
err = kernel_getsockname(newsock, sin);
slen = err;
- if (unlikely(err < 0)) {
- dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err);
+ if (unlikely(err < 0))
slen = offsetof(struct sockaddr, sa_data);
- }
svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen);
if (sock_is_loopback(newsock->sk))
@@ -787,13 +821,14 @@ failed:
return NULL;
}
-static unsigned int svc_tcp_restore_pages(struct svc_sock *svsk, struct svc_rqst *rqstp)
+static size_t svc_tcp_restore_pages(struct svc_sock *svsk,
+ struct svc_rqst *rqstp)
{
- unsigned int i, len, npages;
+ size_t len = svsk->sk_datalen;
+ unsigned int i, npages;
- if (svsk->sk_datalen == 0)
+ if (!len)
return 0;
- len = svsk->sk_datalen;
npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
for (i = 0; i < npages; i++) {
if (rqstp->rq_pages[i] != NULL)
@@ -842,47 +877,45 @@ out:
}
/*
- * Receive fragment record header.
- * If we haven't gotten the record length yet, get the next four bytes.
+ * Receive fragment record header into sk_marker.
*/
-static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
+static ssize_t svc_tcp_read_marker(struct svc_sock *svsk,
+ struct svc_rqst *rqstp)
{
- struct svc_serv *serv = svsk->sk_xprt.xpt_server;
- unsigned int want;
- int len;
+ ssize_t want, len;
+ /* If we haven't gotten the record length yet,
+ * get the next four bytes.
+ */
if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) {
+ struct msghdr msg = { NULL };
struct kvec iov;
want = sizeof(rpc_fraghdr) - svsk->sk_tcplen;
- iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen;
+ iov.iov_base = ((char *)&svsk->sk_marker) + svsk->sk_tcplen;
iov.iov_len = want;
- len = svc_recvfrom(rqstp, &iov, 1, want, 0);
+ iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, want);
+ len = sock_recvmsg(svsk->sk_sock, &msg, MSG_DONTWAIT);
if (len < 0)
- goto error;
+ return len;
svsk->sk_tcplen += len;
-
if (len < want) {
- dprintk("svc: short recvfrom while reading record "
- "length (%d of %d)\n", len, want);
- return -EAGAIN;
+ /* call again to read the remaining bytes */
+ goto err_short;
}
-
- dprintk("svc: TCP record, %d bytes\n", svc_sock_reclen(svsk));
+ trace_svcsock_marker(&svsk->sk_xprt, svsk->sk_marker);
if (svc_sock_reclen(svsk) + svsk->sk_datalen >
- serv->sv_max_mesg) {
- net_notice_ratelimited("RPC: fragment too large: %d\n",
- svc_sock_reclen(svsk));
- goto err_delete;
- }
+ svsk->sk_xprt.xpt_server->sv_max_mesg)
+ goto err_too_large;
}
-
return svc_sock_reclen(svsk);
-error:
- dprintk("RPC: TCP recv_record got %d\n", len);
- return len;
-err_delete:
+
+err_too_large:
+ net_notice_ratelimited("svc: %s %s RPC fragment too large: %d\n",
+ __func__, svsk->sk_xprt.xpt_server->sv_name,
+ svc_sock_reclen(svsk));
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
+err_short:
return -EAGAIN;
}
@@ -931,87 +964,58 @@ unlock_eagain:
return -EAGAIN;
}
-static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len)
-{
- int i = 0;
- int t = 0;
-
- while (t < len) {
- vec[i].iov_base = page_address(pages[i]);
- vec[i].iov_len = PAGE_SIZE;
- i++;
- t += PAGE_SIZE;
- }
- return i;
-}
-
static void svc_tcp_fragment_received(struct svc_sock *svsk)
{
/* If we have more data, signal svc_xprt_enqueue() to try again */
- dprintk("svc: TCP %s record (%d bytes)\n",
- svc_sock_final_rec(svsk) ? "final" : "nonfinal",
- svc_sock_reclen(svsk));
svsk->sk_tcplen = 0;
- svsk->sk_reclen = 0;
+ svsk->sk_marker = xdr_zero;
}
-/*
- * Receive data from a TCP socket.
+/**
+ * svc_tcp_recvfrom - Receive data from a TCP socket
+ * @rqstp: request structure into which to receive an RPC Call
+ *
+ * Called in a loop when XPT_DATA has been set.
+ *
+ * Read the 4-byte stream record marker, then use the record length
+ * in that marker to set up exactly the resources needed to receive
+ * the next RPC message into @rqstp.
+ *
+ * Returns:
+ * On success, the number of bytes in a received RPC Call, or
+ * %0 if a complete RPC Call message was not ready to return
+ *
+ * The zero return case handles partial receives and callback Replies.
+ * The state of a partial receive is preserved in the svc_sock for
+ * the next call to svc_tcp_recvfrom.
*/
static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
{
struct svc_sock *svsk =
container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
struct svc_serv *serv = svsk->sk_xprt.xpt_server;
- int len;
- struct kvec *vec;
- unsigned int want, base;
+ size_t want, base;
+ ssize_t len;
__be32 *p;
__be32 calldir;
- int pnum;
-
- dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
- svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags),
- test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags),
- test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));
- len = svc_tcp_recv_record(svsk, rqstp);
+ clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
+ len = svc_tcp_read_marker(svsk, rqstp);
if (len < 0)
goto error;
base = svc_tcp_restore_pages(svsk, rqstp);
- want = svc_sock_reclen(svsk) - (svsk->sk_tcplen - sizeof(rpc_fraghdr));
-
- vec = rqstp->rq_vec;
-
- pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0], base + want);
-
- rqstp->rq_respages = &rqstp->rq_pages[pnum];
- rqstp->rq_next_page = rqstp->rq_respages + 1;
-
- /* Now receive data */
- len = svc_recvfrom(rqstp, vec, pnum, base + want, base);
+ want = len - (svsk->sk_tcplen - sizeof(rpc_fraghdr));
+ len = svc_tcp_read_msg(rqstp, base + want, base);
if (len >= 0) {
+ trace_svcsock_tcp_recv(&svsk->sk_xprt, len);
svsk->sk_tcplen += len;
svsk->sk_datalen += len;
}
- if (len != want || !svc_sock_final_rec(svsk)) {
- svc_tcp_save_pages(svsk, rqstp);
- if (len < 0 && len != -EAGAIN)
- goto err_delete;
- if (len == want)
- svc_tcp_fragment_received(svsk);
- else
- dprintk("svc: incomplete TCP record (%d of %d)\n",
- (int)(svsk->sk_tcplen - sizeof(rpc_fraghdr)),
- svc_sock_reclen(svsk));
- goto err_noclose;
- }
-
- if (svsk->sk_datalen < 8) {
- svsk->sk_datalen = 0;
- goto err_delete; /* client is nuts. */
- }
+ if (len != want || !svc_sock_final_rec(svsk))
+ goto err_incomplete;
+ if (svsk->sk_datalen < 8)
+ goto err_nuts;
rqstp->rq_arg.len = svsk->sk_datalen;
rqstp->rq_arg.page_base = 0;
@@ -1046,14 +1050,26 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
return rqstp->rq_arg.len;
+err_incomplete:
+ svc_tcp_save_pages(svsk, rqstp);
+ if (len < 0 && len != -EAGAIN)
+ goto err_delete;
+ if (len == want)
+ svc_tcp_fragment_received(svsk);
+ else
+ trace_svcsock_tcp_recv_short(&svsk->sk_xprt,
+ svc_sock_reclen(svsk),
+ svsk->sk_tcplen - sizeof(rpc_fraghdr));
+ goto err_noclose;
error:
if (len != -EAGAIN)
goto err_delete;
- dprintk("RPC: TCP recvfrom got EAGAIN\n");
+ trace_svcsock_tcp_recv_eagain(&svsk->sk_xprt, 0);
return 0;
+err_nuts:
+ svsk->sk_datalen = 0;
err_delete:
- printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
- svsk->sk_xprt.xpt_server->sv_name, -len);
+ trace_svcsock_tcp_recv_err(&svsk->sk_xprt, len);
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
err_noclose:
return 0; /* record not complete */
@@ -1063,6 +1079,9 @@ err_noclose:
* svc_tcp_sendto - Send out a reply on a TCP socket
* @rqstp: completed svc_rqst
*
+ * xpt_mutex ensures @rqstp's whole message is written to the socket
+ * without interruption.
+ *
* Returns the number of bytes sent, or a negative errno.
*/
static int svc_tcp_sendto(struct svc_rqst *rqstp)
@@ -1078,14 +1097,22 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
unsigned int uninitialized_var(sent);
int err;
- svc_release_skb(rqstp);
+ svc_tcp_release_rqst(rqstp);
+ mutex_lock(&xprt->xpt_mutex);
+ if (svc_xprt_is_dead(xprt))
+ goto out_notconn;
err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, marker, &sent);
xdr_free_bvec(xdr);
+ trace_svcsock_tcp_send(xprt, err < 0 ? err : sent);
if (err < 0 || sent != (xdr->len + sizeof(marker)))
goto out_close;
+ mutex_unlock(&xprt->xpt_mutex);
return sent;
+out_notconn:
+ mutex_unlock(&xprt->xpt_mutex);
+ return -ENOTCONN;
out_close:
pr_notice("rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n",
xprt->xpt_server->sv_name,
@@ -1093,6 +1120,7 @@ out_close:
(err < 0) ? err : sent, xdr->len);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
svc_xprt_enqueue(xprt);
+ mutex_unlock(&xprt->xpt_mutex);
return -EAGAIN;
}
@@ -1109,7 +1137,7 @@ static const struct svc_xprt_ops svc_tcp_ops = {
.xpo_recvfrom = svc_tcp_recvfrom,
.xpo_sendto = svc_tcp_sendto,
.xpo_read_payload = svc_sock_read_payload,
- .xpo_release_rqst = svc_release_skb,
+ .xpo_release_rqst = svc_tcp_release_rqst,
.xpo_detach = svc_tcp_sock_detach,
.xpo_free = svc_sock_free,
.xpo_has_wspace = svc_tcp_has_wspace,
@@ -1147,18 +1175,16 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags);
if (sk->sk_state == TCP_LISTEN) {
- dprintk("setting up TCP socket for listening\n");
strcpy(svsk->sk_xprt.xpt_remotebuf, "listener");
set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags);
sk->sk_data_ready = svc_tcp_listen_data_ready;
set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
} else {
- dprintk("setting up TCP socket for reading\n");
sk->sk_state_change = svc_tcp_state_change;
sk->sk_data_ready = svc_data_ready;
sk->sk_write_space = svc_write_space;
- svsk->sk_reclen = 0;
+ svsk->sk_marker = xdr_zero;
svsk->sk_tcplen = 0;
svsk->sk_datalen = 0;
memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages));
@@ -1203,7 +1229,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
int pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
int err = 0;
- dprintk("svc: svc_setup_socket %p\n", sock);
svsk = kzalloc(sizeof(*svsk), GFP_KERNEL);
if (!svsk)
return ERR_PTR(-ENOMEM);
@@ -1240,12 +1265,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
else
svc_tcp_init(svsk, serv);
- dprintk("svc: svc_setup_socket created %p (inet %p), "
- "listen %d close %d\n",
- svsk, svsk->sk_sk,
- test_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags),
- test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));
-
+ trace_svcsock_new_socket(sock);
return svsk;
}
@@ -1338,11 +1358,6 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
int newlen;
int family;
int val;
- RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
-
- dprintk("svc: svc_create_socket(%s, %d, %s)\n",
- serv->sv_program->pg_name, protocol,
- __svc_print_addr(sin, buf, sizeof(buf)));
if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) {
printk(KERN_WARNING "svc: only UDP and TCP "
@@ -1402,7 +1417,6 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
svc_xprt_set_local(&svsk->sk_xprt, newsin, newlen);
return (struct svc_xprt *)svsk;
bummer:
- dprintk("svc: svc_create_socket error = %d\n", -error);
sock_release(sock);
return ERR_PTR(error);
}
@@ -1416,8 +1430,6 @@ static void svc_sock_detach(struct svc_xprt *xprt)
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
struct sock *sk = svsk->sk_sk;
- dprintk("svc: svc_sock_detach(%p)\n", svsk);
-
/* put back the old socket callbacks */
lock_sock(sk);
sk->sk_state_change = svsk->sk_ostate;
@@ -1434,8 +1446,6 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt)
{
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
- dprintk("svc: svc_tcp_sock_detach(%p)\n", svsk);
-
svc_sock_detach(xprt);
if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
@@ -1450,7 +1460,6 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt)
static void svc_sock_free(struct svc_xprt *xprt)
{
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
- dprintk("svc: svc_sock_free(%p)\n", svsk);
if (svsk->sk_sock->file)
sockfd_put(svsk->sk_sock);
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 15b58c5144f9..6f7d82fb1eb0 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -1150,6 +1150,47 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
}
EXPORT_SYMBOL_GPL(xdr_buf_subsegment);
+/**
+ * xdr_buf_trim - lop at most "len" bytes off the end of "buf"
+ * @buf: buf to be trimmed
+ * @len: number of bytes to reduce "buf" by
+ *
+ * Trim an xdr_buf by the given number of bytes by fixing up the lengths. Note
+ * that it's possible that we'll trim less than that amount if the xdr_buf is
+ * too small, or if (for instance) it's all in the head and the parser has
+ * already read too far into it.
+ */
+void xdr_buf_trim(struct xdr_buf *buf, unsigned int len)
+{
+ size_t cur;
+ unsigned int trim = len;
+
+ if (buf->tail[0].iov_len) {
+ cur = min_t(size_t, buf->tail[0].iov_len, trim);
+ buf->tail[0].iov_len -= cur;
+ trim -= cur;
+ if (!trim)
+ goto fix_len;
+ }
+
+ if (buf->page_len) {
+ cur = min_t(unsigned int, buf->page_len, trim);
+ buf->page_len -= cur;
+ trim -= cur;
+ if (!trim)
+ goto fix_len;
+ }
+
+ if (buf->head[0].iov_len) {
+ cur = min_t(size_t, buf->head[0].iov_len, trim);
+ buf->head[0].iov_len -= cur;
+ trim -= cur;
+ }
+fix_len:
+ buf->len -= (len - trim);
+}
+EXPORT_SYMBOL_GPL(xdr_buf_trim);
+
static void __read_bytes_from_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len)
{
unsigned int this_len;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index af7eb8d202ae..1ee73f7cf931 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -10,59 +10,34 @@
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
-#define RPCDBG_FACILITY RPCDBG_SVCXPRT
-
-#undef SVCRDMA_BACKCHANNEL_DEBUG
-
/**
- * svc_rdma_handle_bc_reply - Process incoming backchannel reply
- * @xprt: controlling backchannel transport
- * @rdma_resp: pointer to incoming transport header
- * @rcvbuf: XDR buffer into which to decode the reply
+ * svc_rdma_handle_bc_reply - Process incoming backchannel Reply
+ * @rqstp: resources for handling the Reply
+ * @rctxt: Received message
*
- * Returns:
- * %0 if @rcvbuf is filled in, xprt_complete_rqst called,
- * %-EAGAIN if server should call ->recvfrom again.
*/
-int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp,
- struct xdr_buf *rcvbuf)
+void svc_rdma_handle_bc_reply(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *rctxt)
{
+ struct svc_xprt *sxprt = rqstp->rq_xprt;
+ struct rpc_xprt *xprt = sxprt->xpt_bc_xprt;
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ struct xdr_buf *rcvbuf = &rqstp->rq_arg;
struct kvec *dst, *src = &rcvbuf->head[0];
+ __be32 *rdma_resp = rctxt->rc_recv_buf;
struct rpc_rqst *req;
u32 credits;
- size_t len;
- __be32 xid;
- __be32 *p;
- int ret;
-
- p = (__be32 *)src->iov_base;
- len = src->iov_len;
- xid = *rdma_resp;
-
-#ifdef SVCRDMA_BACKCHANNEL_DEBUG
- pr_info("%s: xid=%08x, length=%zu\n",
- __func__, be32_to_cpu(xid), len);
- pr_info("%s: RPC/RDMA: %*ph\n",
- __func__, (int)RPCRDMA_HDRLEN_MIN, rdma_resp);
- pr_info("%s: RPC: %*ph\n",
- __func__, (int)len, p);
-#endif
-
- ret = -EAGAIN;
- if (src->iov_len < 24)
- goto out_shortreply;
spin_lock(&xprt->queue_lock);
- req = xprt_lookup_rqst(xprt, xid);
+ req = xprt_lookup_rqst(xprt, *rdma_resp);
if (!req)
- goto out_notfound;
+ goto out_unlock;
dst = &req->rq_private_buf.head[0];
memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf));
- if (dst->iov_len < len)
+ if (dst->iov_len < src->iov_len)
goto out_unlock;
- memcpy(dst->iov_base, p, len);
+ memcpy(dst->iov_base, src->iov_base, src->iov_len);
xprt_pin_rqst(req);
spin_unlock(&xprt->queue_lock);
@@ -71,31 +46,17 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp,
credits = 1; /* don't deadlock */
else if (credits > r_xprt->rx_buf.rb_bc_max_requests)
credits = r_xprt->rx_buf.rb_bc_max_requests;
-
spin_lock(&xprt->transport_lock);
xprt->cwnd = credits << RPC_CWNDSHIFT;
spin_unlock(&xprt->transport_lock);
spin_lock(&xprt->queue_lock);
- ret = 0;
xprt_complete_rqst(req->rq_task, rcvbuf->len);
xprt_unpin_rqst(req);
rcvbuf->len = 0;
out_unlock:
spin_unlock(&xprt->queue_lock);
-out:
- return ret;
-
-out_shortreply:
- dprintk("svcrdma: short bc reply: xprt=%p, len=%zu\n",
- xprt, src->iov_len);
- goto out;
-
-out_notfound:
- dprintk("svcrdma: unrecognized bc reply: xprt=%p, xid=%08x\n",
- xprt, be32_to_cpu(xid));
- goto out_unlock;
}
/* Send a backwards direction RPC call.
@@ -192,10 +153,6 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst)
*p++ = xdr_zero;
*p = xdr_zero;
-#ifdef SVCRDMA_BACKCHANNEL_DEBUG
- pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer);
-#endif
-
rqst->rq_xtime = ktime_get();
rc = svc_rdma_bc_sendto(rdma, rqst, ctxt);
if (rc)
@@ -206,45 +163,36 @@ put_ctxt:
svc_rdma_send_ctxt_put(rdma, ctxt);
drop_connection:
- dprintk("svcrdma: failed to send bc call\n");
return -ENOTCONN;
}
-/* Send an RPC call on the passive end of a transport
- * connection.
+/**
+ * xprt_rdma_bc_send_request - Send a reverse-direction Call
+ * @rqst: rpc_rqst containing Call message to be sent
+ *
+ * Return values:
+ * %0 if the message was sent successfully
+ * %ENOTCONN if the message was not sent
*/
-static int
-xprt_rdma_bc_send_request(struct rpc_rqst *rqst)
+static int xprt_rdma_bc_send_request(struct rpc_rqst *rqst)
{
struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt;
- struct svcxprt_rdma *rdma;
+ struct svcxprt_rdma *rdma =
+ container_of(sxprt, struct svcxprt_rdma, sc_xprt);
int ret;
- dprintk("svcrdma: sending bc call with xid: %08x\n",
- be32_to_cpu(rqst->rq_xid));
+ if (test_bit(XPT_DEAD, &sxprt->xpt_flags))
+ return -ENOTCONN;
- mutex_lock(&sxprt->xpt_mutex);
-
- ret = -ENOTCONN;
- rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
- if (!test_bit(XPT_DEAD, &sxprt->xpt_flags)) {
- ret = rpcrdma_bc_send_request(rdma, rqst);
- if (ret == -ENOTCONN)
- svc_close_xprt(sxprt);
- }
-
- mutex_unlock(&sxprt->xpt_mutex);
-
- if (ret < 0)
- return ret;
- return 0;
+ ret = rpcrdma_bc_send_request(rdma, rqst);
+ if (ret == -ENOTCONN)
+ svc_close_xprt(sxprt);
+ return ret;
}
static void
xprt_rdma_bc_close(struct rpc_xprt *xprt)
{
- dprintk("svcrdma: %s: xprt %p\n", __func__, xprt);
-
xprt_disconnect_done(xprt);
xprt->cwnd = RPC_CWNDSHIFT;
}
@@ -252,8 +200,6 @@ xprt_rdma_bc_close(struct rpc_xprt *xprt)
static void
xprt_rdma_bc_put(struct rpc_xprt *xprt)
{
- dprintk("svcrdma: %s: xprt %p\n", __func__, xprt);
-
xprt_rdma_free_addresses(xprt);
xprt_free(xprt);
}
@@ -288,19 +234,14 @@ xprt_setup_rdma_bc(struct xprt_create *args)
struct rpc_xprt *xprt;
struct rpcrdma_xprt *new_xprt;
- if (args->addrlen > sizeof(xprt->addr)) {
- dprintk("RPC: %s: address too large\n", __func__);
+ if (args->addrlen > sizeof(xprt->addr))
return ERR_PTR(-EBADF);
- }
xprt = xprt_alloc(args->net, sizeof(*new_xprt),
RPCRDMA_MAX_BC_REQUESTS,
RPCRDMA_MAX_BC_REQUESTS);
- if (!xprt) {
- dprintk("RPC: %s: couldn't allocate rpc_xprt\n",
- __func__);
+ if (!xprt)
return ERR_PTR(-ENOMEM);
- }
xprt->timeout = &xprt_rdma_bc_timeout;
xprt_set_bound(xprt);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index efa5fcb5793f..e426fedb9524 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -665,23 +665,23 @@ static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg,
return hdr_len;
out_short:
- trace_svcrdma_decode_short(rq_arg->len);
+ trace_svcrdma_decode_short_err(rq_arg->len);
return -EINVAL;
out_version:
- trace_svcrdma_decode_badvers(rdma_argp);
+ trace_svcrdma_decode_badvers_err(rdma_argp);
return -EPROTONOSUPPORT;
out_drop:
- trace_svcrdma_decode_drop(rdma_argp);
+ trace_svcrdma_decode_drop_err(rdma_argp);
return 0;
out_proc:
- trace_svcrdma_decode_badproc(rdma_argp);
+ trace_svcrdma_decode_badproc_err(rdma_argp);
return -EINVAL;
out_inval:
- trace_svcrdma_decode_parse(rdma_argp);
+ trace_svcrdma_decode_parse_err(rdma_argp);
return -EINVAL;
}
@@ -878,12 +878,9 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
goto out_drop;
rqstp->rq_xprt_hlen = ret;
- if (svc_rdma_is_backchannel_reply(xprt, p)) {
- ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, p,
- &rqstp->rq_arg);
- svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
- return ret;
- }
+ if (svc_rdma_is_backchannel_reply(xprt, p))
+ goto out_backchannel;
+
svc_rdma_get_inv_rkey(rdma_xprt, ctxt);
p += rpcrdma_fixed_maxsz;
@@ -913,6 +910,8 @@ out_postfail:
svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
return ret;
+out_backchannel:
+ svc_rdma_handle_bc_reply(rqstp, ctxt);
out_drop:
svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
return 0;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 23c2d3ce0dc9..5eb35309ecef 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -9,13 +9,10 @@
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/sunrpc/svc_rdma.h>
-#include <linux/sunrpc/debug.h>
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
-#define RPCDBG_FACILITY RPCDBG_SVCXPRT
-
static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc);
static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc);
@@ -39,7 +36,7 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc);
struct svc_rdma_rw_ctxt {
struct list_head rw_list;
struct rdma_rw_ctx rw_ctx;
- int rw_nents;
+ unsigned int rw_nents;
struct sg_table rw_sg_table;
struct scatterlist rw_first_sgl[];
};
@@ -67,19 +64,22 @@ svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges)
ctxt = kmalloc(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE),
GFP_KERNEL);
if (!ctxt)
- goto out;
+ goto out_noctx;
INIT_LIST_HEAD(&ctxt->rw_list);
}
ctxt->rw_sg_table.sgl = ctxt->rw_first_sgl;
if (sg_alloc_table_chained(&ctxt->rw_sg_table, sges,
ctxt->rw_sg_table.sgl,
- SG_CHUNK_SIZE)) {
- kfree(ctxt);
- ctxt = NULL;
- }
-out:
+ SG_CHUNK_SIZE))
+ goto out_free;
return ctxt;
+
+out_free:
+ kfree(ctxt);
+out_noctx:
+ trace_svcrdma_no_rwctx_err(rdma, sges);
+ return NULL;
}
static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
@@ -107,6 +107,34 @@ void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma)
}
}
+/**
+ * svc_rdma_rw_ctx_init - Prepare a R/W context for I/O
+ * @rdma: controlling transport instance
+ * @ctxt: R/W context to prepare
+ * @offset: RDMA offset
+ * @handle: RDMA tag/handle
+ * @direction: I/O direction
+ *
+ * Returns on success, the number of WQEs that will be needed
+ * on the workqueue, or a negative errno.
+ */
+static int svc_rdma_rw_ctx_init(struct svcxprt_rdma *rdma,
+ struct svc_rdma_rw_ctxt *ctxt,
+ u64 offset, u32 handle,
+ enum dma_data_direction direction)
+{
+ int ret;
+
+ ret = rdma_rw_ctx_init(&ctxt->rw_ctx, rdma->sc_qp, rdma->sc_port_num,
+ ctxt->rw_sg_table.sgl, ctxt->rw_nents,
+ 0, offset, handle, direction);
+ if (unlikely(ret < 0)) {
+ svc_rdma_put_rw_ctxt(rdma, ctxt);
+ trace_svcrdma_dma_map_rw_err(rdma, ctxt->rw_nents, ret);
+ }
+ return ret;
+}
+
/* A chunk context tracks all I/O for moving one Read or Write
* chunk. This is a a set of rdma_rw's that handle data movement
* for all segments of one chunk.
@@ -428,15 +456,13 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info,
ctxt = svc_rdma_get_rw_ctxt(rdma,
(write_len >> PAGE_SHIFT) + 2);
if (!ctxt)
- goto out_noctx;
+ return -ENOMEM;
constructor(info, write_len, ctxt);
- ret = rdma_rw_ctx_init(&ctxt->rw_ctx, rdma->sc_qp,
- rdma->sc_port_num, ctxt->rw_sg_table.sgl,
- ctxt->rw_nents, 0, seg_offset,
- seg_handle, DMA_TO_DEVICE);
+ ret = svc_rdma_rw_ctx_init(rdma, ctxt, seg_offset, seg_handle,
+ DMA_TO_DEVICE);
if (ret < 0)
- goto out_initerr;
+ return -EIO;
trace_svcrdma_send_wseg(seg_handle, write_len, seg_offset);
@@ -455,18 +481,9 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info,
return 0;
out_overflow:
- dprintk("svcrdma: inadequate space in Write chunk (%u)\n",
- info->wi_nsegs);
+ trace_svcrdma_small_wrch_err(rdma, remaining, info->wi_seg_no,
+ info->wi_nsegs);
return -E2BIG;
-
-out_noctx:
- dprintk("svcrdma: no R/W ctxs available\n");
- return -ENOMEM;
-
-out_initerr:
- svc_rdma_put_rw_ctxt(rdma, ctxt);
- trace_svcrdma_dma_map_rwctx(rdma, ret);
- return -EIO;
}
/* Send one of an xdr_buf's kvecs by itself. To send a Reply
@@ -616,7 +633,7 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info,
sge_no = PAGE_ALIGN(info->ri_pageoff + len) >> PAGE_SHIFT;
ctxt = svc_rdma_get_rw_ctxt(cc->cc_rdma, sge_no);
if (!ctxt)
- goto out_noctx;
+ return -ENOMEM;
ctxt->rw_nents = sge_no;
sg = ctxt->rw_sg_table.sgl;
@@ -646,29 +663,18 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info,
goto out_overrun;
}
- ret = rdma_rw_ctx_init(&ctxt->rw_ctx, cc->cc_rdma->sc_qp,
- cc->cc_rdma->sc_port_num,
- ctxt->rw_sg_table.sgl, ctxt->rw_nents,
- 0, offset, rkey, DMA_FROM_DEVICE);
+ ret = svc_rdma_rw_ctx_init(cc->cc_rdma, ctxt, offset, rkey,
+ DMA_FROM_DEVICE);
if (ret < 0)
- goto out_initerr;
+ return -EIO;
list_add(&ctxt->rw_list, &cc->cc_rwctxts);
cc->cc_sqecount += ret;
return 0;
-out_noctx:
- dprintk("svcrdma: no R/W ctxs available\n");
- return -ENOMEM;
-
out_overrun:
- dprintk("svcrdma: request overruns rq_pages\n");
+ trace_svcrdma_page_overrun_err(cc->cc_rdma, rqstp, info->ri_pageno);
return -EINVAL;
-
-out_initerr:
- trace_svcrdma_dma_map_rwctx(cc->cc_rdma, ret);
- svc_rdma_put_rw_ctxt(cc->cc_rdma, ctxt);
- return -EIO;
}
/* Walk the segments in the Read chunk starting at @p and construct
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index b6c8643867f2..38e7c3c8c4a9 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -868,12 +868,10 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
__be32 *p;
int ret;
- /* Create the RDMA response header. xprt->xpt_mutex,
- * acquired in svc_send(), serializes RPC replies. The
- * code path below that inserts the credit grant value
- * into each transport header runs only inside this
- * critical section.
- */
+ ret = -ENOTCONN;
+ if (svc_xprt_is_dead(xprt))
+ goto err0;
+
ret = -ENOMEM;
sctxt = svc_rdma_send_ctxt_get(rdma);
if (!sctxt)
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index ea54785db4f8..d38be57b00ed 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -211,7 +211,12 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id,
newxprt->sc_ord = param->initiator_depth;
sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
- svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa));
+ newxprt->sc_xprt.xpt_remotelen = svc_addr_len(sa);
+ memcpy(&newxprt->sc_xprt.xpt_remote, sa,
+ newxprt->sc_xprt.xpt_remotelen);
+ snprintf(newxprt->sc_xprt.xpt_remotebuf,
+ sizeof(newxprt->sc_xprt.xpt_remotebuf) - 1, "%pISc", sa);
+
/* The remote port is arbitrary and not under the control of the
* client ULP. Set it to a fixed value so that the DRC continues
* to be effective after a reconnect.
@@ -309,11 +314,8 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
struct svcxprt_rdma *cma_xprt;
int ret;
- dprintk("svcrdma: Creating RDMA listener\n");
- if ((sa->sa_family != AF_INET) && (sa->sa_family != AF_INET6)) {
- dprintk("svcrdma: Address family %d is not supported.\n", sa->sa_family);
+ if (sa->sa_family != AF_INET && sa->sa_family != AF_INET6)
return ERR_PTR(-EAFNOSUPPORT);
- }
cma_xprt = svc_rdma_create_xprt(serv, net);
if (!cma_xprt)
return ERR_PTR(-ENOMEM);
@@ -324,7 +326,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(listen_id)) {
ret = PTR_ERR(listen_id);
- dprintk("svcrdma: rdma_create_id failed = %d\n", ret);
goto err0;
}
@@ -333,23 +334,17 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
*/
#if IS_ENABLED(CONFIG_IPV6)
ret = rdma_set_afonly(listen_id, 1);
- if (ret) {
- dprintk("svcrdma: rdma_set_afonly failed = %d\n", ret);
+ if (ret)
goto err1;
- }
#endif
ret = rdma_bind_addr(listen_id, sa);
- if (ret) {
- dprintk("svcrdma: rdma_bind_addr failed = %d\n", ret);
+ if (ret)
goto err1;
- }
cma_xprt->sc_cm_id = listen_id;
ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG);
- if (ret) {
- dprintk("svcrdma: rdma_listen failed = %d\n", ret);
+ if (ret)
goto err1;
- }
/*
* We need to use the address from the cm_id in case the
@@ -405,9 +400,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
if (!newxprt)
return NULL;
- dprintk("svcrdma: newxprt from accept queue = %p, cm_id=%p\n",
- newxprt, newxprt->sc_cm_id);
-
dev = newxprt->sc_cm_id->device;
newxprt->sc_port_num = newxprt->sc_cm_id->port_num;
@@ -443,21 +435,17 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
newxprt->sc_pd = ib_alloc_pd(dev, 0);
if (IS_ERR(newxprt->sc_pd)) {
- dprintk("svcrdma: error creating PD for connect request\n");
+ trace_svcrdma_pd_err(newxprt, PTR_ERR(newxprt->sc_pd));
goto errout;
}
newxprt->sc_sq_cq = ib_alloc_cq_any(dev, newxprt, newxprt->sc_sq_depth,
IB_POLL_WORKQUEUE);
- if (IS_ERR(newxprt->sc_sq_cq)) {
- dprintk("svcrdma: error creating SQ CQ for connect request\n");
+ if (IS_ERR(newxprt->sc_sq_cq))
goto errout;
- }
newxprt->sc_rq_cq =
ib_alloc_cq_any(dev, newxprt, rq_depth, IB_POLL_WORKQUEUE);
- if (IS_ERR(newxprt->sc_rq_cq)) {
- dprintk("svcrdma: error creating RQ CQ for connect request\n");
+ if (IS_ERR(newxprt->sc_rq_cq))
goto errout;
- }
memset(&qp_attr, 0, sizeof qp_attr);
qp_attr.event_handler = qp_event_handler;
@@ -481,7 +469,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, &qp_attr);
if (ret) {
- dprintk("svcrdma: failed to create QP, ret=%d\n", ret);
+ trace_svcrdma_qp_err(newxprt, ret);
goto errout;
}
newxprt->sc_qp = newxprt->sc_cm_id->qp;
@@ -489,8 +477,10 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
if (!(dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
newxprt->sc_snd_w_inv = false;
if (!rdma_protocol_iwarp(dev, newxprt->sc_port_num) &&
- !rdma_ib_or_roce(dev, newxprt->sc_port_num))
+ !rdma_ib_or_roce(dev, newxprt->sc_port_num)) {
+ trace_svcrdma_fabric_err(newxprt, -EINVAL);
goto errout;
+ }
if (!svc_rdma_post_recvs(newxprt))
goto errout;
@@ -512,15 +502,17 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
conn_param.initiator_depth = min_t(int, newxprt->sc_ord,
dev->attrs.max_qp_init_rd_atom);
if (!conn_param.initiator_depth) {
- dprintk("svcrdma: invalid ORD setting\n");
ret = -EINVAL;
+ trace_svcrdma_initdepth_err(newxprt, ret);
goto errout;
}
conn_param.private_data = &pmsg;
conn_param.private_data_len = sizeof(pmsg);
ret = rdma_accept(newxprt->sc_cm_id, &conn_param);
- if (ret)
+ if (ret) {
+ trace_svcrdma_accept_err(newxprt, ret);
goto errout;
+ }
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
dprintk("svcrdma: new connection %p accepted:\n", newxprt);
@@ -535,12 +527,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
dprintk(" ord : %d\n", conn_param.initiator_depth);
#endif
- trace_svcrdma_xprt_accept(&newxprt->sc_xprt);
return &newxprt->sc_xprt;
errout:
- dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret);
- trace_svcrdma_xprt_fail(&newxprt->sc_xprt);
/* Take a reference in case the DTO handler runs */
svc_xprt_get(&newxprt->sc_xprt);
if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp))
@@ -578,8 +567,6 @@ static void __svc_rdma_free(struct work_struct *work)
container_of(work, struct svcxprt_rdma, sc_work);
struct svc_xprt *xprt = &rdma->sc_xprt;
- trace_svcrdma_xprt_free(xprt);
-
if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
ib_drain_qp(rdma->sc_qp);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 845d0be805ec..839c49330785 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2548,8 +2548,16 @@ static int bc_sendto(struct rpc_rqst *req)
return sent;
}
-/*
- * The send routine. Borrows from svc_send
+/**
+ * bc_send_request - Send a backchannel Call on a TCP socket
+ * @req: rpc_rqst containing Call message to be sent
+ *
+ * xpt_mutex ensures @rqstp's whole message is written to the socket
+ * without interruption.
+ *
+ * Return values:
+ * %0 if the message was sent successfully
+ * %ENOTCONN if the message was not sent
*/
static int bc_send_request(struct rpc_rqst *req)
{
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 87466607097f..e370ad0edd76 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1739,22 +1739,21 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct sk_buff *skb,
return 0;
}
-static void tipc_sk_send_ack(struct tipc_sock *tsk)
+static struct sk_buff *tipc_sk_build_ack(struct tipc_sock *tsk)
{
struct sock *sk = &tsk->sk;
- struct net *net = sock_net(sk);
struct sk_buff *skb = NULL;
struct tipc_msg *msg;
u32 peer_port = tsk_peer_port(tsk);
u32 dnode = tsk_peer_node(tsk);
if (!tipc_sk_connected(sk))
- return;
+ return NULL;
skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0,
dnode, tsk_own_node(tsk), peer_port,
tsk->portid, TIPC_OK);
if (!skb)
- return;
+ return NULL;
msg = buf_msg(skb);
msg_set_conn_ack(msg, tsk->rcv_unacked);
tsk->rcv_unacked = 0;
@@ -1764,7 +1763,19 @@ static void tipc_sk_send_ack(struct tipc_sock *tsk)
tsk->rcv_win = tsk_adv_blocks(tsk->sk.sk_rcvbuf);
msg_set_adv_win(msg, tsk->rcv_win);
}
- tipc_node_xmit_skb(net, skb, dnode, msg_link_selector(msg));
+ return skb;
+}
+
+static void tipc_sk_send_ack(struct tipc_sock *tsk)
+{
+ struct sk_buff *skb;
+
+ skb = tipc_sk_build_ack(tsk);
+ if (!skb)
+ return;
+
+ tipc_node_xmit_skb(sock_net(&tsk->sk), skb, tsk_peer_node(tsk),
+ msg_link_selector(buf_msg(skb)));
}
static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
@@ -1938,7 +1949,6 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m,
bool peek = flags & MSG_PEEK;
int offset, required, copy, copied = 0;
int hlen, dlen, err, rc;
- bool ack = false;
long timeout;
/* Catch invalid receive attempts */
@@ -1983,7 +1993,6 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m,
/* Copy data if msg ok, otherwise return error/partial data */
if (likely(!err)) {
- ack = msg_ack_required(hdr);
offset = skb_cb->bytes_read;
copy = min_t(int, dlen - offset, buflen - copied);
rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy);
@@ -2011,7 +2020,7 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m,
/* Send connection flow control advertisement when applicable */
tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen);
- if (ack || tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE)
+ if (tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE)
tipc_sk_send_ack(tsk);
/* Exit if all requested data or FIN/error received */
@@ -2105,9 +2114,11 @@ static void tipc_sk_proto_rcv(struct sock *sk,
* tipc_sk_filter_connect - check incoming message for a connection-based socket
* @tsk: TIPC socket
* @skb: pointer to message buffer.
+ * @xmitq: for Nagle ACK if any
* Returns true if message should be added to receive queue, false otherwise
*/
-static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
+static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb,
+ struct sk_buff_head *xmitq)
{
struct sock *sk = &tsk->sk;
struct net *net = sock_net(sk);
@@ -2171,8 +2182,17 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
if (!skb_queue_empty(&sk->sk_write_queue))
tipc_sk_push_backlog(tsk);
/* Accept only connection-based messages sent by peer */
- if (likely(con_msg && !err && pport == oport && pnode == onode))
+ if (likely(con_msg && !err && pport == oport &&
+ pnode == onode)) {
+ if (msg_ack_required(hdr)) {
+ struct sk_buff *skb;
+
+ skb = tipc_sk_build_ack(tsk);
+ if (skb)
+ __skb_queue_tail(xmitq, skb);
+ }
return true;
+ }
if (!tsk_peer_msg(tsk, hdr))
return false;
if (!err)
@@ -2267,7 +2287,7 @@ static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb,
while ((skb = __skb_dequeue(&inputq))) {
hdr = buf_msg(skb);
limit = rcvbuf_limit(sk, skb);
- if ((sk_conn && !tipc_sk_filter_connect(tsk, skb)) ||
+ if ((sk_conn && !tipc_sk_filter_connect(tsk, skb, xmitq)) ||
(!sk_conn && msg_connected(hdr)) ||
(!grp && msg_in_group(hdr)))
err = TIPC_ERR_NO_PORT;
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index aa015c233898..6ebbec1bedd1 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -96,6 +96,16 @@ void tipc_sub_get(struct tipc_subscription *subscription);
(swap_ ? swab32(val__) : val__); \
})
+/* tipc_sub_write - write val_ to field_ of struct sub_ in user endian format
+ */
+#define tipc_sub_write(sub_, field_, val_) \
+ ({ \
+ struct tipc_subscr *sub__ = sub_; \
+ u32 val__ = val_; \
+ int swap_ = !((sub__)->filter & TIPC_FILTER_MASK); \
+ (sub__)->field_ = swap_ ? swab32(val__) : val__; \
+ })
+
/* tipc_evt_write - write val_ to field_ of struct evt_ in user endian format
*/
#define tipc_evt_write(evt_, field_, val_) \
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index 3a12fc18239b..446af7bbd13e 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -237,8 +237,8 @@ static void tipc_conn_delete_sub(struct tipc_conn *con, struct tipc_subscr *s)
if (!s || !memcmp(s, &sub->evt.s, sizeof(*s))) {
tipc_sub_unsubscribe(sub);
atomic_dec(&tn->subscription_count);
- } else if (s) {
- break;
+ if (s)
+ break;
}
}
spin_unlock_bh(&con->sub_lock);
@@ -362,9 +362,10 @@ static int tipc_conn_rcv_sub(struct tipc_topsrv *srv,
{
struct tipc_net *tn = tipc_net(srv->net);
struct tipc_subscription *sub;
+ u32 s_filter = tipc_sub_read(s, filter);
- if (tipc_sub_read(s, filter) & TIPC_SUB_CANCEL) {
- s->filter &= __constant_ntohl(~TIPC_SUB_CANCEL);
+ if (s_filter & TIPC_SUB_CANCEL) {
+ tipc_sub_write(s, filter, s_filter & ~TIPC_SUB_CANCEL);
tipc_conn_delete_sub(con, s);
return 0;
}
@@ -400,12 +401,15 @@ static int tipc_conn_rcv_from_sock(struct tipc_conn *con)
return -EWOULDBLOCK;
if (ret == sizeof(s)) {
read_lock_bh(&sk->sk_callback_lock);
- ret = tipc_conn_rcv_sub(srv, con, &s);
+ /* RACE: the connection can be closed in the meantime */
+ if (likely(connected(con)))
+ ret = tipc_conn_rcv_sub(srv, con, &s);
read_unlock_bh(&sk->sk_callback_lock);
+ if (!ret)
+ return 0;
}
- if (ret < 0)
- tipc_conn_close(con);
+ tipc_conn_close(con);
return ret;
}
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index c98e602a1a2d..e23f94a5549b 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -800,6 +800,8 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
*copied -= sk_msg_free(sk, msg);
tls_free_open_rec(sk);
}
+ if (psock)
+ sk_psock_put(sk, psock);
return err;
}
more_data:
@@ -2081,8 +2083,9 @@ static void tls_data_ready(struct sock *sk)
strp_data_ready(&ctx->strp);
psock = sk_psock_get(sk);
- if (psock && !list_empty(&psock->ingress_msg)) {
- ctx->saved_data_ready(sk);
+ if (psock) {
+ if (!list_empty(&psock->ingress_msg))
+ ctx->saved_data_ready(sk);
sk_psock_put(sk, psock);
}
}
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 709038a4783e..69efc891885f 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -157,7 +157,11 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque)
void virtio_transport_deliver_tap_pkt(struct virtio_vsock_pkt *pkt)
{
+ if (pkt->tap_delivered)
+ return;
+
vsock_deliver_tap(virtio_transport_build_skb, pkt);
+ pkt->tap_delivered = true;
}
EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c
index 8aa415a38814..0285aaa1e93c 100644
--- a/net/x25/x25_subr.c
+++ b/net/x25/x25_subr.c
@@ -357,6 +357,12 @@ void x25_disconnect(struct sock *sk, int reason, unsigned char cause,
sk->sk_state_change(sk);
sock_set_flag(sk, SOCK_DEAD);
}
+ if (x25->neighbour) {
+ read_lock_bh(&x25_list_lock);
+ x25_neigh_put(x25->neighbour);
+ x25->neighbour = NULL;
+ read_unlock_bh(&x25_list_lock);
+ }
}
/*