aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/bridge/br_forward.c7
-rw-r--r--net/bridge/br_input.c2
-rw-r--r--net/bridge/br_multicast.c48
-rw-r--r--net/bridge/br_private.h5
-rw-r--r--net/core/dev.c4
-rw-r--r--net/core/devlink.c4
-rw-r--r--net/core/drop_monitor.c3
-rw-r--r--net/core/netpoll.c2
-rw-r--r--net/dsa/dsa.c6
-rw-r--r--net/dsa/dsa2.c9
-rw-r--r--net/ipv4/af_inet.c5
-rw-r--r--net/ipv4/devinet.c12
-rw-r--r--net/ipv4/ipip.c137
-rw-r--r--net/ipv4/tunnel4.c72
-rw-r--r--net/ipv6/addrconf.c4
-rw-r--r--net/ipv6/ip6mr.c13
-rw-r--r--net/ipv6/sit.c93
-rw-r--r--net/mpls/af_mpls.c6
-rw-r--r--net/rxrpc/Makefile1
-rw-r--r--net/rxrpc/af_rxrpc.c24
-rw-r--r--net/rxrpc/ar-internal.h156
-rw-r--r--net/rxrpc/call_accept.c38
-rw-r--r--net/rxrpc/call_event.c14
-rw-r--r--net/rxrpc/call_object.c273
-rw-r--r--net/rxrpc/conn_client.c282
-rw-r--r--net/rxrpc/conn_event.c44
-rw-r--r--net/rxrpc/conn_object.c636
-rw-r--r--net/rxrpc/conn_service.c230
-rw-r--r--net/rxrpc/input.c71
-rw-r--r--net/rxrpc/insecure.c7
-rw-r--r--net/rxrpc/local_object.c19
-rw-r--r--net/rxrpc/peer_object.c2
-rw-r--r--net/rxrpc/proc.c23
-rw-r--r--net/rxrpc/rxkad.c191
-rw-r--r--net/rxrpc/utils.c37
-rw-r--r--net/sched/sch_hfsc.c10
-rw-r--r--net/sctp/associola.c1
-rw-r--r--net/sctp/chunk.c25
-rw-r--r--net/sctp/endpointola.c1
-rw-r--r--net/sctp/output.c20
-rw-r--r--net/sctp/outqueue.c99
-rw-r--r--net/sctp/sm_make_chunk.c27
-rw-r--r--net/sctp/socket.c240
43 files changed, 1740 insertions, 1163 deletions
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 6c196037d818..d610644368b9 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -199,7 +199,6 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb,
bool unicast)
{
u8 igmp_type = br_multicast_igmp_type(skb);
- __be16 proto = skb->protocol;
struct net_bridge_port *prev;
struct net_bridge_port *p;
@@ -221,7 +220,7 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb,
if (IS_ERR(prev))
goto out;
if (prev == p)
- br_multicast_count(p->br, p, proto, igmp_type,
+ br_multicast_count(p->br, p, skb, igmp_type,
BR_MCAST_DIR_TX);
}
@@ -266,8 +265,6 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
struct net_bridge *br = netdev_priv(dev);
struct net_bridge_port *prev = NULL;
struct net_bridge_port_group *p;
- __be16 proto = skb->protocol;
-
struct hlist_node *rp;
rp = rcu_dereference(hlist_first_rcu(&br->router_list));
@@ -286,7 +283,7 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
if (IS_ERR(prev))
goto out;
if (prev == port)
- br_multicast_count(port->br, port, proto, igmp_type,
+ br_multicast_count(port->br, port, skb, igmp_type,
BR_MCAST_DIR_TX);
if ((unsigned long)lport >= (unsigned long)port)
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 786602bc0567..a7817e6f306f 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -61,7 +61,7 @@ static int br_pass_frame_up(struct sk_buff *skb)
if (!skb)
return NET_RX_DROP;
/* update the multicast stats if the packet is IGMP/MLD */
- br_multicast_count(br, NULL, skb->protocol, br_multicast_igmp_type(skb),
+ br_multicast_count(br, NULL, skb, br_multicast_igmp_type(skb),
BR_MCAST_DIR_TX);
return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN,
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index e405eef0ae2e..a5423a1eec05 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -843,14 +843,14 @@ static void __br_multicast_send_query(struct net_bridge *br,
if (port) {
skb->dev = port->dev;
- br_multicast_count(br, port, skb->protocol, igmp_type,
+ br_multicast_count(br, port, skb, igmp_type,
BR_MCAST_DIR_TX);
NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT,
dev_net(port->dev), NULL, skb, NULL, skb->dev,
br_dev_queue_push_xmit);
} else {
br_multicast_select_own_querier(br, ip, skb);
- br_multicast_count(br, port, skb->protocol, igmp_type,
+ br_multicast_count(br, port, skb, igmp_type,
BR_MCAST_DIR_RX);
netif_rx(skb);
}
@@ -1676,7 +1676,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
if (skb_trimmed && skb_trimmed != skb)
kfree_skb(skb_trimmed);
- br_multicast_count(br, port, skb->protocol, BR_INPUT_SKB_CB(skb)->igmp,
+ br_multicast_count(br, port, skb, BR_INPUT_SKB_CB(skb)->igmp,
BR_MCAST_DIR_RX);
return err;
@@ -1725,7 +1725,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
if (skb_trimmed && skb_trimmed != skb)
kfree_skb(skb_trimmed);
- br_multicast_count(br, port, skb->protocol, BR_INPUT_SKB_CB(skb)->igmp,
+ br_multicast_count(br, port, skb, BR_INPUT_SKB_CB(skb)->igmp,
BR_MCAST_DIR_RX);
return err;
@@ -2251,13 +2251,16 @@ unlock:
EXPORT_SYMBOL_GPL(br_multicast_has_querier_adjacent);
static void br_mcast_stats_add(struct bridge_mcast_stats __percpu *stats,
- __be16 proto, u8 type, u8 dir)
+ const struct sk_buff *skb, u8 type, u8 dir)
{
struct bridge_mcast_stats *pstats = this_cpu_ptr(stats);
+ __be16 proto = skb->protocol;
+ unsigned int t_len;
u64_stats_update_begin(&pstats->syncp);
switch (proto) {
case htons(ETH_P_IP):
+ t_len = ntohs(ip_hdr(skb)->tot_len) - ip_hdrlen(skb);
switch (type) {
case IGMP_HOST_MEMBERSHIP_REPORT:
pstats->mstats.igmp_v1reports[dir]++;
@@ -2269,7 +2272,21 @@ static void br_mcast_stats_add(struct bridge_mcast_stats __percpu *stats,
pstats->mstats.igmp_v3reports[dir]++;
break;
case IGMP_HOST_MEMBERSHIP_QUERY:
- pstats->mstats.igmp_queries[dir]++;
+ if (t_len != sizeof(struct igmphdr)) {
+ pstats->mstats.igmp_v3queries[dir]++;
+ } else {
+ unsigned int offset = skb_transport_offset(skb);
+ struct igmphdr *ih, _ihdr;
+
+ ih = skb_header_pointer(skb, offset,
+ sizeof(_ihdr), &_ihdr);
+ if (!ih)
+ break;
+ if (!ih->code)
+ pstats->mstats.igmp_v1queries[dir]++;
+ else
+ pstats->mstats.igmp_v2queries[dir]++;
+ }
break;
case IGMP_HOST_LEAVE_MESSAGE:
pstats->mstats.igmp_leaves[dir]++;
@@ -2278,6 +2295,9 @@ static void br_mcast_stats_add(struct bridge_mcast_stats __percpu *stats,
break;
#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
+ t_len = ntohs(ipv6_hdr(skb)->payload_len) +
+ sizeof(struct ipv6hdr);
+ t_len -= skb_network_header_len(skb);
switch (type) {
case ICMPV6_MGM_REPORT:
pstats->mstats.mld_v1reports[dir]++;
@@ -2286,7 +2306,10 @@ static void br_mcast_stats_add(struct bridge_mcast_stats __percpu *stats,
pstats->mstats.mld_v2reports[dir]++;
break;
case ICMPV6_MGM_QUERY:
- pstats->mstats.mld_queries[dir]++;
+ if (t_len != sizeof(struct mld_msg))
+ pstats->mstats.mld_v2queries[dir]++;
+ else
+ pstats->mstats.mld_v1queries[dir]++;
break;
case ICMPV6_MGM_REDUCTION:
pstats->mstats.mld_leaves[dir]++;
@@ -2299,7 +2322,7 @@ static void br_mcast_stats_add(struct bridge_mcast_stats __percpu *stats,
}
void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p,
- __be16 proto, u8 type, u8 dir)
+ const struct sk_buff *skb, u8 type, u8 dir)
{
struct bridge_mcast_stats __percpu *stats;
@@ -2314,7 +2337,7 @@ void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p,
if (WARN_ON(!stats))
return;
- br_mcast_stats_add(stats, proto, type, dir);
+ br_mcast_stats_add(stats, skb, type, dir);
}
int br_multicast_init_stats(struct net_bridge *br)
@@ -2359,14 +2382,17 @@ void br_multicast_get_stats(const struct net_bridge *br,
memcpy(&temp, &cpu_stats->mstats, sizeof(temp));
} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));
- mcast_stats_add_dir(tdst.igmp_queries, temp.igmp_queries);
+ mcast_stats_add_dir(tdst.igmp_v1queries, temp.igmp_v1queries);
+ mcast_stats_add_dir(tdst.igmp_v2queries, temp.igmp_v2queries);
+ mcast_stats_add_dir(tdst.igmp_v3queries, temp.igmp_v3queries);
mcast_stats_add_dir(tdst.igmp_leaves, temp.igmp_leaves);
mcast_stats_add_dir(tdst.igmp_v1reports, temp.igmp_v1reports);
mcast_stats_add_dir(tdst.igmp_v2reports, temp.igmp_v2reports);
mcast_stats_add_dir(tdst.igmp_v3reports, temp.igmp_v3reports);
tdst.igmp_parse_errors += temp.igmp_parse_errors;
- mcast_stats_add_dir(tdst.mld_queries, temp.mld_queries);
+ mcast_stats_add_dir(tdst.mld_v1queries, temp.mld_v1queries);
+ mcast_stats_add_dir(tdst.mld_v2queries, temp.mld_v2queries);
mcast_stats_add_dir(tdst.mld_leaves, temp.mld_leaves);
mcast_stats_add_dir(tdst.mld_v1reports, temp.mld_v1reports);
mcast_stats_add_dir(tdst.mld_v2reports, temp.mld_v2reports);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 4dc851166ad1..40f200947ddc 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -586,7 +586,7 @@ void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port,
int type);
void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p,
- __be16 proto, u8 type, u8 dir);
+ const struct sk_buff *skb, u8 type, u8 dir);
int br_multicast_init_stats(struct net_bridge *br);
void br_multicast_get_stats(const struct net_bridge *br,
const struct net_bridge_port *p,
@@ -719,7 +719,8 @@ static inline void br_mdb_uninit(void)
static inline void br_multicast_count(struct net_bridge *br,
const struct net_bridge_port *p,
- __be16 proto, u8 type, u8 dir)
+ const struct sk_buff *skb,
+ u8 type, u8 dir)
{
}
diff --git a/net/core/dev.c b/net/core/dev.c
index b92d63bfde7a..7894e406c806 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4972,7 +4972,7 @@ bool sk_busy_loop(struct sock *sk, int nonblock)
if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
rc = napi->poll(napi, BUSY_POLL_BUDGET);
- trace_napi_poll(napi);
+ trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
if (rc == BUSY_POLL_BUDGET) {
napi_complete_done(napi, rc);
napi_schedule(napi);
@@ -5128,7 +5128,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
work = 0;
if (test_bit(NAPI_STATE_SCHED, &n->state)) {
work = n->poll(n, weight);
- trace_napi_poll(n);
+ trace_napi_poll(n, work, weight);
}
WARN_ON_ONCE(work > weight);
diff --git a/net/core/devlink.c b/net/core/devlink.c
index b2e592a198c0..1b5063088f1a 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -26,6 +26,10 @@
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/devlink.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/devlink.h>
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
static LIST_HEAD(devlink_list);
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 252e155c837b..d6b3b579560d 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -187,7 +187,8 @@ static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *locatio
trace_drop_common(skb, location);
}
-static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi)
+static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi,
+ int work, int budget)
{
struct dm_hw_stat_delta *new_stat;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 94acfc89ad97..53599bd0c82d 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -163,7 +163,7 @@ static void poll_one_napi(struct napi_struct *napi)
*/
work = napi->poll(napi, 0);
WARN_ONCE(work, "%pF exceeded budget in poll\n", napi->poll);
- trace_napi_poll(napi);
+ trace_napi_poll(napi, work, 0);
clear_bit(NAPI_STATE_NPSVC, &napi->state);
}
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 766d2a525ada..7e68bc6bc853 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -774,11 +774,17 @@ static int dsa_of_probe(struct device *dev)
chip_index = -1;
for_each_available_child_of_node(np, child) {
+ int i;
+
chip_index++;
cd = &pd->chip[chip_index];
cd->of_node = child;
+ /* Initialize the routing table */
+ for (i = 0; i < DSA_MAX_SWITCHES; ++i)
+ cd->rtable[i] = DSA_RTABLE_NONE;
+
/* When assigning the host device, increment its refcount */
cd->host_dev = get_device(&mdio_bus->dev);
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 83b95fc4cede..f30bad9678f0 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -595,7 +595,7 @@ static int _dsa_register_switch(struct dsa_switch *ds, struct device_node *np)
struct device_node *ports = dsa_get_ports(ds, np);
struct dsa_switch_tree *dst;
u32 tree, index;
- int err;
+ int i, err;
err = dsa_parse_member(np, &tree, &index);
if (err)
@@ -622,6 +622,11 @@ static int _dsa_register_switch(struct dsa_switch *ds, struct device_node *np)
ds->dst = dst;
ds->index = index;
+
+ /* Initialize the routing table */
+ for (i = 0; i < DSA_MAX_SWITCHES; ++i)
+ ds->rtable[i] = DSA_RTABLE_NONE;
+
dsa_dst_add_ds(dst, ds, index);
err = dsa_dst_complete(dst);
@@ -672,7 +677,7 @@ int dsa_register_switch(struct dsa_switch *ds, struct device_node *np)
}
EXPORT_SYMBOL_GPL(dsa_register_switch);
-void _dsa_unregister_switch(struct dsa_switch *ds)
+static void _dsa_unregister_switch(struct dsa_switch *ds)
{
struct dsa_switch_tree *dst = ds->dst;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index d39e9e47a26e..55513e654d79 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -73,7 +73,7 @@
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/kmod.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
@@ -1916,6 +1916,3 @@ static int __init ipv4_proc_init(void)
return 0;
}
#endif /* CONFIG_PROC_FS */
-
-MODULE_ALIAS_NETPROTO(PF_INET);
-
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index e333bc86bd39..415e117967c7 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1834,7 +1834,7 @@ void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
struct sk_buff *skb;
int err = -ENOBUFS;
- skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
+ skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
if (!skb)
goto errout;
@@ -1846,7 +1846,7 @@ void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
kfree_skb(skb);
goto errout;
}
- rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
+ rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
return;
errout:
if (err < 0)
@@ -1903,7 +1903,7 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
}
err = -ENOBUFS;
- skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_ATOMIC);
+ skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
if (!skb)
goto errout;
@@ -2027,16 +2027,16 @@ static void inet_forward_change(struct net *net)
for_each_netdev(net, dev) {
struct in_device *in_dev;
+
if (on)
dev_disable_lro(dev);
- rcu_read_lock();
- in_dev = __in_dev_get_rcu(dev);
+
+ in_dev = __in_dev_get_rtnl(dev);
if (in_dev) {
IN_DEV_CONF_SET(in_dev, FORWARDING, on);
inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
dev->ifindex, &in_dev->cnf);
}
- rcu_read_unlock();
}
}
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 978370132f29..4ae3f8e6c6cc 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -148,14 +148,14 @@ static int ipip_err(struct sk_buff *skb, u32 info)
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
ipv4_update_pmtu(skb, dev_net(skb->dev), info,
- t->parms.link, 0, IPPROTO_IPIP, 0);
+ t->parms.link, 0, iph->protocol, 0);
err = 0;
goto out;
}
if (type == ICMP_REDIRECT) {
ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
- IPPROTO_IPIP, 0);
+ iph->protocol, 0);
err = 0;
goto out;
}
@@ -177,12 +177,19 @@ out:
return err;
}
-static const struct tnl_ptk_info tpi = {
+static const struct tnl_ptk_info ipip_tpi = {
/* no tunnel info required for ipip. */
.proto = htons(ETH_P_IP),
};
-static int ipip_rcv(struct sk_buff *skb)
+#if IS_ENABLED(CONFIG_MPLS)
+static const struct tnl_ptk_info mplsip_tpi = {
+ /* no tunnel info required for mplsip. */
+ .proto = htons(ETH_P_MPLS_UC),
+};
+#endif
+
+static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
{
struct net *net = dev_net(skb->dev);
struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
@@ -193,11 +200,23 @@ static int ipip_rcv(struct sk_buff *skb)
tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
iph->saddr, iph->daddr, 0);
if (tunnel) {
+ const struct tnl_ptk_info *tpi;
+
+ if (tunnel->parms.iph.protocol != ipproto &&
+ tunnel->parms.iph.protocol != 0)
+ goto drop;
+
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto drop;
- if (iptunnel_pull_header(skb, 0, tpi.proto, false))
+#if IS_ENABLED(CONFIG_MPLS)
+ if (ipproto == IPPROTO_MPLS)
+ tpi = &mplsip_tpi;
+ else
+#endif
+ tpi = &ipip_tpi;
+ if (iptunnel_pull_header(skb, 0, tpi->proto, false))
goto drop;
- return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error);
+ return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
}
return -1;
@@ -207,24 +226,51 @@ drop:
return 0;
}
+static int ipip_rcv(struct sk_buff *skb)
+{
+ return ipip_tunnel_rcv(skb, IPPROTO_IPIP);
+}
+
+#if IS_ENABLED(CONFIG_MPLS)
+static int mplsip_rcv(struct sk_buff *skb)
+{
+ return ipip_tunnel_rcv(skb, IPPROTO_MPLS);
+}
+#endif
+
/*
* This function assumes it is being called from dev_queue_xmit()
* and that skb is filled properly by that function.
*/
-static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb,
+ struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
const struct iphdr *tiph = &tunnel->parms.iph;
+ u8 ipproto;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ ipproto = IPPROTO_IPIP;
+ break;
+#if IS_ENABLED(CONFIG_MPLS)
+ case htons(ETH_P_MPLS_UC):
+ ipproto = IPPROTO_MPLS;
+ break;
+#endif
+ default:
+ goto tx_error;
+ }
- if (unlikely(skb->protocol != htons(ETH_P_IP)))
+ if (tiph->protocol != ipproto && tiph->protocol != 0)
goto tx_error;
if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4))
goto tx_error;
- skb_set_inner_ipproto(skb, IPPROTO_IPIP);
+ skb_set_inner_ipproto(skb, ipproto);
- ip_tunnel_xmit(skb, dev, tiph, tiph->protocol);
+ ip_tunnel_xmit(skb, dev, tiph, ipproto);
return NETDEV_TX_OK;
tx_error:
@@ -234,6 +280,20 @@ tx_error:
return NETDEV_TX_OK;
}
+static bool ipip_tunnel_ioctl_verify_protocol(u8 ipproto)
+{
+ switch (ipproto) {
+ case 0:
+ case IPPROTO_IPIP:
+#if IS_ENABLED(CONFIG_MPLS)
+ case IPPROTO_MPLS:
+#endif
+ return true;
+ }
+
+ return false;
+}
+
static int
ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
@@ -244,7 +304,8 @@ ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return -EFAULT;
if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
- if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
+ if (p.iph.version != 4 ||
+ !ipip_tunnel_ioctl_verify_protocol(p.iph.protocol) ||
p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
return -EINVAL;
}
@@ -301,10 +362,23 @@ static int ipip_tunnel_init(struct net_device *dev)
tunnel->tun_hlen = 0;
tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
- tunnel->parms.iph.protocol = IPPROTO_IPIP;
return ip_tunnel_init(dev);
}
+static int ipip_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+ u8 proto;
+
+ if (!data || !data[IFLA_IPTUN_PROTO])
+ return 0;
+
+ proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
+ if (proto != IPPROTO_IPIP && proto != IPPROTO_MPLS && proto != 0)
+ return -EINVAL;
+
+ return 0;
+}
+
static void ipip_netlink_parms(struct nlattr *data[],
struct ip_tunnel_parm *parms)
{
@@ -335,6 +409,9 @@ static void ipip_netlink_parms(struct nlattr *data[],
if (data[IFLA_IPTUN_TOS])
parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
+ if (data[IFLA_IPTUN_PROTO])
+ parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]);
+
if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
parms->iph.frag_off = htons(IP_DF);
}
@@ -427,6 +504,8 @@ static size_t ipip_get_size(const struct net_device *dev)
nla_total_size(1) +
/* IFLA_IPTUN_TOS */
nla_total_size(1) +
+ /* IFLA_IPTUN_PROTO */
+ nla_total_size(1) +
/* IFLA_IPTUN_PMTUDISC */
nla_total_size(1) +
/* IFLA_IPTUN_ENCAP_TYPE */
@@ -450,6 +529,7 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_in_addr(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
+ nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) ||
nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
!!(parm->iph.frag_off & htons(IP_DF))))
goto nla_put_failure;
@@ -476,6 +556,7 @@ static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
[IFLA_IPTUN_REMOTE] = { .type = NLA_U32 },
[IFLA_IPTUN_TTL] = { .type = NLA_U8 },
[IFLA_IPTUN_TOS] = { .type = NLA_U8 },
+ [IFLA_IPTUN_PROTO] = { .type = NLA_U8 },
[IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 },
[IFLA_IPTUN_ENCAP_TYPE] = { .type = NLA_U16 },
[IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 },
@@ -489,6 +570,7 @@ static struct rtnl_link_ops ipip_link_ops __read_mostly = {
.policy = ipip_policy,
.priv_size = sizeof(struct ip_tunnel),
.setup = ipip_tunnel_setup,
+ .validate = ipip_tunnel_validate,
.newlink = ipip_newlink,
.changelink = ipip_changelink,
.dellink = ip_tunnel_dellink,
@@ -503,6 +585,14 @@ static struct xfrm_tunnel ipip_handler __read_mostly = {
.priority = 1,
};
+#if IS_ENABLED(CONFIG_MPLS)
+static struct xfrm_tunnel mplsip_handler __read_mostly = {
+ .handler = mplsip_rcv,
+ .err_handler = ipip_err,
+ .priority = 1,
+};
+#endif
+
static int __net_init ipip_init_net(struct net *net)
{
return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
@@ -525,7 +615,7 @@ static int __init ipip_init(void)
{
int err;
- pr_info("ipip: IPv4 over IPv4 tunneling driver\n");
+ pr_info("ipip: IPv4 and MPLS over IPv4 tunneling driver\n");
err = register_pernet_device(&ipip_net_ops);
if (err < 0)
@@ -533,8 +623,15 @@ static int __init ipip_init(void)
err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
if (err < 0) {
pr_info("%s: can't register tunnel\n", __func__);
- goto xfrm_tunnel_failed;
+ goto xfrm_tunnel_ipip_failed;
+ }
+#if IS_ENABLED(CONFIG_MPLS)
+ err = xfrm4_tunnel_register(&mplsip_handler, AF_MPLS);
+ if (err < 0) {
+ pr_info("%s: can't register tunnel\n", __func__);
+ goto xfrm_tunnel_mplsip_failed;
}
+#endif
err = rtnl_link_register(&ipip_link_ops);
if (err < 0)
goto rtnl_link_failed;
@@ -543,8 +640,13 @@ out:
return err;
rtnl_link_failed:
+#if IS_ENABLED(CONFIG_MPLS)
+ xfrm4_tunnel_deregister(&mplsip_handler, AF_INET);
+xfrm_tunnel_mplsip_failed:
+
+#endif
xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
-xfrm_tunnel_failed:
+xfrm_tunnel_ipip_failed:
unregister_pernet_device(&ipip_net_ops);
goto out;
}
@@ -554,7 +656,10 @@ static void __exit ipip_fini(void)
rtnl_link_unregister(&ipip_link_ops);
if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
pr_info("%s: can't deregister tunnel\n", __func__);
-
+#if IS_ENABLED(CONFIG_MPLS)
+ if (xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS))
+ pr_info("%s: can't deregister tunnel\n", __func__);
+#endif
unregister_pernet_device(&ipip_net_ops);
}
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index 0d0171830620..ec35eaa5c029 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -6,6 +6,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mutex.h>
+#include <linux/mpls.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
@@ -16,11 +17,14 @@
static struct xfrm_tunnel __rcu *tunnel4_handlers __read_mostly;
static struct xfrm_tunnel __rcu *tunnel64_handlers __read_mostly;
+static struct xfrm_tunnel __rcu *tunnelmpls4_handlers __read_mostly;
static DEFINE_MUTEX(tunnel4_mutex);
static inline struct xfrm_tunnel __rcu **fam_handlers(unsigned short family)
{
- return (family == AF_INET) ? &tunnel4_handlers : &tunnel64_handlers;
+ return (family == AF_INET) ? &tunnel4_handlers :
+ (family == AF_INET6) ? &tunnel64_handlers :
+ &tunnelmpls4_handlers;
}
int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family)
@@ -125,6 +129,26 @@ drop:
}
#endif
+#if IS_ENABLED(CONFIG_MPLS)
+static int tunnelmpls4_rcv(struct sk_buff *skb)
+{
+ struct xfrm_tunnel *handler;
+
+ if (!pskb_may_pull(skb, sizeof(struct mpls_label)))
+ goto drop;
+
+ for_each_tunnel_rcu(tunnelmpls4_handlers, handler)
+ if (!handler->handler(skb))
+ return 0;
+
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+
+drop:
+ kfree_skb(skb);
+ return 0;
+}
+#endif
+
static void tunnel4_err(struct sk_buff *skb, u32 info)
{
struct xfrm_tunnel *handler;
@@ -145,6 +169,17 @@ static void tunnel64_err(struct sk_buff *skb, u32 info)
}
#endif
+#if IS_ENABLED(CONFIG_MPLS)
+static void tunnelmpls4_err(struct sk_buff *skb, u32 info)
+{
+ struct xfrm_tunnel *handler;
+
+ for_each_tunnel_rcu(tunnelmpls4_handlers, handler)
+ if (!handler->err_handler(skb, info))
+ break;
+}
+#endif
+
static const struct net_protocol tunnel4_protocol = {
.handler = tunnel4_rcv,
.err_handler = tunnel4_err,
@@ -161,24 +196,47 @@ static const struct net_protocol tunnel64_protocol = {
};
#endif
+#if IS_ENABLED(CONFIG_MPLS)
+static const struct net_protocol tunnelmpls4_protocol = {
+ .handler = tunnelmpls4_rcv,
+ .err_handler = tunnelmpls4_err,
+ .no_policy = 1,
+ .netns_ok = 1,
+};
+#endif
+
static int __init tunnel4_init(void)
{
- if (inet_add_protocol(&tunnel4_protocol, IPPROTO_IPIP)) {
- pr_err("%s: can't add protocol\n", __func__);
- return -EAGAIN;
- }
+ if (inet_add_protocol(&tunnel4_protocol, IPPROTO_IPIP))
+ goto err;
#if IS_ENABLED(CONFIG_IPV6)
if (inet_add_protocol(&tunnel64_protocol, IPPROTO_IPV6)) {
- pr_err("tunnel64 init: can't add protocol\n");
inet_del_protocol(&tunnel4_protocol, IPPROTO_IPIP);
- return -EAGAIN;
+ goto err;
+ }
+#endif
+#if IS_ENABLED(CONFIG_MPLS)
+ if (inet_add_protocol(&tunnelmpls4_protocol, IPPROTO_MPLS)) {
+ inet_del_protocol(&tunnel4_protocol, IPPROTO_IPIP);
+#if IS_ENABLED(CONFIG_IPV6)
+ inet_del_protocol(&tunnel64_protocol, IPPROTO_IPV6);
+#endif
+ goto err;
}
#endif
return 0;
+
+err:
+ pr_err("%s: can't add protocol\n", __func__);
+ return -EAGAIN;
}
static void __exit tunnel4_fini(void)
{
+#if IS_ENABLED(CONFIG_MPLS)
+ if (inet_del_protocol(&tunnelmpls4_protocol, IPPROTO_MPLS))
+ pr_err("tunnelmpls4 close: can't remove protocol\n");
+#endif
#if IS_ENABLED(CONFIG_IPV6)
if (inet_del_protocol(&tunnel64_protocol, IPPROTO_IPV6))
pr_err("tunnel64 close: can't remove protocol\n");
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index a1f6b7b31531..24f1b0898e40 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -547,7 +547,7 @@ void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex,
struct sk_buff *skb;
int err = -ENOBUFS;
- skb = nlmsg_new(inet6_netconf_msgsize_devconf(type), GFP_ATOMIC);
+ skb = nlmsg_new(inet6_netconf_msgsize_devconf(type), GFP_KERNEL);
if (!skb)
goto errout;
@@ -559,7 +559,7 @@ void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex,
kfree_skb(skb);
goto errout;
}
- rtnl_notify(skb, net, 0, RTNLGRP_IPV6_NETCONF, NULL, GFP_ATOMIC);
+ rtnl_notify(skb, net, 0, RTNLGRP_IPV6_NETCONF, NULL, GFP_KERNEL);
return;
errout:
rtnl_set_sk_err(net, RTNLGRP_IPV6_NETCONF, err);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 487ef3bc7bbc..c7ca0f5d1a3b 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1592,14 +1592,15 @@ static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
if (likely(mrt->mroute6_sk == NULL)) {
mrt->mroute6_sk = sk;
net->ipv6.devconf_all->mc_forwarding++;
- inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
- NETCONFA_IFINDEX_ALL,
- net->ipv6.devconf_all);
- }
- else
+ } else {
err = -EADDRINUSE;
+ }
write_unlock_bh(&mrt_lock);
+ if (!err)
+ inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
+ NETCONFA_IFINDEX_ALL,
+ net->ipv6.devconf_all);
rtnl_unlock();
return err;
@@ -1617,11 +1618,11 @@ int ip6mr_sk_done(struct sock *sk)
write_lock_bh(&mrt_lock);
mrt->mroute6_sk = NULL;
net->ipv6.devconf_all->mc_forwarding--;
+ write_unlock_bh(&mrt_lock);
inet6_netconf_notify_devconf(net,
NETCONFA_MC_FORWARDING,
NETCONFA_IFINDEX_ALL,
net->ipv6.devconf_all);
- write_unlock_bh(&mrt_lock);
mroute_clean_tables(mrt, false);
err = 0;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 917a5cd4b8fc..182b6a9be29d 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -688,12 +688,19 @@ out:
return 0;
}
-static const struct tnl_ptk_info tpi = {
+static const struct tnl_ptk_info ipip_tpi = {
/* no tunnel info required for ipip. */
.proto = htons(ETH_P_IP),
};
-static int ipip_rcv(struct sk_buff *skb)
+#if IS_ENABLED(CONFIG_MPLS)
+static const struct tnl_ptk_info mplsip_tpi = {
+ /* no tunnel info required for mplsip. */
+ .proto = htons(ETH_P_MPLS_UC),
+};
+#endif
+
+static int sit_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
{
const struct iphdr *iph;
struct ip_tunnel *tunnel;
@@ -702,15 +709,23 @@ static int ipip_rcv(struct sk_buff *skb)
tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
iph->saddr, iph->daddr);
if (tunnel) {
- if (tunnel->parms.iph.protocol != IPPROTO_IPIP &&
+ const struct tnl_ptk_info *tpi;
+
+ if (tunnel->parms.iph.protocol != ipproto &&
tunnel->parms.iph.protocol != 0)
goto drop;
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto drop;
- if (iptunnel_pull_header(skb, 0, tpi.proto, false))
+#if IS_ENABLED(CONFIG_MPLS)
+ if (ipproto == IPPROTO_MPLS)
+ tpi = &mplsip_tpi;
+ else
+#endif
+ tpi = &ipip_tpi;
+ if (iptunnel_pull_header(skb, 0, tpi->proto, false))
goto drop;
- return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error);
+ return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
}
return 1;
@@ -720,6 +735,18 @@ drop:
return 0;
}
+static int ipip_rcv(struct sk_buff *skb)
+{
+ return sit_tunnel_rcv(skb, IPPROTO_IPIP);
+}
+
+#if IS_ENABLED(CONFIG_MPLS)
+static int mplsip_rcv(struct sk_buff *skb)
+{
+ return sit_tunnel_rcv(skb, IPPROTO_MPLS);
+}
+#endif
+
/*
* If the IPv6 address comes from 6rd / 6to4 (RFC 3056) addr space this function
* stores the embedded IPv4 address in v4dst and returns true.
@@ -958,7 +985,8 @@ tx_error:
return NETDEV_TX_OK;
}
-static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t sit_tunnel_xmit__(struct sk_buff *skb,
+ struct net_device *dev, u8 ipproto)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
const struct iphdr *tiph = &tunnel->parms.iph;
@@ -966,9 +994,9 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4))
goto tx_error;
- skb_set_inner_ipproto(skb, IPPROTO_IPIP);
+ skb_set_inner_ipproto(skb, ipproto);
- ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP);
+ ip_tunnel_xmit(skb, dev, tiph, ipproto);
return NETDEV_TX_OK;
tx_error:
kfree_skb(skb);
@@ -981,11 +1009,16 @@ static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb,
{
switch (skb->protocol) {
case htons(ETH_P_IP):
- ipip_tunnel_xmit(skb, dev);
+ sit_tunnel_xmit__(skb, dev, IPPROTO_IPIP);
break;
case htons(ETH_P_IPV6):
ipip6_tunnel_xmit(skb, dev);
break;
+#if IS_ENABLED(CONFIG_MPLS)
+ case htons(ETH_P_MPLS_UC):
+ sit_tunnel_xmit__(skb, dev, IPPROTO_MPLS);
+ break;
+#endif
default:
goto tx_err;
}
@@ -1093,6 +1126,16 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t,
}
#endif
+bool ipip6_valid_ip_proto(u8 ipproto)
+{
+ return ipproto == IPPROTO_IPV6 ||
+ ipproto == IPPROTO_IPIP ||
+#if IS_ENABLED(CONFIG_MPLS)
+ ipproto == IPPROTO_MPLS ||
+#endif
+ ipproto == 0;
+}
+
static int
ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
@@ -1152,9 +1195,7 @@ ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
goto done;
err = -EINVAL;
- if (p.iph.protocol != IPPROTO_IPV6 &&
- p.iph.protocol != IPPROTO_IPIP &&
- p.iph.protocol != 0)
+ if (!ipip6_valid_ip_proto(p.iph.protocol))
goto done;
if (p.iph.version != 4 ||
p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
@@ -1379,9 +1420,7 @@ static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[])
return 0;
proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
- if (proto != IPPROTO_IPV6 &&
- proto != IPPROTO_IPIP &&
- proto != 0)
+ if (!ipip6_valid_ip_proto(proto))
return -EINVAL;
return 0;
@@ -1723,6 +1762,14 @@ static struct xfrm_tunnel ipip_handler __read_mostly = {
.priority = 2,
};
+#if IS_ENABLED(CONFIG_MPLS)
+static struct xfrm_tunnel mplsip_handler __read_mostly = {
+ .handler = mplsip_rcv,
+ .err_handler = ipip6_err,
+ .priority = 2,
+};
+#endif
+
static void __net_exit sit_destroy_tunnels(struct net *net,
struct list_head *head)
{
@@ -1818,6 +1865,9 @@ static void __exit sit_cleanup(void)
rtnl_link_unregister(&sit_link_ops);
xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
+#if IS_ENABLED(CONFIG_MPLS)
+ xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS);
+#endif
unregister_pernet_device(&sit_net_ops);
rcu_barrier(); /* Wait for completion of call_rcu()'s */
@@ -1827,7 +1877,7 @@ static int __init sit_init(void)
{
int err;
- pr_info("IPv6 over IPv4 tunneling driver\n");
+ pr_info("IPv6, IPv4 and MPLS over IPv4 tunneling driver\n");
err = register_pernet_device(&sit_net_ops);
if (err < 0)
@@ -1842,6 +1892,13 @@ static int __init sit_init(void)
pr_info("%s: can't register ip4ip4\n", __func__);
goto xfrm_tunnel4_failed;
}
+#if IS_ENABLED(CONFIG_MPLS)
+ err = xfrm4_tunnel_register(&mplsip_handler, AF_MPLS);
+ if (err < 0) {
+ pr_info("%s: can't register mplsip\n", __func__);
+ goto xfrm_tunnel_mpls_failed;
+ }
+#endif
err = rtnl_link_register(&sit_link_ops);
if (err < 0)
goto rtnl_link_failed;
@@ -1850,6 +1907,10 @@ out:
return err;
rtnl_link_failed:
+#if IS_ENABLED(CONFIG_MPLS)
+ xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS);
+xfrm_tunnel_mpls_failed:
+#endif
xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
xfrm_tunnel4_failed:
xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index e9beaa58573c..5c161e7759b5 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1009,10 +1009,12 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
unsigned int flags;
if (event == NETDEV_REGISTER) {
- /* For now just support Ethernet and IPGRE devices */
+ /* For now just support Ethernet, IPGRE, SIT and IPIP devices */
if (dev->type == ARPHRD_ETHER ||
dev->type == ARPHRD_LOOPBACK ||
- dev->type == ARPHRD_IPGRE) {
+ dev->type == ARPHRD_IPGRE ||
+ dev->type == ARPHRD_SIT ||
+ dev->type == ARPHRD_TUNNEL) {
mdev = mpls_add_dev(dev);
if (IS_ERR(mdev))
return notifier_from_errno(PTR_ERR(mdev));
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
index 6522e50fb750..10f3f48a16a8 100644
--- a/net/rxrpc/Makefile
+++ b/net/rxrpc/Makefile
@@ -10,6 +10,7 @@ af-rxrpc-y := \
conn_client.o \
conn_event.o \
conn_object.o \
+ conn_service.o \
input.o \
insecure.o \
key.o \
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 5d3e795a7c48..88effadd4b16 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -766,9 +766,9 @@ error_key_type:
error_sock:
proto_unregister(&rxrpc_proto);
error_proto:
- destroy_workqueue(rxrpc_workqueue);
-error_security:
rxrpc_exit_security();
+error_security:
+ destroy_workqueue(rxrpc_workqueue);
error_work_queue:
kmem_cache_destroy(rxrpc_call_jar);
error_call_jar:
@@ -788,27 +788,7 @@ static void __exit af_rxrpc_exit(void)
proto_unregister(&rxrpc_proto);
rxrpc_destroy_all_calls();
rxrpc_destroy_all_connections();
-
ASSERTCMP(atomic_read(&rxrpc_n_skbs), ==, 0);
-
- /* We need to flush the scheduled work twice because the local endpoint
- * records involve a work item in their destruction as they can only be
- * destroyed from process context. However, a connection may have a
- * work item outstanding - and this will pin the local endpoint record
- * until the connection goes away.
- *
- * Peers don't pin locals and calls pin sockets - which prevents the
- * module from being unloaded - so we should only need two flushes.
- */
- _debug("flush scheduled work");
- flush_workqueue(rxrpc_workqueue);
- _debug("flush scheduled work 2");
- flush_workqueue(rxrpc_workqueue);
- _debug("synchronise RCU");
- rcu_barrier();
- _debug("destroy locals");
- ASSERT(idr_is_empty(&rxrpc_client_conn_ids));
- idr_destroy(&rxrpc_client_conn_ids);
rxrpc_destroy_all_locals();
remove_proc_entry("rxrpc_conns", init_net.proc_net);
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 702db72196fb..1bb9e7ac9e14 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -10,6 +10,7 @@
*/
#include <linux/atomic.h>
+#include <linux/seqlock.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include <rxrpc/packet.h>
@@ -35,7 +36,6 @@ struct rxrpc_crypt {
queue_delayed_work(rxrpc_workqueue, (WS), (D))
#define rxrpc_queue_call(CALL) rxrpc_queue_work(&(CALL)->processor)
-#define rxrpc_queue_conn(CONN) rxrpc_queue_work(&(CONN)->processor)
struct rxrpc_connection;
@@ -141,17 +141,16 @@ struct rxrpc_security {
int (*init_connection_security)(struct rxrpc_connection *);
/* prime a connection's packet security */
- void (*prime_packet_security)(struct rxrpc_connection *);
+ int (*prime_packet_security)(struct rxrpc_connection *);
/* impose security on a packet */
- int (*secure_packet)(const struct rxrpc_call *,
+ int (*secure_packet)(struct rxrpc_call *,
struct sk_buff *,
size_t,
void *);
/* verify the security on a received packet */
- int (*verify_packet)(const struct rxrpc_call *, struct sk_buff *,
- u32 *);
+ int (*verify_packet)(struct rxrpc_call *, struct sk_buff *, u32 *);
/* issue a challenge */
int (*issue_challenge)(struct rxrpc_connection *);
@@ -208,7 +207,7 @@ struct rxrpc_peer {
struct hlist_head error_targets; /* targets for net error distribution */
struct work_struct error_distributor;
struct rb_root service_conns; /* Service connections */
- rwlock_t conn_lock;
+ seqlock_t service_conn_lock;
spinlock_t lock; /* access lock */
unsigned int if_mtu; /* interface MTU for this peer */
unsigned int mtu; /* network MTU for this peer */
@@ -231,18 +230,12 @@ struct rxrpc_peer {
* Keys for matching a connection.
*/
struct rxrpc_conn_proto {
- unsigned long hash_key;
- struct rxrpc_local *local; /* Representation of local endpoint */
- u32 epoch; /* epoch of this connection */
- u32 cid; /* connection ID */
- u8 in_clientflag; /* RXRPC_CLIENT_INITIATED if we are server */
- u8 addr_size; /* Size of the address */
- sa_family_t family; /* Transport protocol */
- __be16 port; /* Peer UDP/UDP6 port */
- union { /* Peer address */
- struct in_addr ipv4_addr;
- struct in6_addr ipv6_addr;
- u32 raw_addr[0];
+ union {
+ struct {
+ u32 epoch; /* epoch of this connection */
+ u32 cid; /* connection ID */
+ };
+ u64 index_key;
};
};
@@ -256,6 +249,37 @@ struct rxrpc_conn_parameters {
};
/*
+ * Bits in the connection flags.
+ */
+enum rxrpc_conn_flag {
+ RXRPC_CONN_HAS_IDR, /* Has a client conn ID assigned */
+ RXRPC_CONN_IN_SERVICE_CONNS, /* Conn is in peer->service_conns */
+ RXRPC_CONN_IN_CLIENT_CONNS, /* Conn is in local->client_conns */
+};
+
+/*
+ * Events that can be raised upon a connection.
+ */
+enum rxrpc_conn_event {
+ RXRPC_CONN_EV_CHALLENGE, /* Send challenge packet */
+};
+
+/*
+ * The connection protocol state.
+ */
+enum rxrpc_conn_proto_state {
+ RXRPC_CONN_UNUSED, /* Connection not yet attempted */
+ RXRPC_CONN_CLIENT, /* Client connection */
+ RXRPC_CONN_SERVICE_UNSECURED, /* Service unsecured connection */
+ RXRPC_CONN_SERVICE_CHALLENGING, /* Service challenging for security */
+ RXRPC_CONN_SERVICE, /* Service secured connection */
+ RXRPC_CONN_REMOTELY_ABORTED, /* Conn aborted by peer */
+ RXRPC_CONN_LOCALLY_ABORTED, /* Conn aborted locally */
+ RXRPC_CONN_NETWORK_ERROR, /* Conn terminated by network error */
+ RXRPC_CONN__NR_STATES
+};
+
+/*
* RxRPC connection definition
* - matched by { local, peer, epoch, conn_id, direction }
* - each connection can only handle four simultaneous calls
@@ -265,44 +289,38 @@ struct rxrpc_connection {
struct rxrpc_conn_parameters params;
spinlock_t channel_lock;
- struct rxrpc_call *channels[RXRPC_MAXCALLS]; /* active calls */
+
+ struct rxrpc_channel {
+ struct rxrpc_call __rcu *call; /* Active call */
+ u32 call_id; /* ID of current call */
+ u32 call_counter; /* Call ID counter */
+ u32 last_call; /* ID of last call */
+ u32 last_result; /* Result of last call (0/abort) */
+ } channels[RXRPC_MAXCALLS];
wait_queue_head_t channel_wq; /* queue to wait for channel to become available */
+ struct rcu_head rcu;
struct work_struct processor; /* connection event processor */
union {
struct rb_node client_node; /* Node in local->client_conns */
struct rb_node service_node; /* Node in peer->service_conns */
};
struct list_head link; /* link in master connection list */
- struct rb_root calls; /* calls on this connection */
struct sk_buff_head rx_queue; /* received conn-level packets */
const struct rxrpc_security *security; /* applied security module */
struct key *server_key; /* security for this service */
struct crypto_skcipher *cipher; /* encryption handle */
struct rxrpc_crypt csum_iv; /* packet checksum base */
unsigned long flags;
-#define RXRPC_CONN_HAS_IDR 0 /* - Has a client conn ID assigned */
unsigned long events;
-#define RXRPC_CONN_CHALLENGE 0 /* send challenge packet */
unsigned long put_time; /* Time at which last put */
- rwlock_t lock; /* access lock */
spinlock_t state_lock; /* state-change lock */
atomic_t usage;
- enum { /* current state of connection */
- RXRPC_CONN_UNUSED, /* - connection not yet attempted */
- RXRPC_CONN_CLIENT, /* - client connection */
- RXRPC_CONN_SERVER_UNSECURED, /* - server unsecured connection */
- RXRPC_CONN_SERVER_CHALLENGING, /* - server challenging for security */
- RXRPC_CONN_SERVER, /* - server secured connection */
- RXRPC_CONN_REMOTELY_ABORTED, /* - conn aborted by peer */
- RXRPC_CONN_LOCALLY_ABORTED, /* - conn aborted locally */
- RXRPC_CONN_NETWORK_ERROR, /* - conn terminated by network error */
- } state;
+ enum rxrpc_conn_proto_state state : 8; /* current state of connection */
u32 local_abort; /* local abort code */
u32 remote_abort; /* remote abort code */
int error; /* local error incurred */
int debug_id; /* debug ID for printks */
- unsigned int call_counter; /* call ID counter */
atomic_t serial; /* packet serial number counter */
atomic_t hi_serial; /* highest serial number received */
atomic_t avail_chans; /* number of channels available */
@@ -382,6 +400,7 @@ enum rxrpc_call_state {
* - matched by { connection, call_id }
*/
struct rxrpc_call {
+ struct rcu_head rcu;
struct rxrpc_connection *conn; /* connection carrying call */
struct rxrpc_sock *socket; /* socket responsible */
struct timer_list lifetimer; /* lifetime remaining on call */
@@ -394,11 +413,11 @@ struct rxrpc_call {
struct hlist_node error_link; /* link in error distribution list */
struct list_head accept_link; /* calls awaiting acceptance */
struct rb_node sock_node; /* node in socket call tree */
- struct rb_node conn_node; /* node in connection call tree */
struct sk_buff_head rx_queue; /* received packets */
struct sk_buff_head rx_oos_queue; /* packets received out of sequence */
struct sk_buff *tx_pending; /* Tx socket buffer being filled */
wait_queue_head_t tx_waitq; /* wait for Tx window space to become available */
+ __be32 crypto_buf[2]; /* Temporary packet crypto buffer */
unsigned long user_call_ID; /* user-defined call ID */
unsigned long creation_jif; /* time of call creation */
unsigned long flags;
@@ -442,19 +461,12 @@ struct rxrpc_call {
#define RXRPC_ACKR_WINDOW_ASZ DIV_ROUND_UP(RXRPC_MAXACKS, BITS_PER_LONG)
unsigned long ackr_window[RXRPC_ACKR_WINDOW_ASZ + 1];
- struct hlist_node hash_node;
- unsigned long hash_key; /* Full hash key */
- u8 in_clientflag; /* Copy of conn->in_clientflag for hashing */
- struct rxrpc_local *local; /* Local endpoint. Used for hashing. */
- sa_family_t family; /* Frame protocol */
+ u8 in_clientflag; /* Copy of conn->in_clientflag */
+ struct rxrpc_local *local; /* Local endpoint. */
u32 call_id; /* call ID on connection */
u32 cid; /* connection ID plus channel index */
u32 epoch; /* epoch of this connection */
u16 service_id; /* service ID */
- union { /* Peer IP address for hashing */
- __be32 ipv4_addr;
- __u8 ipv6_addr[16]; /* Anticipates eventual IPv6 support */
- } peer_ip;
};
/*
@@ -502,8 +514,6 @@ extern struct kmem_cache *rxrpc_call_jar;
extern struct list_head rxrpc_calls;
extern rwlock_t rxrpc_call_lock;
-struct rxrpc_call *rxrpc_find_call_hash(struct rxrpc_host_header *,
- void *, sa_family_t, const void *);
struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long);
struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *,
struct rxrpc_conn_parameters *,
@@ -522,8 +532,10 @@ void __exit rxrpc_destroy_all_calls(void);
*/
extern struct idr rxrpc_client_conn_ids;
-int rxrpc_get_client_connection_id(struct rxrpc_connection *, gfp_t);
-void rxrpc_put_client_connection_id(struct rxrpc_connection *);
+void rxrpc_destroy_client_conn_ids(void);
+int rxrpc_connect_call(struct rxrpc_call *, struct rxrpc_conn_parameters *,
+ struct sockaddr_rxrpc *, gfp_t);
+void rxrpc_unpublish_client_conn(struct rxrpc_connection *);
/*
* conn_event.c
@@ -539,17 +551,14 @@ extern unsigned int rxrpc_connection_expiry;
extern struct list_head rxrpc_connections;
extern rwlock_t rxrpc_connection_lock;
-int rxrpc_connect_call(struct rxrpc_call *, struct rxrpc_conn_parameters *,
- struct sockaddr_rxrpc *, gfp_t);
-struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_local *,
- struct rxrpc_peer *,
- struct sk_buff *);
+int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *);
+struct rxrpc_connection *rxrpc_alloc_connection(gfp_t);
+struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *,
+ struct sk_buff *);
+void __rxrpc_disconnect_call(struct rxrpc_call *);
void rxrpc_disconnect_call(struct rxrpc_call *);
void rxrpc_put_connection(struct rxrpc_connection *);
void __exit rxrpc_destroy_all_connections(void);
-struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *,
- struct rxrpc_peer *,
- struct sk_buff *);
static inline bool rxrpc_conn_is_client(const struct rxrpc_connection *conn)
{
@@ -558,7 +567,7 @@ static inline bool rxrpc_conn_is_client(const struct rxrpc_connection *conn)
static inline bool rxrpc_conn_is_service(const struct rxrpc_connection *conn)
{
- return conn->proto.in_clientflag;
+ return !rxrpc_conn_is_client(conn);
}
static inline void rxrpc_get_connection(struct rxrpc_connection *conn)
@@ -566,6 +575,31 @@ static inline void rxrpc_get_connection(struct rxrpc_connection *conn)
atomic_inc(&conn->usage);
}
+static inline
+struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *conn)
+{
+ return atomic_inc_not_zero(&conn->usage) ? conn : NULL;
+}
+
+static inline bool rxrpc_queue_conn(struct rxrpc_connection *conn)
+{
+ if (!rxrpc_get_connection_maybe(conn))
+ return false;
+ if (!rxrpc_queue_work(&conn->processor))
+ rxrpc_put_connection(conn);
+ return true;
+}
+
+/*
+ * conn_service.c
+ */
+struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *,
+ struct sk_buff *);
+struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *,
+ struct sockaddr_rxrpc *,
+ struct sk_buff *);
+void rxrpc_unpublish_service_conn(struct rxrpc_connection *);
+
/*
* input.c
*/
@@ -618,6 +652,11 @@ static inline void rxrpc_put_local(struct rxrpc_local *local)
__rxrpc_put_local(local);
}
+static inline void rxrpc_queue_local(struct rxrpc_local *local)
+{
+ rxrpc_queue_work(&local->processor);
+}
+
/*
* misc.c
*/
@@ -722,8 +761,7 @@ static inline void rxrpc_sysctl_exit(void) {}
/*
* utils.c
*/
-void rxrpc_get_addr_from_skb(struct rxrpc_local *, const struct sk_buff *,
- struct sockaddr_rxrpc *);
+int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *);
/*
* debug tracing
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 202e053a3c6d..0b2832141bd0 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -75,7 +75,6 @@ static int rxrpc_accept_incoming_call(struct rxrpc_local *local,
{
struct rxrpc_connection *conn;
struct rxrpc_skb_priv *sp, *nsp;
- struct rxrpc_peer *peer;
struct rxrpc_call *call;
struct sk_buff *notification;
int ret;
@@ -94,15 +93,7 @@ static int rxrpc_accept_incoming_call(struct rxrpc_local *local,
rxrpc_new_skb(notification);
notification->mark = RXRPC_SKB_MARK_NEW_CALL;
- peer = rxrpc_lookup_peer(local, srx, GFP_NOIO);
- if (!peer) {
- _debug("no peer");
- ret = -EBUSY;
- goto error;
- }
-
- conn = rxrpc_incoming_connection(local, peer, skb);
- rxrpc_put_peer(peer);
+ conn = rxrpc_incoming_connection(local, srx, skb);
if (IS_ERR(conn)) {
_debug("no conn");
ret = PTR_ERR(conn);
@@ -128,12 +119,11 @@ static int rxrpc_accept_incoming_call(struct rxrpc_local *local,
spin_lock(&call->conn->state_lock);
if (sp->hdr.securityIndex > 0 &&
- call->conn->state == RXRPC_CONN_SERVER_UNSECURED) {
+ call->conn->state == RXRPC_CONN_SERVICE_UNSECURED) {
_debug("await conn sec");
list_add_tail(&call->accept_link, &rx->secureq);
- call->conn->state = RXRPC_CONN_SERVER_CHALLENGING;
- rxrpc_get_connection(call->conn);
- set_bit(RXRPC_CONN_CHALLENGE, &call->conn->events);
+ call->conn->state = RXRPC_CONN_SERVICE_CHALLENGING;
+ set_bit(RXRPC_CONN_EV_CHALLENGE, &call->conn->events);
rxrpc_queue_conn(call->conn);
} else {
_debug("conn ready");
@@ -227,20 +217,8 @@ void rxrpc_accept_incoming_calls(struct rxrpc_local *local)
whdr._rsvd = 0;
whdr.serviceId = htons(sp->hdr.serviceId);
- /* determine the remote address */
- memset(&srx, 0, sizeof(srx));
- srx.srx_family = AF_RXRPC;
- srx.transport.family = local->srx.transport.family;
- srx.transport_type = local->srx.transport_type;
- switch (srx.transport.family) {
- case AF_INET:
- srx.transport_len = sizeof(struct sockaddr_in);
- srx.transport.sin.sin_port = udp_hdr(skb)->source;
- srx.transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
- break;
- default:
- goto busy;
- }
+ if (rxrpc_extract_addr_from_skb(&srx, skb) < 0)
+ goto drop;
/* get the socket providing the service */
read_lock_bh(&local->services_lock);
@@ -286,6 +264,10 @@ busy:
rxrpc_free_skb(skb);
return;
+drop:
+ rxrpc_free_skb(skb);
+ return;
+
invalid_service:
skb->priority = RX_INVALID_OPERATION;
rxrpc_reject_packet(local, skb);
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 0ba84295f913..fc32aa5764a2 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -858,11 +858,6 @@ void rxrpc_process_call(struct work_struct *work)
iov[0].iov_len = sizeof(whdr);
/* deal with events of a final nature */
- if (test_bit(RXRPC_CALL_EV_RELEASE, &call->events)) {
- rxrpc_release_call(call);
- clear_bit(RXRPC_CALL_EV_RELEASE, &call->events);
- }
-
if (test_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events)) {
enum rxrpc_skb_mark mark;
int error;
@@ -1094,7 +1089,7 @@ void rxrpc_process_call(struct work_struct *work)
if (call->state == RXRPC_CALL_SERVER_SECURING) {
_debug("securing");
- write_lock(&call->conn->lock);
+ write_lock(&call->socket->call_lock);
if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
!test_bit(RXRPC_CALL_EV_RELEASE, &call->events)) {
_debug("not released");
@@ -1102,7 +1097,7 @@ void rxrpc_process_call(struct work_struct *work)
list_move_tail(&call->accept_link,
&call->socket->acceptq);
}
- write_unlock(&call->conn->lock);
+ write_unlock(&call->socket->call_lock);
read_lock(&call->state_lock);
if (call->state < RXRPC_CALL_COMPLETE)
set_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events);
@@ -1144,6 +1139,11 @@ void rxrpc_process_call(struct work_struct *work)
goto maybe_reschedule;
}
+ if (test_bit(RXRPC_CALL_EV_RELEASE, &call->events)) {
+ rxrpc_release_call(call);
+ clear_bit(RXRPC_CALL_EV_RELEASE, &call->events);
+ }
+
/* other events may have been raised since we started checking */
goto maybe_reschedule;
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index ad933daae13b..91287c9d01bb 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -14,7 +14,6 @@
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/circ_buf.h>
-#include <linux/hashtable.h>
#include <linux/spinlock_types.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
@@ -61,142 +60,6 @@ static void rxrpc_dead_call_expired(unsigned long _call);
static void rxrpc_ack_time_expired(unsigned long _call);
static void rxrpc_resend_time_expired(unsigned long _call);
-static DEFINE_SPINLOCK(rxrpc_call_hash_lock);
-static DEFINE_HASHTABLE(rxrpc_call_hash, 10);
-
-/*
- * Hash function for rxrpc_call_hash
- */
-static unsigned long rxrpc_call_hashfunc(
- u8 in_clientflag,
- u32 cid,
- u32 call_id,
- u32 epoch,
- u16 service_id,
- sa_family_t family,
- void *localptr,
- unsigned int addr_size,
- const u8 *peer_addr)
-{
- const u16 *p;
- unsigned int i;
- unsigned long key;
-
- _enter("");
-
- key = (unsigned long)localptr;
- /* We just want to add up the __be32 values, so forcing the
- * cast should be okay.
- */
- key += epoch;
- key += service_id;
- key += call_id;
- key += (cid & RXRPC_CIDMASK) >> RXRPC_CIDSHIFT;
- key += cid & RXRPC_CHANNELMASK;
- key += in_clientflag;
- key += family;
- /* Step through the peer address in 16-bit portions for speed */
- for (i = 0, p = (const u16 *)peer_addr; i < addr_size >> 1; i++, p++)
- key += *p;
- _leave(" key = 0x%lx", key);
- return key;
-}
-
-/*
- * Add a call to the hashtable
- */
-static void rxrpc_call_hash_add(struct rxrpc_call *call)
-{
- unsigned long key;
- unsigned int addr_size = 0;
-
- _enter("");
- switch (call->family) {
- case AF_INET:
- addr_size = sizeof(call->peer_ip.ipv4_addr);
- break;
- case AF_INET6:
- addr_size = sizeof(call->peer_ip.ipv6_addr);
- break;
- default:
- break;
- }
- key = rxrpc_call_hashfunc(call->in_clientflag, call->cid,
- call->call_id, call->epoch,
- call->service_id, call->family,
- call->conn->params.local, addr_size,
- call->peer_ip.ipv6_addr);
- /* Store the full key in the call */
- call->hash_key = key;
- spin_lock(&rxrpc_call_hash_lock);
- hash_add_rcu(rxrpc_call_hash, &call->hash_node, key);
- spin_unlock(&rxrpc_call_hash_lock);
- _leave("");
-}
-
-/*
- * Remove a call from the hashtable
- */
-static void rxrpc_call_hash_del(struct rxrpc_call *call)
-{
- _enter("");
- spin_lock(&rxrpc_call_hash_lock);
- hash_del_rcu(&call->hash_node);
- spin_unlock(&rxrpc_call_hash_lock);
- _leave("");
-}
-
-/*
- * Find a call in the hashtable and return it, or NULL if it
- * isn't there.
- */
-struct rxrpc_call *rxrpc_find_call_hash(
- struct rxrpc_host_header *hdr,
- void *localptr,
- sa_family_t family,
- const void *peer_addr)
-{
- unsigned long key;
- unsigned int addr_size = 0;
- struct rxrpc_call *call = NULL;
- struct rxrpc_call *ret = NULL;
- u8 in_clientflag = hdr->flags & RXRPC_CLIENT_INITIATED;
-
- _enter("");
- switch (family) {
- case AF_INET:
- addr_size = sizeof(call->peer_ip.ipv4_addr);
- break;
- case AF_INET6:
- addr_size = sizeof(call->peer_ip.ipv6_addr);
- break;
- default:
- break;
- }
-
- key = rxrpc_call_hashfunc(in_clientflag, hdr->cid, hdr->callNumber,
- hdr->epoch, hdr->serviceId,
- family, localptr, addr_size,
- peer_addr);
- hash_for_each_possible_rcu(rxrpc_call_hash, call, hash_node, key) {
- if (call->hash_key == key &&
- call->call_id == hdr->callNumber &&
- call->cid == hdr->cid &&
- call->in_clientflag == in_clientflag &&
- call->service_id == hdr->serviceId &&
- call->family == family &&
- call->local == localptr &&
- memcmp(call->peer_ip.ipv6_addr, peer_addr,
- addr_size) == 0 &&
- call->epoch == hdr->epoch) {
- ret = call;
- break;
- }
- }
- _leave(" = %p", ret);
- return ret;
-}
-
/*
* find an extant server call
* - called in process context with IRQs enabled
@@ -305,20 +168,7 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
call->socket = rx;
call->rx_data_post = 1;
- /* Record copies of information for hashtable lookup */
- call->family = rx->family;
call->local = rx->local;
- switch (call->family) {
- case AF_INET:
- call->peer_ip.ipv4_addr = srx->transport.sin.sin_addr.s_addr;
- break;
- case AF_INET6:
- memcpy(call->peer_ip.ipv6_addr,
- srx->transport.sin6.sin6_addr.in6_u.u6_addr8,
- sizeof(call->peer_ip.ipv6_addr));
- break;
- }
-
call->service_id = srx->srx_service;
call->in_clientflag = 0;
@@ -345,9 +195,6 @@ static int rxrpc_begin_client_call(struct rxrpc_call *call,
call->state = RXRPC_CALL_CLIENT_SEND_REQUEST;
- /* Add the new call to the hashtable */
- rxrpc_call_hash_add(call);
-
spin_lock(&call->conn->params.peer->lock);
hlist_add_head(&call->error_link, &call->conn->params.peer->error_targets);
spin_unlock(&call->conn->params.peer->lock);
@@ -425,9 +272,10 @@ error:
rxrpc_put_call(call);
write_lock_bh(&rxrpc_call_lock);
- list_del(&call->link);
+ list_del_init(&call->link);
write_unlock_bh(&rxrpc_call_lock);
+ call->state = RXRPC_CALL_DEAD;
rxrpc_put_call(call);
_leave(" = %d", ret);
return ERR_PTR(ret);
@@ -439,6 +287,7 @@ error:
*/
found_user_ID_now_present:
write_unlock(&rx->call_lock);
+ call->state = RXRPC_CALL_DEAD;
rxrpc_put_call(call);
_leave(" = -EEXIST [%p]", call);
return ERR_PTR(-EEXIST);
@@ -454,8 +303,7 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_call *call, *candidate;
- struct rb_node **p, *parent;
- u32 call_id;
+ u32 call_id, chan;
_enter(",%d", conn->debug_id);
@@ -465,20 +313,23 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
if (!candidate)
return ERR_PTR(-EBUSY);
+ chan = sp->hdr.cid & RXRPC_CHANNELMASK;
candidate->socket = rx;
candidate->conn = conn;
candidate->cid = sp->hdr.cid;
candidate->call_id = sp->hdr.callNumber;
- candidate->channel = sp->hdr.cid & RXRPC_CHANNELMASK;
+ candidate->channel = chan;
candidate->rx_data_post = 0;
candidate->state = RXRPC_CALL_SERVER_ACCEPTING;
if (conn->security_ix > 0)
candidate->state = RXRPC_CALL_SERVER_SECURING;
- write_lock_bh(&conn->lock);
+ spin_lock(&conn->channel_lock);
/* set the channel for this call */
- call = conn->channels[candidate->channel];
+ call = rcu_dereference_protected(conn->channels[chan].call,
+ lockdep_is_held(&conn->channel_lock));
+
_debug("channel[%u] is %p", candidate->channel, call);
if (call && call->call_id == sp->hdr.callNumber) {
/* already set; must've been a duplicate packet */
@@ -507,9 +358,9 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
call->debug_id, rxrpc_call_states[call->state]);
if (call->state >= RXRPC_CALL_COMPLETE) {
- conn->channels[call->channel] = NULL;
+ __rxrpc_disconnect_call(call);
} else {
- write_unlock_bh(&conn->lock);
+ spin_unlock(&conn->channel_lock);
kmem_cache_free(rxrpc_call_jar, candidate);
_leave(" = -EBUSY");
return ERR_PTR(-EBUSY);
@@ -519,33 +370,22 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
/* check the call number isn't duplicate */
_debug("check dup");
call_id = sp->hdr.callNumber;
- p = &conn->calls.rb_node;
- parent = NULL;
- while (*p) {
- parent = *p;
- call = rb_entry(parent, struct rxrpc_call, conn_node);
-
- /* The tree is sorted in order of the __be32 value without
- * turning it into host order.
- */
- if (call_id < call->call_id)
- p = &(*p)->rb_left;
- else if (call_id > call->call_id)
- p = &(*p)->rb_right;
- else
- goto old_call;
- }
+
+ /* We just ignore calls prior to the current call ID. Terminated calls
+ * are handled via the connection.
+ */
+ if (call_id <= conn->channels[chan].call_counter)
+ goto old_call; /* TODO: Just drop packet */
/* make the call available */
_debug("new call");
call = candidate;
candidate = NULL;
- rb_link_node(&call->conn_node, parent, p);
- rb_insert_color(&call->conn_node, &conn->calls);
- conn->channels[call->channel] = call;
+ conn->channels[chan].call_counter = call_id;
+ rcu_assign_pointer(conn->channels[chan].call, call);
sock_hold(&rx->sk);
rxrpc_get_connection(conn);
- write_unlock_bh(&conn->lock);
+ spin_unlock(&conn->channel_lock);
spin_lock(&conn->params.peer->lock);
hlist_add_head(&call->error_link, &conn->params.peer->error_targets);
@@ -555,27 +395,10 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
list_add_tail(&call->link, &rxrpc_calls);
write_unlock_bh(&rxrpc_call_lock);
- /* Record copies of information for hashtable lookup */
- call->family = rx->family;
call->local = conn->params.local;
- switch (call->family) {
- case AF_INET:
- call->peer_ip.ipv4_addr =
- conn->params.peer->srx.transport.sin.sin_addr.s_addr;
- break;
- case AF_INET6:
- memcpy(call->peer_ip.ipv6_addr,
- conn->params.peer->srx.transport.sin6.sin6_addr.in6_u.u6_addr8,
- sizeof(call->peer_ip.ipv6_addr));
- break;
- default:
- break;
- }
call->epoch = conn->proto.epoch;
call->service_id = conn->params.service_id;
- call->in_clientflag = conn->proto.in_clientflag;
- /* Add the new call to the hashtable */
- rxrpc_call_hash_add(call);
+ call->in_clientflag = RXRPC_CLIENT_INITIATED;
_net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id);
@@ -585,19 +408,19 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
return call;
extant_call:
- write_unlock_bh(&conn->lock);
+ spin_unlock(&conn->channel_lock);
kmem_cache_free(rxrpc_call_jar, candidate);
_leave(" = %p {%d} [extant]", call, call ? call->debug_id : -1);
return call;
aborted_call:
- write_unlock_bh(&conn->lock);
+ spin_unlock(&conn->channel_lock);
kmem_cache_free(rxrpc_call_jar, candidate);
_leave(" = -ECONNABORTED");
return ERR_PTR(-ECONNABORTED);
old_call:
- write_unlock_bh(&conn->lock);
+ spin_unlock(&conn->channel_lock);
kmem_cache_free(rxrpc_call_jar, candidate);
_leave(" = -ECONNRESET [old]");
return ERR_PTR(-ECONNRESET);
@@ -626,6 +449,10 @@ void rxrpc_release_call(struct rxrpc_call *call)
*/
_debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn);
+ spin_lock(&conn->params.peer->lock);
+ hlist_del_init(&call->error_link);
+ spin_unlock(&conn->params.peer->lock);
+
write_lock_bh(&rx->call_lock);
if (!list_empty(&call->accept_link)) {
_debug("unlinking once-pending call %p { e=%lx f=%lx }",
@@ -641,24 +468,17 @@ void rxrpc_release_call(struct rxrpc_call *call)
write_unlock_bh(&rx->call_lock);
/* free up the channel for reuse */
- spin_lock(&conn->channel_lock);
- write_lock_bh(&conn->lock);
- write_lock(&call->state_lock);
-
- rxrpc_disconnect_call(call);
-
- spin_unlock(&conn->channel_lock);
+ write_lock_bh(&call->state_lock);
if (call->state < RXRPC_CALL_COMPLETE &&
call->state != RXRPC_CALL_CLIENT_FINAL_ACK) {
_debug("+++ ABORTING STATE %d +++\n", call->state);
call->state = RXRPC_CALL_LOCALLY_ABORTED;
call->local_abort = RX_CALL_DEAD;
- set_bit(RXRPC_CALL_EV_ABORT, &call->events);
- rxrpc_queue_call(call);
}
- write_unlock(&call->state_lock);
- write_unlock_bh(&conn->lock);
+ write_unlock_bh(&call->state_lock);
+
+ rxrpc_disconnect_call(call);
/* clean up the Rx queue */
if (!skb_queue_empty(&call->rx_queue) ||
@@ -792,6 +612,17 @@ void __rxrpc_put_call(struct rxrpc_call *call)
}
/*
+ * Final call destruction under RCU.
+ */
+static void rxrpc_rcu_destroy_call(struct rcu_head *rcu)
+{
+ struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu);
+
+ rxrpc_purge_queue(&call->rx_queue);
+ kmem_cache_free(rxrpc_call_jar, call);
+}
+
+/*
* clean up a call
*/
static void rxrpc_cleanup_call(struct rxrpc_call *call)
@@ -815,19 +646,7 @@ static void rxrpc_cleanup_call(struct rxrpc_call *call)
return;
}
- if (call->conn) {
- spin_lock(&call->conn->params.peer->lock);
- hlist_del_init(&call->error_link);
- spin_unlock(&call->conn->params.peer->lock);
-
- write_lock_bh(&call->conn->lock);
- rb_erase(&call->conn_node, &call->conn->calls);
- write_unlock_bh(&call->conn->lock);
- rxrpc_put_connection(call->conn);
- }
-
- /* Remove the call from the hash */
- rxrpc_call_hash_del(call);
+ ASSERTCMP(call->conn, ==, NULL);
if (call->acks_window) {
_debug("kill Tx window %d",
@@ -855,7 +674,7 @@ static void rxrpc_cleanup_call(struct rxrpc_call *call)
rxrpc_purge_queue(&call->rx_queue);
ASSERT(skb_queue_empty(&call->rx_oos_queue));
sock_put(&call->socket->sk);
- kmem_cache_free(rxrpc_call_jar, call);
+ call_rcu(&call->rcu, rxrpc_rcu_destroy_call);
}
/*
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 82488d6adb83..9e91f27b0d0f 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -33,7 +33,8 @@ static DEFINE_SPINLOCK(rxrpc_conn_id_lock);
* client conns away from the current allocation point to try and keep the IDs
* concentrated. We will also need to retire connections from an old epoch.
*/
-int rxrpc_get_client_connection_id(struct rxrpc_connection *conn, gfp_t gfp)
+static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn,
+ gfp_t gfp)
{
u32 epoch;
int id;
@@ -83,7 +84,7 @@ error:
/*
* Release a connection ID for a client connection from the global pool.
*/
-void rxrpc_put_client_connection_id(struct rxrpc_connection *conn)
+static void rxrpc_put_client_connection_id(struct rxrpc_connection *conn)
{
if (test_bit(RXRPC_CONN_HAS_IDR, &conn->flags)) {
spin_lock(&rxrpc_conn_id_lock);
@@ -92,3 +93,280 @@ void rxrpc_put_client_connection_id(struct rxrpc_connection *conn)
spin_unlock(&rxrpc_conn_id_lock);
}
}
+
+/*
+ * Destroy the client connection ID tree.
+ */
+void rxrpc_destroy_client_conn_ids(void)
+{
+ struct rxrpc_connection *conn;
+ int id;
+
+ if (!idr_is_empty(&rxrpc_client_conn_ids)) {
+ idr_for_each_entry(&rxrpc_client_conn_ids, conn, id) {
+ pr_err("AF_RXRPC: Leaked client conn %p {%d}\n",
+ conn, atomic_read(&conn->usage));
+ }
+ BUG();
+ }
+
+ idr_destroy(&rxrpc_client_conn_ids);
+}
+
+/*
+ * Allocate a client connection. The caller must take care to clear any
+ * padding bytes in *cp.
+ */
+static struct rxrpc_connection *
+rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp)
+{
+ struct rxrpc_connection *conn;
+ int ret;
+
+ _enter("");
+
+ conn = rxrpc_alloc_connection(gfp);
+ if (!conn) {
+ _leave(" = -ENOMEM");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ conn->params = *cp;
+ conn->out_clientflag = RXRPC_CLIENT_INITIATED;
+ conn->state = RXRPC_CONN_CLIENT;
+
+ ret = rxrpc_get_client_connection_id(conn, gfp);
+ if (ret < 0)
+ goto error_0;
+
+ ret = rxrpc_init_client_conn_security(conn);
+ if (ret < 0)
+ goto error_1;
+
+ ret = conn->security->prime_packet_security(conn);
+ if (ret < 0)
+ goto error_2;
+
+ write_lock(&rxrpc_connection_lock);
+ list_add_tail(&conn->link, &rxrpc_connections);
+ write_unlock(&rxrpc_connection_lock);
+
+ /* We steal the caller's peer ref. */
+ cp->peer = NULL;
+ rxrpc_get_local(conn->params.local);
+ key_get(conn->params.key);
+
+ _leave(" = %p", conn);
+ return conn;
+
+error_2:
+ conn->security->clear(conn);
+error_1:
+ rxrpc_put_client_connection_id(conn);
+error_0:
+ kfree(conn);
+ _leave(" = %d", ret);
+ return ERR_PTR(ret);
+}
+
+/*
+ * find a connection for a call
+ * - called in process context with IRQs enabled
+ */
+int rxrpc_connect_call(struct rxrpc_call *call,
+ struct rxrpc_conn_parameters *cp,
+ struct sockaddr_rxrpc *srx,
+ gfp_t gfp)
+{
+ struct rxrpc_connection *conn, *candidate = NULL;
+ struct rxrpc_local *local = cp->local;
+ struct rb_node *p, **pp, *parent;
+ long diff;
+ int chan;
+
+ DECLARE_WAITQUEUE(myself, current);
+
+ _enter("{%d,%lx},", call->debug_id, call->user_call_ID);
+
+ cp->peer = rxrpc_lookup_peer(cp->local, srx, gfp);
+ if (!cp->peer)
+ return -ENOMEM;
+
+ if (!cp->exclusive) {
+ /* Search for a existing client connection unless this is going
+ * to be a connection that's used exclusively for a single call.
+ */
+ _debug("search 1");
+ spin_lock(&local->client_conns_lock);
+ p = local->client_conns.rb_node;
+ while (p) {
+ conn = rb_entry(p, struct rxrpc_connection, client_node);
+
+#define cmp(X) ((long)conn->params.X - (long)cp->X)
+ diff = (cmp(peer) ?:
+ cmp(key) ?:
+ cmp(security_level));
+ if (diff < 0)
+ p = p->rb_left;
+ else if (diff > 0)
+ p = p->rb_right;
+ else
+ goto found_extant_conn;
+ }
+ spin_unlock(&local->client_conns_lock);
+ }
+
+ /* We didn't find a connection or we want an exclusive one. */
+ _debug("get new conn");
+ candidate = rxrpc_alloc_client_connection(cp, gfp);
+ if (!candidate) {
+ _leave(" = -ENOMEM");
+ return -ENOMEM;
+ }
+
+ if (cp->exclusive) {
+ /* Assign the call on an exclusive connection to channel 0 and
+ * don't add the connection to the endpoint's shareable conn
+ * lookup tree.
+ */
+ _debug("exclusive chan 0");
+ conn = candidate;
+ atomic_set(&conn->avail_chans, RXRPC_MAXCALLS - 1);
+ spin_lock(&conn->channel_lock);
+ chan = 0;
+ goto found_channel;
+ }
+
+ /* We need to redo the search before attempting to add a new connection
+ * lest we race with someone else adding a conflicting instance.
+ */
+ _debug("search 2");
+ spin_lock(&local->client_conns_lock);
+
+ pp = &local->client_conns.rb_node;
+ parent = NULL;
+ while (*pp) {
+ parent = *pp;
+ conn = rb_entry(parent, struct rxrpc_connection, client_node);
+
+ diff = (cmp(peer) ?:
+ cmp(key) ?:
+ cmp(security_level));
+ if (diff < 0)
+ pp = &(*pp)->rb_left;
+ else if (diff > 0)
+ pp = &(*pp)->rb_right;
+ else
+ goto found_extant_conn;
+ }
+
+ /* The second search also failed; simply add the new connection with
+ * the new call in channel 0. Note that we need to take the channel
+ * lock before dropping the client conn lock.
+ */
+ _debug("new conn");
+ set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags);
+ rb_link_node(&candidate->client_node, parent, pp);
+ rb_insert_color(&candidate->client_node, &local->client_conns);
+attached:
+ conn = candidate;
+ candidate = NULL;
+
+ atomic_set(&conn->avail_chans, RXRPC_MAXCALLS - 1);
+ spin_lock(&conn->channel_lock);
+ spin_unlock(&local->client_conns_lock);
+ chan = 0;
+
+found_channel:
+ _debug("found chan");
+ call->conn = conn;
+ call->channel = chan;
+ call->epoch = conn->proto.epoch;
+ call->cid = conn->proto.cid | chan;
+ call->call_id = ++conn->channels[chan].call_counter;
+ conn->channels[chan].call_id = call->call_id;
+ rcu_assign_pointer(conn->channels[chan].call, call);
+
+ _net("CONNECT call %d on conn %d", call->debug_id, conn->debug_id);
+
+ spin_unlock(&conn->channel_lock);
+ rxrpc_put_peer(cp->peer);
+ cp->peer = NULL;
+ _leave(" = %p {u=%d}", conn, atomic_read(&conn->usage));
+ return 0;
+
+ /* We found a potentially suitable connection already in existence. If
+ * we can reuse it (ie. its usage count hasn't been reduced to 0 by the
+ * reaper), discard any candidate we may have allocated, and try to get
+ * a channel on this one, otherwise we have to replace it.
+ */
+found_extant_conn:
+ _debug("found conn");
+ if (!rxrpc_get_connection_maybe(conn)) {
+ set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags);
+ rb_replace_node(&conn->client_node,
+ &candidate->client_node,
+ &local->client_conns);
+ clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags);
+ goto attached;
+ }
+
+ spin_unlock(&local->client_conns_lock);
+
+ rxrpc_put_connection(candidate);
+
+ if (!atomic_add_unless(&conn->avail_chans, -1, 0)) {
+ if (!gfpflags_allow_blocking(gfp)) {
+ rxrpc_put_connection(conn);
+ _leave(" = -EAGAIN");
+ return -EAGAIN;
+ }
+
+ add_wait_queue(&conn->channel_wq, &myself);
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (atomic_add_unless(&conn->avail_chans, -1, 0))
+ break;
+ if (signal_pending(current))
+ goto interrupted;
+ schedule();
+ }
+ remove_wait_queue(&conn->channel_wq, &myself);
+ __set_current_state(TASK_RUNNING);
+ }
+
+ /* The connection allegedly now has a free channel and we can now
+ * attach the call to it.
+ */
+ spin_lock(&conn->channel_lock);
+
+ for (chan = 0; chan < RXRPC_MAXCALLS; chan++)
+ if (!conn->channels[chan].call)
+ goto found_channel;
+ BUG();
+
+interrupted:
+ remove_wait_queue(&conn->channel_wq, &myself);
+ __set_current_state(TASK_RUNNING);
+ rxrpc_put_connection(conn);
+ rxrpc_put_peer(cp->peer);
+ cp->peer = NULL;
+ _leave(" = -ERESTARTSYS");
+ return -ERESTARTSYS;
+}
+
+/*
+ * Remove a client connection from the local endpoint's tree, thereby removing
+ * it as a target for reuse for new client calls.
+ */
+void rxrpc_unpublish_client_conn(struct rxrpc_connection *conn)
+{
+ struct rxrpc_local *local = conn->params.local;
+
+ spin_lock(&local->client_conns_lock);
+ if (test_and_clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags))
+ rb_erase(&conn->client_node, &local->client_conns);
+ spin_unlock(&local->client_conns_lock);
+
+ rxrpc_put_client_connection_id(conn);
+}
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index bf6971555eac..cee0f35bc1cf 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -31,15 +31,17 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, int state,
u32 abort_code)
{
struct rxrpc_call *call;
- struct rb_node *p;
+ int i;
_enter("{%d},%x", conn->debug_id, abort_code);
- read_lock_bh(&conn->lock);
+ spin_lock(&conn->channel_lock);
- for (p = rb_first(&conn->calls); p; p = rb_next(p)) {
- call = rb_entry(p, struct rxrpc_call, conn_node);
- write_lock(&call->state_lock);
+ for (i = 0; i < RXRPC_MAXCALLS; i++) {
+ call = rcu_dereference_protected(
+ conn->channels[i].call,
+ lockdep_is_held(&conn->channel_lock));
+ write_lock_bh(&call->state_lock);
if (call->state <= RXRPC_CALL_COMPLETE) {
call->state = state;
if (state == RXRPC_CALL_LOCALLY_ABORTED) {
@@ -51,10 +53,10 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, int state,
}
rxrpc_queue_call(call);
}
- write_unlock(&call->state_lock);
+ write_unlock_bh(&call->state_lock);
}
- read_unlock_bh(&conn->lock);
+ spin_unlock(&conn->channel_lock);
_leave("");
}
@@ -188,18 +190,24 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
if (ret < 0)
return ret;
- conn->security->prime_packet_security(conn);
- read_lock_bh(&conn->lock);
+ ret = conn->security->prime_packet_security(conn);
+ if (ret < 0)
+ return ret;
+
+ spin_lock(&conn->channel_lock);
spin_lock(&conn->state_lock);
- if (conn->state == RXRPC_CONN_SERVER_CHALLENGING) {
- conn->state = RXRPC_CONN_SERVER;
+ if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) {
+ conn->state = RXRPC_CONN_SERVICE;
for (loop = 0; loop < RXRPC_MAXCALLS; loop++)
- rxrpc_call_is_secure(conn->channels[loop]);
+ rxrpc_call_is_secure(
+ rcu_dereference_protected(
+ conn->channels[loop].call,
+ lockdep_is_held(&conn->channel_lock)));
}
spin_unlock(&conn->state_lock);
- read_unlock_bh(&conn->lock);
+ spin_unlock(&conn->channel_lock);
return 0;
default:
@@ -263,12 +271,8 @@ void rxrpc_process_connection(struct work_struct *work)
_enter("{%d}", conn->debug_id);
- rxrpc_get_connection(conn);
-
- if (test_and_clear_bit(RXRPC_CONN_CHALLENGE, &conn->events)) {
+ if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events))
rxrpc_secure_connection(conn);
- rxrpc_put_connection(conn);
- }
/* go through the conn-level event packets, releasing the ref on this
* connection that each one has when we've finished with it */
@@ -283,7 +287,6 @@ void rxrpc_process_connection(struct work_struct *work)
goto requeue_and_leave;
case -ECONNABORTED:
default:
- rxrpc_put_connection(conn);
rxrpc_free_skb(skb);
break;
}
@@ -301,7 +304,6 @@ requeue_and_leave:
protocol_error:
if (rxrpc_abort_connection(conn, -ret, abort_code) < 0)
goto requeue_and_leave;
- rxrpc_put_connection(conn);
rxrpc_free_skb(skb);
_leave(" [EPROTO]");
goto out;
@@ -315,7 +317,7 @@ void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
CHECK_SLAB_OKAY(&local->usage);
skb_queue_tail(&local->reject_queue, skb);
- rxrpc_queue_work(&local->processor);
+ rxrpc_queue_local(local);
}
/*
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 4bfad7cf96cb..896d84493a05 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -15,7 +15,6 @@
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/skbuff.h>
-#include <linux/crypto.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include "ar-internal.h"
@@ -34,7 +33,7 @@ static DECLARE_DELAYED_WORK(rxrpc_connection_reap, rxrpc_connection_reaper);
/*
* allocate a new connection
*/
-static struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
+struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
{
struct rxrpc_connection *conn;
@@ -46,12 +45,13 @@ static struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
init_waitqueue_head(&conn->channel_wq);
INIT_WORK(&conn->processor, &rxrpc_process_connection);
INIT_LIST_HEAD(&conn->link);
- conn->calls = RB_ROOT;
skb_queue_head_init(&conn->rx_queue);
conn->security = &rxrpc_no_security;
- rwlock_init(&conn->lock);
spin_lock_init(&conn->state_lock);
- atomic_set(&conn->usage, 1);
+ /* We maintain an extra ref on the connection whilst it is
+ * on the rxrpc_connections list.
+ */
+ atomic_set(&conn->usage, 2);
conn->debug_id = atomic_inc_return(&rxrpc_debug_id);
atomic_set(&conn->avail_chans, RXRPC_MAXCALLS);
conn->size_align = 4;
@@ -63,465 +63,118 @@ static struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
}
/*
- * add a call to a connection's call-by-ID tree
- */
-static void rxrpc_add_call_ID_to_conn(struct rxrpc_connection *conn,
- struct rxrpc_call *call)
-{
- struct rxrpc_call *xcall;
- struct rb_node *parent, **p;
- __be32 call_id;
-
- write_lock_bh(&conn->lock);
-
- call_id = call->call_id;
- p = &conn->calls.rb_node;
- parent = NULL;
- while (*p) {
- parent = *p;
- xcall = rb_entry(parent, struct rxrpc_call, conn_node);
-
- if (call_id < xcall->call_id)
- p = &(*p)->rb_left;
- else if (call_id > xcall->call_id)
- p = &(*p)->rb_right;
- else
- BUG();
- }
-
- rb_link_node(&call->conn_node, parent, p);
- rb_insert_color(&call->conn_node, &conn->calls);
-
- write_unlock_bh(&conn->lock);
-}
-
-/*
- * Allocate a client connection. The caller must take care to clear any
- * padding bytes in *cp.
+ * Look up a connection in the cache by protocol parameters.
+ *
+ * If successful, a pointer to the connection is returned, but no ref is taken.
+ * NULL is returned if there is no match.
+ *
+ * The caller must be holding the RCU read lock.
*/
-static struct rxrpc_connection *
-rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp)
+struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local,
+ struct sk_buff *skb)
{
struct rxrpc_connection *conn;
- int ret;
-
- _enter("");
-
- conn = rxrpc_alloc_connection(gfp);
- if (!conn) {
- _leave(" = -ENOMEM");
- return ERR_PTR(-ENOMEM);
- }
-
- conn->params = *cp;
- conn->proto.local = cp->local;
- conn->proto.epoch = rxrpc_epoch;
- conn->proto.cid = 0;
- conn->proto.in_clientflag = 0;
- conn->proto.family = cp->peer->srx.transport.family;
- conn->out_clientflag = RXRPC_CLIENT_INITIATED;
- conn->state = RXRPC_CONN_CLIENT;
-
- switch (conn->proto.family) {
- case AF_INET:
- conn->proto.addr_size = sizeof(conn->proto.ipv4_addr);
- conn->proto.ipv4_addr = cp->peer->srx.transport.sin.sin_addr;
- conn->proto.port = cp->peer->srx.transport.sin.sin_port;
- break;
- }
-
- ret = rxrpc_get_client_connection_id(conn, gfp);
- if (ret < 0)
- goto error_0;
-
- ret = rxrpc_init_client_conn_security(conn);
- if (ret < 0)
- goto error_1;
-
- conn->security->prime_packet_security(conn);
-
- write_lock(&rxrpc_connection_lock);
- list_add_tail(&conn->link, &rxrpc_connections);
- write_unlock(&rxrpc_connection_lock);
-
- /* We steal the caller's peer ref. */
- cp->peer = NULL;
- rxrpc_get_local(conn->params.local);
- key_get(conn->params.key);
-
- _leave(" = %p", conn);
- return conn;
-
-error_1:
- rxrpc_put_client_connection_id(conn);
-error_0:
- kfree(conn);
- _leave(" = %d", ret);
- return ERR_PTR(ret);
-}
-
-/*
- * find a connection for a call
- * - called in process context with IRQs enabled
- */
-int rxrpc_connect_call(struct rxrpc_call *call,
- struct rxrpc_conn_parameters *cp,
- struct sockaddr_rxrpc *srx,
- gfp_t gfp)
-{
- struct rxrpc_connection *conn, *candidate = NULL;
- struct rxrpc_local *local = cp->local;
- struct rb_node *p, **pp, *parent;
- long diff;
- int chan;
+ struct rxrpc_conn_proto k;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct sockaddr_rxrpc srx;
+ struct rxrpc_peer *peer;
- DECLARE_WAITQUEUE(myself, current);
+ _enter(",%x", sp->hdr.cid & RXRPC_CIDMASK);
- _enter("{%d,%lx},", call->debug_id, call->user_call_ID);
+ if (rxrpc_extract_addr_from_skb(&srx, skb) < 0)
+ goto not_found;
- cp->peer = rxrpc_lookup_peer(cp->local, srx, gfp);
- if (!cp->peer)
- return -ENOMEM;
+ k.epoch = sp->hdr.epoch;
+ k.cid = sp->hdr.cid & RXRPC_CIDMASK;
- if (!cp->exclusive) {
- /* Search for a existing client connection unless this is going
- * to be a connection that's used exclusively for a single call.
- */
- _debug("search 1");
- spin_lock(&local->client_conns_lock);
- p = local->client_conns.rb_node;
- while (p) {
- conn = rb_entry(p, struct rxrpc_connection, client_node);
-
-#define cmp(X) ((long)conn->params.X - (long)cp->X)
- diff = (cmp(peer) ?:
- cmp(key) ?:
- cmp(security_level));
- if (diff < 0)
- p = p->rb_left;
- else if (diff > 0)
- p = p->rb_right;
- else
- goto found_extant_conn;
- }
- spin_unlock(&local->client_conns_lock);
+ /* We may have to handle mixing IPv4 and IPv6 */
+ if (srx.transport.family != local->srx.transport.family) {
+ pr_warn_ratelimited("AF_RXRPC: Protocol mismatch %u not %u\n",
+ srx.transport.family,
+ local->srx.transport.family);
+ goto not_found;
}
- /* We didn't find a connection or we want an exclusive one. */
- _debug("get new conn");
- candidate = rxrpc_alloc_client_connection(cp, gfp);
- if (!candidate) {
- _leave(" = -ENOMEM");
- return -ENOMEM;
- }
+ k.epoch = sp->hdr.epoch;
+ k.cid = sp->hdr.cid & RXRPC_CIDMASK;
- if (cp->exclusive) {
- /* Assign the call on an exclusive connection to channel 0 and
- * don't add the connection to the endpoint's shareable conn
- * lookup tree.
+ if (sp->hdr.flags & RXRPC_CLIENT_INITIATED) {
+ /* We need to look up service connections by the full protocol
+ * parameter set. We look up the peer first as an intermediate
+ * step and then the connection from the peer's tree.
*/
- _debug("exclusive chan 0");
- conn = candidate;
- atomic_set(&conn->avail_chans, RXRPC_MAXCALLS - 1);
- spin_lock(&conn->channel_lock);
- chan = 0;
- goto found_channel;
- }
-
- /* We need to redo the search before attempting to add a new connection
- * lest we race with someone else adding a conflicting instance.
- */
- _debug("search 2");
- spin_lock(&local->client_conns_lock);
-
- pp = &local->client_conns.rb_node;
- parent = NULL;
- while (*pp) {
- parent = *pp;
- conn = rb_entry(parent, struct rxrpc_connection, client_node);
-
- diff = (cmp(peer) ?:
- cmp(key) ?:
- cmp(security_level));
- if (diff < 0)
- pp = &(*pp)->rb_left;
- else if (diff > 0)
- pp = &(*pp)->rb_right;
- else
- goto found_extant_conn;
- }
-
- /* The second search also failed; simply add the new connection with
- * the new call in channel 0. Note that we need to take the channel
- * lock before dropping the client conn lock.
- */
- _debug("new conn");
- conn = candidate;
- candidate = NULL;
-
- rb_link_node(&conn->client_node, parent, pp);
- rb_insert_color(&conn->client_node, &local->client_conns);
-
- atomic_set(&conn->avail_chans, RXRPC_MAXCALLS - 1);
- spin_lock(&conn->channel_lock);
- spin_unlock(&local->client_conns_lock);
- chan = 0;
-
-found_channel:
- _debug("found chan");
- call->conn = conn;
- call->channel = chan;
- call->epoch = conn->proto.epoch;
- call->cid = conn->proto.cid | chan;
- call->call_id = ++conn->call_counter;
- rcu_assign_pointer(conn->channels[chan], call);
-
- _net("CONNECT call %d on conn %d", call->debug_id, conn->debug_id);
-
- rxrpc_add_call_ID_to_conn(conn, call);
- spin_unlock(&conn->channel_lock);
- rxrpc_put_peer(cp->peer);
- cp->peer = NULL;
- _leave(" = %p {u=%d}", conn, atomic_read(&conn->usage));
- return 0;
-
- /* We found a suitable connection already in existence. Discard any
- * candidate we may have allocated, and try to get a channel on this
- * one.
- */
-found_extant_conn:
- _debug("found conn");
- rxrpc_get_connection(conn);
- spin_unlock(&local->client_conns_lock);
-
- rxrpc_put_connection(candidate);
-
- if (!atomic_add_unless(&conn->avail_chans, -1, 0)) {
- if (!gfpflags_allow_blocking(gfp)) {
- rxrpc_put_connection(conn);
- _leave(" = -EAGAIN");
- return -EAGAIN;
+ peer = rxrpc_lookup_peer_rcu(local, &srx);
+ if (!peer)
+ goto not_found;
+ conn = rxrpc_find_service_conn_rcu(peer, skb);
+ if (!conn || atomic_read(&conn->usage) == 0)
+ goto not_found;
+ _leave(" = %p", conn);
+ return conn;
+ } else {
+ /* Look up client connections by connection ID alone as their
+ * IDs are unique for this machine.
+ */
+ conn = idr_find(&rxrpc_client_conn_ids,
+ sp->hdr.cid >> RXRPC_CIDSHIFT);
+ if (!conn || atomic_read(&conn->usage) == 0) {
+ _debug("no conn");
+ goto not_found;
}
- add_wait_queue(&conn->channel_wq, &myself);
- for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
- if (atomic_add_unless(&conn->avail_chans, -1, 0))
- break;
- if (signal_pending(current))
- goto interrupted;
- schedule();
+ if (conn->proto.epoch != k.epoch ||
+ conn->params.local != local)
+ goto not_found;
+
+ peer = conn->params.peer;
+ switch (srx.transport.family) {
+ case AF_INET:
+ if (peer->srx.transport.sin.sin_port !=
+ srx.transport.sin.sin_port ||
+ peer->srx.transport.sin.sin_addr.s_addr !=
+ srx.transport.sin.sin_addr.s_addr)
+ goto not_found;
+ break;
+ default:
+ BUG();
}
- remove_wait_queue(&conn->channel_wq, &myself);
- __set_current_state(TASK_RUNNING);
- }
-
- /* The connection allegedly now has a free channel and we can now
- * attach the call to it.
- */
- spin_lock(&conn->channel_lock);
-
- for (chan = 0; chan < RXRPC_MAXCALLS; chan++)
- if (!conn->channels[chan])
- goto found_channel;
- BUG();
-
-interrupted:
- remove_wait_queue(&conn->channel_wq, &myself);
- __set_current_state(TASK_RUNNING);
- rxrpc_put_connection(conn);
- rxrpc_put_peer(cp->peer);
- cp->peer = NULL;
- _leave(" = -ERESTARTSYS");
- return -ERESTARTSYS;
-}
-
-/*
- * get a record of an incoming connection
- */
-struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *local,
- struct rxrpc_peer *peer,
- struct sk_buff *skb)
-{
- struct rxrpc_connection *conn, *candidate = NULL;
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- struct rb_node *p, **pp;
- const char *new = "old";
- __be32 epoch;
- u32 cid;
-
- _enter("");
-
- ASSERT(sp->hdr.flags & RXRPC_CLIENT_INITIATED);
-
- epoch = sp->hdr.epoch;
- cid = sp->hdr.cid & RXRPC_CIDMASK;
-
- /* search the connection list first */
- read_lock_bh(&peer->conn_lock);
-
- p = peer->service_conns.rb_node;
- while (p) {
- conn = rb_entry(p, struct rxrpc_connection, service_node);
- _debug("maybe %x", conn->proto.cid);
-
- if (epoch < conn->proto.epoch)
- p = p->rb_left;
- else if (epoch > conn->proto.epoch)
- p = p->rb_right;
- else if (cid < conn->proto.cid)
- p = p->rb_left;
- else if (cid > conn->proto.cid)
- p = p->rb_right;
- else
- goto found_extant_connection;
- }
- read_unlock_bh(&peer->conn_lock);
-
- /* not yet present - create a candidate for a new record and then
- * redo the search */
- candidate = rxrpc_alloc_connection(GFP_NOIO);
- if (!candidate) {
- _leave(" = -ENOMEM");
- return ERR_PTR(-ENOMEM);
+ _leave(" = %p", conn);
+ return conn;
}
- candidate->proto.local = local;
- candidate->proto.epoch = sp->hdr.epoch;
- candidate->proto.cid = sp->hdr.cid & RXRPC_CIDMASK;
- candidate->proto.in_clientflag = RXRPC_CLIENT_INITIATED;
- candidate->params.local = local;
- candidate->params.peer = peer;
- candidate->params.service_id = sp->hdr.serviceId;
- candidate->security_ix = sp->hdr.securityIndex;
- candidate->out_clientflag = 0;
- candidate->state = RXRPC_CONN_SERVER;
- if (candidate->params.service_id)
- candidate->state = RXRPC_CONN_SERVER_UNSECURED;
-
- write_lock_bh(&peer->conn_lock);
-
- pp = &peer->service_conns.rb_node;
- p = NULL;
- while (*pp) {
- p = *pp;
- conn = rb_entry(p, struct rxrpc_connection, service_node);
-
- if (epoch < conn->proto.epoch)
- pp = &(*pp)->rb_left;
- else if (epoch > conn->proto.epoch)
- pp = &(*pp)->rb_right;
- else if (cid < conn->proto.cid)
- pp = &(*pp)->rb_left;
- else if (cid > conn->proto.cid)
- pp = &(*pp)->rb_right;
- else
- goto found_extant_second;
- }
-
- /* we can now add the new candidate to the list */
- conn = candidate;
- candidate = NULL;
- rb_link_node(&conn->service_node, p, pp);
- rb_insert_color(&conn->service_node, &peer->service_conns);
- rxrpc_get_peer(peer);
- rxrpc_get_local(local);
-
- write_unlock_bh(&peer->conn_lock);
-
- write_lock(&rxrpc_connection_lock);
- list_add_tail(&conn->link, &rxrpc_connections);
- write_unlock(&rxrpc_connection_lock);
-
- new = "new";
-
-success:
- _net("CONNECTION %s %d {%x}", new, conn->debug_id, conn->proto.cid);
-
- _leave(" = %p {u=%d}", conn, atomic_read(&conn->usage));
- return conn;
-
- /* we found the connection in the list immediately */
-found_extant_connection:
- if (sp->hdr.securityIndex != conn->security_ix) {
- read_unlock_bh(&peer->conn_lock);
- goto security_mismatch;
- }
- rxrpc_get_connection(conn);
- read_unlock_bh(&peer->conn_lock);
- goto success;
-
- /* we found the connection on the second time through the list */
-found_extant_second:
- if (sp->hdr.securityIndex != conn->security_ix) {
- write_unlock_bh(&peer->conn_lock);
- goto security_mismatch;
- }
- rxrpc_get_connection(conn);
- write_unlock_bh(&peer->conn_lock);
- kfree(candidate);
- goto success;
-
-security_mismatch:
- kfree(candidate);
- _leave(" = -EKEYREJECTED");
- return ERR_PTR(-EKEYREJECTED);
+not_found:
+ _leave(" = NULL");
+ return NULL;
}
/*
- * find a connection based on transport and RxRPC connection ID for an incoming
- * packet
+ * Disconnect a call and clear any channel it occupies when that call
+ * terminates. The caller must hold the channel_lock and must release the
+ * call's ref on the connection.
*/
-struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_local *local,
- struct rxrpc_peer *peer,
- struct sk_buff *skb)
+void __rxrpc_disconnect_call(struct rxrpc_call *call)
{
- struct rxrpc_connection *conn;
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- struct rb_node *p;
- u32 epoch, cid;
-
- _enter(",{%x,%x}", sp->hdr.cid, sp->hdr.flags);
+ struct rxrpc_connection *conn = call->conn;
+ struct rxrpc_channel *chan = &conn->channels[call->channel];
- read_lock_bh(&peer->conn_lock);
+ _enter("%d,%d", conn->debug_id, call->channel);
- cid = sp->hdr.cid & RXRPC_CIDMASK;
- epoch = sp->hdr.epoch;
+ if (rcu_access_pointer(chan->call) == call) {
+ /* Save the result of the call so that we can repeat it if necessary
+ * through the channel, whilst disposing of the actual call record.
+ */
+ chan->last_result = call->local_abort;
+ smp_wmb();
+ chan->last_call = chan->call_id;
+ chan->call_id = chan->call_counter;
- if (sp->hdr.flags & RXRPC_CLIENT_INITIATED) {
- p = peer->service_conns.rb_node;
- while (p) {
- conn = rb_entry(p, struct rxrpc_connection, service_node);
-
- _debug("maybe %x", conn->proto.cid);
-
- if (epoch < conn->proto.epoch)
- p = p->rb_left;
- else if (epoch > conn->proto.epoch)
- p = p->rb_right;
- else if (cid < conn->proto.cid)
- p = p->rb_left;
- else if (cid > conn->proto.cid)
- p = p->rb_right;
- else
- goto found;
- }
- } else {
- conn = idr_find(&rxrpc_client_conn_ids, cid >> RXRPC_CIDSHIFT);
- if (conn && conn->proto.epoch == epoch)
- goto found;
+ rcu_assign_pointer(chan->call, NULL);
+ atomic_inc(&conn->avail_chans);
+ wake_up(&conn->channel_wq);
}
- read_unlock_bh(&peer->conn_lock);
- _leave(" = NULL");
- return NULL;
-
-found:
- rxrpc_get_connection(conn);
- read_unlock_bh(&peer->conn_lock);
- _leave(" = %p", conn);
- return conn;
+ _leave("");
}
/*
@@ -531,15 +184,13 @@ found:
void rxrpc_disconnect_call(struct rxrpc_call *call)
{
struct rxrpc_connection *conn = call->conn;
- unsigned chan = call->channel;
- _enter("%d,%d", conn->debug_id, call->channel);
+ spin_lock(&conn->channel_lock);
+ __rxrpc_disconnect_call(call);
+ spin_unlock(&conn->channel_lock);
- if (conn->channels[chan] == call) {
- rcu_assign_pointer(conn->channels[chan], NULL);
- atomic_inc(&conn->avail_chans);
- wake_up(&conn->channel_wq);
- }
+ call->conn = NULL;
+ rxrpc_put_connection(conn);
}
/*
@@ -553,10 +204,10 @@ void rxrpc_put_connection(struct rxrpc_connection *conn)
_enter("%p{u=%d,d=%d}",
conn, atomic_read(&conn->usage), conn->debug_id);
- ASSERTCMP(atomic_read(&conn->usage), >, 0);
+ ASSERTCMP(atomic_read(&conn->usage), >, 1);
conn->put_time = ktime_get_seconds();
- if (atomic_dec_and_test(&conn->usage)) {
+ if (atomic_dec_return(&conn->usage) == 1) {
_debug("zombie");
rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
}
@@ -567,15 +218,17 @@ void rxrpc_put_connection(struct rxrpc_connection *conn)
/*
* destroy a virtual connection
*/
-static void rxrpc_destroy_connection(struct rxrpc_connection *conn)
+static void rxrpc_destroy_connection(struct rcu_head *rcu)
{
- _enter("%p{%d}", conn, atomic_read(&conn->usage));
+ struct rxrpc_connection *conn =
+ container_of(rcu, struct rxrpc_connection, rcu);
+
+ _enter("{%d,u=%d}", conn->debug_id, atomic_read(&conn->usage));
ASSERTCMP(atomic_read(&conn->usage), ==, 0);
_net("DESTROY CONN %d", conn->debug_id);
- ASSERT(RB_EMPTY_ROOT(&conn->calls));
rxrpc_purge_queue(&conn->rx_queue);
conn->security->clear(conn);
@@ -594,59 +247,41 @@ static void rxrpc_destroy_connection(struct rxrpc_connection *conn)
static void rxrpc_connection_reaper(struct work_struct *work)
{
struct rxrpc_connection *conn, *_p;
- struct rxrpc_peer *peer;
- unsigned long now, earliest, reap_time;
+ unsigned long reap_older_than, earliest, put_time, now;
LIST_HEAD(graveyard);
_enter("");
now = ktime_get_seconds();
+ reap_older_than = now - rxrpc_connection_expiry;
earliest = ULONG_MAX;
write_lock(&rxrpc_connection_lock);
list_for_each_entry_safe(conn, _p, &rxrpc_connections, link) {
- _debug("reap CONN %d { u=%d,t=%ld }",
- conn->debug_id, atomic_read(&conn->usage),
- (long) now - (long) conn->put_time);
-
- if (likely(atomic_read(&conn->usage) > 0))
+ ASSERTCMP(atomic_read(&conn->usage), >, 0);
+ if (likely(atomic_read(&conn->usage) > 1))
continue;
- if (rxrpc_conn_is_client(conn)) {
- struct rxrpc_local *local = conn->params.local;
- spin_lock(&local->client_conns_lock);
- reap_time = conn->put_time + rxrpc_connection_expiry;
-
- if (atomic_read(&conn->usage) > 0) {
- ;
- } else if (reap_time <= now) {
- list_move_tail(&conn->link, &graveyard);
- rxrpc_put_client_connection_id(conn);
- rb_erase(&conn->client_node,
- &local->client_conns);
- } else if (reap_time < earliest) {
- earliest = reap_time;
- }
-
- spin_unlock(&local->client_conns_lock);
- } else {
- peer = conn->params.peer;
- write_lock_bh(&peer->conn_lock);
- reap_time = conn->put_time + rxrpc_connection_expiry;
-
- if (atomic_read(&conn->usage) > 0) {
- ;
- } else if (reap_time <= now) {
- list_move_tail(&conn->link, &graveyard);
- rb_erase(&conn->service_node,
- &peer->service_conns);
- } else if (reap_time < earliest) {
- earliest = reap_time;
- }
-
- write_unlock_bh(&peer->conn_lock);
+ put_time = READ_ONCE(conn->put_time);
+ if (time_after(put_time, reap_older_than)) {
+ if (time_before(put_time, earliest))
+ earliest = put_time;
+ continue;
}
+
+ /* The usage count sits at 1 whilst the object is unused on the
+ * list; we reduce that to 0 to make the object unavailable.
+ */
+ if (atomic_cmpxchg(&conn->usage, 1, 0) != 1)
+ continue;
+
+ if (rxrpc_conn_is_client(conn))
+ rxrpc_unpublish_client_conn(conn);
+ else
+ rxrpc_unpublish_service_conn(conn);
+
+ list_move_tail(&conn->link, &graveyard);
}
write_unlock(&rxrpc_connection_lock);
@@ -657,14 +292,14 @@ static void rxrpc_connection_reaper(struct work_struct *work)
(earliest - now) * HZ);
}
- /* then destroy all those pulled out */
while (!list_empty(&graveyard)) {
conn = list_entry(graveyard.next, struct rxrpc_connection,
link);
list_del_init(&conn->link);
ASSERTCMP(atomic_read(&conn->usage), ==, 0);
- rxrpc_destroy_connection(conn);
+ skb_queue_purge(&conn->rx_queue);
+ call_rcu(&conn->rcu, rxrpc_destroy_connection);
}
_leave("");
@@ -676,11 +311,30 @@ static void rxrpc_connection_reaper(struct work_struct *work)
*/
void __exit rxrpc_destroy_all_connections(void)
{
+ struct rxrpc_connection *conn, *_p;
+ bool leak = false;
+
_enter("");
rxrpc_connection_expiry = 0;
cancel_delayed_work(&rxrpc_connection_reap);
rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
+ flush_workqueue(rxrpc_workqueue);
+
+ write_lock(&rxrpc_connection_lock);
+ list_for_each_entry_safe(conn, _p, &rxrpc_connections, link) {
+ pr_err("AF_RXRPC: Leaked conn %p {%d}\n",
+ conn, atomic_read(&conn->usage));
+ leak = true;
+ }
+ write_unlock(&rxrpc_connection_lock);
+ BUG_ON(leak);
+
+ /* Make sure the local and peer records pinned by any dying connections
+ * are released.
+ */
+ rcu_barrier();
+ rxrpc_destroy_client_conn_ids();
_leave("");
}
diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
new file mode 100644
index 000000000000..7cbd612be0d7
--- /dev/null
+++ b/net/rxrpc/conn_service.c
@@ -0,0 +1,230 @@
+/* Service connection management
+ *
+ * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/slab.h>
+#include "ar-internal.h"
+
+/*
+ * Find a service connection under RCU conditions.
+ *
+ * We could use a hash table, but that is subject to bucket stuffing by an
+ * attacker as the client gets to pick the epoch and cid values and would know
+ * the hash function. So, instead, we use a hash table for the peer and from
+ * that an rbtree to find the service connection. Under ordinary circumstances
+ * it might be slower than a large hash table, but it is at least limited in
+ * depth.
+ */
+struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *peer,
+ struct sk_buff *skb)
+{
+ struct rxrpc_connection *conn = NULL;
+ struct rxrpc_conn_proto k;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct rb_node *p;
+ unsigned int seq = 0;
+
+ k.epoch = sp->hdr.epoch;
+ k.cid = sp->hdr.cid & RXRPC_CIDMASK;
+
+ do {
+ /* Unfortunately, rbtree walking doesn't give reliable results
+ * under just the RCU read lock, so we have to check for
+ * changes.
+ */
+ read_seqbegin_or_lock(&peer->service_conn_lock, &seq);
+
+ p = rcu_dereference_raw(peer->service_conns.rb_node);
+ while (p) {
+ conn = rb_entry(p, struct rxrpc_connection, service_node);
+
+ if (conn->proto.index_key < k.index_key)
+ p = rcu_dereference_raw(p->rb_left);
+ else if (conn->proto.index_key > k.index_key)
+ p = rcu_dereference_raw(p->rb_right);
+ else
+ goto done;
+ conn = NULL;
+ }
+ } while (need_seqretry(&peer->service_conn_lock, seq));
+
+done:
+ done_seqretry(&peer->service_conn_lock, seq);
+ _leave(" = %d", conn ? conn->debug_id : -1);
+ return conn;
+}
+
+/*
+ * Insert a service connection into a peer's tree, thereby making it a target
+ * for incoming packets.
+ */
+static struct rxrpc_connection *
+rxrpc_publish_service_conn(struct rxrpc_peer *peer,
+ struct rxrpc_connection *conn)
+{
+ struct rxrpc_connection *cursor = NULL;
+ struct rxrpc_conn_proto k = conn->proto;
+ struct rb_node **pp, *parent;
+
+ write_seqlock_bh(&peer->service_conn_lock);
+
+ pp = &peer->service_conns.rb_node;
+ parent = NULL;
+ while (*pp) {
+ parent = *pp;
+ cursor = rb_entry(parent,
+ struct rxrpc_connection, service_node);
+
+ if (cursor->proto.index_key < k.index_key)
+ pp = &(*pp)->rb_left;
+ else if (cursor->proto.index_key > k.index_key)
+ pp = &(*pp)->rb_right;
+ else
+ goto found_extant_conn;
+ }
+
+ rb_link_node_rcu(&conn->service_node, parent, pp);
+ rb_insert_color(&conn->service_node, &peer->service_conns);
+conn_published:
+ set_bit(RXRPC_CONN_IN_SERVICE_CONNS, &conn->flags);
+ write_sequnlock_bh(&peer->service_conn_lock);
+ _leave(" = %d [new]", conn->debug_id);
+ return conn;
+
+found_extant_conn:
+ if (atomic_read(&cursor->usage) == 0)
+ goto replace_old_connection;
+ write_sequnlock_bh(&peer->service_conn_lock);
+ /* We should not be able to get here. rxrpc_incoming_connection() is
+ * called in a non-reentrant context, so there can't be a race to
+ * insert a new connection.
+ */
+ BUG();
+
+replace_old_connection:
+ /* The old connection is from an outdated epoch. */
+ _debug("replace conn");
+ rb_replace_node_rcu(&cursor->service_node,
+ &conn->service_node,
+ &peer->service_conns);
+ clear_bit(RXRPC_CONN_IN_SERVICE_CONNS, &cursor->flags);
+ goto conn_published;
+}
+
+/*
+ * get a record of an incoming connection
+ */
+struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *local,
+ struct sockaddr_rxrpc *srx,
+ struct sk_buff *skb)
+{
+ struct rxrpc_connection *conn;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct rxrpc_peer *peer;
+ const char *new = "old";
+
+ _enter("");
+
+ peer = rxrpc_lookup_peer(local, srx, GFP_NOIO);
+ if (!peer) {
+ _debug("no peer");
+ return ERR_PTR(-EBUSY);
+ }
+
+ ASSERT(sp->hdr.flags & RXRPC_CLIENT_INITIATED);
+
+ rcu_read_lock();
+ peer = rxrpc_lookup_peer_rcu(local, srx);
+ if (peer) {
+ conn = rxrpc_find_service_conn_rcu(peer, skb);
+ if (conn) {
+ if (sp->hdr.securityIndex != conn->security_ix)
+ goto security_mismatch_rcu;
+ if (rxrpc_get_connection_maybe(conn))
+ goto found_extant_connection_rcu;
+
+ /* The conn has expired but we can't remove it without
+ * the appropriate lock, so we attempt to replace it
+ * when we have a new candidate.
+ */
+ }
+
+ if (!rxrpc_get_peer_maybe(peer))
+ peer = NULL;
+ }
+ rcu_read_unlock();
+
+ if (!peer) {
+ peer = rxrpc_lookup_peer(local, srx, GFP_NOIO);
+ if (IS_ERR(peer))
+ goto enomem;
+ }
+
+ /* We don't have a matching record yet. */
+ conn = rxrpc_alloc_connection(GFP_NOIO);
+ if (!conn)
+ goto enomem_peer;
+
+ conn->proto.epoch = sp->hdr.epoch;
+ conn->proto.cid = sp->hdr.cid & RXRPC_CIDMASK;
+ conn->params.local = local;
+ conn->params.peer = peer;
+ conn->params.service_id = sp->hdr.serviceId;
+ conn->security_ix = sp->hdr.securityIndex;
+ conn->out_clientflag = 0;
+ conn->state = RXRPC_CONN_SERVICE;
+ if (conn->params.service_id)
+ conn->state = RXRPC_CONN_SERVICE_UNSECURED;
+
+ rxrpc_get_local(local);
+
+ write_lock(&rxrpc_connection_lock);
+ list_add_tail(&conn->link, &rxrpc_connections);
+ write_unlock(&rxrpc_connection_lock);
+
+ /* Make the connection a target for incoming packets. */
+ rxrpc_publish_service_conn(peer, conn);
+
+ new = "new";
+
+success:
+ _net("CONNECTION %s %d {%x}", new, conn->debug_id, conn->proto.cid);
+ _leave(" = %p {u=%d}", conn, atomic_read(&conn->usage));
+ return conn;
+
+found_extant_connection_rcu:
+ rcu_read_unlock();
+ goto success;
+
+security_mismatch_rcu:
+ rcu_read_unlock();
+ _leave(" = -EKEYREJECTED");
+ return ERR_PTR(-EKEYREJECTED);
+
+enomem_peer:
+ rxrpc_put_peer(peer);
+enomem:
+ _leave(" = -ENOMEM");
+ return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Remove the service connection from the peer's tree, thereby removing it as a
+ * target for incoming packets.
+ */
+void rxrpc_unpublish_service_conn(struct rxrpc_connection *conn)
+{
+ struct rxrpc_peer *peer = conn->params.peer;
+
+ write_seqlock_bh(&peer->service_conn_lock);
+ if (test_and_clear_bit(RXRPC_CONN_IN_SERVICE_CONNS, &conn->flags))
+ rb_erase(&conn->service_node, &peer->service_conns);
+ write_sequnlock_bh(&peer->service_conn_lock);
+}
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index f4bd57b77b93..991a20d25093 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -476,7 +476,7 @@ static void rxrpc_process_jumbo_packet(struct rxrpc_call *call,
sp->hdr.seq += 1;
sp->hdr.serial += 1;
sp->hdr.flags = jhdr.flags;
- sp->hdr._rsvd = jhdr._rsvd;
+ sp->hdr._rsvd = ntohs(jhdr._rsvd);
_proto("Rx DATA Jumbo %%%u", sp->hdr.serial - 1);
@@ -575,14 +575,13 @@ done:
* post connection-level events to the connection
* - this includes challenges, responses and some aborts
*/
-static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
+static bool rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
struct sk_buff *skb)
{
_enter("%p,%p", conn, skb);
- rxrpc_get_connection(conn);
skb_queue_tail(&conn->rx_queue, skb);
- rxrpc_queue_conn(conn);
+ return rxrpc_queue_conn(conn);
}
/*
@@ -595,7 +594,7 @@ static void rxrpc_post_packet_to_local(struct rxrpc_local *local,
_enter("%p,%p", local, skb);
skb_queue_tail(&local->event_queue, skb);
- rxrpc_queue_work(&local->processor);
+ rxrpc_queue_local(local);
}
/*
@@ -627,32 +626,6 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
return 0;
}
-static struct rxrpc_connection *rxrpc_conn_from_local(struct rxrpc_local *local,
- struct sk_buff *skb)
-{
- struct rxrpc_peer *peer;
- struct rxrpc_connection *conn;
- struct sockaddr_rxrpc srx;
-
- rxrpc_get_addr_from_skb(local, skb, &srx);
- rcu_read_lock();
- peer = rxrpc_lookup_peer_rcu(local, &srx);
- if (!peer)
- goto cant_find_peer;
-
- conn = rxrpc_find_connection(local, peer, skb);
- rcu_read_unlock();
- if (!conn)
- goto cant_find_conn;
-
- return conn;
-
-cant_find_peer:
- rcu_read_unlock();
-cant_find_conn:
- return NULL;
-}
-
/*
* handle data received on the local endpoint
* - may be called in interrupt context
@@ -663,6 +636,7 @@ cant_find_conn:
*/
void rxrpc_data_ready(struct sock *sk)
{
+ struct rxrpc_connection *conn;
struct rxrpc_skb_priv *sp;
struct rxrpc_local *local = sk->sk_user_data;
struct sk_buff *skb;
@@ -726,34 +700,37 @@ void rxrpc_data_ready(struct sock *sk)
(sp->hdr.callNumber == 0 || sp->hdr.seq == 0))
goto bad_message;
- if (sp->hdr.callNumber == 0) {
- /* This is a connection-level packet. These should be
- * fairly rare, so the extra overhead of looking them up the
- * old-fashioned way doesn't really hurt */
- struct rxrpc_connection *conn;
+ rcu_read_lock();
- conn = rxrpc_conn_from_local(local, skb);
- if (!conn)
- goto cant_route_call;
+retry_find_conn:
+ conn = rxrpc_find_connection_rcu(local, skb);
+ if (!conn)
+ goto cant_route_call;
+ if (sp->hdr.callNumber == 0) {
+ /* Connection-level packet */
_debug("CONN %p {%d}", conn, conn->debug_id);
- rxrpc_post_packet_to_conn(conn, skb);
- rxrpc_put_connection(conn);
+ if (!rxrpc_post_packet_to_conn(conn, skb))
+ goto retry_find_conn;
} else {
- struct rxrpc_call *call;
+ /* Call-bound packets are routed by connection channel. */
+ unsigned int channel = sp->hdr.cid & RXRPC_CHANNELMASK;
+ struct rxrpc_channel *chan = &conn->channels[channel];
+ struct rxrpc_call *call = rcu_dereference(chan->call);
- call = rxrpc_find_call_hash(&sp->hdr, local,
- AF_INET, &ip_hdr(skb)->saddr);
- if (call)
- rxrpc_post_packet_to_call(call, skb);
- else
+ if (!call || atomic_read(&call->usage) == 0)
goto cant_route_call;
+
+ rxrpc_post_packet_to_call(call, skb);
}
+ rcu_read_unlock();
out:
return;
cant_route_call:
+ rcu_read_unlock();
+
_debug("can't route call");
if (sp->hdr.flags & RXRPC_CLIENT_INITIATED &&
sp->hdr.type == RXRPC_PACKET_TYPE_DATA) {
diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c
index e571403613c1..c21ad213b337 100644
--- a/net/rxrpc/insecure.c
+++ b/net/rxrpc/insecure.c
@@ -17,11 +17,12 @@ static int none_init_connection_security(struct rxrpc_connection *conn)
return 0;
}
-static void none_prime_packet_security(struct rxrpc_connection *conn)
+static int none_prime_packet_security(struct rxrpc_connection *conn)
{
+ return 0;
}
-static int none_secure_packet(const struct rxrpc_call *call,
+static int none_secure_packet(struct rxrpc_call *call,
struct sk_buff *skb,
size_t data_size,
void *sechdr)
@@ -29,7 +30,7 @@ static int none_secure_packet(const struct rxrpc_call *call,
return 0;
}
-static int none_verify_packet(const struct rxrpc_call *call,
+static int none_verify_packet(struct rxrpc_call *call,
struct sk_buff *skb,
u32 *_abort_code)
{
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 3ab7764f7cd8..a753796fbe8f 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -374,14 +374,17 @@ void __exit rxrpc_destroy_all_locals(void)
_enter("");
- if (list_empty(&rxrpc_local_endpoints))
- return;
+ flush_workqueue(rxrpc_workqueue);
- mutex_lock(&rxrpc_local_mutex);
- list_for_each_entry(local, &rxrpc_local_endpoints, link) {
- pr_err("AF_RXRPC: Leaked local %p {%d}\n",
- local, atomic_read(&local->usage));
+ if (!list_empty(&rxrpc_local_endpoints)) {
+ mutex_lock(&rxrpc_local_mutex);
+ list_for_each_entry(local, &rxrpc_local_endpoints, link) {
+ pr_err("AF_RXRPC: Leaked local %p {%d}\n",
+ local, atomic_read(&local->usage));
+ }
+ mutex_unlock(&rxrpc_local_mutex);
+ BUG();
}
- mutex_unlock(&rxrpc_local_mutex);
- BUG();
+
+ rcu_barrier();
}
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 01d4930a11f7..538e9831c699 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -189,7 +189,7 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp)
INIT_WORK(&peer->error_distributor,
&rxrpc_peer_error_distributor);
peer->service_conns = RB_ROOT;
- rwlock_init(&peer->conn_lock);
+ seqlock_init(&peer->service_conn_lock);
spin_lock_init(&peer->lock);
peer->debug_id = atomic_inc_return(&rxrpc_debug_id);
}
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index 500cdcdc843c..ced5f07444e5 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -14,15 +14,15 @@
#include <net/af_rxrpc.h>
#include "ar-internal.h"
-static const char *const rxrpc_conn_states[] = {
- [RXRPC_CONN_UNUSED] = "Unused ",
- [RXRPC_CONN_CLIENT] = "Client ",
- [RXRPC_CONN_SERVER_UNSECURED] = "SvUnsec ",
- [RXRPC_CONN_SERVER_CHALLENGING] = "SvChall ",
- [RXRPC_CONN_SERVER] = "SvSecure",
- [RXRPC_CONN_REMOTELY_ABORTED] = "RmtAbort",
- [RXRPC_CONN_LOCALLY_ABORTED] = "LocAbort",
- [RXRPC_CONN_NETWORK_ERROR] = "NetError",
+static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = {
+ [RXRPC_CONN_UNUSED] = "Unused ",
+ [RXRPC_CONN_CLIENT] = "Client ",
+ [RXRPC_CONN_SERVICE_UNSECURED] = "SvUnsec ",
+ [RXRPC_CONN_SERVICE_CHALLENGING] = "SvChall ",
+ [RXRPC_CONN_SERVICE] = "SvSecure",
+ [RXRPC_CONN_REMOTELY_ABORTED] = "RmtAbort",
+ [RXRPC_CONN_LOCALLY_ABORTED] = "LocAbort",
+ [RXRPC_CONN_NETWORK_ERROR] = "NetError",
};
/*
@@ -137,7 +137,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
if (v == &rxrpc_connections) {
seq_puts(seq,
"Proto Local Remote "
- " SvID ConnID Calls End Use State Key "
+ " SvID ConnID End Use State Key "
" Serial ISerial\n"
);
return 0;
@@ -154,13 +154,12 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
ntohs(conn->params.peer->srx.transport.sin.sin_port));
seq_printf(seq,
- "UDP %-22.22s %-22.22s %4x %08x %08x %s %3u"
+ "UDP %-22.22s %-22.22s %4x %08x %s %3u"
" %s %08x %08x %08x\n",
lbuff,
rbuff,
conn->params.service_id,
conn->proto.cid,
- conn->call_counter,
rxrpc_conn_is_service(conn) ? "Svc" : "Clt",
atomic_read(&conn->usage),
rxrpc_conn_states[conn->state],
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 23c05ec6fa28..63afa9e9cc08 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -103,43 +103,43 @@ error:
* prime the encryption state with the invariant parts of a connection's
* description
*/
-static void rxkad_prime_packet_security(struct rxrpc_connection *conn)
+static int rxkad_prime_packet_security(struct rxrpc_connection *conn)
{
struct rxrpc_key_token *token;
SKCIPHER_REQUEST_ON_STACK(req, conn->cipher);
- struct scatterlist sg[2];
+ struct scatterlist sg;
struct rxrpc_crypt iv;
- struct {
- __be32 x[4];
- } tmpbuf __attribute__((aligned(16))); /* must all be in same page */
+ __be32 *tmpbuf;
+ size_t tmpsize = 4 * sizeof(__be32);
_enter("");
if (!conn->params.key)
- return;
+ return 0;
+
+ tmpbuf = kmalloc(tmpsize, GFP_KERNEL);
+ if (!tmpbuf)
+ return -ENOMEM;
token = conn->params.key->payload.data[0];
memcpy(&iv, token->kad->session_key, sizeof(iv));
- tmpbuf.x[0] = htonl(conn->proto.epoch);
- tmpbuf.x[1] = htonl(conn->proto.cid);
- tmpbuf.x[2] = 0;
- tmpbuf.x[3] = htonl(conn->security_ix);
-
- sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf));
- sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf));
+ tmpbuf[0] = htonl(conn->proto.epoch);
+ tmpbuf[1] = htonl(conn->proto.cid);
+ tmpbuf[2] = 0;
+ tmpbuf[3] = htonl(conn->security_ix);
+ sg_init_one(&sg, tmpbuf, tmpsize);
skcipher_request_set_tfm(req, conn->cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(tmpbuf), iv.x);
-
+ skcipher_request_set_crypt(req, &sg, &sg, tmpsize, iv.x);
crypto_skcipher_encrypt(req);
skcipher_request_zero(req);
- memcpy(&conn->csum_iv, &tmpbuf.x[2], sizeof(conn->csum_iv));
- ASSERTCMP((u32 __force)conn->csum_iv.n[0], ==, (u32 __force)tmpbuf.x[2]);
-
- _leave("");
+ memcpy(&conn->csum_iv, tmpbuf + 2, sizeof(conn->csum_iv));
+ kfree(tmpbuf);
+ _leave(" = 0");
+ return 0;
}
/*
@@ -152,12 +152,9 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
{
struct rxrpc_skb_priv *sp;
SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
+ struct rxkad_level1_hdr hdr;
struct rxrpc_crypt iv;
- struct scatterlist sg[2];
- struct {
- struct rxkad_level1_hdr hdr;
- __be32 first; /* first four bytes of data and padding */
- } tmpbuf __attribute__((aligned(8))); /* must all be in same page */
+ struct scatterlist sg;
u16 check;
sp = rxrpc_skb(skb);
@@ -167,24 +164,19 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
check = sp->hdr.seq ^ sp->hdr.callNumber;
data_size |= (u32)check << 16;
- tmpbuf.hdr.data_size = htonl(data_size);
- memcpy(&tmpbuf.first, sechdr + 4, sizeof(tmpbuf.first));
+ hdr.data_size = htonl(data_size);
+ memcpy(sechdr, &hdr, sizeof(hdr));
/* start the encryption afresh */
memset(&iv, 0, sizeof(iv));
- sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf));
- sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf));
-
+ sg_init_one(&sg, sechdr, 8);
skcipher_request_set_tfm(req, call->conn->cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(tmpbuf), iv.x);
-
+ skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x);
crypto_skcipher_encrypt(req);
skcipher_request_zero(req);
- memcpy(sechdr, &tmpbuf, sizeof(tmpbuf));
-
_leave(" = 0");
return 0;
}
@@ -198,8 +190,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
void *sechdr)
{
const struct rxrpc_key_token *token;
- struct rxkad_level2_hdr rxkhdr
- __attribute__((aligned(8))); /* must be all on one page */
+ struct rxkad_level2_hdr rxkhdr;
struct rxrpc_skb_priv *sp;
SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
struct rxrpc_crypt iv;
@@ -218,18 +209,16 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
rxkhdr.data_size = htonl(data_size | (u32)check << 16);
rxkhdr.checksum = 0;
+ memcpy(sechdr, &rxkhdr, sizeof(rxkhdr));
/* encrypt from the session key */
token = call->conn->params.key->payload.data[0];
memcpy(&iv, token->kad->session_key, sizeof(iv));
sg_init_one(&sg[0], sechdr, sizeof(rxkhdr));
- sg_init_one(&sg[1], &rxkhdr, sizeof(rxkhdr));
-
skcipher_request_set_tfm(req, call->conn->cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(rxkhdr), iv.x);
-
+ skcipher_request_set_crypt(req, &sg[0], &sg[0], sizeof(rxkhdr), iv.x);
crypto_skcipher_encrypt(req);
/* we want to encrypt the skbuff in-place */
@@ -243,9 +232,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
sg_init_table(sg, nsg);
skb_to_sgvec(skb, sg, 0, len);
-
skcipher_request_set_crypt(req, sg, sg, len, iv.x);
-
crypto_skcipher_encrypt(req);
_leave(" = 0");
@@ -259,7 +246,7 @@ out:
/*
* checksum an RxRPC packet header
*/
-static int rxkad_secure_packet(const struct rxrpc_call *call,
+static int rxkad_secure_packet(struct rxrpc_call *call,
struct sk_buff *skb,
size_t data_size,
void *sechdr)
@@ -267,10 +254,7 @@ static int rxkad_secure_packet(const struct rxrpc_call *call,
struct rxrpc_skb_priv *sp;
SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
struct rxrpc_crypt iv;
- struct scatterlist sg[2];
- struct {
- __be32 x[2];
- } tmpbuf __attribute__((aligned(8))); /* must all be in same page */
+ struct scatterlist sg;
u32 x, y;
int ret;
@@ -293,20 +277,17 @@ static int rxkad_secure_packet(const struct rxrpc_call *call,
/* calculate the security checksum */
x = call->channel << (32 - RXRPC_CIDSHIFT);
x |= sp->hdr.seq & 0x3fffffff;
- tmpbuf.x[0] = htonl(sp->hdr.callNumber);
- tmpbuf.x[1] = htonl(x);
-
- sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf));
- sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf));
+ call->crypto_buf[0] = htonl(sp->hdr.callNumber);
+ call->crypto_buf[1] = htonl(x);
+ sg_init_one(&sg, call->crypto_buf, 8);
skcipher_request_set_tfm(req, call->conn->cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(tmpbuf), iv.x);
-
+ skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x);
crypto_skcipher_encrypt(req);
skcipher_request_zero(req);
- y = ntohl(tmpbuf.x[1]);
+ y = ntohl(call->crypto_buf[1]);
y = (y >> 16) & 0xffff;
if (y == 0)
y = 1; /* zero checksums are not permitted */
@@ -367,7 +348,6 @@ static int rxkad_verify_packet_auth(const struct rxrpc_call *call,
skcipher_request_set_tfm(req, call->conn->cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, sg, sg, 8, iv.x);
-
crypto_skcipher_decrypt(req);
skcipher_request_zero(req);
@@ -452,7 +432,6 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call,
skcipher_request_set_tfm(req, call->conn->cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, sg, sg, skb->len, iv.x);
-
crypto_skcipher_decrypt(req);
skcipher_request_zero(req);
if (sg != _sg)
@@ -498,17 +477,14 @@ nomem:
/*
* verify the security on a received packet
*/
-static int rxkad_verify_packet(const struct rxrpc_call *call,
+static int rxkad_verify_packet(struct rxrpc_call *call,
struct sk_buff *skb,
u32 *_abort_code)
{
SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
struct rxrpc_skb_priv *sp;
struct rxrpc_crypt iv;
- struct scatterlist sg[2];
- struct {
- __be32 x[2];
- } tmpbuf __attribute__((aligned(8))); /* must all be in same page */
+ struct scatterlist sg;
u16 cksum;
u32 x, y;
int ret;
@@ -533,20 +509,17 @@ static int rxkad_verify_packet(const struct rxrpc_call *call,
/* validate the security checksum */
x = call->channel << (32 - RXRPC_CIDSHIFT);
x |= sp->hdr.seq & 0x3fffffff;
- tmpbuf.x[0] = htonl(call->call_id);
- tmpbuf.x[1] = htonl(x);
-
- sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf));
- sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf));
+ call->crypto_buf[0] = htonl(call->call_id);
+ call->crypto_buf[1] = htonl(x);
+ sg_init_one(&sg, call->crypto_buf, 8);
skcipher_request_set_tfm(req, call->conn->cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(tmpbuf), iv.x);
-
+ skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x);
crypto_skcipher_encrypt(req);
skcipher_request_zero(req);
- y = ntohl(tmpbuf.x[1]);
+ y = ntohl(call->crypto_buf[1]);
cksum = (y >> 16) & 0xffff;
if (cksum == 0)
cksum = 1; /* zero checksums are not permitted */
@@ -710,29 +683,6 @@ static void rxkad_calc_response_checksum(struct rxkad_response *response)
}
/*
- * load a scatterlist with a potentially split-page buffer
- */
-static void rxkad_sg_set_buf2(struct scatterlist sg[2],
- void *buf, size_t buflen)
-{
- int nsg = 1;
-
- sg_init_table(sg, 2);
-
- sg_set_buf(&sg[0], buf, buflen);
- if (sg[0].offset + buflen > PAGE_SIZE) {
- /* the buffer was split over two pages */
- sg[0].length = PAGE_SIZE - sg[0].offset;
- sg_set_buf(&sg[1], buf + sg[0].length, buflen - sg[0].length);
- nsg++;
- }
-
- sg_mark_end(&sg[nsg - 1]);
-
- ASSERTCMP(sg[0].length + sg[1].length, ==, buflen);
-}
-
-/*
* encrypt the response packet
*/
static void rxkad_encrypt_response(struct rxrpc_connection *conn,
@@ -741,17 +691,16 @@ static void rxkad_encrypt_response(struct rxrpc_connection *conn,
{
SKCIPHER_REQUEST_ON_STACK(req, conn->cipher);
struct rxrpc_crypt iv;
- struct scatterlist sg[2];
+ struct scatterlist sg[1];
/* continue encrypting from where we left off */
memcpy(&iv, s2->session_key, sizeof(iv));
- rxkad_sg_set_buf2(sg, &resp->encrypted, sizeof(resp->encrypted));
-
+ sg_init_table(sg, 1);
+ sg_set_buf(sg, &resp->encrypted, sizeof(resp->encrypted));
skcipher_request_set_tfm(req, conn->cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x);
-
crypto_skcipher_encrypt(req);
skcipher_request_zero(req);
}
@@ -818,14 +767,10 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
resp.kvno = htonl(token->kad->kvno);
resp.ticket_len = htonl(token->kad->ticket_len);
- resp.encrypted.call_id[0] =
- htonl(conn->channels[0] ? conn->channels[0]->call_id : 0);
- resp.encrypted.call_id[1] =
- htonl(conn->channels[1] ? conn->channels[1]->call_id : 0);
- resp.encrypted.call_id[2] =
- htonl(conn->channels[2] ? conn->channels[2]->call_id : 0);
- resp.encrypted.call_id[3] =
- htonl(conn->channels[3] ? conn->channels[3]->call_id : 0);
+ resp.encrypted.call_id[0] = htonl(conn->channels[0].call_counter);
+ resp.encrypted.call_id[1] = htonl(conn->channels[1].call_counter);
+ resp.encrypted.call_id[2] = htonl(conn->channels[2].call_counter);
+ resp.encrypted.call_id[3] = htonl(conn->channels[3].call_counter);
/* calculate the response checksum and then do the encryption */
rxkad_calc_response_checksum(&resp);
@@ -887,10 +832,8 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
}
sg_init_one(&sg[0], ticket, ticket_len);
-
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, sg, sg, ticket_len, iv.x);
-
crypto_skcipher_decrypt(req);
skcipher_request_free(req);
@@ -1001,7 +944,7 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn,
const struct rxrpc_crypt *session_key)
{
SKCIPHER_REQUEST_ON_STACK(req, rxkad_ci);
- struct scatterlist sg[2];
+ struct scatterlist sg[1];
struct rxrpc_crypt iv;
_enter(",,%08x%08x",
@@ -1016,12 +959,11 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn,
memcpy(&iv, session_key, sizeof(iv));
- rxkad_sg_set_buf2(sg, &resp->encrypted, sizeof(resp->encrypted));
-
+ sg_init_table(sg, 1);
+ sg_set_buf(sg, &resp->encrypted, sizeof(resp->encrypted));
skcipher_request_set_tfm(req, rxkad_ci);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x);
-
crypto_skcipher_decrypt(req);
skcipher_request_zero(req);
@@ -1045,7 +987,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
void *ticket;
u32 abort_code, version, kvno, ticket_len, level;
__be32 csum;
- int ret;
+ int ret, i;
_enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key));
@@ -1108,11 +1050,26 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
if (response.encrypted.checksum != csum)
goto protocol_error_free;
- if (ntohl(response.encrypted.call_id[0]) > INT_MAX ||
- ntohl(response.encrypted.call_id[1]) > INT_MAX ||
- ntohl(response.encrypted.call_id[2]) > INT_MAX ||
- ntohl(response.encrypted.call_id[3]) > INT_MAX)
- goto protocol_error_free;
+ spin_lock(&conn->channel_lock);
+ for (i = 0; i < RXRPC_MAXCALLS; i++) {
+ struct rxrpc_call *call;
+ u32 call_id = ntohl(response.encrypted.call_id[i]);
+
+ if (call_id > INT_MAX)
+ goto protocol_error_unlock;
+
+ if (call_id < conn->channels[i].call_counter)
+ goto protocol_error_unlock;
+ if (call_id > conn->channels[i].call_counter) {
+ call = rcu_dereference_protected(
+ conn->channels[i].call,
+ lockdep_is_held(&conn->channel_lock));
+ if (call && call->state < RXRPC_CALL_COMPLETE)
+ goto protocol_error_unlock;
+ conn->channels[i].call_counter = call_id;
+ }
+ }
+ spin_unlock(&conn->channel_lock);
abort_code = RXKADOUTOFSEQUENCE;
if (ntohl(response.encrypted.inc_nonce) != conn->security_nonce + 1)
@@ -1137,6 +1094,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
_leave(" = 0");
return 0;
+protocol_error_unlock:
+ spin_unlock(&conn->channel_lock);
protocol_error_free:
kfree(ticket);
protocol_error:
diff --git a/net/rxrpc/utils.c b/net/rxrpc/utils.c
index f28122a15a24..b88914d53ca5 100644
--- a/net/rxrpc/utils.c
+++ b/net/rxrpc/utils.c
@@ -10,32 +10,37 @@
*/
#include <linux/ip.h>
+#include <linux/ipv6.h>
#include <linux/udp.h>
#include "ar-internal.h"
/*
- * Set up an RxRPC address from a socket buffer.
+ * Fill out a peer address from a socket buffer containing a packet.
*/
-void rxrpc_get_addr_from_skb(struct rxrpc_local *local,
- const struct sk_buff *skb,
- struct sockaddr_rxrpc *srx)
+int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *srx, struct sk_buff *skb)
{
memset(srx, 0, sizeof(*srx));
- srx->transport_type = local->srx.transport_type;
- srx->transport.family = local->srx.transport.family;
- /* Can we see an ipv4 UDP packet on an ipv6 UDP socket? and vice
- * versa?
- */
- switch (srx->transport.family) {
- case AF_INET:
+ switch (ntohs(skb->protocol)) {
+ case ETH_P_IP:
+ srx->transport_type = SOCK_DGRAM;
+ srx->transport_len = sizeof(srx->transport.sin);
+ srx->transport.sin.sin_family = AF_INET;
srx->transport.sin.sin_port = udp_hdr(skb)->source;
- srx->transport_len = sizeof(struct sockaddr_in);
- memcpy(&srx->transport.sin.sin_addr, &ip_hdr(skb)->saddr,
- sizeof(struct in_addr));
- break;
+ srx->transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
+ return 0;
+
+ case ETH_P_IPV6:
+ srx->transport_type = SOCK_DGRAM;
+ srx->transport_len = sizeof(srx->transport.sin6);
+ srx->transport.sin6.sin6_family = AF_INET6;
+ srx->transport.sin6.sin6_port = udp_hdr(skb)->source;
+ srx->transport.sin6.sin6_addr = ipv6_hdr(skb)->saddr;
+ return 0;
default:
- BUG();
+ pr_warn_ratelimited("AF_RXRPC: Unknown eth protocol %u\n",
+ ntohs(skb->protocol));
+ return -EAFNOSUPPORT;
}
}
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index dff92ea772fe..3ddc7bd74ecb 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -115,9 +115,9 @@ struct hfsc_class {
struct gnet_stats_basic_packed bstats;
struct gnet_stats_queue qstats;
struct gnet_stats_rate_est64 rate_est;
- unsigned int level; /* class level in hierarchy */
struct tcf_proto __rcu *filter_list; /* filter list */
unsigned int filter_cnt; /* filter count */
+ unsigned int level; /* class level in hierarchy */
struct hfsc_sched *sched; /* scheduler data */
struct hfsc_class *cl_parent; /* parent class */
@@ -165,10 +165,10 @@ struct hfsc_class {
struct runtime_sc cl_virtual; /* virtual curve */
struct runtime_sc cl_ulimit; /* upperlimit curve */
- unsigned long cl_flags; /* which curves are valid */
- unsigned long cl_vtperiod; /* vt period sequence number */
- unsigned long cl_parentperiod;/* parent's vt period sequence number*/
- unsigned long cl_nactive; /* number of active children */
+ u8 cl_flags; /* which curves are valid */
+ u32 cl_vtperiod; /* vt period sequence number */
+ u32 cl_parentperiod;/* parent's vt period sequence number*/
+ u32 cl_nactive; /* number of active children */
};
struct hfsc_sched {
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index e1849f3714ad..1c23060c41a6 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -268,6 +268,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
goto fail_init;
asoc->active_key_id = ep->active_key_id;
+ asoc->prsctp_enable = ep->prsctp_enable;
/* Save the hmacs and chunks list into this association */
if (ep->auth_hmacs_list)
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 1eb94bf18ef4..a55e54738b81 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -335,13 +335,32 @@ errout:
/* Check whether this message has expired. */
int sctp_chunk_abandoned(struct sctp_chunk *chunk)
{
- struct sctp_datamsg *msg = chunk->msg;
+ if (!chunk->asoc->prsctp_enable ||
+ !SCTP_PR_POLICY(chunk->sinfo.sinfo_flags)) {
+ struct sctp_datamsg *msg = chunk->msg;
+
+ if (!msg->can_abandon)
+ return 0;
+
+ if (time_after(jiffies, msg->expires_at))
+ return 1;
- if (!msg->can_abandon)
return 0;
+ }
- if (time_after(jiffies, msg->expires_at))
+ if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) &&
+ time_after(jiffies, chunk->prsctp_param)) {
+ if (chunk->sent_count)
+ chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++;
+ else
+ chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
+ return 1;
+ } else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) &&
+ chunk->sent_count > chunk->prsctp_param) {
+ chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
return 1;
+ }
+ /* PRIO policy is processed by sendmsg, not here */
return 0;
}
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 9d494e35e7f9..1f03065686fe 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -163,6 +163,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
*/
ep->auth_hmacs_list = auth_hmacs;
ep->auth_chunk_list = auth_chunks;
+ ep->prsctp_enable = net->sctp.prsctp_enable;
return ep;
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 1541a91d6d9d..7425f6c23888 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -316,6 +316,8 @@ static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet,
packet->has_data = 1;
/* timestamp the chunk for rtx purposes */
chunk->sent_at = jiffies;
+ /* Mainly used for prsctp RTX policy */
+ chunk->sent_count++;
break;
case SCTP_CID_COOKIE_ECHO:
packet->has_cookie_echo = 1;
@@ -582,9 +584,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
*/
pkt_size -= WORD_ROUND(chunk->skb->len);
- if (chunk == packet->auth && !list_empty(&packet->chunk_list))
- list_add(&chunk->list, &packet->chunk_list);
- else if (!sctp_chunk_is_data(chunk))
+ if (!sctp_chunk_is_data(chunk) && chunk != packet->auth)
sctp_chunk_free(chunk);
if (!pkt_size)
@@ -605,6 +605,18 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
(struct sctp_auth_chunk *)auth,
gfp);
+ if (packet->auth) {
+ if (!list_empty(&packet->chunk_list)) {
+ /* We will generate more packets, so re-queue
+ * auth chunk.
+ */
+ list_add(&chunk->list, &packet->chunk_list);
+ } else {
+ sctp_chunk_free(packet->auth);
+ packet->auth = NULL;
+ }
+ }
+
if (!gso)
break;
@@ -735,6 +747,8 @@ err:
}
goto out;
nomem:
+ if (packet->auth && list_empty(&packet->auth->list))
+ sctp_chunk_free(packet->auth);
err = -ENOMEM;
goto err;
}
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 084718f9b3da..72e54a416af6 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -326,6 +326,9 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp)
sctp_chunk_hold(chunk);
sctp_outq_tail_data(q, chunk);
+ if (chunk->asoc->prsctp_enable &&
+ SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
+ chunk->asoc->sent_cnt_removable++;
if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
SCTP_INC_STATS(net, SCTP_MIB_OUTUNORDERCHUNKS);
else
@@ -372,6 +375,96 @@ static void sctp_insert_list(struct list_head *head, struct list_head *new)
list_add_tail(new, head);
}
+static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
+ struct sctp_sndrcvinfo *sinfo,
+ struct list_head *queue, int msg_len)
+{
+ struct sctp_chunk *chk, *temp;
+
+ list_for_each_entry_safe(chk, temp, queue, transmitted_list) {
+ if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
+ chk->prsctp_param <= sinfo->sinfo_timetolive)
+ continue;
+
+ list_del_init(&chk->transmitted_list);
+ sctp_insert_list(&asoc->outqueue.abandoned,
+ &chk->transmitted_list);
+
+ asoc->sent_cnt_removable--;
+ asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
+
+ if (!chk->tsn_gap_acked) {
+ if (chk->transport)
+ chk->transport->flight_size -=
+ sctp_data_size(chk);
+ asoc->outqueue.outstanding_bytes -= sctp_data_size(chk);
+ }
+
+ msg_len -= SCTP_DATA_SNDSIZE(chk) +
+ sizeof(struct sk_buff) +
+ sizeof(struct sctp_chunk);
+ if (msg_len <= 0)
+ break;
+ }
+
+ return msg_len;
+}
+
+static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
+ struct sctp_sndrcvinfo *sinfo,
+ struct list_head *queue, int msg_len)
+{
+ struct sctp_chunk *chk, *temp;
+
+ list_for_each_entry_safe(chk, temp, queue, list) {
+ if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
+ chk->prsctp_param <= sinfo->sinfo_timetolive)
+ continue;
+
+ list_del_init(&chk->list);
+ asoc->sent_cnt_removable--;
+ asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
+
+ msg_len -= SCTP_DATA_SNDSIZE(chk) +
+ sizeof(struct sk_buff) +
+ sizeof(struct sctp_chunk);
+ sctp_chunk_free(chk);
+ if (msg_len <= 0)
+ break;
+ }
+
+ return msg_len;
+}
+
+/* Abandon the chunks according their priorities */
+void sctp_prsctp_prune(struct sctp_association *asoc,
+ struct sctp_sndrcvinfo *sinfo, int msg_len)
+{
+ struct sctp_transport *transport;
+
+ if (!asoc->prsctp_enable || !asoc->sent_cnt_removable)
+ return;
+
+ msg_len = sctp_prsctp_prune_sent(asoc, sinfo,
+ &asoc->outqueue.retransmit,
+ msg_len);
+ if (msg_len <= 0)
+ return;
+
+ list_for_each_entry(transport, &asoc->peer.transport_addr_list,
+ transports) {
+ msg_len = sctp_prsctp_prune_sent(asoc, sinfo,
+ &transport->transmitted,
+ msg_len);
+ if (msg_len <= 0)
+ return;
+ }
+
+ sctp_prsctp_prune_unsent(asoc, sinfo,
+ &asoc->outqueue.out_chunk_list,
+ msg_len);
+}
+
/* Mark all the eligible packets on a transport for retransmission. */
void sctp_retransmit_mark(struct sctp_outq *q,
struct sctp_transport *transport,
@@ -962,6 +1055,9 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
/* Mark as failed send. */
sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM);
+ if (asoc->prsctp_enable &&
+ SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
+ asoc->sent_cnt_removable--;
sctp_chunk_free(chunk);
continue;
}
@@ -1251,6 +1347,9 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk)
tsn = ntohl(tchunk->subh.data_hdr->tsn);
if (TSN_lte(tsn, ctsn)) {
list_del_init(&tchunk->transmitted_list);
+ if (asoc->prsctp_enable &&
+ SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
+ asoc->sent_cnt_removable--;
sctp_chunk_free(tchunk);
}
}
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 56f364d8f932..1c96f4740e67 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -261,7 +261,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
chunksize += WORD_ROUND(SCTP_SAT_LEN(num_types));
chunksize += sizeof(ecap_param);
- if (net->sctp.prsctp_enable)
+ if (asoc->prsctp_enable)
chunksize += sizeof(prsctp_param);
/* ADDIP: Section 4.2.7:
@@ -355,7 +355,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
sctp_addto_param(retval, num_ext, extensions);
}
- if (net->sctp.prsctp_enable)
+ if (asoc->prsctp_enable)
sctp_addto_chunk(retval, sizeof(prsctp_param), &prsctp_param);
if (sp->adaptation_ind) {
@@ -711,6 +711,20 @@ nodata:
return retval;
}
+static void sctp_set_prsctp_policy(struct sctp_chunk *chunk,
+ const struct sctp_sndrcvinfo *sinfo)
+{
+ if (!chunk->asoc->prsctp_enable)
+ return;
+
+ if (SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags))
+ chunk->prsctp_param =
+ jiffies + msecs_to_jiffies(sinfo->sinfo_timetolive);
+ else if (SCTP_PR_RTX_ENABLED(sinfo->sinfo_flags) ||
+ SCTP_PR_PRIO_ENABLED(sinfo->sinfo_flags))
+ chunk->prsctp_param = sinfo->sinfo_timetolive;
+}
+
/* Make a DATA chunk for the given association from the provided
* parameters. However, do not populate the data payload.
*/
@@ -744,6 +758,7 @@ struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc,
retval->subh.data_hdr = sctp_addto_chunk(retval, sizeof(dp), &dp);
memcpy(&retval->sinfo, sinfo, sizeof(struct sctp_sndrcvinfo));
+ sctp_set_prsctp_policy(retval, sinfo);
nodata:
return retval;
@@ -2024,8 +2039,8 @@ static void sctp_process_ext_param(struct sctp_association *asoc,
for (i = 0; i < num_ext; i++) {
switch (param.ext->chunks[i]) {
case SCTP_CID_FWD_TSN:
- if (net->sctp.prsctp_enable && !asoc->peer.prsctp_capable)
- asoc->peer.prsctp_capable = 1;
+ if (asoc->prsctp_enable && !asoc->peer.prsctp_capable)
+ asoc->peer.prsctp_capable = 1;
break;
case SCTP_CID_AUTH:
/* if the peer reports AUTH, assume that he
@@ -2169,7 +2184,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net,
break;
case SCTP_PARAM_FWD_TSN_SUPPORT:
- if (net->sctp.prsctp_enable)
+ if (ep->prsctp_enable)
break;
goto fallthrough;
@@ -2653,7 +2668,7 @@ do_addr_param:
break;
case SCTP_PARAM_FWD_TSN_SUPPORT:
- if (net->sctp.prsctp_enable) {
+ if (asoc->prsctp_enable) {
asoc->peer.prsctp_capable = 1;
break;
}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index cdabbd8219b1..71c7dc5ea62e 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1914,6 +1914,9 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
goto out_free;
}
+ if (sctp_wspace(asoc) < msg_len)
+ sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
+
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
if (!sctp_wspace(asoc)) {
err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
@@ -3661,6 +3664,80 @@ static int sctp_setsockopt_recvnxtinfo(struct sock *sk,
return 0;
}
+static int sctp_setsockopt_pr_supported(struct sock *sk,
+ char __user *optval,
+ unsigned int optlen)
+{
+ struct sctp_assoc_value params;
+ struct sctp_association *asoc;
+ int retval = -EINVAL;
+
+ if (optlen != sizeof(params))
+ goto out;
+
+ if (copy_from_user(&params, optval, optlen)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (asoc) {
+ asoc->prsctp_enable = !!params.assoc_value;
+ } else if (!params.assoc_id) {
+ struct sctp_sock *sp = sctp_sk(sk);
+
+ sp->ep->prsctp_enable = !!params.assoc_value;
+ } else {
+ goto out;
+ }
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
+static int sctp_setsockopt_default_prinfo(struct sock *sk,
+ char __user *optval,
+ unsigned int optlen)
+{
+ struct sctp_default_prinfo info;
+ struct sctp_association *asoc;
+ int retval = -EINVAL;
+
+ if (optlen != sizeof(info))
+ goto out;
+
+ if (copy_from_user(&info, optval, sizeof(info))) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ if (info.pr_policy & ~SCTP_PR_SCTP_MASK)
+ goto out;
+
+ if (info.pr_policy == SCTP_PR_SCTP_NONE)
+ info.pr_value = 0;
+
+ asoc = sctp_id2assoc(sk, info.pr_assoc_id);
+ if (asoc) {
+ SCTP_PR_SET_POLICY(asoc->default_flags, info.pr_policy);
+ asoc->default_timetolive = info.pr_value;
+ } else if (!info.pr_assoc_id) {
+ struct sctp_sock *sp = sctp_sk(sk);
+
+ SCTP_PR_SET_POLICY(sp->default_flags, info.pr_policy);
+ sp->default_timetolive = info.pr_value;
+ } else {
+ goto out;
+ }
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
/* API 6.2 setsockopt(), getsockopt()
*
* Applications use setsockopt() and getsockopt() to set or retrieve
@@ -3821,6 +3898,12 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
case SCTP_RECVNXTINFO:
retval = sctp_setsockopt_recvnxtinfo(sk, optval, optlen);
break;
+ case SCTP_PR_SUPPORTED:
+ retval = sctp_setsockopt_pr_supported(sk, optval, optlen);
+ break;
+ case SCTP_DEFAULT_PRINFO:
+ retval = sctp_setsockopt_default_prinfo(sk, optval, optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
@@ -6166,6 +6249,148 @@ static int sctp_getsockopt_recvnxtinfo(struct sock *sk, int len,
return 0;
}
+static int sctp_getsockopt_pr_supported(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ struct sctp_assoc_value params;
+ struct sctp_association *asoc;
+ int retval = -EFAULT;
+
+ if (len < sizeof(params)) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ len = sizeof(params);
+ if (copy_from_user(&params, optval, len))
+ goto out;
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (asoc) {
+ params.assoc_value = asoc->prsctp_enable;
+ } else if (!params.assoc_id) {
+ struct sctp_sock *sp = sctp_sk(sk);
+
+ params.assoc_value = sp->ep->prsctp_enable;
+ } else {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ if (put_user(len, optlen))
+ goto out;
+
+ if (copy_to_user(optval, &params, len))
+ goto out;
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
+static int sctp_getsockopt_default_prinfo(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ struct sctp_default_prinfo info;
+ struct sctp_association *asoc;
+ int retval = -EFAULT;
+
+ if (len < sizeof(info)) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ len = sizeof(info);
+ if (copy_from_user(&info, optval, len))
+ goto out;
+
+ asoc = sctp_id2assoc(sk, info.pr_assoc_id);
+ if (asoc) {
+ info.pr_policy = SCTP_PR_POLICY(asoc->default_flags);
+ info.pr_value = asoc->default_timetolive;
+ } else if (!info.pr_assoc_id) {
+ struct sctp_sock *sp = sctp_sk(sk);
+
+ info.pr_policy = SCTP_PR_POLICY(sp->default_flags);
+ info.pr_value = sp->default_timetolive;
+ } else {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ if (put_user(len, optlen))
+ goto out;
+
+ if (copy_to_user(optval, &info, len))
+ goto out;
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
+static int sctp_getsockopt_pr_assocstatus(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ struct sctp_prstatus params;
+ struct sctp_association *asoc;
+ int policy;
+ int retval = -EINVAL;
+
+ if (len < sizeof(params))
+ goto out;
+
+ len = sizeof(params);
+ if (copy_from_user(&params, optval, len)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ policy = params.sprstat_policy;
+ if (policy & ~SCTP_PR_SCTP_MASK)
+ goto out;
+
+ asoc = sctp_id2assoc(sk, params.sprstat_assoc_id);
+ if (!asoc)
+ goto out;
+
+ if (policy == SCTP_PR_SCTP_NONE) {
+ params.sprstat_abandoned_unsent = 0;
+ params.sprstat_abandoned_sent = 0;
+ for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) {
+ params.sprstat_abandoned_unsent +=
+ asoc->abandoned_unsent[policy];
+ params.sprstat_abandoned_sent +=
+ asoc->abandoned_sent[policy];
+ }
+ } else {
+ params.sprstat_abandoned_unsent =
+ asoc->abandoned_unsent[__SCTP_PR_INDEX(policy)];
+ params.sprstat_abandoned_sent =
+ asoc->abandoned_sent[__SCTP_PR_INDEX(policy)];
+ }
+
+ if (put_user(len, optlen)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ if (copy_to_user(optval, &params, len)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
static int sctp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
@@ -6319,6 +6544,17 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
case SCTP_RECVNXTINFO:
retval = sctp_getsockopt_recvnxtinfo(sk, len, optval, optlen);
break;
+ case SCTP_PR_SUPPORTED:
+ retval = sctp_getsockopt_pr_supported(sk, len, optval, optlen);
+ break;
+ case SCTP_DEFAULT_PRINFO:
+ retval = sctp_getsockopt_default_prinfo(sk, len, optval,
+ optlen);
+ break;
+ case SCTP_PR_ASSOC_STATUS:
+ retval = sctp_getsockopt_pr_assocstatus(sk, len, optval,
+ optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
@@ -6866,7 +7102,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
if (cmsgs->srinfo->sinfo_flags &
~(SCTP_UNORDERED | SCTP_ADDR_OVER |
- SCTP_SACK_IMMEDIATELY |
+ SCTP_SACK_IMMEDIATELY | SCTP_PR_SCTP_MASK |
SCTP_ABORT | SCTP_EOF))
return -EINVAL;
break;
@@ -6890,7 +7126,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
if (cmsgs->sinfo->snd_flags &
~(SCTP_UNORDERED | SCTP_ADDR_OVER |
- SCTP_SACK_IMMEDIATELY |
+ SCTP_SACK_IMMEDIATELY | SCTP_PR_SCTP_MASK |
SCTP_ABORT | SCTP_EOF))
return -EINVAL;
break;